Skip to content

Commit a9daa98

Browse files
mergify[bot]fearful-symmetryhaesbaert
authored
[8.18](backport #42398) Handle leak of process info in hostfs provider for add_session_metadata (#45322)
* Handle leak of process info in `hostfs` provider for `add_session_metadata` (#42398) * handle leak in hostfs provider for sessionmd * add metrics, clean up * fix tests * add process reaper for dropped exit events * remove test code * linter * more testing, fix mock provider * fix error checks * clean up, add session maps to reaper, expand metrics * fix tests * fix tests * format * docs (cherry picked from commit d6ff82b) * Handle overflows We now check for G115, most overflows are impossible, like converting s63 seconds to u64 seconds for date (will overflow in 292 billion years). Pids are actually 32bit in the kernel so casting * -> u32 is safe. This is a backport, and I'd hate to introduce a bug by adding unecessarily overflow handling. --------- Co-authored-by: Alex K. <8418476+fearful-symmetry@users.noreply.github.com> Co-authored-by: Christiano Haesbaert <haesbaert@elastic.co>
1 parent 0cfa26e commit a9daa98

File tree

18 files changed

+755
-185
lines changed

18 files changed

+755
-185
lines changed

x-pack/auditbeat/processors/sessionmd/add_session_metadata.go

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@ package sessionmd
99
import (
1010
"context"
1111
"fmt"
12+
"math"
1213
"reflect"
1314
"strconv"
15+
"sync/atomic"
1416

1517
"github.com/elastic/beats/v7/libbeat/beat"
1618
"github.com/elastic/beats/v7/libbeat/processors"
@@ -23,6 +25,7 @@ import (
2325
cfg "github.com/elastic/elastic-agent-libs/config"
2426
"github.com/elastic/elastic-agent-libs/logp"
2527
"github.com/elastic/elastic-agent-libs/mapstr"
28+
"github.com/elastic/elastic-agent-libs/monitoring"
2629
)
2730

2831
const (
@@ -37,6 +40,9 @@ func InitializeModule() {
3740
processors.RegisterPlugin(processorName, New)
3841
}
3942

43+
// instanceID assigns a uniqueID to every instance of the metrics handler for the procfs DB
44+
var instanceID atomic.Uint32
45+
4046
type addSessionMetadata struct {
4147
ctx context.Context
4248
cancel context.CancelFunc
@@ -56,9 +62,17 @@ func New(cfg *cfg.C) (beat.Processor, error) {
5662

5763
logger := logp.NewLogger(logName)
5864

65+
id := int(instanceID.Add(1))
66+
regName := "processor.add_session_metadata.processdb"
67+
// if more than one instance of the DB is running, start to increment the metrics keys.
68+
if id > 1 {
69+
regName = fmt.Sprintf("%s.%d", regName, id)
70+
}
71+
metricsReg := monitoring.Default.NewRegistry(regName)
72+
5973
ctx, cancel := context.WithCancel(context.Background())
6074
reader := procfs.NewProcfsReader(*logger)
61-
db, err := processdb.NewDB(reader, *logger)
75+
db, err := processdb.NewDB(ctx, metricsReg, reader, logger, c.DBReaperPeriod, c.ReapProcesses)
6276
if err != nil {
6377
cancel()
6478
return nil, fmt.Errorf("failed to create DB: %w", err)
@@ -182,7 +196,7 @@ func (p *addSessionMetadata) enrich(ev *beat.Event) (*beat.Event, error) {
182196
fullProcess, err = p.db.GetProcess(pid)
183197
if err != nil {
184198
e := fmt.Errorf("pid %v not found in db: %w", pid, err)
185-
p.logger.Debugw("PID not found in provider", "pid", pid, "error", err)
199+
p.logger.Debugf("PID %d not found in provider: %s", pid, err)
186200
return nil, e
187201
}
188202
}
@@ -210,22 +224,24 @@ func pidToUInt32(value interface{}) (pid uint32, err error) {
210224
switch v := value.(type) {
211225
case string:
212226
nr, err := strconv.Atoi(v)
213-
if err != nil {
227+
if err != nil || nr < 0 || nr > math.MaxUint32 {
214228
return 0, fmt.Errorf("error converting string to integer: %w", err)
215229
}
216230
pid = uint32(nr)
217231
case uint32:
218232
pid = v
219233
case int, int8, int16, int32, int64:
220234
pid64 := reflect.ValueOf(v).Int()
221-
if pid = uint32(pid64); int64(pid) != pid64 {
235+
if pid64 < 0 || pid64 > math.MaxUint32 {
222236
return 0, fmt.Errorf("integer out of range: %d", pid64)
223237
}
238+
pid = uint32(pid64)
224239
case uint, uintptr, uint8, uint16, uint64:
225240
pidu64 := reflect.ValueOf(v).Uint()
226-
if pid = uint32(pidu64); uint64(pid) != pidu64 {
241+
if pidu64 > math.MaxUint32 {
227242
return 0, fmt.Errorf("integer out of range: %d", pidu64)
228243
}
244+
pid = uint32(pidu64)
229245
default:
230246
return 0, fmt.Errorf("not an integer or string, but %T", v)
231247
}

x-pack/auditbeat/processors/sessionmd/add_session_metadata_test.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
package sessionmd
88

99
import (
10+
"context"
1011
"testing"
12+
"time"
1113

1214
"github.com/google/go-cmp/cmp"
1315
"github.com/stretchr/testify/require"
@@ -18,6 +20,7 @@ import (
1820
"github.com/elastic/beats/v7/x-pack/auditbeat/processors/sessionmd/types"
1921
"github.com/elastic/elastic-agent-libs/logp"
2022
"github.com/elastic/elastic-agent-libs/mapstr"
23+
"github.com/elastic/elastic-agent-libs/monitoring"
2124
)
2225

2326
var (
@@ -337,10 +340,12 @@ var (
337340
)
338341

339342
func TestEnrich(t *testing.T) {
343+
ctx, cancel := context.WithTimeout(context.Background(), time.Minute*15)
344+
defer cancel()
340345
for _, tt := range enrichTests {
341346
t.Run(tt.testName, func(t *testing.T) {
342347
reader := procfs.NewMockReader()
343-
db, err := processdb.NewDB(reader, *logger)
348+
db, err := processdb.NewDB(ctx, monitoring.NewRegistry(), reader, logger, time.Second*30, false)
344349
require.Nil(t, err)
345350

346351
for _, ev := range tt.mockProcesses {

x-pack/auditbeat/processors/sessionmd/config.go

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,28 @@
66

77
package sessionmd
88

9+
import "time"
10+
911
// Config for add_session_metadata processor.
1012
type config struct {
11-
Backend string `config:"backend"`
13+
// Backend specifies the data source for the processor. Possible values are `auto`, `procfs`, and `kernel_tracing`
14+
Backend string `config:"backend"`
15+
// PIDField specifies the event field used to locate the process ID
1216
PIDField string `config:"pid_field"`
17+
/// DBReaperPeriod specifies the interval of how often the backing process DB should remove orphaned and exited events.
18+
// Only valid for the `procfs` backend, or if `auto` falls back to `procfs`
19+
DBReaperPeriod time.Duration `config:"db_reaper_period"`
20+
// ReapProcesses, if enabled, will tell the process DB reaper thread to also remove orphaned process exec events, in addition to orphaned exit events and compleated process events.
21+
// This can result in data loss if auditbeat is running in an environment where it can't properly talk to procfs, but it can also reduce the memory footprint of auditbeat.
22+
// Only valid for the `procfs` backend.
23+
ReapProcesses bool `config:"reap_processes"`
1324
}
1425

1526
func defaultConfig() config {
1627
return config{
17-
Backend: "auto",
18-
PIDField: "process.pid",
28+
Backend: "auto",
29+
PIDField: "process.pid",
30+
DBReaperPeriod: time.Second * 30,
31+
ReapProcesses: false,
1932
}
2033
}

x-pack/auditbeat/processors/sessionmd/docs/add_session_metadata.asciidoc

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ auditbeat.modules:
7070
- module: auditd
7171
processors:
7272
- add_session_metadata:
73-
backend: "auto"
73+
backend: "auto"
7474
-------------------------------------
7575
+
7676
. Add audit rules in the modules configuration section of `auditbeat.yml` or the
@@ -96,3 +96,25 @@ auditbeat.modules:
9696
-------------------------------------
9797
sudo systemctl restart auditbeat
9898
-------------------------------------
99+
100+
===== Configuring the Process Database
101+
102+
When using the `procfs` backend, `add_session_metadata` will use an in-memory database to store and match events as they arrive to the processor.
103+
This processor has a number of additional config values:
104+
105+
[source,yaml]
106+
-------------------------------------
107+
auditbeat.modules:
108+
- module: auditd
109+
processors:
110+
- add_session_metadata:
111+
backend: "procfs"
112+
reap_processes: false
113+
db_reaper_period: 30s
114+
-------------------------------------
115+
116+
* `reap_processes` tells the database to remove orphan `execve` and `execveat` process events for which no matching `exit_group` event is found.
117+
This may result in incomplete data, but will reduce memory usage under high load. The default is `false`.
118+
* `db_reaper_period` specifies the time interval of the reaper process that will regularly remove exited and orphaned processes from the database.
119+
Setting this value lower my result in incomplete data, but will reduce memory pressure. Setting this to a higher value may help on systems with high load, but will increase memory usage.
120+
The default is `30s.`

0 commit comments

Comments
 (0)