From b9fc63c362c5176298ce0a0a903b7ecbfb20a770 Mon Sep 17 00:00:00 2001 From: Vihas Makwana <121151420+VihasMakwana@users.noreply.github.com> Date: Tue, 24 Dec 2024 16:26:10 +0530 Subject: [PATCH] [process] - Add a boolean to detect partial matches (#199) This PR follows up on https://github.com/elastic/elastic-agent-system-metrics/pull/195 and adds a new boolean to indicate if a given process state is partial. This is useful while returning errors to the caller. --- metric/system/process/process.go | 11 +++++++++-- metric/system/process/process_types.go | 4 +++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/metric/system/process/process.go b/metric/system/process/process.go index 7045af80d..698e98236 100644 --- a/metric/system/process/process.go +++ b/metric/system/process/process.go @@ -140,11 +140,11 @@ func (procStats *Stats) Get() ([]mapstr.M, []mapstr.M, error) { procs = append(procs, proc) rootEvents = append(rootEvents, rootMap) } - if len(failedPIDs) > 0 { + if wrappedErr != nil && len(failedPIDs) > 0 { procStats.logger.Debugf("error fetching process metrics: %v", wrappedErr) return procs, rootEvents, NonFatalErr{Err: fmt.Errorf(errFetchingPIDs, len(failedPIDs))} } - return procs, rootEvents, nil + return procs, rootEvents, toNonFatal(wrappedErr) } // GetOne fetches process data for a given PID if its name matches the regexes provided from the host. 
@@ -224,6 +224,10 @@ func (procStats *Stats) pidIter(pid int, procMap ProcsMap, proclist []ProcState) procStats.logger.Debugf("Process name does not match the provided regex; PID=%d; name=%s", pid, status.Name) return procMap, proclist, nonFatalErr } + // there was some non-fatal error and given state is partial + if nonFatalErr != nil { + status.Partial = true + } procMap[pid] = status proclist = append(proclist, status) @@ -422,12 +426,15 @@ func (procStats *Stats) isWhitelistedEnvVar(varName string) bool { } func extractFailedPIDs(procMap ProcsMap) []int { + // calculate the total amount of partial/failed PIDs list := make([]int, 0) for pid, state := range procMap { if state.Failed { list = append(list, pid) // delete the failed state so we don't return the state to caller delete(procMap, pid) + } else if state.Partial { + list = append(list, pid) } } return list diff --git a/metric/system/process/process_types.go b/metric/system/process/process_types.go index 16d7e2426..e66cfb538 100644 --- a/metric/system/process/process_types.go +++ b/metric/system/process/process_types.go @@ -57,8 +57,10 @@ type ProcState struct { // meta SampleTime time.Time `struct:"-,omitempty"` - // boolean to indicate that given PID has failed due to some error. + // boolean to indicate that given PID has completely failed due to some error. Failed bool `struct:"-,omitempty"` + // boolean to indicate that given state is partially filled. + Partial bool `struct:"-,omitempty"` } // ProcCPUInfo is the main struct for CPU metrics