Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[system/process] add support for mutlierr #166

Merged
merged 28 commits into from
Jul 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
9374d23
chore: initial commit, use errors.join
VihasMakwana Jul 18, 2024
f27b495
chore: introduce new helper, nits
VihasMakwana Jul 18, 2024
ed559b2
fix: clean the functino
VihasMakwana Jul 18, 2024
0e93586
fix: add wrpped error
VihasMakwana Jul 18, 2024
1a83875
fix: update tests
VihasMakwana Jul 18, 2024
ced452b
Merge branch 'main' into multierror-enhancement
VihasMakwana Jul 18, 2024
63f95b8
fix: fix argument order
VihasMakwana Jul 18, 2024
1b4ad46
fix: tests
VihasMakwana Jul 18, 2024
6e2ab10
fix: update ListStates
VihasMakwana Jul 18, 2024
3ccb849
fix: windows support
VihasMakwana Jul 18, 2024
557c766
fix: verbose
VihasMakwana Jul 18, 2024
00fd657
fix: verbose
VihasMakwana Jul 18, 2024
13e6bf0
chore: update container tests
VihasMakwana Jul 18, 2024
8aa9dd0
chore: add helper
VihasMakwana Jul 18, 2024
5049500
chore: remame function
VihasMakwana Jul 18, 2024
82526c3
fix: comments
VihasMakwana Jul 18, 2024
33f4f40
chore: add comments
VihasMakwana Jul 18, 2024
a4b22de
simplify build tags
VihasMakwana Jul 19, 2024
c1f9abd
fix: don't exported canIgnore
VihasMakwana Jul 19, 2024
a98038a
Update metric/system/process/process.go
VihasMakwana Jul 22, 2024
6a7cb8a
chore: simplify code. remove helpers
VihasMakwana Jul 22, 2024
6c2f29e
fix: add wrappers and unwrap for recusive lookup
VihasMakwana Jul 22, 2024
ceb1dff
fix: fix bug, nil pointer
VihasMakwana Jul 22, 2024
4cada94
fix: bug, nil pointer
VihasMakwana Jul 22, 2024
0d0ff39
fix: nits
VihasMakwana Jul 22, 2024
30a6fa5
chore: add test cases
VihasMakwana Jul 22, 2024
7db28ef
chore: comments
VihasMakwana Jul 22, 2024
b020a9d
fix: add license
VihasMakwana Jul 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions metric/system/process/helper_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package process

import (
"errors"
"fmt"
"syscall"
"testing"

"github.com/stretchr/testify/require"
)

func TestErrors(t *testing.T) {
cases := []struct {
name string
check func(t *testing.T)
}{
{
name: "non fatal error",
check: func(t *testing.T) {
err := fmt.Errorf("Faced non-fatal error: %w", NonFatalErr{Err: syscall.EPERM})
require.True(t, isNonFatal(err), "Should be a non fatal error")
},
},
{
name: "non fatal error - unwrapped",
check: func(t *testing.T) {
err := fmt.Errorf("Faced non-fatal error: %w", syscall.EPERM)
require.True(t, isNonFatal(err), "Should be a non fatal error")
},
},
{
name: "non fatal error - hierarchy",
check: func(t *testing.T) {
err := fmt.Errorf("Faced non-fatal error: %w", syscall.EPERM)
err2 := errors.Join(toNonFatal(err))
require.True(t, isNonFatal(err2), "Should be a non fatal error")
},
},
{
name: "fatal error",
check: func(t *testing.T) {
err := fmt.Errorf("Faced fatal error: %w", errors.New("FATAL"))
err = toNonFatal(err) // shouldn't have any effect as it's a fatal error
require.Falsef(t, isNonFatal(err), "Should be a fatal error")
},
},
{
name: "fatal error - hierarchy",
check: func(t *testing.T) {
err := fmt.Errorf("Faced fatal error: %w", errors.New("FATAL"))
err2 := errors.Join(err)
require.Falsef(t, isNonFatal(err2), "Should be a fatal error")
},
},
}
for _, c := range cases {
t.Run(c.name, c.check)
}
}
10 changes: 10 additions & 0 deletions metric/system/process/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,13 @@ func GetProcCPUPercentage(s0, s1 ProcState) ProcState {
return s1

}

// toNonFatal wraps err in a NonFatalErr when isNonFatal reports it as
// ignorable. A nil error is returned as nil, and any other error is handed
// back unchanged.
func toNonFatal(err error) error {
	switch {
	case err == nil:
		return nil
	case isNonFatal(err):
		return NonFatalErr{Err: err}
	default:
		return err
	}
}
35 changes: 35 additions & 0 deletions metric/system/process/helpers_others.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//go:build !windows

package process

import (
"errors"
"syscall"
)

// isNonFatal reports whether err is nil or matches, anywhere in its chain,
// one of the errors this package tolerates on non-Windows builds:
// EACCES, EPERM, EINVAL or a NonFatalErr.
func isNonFatal(err error) bool {
	if err == nil {
		return true
	}
	// Checked in order; the first match wins.
	tolerated := []error{
		syscall.EACCES,
		syscall.EPERM,
		syscall.EINVAL,
		NonFatalErr{},
	}
	for _, target := range tolerated {
		if errors.Is(err, target) {
			return true
		}
	}
	return false
}
37 changes: 37 additions & 0 deletions metric/system/process/helpers_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//go:build windows

package process

import (
"errors"
"syscall"

"golang.org/x/sys/windows"
)

// isNonFatal reports whether err is nil or matches, anywhere in its chain,
// one of the errors this package tolerates on Windows builds:
// ERROR_ACCESS_DENIED, EPERM, EINVAL, ERROR_INVALID_PARAMETER or a NonFatalErr.
func isNonFatal(err error) bool {
	switch {
	case err == nil:
		return true
	// Case expressions are evaluated left to right and stop at the first match.
	case errors.Is(err, windows.ERROR_ACCESS_DENIED),
		errors.Is(err, syscall.EPERM),
		errors.Is(err, syscall.EINVAL),
		errors.Is(err, windows.ERROR_INVALID_PARAMETER),
		errors.Is(err, NonFatalErr{}):
		return true
	default:
		return false
	}
}
55 changes: 34 additions & 21 deletions metric/system/process/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"fmt"
"sort"
"strings"
"syscall"
"time"

psutil "github.com/shirou/gopsutil/v3/process"
Expand Down Expand Up @@ -54,11 +55,11 @@ func ListStates(hostfs resolve.Resolver) ([]ProcState, error) {

// actually fetch the PIDs from the OS-specific code
_, plist, err := init.FetchPids()
if err != nil {
if err != nil && !isNonFatal(err) {
return nil, fmt.Errorf("error gathering PIDs: %w", err)
}

return plist, nil
return plist, toNonFatal(err)
}

// GetPIDState returns the state of a given PID
Expand Down Expand Up @@ -90,10 +91,10 @@ func (procStats *Stats) Get() ([]mapstr.M, []mapstr.M, error) {
}

// actually fetch the PIDs from the OS-specific code
pidMap, plist, err := procStats.FetchPids()
pidMap, plist, wrappedErr := procStats.FetchPids()

if err != nil {
return nil, nil, fmt.Errorf("error gathering PIDs: %w", err)
if wrappedErr != nil && !isNonFatal(wrappedErr) {
return nil, nil, fmt.Errorf("error gathering PIDs: %w", wrappedErr)
}
// We use this to track processes over time.
procStats.ProcsMap.SetMap(pidMap)
Expand Down Expand Up @@ -133,13 +134,13 @@ func (procStats *Stats) Get() ([]mapstr.M, []mapstr.M, error) {
rootEvents = append(rootEvents, rootMap)
}

return procs, rootEvents, nil
return procs, rootEvents, toNonFatal(wrappedErr)
}

// GetOne fetches process data for a given PID if its name matches the regexes provided from the host.
func (procStats *Stats) GetOne(pid int) (mapstr.M, error) {
pidStat, _, err := procStats.pidFill(pid, false)
if err != nil {
if err != nil && !isNonFatal(err) {
return nil, fmt.Errorf("error fetching PID %d: %w", pid, err)
}

Expand All @@ -151,9 +152,9 @@ func (procStats *Stats) GetOne(pid int) (mapstr.M, error) {
// GetOneRootEvent is the same as `GetOne()` but it returns an
// event formatted as expected by ECS
func (procStats *Stats) GetOneRootEvent(pid int) (mapstr.M, mapstr.M, error) {
pidStat, _, err := procStats.pidFill(pid, false)
if err != nil {
return nil, nil, fmt.Errorf("error fetching PID %d: %w", pid, err)
pidStat, _, wrappedErr := procStats.pidFill(pid, false)
if wrappedErr != nil && !isNonFatal(wrappedErr) {
return nil, nil, fmt.Errorf("error fetching PID %d: %w", pid, wrappedErr)
}

procStats.ProcsMap.SetPid(pid, pidStat)
Expand All @@ -165,7 +166,7 @@ func (procStats *Stats) GetOneRootEvent(pid int) (mapstr.M, mapstr.M, error) {

rootMap := processRootEvent(&pidStat)

return procMap, rootMap, err
return procMap, rootMap, toNonFatal(wrappedErr)
}

// GetSelf gets process info for the beat itself
Expand All @@ -180,34 +181,41 @@ func (procStats *Stats) GetSelf() (ProcState, error) {
}

pidStat, _, err := procStats.pidFill(self, false)
if err != nil {
if err != nil && !isNonFatal(err) {
return ProcState{}, fmt.Errorf("error fetching PID %d: %w", self, err)
}

procStats.ProcsMap.SetPid(self, pidStat)

return pidStat, nil
return pidStat, toNonFatal(err)
}

// pidIter wraps a few lines of generic code that all OS-specific FetchPids() functions must call.
// This also handles adding to the maps/lists, in order to limit code duplication across the OS implementations.
func (procStats *Stats) pidIter(pid int, procMap ProcsMap, proclist []ProcState) (ProcsMap, []ProcState) {
func (procStats *Stats) pidIter(pid int, procMap ProcsMap, proclist []ProcState) (ProcsMap, []ProcState, error) {
status, saved, err := procStats.pidFill(pid, true)
var nonFatalErr error
if err != nil {
if !errors.Is(err, NonFatalErr{}) {
procStats.logger.Debugf("Error fetching PID info for %d, skipping: %s", pid, err)
return procMap, proclist
// While monitoring a set of processes, some processes might get killed after we get all the PIDs
// So, there's no need to capture "process not found" error.
if errors.Is(err, syscall.ESRCH) {
return procMap, proclist, nil
}
return procMap, proclist, err
}
procStats.logger.Debugf("Non fatal error fetching PID some info for %d, metrics are valid, but partial: %s", pid, err)
nonFatalErr = fmt.Errorf("non fatal error fetching PID some info for %d, metrics are valid, but partial: %w", pid, err)
procStats.logger.Debugf(err.Error())
}
if !saved {
procStats.logger.Debugf("Process name does not match the provided regex; PID=%d; name=%s", pid, status.Name)
return procMap, proclist
return procMap, proclist, nonFatalErr
}
procMap[pid] = status
proclist = append(proclist, status)

return procMap, proclist
return procMap, proclist, nonFatalErr
}

// NonFatalErr is returned when there was an error
Expand All @@ -232,13 +240,17 @@ func (c NonFatalErr) Is(other error) bool {
return is
}

// Unwrap returns the error stored in Err, which lets errors.Is and errors.As
// look through a NonFatalErr at the error it wraps.
func (c NonFatalErr) Unwrap() error {
return c.Err
}

// pidFill is an entrypoint used by OS-specific code to fill out a pid.
// This in turn calls various OS-specific code to fill out the various bits of PID data
// This is done to minimize the code duplication between different OS implementations
// The second return value will only be false if an event has been filtered out.
func (procStats *Stats) pidFill(pid int, filter bool) (ProcState, bool, error) {
// Fetch proc state so we can get the name for filtering based on user's filter.

var wrappedErr error
// OS-specific entrypoint, get basic info so we can at least run matchProcess
status, err := GetInfoForPid(procStats.Hostfs, pid)
if err != nil {
Expand All @@ -265,7 +277,8 @@ func (procStats *Stats) pidFill(pid int, filter bool) (ProcState, bool, error) {
if !errors.Is(err, NonFatalErr{}) {
return status, true, fmt.Errorf("FillPidMetrics: %w", err)
}
procStats.logger.Debugf("Non-fatal error fetching PID metrics for %d, metrics are valid, but partial: %s", pid, err)
wrappedErr = errors.Join(wrappedErr, fmt.Errorf("non-fatal error fetching PID metrics for %d, metrics are valid, but partial: %w", pid, err))
procStats.logger.Debugf(wrappedErr.Error())
}

if status.CPU.Total.Ticks.Exists() {
Expand Down Expand Up @@ -320,7 +333,7 @@ func (procStats *Stats) pidFill(pid int, filter bool) (ProcState, bool, error) {
}
}

return status, true, nil
return status, true, wrappedErr
}

// cacheCmdLine fills out Env and arg metrics from any stored previous metrics for the pid
Expand Down
6 changes: 4 additions & 2 deletions metric/system/process/process_aix.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,20 +46,22 @@ func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) {
pid := C.pid_t(0)

procMap := make(ProcsMap, 0)
var wrappedErr err
var plist []ProcState
for {
// getprocs first argument is a void*
num, err := C.getprocs(unsafe.Pointer(&info), C.sizeof_struct_procsinfo64, nil, 0, &pid, 1)
if err != nil {
return nil, nil, fmt.Errorf("error fetching PIDs: %w", err)
}
procMap, plist = procStats.pidIter(int(info.pi_pid), procMap, plist)
procMap, plist, err = procStats.pidIter(int(pid), procMap, plist)
wrappedErr = errors.Join(wrappedErr, err)

if num == 0 {
break
}
}
return procMap, plist, nil
return procMap, plist, toNonFatal(wrappedErr)
}

// GetInfoForPid returns basic info for the process
Expand Down
3 changes: 2 additions & 1 deletion metric/system/process/process_container_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package process

import (
"fmt"
"os"
"os/user"
"runtime"
Expand Down Expand Up @@ -112,7 +113,7 @@ func TestSystemHostFromContainer(t *testing.T) {
validateProcResult(t, result)
} else {
_, roots, err := testStats.Get()
require.NoError(t, err)
require.True(t, isNonFatal(err), fmt.Sprintf("Fatal error: %s", err))

for _, proc := range roots {
t.Logf("proc: %d: %s", proc["process"].(map[string]interface{})["pid"],
Expand Down
Loading
Loading