Skip to content

Commit

Permalink
kola: Do failed units/SELinux checks both before *and* after tests
Browse files Browse the repository at this point in the history
First: Revert "Revert "kola: Add --no-default-checks""
 (this reverts commit: 57b3520 )

Now, when I went to add the SELinux denials check, I completely missed
that we only were checking for denials *before* the test runs.

We obviously want to check both before and after - the same
way we do with e.g. looking for errors on the console.

As part of this, I'd also added `--no-default-checks` but it
actually didn't work.  Fix that so that it does work, and add
a test case with a failing systemd unit that verifies this,
so we can test the test system.

In order to help ratchet things, also add a special `default-checks`
entry to the denylist.  This way if e.g. we are hitting SELinux AVC
denials just on one arch/test we can turn them off temporarily.
  • Loading branch information
cgwalters committed Mar 7, 2021
1 parent 306fc3a commit d53a4f6
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 13 deletions.
14 changes: 14 additions & 0 deletions ci/run-kola-self-tests
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,20 @@ set -euo pipefail
dn=$(cd $(dirname $0) && pwd)
toplevel=$(cd ${dn} && git rev-parse --show-toplevel)
set -x
# Unit failure
if cosa kola run --output-dir tmp/kolaself -E ${toplevel}/tests/kola-ci-unit-failure 'ext.kola-ci*' |& tee tmp/err.txt; then
echo "expected kola-ci-unit-failure to fail" 1>&2
exit 1
fi
if ! grep -qF 'failed basic checks' tmp/err.txt; then
echo "Missing expected failed basic checks error" 1>&2
exit 1
fi
# And this run should pass
cosa kola run --output-dir tmp/kolaself -E ${toplevel}/tests/kola-ci-unit-failure 'ext.kola-ci*' --no-default-checks

rm tmp/kolaself -rf

cosa kola run --output-dir tmp/kolaself -E ${toplevel}/tests/kola-ci-self 'ext.kola-ci-self*' "$@"
log=$(find tmp/kolaself -type f -name kola-runext-autopkgtest-reboot.txt)
test -n "${log}"
Expand Down
1 change: 1 addition & 0 deletions mantle/cmd/kola/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ func init() {
bv(&kola.NoNet, "no-net", false, "Don't run tests that require an Internet connection")
ssv(&kola.Tags, "tag", []string{}, "Test tag to run. Can be specified multiple times.")
bv(&kola.Options.SSHOnTestFailure, "ssh-on-test-failure", false, "SSH into a machine when tests fail")
bv(&kola.Options.SuppressDefaultChecks, "no-default-checks", false, "Disable default checks for failed systemd units and SELinux AVC denials")
sv(&kola.Options.Stream, "stream", "", "CoreOS stream ID (e.g. for Fedora CoreOS: stable, testing, next)")
sv(&kola.Options.CosaWorkdir, "workdir", "", "coreos-assembler working directory")
sv(&kola.Options.CosaBuildId, "build", "", "coreos-assembler build ID")
Expand Down
17 changes: 14 additions & 3 deletions mantle/kola/harness.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package kola

import (
"bufio"
"context"
"encoding/json"
"fmt"
"io"
Expand Down Expand Up @@ -883,9 +884,12 @@ func getClusterSemver(flight platform.Flight, outputDir string) (*semver.Version
return nil, err
}

cluster, err := flight.NewCluster(&platform.RuntimeConfig{
OutputDir: testDir,
})
cfg := &platform.RuntimeConfig{
OutputDir: testDir,
AllowFailedUnits: Options.SuppressDefaultChecks,
}

cluster, err := flight.NewCluster(cfg)
if err != nil {
return nil, errors.Wrapf(err, "creating cluster for semver check")
}
Expand Down Expand Up @@ -933,6 +937,7 @@ func runTest(h *harness.H, t *register.Test, pltfrm string, flight platform.Flig
NoSSHKeyInUserData: t.HasFlag(register.NoSSHKeyInUserData),
NoSSHKeyInMetadata: t.HasFlag(register.NoSSHKeyInMetadata),
InternetAccess: testRequiresInternet(t),
AllowFailedUnits: Options.SuppressDefaultChecks,
}
c, err := flight.NewCluster(rconf)
if err != nil {
Expand Down Expand Up @@ -1036,6 +1041,12 @@ func runTest(h *harness.H, t *register.Test, pltfrm string, flight platform.Flig

// run test
t.Run(tcluster)

for _, mach := range c.Machines() {
if err := platform.CheckMachineBasics(context.TODO(), mach); err != nil {
h.Errorf("Machine %s failed basic checks: %v", mach.ID(), err)
}
}
}

// scpKolet searches for a kolet binary and copies it to the machine.
Expand Down
46 changes: 37 additions & 9 deletions mantle/platform/platform.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ type Options struct {
OSContainer string

SSHOnTestFailure bool
// SuppressDefaultChecks will skip built-in checks for systemd unit failures and SELinux AVC denials, etc.
SuppressDefaultChecks bool
}

// RuntimeConfig contains cluster-specific configuration.
Expand Down Expand Up @@ -435,20 +437,46 @@ func CheckMachine(ctx context.Context, m Machine) error {
return fmt.Errorf("not a supported instance: %v", string(out))
}

if !m.RuntimeConf().AllowFailedUnits {
// ensure no systemd units failed during boot
out, stderr, err = m.SSH("systemctl --no-legend --state failed list-units")
if err != nil {
return fmt.Errorf("systemctl: %s: %v: %s", out, err, stderr)
}
if len(out) > 0 {
return fmt.Errorf("some systemd units failed:\n%s", out)
}
// Validate that nothing is wrong before we even run the test
if err := CheckMachineBasics(ctx, m); err != nil {
return err
}

return ctx.Err()
}

// CheckMachineBasics validates that no systemd units have
// failed and no SELinux AVC denials were logged. It may be
// extended in the future - the idea is these are "baseline"
// errors that shouldn't be seen across any tests.
func CheckMachineBasics(ctx context.Context, m Machine) error {
if m.RuntimeConf().AllowFailedUnits {
plog.Debug("Allowing failed units")
return nil
}

// ensure no systemd units failed during boot
out, stderr, err := m.SSH("systemctl --no-legend --state failed list-units")
if err != nil {
return fmt.Errorf("systemctl: %s: %v: %s", out, err, stderr)
}
if len(out) > 0 {
return fmt.Errorf("some systemd units failed:\n%s", out)
}
// Also no SELinux denials; RHCOS currently ships auditd, FCOS doesn't, so handle
// both places.
out, stderr, err = m.SSH("if test -f /var/log/audit/audit.log; then grep 'avc.*denied' /var/log/audit/audit.log || true; else journalctl -q --no-pager --grep='avc.*denied' _TRANSPORT=audit || true; fi")
if err != nil {
return fmt.Errorf("failed to query audit logs: %s: %v: %s", out, err, stderr)
}
if len(out) > 0 {
return fmt.Errorf("Found SELinux AVC denials:\n%s", out)
}
plog.Debug("Validated machine")

return nil
}

type machineInfo struct {
Id string `json:"id"`
PublicIp string `json:"public_ip"`
Expand Down
9 changes: 8 additions & 1 deletion src/cmd-kola
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ import platform
import sys
import yaml

# Special pattern that skips systemd unit checks/SELinux AVCs for all tests
SPECIAL_DEFAULT_CHECKS = 'default-checks'

# Just test these boot to start with. In the future we should at least
# do ostree upgrades with uefi etc. But we don't really need the *full*
# suite...if podman somehow broke with nvme or uefi I'd be amazed and impressed.
Expand Down Expand Up @@ -57,7 +60,11 @@ try:
continue
print(f"⚠️ Skipping kola test pattern \"{obj['pattern']}\":")
print(f"⚠️ {obj['tracker']}")
kolaargs.extend(['--denylist-test', obj['pattern']])
pattern = obj['pattern']
if pattern == SPECIAL_DEFAULT_CHECKS:
kolaargs.append('--no-default-checks')
else:
kolaargs.extend(['--denylist-test', pattern])
except FileNotFoundError:
pass

Expand Down
6 changes: 6 additions & 0 deletions tests/kola-ci-unit-failure/tests/kola/unit-failure-skip
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Intentionally cause a systemd unit failure that should be skipped with --no-default-checks
set -xeuo pipefail
systemd-run --wait --unit should-fail.service -q false || true
echo "ok started failed service"

0 comments on commit d53a4f6

Please sign in to comment.