From fb0422c5587132d269ced2522937dd2c0d2f3603 Mon Sep 17 00:00:00 2001 From: Delyan Kratunov Date: Thu, 3 Nov 2022 12:50:40 -0700 Subject: [PATCH] tests: teach runtime runner about multiple before clauses As noted in iovisor/bpftrace#2402, usdt flakiness was made better by 508538a but not fully fixed. This commit is what I should have done all along: it allows the test runner to parse and wait for multiple BEFORE clauses and thus ensures the processes have started before the test runs. There are two minor changes: 1. The check for child processes is now `ps --ppid` based to eventually allow parallel process runs in the same environment. That requires to use `ps` from the `procps` package on Alpine as the default BusyBox one doesn't have the `--ppid` option. 2. Because of the `ps` usage, the name check is now truncated to 15 chars, which will fail if TASK_COMM_LEN is not 16. That looks like a constant in the kernel, so I think we're good. --- docker/Dockerfile.alpine | 1 + tests/runtime/engine/parser.py | 8 +-- tests/runtime/engine/runner.py | 70 +++++++++++++++------- tests/runtime/usdt | 3 +- tests/testprogs/two_usdt_semaphore_tests.c | 17 ------ 5 files changed, 55 insertions(+), 44 deletions(-) delete mode 100644 tests/testprogs/two_usdt_semaphore_tests.c diff --git a/docker/Dockerfile.alpine b/docker/Dockerfile.alpine index 3166bfccd6ab..85f32898e8ee 100644 --- a/docker/Dockerfile.alpine +++ b/docker/Dockerfile.alpine @@ -29,6 +29,7 @@ RUN apk add --update \ llvm${LLVM_VERSION}-dev \ llvm${LLVM_VERSION}-static \ musl-obstack-dev \ + procps \ python3 \ wget \ xxd \ diff --git a/tests/runtime/engine/parser.py b/tests/runtime/engine/parser.py index 6e6fb0b529ef..82b9240ac401 100644 --- a/tests/runtime/engine/parser.py +++ b/tests/runtime/engine/parser.py @@ -25,7 +25,7 @@ class InvalidFieldError(Exception): 'prog', 'expect', 'timeout', - 'before', + 'befores', 'after', 'suite', 'kernel_min', @@ -101,7 +101,7 @@ def __read_test_struct(test, test_suite): prog = '' expect = '' timeout = '' - before = '' + befores = [] after = '' kernel_min = '' kernel_max = '' @@ -128,7 +128,7 @@ def __read_test_struct(test, test_suite): elif item_name == 'TIMEOUT': timeout = int(line.strip(' ')) elif item_name == 'BEFORE': - before = line + befores.append(line) elif item_name == 'AFTER': after = line elif item_name == 'MIN_KERNEL': @@ -194,7 +194,7 @@ def __read_test_struct(test, test_suite): prog, expect, timeout, - before, + befores, after, test_suite, kernel_min, diff --git a/tests/runtime/engine/runner.py b/tests/runtime/engine/runner.py index 63dcd1a76d9a..6181a64876c5 100644 --- a/tests/runtime/engine/runner.py +++ b/tests/runtime/engine/runner.py @@ -158,11 +158,13 @@ def run_test(test): signal.signal(signal.SIGALRM, Runner.__handler) p = None - before = None + befores = [] bpftrace = None after = None try: + result = None timeout = False + output = "" print(ok("[ RUN ] ") + "%s.%s" % (test.suite, test.name)) if test.requirement: @@ -195,22 +197,45 @@ def run_test(test): print(warn("[ SKIP ] ") + "%s.%s" % (test.suite, test.name)) return Runner.SKIP_FEATURE_REQUIREMENT_UNSATISFIED - if test.before: - before = subprocess.Popen(test.before, shell=True, preexec_fn=os.setsid) - waited=0 + if test.befores: + for before in test.befores: + before = subprocess.Popen(before.split(), preexec_fn=os.setsid) + befores.append(before) + with open(os.devnull, 'w') as dn: - # This might not work for complicated cases, such as if - # a test program needs to accept arguments. It covers the - # current simple calls with no arguments - child_name = os.path.basename(test.before.split()[-1]) - while subprocess.call(["pidof", child_name], stdout=dn, stderr=dn) != 0: + child_names = [os.path.basename(x.strip().split()[-1]) for x in test.befores] + child_names = sorted((x[:15] for x in child_names)) # cut to comm length + print(f"child_names: %{child_names}") + + # Print the names of all of our children and look + # for the ones from BEFORE clauses + waited=0 + while waited <= test.timeout: + children = subprocess.run(["ps", "--ppid", str(os.getpid()), "--no-headers", "-o", "comm"], + check=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if children.returncode == 0 and children.stdout: + lines = [line.decode('utf-8') for line in children.stdout.splitlines()] + lines = sorted((line.strip() for line in lines if line != 'ps')) + print(f"lines: %{lines}") + if lines == child_names: + break + else: + print(children.stderr) + time.sleep(0.1) waited+=0.1 - if waited > test.timeout: - raise TimeoutError('Timed out waiting for BEFORE %s ', test.before) + + if waited > test.timeout: + raise TimeoutError(f'Timed out waiting for BEFORE(s) {test.befores}') bpf_call = Runner.prepare_bpf_call(test) - if test.before and '{{BEFORE_PID}}' in bpf_call: + if test.befores and '{{BEFORE_PID}}' in bpf_call: + if len(test.befores) > 1: + raise ValueError(f"test has {len(test.befores)} BEFORE clauses but BEFORE_PID usage requires exactly one") + + child_name = test.befores[0].strip().split()[-1] + child_name = os.path.basename(child_name) + childpid = subprocess.Popen(["pidof", child_name], stdout=subprocess.PIPE, universal_newlines=True).communicate()[0].split()[0] bpf_call = re.sub("{{BEFORE_PID}}", str(childpid), bpf_call) env = { @@ -234,8 +259,6 @@ def run_test(test): signal.alarm(ATTACH_TIMEOUT) - output = "" - while p.poll() is None: nextline = p.stdout.readline() output += nextline @@ -268,15 +291,18 @@ def run_test(test): os.killpg(os.getpgid(p.pid), signal.SIGTERM) output += p.communicate()[0] result = re.search(test.expect, output) - if not result: - print(fail("[ TIMEOUT ] ") + "%s.%s" % (test.suite, test.name)) - print('\tCommand: %s' % bpf_call) - print('\tTimeout: %s' % test.timeout) - print('\tCurrent output: %s' % output) - return Runner.TIMEOUT + + if not result: + print(fail("[ TIMEOUT ] ") + "%s.%s" % (test.suite, test.name)) + print('\tCommand: %s' % bpf_call) + print('\tTimeout: %s' % test.timeout) + print('\tCurrent output: %s' % output) + return Runner.TIMEOUT finally: - if before and before.poll() is None: - os.killpg(os.getpgid(before.pid), signal.SIGKILL) + if befores: + for before in befores: + if before.poll() is None: + os.killpg(os.getpgid(before.pid), signal.SIGKILL) if bpftrace and bpftrace.poll() is None: os.killpg(os.getpgid(p.pid), signal.SIGKILL) diff --git a/tests/runtime/usdt b/tests/runtime/usdt index 34c90edb3eb5..21231b823f6a 100644 --- a/tests/runtime/usdt +++ b/tests/runtime/usdt @@ -247,7 +247,8 @@ NAME "usdt probes - file based semaphore activation multi process" RUN {{BPFTRACE}} runtime/scripts/usdt_file_activation_multiprocess.bt --usdt-file-activation EXPECT found 2 processes TIMEOUT 5 -BEFORE ./testprogs/two_usdt_semaphore_tests +BEFORE ./testprogs/usdt_semaphore_test +BEFORE ./testprogs/usdt_semaphore_test REQUIRES ./testprogs/usdt_semaphore_test should_not_skip NAME "usdt probes - list probes by pid in separate mountns" diff --git a/tests/testprogs/two_usdt_semaphore_tests.c b/tests/testprogs/two_usdt_semaphore_tests.c deleted file mode 100644 index d271ea2cb6da..000000000000 --- a/tests/testprogs/two_usdt_semaphore_tests.c +++ /dev/null @@ -1,17 +0,0 @@ -#include - -int main(int argc __attribute__((unused)), char **argv) -{ - int pid = fork(); - - if (pid < 0) - return 1; - else if (pid == 0) - // child, test 1 - execv("./testprogs/usdt_semaphore_test", argv); - else - // parent, test 2 - execv("./testprogs/usdt_semaphore_test", argv); - - return 0; -}