[ADXT-816] Dump e2e logs with failing / flaky categories (#32653)
CelianR authored Jan 10, 2025
1 parent aff2dd4 commit bdc78a5
Showing 2 changed files with 110 additions and 17 deletions.
87 changes: 70 additions & 17 deletions tasks/new_e2e_tests.py
@@ -11,6 +11,7 @@
 import re
 import shutil
 import tempfile
+from collections import defaultdict
 from pathlib import Path
 
 import yaml
@@ -20,13 +21,28 @@
 
 from tasks.flavor import AgentFlavor
 from tasks.gotest import process_test_result, test_flavor
+from tasks.libs.common.color import Color
 from tasks.libs.common.git import get_commit_sha
 from tasks.libs.common.go import download_go_dependencies
 from tasks.libs.common.gomodules import get_default_modules
 from tasks.libs.common.utils import REPO_PATH, color_message, gitlab_section, running_in_ci
+from tasks.testwasher import TestWasher
 from tasks.tools.e2e_stacks import destroy_remote_stack
 
 
+class TestState:
+    """Describes the state of a test: whether it failed and whether it is flaky."""
+
+    FAILED = True, False
+    FLAKY_FAILED = True, True
+    SUCCESS = False, False
+    FLAKY_SUCCESS = False, True
+
+    @staticmethod
+    def get_human_readable_state(failing: bool, flaky: bool) -> str:
+        return f'{"Failing" if failing else "Successful"} / {"Flaky" if flaky else "Non-flaky"}'
+
+
 @task(
     iterable=['tags', 'targets', 'configparams'],
     help={
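
The TestState tuples above encode (failing, flaky) pairs, so a category key built from two booleans compares directly against the named states. A minimal usage sketch (standalone, assuming tasks.new_e2e_tests is importable as in this diff):

    from tasks.new_e2e_tests import TestState

    state = (True, False)                    # (did the test fail?, is it known flaky?)
    assert state == TestState.FAILED         # plain tuple comparison selects the category
    print(TestState.get_human_readable_state(*state))  # -> Failing / Non-flaky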
@@ -73,6 +89,7 @@ def run(
     """
     Run E2E Tests based on test-infra-definitions infrastructure provisioning.
     """
+
     if shutil.which("pulumi") is None:
         raise Exit(
             "pulumi CLI not found, Pulumi needs to be installed on the system (see https://github.com/DataDog/test-infra-definitions/blob/main/README.md)",
Expand Down Expand Up @@ -179,16 +196,10 @@ def run(
post_processed_output = post_process_output(
test_res[0].result_json_path, test_depth=logs_post_processing_test_depth
)

os.makedirs(logs_folder, exist_ok=True)
write_result_to_log_files(post_processed_output, logs_folder)
try:
pretty_print_logs(post_processed_output)
except TooManyLogsError:
print(
color_message("WARNING", "yellow")
+ f": Too many logs to print, skipping logs printing to avoid Gitlab collapse. You can find your logs properly organized in the job artifacts: https://gitlab.ddbuild.io/DataDog/datadog-agent/-/jobs/{os.getenv('CI_JOB_ID')}/artifacts/browse/e2e-output/logs/"
)

pretty_print_logs(test_res[0].result_json_path, post_processed_output)
else:
print(
color_message("WARNING", "yellow")
@@ -356,20 +367,62 @@ class TooManyLogsError(Exception):
     pass
 
 
-def pretty_print_logs(logs_per_test, max_size=250000):
+def pretty_print_test_logs(logs_per_test: list[tuple[str, str, str]], max_size):
     # Compute the size in bytes of what we are about to print. If it exceeds max_size, skip printing, since an oversized dump would leave the GitLab job log almost completely collapsed.
     # By default, GitLab limits each job log to 500KB, so we want to avoid printing too much.
     size = 0
-    for _, tests in logs_per_test.items():
-        for _, logs in tests.items():
-            size += len("".join(logs).encode())
+    for _, _, logs in logs_per_test:
+        size += len("".join(logs).encode())
     if size > max_size and running_in_ci():
         raise TooManyLogsError
-    for package, tests in logs_per_test.items():
-        for test, logs in tests.items():
-            with gitlab_section("Complete logs for " + package + "." + test, collapsed=True):
-                print("Complete logs for " + package + "." + test)
-                print("".join(logs))
+    for package, test, logs in logs_per_test:
+        with gitlab_section("Complete logs for " + package + "." + test, collapsed=True):
+            print("".join(logs))
+
+    return size


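pretty_print_test_logs returns the number of bytes it printed, which lets the caller shrink the remaining budget between categories; in CI, exceeding the budget raises TooManyLogsError instead of flooding the job log. A condensed standalone sketch of the same budgeting pattern (print_within_budget is a hypothetical helper, not part of this commit):

    class TooManyLogsError(Exception):
        pass

    def print_within_budget(chunks: list[str], budget: int) -> int:
        # Print the chunks only if their combined size fits the budget; return bytes printed.
        size = sum(len(chunk.encode()) for chunk in chunks)
        if size > budget:
            raise TooManyLogsError
        for chunk in chunks:
            print(chunk)
        return size

    budget = 250_000
    for group in (["failing test logs..."], ["passing test logs..."]):
        budget -= print_within_budget(group, budget)  # each category shrinks the budget
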
+def pretty_print_logs(result_json_path, logs_per_test, max_size=250000, flakes_file="flakes.yaml"):
+    """Pretty-prints logs in a fixed order:
+    1. Failing and non-flaky tests
+    2. Failing and flaky tests
+    3. Successful and non-flaky tests
+    4. Successful and flaky tests
+    """
+
+    result_json_name = result_json_path.split("/")[-1]
+    result_json_dir = result_json_path.removesuffix('/' + result_json_name)
+    washer = TestWasher(test_output_json_file=result_json_name, flakes_file_path=flakes_file)
+    failing_tests, marked_flaky_tests = washer.parse_test_results(result_json_dir)
+    all_known_flakes = washer.merge_known_flakes(marked_flaky_tests)
+
+    try:
+        # (failing, flaky) -> [(package, test_name, logs)]
+        categorized_logs = defaultdict(list)
+
+        # Categorize each test on both axes: failing / successful and flaky / non-flaky
+        for package, tests in logs_per_test.items():
+            package_flaky = all_known_flakes.get(package, set())
+            package_failing = failing_tests.get(package, set())
+            for test_name, logs in tests.items():
+                state = test_name in package_failing, test_name in package_flaky
+                categorized_logs[state].append((package, test_name, logs))
+
+        for failing, flaky in [TestState.FAILED, TestState.FLAKY_FAILED, TestState.SUCCESS, TestState.FLAKY_SUCCESS]:
+            logs_to_print = categorized_logs[failing, flaky]
+            if not logs_to_print:
+                continue
+
+            print(f'* {color_message(TestState.get_human_readable_state(failing, flaky), Color.BOLD)} job logs:')
+            # Print until the size limit is reached; each printed category shrinks the remaining budget
+            max_size -= pretty_print_test_logs(logs_to_print, max_size)
+    except TooManyLogsError:
+        print(
+            color_message("WARNING", "yellow")
+            + f": Too many logs to print, skipping logs printing to avoid Gitlab collapse. You can find your logs properly organized in the job artifacts: https://gitlab.ddbuild.io/DataDog/datadog-agent/-/jobs/{os.getenv('CI_JOB_ID')}/artifacts/browse/e2e-output/logs/"
+        )

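The grouping in pretty_print_logs hinges on a defaultdict(list) keyed by the (failing, flaky) tuple: absent categories stay empty and are skipped, and the iteration order over the four TestState keys fixes the print order. A self-contained sketch of that categorization with made-up package and test names (the failing/flaky dicts stand in for the TestWasher outputs):

    from collections import defaultdict

    failing = {"pkg/a": {"TestX"}}      # stand-in for washer.parse_test_results(...)
    flaky = {"pkg/a": {"TestY"}}        # stand-in for washer.merge_known_flakes(...)
    logs_per_test = {"pkg/a": {"TestX": ["boom"], "TestY": ["flaked"], "TestZ": ["ok"]}}

    categorized = defaultdict(list)     # (failing, flaky) -> [(package, test, logs)]
    for package, tests in logs_per_test.items():
        for test, logs in tests.items():
            key = (test in failing.get(package, set()), test in flaky.get(package, set()))
            categorized[key].append((package, test, logs))

    # Failing non-flaky first, flaky successes last:
    for key in [(True, False), (True, True), (False, False), (False, True)]:
        for package, test, _ in categorized[key]:
            print(key, package, test)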

 @task
40 changes: 40 additions & 0 deletions tasks/unit_tests/e2e_testing_tests.py
@@ -0,0 +1,40 @@
+import unittest
+from unittest.mock import MagicMock, patch
+
+from tasks.new_e2e_tests import post_process_output, pretty_print_logs
+
+
+class TestE2ETesting(unittest.TestCase):
+    @patch("tasks.new_e2e_tests.pretty_print_test_logs")
+    @patch("tasks.libs.common.utils.running_in_ci", new=MagicMock(return_value=True))
+    def test_pretty_print(self, p):
+        flakes_file = "tasks/unit_tests/testdata/flakes_2.yaml"
+        path = "tasks/unit_tests/testdata/test_output_failure_marker.json"
+
+        pretty_print_logs(path, post_process_output(path), flakes_file=flakes_file)
+
+        # Two categories expected: failing / flaky, then successful / non-flaky
+        self.assertEqual(p.call_count, 2)
+        args1 = p.call_args_list[0][0][0][0]
+        args2 = p.call_args_list[1][0][0][0]
+        args3 = p.call_args_list[1][0][0][1]
+        self.assertEqual(args1[1], "TestGetPayload")
+        self.assertEqual(args2[1], "TestGetPayloadContainerized")
+        self.assertEqual(args3[1], "TestGetPayloadContainerizedWithDocker0")
+
+    @patch("tasks.new_e2e_tests.pretty_print_test_logs")
+    @patch("tasks.libs.common.utils.running_in_ci", new=MagicMock(return_value=True))
+    def test_pretty_print2(self, p=None):
+        flakes_file = "tasks/unit_tests/testdata/flakes_1.yaml"
+        path = "tasks/unit_tests/testdata/test_output_failure_no_marker.json"
+
+        pretty_print_logs(path, post_process_output(path), flakes_file=flakes_file)
+
+        # Two categories expected: failing / flaky, then successful / non-flaky
+        self.assertEqual(p.call_count, 2)
+        args1 = p.call_args_list[0][0][0][0]
+        args2 = p.call_args_list[1][0][0][0]
+        args3 = p.call_args_list[1][0][0][1]
+        self.assertEqual(args1[1], "TestGetPayload")
+        self.assertEqual(args2[1], "TestFilterDev")
+        self.assertEqual(args3[1], "TestGetTimeout")

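Both tests rely on unittest.mock call inspection: p.call_args_list[i][0] is the positional-argument tuple of the i-th call, so p.call_args_list[0][0][0][0] drills down to the first (package, test, logs) entry of the logs_to_print list passed to the first pretty_print_test_logs call. A tiny illustration of that indexing:

    from unittest.mock import MagicMock

    p = MagicMock()
    p([("pkg/a", "TestGetPayload", ["..."])], 250000)

    logs_to_print = p.call_args_list[0][0][0]   # first call, positional args, first arg
    assert logs_to_print[0][1] == "TestGetPayload"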