Skip to content

Commit

Permalink
Revert "revert static quality gates" (#33710)
Browse files Browse the repository at this point in the history
  • Loading branch information
Pythyu authored Feb 4, 2025
1 parent 8454f56 commit d5b752a
Show file tree
Hide file tree
Showing 14 changed files with 701 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
/.gitlab/binary_build/include.yml @DataDog/agent-devx-infra
/.gitlab/binary_build/linux.yml @DataDog/agent-devx-infra @DataDog/agent-delivery
/.gitlab/functional_test/include.yml @DataDog/agent-devx-infra
/.gitlab/functional_test/static_quality_gate.yml @DataDog/agent-delivery
/.gitlab/install_script_testing/install_script_testing.yml @DataDog/agent-delivery @DataDog/container-ecosystems
/.gitlab/integration_test/dogstatsd.yml @DataDog/agent-devx-infra @DataDog/agent-metric-pipelines
/.gitlab/integration_test/include.yml @DataDog/agent-devx-infra
Expand Down Expand Up @@ -588,6 +589,8 @@
/tasks/kernel_matrix_testing/ @DataDog/ebpf-platform
/tasks/ebpf_verifier/ @DataDog/ebpf-platform
/tasks/trace_agent.py @DataDog/agent-apm
/tasks/quality_gates.py @DataDog/agent-delivery
/tasks/static_quality_gates/ @DataDog/agent-delivery
/tasks/rtloader.py @DataDog/agent-metric-pipelines
/tasks/security_agent.py @DataDog/agent-security
/tasks/sds.py @DataDog/agent-log-pipelines
Expand Down Expand Up @@ -650,6 +653,7 @@
/test/new-e2e/tests/installer/script @DataDog/fleet @DataDog/data-jobs-monitoring
/test/new-e2e/tests/gpu @Datadog/ebpf-platform
/test/otel/ @DataDog/opentelemetry @DataDog/opentelemetry-agent
/test/static/ @DataDog/agent-delivery
/test/system/ @DataDog/agent-runtimes
/test/system/dogstatsd/ @DataDog/agent-metric-pipelines
/test/benchmarks/apm_scripts/ @DataDog/agent-apm
Expand Down
1 change: 1 addition & 0 deletions .gitlab/JOBOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ docker_integration_tests @DataDog/container-integrations

# Functional test
serverless_cold_start_performance-deb_x64 @DataDog/serverless
static_quality_gates @DataDog/agent-delivery
oracle* @DataDog/database-monitoring

# E2E
Expand Down
1 change: 1 addition & 0 deletions .gitlab/functional_test/include.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

include:
- .gitlab/functional_test/serverless.yml
- .gitlab/functional_test/static_quality_gate.yml
- .gitlab/functional_test/regression_detector.yml
- .gitlab/kernel_matrix_testing/common.yml
- .gitlab/kernel_matrix_testing/system_probe.yml
Expand Down
24 changes: 24 additions & 0 deletions .gitlab/functional_test/static_quality_gate.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Static quality gates job: checks the size of freshly built Agent artifacts
# (deb package and docker image) against configured limits.
# Entry point: `inv quality-gates.parse-and-trigger-gates` (tasks/quality_gates.py).
static_quality_gates:
  stage: functional_test
  rules:
    # Skipped on main / release branches, runs everywhere else on success.
    - !reference [.except_main_or_release_branch]
    - when: on_success
  image: registry.ddbuild.io/ci/datadog-agent-buildimages/docker_x64$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES
  tags: ["arch:amd64"]
  # The gates measure the artifacts produced by these build jobs.
  needs:
    - job: agent_deb-x64-a7
      artifacts: true
    - job: docker_build_agent7
      artifacts: true
  # Static Quality Gates aren't enforced until Q1
  allow_failure: true
  script:
    # DockerHub login for build to limit rate limit when pulling base images
    - DOCKER_LOGIN=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $DOCKER_REGISTRY_RO user) || exit $?
    - $CI_PROJECT_DIR/tools/ci/fetch_secret.sh $DOCKER_REGISTRY_RO token | crane auth login --username "$DOCKER_LOGIN" --password-stdin "$DOCKER_REGISTRY_URL"
    - EXIT="${PIPESTATUS[0]}"; if [ $EXIT -ne 0 ]; then echo "Unable to locate credentials needs gitlab runner restart"; exit $EXIT; fi
    # `datadog-ci` relies on `DATADOG_API_KEY` so we get that here.
    - DATADOG_API_KEY="$("$CI_PROJECT_DIR"/tools/ci/fetch_secret.sh "$AGENT_API_KEY_ORG2" token)" || exit $?; export DATADOG_API_KEY
    - export DD_API_KEY="$DATADOG_API_KEY"
    # GitHub app credentials are needed to post/update the PR comment.
    - !reference [ .setup_agent_github_app ]
    - inv quality-gates.parse-and-trigger-gates
2 changes: 2 additions & 0 deletions tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
pre_commit,
process_agent,
protobuf,
quality_gates,
release,
rtloader,
sds,
Expand Down Expand Up @@ -184,6 +185,7 @@
ns.add_collection(issue)
ns.add_collection(package)
ns.add_collection(pipeline)
ns.add_collection(quality_gates)
ns.add_collection(protobuf)
ns.add_collection(notes)
ns.add_collection(notify)
Expand Down
142 changes: 142 additions & 0 deletions tasks/quality_gates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import os
import traceback
import typing

import yaml
from invoke import task

from tasks.github_tasks import pr_commenter
from tasks.libs.ciproviders.github_api import GithubAPI
from tasks.libs.common.color import color_message
from tasks.static_quality_gates.lib.gates_lib import GateMetricHandler

# Emoji used when reporting gate results (stdout report and PR comment).
FAIL_CHAR = "❌"
SUCCESS_CHAR = "✅"

# Markdown section header + results-table header for the PR comment.
# The placeholder is the section title ("Info" or "Error").
body_pattern = """### {}
|Result|Quality gate|On disk size|On disk size limit|On wire size|On wire size limit|
|----|----|----|----|----|----|
"""

# Collapsible markdown section listing full failure details per gate;
# rows are appended per failing gate and the block is closed with </details>.
body_error_footer_pattern = """<details>
<summary>Gate failure full details</summary>
|Quality gate|Error type|Error message|
|----|---|--------|
"""


def display_pr_comment(
    ctx, final_state: bool, gate_states: list[dict[str, typing.Any]], metric_handler: GateMetricHandler
):
    """
    Post a comment on the current PR summarizing the static quality gates results.

    :param ctx: Invoke task context
    :param final_state: Overall result of the quality gates run (True == success)
    :param gate_states: Per-gate result dicts ("name", "state", "error_type", "message")
    :param metric_handler: Holds the size metrics collected by each gate
    :return:
    """
    title = f"Static quality checks {SUCCESS_CHAR if final_state else FAIL_CHAR}"
    info_section = body_pattern.format("Info")
    error_section = body_pattern.format("Error")
    error_details = body_error_footer_pattern

    has_error = False
    has_info = False
    # Failing gates (error_type set -> key False) sort before successful ones.
    for gate in sorted(gate_states, key=lambda state: state["error_type"] is None):
        gate_name = gate["name"]

        def fmt(metric_name, _gate=gate_name):
            # A gate that crashed before registering its metrics has no entry.
            try:
                return metric_handler.get_formatted_metric(_gate, metric_name)
            except KeyError:
                return "DataNotFound"

        size_cells = "|".join(
            fmt(metric)
            for metric in (
                "current_on_disk_size",
                "max_on_disk_size",
                "current_on_wire_size",
                "max_on_wire_size",
            )
        )
        if gate["error_type"] is None:
            info_section += f"|{SUCCESS_CHAR}|{gate_name}|{size_cells}|\n"
            has_info = True
        else:
            error_section += f"|{FAIL_CHAR}|{gate_name}|{size_cells}|\n"
            failure_message = gate["message"].replace("\n", "<br>")
            error_details += f"|{gate_name}|{gate['error_type']}|{failure_message}|\n"
            has_error = True

    error_details += "\n</details>\n"
    error_block = error_section + error_details if has_error else ""
    info_block = info_section if has_info else ""
    body = f"Please find below the results from static quality gates\n{error_block}\n\n{info_block}"

    pr_commenter(ctx, title=title, body=body)


def _print_quality_gates_report(gate_states: list[dict[str, typing.Any]]):
    """Print a colored per-gate summary of the quality gates run to stdout."""
    print(color_message("======== Static Quality Gates Report ========", "magenta"))
    # Successful gates (error_type is None -> key False) print first, failures after.
    for gate in sorted(gate_states, key=lambda state: state["error_type"] is not None):
        name = gate["name"]
        error_type = gate["error_type"]
        if error_type is None:
            print(color_message(f"Gate {name} succeeded {SUCCESS_CHAR}", "blue"))
            continue
        if error_type == "AssertionError":
            # The gate ran but one of its size assertions failed.
            reason = f"because of the following assertion failures :\n{gate['message']}"
        else:
            # The gate crashed; the message carries the formatted traceback.
            reason = f"with the following stack trace :\n{gate['message']}"
        print(color_message(f"Gate {name} failed {FAIL_CHAR} {reason}", "orange"))


@task
def parse_and_trigger_gates(ctx, config_path="test/static/static_quality_gates.yml"):
    """
    Parse the static quality gates configuration file and execute every gate it declares.

    Results are printed to stdout, tagged on the CI job via datadog-ci, pushed to
    Datadog as metrics and, when the current branch has an open PR, posted as a PR comment.

    :param ctx: Invoke context
    :param config_path: Static quality gates configuration file path
    :return:
    """
    with open(config_path) as file:
        config = yaml.safe_load(file)

    # Each top-level key of the config is expected to be the name of a module inside
    # tasks/static_quality_gates/ exposing an `entrypoint(**kwargs)` function.
    gate_list = list(config.keys())
    quality_gates_mod = __import__("tasks.static_quality_gates", fromlist=gate_list)
    print(f"{config_path} correctly parsed !")
    # Metric handler is shared across gates; reads CI env vars for tagging.
    metric_handler = GateMetricHandler(
        git_ref=os.environ["CI_COMMIT_REF_SLUG"], bucket_branch=os.environ["BUCKET_BRANCH"]
    )
    newline_tab = "\n\t"
    print(f"The following gates are going to run:{newline_tab}- {(newline_tab+'- ').join(gate_list)}")
    final_state = "success"
    gate_states = []
    for gate in gate_list:
        # Gate inputs come straight from its YAML entry, augmented with the invoke
        # context and the shared metric handler.
        gate_inputs = config[gate]
        gate_inputs["ctx"] = ctx
        gate_inputs["metricHandler"] = metric_handler
        try:
            gate_mod = getattr(quality_gates_mod, gate)
            gate_mod.entrypoint(**gate_inputs)
            print(f"Gate {gate} succeeded !")
            gate_states.append({"name": gate, "state": True, "error_type": None, "message": None})
        except AssertionError as e:
            # AssertionError: the gate ran but a configured size limit was exceeded.
            print(f"Gate {gate} failed ! (AssertionError)")
            final_state = "failure"
            gate_states.append({"name": gate, "state": False, "error_type": "AssertionError", "message": str(e)})
        except Exception:
            # Anything else is an unexpected crash inside the gate; keep the traceback.
            print(f"Gate {gate} failed ! (StackTrace)")
            final_state = "failure"
            gate_states.append(
                {"name": gate, "state": False, "error_type": "StackTrace", "message": traceback.format_exc()}
            )
    # Tag the CI job with the overall result so it is queryable in CI Visibility.
    ctx.run(f"datadog-ci tag --level job --tags static_quality_gates:\"{final_state}\"")

    _print_quality_gates_report(gate_states)

    metric_handler.send_metrics_to_datadog()

    github = GithubAPI()
    branch = os.environ["CI_COMMIT_BRANCH"]
    # Only post a comment when the branch actually has an open PR.
    if github.get_pr_for_branch(branch).totalCount > 0:
        display_pr_comment(ctx, final_state == "success", gate_states, metric_handler)
Empty file.
173 changes: 173 additions & 0 deletions tasks/static_quality_gates/lib/gates_lib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
import glob
import math
import os
import types
from datetime import datetime
from types import SimpleNamespace

from invoke.exceptions import Exit

from tasks.libs.common.color import color_message
from tasks.libs.common.constants import ORIGIN_CATEGORY, ORIGIN_PRODUCT, ORIGIN_SERVICE
from tasks.libs.common.datadog_api import create_gauge, send_metrics
from tasks.libs.common.utils import get_metric_origin


def argument_extractor(entry_args, **kwargs) -> SimpleNamespace:
    """
    Extract arguments from a parsed quality gate entry, optionally pre-processing them.

    :param entry_args: Dictionary containing parsed arguments from a static quality gate
    :param kwargs: Keys to extract from ``entry_args``; a key's value may be a callable
        (plain function, lambda, builtin such as ``int``, ``functools.partial``, ...)
        applied as a pre-processing step to the raw entry value, or any non-callable
        placeholder to copy the raw value unchanged
    :return: SimpleNamespace with the extracted arguments as attributes
    :raises KeyError: if a requested key is missing from ``entry_args``
    """
    extracted = {}
    for key, spec in kwargs.items():
        # Accept any callable as a pre-processor; the previous implementation only
        # recognized plain functions (types.FunctionType), silently skipping builtins
        # and functools.partial objects.
        extracted[key] = spec(entry_args[key]) if callable(spec) else entry_args[key]
    return SimpleNamespace(**extracted)


def byte_to_string(size):
    """Render a byte count as a human readable string, e.g. 1536 -> '1.5KiB'.

    Falsy input (0, None) renders as '0B'; values are rounded to 2 decimals.
    """
    if not size:
        return "0B"
    units = ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")
    exponent = int(math.log(size, 1024))
    scaled = round(size / math.pow(1024, exponent), 2)
    return f"{scaled}{units[exponent]}"


def string_to_byte(size: str):
    """
    Parse a human readable size string into a number of bytes.

    Accepts binary-unit suffixes ("1.5MiB", "1KiB"), plain byte suffixes ("10B")
    and bare integers ("512"). Falsy input (None, "") yields 0.

    :param size: Size string to parse
    :return: Size in bytes as an int
    :raises ValueError: if the numeric part cannot be parsed
    """
    if not size:
        return 0
    units = ("KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")
    # KiB is 1024**1, MiB is 1024**2, ...
    for power, unit in enumerate(units, start=1):
        if unit in size:
            # An explicit unit match must win even for a zero value: the previous
            # `if value:` truthiness test sent "0KiB" down the plain-"B" branch,
            # where int("0Ki") raised ValueError.
            return int(float(size.replace(unit, "")) * 1024**power)
    if "B" in size:
        return int(size.replace("B", ""))
    return int(size)


def read_byte_input(byte_input):
    """Normalize a size input: strings are parsed with string_to_byte, numbers pass through."""
    return string_to_byte(byte_input) if isinstance(byte_input, str) else byte_input


def find_package_path(flavor, package_os, arch):
    """Locate the unique version-7 package of `flavor` for `package_os`/`arch`.

    Searches $OMNIBUS_PACKAGE_DIR; raises Exit when zero or multiple files match.
    """
    package_dir = os.environ['OMNIBUS_PACKAGE_DIR']
    # Debian packages are named flavor_7...arch.deb, RPMs flavor-7...arch.rpm.
    if package_os == 'debian':
        separator, extension = '_', 'deb'
    else:
        separator, extension = '-', 'rpm'
    glob_pattern = f'{package_dir}/{flavor}{separator}7*{arch}.{extension}'
    matches = glob.glob(glob_pattern)
    if len(matches) > 1:
        raise Exit(code=1, message=color_message(f"Too many files matching {glob_pattern}: {matches}", "red"))
    if not matches:
        raise Exit(code=1, message=color_message(f"Couldn't find any file matching {glob_pattern}", "red"))
    return matches[0]


class GateMetricHandler:
    """Collects per-gate size metrics and tags, and ships them to Datadog as gauges."""

    # (metric key registered by gates, Datadog metric name) pairs emitted per gate.
    GATE_METRICS = (
        ("current_on_wire_size", "datadog.agent.static_quality_gate.on_wire_size"),
        ("current_on_disk_size", "datadog.agent.static_quality_gate.on_disk_size"),
        ("max_on_wire_size", "datadog.agent.static_quality_gate.max_allowed_on_wire_size"),
        ("max_on_disk_size", "datadog.agent.static_quality_gate.max_allowed_on_disk_size"),
    )

    def __init__(self, git_ref, bucket_branch):
        # metrics: gate name -> {metric name -> byte value}
        self.metrics = {}
        # metadata: gate name -> {tag name -> tag value}
        self.metadata = {}
        self.git_ref = git_ref
        self.bucket_branch = bucket_branch

    def get_formatted_metric(self, gate_name, metric_name):
        """Return the metric as a human readable size string.

        :raises KeyError: if the gate or metric was never registered
        """
        return byte_to_string(self.metrics[gate_name][metric_name])

    def register_metric(self, gate_name, metric_name, metric_value):
        """Record one metric value (in bytes) for a gate."""
        self.metrics.setdefault(gate_name, {})[metric_name] = metric_value

    def register_gate_tags(self, gate, **kwargs):
        """Record the tags to attach to every metric series of `gate`."""
        self.metadata.setdefault(gate, {}).update(kwargs)

    def _generate_series(self):
        """Build the list of gauge series for all gates, or None when untagged CI context."""
        if not self.git_ref or not self.bucket_branch:
            return None

        # datetime.now().timestamp() yields a correct Unix epoch. The previous
        # datetime.utcnow().timestamp() treated the naive UTC datetime as *local*
        # time, shifting the reported timestamp by the runner's UTC offset.
        timestamp = int(datetime.now().timestamp())
        series = []
        for gate, gate_metrics in self.metrics.items():
            if self.metadata.get(gate) is None:
                # A gate that never called register_gate_tags cannot be tagged correctly.
                print(color_message(f"[WARN] gate {gate} doesn't have gate tags registered ! skipping...", "orange"))
                continue

            common_tags = [
                f"git_ref:{self.git_ref}",
                f"bucket_branch:{self.bucket_branch}",
            ]
            common_tags += [f"{tag}:{value}" for tag, value in self.metadata[gate].items()]

            for metric_key, metric_name in self.GATE_METRICS:
                series.append(
                    create_gauge(
                        metric_name,
                        timestamp,
                        gate_metrics[metric_key],
                        tags=common_tags,
                        metric_origin=get_metric_origin(ORIGIN_PRODUCT, ORIGIN_CATEGORY, ORIGIN_SERVICE),
                        unit="byte",
                    )
                )
        return series

    def send_metrics_to_datadog(self):
        """Generate the series and submit them; no-op (beyond logging) when series is empty/None."""
        series = self._generate_series()

        print(color_message("Data collected:", "blue"))
        print(series)
        if series:
            print(color_message("Sending metrics to Datadog", "blue"))
            send_metrics(series=series)
        print(color_message("Done", "green"))
Loading

0 comments on commit d5b752a

Please sign in to comment.