diff --git a/scanpipe/models.py b/scanpipe/models.py index e59fddb2f..5621a9f2f 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -1232,6 +1232,7 @@ def add_message( details=None, exception=None, resource=None, + package=None, ): """ Create a ProjectMessage record for this Project. @@ -1253,9 +1254,15 @@ def add_message( description = str(exception) details = details or {} + + # Do not change the following field names as those have special behavior in + # templates. if resource: - # Do not change this field name as it has special behavior in templates. details["resource_path"] = resource.path + if package: + details.update( + {"package_url": package.package_url, "package_uuid": package.uuid} + ) return ProjectMessage.objects.create( project=self, @@ -1273,11 +1280,12 @@ def add_info( details=None, exception=None, resource=None, + package=None, ): """Create an INFO ProjectMessage record for this project.""" severity = ProjectMessage.Severity.INFO return self.add_message( - severity, description, model, details, exception, resource + severity, description, model, details, exception, resource, package ) def add_warning( @@ -1287,11 +1295,12 @@ def add_warning( details=None, exception=None, resource=None, + package=None, ): """Create a WARNING ProjectMessage record for this project.""" severity = ProjectMessage.Severity.WARNING return self.add_message( - severity, description, model, details, exception, resource + severity, description, model, details, exception, resource, package ) def add_error( @@ -1301,11 +1310,12 @@ def add_error( details=None, exception=None, resource=None, + package=None, ): """Create an ERROR ProjectMessage record using for this project.""" severity = ProjectMessage.Severity.ERROR return self.add_message( - severity, description, model, details, exception, resource + severity, description, model, details, exception, resource, package ) def get_absolute_url(self): diff --git a/scanpipe/pipes/purldb.py b/scanpipe/pipes/purldb.py index 5ed1962ff..e643fb232 100644 --- a/scanpipe/pipes/purldb.py +++ b/scanpipe/pipes/purldb.py @@ -69,6 +69,27 @@ class PurlDBException(Exception): # This key can be used for filtering ENRICH_EXTRA_DATA_KEY = "enrich_with_purldb" +# Subset of fields kept when multiple entries are found in the PurlDB. +CROSS_VERSION_COMMON_FIELDS = [ + "primary_language", + "description", + "parties", + "keywords", + "homepage_url", + "bug_tracking_url", + "code_view_url", + "vcs_url", + "repository_homepage_url", + "copyright", + "holder", + "declared_license_expression", + "declared_license_expression_spdx", + "other_license_expression", + "other_license_expression_spdx", + "extracted_license_statement", + "notice_text", +] + def is_configured(): """Return True if the required PurlDB settings have been set.""" @@ -101,7 +122,7 @@ def check_service_availability(*args): raise Exception(f"{label} is not available.") -def request_get(url, payload=None, timeout=DEFAULT_TIMEOUT): +def request_get(url, payload=None, timeout=DEFAULT_TIMEOUT, raise_on_error=False): """Wrap the HTTP request calls on the API.""" if not url: return @@ -112,13 +133,17 @@ def request_get(url, payload=None, timeout=DEFAULT_TIMEOUT): if payload: params.update(payload) - logger.debug(f"{label}: url={url} params={params}") + logger.debug(f"[{label}] Requesting URL: {url} with params: {params}") try: response = session.get(url, params=params, timeout=timeout) response.raise_for_status() return response.json() - except (requests.RequestException, ValueError, TypeError) as exception: - logger.debug(f"{label} [Exception] {exception}") + except requests.RequestException: # raise_for_status + return + except (ValueError, TypeError) as exception: + logger.debug(f"[{label}] Request to {url} failed with exception: {exception}") + if raise_on_error: + raise PurlDBException(exception) def request_post(url, data=None, headers=None, files=None, timeout=DEFAULT_TIMEOUT): @@ -353,12 +378,6 @@ def populate_purldb_with_discovered_dependencies(project, logger=logger.info): ) -def get_package_by_purl(package_url): - """Get a Package details entry providing its `package_url`.""" - if results := find_packages({"purl": str(package_url)}): - return results[0] - - def find_packages(payload): """Get Packages using provided `payload` filters on the PurlDB package list.""" package_api_url = f"{PURLDB_API_URL}packages/" @@ -367,6 +386,31 @@ def find_packages(payload): return response.get("results") +def get_packages_for_purl(package_url): + """Get Package details entries providing a `package_url`.""" + payload = { + "purl": str(package_url), + "sort": "-version", + } + return find_packages(payload) + + +def collect_data_for_purl(package_url, raise_on_error=False): + collect_api_url = f"{PURLDB_API_URL}collect/" + payload = { + "purl": str(package_url), + "sort": "-version", + } + purldb_entries = request_get( + url=collect_api_url, + payload=payload, + raise_on_error=raise_on_error, + ) + + if purldb_entries: + return purldb_entries + + def get_next_download_url(timeout=DEFAULT_TIMEOUT, api_url=PURLDB_API_URL): """ Return the ScannableURI UUID, download URL, and pipelines for the next @@ -464,12 +508,43 @@ def get_run_status(run, **kwargs): def enrich_package(package): """Enrich the provided ``package`` with the PurlDB data.""" - purldb_entry = get_package_by_purl(package.package_url) - if purldb_entry: - package_data = _clean_package_data(purldb_entry) - if updated_fields := package.update_from_data(package_data): - package.update_extra_data({ENRICH_EXTRA_DATA_KEY: updated_fields}) - return updated_fields + package_url = package.package_url + project = package.project + + try: + purldb_entries = collect_data_for_purl(package_url, raise_on_error=True) + except PurlDBException as exception: + project.add_error(model="PurlDB", exception=exception, package=package) + return + + if not purldb_entries: + return + + if len(purldb_entries) == 1: + # Single match, all the PurlDB data are used to enrich the package. + purldb_entry = purldb_entries[0] + else: + project.add_warning( + model="PurlDB", + description=( + f'Multiple entries found in the PurlDB for "{package_url}". ' + f"Using data from the most recent version." + ), + package=package, + ) + # Do not set version-specific fields, such as the download_url. + purldb_entry = { + field: value + for field, value in purldb_entries[0].items() + if field in CROSS_VERSION_COMMON_FIELDS + } + + # Remove package_uid as it is not relevant to capture the value from PurlDB. + purldb_entry.pop("package_uid", None) + package_data = _clean_package_data(purldb_entry) + if updated_fields := package.update_from_data(package_data): + package.update_extra_data({ENRICH_EXTRA_DATA_KEY: updated_fields}) + return updated_fields def enrich_discovered_packages(project, logger=logger.info): diff --git a/scanpipe/templates/scanpipe/message_list.html b/scanpipe/templates/scanpipe/message_list.html index 5c4a53e84..f94b3f5a2 100644 --- a/scanpipe/templates/scanpipe/message_list.html +++ b/scanpipe/templates/scanpipe/message_list.html @@ -46,6 +46,14 @@ {% endif %} + {% if message.details.package_uuid %} +
+ {% endif %} {% for key, value in message.details.items %} {{ key }}: {{ value }}