diff --git a/scanpipe/models.py b/scanpipe/models.py index e59fddb2f..5621a9f2f 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -1232,6 +1232,7 @@ def add_message( details=None, exception=None, resource=None, + package=None, ): """ Create a ProjectMessage record for this Project. @@ -1253,9 +1254,15 @@ def add_message( description = str(exception) details = details or {} + + # Do not change the following field names as those have special behavior in + # templates. if resource: - # Do not change this field name as it has special behavior in templates. details["resource_path"] = resource.path + if package: + details.update( + {"package_url": package.package_url, "package_uuid": package.uuid} + ) return ProjectMessage.objects.create( project=self, @@ -1273,11 +1280,12 @@ def add_info( details=None, exception=None, resource=None, + package=None, ): """Create an INFO ProjectMessage record for this project.""" severity = ProjectMessage.Severity.INFO return self.add_message( - severity, description, model, details, exception, resource + severity, description, model, details, exception, resource, package ) def add_warning( @@ -1287,11 +1295,12 @@ def add_warning( details=None, exception=None, resource=None, + package=None, ): """Create a WARNING ProjectMessage record for this project.""" severity = ProjectMessage.Severity.WARNING return self.add_message( - severity, description, model, details, exception, resource + severity, description, model, details, exception, resource, package ) def add_error( @@ -1301,11 +1310,12 @@ def add_error( details=None, exception=None, resource=None, + package=None, ): """Create an ERROR ProjectMessage record using for this project.""" severity = ProjectMessage.Severity.ERROR return self.add_message( - severity, description, model, details, exception, resource + severity, description, model, details, exception, resource, package ) def get_absolute_url(self): diff --git a/scanpipe/pipes/purldb.py b/scanpipe/pipes/purldb.py index 5ed1962ff..e643fb232 100644 --- a/scanpipe/pipes/purldb.py +++ b/scanpipe/pipes/purldb.py @@ -69,6 +69,27 @@ class PurlDBException(Exception): # This key can be used for filtering ENRICH_EXTRA_DATA_KEY = "enrich_with_purldb" +# Subset of fields kept when multiple entries are found in the PurlDB. +CROSS_VERSION_COMMON_FIELDS = [ + "primary_language", + "description", + "parties", + "keywords", + "homepage_url", + "bug_tracking_url", + "code_view_url", + "vcs_url", + "repository_homepage_url", + "copyright", + "holder", + "declared_license_expression", + "declared_license_expression_spdx", + "other_license_expression", + "other_license_expression_spdx", + "extracted_license_statement", + "notice_text", +] + def is_configured(): """Return True if the required PurlDB settings have been set.""" @@ -101,7 +122,7 @@ def check_service_availability(*args): raise Exception(f"{label} is not available.") -def request_get(url, payload=None, timeout=DEFAULT_TIMEOUT): +def request_get(url, payload=None, timeout=DEFAULT_TIMEOUT, raise_on_error=False): """Wrap the HTTP request calls on the API.""" if not url: return @@ -112,13 +133,17 @@ def request_get(url, payload=None, timeout=DEFAULT_TIMEOUT): if payload: params.update(payload) - logger.debug(f"{label}: url={url} params={params}") + logger.debug(f"[{label}] Requesting URL: {url} with params: {params}") try: response = session.get(url, params=params, timeout=timeout) response.raise_for_status() return response.json() - except (requests.RequestException, ValueError, TypeError) as exception: - logger.debug(f"{label} [Exception] {exception}") + except requests.RequestException: # raise_for_status + return + except (ValueError, TypeError) as exception: + logger.debug(f"[{label}] Request to {url} failed with exception: {exception}") + if raise_on_error: + raise PurlDBException(exception) def request_post(url, data=None, headers=None, files=None, timeout=DEFAULT_TIMEOUT): @@ -353,12 +378,6 @@ def populate_purldb_with_discovered_dependencies(project, logger=logger.info): ) -def get_package_by_purl(package_url): - """Get a Package details entry providing its `package_url`.""" - if results := find_packages({"purl": str(package_url)}): - return results[0] - - def find_packages(payload): """Get Packages using provided `payload` filters on the PurlDB package list.""" package_api_url = f"{PURLDB_API_URL}packages/" @@ -367,6 +386,31 @@ def find_packages(payload): return response.get("results") +def get_packages_for_purl(package_url): + """Get Package details entries providing a `package_url`.""" + payload = { + "purl": str(package_url), + "sort": "-version", + } + return find_packages(payload) + + +def collect_data_for_purl(package_url, raise_on_error=False): + collect_api_url = f"{PURLDB_API_URL}collect/" + payload = { + "purl": str(package_url), + "sort": "-version", + } + purldb_entries = request_get( + url=collect_api_url, + payload=payload, + raise_on_error=raise_on_error, + ) + + if purldb_entries: + return purldb_entries + + def get_next_download_url(timeout=DEFAULT_TIMEOUT, api_url=PURLDB_API_URL): """ Return the ScannableURI UUID, download URL, and pipelines for the next @@ -464,12 +508,43 @@ def get_run_status(run, **kwargs): def enrich_package(package): """Enrich the provided ``package`` with the PurlDB data.""" - purldb_entry = get_package_by_purl(package.package_url) - if purldb_entry: - package_data = _clean_package_data(purldb_entry) - if updated_fields := package.update_from_data(package_data): - package.update_extra_data({ENRICH_EXTRA_DATA_KEY: updated_fields}) - return updated_fields + package_url = package.package_url + project = package.project + + try: + purldb_entries = collect_data_for_purl(package_url, raise_on_error=True) + except PurlDBException as exception: + project.add_error(model="PurlDB", exception=exception, package=package) + return + + if not purldb_entries: + return + + if len(purldb_entries) == 1: + # Single match, all the PurlDB data are used to enrich the package. + purldb_entry = purldb_entries[0] + else: + project.add_warning( + model="PurlDB", + description=( + f'Multiple entries found in the PurlDB for "{package_url}". ' + f"Using data from the most recent version." + ), + package=package, + ) + # Do not set version-specific fields, such as the download_url. + purldb_entry = { + field: value + for field, value in purldb_entries[0].items() + if field in CROSS_VERSION_COMMON_FIELDS + } + + # Remove package_uid as it is not relevant to capture the value from PurlDB. + purldb_entry.pop("package_uid", None) + package_data = _clean_package_data(purldb_entry) + if updated_fields := package.update_from_data(package_data): + package.update_extra_data({ENRICH_EXTRA_DATA_KEY: updated_fields}) + return updated_fields def enrich_discovered_packages(project, logger=logger.info): diff --git a/scanpipe/templates/scanpipe/message_list.html b/scanpipe/templates/scanpipe/message_list.html index 5c4a53e84..f94b3f5a2 100644 --- a/scanpipe/templates/scanpipe/message_list.html +++ b/scanpipe/templates/scanpipe/message_list.html @@ -46,6 +46,14 @@ {% endif %} + {% if message.details.package_uuid %} +
+ Package: + + {{ message.details.package_url|default_if_none:message.details.package_uuid }} + +
+ {% endif %} {% for key, value in message.details.items %} {{ key }}: {{ value }}
{% endfor %} diff --git a/scanpipe/templates/scanpipe/tabset/tab_purldb_content.html b/scanpipe/templates/scanpipe/tabset/tab_purldb_content.html index faf985ec6..4f8d2f7b4 100644 --- a/scanpipe/templates/scanpipe/tabset/tab_purldb_content.html +++ b/scanpipe/templates/scanpipe/tabset/tab_purldb_content.html @@ -4,6 +4,13 @@ You are looking at the details for this software package as defined in the PurlDB which was scanned automatically from a public source. + {% if has_multiple_purldb_entries %} +
+ + Multiple packages were found in the PurlDB for "{{ object.package_url }}". + The data below corresponds to the most recent version of this package. +
+ {% endif %} {% include 'scanpipe/tabset/tab_default.html' %} {% else %}
diff --git a/scanpipe/tests/pipes/test_purldb.py b/scanpipe/tests/pipes/test_purldb.py index 59c70cf9e..af34fd109 100644 --- a/scanpipe/tests/pipes/test_purldb.py +++ b/scanpipe/tests/pipes/test_purldb.py @@ -237,17 +237,17 @@ def test_scanpipe_pipes_purldb_create_project_name(self): project_name = purldb.create_project_name(download_url, scannable_uri_uuid) self.assertEqual("httpsregistrynpmjsorgasdf-asdf-101tgz-52b2930d", project_name) - @mock.patch("scanpipe.pipes.purldb.get_package_by_purl") - def test_scanpipe_pipes_purldb_enrich_package(self, mock_get_package_by_purl): + @mock.patch("scanpipe.pipes.purldb.collect_data_for_purl") + def test_scanpipe_pipes_purldb_enrich_package(self, mock_collect_data): package1 = make_package(self.project1, package_url="pkg:npm/csvtojson@2.0.10") - mock_get_package_by_purl.return_value = {} + mock_collect_data.return_value = [] updated_fields = purldb.enrich_package(package=package1) self.assertIsNone(updated_fields) purldb_entry_file = self.data / "purldb" / "csvtojson-2.0.10.json" purldb_entry = json.loads(purldb_entry_file.read_text()) - mock_get_package_by_purl.return_value = purldb_entry + mock_collect_data.return_value = [purldb_entry] updated_fields = purldb.enrich_package(package=package1) self.assertTrue(updated_fields) self.assertIn("homepage_url", updated_fields) @@ -258,13 +258,11 @@ def test_scanpipe_pipes_purldb_enrich_package(self, mock_get_package_by_purl): self.assertEqual(purldb_entry.get("sha256"), package1.sha256) self.assertEqual(purldb_entry.get("copyright"), package1.copyright) - @mock.patch("scanpipe.pipes.purldb.get_package_by_purl") - def test_scanpipe_pipes_purldb_enrich_discovered_packages( - self, mock_get_package_by_purl - ): + @mock.patch("scanpipe.pipes.purldb.collect_data_for_purl") + def test_scanpipe_pipes_purldb_enrich_discovered_packages(self, mock_collect_data): package1 = make_package(self.project1, package_url="pkg:npm/csvtojson@2.0.10") - mock_get_package_by_purl.return_value = {} + mock_collect_data.return_value = [] buffer = io.StringIO() updated_package_count = purldb.enrich_discovered_packages( project=self.project1, @@ -276,7 +274,7 @@ def test_scanpipe_pipes_purldb_enrich_discovered_packages( purldb_entry_file = self.data / "purldb" / "csvtojson-2.0.10.json" purldb_entry = json.loads(purldb_entry_file.read_text()) - mock_get_package_by_purl.return_value = purldb_entry + mock_collect_data.return_value = [purldb_entry] buffer = io.StringIO() updated_package_count = purldb.enrich_discovered_packages( project=self.project1, diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index 48c51d9bf..90d260b8e 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -1709,9 +1709,9 @@ def test_scanpipe_collect_symbols_tree_sitter_pipeline_integration(self): @mock.patch("scanpipe.pipes.purldb.is_available") @mock.patch("scanpipe.pipes.purldb.is_configured") - @mock.patch("scanpipe.pipes.purldb.get_package_by_purl") + @mock.patch("scanpipe.pipes.purldb.collect_data_for_purl") def test_scanpipe_enrich_with_purldb_pipeline_integration( - self, mock_get_package, mock_is_configured, mock_is_available + self, mock_collect_data, mock_is_configured, mock_is_available ): pipeline_name = "enrich_with_purldb" project1 = Project.objects.create(name="Analysis") @@ -1722,7 +1722,7 @@ def test_scanpipe_enrich_with_purldb_pipeline_integration( purldb_entry_file = self.data / "purldb" / "csvtojson-2.0.10.json" purldb_entry = json.loads(purldb_entry_file.read_text()) - mock_get_package.return_value = purldb_entry + mock_collect_data.return_value = [purldb_entry] run = project1.add_pipeline(pipeline_name) pipeline = run.make_pipeline_instance() diff --git a/scanpipe/tests/test_views.py b/scanpipe/tests/test_views.py index b8fdf5cd2..ba6507f34 100644 --- a/scanpipe/tests/test_views.py +++ b/scanpipe/tests/test_views.py @@ -1048,21 +1048,23 @@ def test_scanpipe_views_discovered_package_purldb_tab_view(self, mock_configured self.assertContains(response, "tab-purldb") self.assertContains(response, '
1: + context["has_multiple_purldb_entries"] = True return context