Skip to content

Commit

Permalink
Merge branch 'main' into 1325-webhooks-trigger
Browse files Browse the repository at this point in the history
  • Loading branch information
tdruez committed Jul 24, 2024
2 parents 4d3aadb + 2663ec0 commit fe1b5b1
Show file tree
Hide file tree
Showing 8 changed files with 149 additions and 45 deletions.
18 changes: 14 additions & 4 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1232,6 +1232,7 @@ def add_message(
details=None,
exception=None,
resource=None,
package=None,
):
"""
Create a ProjectMessage record for this Project.
Expand All @@ -1253,9 +1254,15 @@ def add_message(
description = str(exception)

details = details or {}

# Do not change the following field names as those have special behavior in
# templates.
if resource:
# Do not change this field name as it has special behavior in templates.
details["resource_path"] = resource.path
if package:
details.update(
{"package_url": package.package_url, "package_uuid": package.uuid}
)

return ProjectMessage.objects.create(
project=self,
Expand All @@ -1273,11 +1280,12 @@ def add_info(
details=None,
exception=None,
resource=None,
package=None,
):
"""Create an INFO ProjectMessage record for this project."""
severity = ProjectMessage.Severity.INFO
return self.add_message(
severity, description, model, details, exception, resource
severity, description, model, details, exception, resource, package
)

def add_warning(
Expand All @@ -1287,11 +1295,12 @@ def add_warning(
details=None,
exception=None,
resource=None,
package=None,
):
"""Create a WARNING ProjectMessage record for this project."""
severity = ProjectMessage.Severity.WARNING
return self.add_message(
severity, description, model, details, exception, resource
severity, description, model, details, exception, resource, package
)

def add_error(
Expand All @@ -1301,11 +1310,12 @@ def add_error(
details=None,
exception=None,
resource=None,
package=None,
):
"""Create an ERROR ProjectMessage record for this project."""
severity = ProjectMessage.Severity.ERROR
return self.add_message(
severity, description, model, details, exception, resource
severity, description, model, details, exception, resource, package
)

def get_absolute_url(self):
Expand Down
107 changes: 91 additions & 16 deletions scanpipe/pipes/purldb.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,27 @@ class PurlDBException(Exception):
# This key can be used for filtering
ENRICH_EXTRA_DATA_KEY = "enrich_with_purldb"

# Fields treated as common across the versions of a package.
# When the PurlDB returns multiple entries for a purl, only these fields are
# taken from the first (most recent) entry to enrich a package, so that
# version-specific values such as the download_url are left out.
CROSS_VERSION_COMMON_FIELDS = [
    "primary_language",
    "description",
    "parties",
    "keywords",
    "homepage_url",
    "bug_tracking_url",
    "code_view_url",
    "vcs_url",
    "repository_homepage_url",
    "copyright",
    "holder",
    "declared_license_expression",
    "declared_license_expression_spdx",
    "other_license_expression",
    "other_license_expression_spdx",
    "extracted_license_statement",
    "notice_text",
]


def is_configured():
"""Return True if the required PurlDB settings have been set."""
Expand Down Expand Up @@ -101,7 +122,7 @@ def check_service_availability(*args):
raise Exception(f"{label} is not available.")


def request_get(url, payload=None, timeout=DEFAULT_TIMEOUT):
def request_get(url, payload=None, timeout=DEFAULT_TIMEOUT, raise_on_error=False):
"""Wrap the HTTP request calls on the API."""
if not url:
return
Expand All @@ -112,13 +133,17 @@ def request_get(url, payload=None, timeout=DEFAULT_TIMEOUT):
if payload:
params.update(payload)

logger.debug(f"{label}: url={url} params={params}")
logger.debug(f"[{label}] Requesting URL: {url} with params: {params}")
try:
response = session.get(url, params=params, timeout=timeout)
response.raise_for_status()
return response.json()
except (requests.RequestException, ValueError, TypeError) as exception:
logger.debug(f"{label} [Exception] {exception}")
except requests.RequestException: # raise_for_status
return
except (ValueError, TypeError) as exception:
logger.debug(f"[{label}] Request to {url} failed with exception: {exception}")
if raise_on_error:
raise PurlDBException(exception)


def request_post(url, data=None, headers=None, files=None, timeout=DEFAULT_TIMEOUT):
Expand Down Expand Up @@ -353,12 +378,6 @@ def populate_purldb_with_discovered_dependencies(project, logger=logger.info):
)


def get_package_by_purl(package_url):
    """
    Return the first Package entry found in the PurlDB package list for the
    provided `package_url`, or None when the lookup yields no results.
    """
    matches = find_packages({"purl": str(package_url)})
    if not matches:
        return None
    return matches[0]
return results[0]


def find_packages(payload):
"""Get Packages using provided `payload` filters on the PurlDB package list."""
package_api_url = f"{PURLDB_API_URL}packages/"
Expand All @@ -367,6 +386,31 @@ def find_packages(payload):
return response.get("results")


def get_packages_for_purl(package_url):
    """
    Return the Package entries found in the PurlDB package list for the
    provided `package_url`.

    The lookup is delegated to `find_packages` with a ``-version`` sort
    filter added to the purl filter.
    """
    return find_packages({"purl": str(package_url), "sort": "-version"})


def collect_data_for_purl(package_url, raise_on_error=False):
    """
    Query the PurlDB ``collect/`` endpoint for the provided `package_url`.

    Return the response data when it is non-empty, None otherwise.
    The `raise_on_error` flag is forwarded as-is to `request_get`.
    """
    response_data = request_get(
        url=f"{PURLDB_API_URL}collect/",
        payload={"purl": str(package_url), "sort": "-version"},
        raise_on_error=raise_on_error,
    )
    # Normalize any empty/falsy response to None, as callers only test truthiness.
    return response_data or None


def get_next_download_url(timeout=DEFAULT_TIMEOUT, api_url=PURLDB_API_URL):
"""
Return the ScannableURI UUID, download URL, and pipelines for the next
Expand Down Expand Up @@ -464,12 +508,43 @@ def get_run_status(run, **kwargs):

def enrich_package(package):
"""Enrich the provided ``package`` with the PurlDB data."""
purldb_entry = get_package_by_purl(package.package_url)
if purldb_entry:
package_data = _clean_package_data(purldb_entry)
if updated_fields := package.update_from_data(package_data):
package.update_extra_data({ENRICH_EXTRA_DATA_KEY: updated_fields})
return updated_fields
package_url = package.package_url
project = package.project

try:
purldb_entries = collect_data_for_purl(package_url, raise_on_error=True)
except PurlDBException as exception:
project.add_error(model="PurlDB", exception=exception, package=package)
return

if not purldb_entries:
return

if len(purldb_entries) == 1:
# Single match, all the PurlDB data are used to enrich the package.
purldb_entry = purldb_entries[0]
else:
project.add_warning(
model="PurlDB",
description=(
f'Multiple entries found in the PurlDB for "{package_url}". '
f"Using data from the most recent version."
),
package=package,
)
# Do not set version-specific fields, such as the download_url.
purldb_entry = {
field: value
for field, value in purldb_entries[0].items()
if field in CROSS_VERSION_COMMON_FIELDS
}

# Remove package_uid as it is not relevant to capture the value from PurlDB.
purldb_entry.pop("package_uid", None)
package_data = _clean_package_data(purldb_entry)
if updated_fields := package.update_from_data(package_data):
package.update_extra_data({ENRICH_EXTRA_DATA_KEY: updated_fields})
return updated_fields


def enrich_discovered_packages(project, logger=logger.info):
Expand Down
8 changes: 8 additions & 0 deletions scanpipe/templates/scanpipe/message_list.html
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@
</a>
</div>
{% endif %}
{% if message.details.package_uuid %}
<div>
<strong>Package</strong>:
<a href="{% url 'package_detail' project.slug message.details.package_uuid %}" target="_blank">
{{ message.details.package_url|default_if_none:message.details.package_uuid }}
</a>
</div>
{% endif %}
{% for key, value in message.details.items %}
<strong>{{ key }}</strong>: {{ value }}<br>
{% endfor %}
Expand Down
7 changes: 7 additions & 0 deletions scanpipe/templates/scanpipe/tabset/tab_purldb_content.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
You are looking at the details for this software package as defined
in the PurlDB which was scanned automatically from a public source.
</div>
{% if has_multiple_purldb_entries %}
<div class="notification is-warning is-light has-text-weight-semibold p-3 mb-4">
<i class="fa-solid fa-warning mr-1"></i>
Multiple packages were found in the PurlDB for "{{ object.package_url }}".
The data below corresponds to the most recent version of this package.
</div>
{% endif %}
{% include 'scanpipe/tabset/tab_default.html' %}
{% else %}
<div class="notification is-warning is-light has-text-weight-semibold p-3 mb-4">
Expand Down
18 changes: 8 additions & 10 deletions scanpipe/tests/pipes/test_purldb.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,17 +237,17 @@ def test_scanpipe_pipes_purldb_create_project_name(self):
project_name = purldb.create_project_name(download_url, scannable_uri_uuid)
self.assertEqual("httpsregistrynpmjsorgasdf-asdf-101tgz-52b2930d", project_name)

@mock.patch("scanpipe.pipes.purldb.get_package_by_purl")
def test_scanpipe_pipes_purldb_enrich_package(self, mock_get_package_by_purl):
@mock.patch("scanpipe.pipes.purldb.collect_data_for_purl")
def test_scanpipe_pipes_purldb_enrich_package(self, mock_collect_data):
package1 = make_package(self.project1, package_url="pkg:npm/[email protected]")

mock_get_package_by_purl.return_value = {}
mock_collect_data.return_value = []
updated_fields = purldb.enrich_package(package=package1)
self.assertIsNone(updated_fields)

purldb_entry_file = self.data / "purldb" / "csvtojson-2.0.10.json"
purldb_entry = json.loads(purldb_entry_file.read_text())
mock_get_package_by_purl.return_value = purldb_entry
mock_collect_data.return_value = [purldb_entry]
updated_fields = purldb.enrich_package(package=package1)
self.assertTrue(updated_fields)
self.assertIn("homepage_url", updated_fields)
Expand All @@ -258,13 +258,11 @@ def test_scanpipe_pipes_purldb_enrich_package(self, mock_get_package_by_purl):
self.assertEqual(purldb_entry.get("sha256"), package1.sha256)
self.assertEqual(purldb_entry.get("copyright"), package1.copyright)

@mock.patch("scanpipe.pipes.purldb.get_package_by_purl")
def test_scanpipe_pipes_purldb_enrich_discovered_packages(
self, mock_get_package_by_purl
):
@mock.patch("scanpipe.pipes.purldb.collect_data_for_purl")
def test_scanpipe_pipes_purldb_enrich_discovered_packages(self, mock_collect_data):
package1 = make_package(self.project1, package_url="pkg:npm/[email protected]")

mock_get_package_by_purl.return_value = {}
mock_collect_data.return_value = []
buffer = io.StringIO()
updated_package_count = purldb.enrich_discovered_packages(
project=self.project1,
Expand All @@ -276,7 +274,7 @@ def test_scanpipe_pipes_purldb_enrich_discovered_packages(

purldb_entry_file = self.data / "purldb" / "csvtojson-2.0.10.json"
purldb_entry = json.loads(purldb_entry_file.read_text())
mock_get_package_by_purl.return_value = purldb_entry
mock_collect_data.return_value = [purldb_entry]
buffer = io.StringIO()
updated_package_count = purldb.enrich_discovered_packages(
project=self.project1,
Expand Down
6 changes: 3 additions & 3 deletions scanpipe/tests/test_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -1709,9 +1709,9 @@ def test_scanpipe_collect_symbols_tree_sitter_pipeline_integration(self):

@mock.patch("scanpipe.pipes.purldb.is_available")
@mock.patch("scanpipe.pipes.purldb.is_configured")
@mock.patch("scanpipe.pipes.purldb.get_package_by_purl")
@mock.patch("scanpipe.pipes.purldb.collect_data_for_purl")
def test_scanpipe_enrich_with_purldb_pipeline_integration(
self, mock_get_package, mock_is_configured, mock_is_available
self, mock_collect_data, mock_is_configured, mock_is_available
):
pipeline_name = "enrich_with_purldb"
project1 = Project.objects.create(name="Analysis")
Expand All @@ -1722,7 +1722,7 @@ def test_scanpipe_enrich_with_purldb_pipeline_integration(

purldb_entry_file = self.data / "purldb" / "csvtojson-2.0.10.json"
purldb_entry = json.loads(purldb_entry_file.read_text())
mock_get_package.return_value = purldb_entry
mock_collect_data.return_value = [purldb_entry]

run = project1.add_pipeline(pipeline_name)
pipeline = run.make_pipeline_instance()
Expand Down
22 changes: 12 additions & 10 deletions scanpipe/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1048,21 +1048,23 @@ def test_scanpipe_views_discovered_package_purldb_tab_view(self, mock_configured
self.assertContains(response, "tab-purldb")
self.assertContains(response, '<section id="tab-purldb"')

with mock.patch("scanpipe.pipes.purldb.get_package_by_purl") as get_package:
get_package.return_value = None
with mock.patch("scanpipe.pipes.purldb.get_packages_for_purl") as get_packages:
get_packages.return_value = None
purldb_tab_url = f"{package_url}purldb_tab/"
response = self.client.get(purldb_tab_url)
msg = "No entries found in the PurlDB for this package"
self.assertContains(response, msg)

get_package.return_value = {
"uuid": "9261605f-e2fb-4db9-94ab-0d82d3273cdf",
"filename": "abab-2.0.3.tgz",
"type": "npm",
"name": "abab",
"version": "2.0.3",
"primary_language": "JavaScript",
}
get_packages.return_value = [
{
"uuid": "9261605f-e2fb-4db9-94ab-0d82d3273cdf",
"filename": "abab-2.0.3.tgz",
"type": "npm",
"name": "abab",
"version": "2.0.3",
"primary_language": "JavaScript",
}
]
response = self.client.get(purldb_tab_url)
self.assertContains(response, "abab-2.0.3.tgz")
self.assertContains(response, "2.0.3")
Expand Down
8 changes: 6 additions & 2 deletions scanpipe/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1999,9 +1999,13 @@ def get_context_data(self, **kwargs):
if not purldb.is_configured():
raise Http404("PurlDB access is not configured.")

if purldb_entry := purldb.get_package_by_purl(self.object.package_url):
fields = self.get_fields_data(purldb_entry)
if purldb_entries := purldb.get_packages_for_purl(self.object.package_url):
# Always display the most recent version entry.
fields = self.get_fields_data(purldb_entries[0])
context["tab_data"] = {"fields": fields}
# Display a warning if multiple packages found in PurlDB for this purl.
if len(purldb_entries) > 1:
context["has_multiple_purldb_entries"] = True

return context

Expand Down

0 comments on commit fe1b5b1

Please sign in to comment.