Skip to content

Commit

Permalink
Resolve dependencies from lockfiles #1237
Browse files Browse the repository at this point in the history
Reference: #1237
Reference: #1066
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Jun 13, 2024
1 parent 49f9c07 commit 5cf11ac
Show file tree
Hide file tree
Showing 38 changed files with 2,169 additions and 249 deletions.
3 changes: 3 additions & 0 deletions scanpipe/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,8 @@ class Meta:
"source_packages",
"extra_data",
"package_uid",
"is_private",
"is_virtual",
"datasource_ids",
"datafile_paths",
"file_references",
Expand All @@ -405,6 +407,7 @@ class Meta:
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"dependency_uid",
"for_package_uid",
"resolved_to_package_uid",
Expand Down
4 changes: 4 additions & 0 deletions scanpipe/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"datasource_id",
"is_vulnerable",
]
Expand All @@ -747,6 +748,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"for_package",
"resolved_to_package",
"datafile_resource",
Expand All @@ -761,6 +763,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
is_runtime = StrictBooleanFilter()
is_optional = StrictBooleanFilter()
is_resolved = StrictBooleanFilter()
is_direct = StrictBooleanFilter()
is_vulnerable = IsVulnerable(field_name="affected_by_vulnerabilities")

class Meta:
Expand All @@ -779,6 +782,7 @@ class Meta:
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"datasource_id",
"is_vulnerable",
]
Expand Down
34 changes: 34 additions & 0 deletions scanpipe/migrations/0061_dependency_resolver_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Generated by Django 5.0.6 on 2024-06-04 20:48

from django.db import migrations, models


# Schema migration adding the `is_direct` flag (and its index) on
# `discovereddependency`, and the `is_private` / `is_virtual` flags on
# `discoveredpackage`.
class Migration(migrations.Migration):

# Must be applied after the 0060 migration of the "scanpipe" app.
dependencies = [
("scanpipe", "0060_discovereddependency_renames"),
]

operations = [
# All three new boolean columns default to False.
migrations.AddField(
model_name="discovereddependency",
name="is_direct",
field=models.BooleanField(default=False),
),
migrations.AddField(
model_name="discoveredpackage",
name="is_private",
field=models.BooleanField(default=False),
),
migrations.AddField(
model_name="discoveredpackage",
name="is_virtual",
field=models.BooleanField(default=False),
),
# Index on the new `discovereddependency.is_direct` column.
migrations.AddIndex(
model_name="discovereddependency",
index=models.Index(
fields=["is_direct"], name="scanpipe_di_is_dire_6dc594_idx"
),
),
]
51 changes: 51 additions & 0 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,19 @@ def walk_codebase_path(self):
"""Return files and directories path of the codebase/ directory recursively."""
return self.codebase_path.rglob("*")

def get_resource(self, path):
    """
    Return the codebase resource present for a given ``path``,
    or None if the resource with that path does not exist.
    This path is relative to the scan location.
    This is the same as the Codebase.get_resource() function.
    """
    # ``get_or_none`` is used on purpose: we don't want to raise an exception
    # if there is no resource, as this function is also called from the
    # SCTK side.
    return self.codebaseresources.get_or_none(path=path)

@cached_property
def can_change_inputs(self):
"""
Expand Down Expand Up @@ -2971,6 +2984,8 @@ class AbstractPackage(models.Model):
blank=True,
help_text=_("A notice text for this package."),
)
is_private = models.BooleanField(default=False)
is_virtual = models.BooleanField(default=False)
datasource_ids = models.JSONField(
default=list,
blank=True,
Expand Down Expand Up @@ -3432,6 +3447,7 @@ class DiscoveredDependency(
is_runtime = models.BooleanField(default=False)
is_optional = models.BooleanField(default=False)
is_resolved = models.BooleanField(default=False)
is_direct = models.BooleanField(default=False)

objects = DiscoveredDependencyQuerySet.as_manager()

Expand All @@ -3452,6 +3468,7 @@ class Meta:
models.Index(fields=["is_runtime"]),
models.Index(fields=["is_optional"]),
models.Index(fields=["is_resolved"]),
models.Index(fields=["is_direct"]),
]
constraints = [
models.UniqueConstraint(
Expand Down Expand Up @@ -3498,6 +3515,7 @@ def create_from_data(
project,
dependency_data,
for_package=None,
resolved_to_package=None,
datafile_resource=None,
datasource_id=None,
strip_datafile_path_root=False,
Expand Down Expand Up @@ -3537,6 +3555,13 @@ def create_from_data(
package_uid=for_package_uid
)

if not resolved_to_package:
resolved_to_uid = dependency_data.get("resolved_to_uid")
if resolved_to_uid:
resolved_to_package = project.discoveredpackages.get(
package_uid=resolved_to_uid
)

if not datafile_resource:
datafile_path = dependency_data.get("datafile_path")
if datafile_path:
Expand All @@ -3562,10 +3587,36 @@ def create_from_data(
return cls.objects.create(
project=project,
for_package=for_package,
resolved_to_package=resolved_to_package,
datafile_resource=datafile_resource,
**cleaned_data,
)

@classmethod
def extract_purl_data(cls, dependency_data, ignore_nulls=False):
    """
    Return a mapping of the PURL_FIELDS values parsed from the "purl" string
    of the provided ``dependency_data``.

    The "qualifiers" value goes through ``normalize_qualifiers`` with
    ``encode=True``. A field without a value is stored as an empty string,
    unless ``ignore_nulls`` is True, in which case that field is left out of
    the returned mapping.
    """
    package_url = PackageURL.from_string(purl=dependency_data.get("purl"))
    parsed_fields = package_url.to_dict()

    extracted = {}
    for name in PURL_FIELDS:
        field_value = parsed_fields.get(name)
        if name == "qualifiers":
            field_value = normalize_qualifiers(field_value, encode=True)

        if field_value:
            extracted[name] = field_value
        elif not ignore_nulls:
            extracted[name] = ""

    return extracted

@classmethod
def populate_dependency_uuid(cls, dependency_data):
    """
    Set the "dependency_uid" of the provided ``dependency_data``, in place,
    to its "purl" string extended with a random "uuid" qualifier.
    """
    package_url = PackageURL.from_string(purl=dependency_data.get("purl"))
    unique_id = str(uuid.uuid4())
    package_url.qualifiers["uuid"] = unique_id
    dependency_data["dependency_uid"] = package_url.to_string()

@property
def spdx_id(self):
    """
    Return the SPDX identifier of this instance, built from its model name
    and its ``dependency_uid``.
    """
    model_name = self._meta.model_name
    return f"SPDXRef-scancodeio-{model_name}-{self.dependency_uid}"
Expand Down
20 changes: 12 additions & 8 deletions scanpipe/pipelines/inspect_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

from scanpipe.pipelines import group
from scanpipe.pipelines.scan_codebase import ScanCodebase
from scanpipe.pipes import scancode

Expand Down Expand Up @@ -49,23 +50,26 @@ def steps(cls):
cls.flag_empty_files,
cls.flag_ignored_resources,
cls.scan_for_application_packages,
cls.create_packages_and_dependencies,
cls.resolve_dependencies,
)

def scan_for_application_packages(self):
    """
    Scan resources for package information to add DiscoveredPackage
    and DiscoveredDependency objects from detected package data.
    """
    # Only package data is scanned here (`package_only=True`), and the
    # detected package data is assembled (`assemble=True`).
    scancode.scan_for_application_packages(
        project=self.project,
        assemble=True,
        package_only=True,
        progress_logger=self.log,
    )

def create_packages_and_dependencies(self):
"""Run ``scancode.process_package_data`` on the project of this pipeline."""
scancode.process_package_data(self.project)
@group("Static Resolver")
def resolve_dependencies(self):
    """
    Build the packages and their dependency relationships out of
    lockfiles or manifests that contain pre-resolved dependencies.
    """
    project = self.project
    scancode.resolve_dependencies(project=project)
19 changes: 19 additions & 0 deletions scanpipe/pipelines/resolve_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

from scanpipe.pipelines import group
from scanpipe.pipelines.scan_codebase import ScanCodebase
from scanpipe.pipes import resolve
from scanpipe.pipes import scancode


class ResolveDependencies(ScanCodebase):
Expand All @@ -45,6 +47,8 @@ def steps(cls):
cls.collect_and_create_codebase_resources,
cls.flag_ignored_resources,
cls.get_manifest_inputs,
cls.scan_for_application_packages,
cls.create_packages_and_dependencies,
cls.get_packages_from_manifest,
cls.create_resolved_packages,
)
Expand All @@ -53,6 +57,20 @@ def get_manifest_inputs(self):
"""Locate package manifest files with a supported package resolver."""
self.manifest_resources = resolve.get_manifest_resources(self.project)

@group("Static Resolver")
def scan_for_application_packages(self):
    """
    Scan the resources stored in ``self.manifest_resources`` for
    application package data, with assembly disabled.
    """
    project = self.project
    scan_options = {
        "assemble": False,
        "resource_qs": self.manifest_resources,
        "progress_logger": self.log,
    }
    scancode.scan_for_application_packages(project, **scan_options)

@group("Static Resolver")
def create_packages_and_dependencies(self):
    """
    Run ``scancode.process_package_data`` on the project with
    ``static_resolve`` enabled.
    """
    project = self.project
    scancode.process_package_data(project, static_resolve=True)

@group("Dynamic Resolver")
def get_packages_from_manifest(self):
"""
Resolve package data from lockfiles/requirement files with package
Expand All @@ -65,6 +83,7 @@ def get_packages_from_manifest(self):
model="get_packages_from_manifest",
)

@group("Dynamic Resolver")
def create_resolved_packages(self):
"""Create the resolved packages and their dependencies in the database."""
resolve.create_packages_and_dependencies(
Expand Down
32 changes: 29 additions & 3 deletions scanpipe/pipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,12 @@ def _clean_package_data(package_data):
return package_data


def update_or_create_package(project, package_data, codebase_resources=None):
def update_or_create_package(
project,
package_data,
codebase_resources=None,
is_virtual=False,
):
"""
Get, update or create a DiscoveredPackage then return it.
Use the `project` and `package_data` mapping to lookup and creates the
Expand All @@ -194,6 +199,9 @@ def update_or_create_package(project, package_data, codebase_resources=None):
package = DiscoveredPackage.create_from_data(project, package_data)

if package:
if is_virtual:
package.update(is_virtual=is_virtual)

if datasource_id and datasource_id not in package.datasource_ids:
datasource_ids = package.datasource_ids.copy()
datasource_ids.append(datasource_id)
Expand Down Expand Up @@ -239,6 +247,7 @@ def update_or_create_dependency(
project,
dependency_data,
for_package=None,
resolved_to_package=None,
datafile_resource=None,
datasource_id=None,
strip_datafile_path_root=False,
Expand All @@ -254,27 +263,44 @@ def update_or_create_dependency(
corresponding CodebaseResource for `datafile_path`. This is used in the case
where Dependency data is imported from a scancode-toolkit scan, where the
root path segments are not stripped for `datafile_path`.
If the dependency is resolved and a resolved package is created, we have the
corresponding package_uid at `resolved_to`.
"""
dependency = None
dependency_uid = dependency_data.get("dependency_uid")
extracted_requirement = dependency_data.get("extracted_requirement")

if ignore_dependency_scope(project, dependency_data):
return # Do not create the DiscoveredDependency record.

if not dependency_uid:
dependency_data["dependency_uid"] = uuid.uuid4()
purl_data = DiscoveredDependency.extract_purl_data(dependency_data)
dependency = DiscoveredDependency.objects.get_or_none(
project=project,
extracted_requirement=extracted_requirement,
**purl_data,
)
else:
dependency = project.discovereddependencies.get_or_none(
dependency = DiscoveredDependency.objects.get_or_none(
project=project,
dependency_uid=dependency_uid,
)

if dependency:
dependency.update_from_data(dependency_data)
if resolved_to_package and not dependency.resolved_to_package:
dependency.update(resolved_to_package=resolved_to_package)
else:
is_direct = dependency_data.get("is_direct")
if not is_direct:
pass

DiscoveredDependency.populate_dependency_uuid(dependency_data)
dependency = DiscoveredDependency.create_from_data(
project,
dependency_data,
for_package=for_package,
resolved_to_package=resolved_to_package,
datafile_resource=datafile_resource,
datasource_id=datasource_id,
strip_datafile_path_root=strip_datafile_path_root,
Expand Down
Loading

0 comments on commit 5cf11ac

Please sign in to comment.