Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

411 toolkit upgrade 2 #435

Merged
merged 49 commits into from
Jun 14, 2022
Merged
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
1178bfe
Upgrade scancode-toolkit to latest beta release #411
tdruez May 2, 2022
f9c6e77
Add a test class to regen test data #411
tdruez May 9, 2022
5dd16d4
Upgrade container_inspector to latest 31.0.0 version #411
tdruez May 9, 2022
2db2a8e
Handle new scan format in scancode pipes #411
JonoYang May 5, 2022
a42b374
Handle package_uids for DiscoveredPackages #411
JonoYang May 6, 2022
1c2bdc3
Update deprecated code #411
JonoYang May 6, 2022
2add897
Regenerate asgiref 3.3.0 test data #411
JonoYang May 10, 2022
ede8858
Add asgiref-3.3.0_scancode_scan.json #411
JonoYang May 10, 2022
4a4fa99
Add asgiref-3.3.0_walk_test_fixtures.json #411
JonoYang May 10, 2022
74d78b8
Signed-off-by: Jono Yang <[email protected]>
JonoYang May 10, 2022
496f826
Update make_results_summary() #411
JonoYang May 10, 2022
ef74863
Exclude system_environment from diff #411
JonoYang May 10, 2022
ad4b056
Upgrade scancode-toolkit and extractcode to latest version #411
tdruez May 11, 2022
bcca2d7
Update package_getter #434 #438
JonoYang May 12, 2022
7917727
Allow packages to be created without versions #438
JonoYang May 12, 2022
b6b1927
Update expected test results
JonoYang May 12, 2022
f46ea17
Report DiscoveredPackage correctly in summary #411
JonoYang May 12, 2022
7e0d39a
Add test for docker pipeline for alpine #411
JonoYang May 12, 2022
abd3a5c
Add docker pipeline test for rpm images #411
JonoYang May 13, 2022
a50f79b
Track package_uids in make_results_summary #435
JonoYang May 13, 2022
9823790
Add truncated ubuntu docker image for testing #435
JonoYang May 13, 2022
92f6e98
Bump scancode and commoncode versions #435
JonoYang May 18, 2022
0491ad5
Update docker pipeline #435
JonoYang May 18, 2022
5ef693b
Fix code validity #411
tdruez May 18, 2022
9757841
Simplify the filtering of key_files_packages using a QuerySet #411
tdruez May 18, 2022
2b7ba71
Remove copied code from docker.py #411 #435
JonoYang May 18, 2022
009d4e3
Update alpine test image and results #411 #435
JonoYang May 18, 2022
d867c52
Properly create multiple package instances #411
JonoYang May 19, 2022
a73e7ea
Sort packages in JSON output by type and name #411
JonoYang May 19, 2022
befe574
Get file info and packages in initial scan #438
JonoYang May 25, 2022
093a52e
Revert changes to docker pipes and pipeline #438
JonoYang Jun 7, 2022
c58771a
Use generic package_getter for all distros #438
JonoYang Jun 8, 2022
4a8713d
Use get_path() with strip_root to get paths #438
JonoYang Jun 8, 2022
a0705c3
Remove distro specific pipes #438
JonoYang Jun 9, 2022
0c26b7c
Use list comprehension for key_file_packages #438
JonoYang Jun 9, 2022
c8424b3
Add package_uid field to DiscoveredPackage #411
JonoYang Jun 9, 2022
552bdb8
Add test docker image for Ubuntu #438
JonoYang Jun 9, 2022
155fe97
Update formatting #411 #438
JonoYang Jun 9, 2022
18cc997
Use smaller rpm docker image for testing #438
JonoYang Jun 9, 2022
23dc0e1
Replace ubuntu docker test image #438
JonoYang Jun 9, 2022
b3f4656
Use purl data in update_or_create_packages #438
JonoYang Jun 9, 2022
1592cd3
Bump scancode version to v31.0.0rc1 #438 #411
JonoYang Jun 13, 2022
9fcec67
Consider all PURL fields when ordering Packages #411 #438
JonoYang Jun 13, 2022
20ffbe0
Create Packages before Resources #411 #438
JonoYang Jun 13, 2022
784dbbc
Add test for load_inventory pipeline #411
JonoYang Jun 14, 2022
d57afa6
Code cleanups and formatting #411
tdruez Jun 14, 2022
413cf0d
Upgrade dependencies #411
tdruez Jun 14, 2022
c8cb574
Refactor create_inventory_from_scan to remove duplicated code #411
tdruez Jun 14, 2022
5aed5e8
Add changelog entry #411
tdruez Jun 14, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ local
policies.yml
*.rdb
*.aof
.vscode

# This is only created when packaging for external redistribution
/thirdparty/
2 changes: 1 addition & 1 deletion scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1769,7 +1769,7 @@ def create_from_data(cls, project, package_data):
If one of the values of the required fields is not available, a "ProjectError"
is created instead of a new DiscoveredPackage instance.
"""
required_fields = ["type", "name", "version"]
required_fields = ["type", "name"]
missing_values = [
field_name
for field_name in required_fields
Expand Down
5 changes: 1 addition & 4 deletions scanpipe/pipelines/scan_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,8 @@ def steps(cls):
"--url",
] + [
"--classify",
"--consolidate",
"--is-license-text",
"--license-clarity-score",
"--summary",
"--summary-key-files",
]

def get_package_archive_input(self):
Expand Down Expand Up @@ -121,8 +118,8 @@ def build_inventory_from_scan(self):
"""
project = self.project
scanned_codebase = scancode.get_virtual_codebase(project, str(self.scan_output))
scancode.create_codebase_resources(project, scanned_codebase)
JonoYang marked this conversation as resolved.
Show resolved Hide resolved
JonoYang marked this conversation as resolved.
Show resolved Hide resolved
scancode.create_discovered_packages(project, scanned_codebase)
scancode.create_codebase_resources(project, scanned_codebase)

def make_summary_from_scan_results(self):
"""
Expand Down
16 changes: 14 additions & 2 deletions scanpipe/pipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,27 @@ def update_or_create_package(project, package_data, codebase_resource=None):
"""
Gets, updates or creates a DiscoveredPackage then returns it.
Uses the `project` and `package_data` mapping to lookup and creates the
DiscoveredPackage using its Package URL as a unique key.
DiscoveredPackage using its Package URL and package_uid as a unique key.
"""
purl_data = DiscoveredPackage.extract_purl_data(package_data)
package_uid = package_data.get("package_uid")
purl_data_and_package_uid = {
**purl_data,
"extra_data": {"package_uid": package_uid},
}

try:
package = DiscoveredPackage.objects.get(project=project, **purl_data)
package = DiscoveredPackage.objects.get(
project=project,
**purl_data_and_package_uid
)
except DiscoveredPackage.DoesNotExist:
package = None

package_data = {
**package_data,
"extra_data": {"package_uid": package_uid},
JonoYang marked this conversation as resolved.
Show resolved Hide resolved
}
if package:
package.update_from_data(package_data)

Expand Down
32 changes: 0 additions & 32 deletions scanpipe/pipes/alpine.py

This file was deleted.

2 changes: 2 additions & 0 deletions scanpipe/pipes/codebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ def get_tree(resource, fields, codebase=None):
return resource_dict


# TODO: Walking the ProjectCodebase is broken as we do not have a consistent way
# to get the root of a codebase.
class ProjectCodebase:
"""
Represents the codebase of a project stored in the database.
Expand Down
35 changes: 0 additions & 35 deletions scanpipe/pipes/debian.py

This file was deleted.

22 changes: 10 additions & 12 deletions scanpipe/pipes/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

import logging
import posixpath
from functools import partial
from pathlib import Path

from container_inspector.image import Image
Expand Down Expand Up @@ -152,23 +151,21 @@ def scan_image_for_system_packages(project, image, detect_licenses=True):
raise rootfs.DistroNotFound(f"Distro not found.")

distro_id = image.distro.identifier
if distro_id not in rootfs.PACKAGE_GETTER_BY_DISTRO:
if distro_id not in rootfs.SUPPORTED_DISTROS:
raise rootfs.DistroNotSupported(f'Distro "{distro_id}" is not supported.')

package_getter = partial(
rootfs.PACKAGE_GETTER_BY_DISTRO[distro_id],
distro=distro_id,
detect_licenses=detect_licenses,
)

installed_packages = image.get_installed_packages(package_getter)
installed_packages = image.get_installed_packages(rootfs.package_getter)

for i, (purl, package, layer) in enumerate(installed_packages):
logger.info(f"Creating package #{i}: {purl}")
created_package = pipes.update_or_create_package(project, package.to_dict())

installed_files = []
if hasattr(package, "resources"):
installed_files = package.resources

# We have no files for this installed package, we cannot go further.
if not package.installed_files:
if not installed_files:
logger.info(f" No installed_files for: {purl}")
continue

Expand All @@ -177,8 +174,9 @@ def scan_image_for_system_packages(project, image, detect_licenses=True):

codebase_resources = project.codebaseresources.all()

for install_file in package.installed_files:
install_file_path = pipes.normalize_path(install_file.path)
for install_file in installed_files:
install_file_path = install_file.get_path(strip_root=True)
install_file_path = pipes.normalize_path(install_file_path)
layer_rootfs_path = posixpath.join(
layer.layer_id,
install_file_path.strip("/"),
Expand Down
8 changes: 4 additions & 4 deletions scanpipe/pipes/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def get_headers(self, project):
def get_packages(self, project):
from scanpipe.api.serializers import DiscoveredPackageSerializer

packages = project.discoveredpackages.all()
packages = project.discoveredpackages.all().order_by("type", "name")

for obj in packages.iterator():
yield self.encode(DiscoveredPackageSerializer(obj).data)
Expand Down Expand Up @@ -280,9 +280,9 @@ def _add_xlsx_worksheet(workbook, worksheet_name, rows, fields):
# https://github.com/nexB/scancode-toolkit/pull/2381
# https://github.com/nexB/scancode-toolkit/issues/2350
mappings_key_by_fieldname = {
"copyrights": "value",
"holders": "value",
"authors": "value",
"copyrights": "copyright",
"holders": "holder",
"authors": "author",
"emails": "email",
"urls": "url",
}
Expand Down
65 changes: 39 additions & 26 deletions scanpipe/pipes/rootfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,36 +23,32 @@
import fnmatch
import logging
import os
from functools import partial

from django.core.exceptions import ObjectDoesNotExist
from django.db.models import Q

import attr
from commoncode.ignore import default_ignores
from container_inspector.distro import Distro
from packagedcode import plugin_package

from scanpipe import pipes
from scanpipe.pipes import alpine
from scanpipe.pipes import debian
from scanpipe.pipes import rpm
from scanpipe.pipes import windows

logger = logging.getLogger(__name__)

PACKAGE_GETTER_BY_DISTRO = {
"alpine": alpine.package_getter,
"debian": partial(debian.package_getter, distro="debian"),
"ubuntu": partial(debian.package_getter, distro="ubuntu"),
"rhel": rpm.package_getter,
"centos": rpm.package_getter,
"fedora": rpm.package_getter,
"sles": rpm.package_getter,
"opensuse": rpm.package_getter,
"opensuse-tumbleweed": rpm.package_getter,
"photon": rpm.package_getter,
"windows": windows.package_getter,
}
# Distro identifiers for which system packages can be collected.
# The `distro.identifier` of an image or rootfs is checked against this list
# before scanning; an identifier not listed here raises `DistroNotSupported`.
SUPPORTED_DISTROS = [
"alpine",
"debian",
"ubuntu",
"rhel",
"centos",
"fedora",
"sles",
"opensuse",
"opensuse-tumbleweed",
"photon",
"windows",
]


class DistroNotFound(Exception):
Expand Down Expand Up @@ -175,6 +171,14 @@ def has_hash_diff(install_file, codebase_resource):
hash_types = ["sha512", "sha256", "sha1", "md5"]

for hash_type in hash_types:
# Find a suitable hash type that is present on both install_file and
# codebase_resource, skip otherwise.
if not (
hasattr(install_file, hash_type)
and hasattr(codebase_resource, hash_type)
):
continue

install_file_sum = getattr(install_file, hash_type)
codebase_resource_sum = getattr(codebase_resource, hash_type)
hashes_differ = all(
Expand All @@ -190,6 +194,15 @@ def has_hash_diff(install_file, codebase_resource):
return False


def package_getter(root_dir, **kwargs):
    """
    Yields a `(purl, package)` tuple for each installed package reported by
    `plugin_package.get_installed_packages` for the provided `root_dir`.
    Any extra keyword arguments are accepted but not used.
    """
    for installed_package in plugin_package.get_installed_packages(root_dir):
        yield installed_package.purl, installed_package


def scan_rootfs_for_system_packages(project, rootfs, detect_licenses=True):
"""
Given a `project` Project and a `rootfs` RootFs, scan the `rootfs` for
Expand All @@ -203,23 +216,23 @@ def scan_rootfs_for_system_packages(project, rootfs, detect_licenses=True):
raise DistroNotFound(f"Distro not found.")

distro_id = rootfs.distro.identifier
if distro_id not in PACKAGE_GETTER_BY_DISTRO:
if distro_id not in SUPPORTED_DISTROS:
raise DistroNotSupported(f'Distro "{distro_id}" is not supported.')

package_getter = partial(
PACKAGE_GETTER_BY_DISTRO[distro_id],
distro=distro_id,
detect_licenses=detect_licenses,
)
logger.info(f"rootfs location: {rootfs.location}")

installed_packages = rootfs.get_installed_packages(package_getter)

for i, (purl, package) in enumerate(installed_packages):
logger.info(f"Creating package #{i}: {purl}")
created_package = pipes.update_or_create_package(project, package.to_dict())

installed_files = []
if hasattr(package, "resources"):
installed_files = package.resources

# We have no files for this installed package, we cannot go further.
if not package.installed_files:
if not installed_files:
logger.info(f" No installed_files for: {purl}")
continue

Expand All @@ -228,7 +241,7 @@ def scan_rootfs_for_system_packages(project, rootfs, detect_licenses=True):

codebase_resources = project.codebaseresources.all()

for install_file in package.installed_files:
for install_file in installed_files:
rootfs_path = pipes.normalize_path(install_file.path)
logger.info(f" installed file rootfs_path: {rootfs_path}")

Expand Down
32 changes: 0 additions & 32 deletions scanpipe/pipes/rpm.py

This file was deleted.

Loading