Skip to content

Commit

Permalink
Merge branch 'develop' into purls-only-v2
Browse files Browse the repository at this point in the history
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Mar 18, 2024
2 parents 86e4e90 + 0c9fd4b commit 3b53f43
Show file tree
Hide file tree
Showing 88 changed files with 3,340 additions and 13,964 deletions.
11 changes: 8 additions & 3 deletions src/cluecode/copyrights.py
Original file line number Diff line number Diff line change
Expand Up @@ -2517,7 +2517,8 @@ def build_detection_from_node(
COMPANY: {<COMPANY> <MAINT>} #19603
#######################################
################################# #COPYRIGHT: {<COPY> <COPY> <MIT>} #1802
######
# VARIOUS FORMS OF COPYRIGHT
#######################################
Expand Down Expand Up @@ -2572,6 +2573,8 @@ def build_detection_from_node(
COPYRIGHT: {<COPY> <COPY> <COMP>+} #1690
COPYRIGHT: {<COPY> <COPY> <MIT>} #1802
COPYRIGHT: {<COPY> <COPY> <NN>+ <COMPANY|NAME|NAME-EMAIL>+} #1710
COPYRIGHT: {<COPY> <COPY> <NN> <NNP> <NN> <COMPANY>} #1711
Expand Down Expand Up @@ -4125,8 +4128,10 @@ def prepare_text_line(line, dedeb=True, to_ascii=True):

# normalize (possibly repeated) quotes to unique single quote '
# backticks ` and "
.replace('`', u"'")
.replace('"', u"'")
.replace('`', "'")
.replace('"', "'")
# see https://github.com/nexB/scancode-toolkit/issues/3667
.replace('§', " ")
)

if TRACE_TOK:
Expand Down
10 changes: 2 additions & 8 deletions src/packagedcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
from packagedcode import godeps
from packagedcode import golang
from packagedcode import haxe
from packagedcode import jar_manifest
from packagedcode import maven
from packagedcode import misc
from packagedcode import npm
Expand Down Expand Up @@ -84,6 +83,7 @@
cran.CranDescriptionFileHandler,

debian_copyright.DebianCopyrightFileInPackageHandler,
debian_copyright.StandaloneDebianCopyrightFileHandler,
debian.DebianDscFileHandler,

debian.DebianControlFileInExtractedDebHandler,
Expand Down Expand Up @@ -216,13 +216,7 @@

debian.DebianInstalledFilelistHandler,
debian.DebianInstalledMd5sumFilelistHandler,
debian.DebianInstalledStatusDatabaseHandler,
debian.DebianControlFileInSourceHandler,
debian.DebianDscFileHandler,
debian.DebianSourcePackageTarballHandler,
debian.DebianSourcePackageMetadataTarballHandler,
debian.DebianDebPackageHandler,
debian_copyright.StandaloneDebianCopyrightFileHandler
debian.DebianInstalledStatusDatabaseHandler
]

if on_linux:
Expand Down
10 changes: 1 addition & 9 deletions src/packagedcode/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def logger_debug(*args):
)


class AutotoolsConfigureHandler(models.DatafileHandler):
class AutotoolsConfigureHandler(models.NonAssemblableDatafileHandler):
datasource_id = 'autotools_configure'
path_patterns = ('*/configure', '*/configure.ac',)
default_package_type = 'autotools'
Expand Down Expand Up @@ -75,14 +75,6 @@ def parse(cls, location, package_only=False):
)
yield models.PackageData.from_data(package_data, package_only)

@classmethod
def assign_package_to_resources(cls, package, resource, codebase, package_adder):
models.DatafileHandler.assign_package_to_parent_tree(
package=package,
resource=resource,
codebase=codebase,
package_adder=package_adder,
)


def check_rule_name_ending(rule_name, starlark_rule_types=('binary', 'library')):
Expand Down
126 changes: 99 additions & 27 deletions src/packagedcode/cargo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

import os
import re

import saneyaml
Expand All @@ -20,7 +21,81 @@
"""


class CargoTomlHandler(models.DatafileHandler):
class CargoBaseHandler(models.DatafileHandler):
    """
    Base handler holding the assembly logic shared by the Cargo datafile
    handlers, including support for cargo workspaces.
    """

    @classmethod
    def assemble(cls, package_data, resource, codebase, package_adder):
        """
        Assemble Cargo.toml and possible Cargo.lock datafiles. Also
        support cargo workspaces where we have multiple packages from
        a repository and some shared information present at top-level.

        Yield whatever ``assemble_from_many_datafiles`` yields, either for
        each workspace member directory found in the codebase, or for the
        parent directory of ``resource`` when there is no usable workspace.
        """
        datafile_name_patterns = ('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock')

        workspace = package_data.extra_data.get("workspace", {})
        workspace_members = workspace.get("members", [])
        workspace_package_data = workspace.get("package", {})

        # Carry the license detection results of the top-level datafile over
        # to the workspace data to avoid re-detecting licenses per member.
        attributes_to_copy = [
            "license_detections",
            "declared_license_expression",
            "declared_license_expression_spdx"
        ]
        if "license" in workspace_package_data:
            for attribute in attributes_to_copy:
                workspace_package_data[attribute] = getattr(package_data, attribute)

        if not (workspace_package_data and workspace_members):
            # Not a workspace (or nothing shared): plain single-directory assembly.
            yield from cls.assemble_from_many_datafiles(
                datafile_name_patterns=datafile_name_patterns,
                directory=resource.parent(codebase),
                codebase=codebase,
                package_adder=package_adder,
            )
            return

        # The workspace root path is only needed to locate member directories,
        # so it is computed in the workspace branch only.
        workspace_root_path = resource.parent(codebase).path

        for workspace_member_path in workspace_members:
            workspace_directory_path = os.path.join(workspace_root_path, workspace_member_path)
            workspace_directory = codebase.get_resource(path=workspace_directory_path)
            if not workspace_directory:
                continue

            # Update the package data for all members with the
            # workspace package data.
            # Use a dedicated loop name: do not rebind the ``resource`` argument.
            for member_resource in workspace_directory.children(codebase):
                if not cls.is_datafile(location=member_resource.location):
                    continue
                if not member_resource.package_data:
                    continue

                updated_package_data = cls.update_resource_package_data(
                    package_data=workspace_package_data,
                    old_package_data=member_resource.package_data.pop(),
                    mapping=CARGO_ATTRIBUTE_MAPPING,
                )
                member_resource.package_data.append(updated_package_data)
                member_resource.save(codebase)

            yield from cls.assemble_from_many_datafiles(
                datafile_name_patterns=datafile_name_patterns,
                directory=workspace_directory,
                codebase=codebase,
                package_adder=package_adder,
            )

    @classmethod
    def update_resource_package_data(cls, package_data, old_package_data, mapping=None):
        """
        Update the ``old_package_data`` mapping in place using values from the
        workspace ``package_data`` mapping and return it.

        ``mapping`` maps an attribute name of ``old_package_data`` to the key
        to read from ``package_data``. Each mapped attribute present in
        ``old_package_data`` is replaced by the corresponding ``package_data``
        value (None when missing). The "parties" attribute is rebuilt from the
        ``package_data`` "authors". Without a ``mapping``, only "parties" is
        updated.
        """
        # Treat a missing mapping as empty: a membership test on None would
        # raise a TypeError.
        mapping = mapping or {}

        for attribute in old_package_data:
            if attribute in mapping:
                replace_by_attribute = mapping[attribute]
                old_package_data[attribute] = package_data.get(replace_by_attribute)
            elif attribute == "parties":
                # Default to an empty list so that a workspace without
                # "authors" yields no parties rather than passing None on.
                old_package_data[attribute] = list(get_parties(
                    person_names=package_data.get("authors") or [],
                    party_role='author',
                ))

        return old_package_data



class CargoTomlHandler(CargoBaseHandler):
datasource_id = 'cargo_toml'
path_patterns = ('*/Cargo.toml', '*/cargo.toml',)
default_package_type = 'cargo'
Expand All @@ -31,11 +106,16 @@ class CargoTomlHandler(models.DatafileHandler):
@classmethod
def parse(cls, location, package_only=False):
package_data = toml.load(location, _dict=dict)

core_package_data = package_data.get('package', {})
workspace = package_data.get('workspace', {})
extra_data = {}

name = core_package_data.get('name')
version = core_package_data.get('version')
if isinstance(version, dict) and "workspace" in version:
version = None
extra_data["version"] = "workspace"

description = core_package_data.get('description') or ''
description = description.strip()

Expand Down Expand Up @@ -66,6 +146,8 @@ def parse(cls, location, package_only=False):
repository_homepage_url = name and f'https://crates.io/crates/{name}'
repository_download_url = name and version and f'https://crates.io/api/v1/crates/{name}/{version}/download'
api_data_url = name and f'https://crates.io/api/v1/crates/{name}'
if workspace:
extra_data["workspace"] = workspace

package_data = dict(
datasource_id=cls.datasource_id,
Expand All @@ -82,23 +164,25 @@ def parse(cls, location, package_only=False):
repository_download_url=repository_download_url,
api_data_url=api_data_url,
dependencies=dependencies,
extra_data=extra_data,
)
yield models.PackageData.from_data(package_data, package_only)

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
"""
Assemble Cargo.toml and possible Cargo.lock datafiles
"""
yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock'),
directory=resource.parent(codebase),
codebase=codebase,
package_adder=package_adder,
)

# Mapping of {PackageData model field name: cargo manifest field name}.
CARGO_ATTRIBUTE_MAPPING = {
    "homepage_url": "homepage",
    "vcs_url": "repository",
    "keywords": "categories",
    "extracted_license_statement": "license",
    # License fields that keep the same name on both sides: they are carried
    # over as-is to avoid re-detection of licenses.
    **{
        field_name: field_name
        for field_name in (
            "license_detections",
            "declared_license_expression",
            "declared_license_expression_spdx",
        )
    },
}


class CargoLockHandler(models.DatafileHandler):
class CargoLockHandler(CargoBaseHandler):
datasource_id = 'cargo_lock'
path_patterns = ('*/Cargo.lock', '*/cargo.lock',)
default_package_type = 'cargo'
Expand Down Expand Up @@ -146,18 +230,6 @@ def parse(cls, location, package_only=False):
)
yield models.PackageData.from_data(package_data, package_only)

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
"""
Assemble Cargo.toml and possible Cargo.lock datafiles
"""
yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('Cargo.toml', 'Cargo.lock',),
directory=resource.parent(codebase),
codebase=codebase,
package_adder=package_adder,
)


def dependency_mapper(dependencies, scope='dependencies'):
"""
Expand Down Expand Up @@ -199,7 +271,7 @@ def get_parties(person_names, party_role):
name=name,
role=party_role,
email=email,
)
).to_dict()


person_parser = re.compile(
Expand Down
31 changes: 28 additions & 3 deletions src/packagedcode/debian.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,10 @@ def parse_debian_files_list(location, datasource_id, package_type):
name, _, arch = filename.partition(':')
qualifiers['arch'] = arch
else:
name = filename
name = None
# For DebianMd5sumFilelistInPackageHandler we cannot infer name
if not name == "md5sums":
name = filename

file_references = []
with open(location) as info_file:
Expand Down Expand Up @@ -650,12 +653,14 @@ def build_package_data(debian_data, datasource_id, package_type='deb', distro=No

maintainer = debian_data.get('maintainer')
if maintainer:
party = models.Party(role='maintainer', name=maintainer)
maintainer_name, maintainer_email = parse_debian_maintainers(maintainer)
party = models.Party(role='maintainer', name=maintainer_name, email=maintainer_email)
parties.append(party)

orig_maintainer = debian_data.get('original_maintainer')
if orig_maintainer:
party = models.Party(role='original_maintainer', name=orig_maintainer)
maintainer_name, maintainer_email = parse_debian_maintainers(orig_maintainer)
party = models.Party(role='maintainer', name=maintainer_name, email=maintainer_email)
parties.append(party)

keywords = []
Expand Down Expand Up @@ -716,6 +721,26 @@ def build_package_data(debian_data, datasource_id, package_type='deb', distro=No
return models.PackageData.from_data(package_data, package_only)


def parse_debian_maintainers(maintainer):
    """
    Return a (name, email) tuple parsed from a debian ``maintainer`` string.

    The email is expected to be wrapped in angle brackets at the end of the
    string, for example:
        Debian systemd Maintainers <maintainers@example.org>

    When the string has no "@" or lacks either angle bracket, return the
    ``maintainer`` string unchanged as the name and None as the email.
    """
    # An email is recognized only when the string has an "@" and BOTH angle
    # bracket wrappers are present.
    has_email = "@" in maintainer and all(
        wrapper in maintainer for wrapper in ("<", ">")
    )
    if not has_email:
        return maintainer, None

    # Split on the last "<": everything before it is the name.
    name, _, email = maintainer.rpartition("<")
    return name.rstrip(" "), email.rstrip(">")


def populate_debian_namespace(packages):
"""
For an iterable of debian `packages`, populate the
Expand Down
4 changes: 2 additions & 2 deletions src/packagedcode/gemfile_lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,8 @@ def get_option(s):
'%(NAME_VERSION)s'
'$' % locals()).match

PLATS = re.compile('^ (?P<platform>.*)$').match
BUNDLED_WITH = re.compile('^\s+(?P<version>(?:\d+.)+\d+)\s*$').match
PLATS = re.compile(r'^ (?P<platform>.*)$').match
# The separators between the numeric version segments must be literal dots:
# an unescaped "." would accept any character there.
BUNDLED_WITH = re.compile(r'^\s+(?P<version>(?:\d+\.)+\d+)\s*$').match


class GemfileLockParser:
Expand Down
8 changes: 7 additions & 1 deletion src/packagedcode/licensing.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,12 @@ def get_normalized_license_detections(
if detections:
license_detections.extend(detections)

if not license_detections:
unknown_dict_object = repr(dict(extracted_license.items()))
unknown_detection = get_unknown_license_detection(query_string=unknown_dict_object)
license_detections.append(unknown_detection)
if TRACE:
logger_debug(f'get_normalized_license_detections: dict: unknown_dict_object: {unknown_dict_object}, unknown_detection: {saneyaml.dump(unknown_detection.to_dict())}')
else:
extracted_license_statement = saneyaml.dump(extracted_license)
license_detections = get_license_detections_for_extracted_license_statement(
Expand Down Expand Up @@ -753,7 +759,6 @@ def get_normalized_license_detections(

else:
extracted_license_statement = saneyaml.dump(extracted_license_item)

detections = get_license_detections_for_extracted_license_statement(
extracted_license_statement=extracted_license_statement,
try_as_expression=try_as_expression,
Expand Down Expand Up @@ -819,6 +824,7 @@ def get_license_detections_and_expression(
if not license_detections:
if not isinstance(extracted_license_statement, str):
extracted_license_statement = saneyaml.dump(extracted_license_statement)

license_detection = get_unknown_license_detection(query_string=extracted_license_statement)
license_detections = [license_detection]

Expand Down
11 changes: 8 additions & 3 deletions src/packagedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,9 +759,14 @@ def normalize_extracted_license_statement(self):
self.extracted_license_statement and
not isinstance(self.extracted_license_statement, str)
):
self.extracted_license_statement = saneyaml.dump(
self.extracted_license_statement
)
if isinstance(self.extracted_license_statement, dict):
self.extracted_license_statement = saneyaml.dump(
dict(self.extracted_license_statement.items())
)
else:
self.extracted_license_statement = saneyaml.dump(
self.extracted_license_statement
)

def populate_holder_field(self):
if not self.copyright:
Expand Down
2 changes: 2 additions & 0 deletions src/packagedcode/plugin_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,8 @@ def get_package_and_deps(codebase, package_adder=add_to_package, strip_root=Fals
for dfp in item.datafile_paths
]
packages.append(item)
if TRACE:
logger_debug(' get_package_and_deps: Package:', item.purl)

elif isinstance(item, Dependency):
if strip_root and not has_single_resource:
Expand Down
Loading

0 comments on commit 3b53f43

Please sign in to comment.