Skip to content

Commit

Permalink
Improve conda packages and dependencies parsing
Browse files Browse the repository at this point in the history
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Jan 6, 2025
1 parent 93ca65c commit 5fce14d
Show file tree
Hide file tree
Showing 19 changed files with 2,932 additions and 156 deletions.
2 changes: 1 addition & 1 deletion src/packagedcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@
cocoapods.PodfileLockHandler,
cocoapods.PodfileHandler,

conda.CondaYamlHandler,
conda.CondaMetaYamlHandler,
conda.CondaYamlHandler,

conan.ConanFileHandler,
conan.ConanDataHandler,
Expand Down
177 changes: 163 additions & 14 deletions src/packagedcode/conda.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from packagedcode import models
from packagedcode.pypi import BaseDependencyFileHandler
from dparse2.parser import parse_requirement_line

"""
Handle Conda manifests and metadata, see https://docs.conda.io/en/latest/
Expand All @@ -23,18 +24,37 @@
"""

# TODO: there are likely other package data files for Conda
# TODO: report platform


class CondaYamlHandler(BaseDependencyFileHandler):
# TODO: there are several other manifests worth adding
datasource_id = 'conda_yaml'
path_patterns = ('*conda.yaml', '*conda.yml',)
default_package_type = 'pypi'
path_patterns = ('*conda*.yaml', '*env*.yaml', '*environment*.yaml')
default_package_type = 'conda'
default_primary_language = 'Python'
description = 'Conda yaml manifest'
documentation_url = 'https://docs.conda.io/'

@classmethod
def parse(cls, location, package_only=False):
    """
    Yield a PackageData built from the Conda environment YAML file at
    ``location``.

    The package name comes from the top-level ``name`` key, the dependencies
    from ``get_conda_yaml_dependencies()`` and the top-level ``channels``
    (when present) are kept in ``extra_data``. Nothing is yielded when the
    file has neither a name nor any dependency, or when its top-level YAML
    value is not a mapping.

    ``package_only`` is passed through unchanged to
    ``models.PackageData.from_data()``.
    """
    # YAML streams are Unicode: read as UTF-8 explicitly rather than relying
    # on the platform default encoding.
    with open(location, encoding='utf-8') as fi:
        conda_data = saneyaml.load(fi.read())

    # The path patterns of this handler are broad (e.g. '*env*.yaml') and can
    # match unrelated YAML files that are empty or whose top-level value is a
    # list or a scalar. Those are not Conda environments: skip them instead
    # of failing on a missing ``.get``.
    if not isinstance(conda_data, dict):
        return

    dependencies = get_conda_yaml_dependencies(conda_data=conda_data)
    name = conda_data.get('name')

    extra_data = {}
    channels = conda_data.get('channels')
    if channels:
        extra_data['channels'] = channels

    if name or dependencies:
        package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            name=name,
            primary_language=cls.default_primary_language,
            dependencies=dependencies,
            extra_data=extra_data,
            is_private=True,
        )
        yield models.PackageData.from_data(package_data, package_only)


class CondaMetaYamlHandler(models.DatafileHandler):
datasource_id = 'conda_meta_yaml'
Expand Down Expand Up @@ -83,9 +103,7 @@ def parse(cls, location, package_only=False):
metayaml = get_meta_yaml_data(location)
package_element = metayaml.get('package') or {}
package_name = package_element.get('name')
if not package_name:
return
version = package_element.get('version')
package_version = package_element.get('version')

# FIXME: source is source, not download
source = metayaml.get('source') or {}
Expand All @@ -99,6 +117,7 @@ def parse(cls, location, package_only=False):
vcs_url = about.get('dev_url')

dependencies = []
extra_data = {}
requirements = metayaml.get('requirements') or {}
for scope, reqs in requirements.items():
# requirements format is like:
Expand All @@ -107,33 +126,152 @@ def parse(cls, location, package_only=False):
# u'progressbar2', u'python >=3.6'])])
for req in reqs:
name, _, requirement = req.partition(" ")
purl = PackageURL(type=cls.default_package_type, name=name)
version = None
if requirement.startswith("=="):
_, version = requirement.split("==")

# requirements may have namespace, version too
# - conda-forge::numpy=1.15.4
namespace = None
if "::" in name:
namespace, name = name.split("::")

is_pinned = False
if "=" in name:
name, version = name.split("=")
is_pinned = True
requirement = f"={version}"

if name in ('pip', 'python'):
if not scope in extra_data:
extra_data[scope] = [req]
else:
extra_data[scope].append(req)
continue

purl = PackageURL(
type=cls.default_package_type,
name=name,
namespace=namespace,
version=version,
)
if "run" in scope:
is_runtime = True
is_optional = False
else:
is_runtime = False
is_optional = True

dependencies.append(
models.DependentPackage(
purl=purl.to_string(),
extracted_requirement=requirement,
scope=scope,
is_runtime=True,
is_optional=False,
is_runtime=is_runtime,
is_optional=is_optional,
is_pinned=is_pinned,
is_direct=True,
)
)

package_data = dict(
datasource_id=cls.datasource_id,
type=cls.default_package_type,
name=package_name,
version=version,
version=package_version,
download_url=download_url,
homepage_url=homepage_url,
vcs_url=vcs_url,
description=description,
sha256=sha256,
extracted_license_statement=extracted_license_statement,
dependencies=dependencies,
extra_data=extra_data,
)
yield models.PackageData.from_data(package_data, package_only)


def get_conda_yaml_dependencies(conda_data):
    """
    Return a list of DependentPackage mappings from conda and pypi
    dependencies present in a `conda_data` mapping.

    Each item of the top-level ``dependencies`` list is handled by type:

    - a string is either a pip-style requirement (reported as a ``pypi``
      purl) or a conda spec such as ``channel::name=version`` (reported as
      a ``conda`` purl with the channel as namespace). ``pip`` and
      ``python`` entries are skipped.
    - a mapping is only looked up for its ``pip`` key, a list of pip
      requirement lines, each reported as a ``pypi`` purl.

    Items of any other type are ignored. Return an empty list when there
    are no dependencies.
    """
    deps = []
    for dep in conda_data.get('dependencies') or []:
        if isinstance(dep, str):
            purl, specs, is_pinned = _parse_conda_dependency_string(dep)

            if purl.name in ('pip', 'python'):
                continue

            deps.append(_build_dependency_mapping(purl, specs, is_pinned))

        elif isinstance(dep, dict):
            # A "pip:" key with no (or only commented-out) entries loads as
            # None: treat it the same as an empty list.
            for line in dep.get('pip') or []:
                req = parse_requirement_line(line)
                if req:
                    purl, specs, is_pinned = _parse_pypi_requirement(req)
                    deps.append(
                        _build_dependency_mapping(purl, specs, is_pinned)
                    )

    return deps


def _parse_pypi_requirement(req):
    """
    Return a (purl, specs, is_pinned) tuple for a parsed pip requirement
    ``req`` as returned by ``parse_requirement_line()``.
    """
    version = None
    is_pinned = False
    specs = str(req.specs)
    # Only an "==" specifier provides a version for the purl.
    if '==' in specs:
        version = specs.replace('==', '')
        is_pinned = True
    purl = PackageURL(type='pypi', name=req.name, version=version)
    return purl, specs, is_pinned


def _parse_conda_dependency_string(dep):
    """
    Return a (purl, specs, is_pinned) tuple for a ``dep`` dependency string
    of a Conda environment file, such as ``conda-forge::numpy=1.19.2``.
    """
    namespace = None
    if "::" in dep:
        # "channel::name" form: the channel is reported as the purl namespace
        namespace, dep = dep.split("::", 1)

    # NOTE(review): parse_requirement_line() is assumed to return a falsy
    # value for lines that are not valid pip requirements (such as the
    # single "=" conda pin) -- confirm against dparse2.
    req = parse_requirement_line(dep)
    if req:
        return _parse_pypi_requirement(req)

    # Always start from known values so that a spec without any "=" neither
    # fails on an unbound name nor reuses a previous dependency's version.
    version = None
    specs = None
    is_pinned = False
    if "=" in dep:
        # Conda pins look like "name=version" and optionally
        # "name=version=build": split on the first "=" only and keep the
        # full pin as the extracted requirement.
        dep, _, pinned = dep.partition("=")
        version = pinned.partition("=")[0]
        is_pinned = True
        specs = f"={pinned}"

    purl = PackageURL(
        type='conda',
        namespace=namespace,
        name=dep,
        version=version,
    )
    return purl, specs, is_pinned


def _build_dependency_mapping(purl, specs, is_pinned):
    """
    Return a DependentPackage mapping for a direct, runtime, non-optional
    dependency in the 'dependencies' scope.
    """
    return models.DependentPackage(
        purl=purl.to_string(),
        extracted_requirement=specs,
        scope='dependencies',
        is_runtime=True,
        is_optional=False,
        is_pinned=is_pinned,
        is_direct=True,
    ).to_dict()


def get_meta_yaml_data(location):
"""
Return a mapping of conda metadata loaded from a meta.yaml files. The format
Expand All @@ -158,10 +296,21 @@ def get_meta_yaml_data(location):
# Replace the variable with the value
if '{{' in line and '}}' in line:
for variable, value in variables.items():
line = line.replace('{{ ' + variable + ' }}', value)
if "|lower" in line:
line = line.replace('{{ ' + variable + '|lower' + ' }}', value.lower())
else:
line = line.replace('{{ ' + variable + ' }}', value)
yaml_lines.append(line)

return saneyaml.load('\n'.join(yaml_lines))
# Cleanup any remaining complex jinja template lines
# as the yaml load fails otherwise for unresolved jinja
cleaned_yaml_lines = [
line
for line in yaml_lines
if not "{{" in line
]

return saneyaml.load(''.join(cleaned_yaml_lines))


def get_variables(location):
Expand Down
11 changes: 10 additions & 1 deletion src/packagedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1100,7 +1100,16 @@ def is_datafile(cls, location, filetypes=tuple(), _bare_filename=False):
"""
if filetype.is_file(location) or _bare_filename:
loc = as_posixpath(location)
if any(fnmatchcase(loc, pat) for pat in cls.path_patterns):

# Some extension strings are used interchangeably
extension_aliases = {"yaml": "yml"}
path_patterns = list(cls.path_patterns)
for pattern in cls.path_patterns:
for extension, extension_alias in extension_aliases.items():
new_pattern = pattern.replace(extension, extension_alias)
path_patterns.append(new_pattern)

if any(fnmatchcase(loc, pat) for pat in path_patterns):
filetypes = filetypes or cls.filetypes
if not filetypes:
return True
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# from https://raw.githubusercontent.com/bayer-science-for-a-better-life/phc-gnn/refs/heads/master/environment_gpu.yml

name: phc-gnn

channels:
- anaconda
- pytorch
- conda-forge
- defaults

dependencies:
- pip=20.2.4
- anaconda::python=3.8.5
- anaconda::python-dateutil=2.8.1
- cudatoolkit=10.1
- magma-cuda101
- cudnn=7.6.5
- pytorch=1.7.1
- torchvision=0.8.2
- torchaudio=0.7.2
- conda-forge::numpy=1.19.2
- anaconda::scipy=1.5.2
- conda-forge::matplotlib=3.3.2
- anaconda::networkx=2.5
- anaconda::scikit-learn=0.23.2
- anaconda::notebook=6.1.4
- anaconda::jupyter_client=6.1.7
- anaconda::jupyter_core=4.6.3
- anaconda::h5py=2.10.0
- conda-forge::tqdm=4.50.0
- conda-forge::tensorboard=2.4.0
- pip:
- ogb==1.2.4
- pytest==6.2.1
- bottleneck==1.3.2
# - torch-cluster==1.5.8
# - torch-scatter==2.0.5
# - torch-sparse==0.6.8
# - torch-spline-conv==1.2.0
# - torch-geometric==1.6.1
Loading

0 comments on commit 5fce14d

Please sign in to comment.