Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a common class to store JSON / Simple API response data #71

Merged
merged 27 commits into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
default_language_version:
python: "3.11"

exclude: (^micropip/externals|^tests/vendored)
exclude: (^micropip/externals|^tests/vendored|^tests/test_data)
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: "v4.4.0"
Expand Down
134 changes: 134 additions & 0 deletions micropip/_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
import functools
from importlib.metadata import Distribution
from pathlib import Path
from sysconfig import get_platform

from packaging.tags import Tag
from packaging.tags import sys_tags as sys_tags_orig
from packaging.utils import BuildTag, InvalidWheelFilename
from packaging.utils import parse_wheel_filename as parse_wheel_filename_orig
from packaging.version import InvalidVersion, Version


def get_dist_info(dist: Distribution) -> Path:
Expand Down Expand Up @@ -47,3 +55,129 @@ def get_files_in_distribution(dist: Distribution) -> set[Path]:
files_to_remove.update(metadata_files)

return files_to_remove


@functools.cache
def sys_tags() -> list[Tag]:
    """Return the tags yielded by ``packaging.tags.sys_tags()`` as a list.

    The result is memoized with ``functools.cache``, so the underlying
    iterator is consumed only on the first call; later calls return the
    same list object.
    """
    return [*sys_tags_orig()]


@functools.cache
def parse_wheel_filename(
    filename: str,
) -> tuple[str, Version, BuildTag, frozenset[Tag]]:
    """Memoized wrapper around ``packaging.utils.parse_wheel_filename``.

    Parameters
    ----------
    filename
        The wheel filename to parse.

    Returns
    -------
    The tuple produced by ``packaging.utils.parse_wheel_filename``; the
    version is at index 1 and the set of tags at index 3.
    """
    parsed = parse_wheel_filename_orig(filename)
    return parsed


# TODO: Move these helper functions back to WheelInfo
def parse_version(filename: str) -> Version:
    """Return the version component parsed from a wheel filename."""
    _name, version, _build, _tags = parse_wheel_filename(filename)
    return version


def parse_tags(filename: str) -> frozenset[Tag]:
    """Return the set of tags parsed from a wheel filename."""
    _name, _version, _build, tags = parse_wheel_filename(filename)
    return tags


def best_compatible_tag_index(tags: frozenset[Tag]) -> int | None:
    """Find the position of the best supported tag that a wheel provides.

    ``packaging.tags.sys_tags()`` is ordered from most specific ("best") to most
    general ("worst") compatibility, so the position of the first match doubles
    as a priority rank: between two compatible wheels, the one whose index is
    closer to zero should be installed.

    Parameters
    ----------
    tags
        The tags to check.

    Returns
    -------
    The index, or ``None`` if this wheel has no compatible tags.
    """
    matching_ranks = (
        rank for rank, supported in enumerate(sys_tags()) if supported in tags
    )
    return next(matching_ranks, None)


def is_package_compatible(filename: str) -> bool:
    """
    Tell whether a package file can be used on the current platform.

    Parameters
    ----------
    filename
        Filename of the package to check.

    Returns
    -------
    ``False`` for anything that is not a ``.whl`` file or whose filename
    cannot be parsed, ``True`` for ``py3-none-any`` wheels (accepted without
    parsing), and otherwise whether at least one of the wheel's tags is among
    the supported tags.
    """
    # Fast path: accepted without parsing the filename.
    # (Any name with this suffix necessarily ends with ".whl" as well.)
    if filename.endswith("py3-none-any.whl"):
        return True

    # Only wheels are considered.
    if not filename.endswith(".whl"):
        return False

    try:
        wheel_tags = parse_tags(filename)
    except (InvalidVersion, InvalidWheelFilename):
        return False
    else:
        rank = best_compatible_tag_index(wheel_tags)
        return rank is not None


def check_compatible(filename: str) -> None:
"""
Check if a package is compatible with the current platform.
If not, raise an exception with an error message that explains why.
"""
compatible = is_package_compatible(filename)
if compatible:
return

# Not compatible, now we need to figure out why.

try:
tags = parse_tags(filename)
except InvalidWheelFilename:
raise ValueError(f"Wheel filename is invalid: {filename}") from None
except InvalidVersion:
raise ValueError(f"Wheel version is invalid: {filename}") from None

tag: Tag = next(iter(tags))
if "emscripten" not in tag.platform:
raise ValueError(
f"Wheel platform '{tag.platform}' is not compatible with "
f"Pyodide's platform '{get_platform()}'"
)

def platform_to_version(platform: str) -> str:
return (
platform.replace("-", "_")
.removeprefix("emscripten_")
.removesuffix("_wasm32")
.replace("_", ".")
)

wheel_emscripten_version = platform_to_version(tag.platform)
pyodide_emscripten_version = platform_to_version(get_platform())
if wheel_emscripten_version != pyodide_emscripten_version:
raise ValueError(
f"Wheel was built with Emscripten v{wheel_emscripten_version} but "
f"Pyodide was built with Emscripten v{pyodide_emscripten_version}"
)

abi_incompatible = True
from sys import version_info

version = f"{version_info.major}{version_info.minor}"
abis = ["abi3", f"cp{version}"]
for tag in tags:
if tag.abi in abis:
abi_incompatible = False
break
if abi_incompatible:
abis_string = ",".join({tag.abi for tag in tags})
raise ValueError(
f"Wheel abi '{abis_string}' is not supported. Supported abis are 'abi3' and 'cp{version}'."
Copy link
Member

@rth rth Jul 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought we were not supporting abi3? I think there was a discussion, either in cibuildwheel or in some other wheel-related PR, that abi3 didn't make sense for us.

If this was copied from existing code, that's fine, but maybe we should open a follow-up issue.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we are planning to break ABI with every Python version, then abi3 is a bit pointless. I think we can support it and just check that the interpreter version matches exactly. So it's supposed to be that cp311-abi3 works with all Pythons <= 311 but instead we check that the interpreter is exactly cp311. For tools like cibuildwheel that generate wheels, there's no reason to support abi3. But if someone uses a tool like maturin to generate an abi3 wheel we should be able to consume it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like we have some packages that have abi3 ABI tags in the Pyodide distribution.

image

)

raise ValueError(f"Wheel interpreter version '{tag.interpreter}' is not supported.")
188 changes: 188 additions & 0 deletions micropip/package_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
import sys
from collections import defaultdict
from collections.abc import Generator
from dataclasses import dataclass
from typing import Any

from packaging.utils import InvalidWheelFilename
from packaging.version import InvalidVersion, Version

from ._utils import is_package_compatible, parse_version


# TODO: Merge this class with WheelInfo
@dataclass
class ProjectInfoFile:
    """Metadata describing a single downloadable file of a project release."""

    # Name of the file
    filename: str
    # URL to download the file
    url: str
    # Version of the package
    version: Version
    # SHA256 hash of the file
    sha256: str

    # Size of the file in bytes, if available (PEP 700).
    # The Simple API HTML response does not provide this key,
    # so this field may be None.
    size: int | None = None


@dataclass
class ProjectInfo:
"""
This class stores common metadata that can be obtained from different APIs (JSON, Simple)
provided by PyPI. Responses received from PyPI or other package indexes that support the
same APIs must be converted to this class before being processed by micropip.
"""

name: str # Name of the package

# List of releases available for the package, sorted in ascending order by version.
# For each version, the wheels compatible with the current platform are stored.
# If no such wheel is available, the list is empty.
releases: dict[Version, Generator[ProjectInfoFile, None, None]]

@staticmethod
def from_json_api(data: dict[str, Any]) -> "ProjectInfo":
"""
Parse JSON API response

https://warehouse.pypa.io/api-reference/json.html
"""

name: str = data.get("info", {}).get("name", "UNKNOWN")
releases_raw: dict[str, list[Any]] = data["releases"]

# Filter out non PEP 440 compliant versions
releases: dict[Version, list[Any]] = {}
for version_str, fileinfo in releases_raw.items():
try:
version = Version(version_str)
if str(version) != version_str:
continue
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to keep this? As long as there is a version it's fine, no? And str(version) has a non-negligible runtime.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. I think it is okay to remove it now.

That check was introduced at pyodide/pyodide#2752, but I think it cannot happen on wheel files (unless someone modifies the version manually).


except InvalidVersion:
continue

# Skip empty releases
if not fileinfo:
continue

releases[version] = fileinfo

return ProjectInfo._compatible_only(name, releases)

@staticmethod
def from_simple_api(data: dict[str, Any]) -> "ProjectInfo":
    """
    Build a ``ProjectInfo`` from a Simple API response.

    https://peps.python.org/pep-0503/
    https://peps.python.org/pep-0691/
    """
    project_name = data["name"]

    # The list of versions (PEP 700) is not critical for finding packages,
    # but registering every version up front ensures that the same class
    # instance is returned from the JSON and Simple APIs.
    declared_versions = data.get("versions", [])

    # Files grouped by the version they belong to.
    files_by_version: dict[Version, list[Any]] = defaultdict(list)

    for version_str in declared_versions:
        if _is_valid_pep440_version(version_str):
            files_by_version[Version(version_str)] = []

    for entry in data["files"]:
        wheel_name = entry["filename"]

        # Parsing a wheel filename is expensive, so files rejected by the
        # quick check are dropped before parsing.
        if not _fast_check_incompatibility(wheel_name):
            continue

        try:
            wheel_version = parse_version(wheel_name)
        except (InvalidVersion, InvalidWheelFilename):
            # Skip files whose name or version cannot be parsed.
            continue

        files_by_version[wheel_version].append(entry)

    return ProjectInfo._compatible_only(project_name, files_by_version)

@staticmethod
def _compatible_only(
name: str, releases: dict[Version, list[dict[str, Any]]]
) -> "ProjectInfo":
def _compatible_wheels(
files: list[dict[str, Any]], version: Version
) -> Generator[ProjectInfoFile, None, None]:
"""
Return a generator of wheels compatible with the current platform.
Checking compatibility takes a bit of time, so we use a generator to avoid doing it if not needed.
"""
for file in files:
filename = file["filename"]

# Checking compatibility takes a bit of time,
# so we use a generator to avoid doing it for all files.
compatible = is_package_compatible(filename)
if not compatible:
continue

# JSON API has a "digests" key, while Simple API has a "hashes" key.
hashes = file["digests"] if "digests" in file else file["hashes"]

# TODO: For now we expect that the sha256 hash is always available.
# This is true for PyPI, but may not be true for other package indexes,
# since it is not a hard requirement of PEP503.
sha256 = hashes["sha256"]

yield ProjectInfoFile(
filename=filename,
url=file["url"],
version=version,
sha256=sha256,
size=file.get("size"),
)

releases_compatible = {
version: _compatible_wheels(files, version)
Copy link
Member

@rth rth Jul 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we want to go faster (no need to do it, just an idea), we can probably also subclass Version to something that has a frozen __str__ and __hash__ so it doesn't need to be re-computed at each dict comparison.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I opened an issue (#73) so that we can do it as a follow-up. I also think it would be a good issue for new contributors if there are other sprint / tutorial events.

for version, files in releases.items()
}

# Unfortunately, the JSON API seems to compare versions as strings...
# For example, pytest 3.10.0 is considered newer than 3.2.0.
# So we need to sort the releases by version again here.
releases_compatible = dict(sorted(releases_compatible.items()))

return ProjectInfo(
name=name,
releases=releases_compatible,
)


def _is_valid_pep440_version(version_str: str) -> bool:
    """
    Tell whether ``version_str`` is a version string that ``Version`` accepts
    and that is unchanged when the parsed ``Version`` is converted back to a
    string.
    """
    try:
        parsed = Version(version_str)
    except InvalidVersion:
        return False

    # Reject strings that parse but do not round-trip to the same text.
    return str(parsed) == version_str


def _fast_check_incompatibility(filename: str) -> bool:
    """
    Cheap pre-filter used before the expensive wheel-filename parsing.

    Despite its name, this function returns ``False`` when the file is
    certainly not usable on the current platform, and ``True`` when the file
    *may* be compatible and should go through the full compatibility check.

    Parameters
    ----------
    filename
        Filename of the package file to check.

    Returns
    -------
    ``False`` if the file is not a ``.whl`` file, or if its name neither
    contains ``sys.platform`` nor ends with ``-none-any.whl``; ``True``
    otherwise.

    Note that this function may return ``True`` for some packages that are
    actually incompatible, so it should only be used as a quick check.
    """
    # Only wheels are candidates at all.
    if not filename.endswith(".whl"):
        return False

    # Keep platform-independent wheels and wheels whose name mentions the
    # current platform; everything else is rejected.
    return filename.endswith("-none-any.whl") or sys.platform in filename
Loading