diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index f6e7e5ff..541ff5ea 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -1,28 +1,28 @@ name: Oras Python Tests on: - pull_request: [] + pull_request: jobs: formatting: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - name: Check Spelling - uses: crate-ci/typos@7ad296c72fa8265059cc03d1eda562fbdfcd6df2 # v1.9.0 - with: - files: ./docs ./README.md + - uses: actions/checkout@v4 + - name: Check Spelling + uses: crate-ci/typos@7ad296c72fa8265059cc03d1eda562fbdfcd6df2 # v1.9.0 + with: + files: ./docs ./README.md - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.11 - - name: Lint Oras Python - run: | - python --version - python3 -m pip install pre-commit - python3 -m pip install black - make develop - make lint + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.11 + - name: Lint Oras Python + run: | + python --version + python3 -m pip install pre-commit + python3 -m pip install black + make develop + make lint test-oras-py: runs-on: ubuntu-latest @@ -30,18 +30,53 @@ jobs: registry: image: ghcr.io/oras-project/registry:latest ports: - - 5000:5000 + - 5000:5000 steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.11 - - name: Test Oras Python - env: - registry_host: localhost - registry_port: ${{ job.services.registry.ports[5000] }} - REGISTRY_STORAGE_DELETE_ENABLED: "true" - run: | - make install - make test + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.11 + - name: Make space for large files + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /usr/local/lib/android + sudo rm -rf /opt/ghc + sudo apt-get remove -y firefox || true + sudo apt-get remove -y google-chrome-stable || true + sudo apt purge -y openjdk-* || echo "OpenJDK is not installed" # -y on every apt call: never wait for a confirmation prompt + sudo apt 
remove -y --autoremove openjdk-* || echo "OpenJDK is not installed" + sudo apt purge -y oracle-java* || echo "Oracle Java is not installed" + sudo apt remove -y --autoremove adoptopenjdk-* || echo "Adopt open JDK is not installed" + sudo apt-get remove -y ant || echo "ant is not installed" + sudo rm -rf /opt/hostedtoolcache/Java_Adopt_jdk || true + sudo apt-get remove -y podman || echo "Podman is not installed" + sudo apt-get remove -y buildah || echo "Buildah is not installed" + sudo apt-get remove -y esl-erlang || echo "erlang is not installed" + sudo rm -rf /opt/google + sudo rm -rf /usr/share/az* /opt/az || true + sudo rm -rf /opt/microsoft + sudo rm -rf /opt/hostedtoolcache/Ruby + sudo apt-get remove -y swift || echo "swift is not installed" + sudo apt-get remove -y swig || echo "swig is not installed" + sudo apt-get remove -y texinfo || echo "texinfo is not installed" + sudo apt-get remove -y texlive || echo "texlive is not installed" + sudo apt-get remove -y r-base-core r-base || echo "R is not installed" + sudo rm -rf /opt/R + sudo rm -rf /usr/share/R + sudo rm -rf /opt/*.zip + sudo rm -rf /opt/*.tar.gz + sudo rm -rf /usr/share/*.zip + sudo rm -rf /usr/share/*.tar.gz + sudo rm -rf /opt/hhvm + sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo rm -rf /opt/hostedtoolcache/node + sudo apt-get autoremove -y + - name: Test Oras Python + env: + registry_host: localhost + registry_port: ${{ job.services.registry.ports[5000] }} + REGISTRY_STORAGE_DELETE_ENABLED: "true" + run: | + make install + make test diff --git a/.gitignore b/.gitignore index 2eb46af4..55b5d328 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ oras.egg-info/ env __pycache__ .python-version +.venv diff --git a/CHANGELOG.md b/CHANGELOG.md index affe9f82..470cc7f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are: The versions coincide with releases on pip. Only major versions will be released as tags on Github. 
## [0.0.x](https://github.com/oras-project/oras-py/tree/main) (0.0.x) + - re-enable chunked upload (0.2.1) - refactor of auth to be provided by backend modules (0.2.0) - bugfix maintain requests's verify valorization for all invocations, augment basic auth header to existing headers - Allow generating a Subject from a pre-existing Manifest (0.1.30) diff --git a/oras/defaults.py b/oras/defaults.py index 4082b0c6..a1452e24 100644 --- a/oras/defaults.py +++ b/oras/defaults.py @@ -38,6 +38,9 @@ class registry: # DefaultBlocksize default size of each slice of bytes read in each write through in gunzipand untar. default_blocksize = 32768 +# DefaultChunkSize default size of each chunk when uploading chunked blobs. +default_chunksize = 16777216 # 16MB + # what you get for a blank digest, so we don't need to save and recalculate blank_hash = "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" diff --git a/oras/provider.py b/oras/provider.py index 5049adaf..10a68fa2 100644 --- a/oras/provider.py +++ b/oras/provider.py @@ -251,12 +251,14 @@ def upload_blob( container: container_type, layer: dict, do_chunked: bool = False, + chunk_size: int = oras.defaults.default_chunksize, ) -> requests.Response: """ Prepare and upload a blob. - Sizes > 1024 are uploaded via a chunked approach (post, patch+, put) - and <= 1024 is a single post then put. + Large artifacts can be uploaded via a chunked approach (post, patch+, put) + to registries that support it. Larger chunks generally give better throughput. + Set do_chunked=True for chunked upload. :param blob: path to blob to upload :type blob: str @@ -264,6 +266,10 @@ def upload_blob( :type container: oras.container.Container or str :param layer: dict from oras.oci.NewLayer :type layer: dict + :param do_chunked: if true do chunked blob upload. This allows upload of larger oci artifacts. + :type do_chunked: bool + :param chunk_size: size in bytes of each chunk; only used when do_chunked is true. 
+ :type chunk_size: int """ blob = os.path.abspath(blob) container = self.get_container(container) @@ -274,7 +280,12 @@ def upload_blob( if not do_chunked: response = self.put_upload(blob, container, layer) else: - response = self.chunked_upload(blob, container, layer) + response = self.chunked_upload( + blob, + container, + layer, + chunk_size=chunk_size, + ) # If we have an empty layer digest and the registry didn't accept, just return dummy successful response if ( @@ -571,6 +582,7 @@ def chunked_upload( blob: str, container: oras.container.Container, layer: dict, + chunk_size: int = oras.defaults.default_chunksize, ) -> requests.Response: """ Upload via a chunked upload. @@ -581,9 +593,12 @@ def chunked_upload( :type container: oras.container.Container or str :param layer: dict from oras.oci.NewLayer :type layer: dict + :param chunk_size: chunk size in bytes + :type chunk_size: int """ # Start an upload session headers = {"Content-Type": "application/octet-stream", "Content-Length": "0"} + headers.update(self.headers) upload_url = f"{self.prefix}://{container.upload_blob_url()}" r = self.do_request(upload_url, "POST", headers=headers) @@ -596,10 +611,7 @@ def chunked_upload( # Read the blob in chunks, for each do a patch start = 0 with open(blob, "rb") as fd: - for chunk in oras.utils.read_in_chunks(fd): - if not chunk: - break - + for chunk in oras.utils.read_in_chunks(fd, chunk_size=chunk_size): end = start + len(chunk) - 1 content_range = "%s-%s" % (start, end) headers = { @@ -607,13 +619,19 @@ def chunked_upload( "Content-Length": str(len(chunk)), "Content-Type": "application/octet-stream", } + headers.update(self.headers) # Important to update with auth token if acquired # TODO call to auth here start = end + 1 self._check_200_response( - self.do_request(session_url, "PATCH", data=chunk, headers=headers) + r := self.do_request( + session_url, "PATCH", data=chunk, headers=headers + ) ) + session_url = self._get_location(r, container) + if not session_url: + 
raise ValueError(f"Issue retrieving session url: {r.json()}") # Finally, issue a PUT request to close blob session_url = oras.utils.append_url_params( @@ -682,6 +700,8 @@ def push( annotation_file: Optional[str] = None, manifest_annotations: Optional[dict] = None, subject: Optional[str] = None, + do_chunked: bool = False, + chunk_size: int = oras.defaults.default_chunksize, ) -> requests.Response: """ Push a set of files to a target @@ -700,6 +720,10 @@ def push( :type manifest_annotations: dict :param target: target location to push to :type target: str + :param do_chunked: if true do chunked blob upload + :type do_chunked: bool + :param chunk_size: chunk size in bytes + :type chunk_size: int :param subject: optional subject reference :type subject: oras.oci.Subject """ @@ -759,7 +783,13 @@ def push( logger.debug(f"Preparing layer {layer}") # Upload the blob layer - response = self.upload_blob(blob, container, layer) + response = self.upload_blob( + blob, + container, + layer, + do_chunked=do_chunked, + chunk_size=chunk_size, + ) self._check_200_response(response) # Do we need to cleanup a temporary targz? 
diff --git a/oras/tests/test_provider.py b/oras/tests/test_provider.py index 2babd5a5..f91711e8 100644 --- a/oras/tests/test_provider.py +++ b/oras/tests/test_provider.py @@ -3,6 +3,7 @@ __license__ = "Apache-2.0" import os +import subprocess from pathlib import Path import pytest @@ -13,7 +14,7 @@ import oras.provider import oras.utils -here = os.path.abspath(os.path.dirname(__file__)) +here = Path(__file__).resolve().parent @pytest.mark.with_auth(False) @@ -62,6 +63,61 @@ def test_annotated_registry_push(tmp_path, registry, credentials, target): ) +@pytest.mark.with_auth(False) +def test_chunked_push(tmp_path, registry, credentials, target): + """ + Basic tests for oras chunked push: a small artifact, then a large sparse file. + """ + # Direct access to registry functions + client = oras.client.OrasClient(hostname=registry, insecure=True) + artifact = os.path.join(here, "artifact.txt") + + assert os.path.exists(artifact) + + res = client.push(files=[artifact], target=target, do_chunked=True) + assert res.status_code in [200, 201, 202] + + files = client.pull(target, outdir=tmp_path) + assert str(tmp_path / "artifact.txt") in files + assert oras.utils.get_file_hash(artifact) == oras.utils.get_file_hash(files[0]) + + # large file upload + base_size = oras.defaults.default_chunksize * 1024 # 16GB + tmp_chunked = here / "chunked" + try: + subprocess.run( + [ + "dd", + "if=/dev/null", + f"of={tmp_chunked}", + "bs=1", + "count=0", + f"seek={base_size}", + ], + check=True, # fail here, with dd's exit status, if the sparse file cannot be created + ) + + res = client.push( + files=[tmp_chunked], + target=target, + do_chunked=True, + ) + assert res.status_code in [200, 201, 202] + + files = client.pull(target, outdir=tmp_path / "download") + download = str(tmp_path / "download/chunked") + assert download in files + assert oras.utils.get_file_hash(str(tmp_chunked)) == oras.utils.get_file_hash( + download + ) + finally: + tmp_chunked.unlink(missing_ok=True) # do not mask the real failure when the file was never created + + # File that doesn't exist + with pytest.raises(FileNotFoundError): + res = client.push(files=[tmp_path / "none"], target=target) + + def 
test_parse_manifest(registry): """ Test parse manifest function. diff --git a/oras/version.py b/oras/version.py index c3a25566..5985032e 100644 --- a/oras/version.py +++ b/oras/version.py @@ -2,7 +2,7 @@ __copyright__ = "Copyright The ORAS Authors." __license__ = "Apache-2.0" -__version__ = "0.2.0" +__version__ = "0.2.1" AUTHOR = "Vanessa Sochat" EMAIL = "vsoch@users.noreply.github.com" NAME = "oras" diff --git a/scripts/test.sh b/scripts/test.sh index 47510f2b..8a5e9e89 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -1,10 +1,10 @@ #!/bin/bash -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd $DIR/../ # Ensure envars are defined - expected registry port and host -export ORAS_PORT=5000 +export ORAS_PORT=${ORAS_PORT:-5000} export ORAS_HOST=localhost export ORAS_REGISTRY=${ORAS_HOST}:${ORAS_PORT} export ORAS_USER=myuser