Skip to content

Commit

Permalink
Merge pull request #63 from 4Catalyzer/imps
Browse files Browse the repository at this point in the history
Test fixes, improvements to list_keys and parameterization of S3Annex APIs
  • Loading branch information
matt-m-mclaughlin authored Sep 6, 2024
2 parents f1e99a7 + 0817a1a commit a3c51d9
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 25 deletions.
56 changes: 40 additions & 16 deletions flask_annex/s3.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import boto3
import flask
import mimetypes
from botocore.config import Config

from .base import AnnexBase

Expand All @@ -10,6 +11,11 @@

MISSING = object()


def is_defined(obj):
    """Return True when *obj* holds a real value (neither MISSING nor None)."""
    return not (obj is MISSING or obj is None)


# -----------------------------------------------------------------------------


Expand All @@ -23,12 +29,14 @@ def __init__(
secret_access_key=None,
expires_in=DEFAULT_EXPIRES_IN,
max_content_length=MISSING,
config: Config | None = None,
):
self._client = boto3.client(
"s3",
region,
aws_access_key_id=access_key_id,
aws_secret_access_key=secret_access_key,
config=config,
)

self._bucket_name = bucket_name
Expand Down Expand Up @@ -57,13 +65,17 @@ def get_file(self, key, out_file):
self._client.download_fileobj(self._bucket_name, key, out_file)

def list_keys(self, prefix):
# NOTE(review): diff view — the lines below mix the removed implementation
# (a single non-paginated list_objects_v2 call returning a tuple) with the
# added one (a paginator that lazily yields every key under *prefix*).
# Post-merge only the paginator version remains; callers that need a
# sequence must materialize the generator (see tests wrapping in tuple()).
response = self._client.list_objects_v2(
Bucket=self._bucket_name,
Prefix=prefix,
paginator = self._client.get_paginator("list_objects_v2")
page_iterator = paginator.paginate(
Bucket=self._bucket_name, Prefix=prefix
)

# Pages with no matches omit the "Contents" key entirely, hence the guard.
return (
item["Key"]
for page in page_iterator
if "Contents" in page
for item in page["Contents"]
)
if "Contents" not in response:
return ()
return tuple(item["Key"] for item in response["Contents"])

def save_file(self, key, in_file):
# Get the content type from the key, rather than letting Boto try to
Expand Down Expand Up @@ -91,32 +103,39 @@ def save_file(self, key, in_file):
extra_args,
)

def send_file(self, key):
url = self._client.generate_presigned_url(
def generate_presigned_url(self, key, content_disposition=None):
    """Build a time-limited S3 GET URL for *key*.

    :param key: object key within the configured bucket.
    :param content_disposition: value for the ResponseContentDisposition
        response header; any falsy value falls back to "attachment".
    :return: the presigned URL string.
    """
    disposition = content_disposition or "attachment"
    # No explicit filename is needed: the basename of the key is already
    # part of the URL and is appropriate for the download.
    request_params = {
        "Bucket": self._bucket_name,
        "Key": key,
        "ResponseContentDisposition": disposition,
    }
    return self._client.generate_presigned_url(
        ClientMethod="get_object",
        Params=request_params,
        ExpiresIn=self._expires_in,
    )

def send_file(self, key, content_disposition=None):
    """Redirect the current request to a presigned S3 URL for *key*.

    *content_disposition* is forwarded to generate_presigned_url and
    controls the ResponseContentDisposition header of the download.
    """
    return flask.redirect(self.generate_presigned_url(key, content_disposition))

def get_upload_info(self, key):
def get_upload_info(self, key, max_content_length=MISSING):
fields = {}
conditions = []

content_type = mimetypes.guess_type(key)[0]
if content_type:
fields["Content-Type"] = content_type

if self._max_content_length is not MISSING:
if is_defined(max_content_length):
max_content_length = max_content_length
elif is_defined(self._max_content_length):
max_content_length = self._max_content_length
else:
elif flask.current_app.config["MAX_CONTENT_LENGTH"] is not None:
max_content_length = flask.current_app.config["MAX_CONTENT_LENGTH"]
else:
max_content_length = None

if max_content_length is not None:
conditions.append(
("content-length-range", 0, max_content_length),
Expand All @@ -134,8 +153,13 @@ def get_upload_info(self, key):
ExpiresIn=self._expires_in,
)

url = post_info["url"]

# Coerce this to entries to ensure order remains as S3 expects.
post_data = tuple(post_info["fields"].items())

return {
"method": "POST",
"url": post_info["url"],
"post_data": tuple(post_info["fields"].items()),
"url": url,
"post_data": post_data,
}
22 changes: 17 additions & 5 deletions tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,19 @@ def file(key):
@app.route("/upload_info/<path:key>")
def upload_info(key):
try:
upload_info = annex.get_upload_info(key)
known_query_args = {
"max_content_length": flask.request.args.get(
"max_content_length", type=int
)
}
upload_info = annex.get_upload_info(
key,
**{
k: v
for k, v in known_query_args.items()
if k in flask.request.args
},
)
except NotImplementedError:
upload_info = {
"method": "PUT",
Expand Down Expand Up @@ -103,14 +115,14 @@ def test_replace_file(self, annex):
assert_key_value(annex, "foo/bar.txt", b"5\n")

def test_delete(self, annex):
# NOTE(review): diff view — each assertion appears twice, once as the
# removed line and once as the added tuple(...) variant. The tuple()
# wrapping is needed because list_keys now returns a generator, which
# is always truthy regardless of whether it yields any keys.
assert annex.list_keys("foo/bar.txt")
assert tuple(annex.list_keys("foo/bar.txt"))
annex.delete("foo/bar.txt")
assert not annex.list_keys("foo/bar.txt")
assert not tuple(annex.list_keys("foo/bar.txt"))

# Deleting a key that does not exist must complete without raising.
def test_delete_nonexistent(self, annex):
annex.delete("@@nonexistent")

def test_delete_many(self, annex):
# NOTE(review): diff view — the list_keys assertions appear twice (removed
# line plus added tuple(...) line); post-merge only the tuple() variants
# remain, since list_keys now returns an always-truthy generator.
# delete_many must tolerate a mix of existing and nonexistent keys.
assert annex.list_keys("")
assert tuple(annex.list_keys(""))
annex.delete_many(("foo/bar.txt", "foo/baz.json", "foo/@@nonexistent"))
assert not annex.list_keys("")
assert not tuple(annex.list_keys(""))
39 changes: 35 additions & 4 deletions tests/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
try:
import boto3
import requests
from botocore.config import Config
from moto import mock_aws
except ImportError:
pytestmark = pytest.mark.skipif(True, reason="S3 support not installed")
Expand Down Expand Up @@ -81,11 +82,14 @@ def test_send_file(self, client):
# FIXME: Workaround for spulec/moto#657.
assert "application/json" in s3_response.headers["Content-Type"]

def assert_upload_info_url_method(self, upload_info):
    """Check the standard method/url pair of a POST upload-info payload."""
    expected_url = "https://flask-annex.s3.amazonaws.com/"
    assert upload_info["method"] == "POST"
    assert upload_info["url"] == expected_url

def test_get_upload_info(self, client):
upload_info = get_upload_info(client, "foo/qux.txt")

assert upload_info["method"] == "POST"
assert upload_info["url"] == "https://flask-annex.s3.amazonaws.com/"
self.assert_upload_info_url_method(upload_info)
assert upload_info["post_data"][0] == ["Content-Type", "text/plain"]
assert upload_info["post_data"][1] == ["key", "foo/qux.txt"]
assert upload_info["post_data"][2] == ["AWSAccessKeyId", "FOOBARKEY"]
Expand All @@ -107,18 +111,30 @@ def test_get_upload_info_max_content_length(self, app, client):
app.config["MAX_CONTENT_LENGTH"] = 100

upload_info = get_upload_info(client, "foo/qux.txt")
self.assert_upload_info_url_method(upload_info)

conditions = get_policy(upload_info)["conditions"]
self.assert_app_config_content_length_range(conditions)

def test_get_upload_info_overridden_max_content_length(self, app, client):
    """A max_content_length query arg overrides the configured limit."""
    upload_info = get_upload_info(
        client, "foo/qux.txt", query_string={"max_content_length": 500}
    )
    self.assert_upload_info_url_method(upload_info)

    policy_conditions = get_policy(upload_info)["conditions"]
    self.assert_overridden_content_length_range(policy_conditions)

def assert_overridden_content_length_range(self, conditions):
    """The policy must carry the overridden 500-byte upload cap."""
    expected_range = [0, 500]
    assert get_condition(conditions, "content-length-range") == expected_range

def assert_app_config_content_length_range(self, conditions):
    """The policy must carry the app-config 100-byte upload cap."""
    expected_range = [0, 100]
    assert get_condition(conditions, "content-length-range") == expected_range

def test_get_upload_info_unknown_content_type(self, client):
upload_info = get_upload_info(client, "foo/qux.@@nonexistent")

assert upload_info["method"] == "POST"
assert upload_info["url"] == "https://flask-annex.s3.amazonaws.com/"
self.assert_upload_info_url_method(upload_info)

# filter for the "key" field; there should be only one instance
key_items = list(
Expand Down Expand Up @@ -162,3 +178,18 @@ def assert_default_content_length_range(self, conditions):

def assert_app_config_content_length_range(self, conditions):
    """The policy must carry the annex-level 1000-byte upload cap."""
    expected_range = [0, 1000]
    assert get_condition(conditions, "content-length-range") == expected_range


class TestS3AnnexAdvancedConfig(TestS3Annex):
    """Re-run the S3Annex suite with a custom botocore Config (dualstack)."""

    @pytest.fixture
    def annex_base(self, bucket_name):
        # Exercise the new `config` pass-through on the S3Annex constructor.
        dualstack_config = Config(use_dualstack_endpoint=True)
        return Annex("s3", bucket_name, config=dualstack_config)

    def assert_upload_info_url_method(self, upload_info):
        """With dualstack enabled, presigned URLs use the dualstack host."""
        expected_url = "https://flask-annex.s3.dualstack.us-east-1.amazonaws.com/"
        assert upload_info["method"] == "POST"
        assert upload_info["url"] == expected_url

0 comments on commit a3c51d9

Please sign in to comment.