Skip to content

Commit

Permalink
NO-REF: Asserting manifests are uploaded during ingest (#544)
Browse files Browse the repository at this point in the history
  • Loading branch information
kylevillegas93 authored Jan 28, 2025
1 parent c722464 commit 8222305
Show file tree
Hide file tree
Showing 10 changed files with 49 additions and 21 deletions.
2 changes: 1 addition & 1 deletion ingestJSONFiles/chicagoISAC_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -6800,7 +6800,7 @@
"Ghent and A Joint Publication of the University of Ghent and the Oriental Institute of the University of Chicago"
],
"publicationDate": "2014",
"isbn": "Pp. xix + 102; 48 figures, 136 plates, 9 tables",
"isbn": "9781614910183",
"extent": "Hardback, 24 x 34.5 cm",
"url": [
"https://isac.uchicago.edu//sites/default/files/uploads/shared/docs/Mesopotamian_Pottery.pdf"
Expand Down
10 changes: 10 additions & 0 deletions model/postgres/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ class Part:
def get_file_bucket(self) -> Optional[str]:
parsed_url = urlparse(self.url)

if 'localhost' in parsed_url.hostname:
path_parts = parsed_url.path.split('/')

return path_parts[1]

if 's3' not in parsed_url.hostname:
return None

Expand All @@ -28,6 +33,11 @@ def get_file_bucket(self) -> Optional[str]:
def get_file_key(self) -> Optional[str]:
parsed_url = urlparse(self.url)

if 'localhost' in parsed_url.hostname:
path_parts = parsed_url.path.split('/')

return '/'.join(path_parts[2:])

if 's3' not in parsed_url.hostname:
return None

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ def assert_ingested_records(source_name: str) -> list[Record]:
.all()
)

assert len(records) > 1
assert len(records) >= 1

return records
17 changes: 17 additions & 0 deletions tests/functional/processes/ingest/assert_uploaded_manifests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from model import Record
from managers.s3 import S3Manager


def assert_uploaded_manifests(records: list[Record]):
    """Assert that the web-publication manifest of each record exists in S3.

    For every record, the first part whose ``file_type`` is
    ``application/webpub+json`` is checked with an S3 ``HEAD`` request.
    Records without such a part, and manifests whose URL contains
    ``epubs``, are skipped.

    Args:
        records: Ingested records whose manifests should be verified.

    Raises:
        AssertionError: If a manifest URL cannot be resolved to a bucket and
            key, or if the ``HEAD`` request yields no response.
    """
    s3_manager = S3Manager()
    s3_manager.createS3Client()

    for record in records:
        manifest_part = next(
            (
                part for part in record.get_parts()
                if part.file_type == 'application/webpub+json'
            ),
            None,
        )

        if not manifest_part or 'epubs' in manifest_part.url:
            continue

        key = manifest_part.get_file_key()
        bucket = manifest_part.get_file_bucket()

        # Both getters are Optional; fail here with a readable message rather
        # than letting the S3 client reject missing parameters.
        assert bucket and key, (
            f'Unable to resolve S3 bucket/key from manifest URL: {manifest_part.url}'
        )

        manifest_head_response = s3_manager.s3Client.head_object(Key=key, Bucket=bucket)

        assert manifest_head_response is not None, (
            f'No HEAD response for manifest s3://{bucket}/{key}'
        )
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from processes import ChicagoISACProcess
from .assert_ingested_records import assert_ingested_records
from .assert_uploaded_manifests import assert_uploaded_manifests


def test_chigaco_isac_process():
    """Run the Chicago ISAC ingest and verify its records and manifests."""
    isac_process = ChicagoISACProcess('complete', None, None, None, 5, None)

    isac_process.runProcess()

    # Query the ingested records once and reuse them for the manifest check.
    records = assert_ingested_records(source_name='isac')
    assert_uploaded_manifests(records)
4 changes: 3 additions & 1 deletion tests/functional/processes/ingest/test_doab_process.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from processes import DOABProcess
from .assert_ingested_records import assert_ingested_records
from .assert_uploaded_manifests import assert_uploaded_manifests


def test_doab_process():
    """Run the DOAB ingest and verify its records and manifests."""
    doab_process = DOABProcess('complete', None, None, None, 1, None)

    doab_process.runProcess()

    # Query the ingested records once and reuse them for the manifest check.
    records = assert_ingested_records(source_name='doab')
    assert_uploaded_manifests(records)
5 changes: 4 additions & 1 deletion tests/functional/processes/ingest/test_gutenberg_process.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from processes import GutenbergProcess
from .assert_ingested_records import assert_ingested_records
from .assert_uploaded_manifests import assert_uploaded_manifests


def test_gutenberg_process():
    """Run the Gutenberg ingest and verify its records and manifests."""
    gutenberg_process = GutenbergProcess('complete', None, None, None, 5, None)

    gutenberg_process.runProcess()

    # Query the ingested records once and reuse them for the manifest check.
    records = assert_ingested_records(source_name='gutenberg')
    assert_uploaded_manifests(records)
4 changes: 3 additions & 1 deletion tests/functional/processes/ingest/test_loc_process.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from processes import LOCProcess
from .assert_ingested_records import assert_ingested_records
from .assert_uploaded_manifests import assert_uploaded_manifests


def test_loc_process():
    """Run the LOC ingest and verify its records and manifests."""
    loc_process = LOCProcess('complete', None, None, None, 5, None)

    loc_process.runProcess()

    # Query the ingested records once and reuse them for the manifest check.
    records = assert_ingested_records(source_name='loc')
    assert_uploaded_manifests(records)
16 changes: 3 additions & 13 deletions tests/functional/processes/ingest/test_met_process.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,12 @@
from processes import METProcess
from .assert_ingested_records import assert_ingested_records
from managers.s3 import S3Manager
from .assert_uploaded_manifests import assert_uploaded_manifests


def test_met_process():
    """Run the MET ingest and verify its records and manifests."""
    met_process = METProcess('complete', None, None, None, 5, None)

    met_process.runProcess()

    # The manifest check is delegated to the shared helper used by the other
    # ingest tests instead of repeating the S3 lookup inline.
    records = assert_ingested_records(source_name='met')
    assert_uploaded_manifests(records)
6 changes: 4 additions & 2 deletions tests/functional/processes/ingest/test_muse_process.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from processes import MUSEProcess
from .assert_ingested_records import assert_ingested_records

from .assert_uploaded_manifests import assert_uploaded_manifests


def test_muse_process():
    """Run the MUSE ingest and verify its records and manifests."""
    muse_process = MUSEProcess('complete', None, None, None, 5, None)

    muse_process.runProcess()

    # Query the ingested records once and reuse them for the manifest check.
    records = assert_ingested_records(source_name='muse')
    assert_uploaded_manifests(records)

0 comments on commit 8222305

Please sign in to comment.