Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SFR-2481/save_test_data #543

Merged
merged 8 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ jobs:
AWS_ACCESS: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
ENVIRONMENT: qa
ELASTICSEARCH_HOST: ${{ secrets.ELASTICSEARCH_HOST }}
ELASTICSEARCH_INDEX: ${{ secrets.ELASTICSEARCH_INDEX }}
ELASTICSEARCH_PORT: ${{ secrets.ELASTICSEARCH_PORT }}
ELASTICSEARCH_SCHEME: ${{ secrets.ELASTICSEARCH_SCHEME }}
REDIS_HOST: ${{ secrets.REDIS_HOST }}
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.9
Expand All @@ -39,4 +44,4 @@ jobs:
pip install -r requirements.txt
- name: Run API tests
run: |
pytest tests/integration/api
pytest tests/integration/api --env=qa
6 changes: 4 additions & 2 deletions api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class APIUtils():
'nypl': 11
}

DEFAULT_PRIORITY = 100

@staticmethod
def normalizeQueryParams(params):
paramDict = params.to_dict(flat=False)
Expand Down Expand Up @@ -374,13 +376,13 @@ def formatEdition(
for itemDict in editionDict['items']:
if itemDict['links'] == []:
editionDict['items']\
.sort(key=lambda x: (cls.SOURCE_PRIORITY[x['source']], x['links'] == []))
.sort(key=lambda x: (cls.SOURCE_PRIORITY.get(x['source'], cls.DEFAULT_PRIORITY), x['links'] == []))
emptyListFlag = True
break

if emptyListFlag == False:
editionDict['items']\
.sort(key=lambda x: (cls.SOURCE_PRIORITY[x['source']], cls.sortByMediaType(x['links'][0])))
.sort(key=lambda x: (cls.SOURCE_PRIORITY.get(x['source'], cls.DEFAULT_PRIORITY), cls.sortByMediaType(x['links'][0])))

if records is not None:
itemsByLink = {}
Expand Down
3 changes: 2 additions & 1 deletion config/qa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ENVIRONMENT: qa
LOG_LEVEL: info

# POSTGRES CONNECTION DETAILS
# POSTGRES_USER, POSTGRES_PSWD, POSTGRES_ADMIN_USER and POSTGRES_ADMIN_PSWD must be configured in secrets file
# POSTGRES_USER, POSTGRES_PSWD, POSTGRES_ADMIN_USER and POSTGRES_ADMIN_PSWD must be configured in secrets file
POSTGRES_HOST: sfr-new-metadata-production-cluster.cluster-cvy7z512hcjg.us-east-1.rds.amazonaws.com
POSTGRES_NAME: dcdw_qa
POSTGRES_PORT: '5432'
Expand Down Expand Up @@ -53,6 +53,7 @@ NYPL_API_CLIENT_TOKEN_URL: https://isso.nypl.org/oauth/token
# DRB API Credentials
DRB_API_HOST: 'drb-api-qa.nypl.org'
DRB_API_PORT: '80'
DRB_API_URL: https://drb-api-qa.nypl.org

# Bardo CCE API URL
BARDO_CCE_API: http://sfr-c-ecsal-14v3injrieoy5-258691445.us-east-1.elb.amazonaws.com/search/
Expand Down
1 change: 0 additions & 1 deletion processes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from .cluster import ClusterProcess
from .local_development.local_development_setup import LocalDevelopmentSetupProcess
from .local_development.seed_local_data import SeedLocalDataProcess
from .local_development.seed_test_data import SeedTestDataProcess
from .file.s3_files import S3Process
from .api import APIProcess
from .ingest.muse import MUSEProcess
Expand Down
70 changes: 0 additions & 70 deletions processes/local_development/seed_test_data.py

This file was deleted.

80 changes: 79 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
import os
import pytest

from datetime import datetime, timezone
import json
from uuid import uuid4
from processes import ClusterProcess
from model import Record, Item
from logger import create_log
from managers import DBManager
from load_env import load_env_file

logger = create_log(__name__)

TEST_SOURCE = 'test_source'

def pytest_addoption(parser):
    """Register the ``--env`` command-line option with pytest.

    Args:
        parser: Object pytest passes to this hook; only its ``addoption``
            method is used here.
    """
    # Stored as a plain string; falls back to 'local' when the flag is omitted.
    parser.addoption(
        '--env',
        action='store',
        default='local',
        help='Environment to use for tests',
    )
Expand All @@ -16,3 +25,72 @@ def setup_env(pytestconfig, request):

if not running_unit_tests and environment in ['local', 'qa']:
load_env_file(environment, file_string=f'config/{environment}.yaml')

@pytest.fixture(scope='module')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question on the module scope: does this run during the unit tests?

We just want to run it during functional and integration tests.

def db_manager():
    """Yield a ``DBManager`` with an open session, then close its connection.

    Everything before the ``yield`` is setup; the ``close_connection`` call
    after it runs when the generator is resumed once the consumer is done.
    """
    manager = DBManager()
    manager.createSession()

    yield manager

    manager.close_connection()
ayan1229 marked this conversation as resolved.
Show resolved Hide resolved

@pytest.fixture(scope='module')
def seed_test_data(db_manager):
flags = { 'catalog': False, 'download': False, 'reader': False, 'embed': True }
test_data = {
'title': 'test data 1',
'uuid': uuid4(),
'frbr_status': 'complete',
'cluster_status': False,
"source": TEST_SOURCE,
'authors': ['Ayan||true'],
'languages': ['Serbian'],
'dates': ['1907-|publication_date'],
'publisher': ['Project Gutenberg Literary Archive Foundation||'],
'identifiers': [],
'source_id': '4064148285|test',
'contributors': ['Metropolitan Museum of Art (New York, N.Y.)|||contributor','Metropolitan Museum of Art (New York, N.Y.)|||repository','Thomas J. Watson Library|||provider'],
'extent': ('11, 164 p. ;'),
'is_part_of': ['Tauchnitz edition|Vol. 4560|volume'],
'abstract': ['test abstract 1', 'test abstract 2'],
'subjects': ['test subjects 1||'],
'rights': ('hathitrust|public_domain|expiration of copyright term for non-US work with corporate author|Public Domain|2021-10-02 05:25:13'),
'has_part': [f'1|example.com/1.pdf|{TEST_SOURCE}|text/html|{json.dumps(flags)}']
}

existing_record = db_manager.session.query(Record).filter_by(source_id=test_data['source_id']).first()

if existing_record:
for key, value in test_data.items():
if key != 'uuid' and hasattr(existing_record, key):
setattr(existing_record, key, value)
existing_record.date_modified = datetime.now(timezone.utc).replace(tzinfo=None)
test_data['uuid'] = existing_record.uuid
test_record = existing_record
else:
test_record = Record(**test_data)
db_manager.session.add(test_record)

db_manager.session.commit()

ayan1229 marked this conversation as resolved.
Show resolved Hide resolved
cluster_process = ClusterProcess('complete', None, None, str(test_data['uuid']), None)
cluster_process.runProcess()

item = db_manager.session.query(Item).filter_by(record_id=test_record.id).first()
edition_id = str(item.edition_id) if item else None

return {
'edition_id': edition_id,
'uuid': str(test_data['uuid'])
}

@pytest.fixture(scope='module')
def seeded_edition_id(request, seed_test_data):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You may want to double-check, but I think that because this fixture takes seed_test_data as a parameter, pytest will automatically resolve seed_test_data anyway. Thus, checking the request may not make sense.

if 'functional' in request.keywords or 'integration' in request.keywords:
return seed_test_data['edition_id']
return None

@pytest.fixture(scope='module')
def seeded_uuid(request, seed_test_data):
    """Return the seeded record's UUID for functional/integration requests.

    Returns ``seed_test_data['uuid']`` when ``'functional'`` or
    ``'integration'`` is present in ``request.keywords``; otherwise ``None``.
    """
    keywords = request.keywords
    if 'functional' not in keywords and 'integration' not in keywords:
        return None
    return seed_test_data['uuid']
10 changes: 6 additions & 4 deletions tests/integration/api/test_get_edition.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
import pytest
import os
import requests
from .constants import API_URL
from .utils import assert_response_status


@pytest.mark.integration
@pytest.mark.parametrize("endpoint, expected_status", [
("/editions/1982731", 200),
("/editions/{seeded_edition_id}", 200),
("/editions/00000000-0000-0000-0000-000000000000", 400),
("/editions/invalid_id_format", 400),
("/editions/", 404),
("/editions/%$@!*", 400)
])
def test_get_edition(endpoint, expected_status):
url = API_URL + endpoint
def test_get_edition(endpoint, expected_status, seeded_edition_id):
url = os.getenv("DRB_API_URL") + endpoint.format(seeded_edition_id=seeded_edition_id)
response = requests.get(url)

assert response.status_code is not None
Expand Down
Loading