diff --git a/dspback/api.py b/dspback/api.py index 0f97049..3b8d775 100644 --- a/dspback/api.py +++ b/dspback/api.py @@ -1,11 +1,13 @@ import motor from beanie import init_beanie -from fastapi import FastAPI +from fastapi import FastAPI, status from fastapi.openapi.utils import get_openapi from fastapi.staticfiles import StaticFiles from starlette.middleware.cors import CORSMiddleware from starlette.middleware.sessions import SessionMiddleware from starlette.responses import PlainTextResponse +from pydantic import ValidationError + from dspback.config import get_settings from dspback.dependencies import RepositoryException @@ -50,6 +52,11 @@ async def http_exception_handler(request, exc): return PlainTextResponse(f"Repository exception response[{str(exc.detail)}]", status_code=exc.status_code) +@app.exception_handler(ValidationError) +async def validation_exception_handler(request, exc: ValidationError): + return PlainTextResponse(f"Request data validation errors: {str(exc)}", + status_code=status.HTTP_400_BAD_REQUEST) + @app.on_event("startup") async def startup_db_client(): diff --git a/dspback/dependencies.py b/dspback/dependencies.py index 8063e9d..f878523 100644 --- a/dspback/dependencies.py +++ b/dspback/dependencies.py @@ -10,7 +10,6 @@ from jose import JWTError, jwt from pydantic import BaseModel from starlette import status -from starlette.status import HTTP_403_FORBIDDEN from dspback.config import Settings, get_settings, oauth from dspback.database.procedures import delete_repository_access_token @@ -74,7 +73,7 @@ async def __call__(self, request: Request, access_token: Optional[str] = None) - if not authorization: if self.auto_error: - raise HTTPException(status_code=HTTP_403_FORBIDDEN, detail="Not authenticated") + raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Not authenticated") else: return None return param @@ -189,13 +188,15 @@ async def get_current_repository_token( ) -> RepositoryToken: repository_token: RepositoryToken = 
user.repository_token(repository) if not repository_token: - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=f"User has not authorized with {repository}") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, detail=f"User has not authorized with {repository}" + ) expiration_buffer: int = settings.access_token_expiration_buffer_seconds now = int(datetime.utcnow().timestamp()) if now > repository_token.expires_at: await delete_repository_access_token(repository, user) - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=f"User token for {repository} has expired") + raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=f"User token for {repository} has expired") if now > repository_token.expires_at - expiration_buffer: if repository_token.refresh_token: client = getattr(oauth, repository) diff --git a/dspback/pydantic_schemas.py b/dspback/pydantic_schemas.py index 3c80690..c1cbd3f 100644 --- a/dspback/pydantic_schemas.py +++ b/dspback/pydantic_schemas.py @@ -288,6 +288,14 @@ class TemporalCoverage(BaseModel): start: datetime end: datetime + @validator('end') + def validate_end_date(cls, v, values, **kwargs): + if 'start' in values: + start = values.get('start') + if v < start: + raise ValueError('end date must be greater or equal to start date') + return v + class SpatialCoverage(BaseModel): type: Optional[str] name: Optional[str] diff --git a/dspback/schemas/external/schema.json b/dspback/schemas/external/schema.json index 104af01..351b137 100644 --- a/dspback/schemas/external/schema.json +++ b/dspback/schemas/external/schema.json @@ -24,9 +24,13 @@ "items": { "type": "string" }, - "default": ["CZNet"], + "default": [ + "CZNet" + ], "contains": { - "enum": ["CZNet"] + "enum": [ + "CZNet" + ] } }, "creators": { @@ -156,14 +160,21 @@ "description": "A datetime object containing the instant corresponding to the commencement of the time interval (ISO8601 formatted date) - YYYY-MM-DDTHH:MM.", "type": "string", 
"format": "date-time", - "options": { "placeholder": "YYYY-MM-DDTHH:MM" } + "options": { + "placeholder": "YYYY-MM-DDTHH:MM" + } }, "end": { "title": "End", "description": "A datetime object containing the instant corresponding to the termination of the time interval (ISO8601 formatted date) - YYYY-MM-DDTHH:MM.", "type": "string", "format": "date-time", - "options": { "placeholder": "YYYY-MM-DDTHH:MM" } + "options": { + "placeholder": "YYYY-MM-DDTHH:MM" + }, + "formatMinimum": { + "$data": "1/start" + } } }, "required": [ @@ -179,7 +190,7 @@ "title": "Point Coverage Metadata", "description": "Geographic coverage metadata for a resource or aggregation expressed as a point location", "type": "object", - "options": { + "options": { "detail": { "type": "VerticalLayout", "elements": [ @@ -190,7 +201,11 @@ { "type": "MapLayout", "options": { - "map": { "type": "point", "north": "north", "east": "east" } + "map": { + "type": "point", + "north": "north", + "east": "east" + } }, "elements": [ { @@ -271,7 +286,13 @@ { "type": "MapLayout", "options": { - "map": { "type": "box", "northlimit": "northlimit", "eastlimit": "eastlimit", "southlimit": "southlimit", "westlimit": "westlimit" } + "map": { + "type": "box", + "northlimit": "northlimit", + "eastlimit": "eastlimit", + "southlimit": "southlimit", + "westlimit": "westlimit" + } }, "elements": [ { @@ -300,7 +321,6 @@ "type": "Control", "scope": "#/properties/projection" } - ] } }, @@ -391,14 +411,18 @@ "title": "Date created", "type": "string", "format": "date-time", - "options": { "placeholder": "YYYY-MM-DDTHH:MM" }, + "options": { + "placeholder": "YYYY-MM-DDTHH:MM" + }, "description": "The date on which the resource was originally created (ISO8601 formatted date) - YYYY-MM-DDTHH:MM." 
}, "dateModified": { "title": "Date modified", "type": "string", "format": "date-time", - "options": { "placeholder": "YYYY-MM-DDTHH:MM" }, + "options": { + "placeholder": "YYYY-MM-DDTHH:MM" + }, "description": "The date on which the resource was last modified (ISO8601 formatted date) - YYYY-MM-DDTHH:MM." }, "datePublished": { @@ -408,21 +432,25 @@ { "title": "Year", "type": "integer", - "options": { "placeholder": "YYYY" }, + "options": { + "placeholder": "YYYY" + }, "description": "Publication year (YYYY)." }, { "title": "Date", "type": "string", "format": "date-time", - "options": { "placeholder": "YYYY-MM-DDTHH:MM" }, + "options": { + "placeholder": "YYYY-MM-DDTHH:MM" + }, "description": "ISO8601 formatted date (YYYY-MM-DDTHH:MM)." } ] }, "status": { "title": "Data collection is ongoing", - "type": "boolean", + "type": "boolean", "description": "Indicate whether data collection is ongoing for this dataset", "default": false } @@ -605,4 +633,4 @@ "provider", "datePublished" ] -} +} \ No newline at end of file diff --git a/management/submissions_report.py b/management/submissions_report.py new file mode 100644 index 0000000..4131692 --- /dev/null +++ b/management/submissions_report.py @@ -0,0 +1,72 @@ +import asyncio + +import motor +from beanie import init_beanie + +from dspback.config import get_settings +from dspback.pydantic_schemas import User, Submission +from dspback.utils.jsonld.clusters import cluster_by_id + +''' +This script generates a report for the number of discoverable submissions, funding identifiers and clusters. 
+ +Example call: + +docker exec dspback python management/submissions_report.py +''' + +async def initiaize_beanie(): + db = motor.motor_asyncio.AsyncIOMotorClient(get_settings().mongo_url) + await init_beanie( + database=db[get_settings().mongo_database], document_models=[User, Submission] + ) + return db[get_settings().mongo_database] + +async def main(): + db = await initiaize_beanie() + + submission_count_by_repository = {} + test_submission_count_by_repository = {} + submission_count_by_cluster = {} + discoverable_documents_count = 0 + table = [["id", "repository", "discoverable", "funding", "cluster"]] + for submission in await Submission.all().to_list(): + discoverable = False + if "test" == submission.title.lower() or "asdf" in submission.title: + submission_count = test_submission_count_by_repository.get(str(submission.repo_type), 0) + test_submission_count_by_repository[str(submission.repo_type)] = submission_count + 1 + else: + submission_count = submission_count_by_repository.get(str(submission.repo_type), 0) + submission_count_by_repository[str(submission.repo_type)] = submission_count + 1 + + discovery_document = await db["discovery"].find_one({"repository_identifier": submission.identifier}) + if discovery_document: + discoverable = True + discoverable_documents_count = discoverable_documents_count + 1 + for cluster in discovery_document.get("clusters", []): + cluster_count = submission_count_by_cluster.get(cluster, 0) + submission_count_by_cluster[cluster] = cluster_count + 1 + funding_identifiers = [] + clusters = [] + if "funding" in discovery_document: + for funding in discovery_document["funding"]: + funding_identifier = funding.get("identifier", None) + funding_identifiers.append(funding_identifier) + for cluster_funding_id, cluster in cluster_by_id.items(): + if cluster_funding_id in funding_identifier: + clusters.append(cluster) + submission_row = [submission.identifier, submission.repo_type.name, discoverable, funding_identifiers, clusters] + 
table.append(submission_row) + import csv + with open('filename.csv', "w") as f: + filewriter = csv.writer(f) + for row in table: + filewriter.writerow(row) + + + + + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/tests/test_authentication.py b/tests/test_authentication.py index 2cc718b..83863bf 100644 --- a/tests/test_authentication.py +++ b/tests/test_authentication.py @@ -13,7 +13,7 @@ async def test_submissions_not_logged_in(client_test): response = await client_test.get(url_for(client_test, "get_urls", repository="hydroshare")) assert response.json() == {"detail": "Not authenticated"} - assert response.status_code == 403 + assert response.status_code == 401 async def test_login(client_test): @@ -51,4 +51,4 @@ async def test_logout(client_test, authorize_response): logged_out_response = await client_test.get( url_for(client_test, "get_urls", repository="hydroshare"), follow_redirects=False ) - assert logged_out_response.status_code == 403 + assert logged_out_response.status_code == 401 diff --git a/tests/test_metadata_class.py b/tests/test_metadata_class.py index 8d0d6d8..57ae492 100644 --- a/tests/test_metadata_class.py +++ b/tests/test_metadata_class.py @@ -100,5 +100,5 @@ def test_create_hydroshare_record(user_cookie, hydroshare, authorize_response_hy async def test_unauthorized_hydroshare(client_test, user_cookie, hydroshare): response = await client_test.post(prefix + "/metadata/hydroshare?access_token=" + user_cookie, json=hydroshare) - assert response.status_code == 403 + assert response.status_code == 401 assert response.text == '{"detail":"User has not authorized with hydroshare"}'