Skip to content

Commit

Permalink
Merge pull request #187 from cznethub/main
Browse files Browse the repository at this point in the history
main -> develop 1.3.0
  • Loading branch information
sblack-usu authored Aug 23, 2023
2 parents ae97816 + f2a4509 commit 8764b84
Show file tree
Hide file tree
Showing 9 changed files with 191 additions and 20 deletions.
1 change: 1 addition & 0 deletions dspback/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class Settings(BaseSettings):
earthchem_file_delete_url: HttpUrl
earthchem_file_read_url: HttpUrl
earthchem_view_url: HttpUrl
earthchem_public_view_url: HttpUrl
earthchem_health_url: HttpUrl

mongo_username: str
Expand Down
5 changes: 2 additions & 3 deletions dspback/pydantic_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class Submission(Document):
identifier: str = None
submitted: datetime = datetime.utcnow()
url: HttpUrl = None
metadata_json: str = {}
metadata_json: str = "{}"

@validator('authors', pre=True, allow_reuse=True)
def extract_author_names(cls, values):
Expand Down Expand Up @@ -264,8 +264,7 @@ class License(BaseModel):

def to_submission(self, identifier) -> Submission:
settings = get_settings()
view_url = settings.earthchem_view_url
view_url = view_url % identifier
view_url = settings.earthchem_public_view_url % identifier
authors = [contributor.name for contributor in self.contributors]
authors.insert(0, self.leadAuthor.name)
return Submission(
Expand Down
2 changes: 1 addition & 1 deletion dspback/routers/earthchem.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ async def _retrieve_metadata_from_repository(self, request: Request, identifier)
json_metadata["leadAuthor"] = lead_author
json_metadata["contributors"] = all_contributors

return self.wrap_metadata(json_metadata, "status" in json_metadata and json_metadata["status"] != "incomplete")
return self.wrap_metadata(json_metadata, "status" in json_metadata and json_metadata["status"] == "published")

@router.get(
'/metadata/earthchem/{identifier}',
Expand Down
13 changes: 2 additions & 11 deletions dspback/schemas/earthchem/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@
"options": { "hidden": true },
"enum": [
"incomplete",
"submitted"
"submitted",
"published"
]
},
"additionalTypes": {
Expand Down Expand Up @@ -198,7 +199,6 @@
"url": {
"type": "string",
"default": "https://ror.org/021nxhr62",
"const": "https://ror.org/021nxhr62",
"options": { "hidden": true }
}
}
Expand All @@ -225,7 +225,6 @@
"url": {
"type": "string",
"default": "https://ror.org/052csg198",
"const": "https://ror.org/052csg198",
"options": { "hidden": true }
}
}
Expand All @@ -252,7 +251,6 @@
"url": {
"type": "string",
"default": "https://ror.org/01bj3aw27",
"const": "https://ror.org/01bj3aw27",
"options": { "hidden": true }
}
}
Expand All @@ -279,7 +277,6 @@
"url": {
"type": "string",
"default": "https://ror.org/027ka1x80",
"const": "https://ror.org/027ka1x80",
"options": { "hidden": true }
}
}
Expand All @@ -306,7 +303,6 @@
"url": {
"type": "string",
"default": "https://ror.org/0472cxd90",
"const": "https://ror.org/0472cxd90",
"options": { "hidden": true }
}
}
Expand All @@ -333,7 +329,6 @@
"url": {
"type": "string",
"default": "https://ror.org/018mejw64",
"const": "https://ror.org/018mejw64",
"options": { "hidden": true }
}
}
Expand All @@ -360,7 +355,6 @@
"url": {
"type": "string",
"default": "https://ror.org/01h0zpd94",
"const": "https://ror.org/01h0zpd94",
"options": { "hidden": true }
}
}
Expand All @@ -387,7 +381,6 @@
"url": {
"type": "string",
"default": "https://ror.org/05mmh0f86",
"const": "https://ror.org/05mmh0f86",
"options": { "hidden": true }
}
}
Expand All @@ -414,7 +407,6 @@
"url": {
"type": "string",
"default": "https://ror.org/03y2gwe85",
"const": "https://ror.org/03y2gwe85",
"options": { "hidden": true }
}
}
Expand All @@ -441,7 +433,6 @@
"url": {
"type": "string",
"default": "https://ror.org/02b5d8509",
"const": "https://ror.org/02b5d8509",
"options": { "hidden": true }
}
}
Expand Down
13 changes: 11 additions & 2 deletions dspback/utils/jsonld/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,16 @@ def format_fields(json_ld):
json_ld["license"] = {"text": json_ld["license"]}

if "author" in json_ld:
for author_role in [author_list['author'] for author_list in json_ld['author']['@list']]:
json_ld["creator"] = {'@list': author_role}
author_roles = [author_list for author_list in json_ld['author']['@list']]
author_list = []
for author_role in author_roles:
if author_role:
author_list = author_list + author_role["author"]

json_ld["creator"] = {'@list': author_list}

if "@context" in json_ld:
if not isinstance(json_ld["@context"], str):
json_ld["@context"] = json_ld["@context"]["@vocab"]

return json_ld
38 changes: 38 additions & 0 deletions management/refresh_submission_url_earthchem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import asyncio
from dspback.pydantic_schemas import RepositoryType

import motor
from beanie import init_beanie

from dspback.config import get_settings
from dspback.pydantic_schemas import Submission

'''
This Python script rewrites the URL of every EarthChem Library (ECL) submission to the public view URL.
Example call:
docker exec dspback python management/refresh_submission_url_earthchem.py
'''

async def initiaize_beanie():
    """Connect to MongoDB and initialise beanie for ``Submission`` documents.

    Reads ``mongo_url`` and ``mongo_database`` from the application settings,
    creates an ``AsyncIOMotorClient`` for that URL and calls ``init_beanie``
    on the configured database with ``Submission`` as the only document model.
    """
    # Import the submodule explicitly: ``motor.motor_asyncio`` is only
    # reachable as an attribute of ``motor`` once something has imported it,
    # so a bare ``import motor`` relies on another module having done so.
    from motor.motor_asyncio import AsyncIOMotorClient

    settings = get_settings()  # look the settings up once, not per attribute
    client = AsyncIOMotorClient(settings.mongo_url)
    await init_beanie(
        database=client[settings.mongo_database],
        document_models=[Submission],
    )

async def main():
    """Point every EarthChem submission at the public view URL.

    Initialises beanie, loads all ``Submission`` documents whose ``repo_type``
    is ``RepositoryType.EARTHCHEM``, replaces each document's ``url`` with the
    ``earthchem_public_view_url`` setting formatted with the submission's
    identifier, saves it, and prints the old URL, the new URL and a final
    count of processed submissions.
    """
    await initiaize_beanie()

    # Loop-invariant: the URL template is the same for every submission.
    url_template = get_settings().earthchem_public_view_url

    earthchem_submissions = await Submission.find(
        Submission.repo_type == RepositoryType.EARTHCHEM
    ).to_list()

    count = 0
    for submission in earthchem_submissions:
        print(f"updating {submission.url}")
        submission.url = url_template % submission.identifier
        await submission.save()
        print(f"to {submission.url}")
        count += 1
    print(f"total submission updated {count}")


# Run the refresh only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ cryptography==36.0.2
dnspython==2.2.1
ecdsa==0.17.0
email-validator==1.1.3
fastapi
fastapi==0.89.0
fastapi-restful==0.4.3
greenlet==1.1.2
h11==0.12.0
Expand Down Expand Up @@ -51,7 +51,7 @@ rsa==4.8
six==1.16.0
sniffio==1.2.0
soupsieve==2.3.1
starlette
starlette==0.22.0
tomli==2.0.1
typing_extensions==4.3.0
urllib3==1.26.9
Expand Down
133 changes: 133 additions & 0 deletions tests/test_jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@

from dspback.pydantic_schemas import RepositoryType, Submission
from dspback.scheduler import retrieve_submission_json_ld
from dspback.schemas.discovery import JSONLD
from dspback.utils.jsonld.clusters import clusters
from dspback.utils.jsonld.scraper import format_fields

ids_and_cluster = [
("2012073", "Bedrock Cluster"),
Expand Down Expand Up @@ -135,3 +137,134 @@ class MockSubmission(BaseModel):
public_jsonld = await retrieve_submission_json_ld(submission.dict())
assert len(public_jsonld["clusters"]) == 1
assert public_jsonld["clusters"][0] == "Drylands Cluster"


@pytest.mark.asyncio
async def test_earthchem_jsonld():
    """Check that an EarthChem JSON-LD record survives ``format_fields``.

    Builds a representative EarthChem record, passes it through
    ``format_fields`` and loads the result into ``JSONLD``; the provider name
    and the flattened ``@context`` value are then asserted.
    """

    def person(name, given_name, family_name):
        # One schema.org Person entry, keys in the order used by the record.
        return {"@type": "Person", "name": name, "givenName": given_name, "familyName": family_name}

    def coordinates(latitude, longitude):
        # One GeoCoordinates entry; values stay strings as in the record.
        return {"@type": "GeoCoordinates", "latitude": latitude, "longitude": longitude}

    doi_url = "https://doi.org/10.1594/IEDA/100243"
    ecl_view_url = "https://ecl.earthchem.org/view.php?id=523"

    lead_role = {
        "@type": "Role",
        "author": [person("Susan L. Brantley", "Susan", "Brantley")],
        "roleName": "Lead Author",
    }
    coauthor_role = {
        "@type": "Role",
        "author": [
            person("Pamela L. Sullivan", "Pamela", "Sullivan"),
            person("Danielle Andrews", "Danielle", "Andrews"),
            person("George Holmes", "George", "Holmes"),
            person("Molly Holleran", "Molly", "Holleran"),
            person("Jennifer Z. Williams", "Jennifer", "Williams"),
            person("Elizabeth Herndon", "Elizabeth", "Herndon"),
            person("Maya Bhatt", "Maya", "Bhatt"),
            person("Ekaterina Bazilevskaya", "Ekaterina", "Bazilevskaya"),
            person("Tiffany Yesavage", "Tiffany", "Yesavage"),
            person("Evan Thomas", "Evan", "Thomas"),
            person("Chris J. Duffy", "Chris", "Duffy"),
        ],
        "roleName": "Coauthor",
    }

    publisher = {
        "contactPoint": {
            "@type": "ContactPoint",
            "name": "Information Desk",
            "contactType": "Customer Service",
            # NOTE(review): this value looks like an email-obfuscation
            # placeholder rather than a real address - confirm the intended
            # fixture value.
            "email": "[email protected]",
            "url": "https://www.earthchem.org/contact/",
        },
        "@type": "Organization",
        "name": "EarthChem Library",
        "@id": "https://www.earthchem.org",
        "url": "https://www.earthchem.org/library",
    }

    raw_record = {
        "@context": {"@vocab": "https://schema.org/", "datacite": "http://purl.org/spar/datacite/"},
        "@id": doi_url,
        "@type": "Dataset",
        "name": "Susquehanna Shale Hills Critical Zone Observatory Stream Water Chemistry (2010)",
        "sameAs": ecl_view_url,
        "isAccessibleForFree": True,
        "citation": ["https://doi.org/10.2136/vzj2010.0133"],
        "author": {"@list": [lead_role, coauthor_role]},
        "description": "Stream water chemistry at Susquehanna Shale Hills Critical Zone Observatory in 2010. Weekly to monthly grab samples were collected at three locations along the first order Stream: at the Headwater (SH), Middle (SM) and adjacent to the Weir (SW). Daily stream water sample were also collected adjacent to the weir from using automatic samplers (2700 series, Teledyne Isco, Lincoln, NE) and were referenced as SW-ISCO. ",
        "distribution": {
            "datePublished": "2013-02-05 00:00:00",
            "contentUrl": ecl_view_url,
            "@type": "DataDownload",
            "encodingFormat": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        },
        "license": "https://spdx.org/licenses/CC-BY-SA-4.0",
        "dateCreated": "2013-02-04",
        "inLanguage": "English",
        "keywords": [
            "Susquehanna Shale Hills",
            "Pennsylvania",
            "Regional (Continents, Oceans)",
            "Stream water",
            "geochemistry",
            "DOC",
            "trace elements",
            "major ions",
        ],
        "publisher": publisher,
        "provider": {"@type": "Organization", "name": "EarthChem Library"},
        "spatialCoverage": {
            "@type": "Place",
            "geo": [
                coordinates("40.6644474", "-77.9056298"),
                coordinates("40.6647643", "-77.9040381"),
                coordinates("40.664841", "-77.9072532"),
                coordinates("40.6648488", "-77.9072458"),
            ],
        },
        "url": doi_url,
        "funder": {
            "@type": "MonetaryGrant",
            "fundedItem": {"@id": doi_url},
            "funder": [
                {
                    "@type": "Organization",
                    "name": "National Science Foundation",
                    "url": "http://www.nsf.gov/awardsearch/showAward.do?AwardNumber=0725019",
                }
            ],
        },
    }

    formatted = format_fields(raw_record)
    parsed = JSONLD(**formatted)
    assert parsed.provider.name == "EarthChem Library"
    assert parsed.context == "https://schema.org/"
2 changes: 1 addition & 1 deletion tests/test_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,4 @@ async def test_earthchem_to_submission(earthchem):
assert earthchem_submission.repo_type == RepositoryType.EARTHCHEM
assert earthchem_submission.submitted <= datetime.utcnow()
assert earthchem_submission.identifier == "947940"
assert earthchem_submission.url == get_settings().earthchem_view_url % "947940"
assert earthchem_submission.url == get_settings().earthchem_public_view_url % "947940"

0 comments on commit 8764b84

Please sign in to comment.