Skip to content

Commit

Permalink
[#188] - reformat scraped ecl jsonld to match funding identifiers to clusters
Browse files Browse the repository at this point in the history
  • Loading branch information
sblack-usu committed Aug 28, 2023
1 parent 8764b84 commit d4677c9
Show file tree
Hide file tree
Showing 5 changed files with 161 additions and 125 deletions.
2 changes: 2 additions & 0 deletions dspback/utils/jsonld/clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ def clusters(json_ld):
for funding in json_ld["funding"]:
if "identifier" in funding:
resource_funding_ids.append(funding["identifier"])
elif "url" in funding:
resource_funding_ids.append(funding["url"])

clusters = []
for cluster_funding_id, cluster in cluster_by_id.items():
Expand Down
4 changes: 4 additions & 0 deletions dspback/utils/jsonld/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,8 @@ def format_fields(json_ld):
if not isinstance(json_ld["@context"], str):
json_ld["@context"] = json_ld["@context"]["@vocab"]

if "funder" in json_ld and "funder" in json_ld["funder"]:
json_ld["funding"] = json_ld["funder"]["funder"]
del json_ld["funder"]

return json_ld
6 changes: 6 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,9 @@ async def external(change_test_dir):
async def earthchem(change_test_dir):
with open("data/earthchem.json", "r") as f:
return json.loads(f.read())


@pytest.fixture
async def earthchem_jsonld(change_test_dir):
    """Return the parsed contents of ``data/earthchem_jsonld.json``.

    The path is relative; the ``change_test_dir`` fixture is requested
    first, presumably so the path resolves against the tests directory
    (confirm against that fixture's definition).
    """
    with open("data/earthchem_jsonld.json", "r") as fixture_file:
        return json.load(fixture_file)
143 changes: 143 additions & 0 deletions tests/data/earthchem_jsonld.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
{
"@context": {
"@vocab": "https://schema.org/",
"datacite": "http://purl.org/spar/datacite/"
},
"@id": "https://doi.org/10.1594/IEDA/100243",
"@type": "Dataset",
"name": "Susquehanna Shale Hills Critical Zone Observatory Stream Water Chemistry (2010)",
"sameAs": "https://ecl.earthchem.org/view.php?id=523",
"isAccessibleForFree": true,
"citation": ["https://doi.org/10.2136/vzj2010.0133"],
"author": {
"@list": [{
"@type": "Role",
"author": [{
"@type": "Person",
"name": "Susan L. Brantley",
"givenName": "Susan",
"familyName": "Brantley"
}],
"roleName": "Lead Author"
}, {
"@type": "Role",
"author": [{
"@type": "Person",
"name": "Pamela L. Sullivan",
"givenName": "Pamela",
"familyName": "Sullivan"
}, {
"@type": "Person",
"name": "Danielle Andrews",
"givenName": "Danielle",
"familyName": "Andrews"
}, {
"@type": "Person",
"name": "George Holmes",
"givenName": "George",
"familyName": "Holmes"
}, {
"@type": "Person",
"name": "Molly Holleran",
"givenName": "Molly",
"familyName": "Holleran"
}, {
"@type": "Person",
"name": "Jennifer Z. Williams",
"givenName": "Jennifer",
"familyName": "Williams"
}, {
"@type": "Person",
"name": "Elizabeth Herndon",
"givenName": "Elizabeth",
"familyName": "Herndon"
}, {
"@type": "Person",
"name": "Maya Bhatt",
"givenName": "Maya",
"familyName": "Bhatt"
}, {
"@type": "Person",
"name": "Ekaterina Bazilevskaya",
"givenName": "Ekaterina",
"familyName": "Bazilevskaya"
}, {
"@type": "Person",
"name": "Tiffany Yesavage",
"givenName": "Tiffany",
"familyName": "Yesavage"
}, {
"@type": "Person",
"name": "Evan Thomas",
"givenName": "Evan",
"familyName": "Thomas"
}, {
"@type": "Person",
"name": "Chris J. Duffy",
"givenName": "Chris",
"familyName": "Duffy"
}],
"roleName": "Coauthor"
}]
},
"description": "Stream water chemistry at Susquehanna Shale Hills Critical Zone Observatory in 2010. Weekly to monthly grab samples were collected at three locations along the first order Stream: at the Headwater (SH), Middle (SM) and adjacent to the Weir (SW). Daily stream water sample were also collected adjacent to the weir from using automatic samplers (2700 series, Teledyne Isco, Lincoln, NE) and were referenced as SW-ISCO. ",
"distribution": {
"datePublished": "2013-02-05 00:00:00",
"contentUrl": "https://ecl.earthchem.org/view.php?id=523",
"@type": "DataDownload",
"encodingFormat": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
},
"license": "https://spdx.org/licenses/CC-BY-SA-4.0",
"dateCreated": "2013-02-04",
"inLanguage": "English",
"keywords": ["Susquehanna Shale Hills", "Pennsylvania", "Regional (Continents, Oceans)", "Stream water", "geochemistry", "DOC", "trace elements", "major ions"],
"publisher": {
"contactPoint": {
"@type": "ContactPoint",
"name": "Information Desk",
"contactType": "Customer Service",
"email": "[email protected]",
"url": "https://www.earthchem.org/contact/"
},
"@type": "Organization",
"name": "EarthChem Library",
"@id": "https://www.earthchem.org",
"url": "https://www.earthchem.org/library"
},
"provider": {
"@type": "Organization",
"name": "EarthChem Library"
},
"spatialCoverage": {
"@type": "Place",
"geo": [{
"@type": "GeoCoordinates",
"latitude": "40.6644474",
"longitude": "-77.9056298"
}, {
"@type": "GeoCoordinates",
"latitude": "40.6647643",
"longitude": "-77.9040381"
}, {
"@type": "GeoCoordinates",
"latitude": "40.664841",
"longitude": "-77.9072532"
}, {
"@type": "GeoCoordinates",
"latitude": "40.6648488",
"longitude": "-77.9072458"
}]
},
"url": "https://doi.org/10.1594/IEDA/100243",
"funder": {
"@type": "MonetaryGrant",
"fundedItem": {
"@id": "https://doi.org/10.1594/IEDA/100243"
},
"funder": [{
"@type": "Organization",
"name": "National Science Foundation",
"url": "http://www.nsf.gov/awardsearch/showAward.do?AwardNumber=2012123"
}]
}
}
131 changes: 6 additions & 125 deletions tests/test_jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from dspback.schemas.discovery import JSONLD
from dspback.utils.jsonld.clusters import clusters
from dspback.utils.jsonld.scraper import format_fields
from tests import change_test_dir, earthchem_jsonld

ids_and_cluster = [
("2012073", "Bedrock Cluster"),
Expand Down Expand Up @@ -140,131 +141,11 @@ class MockSubmission(BaseModel):


@pytest.mark.asyncio
async def test_earthchem_jsonld():
metadata_json = {
"@context": {"@vocab": "https://schema.org/", "datacite": "http://purl.org/spar/datacite/"},
"@id": "https://doi.org/10.1594/IEDA/100243",
"@type": "Dataset",
"name": "Susquehanna Shale Hills Critical Zone Observatory Stream Water Chemistry (2010)",
"sameAs": "https://ecl.earthchem.org/view.php?id=523",
"isAccessibleForFree": True,
"citation": ["https://doi.org/10.2136/vzj2010.0133"],
"author": {
"@list": [
{
"@type": "Role",
"author": [
{"@type": "Person", "name": "Susan L. Brantley", "givenName": "Susan", "familyName": "Brantley"}
],
"roleName": "Lead Author",
},
{
"@type": "Role",
"author": [
{
"@type": "Person",
"name": "Pamela L. Sullivan",
"givenName": "Pamela",
"familyName": "Sullivan",
},
{
"@type": "Person",
"name": "Danielle Andrews",
"givenName": "Danielle",
"familyName": "Andrews",
},
{"@type": "Person", "name": "George Holmes", "givenName": "George", "familyName": "Holmes"},
{"@type": "Person", "name": "Molly Holleran", "givenName": "Molly", "familyName": "Holleran"},
{
"@type": "Person",
"name": "Jennifer Z. Williams",
"givenName": "Jennifer",
"familyName": "Williams",
},
{
"@type": "Person",
"name": "Elizabeth Herndon",
"givenName": "Elizabeth",
"familyName": "Herndon",
},
{"@type": "Person", "name": "Maya Bhatt", "givenName": "Maya", "familyName": "Bhatt"},
{
"@type": "Person",
"name": "Ekaterina Bazilevskaya",
"givenName": "Ekaterina",
"familyName": "Bazilevskaya",
},
{
"@type": "Person",
"name": "Tiffany Yesavage",
"givenName": "Tiffany",
"familyName": "Yesavage",
},
{"@type": "Person", "name": "Evan Thomas", "givenName": "Evan", "familyName": "Thomas"},
{"@type": "Person", "name": "Chris J. Duffy", "givenName": "Chris", "familyName": "Duffy"},
],
"roleName": "Coauthor",
},
]
},
"description": "Stream water chemistry at Susquehanna Shale Hills Critical Zone Observatory in 2010. Weekly to monthly grab samples were collected at three locations along the first order Stream: at the Headwater (SH), Middle (SM) and adjacent to the Weir (SW). Daily stream water sample were also collected adjacent to the weir from using automatic samplers (2700 series, Teledyne Isco, Lincoln, NE) and were referenced as SW-ISCO. ",
"distribution": {
"datePublished": "2013-02-05 00:00:00",
"contentUrl": "https://ecl.earthchem.org/view.php?id=523",
"@type": "DataDownload",
"encodingFormat": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
},
"license": "https://spdx.org/licenses/CC-BY-SA-4.0",
"dateCreated": "2013-02-04",
"inLanguage": "English",
"keywords": [
"Susquehanna Shale Hills",
"Pennsylvania",
"Regional (Continents, Oceans)",
"Stream water",
"geochemistry",
"DOC",
"trace elements",
"major ions",
],
"publisher": {
"contactPoint": {
"@type": "ContactPoint",
"name": "Information Desk",
"contactType": "Customer Service",
"email": "[email protected]",
"url": "https://www.earthchem.org/contact/",
},
"@type": "Organization",
"name": "EarthChem Library",
"@id": "https://www.earthchem.org",
"url": "https://www.earthchem.org/library",
},
"provider": {"@type": "Organization", "name": "EarthChem Library"},
"spatialCoverage": {
"@type": "Place",
"geo": [
{"@type": "GeoCoordinates", "latitude": "40.6644474", "longitude": "-77.9056298"},
{"@type": "GeoCoordinates", "latitude": "40.6647643", "longitude": "-77.9040381"},
{"@type": "GeoCoordinates", "latitude": "40.664841", "longitude": "-77.9072532"},
{"@type": "GeoCoordinates", "latitude": "40.6648488", "longitude": "-77.9072458"},
],
},
"url": "https://doi.org/10.1594/IEDA/100243",
"funder": {
"@type": "MonetaryGrant",
"fundedItem": {"@id": "https://doi.org/10.1594/IEDA/100243"},
"funder": [
{
"@type": "Organization",
"name": "National Science Foundation",
"url": "http://www.nsf.gov/awardsearch/showAward.do?AwardNumber=0725019",
}
],
},
}

scraped_jsonld = format_fields(metadata_json)
async def test_earthchem_jsonld(earthchem_jsonld):
    """Format the EarthChem JSON-LD fixture, attach clusters, and build a JSONLD.

    The fixture is passed through ``format_fields``, the output of
    ``clusters`` is stored under the ``"clusters"`` key, and the resulting
    mapping is unpacked into ``JSONLD`` before the fields are checked.
    """
    formatted = format_fields(earthchem_jsonld)
    formatted["clusters"] = clusters(formatted)
    parsed = JSONLD(**formatted)

    # Provider and context as exposed by the constructed JSONLD object.
    assert parsed.provider.name == "EarthChem Library"
    assert parsed.context == "https://schema.org/"

    # Exactly one funding entry and one matched cluster are expected.
    assert len(parsed.funding) == 1
    assert len(parsed.clusters) == 1

0 comments on commit d4677c9

Please sign in to comment.