-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Using JSONLD to retrieve information
- Loading branch information
1 parent
647d5ef
commit 67ea43b
Showing
7 changed files
with
161 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
"""
Abstract supertype for data repositories whose dataset metadata is read from JSON-LD.
Concrete subtypes decide how the JSON-LD document is obtained.
"""
abstract type JSONLD <: DataRepo end
|
||
export JSONLD_Web, JSONLD_DOI | ||
|
||
include("JSONLD_Web.jl") | ||
include("JSONLD_DOI.jl") | ||
|
||
"""
    description(repo::JSONLD, mainpage)

Build the human-readable description block for a dataset.

`mainpage` is the parsed JSON-LD metadata. The returned string has one line each for
author, license and date, followed by the dataset's own `description` field.

- Authors come from `author` (or `creator`); entries without a `name` are skipped.
  When neither key exists, "Unknown Author" is used.
- The date comes from `datePublished` (or `dateModified`); when neither key exists,
  "Unknown Date" is used instead of failing.
"""
function description(repo::JSONLD, mainpage)
    desc = handle_keys("description", "", mainpage)

    author_name(person) = handle_keys("name", "", person)
    authors = handle_keys("author", "creator", mainpage)
    if authors !== nothing
        # A lone author may be given as a single object rather than a list of objects.
        if isa(authors, Dict)
            authors = [authors]
        end
        stripauthors = [author_name(ii) for ii in authors if author_name(ii) !== nothing]
        author = format_authors(stripauthors)
    else
        author = "Unknown Author"
    end

    license = get_license(mainpage)

    # Only parse the date when the metadata actually carries one.
    rawdate = handle_keys("datePublished", "dateModified", mainpage)
    if rawdate !== nothing
        date = Dates.format(Dates.DateTime(rawdate), "U d, yyyy")
    else
        date = "Unknown Date"
    end

    return """
    Author: $(author)
    License: $(license)
    Date: $(date)
    $(desc)
    """
end
|
||
"""
    get_license(mainpage)

Look up the `license` entry of the parsed metadata `mainpage`.

A string license is returned as is. For a dictionary, its `url` entry is returned,
falling back to `text`. In every other case the result is `nothing`.
"""
function get_license(mainpage)
    entry = handle_keys("license", "", mainpage)
    if isa(entry, String)
        return entry
    end
    if isa(entry, Dict)
        return handle_keys("url", "text", entry)
    end
    return nothing
end
|
||
"""
    handle_keys(key1::String, key2::String, json)

Return `json[key1]` if that key exists, otherwise `json[key2]` if that key exists,
otherwise `nothing`.

Pass `""` as `key2` when there is no alternative key. If `json` is not a `Dict`
there is nothing to look up and the result is `nothing`.
"""
function handle_keys(key1::String, key2::String, json)
    # Explicit membership tests replace the former catch-all try/catch, which hid
    # every kind of error rather than just a missing key.
    isa(json, Dict) || return nothing
    haskey(json, key1) && return json[key1]
    haskey(json, key2) && return json[key2]
    return nothing
end
|
||
"""
    get_urls(repo::JSONLD, page)

Collect the download URLs listed in the parsed metadata `page`.

Each entry of `distribution` contributes its `contentUrl`; entries without one are
skipped. An empty list is returned when there is no `distribution` key.
"""
function get_urls(repo::JSONLD, page)
    url_list = handle_keys("distribution", "", page)
    url_list === nothing && return []

    # A dataset with one download may give it as a single object rather than a list.
    if isa(url_list, Dict)
        url_list = [url_list]
    end

    content_url(entry) = handle_keys("contentUrl", "", entry)
    return [content_url(ii) for ii in url_list if content_url(ii) !== nothing]
end
|
||
"""
    get_checksums(repo::JSONLD, page)

Return the checksums for the dataset. This method does not inspect `page` and
always returns an empty list.
"""
get_checksums(repo::JSONLD, page) = []
|
||
"""
    data_fullname(::JSONLD, mainpage)

Return the dataset's full name, i.e. the `name` entry of the parsed metadata.
"""
data_fullname(::JSONLD, mainpage) = mainpage["name"]
|
||
"""
    website(::JSONLD, mainpage_url, mainpage)

Return the website for the dataset, which is `mainpage_url` passed through unchanged.
"""
website(::JSONLD, mainpage_url, mainpage) = mainpage_url
|
||
"""
    mainpage_url(repo::JSONLD, dataname)

Fetch the page for `dataname` and parse the JSON-LD metadata embedded in it.

Returns the tuple `(json, dataname)`, where `json` is the parsed content of the page's
`<script type="application/ld+json">` element. Raises an error when the page has no
such element or more than one.
"""
function mainpage_url(repo::JSONLD, dataname)
    page = getpage(dataname)
    pattern = sel"script[type=\"application/ld+json\"]"
    jsonld_blocks = matchall(pattern, page.root)

    if isempty(jsonld_blocks)
        error("No JSON-LD Linked Data Found")
    end
    # Validate with a real error: `@assert` is meant for internal invariants and is
    # not guaranteed to run.
    if length(jsonld_blocks) != 1
        error("Expected exactly one JSON-LD block, found $(length(jsonld_blocks))")
    end

    script_block = text_only(first(jsonld_blocks))
    json = JSON.parse(script_block)
    return json, dataname
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
"""
JSON-LD repository whose datasets are identified by a DOI.
"""
struct JSONLD_DOI <: JSONLD
end
|
||
"""
    mainpage_url(repo::JSONLD_DOI, dataname)

Fetch the JSON-LD metadata for the DOI contained in `dataname` from data.datacite.org.

Returns the tuple `(json, dataname)`, where `json` is the parsed response body.
Raises an error when `match_doi` finds no DOI in `dataname`.
"""
function mainpage_url(repo::JSONLD_DOI, dataname)
    doi = match_doi(dataname)
    if doi === nothing
        # Fail with a clear message; otherwise `json` would never be assigned below.
        error("No DOI found in \"$(dataname)\"")
    end

    # A URL is not a filesystem path, so build it by plain concatenation.
    url = string("https://data.datacite.org/", doi)
    headers = ["Accept"=>"application/vnd.schemaorg.ld+json"]
    resp = HTTP.get(url, headers; forwardheaders=true)
    json = JSON.parse(resp.body |> String |> strip)
    return json, dataname
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
"""
JSON-LD repository whose metadata is embedded in a web page.
"""
struct JSONLD_Web <: JSONLD
end
|
||
"""
    mainpage_url(repo::JSONLD_Web, dataname)

Fetch the page for `dataname` and parse the JSON-LD metadata embedded in it.

Returns the tuple `(json, dataname)`, where `json` is the parsed content of the page's
`<script type="application/ld+json">` element. Raises an error when the page has no
such element or more than one.
"""
function mainpage_url(repo::JSONLD_Web, dataname)
    page = getpage(dataname)
    pattern = sel"script[type=\"application/ld+json\"]"
    jsonld_blocks = matchall(pattern, page.root)

    if isempty(jsonld_blocks)
        error("No JSON-LD Linked Data Found")
    end
    # Validate with a real error: `@assert` is meant for internal invariants and is
    # not guaranteed to run.
    if length(jsonld_blocks) != 1
        error("Expected exactly one JSON-LD block, found $(length(jsonld_blocks))")
    end

    script_block = text_only(first(jsonld_blocks))
    json = JSON.parse(script_block)
    return json, dataname
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
using DataDepsGenerators | ||
using Base.Test | ||
|
||
using ReferenceTests | ||
|
||
@testset "JSONLD test" begin
    # Note: the reference file names mention Kaggle and Figshare, but the inputs
    # are a zenodo.org record URL and a journal.pbio DOI.
    zenodo_record = "https://zenodo.org/record/1287281"
    article_doi = "10.1371/journal.pbio.2001414"

    @test_reference "../references/JSONLD_Web Kaggle.txt" generate(JSONLD_Web(), zenodo_record)
    @test_reference "../references/JSONLD_DOI Figshare.txt" generate(JSONLD_DOI(), article_doi)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
register(DataDep( | ||
"Identifiers for the 21st century How to design, provision, and reuse persistent identifiers to maximize utility and impact of life science data", | ||
""" | ||
Dataset: Identifiers for the 21st century: How to design, provision, and reuse persistent identifiers to maximize utility and impact of life science data | ||
Website: 10.1371/journal.pbio.2001414 | ||
Author: Julie A. McMurry et al. | ||
License: http://creativecommons.org/licenses/by/4.0 | ||
Date: June 29, 2017 | ||
|
||
nothing | ||
""", | ||
Any[], | ||
[] | ||
)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
register(DataDep( | ||
"Map of Co-Seismic Landslides for the M 7.8 Kaikoura, New Zealand Earthquake", | ||
""" | ||
Dataset: Map of Co-Seismic Landslides for the M 7.8 Kaikoura, New Zealand Earthquake | ||
Website: https://zenodo.org/record/1287281 | ||
Author: Valkaniotis Sotiris et al. | ||
License: https://creativecommons.org/licenses/by/4.0/ | ||
Date: December 20, 2016 | ||
|
||
<p>Prepared by the Research Group on Earthquake Geology in Greece (http://eqgeogr.weebly.com/)</p> | ||
|
||
<p>Version 2 (updated)</p> | ||
|
||
<p>With the release of new Sentinel-2 images, and other available resources for the M7.8 Kaikoura earthquake, we present an update of the Map of Co-Seismic Landslides and Surfaces Ruptures (As of 27/11/2016). Landslides were mapped using Sentinel-2 satellite images from Copernicus, European Space Agency, dated November and December 2016. Images were visually compared with previous last available S2A images without cloud cover (13 September and 26 October) and landslides and large slope failures were manually mapped. Areas covered by cloud are omitted and shown on map. 5875 landslide sites are shown in the map. A small number of landslides could have been mis-identified due to insufficient resolution of the images, small gaps of cloud cover or for other reasons. Also, re-activated landslides on the central mountainous area were unabled to identify due to imagery restrictions (medium resolution, relief shadows etc). Some local gaps in Sentinel imagery still exist due to cloud cover, but we believe the current map is very close to the major distribution of mass movement effects. Surface ruptures were mapped using Sentinel-2 imagery and approximate position from photos of the post-earthquake aerial surveys of Environment Canterbury Regional Council (http://ecan.govt.nz)</p> | ||
|
||
<p>KML file contains7355 landslide spots.</p> | ||
""", | ||
String["https://zenodo.org/api/files/5a311c7a-bd5e-4df7-be61-341d03ec9a9b/Landslide_Map_V2_A2.pdf", "https://zenodo.org/api/files/5a311c7a-bd5e-4df7-be61-341d03ec9a9b/Landslides_Kaikoura_2016.kmz", "https://zenodo.org/api/files/5a311c7a-bd5e-4df7-be61-341d03ec9a9b/Prelim_Landslide_Map_A2.jpg"], | ||
[] | ||
)) |