Skip to content

Commit

Permalink
Using JSONLD to retrieve information
Browse files Browse the repository at this point in the history
  • Loading branch information
SebastinSanty committed Jul 4, 2018
1 parent 647d5ef commit 67ea43b
Show file tree
Hide file tree
Showing 7 changed files with 161 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/DataDepsGenerators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ using JSON
using HTTP

export generate, citation_text
export UCI, GitHub, DataDryad, DataOneV1, DataOneV2, CKAN, DataCite, Figshare
export UCI, GitHub, DataDryad, DataOneV1, DataOneV2, CKAN, DataCite, Figshare, JSONLD

abstract type DataRepo end

Expand Down Expand Up @@ -41,7 +41,7 @@ include("DataOneV2/DataOneV2.jl")
include("CKAN.jl")
include("DataCite.jl")
include("Figshare.jl")

include("JSONLD/JSONLD.jl")

function message(meta)
escape_multiline_string("""
Expand Down
92 changes: 92 additions & 0 deletions src/JSONLD/JSONLD.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
"""
    JSONLD <: DataRepo

Abstract supertype of the JSON-LD based repositories. The generic methods in
this file (`description`, `get_urls`, `get_checksums`, ...) dispatch on it.
"""
abstract type JSONLD <: DataRepo end

export JSONLD_Web, JSONLD_DOI

include("JSONLD_Web.jl")
include("JSONLD_DOI.jl")

"""
    description(repo::JSONLD, mainpage)

Build the multi-line description text (author, license, date and free-text
description) from the parsed JSON-LD document `mainpage`.

- Authors are looked up under `"author"`, falling back to `"creator"`. The value
  may be a list of objects or a single object; entries without a `"name"` are
  dropped. If neither key is present the author is `"Unknown Author"`.
- The date is looked up under `"datePublished"`, falling back to
  `"dateModified"`, and formatted as `"U d, yyyy"` (e.g. `June 29, 2017`). If
  neither key is present the date is `"Unknown Date"` instead of an exception.
- License and description are interpolated as they are, so a missing value is
  rendered as `nothing`.
"""
function description(repo::JSONLD, mainpage)
    desc = handle_keys("description", "", mainpage)

    authors = handle_keys("author", "creator", mainpage)
    if authors !== nothing
        # JSON-LD allows a single object where a list is expected; iterating a
        # `Dict` directly would yield key/value pairs and lose the author.
        if isa(authors, Dict)
            authors = [authors]
        end
        author_name(entry) = handle_keys("name", "", entry)
        stripauthors = [author_name(ii) for ii in authors if author_name(ii) !== nothing]
        author = format_authors(stripauthors)
    else
        author = "Unknown Author"
    end

    license = get_license(mainpage)

    # `Dates.DateTime(nothing)` has no method, so guard the missing-date case.
    rawdate = handle_keys("datePublished", "dateModified", mainpage)
    if rawdate === nothing
        date = "Unknown Date"
    else
        date = Dates.format(Dates.DateTime(rawdate), "U d, yyyy")
    end

    return """
    Author: $(author)
    License: $(license)
    Date: $(date)
    $(desc)
    """
end

"""
    get_license(mainpage)

Return the license stored under the `"license"` key of `mainpage`.

A `String` value is returned unchanged. For a `Dict` value the `"url"` entry is
returned, falling back to `"text"`. In every other case, including a missing
key, the result is `nothing`.
"""
function get_license(mainpage)
    entry = handle_keys("license", "", mainpage)
    isa(entry, String) && return entry
    isa(entry, Dict) && return handle_keys("url", "text", entry)
    return nothing
end

"""
    handle_keys(key1::String, key2::String, json)

Return `json[key1]` if `json` has that key, otherwise `json[key2]` if it has
that one, otherwise `nothing`.

Callers pass `""` as `key2` when there is no fallback key. If `json` does not
support `haskey` with a `String` key (for example a plain string or `nothing`),
the result is `nothing`.
"""
function handle_keys(key1::String, key2::String, json)
    # Explicit membership tests instead of try/catch: `catch KeyError` does not
    # filter by exception type, it merely names the caught exception, so the
    # old code silently swallowed every error raised during the lookup.
    for key in (key1, key2)
        if applicable(haskey, json, key) && haskey(json, key)
            return json[key]
        end
    end
    return nothing
end

"""
    get_urls(repo::JSONLD, page)

Collect the download URLs listed in the parsed JSON-LD document `page`.

Reads the `"distribution"` entry, which may be a list of objects or a single
object, and returns the `"contentUrl"` of every entry that has one. Returns an
empty vector when `page` has no `"distribution"` entry.
"""
function get_urls(repo::JSONLD, page)
    distributions = handle_keys("distribution", "", page)
    distributions === nothing && return []

    # JSON-LD allows a single object where a list is expected; iterating a
    # `Dict` directly would yield key/value pairs and find no URL at all.
    if isa(distributions, Dict)
        distributions = [distributions]
    end

    content_url(entry) = handle_keys("contentUrl", "", entry)
    return [content_url(ii) for ii in distributions if content_url(ii) !== nothing]
end

"""
    get_checksums(repo::JSONLD, page)

Return the checksums for a JSON-LD dataset. This method does not read `page`
and always returns an empty, untyped vector.
"""
get_checksums(repo::JSONLD, page) = []

"""
    data_fullname(::JSONLD, mainpage)

Return the dataset's full name, i.e. the value stored under `"name"` in
`mainpage`. Any indexing error (such as a missing key) propagates to the caller.
"""
data_fullname(::JSONLD, mainpage) = mainpage["name"]

"""
    website(::JSONLD, mainpage_url, mainpage)

Return the website of the dataset, which is the `mainpage_url` argument passed
through unchanged; `mainpage` is not consulted.
"""
website(::JSONLD, mainpage_url, mainpage) = mainpage_url

"""
    mainpage_url(repo::JSONLD, dataname)

Load the page identified by `dataname` with `getpage`, locate its
`script[type="application/ld+json"]` element and parse the element's text as
JSON.

Returns the tuple `(json, dataname)`.

Throws an error when the page contains no JSON-LD block, or more than one.
"""
function mainpage_url(repo::JSONLD, dataname)
    page = getpage(dataname)
    pattern = sel"script[type=\"application/ld+json\"]"
    jsonld_blocks = matchall(pattern, page.root)
    if isempty(jsonld_blocks)
        error("No JSON-LD Linked Data Found")
    end
    # The page is external input, so report a real error rather than relying on
    # `@assert`, which is intended for internal invariants and may be disabled.
    if length(jsonld_blocks) != 1
        error("Expected exactly one JSON-LD block, found $(length(jsonld_blocks))")
    end
    script_block = text_only(first(jsonld_blocks))
    json = JSON.parse(script_block)
    return json, dataname
end
10 changes: 10 additions & 0 deletions src/JSONLD/JSONLD_DOI.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""
    JSONLD_DOI <: JSONLD

Field-less repository type whose `mainpage_url` method looks a DOI up on
`data.datacite.org` and parses the JSON-LD response.
"""
struct JSONLD_DOI <: JSONLD
end

"""
    mainpage_url(repo::JSONLD_DOI, dataname)

Extract the DOI from `dataname` with `match_doi`, request it from
`https://data.datacite.org/` with the `Accept` header set to
`application/vnd.schemaorg.ld+json`, and parse the response body as JSON.

Returns the tuple `(json, dataname)`.

Throws an error when `match_doi` finds no DOI in `dataname`.
"""
function mainpage_url(repo::JSONLD_DOI, dataname)
    doi = match_doi(dataname)
    if doi === nothing
        # Fail with a clear message instead of reaching the return statement
        # with `json` never assigned (which surfaced as an `UndefVarError`).
        error("Not a valid DOI: $(dataname)")
    end
    # URLs are built by plain concatenation; `joinpath` is a filesystem-path
    # function whose result depends on the platform.
    url = string("https://data.datacite.org/", doi)
    resp = HTTP.get(url, ["Accept"=>"application/vnd.schemaorg.ld+json"]; forwardheaders=true)
    json = JSON.parse(resp.body |> String |> strip)
    return json, dataname
end
14 changes: 14 additions & 0 deletions src/JSONLD/JSONLD_Web.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""
    JSONLD_Web <: JSONLD

Field-less repository type whose `mainpage_url` method reads the JSON-LD
`script` block embedded in the page obtained from `getpage`.
"""
struct JSONLD_Web <: JSONLD
end

"""
    mainpage_url(repo::JSONLD_Web, dataname)

Load the page identified by `dataname` with `getpage`, locate its
`script[type="application/ld+json"]` element and parse the element's text as
JSON.

Returns the tuple `(json, dataname)`.

Throws an error when the page contains no JSON-LD block, or more than one.
"""
function mainpage_url(repo::JSONLD_Web, dataname)
    page = getpage(dataname)
    pattern = sel"script[type=\"application/ld+json\"]"
    jsonld_blocks = matchall(pattern, page.root)
    if isempty(jsonld_blocks)
        error("No JSON-LD Linked Data Found")
    end
    # The page is external input, so report a real error rather than relying on
    # `@assert`, which is intended for internal invariants and may be disabled.
    if length(jsonld_blocks) != 1
        error("Expected exactly one JSON-LD block, found $(length(jsonld_blocks))")
    end
    script_block = text_only(first(jsonld_blocks))
    json = JSON.parse(script_block)
    return json, dataname
end
9 changes: 9 additions & 0 deletions test/JSONLD/JSONLD.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using DataDepsGenerators
using Base.Test

using ReferenceTests

# Reference tests for the two JSON-LD repository types: each compares the
# output of `generate` with a stored reference file.
# NOTE(review): `mainpage_url(::JSONLD_DOI, ...)` issues an `HTTP.get`, so these
# tests presumably need network access; confirm before running them offline.
# NOTE(review): the reference file names mention Kaggle and Figshare, while the
# inputs below are a zenodo.org record URL and a `journal.pbio` DOI.
@testset "JSONLD test" begin
@test_reference "../references/JSONLD_Web Kaggle.txt" generate(JSONLD_Web(), "https://zenodo.org/record/1287281")
@test_reference "../references/JSONLD_DOI Figshare.txt" generate(JSONLD_DOI(), "10.1371/journal.pbio.2001414")
end
14 changes: 14 additions & 0 deletions test/references/JSONLD_DOI Figshare.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
register(DataDep(
"Identifiers for the 21st century How to design, provision, and reuse persistent identifiers to maximize utility and impact of life science data",
"""
Dataset: Identifiers for the 21st century: How to design, provision, and reuse persistent identifiers to maximize utility and impact of life science data
Website: 10.1371/journal.pbio.2001414
Author: Julie A. McMurry et al.
License: http://creativecommons.org/licenses/by/4.0
Date: June 29, 2017

nothing
""",
Any[],
[]
))
20 changes: 20 additions & 0 deletions test/references/JSONLD_Web Kaggle.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
register(DataDep(
"Map of Co-Seismic Landslides for the M 7.8 Kaikoura, New Zealand Earthquake",
"""
Dataset: Map of Co-Seismic Landslides for the M 7.8 Kaikoura, New Zealand Earthquake
Website: https://zenodo.org/record/1287281
Author: Valkaniotis Sotiris et al.
License: https://creativecommons.org/licenses/by/4.0/
Date: December 20, 2016

<p>Prepared by the Research Group on Earthquake Geology in Greece (http://eqgeogr.weebly.com/)</p>

<p>Version 2 (updated)</p>

<p>With the release of new Sentinel-2 images, and other available resources for the M7.8 Kaikoura earthquake, we present an update of the Map of Co-Seismic Landslides and Surfaces Ruptures (As of 27/11/2016). Landslides were mapped using Sentinel-2 satellite images from Copernicus, European Space Agency, dated November and December 2016. Images were visually compared with previous last available S2A images without cloud cover (13 September and 26 October) and landslides and large slope failures were manually mapped. Areas covered by cloud are omitted and shown on map. 5875 landslide sites are shown in the map. A small number of landslides could have been mis-identified due to insufficient resolution of the images, small gaps of cloud cover or for other reasons. Also, re-activated landslides on the central mountainous area were unabled to identify due to imagery restrictions (medium resolution, relief shadows etc). Some local gaps in Sentinel imagery still exist due to cloud cover, but we believe the current map is very close to the major distribution of mass movement effects. Surface ruptures were mapped using Sentinel-2 imagery and approximate position from photos of the post-earthquake aerial surveys of Environment Canterbury Regional Council (http://ecan.govt.nz)</p>

<p>KML file contains7355 landslide spots.</p>
""",
String["https://zenodo.org/api/files/5a311c7a-bd5e-4df7-be61-341d03ec9a9b/Landslide_Map_V2_A2.pdf", "https://zenodo.org/api/files/5a311c7a-bd5e-4df7-be61-341d03ec9a9b/Landslides_Kaikoura_2016.kmz", "https://zenodo.org/api/files/5a311c7a-bd5e-4df7-be61-341d03ec9a9b/Prelim_Landslide_Map_A2.jpg"],
[]
))

0 comments on commit 67ea43b

Please sign in to comment.