diff --git a/.gitignore b/.gitignore index ce175e3fcf..c3511ff453 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ _site vendor debug.log __pycache__ +.cache +!cache.db .DS_STORE .env* package.json diff --git a/CHANGELOG.md b/CHANGELOG.md index d7ab5a2c93..57e130452c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ Reference: common-changelog.org +## 1.1.4 - 2023-04-28 + +### Changed + +- Fix ORCID plugin and other cite process bugs. + ## 1.1.3 - 2023-04-20 ### Changed diff --git a/CITATION.cff b/CITATION.cff index 6d40cbc530..4870fb24c2 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,8 +1,8 @@ # citation metadata for the template itself title: "Lab Website Template" -version: 1.1.3 -date-released: 2023-04-20 +version: 1.1.4 +date-released: 2023-04-28 url: "https://github.com/greenelab/lab-website-template" authors: - family-names: "Rubinetti" diff --git a/_cite/.cache/cache.db b/_cite/.cache/cache.db index 23703d7497..fb6f435a43 100644 Binary files a/_cite/.cache/cache.db and b/_cite/.cache/cache.db differ diff --git a/_cite/cite.py b/_cite/cite.py index 8828a9f69e..e0b7dcff37 100644 --- a/_cite/cite.py +++ b/_cite/cite.py @@ -89,11 +89,11 @@ # merge sources with matching (non-blank) ids for a in range(0, len(sources)): - _id = sources[a].get("id") + _id = sources[a].get("id", "") if not _id: continue for b in range(a + 1, len(sources)): - if sources[b].get("id") == _id: + if sources[b].get("id", "") == _id: sources[a].update(sources[b]) sources[b] = {} sources = [entry for entry in sources if entry] @@ -109,6 +109,7 @@ # list of new citations citations = [] + # loop through compiled sources for index, source in enumerate(sources): log(f"Processing source {index + 1} of {len(sources)}, {label(source)}") @@ -130,7 +131,7 @@ # if Manubot cannot cite source except Exception as e: # if regular source (id entered by user), throw error - if source.get("plugin") == "sources.py": + if source.get("plugin", "") == "sources.py": log(e, 3, "ERROR") error = True # otherwise, if from metasource (id retrieved from some third-party API), just warn @@ -143,7 +144,8 @@ citation.update(source) # ensure date in proper format for correct date sorting - citation["date"] = format_date(citation.get("date")) + if citation.get("date", ""): + citation["date"] = format_date(citation.get("date", "")) # add new citation to list citations.append(citation) @@ -153,6 +155,7 @@ log("Saving updated citations") + # save new citations try: save_data(output_file, citations) diff --git a/_cite/plugins/google-scholar.py b/_cite/plugins/google-scholar.py index 0a05a00568..f04dec811d 100644 --- a/_cite/plugins/google-scholar.py +++ b/_cite/plugins/google-scholar.py @@ -9,41 +9,41 @@ def main(entry): returns list of sources to cite """ - # get id from entry - id = entry.get("gsid") - if not id: - raise Exception('No "gsid" key') - # get api key - api_key = os.environ.get("GOOGLE_SCHOLAR_API_KEY") + api_key = os.environ.get("GOOGLE_SCHOLAR_API_KEY", "") if not api_key: raise Exception('No "GOOGLE_SCHOLAR_API_KEY" env var') # serp api params = { "engine": "google_scholar_author", - "author_id": id, "api_key": api_key, - "num": 100, + "num": 100, # max allowed } + # get id from entry + _id = entry.get("gsid", "") + if not _id: + raise Exception('No "gsid" key') + # query api @log_cache @cache.memoize(name=__file__, expire=1 * (60 * 60 * 24)) - def query(): + def query(_id): + params["author_id"] = _id return GoogleSearch(params).get_dict().get("articles", []) - response = query() + response = query(_id) # list of sources to return sources = [] # go through response and format sources for work in response: - # create source source = { "id": work.get("citation_id", ""), + # api does not provide Manubot-citeable id, so keep citation details "title": work.get("title", ""), "authors": list(map(str.strip, work.get("authors", "").split(","))), "publisher": work.get("publication", ""), diff --git a/_cite/plugins/orcid.py b/_cite/plugins/orcid.py index 28817482e5..1d47e72b7e 100644 --- a/_cite/plugins/orcid.py +++ b/_cite/plugins/orcid.py @@ -10,42 +10,102 @@ def main(entry): """ # orcid api - endpoint = "https://pub.orcid.org/v2.0/$ORCID/works" + endpoint = "https://pub.orcid.org/v3.0/$ORCID/works" headers = {"Accept": "application/json"} # get id from entry - id = entry.get("orcid") - if not id: + _id = entry.get("orcid", "") + if not _id: raise Exception('No "orcid" key') # query api @log_cache @cache.memoize(name=__file__, expire=1 * (60 * 60 * 24)) - def query(): - url = endpoint.replace("$ORCID", id) + def query(_id): + url = endpoint.replace("$ORCID", _id) request = Request(url=url, headers=headers) response = json.loads(urlopen(request).read()) - return response.get("group") + return response.get("group", []) - response = query() + response = query(_id) # list of sources to return sources = [] # go through response structure and pull out ids e.g. doi:1234/56789 for work in response: - for id in work["external-ids"]["external-id"]: - # get id and id-type from response - id_type = id["external-id-type"] - id_value = id["external-id-value"] + # get list of ids + ids = work.get("external-ids", {}).get("external-id", []) + for summary in work.get("work-summary", []): + ids = ids + summary.get("external-ids", {}).get("external-id", []) - # create source - source = {"id": f"{id_type}:{id_value}"} + # prefer doi id type, or fallback to first id + _id = next( + (id for id in ids if id.get("external-id-type", "") == "doi"), + ids[0] if len(ids) > 0 else {}, + ) - # copy fields from entry to source - source.update(entry) + # get id and id-type from response + id_type = _id.get("external-id-type", "") + id_value = _id.get("external-id-value", "") - # add source to list - sources.append(source) + # create source + source = {"id": f"{id_type}:{id_value}"} + + # if not a doi, Manubot likely can't cite, so keep citation details + if id_type != "doi": + # get summaries + summaries = work.get("work-summary", []) + + # sort summary entries by most recent + summaries = sorted( + summaries, + key=lambda summary: ( + summary.get("last-modified-date", {}).get("value", 0) + ) + or summary.get("created-date", {}).get("value", 0) + or 0, + reverse=True, + ) + + # get first summary with defined sub-value + def first(get_func): + return next(value for value in map(get_func, summaries) if value) + + # get title + title = first( + lambda s: s.get("title", {}).get("title", {}).get("value", "") + ) + + # get publisher + publisher = first(lambda s: s.get("journal-title", {}).get("value", "")) + + # get date + date = ( + work.get("last-modified-date", {}).get("value", 0) + or first(lambda s: s.get("last-modified-date", {}).get("value", 0)) + or work.get("created-date", {}).get("value", 0) + or first(lambda s: s.get("created-date", {}).get("value", 0)) + or 0 + ) + + # get link + link = first(lambda s: s.get("url", {}).get("value", "")) + + # keep available details + if title: + source["title"] = title + if publisher: + source["publisher"] = publisher + if date: + source["date"] = format_date(date) + if link: + source["link"] = link + + # copy fields from entry to source + source.update(entry) + + # add source to list + sources.append(source) return sources diff --git a/_cite/plugins/pubmed.py b/_cite/plugins/pubmed.py index ac9fac7c66..63eefb7b84 100644 --- a/_cite/plugins/pubmed.py +++ b/_cite/plugins/pubmed.py @@ -14,28 +14,28 @@ def main(entry): endpoint = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$TERM&retmode=json&retmax=1000&usehistory=y" # get id from entry - id = entry.get("term") - if not id: + _id = entry.get("term", "") + if not _id: raise Exception('No "term" key') # query api @log_cache @cache.memoize(name=__file__, expire=1 * (60 * 60 * 24)) - def query(): - url = endpoint.replace("$TERM", quote(id)) + def query(_id): + url = endpoint.replace("$TERM", quote(_id)) request = Request(url=url) response = json.loads(urlopen(request).read()) - return response.get("esearchresult", {}).get("idlist") + return response.get("esearchresult", {}).get("idlist", []) - response = query() + response = query(_id) # list of sources to return sources = [] # go through response and format sources - for id in response: + for _id in response: # create source - source = {"id": f"pubmed:{id}"} + source = {"id": f"pubmed:{_id}"} # copy fields from entry to source source.update(entry) diff --git a/_cite/util.py b/_cite/util.py index 87efe940a4..74638a6c57 100644 --- a/_cite/util.py +++ b/_cite/util.py @@ -49,7 +49,7 @@ def log(message="\n--------------------\n", indent=0, level="", newline=True): "SUCCESS": "[black on #10B981]", "INFO": "[grey70]", } - color = palette.get(level) or palette.get(indent) or "[white]" + color = palette.get(level, "") or palette.get(indent, "") or "[white]" if newline: print() print(indent * " " + color + str(message) + "[/]", end="", flush=True) @@ -76,6 +76,8 @@ def format_date(date): format date as YYYY-MM-DD, or no date if malformed """ + if isinstance(date, int): + return datetime.fromtimestamp(date // 1000.0).strftime("%Y-%m-%d") try: return datetime.strptime(date, "%Y-%m-%d").strftime("%Y-%m-%d") except Exception: @@ -178,7 +180,7 @@ def cite_with_manubot(_id): # authors citation["authors"] = [] - for author in manubot.get("author", []): + for author in manubot.get("author", {}): given = author.get("given", "").strip() family = author.get("family", "").strip() if given or family: @@ -193,8 +195,8 @@ def cite_with_manubot(_id): # extract date part def date_part(citation, index): try: - return citation.get("issued").get("date-parts")[0][index] - except Exception: + return citation["issued"]["date-parts"][0][index] + except (KeyError, IndexError, TypeError): return "" # date diff --git a/_layouts/member.html b/_layouts/member.html index 23b665646e..034b5d080d 100644 --- a/_layouts/member.html +++ b/_layouts/member.html @@ -32,16 +32,20 @@ research/?search={% for alias in aliases %}"{{ alias }}" {% endfor %} {%- endcapture %} - - Search for {{ page.name | default: page.title }}'s papers on the Research page - +
+ + Search for {{ page.name | default: page.title }}'s papers on the Research page + +
{% capture search -%} blog/?search={{ page.name }} {%- endcapture %} - +