openlibhums · ajrbyers · Feb 7, 2024 · Feb 22, 2024 · Feb 22, 2024 · Mar 12, 2024
diff --git a/admin.py b/admin.py
@@ -73,11 +73,29 @@ class CSVImportArticleAdmin(utils_admin_utils.ArticleFKModelAdmin):
     date_hierarchy = ('imported')
 
 
+class NotificationAdmin(admin.ModelAdmin):  # Admin options used for models.AutomatedImportNotification in the registration loop below.
+    list_display = ('pk', 'email')  # Changelist columns: primary key and email.
+
+
+class CitationFormatAdmin(admin.ModelAdmin):  # Admin options used for models.CitationFormat in the registration loop below.
+    list_display = ('journal', 'format')  # Changelist columns.
+    raw_id_fields = ('journal',)  # Edit the journal relation as a raw ID input instead of a select box.
+
+
+class SectionMapAdmin(admin.ModelAdmin):  # Admin options used for models.SectionMap in the registration loop below.
+    list_display = ('section', 'article_type')  # Changelist columns.
+    search_fields = ('article_type', 'section__name')  # Search on the article type or the related section's name.
+    raw_id_fields = ('section',)  # Edit the section relation as a raw ID input instead of a select box.
+
+
 for pair in [
     (models.ExportFile, ExportFileAdmin),
     (models.CSVImport, CSVImportAdmin),
     (models.CSVImportCreateArticle, CSVImportArticleAdmin),
     (models.CSVImportUpdateArticle, CSVImportArticleAdmin),
     (models.OJSFile,),
+    (models.AutomatedImportNotification, NotificationAdmin),
+    (models.CitationFormat, CitationFormatAdmin),
+    (models.SectionMap, SectionMapAdmin),
 ]:
     admin.site.register(*pair)
diff --git a/jats.py b/jats.py
@@ -18,6 +18,7 @@
 from django.db import transaction
 from django.utils import timezone
 
+from plugins.imports import models
 from core import files
 from core import models as core_models
 from core.models import Account
@@ -42,6 +43,7 @@ def import_jats_article(
         jats_contents, journal=None,
         persist=True, filename=None, owner=None,
         images=None, request=None, stage=None,
+        get_section_from_subject=False,
 ):
     """ JATS import entrypoint
     :param jats_contents: (str) the JATS XML to be imported
@@ -63,13 +65,14 @@ def import_jats_article(
     meta["abstract"] = get_jats_abstract(metadata_soup)
     meta["issue"], meta["volume"] = get_jats_issue(jats_soup)
     meta["keywords"] = get_jats_keywords(metadata_soup)
-    meta["section_name"] = get_jats_section_name(jats_soup)
+    meta["section_name"] = get_jats_section_name(jats_soup, get_section_from_subject)
     meta["date_published"] = get_jats_pub_date(jats_soup) or datetime.date.today()
     meta["license_url"], meta["license_text"] = get_jats_license(jats_soup)
     meta["rights"] = get_jats_rights_statement(jats_soup)
     meta["authors"] = []
     meta["date_submitted"] = None
     meta["date_accepted"] = None
+    meta["custom_how_to_cite"] = get_custom_how_to_cite(metadata_soup)
     try:
         meta["first_page"] = int(metadata_soup.find("fpage").text)
     except (ValueError, AttributeError):
@@ -85,7 +88,7 @@ def import_jats_article(
         meta["date_accepted"] = get_jats_acc_date(history_soup)
 
     authors_soup = metadata_soup.find("contrib-group")
-    author_notes = metadata_soup.find("author_notes")
+    author_notes = metadata_soup.find("author-notes")
     if authors_soup:
         meta["authors"] = get_jats_authors(
             authors_soup,
@@ -115,7 +118,10 @@ def import_jats_article(
     return article
 
 
-def import_jats_zipped(zip_file, journal=None, owner=None, persist=True, stage=None):
+def import_jats_zipped(
+        zip_file, journal=None, owner=None,
+        persist=True, stage=None, get_section_from_subject=False,
+):
     """ Import a batch of Zipped JATS articles and their associated files
     :param zip_file: The zipped jats to be imported
     :param journal: Journal in which to import the articles
@@ -147,7 +153,6 @@ def import_jats_zipped(zip_file, journal=None, owner=None, persist=True, stage=N
                         else:
                             supplements.append(file_path)
 
-
                     if jats_path:
                         # Check nested dirs relative to xml like ./figures
                         for dir_ in dirs:
@@ -163,14 +168,15 @@ def import_jats_zipped(zip_file, journal=None, owner=None, persist=True, stage=N
                                 jats_file.read(), journal, persist,
                                 jats_filename, owner, supplements,
                                 stage=stage,
+                                get_section_from_subject=get_section_from_subject,
                             )
                             articles.append((jats_filename, article))
                         if pdf_path:
                             import_pdf(article, pdf_path, pdf_filename)
                 except Exception as err:
                     logger.warning(err)
                     logger.warning(traceback.format_exc())
-                    errors.append((filenames, err))
+                    errors.append((jats_path, err))
 
     return articles, errors
 
@@ -210,7 +216,7 @@ def get_jats_title(soup):
 def get_jats_abstract(soup):
     abstract = soup.find("abstract")
     if abstract:
-        return abstract.text
+        return f"<p>{abstract.text}</p>"  # NOTE(review): abstract.text is interpolated as-is (no HTML escaping) before being wrapped in <p> - confirm this is intended.
     else:
         return ""
 
@@ -278,16 +284,22 @@ def get_jats_acc_date(soup):
 
 def get_jats_keywords(soup):
     jats_keywords_soup = soup.find("kwd-group")
+    # Previously a set; now a list so keyword order is preserved, with
+    # dict.fromkeys() keeping the de-duplication the set used to provide.
     if jats_keywords_soup:
-        return {
+        return list(dict.fromkeys(
             keyword.text.strip()
             for keyword in jats_keywords_soup.find_all("kwd")
-        }
+        ))
     else:
-        return set()
+        return []
 
 
-def get_jats_section_name(soup):
+def get_jats_section_name(soup, get_section_from_subject=False):
+    if get_section_from_subject:  # Opt-in: prefer the first <subject> element's text; defaults off so one-argument callers keep working.
+        subject = soup.find("subject")
+        if subject:
+            return subject.text
     return soup.find("article").attrs.get("article-type")
 
 
@@ -334,10 +346,44 @@ def get_jats_authors(soup, metadata_soup, author_notes=None):
                 corresp_email = author_notes.find("email")
                 if corresp_email:
                     author_data["email"] = corresp_email.text
+            else:
+                # Check an alternative route for identifying corresp
+                # authors
+                corresp_ref = author.find(
+                    'xref', {'ref-type': 'corresp'}
+                )
+                if corresp_ref:
+                    author_data["correspondence"] = True
+
+                    if author_notes:
+                        xref_rid = corresp_ref.get('rid')
+                        corr_note = author_notes.find(
+                            'corresp', {'id': xref_rid}
+                        )
+                        if corr_note:
+                            corresp_email = corr_note.find(
+                                'email'
+                            )
+                            if corresp_email:
+                                author_data["email"] = corresp_email.text
+
             authors.append(author_data)
     return authors
 
 
+def get_custom_how_to_cite(metadata_soup):
+    # Return the <meta-value> text of the first "How to cite" <custom-meta>, or ''.
+    for custom_meta_tag in metadata_soup.find_all('custom-meta'):
+        meta_name_tag = custom_meta_tag.find('meta-name')
+        meta_value_tag = custom_meta_tag.find('meta-value')
+        if not (meta_name_tag and meta_value_tag):
+            continue
+        # get_text() copes with nested markup (.string is None there); match any casing.
+        if meta_name_tag.get_text().strip().lower() == 'how to cite':
+            return meta_value_tag.text
+    return ''
+
+
 def get_orcid(author_soup):
     contrib_ids = author_soup.findAll('contrib-id')
     for ci in contrib_ids:
@@ -372,12 +418,19 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None):
         journal = get_lost_found_journal()
 
     with transaction.atomic():
-        section, _ = submission_models.Section.objects \
-            .get_or_create(
-                journal=journal,
-                name=metadata["section_name"],
-        )
-        section.save()
+        try:
+            section_map = models.SectionMap.objects.get(
+                article_type=metadata["section_name"],
+                section__journal=journal,
+            )
+            if section_map:
+                section = section_map.section
+        except models.SectionMap.DoesNotExist:
+            section, _ = submission_models.Section.objects \
+                .get_or_create(
+                    journal=journal,
+                    name=metadata["section_name"],
+            )
 
         article = get_article(metadata.get("identifiers", {}), journal)
         if not article:
@@ -386,14 +439,16 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None):
                 title=metadata["title"],
                 abstract=metadata["abstract"],
                 date_published=metadata["date_published"],
-                date_accepted=metadata["date_submitted"],
+                date_accepted=metadata["date_accepted"],
                 date_submitted=metadata["date_submitted"],
                 rights=metadata["rights"],
                 stage=stage or submission_models.STAGE_PUBLISHED,
                 is_import=True,
                 owner=owner,
                 first_page=metadata["first_page"],
-                last_page=metadata["last_page"]
+                last_page=metadata["last_page"],
+                custom_how_to_cite=metadata['custom_how_to_cite'],
+                article_agreement='This article is a JATS import.',
             )
             article.section = section
             article.save()
@@ -403,11 +458,13 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None):
             article.abstract = metadata["abstract"]
             article.date_published = metadata["date_published"]
             article.date_published = metadata["date_published"]
-            article.date_accepted = metadata["date_submitted"]
+            article.date_accepted = metadata["date_accepted"]
             article.date_submitted = metadata["date_submitted"]
             article.rights = metadata["rights"]
             article.first_page = metadata["first_page"]
             article.last_page = metadata["last_page"]
+            article.custom_how_to_cite = metadata["custom_how_to_cite"]
+            article.section = section
             article.save()
 
         if metadata["identifiers"]["doi"]:
@@ -444,7 +501,6 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None):
                         "orcid": author["orcid"],
                     },
                 )
-
             fa = submission_models.FrozenAuthor.objects.create(
                 article=article,
                 author=account,
@@ -454,6 +510,7 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None):
                 frozen_orcid=author["orcid"],
                 frozen_email=author['email'],
                 order=idx,
+                display_email=True if author['correspondence'] else False,
             )
             if account and author["correspondence"]:
                 article.correspondence_author = account
@@ -491,6 +548,13 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None):
                 journal=journal,
                 defaults={"issue_type": issue_type}
             )
+            journal_models.SectionOrdering.objects.update_or_create(
+                issue=issue,
+                section=section,
+                defaults={
+                    "order": 0,
+                }
+            )
         issue.articles.add(article)
         article.primary_issue = issue
         article.save()
@@ -701,7 +765,7 @@ def import_jats_preprint(
     meta["license_url"], meta["license_text"] = get_jats_license(jats_soup)
     meta["authors"] = []
     authors_soup = metadata_soup.find("contrib-group")
-    author_notes = metadata_soup.find("author_notes")
+    author_notes = jats_soup.find("author-notes")
     if authors_soup:
         meta["authors"] = get_jats_authors(
             authors_soup,
@@ -837,7 +901,7 @@ def save_preprint(
         return preprint
 
 
-def import_html_reviews(preprint, review_files, owner):
+def import_html_reviews(preprint, review_files, owner, number=None):
     review_round, _ = review_models.ReviewRound.objects.get_or_create(
         round_number=1,
         article=preprint.article,
@@ -846,6 +910,7 @@ def import_html_reviews(preprint, review_files, owner):
         journal=preprint.article.journal,
     ).first()
     for review_file in review_files:
+        print(f"Importing {review_file}")
         with open(review_file, 'r') as r_file:
             contents = r_file.read()
             try:
@@ -878,7 +943,7 @@ def import_html_reviews(preprint, review_files, owner):
                 assignment=review_assignment,
                 original_element=default_element,
                 defaults={
-                    'answer': contents.strip().replace('\n', ''),
+                    'answer': answer,
                     'author_can_see': True,
                 }
             )

diff --git a/management/commands/fetch_crossref_how_to_cite.py b/management/commands/fetch_crossref_how_to_cite.py
@@ -0,0 +1,90 @@
+from django.core.management.base import BaseCommand
+
+import requests
+import time
+from pprint import pprint
+
+from submission import models
+
+# Seconds to wait on a single Crossref request before giving up.
+REQUEST_TIMEOUT = 30
+# Pause (seconds) after each Crossref request to stay polite to the API.
+REQUEST_DELAY = 2
+
+
+class Command(BaseCommand):
+    """For a given journal, query Crossref for each article's formatted
+    citation and store it in the article's custom how to cite field."""
+
+    help = "Gets custom how to cite using Crossref."
+
+    def add_arguments(self, parser):
+        # --journal is matched against journal__code; --article_id optionally
+        # narrows the run to one article; style/locale/mailto go to Crossref.
+        parser.add_argument('--journal', type=str)
+        parser.add_argument('--article_id', type=str)
+        parser.add_argument('--style', type=str)
+        parser.add_argument('--locale', type=str)
+        parser.add_argument('--mailto', type=str)
+
+    def handle(self, *args, **options):
+        errors = []
+        articles = models.Article.objects.filter(
+            journal__code=options.get('journal'),
+        )
+
+        article_id = options.get('article_id')
+        if article_id:
+            articles = articles.filter(pk=article_id)
+
+        # Only send mailto when it was supplied, so the query string never
+        # carries a literal "mailto=None".
+        params = {}
+        if options.get('mailto'):
+            params['mailto'] = options['mailto']
+
+        # NOTE(review): Crossref's content-negotiation docs pass style and
+        # locale as parameters of the Accept header (for example
+        # "text/x-bibliography; style=apa; locale=en-GB") - confirm that the
+        # separate 'style' and 'locale' headers below are honoured.
+        headers = {
+            'Accept': 'text/bibliography',
+            'style': options.get('style'),
+            'locale': options.get('locale'),
+        }
+
+        # Progress is reported 1-based so the last article reads N/N.
+        for index, article in enumerate(articles, start=1):
+            print(f"Getting how to cite for article #{article.pk}. {index}/{articles.count()}")
+            doi = article.get_doi()
+            if not doi:
+                print(f"Article #{article.pk} does not have a DOI")
+                continue
+
+            try:
+                r = requests.get(
+                    url=f"https://api.crossref.org/v1/works/{doi}/transform",
+                    params=params,
+                    headers=headers,
+                    # Without a timeout a stalled connection blocks forever.
+                    timeout=REQUEST_TIMEOUT,
+                )
+                r.encoding = 'UTF-8'
+                how_to_cite = r.text.strip()
+                if r.status_code == 200:
+                    print(f"Response: {how_to_cite}")
+                    article.custom_how_to_cite = how_to_cite
+                    article.save()
+                    print(f"Article #{article.pk} how to cite updated.")
+                else:
+                    print(f"Crossref API responded with: {r.status_code}")
+            except Exception as e:
+                # Best effort: record the failure and carry on with the rest.
+                errors.append(
+                    {'article': article, 'error': e}
+                )
+
+            time.sleep(REQUEST_DELAY)
+
+        print('Errors:')
+        pprint(errors)
+