From 0e7bb54045c26793f90ed40c02c3b89c7432ec60 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 17 Dec 2019 17:36:41 +0100 Subject: [PATCH 001/117] Add a method to save citation information --- esmvalcore/_task.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 825cfa8cd3..2b229a4c96 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -11,6 +11,7 @@ import time from copy import deepcopy from multiprocessing import Pool +from pybtex.database import BibliographyData, Entry import psutil import yaml @@ -574,6 +575,33 @@ def _collect_provenance(self): self.name, time.time() - start) + + def _write_citation_file(self): + """Write citation information provided from the recorded provenance.""" + citation_file = os.path.join(self.settings['run_dir'], + 'diagnostic_citation.bibtex') + + # papers describing the diagnostic and recipe + bib_entry = BibliographyData({ + 'article-minimal': Entry('article', [ + ('author', ''), + ('title', ''), + ('journal', ""), + ('year', ''),]), + }) + + # model data citation information + + # observational data citation information + + # esmvaltool and other scientific software citation + + # scientific compute cluster citation information, if applicable + + # save the file + bib_entry.to_string(citation_file, 'bibtex') + + def __str__(self): """Get human readable description.""" txt = "{}:\nscript: {}\n{}\nsettings:\n{}\n".format( From 21e56b5bc2bb073231fd7a8c7e53650e9a989a78 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 15 Jan 2020 17:33:38 +0100 Subject: [PATCH 002/117] Fixing the function write_citation_file --- esmvalcore/_task.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 2b229a4c96..0851e090d6 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -16,7 +16,7 @@ import psutil import yaml -from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, get_tag_value from ._provenance import TrackedFile, get_task_provenance logger = logging.getLogger(__name__) @@ -507,6 +507,7 @@ def _run(self, input_files): if returncode == 0: logger.debug("Script %s completed successfully", self.script) self._collect_provenance() + self._write_citation_file() return [self.output_dir] raise DiagnosticError( @@ -577,18 +578,27 @@ def _collect_provenance(self): def _write_citation_file(self): - """Write citation information provided from the recorded provenance.""" - citation_file = os.path.join(self.settings['run_dir'], - 'diagnostic_citation.bibtex') + """Write citation information provided by the recorded provenance.""" + provenance_file = os.path.join(self.settings['run_dir'], + 'diagnostic_provenance.yml') + with open(provenance_file, 'r') as file: + table = yaml.safe_load(file) + section = 'references' + reference_dict = {} + for filename, attributes in table.items(): + for tag in attributes[section]: + reference_dict[tag] = get_tag_value(section, tag) # papers describing the diagnostic and recipe - bib_entry = BibliographyData({ - 'article-minimal': Entry('article', [ - ('author', ''), - ('title', ''), - ('journal', ""), - ('year', ''),]), - }) + bib_entry = {} + bib_fields = ['author', 'journal', 'volume', 'pages', 'doi', 'year'] + for key in reference_dict: + reference = list(reference_dict[key].split(",")) + # "[Last name] et al., [journal abbr.], [volume], [pages], doi:[doi], [year]. + bib_entry.update({ + key: Entry('article', list(zip(bib_fields, reference ))), + }) + bib_data = BibliographyData(bib_entry) # model data citation information @@ -599,7 +609,9 @@ def _write_citation_file(self): # scientific compute cluster citation information, if applicable # save the file - bib_entry.to_string(citation_file, 'bibtex') + citation_file = os.path.join(self.settings['run_dir'], + 'diagnostic_citation.bibtex') + bib_data.to_string(citation_file, 'bibtex') def __str__(self): From db80953b3700f9b1c08f34f79677f3a73976a230 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 21 Jan 2020 17:53:36 +0100 Subject: [PATCH 003/117] Fix the function write_citation_file --- esmvalcore/_config.py | 11 ++++++++++ esmvalcore/_task.py | 48 +++++++++++++------------------------------ 2 files changed, 25 insertions(+), 34 deletions(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index 6f44f16731..50a0f7cb85 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -26,6 +26,17 @@ def find_diagnostics(): DIAGNOSTICS_PATH = find_diagnostics() +def find_references(): + """Try to find bibtex files in references folder.""" + try: + import esmvaltool + except ImportError: + return '' + return os.path.join(os.path.dirname(esmvaltool.__file__), 'references') + + +REFERENCES_PATH = find_references() + def read_config_user_file(config_file, recipe_name): """Read config user file and store settings in a dictionary.""" diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 0851e090d6..3cd66b2d06 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -12,11 +12,13 @@ from copy import deepcopy from multiprocessing import Pool from pybtex.database import BibliographyData, Entry +import doi2bib.crossref as ref import psutil import yaml +import prov -from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, get_tag_value +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, get_tag_value, REFERENCES_PATH from ._provenance import TrackedFile, get_task_provenance logger = logging.getLogger(__name__) @@ -507,7 +509,7 @@ def _run(self, input_files): if returncode == 0: logger.debug("Script %s completed successfully", self.script) self._collect_provenance() - self._write_citation_file() + # self._write_citation_file() return [self.output_dir] raise DiagnosticError( @@ -571,47 +573,25 @@ def _collect_provenance(self): product = TrackedFile(filename, attributes, ancestors) product.initialize_provenance(self.activity) product.save_provenance() + self._write_citation_file(product) self.products.add(product) logger.debug("Collecting provenance of task %s took %.1f seconds", self.name, time.time() - start) - def _write_citation_file(self): + def _write_citation_file(self, product): """Write citation information provided by the recorded provenance.""" - provenance_file = os.path.join(self.settings['run_dir'], - 'diagnostic_provenance.yml') - with open(provenance_file, 'r') as file: - table = yaml.safe_load(file) - section = 'references' - reference_dict = {} - for filename, attributes in table.items(): - for tag in attributes[section]: - reference_dict[tag] = get_tag_value(section, tag) - - # papers describing the diagnostic and recipe - bib_entry = {} - bib_fields = ['author', 'journal', 'volume', 'pages', 'doi', 'year'] - for key in reference_dict: - reference = list(reference_dict[key].split(",")) - # "[Last name] et al., [journal abbr.], [volume], [pages], doi:[doi], [year]. - bib_entry.update({ - key: Entry('article', list(zip(bib_fields, reference ))), - }) - bib_data = BibliographyData(bib_entry) - - # model data citation information - - # observational data citation information - - # esmvaltool and other scientific software citation + bib_data = {v:k for k, v in TAGS['references'].items()} - # scientific compute cluster citation information, if applicable + for item in product.provenance.records: + for key, value in item.attributes: + # if key.namespace.prefix == 'attribute' and key.localpart in {'reference', 'references'}: + if key.namespace.prefix == 'attribute' and key.localpart in {'reference'}: + tag = bib_data[value] - # save the file - citation_file = os.path.join(self.settings['run_dir'], - 'diagnostic_citation.bibtex') - bib_data.to_string(citation_file, 'bibtex') + # print(REFERENCES_PATH) + # citation_file = Path(product.filename) + '_citation.bibtex' def __str__(self): From 4b63ef6e42e08b281061fe469b3f5816a62c3e70 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 22 Jan 2020 16:25:29 +0100 Subject: [PATCH 004/117] fix the function _write_citation_file --- esmvalcore/_config.py | 1 + esmvalcore/_task.py | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index 50a0f7cb85..d8f7de0819 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -26,6 +26,7 @@ def find_diagnostics(): DIAGNOSTICS_PATH = find_diagnostics() + def find_references(): """Try to find bibtex files in references folder.""" try: diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 3cd66b2d06..eaab26aea9 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -11,14 +11,11 @@ import time from copy import deepcopy from multiprocessing import Pool -from pybtex.database import BibliographyData, Entry -import doi2bib.crossref as ref import psutil import yaml -import prov -from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, get_tag_value, REFERENCES_PATH +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, REFERENCES_PATH from ._provenance import TrackedFile, get_task_provenance logger = logging.getLogger(__name__) @@ -579,20 +576,35 @@ def _collect_provenance(self): self.name, time.time() - start) - def _write_citation_file(self, product): """Write citation information provided by the recorded provenance.""" - bib_data = {v:k for k, v in TAGS['references'].items()} - + reference_tag = {v: k for k, v in TAGS['references'].items()} + # collect info from provenance + product_entry = [] for item in product.provenance.records: for key, value in item.attributes: - # if key.namespace.prefix == 'attribute' and key.localpart in {'reference', 'references'}: - if key.namespace.prefix == 'attribute' and key.localpart in {'reference'}: - tag = bib_data[value] - - # print(REFERENCES_PATH) - # citation_file = Path(product.filename) + '_citation.bibtex' - + if (key.namespace.prefix == 'attribute' + and key.localpart in {'reference', 'references'}): + product_entry.append(value) + + # map between reference tags and entries + product_tag = [] + for key in reference_tag.keys(): + for entry in product_entry: + if key in entry and reference_tag[key] not in product_tag: + product_tag.append(reference_tag[key]) + + # save all citation info into one bibtex file + bibtex_entry = '' + for tags in product_tag: + bib_file_path = os.path.join(REFERENCES_PATH, tags + '.bibtex') + if os.path.isfile(bib_file_path): + with open(bib_file_path, 'r') as file: + bibtex_entry += '{}\n'.format(file.read()) + citation_file = (os.path.splitext(product.filename)[0] + + '_citation.bibtex') + with open(citation_file, 'w') as file: + file.write(bibtex_entry) def __str__(self): """Get human readable description.""" From 6a3872388ec0e0e33db5a3998e1ae04c6becbc1e Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 22 Jan 2020 17:26:14 +0100 Subject: [PATCH 005/117] style --- esmvalcore/_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index eaab26aea9..95459f960c 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -578,7 +578,6 @@ def _collect_provenance(self): def _write_citation_file(self, product): """Write citation information provided by the recorded provenance.""" - reference_tag = {v: k for k, v in TAGS['references'].items()} # collect info from provenance product_entry = [] for item in product.provenance.records: @@ -588,6 +587,7 @@ def _write_citation_file(self, product): product_entry.append(value) # map between reference tags and entries + reference_tag = {v: k for k, v in TAGS['references'].items()} product_tag = [] for key in reference_tag.keys(): for entry in product_entry: From 528eee796057e08b65e2ce2cae9b8cd79f8dcf6f Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 23 Jan 2020 09:55:06 +0100 Subject: [PATCH 006/117] refactor and style --- esmvalcore/_task.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 95459f960c..6e4d2c18fd 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -586,7 +586,7 @@ def _write_citation_file(self, product): and key.localpart in {'reference', 'references'}): product_entry.append(value) - # map between reference tags and entries + # map between reference.tags and product.entries reference_tag = {v: k for k, v in TAGS['references'].items()} product_tag = [] for key in reference_tag.keys(): @@ -597,10 +597,14 @@ def _write_citation_file(self, product): # save all citation info into one bibtex file bibtex_entry = '' for tags in product_tag: - bib_file_path = os.path.join(REFERENCES_PATH, tags + '.bibtex') - if os.path.isfile(bib_file_path): - with open(bib_file_path, 'r') as file: + bibtex_file = os.path.join(REFERENCES_PATH, tags + '.bibtex') + if os.path.isfile(bibtex_file): + with open(bibtex_file, 'r') as file: bibtex_entry += '{}\n'.format(file.read()) + else: + raise DiagnosticError( + "The reference file ({}): does not exist.".format( + bibtex_file)) citation_file = (os.path.splitext(product.filename)[0] + '_citation.bibtex') with open(citation_file, 'w') as file: From a7a6368e72d39f86947bd96ca2ebef2c5566efce Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 23 Jan 2020 16:31:04 +0100 Subject: [PATCH 007/117] Add esmvaltool paper to the provenance, and style --- esmvalcore/_provenance.py | 12 +++++++++--- esmvalcore/_task.py | 1 - 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index d0c5352e2b..d966fdf390 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -8,12 +8,15 @@ from PIL.PngImagePlugin import PngInfo from prov.dot import prov_to_dot from prov.model import ProvDocument +from ._config import replace_tags from ._version import __version__ logger = logging.getLogger(__name__) ESMVALTOOL_URI_PREFIX = 'https://www.esmvaltool.org/' +# it is the technical overview and should always be cited +ESMVALTOOL_PAPER_TAG = ['righi19gmd'] def update_without_duplicating(bundle, other): @@ -31,9 +34,12 @@ def create_namespace(provenance, namespace): def get_esmvaltool_provenance(): """Create an esmvaltool run activity.""" provenance = ProvDocument() - namespace = 'software' - create_namespace(provenance, namespace) - attributes = {} # TODO: add dependencies with versions here + for namespace in ('software', 'attribute'): + create_namespace(provenance, namespace) + + # TODO: add dependencies with versions here + attributes_value = replace_tags('references', ESMVALTOOL_PAPER_TAG) + attributes = {'attribute:references': attributes_value} activity = provenance.activity( namespace + ':esmvaltool==' + __version__, other_attributes=attributes) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 6e4d2c18fd..102db2bfee 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -506,7 +506,6 @@ def _run(self, input_files): if returncode == 0: logger.debug("Script %s completed successfully", self.script) self._collect_provenance() - # self._write_citation_file() return [self.output_dir] raise DiagnosticError( From 65ee713c42b7947a6731f8d7ecc3a94e724ddd6a Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 23 Jan 2020 17:33:12 +0100 Subject: [PATCH 008/117] update the tag --- esmvalcore/_provenance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index d966fdf390..72ed9ce9ce 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -16,7 +16,7 @@ ESMVALTOOL_URI_PREFIX = 'https://www.esmvaltool.org/' # it is the technical overview and should always be cited -ESMVALTOOL_PAPER_TAG = ['righi19gmd'] +ESMVALTOOL_PAPER_TAG = ['righi19gmdd'] def update_without_duplicating(bundle, other): From 36d7e363dd8a9af5a59ea6cb9426e93cd9bc7c4d Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 17 Dec 2019 17:36:41 +0100 Subject: [PATCH 009/117] Add a method to save citation information --- esmvalcore/_task.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 28074b5163..c8c1841e2d 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -11,6 +11,7 @@ import time from copy import deepcopy from multiprocessing import Pool +from pybtex.database import BibliographyData, Entry import psutil import yaml @@ -582,6 +583,33 @@ def _collect_provenance(self): self.name, time.time() - start) + + def _write_citation_file(self): + """Write citation information provided from the recorded provenance.""" + citation_file = os.path.join(self.settings['run_dir'], + 'diagnostic_citation.bibtex') + + # papers describing the diagnostic and recipe + bib_entry = BibliographyData({ + 'article-minimal': Entry('article', [ + ('author', ''), + ('title', ''), + ('journal', ""), + ('year', ''),]), + }) + + # model data citation information + + # observational data citation information + + # esmvaltool and other scientific software citation + + # scientific compute cluster citation information, if applicable + + # save the file + bib_entry.to_string(citation_file, 'bibtex') + + def __str__(self): """Get human readable description.""" txt = "{}:\nscript: {}\n{}\nsettings:\n{}\n".format( From 5f70c732ea42eb59ea7b278dbc7d7c1a3731118e Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 15 Jan 2020 17:33:38 +0100 Subject: [PATCH 010/117] Fixing the function write_citation_file --- esmvalcore/_task.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index c8c1841e2d..17fa1c8d90 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -16,7 +16,7 @@ import psutil import yaml -from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, get_tag_value from ._provenance import TrackedFile, get_task_provenance logger = logging.getLogger(__name__) @@ -515,6 +515,7 @@ def _run(self, input_files): if returncode == 0: logger.debug("Script %s completed successfully", self.script) self._collect_provenance() + self._write_citation_file() return [self.output_dir] raise DiagnosticError( @@ -585,18 +586,27 @@ def _collect_provenance(self): def _write_citation_file(self): - """Write citation information provided from the recorded provenance.""" - citation_file = os.path.join(self.settings['run_dir'], - 'diagnostic_citation.bibtex') + """Write citation information provided by the recorded provenance.""" + provenance_file = os.path.join(self.settings['run_dir'], + 'diagnostic_provenance.yml') + with open(provenance_file, 'r') as file: + table = yaml.safe_load(file) + section = 'references' + reference_dict = {} + for filename, attributes in table.items(): + for tag in attributes[section]: + reference_dict[tag] = get_tag_value(section, tag) # papers describing the diagnostic and recipe - bib_entry = BibliographyData({ - 'article-minimal': Entry('article', [ - ('author', ''), - ('title', ''), - ('journal', ""), - ('year', ''),]), - }) + bib_entry = {} + bib_fields = ['author', 'journal', 'volume', 'pages', 'doi', 'year'] + for key in reference_dict: + reference = list(reference_dict[key].split(",")) + # "[Last name] et al., [journal abbr.], [volume], [pages], doi:[doi], [year]. + bib_entry.update({ + key: Entry('article', list(zip(bib_fields, reference ))), + }) + bib_data = BibliographyData(bib_entry) # model data citation information @@ -607,7 +617,9 @@ def _write_citation_file(self): # scientific compute cluster citation information, if applicable # save the file - bib_entry.to_string(citation_file, 'bibtex') + citation_file = os.path.join(self.settings['run_dir'], + 'diagnostic_citation.bibtex') + bib_data.to_string(citation_file, 'bibtex') def __str__(self): From aff593d4b2f767958f1a59687b7122794a749892 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 21 Jan 2020 17:53:36 +0100 Subject: [PATCH 011/117] Fix the function write_citation_file --- esmvalcore/_config.py | 11 ++++++++++ esmvalcore/_task.py | 48 +++++++++++++------------------------------ 2 files changed, 25 insertions(+), 34 deletions(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index c8e08381db..06453ac572 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -26,6 +26,17 @@ def find_diagnostics(): DIAGNOSTICS_PATH = find_diagnostics() +def find_references(): + """Try to find bibtex files in references folder.""" + try: + import esmvaltool + except ImportError: + return '' + return os.path.join(os.path.dirname(esmvaltool.__file__), 'references') + + +REFERENCES_PATH = find_references() + def read_config_user_file(config_file, recipe_name): """Read config user file and store settings in a dictionary.""" diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 17fa1c8d90..340c4ace4d 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -12,11 +12,13 @@ from copy import deepcopy from multiprocessing import Pool from pybtex.database import BibliographyData, Entry +import doi2bib.crossref as ref import psutil import yaml +import prov -from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, get_tag_value +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, get_tag_value, REFERENCES_PATH from ._provenance import TrackedFile, get_task_provenance logger = logging.getLogger(__name__) @@ -515,7 +517,7 @@ def _run(self, input_files): if returncode == 0: logger.debug("Script %s completed successfully", self.script) self._collect_provenance() - self._write_citation_file() + # self._write_citation_file() return [self.output_dir] raise DiagnosticError( @@ -579,47 +581,25 @@ def _collect_provenance(self): product = TrackedFile(filename, attributes, ancestors) product.initialize_provenance(self.activity) product.save_provenance() + self._write_citation_file(product) self.products.add(product) logger.debug("Collecting provenance of task %s took %.1f seconds", self.name, time.time() - start) - def _write_citation_file(self): + def _write_citation_file(self, product): """Write citation information provided by the recorded provenance.""" - provenance_file = os.path.join(self.settings['run_dir'], - 'diagnostic_provenance.yml') - with open(provenance_file, 'r') as file: - table = yaml.safe_load(file) - section = 'references' - reference_dict = {} - for filename, attributes in table.items(): - for tag in attributes[section]: - reference_dict[tag] = get_tag_value(section, tag) - - # papers describing the diagnostic and recipe - bib_entry = {} - bib_fields = ['author', 'journal', 'volume', 'pages', 'doi', 'year'] - for key in reference_dict: - reference = list(reference_dict[key].split(",")) - # "[Last name] et al., [journal abbr.], [volume], [pages], doi:[doi], [year]. - bib_entry.update({ - key: Entry('article', list(zip(bib_fields, reference ))), - }) - bib_data = BibliographyData(bib_entry) - - # model data citation information - - # observational data citation information - - # esmvaltool and other scientific software citation + bib_data = {v:k for k, v in TAGS['references'].items()} - # scientific compute cluster citation information, if applicable + for item in product.provenance.records: + for key, value in item.attributes: + # if key.namespace.prefix == 'attribute' and key.localpart in {'reference', 'references'}: + if key.namespace.prefix == 'attribute' and key.localpart in {'reference'}: + tag = bib_data[value] - # save the file - citation_file = os.path.join(self.settings['run_dir'], - 'diagnostic_citation.bibtex') - bib_data.to_string(citation_file, 'bibtex') + # print(REFERENCES_PATH) + # citation_file = Path(product.filename) + '_citation.bibtex' def __str__(self): From 34062150b6ab3d2094f7782814481a947ae0f911 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 22 Jan 2020 16:25:29 +0100 Subject: [PATCH 012/117] fix the function _write_citation_file --- esmvalcore/_config.py | 1 + esmvalcore/_task.py | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index 06453ac572..4c94b80ef7 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -26,6 +26,7 @@ def find_diagnostics(): DIAGNOSTICS_PATH = find_diagnostics() + def find_references(): """Try to find bibtex files in references folder.""" try: diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 340c4ace4d..ce5bdfc29b 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -11,14 +11,11 @@ import time from copy import deepcopy from multiprocessing import Pool -from pybtex.database import BibliographyData, Entry -import doi2bib.crossref as ref import psutil import yaml -import prov -from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, get_tag_value, REFERENCES_PATH +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, REFERENCES_PATH from ._provenance import TrackedFile, get_task_provenance logger = logging.getLogger(__name__) @@ -587,20 +584,35 @@ def _collect_provenance(self): self.name, time.time() - start) - def _write_citation_file(self, product): """Write citation information provided by the recorded provenance.""" - bib_data = {v:k for k, v in TAGS['references'].items()} - + reference_tag = {v: k for k, v in TAGS['references'].items()} + # collect info from provenance + product_entry = [] for item in product.provenance.records: for key, value in item.attributes: - # if key.namespace.prefix == 'attribute' and key.localpart in {'reference', 'references'}: - if key.namespace.prefix == 'attribute' and key.localpart in {'reference'}: - tag = bib_data[value] - - # print(REFERENCES_PATH) - # citation_file = Path(product.filename) + '_citation.bibtex' - + if (key.namespace.prefix == 'attribute' + and key.localpart in {'reference', 'references'}): + product_entry.append(value) + + # map between reference tags and entries + product_tag = [] + for key in reference_tag.keys(): + for entry in product_entry: + if key in entry and reference_tag[key] not in product_tag: + product_tag.append(reference_tag[key]) + + # save all citation info into one bibtex file + bibtex_entry = '' + for tags in product_tag: + bib_file_path = os.path.join(REFERENCES_PATH, tags + '.bibtex') + if os.path.isfile(bib_file_path): + with open(bib_file_path, 'r') as file: + bibtex_entry += '{}\n'.format(file.read()) + citation_file = (os.path.splitext(product.filename)[0] + + '_citation.bibtex') + with open(citation_file, 'w') as file: + file.write(bibtex_entry) def __str__(self): """Get human readable description.""" From 05f4ce8760fe0a6244c63318c0034a1b5f07b63f Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 22 Jan 2020 17:26:14 +0100 Subject: [PATCH 013/117] style --- esmvalcore/_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index ce5bdfc29b..4dbcbf266e 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -586,7 +586,6 @@ def _collect_provenance(self): def _write_citation_file(self, product): """Write citation information provided by the recorded provenance.""" - reference_tag = {v: k for k, v in TAGS['references'].items()} # collect info from provenance product_entry = [] for item in product.provenance.records: @@ -596,6 +595,7 @@ def _write_citation_file(self, product): product_entry.append(value) # map between reference tags and entries + reference_tag = {v: k for k, v in TAGS['references'].items()} product_tag = [] for key in reference_tag.keys(): for entry in product_entry: From 585cc92566946853e6a578b4b19a050524dd7374 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 23 Jan 2020 09:55:06 +0100 Subject: [PATCH 014/117] refactor and style --- esmvalcore/_task.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 4dbcbf266e..0faa70c3a4 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -594,7 +594,7 @@ def _write_citation_file(self, product): and key.localpart in {'reference', 'references'}): product_entry.append(value) - # map between reference tags and entries + # map between reference.tags and product.entries reference_tag = {v: k for k, v in TAGS['references'].items()} product_tag = [] for key in reference_tag.keys(): @@ -605,10 +605,14 @@ def _write_citation_file(self, product): # save all citation info into one bibtex file bibtex_entry = '' for tags in product_tag: - bib_file_path = os.path.join(REFERENCES_PATH, tags + '.bibtex') - if os.path.isfile(bib_file_path): - with open(bib_file_path, 'r') as file: + bibtex_file = os.path.join(REFERENCES_PATH, tags + '.bibtex') + if os.path.isfile(bibtex_file): + with open(bibtex_file, 'r') as file: bibtex_entry += '{}\n'.format(file.read()) + else: + raise DiagnosticError( + "The reference file ({}): does not exist.".format( + bibtex_file)) citation_file = (os.path.splitext(product.filename)[0] + '_citation.bibtex') with open(citation_file, 'w') as file: From 46f7b458effa885d4d0968b7dcad340b65567b79 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 23 Jan 2020 16:31:04 +0100 Subject: [PATCH 015/117] Add esmvaltool paper to the provenance, and style --- esmvalcore/_provenance.py | 12 +++++++++--- esmvalcore/_task.py | 1 - 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index d0c5352e2b..d966fdf390 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -8,12 +8,15 @@ from PIL.PngImagePlugin import PngInfo from prov.dot import prov_to_dot from prov.model import ProvDocument +from ._config import replace_tags from ._version import __version__ logger = logging.getLogger(__name__) ESMVALTOOL_URI_PREFIX = 'https://www.esmvaltool.org/' +# it is the technical overview and should always be cited +ESMVALTOOL_PAPER_TAG = ['righi19gmd'] def update_without_duplicating(bundle, other): @@ -31,9 +34,12 @@ def create_namespace(provenance, namespace): def get_esmvaltool_provenance(): """Create an esmvaltool run activity.""" provenance = ProvDocument() - namespace = 'software' - create_namespace(provenance, namespace) - attributes = {} # TODO: add dependencies with versions here + for namespace in ('software', 'attribute'): + create_namespace(provenance, namespace) + + # TODO: add dependencies with versions here + attributes_value = replace_tags('references', ESMVALTOOL_PAPER_TAG) + attributes = {'attribute:references': attributes_value} activity = provenance.activity( namespace + ':esmvaltool==' + __version__, other_attributes=attributes) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 0faa70c3a4..c50a212c6a 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -514,7 +514,6 @@ def _run(self, input_files): if returncode == 0: logger.debug("Script %s completed successfully", self.script) self._collect_provenance() - # self._write_citation_file() return [self.output_dir] raise DiagnosticError( From 2ea1c98e640e8c13a4fcbe68aeadd8a940305ca8 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 23 Jan 2020 17:33:12 +0100 Subject: [PATCH 016/117] update the tag --- esmvalcore/_provenance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index d966fdf390..72ed9ce9ce 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -16,7 +16,7 @@ ESMVALTOOL_URI_PREFIX = 'https://www.esmvaltool.org/' # it is the technical overview and should always be cited -ESMVALTOOL_PAPER_TAG = ['righi19gmd'] +ESMVALTOOL_PAPER_TAG = ['righi19gmdd'] def update_without_duplicating(bundle, other): From 638d08ce97e05c7fb45c1e61dc638c8685ae416d Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 27 Jan 2020 15:37:35 +0100 Subject: [PATCH 017/117] change the method to a function --- esmvalcore/_task.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index c50a212c6a..737085dcdd 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -554,6 +554,7 @@ def _collect_provenance(self): attrs = { 'script_file': self.script, } + for key in self.settings: if key not in ignore: attrs[key] = self.settings[key] @@ -583,7 +584,8 @@ def _collect_provenance(self): self.name, time.time() - start) - def _write_citation_file(self, product): + @staticmethod + def _write_citation_file(product): """Write citation information provided by the recorded provenance.""" # collect info from provenance product_entry = [] From f61be822e5e025bdb55d874891512f899ac80556 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 27 Jan 2020 15:40:39 +0100 Subject: [PATCH 018/117] fix the function get_esmvaltool_porvenance --- esmvalcore/_provenance.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index 72ed9ce9ce..e813f9162a 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -8,15 +8,15 @@ from PIL.PngImagePlugin import PngInfo from prov.dot import prov_to_dot from prov.model import ProvDocument -from ._config import replace_tags from ._version import __version__ +from ._config import replace_tags, TAGS logger = logging.getLogger(__name__) ESMVALTOOL_URI_PREFIX = 'https://www.esmvaltool.org/' # it is the technical overview and should always be cited -ESMVALTOOL_PAPER_TAG = ['righi19gmdd'] +ESMVALTOOL_PAPER_TAG = 'righi19gmdd' def update_without_duplicating(bundle, other): @@ -38,7 +38,11 @@ def get_esmvaltool_provenance(): create_namespace(provenance, namespace) # TODO: add dependencies with versions here - attributes_value = replace_tags('references', ESMVALTOOL_PAPER_TAG) + section = 'references' + if section in TAGS and ESMVALTOOL_PAPER_TAG in TAGS[section]: + attributes_value = replace_tags(section, [ESMVALTOOL_PAPER_TAG]) + else: + attributes_value = ESMVALTOOL_PAPER_TAG attributes = {'attribute:references': attributes_value} activity = provenance.activity( namespace + ':esmvaltool==' + __version__, other_attributes=attributes) From 21f09d23b80d4ee4961358927b9350c2895b118a Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 28 Jan 2020 17:40:45 +0100 Subject: [PATCH 019/117] fix the if-else condition --- esmvalcore/_task.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 737085dcdd..44349e18f0 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -611,9 +611,8 @@ def _write_citation_file(product): with open(bibtex_file, 'r') as file: bibtex_entry += '{}\n'.format(file.read()) else: - raise DiagnosticError( - "The reference file ({}): does not exist.".format( - bibtex_file)) + logger.info('The reference file %s does not exist.', + bibtex_file) citation_file = (os.path.splitext(product.filename)[0] + '_citation.bibtex') with open(citation_file, 'w') as file: From eb50f0b5690b280fa90d16b09331bb2894db22bf Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 30 Jan 2020 17:50:50 +0100 Subject: [PATCH 020/117] Add CMIP citation info, and refactor --- esmvalcore/_task.py | 162 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 137 insertions(+), 25 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 44349e18f0..6a88137c71 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -12,8 +12,11 @@ from copy import deepcopy from multiprocessing import Pool +import urllib +import json import psutil import yaml +from pybtex.database import BibliographyData, Entry from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, REFERENCES_PATH from ._provenance import TrackedFile, get_task_provenance @@ -24,6 +27,9 @@ 'mip', } +CMIP6_CITATION_URL = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch/' \ + 'cmip6?input=CMIP6.CMIP.' + def which(executable): """Find executable in PATH.""" @@ -588,35 +594,48 @@ def _collect_provenance(self): def _write_citation_file(product): """Write citation information provided by the recorded provenance.""" # collect info from provenance - product_entry = [] + citation = { + 'reference': [], + 'info_url': [], + 'tag': [], + 'file': [], + 'entry': '', + 'url': '' + } + citation['file'] = [ + os.path.splitext(product.filename)[0] + '_data_citation_url.txt', + os.path.splitext(product.filename)[0] + '_data_citation.bibtex', + ] for item in product.provenance.records: for key, value in item.attributes: if (key.namespace.prefix == 'attribute' and key.localpart in {'reference', 'references'}): - product_entry.append(value) - - # map between reference.tags and product.entries - reference_tag = {v: k for k, v in TAGS['references'].items()} - product_tag = [] - for key in reference_tag.keys(): - for entry in product_entry: - if key in entry and reference_tag[key] not in product_tag: - product_tag.append(reference_tag[key]) - - # save all citation info into one bibtex file - bibtex_entry = '' - for tags in product_tag: - bibtex_file = os.path.join(REFERENCES_PATH, tags + '.bibtex') - if os.path.isfile(bibtex_file): - with open(bibtex_file, 'r') as file: - bibtex_entry += '{}\n'.format(file.read()) - else: - logger.info('The reference file %s does not exist.', - bibtex_file) - citation_file = (os.path.splitext(product.filename)[0] - + '_citation.bibtex') - with open(citation_file, 'w') as file: - file.write(bibtex_entry) + citation['reference'].append(value) + if (key.namespace.prefix == 'attribute' + and key.localpart == 'further_info_url'): + citation['info_url'].append('.'.join( + (value.split(".org/")[1]).split(".")[1:4] + )) + + # collect CMIP6 citation, if any + if citation['info_url']: + citation['entry'], citation['url'] = _collect_cmip_citation( + citation['info_url'] + ) + + if citation['url']: + with open(citation['file'][0], 'w') as file: + file.write(citation['url']) + + # map between reference.entry and product.entry + citation['tag'] = _replace_entry(TAGS['references'], + citation['reference']) + + # collect all citation info into one bibtex file + citation['entry'] += _collect_bibtex_citation(citation['tag']) + if citation['entry']: + with open(citation['file'][1], 'w') as file: + file.write(citation['entry']) def __str__(self): """Get human readable description.""" @@ -629,6 +648,99 @@ def __str__(self): return txt +def _get_response(url): + """Return information from CMIP6 Data Citation service in json format.""" + json_data = False + try: + open_url = urllib.request.urlopen(url) + if open_url.getcode() == 200: + data = open_url.read() + json_data = json.loads(data) + else: + logger.info('Error in the CMIP citation link %s', + url) + except IOError: + logger.info('Error in receiving the CMIP citation file %s', + url) + return json_data + + +def _json_to_bibtex(data): + """Make a bibtex entry from CMIP6 Data Citation json format.""" + url = ''.join(['https://doi.org/', data['identifier']['id']]) + author_list = [] + for item in data['creators']: + author_list.append(item['creatorName']) + bib_entry = {url: Entry('misc', [ + ('url', url), + ('title', data['titles'][0]), + ('publisher', data['publisher']), + ('year', data['publicationYear']), + ('author', ' and '.join(author_list)), + ('doi', data['identifier']['id']), + ])} + bib_data = BibliographyData(bib_entry).to_string("bibtex") + return bib_data + + +def _cmip_citation(json_url): + """Get citation information from CMIP6 Data Citation Service.""" + entry = False + json_data = _get_response(json_url) + if json_data: + entry = _json_to_bibtex(json_data) + else: + logger.info('Writing the CMIP citation link %s', + json_url) + return entry + + +def _replace_entry(tags_entry, product_entry): + """Map between the entries in provenance and the entries + in config-references.yml and return tags""" + entry_tags = {v: k for k, v in tags_entry.items()} + tags = [] + for key in entry_tags.keys(): + for entry in product_entry: + if key in entry and entry_tags[key] not in tags: + tags.append(entry_tags[key]) + return tags + + +def _collect_bibtex_citation(citation_tags): + """Collect citation informtion from reference folder that + contains bibtex files""" + citation_entry = '' + for tag in citation_tags: + bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') + if os.path.isfile(bibtex_file): + with open(bibtex_file, 'r') as file: + citation_entry += '{}\n'.format(file.read()) + else: + logger.info('The reference file %s does not exist.', + bibtex_file) + return citation_entry + + +def _collect_cmip_citation(info_url): + split_str = 'cmip6?input=CMIP6.CMIP.' + citation_entry = '' + citation_url = '' + for info in info_url: + json_url = ''.join( + [CMIP6_CITATION_URL.split(split_str)[0], + 'cerarest/export', split_str, info] + ) + entry = _cmip_citation(json_url) + if entry: + citation_entry += '{}\n'.format(entry) + else: + citation_url += '{}\n'.format( + ''.join([CMIP6_CITATION_URL, info]) + ) + return citation_entry, citation_url + + def get_flattened_tasks(tasks): """Return a set of all tasks and their ancestors in `tasks`.""" return set(t for task in tasks for t in task.flatten()) From c319a9efe2adb10d8791279ac6846d0f52c68a8f Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 31 Jan 2020 11:09:02 +0100 Subject: [PATCH 021/117] remove pybtex, fix _json_to_bibtex function --- esmvalcore/_task.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 6a88137c71..2e64e2010c 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -16,7 +16,6 @@ import json import psutil import yaml -from pybtex.database import BibliographyData, Entry from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, REFERENCES_PATH from ._provenance import TrackedFile, get_task_provenance @@ -666,21 +665,20 @@ def _get_response(url): def _json_to_bibtex(data): - """Make a bibtex entry from CMIP6 Data Citation json format.""" + """Make a bibtex entry from CMIP6 Data Citation json data.""" url = ''.join(['https://doi.org/', data['identifier']['id']]) author_list = [] for item in data['creators']: author_list.append(item['creatorName']) - bib_entry = {url: Entry('misc', [ - ('url', url), - ('title', data['titles'][0]), - ('publisher', data['publisher']), - ('year', data['publicationYear']), - ('author', ' and '.join(author_list)), - ('doi', data['identifier']['id']), - ])} - bib_data = BibliographyData(bib_entry).to_string("bibtex") - return bib_data + bibtex_entry = ('@misc{'+ url + ',\n\t'\ + 'url = {' + url + '},\n\t'\ + 'title = {' + data['titles'][0] + '},\n\t'\ + 'publisher = {' + data['publisher'] + '},\n\t'\ + 'year = '+ data['publicationYear'] + ',\n\t'\ + 'author = {' + ' and '.join(author_list) + '},\n\t'\ + 'doi = {' + data['identifier']['id'] + '},\n'\ + '}') + return bibtex_entry def _cmip_citation(json_url): From 227460b8aa807d5f215d591a3c0126f88f4e53a9 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 31 Jan 2020 13:41:20 +0100 Subject: [PATCH 022/117] Refactor and style --- esmvalcore/_task.py | 89 +++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 52 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 2e64e2010c..4a751e879c 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -603,7 +603,7 @@ def _write_citation_file(product): } citation['file'] = [ os.path.splitext(product.filename)[0] + '_data_citation_url.txt', - os.path.splitext(product.filename)[0] + '_data_citation.bibtex', + os.path.splitext(product.filename)[0] + '_citation.bibtex', ] for item in product.provenance.records: for key, value in item.attributes: @@ -619,19 +619,18 @@ def _write_citation_file(product): # collect CMIP6 citation, if any if citation['info_url']: citation['entry'], citation['url'] = _collect_cmip_citation( - citation['info_url'] - ) - + citation['info_url']) if citation['url']: with open(citation['file'][0], 'w') as file: file.write(citation['url']) - # map between reference.entry and product.entry - citation['tag'] = _replace_entry(TAGS['references'], - citation['reference']) + # collect recipe citation, if any + if citation['reference']: + citation['tag'] = _replace_entry(citation['reference']) + citation['entry'] += '{}\n'.format( + _collect_bibtex_citation(citation['tag'])) - # collect all citation info into one bibtex file - citation['entry'] += _collect_bibtex_citation(citation['tag']) + # write one bibtex file if citation['entry']: with open(citation['file'][1], 'w') as file: file.write(citation['entry']) @@ -670,33 +669,20 @@ def _json_to_bibtex(data): author_list = [] for item in data['creators']: author_list.append(item['creatorName']) - bibtex_entry = ('@misc{'+ url + ',\n\t'\ - 'url = {' + url + '},\n\t'\ - 'title = {' + data['titles'][0] + '},\n\t'\ - 'publisher = {' + data['publisher'] + '},\n\t'\ - 'year = '+ data['publicationYear'] + ',\n\t'\ - 'author = {' + ' and '.join(author_list) + '},\n\t'\ - 'doi = {' + data['identifier']['id'] + '},\n'\ + bibtex_entry = ('@misc{' + url + ',\n\t' + 'url = {' + url + '},\n\t' + 'title = {' + data['titles'][0] + '},\n\t' + 'publisher = {' + data['publisher'] + '},\n\t' + 'year = ' + data['publicationYear'] + ',\n\t' + 'author = {' + ' and '.join(author_list) + '},\n\t' + 'doi = {' + data['identifier']['id'] + '},\n' '}') return bibtex_entry -def _cmip_citation(json_url): - """Get citation information from CMIP6 Data Citation Service.""" - entry = False - json_data = _get_response(json_url) - if json_data: - entry = _json_to_bibtex(json_data) - else: - logger.info('Writing the CMIP citation link %s', - json_url) - return entry - - -def _replace_entry(tags_entry, product_entry): - """Map between the entries in provenance and the entries - in config-references.yml and return tags""" - entry_tags = {v: k for k, v in tags_entry.items()} +def _replace_entry(product_entry): + """Find tags of the references in provenance""" + entry_tags = {v: k for k, v in TAGS['references'].items()} tags = [] for key in entry_tags.keys(): for entry in product_entry: @@ -705,38 +691,37 @@ def _replace_entry(tags_entry, product_entry): return tags -def _collect_bibtex_citation(citation_tags): - """Collect citation informtion from reference folder that - contains bibtex files""" - citation_entry = '' - for tag in citation_tags: +def _collect_bibtex_citation(tags): + """Collect informtion from bibtex files""" + entry = '' + for tag in tags: bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') if os.path.isfile(bibtex_file): with open(bibtex_file, 'r') as file: - citation_entry += '{}\n'.format(file.read()) + entry += '{}\n'.format(file.read()) else: logger.info('The reference file %s does not exist.', bibtex_file) - return citation_entry + return entry def _collect_cmip_citation(info_url): + """Collect information from CMIP6 Data Citation Service.""" split_str = 'cmip6?input=CMIP6.CMIP.' - citation_entry = '' - citation_url = '' + url = ''.join([CMIP6_CITATION_URL.split(split_str)[0], + 'cerarest/export', split_str]) + entry = '' + link = '' for info in info_url: - json_url = ''.join( - [CMIP6_CITATION_URL.split(split_str)[0], - 'cerarest/export', split_str, info] - ) - entry = _cmip_citation(json_url) - if entry: - citation_entry += '{}\n'.format(entry) + json_url = ''.join([url, info]) # make the json url + json_data = _get_response(json_url) + if json_data: + entry += '{}\n'.format(_json_to_bibtex(json_data)) else: - citation_url += '{}\n'.format( - ''.join([CMIP6_CITATION_URL, info]) - ) - return citation_entry, citation_url + logger.info('Writing the CMIP citation link %s', + json_url) + link += '{}\n'.format(''.join([CMIP6_CITATION_URL, info])) + return entry, link def get_flattened_tasks(tasks): From 6cccf255bdd8631f458c109f28cdcf2ca49f437d Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 31 Jan 2020 14:02:39 +0100 Subject: [PATCH 023/117] Refactor and style --- esmvalcore/_task.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 4a751e879c..9d53ffff74 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -593,18 +593,17 @@ def _collect_provenance(self): def _write_citation_file(product): """Write citation information provided by the recorded provenance.""" # collect info from provenance + file_name = os.path.splitext(product.filename)[0] citation = { 'reference': [], 'info_url': [], 'tag': [], - 'file': [], 'entry': '', - 'url': '' - } - citation['file'] = [ - os.path.splitext(product.filename)[0] + '_data_citation_url.txt', - os.path.splitext(product.filename)[0] + '_citation.bibtex', - ] + 'url': '', + 'file': [ + file_name + '_data_citation_url.txt', + file_name + '_citation.bibtex', + ]} for item in product.provenance.records: for key, value in item.attributes: if (key.namespace.prefix == 'attribute' @@ -612,9 +611,7 @@ def _write_citation_file(product): citation['reference'].append(value) if (key.namespace.prefix == 'attribute' and key.localpart == 'further_info_url'): - citation['info_url'].append('.'.join( - (value.split(".org/")[1]).split(".")[1:4] - )) + citation['info_url'].append(value) # collect CMIP6 citation, if any if citation['info_url']: @@ -708,19 +705,20 @@ def _collect_bibtex_citation(tags): def _collect_cmip_citation(info_url): """Collect information from CMIP6 Data Citation Service.""" split_str = 'cmip6?input=CMIP6.CMIP.' - url = ''.join([CMIP6_CITATION_URL.split(split_str)[0], - 'cerarest/export', split_str]) + url_stem = ''.join([CMIP6_CITATION_URL.split(split_str)[0], + 'cerarest/export', split_str]) entry = '' link = '' - for info in info_url: - json_url = ''.join([url, info]) # make the json url + for data_url in info_url: + data_info = '.'.join((data_url.split(".org/")[1]).split(".")[1:4]) + json_url = ''.join([url_stem, data_info]) # make the json url json_data = _get_response(json_url) if json_data: entry += '{}\n'.format(_json_to_bibtex(json_data)) else: logger.info('Writing the CMIP citation link %s', json_url) - link += '{}\n'.format(''.join([CMIP6_CITATION_URL, info])) + link += '{}\n'.format(''.join([CMIP6_CITATION_URL, data_info])) return entry, link From c4827575e5143c6dea07232fef98e163b32c3645 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 31 Jan 2020 14:17:36 +0100 Subject: [PATCH 024/117] fix open_url --- esmvalcore/_task.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 9d53ffff74..5ee6f2a7e8 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -646,17 +646,18 @@ def __str__(self): def _get_response(url): """Return information from CMIP6 Data Citation service in json format.""" json_data = False - try: - open_url = urllib.request.urlopen(url) - if open_url.getcode() == 200: - data = open_url.read() - json_data = json.loads(data) - else: - logger.info('Error in the CMIP citation link %s', + if url.lower().startswith('https'): + try: + open_url = urllib.request.urlopen(url) + if open_url.getcode() == 200: + data = open_url.read() + json_data = json.loads(data) + else: + logger.info('Error in the CMIP citation link %s', + url) + except IOError: + logger.info('Error in receiving the CMIP citation file %s', url) - except IOError: - logger.info('Error in receiving the CMIP citation file %s', - url) return json_data From 824f869f211efd998071cf1a34184880e7f9152b Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 31 Jan 2020 15:23:01 +0100 Subject: [PATCH 025/117] fix the _get_response function --- esmvalcore/_task.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 5ee6f2a7e8..9da3fdbc67 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -12,8 +12,7 @@ from copy import deepcopy from multiprocessing import Pool -import urllib -import json +import requests import psutil import yaml @@ -648,10 +647,9 @@ def _get_response(url): json_data = False if url.lower().startswith('https'): try: - open_url = urllib.request.urlopen(url) - if open_url.getcode() == 200: - data = open_url.read() - json_data = json.loads(data) + response = requests.get(url) + if response.status_code == 200: + json_data = response.json() else: logger.info('Error in the CMIP citation link %s', url) From 8cc7babcaf2cf1263a4f1dcc6f224b18ce0be5c0 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 31 Jan 2020 16:24:31 +0100 Subject: [PATCH 026/117] add documentation --- esmvalcore/_task.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 9da3fdbc67..186f9b5b24 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -590,7 +590,14 @@ def _collect_provenance(self): @staticmethod def _write_citation_file(product): - """Write citation information provided by the recorded provenance.""" + """ + Write citation information provided by the recorded provenance. + Recipe and cmip6 data references are saved into one bibtex file. + cmip6 data references are provided by CMIP6 data citation service. + each cmip6 data reference has a json link. In the case of internet + connection, cmip6 data references are saved into a bibtex file. + Otherwise, cmip6 data reference links are saved into a text file. + """ # collect info from provenance file_name = os.path.splitext(product.filename)[0] citation = { From 96a5e850a58f92df6645f7edb85f74c1a9001720 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 31 Jan 2020 17:00:08 +0100 Subject: [PATCH 027/117] Style --- esmvalcore/_task.py | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 186f9b5b24..1b03ecd720 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -658,11 +658,9 @@ def _get_response(url): if response.status_code == 200: json_data = response.json() else: - logger.info('Error in the CMIP citation link %s', - url) + logger.info('Error in the CMIP json link') except IOError: - logger.info('Error in receiving the CMIP citation file %s', - url) + logger.info('Error in receiving the CMIP json file') return json_data @@ -686,12 +684,12 @@ def _json_to_bibtex(data): def _replace_entry(product_entry): """Find tags of the references in provenance""" entry_tags = {v: k for k, v in TAGS['references'].items()} - tags = [] + tag_list = [] for key in entry_tags.keys(): for entry in product_entry: - if key in entry and entry_tags[key] not in tags: - tags.append(entry_tags[key]) - return tags + if key in entry and entry_tags[key] not in tag_list: + tag_list.append(entry_tags[key]) + return tag_list def _collect_bibtex_citation(tags): @@ -713,19 +711,18 @@ def _collect_cmip_citation(info_url): split_str = 'cmip6?input=CMIP6.CMIP.' url_stem = ''.join([CMIP6_CITATION_URL.split(split_str)[0], 'cerarest/export', split_str]) - entry = '' - link = '' + citation_entry = '' + citation_link = '' for data_url in info_url: data_info = '.'.join((data_url.split(".org/")[1]).split(".")[1:4]) - json_url = ''.join([url_stem, data_info]) # make the json url - json_data = _get_response(json_url) + json_data = _get_response(''.join([url_stem, data_info])) if json_data: - entry += '{}\n'.format(_json_to_bibtex(json_data)) + citation_entry += '{}\n'.format(_json_to_bibtex(json_data)) else: - logger.info('Writing the CMIP citation link %s', - json_url) - link += '{}\n'.format(''.join([CMIP6_CITATION_URL, data_info])) - return entry, link + citation_link += '{}\n'.format(''.join( + [CMIP6_CITATION_URL, data_info])) + logger.info('Returning the CMIP citation link for %s', data_info) + return citation_entry, citation_link def get_flattened_tasks(tasks): From c1214a4fcfcafd2ccc17e0a90bdc70e3c2f63d03 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 10 Feb 2020 17:41:26 +0100 Subject: [PATCH 028/117] Refactor and style --- esmvalcore/_task.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 1b03ecd720..b4ce18c988 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -612,12 +612,11 @@ def _write_citation_file(product): ]} for item in product.provenance.records: for key, value in item.attributes: - if (key.namespace.prefix == 'attribute' - and key.localpart in {'reference', 'references'}): - citation['reference'].append(value) - if (key.namespace.prefix == 'attribute' - and key.localpart == 'further_info_url'): - citation['info_url'].append(value) + if key.namespace.prefix == 'attribute': + if key.localpart in {'reference', 'references'}: + citation['reference'].append(value) + elif key.localpart == 'further_info_url': + citation['info_url'].append(value) # collect CMIP6 citation, if any if citation['info_url']: @@ -682,7 +681,7 @@ def _json_to_bibtex(data): def _replace_entry(product_entry): - """Find tags of the references in provenance""" + """Find tags of the references in provenance.""" entry_tags = {v: k for k, v in TAGS['references'].items()} tag_list = [] for key in entry_tags.keys(): @@ -693,7 +692,7 @@ def _replace_entry(product_entry): def _collect_bibtex_citation(tags): - """Collect informtion from bibtex files""" + """Collect information from bibtex files.""" entry = '' for tag in tags: bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') From b0db9ec849e0a114bab79710bc7c2bac11dcaa62 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 11 Feb 2020 13:36:04 +0100 Subject: [PATCH 029/117] add a test checking if jason data includes bibtex keys --- esmvalcore/_task.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index b4ce18c988..1c316dbd9d 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -662,6 +662,15 @@ def _get_response(url): logger.info('Error in receiving the CMIP json file') return json_data +def _valid_json_data(data): + valid_data = False + keys = ['identifier', 'creators', 'titles', 'publisher', 'publicationYear'] + if all(key in data for key in keys): + check_names = all('creatorName' in item for item in data['creators']) + if 'id' in data['identifier'] and check_names: + valid_data = True + return valid_data + def _json_to_bibtex(data): """Make a bibtex entry from CMIP6 Data Citation json data.""" @@ -715,7 +724,7 @@ def _collect_cmip_citation(info_url): for data_url in info_url: data_info = '.'.join((data_url.split(".org/")[1]).split(".")[1:4]) json_data = _get_response(''.join([url_stem, data_info])) - if json_data: + if json_data and _valid_json_data(json_data): citation_entry += '{}\n'.format(_json_to_bibtex(json_data)) else: citation_link += '{}\n'.format(''.join( From cf54ae0aeb07282795c33a09549aa283dd95d476 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 11 Feb 2020 13:50:10 +0100 Subject: [PATCH 030/117] style --- esmvalcore/_task.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 1c316dbd9d..caed46a1c4 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -601,8 +601,8 @@ def _write_citation_file(product): # collect info from provenance file_name = os.path.splitext(product.filename)[0] citation = { - 'reference': [], - 'info_url': [], + 'references': [], + 'info_urls': [], 'tag': [], 'entry': '', 'url': '', @@ -614,21 +614,21 @@ def _write_citation_file(product): for key, value in item.attributes: if key.namespace.prefix == 'attribute': if key.localpart in {'reference', 'references'}: - citation['reference'].append(value) - elif key.localpart == 'further_info_url': - citation['info_url'].append(value) + citation['references'].append(value) + elif key.localpart == 'further_info_url': + citation['info_urls'].append(value) # collect CMIP6 citation, if any - if citation['info_url']: + if citation['info_urls']: citation['entry'], citation['url'] = _collect_cmip_citation( - citation['info_url']) + citation['info_urls']) if citation['url']: with open(citation['file'][0], 'w') as file: file.write(citation['url']) # collect recipe citation, if any - if citation['reference']: - citation['tag'] = _replace_entry(citation['reference']) + if citation['references']: + citation['tag'] = _replace_entry(citation['references']) citation['entry'] += '{}\n'.format( _collect_bibtex_citation(citation['tag'])) @@ -662,6 +662,7 @@ def _get_response(url): logger.info('Error in receiving the CMIP json file') return json_data + def _valid_json_data(data): valid_data = False keys = ['identifier', 'creators', 'titles', 'publisher', 'publicationYear'] From c5bcdd5b324e258f85f22da550df04f1c304f123 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 19 Feb 2020 10:03:59 +0100 Subject: [PATCH 031/117] add new module and remove functions from task --- esmvalcore/_citation.py | 165 ++++++++++++++++++++++++++++++++++++++++ esmvalcore/_task.py | 154 +++---------------------------------- 2 files changed, 175 insertions(+), 144 deletions(-) create mode 100644 esmvalcore/_citation.py diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py new file mode 100644 index 0000000000..a6348ec51b --- /dev/null +++ b/esmvalcore/_citation.py @@ -0,0 +1,165 @@ +"""Citation module.""" +import contextlib +import datetime +import errno +import logging +import numbers +import os +import pprint +import subprocess +import threading +import time +from copy import deepcopy +from multiprocessing import Pool + +import requests +import psutil +import yaml + +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, REFERENCES_PATH +from ._provenance import TrackedFile, get_task_provenance + +logger = logging.getLogger(__name__) + +DATASET_KEYS = { + 'mip', +} + +CMIP6_URL_STEM = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch' + + +def _write_citation_file(product): + """ + Write citation information provided by the recorded provenance. + Recipe and cmip6 data references are saved into one bibtex file. + cmip6 data references are provided by CMIP6 data citation service. + each cmip6 data reference has a json link. In the case of internet + connection, cmip6 data references are saved into a bibtex file. + Otherwise, cmip6 data reference links are saved into a text file. + """ + # collect info from provenance + product_name = os.path.splitext(product.filename)[0] + product_tags = [] + product_entries = '' + product_urls = '' + citation = { + 'references': [], + 'info_urls': [], + 'tag': [], + 'entry': '', + 'url': '', + } + for item in product.provenance.records: + for key, value in item.attributes: + if key.namespace.prefix == 'attribute': + print(item.attributes[0]) + print('&&&&&&&&&&&&&&&&&&&&&&&&') + if key.localpart in {'reference', 'references'}: + product_entries += '{}\n'.format(_collect_bibtex_citation(product_tags)) + elif key.localpart == 'mip_era' and value == 'CMIP6': + json_url, info_url = _make_url(item.attributes) + cmip_entry = _collect_cmip_citation(json_url, info_url) + if cmip_entry == info_url: + product_urls += '{}\n'.format(cmip_entry) + else: + product_entries += '{}\n'.format(cmip_entry) + + # save CMIP6 url_info, if any + if product_urls: + with open(f'{product_name}_data_citation_url.txt', 'w') as file: + file.write(citation['url']) + + # write one bibtex file + if product_entries: + with open(f'{product_name}_citation.bibtex.txt', 'w') as file: + file.write(product_entries) + + +def _get_response(url): + """Return information from CMIP6 Data Citation service in json format.""" + json_data = False + if url.lower().startswith('https'): + try: + response = requests.get(url) + if response.status_code == 200: + json_data = response.json() + else: + logger.info('Error in the CMIP json link') + except IOError: + logger.info('Error in receiving the CMIP json file') + return json_data + + +def _valid_json_data(data): + valid_data = False + keys = ['identifier', 'creators', 'titles', 'publisher', 'publicationYear'] + if all(key in data for key in keys): + check_names = all('creatorName' in item for item in data['creators']) + if 'id' in data['identifier'] and check_names: + valid_data = True + return valid_data + + +def _json_to_bibtex(data): + """Make a bibtex entry from CMIP6 Data Citation json data.""" + url = ''.join(['https://doi.org/', data['identifier']['id']]) + author_list = [] + for item in data['creators']: + author_list.append(item['creatorName']) + bibtex_entry = ('@misc{' + url + ',\n\t' + 'url = {' + url + '},\n\t' + 'title = {' + data['titles'][0] + '},\n\t' + 'publisher = {' + data['publisher'] + '},\n\t' + 'year = ' + data['publicationYear'] + ',\n\t' + 'author = {' + ' and '.join(author_list) + '},\n\t' + 'doi = {' + data['identifier']['id'] + '},\n' + '}') + return bibtex_entry + + +def _replace_entry(product_entry): + """Find tags of the references in provenance.""" + entry_tags = {v: k for k, v in TAGS['references'].items()} + tag_list = [] + for key in entry_tags.keys(): + for entry in product_entry: + if key in entry and entry_tags[key] not in tag_list: + tag_list.append(entry_tags[key]) + return tag_list + + +def _collect_bibtex_citation(tags): + """Collect information from bibtex files.""" + entry = '' + for tag in tags: + bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') + if os.path.isfile(bibtex_file): + with open(bibtex_file, 'r') as file: + entry += '{}\n'.format(file.read()) + else: + logger.info('The reference file %s does not exist.', + bibtex_file) + return entry + + +def _collect_cmip_citation(json_url, info_url): + """Collect information from CMIP6 Data Citation Service.""" + bibtex_entry = info_url + json_data = _get_response(json_url) + if json_data and _valid_json_data(json_data): + bibtex_entry = _json_to_bibtex(json_data) + else: + logger.info('Invalid json link %s', json_url) + return bibtex_entry + + +def _make_url(attribute): + mip_era = attribute.get('attribute:mip_era') + activity_id = attribute.get('attribute:activity_id') + institution_id = attribute.get('attribute:institution_id') + source_id = attribute.get('attribute:source_id') + experiment_id = attribute.get('attribute:experiment_id') + url_prefix = f'{mip_era}.{activity_id}.{institution_id}.{source_id}.{experiment_id}' + json_url = f'{CMIP6_URL_STEM}/cerarest/exportcmip6?input={url_prefix}' + info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{url_prefix}' + return json_url, info_url diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index caed46a1c4..88b82e95ab 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -12,12 +12,12 @@ from copy import deepcopy from multiprocessing import Pool -import requests import psutil import yaml -from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, REFERENCES_PATH +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags from ._provenance import TrackedFile, get_task_provenance +from ._citation import _write_citation_file logger = logging.getLogger(__name__) @@ -25,9 +25,6 @@ 'mip', } -CMIP6_CITATION_URL = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch/' \ - 'cmip6?input=CMIP6.CMIP.' - def which(executable): """Find executable in PATH.""" @@ -575,68 +572,23 @@ def _collect_provenance(self): } attributes.update(deepcopy(attrs)) - for key in attributes: - if key in TAGS: - attributes[key] = replace_tags(key, attributes[key]) + section = 'references' + # for key in attributes: + # if key in TAGS: + # if key in section: + # attributes[key] = cite_tags(key, attributes[key]) + # else: + # attributes[key] = replace_tags(key, attributes[key]) product = TrackedFile(filename, attributes, ancestors) product.initialize_provenance(self.activity) product.save_provenance() - self._write_citation_file(product) + _write_citation_file(product) self.products.add(product) logger.debug("Collecting provenance of task %s took %.1f seconds", self.name, time.time() - start) - @staticmethod - def _write_citation_file(product): - """ - Write citation information provided by the recorded provenance. - Recipe and cmip6 data references are saved into one bibtex file. - cmip6 data references are provided by CMIP6 data citation service. - each cmip6 data reference has a json link. In the case of internet - connection, cmip6 data references are saved into a bibtex file. - Otherwise, cmip6 data reference links are saved into a text file. - """ - # collect info from provenance - file_name = os.path.splitext(product.filename)[0] - citation = { - 'references': [], - 'info_urls': [], - 'tag': [], - 'entry': '', - 'url': '', - 'file': [ - file_name + '_data_citation_url.txt', - file_name + '_citation.bibtex', - ]} - for item in product.provenance.records: - for key, value in item.attributes: - if key.namespace.prefix == 'attribute': - if key.localpart in {'reference', 'references'}: - citation['references'].append(value) - elif key.localpart == 'further_info_url': - citation['info_urls'].append(value) - - # collect CMIP6 citation, if any - if citation['info_urls']: - citation['entry'], citation['url'] = _collect_cmip_citation( - citation['info_urls']) - if citation['url']: - with open(citation['file'][0], 'w') as file: - file.write(citation['url']) - - # collect recipe citation, if any - if citation['references']: - citation['tag'] = _replace_entry(citation['references']) - citation['entry'] += '{}\n'.format( - _collect_bibtex_citation(citation['tag'])) - - # write one bibtex file - if citation['entry']: - with open(citation['file'][1], 'w') as file: - file.write(citation['entry']) - def __str__(self): """Get human readable description.""" txt = "{}:\nscript: {}\n{}\nsettings:\n{}\n".format( @@ -648,92 +600,6 @@ def __str__(self): return txt -def _get_response(url): - """Return information from CMIP6 Data Citation service in json format.""" - json_data = False - if url.lower().startswith('https'): - try: - response = requests.get(url) - if response.status_code == 200: - json_data = response.json() - else: - logger.info('Error in the CMIP json link') - except IOError: - logger.info('Error in receiving the CMIP json file') - return json_data - - -def _valid_json_data(data): - valid_data = False - keys = ['identifier', 'creators', 'titles', 'publisher', 'publicationYear'] - if all(key in data for key in keys): - check_names = all('creatorName' in item for item in data['creators']) - if 'id' in data['identifier'] and check_names: - valid_data = True - return valid_data - - -def _json_to_bibtex(data): - """Make a bibtex entry from CMIP6 Data Citation json data.""" - url = ''.join(['https://doi.org/', data['identifier']['id']]) - author_list = [] - for item in data['creators']: - author_list.append(item['creatorName']) - bibtex_entry = ('@misc{' + url + ',\n\t' - 'url = {' + url + '},\n\t' - 'title = {' + data['titles'][0] + '},\n\t' - 'publisher = {' + data['publisher'] + '},\n\t' - 'year = ' + data['publicationYear'] + ',\n\t' - 'author = {' + ' and '.join(author_list) + '},\n\t' - 'doi = {' + data['identifier']['id'] + '},\n' - '}') - return bibtex_entry - - -def _replace_entry(product_entry): - """Find tags of the references in provenance.""" - entry_tags = {v: k for k, v in TAGS['references'].items()} - tag_list = [] - for key in entry_tags.keys(): - for entry in product_entry: - if key in entry and entry_tags[key] not in tag_list: - tag_list.append(entry_tags[key]) - return tag_list - - -def _collect_bibtex_citation(tags): - """Collect information from bibtex files.""" - entry = '' - for tag in tags: - bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') - if os.path.isfile(bibtex_file): - with open(bibtex_file, 'r') as file: - entry += '{}\n'.format(file.read()) - else: - logger.info('The reference file %s does not exist.', - bibtex_file) - return entry - - -def _collect_cmip_citation(info_url): - """Collect information from CMIP6 Data Citation Service.""" - split_str = 'cmip6?input=CMIP6.CMIP.' - url_stem = ''.join([CMIP6_CITATION_URL.split(split_str)[0], - 'cerarest/export', split_str]) - citation_entry = '' - citation_link = '' - for data_url in info_url: - data_info = '.'.join((data_url.split(".org/")[1]).split(".")[1:4]) - json_data = _get_response(''.join([url_stem, data_info])) - if json_data and _valid_json_data(json_data): - citation_entry += '{}\n'.format(_json_to_bibtex(json_data)) - else: - citation_link += '{}\n'.format(''.join( - [CMIP6_CITATION_URL, data_info])) - logger.info('Returning the CMIP citation link for %s', data_info) - return citation_entry, citation_link - - def get_flattened_tasks(tasks): """Return a set of all tasks and their ancestors in `tasks`.""" return set(t for task in tasks for t in task.flatten()) From 824c0f9480049cc450a0caacf412ce3184612b5c Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 19 Feb 2020 10:04:31 +0100 Subject: [PATCH 032/117] fix citation parts --- esmvalcore/_config.py | 5 +++++ esmvalcore/_provenance.py | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index 4c94b80ef7..64b368df9a 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -245,3 +245,8 @@ def get_tag_value(section, tag): def replace_tags(section, tags): """Replace a list of tags with their values.""" return tuple(get_tag_value(section, tag) for tag in tags) + + +def cite_tags(section, tags): + """Replace a list of tags with their values.""" + return tuple(get_tag_value(section, tag) for tag in tags) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index e813f9162a..680b321afb 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -40,7 +40,8 @@ def get_esmvaltool_provenance(): # TODO: add dependencies with versions here section = 'references' if section in TAGS and ESMVALTOOL_PAPER_TAG in TAGS[section]: - attributes_value = replace_tags(section, [ESMVALTOOL_PAPER_TAG]) + # attributes_value = replace_tags(section, [ESMVALTOOL_PAPER_TAG]) + attributes_value = cite_tags(section, [ESMVALTOOL_PAPER_TAG]) else: attributes_value = ESMVALTOOL_PAPER_TAG attributes = {'attribute:references': attributes_value} From a0a05dac50e6e11c45aab4a20a12bc4ce5cdb75a Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 13:04:02 +0100 Subject: [PATCH 033/117] fix the citation functions, fix provenance to not replace the tags for reference --- esmvalcore/_citation.py | 74 +++++++++++++++++++++++------------------ esmvalcore/_task.py | 10 ++---- 2 files changed, 45 insertions(+), 39 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index a6348ec51b..d0122c8d0e 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -39,23 +39,13 @@ def _write_citation_file(product): """ # collect info from provenance product_name = os.path.splitext(product.filename)[0] - product_tags = [] product_entries = '' product_urls = '' - citation = { - 'references': [], - 'info_urls': [], - 'tag': [], - 'entry': '', - 'url': '', - } for item in product.provenance.records: for key, value in item.attributes: if key.namespace.prefix == 'attribute': - print(item.attributes[0]) - print('&&&&&&&&&&&&&&&&&&&&&&&&') if key.localpart in {'reference', 'references'}: - product_entries += '{}\n'.format(_collect_bibtex_citation(product_tags)) + product_entries += '{}\n'.format(_collect_bibtex_citation(value)) elif key.localpart == 'mip_era' and value == 'CMIP6': json_url, info_url = _make_url(item.attributes) cmip_entry = _collect_cmip_citation(json_url, info_url) @@ -67,11 +57,11 @@ def _write_citation_file(product): # save CMIP6 url_info, if any if product_urls: with open(f'{product_name}_data_citation_url.txt', 'w') as file: - file.write(citation['url']) + file.write(product_urls) # write one bibtex file if product_entries: - with open(f'{product_name}_citation.bibtex.txt', 'w') as file: + with open(f'{product_name}_citation.bibtex', 'w') as file: file.write(product_entries) @@ -102,18 +92,30 @@ def _valid_json_data(data): def _json_to_bibtex(data): """Make a bibtex entry from CMIP6 Data Citation json data.""" - url = ''.join(['https://doi.org/', data['identifier']['id']]) - author_list = [] - for item in data['creators']: - author_list.append(item['creatorName']) - bibtex_entry = ('@misc{' + url + ',\n\t' - 'url = {' + url + '},\n\t' - 'title = {' + data['titles'][0] + '},\n\t' - 'publisher = {' + data['publisher'] + '},\n\t' - 'year = ' + data['publicationYear'] + ',\n\t' - 'author = {' + ' and '.join(author_list) + '},\n\t' - 'doi = {' + data['identifier']['id'] + '},\n' - '}') + author_list = [item['creatorName'] for item in data['creators']] + if len(author_list) > 1: + authors = ' and '.join(author_list) + else: + authors = author_list[0] + title = data['titles'][0] + publisher = data['publisher'] + year = data['publicationYear'] + doi = data['identifier']['id'] + url = f'https://doi.org/{doi}' + + newlinetab = '\n\t' + newline = '\n' + + bibtex_entry = ( + f'{"@misc{"}{url},{newlinetab}' + f'url = {{{url}}},{newlinetab}' + f'title = {{{title}}},{newlinetab}' + f'publisher = {{{publisher}}},{newlinetab}' + f'year = {year},{newlinetab}' + f'author = {{{authors}}},{newlinetab}' + f'doi = {{{doi}}},{newline}' + f'{"}"}' + ) return bibtex_entry @@ -128,8 +130,9 @@ def _replace_entry(product_entry): return tag_list -def _collect_bibtex_citation(tags): +def _collect_bibtex_citation(value): """Collect information from bibtex files.""" + tags = value.split(',') entry = '' for tag in tags: bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') @@ -154,12 +157,19 @@ def _collect_cmip_citation(json_url, info_url): def _make_url(attribute): - mip_era = attribute.get('attribute:mip_era') - activity_id = attribute.get('attribute:activity_id') - institution_id = attribute.get('attribute:institution_id') - source_id = attribute.get('attribute:source_id') - experiment_id = attribute.get('attribute:experiment_id') - url_prefix = f'{mip_era}.{activity_id}.{institution_id}.{source_id}.{experiment_id}' + """make json and info urls based on CMIP6 Data Citation Service.""" + # the order of keys is important + localpart = { + 'mip_era': '', + 'activity_id': '', + 'institution_id': '', + 'source_id': '', + 'experiment_id': '', + } + for key, value in attribute: + if key.localpart in localpart: + localpart[key.localpart] = value + url_prefix = '.'.join(localpart.values()) json_url = f'{CMIP6_URL_STEM}/cerarest/exportcmip6?input={url_prefix}' info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{url_prefix}' return json_url, info_url diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 88b82e95ab..399d2176f6 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -572,13 +572,9 @@ def _collect_provenance(self): } attributes.update(deepcopy(attrs)) - section = 'references' - # for key in attributes: - # if key in TAGS: - # if key in section: - # attributes[key] = cite_tags(key, attributes[key]) - # else: - # attributes[key] = replace_tags(key, attributes[key]) + for key in attributes: + if key in TAGS and key not in 'references': + attributes[key] = replace_tags(key, attributes[key]) product = TrackedFile(filename, attributes, ancestors) product.initialize_provenance(self.activity) From 3a87afe9bb98709afa6cb76254d669afccf60522 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 15:05:15 +0100 Subject: [PATCH 034/117] keep the references tags and not to replace them --- esmvalcore/_provenance.py | 8 +------- esmvalcore/_recipe.py | 2 +- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index 680b321afb..3996febb4d 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -10,7 +10,6 @@ from prov.model import ProvDocument from ._version import __version__ -from ._config import replace_tags, TAGS logger = logging.getLogger(__name__) @@ -38,12 +37,7 @@ def get_esmvaltool_provenance(): create_namespace(provenance, namespace) # TODO: add dependencies with versions here - section = 'references' - if section in TAGS and ESMVALTOOL_PAPER_TAG in TAGS[section]: - # attributes_value = replace_tags(section, [ESMVALTOOL_PAPER_TAG]) - attributes_value = cite_tags(section, [ESMVALTOOL_PAPER_TAG]) - else: - attributes_value = ESMVALTOOL_PAPER_TAG + attributes_value = ESMVALTOOL_PAPER_TAG attributes = {'attribute:references': attributes_value} activity = provenance.activity( namespace + ':esmvaltool==' + __version__, other_attributes=attributes) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 7f20c5ace1..dcb65704f7 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -949,7 +949,7 @@ def _initalize_provenance(self, raw_documentation): """Initialize the recipe provenance.""" doc = deepcopy(raw_documentation) for key in doc: - if key in TAGS: + if key in TAGS and key not in 'references': doc[key] = replace_tags(key, doc[key]) return get_recipe_provenance(doc, self._filename) From b7a6773b771bdc3f8b2c7a861e6c954b5f27a79c Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 15:06:55 +0100 Subject: [PATCH 035/117] remove unnecessary imports and refactor --- esmvalcore/_citation.py | 96 ++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 59 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index d0122c8d0e..b5e9486c14 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -1,30 +1,12 @@ """Citation module.""" -import contextlib -import datetime -import errno -import logging -import numbers import os -import pprint -import subprocess -import threading -import time -from copy import deepcopy -from multiprocessing import Pool - +import logging +import re import requests -import psutil -import yaml - -from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, REFERENCES_PATH -from ._provenance import TrackedFile, get_task_provenance +from ._config import REFERENCES_PATH logger = logging.getLogger(__name__) -DATASET_KEYS = { - 'mip', -} - CMIP6_URL_STEM = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch' @@ -39,13 +21,14 @@ def _write_citation_file(product): """ # collect info from provenance product_name = os.path.splitext(product.filename)[0] + products_tags = [] product_entries = '' product_urls = '' for item in product.provenance.records: for key, value in item.attributes: if key.namespace.prefix == 'attribute': if key.localpart in {'reference', 'references'}: - product_entries += '{}\n'.format(_collect_bibtex_citation(value)) + products_tags.append(value) elif key.localpart == 'mip_era' and value == 'CMIP6': json_url, info_url = _make_url(item.attributes) cmip_entry = _collect_cmip_citation(json_url, info_url) @@ -59,12 +42,25 @@ def _write_citation_file(product): with open(f'{product_name}_data_citation_url.txt', 'w') as file: file.write(product_urls) + # convert tags to bibtex entries + if products_tags: + # make tags clean and unique + tags = list(set(_clean_tags(products_tags))) + for tag in tags: + product_entries += '{}\n'.format(_collect_bibtex_citation(tag)) + # write one bibtex file if product_entries: with open(f'{product_name}_citation.bibtex', 'w') as file: file.write(product_entries) +def _clean_tags(tags): + """some tages are combined in one string variable in provenance.""" + pattern = re.compile(r'\w+') + return pattern.findall(str(tags)) + + def _get_response(url): """Return information from CMIP6 Data Citation service in json format.""" json_data = False @@ -93,55 +89,37 @@ def _valid_json_data(data): def _json_to_bibtex(data): """Make a bibtex entry from CMIP6 Data Citation json data.""" author_list = [item['creatorName'] for item in data['creators']] - if len(author_list) > 1: - authors = ' and '.join(author_list) - else: + if author_list[0] == author_list[-1]: authors = author_list[0] + else: + authors = ' and '.join(author_list) title = data['titles'][0] publisher = data['publisher'] year = data['publicationYear'] doi = data['identifier']['id'] url = f'https://doi.org/{doi}' - - newlinetab = '\n\t' - newline = '\n' - bibtex_entry = ( - f'{"@misc{"}{url},{newlinetab}' - f'url = {{{url}}},{newlinetab}' - f'title = {{{title}}},{newlinetab}' - f'publisher = {{{publisher}}},{newlinetab}' - f'year = {year},{newlinetab}' - f'author = {{{authors}}},{newlinetab}' - f'doi = {{{doi}}},{newline}' - f'{"}"}' + f'{"@misc{"}{url},\n\t' + f'url = {{{url}}},\n\t' + f'title = {{{title}}},\n\t' + f'publisher = {{{publisher}}},\n\t' + f'year = {year},\n\t' + f'author = {{{authors}}},\n\t' + f'doi = {{{doi}}},\n' + f'{"}"}\n' ) return bibtex_entry -def _replace_entry(product_entry): - """Find tags of the references in provenance.""" - entry_tags = {v: k for k, v in TAGS['references'].items()} - tag_list = [] - for key in entry_tags.keys(): - for entry in product_entry: - if key in entry and entry_tags[key] not in tag_list: - tag_list.append(entry_tags[key]) - return tag_list - - -def _collect_bibtex_citation(value): +def _collect_bibtex_citation(tag): """Collect information from bibtex files.""" - tags = value.split(',') - entry = '' - for tag in tags: - bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') - if os.path.isfile(bibtex_file): - with open(bibtex_file, 'r') as file: - entry += '{}\n'.format(file.read()) - else: - logger.info('The reference file %s does not exist.', - bibtex_file) + bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') + if os.path.isfile(bibtex_file): + with open(bibtex_file, 'r') as file: + entry = '{}'.format(file.read()) + else: + logger.info('The reference file %s does not exist.', + bibtex_file) return entry From b88e2dd9860e4c6d9d9d2e92ef654da451b7e9e9 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 16:28:25 +0100 Subject: [PATCH 036/117] update the documentation --- esmvalcore/_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index 64b368df9a..408c6e110d 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -28,7 +28,7 @@ def find_diagnostics(): def find_references(): - """Try to find bibtex files in references folder.""" + """Try to find the path for references folder.""" try: import esmvaltool except ImportError: From 438960e276a1b6b0cc34527d544f6b2feaad5e73 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 16:30:41 +0100 Subject: [PATCH 037/117] check if the reference folder does not exist --- esmvalcore/_citation.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index b5e9486c14..d3d65f932d 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -113,13 +113,16 @@ def _json_to_bibtex(data): def _collect_bibtex_citation(tag): """Collect information from bibtex files.""" - bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') - if os.path.isfile(bibtex_file): - with open(bibtex_file, 'r') as file: - entry = '{}'.format(file.read()) + if REFERENCES_PATH: + bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') + if os.path.isfile(bibtex_file): + with open(bibtex_file, 'r') as file: + entry = '{}'.format(file.read()) + else: + raise ValueError('The reference file {} does not exist.'.format(bibtex_file)) else: - logger.info('The reference file %s does not exist.', - bibtex_file) + logger.info('The reference folder does not exist.') + entry = '' return entry From 36677e39a354469c6c09a7f93cd95c6d1e815c50 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 17:10:14 +0100 Subject: [PATCH 038/117] remove validating json data --- esmvalcore/_citation.py | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index d3d65f932d..8f028babed 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -76,28 +76,24 @@ def _get_response(url): return json_data -def _valid_json_data(data): - valid_data = False - keys = ['identifier', 'creators', 'titles', 'publisher', 'publicationYear'] - if all(key in data for key in keys): - check_names = all('creatorName' in item for item in data['creators']) - if 'id' in data['identifier'] and check_names: - valid_data = True - return valid_data - - def _json_to_bibtex(data): """Make a bibtex entry from CMIP6 Data Citation json data.""" - author_list = [item['creatorName'] for item in data['creators']] - if author_list[0] == author_list[-1]: - authors = author_list[0] - else: - authors = ' and '.join(author_list) - title = data['titles'][0] - publisher = data['publisher'] - year = data['publicationYear'] - doi = data['identifier']['id'] + if data.get('creators', False): + author_list = [item.get('creatorName', '') for item in data['creators']] + if author_list: + if author_list[0] == author_list[-1]: + authors = author_list[0] + else: + authors = ' and '.join(author_list) + + title = data.get('titles', ['title not found'])[0] + publisher = data.get('publisher', 'publisher not found') + year = data.get('publicationYear', 'publicationYear not found') + + if data.get('identifier', False): + doi = data.get('identifier').get('id', 'doi not found') url = f'https://doi.org/{doi}' + bibtex_entry = ( f'{"@misc{"}{url},\n\t' f'url = {{{url}}},\n\t' @@ -130,7 +126,7 @@ def _collect_cmip_citation(json_url, info_url): """Collect information from CMIP6 Data Citation Service.""" bibtex_entry = info_url json_data = _get_response(json_url) - if json_data and _valid_json_data(json_data): + if json_data: bibtex_entry = _json_to_bibtex(json_data) else: logger.info('Invalid json link %s', json_url) From 748987ce123c88475fd171ebdcfdfe67556e5899 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 17:21:40 +0100 Subject: [PATCH 039/117] refactor json to bibtex function --- esmvalcore/_citation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 8f028babed..ca053f9e66 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -83,6 +83,8 @@ def _json_to_bibtex(data): if author_list: if author_list[0] == author_list[-1]: authors = author_list[0] + if not authors: + authors = 'creatorName not found' else: authors = ' and '.join(author_list) From 299bd841551804040f7351b752ad685784384115 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 17:54:54 +0100 Subject: [PATCH 040/117] refactor --- esmvalcore/_citation.py | 66 +++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index ca053f9e66..d0bf2b7d9e 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -12,36 +12,44 @@ def _write_citation_file(product): """ - Write citation information provided by the recorded provenance. - Recipe and cmip6 data references are saved into one bibtex file. - cmip6 data references are provided by CMIP6 data citation service. - each cmip6 data reference has a json link. In the case of internet - connection, cmip6 data references are saved into a bibtex file. - Otherwise, cmip6 data reference links are saved into a text file. + Write citation information provided by the recorded provenance. + + Recipe and cmip6 data references are saved into one bibtex file. + cmip6 data references are provided by CMIP6 data citation service. + each cmip6 data reference has a json link. In the case of internet + connection, cmip6 data references are saved into a bibtex file. + Otherwise, cmip6 data reference links are saved into a text file. """ # collect info from provenance product_name = os.path.splitext(product.filename)[0] products_tags = [] product_entries = '' product_urls = '' + product_info_urls = [] + product_json_urls = [] + for item in product.provenance.records: for key, value in item.attributes: if key.namespace.prefix == 'attribute': if key.localpart in {'reference', 'references'}: products_tags.append(value) elif key.localpart == 'mip_era' and value == 'CMIP6': - json_url, info_url = _make_url(item.attributes) - cmip_entry = _collect_cmip_citation(json_url, info_url) - if cmip_entry == info_url: - product_urls += '{}\n'.format(cmip_entry) - else: - product_entries += '{}\n'.format(cmip_entry) + url_prefix = _make_url_prefix(item.attributes) + product_info_urls.append(_make_info_url(url_prefix)) + product_json_urls.append(_make_json_url(url_prefix)) # save CMIP6 url_info, if any - if product_urls: + if product_info_urls: + for info_url in product_info_urls: + product_urls += '{}\n'.format(info_url) with open(f'{product_name}_data_citation_url.txt', 'w') as file: file.write(product_urls) + # convert json_urls to bibtex entries + if product_json_urls: + for json_url in product_json_urls: + product_entries += '{}\n'.format(_collect_cmip_citation(json_url)) + # convert tags to bibtex entries if products_tags: # make tags clean and unique @@ -56,7 +64,7 @@ def _write_citation_file(product): def _clean_tags(tags): - """some tages are combined in one string variable in provenance.""" + """Clean the tages that are recorded as str by provenance.""" pattern = re.compile(r'\w+') return pattern.findall(str(tags)) @@ -79,7 +87,9 @@ def _get_response(url): def _json_to_bibtex(data): """Make a bibtex entry from CMIP6 Data Citation json data.""" if data.get('creators', False): - author_list = [item.get('creatorName', '') for item in data['creators']] + author_list = [ + item.get('creatorName', '') for item in data['creators'] + ] if author_list: if author_list[0] == author_list[-1]: authors = author_list[0] @@ -105,7 +115,7 @@ def _json_to_bibtex(data): f'author = {{{authors}}},\n\t' f'doi = {{{doi}}},\n' f'{"}"}\n' - ) + ) return bibtex_entry @@ -117,26 +127,28 @@ def _collect_bibtex_citation(tag): with open(bibtex_file, 'r') as file: entry = '{}'.format(file.read()) else: - raise ValueError('The reference file {} does not exist.'.format(bibtex_file)) + raise ValueError( + 'The reference file {} does not exist.'.format(bibtex_file) + ) else: logger.info('The reference folder does not exist.') entry = '' return entry -def _collect_cmip_citation(json_url, info_url): +def _collect_cmip_citation(json_url): """Collect information from CMIP6 Data Citation Service.""" - bibtex_entry = info_url json_data = _get_response(json_url) if json_data: bibtex_entry = _json_to_bibtex(json_data) else: logger.info('Invalid json link %s', json_url) + bibtex_entry = 'Invalid json link' return bibtex_entry -def _make_url(attribute): - """make json and info urls based on CMIP6 Data Citation Service.""" +def _make_url_prefix(attribute): + """Make url prefix based on CMIP6 Data Citation Service.""" # the order of keys is important localpart = { 'mip_era': '', @@ -149,6 +161,16 @@ def _make_url(attribute): if key.localpart in localpart: localpart[key.localpart] = value url_prefix = '.'.join(localpart.values()) + return url_prefix + + +def _make_json_url(url_prefix): + """Make json url based on CMIP6 Data Citation Service.""" json_url = f'{CMIP6_URL_STEM}/cerarest/exportcmip6?input={url_prefix}' + return json_url + + +def _make_info_url(url_prefix): + """Make info url based on CMIP6 Data Citation Service.""" info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{url_prefix}' - return json_url, info_url + return info_url From 6508877752eb0a457ee13485b771d4f1e2403975 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 18:10:11 +0100 Subject: [PATCH 041/117] refactor --- esmvalcore/_citation.py | 45 +++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index d0bf2b7d9e..7575ac6bff 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -22,32 +22,20 @@ def _write_citation_file(product): """ # collect info from provenance product_name = os.path.splitext(product.filename)[0] - products_tags = [] product_entries = '' - product_urls = '' - product_info_urls = [] - product_json_urls = [] - - for item in product.provenance.records: - for key, value in item.attributes: - if key.namespace.prefix == 'attribute': - if key.localpart in {'reference', 'references'}: - products_tags.append(value) - elif key.localpart == 'mip_era' and value == 'CMIP6': - url_prefix = _make_url_prefix(item.attributes) - product_info_urls.append(_make_info_url(url_prefix)) - product_json_urls.append(_make_json_url(url_prefix)) + urls = '' + products_tags, json_urls, info_urls = _get_citation_info(product) # save CMIP6 url_info, if any - if product_info_urls: - for info_url in product_info_urls: - product_urls += '{}\n'.format(info_url) + if info_urls: + for info_url in info_urls: + urls += '{}\n'.format(info_url) with open(f'{product_name}_data_citation_url.txt', 'w') as file: - file.write(product_urls) + file.write(urls) # convert json_urls to bibtex entries - if product_json_urls: - for json_url in product_json_urls: + if json_urls: + for json_url in json_urls: product_entries += '{}\n'.format(_collect_cmip_citation(json_url)) # convert tags to bibtex entries @@ -63,6 +51,23 @@ def _write_citation_file(product): file.write(product_entries) +def _get_citation_info(product): + """Collect tags, and urls.""" + info_urls = [] + json_urls = [] + tags = [] + for item in product.provenance.records: + for key, value in item.attributes: + if key.namespace.prefix == 'attribute': + if key.localpart in {'reference', 'references'}: + tags.append(value) + elif key.localpart == 'mip_era' and value == 'CMIP6': + url_prefix = _make_url_prefix(item.attributes) + info_urls.append(_make_info_url(url_prefix)) + json_urls.append(_make_json_url(url_prefix)) + return tags, json_urls, info_urls + + def _clean_tags(tags): """Clean the tages that are recorded as str by provenance.""" pattern = re.compile(r'\w+') From df3b00822afe40b9cc33e8ef2537d51ec94fb1a8 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 09:44:11 +0100 Subject: [PATCH 042/117] remove unused function --- esmvalcore/_config.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index 408c6e110d..9f77fdb3e0 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -245,8 +245,3 @@ def get_tag_value(section, tag): def replace_tags(section, tags): """Replace a list of tags with their values.""" return tuple(get_tag_value(section, tag) for tag in tags) - - -def cite_tags(section, tags): - """Replace a list of tags with their values.""" - return tuple(get_tag_value(section, tag) for tag in tags) From 7b1c339b07dd836c00b45e32e74344fd1bf4a927 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 09:54:48 +0100 Subject: [PATCH 043/117] refactor wrtite and save functions --- esmvalcore/_citation.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 7575ac6bff..a2560fbae9 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -22,16 +22,32 @@ def _write_citation_file(product): """ # collect info from provenance product_name = os.path.splitext(product.filename)[0] + info_urls = [] + json_urls = [] + products_tags = [] + for item in product.provenance.records: + for key, value in item.attributes: + if key.namespace.prefix == 'attribute': + if key.localpart in {'reference', 'references'}: + products_tags.append(value) + elif key.localpart == 'mip_era' and value == 'CMIP6': + url_prefix = _make_url_prefix(item.attributes) + info_urls.append(_make_info_url(url_prefix)) + json_urls.append(_make_json_url(url_prefix)) + + _save_citation_info(product_name, products_tags, json_urls, info_urls) + + +def _save_citation_info(product_name, products_tags, json_urls, info_urls): product_entries = '' - urls = '' - products_tags, json_urls, info_urls = _get_citation_info(product) + product_urls = '' # save CMIP6 url_info, if any if info_urls: for info_url in info_urls: - urls += '{}\n'.format(info_url) + product_urls += '{}\n'.format(info_url) with open(f'{product_name}_data_citation_url.txt', 'w') as file: - file.write(urls) + file.write(product_urls) # convert json_urls to bibtex entries if json_urls: @@ -51,23 +67,6 @@ def _write_citation_file(product): file.write(product_entries) -def _get_citation_info(product): - """Collect tags, and urls.""" - info_urls = [] - json_urls = [] - tags = [] - for item in product.provenance.records: - for key, value in item.attributes: - if key.namespace.prefix == 'attribute': - if key.localpart in {'reference', 'references'}: - tags.append(value) - elif key.localpart == 'mip_era' and value == 'CMIP6': - url_prefix = _make_url_prefix(item.attributes) - info_urls.append(_make_info_url(url_prefix)) - json_urls.append(_make_json_url(url_prefix)) - return tags, json_urls, info_urls - - def _clean_tags(tags): """Clean the tages that are recorded as str by provenance.""" pattern = re.compile(r'\w+') From 622bb86afaf490904a0eb811fcbb37d45b17b8da Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 10:03:08 +0100 Subject: [PATCH 044/117] remove new line --- esmvalcore/_task.py | 1 - 1 file changed, 1 deletion(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 399d2176f6..ef8c586734 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -555,7 +555,6 @@ def _collect_provenance(self): attrs = { 'script_file': self.script, } - for key in self.settings: if key not in ignore: attrs[key] = self.settings[key] From 0968425c1cdf0acc416d25ddabbcf3ac9a350461 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 10:59:37 +0100 Subject: [PATCH 045/117] use diagnostics path instaed of finding references path --- esmvalcore/_config.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index 9f77fdb3e0..c8e08381db 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -27,18 +27,6 @@ def find_diagnostics(): DIAGNOSTICS_PATH = find_diagnostics() -def find_references(): - """Try to find the path for references folder.""" - try: - import esmvaltool - except ImportError: - return '' - return os.path.join(os.path.dirname(esmvaltool.__file__), 'references') - - -REFERENCES_PATH = find_references() - - def read_config_user_file(config_file, recipe_name): """Read config user file and store settings in a dictionary.""" with open(config_file, 'r') as file: From e7d6bd70240f96c5a28cc286652af46e698cecb5 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 11:29:30 +0100 Subject: [PATCH 046/117] use pathlib instead of os.path --- esmvalcore/_citation.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index a2560fbae9..79322f473c 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -2,8 +2,15 @@ import os import logging import re +from pathlib import Path import requests -from ._config import REFERENCES_PATH + +from ._config import DIAGNOSTICS_PATH + +if DIAGNOSTICS_PATH: + REFERENCES_PATH = Path(DIAGNOSTICS_PATH) / 'references' +else: + REFERENCES_PATH = '' logger = logging.getLogger(__name__) @@ -126,10 +133,9 @@ def _json_to_bibtex(data): def _collect_bibtex_citation(tag): """Collect information from bibtex files.""" if REFERENCES_PATH: - bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') - if os.path.isfile(bibtex_file): - with open(bibtex_file, 'r') as file: - entry = '{}'.format(file.read()) + bibtex_file = REFERENCES_PATH / f'{tag}.bibtex' + if bibtex_file.is_file(): + entry = bibtex_file.read_text() else: raise ValueError( 'The reference file {} does not exist.'.format(bibtex_file) From 776bd72338309c96785c8c3e2c6dc7016a69b10f Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 13:09:38 +0100 Subject: [PATCH 047/117] add esmvaltool technical paper as default citation entry --- esmvalcore/_citation.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 79322f473c..adee2d782b 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -16,6 +16,27 @@ CMIP6_URL_STEM = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch' +# it is the technical overview and should always be cited +ESMVALTOOL_PAPER = ( + '@article{righi19gmdd,\n\t' + 'doi = {10.5194/gmd-2019-226},\n\t' + 'url = {https://doi.org/10.5194%2Fgmd-2019-226},\n\t' + 'year = 2019,\n\t' + 'month = {sep},\n\t' + 'publisher = {Copernicus {GmbH}},\n\t' + 'author = {Mattia Righi and Bouwe Andela and Veronika Eyring ' + 'and Axel Lauer and Valeriu Predoi and Manuel Schlund ' + 'and Javier Vegas-Regidor and Lisa Bock and Björn Brötz ' + 'and Lee de Mora and Faruk Diblen and Laura Dreyer ' + 'and Niels Drost and Paul Earnshaw and Birgit Hassler ' + 'and Nikolay Koldunov and Bill Little and Saskia Loosveldt Tomas ' + 'and Klaus Zimmermann},\n\t' + 'title = {{ESMValTool} v2.0 ' + '{\\&}amp$\\mathsemicolon${\\#}8211$\\mathsemicolon$ ' + 'Technical overview}\n' + '}\n' +) + def _write_citation_file(product): """ @@ -70,8 +91,12 @@ def _save_citation_info(product_name, products_tags, json_urls, info_urls): # write one bibtex file if product_entries: - with open(f'{product_name}_citation.bibtex', 'w') as file: - file.write(product_entries) + bibtex_content = product_entries + else: + # add the technical overview paper that should always be cited + bibtex_content = ESMVALTOOL_PAPER + with open(f'{product_name}_citation.bibtex', 'w') as file: + file.write(bibtex_content) def _clean_tags(tags): From d51a3f038e59cb3c4346d34059ab58f1f76b4e51 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 13:51:57 +0100 Subject: [PATCH 048/117] fix the logger error message --- esmvalcore/_citation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index adee2d782b..14d620d503 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -114,7 +114,7 @@ def _get_response(url): if response.status_code == 200: json_data = response.json() else: - logger.info('Error in the CMIP json link') + logger.info('Error in the CMIP json link: %s', url) except IOError: logger.info('Error in receiving the CMIP json file') return json_data From b835eacd679292bc80f66cd6d0d036bca13b686b Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 15:40:01 +0100 Subject: [PATCH 049/117] style and refactor --- esmvalcore/_citation.py | 18 +++++++++--------- esmvalcore/_task.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 14d620d503..25507fce66 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -38,7 +38,7 @@ ) -def _write_citation_file(product): +def _write_citation_file(filename, provenance): """ Write citation information provided by the recorded provenance. @@ -49,24 +49,24 @@ def _write_citation_file(product): Otherwise, cmip6 data reference links are saved into a text file. """ # collect info from provenance - product_name = os.path.splitext(product.filename)[0] + product_name = os.path.splitext(filename)[0] info_urls = [] json_urls = [] - products_tags = [] - for item in product.provenance.records: + product_tags = [] + for item in provenance.records: for key, value in item.attributes: if key.namespace.prefix == 'attribute': if key.localpart in {'reference', 'references'}: - products_tags.append(value) + product_tags.append(value) elif key.localpart == 'mip_era' and value == 'CMIP6': url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) json_urls.append(_make_json_url(url_prefix)) - _save_citation_info(product_name, products_tags, json_urls, info_urls) + _save_citation_info(product_name, product_tags, json_urls, info_urls) -def _save_citation_info(product_name, products_tags, json_urls, info_urls): +def _save_citation_info(product_name, product_tags, json_urls, info_urls): product_entries = '' product_urls = '' @@ -83,9 +83,9 @@ def _save_citation_info(product_name, products_tags, json_urls, info_urls): product_entries += '{}\n'.format(_collect_cmip_citation(json_url)) # convert tags to bibtex entries - if products_tags: + if product_tags: # make tags clean and unique - tags = list(set(_clean_tags(products_tags))) + tags = list(set(_clean_tags(product_tags))) for tag in tags: product_entries += '{}\n'.format(_collect_bibtex_citation(tag)) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index ef8c586734..a73436d4a8 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -578,7 +578,7 @@ def _collect_provenance(self): product = TrackedFile(filename, attributes, ancestors) product.initialize_provenance(self.activity) product.save_provenance() - _write_citation_file(product) + _write_citation_file(product.filename, product.provenance) self.products.add(product) logger.debug("Collecting provenance of task %s took %.1f seconds", self.name, From 800cdb9847c8f2fc2141288728194faddc18e25f Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 17:32:08 +0100 Subject: [PATCH 050/117] refactor --- esmvalcore/_citation.py | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 25507fce66..72260c8e81 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -83,20 +83,18 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): product_entries += '{}\n'.format(_collect_cmip_citation(json_url)) # convert tags to bibtex entries - if product_tags: - # make tags clean and unique - tags = list(set(_clean_tags(product_tags))) - for tag in tags: - product_entries += '{}\n'.format(_collect_bibtex_citation(tag)) - - # write one bibtex file - if product_entries: - bibtex_content = product_entries + if REFERENCES_PATH: + if product_tags: + # make tags clean and unique + tags = list(set(_clean_tags(product_tags))) + for tag in tags: + product_entries += '{}\n'.format(_collect_bibtex_citation(tag)) else: # add the technical overview paper that should always be cited - bibtex_content = ESMVALTOOL_PAPER + logger.info('The reference folder does not exist.') + product_entries = ESMVALTOOL_PAPER with open(f'{product_name}_citation.bibtex', 'w') as file: - file.write(bibtex_content) + file.write(product_entries) def _clean_tags(tags): @@ -157,17 +155,13 @@ def _json_to_bibtex(data): def _collect_bibtex_citation(tag): """Collect information from bibtex files.""" - if REFERENCES_PATH: - bibtex_file = REFERENCES_PATH / f'{tag}.bibtex' - if bibtex_file.is_file(): - entry = bibtex_file.read_text() - else: - raise ValueError( - 'The reference file {} does not exist.'.format(bibtex_file) - ) + bibtex_file = REFERENCES_PATH / f'{tag}.bibtex' + if bibtex_file.is_file(): + entry = bibtex_file.read_text() else: - logger.info('The reference folder does not exist.') - entry = '' + raise ValueError( + 'The reference file {} does not exist.'.format(bibtex_file) + ) return entry From b96dc6da43582a493883d75b9b25c3b3b02733dc Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 18:05:04 +0100 Subject: [PATCH 051/117] fix broken tests due to removing references and replace tags --- tests/integration/test_recipe.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index cff40fcef8..89271d79f8 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -14,6 +14,7 @@ from esmvalcore._task import DiagnosticTask from esmvalcore.preprocessor import DEFAULT_ORDER, PreprocessingTask from esmvalcore.preprocessor._io import concatenate_callback +from esmvalcore._citation import REFERENCES_PATH from .test_diagnostic_run import write_config_user_file from .test_provenance import check_provenance @@ -1180,11 +1181,6 @@ def simulate_diagnostic_run(diagnostic_task): 'name': 'Bouwe Andela', }, }, - 'references': { - 'acknow_author': "Please acknowledge the author(s).", - 'contact_authors': "Please contact the author(s) ...", - 'acknow_project': "Please acknowledge the project(s).", - }, 'projects': { 'c3s-magic': 'C3S MAGIC project', }, @@ -1261,7 +1257,7 @@ def test_diagnostic_task_provenance( key).pop() == record[key] # Check that diagnostic script tags have been added - for key in ('statistics', 'domains', 'authors', 'references'): + for key in ('statistics', 'domains', 'authors'): assert product.attributes[key] == tuple(TAGS[key][k] for k in record[key]) @@ -1277,7 +1273,7 @@ def test_diagnostic_task_provenance( for key in ('description', 'references'): value = src['documentation'][key] if key == 'references': - value = ', '.join(TAGS[key][k] for k in value) + value = ', '.join(src['documentation'][key]) assert recipe_record[0].get_attribute('attribute:' + key).pop() == value From 8e403666483e203dec2788c8066bc63dc9a204bb Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 18:05:39 +0100 Subject: [PATCH 052/117] add a unit test for citation.py --- tests/integration/test_citation.py | 42 ++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tests/integration/test_citation.py diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py new file mode 100644 index 0000000000..6838ee3663 --- /dev/null +++ b/tests/integration/test_citation.py @@ -0,0 +1,42 @@ +"""Test _citation.py.""" +from pathlib import Path +from prov.model import ProvDocument + +import esmvalcore +from esmvalcore._citation import _write_citation_file, ESMVALTOOL_PAPER +from esmvalcore._provenance import ESMVALTOOL_URI_PREFIX + +# Two test cases: +# 1: references are replaced with bibtex +# 2: CMIP6 citation info is retrieved from ES-DOC + + +def test_references(tmp_path, monkeypatch): + """Test1: references are replaced with bibtex.""" + # Create fake provenance + provenance = ProvDocument() + provenance.add_namespace('file', uri=ESMVALTOOL_URI_PREFIX + 'file') + provenance.add_namespace('attribute', + uri=ESMVALTOOL_URI_PREFIX + 'attribute') + + filename = str(tmp_path / 'output.nc') + attributes = {'attribute:references': 'test_tag'} + provenance.entity('file:' + filename, attributes) + + # Create fake bibtex references tag file + references_path = tmp_path / 'references' + references_path.mkdir() + monkeypatch.setattr( + esmvalcore._citation, 'REFERENCES_PATH', references_path + ) + fake_bibtex_file = references_path / 'test_tag.bibtex' + fake_bibtex = "Fake bibtex file content\n" + fake_bibtex_file.write_text(fake_bibtex) + + _write_citation_file(filename, provenance) + citation_file = tmp_path / 'output_citation.bibtex' + citation = citation_file.read_text() + assert citation == '\n'.join([ESMVALTOOL_PAPER, fake_bibtex]) + + +# def test_cmip6_data_citation(tmp_path, monkeypatch): From f9e89c8e66cc7a2232d9767198d77463f6c04b83 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 18:11:21 +0100 Subject: [PATCH 053/117] safe to remove esmvaltool bibtex file --- esmvalcore/_citation.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 72260c8e81..40d8f448ea 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -5,7 +5,7 @@ from pathlib import Path import requests -from ._config import DIAGNOSTICS_PATH +from ._config import DIAGNOSTICS_PATH, ESMVALTOOL_PAPER_TAG if DIAGNOSTICS_PATH: REFERENCES_PATH = Path(DIAGNOSTICS_PATH) / 'references' @@ -67,20 +67,20 @@ def _write_citation_file(filename, provenance): def _save_citation_info(product_name, product_tags, json_urls, info_urls): - product_entries = '' - product_urls = '' + citation_entries = [ESMVALTOOL_PAPER] + citation_urls = '' # save CMIP6 url_info, if any if info_urls: for info_url in info_urls: - product_urls += '{}\n'.format(info_url) + citation_urls += '{}\n'.format(info_url) with open(f'{product_name}_data_citation_url.txt', 'w') as file: - file.write(product_urls) + file.write(citation_urls) # convert json_urls to bibtex entries if json_urls: for json_url in json_urls: - product_entries += '{}\n'.format(_collect_cmip_citation(json_url)) + citation_entries.append(_collect_cmip_citation(json_url)) # convert tags to bibtex entries if REFERENCES_PATH: @@ -88,13 +88,11 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): # make tags clean and unique tags = list(set(_clean_tags(product_tags))) for tag in tags: - product_entries += '{}\n'.format(_collect_bibtex_citation(tag)) - else: - # add the technical overview paper that should always be cited - logger.info('The reference folder does not exist.') - product_entries = ESMVALTOOL_PAPER + if tag not in ESMVALTOOL_PAPER_TAG: + citation_entries.append(_collect_bibtex_citation(tag)) + with open(f'{product_name}_citation.bibtex', 'w') as file: - file.write(product_entries) + file.write('\n'.join(citation_entries)) def _clean_tags(tags): From 66dd95a74d5ba9df298837610e88a1d0080fdc89 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 25 Feb 2020 16:28:47 +0100 Subject: [PATCH 054/117] move the esmvaltool paper tag to citation module --- esmvalcore/_provenance.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index 3996febb4d..9ab3c134c6 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -10,12 +10,11 @@ from prov.model import ProvDocument from ._version import __version__ +from ._citation import ESMVALTOOL_PAPER_TAG logger = logging.getLogger(__name__) ESMVALTOOL_URI_PREFIX = 'https://www.esmvaltool.org/' -# it is the technical overview and should always be cited -ESMVALTOOL_PAPER_TAG = 'righi19gmdd' def update_without_duplicating(bundle, other): From 85f03f2705ee07499d459c9fef2dcca4230ac307 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 25 Feb 2020 16:29:27 +0100 Subject: [PATCH 055/117] add the esmvaltool paper tag --- esmvalcore/_citation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 40d8f448ea..82089ef69e 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -5,7 +5,7 @@ from pathlib import Path import requests -from ._config import DIAGNOSTICS_PATH, ESMVALTOOL_PAPER_TAG +from ._config import DIAGNOSTICS_PATH if DIAGNOSTICS_PATH: REFERENCES_PATH = Path(DIAGNOSTICS_PATH) / 'references' @@ -17,6 +17,7 @@ CMIP6_URL_STEM = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch' # it is the technical overview and should always be cited +ESMVALTOOL_PAPER_TAG = 'righi19gmdd' ESMVALTOOL_PAPER = ( '@article{righi19gmdd,\n\t' 'doi = {10.5194/gmd-2019-226},\n\t' From a5da5f3c3690765c12b0472e89fc57168f5190b1 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 25 Feb 2020 16:30:03 +0100 Subject: [PATCH 056/117] remove unused import --- tests/integration/test_citation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index 6838ee3663..f323f61943 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -1,5 +1,4 @@ """Test _citation.py.""" -from pathlib import Path from prov.model import ProvDocument import esmvalcore From dc37c3950feb73c09f5ff308f1b8e2a219e3b912 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 25 Feb 2020 17:15:45 +0100 Subject: [PATCH 057/117] remove unused import --- tests/integration/test_recipe.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 89271d79f8..9bda6e29f5 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -14,7 +14,6 @@ from esmvalcore._task import DiagnosticTask from esmvalcore.preprocessor import DEFAULT_ORDER, PreprocessingTask from esmvalcore.preprocessor._io import concatenate_callback -from esmvalcore._citation import REFERENCES_PATH from .test_diagnostic_run import write_config_user_file from .test_provenance import check_provenance From cdd51817117eef4e871b7ac0918e1b77eba7d253 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 26 Feb 2020 16:11:34 +0100 Subject: [PATCH 058/117] refactor json to bibtex function --- esmvalcore/_citation.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 82089ef69e..90af8dc3e0 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -119,6 +119,14 @@ def _get_response(url): def _json_to_bibtex(data): """Make a bibtex entry from CMIP6 Data Citation json data.""" + url = 'url not found' + title = data.get('titles', ['title not found'])[0] + publisher = data.get('publisher', 'publisher not found') + year = data.get('publicationYear', 'publicationYear not found') + authors = 'creators not found' + doi = 'doi not found' + + author_list = [] if data.get('creators', False): author_list = [ item.get('creatorName', '') for item in data['creators'] @@ -131,13 +139,9 @@ def _json_to_bibtex(data): else: authors = ' and '.join(author_list) - title = data.get('titles', ['title not found'])[0] - publisher = data.get('publisher', 'publisher not found') - year = data.get('publicationYear', 'publicationYear not found') - if data.get('identifier', False): doi = data.get('identifier').get('id', 'doi not found') - url = f'https://doi.org/{doi}' + url = f'https://doi.org/{doi}' bibtex_entry = ( f'{"@misc{"}{url},\n\t' From 4e65db148f6ea285e8966fb080431f3c47cc843f Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 26 Feb 2020 16:22:22 +0100 Subject: [PATCH 059/117] fix tests for _citation.py --- tests/integration/test_citation.py | 89 +++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index f323f61943..df5ba42cce 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -2,13 +2,10 @@ from prov.model import ProvDocument import esmvalcore -from esmvalcore._citation import _write_citation_file, ESMVALTOOL_PAPER +from esmvalcore._citation import (_write_citation_file, + ESMVALTOOL_PAPER, CMIP6_URL_STEM) from esmvalcore._provenance import ESMVALTOOL_URI_PREFIX -# Two test cases: -# 1: references are replaced with bibtex -# 2: CMIP6 citation info is retrieved from ES-DOC - def test_references(tmp_path, monkeypatch): """Test1: references are replaced with bibtex.""" @@ -17,7 +14,6 @@ def test_references(tmp_path, monkeypatch): provenance.add_namespace('file', uri=ESMVALTOOL_URI_PREFIX + 'file') provenance.add_namespace('attribute', uri=ESMVALTOOL_URI_PREFIX + 'attribute') - filename = str(tmp_path / 'output.nc') attributes = {'attribute:references': 'test_tag'} provenance.entity('file:' + filename, attributes) @@ -38,4 +34,83 @@ def test_references(tmp_path, monkeypatch): assert citation == '\n'.join([ESMVALTOOL_PAPER, fake_bibtex]) -# def test_cmip6_data_citation(tmp_path, monkeypatch): +def mock_get_response(url): + """Mock _get_response() function.""" + json_data = False + if url.lower().startswith('https'): + json_data = {'titles': ['title is found']} + return json_data + + +def test_cmip6_data_citation(tmp_path, monkeypatch): + """Test2: CMIP6 citation info is retrieved from ES-DOC.""" + # Create fake provenance + provenance = ProvDocument() + provenance.add_namespace('file', uri=ESMVALTOOL_URI_PREFIX + 'file') + provenance.add_namespace('attribute', + uri=ESMVALTOOL_URI_PREFIX + 'attribute') + attributes = { + 'attribute:mip_era': 'CMIP6', + 'attribute:activity_id': 'activity', + 'attribute:institution_id': 'institution', + 'attribute:source_id': 'source', + 'attribute:experiment_id': 'experiment', + } + filename = str(tmp_path / 'output.nc') + provenance.entity('file:' + filename, attributes) + + monkeypatch.setattr( + esmvalcore._citation, '_get_response', mock_get_response + ) + _write_citation_file(filename, provenance) + citation_file = tmp_path / 'output_citation.bibtex' + + # Create fake bibtex entry + url = 'url not found' + title = 'title is found' + publisher = 'publisher not found' + year = 'publicationYear not found' + authors = 'creators not found' + doi = 'doi not found' + fake_bibtex_entry = ( + f'{"@misc{"}{url},\n\t' + f'url = {{{url}}},\n\t' + f'title = {{{title}}},\n\t' + f'publisher = {{{publisher}}},\n\t' + f'year = {year},\n\t' + f'author = {{{authors}}},\n\t' + f'doi = {{{doi}}},\n' + f'{"}"}\n' + ) + assert citation_file.read_text() == '\n'.join( + [ESMVALTOOL_PAPER, fake_bibtex_entry] + ) + + +def test_cmip6_data_citation_url(tmp_path, monkeypatch): + """Test3: CMIP6 info_url is retrieved from ES-DOC.""" + # Create fake provenance + provenance = ProvDocument() + provenance.add_namespace('file', uri=ESMVALTOOL_URI_PREFIX + 'file') + provenance.add_namespace('attribute', + uri=ESMVALTOOL_URI_PREFIX + 'attribute') + attributes = { + 'attribute:mip_era': 'CMIP6', + 'attribute:activity_id': 'activity', + 'attribute:institution_id': 'institution', + 'attribute:source_id': 'source', + 'attribute:experiment_id': 'experiment', + } + filename = str(tmp_path / 'output.nc') + provenance.entity('file:' + filename, attributes) + + monkeypatch.setattr( + esmvalcore._citation, '_get_response', mock_get_response + ) + _write_citation_file(filename, provenance) + citation_url = tmp_path / 'output_data_citation_url.txt' + + # Create fake info url + fake_url_prefix = '.'.join(attributes.values()) + fake_info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{fake_url_prefix}' + assert citation_url.read_text() == '{}\n'.format(fake_info_url) From 1eb18b2529483d46cd73dfb8be0118f4205c983a Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 26 Feb 2020 16:53:17 +0100 Subject: [PATCH 060/117] remove unused monkeypatch --- tests/integration/test_citation.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index df5ba42cce..36d11af22a 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -87,7 +87,7 @@ def test_cmip6_data_citation(tmp_path, monkeypatch): ) -def test_cmip6_data_citation_url(tmp_path, monkeypatch): +def test_cmip6_data_citation_url(tmp_path): """Test3: CMIP6 info_url is retrieved from ES-DOC.""" # Create fake provenance provenance = ProvDocument() @@ -103,10 +103,6 @@ def test_cmip6_data_citation_url(tmp_path, monkeypatch): } filename = str(tmp_path / 'output.nc') provenance.entity('file:' + filename, attributes) - - monkeypatch.setattr( - esmvalcore._citation, '_get_response', mock_get_response - ) _write_citation_file(filename, provenance) citation_url = tmp_path / 'output_data_citation_url.txt' From 2f49ecbeebc2977be5c848260a40c16e09411555 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 28 Feb 2020 16:27:15 +0100 Subject: [PATCH 061/117] fix typo --- esmvalcore/_citation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 90af8dc3e0..f488a29fd7 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -58,12 +58,12 @@ def _write_citation_file(filename, provenance): for key, value in item.attributes: if key.namespace.prefix == 'attribute': if key.localpart in {'reference', 'references'}: + ## check if value is a tag in recipe or diagnostics product_tags.append(value) elif key.localpart == 'mip_era' and value == 'CMIP6': url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) json_urls.append(_make_json_url(url_prefix)) - _save_citation_info(product_name, product_tags, json_urls, info_urls) @@ -97,7 +97,7 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): def _clean_tags(tags): - """Clean the tages that are recorded as str by provenance.""" + """Clean the tags that are recorded as str by provenance.""" pattern = re.compile(r'\w+') return pattern.findall(str(tags)) From 789df2bb2352383e37f41376da31eb06d8b91b11 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 6 Mar 2020 16:46:30 +0100 Subject: [PATCH 062/117] add support for references that are not in diagnostics, refactor --- esmvalcore/_citation.py | 49 ++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index f488a29fd7..425e264b9b 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -49,39 +49,58 @@ def _write_citation_file(filename, provenance): connection, cmip6 data references are saved into a bibtex file. Otherwise, cmip6 data reference links are saved into a text file. """ - # collect info from provenance product_name = os.path.splitext(filename)[0] info_urls = [] json_urls = [] product_tags = [] + product_refs = [] + # collect references from provenance for item in provenance.records: + attributes = {} for key, value in item.attributes: if key.namespace.prefix == 'attribute': - if key.localpart in {'reference', 'references'}: - ## check if value is a tag in recipe or diagnostics - product_tags.append(value) - elif key.localpart == 'mip_era' and value == 'CMIP6': + attributes[key.localpart] = value + attributes[item.identifier.namespace.prefix] = item.identifier.namespace.prefix + # check if item is related to a diagnostics + if {'references', 'script_file'} <= set(attributes): + product_tags.append(attributes['references']) + # check if item is related to a recipe + if {'references', 'recipe'} <= set(attributes): + product_tags.append(attributes['references']) + # check if item is not related to a diagnostics or recipe + if not attributes.keys() & {'recipe', 'script_file'} and attributes.keys() & {'references'}: + if attributes['references'] != ESMVALTOOL_PAPER_TAG: + product_refs.append(attributes['references']) + + # collect cmip6 info from provenance + for item in provenance.records: + attributes = {} + for key, value in item.attributes: + if key.namespace.prefix == 'attribute': + if key.localpart == 'mip_era' and value == 'CMIP6': url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) json_urls.append(_make_json_url(url_prefix)) - _save_citation_info(product_name, product_tags, json_urls, info_urls) + _save_citation_info(product_name, product_tags, product_refs, json_urls, info_urls) -def _save_citation_info(product_name, product_tags, json_urls, info_urls): + +def _save_citation_info(product_name, product_tags, product_refs, json_urls, info_urls): citation_entries = [ESMVALTOOL_PAPER] - citation_urls = '' # save CMIP6 url_info, if any if info_urls: - for info_url in info_urls: - citation_urls += '{}\n'.format(info_url) - with open(f'{product_name}_data_citation_url.txt', 'w') as file: - file.write(citation_urls) + with open(f'{product_name}_data_citation_info.txt', 'w') as file: + file.write('\n'.join(list(set(info_urls)))) + + # save any refrences info that is not related to recipe or diagnostics + if product_refs: + with open(f'{product_name}_data_citation_info.txt', 'w') as file: + file.write('\n'.join(list(set(product_refs)))) # convert json_urls to bibtex entries - if json_urls: - for json_url in json_urls: - citation_entries.append(_collect_cmip_citation(json_url)) + for json_url in json_urls: + citation_entries.append(_collect_cmip_citation(json_url)) # convert tags to bibtex entries if REFERENCES_PATH: From 0b96874f9068d5d95312c52a7f80fe094c6577f3 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 6 Mar 2020 17:24:21 +0100 Subject: [PATCH 063/117] fix test for new codes in citation.py --- tests/integration/test_citation.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index 36d11af22a..eb6b64296f 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -15,7 +15,10 @@ def test_references(tmp_path, monkeypatch): provenance.add_namespace('attribute', uri=ESMVALTOOL_URI_PREFIX + 'attribute') filename = str(tmp_path / 'output.nc') - attributes = {'attribute:references': 'test_tag'} + attributes = { + 'attribute:references': 'test_tag', + 'attribute:script_file': 'diagnostics.py' + } provenance.entity('file:' + filename, attributes) # Create fake bibtex references tag file @@ -104,9 +107,9 @@ def test_cmip6_data_citation_url(tmp_path): filename = str(tmp_path / 'output.nc') provenance.entity('file:' + filename, attributes) _write_citation_file(filename, provenance) - citation_url = tmp_path / 'output_data_citation_url.txt' + citation_url = tmp_path / 'output_data_citation_info.txt' # Create fake info url fake_url_prefix = '.'.join(attributes.values()) fake_info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{fake_url_prefix}' - assert citation_url.read_text() == '{}\n'.format(fake_info_url) + assert citation_url.read_text() == fake_info_url From c452ba6025ccc5f77b68eb983ca60c9f9a932bc1 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 6 Mar 2020 17:25:00 +0100 Subject: [PATCH 064/117] fix newlines in entries --- esmvalcore/_citation.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 425e264b9b..fd2392b0a7 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -35,7 +35,7 @@ 'title = {{ESMValTool} v2.0 ' '{\\&}amp$\\mathsemicolon${\\#}8211$\\mathsemicolon$ ' 'Technical overview}\n' - '}\n' + '}' ) @@ -74,7 +74,6 @@ def _write_citation_file(filename, provenance): # collect cmip6 info from provenance for item in provenance.records: - attributes = {} for key, value in item.attributes: if key.namespace.prefix == 'attribute': if key.localpart == 'mip_era' and value == 'CMIP6': @@ -100,7 +99,9 @@ def _save_citation_info(product_name, product_tags, product_refs, json_urls, inf # convert json_urls to bibtex entries for json_url in json_urls: - citation_entries.append(_collect_cmip_citation(json_url)) + cmip_citation = _collect_cmip_citation(json_url) + if cmip_citation: + citation_entries.append(cmip_citation) # convert tags to bibtex entries if REFERENCES_PATH: @@ -194,7 +195,7 @@ def _collect_cmip_citation(json_url): bibtex_entry = _json_to_bibtex(json_data) else: logger.info('Invalid json link %s', json_url) - bibtex_entry = 'Invalid json link' + bibtex_entry = False return bibtex_entry From dfe6e1234781ff27fc610802dafaf256d8851684 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 6 Mar 2020 17:41:42 +0100 Subject: [PATCH 065/117] style --- esmvalcore/_citation.py | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index fd2392b0a7..bf31a2ed9e 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -53,14 +53,18 @@ def _write_citation_file(filename, provenance): info_urls = [] json_urls = [] product_tags = [] - product_refs = [] # collect references from provenance for item in provenance.records: attributes = {} for key, value in item.attributes: if key.namespace.prefix == 'attribute': attributes[key.localpart] = value - attributes[item.identifier.namespace.prefix] = item.identifier.namespace.prefix + identifier = item.identifier.namespace.prefix + attributes[identifier] = identifier + if key.localpart == 'mip_era' and value == 'CMIP6': + url_prefix = _make_url_prefix(item.attributes) + info_urls.append(_make_info_url(url_prefix)) + json_urls.append(_make_json_url(url_prefix)) # check if item is related to a diagnostics if {'references', 'script_file'} <= set(attributes): product_tags.append(attributes['references']) @@ -68,35 +72,23 @@ def _write_citation_file(filename, provenance): if {'references', 'recipe'} <= set(attributes): product_tags.append(attributes['references']) # check if item is not related to a diagnostics or recipe - if not attributes.keys() & {'recipe', 'script_file'} and attributes.keys() & {'references'}: + if (not attributes.keys() & {'recipe', 'script_file'} and + attributes.keys() & {'references'}): if attributes['references'] != ESMVALTOOL_PAPER_TAG: - product_refs.append(attributes['references']) - - # collect cmip6 info from provenance - for item in provenance.records: - for key, value in item.attributes: - if key.namespace.prefix == 'attribute': - if key.localpart == 'mip_era' and value == 'CMIP6': - url_prefix = _make_url_prefix(item.attributes) - info_urls.append(_make_info_url(url_prefix)) - json_urls.append(_make_json_url(url_prefix)) + info_urls.append(attributes['references']) - _save_citation_info(product_name, product_tags, product_refs, json_urls, info_urls) + _save_citation_info(product_name, product_tags, json_urls, info_urls) -def _save_citation_info(product_name, product_tags, product_refs, json_urls, info_urls): +def _save_citation_info(product_name, product_tags, json_urls, info_urls): citation_entries = [ESMVALTOOL_PAPER] # save CMIP6 url_info, if any + # save any refrences info that is not related to recipe or diagnostics if info_urls: with open(f'{product_name}_data_citation_info.txt', 'w') as file: file.write('\n'.join(list(set(info_urls)))) - # save any refrences info that is not related to recipe or diagnostics - if product_refs: - with open(f'{product_name}_data_citation_info.txt', 'w') as file: - file.write('\n'.join(list(set(product_refs)))) - # convert json_urls to bibtex entries for json_url in json_urls: cmip_citation = _collect_cmip_citation(json_url) From 492c26d0cb8cacd7e8fe2e10d42a2e3e95654c14 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 9 Mar 2020 11:52:34 +0100 Subject: [PATCH 066/117] refactor --- esmvalcore/_citation.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index bf31a2ed9e..5f58347606 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -35,7 +35,7 @@ 'title = {{ESMValTool} v2.0 ' '{\\&}amp$\\mathsemicolon${\\#}8211$\\mathsemicolon$ ' 'Technical overview}\n' - '}' + '}\n' ) @@ -53,6 +53,7 @@ def _write_citation_file(filename, provenance): info_urls = [] json_urls = [] product_tags = [] + section = 'references' # collect references from provenance for item in provenance.records: attributes = {} @@ -65,17 +66,14 @@ def _write_citation_file(filename, provenance): url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) json_urls.append(_make_json_url(url_prefix)) - # check if item is related to a diagnostics - if {'references', 'script_file'} <= set(attributes): - product_tags.append(attributes['references']) - # check if item is related to a recipe - if {'references', 'recipe'} <= set(attributes): - product_tags.append(attributes['references']) - # check if item is not related to a diagnostics or recipe - if (not attributes.keys() & {'recipe', 'script_file'} and - attributes.keys() & {'references'}): - if attributes['references'] != ESMVALTOOL_PAPER_TAG: - info_urls.append(attributes['references']) + if section in attributes.keys(): + # check if reference is related to a diagnostics or a recipe + if attributes.keys() & {'script_file', 'recipe'}: + product_tags.append(attributes[section]) + # check if reference is not related to a diagnostics or a recipe + if (not attributes.keys() & {'recipe', 'script_file'} and + attributes[section] != ESMVALTOOL_PAPER_TAG): + info_urls.append(attributes[section]) _save_citation_info(product_name, product_tags, json_urls, info_urls) From 433296359fac68c00905948e7c940359df51f610 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 9 Mar 2020 18:01:22 +0100 Subject: [PATCH 067/117] add a function to convert bibtex to reference entry --- esmvalcore/_citation.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 5f58347606..a0a38742ef 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -216,3 +216,29 @@ def _make_info_url(url_prefix): """Make info url based on CMIP6 Data Citation Service.""" info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{url_prefix}' return info_url + + +def cite_tag_value(tag): + """Convert a tag to bibtex entry.""" + reference_entry = [] + fields = ['title', 'publisher', 'authors', 'journal', 'doi'] + pattern = r'.*?\{(.*)\}.*' + + if REFERENCES_PATH: + entry = _collect_bibtex_citation(tag).split(',') + for item in entry: + if 'authors' in item: + authors_name = re.search(pattern, item).group(1).split('and') + if authors_name[0] == authors_name[-1]: + reference_entry.append(authors_name) + else: + reference_entry.append([f'{authors_name[0]}, et al.']) + if 'year' in item: + year = item.split('year =')[-1] + reference_entry.append(year) + for field in fields: + reference_entry = [ + re.search(pattern, item).group(1) for item in entry if field in item + ] + print(','.join(reference_entry)) + return ','.join(reference_entry) From 38a1a18f10f3e347d35e0c1437ba6ab0fd9aa841 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 10 Mar 2020 11:03:58 +0100 Subject: [PATCH 068/117] fix the function cite_tag_value --- esmvalcore/_citation.py | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index a0a38742ef..b6027dfe09 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -218,27 +218,10 @@ def _make_info_url(url_prefix): return info_url -def cite_tag_value(tag): - """Convert a tag to bibtex entry.""" - reference_entry = [] - fields = ['title', 'publisher', 'authors', 'journal', 'doi'] - pattern = r'.*?\{(.*)\}.*' - +def cite_tag_value(tags): + """Convert tags to bibtex entries.""" + reference_entries = '' if REFERENCES_PATH: - entry = _collect_bibtex_citation(tag).split(',') - for item in entry: - if 'authors' in item: - authors_name = re.search(pattern, item).group(1).split('and') - if authors_name[0] == authors_name[-1]: - reference_entry.append(authors_name) - else: - reference_entry.append([f'{authors_name[0]}, et al.']) - if 'year' in item: - year = item.split('year =')[-1] - reference_entry.append(year) - for field in fields: - reference_entry = [ - re.search(pattern, item).group(1) for item in entry if field in item - ] - print(','.join(reference_entry)) - return ','.join(reference_entry) + reference_entries = [_collect_bibtex_citation(tag) for tag in [tags]] + reference_entries = '\n'.join(reference_entries) + return reference_entries From 7a061cd754230a1aaacd27dd4a4b76c5df41d072 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 10 Mar 2020 11:57:14 +0100 Subject: [PATCH 069/117] remove the unnecessary condition for TAGS --- esmvalcore/_recipe.py | 2 +- esmvalcore/_task.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index dcb65704f7..7f20c5ace1 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -949,7 +949,7 @@ def _initalize_provenance(self, raw_documentation): """Initialize the recipe provenance.""" doc = deepcopy(raw_documentation) for key in doc: - if key in TAGS and key not in 'references': + if key in TAGS: doc[key] = replace_tags(key, doc[key]) return get_recipe_provenance(doc, self._filename) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index a73436d4a8..42bc44c73a 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -572,7 +572,7 @@ def _collect_provenance(self): attributes.update(deepcopy(attrs)) for key in attributes: - if key in TAGS and key not in 'references': + if key in TAGS: attributes[key] = replace_tags(key, attributes[key]) product = TrackedFile(filename, attributes, ancestors) From f30a0d62295d71622f13a6370f93700498a73519 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 10 Mar 2020 14:57:10 +0100 Subject: [PATCH 070/117] add tests to check if references have been added --- tests/integration/test_recipe.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 9bda6e29f5..14507738ad 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -14,6 +14,7 @@ from esmvalcore._task import DiagnosticTask from esmvalcore.preprocessor import DEFAULT_ORDER, PreprocessingTask from esmvalcore.preprocessor._io import concatenate_callback +from esmvalcore._citation import REFERENCES_PATH, _clean_tags from .test_diagnostic_run import write_config_user_file from .test_provenance import check_provenance @@ -1260,6 +1261,9 @@ def test_diagnostic_task_provenance( assert product.attributes[key] == tuple(TAGS[key][k] for k in record[key]) + # Check that diagnostic reference files have been added + _test_bibtex_files(product.attributes['references']) + # Check that recipe diagnostic tags have been added src = yaml.safe_load(DEFAULT_DOCUMENTATION + content) for key in ('realms', 'themes'): @@ -1269,12 +1273,16 @@ def test_diagnostic_task_provenance( # Check that recipe tags have been added recipe_record = product.provenance.get_record('recipe:recipe_test.yml') assert len(recipe_record) == 1 - for key in ('description', 'references'): - value = src['documentation'][key] - if key == 'references': - value = ', '.join(src['documentation'][key]) - assert recipe_record[0].get_attribute('attribute:' + - key).pop() == value + key = 'description' + value = src['documentation'][key] + assert recipe_record[0].get_attribute('attribute:' + + key).pop() == value + + # Check that recipe reference files have been added + key = 'references' + recipe_tags = recipe_record[0].get_attribute('attribute:' + + key).pop() + _test_bibtex_files(recipe_tags) # Test that provenance was saved to netcdf, xml and svg plot cube = iris.load(product.filename)[0] @@ -1284,6 +1292,17 @@ def test_diagnostic_task_provenance( assert os.path.exists(prefix + '.svg') +def _test_bibtex_files(product_tags): + """check bibtex files exit in REFERENCES_PATH.""" + if REFERENCES_PATH: + tags = list(set(_clean_tags(product_tags))) + for tag in tags: + bibtex_file = REFERENCES_PATH / f'{tag}.bibtex' + if not bibtex_file.is_file(): + raise ValueError( + 'The reference file {} does not exist.'.format(bibtex_file) + ) + def test_alias_generation(tmp_path, patched_datafinder, config_user): content = dedent(""" From a3c7e420c6d386f611d20887883090de758101f0 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 10 Mar 2020 15:29:53 +0100 Subject: [PATCH 071/117] refactor --- esmvalcore/_citation.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index b6027dfe09..71640f31b3 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -94,13 +94,11 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): citation_entries.append(cmip_citation) # convert tags to bibtex entries - if REFERENCES_PATH: - if product_tags: - # make tags clean and unique - tags = list(set(_clean_tags(product_tags))) - for tag in tags: - if tag not in ESMVALTOOL_PAPER_TAG: - citation_entries.append(_collect_bibtex_citation(tag)) + if REFERENCES_PATH and product_tags: + # make tags clean and unique + tags = list(set(_clean_tags(product_tags))) + for tag in tags: + citation_entries.append(_collect_bibtex_citation(tag)) with open(f'{product_name}_citation.bibtex', 'w') as file: file.write('\n'.join(citation_entries)) @@ -172,9 +170,10 @@ def _collect_bibtex_citation(tag): if bibtex_file.is_file(): entry = bibtex_file.read_text() else: - raise ValueError( - 'The reference file {} does not exist.'.format(bibtex_file) + logger.info( + 'The reference file %s does not exist.', bibtex_file ) + entry = '' return entry From 1d81db719b0bea8b9f6792d6713191d6bc2b3478 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 10 Mar 2020 17:59:06 +0100 Subject: [PATCH 072/117] refactor --- esmvalcore/_citation.py | 44 ++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 71640f31b3..e83d26065c 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -53,27 +53,31 @@ def _write_citation_file(filename, provenance): info_urls = [] json_urls = [] product_tags = [] - section = 'references' - # collect references from provenance for item in provenance.records: - attributes = {} - for key, value in item.attributes: - if key.namespace.prefix == 'attribute': - attributes[key.localpart] = value - identifier = item.identifier.namespace.prefix - attributes[identifier] = identifier - if key.localpart == 'mip_era' and value == 'CMIP6': - url_prefix = _make_url_prefix(item.attributes) - info_urls.append(_make_info_url(url_prefix)) - json_urls.append(_make_json_url(url_prefix)) - if section in attributes.keys(): - # check if reference is related to a diagnostics or a recipe - if attributes.keys() & {'script_file', 'recipe'}: - product_tags.append(attributes[section]) - # check if reference is not related to a diagnostics or a recipe - if (not attributes.keys() & {'recipe', 'script_file'} and - attributes[section] != ESMVALTOOL_PAPER_TAG): - info_urls.append(attributes[section]) + # get cmip6 citation info + value = item.get_attribute('attribute:' + 'mip_era') + if 'CMIP6' in list(value): + url_prefix = _make_url_prefix(item.attributes) + info_urls.append(_make_info_url(url_prefix)) + json_urls.append(_make_json_url(url_prefix)) + # get diagnostics citation tags + if item.get_attribute('attribute:' + 'script_file'): + product_tags.append( + item.get_attribute('attribute:' + 'references').pop() + ) + # get recipe citation tags + if item.get_attribute('attribute:' + 'references'): + if item.identifier.namespace.prefix == 'recipe': + product_tags.append( + item.get_attribute('attribute:' + 'references').pop() + ) + # get other references information recorded by provenance + tags = list(set(_clean_tags(product_tags + [ESMVALTOOL_PAPER_TAG]))) + for item in provenance.records: + if item.get_attribute('attribute:' + 'references'): + value = item.get_attribute('attribute:' + 'references').pop() + if value not in tags: + info_urls.append(value) _save_citation_info(product_name, product_tags, json_urls, info_urls) From 504a17fb0b9e7a75cd4e66fc56e888c71f48fe11 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 11 Mar 2020 10:15:27 +0100 Subject: [PATCH 073/117] fix broken test --- esmvalcore/_citation.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index e83d26065c..57d13c9d87 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -60,17 +60,18 @@ def _write_citation_file(filename, provenance): url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) json_urls.append(_make_json_url(url_prefix)) - # get diagnostics citation tags - if item.get_attribute('attribute:' + 'script_file'): - product_tags.append( - item.get_attribute('attribute:' + 'references').pop() - ) - # get recipe citation tags if item.get_attribute('attribute:' + 'references'): + # get recipe citation tags if item.identifier.namespace.prefix == 'recipe': product_tags.append( item.get_attribute('attribute:' + 'references').pop() ) + # get diagnostics citation tags + if item.get_attribute('attribute:' + 'script_file'): + product_tags.append( + item.get_attribute('attribute:' + 'references').pop() + ) + # get other references information recorded by provenance tags = list(set(_clean_tags(product_tags + [ESMVALTOOL_PAPER_TAG]))) for item in provenance.records: From 857832bd9b8e94616b90a820ce10a89c367384c8 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 11 Mar 2020 15:49:25 +0100 Subject: [PATCH 074/117] fix the test for tags in test_recipe --- tests/integration/test_recipe.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 14507738ad..106dc106d6 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1273,16 +1273,13 @@ def test_diagnostic_task_provenance( # Check that recipe tags have been added recipe_record = product.provenance.get_record('recipe:recipe_test.yml') assert len(recipe_record) == 1 - key = 'description' - value = src['documentation'][key] - assert recipe_record[0].get_attribute('attribute:' + - key).pop() == value - - # Check that recipe reference files have been added - key = 'references' - recipe_tags = recipe_record[0].get_attribute('attribute:' + - key).pop() - _test_bibtex_files(recipe_tags) + for key in ('description', 'references'): + value = src['documentation'][key] + if key == 'references': + value = ','.join(src['documentation'][key]) + _test_bibtex_files(value) + assert recipe_record[0].get_attribute('attribute:' + + key).pop() == value # Test that provenance was saved to netcdf, xml and svg plot cube = iris.load(product.filename)[0] From 0997805b3960ee989f36e8547593f89070552ad4 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 11 Mar 2020 21:48:21 +0100 Subject: [PATCH 075/117] add a space after , for joining tags --- tests/integration/test_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 106dc106d6..fab899c586 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1276,7 +1276,7 @@ def test_diagnostic_task_provenance( for key in ('description', 'references'): value = src['documentation'][key] if key == 'references': - value = ','.join(src['documentation'][key]) + value = ', '.join(src['documentation'][key]) _test_bibtex_files(value) assert recipe_record[0].get_attribute('attribute:' + key).pop() == value From 16043f12e51abf172848f86d5ccdabba22a90ebc Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 11 Mar 2020 21:49:28 +0100 Subject: [PATCH 076/117] remove pop() and refactor --- esmvalcore/_citation.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 57d13c9d87..38da8f5868 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -54,31 +54,29 @@ def _write_citation_file(filename, provenance): json_urls = [] product_tags = [] for item in provenance.records: + reference_attr = item.get_attribute('attribute:' + 'references') # get cmip6 citation info value = item.get_attribute('attribute:' + 'mip_era') if 'CMIP6' in list(value): url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) json_urls.append(_make_json_url(url_prefix)) - if item.get_attribute('attribute:' + 'references'): + if reference_attr: # get recipe citation tags if item.identifier.namespace.prefix == 'recipe': - product_tags.append( - item.get_attribute('attribute:' + 'references').pop() - ) + product_tags += list(reference_attr) # get diagnostics citation tags if item.get_attribute('attribute:' + 'script_file'): - product_tags.append( - item.get_attribute('attribute:' + 'references').pop() - ) + product_tags += list(reference_attr) # get other references information recorded by provenance - tags = list(set(_clean_tags(product_tags + [ESMVALTOOL_PAPER_TAG]))) + tags = set(_clean_tags(product_tags + [ESMVALTOOL_PAPER_TAG])) for item in provenance.records: - if item.get_attribute('attribute:' + 'references'): - value = item.get_attribute('attribute:' + 'references').pop() - if value not in tags: - info_urls.append(value) + reference_attr = item.get_attribute('attribute:' + 'references') + if reference_attr: + value = set(_clean_tags(reference_attr)) + if not value.issubset(tags): + info_urls += list(reference_attr) _save_citation_info(product_name, product_tags, json_urls, info_urls) From 0ee0047f125433df3642cc00c4282390165ad398 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 12 Mar 2020 10:50:34 +0100 Subject: [PATCH 077/117] fix flake8 error --- tests/integration/test_recipe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index fab899c586..2d3276fe1a 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1300,6 +1300,7 @@ def _test_bibtex_files(product_tags): 'The reference file {} does not exist.'.format(bibtex_file) ) + def test_alias_generation(tmp_path, patched_datafinder, config_user): content = dedent(""" From ed6a10944e645d65cb815836de4622d176b6e4e9 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Tue, 3 Mar 2020 18:23:50 +0100 Subject: [PATCH 078/117] Implemented concatenation of cubes with derived coordinates --- esmvalcore/_recipe.py | 2 +- esmvalcore/preprocessor/_io.py | 50 ++++++- .../preprocessor/_io/test_concatenate.py | 140 +++++++++++++++++- 3 files changed, 186 insertions(+), 6 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index ac8cbcbee9..ac68be8c40 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -283,7 +283,7 @@ def _get_default_settings(variable, config_user, derive=False): settings['load'] = { 'callback': concatenate_callback, } - # Configure merge + # Configure concatenation settings['concatenate'] = {} # Configure fixes diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index a43824c488..d905a00a45 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -28,6 +28,41 @@ } +def _fix_aux_factories(cube): + """Fix :class:`iris.aux_factory.AuxCoordFactory` after concatenation. + + Necessary because of bug in :mod:`iris` (see issue #2478). + + """ + coord_names = [coord.name() for coord in cube.coords()] + + # Hybrid sigma pressure coordinate + if 'atmosphere_hybrid_sigma_pressure_coordinate' in coord_names: + new_aux_factory = iris.aux_factory.HybridPressureFactory( + delta=cube.coord(var_name='ap'), + sigma=cube.coord(var_name='b'), + surface_air_pressure=cube.coord(var_name='ps'), + ) + for aux_factory in cube.aux_factories: + if isinstance(aux_factory, iris.aux_factory.HybridPressureFactory): + break + else: + cube.add_aux_factory(new_aux_factory) + + # Hybrid sigma height coordinate + if 'atmosphere_hybrid_height_coordinate' in coord_names: + new_aux_factory = iris.aux_factory.HybridHeightFactory( + delta=cube.coord(var_name='lev'), + sigma=cube.coord(var_name='b'), + orography=cube.coord(var_name='orog'), + ) + for aux_factory in cube.aux_factories: + if isinstance(aux_factory, iris.aux_factory.HybridHeightFactory): + break + else: + cube.add_aux_factory(new_aux_factory) + + def _get_attr_from_field_coord(ncfield, coord_name, attr): if coord_name is not None: attrs = ncfield.cf_group[coord_name].cf_attrs() @@ -107,9 +142,16 @@ def concatenate(cubes): concatenated = _concatenate_overlapping_cubes(concatenated) if len(concatenated) == 1: - return concatenated[0] - - logger.error('Can not concatenate cubes into a single one.') + cube = concatenated[0] + _fix_aux_factories(cube) + return cube + + # Concatenation not successful -> retrieve exact error message + try: + iris.cube.CubeList(cubes).concatenate_cube() + except iris.exceptions.ConcatenateError as exc: + msg = str(exc) + logger.error('Can not concatenate cubes into a single one: %s', msg) logger.error('Resulting cubes:') for cube in concatenated: logger.error(cube) @@ -119,7 +161,7 @@ def concatenate(cubes): pass else: logger.error('From %s to %s', time.cell(0), time.cell(-1)) - raise ValueError('Can not concatenate cubes.') + raise ValueError(f'Can not concatenate cubes: {msg}') def save(cubes, filename, optimize_access='', compress=False, **kwargs): diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py index 2ee24bae0d..85a97b2583 100644 --- a/tests/integration/preprocessor/_io/test_concatenate.py +++ b/tests/integration/preprocessor/_io/test_concatenate.py @@ -1,18 +1,156 @@ """Integration tests for :func:`esmvalcore.preprocessor._io.concatenate`.""" import unittest +from unittest.mock import call import numpy as np +import pytest from cf_units import Unit -from iris.coords import DimCoord +from iris.aux_factory import HybridHeightFactory, HybridPressureFactory +from iris.coords import AuxCoord, DimCoord from iris.cube import Cube from iris.exceptions import ConcatenateError from esmvalcore.preprocessor import _io +@pytest.fixture +def mock_empty_cube(): + """Return mocked cube with irrelevant coordinates.""" + cube = unittest.mock.create_autospec(Cube, spec_set=True, instance=True) + a_coord = AuxCoord(0.0, var_name='a') + b_coord = AuxCoord(0.0, var_name='b') + cube.coords.return_value = [a_coord, b_coord] + return cube + + +@pytest.fixture +def mock_hybrid_height_cube(): + """Return mocked cube with hybrid height coordinate.""" + cube = unittest.mock.create_autospec(Cube, spec_set=True, instance=True) + lev_coord = AuxCoord([1.0], bounds=[[0.0, 2.0]], var_name='lev', units='m') + b_coord = AuxCoord([0.0], bounds=[[-0.5, 1.5]], var_name='b') + orog_coord = AuxCoord([[[100000]]], var_name='orog', units='m') + cube.coord.side_effect = [lev_coord, b_coord, orog_coord, + lev_coord, b_coord, orog_coord] + cube.coords.return_value = [ + lev_coord, + b_coord, + orog_coord, + AuxCoord(0.0, standard_name='atmosphere_hybrid_height_coordinate'), + ] + aux_factory = HybridHeightFactory( + delta=lev_coord, + sigma=b_coord, + orography=orog_coord, + ) + cube.aux_factories = ['dummy', aux_factory] + return cube + + +@pytest.fixture +def mock_hybrid_pressure_cube(): + """Return mocked cube with hybrid pressure coordinate.""" + cube = unittest.mock.create_autospec(Cube, spec_set=True, instance=True) + ap_coord = AuxCoord([1.0], bounds=[[0.0, 2.0]], var_name='ap', units='Pa') + b_coord = AuxCoord([0.0], bounds=[[-0.5, 1.5]], var_name='b') + ps_coord = AuxCoord([[[100000]]], var_name='ps', units='Pa') + cube.coord.side_effect = [ap_coord, b_coord, ps_coord, + ap_coord, b_coord, ps_coord] + cube.coords.return_value = [ + ap_coord, + b_coord, + ps_coord, + AuxCoord(0.0, + standard_name='atmosphere_hybrid_sigma_pressure_coordinate'), + ] + aux_factory = HybridPressureFactory( + delta=ap_coord, + sigma=b_coord, + surface_air_pressure=ps_coord, + ) + cube.aux_factories = ['dummy', aux_factory] + return cube + + +@pytest.fixture +def real_hybrid_pressure_cube(): + """Return cube with hybrid pressure coordinate.""" + ap_coord = AuxCoord([1.0], bounds=[[0.0, 2.0]], var_name='ap', units='Pa') + b_coord = AuxCoord([0.0], bounds=[[-0.5, 1.5]], var_name='b') + ps_coord = AuxCoord([[[100000]]], var_name='ps', units='Pa') + x_coord = AuxCoord( + 0.0, + var_name='x', + standard_name='atmosphere_hybrid_sigma_pressure_coordinate', + ) + cube = Cube([[[[0.0]]]], var_name='x', + aux_coords_and_dims=[(ap_coord, 1), (b_coord, 1), + (ps_coord, (0, 2, 3)), (x_coord, ())]) + return cube + + +def test_fix_aux_factories_empty_cube(mock_empty_cube): + """Test fixing with empty cube.""" + _io._fix_aux_factories(mock_empty_cube) + assert mock_empty_cube.mock_calls == [call.coords()] + + +def test_fix_aux_factories_hybrid_height(mock_hybrid_height_cube): + """Test fixing of hybrid height coordinate.""" + # Test with aux_factory object + _io._fix_aux_factories(mock_hybrid_height_cube) + mock_hybrid_height_cube.coords.assert_called_once_with() + mock_hybrid_height_cube.coord.assert_has_calls([call(var_name='lev'), + call(var_name='b'), + call(var_name='orog')]) + mock_hybrid_height_cube.add_aux_factory.assert_not_called() + + # Test without aux_factory object + mock_hybrid_height_cube.reset_mock() + mock_hybrid_height_cube.aux_factories = ['dummy'] + _io._fix_aux_factories(mock_hybrid_height_cube) + mock_hybrid_height_cube.coords.assert_called_once_with() + mock_hybrid_height_cube.coord.assert_has_calls([call(var_name='lev'), + call(var_name='b'), + call(var_name='orog')]) + mock_hybrid_height_cube.add_aux_factory.assert_called_once() + + +def test_fix_aux_factories_hybrid_pressure(mock_hybrid_pressure_cube): + """Test fixing of hybrid pressure coordinate.""" + # Test with aux_factory object + _io._fix_aux_factories(mock_hybrid_pressure_cube) + mock_hybrid_pressure_cube.coords.assert_called_once_with() + mock_hybrid_pressure_cube.coord.assert_has_calls([call(var_name='ap'), + call(var_name='b'), + call(var_name='ps')]) + mock_hybrid_pressure_cube.add_aux_factory.assert_not_called() + + # Test without aux_factory object + mock_hybrid_pressure_cube.reset_mock() + mock_hybrid_pressure_cube.aux_factories = ['dummy'] + _io._fix_aux_factories(mock_hybrid_pressure_cube) + mock_hybrid_pressure_cube.coords.assert_called_once_with() + mock_hybrid_pressure_cube.coord.assert_has_calls([call(var_name='ap'), + call(var_name='b'), + call(var_name='ps')]) + mock_hybrid_pressure_cube.add_aux_factory.assert_called_once() + + +def test_fix_aux_factories_real_cube(real_hybrid_pressure_cube): + """Test fixing of hybrid pressure coordinate on real cube.""" + assert not real_hybrid_pressure_cube.coords('air_pressure') + _io._fix_aux_factories(real_hybrid_pressure_cube) + air_pressure_coord = real_hybrid_pressure_cube.coord('air_pressure') + expected_coord = AuxCoord([[[[1.0]]]], bounds=[[[[[-50000., 150002.]]]]], + standard_name='air_pressure', units='Pa') + assert air_pressure_coord == expected_coord + + class TestConcatenate(unittest.TestCase): """Tests for :func:`esmvalcore.preprocessor._io.concatenate`.""" + def setUp(self): """Start tests.""" self._model_coord = DimCoord([1., 2.], From 5aeec80f0612c40bc6dabcb7b8abc23167d6e9fe Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 4 Mar 2020 10:30:19 +0100 Subject: [PATCH 079/117] Added test which tests the actual concatenation of cubes with derived coord --- .../preprocessor/_io/test_concatenate.py | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py index 85a97b2583..1ee44298a4 100644 --- a/tests/integration/preprocessor/_io/test_concatenate.py +++ b/tests/integration/preprocessor/_io/test_concatenate.py @@ -8,7 +8,7 @@ from cf_units import Unit from iris.aux_factory import HybridHeightFactory, HybridPressureFactory from iris.coords import AuxCoord, DimCoord -from iris.cube import Cube +from iris.cube import Cube, CubeList from iris.exceptions import ConcatenateError from esmvalcore.preprocessor import _io @@ -148,6 +148,29 @@ def test_fix_aux_factories_real_cube(real_hybrid_pressure_cube): assert air_pressure_coord == expected_coord +def test_concatenation_with_aux_factory(real_hybrid_pressure_cube): + """Test actual concatenation of a cube with a derived coordinate.""" + + def time_coord(time_point): + """Time coordinate.""" + return DimCoord([time_point], var_name='time', standard_name='time', + units='days since 6453-2-1') + + cube_0 = real_hybrid_pressure_cube.copy() + cube_1 = real_hybrid_pressure_cube.copy() + cube_0.add_dim_coord(time_coord(0), 0) + cube_1.add_dim_coord(time_coord(1), 0) + concatenated = _io.concatenate(CubeList([cube_0, cube_1])) + air_pressure_coord = concatenated.coord('air_pressure') + expected_coord = AuxCoord( + [[[[1.0]]], [[[1.0]]]], + bounds=[[[[[-50000.0, 150002.0]]]], [[[[-50000.0, 150002.0]]]]], + standard_name='air_pressure', + units='Pa', + ) + assert air_pressure_coord == expected_coord + + class TestConcatenate(unittest.TestCase): """Tests for :func:`esmvalcore.preprocessor._io.concatenate`.""" @@ -175,7 +198,7 @@ def test_concatenate(self): concatenated.coord('time').points, np.array([1, 2, 3, 4, 5, 6])) def test_concatenate_with_overlap(self): - """Test concatenation of time overalapping cubes""" + """Test concatenation of time overalapping cubes.""" self._add_cube([6.5, 7.5], [6., 7.]) concatenated = _io.concatenate(self.raw_cubes) np.testing.assert_array_equal( From d1502f91c5ba9b40d66f99935f23568e9fa0e706 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 4 Mar 2020 16:06:39 +0100 Subject: [PATCH 080/117] Added test that tests if iris bug has been fixed --- .../preprocessor/_io/test_concatenate.py | 92 ++++++++++++++----- 1 file changed, 67 insertions(+), 25 deletions(-) diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py index 1ee44298a4..326197003f 100644 --- a/tests/integration/preprocessor/_io/test_concatenate.py +++ b/tests/integration/preprocessor/_io/test_concatenate.py @@ -14,6 +14,45 @@ from esmvalcore.preprocessor import _io +def get_hybrid_pressure_cube(): + """Return cube with hybrid pressure coordinate.""" + ap_coord = AuxCoord([1.0], bounds=[[0.0, 2.0]], var_name='ap', units='Pa') + b_coord = AuxCoord([0.0], bounds=[[-0.5, 1.5]], var_name='b') + ps_coord = AuxCoord([[[100000]]], var_name='ps', units='Pa') + x_coord = AuxCoord( + 0.0, + var_name='x', + standard_name='atmosphere_hybrid_sigma_pressure_coordinate', + ) + cube = Cube([[[[0.0]]]], var_name='x', + aux_coords_and_dims=[(ap_coord, 1), (b_coord, 1), + (ps_coord, (0, 2, 3)), (x_coord, ())]) + return cube + + +def get_hybrid_pressure_cube_list(): + """Return list of cubes including hybrid pressure coordinate.""" + cube_0 = get_hybrid_pressure_cube() + cube_1 = get_hybrid_pressure_cube() + cube_0.add_dim_coord(get_time_coord(0), 0) + cube_1.add_dim_coord(get_time_coord(1), 0) + cubes = CubeList([cube_0, cube_1]) + for cube in cubes: + aux_factory = HybridPressureFactory( + delta=cube.coord(var_name='ap'), + sigma=cube.coord(var_name='b'), + surface_air_pressure=cube.coord(var_name='ps'), + ) + cube.add_aux_factory(aux_factory) + return cubes + + +def get_time_coord(time_point): + """Time coordinate.""" + return DimCoord([time_point], var_name='time', standard_name='time', + units='days since 6453-2-1') + + @pytest.fixture def mock_empty_cube(): """Return mocked cube with irrelevant coordinates.""" @@ -75,29 +114,39 @@ def mock_hybrid_pressure_cube(): @pytest.fixture def real_hybrid_pressure_cube(): - """Return cube with hybrid pressure coordinate.""" - ap_coord = AuxCoord([1.0], bounds=[[0.0, 2.0]], var_name='ap', units='Pa') - b_coord = AuxCoord([0.0], bounds=[[-0.5, 1.5]], var_name='b') - ps_coord = AuxCoord([[[100000]]], var_name='ps', units='Pa') - x_coord = AuxCoord( - 0.0, - var_name='x', - standard_name='atmosphere_hybrid_sigma_pressure_coordinate', - ) - cube = Cube([[[[0.0]]]], var_name='x', - aux_coords_and_dims=[(ap_coord, 1), (b_coord, 1), - (ps_coord, (0, 2, 3)), (x_coord, ())]) - return cube + """Return real cube with hybrid pressure coordinate.""" + return get_hybrid_pressure_cube() + + +@pytest.fixture +def real_hybrid_pressure_cube_list(): + """Return real list of cubes with hybrid pressure coordinate.""" + return get_hybrid_pressure_cube_list() + + +def check_if_fix_aux_factories_is_necessary(): + """Check if _fix_aux_factories() is necessary (i.e. iris bug is fixed).""" + cubes = get_hybrid_pressure_cube_list() + cube = cubes.concatenate_cube() + coords = [coord.name() for coord in cube.coords()] + msg = ("Apparently concatenation of cubes that have a derived variable " + "is now possible in iris (i.e. issue #2478 has been fixed). Thus, " + "this test and ALL appearances of the function " + "'_fix_aux_factories' can safely be removed!") + assert 'air_pressure' not in coords, msg def test_fix_aux_factories_empty_cube(mock_empty_cube): """Test fixing with empty cube.""" + check_if_fix_aux_factories_is_necessary() _io._fix_aux_factories(mock_empty_cube) assert mock_empty_cube.mock_calls == [call.coords()] def test_fix_aux_factories_hybrid_height(mock_hybrid_height_cube): """Test fixing of hybrid height coordinate.""" + check_if_fix_aux_factories_is_necessary() + # Test with aux_factory object _io._fix_aux_factories(mock_hybrid_height_cube) mock_hybrid_height_cube.coords.assert_called_once_with() @@ -119,6 +168,8 @@ def test_fix_aux_factories_hybrid_height(mock_hybrid_height_cube): def test_fix_aux_factories_hybrid_pressure(mock_hybrid_pressure_cube): """Test fixing of hybrid pressure coordinate.""" + check_if_fix_aux_factories_is_necessary() + # Test with aux_factory object _io._fix_aux_factories(mock_hybrid_pressure_cube) mock_hybrid_pressure_cube.coords.assert_called_once_with() @@ -140,6 +191,7 @@ def test_fix_aux_factories_hybrid_pressure(mock_hybrid_pressure_cube): def test_fix_aux_factories_real_cube(real_hybrid_pressure_cube): """Test fixing of hybrid pressure coordinate on real cube.""" + check_if_fix_aux_factories_is_necessary() assert not real_hybrid_pressure_cube.coords('air_pressure') _io._fix_aux_factories(real_hybrid_pressure_cube) air_pressure_coord = real_hybrid_pressure_cube.coord('air_pressure') @@ -148,19 +200,9 @@ def test_fix_aux_factories_real_cube(real_hybrid_pressure_cube): assert air_pressure_coord == expected_coord -def test_concatenation_with_aux_factory(real_hybrid_pressure_cube): +def test_concatenation_with_aux_factory(real_hybrid_pressure_cube_list): """Test actual concatenation of a cube with a derived coordinate.""" - - def time_coord(time_point): - """Time coordinate.""" - return DimCoord([time_point], var_name='time', standard_name='time', - units='days since 6453-2-1') - - cube_0 = real_hybrid_pressure_cube.copy() - cube_1 = real_hybrid_pressure_cube.copy() - cube_0.add_dim_coord(time_coord(0), 0) - cube_1.add_dim_coord(time_coord(1), 0) - concatenated = _io.concatenate(CubeList([cube_0, cube_1])) + concatenated = _io.concatenate(real_hybrid_pressure_cube_list) air_pressure_coord = concatenated.coord('air_pressure') expected_coord = AuxCoord( [[[[1.0]]], [[[1.0]]]], From d918cb16734d38987a7f1a7d6a250411dce15a56 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 4 Mar 2020 16:36:15 +0100 Subject: [PATCH 081/117] Replaced AssertionError by warning if iris bug is fixed --- tests/integration/preprocessor/_io/test_concatenate.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py index 326197003f..1fbace4eba 100644 --- a/tests/integration/preprocessor/_io/test_concatenate.py +++ b/tests/integration/preprocessor/_io/test_concatenate.py @@ -1,5 +1,6 @@ """Integration tests for :func:`esmvalcore.preprocessor._io.concatenate`.""" +import warnings import unittest from unittest.mock import call @@ -133,7 +134,8 @@ def check_if_fix_aux_factories_is_necessary(): "is now possible in iris (i.e. issue #2478 has been fixed). Thus, " "this test and ALL appearances of the function " "'_fix_aux_factories' can safely be removed!") - assert 'air_pressure' not in coords, msg + if 'air_pressure' in coords: + warnings.warn(msg) def test_fix_aux_factories_empty_cube(mock_empty_cube): From 9f595933362f7cdcf4eb8c629596ca1424922159 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 13 Mar 2020 16:28:41 +0100 Subject: [PATCH 082/117] Added concatenation fix for atmosphere_sigma_coordinate --- esmvalcore/preprocessor/_io.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index d905a00a45..37668cde15 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -13,6 +13,7 @@ import yaml from .._task import write_ncl_settings +from ..cmor._fixes.shared import AtmosphereSigmaFactory from ._time import extract_time logger = logging.getLogger(__name__) @@ -62,6 +63,19 @@ def _fix_aux_factories(cube): else: cube.add_aux_factory(new_aux_factory) + # Atmosphere sigma coordinate + if 'atmosphere_sigma_coordinate' in coord_names: + new_aux_factory = AtmosphereSigmaFactory( + pressure_at_top=cube.coord(var_name='ptop'), + sigma=cube.coord(var_name='lev'), + surface_air_pressure=cube.coord(var_name='ps'), + ) + for aux_factory in cube.aux_factories: + if isinstance(aux_factory, AtmosphereSigmaFactory): + break + else: + cube.add_aux_factory(new_aux_factory) + def _get_attr_from_field_coord(ncfield, coord_name, attr): if coord_name is not None: From c71ad1af51430f9bb2124b1fdfaed37a447ea853 Mon Sep 17 00:00:00 2001 From: Manuel Schlund <32543114+schlunma@users.noreply.github.com> Date: Fri, 13 Mar 2020 16:32:41 +0100 Subject: [PATCH 083/117] Update esmvalcore/preprocessor/_io.py Co-Authored-By: Valeriu Predoi --- esmvalcore/preprocessor/_io.py | 1 + 1 file changed, 1 insertion(+) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 37668cde15..51fa0c201c 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -38,6 +38,7 @@ def _fix_aux_factories(cube): coord_names = [coord.name() for coord in cube.coords()] # Hybrid sigma pressure coordinate + # TODO possibly add support for other hybrid coordinates if 'atmosphere_hybrid_sigma_pressure_coordinate' in coord_names: new_aux_factory = iris.aux_factory.HybridPressureFactory( delta=cube.coord(var_name='ap'), From b0a2372bd415b196d0c444c535e1ae6f22e286c6 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 20 Mar 2020 16:29:43 +0100 Subject: [PATCH 084/117] remove cite_tag_value --- esmvalcore/_citation.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 38da8f5868..a3ca74c614 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -218,12 +218,3 @@ def _make_info_url(url_prefix): """Make info url based on CMIP6 Data Citation Service.""" info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{url_prefix}' return info_url - - -def cite_tag_value(tags): - """Convert tags to bibtex entries.""" - reference_entries = '' - if REFERENCES_PATH: - reference_entries = [_collect_bibtex_citation(tag) for tag in [tags]] - reference_entries = '\n'.join(reference_entries) - return reference_entries From 1d64c43bb3b1bf8bbadba34d0d78b9703dbfe990 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 20 Mar 2020 16:33:33 +0100 Subject: [PATCH 085/117] move \t to begning of the line, remove + from get attribute --- esmvalcore/_citation.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index a3ca74c614..fbe0c4c896 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -19,20 +19,20 @@ # it is the technical overview and should always be cited ESMVALTOOL_PAPER_TAG = 'righi19gmdd' ESMVALTOOL_PAPER = ( - '@article{righi19gmdd,\n\t' - 'doi = {10.5194/gmd-2019-226},\n\t' - 'url = {https://doi.org/10.5194%2Fgmd-2019-226},\n\t' - 'year = 2019,\n\t' - 'month = {sep},\n\t' - 'publisher = {Copernicus {GmbH}},\n\t' - 'author = {Mattia Righi and Bouwe Andela and Veronika Eyring ' + '@article{righi19gmdd,\n' + '\tdoi = {10.5194/gmd-2019-226},\n' + '\turl = {https://doi.org/10.5194%2Fgmd-2019-226},\n' + '\tyear = 2019,\n' + '\tmonth = {sep},\n' + '\tpublisher = {Copernicus {GmbH}},\n' + '\tauthor = {Mattia Righi and Bouwe Andela and Veronika Eyring ' 'and Axel Lauer and Valeriu Predoi and Manuel Schlund ' 'and Javier Vegas-Regidor and Lisa Bock and Björn Brötz ' 'and Lee de Mora and Faruk Diblen and Laura Dreyer ' 'and Niels Drost and Paul Earnshaw and Birgit Hassler ' 'and Nikolay Koldunov and Bill Little and Saskia Loosveldt Tomas ' - 'and Klaus Zimmermann},\n\t' - 'title = {{ESMValTool} v2.0 ' + 'and Klaus Zimmermann},\n' + '\ttitle = {{ESMValTool} v2.0 ' '{\\&}amp$\\mathsemicolon${\\#}8211$\\mathsemicolon$ ' 'Technical overview}\n' '}\n' @@ -54,9 +54,9 @@ def _write_citation_file(filename, provenance): json_urls = [] product_tags = [] for item in provenance.records: - reference_attr = item.get_attribute('attribute:' + 'references') + reference_attr = item.get_attribute('attribute:references') # get cmip6 citation info - value = item.get_attribute('attribute:' + 'mip_era') + value = item.get_attribute('attribute:mip_era') if 'CMIP6' in list(value): url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) @@ -66,13 +66,13 @@ def _write_citation_file(filename, provenance): if item.identifier.namespace.prefix == 'recipe': product_tags += list(reference_attr) # get diagnostics citation tags - if item.get_attribute('attribute:' + 'script_file'): + if item.get_attribute('attribute:script_file'): product_tags += list(reference_attr) # get other references information recorded by provenance tags = set(_clean_tags(product_tags + [ESMVALTOOL_PAPER_TAG])) for item in provenance.records: - reference_attr = item.get_attribute('attribute:' + 'references') + reference_attr = item.get_attribute('attribute:references') if reference_attr: value = set(_clean_tags(reference_attr)) if not value.issubset(tags): From 7bb95a8a3247a503ac618564951e7c4635c0acdb Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 20 Mar 2020 17:21:25 +0100 Subject: [PATCH 086/117] refactor write_citation_file function --- esmvalcore/_citation.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index fbe0c4c896..acefb9790e 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -57,25 +57,19 @@ def _write_citation_file(filename, provenance): reference_attr = item.get_attribute('attribute:references') # get cmip6 citation info value = item.get_attribute('attribute:mip_era') - if 'CMIP6' in list(value): + if 'CMIP6' in value: url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) json_urls.append(_make_json_url(url_prefix)) if reference_attr: # get recipe citation tags if item.identifier.namespace.prefix == 'recipe': - product_tags += list(reference_attr) + product_tags.extend(reference_attr) # get diagnostics citation tags - if item.get_attribute('attribute:script_file'): - product_tags += list(reference_attr) - - # get other references information recorded by provenance - tags = set(_clean_tags(product_tags + [ESMVALTOOL_PAPER_TAG])) - for item in provenance.records: - reference_attr = item.get_attribute('attribute:references') - if reference_attr: - value = set(_clean_tags(reference_attr)) - if not value.issubset(tags): + elif item.get_attribute('attribute:script_file'): + print(reference_attr) + product_tags.extend(reference_attr) + elif ESMVALTOOL_PAPER_TAG not in reference_attr: info_urls += list(reference_attr) _save_citation_info(product_name, product_tags, json_urls, info_urls) From d57c984e79faf9d69e3e316cd00dd00a60e3e193 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 20 Mar 2020 17:29:26 +0100 Subject: [PATCH 087/117] refactor clean_tag function, fix the logger --- esmvalcore/_citation.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index acefb9790e..05e1208825 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -67,7 +67,6 @@ def _write_citation_file(filename, provenance): product_tags.extend(reference_attr) # get diagnostics citation tags elif item.get_attribute('attribute:script_file'): - print(reference_attr) product_tags.extend(reference_attr) elif ESMVALTOOL_PAPER_TAG not in reference_attr: info_urls += list(reference_attr) @@ -93,7 +92,7 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): # convert tags to bibtex entries if REFERENCES_PATH and product_tags: # make tags clean and unique - tags = list(set(_clean_tags(product_tags))) + tags = _clean_tags(product_tags) for tag in tags: citation_entries.append(_collect_bibtex_citation(tag)) @@ -104,7 +103,7 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): def _clean_tags(tags): """Clean the tags that are recorded as str by provenance.""" pattern = re.compile(r'\w+') - return pattern.findall(str(tags)) + return list(set(pattern.findall(str(tags)))) def _get_response(url): @@ -116,9 +115,9 @@ def _get_response(url): if response.status_code == 200: json_data = response.json() else: - logger.info('Error in the CMIP json link: %s', url) + logger.warning('Error in the CMIP6 citation link: %s', url) except IOError: - logger.info('Error in receiving the CMIP json file') + logger.info('No network connection, unable to retrieve CMIP6 citation information') return json_data From 035a4422750aa022d06d53606b2b1dd3b173e9db Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 20 Mar 2020 17:40:19 +0100 Subject: [PATCH 088/117] fix minor things --- esmvalcore/_citation.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 05e1208825..9d509ac7e4 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -147,15 +147,17 @@ def _json_to_bibtex(data): doi = data.get('identifier').get('id', 'doi not found') url = f'https://doi.org/{doi}' - bibtex_entry = ( - f'{"@misc{"}{url},\n\t' - f'url = {{{url}}},\n\t' - f'title = {{{title}}},\n\t' - f'publisher = {{{publisher}}},\n\t' - f'year = {year},\n\t' - f'author = {{{authors}}},\n\t' - f'doi = {{{doi}}},\n' - f'{"}"}\n' + bibtex_entry = textwrap.dedent( + f""" + @misc{{{url} + \turl = {{{url}}}, + \ttitle = {{{title}}}, + \tpublisher = {{{publisher}}}, + \tyear = {year}, + \tauthor = {{{authors}}}, + \tdoi = {{{doi}}}, + }} + """.lstrip() ) return bibtex_entry @@ -166,7 +168,7 @@ def _collect_bibtex_citation(tag): if bibtex_file.is_file(): entry = bibtex_file.read_text() else: - logger.info( + logger.warning( 'The reference file %s does not exist.', bibtex_file ) entry = '' @@ -179,8 +181,7 @@ def _collect_cmip_citation(json_url): if json_data: bibtex_entry = _json_to_bibtex(json_data) else: - logger.info('Invalid json link %s', json_url) - bibtex_entry = False + bibtex_entry = '' return bibtex_entry From f267a02a35df0c17db389511a0a3d575cda671a3 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 20 Mar 2020 17:40:43 +0100 Subject: [PATCH 089/117] style --- esmvalcore/_provenance.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index 9ab3c134c6..e8f1a6521e 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -36,8 +36,7 @@ def get_esmvaltool_provenance(): create_namespace(provenance, namespace) # TODO: add dependencies with versions here - attributes_value = ESMVALTOOL_PAPER_TAG - attributes = {'attribute:references': attributes_value} + attributes = {'attribute:references': ESMVALTOOL_PAPER_TAG} activity = provenance.activity( namespace + ':esmvaltool==' + __version__, other_attributes=attributes) From 07c04baaa1911246deb435dea955fcc203329904 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 17:39:33 +0100 Subject: [PATCH 090/117] add import, refactor jason_to_bitex func --- esmvalcore/_citation.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 9d509ac7e4..91c560b8c1 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -3,6 +3,7 @@ import logging import re from pathlib import Path +import textwrap import requests from ._config import DIAGNOSTICS_PATH @@ -127,23 +128,17 @@ def _json_to_bibtex(data): title = data.get('titles', ['title not found'])[0] publisher = data.get('publisher', 'publisher not found') year = data.get('publicationYear', 'publicationYear not found') - authors = 'creators not found' doi = 'doi not found' - author_list = [] - if data.get('creators', False): + if data.get('creators', ''): author_list = [ item.get('creatorName', '') for item in data['creators'] ] - if author_list: - if author_list[0] == author_list[-1]: - authors = author_list[0] - if not authors: - authors = 'creatorName not found' - else: - authors = ' and '.join(author_list) - - if data.get('identifier', False): + authors = ' and '.join(author_list) + if not authors: + authors = 'creators not found' + + if data.get('identifier', ''): doi = data.get('identifier').get('id', 'doi not found') url = f'https://doi.org/{doi}' From 558e109c0d6ad051335bb8d5d35e3f8c30f14b4f Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 17:42:24 +0100 Subject: [PATCH 091/117] move the test to esmvaltool repo --- tests/integration/test_recipe.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 2d3276fe1a..d42c3a9455 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1261,9 +1261,6 @@ def test_diagnostic_task_provenance( assert product.attributes[key] == tuple(TAGS[key][k] for k in record[key]) - # Check that diagnostic reference files have been added - _test_bibtex_files(product.attributes['references']) - # Check that recipe diagnostic tags have been added src = yaml.safe_load(DEFAULT_DOCUMENTATION + content) for key in ('realms', 'themes'): @@ -1277,7 +1274,6 @@ def test_diagnostic_task_provenance( value = src['documentation'][key] if key == 'references': value = ', '.join(src['documentation'][key]) - _test_bibtex_files(value) assert recipe_record[0].get_attribute('attribute:' + key).pop() == value @@ -1289,18 +1285,6 @@ def test_diagnostic_task_provenance( assert os.path.exists(prefix + '.svg') -def _test_bibtex_files(product_tags): - """check bibtex files exit in REFERENCES_PATH.""" - if REFERENCES_PATH: - tags = list(set(_clean_tags(product_tags))) - for tag in tags: - bibtex_file = REFERENCES_PATH / f'{tag}.bibtex' - if not bibtex_file.is_file(): - raise ValueError( - 'The reference file {} does not exist.'.format(bibtex_file) - ) - - def test_alias_generation(tmp_path, patched_datafinder, config_user): content = dedent(""" From d60b53155b8d7d464361749b736a515a290d853c Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 17:45:58 +0100 Subject: [PATCH 092/117] undo the changes --- esmvalcore/_provenance.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index e8f1a6521e..dad826e1f0 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -32,11 +32,9 @@ def create_namespace(provenance, namespace): def get_esmvaltool_provenance(): """Create an esmvaltool run activity.""" provenance = ProvDocument() - for namespace in ('software', 'attribute'): - create_namespace(provenance, namespace) - - # TODO: add dependencies with versions here - attributes = {'attribute:references': ESMVALTOOL_PAPER_TAG} + namespace = 'software' + create_namespace(provenance, namespace) + attributes = {} # TODO: add dependencies with versions here activity = provenance.activity( namespace + ':esmvaltool==' + __version__, other_attributes=attributes) From 824143d048b9e36058d237738be229d21b99218d Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 18:37:48 +0100 Subject: [PATCH 093/117] refactor --- esmvalcore/_citation.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 91c560b8c1..85483f55aa 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -18,9 +18,9 @@ CMIP6_URL_STEM = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch' # it is the technical overview and should always be cited -ESMVALTOOL_PAPER_TAG = 'righi19gmdd' +ESMVALTOOL_PAPER_TAG = 'righi19gmd' ESMVALTOOL_PAPER = ( - '@article{righi19gmdd,\n' + '@article{righi19gmd,\n' '\tdoi = {10.5194/gmd-2019-226},\n' '\turl = {https://doi.org/10.5194%2Fgmd-2019-226},\n' '\tyear = 2019,\n' @@ -80,9 +80,13 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): # save CMIP6 url_info, if any # save any refrences info that is not related to recipe or diagnostics + title = [ + "Some citation information are found, " + "which are not mentioned in the recipe or diagnostic." + ] if info_urls: with open(f'{product_name}_data_citation_info.txt', 'w') as file: - file.write('\n'.join(list(set(info_urls)))) + file.write('\n'.join(title + list(set(info_urls)))) # convert json_urls to bibtex entries for json_url in json_urls: @@ -118,7 +122,10 @@ def _get_response(url): else: logger.warning('Error in the CMIP6 citation link: %s', url) except IOError: - logger.info('No network connection, unable to retrieve CMIP6 citation information') + logger.info( + 'No network connection,' + 'unable to retrieve CMIP6 citation information' + ) return json_data @@ -128,6 +135,7 @@ def _json_to_bibtex(data): title = data.get('titles', ['title not found'])[0] publisher = data.get('publisher', 'publisher not found') year = data.get('publicationYear', 'publicationYear not found') + authors = 'creators not found' doi = 'doi not found' if data.get('creators', ''): @@ -135,8 +143,8 @@ def _json_to_bibtex(data): item.get('creatorName', '') for item in data['creators'] ] authors = ' and '.join(author_list) - if not authors: - authors = 'creators not found' + if not authors: + authors = 'creators not found' if data.get('identifier', ''): doi = data.get('identifier').get('id', 'doi not found') @@ -144,13 +152,13 @@ def _json_to_bibtex(data): bibtex_entry = textwrap.dedent( f""" - @misc{{{url} - \turl = {{{url}}}, - \ttitle = {{{title}}}, - \tpublisher = {{{publisher}}}, - \tyear = {year}, - \tauthor = {{{authors}}}, - \tdoi = {{{doi}}}, + @misc{{{url}, + url = {{{url}}}, + title = {{{title}}}, + publisher = {{{publisher}}}, + year = {year}, + author = {{{authors}}}, + doi = {{{doi}}}, }} """.lstrip() ) From d85156c07dfa1d774370255a0d161c7d5f946d71 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 18:38:12 +0100 Subject: [PATCH 094/117] fix the tests --- tests/integration/test_citation.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index eb6b64296f..0623ace8f6 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -1,4 +1,5 @@ """Test _citation.py.""" +import textwrap from prov.model import ProvDocument import esmvalcore @@ -75,15 +76,17 @@ def test_cmip6_data_citation(tmp_path, monkeypatch): year = 'publicationYear not found' authors = 'creators not found' doi = 'doi not found' - fake_bibtex_entry = ( - f'{"@misc{"}{url},\n\t' - f'url = {{{url}}},\n\t' - f'title = {{{title}}},\n\t' - f'publisher = {{{publisher}}},\n\t' - f'year = {year},\n\t' - f'author = {{{authors}}},\n\t' - f'doi = {{{doi}}},\n' - f'{"}"}\n' + fake_bibtex_entry = textwrap.dedent( + f""" + @misc{{{url}, + url = {{{url}}}, + title = {{{title}}}, + publisher = {{{publisher}}}, + year = {year}, + author = {{{authors}}}, + doi = {{{doi}}}, + }} + """.lstrip() ) assert citation_file.read_text() == '\n'.join( [ESMVALTOOL_PAPER, fake_bibtex_entry] @@ -111,5 +114,9 @@ def test_cmip6_data_citation_url(tmp_path): # Create fake info url fake_url_prefix = '.'.join(attributes.values()) - fake_info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{fake_url_prefix}' - assert citation_url.read_text() == fake_info_url + fake_info_url = [f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{fake_url_prefix}'] + title = [ + "Some citation information are found, " + "which are not mentioned in the recipe or diagnostic." + ] + assert citation_url.read_text() == '\n'.join(title + fake_info_url) From 4e9e0141e55801d0b00994e4b3a96643a6c681b6 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 23:29:44 +0100 Subject: [PATCH 095/117] fix get_recipe_provenance function --- esmvalcore/_provenance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index dad826e1f0..fd6f032001 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -80,7 +80,7 @@ def get_recipe_provenance(documentation, filename): entity = provenance.entity( 'recipe:{}'.format(filename), { 'attribute:description': documentation.get('description', ''), - 'attribute:references': ', '.join( + 'attribute:references': str( documentation.get('references', [])), }) From a1bbbff24e5e06be81c50e4e8d40ec5fa1e56404 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 23:30:10 +0100 Subject: [PATCH 096/117] refactor extract_tags function --- esmvalcore/_citation.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 85483f55aa..db272c213b 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -96,8 +96,7 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): # convert tags to bibtex entries if REFERENCES_PATH and product_tags: - # make tags clean and unique - tags = _clean_tags(product_tags) + tags = _extract_tags(product_tags) for tag in tags: citation_entries.append(_collect_bibtex_citation(tag)) @@ -105,8 +104,10 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): file.write('\n'.join(citation_entries)) -def _clean_tags(tags): - """Clean the tags that are recorded as str by provenance.""" +def _extract_tags(tags): + """Extract tags that are recorded by provenance, + as for example, "['acknow_project', 'acknow_author']". + """ pattern = re.compile(r'\w+') return list(set(pattern.findall(str(tags)))) From f106cd0cde8d9852cc907db1c6676059cd0cf1e5 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 23:32:20 +0100 Subject: [PATCH 097/117] remove esmvaltool_paper_tag --- esmvalcore/_citation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index db272c213b..4389123424 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -18,7 +18,6 @@ CMIP6_URL_STEM = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch' # it is the technical overview and should always be cited -ESMVALTOOL_PAPER_TAG = 'righi19gmd' ESMVALTOOL_PAPER = ( '@article{righi19gmd,\n' '\tdoi = {10.5194/gmd-2019-226},\n' @@ -69,7 +68,7 @@ def _write_citation_file(filename, provenance): # get diagnostics citation tags elif item.get_attribute('attribute:script_file'): product_tags.extend(reference_attr) - elif ESMVALTOOL_PAPER_TAG not in reference_attr: + else: info_urls += list(reference_attr) _save_citation_info(product_name, product_tags, json_urls, info_urls) From d54c0126e943bf18b6727bc7ea7a40f506b33f17 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 23:37:51 +0100 Subject: [PATCH 098/117] remove esmvaltool_paper_tag --- esmvalcore/_provenance.py | 1 - 1 file changed, 1 deletion(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index fd6f032001..bf675fae0b 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -10,7 +10,6 @@ from prov.model import ProvDocument from ._version import __version__ -from ._citation import ESMVALTOOL_PAPER_TAG logger = logging.getLogger(__name__) From a9f13234687dba5d85f43bb17ae860760ab38c96 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 24 Mar 2020 10:53:59 +0100 Subject: [PATCH 099/117] refcator bibtex string --- esmvalcore/_citation.py | 12 ++++++------ tests/integration/test_citation.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 4389123424..a426ebbfb5 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -153,12 +153,12 @@ def _json_to_bibtex(data): bibtex_entry = textwrap.dedent( f""" @misc{{{url}, - url = {{{url}}}, - title = {{{title}}}, - publisher = {{{publisher}}}, - year = {year}, - author = {{{authors}}}, - doi = {{{doi}}}, + \turl = {{{url}}}, + \ttitle = {{{title}}}, + \tpublisher = {{{publisher}}}, + \tyear = {year}, + \tauthor = {{{authors}}}, + \tdoi = {{{doi}}}, }} """.lstrip() ) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index 0623ace8f6..6d51ad01ab 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -79,12 +79,12 @@ def test_cmip6_data_citation(tmp_path, monkeypatch): fake_bibtex_entry = textwrap.dedent( f""" @misc{{{url}, - url = {{{url}}}, - title = {{{title}}}, - publisher = {{{publisher}}}, - year = {year}, - author = {{{authors}}}, - doi = {{{doi}}}, + \turl = {{{url}}}, + \ttitle = {{{title}}}, + \tpublisher = {{{publisher}}}, + \tyear = {year}, + \tauthor = {{{authors}}}, + \tdoi = {{{doi}}}, }} """.lstrip() ) From ddc5e711bb4357d6b104e60f7d324f4ebac460a3 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 24 Mar 2020 11:42:55 +0100 Subject: [PATCH 100/117] remove import from _citation --- tests/integration/test_recipe.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 05bfd7d265..5ace9184d3 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -15,7 +15,6 @@ from esmvalcore._task import DiagnosticTask from esmvalcore.preprocessor import DEFAULT_ORDER, PreprocessingTask from esmvalcore.preprocessor._io import concatenate_callback -from esmvalcore._citation import REFERENCES_PATH, _clean_tags from esmvalcore.cmor.check import CheckLevels From aba4457fd1deb8aa2563f5a3e839695d5df80c06 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 24 Mar 2020 11:44:49 +0100 Subject: [PATCH 101/117] remove lstrip() --- esmvalcore/_citation.py | 2 +- tests/integration/test_citation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index a426ebbfb5..42248411ab 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -160,7 +160,7 @@ def _json_to_bibtex(data): \tauthor = {{{authors}}}, \tdoi = {{{doi}}}, }} - """.lstrip() + """ ) return bibtex_entry diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index 6d51ad01ab..d8834d8843 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -86,7 +86,7 @@ def test_cmip6_data_citation(tmp_path, monkeypatch): \tauthor = {{{authors}}}, \tdoi = {{{doi}}}, }} - """.lstrip() + """ ) assert citation_file.read_text() == '\n'.join( [ESMVALTOOL_PAPER, fake_bibtex_entry] From 27f4effc59c7e5de3847711b8ab98f55566972d2 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 24 Mar 2020 13:10:06 +0100 Subject: [PATCH 102/117] add str and fix the test --- tests/integration/test_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 5ace9184d3..e6f64b8ad0 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1282,7 +1282,7 @@ def test_diagnostic_task_provenance( for key in ('description', 'references'): value = src['documentation'][key] if key == 'references': - value = ', '.join(src['documentation'][key]) + value = str(src['documentation'][key]) assert recipe_record[0].get_attribute('attribute:' + key).pop() == value From fb2f057108e46c342420a1077b45832be3a9bb39 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 24 Mar 2020 13:10:23 +0100 Subject: [PATCH 103/117] style --- esmvalcore/_citation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 42248411ab..cb2e935711 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -104,8 +104,9 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): def _extract_tags(tags): - """Extract tags that are recorded by provenance, - as for example, "['acknow_project', 'acknow_author']". + """ + Extract tags that are recorded by provenance as + for example, "['acknow_project', 'acknow_author']". """ pattern = re.compile(r'\w+') return list(set(pattern.findall(str(tags)))) From 60b90800764f611c8a665a0f9ac6b3ef2cea3678 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 25 Mar 2020 12:52:28 +0100 Subject: [PATCH 104/117] refactor write_citation_file function --- esmvalcore/_citation.py | 56 +++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index cb2e935711..9b77bfb6b2 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -50,17 +50,18 @@ def _write_citation_file(filename, provenance): Otherwise, cmip6 data reference links are saved into a text file. """ product_name = os.path.splitext(filename)[0] - info_urls = [] - json_urls = [] + further_info = [] + cmip6_info_urls = [] + cmip6_json_urls = [] product_tags = [] for item in provenance.records: - reference_attr = item.get_attribute('attribute:references') # get cmip6 citation info - value = item.get_attribute('attribute:mip_era') - if 'CMIP6' in value: + mip_era = item.get_attribute('attribute:mip_era') + if 'CMIP6' in mip_era: url_prefix = _make_url_prefix(item.attributes) - info_urls.append(_make_info_url(url_prefix)) - json_urls.append(_make_json_url(url_prefix)) + cmip6_info_urls.append(_make_info_url(url_prefix)) + cmip6_json_urls.append(_make_json_url(url_prefix)) + reference_attr = item.get_attribute('attribute:references') if reference_attr: # get recipe citation tags if item.identifier.namespace.prefix == 'recipe': @@ -69,24 +70,14 @@ def _write_citation_file(filename, provenance): elif item.get_attribute('attribute:script_file'): product_tags.extend(reference_attr) else: - info_urls += list(reference_attr) - - _save_citation_info(product_name, product_tags, json_urls, info_urls) + further_info.extend(reference_attr) + _save_citation(product_name, product_tags, cmip6_json_urls) + _save_citation_info(product_name, cmip6_info_urls, further_info) -def _save_citation_info(product_name, product_tags, json_urls, info_urls): +def _save_citation(product_name, product_tags, json_urls): + """Save all bibtex entries in one bibtex file.""" citation_entries = [ESMVALTOOL_PAPER] - - # save CMIP6 url_info, if any - # save any refrences info that is not related to recipe or diagnostics - title = [ - "Some citation information are found, " - "which are not mentioned in the recipe or diagnostic." - ] - if info_urls: - with open(f'{product_name}_data_citation_info.txt', 'w') as file: - file.write('\n'.join(title + list(set(info_urls)))) - # convert json_urls to bibtex entries for json_url in json_urls: cmip_citation = _collect_cmip_citation(json_url) @@ -103,6 +94,27 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): file.write('\n'.join(citation_entries)) +def _save_citation_info(product_name, info_urls, further_info): + """Save all citation information in one text file.""" + lines = [] + # save CMIP6 url_info, if any + if info_urls: + lines.append( + "Follow the links below to find more information about CMIP6 data." + ) + lines.extend(info_urls) + # save any refrences info that is not related to recipe or diagnostics + if further_info: + lines.append( + "Some data citation information are found, " + "which are not mentioned in the recipe or diagnostic." + ) + lines.extend(further_info) + if lines: + with open(f'{product_name}_data_citation_info.txt', 'w') as file: + file.write('\n'.join(lines)) + + def _extract_tags(tags): """ Extract tags that are recorded by provenance as From fd0ff2e8cf13858db6045e86d3d41e1c89ec7a68 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 25 Mar 2020 13:04:51 +0100 Subject: [PATCH 105/117] fix multiline docstring --- esmvalcore/_citation.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 9b77bfb6b2..707a69f3e0 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -75,6 +75,7 @@ def _write_citation_file(filename, provenance): _save_citation(product_name, product_tags, cmip6_json_urls) _save_citation_info(product_name, cmip6_info_urls, further_info) + def _save_citation(product_name, product_tags, json_urls): """Save all bibtex entries in one bibtex file.""" citation_entries = [ESMVALTOOL_PAPER] @@ -116,9 +117,10 @@ def _save_citation_info(product_name, info_urls, further_info): def _extract_tags(tags): - """ - Extract tags that are recorded by provenance as - for example, "['acknow_project', 'acknow_author']". + """Extract tags. + + Tags are recorded as string of lists by provenance. + For example, "['acknow_project', 'acknow_author']". """ pattern = re.compile(r'\w+') return list(set(pattern.findall(str(tags)))) @@ -136,7 +138,7 @@ def _get_response(url): logger.warning('Error in the CMIP6 citation link: %s', url) except IOError: logger.info( - 'No network connection,' + 'No network connection, ' 'unable to retrieve CMIP6 citation information' ) return json_data From 6bb038ffc04e128e48b437a53e7167c5dda6c5b7 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 25 Mar 2020 13:05:24 +0100 Subject: [PATCH 106/117] fix title for info_url --- tests/integration/test_citation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index d8834d8843..69867653c5 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -116,7 +116,6 @@ def test_cmip6_data_citation_url(tmp_path): fake_url_prefix = '.'.join(attributes.values()) fake_info_url = [f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{fake_url_prefix}'] title = [ - "Some citation information are found, " - "which are not mentioned in the recipe or diagnostic." + "Follow the links below to find more information about CMIP6 data." ] assert citation_url.read_text() == '\n'.join(title + fake_info_url) From c995e663f093f05d28a38ad4a3c7eff56b61d6c0 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 25 Mar 2020 13:28:38 +0100 Subject: [PATCH 107/117] fix minor things --- esmvalcore/_citation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 707a69f3e0..0ee529901a 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -128,7 +128,7 @@ def _extract_tags(tags): def _get_response(url): """Return information from CMIP6 Data Citation service in json format.""" - json_data = False + json_data = None if url.lower().startswith('https'): try: response = requests.get(url) @@ -153,7 +153,7 @@ def _json_to_bibtex(data): authors = 'creators not found' doi = 'doi not found' - if data.get('creators', ''): + if 'creators' in data: author_list = [ item.get('creatorName', '') for item in data['creators'] ] @@ -161,8 +161,8 @@ def _json_to_bibtex(data): if not authors: authors = 'creators not found' - if data.get('identifier', ''): - doi = data.get('identifier').get('id', 'doi not found') + if 'identifier' in data: + doi = data['identifier'].get('id', 'doi not found') url = f'https://doi.org/{doi}' bibtex_entry = textwrap.dedent( @@ -228,5 +228,5 @@ def _make_json_url(url_prefix): def _make_info_url(url_prefix): """Make info url based on CMIP6 Data Citation Service.""" - info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{url_prefix}' + info_url = f'{CMIP6_URL_STEM}/cmip6?input={url_prefix}' return info_url From 096e70d999e973d6b578b6c739f4bbfd802db849 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 25 Mar 2020 13:30:04 +0100 Subject: [PATCH 108/117] remove duplicated cmip6 --- tests/integration/test_citation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index 69867653c5..8dc4b89c8a 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -114,7 +114,7 @@ def test_cmip6_data_citation_url(tmp_path): # Create fake info url fake_url_prefix = '.'.join(attributes.values()) - fake_info_url = [f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{fake_url_prefix}'] + fake_info_url = [f'{CMIP6_URL_STEM}/cmip6?input={fake_url_prefix}'] title = [ "Follow the links below to find more information about CMIP6 data." ] From f078010c9532dd74543f9bbfcc80195e38e4769d Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 25 Mar 2020 13:33:32 +0100 Subject: [PATCH 109/117] style --- esmvalcore/_citation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 0ee529901a..7365ca48c7 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -45,9 +45,9 @@ def _write_citation_file(filename, provenance): Recipe and cmip6 data references are saved into one bibtex file. cmip6 data references are provided by CMIP6 data citation service. - each cmip6 data reference has a json link. In the case of internet + Each cmip6 data reference has a json link. In the case of internet connection, cmip6 data references are saved into a bibtex file. - Otherwise, cmip6 data reference links are saved into a text file. + Also, cmip6 data reference links are saved into a text file. """ product_name = os.path.splitext(filename)[0] further_info = [] @@ -119,7 +119,7 @@ def _save_citation_info(product_name, info_urls, further_info): def _extract_tags(tags): """Extract tags. - Tags are recorded as string of lists by provenance. + Tags are recorded as string of a list by provenance. For example, "['acknow_project', 'acknow_author']". """ pattern = re.compile(r'\w+') From 26a571b29fed1468179b105c6866f152056438bf Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 26 Mar 2020 10:27:18 +0100 Subject: [PATCH 110/117] Adapted custom co2s table to match CMIP6 version --- esmvalcore/cmor/tables/custom/CMOR_co2s.dat | 7 ++++--- esmvalcore/preprocessor/_derive/co2s.py | 4 ++-- tests/unit/preprocessor/_derive/test_co2s.py | 8 ++++---- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/esmvalcore/cmor/tables/custom/CMOR_co2s.dat b/esmvalcore/cmor/tables/custom/CMOR_co2s.dat index c1b9af6168..fd0a876b6c 100644 --- a/esmvalcore/cmor/tables/custom/CMOR_co2s.dat +++ b/esmvalcore/cmor/tables/custom/CMOR_co2s.dat @@ -7,10 +7,11 @@ modeling_realm: atmos ! Variable attributes: !---------------------------------- standard_name: mole_fraction_of_carbon_dioxide_in_air -units: mol mol-1 -cell_methods: time: mean +units: 1e-06 +cell_methods: area: time: mean cell_measures: area: areacella -long_name: Mole Fraction of CO2 at surface level +long_name: Atmosphere CO2 +comment: As co2, but only at the surface !---------------------------------- ! Additional variable information: !---------------------------------- diff --git a/esmvalcore/preprocessor/_derive/co2s.py b/esmvalcore/preprocessor/_derive/co2s.py index 4305e08a19..b3a98cfef7 100644 --- a/esmvalcore/preprocessor/_derive/co2s.py +++ b/esmvalcore/preprocessor/_derive/co2s.py @@ -11,7 +11,7 @@ class DerivedVariable(DerivedVariableBase): @staticmethod def required(project): """Declare the variables needed for derivation.""" - required = [{'short_name': 'co2'}] + required = [{'short_name': 'co2', 'mip': 'Amon'}] return required @staticmethod @@ -35,5 +35,5 @@ def calculate(cubes): surface_data = cube.data[tuple(indices)] cube = cube[:, 0, :, :] cube.data = surface_data - cube.convert_units('mol mol-1') + cube.convert_units('1e-6') return cube diff --git a/tests/unit/preprocessor/_derive/test_co2s.py b/tests/unit/preprocessor/_derive/test_co2s.py index fd5155a0bd..38206be9b1 100644 --- a/tests/unit/preprocessor/_derive/test_co2s.py +++ b/tests/unit/preprocessor/_derive/test_co2s.py @@ -43,7 +43,7 @@ def masked_cubes(): co2_data, var_name='co2', standard_name='mole_fraction_of_carbon_dioxide_in_air', - units='1', + units='1e-6', dim_coords_and_dims=coord_spec, ) return iris.cube.CubeList([cube]) @@ -63,7 +63,7 @@ def unmasked_cubes(): co2_data, var_name='co2', standard_name='mole_fraction_of_carbon_dioxide_in_air', - units='1e-1', + units='1e-7', dim_coords_and_dims=coord_spec, ) return iris.cube.CubeList([cube]) @@ -77,7 +77,7 @@ def test_co2_calculate_masked_cubes(masked_cubes): np.testing.assert_allclose(out_cube.data, [[[170.0, 100.0], [80.0, 10.0]]]) - assert out_cube.units == 'mol mol-1' + assert out_cube.units == '1e-6' np.testing.assert_allclose(out_cube.coord('air_pressure').points, 123456.0) @@ -90,6 +90,6 @@ def test_co2_calculate_unmasked_cubes(unmasked_cubes): np.testing.assert_allclose(out_cube.data, [[[20.0, 10.0], [8.0, 0.9]]]) - assert out_cube.units == 'mol mol-1' + assert out_cube.units == '1e-6' np.testing.assert_allclose(out_cube.coord('air_pressure').points, 123456.0) From 0beabacc9f904e2d25841bcaf4850089acabad89 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 26 Mar 2020 10:34:59 +0100 Subject: [PATCH 111/117] Removed unnecessary mip in co2s derivation script --- esmvalcore/preprocessor/_derive/co2s.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_derive/co2s.py b/esmvalcore/preprocessor/_derive/co2s.py index b3a98cfef7..8a1dd0949f 100644 --- a/esmvalcore/preprocessor/_derive/co2s.py +++ b/esmvalcore/preprocessor/_derive/co2s.py @@ -11,7 +11,7 @@ class DerivedVariable(DerivedVariableBase): @staticmethod def required(project): """Declare the variables needed for derivation.""" - required = [{'short_name': 'co2', 'mip': 'Amon'}] + required = [{'short_name': 'co2'}] return required @staticmethod From d035e4367606b55b9da1b3fb9d6499826f5fdef0 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 27 Mar 2020 10:08:35 +0100 Subject: [PATCH 112/117] Adapted test of co2s derivation script --- tests/unit/preprocessor/_derive/test_co2s.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/preprocessor/_derive/test_co2s.py b/tests/unit/preprocessor/_derive/test_co2s.py index 38206be9b1..ad05aebd11 100644 --- a/tests/unit/preprocessor/_derive/test_co2s.py +++ b/tests/unit/preprocessor/_derive/test_co2s.py @@ -63,7 +63,7 @@ def unmasked_cubes(): co2_data, var_name='co2', standard_name='mole_fraction_of_carbon_dioxide_in_air', - units='1e-7', + units='1e-8', dim_coords_and_dims=coord_spec, ) return iris.cube.CubeList([cube]) @@ -88,8 +88,8 @@ def test_co2_calculate_unmasked_cubes(unmasked_cubes): out_cube = derived_var.calculate(unmasked_cubes) assert not np.ma.is_masked(out_cube.data) np.testing.assert_allclose(out_cube.data, - [[[20.0, 10.0], - [8.0, 0.9]]]) + [[[2.0, 1.0], + [0.8, 0.09]]]) assert out_cube.units == '1e-6' np.testing.assert_allclose(out_cube.coord('air_pressure').points, 123456.0) From 66fdf11adcc40b036e2913020f0b0d4e4069d880 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 30 Mar 2020 16:35:20 +0200 Subject: [PATCH 113/117] Improve text and avoid duplicate citation entries --- esmvalcore/_citation.py | 144 ++++++++++++++++------------- esmvalcore/_task.py | 4 +- tests/integration/test_citation.py | 41 ++++---- 3 files changed, 99 insertions(+), 90 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 7365ca48c7..3dfc0f1b9c 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -1,23 +1,21 @@ """Citation module.""" -import os import logging +import os import re -from pathlib import Path import textwrap +from functools import lru_cache + import requests from ._config import DIAGNOSTICS_PATH -if DIAGNOSTICS_PATH: - REFERENCES_PATH = Path(DIAGNOSTICS_PATH) / 'references' -else: - REFERENCES_PATH = '' - logger = logging.getLogger(__name__) +REFERENCES_PATH = DIAGNOSTICS_PATH / 'references' + CMIP6_URL_STEM = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch' -# it is the technical overview and should always be cited +# The technical overview paper should always be cited ESMVALTOOL_PAPER = ( '@article{righi19gmd,\n' '\tdoi = {10.5194/gmd-2019-226},\n' @@ -35,11 +33,10 @@ '\ttitle = {{ESMValTool} v2.0 ' '{\\&}amp$\\mathsemicolon${\\#}8211$\\mathsemicolon$ ' 'Technical overview}\n' - '}\n' -) + '}\n') -def _write_citation_file(filename, provenance): +def _write_citation_files(filename, provenance): """ Write citation information provided by the recorded provenance. @@ -50,80 +47,97 @@ def _write_citation_file(filename, provenance): Also, cmip6 data reference links are saved into a text file. """ product_name = os.path.splitext(filename)[0] - further_info = [] - cmip6_info_urls = [] - cmip6_json_urls = [] - product_tags = [] + + tags = set() + cmip6_json_urls = set() + cmip6_info_urls = set() + other_info = set() + for item in provenance.records: - # get cmip6 citation info - mip_era = item.get_attribute('attribute:mip_era') - if 'CMIP6' in mip_era: + # get cmip6 data citation info + cmip6_data = 'CMIP6' in item.get_attribute('attribute:mip_era') + if cmip6_data: url_prefix = _make_url_prefix(item.attributes) - cmip6_info_urls.append(_make_info_url(url_prefix)) - cmip6_json_urls.append(_make_json_url(url_prefix)) - reference_attr = item.get_attribute('attribute:references') - if reference_attr: - # get recipe citation tags + cmip6_info_urls.add(_make_info_url(url_prefix)) + cmip6_json_urls.add(_make_json_url(url_prefix)) + + # get other citation info + references = item.get_attribute('attribute:references') + if not references: + # ESMValTool CMORization scripts use 'reference' (without final s) + references = item.get_attribute('attribute:reference') + if references: if item.identifier.namespace.prefix == 'recipe': - product_tags.extend(reference_attr) - # get diagnostics citation tags + # get recipe citation tags + tags.update(references) elif item.get_attribute('attribute:script_file'): - product_tags.extend(reference_attr) - else: - further_info.extend(reference_attr) + # get diagnostics citation tags + tags.update(references) + elif not cmip6_data: + # get any other data citation tags, e.g. CMIP5 + other_info.update(references) - _save_citation(product_name, product_tags, cmip6_json_urls) - _save_citation_info(product_name, cmip6_info_urls, further_info) + _save_citation_bibtex(product_name, tags, cmip6_json_urls) + _save_citation_info_txt(product_name, cmip6_info_urls, other_info) -def _save_citation(product_name, product_tags, json_urls): - """Save all bibtex entries in one bibtex file.""" +def _save_citation_bibtex(product_name, tags, json_urls): + """Save the bibtex entries in a bibtex file.""" citation_entries = [ESMVALTOOL_PAPER] + + # convert tags to bibtex entries + if tags: + entries = set() + for tag in _extract_tags(tags): + entries.add(_collect_bibtex_citation(tag)) + citation_entries.extend(sorted(entries)) + # convert json_urls to bibtex entries + entries = set() for json_url in json_urls: cmip_citation = _collect_cmip_citation(json_url) if cmip_citation: - citation_entries.append(cmip_citation) - - # convert tags to bibtex entries - if REFERENCES_PATH and product_tags: - tags = _extract_tags(product_tags) - for tag in tags: - citation_entries.append(_collect_bibtex_citation(tag)) + entries.add(cmip_citation) + citation_entries.extend(sorted(entries)) with open(f'{product_name}_citation.bibtex', 'w') as file: file.write('\n'.join(citation_entries)) -def _save_citation_info(product_name, info_urls, further_info): - """Save all citation information in one text file.""" +def _save_citation_info_txt(product_name, info_urls, other_info): + """Save all data citation information in one text file.""" lines = [] - # save CMIP6 url_info, if any + # Save CMIP6 url_info if info_urls: lines.append( - "Follow the links below to find more information about CMIP6 data." - ) - lines.extend(info_urls) - # save any refrences info that is not related to recipe or diagnostics - if further_info: - lines.append( - "Some data citation information are found, " - "which are not mentioned in the recipe or diagnostic." + "Follow the links below to find more information about CMIP6 data:" ) - lines.extend(further_info) + lines.extend(f'- {url}' for url in sorted(info_urls)) + + # Save any references from the 'references' and 'reference' NetCDF global + # attributes. + if other_info: + if lines: + lines.append('') + lines.append("Additional data citation information was found, for " + "which no entry is available in the bibtex file:") + lines.extend('- ' + str(t).replace('\n', ' ') + for t in sorted(other_info)) + if lines: with open(f'{product_name}_data_citation_info.txt', 'w') as file: - file.write('\n'.join(lines)) + file.write('\n'.join(lines) + '\n') def _extract_tags(tags): """Extract tags. - Tags are recorded as string of a list by provenance. - For example, "['acknow_project', 'acknow_author']". + Tags are recorded as a list of strings converted to a string in provenance. + For example, a single entry in the list `tags` could be the string + "['acknow_project', 'acknow_author']". """ pattern = re.compile(r'\w+') - return list(set(pattern.findall(str(tags)))) + return set(pattern.findall(str(tags))) def _get_response(url): @@ -137,10 +151,8 @@ def _get_response(url): else: logger.warning('Error in the CMIP6 citation link: %s', url) except IOError: - logger.info( - 'No network connection, ' - 'unable to retrieve CMIP6 citation information' - ) + logger.info('No network connection, ' + 'unable to retrieve CMIP6 citation information') return json_data @@ -165,8 +177,7 @@ def _json_to_bibtex(data): doi = data['identifier'].get('id', 'doi not found') url = f'https://doi.org/{doi}' - bibtex_entry = textwrap.dedent( - f""" + bibtex_entry = textwrap.dedent(f""" @misc{{{url}, \turl = {{{url}}}, \ttitle = {{{title}}}, @@ -175,24 +186,25 @@ def _json_to_bibtex(data): \tauthor = {{{authors}}}, \tdoi = {{{doi}}}, }} - """ - ) + """).lstrip() return bibtex_entry +@lru_cache(maxsize=1024) def _collect_bibtex_citation(tag): """Collect information from bibtex files.""" bibtex_file = REFERENCES_PATH / f'{tag}.bibtex' if bibtex_file.is_file(): entry = bibtex_file.read_text() else: - logger.warning( - 'The reference file %s does not exist.', bibtex_file - ) entry = '' + logger.warning( + "The reference file %s does not exist, citation information " + "incomplete.", bibtex_file) return entry +@lru_cache(maxsize=1024) def _collect_cmip_citation(json_url): """Collect information from CMIP6 Data Citation Service.""" json_data = _get_response(json_url) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index add4e12ecc..d348f80ad3 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -17,9 +17,9 @@ import psutil import yaml +from ._citation import _write_citation_files from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags from ._provenance import TrackedFile, get_task_provenance -from ._citation import _write_citation_file logger = logging.getLogger(__name__) @@ -566,7 +566,7 @@ def _collect_provenance(self): product = TrackedFile(filename, attributes, ancestors) product.initialize_provenance(self.activity) product.save_provenance() - _write_citation_file(product.filename, product.provenance) + _write_citation_files(product.filename, product.provenance) self.products.add(product) logger.debug("Collecting provenance of task %s took %.1f seconds", self.name, diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index 8dc4b89c8a..50fb65ef02 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -1,10 +1,11 @@ """Test _citation.py.""" import textwrap + from prov.model import ProvDocument import esmvalcore -from esmvalcore._citation import (_write_citation_file, - ESMVALTOOL_PAPER, CMIP6_URL_STEM) +from esmvalcore._citation import (CMIP6_URL_STEM, ESMVALTOOL_PAPER, + _write_citation_files) from esmvalcore._provenance import ESMVALTOOL_URI_PREFIX @@ -25,14 +26,13 @@ def test_references(tmp_path, monkeypatch): # Create fake bibtex references tag file references_path = tmp_path / 'references' references_path.mkdir() - monkeypatch.setattr( - esmvalcore._citation, 'REFERENCES_PATH', references_path - ) + monkeypatch.setattr(esmvalcore._citation, 'REFERENCES_PATH', + references_path) fake_bibtex_file = references_path / 'test_tag.bibtex' fake_bibtex = "Fake bibtex file content\n" fake_bibtex_file.write_text(fake_bibtex) - _write_citation_file(filename, provenance) + _write_citation_files(filename, provenance) citation_file = tmp_path / 'output_citation.bibtex' citation = citation_file.read_text() assert citation == '\n'.join([ESMVALTOOL_PAPER, fake_bibtex]) @@ -63,10 +63,9 @@ def test_cmip6_data_citation(tmp_path, monkeypatch): filename = str(tmp_path / 'output.nc') provenance.entity('file:' + filename, attributes) - monkeypatch.setattr( - esmvalcore._citation, '_get_response', mock_get_response - ) - _write_citation_file(filename, provenance) + monkeypatch.setattr(esmvalcore._citation, '_get_response', + mock_get_response) + _write_citation_files(filename, provenance) citation_file = tmp_path / 'output_citation.bibtex' # Create fake bibtex entry @@ -76,8 +75,7 @@ def test_cmip6_data_citation(tmp_path, monkeypatch): year = 'publicationYear not found' authors = 'creators not found' doi = 'doi not found' - fake_bibtex_entry = textwrap.dedent( - f""" + fake_bibtex_entry = textwrap.dedent(f""" @misc{{{url}, \turl = {{{url}}}, \ttitle = {{{title}}}, @@ -86,11 +84,9 @@ def test_cmip6_data_citation(tmp_path, monkeypatch): \tauthor = {{{authors}}}, \tdoi = {{{doi}}}, }} - """ - ) + """).lstrip() assert citation_file.read_text() == '\n'.join( - [ESMVALTOOL_PAPER, fake_bibtex_entry] - ) + [ESMVALTOOL_PAPER, fake_bibtex_entry]) def test_cmip6_data_citation_url(tmp_path): @@ -109,13 +105,14 @@ def test_cmip6_data_citation_url(tmp_path): } filename = str(tmp_path / 'output.nc') provenance.entity('file:' + filename, attributes) - _write_citation_file(filename, provenance) + _write_citation_files(filename, provenance) citation_url = tmp_path / 'output_data_citation_info.txt' # Create fake info url fake_url_prefix = '.'.join(attributes.values()) - fake_info_url = [f'{CMIP6_URL_STEM}/cmip6?input={fake_url_prefix}'] - title = [ - "Follow the links below to find more information about CMIP6 data." - ] - assert citation_url.read_text() == '\n'.join(title + fake_info_url) + text = '\n'.join([ + "Follow the links below to find more information about CMIP6 data:", + f"- {CMIP6_URL_STEM}/cmip6?input={fake_url_prefix}", + '', + ]) + assert citation_url.read_text() == text From 47312ae4d7a0c808f68afe807757d28a5b6ba657 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 30 Mar 2020 17:05:46 +0200 Subject: [PATCH 114/117] Increase version to v2.0.0b9 --- CITATION.cff | 2 +- esmvalcore/_version.py | 2 +- meta.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 48cd294253..aa57af62e3 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -98,5 +98,5 @@ license: "Apache-2.0" message: "If you use this software, please cite it using these metadata." repository-code: "https://github.com/ESMValGroup/ESMValCore/" title: ESMValCore -version: "v2.0.0b8" +version: "v2.0.0b9" ... diff --git a/esmvalcore/_version.py b/esmvalcore/_version.py index 4633adb7ef..bdc26a75b8 100644 --- a/esmvalcore/_version.py +++ b/esmvalcore/_version.py @@ -1,2 +1,2 @@ """ESMValCore version.""" -__version__ = '2.0.0b8' +__version__ = '2.0.0b9' diff --git a/meta.yaml b/meta.yaml index 0ec6e432a6..babb88dc52 100644 --- a/meta.yaml +++ b/meta.yaml @@ -5,7 +5,7 @@ # conda build . -c conda-forge -c esmvalgroup # Package version number -{% set version = "2.0.0b8" %} +{% set version = "2.0.0b9" %} package: name: esmvalcore From df925bc7101301133cf176f0187fba6f4b503eb8 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Mon, 30 Mar 2020 17:26:08 +0200 Subject: [PATCH 115/117] Fixed derivation of co2s --- esmvalcore/preprocessor/_derive/co2s.py | 99 +++++++++++++++----- tests/unit/preprocessor/_derive/test_co2s.py | 77 ++++++++++----- 2 files changed, 134 insertions(+), 42 deletions(-) diff --git a/esmvalcore/preprocessor/_derive/co2s.py b/esmvalcore/preprocessor/_derive/co2s.py index 8a1dd0949f..42623206fb 100644 --- a/esmvalcore/preprocessor/_derive/co2s.py +++ b/esmvalcore/preprocessor/_derive/co2s.py @@ -1,39 +1,96 @@ """Derivation of variable ``co2s``.""" import dask.array as da import iris +import numpy as np +import stratify from ._baseclass import DerivedVariableBase +def _get_first_unmasked_data(array, axis): + """Get first unmasked value of an array along an axis.""" + mask = da.ma.getmaskarray(array) + numerical_mask = da.where(mask, -1.0, 1.0) + indices_first_positive = da.argmax(numerical_mask, axis=axis) + indices = da.meshgrid( + *[da.arange(array.shape[i]) for i in range(array.ndim) if i != axis], + indexing='ij') + indices.insert(axis, indices_first_positive) + first_unmasked_data = np.array(array)[tuple(indices)] + return first_unmasked_data + + class DerivedVariable(DerivedVariableBase): - """Derivation of variable ``co2s``.""" + """Derivation of variable ``co2s``. + + Use linear interpolation/extrapolation and surface air pressure to + calculate CO2 mole fraction at surface. + + Note + ---- + In some cases, ``co2`` data is masked. In these cases, the masked values + correspond to values where the pressure level is higher than the surface + air pressure (e.g. the 1000 hPa level for grid cells with high elevation). + To obtain an unmasked ``co2s`` field, it is necessary to fill these masked + values accordingly, i.e. with the lowest unmasked value for each grid cell. + + """ @staticmethod def required(project): """Declare the variables needed for derivation.""" - required = [{'short_name': 'co2'}] + required = [{'short_name': 'co2'}, {'short_name': 'ps'}] return required @staticmethod def calculate(cubes): """Compute mole fraction of CO2 at surface.""" - cube = cubes.extract_strict( + co2_cube = cubes.extract_strict( iris.Constraint(name='mole_fraction_of_carbon_dioxide_in_air')) - mask = da.ma.getmaskarray(cube.core_data()) - if not mask.any(): - cube = cube[:, 0, :, :] - else: - numerical_mask = da.where(mask, -1.0, 1.0) - indices_first_positive = da.argmax(numerical_mask, axis=1) - indices = da.meshgrid( - da.arange(cube.shape[0]), - da.arange(cube.shape[2]), - da.arange(cube.shape[3]), - indexing='ij', - ) - indices.insert(1, indices_first_positive) - surface_data = cube.data[tuple(indices)] - cube = cube[:, 0, :, :] - cube.data = surface_data - cube.convert_units('1e-6') - return cube + ps_cube = cubes.extract_strict( + iris.Constraint(name='surface_air_pressure')) + + # Fill masked data if necessary (interpolation fails with masked data) + (z_axis,) = co2_cube.coord_dims(co2_cube.coord(axis='Z', + dim_coords=True)) + mask = da.ma.getmaskarray(co2_cube.core_data()) + if mask.any(): + first_unmasked_data = _get_first_unmasked_data( + co2_cube.core_data(), axis=z_axis) + dim_map = [dim for dim in range(co2_cube.ndim) if dim != z_axis] + first_unmasked_data = iris.util.broadcast_to_shape( + first_unmasked_data, co2_cube.shape, dim_map) + co2_cube.data = da.where(mask, first_unmasked_data, + co2_cube.core_data()) + + # Interpolation (not supported for dask arrays) + air_pressure_coord = co2_cube.coord('air_pressure') + original_levels = iris.util.broadcast_to_shape( + air_pressure_coord.points, co2_cube.shape, + co2_cube.coord_dims(air_pressure_coord)) + target_levels = np.expand_dims(ps_cube.data, axis=z_axis) + co2s_data = stratify.interpolate( + target_levels, + original_levels, + co2_cube.data, + axis=z_axis, + interpolation='linear', + extrapolation='linear', + ) + co2s_data = np.squeeze(co2s_data, axis=z_axis) + + # Construct co2s cube + indices = [slice(None)] * co2_cube.ndim + indices[z_axis] = 0 + co2s_cube = co2_cube[tuple(indices)] + co2s_cube.data = co2s_data + if co2s_cube.coords('air_pressure'): + co2s_cube.remove_coord('air_pressure') + ps_coord = iris.coords.AuxCoord(ps_cube.data, + var_name='plev', + standard_name='air_pressure', + long_name='pressure', + units=ps_cube.units) + co2s_cube.add_aux_coord(ps_coord, np.arange(co2s_cube.ndim)) + co2s_cube.convert_units('1e-6') + return co2s_cube diff --git a/tests/unit/preprocessor/_derive/test_co2s.py b/tests/unit/preprocessor/_derive/test_co2s.py index ad05aebd11..3fd364edd7 100644 --- a/tests/unit/preprocessor/_derive/test_co2s.py +++ b/tests/unit/preprocessor/_derive/test_co2s.py @@ -7,28 +7,51 @@ import esmvalcore.preprocessor._derive.co2s as co2s -def get_coord_spec(): +def get_coord_spec(include_plev=True): """Coordinate specs for cubes.""" time_coord = iris.coords.DimCoord([0], var_name='time', standard_name='time', units='days since 0000-01-01 00:00:00') - plev_coord = iris.coords.DimCoord([123456.0, 50000.0, 1000.0], - var_name='plev', - standard_name='air_pressure', units='Pa') lat_coord = iris.coords.DimCoord([0.0, 1.0], var_name='latitude', standard_name='latitude', units='degrees') lon_coord = iris.coords.DimCoord([0.0, 1.0], var_name='longitude', standard_name='longitude', units='degrees') - coord_spec = [ - (time_coord, 0), - (plev_coord, 1), - (lat_coord, 2), - (lon_coord, 3), - ] + if include_plev: + plev_coord = iris.coords.DimCoord([100000.0, 90000.0, 50000.0], + var_name='plev', + standard_name='air_pressure', + units='Pa') + coord_spec = [ + (time_coord, 0), + (plev_coord, 1), + (lat_coord, 2), + (lon_coord, 3), + ] + else: + coord_spec = [ + (time_coord, 0), + (lat_coord, 1), + (lon_coord, 2), + ] return coord_spec +def get_ps_cube(): + """Surface air pressure cube.""" + ps_data = [[[105000.0, 50000.0], + [95000.0, 60000.0]]] + coord_spec = get_coord_spec(include_plev=False) + cube = iris.cube.Cube( + ps_data, + var_name='ps', + standard_name='surface_air_pressure', + units='Pa', + dim_coords_and_dims=coord_spec, + ) + return cube + + @pytest.fixture def masked_cubes(): """Masked CO2 cube.""" @@ -39,14 +62,15 @@ def masked_cubes(): [80.0, -1.0]], [[100.0, 50.0], [30.0, 10.0]]]], 0.0) - cube = iris.cube.Cube( + co2_cube = iris.cube.Cube( co2_data, var_name='co2', standard_name='mole_fraction_of_carbon_dioxide_in_air', units='1e-6', dim_coords_and_dims=coord_spec, ) - return iris.cube.CubeList([cube]) + ps_cube = get_ps_cube() + return iris.cube.CubeList([co2_cube, ps_cube]) @pytest.fixture @@ -59,14 +83,15 @@ def unmasked_cubes(): [70.0, 5.0]], [[100.0, 50.0], [30.0, 1.0]]]]) - cube = iris.cube.Cube( + co2_cube = iris.cube.Cube( co2_data, var_name='co2', standard_name='mole_fraction_of_carbon_dioxide_in_air', units='1e-8', dim_coords_and_dims=coord_spec, ) - return iris.cube.CubeList([cube]) + ps_cube = get_ps_cube() + return iris.cube.CubeList([co2_cube, ps_cube]) def test_co2_calculate_masked_cubes(masked_cubes): @@ -75,11 +100,16 @@ def test_co2_calculate_masked_cubes(masked_cubes): out_cube = derived_var.calculate(masked_cubes) assert not np.ma.is_masked(out_cube.data) np.testing.assert_allclose(out_cube.data, - [[[170.0, 100.0], + [[[180.0, 50.0], [80.0, 10.0]]]) assert out_cube.units == '1e-6' - np.testing.assert_allclose(out_cube.coord('air_pressure').points, - 123456.0) + plev_coord = out_cube.coord('air_pressure') + assert plev_coord.var_name == 'plev' + assert plev_coord.standard_name == 'air_pressure' + assert plev_coord.long_name == 'pressure' + assert plev_coord.units == 'Pa' + np.testing.assert_allclose(plev_coord.points, + [[[105000.0, 50000.0], [95000.0, 60000.0]]]) def test_co2_calculate_unmasked_cubes(unmasked_cubes): @@ -88,8 +118,13 @@ def test_co2_calculate_unmasked_cubes(unmasked_cubes): out_cube = derived_var.calculate(unmasked_cubes) assert not np.ma.is_masked(out_cube.data) np.testing.assert_allclose(out_cube.data, - [[[2.0, 1.0], - [0.8, 0.09]]]) + [[[2.25, 0.50], + [0.75, 0.02]]]) assert out_cube.units == '1e-6' - np.testing.assert_allclose(out_cube.coord('air_pressure').points, - 123456.0) + plev_coord = out_cube.coord('air_pressure') + assert plev_coord.var_name == 'plev' + assert plev_coord.standard_name == 'air_pressure' + assert plev_coord.long_name == 'pressure' + assert plev_coord.units == 'Pa' + np.testing.assert_allclose(plev_coord.points, + [[[105000.0, 50000.0], [95000.0, 60000.0]]]) From 7a85ee17cebe2ac0eebc5c5581f5b0530270284a Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Tue, 31 Mar 2020 11:36:50 +0200 Subject: [PATCH 116/117] Update ESMValTool reference --- esmvalcore/_citation.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 3dfc0f1b9c..b3ead8a600 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -17,23 +17,26 @@ # The technical overview paper should always be cited ESMVALTOOL_PAPER = ( - '@article{righi19gmd,\n' - '\tdoi = {10.5194/gmd-2019-226},\n' - '\turl = {https://doi.org/10.5194%2Fgmd-2019-226},\n' - '\tyear = 2019,\n' - '\tmonth = {sep},\n' - '\tpublisher = {Copernicus {GmbH}},\n' - '\tauthor = {Mattia Righi and Bouwe Andela and Veronika Eyring ' - 'and Axel Lauer and Valeriu Predoi and Manuel Schlund ' - 'and Javier Vegas-Regidor and Lisa Bock and Björn Brötz ' - 'and Lee de Mora and Faruk Diblen and Laura Dreyer ' - 'and Niels Drost and Paul Earnshaw and Birgit Hassler ' - 'and Nikolay Koldunov and Bill Little and Saskia Loosveldt Tomas ' - 'and Klaus Zimmermann},\n' - '\ttitle = {{ESMValTool} v2.0 ' - '{\\&}amp$\\mathsemicolon${\\#}8211$\\mathsemicolon$ ' - 'Technical overview}\n' - '}\n') + "@article{righi20gmd,\n" + "\tdoi = {10.5194/gmd-13-1179-2020},\n" + "\turl = {https://doi.org/10.5194/gmd-13-1179-2020},\n" + "\tyear = {2020},\n" + "\tmonth = mar,\n" + "\tpublisher = {Copernicus {GmbH}},\n" + "\tvolume = {13},\n" + "\tnumber = {3},\n" + "\tpages = {1179--1199},\n" + "\tauthor = {Mattia Righi and Bouwe Andela and Veronika Eyring " + "and Axel Lauer and Valeriu Predoi and Manuel Schlund " + "and Javier Vegas-Regidor and Lisa Bock and Bj\"{o}rn Br\"{o}tz " + "and Lee de Mora and Faruk Diblen and Laura Dreyer " + "and Niels Drost and Paul Earnshaw and Birgit Hassler " + "and Nikolay Koldunov and Bill Little and Saskia Loosveldt Tomas " + "and Klaus Zimmermann},\n" + "\ttitle = {Earth System Model Evaluation Tool (ESMValTool) v2.0 " + "-- technical overview},\n" + "\tjournal = {Geoscientific Model Development}\n" + "}\n") def _write_citation_files(filename, provenance): From e58a3a9dbbf92d69835b27377d1c2ce273ad5482 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Tue, 31 Mar 2020 12:56:35 +0200 Subject: [PATCH 117/117] Add v2.0.0b9 release notes --- doc/changelog.rst | 26 ++++++++++++++++++++- esmvalcore/utils/draft_release_notes.py | 31 +++++++++++++++++++------ 2 files changed, 49 insertions(+), 8 deletions(-) diff --git a/doc/changelog.rst b/doc/changelog.rst index 8f05f42b60..89dd0f77b3 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -1,5 +1,29 @@ Changelog ========= +v2.0.0b9 +-------- -For older releases, see the release notes on https://github.com/ESMValGroup/ESMValCore/releases. \ No newline at end of file +This release includes + +Bug fixes +~~~~~~~~~ + +- Cast dtype float32 to output from zonal and meridional area preprocessors (`#581 `__) `Valeriu Predoi `__ + +Improvements +~~~~~~~~~~~~ + +- Unpin on Python<3.8 for conda package (run) (`#570 `__) `Valeriu Predoi `__ +- Update pytest installation marker (`#572 `__) `Bouwe Andela `__ +- Remove vmrh2o (`#573 `__) `Mattia Righi `__ +- Restructure documentation (`#575 `__) `Bouwe Andela `__ +- Fix mask in land variables for CCSM4 (`#579 `__) `Klaus Zimmermann `__ +- Fix derive scripts wrt required method (`#585 `__) `Klaus Zimmermann `__ +- Check coordinates do not have repeated standard names (`#558 `__) `Javier Vegas-Regidor `__ +- Added derivation script for co2s (`#587 `__) `Manuel Schlund `__ +- Adapted custom co2s table to match CMIP6 version (`#588 `__) `Manuel Schlund `__ +- Increase version to v2.0.0b9 (`#593 `__) `Bouwe Andela `__ +- Add a method to save citation information (`#402 `__) `SarahAlidoost `__ + +For older releases, see the release notes on https://github.com/ESMValGroup/ESMValCore/releases. diff --git a/esmvalcore/utils/draft_release_notes.py b/esmvalcore/utils/draft_release_notes.py index 0702ba590c..ad3af567b4 100644 --- a/esmvalcore/utils/draft_release_notes.py +++ b/esmvalcore/utils/draft_release_notes.py @@ -22,8 +22,17 @@ "~/.github_api_key, see:\nhttps://help.github.com/en/github/" "authenticating-to-github/creating-a-personal-access-token-" "for-the-command-line") + +from esmvalcore import __version__ + +VERSION = f"v{__version__}" GITHUB_REPO = "ESMValGroup/ESMValCore" +TITLES = { + 'bug': 'Bug fixes', + 'enhancement': 'Improvements', +} + def draft_notes_since(previous_release_date, labels): """Draft release notes containing the merged pull requests. @@ -57,18 +66,26 @@ def draft_notes_since(previous_release_date, labels): user = pull.user username = user.login if user.name is None else user.name - line = (f"- {pull.title} (#{pull.number}) " - f"[{username}](https://github.com/{user.login})") + line = ( + f"- {pull.title} (`#{pull.number} " + f"`__) " + f"`{username} `__") if label not in lines: lines[label] = [] lines[label].append((pull.closed_at, line)) # Create sections - sections = ["This release includes"] + sections = [ + VERSION, + '-' * len(VERSION), + '', + "This release includes", + ] for label in sorted(lines): - sections.append('\n' + label) - lines[label].sort() # sort by merge time - sections.append('\n'.join(line for _, line in lines[label])) + entries = sorted(lines[label]) # sort by merge time + label = TITLES.get(label, label) + sections.append('\n'.join(['', label, '~' * len(label), ''])) + sections.append('\n'.join(entry for _, entry in entries)) notes = '\n'.join(sections) print(notes) @@ -76,7 +93,7 @@ def draft_notes_since(previous_release_date, labels): if __name__ == '__main__': - PREVIOUS_RELEASE = datetime.datetime(2020, 1, 17) + PREVIOUS_RELEASE = datetime.datetime(2020, 3, 6) LABELS = ('bug', 'fix for dataset') draft_notes_since(PREVIOUS_RELEASE, LABELS)