From b662a9786e055920125a37c0dd852c4a4cf26041 Mon Sep 17 00:00:00 2001 From: Jesse Nusbaumer Date: Fri, 11 Aug 2023 16:19:46 -0600 Subject: [PATCH 01/11] Add new 'tools' directory, and beginning of metadata name check script. --- tools/meta_stdname_check | 96 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100755 tools/meta_stdname_check diff --git a/tools/meta_stdname_check b/tools/meta_stdname_check new file mode 100755 index 0000000..30af295 --- /dev/null +++ b/tools/meta_stdname_check @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +""" + +This tool checks if all of the +standard names present in a +CCPP metadata file also exist +in the standard names dictionary. + +The tool currently has two options: + +1. A path to a single metadata file + is passed, in which case only that + file's standard names are checked, e.g.: + +./meta_stdname_check --metafile-loc /path/to/file.meta --stdname-dict /path/to/dict.xml + +2. A path to a directory is passed, in + which case the directory is searched + for metadata files, and all found + files' standard names are checked, e.g.: + +./meta_stdname_check --metafile-loc /meta/path/ --stdname-dict /path/to/dict.xml + +""" + +############################# +#Import needed python modules +############################# + +import argparse + +################# +#Helper functions +################# + +#++++++++++++++++++++++++++++++ +#Input Argument parser function +#++++++++++++++++++++++++++++++ + +def parse_arguments(): + + """ + Parses command-line input arguments + using the argparse python module and + outputs the final argument object. + """ + + #Create description: + desc = "Check if the metafile contains variable standard names\n" + desc += "that are not in the provided standard names dictionary." + + #Create parser object: + parser = argparse.ArgumentParser(description=desc) + + #Add input arguments to be parsed: + parser.add_argument('--metafile-loc', + metavar='', + action='store', type=str, + help="Location of metadata file(s)") + + parser.add_argument('--stdname-dict', + metavar='', + action='store', type=str, + help="Location of standard name dictionary (XML file)") + + #Parse Argument inputs + args = parser.parse_args() + return args + +#++++++++++++++++++++++++++++++ + +############ +#Main script +############ + +#Parse command-line arguments: +input_args = parse_arguments() + +#Open standard name dictionary: +#CONTINUE HERE!!!!!!!! + +#Extract all standard names from dictionary: + +#Find all CCPP metadata files: + +#Loop through all metadata files: + + #Find all metadata standard names + #that are not in the dictionary + +#Print list of metadata file standard +#names that are not in the dictionary: + +############## +#End of script From eb5f921929706ff597d11cc63334ef0108ab993e Mon Sep 17 00:00:00 2001 From: Jesse Nusbaumer Date: Wed, 23 Aug 2023 09:42:46 -0600 Subject: [PATCH 02/11] Finish first version of metadata file standard name checker. --- tools/lib/xml_tools.py | 253 +++++++++++++++++++++++++++++++++++ tools/meta_stdname_check | 277 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 520 insertions(+), 10 deletions(-) create mode 100644 tools/lib/xml_tools.py diff --git a/tools/lib/xml_tools.py b/tools/lib/xml_tools.py new file mode 100644 index 0000000..956ad85 --- /dev/null +++ b/tools/lib/xml_tools.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python + +""" +Parse and / or validate an XML file and return the captured variables. +""" + +# Python library imports +from __future__ import print_function +import os +import os.path +import subprocess +import sys +import logging +from distutils.spawn import find_executable +import xml.etree.ElementTree as ET +try: + _XMLLINT = find_executable('xmllint') +except ImportError: + _XMLLINT = None +# end try + +# Find python version +PY3 = sys.version_info[0] > 2 +PYSUBVER = sys.version_info[1] +_LOGGER = None + +############################################################################### +def call_command(commands, logger, silent=False): +############################################################################### + """ + Try a command line and return the output on success (None on failure) + >>> call_command(['ls', 'really__improbable_fffilename.foo'], _LOGGER) #doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + RuntimeError: Execution of 'ls really__improbable_fffilename.foo' failed: + [Errno 2] No such file or directory + >>> call_command(['ls', 'really__improbable_fffilename.foo'], _LOGGER, silent=True) + False + >>> call_command(['ls'], _LOGGER) + True + """ + result = False + outstr = '' + if logger is None: + silent = True + # end if + try: + if PY3: + if PYSUBVER > 6: + cproc = subprocess.run(commands, check=True, + capture_output=True) + if not silent: + logger.debug(cproc.stdout) + # end if + result = cproc.returncode == 0 + elif PYSUBVER >= 5: + cproc = subprocess.run(commands, check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if not silent: + logger.debug(cproc.stdout) + # end if + result = cproc.returncode == 0 + else: + raise ValueError("Python 3 must be at least version 3.5") + # end if + else: + pproc = subprocess.Popen(commands, stdin=None, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + output, _ = pproc.communicate() + if not silent: + logger.debug(output) + # end if + result = pproc.returncode == 0 + # end if + except (OSError, RuntimeError, subprocess.CalledProcessError) as err: + if silent: + result = False + else: + cmd = ' '.join(commands) + emsg = "Execution of '{}' failed with code:\n" + outstr = emsg.format(cmd, err.returncode) + outstr += "{}".format(err.output) + raise RuntimeError(outstr) + # end if + # end of try + return result + +############################################################################### +def find_schema_version(root): +############################################################################### + """ + Find the version of the host registry file represented by root + >>> find_schema_version(ET.fromstring('')) + [1, 0] + >>> find_schema_version(ET.fromstring('')) #doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ValueError: Illegal version string, '1.a' + Format must be . + >>> find_schema_version(ET.fromstring('')) #doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ValueError: Illegal version string, '0.0' + Major version must be at least 1 + >>> find_schema_version(ET.fromstring('')) #doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ValueError: Illegal version string, '0.0' + Minor version must be at least 0 + """ + verbits = None + if 'version' not in root.attrib: + raise ValueError("version attribute required") + # end if + version = root.attrib['version'] + versplit = version.split('.') + try: + if len(versplit) != 2: + raise ValueError('oops') + # end if (no else needed) + try: + verbits = [int(x) for x in versplit] + except ValueError as verr: + raise ValueError(verr) + # end try + if verbits[0] < 1: + raise ValueError('Major version must be at least 1') + # end if + if verbits[1] < 0: + raise ValueError('Minor version must be non-negative') + # end if + except ValueError as verr: + errstr = """Illegal version string, '{}' + Format must be .""" + ve_str = str(verr) + if ve_str: + errstr = ve_str + '\n' + errstr + # end if + raise ValueError(errstr.format(version)) + # end try + return verbits + +############################################################################### +def find_schema_file(schema_root, version, schema_path=None): +############################################################################### + """Find and return the schema file based on and + or return None. + If is present, use that as the directory to find the + appropriate schema file. Otherwise, just look in the current directory.""" + + verstring = '_'.join([str(x) for x in version]) + schema_filename = "{}_v{}.xsd".format(schema_root, verstring) + if schema_path: + schema_file = os.path.join(schema_path, schema_filename) + else: + schema_file = schema_filename + # end if + if os.path.exists(schema_file): + return schema_file + # end if + return None + +############################################################################### +def validate_xml_file(filename, schema_root, version, logger, + schema_path=None, error_on_noxmllint=False): +############################################################################### + """ + Find the appropriate schema and validate the XML file, , + against it using xmllint + """ + # Check the filename + if not os.path.isfile(filename): + raise ValueError("validate_xml_file: Filename, '{}', does not exist".format(filename)) + # end if + if not os.access(filename, os.R_OK): + raise ValueError("validate_xml_file: Cannot open '{}'".format(filename)) + # end if + if not schema_path: + # Find the schema, based on the model version + thispath = os.path.abspath(__file__) + pdir = os.path.dirname(os.path.dirname(os.path.dirname(thispath))) + schema_path = os.path.join(pdir, 'schema') + # end if + schema_file = find_schema_file(schema_root, version, schema_path) + if not (schema_file and os.path.isfile(schema_file)): + verstring = '.'.join([str(x) for x in version]) + emsg = """validate_xml_file: Cannot find schema for version {}, + {} does not exist""" + raise ValueError(emsg.format(verstring, schema_file)) + # end if + if not os.access(schema_file, os.R_OK): + emsg = "validate_xml_file: Cannot open schema, '{}'" + raise ValueError(emsg.format(schema_file)) + # end if + if _XMLLINT is not None: + if logger is not None: + lmsg = "Checking file {} against schema {}" + logger.debug(lmsg.format(filename, schema_file)) + # end if + cmd = [_XMLLINT, '--noout', '--schema', schema_file, filename] + result = call_command(cmd, logger) + return result + # end if + lmsg = "xmllint not found, could not validate file {}" + if error_on_noxmllint: + raise ValueError("validate_xml_file: " + lmsg.format(filename)) + # end if + if logger is not None: + logger.warning(lmsg.format(filename)) + # end if + return True # We could not check but still need to proceed + +############################################################################### +def read_xml_file(filename, logger=None): +############################################################################### + """Read the XML file, , and return its tree and root""" + if os.path.isfile(filename) and os.access(filename, os.R_OK): + if PY3: + file_open = (lambda x: open(x, 'r', encoding='utf-8')) + else: + file_open = (lambda x: open(x, 'r')) + # end if + with file_open(filename) as file_: + try: + tree = ET.parse(file_) + root = tree.getroot() + except ET.ParseError as perr: + emsg = "read_xml_file: Cannot read {}, {}" + raise ValueError(emsg.format(filename, perr)) + elif not os.access(filename, os.R_OK): + raise ValueError("read_xml_file: Cannot open '{}'".format(filename)) + else: + emsg = "read_xml_file: Filename, '{}', does not exist" + raise ValueError(emsg.format(filename)) + # end if + if logger: + logger.debug("Read XML file, '{}'".format(filename)) + # end if + return tree, root + +############################################################################### + +if __name__ == "__main__": + _LOGGER = logging.getLogger('xml_tools') + for handler in list(_LOGGER.handlers): + _LOGGER.removeHandler(handler) + # end for + _LOGGER.addHandler(logging.NullHandler()) + try: + # First, run doctest + import doctest + doctest.testmod() + except ValueError as cerr: + print("{}".format(cerr)) +# no else: diff --git a/tools/meta_stdname_check b/tools/meta_stdname_check index 30af295..d145537 100755 --- a/tools/meta_stdname_check +++ b/tools/meta_stdname_check @@ -24,11 +24,27 @@ The tool currently has two options: """ -############################# -#Import needed python modules -############################# +###################################### +#Import needed standard python modules +###################################### import argparse +import sys +import os +import os.path + +################################################ +#Add CCPP framework (lib) modules to python path +################################################ + +_CURR_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(os.path.join(_CURR_DIR, "lib")) + +####################################### +#Import needed framework python modules +####################################### + +from xml_tools import read_xml_file ################# #Helper functions @@ -66,31 +82,272 @@ def parse_arguments(): #Parse Argument inputs args = parser.parse_args() - return args -#++++++++++++++++++++++++++++++ + return args.metafile_loc, args.stdname_dict + +#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +#Function to extract standard names from element tree root +#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +def get_dict_stdnames(xml_tree_root): + + """ + Extract all elements with the "standard_name" tag, + find the "name" attribute for that tag, and collect + all of those "names" in a set. + """ + + #Create empty set to store standard name names: + std_names = set() + + #Loop over all standard_name tags" + for stdname in xml_tree_root.findall('./section/standard_name'): + #Add the "name" attribute to the set: + std_names.add(stdname.attrib['name']) + #End for + + return std_names + +#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +#Function to parse a list of strings from a metadata file +#in order to find all standard names +#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +def find_metafile_stdnames(metafile_obj): + + """ + Find all lines that start with "standard_name", + and then assume that all characters after an "=" + are part of the standard name, excluding those + that are behind a comment delimiter (#). + + NOTE: + + The CCPP-framework has much more advanced parsers + that can extract this same info, but bringing them + into this repo would require many additional + supporting source files to be brought in as well. + + However, if it is found that this simplified parser + is hitting too many edge cases then it might be wise + to use the actual CCPP-framework parser instead of + expanding on this function or script. + """ + + #Create empty set to store found standard names: + meta_stdname_set = set() + + #Loop over lines in metadata file object: + for line in metafile_obj: + + #Check if line starts with "standard_name": + if line.lstrip().startswith("standard_name"): + + #Attempt to find string index for "equals" sign: + equals_index = line.find("=") + + #Check if an equals sign actually + #exists: + if equals_index != -1: + + #If so, then extract all text to the right + #of the equals sign: + stdname_text = line[equals_index+1:] + + #Attempt to find the index for a comment delimiter: + comment_index = stdname_text.find("#") + + #If comment exists, then remove + #it from the standard name text: + if comment_index != -1: + stdname_text = stdname_text[:comment_index] + #End if + #End if + + #Add stripped/trimmed text to the standardname set: + meta_stdname_set.add(stdname_text.strip()) + + #End if + #End for + + return meta_stdname_set + +#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +#Function to extract standard names in CCPP metadata file +#that are not in a provided set of accepted standard names +#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +def missing_metafile_names(metafile, stdname_set): + + """ + Extract all standard names listed in CCPP + metadata file, and provide a list of all + names that are not in the provide standard + name set. + """ + + #Open metadata file: + with open(metafile,'r', encoding='utf-8') as meta_file: + + #Find all standard names in metadata file + meta_stdname_set = find_metafile_stdnames(meta_file) + #End with + + #Create set of all standard names not in dictionary set: + missing_stdname_set = meta_stdname_set.difference(stdname_set) + + #Return list of missing standard names: + return list(missing_stdname_set) + +#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +#Function to find the paths to all metadata files within +#a given directory path +#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +def find_metadata_files(dir_path): + + """ + Walk through the provided directory + and create a list of all found CCPP + metadata files. + """ + + #Create new, empy list to store metadata file paths: + metadata_files = [] + + #Walk through provided directory: + for root, _, files in os.walk(dir_path): + #Ignore git directories: + if '.git' not in root: + + #Find all metadata files in current root location: + local_meta_files = [mfil for mfil in files if mfil[-5:] == '.meta'] + + + #Add all found metadata files to metadata list, + #including their full path: + for local_file in local_meta_files: + metadata_files.append(os.path.join(root, local_file)) + #End for + #End if + #End for + + #Return list of metadata files: + return metadata_files + +#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +#Function to print a "human-readable" list of all of the +#standard names in the provided CCPP metadata files that +#were not found in the provided standard name dictionary +#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +def print_missing_names(missing_names_dict): + + """ + Prints a list of the metadata files that + contain standard names not found in the + dictionary, and underneath each metadata + file a list of each "missing" standard name. + """ + + #Loop over dictionary keys, which should be + #paths to metadata files: + + print("\n#######################") + msg = "Non-dictionary standard names found in the following" + msg += " metadata files:" + print(msg) + + for metafile in missing_names_dict: + + print("\n--------------------------\n") + print(f"{metafile}\n") + + #Extract standard names for file: + missing_names_list = missing_names_dict[metafile] + + for stdname in missing_names_list: + print(f" - {stdname}") + #End for + + #End for + + print("\n#######################") + +#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ############ #Main script ############ #Parse command-line arguments: -input_args = parse_arguments() +metafile_loc, stdname_xml = parse_arguments() #Open standard name dictionary: -#CONTINUE HERE!!!!!!!! +_, stdname_dict_root = read_xml_file(stdname_xml) #Extract all standard names from dictionary: +std_names = get_dict_stdnames(stdname_dict_root) -#Find all CCPP metadata files: +#Create new meta file/missing names dictionary: +meta_miss_names_dict = {} -#Loop through all metadata files: +#Check if user passed in single metadata file: +if os.path.isfile(metafile_loc): #Find all metadata standard names - #that are not in the dictionary + #that are not in the dictionary: + missing_stdnames = missing_metafile_names(metafile_loc, + std_names) + + #If miissing stdnames exist, then add the + #file and missing names to dictionary: + meta_miss_names_dict[metafile_loc] = missing_stdnames + +#If not a file, then check if a directory: +elif os.path.isdir(metafile_loc): + + #Find all CCPP metadata files that are + #located in or under this directory: + meta_files = find_metadata_files(metafile_loc) + + #Loop through all metadata files: + for meta_file in meta_files: + + #Find all metadata standard names + #that are not in the dictionary + missing_stdnames = missing_metafile_names(meta_file, + std_names) + + #If miissing stdnames exist, then add the + #file and missing names to dictionary: + meta_miss_names_dict[meta_file] = missing_stdnames + #End for + +else: + #This is a non-supported input, so raise + #an error: + emsg = f"The metafile-loc arg input, '{metafile_loc}'\n" + emsg += "is neither a file nor a directory," + emsg += " so script will end here." + raise FileNotFoundError(emsg) +#End if #Print list of metadata file standard #names that are not in the dictionary: +if meta_miss_names_dict: + #Print organized, human-readable + #list of "missing" standard names + #to the screen, along with the + #metadata file they are associated + #with + print_missing_names(meta_miss_names_dict) +else: + #Notify user that all standard names + #exist in the dictionary: + print("All standard names are in the dictionary!") +#End if + ############## #End of script From 576b33006597dc96fd82c8b4653d75a3c59cde8e Mon Sep 17 00:00:00 2001 From: Jesse Nusbaumer Date: Wed, 23 Aug 2023 10:00:13 -0600 Subject: [PATCH 03/11] Pylint fixes. --- tools/meta_stdname_check | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/meta_stdname_check b/tools/meta_stdname_check index d145537..7b24acf 100755 --- a/tools/meta_stdname_check +++ b/tools/meta_stdname_check @@ -98,15 +98,15 @@ def get_dict_stdnames(xml_tree_root): """ #Create empty set to store standard name names: - std_names = set() + std_dict_names = set() #Loop over all standard_name tags" for stdname in xml_tree_root.findall('./section/standard_name'): #Add the "name" attribute to the set: - std_names.add(stdname.attrib['name']) + std_dict_names.add(stdname.attrib['name']) #End for - return std_names + return std_dict_names #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ #Function to parse a list of strings from a metadata file @@ -187,10 +187,10 @@ def missing_metafile_names(metafile, stdname_set): """ #Open metadata file: - with open(metafile,'r', encoding='utf-8') as meta_file: + with open(metafile,'r', encoding='utf-8') as mfile: #Find all standard names in metadata file - meta_stdname_set = find_metafile_stdnames(meta_file) + meta_stdname_set = find_metafile_stdnames(mfile) #End with #Create set of all standard names not in dictionary set: From 9608534214d0db5dc4b06c2ddf41efd7be1b31d5 Mon Sep 17 00:00:00 2001 From: Jesse Nusbaumer Date: Wed, 23 Aug 2023 11:22:08 -0600 Subject: [PATCH 04/11] Move python scripts to 'tools' directory, and remove copy of xml_tools.py --- .../check_xml_unique.py | 0 ...ta_stdname_check => meta_stdname_check.py} | 0 .../write_standard_name_table.py | 0 xml_tools.py | 253 ------------------ 4 files changed, 253 deletions(-) rename check_xml_unique.py => tools/check_xml_unique.py (100%) rename tools/{meta_stdname_check => meta_stdname_check.py} (100%) rename write_standard_name_table.py => tools/write_standard_name_table.py (100%) delete mode 100644 xml_tools.py diff --git a/check_xml_unique.py b/tools/check_xml_unique.py similarity index 100% rename from check_xml_unique.py rename to tools/check_xml_unique.py diff --git a/tools/meta_stdname_check b/tools/meta_stdname_check.py similarity index 100% rename from tools/meta_stdname_check rename to tools/meta_stdname_check.py diff --git a/write_standard_name_table.py b/tools/write_standard_name_table.py similarity index 100% rename from write_standard_name_table.py rename to tools/write_standard_name_table.py diff --git a/xml_tools.py b/xml_tools.py deleted file mode 100644 index 956ad85..0000000 --- a/xml_tools.py +++ /dev/null @@ -1,253 +0,0 @@ -#!/usr/bin/env python - -""" -Parse and / or validate an XML file and return the captured variables. -""" - -# Python library imports -from __future__ import print_function -import os -import os.path -import subprocess -import sys -import logging -from distutils.spawn import find_executable -import xml.etree.ElementTree as ET -try: - _XMLLINT = find_executable('xmllint') -except ImportError: - _XMLLINT = None -# end try - -# Find python version -PY3 = sys.version_info[0] > 2 -PYSUBVER = sys.version_info[1] -_LOGGER = None - -############################################################################### -def call_command(commands, logger, silent=False): -############################################################################### - """ - Try a command line and return the output on success (None on failure) - >>> call_command(['ls', 'really__improbable_fffilename.foo'], _LOGGER) #doctest: +IGNORE_EXCEPTION_DETAIL - Traceback (most recent call last): - RuntimeError: Execution of 'ls really__improbable_fffilename.foo' failed: - [Errno 2] No such file or directory - >>> call_command(['ls', 'really__improbable_fffilename.foo'], _LOGGER, silent=True) - False - >>> call_command(['ls'], _LOGGER) - True - """ - result = False - outstr = '' - if logger is None: - silent = True - # end if - try: - if PY3: - if PYSUBVER > 6: - cproc = subprocess.run(commands, check=True, - capture_output=True) - if not silent: - logger.debug(cproc.stdout) - # end if - result = cproc.returncode == 0 - elif PYSUBVER >= 5: - cproc = subprocess.run(commands, check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - if not silent: - logger.debug(cproc.stdout) - # end if - result = cproc.returncode == 0 - else: - raise ValueError("Python 3 must be at least version 3.5") - # end if - else: - pproc = subprocess.Popen(commands, stdin=None, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - output, _ = pproc.communicate() - if not silent: - logger.debug(output) - # end if - result = pproc.returncode == 0 - # end if - except (OSError, RuntimeError, subprocess.CalledProcessError) as err: - if silent: - result = False - else: - cmd = ' '.join(commands) - emsg = "Execution of '{}' failed with code:\n" - outstr = emsg.format(cmd, err.returncode) - outstr += "{}".format(err.output) - raise RuntimeError(outstr) - # end if - # end of try - return result - -############################################################################### -def find_schema_version(root): -############################################################################### - """ - Find the version of the host registry file represented by root - >>> find_schema_version(ET.fromstring('')) - [1, 0] - >>> find_schema_version(ET.fromstring('')) #doctest: +IGNORE_EXCEPTION_DETAIL - Traceback (most recent call last): - ValueError: Illegal version string, '1.a' - Format must be . - >>> find_schema_version(ET.fromstring('')) #doctest: +IGNORE_EXCEPTION_DETAIL - Traceback (most recent call last): - ValueError: Illegal version string, '0.0' - Major version must be at least 1 - >>> find_schema_version(ET.fromstring('')) #doctest: +IGNORE_EXCEPTION_DETAIL - Traceback (most recent call last): - ValueError: Illegal version string, '0.0' - Minor version must be at least 0 - """ - verbits = None - if 'version' not in root.attrib: - raise ValueError("version attribute required") - # end if - version = root.attrib['version'] - versplit = version.split('.') - try: - if len(versplit) != 2: - raise ValueError('oops') - # end if (no else needed) - try: - verbits = [int(x) for x in versplit] - except ValueError as verr: - raise ValueError(verr) - # end try - if verbits[0] < 1: - raise ValueError('Major version must be at least 1') - # end if - if verbits[1] < 0: - raise ValueError('Minor version must be non-negative') - # end if - except ValueError as verr: - errstr = """Illegal version string, '{}' - Format must be .""" - ve_str = str(verr) - if ve_str: - errstr = ve_str + '\n' + errstr - # end if - raise ValueError(errstr.format(version)) - # end try - return verbits - -############################################################################### -def find_schema_file(schema_root, version, schema_path=None): -############################################################################### - """Find and return the schema file based on and - or return None. - If is present, use that as the directory to find the - appropriate schema file. Otherwise, just look in the current directory.""" - - verstring = '_'.join([str(x) for x in version]) - schema_filename = "{}_v{}.xsd".format(schema_root, verstring) - if schema_path: - schema_file = os.path.join(schema_path, schema_filename) - else: - schema_file = schema_filename - # end if - if os.path.exists(schema_file): - return schema_file - # end if - return None - -############################################################################### -def validate_xml_file(filename, schema_root, version, logger, - schema_path=None, error_on_noxmllint=False): -############################################################################### - """ - Find the appropriate schema and validate the XML file, , - against it using xmllint - """ - # Check the filename - if not os.path.isfile(filename): - raise ValueError("validate_xml_file: Filename, '{}', does not exist".format(filename)) - # end if - if not os.access(filename, os.R_OK): - raise ValueError("validate_xml_file: Cannot open '{}'".format(filename)) - # end if - if not schema_path: - # Find the schema, based on the model version - thispath = os.path.abspath(__file__) - pdir = os.path.dirname(os.path.dirname(os.path.dirname(thispath))) - schema_path = os.path.join(pdir, 'schema') - # end if - schema_file = find_schema_file(schema_root, version, schema_path) - if not (schema_file and os.path.isfile(schema_file)): - verstring = '.'.join([str(x) for x in version]) - emsg = """validate_xml_file: Cannot find schema for version {}, - {} does not exist""" - raise ValueError(emsg.format(verstring, schema_file)) - # end if - if not os.access(schema_file, os.R_OK): - emsg = "validate_xml_file: Cannot open schema, '{}'" - raise ValueError(emsg.format(schema_file)) - # end if - if _XMLLINT is not None: - if logger is not None: - lmsg = "Checking file {} against schema {}" - logger.debug(lmsg.format(filename, schema_file)) - # end if - cmd = [_XMLLINT, '--noout', '--schema', schema_file, filename] - result = call_command(cmd, logger) - return result - # end if - lmsg = "xmllint not found, could not validate file {}" - if error_on_noxmllint: - raise ValueError("validate_xml_file: " + lmsg.format(filename)) - # end if - if logger is not None: - logger.warning(lmsg.format(filename)) - # end if - return True # We could not check but still need to proceed - -############################################################################### -def read_xml_file(filename, logger=None): -############################################################################### - """Read the XML file, , and return its tree and root""" - if os.path.isfile(filename) and os.access(filename, os.R_OK): - if PY3: - file_open = (lambda x: open(x, 'r', encoding='utf-8')) - else: - file_open = (lambda x: open(x, 'r')) - # end if - with file_open(filename) as file_: - try: - tree = ET.parse(file_) - root = tree.getroot() - except ET.ParseError as perr: - emsg = "read_xml_file: Cannot read {}, {}" - raise ValueError(emsg.format(filename, perr)) - elif not os.access(filename, os.R_OK): - raise ValueError("read_xml_file: Cannot open '{}'".format(filename)) - else: - emsg = "read_xml_file: Filename, '{}', does not exist" - raise ValueError(emsg.format(filename)) - # end if - if logger: - logger.debug("Read XML file, '{}'".format(filename)) - # end if - return tree, root - -############################################################################### - -if __name__ == "__main__": - _LOGGER = logging.getLogger('xml_tools') - for handler in list(_LOGGER.handlers): - _LOGGER.removeHandler(handler) - # end for - _LOGGER.addHandler(logging.NullHandler()) - try: - # First, run doctest - import doctest - doctest.testmod() - except ValueError as cerr: - print("{}".format(cerr)) -# no else: From 124094e42868b574a91b70f84b0cd1dc2961b410 Mon Sep 17 00:00:00 2001 From: Jesse Nusbaumer Date: Wed, 23 Aug 2023 11:45:09 -0600 Subject: [PATCH 05/11] Update python scripts and Github Action workflow to make sure everything works with the new repo structure. --- .github/workflows/pull_request_ci.yml | 4 ++-- README.md | 2 +- tools/check_xml_unique.py | 17 +++++++++++++++-- tools/write_standard_name_table.py | 17 +++++++++++++++-- 4 files changed, 33 insertions(+), 7 deletions(-) mode change 100644 => 100755 tools/write_standard_name_table.py diff --git a/.github/workflows/pull_request_ci.yml b/.github/workflows/pull_request_ci.yml index eb3c881..d5ab6d1 100644 --- a/.github/workflows/pull_request_ci.yml +++ b/.github/workflows/pull_request_ci.yml @@ -19,7 +19,7 @@ jobs: sudo apt-get -y install libxml2-utils - name: Check for duplicate standard names - run: python3 check_xml_unique.py standard_names.xml + run: python3 tools/check_xml_unique.py standard_names.xml check-rerendered-markdown: runs-on: ubuntu-latest @@ -41,5 +41,5 @@ jobs: # If this fails you have likely forgotten to rerun the write script # after adding a new name, or updating its description. checksum=$(sha256sum Metadata-standard-names.md) - python3 write_standard_name_table.py standard_names.xml + tools/write_standard_name_table.py standard_names.xml test "$checksum" = "$(sha256sum Metadata-standard-names.md)" diff --git a/README.md b/README.md index d16cdee..1cc2fdb 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ A Markdown file describing the standard names To regenerate the standard name Markdown file, run: ``` -python write_standard_name_table.py standard_names.xml +tools/write_standard_name_table.py standard_names.xml ``` Then, commit the new Metadata-standard-names.md file and push to GitHub. diff --git a/tools/check_xml_unique.py b/tools/check_xml_unique.py index 02cdea3..cbe301d 100755 --- a/tools/check_xml_unique.py +++ b/tools/check_xml_unique.py @@ -8,9 +8,21 @@ import sys import os.path import xml.etree.ElementTree as ET -from xml_tools import find_schema_file, find_schema_version, validate_xml_file, read_xml_file import copy +################################################ +#Add CCPP framework (lib) modules to python path +################################################ + +_CURR_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(os.path.join(_CURR_DIR, "lib")) + +####################################### +#Import needed framework python modules +####################################### + +from xml_tools import find_schema_file, find_schema_version, validate_xml_file, read_xml_file + ############################################################################### def parse_command_line(args, description): ############################################################################### @@ -40,7 +52,8 @@ def main_func(): version = find_schema_version(root) schema_name = os.path.basename(stdname_file)[0:-4] schema_root = os.path.dirname(stdname_file) - schema_file = find_schema_file(schema_name, version) + schema_path = os.path.join(schema_root,schema_name) + schema_file = find_schema_file(schema_path, version) if schema_file: try: validate_xml_file(stdname_file, schema_name, version, None, diff --git a/tools/write_standard_name_table.py b/tools/write_standard_name_table.py old mode 100644 new mode 100755 index d7741a5..7f8e70c --- a/tools/write_standard_name_table.py +++ b/tools/write_standard_name_table.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Convert a metadata standard-name XML library file to a documentation format. @@ -10,10 +10,23 @@ import argparse import sys import re -# Copied from CCPP framework + +################################################ +#Add CCPP framework (lib) modules to python path +################################################ + +_CURR_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(os.path.join(_CURR_DIR, "lib")) + +####################################### +#Import needed framework python modules +####################################### + from xml_tools import validate_xml_file, read_xml_file from xml_tools import find_schema_file, find_schema_version +####################################### + _REAL_SUBST_RE = re.compile(r"(.*\d)p(\d.*)") ######################################################################## From da39171630180c176e8bb3a2397abd49bb7a2fec Mon Sep 17 00:00:00 2001 From: Jesse Nusbaumer Date: Wed, 23 Aug 2023 12:17:25 -0600 Subject: [PATCH 06/11] Don't add metadata file to dictionary if no missing names are found. --- tools/meta_stdname_check.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/meta_stdname_check.py b/tools/meta_stdname_check.py index 7b24acf..229571f 100755 --- a/tools/meta_stdname_check.py +++ b/tools/meta_stdname_check.py @@ -300,9 +300,11 @@ def print_missing_names(missing_names_dict): missing_stdnames = missing_metafile_names(metafile_loc, std_names) - #If miissing stdnames exist, then add the + #If missing stdnames exist, then add the #file and missing names to dictionary: - meta_miss_names_dict[metafile_loc] = missing_stdnames + if missing_stdnames: + meta_miss_names_dict[metafile_loc] = missing_stdnames + #End if #If not a file, then check if a directory: elif os.path.isdir(metafile_loc): From 55a628ffb5e41e37a5ea603f2b3cacbb5f6cbd68 Mon Sep 17 00:00:00 2001 From: Jesse Nusbaumer Date: Wed, 23 Aug 2023 12:22:21 -0600 Subject: [PATCH 07/11] Add the fix to the multiple metadata files section as well. --- tools/meta_stdname_check.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/meta_stdname_check.py b/tools/meta_stdname_check.py index 229571f..5f04d6e 100755 --- a/tools/meta_stdname_check.py +++ b/tools/meta_stdname_check.py @@ -321,9 +321,11 @@ def print_missing_names(missing_names_dict): missing_stdnames = missing_metafile_names(meta_file, std_names) - #If miissing stdnames exist, then add the + #If missing stdnames exist, then add the #file and missing names to dictionary: - meta_miss_names_dict[meta_file] = missing_stdnames + if missing_stdnames: + meta_miss_names_dict[meta_file] = missing_stdnames + #End if #End for else: From 137af54d977006f74b11a1c2561088e5ac5b8be8 Mon Sep 17 00:00:00 2001 From: Jesse Nusbaumer Date: Wed, 23 Aug 2023 12:25:01 -0600 Subject: [PATCH 08/11] Remove un-needed 'python3' command in Github Action workflow. --- .github/workflows/pull_request_ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request_ci.yml b/.github/workflows/pull_request_ci.yml index d5ab6d1..e1ebae7 100644 --- a/.github/workflows/pull_request_ci.yml +++ b/.github/workflows/pull_request_ci.yml @@ -19,7 +19,7 @@ jobs: sudo apt-get -y install libxml2-utils - name: Check for duplicate standard names - run: python3 tools/check_xml_unique.py standard_names.xml + run: tools/check_xml_unique.py standard_names.xml check-rerendered-markdown: runs-on: ubuntu-latest From eab75cc72904c3cbf60d2eec12bb614d1d52b337 Mon Sep 17 00:00:00 2001 From: Jesse Nusbaumer Date: Mon, 28 Aug 2023 15:58:46 -0600 Subject: [PATCH 09/11] Add short argument flag options to script. --- tools/meta_stdname_check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/meta_stdname_check.py b/tools/meta_stdname_check.py index 5f04d6e..f5b337d 100755 --- a/tools/meta_stdname_check.py +++ b/tools/meta_stdname_check.py @@ -70,12 +70,12 @@ def parse_arguments(): parser = argparse.ArgumentParser(description=desc) #Add input arguments to be parsed: - parser.add_argument('--metafile-loc', + parser.add_argument('-m', '--metafile-loc', metavar='', action='store', type=str, help="Location of metadata file(s)") - parser.add_argument('--stdname-dict', + parser.add_argument('-s', '--stdname-dict', metavar='', action='store', type=str, help="Location of standard name dictionary (XML file)") From d312a6e9c1b4e7111e21c5149b811fe5e33ba7f6 Mon Sep 17 00:00:00 2001 From: Jesse Nusbaumer Date: Fri, 22 Sep 2023 15:04:44 -0600 Subject: [PATCH 10/11] Add timestamp to printed output. --- tools/meta_stdname_check.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/meta_stdname_check.py b/tools/meta_stdname_check.py index f5b337d..3a046a9 100755 --- a/tools/meta_stdname_check.py +++ b/tools/meta_stdname_check.py @@ -32,6 +32,7 @@ import sys import os import os.path +import datetime ################################################ #Add CCPP framework (lib) modules to python path @@ -250,14 +251,19 @@ def print_missing_names(missing_names_dict): file a list of each "missing" standard name. """ - #Loop over dictionary keys, which should be - #paths to metadata files: + #Get current date/time: + curr_time = datetime.datetime.now() print("\n#######################") - msg = "Non-dictionary standard names found in the following" + print("Date/time of when script was run:") + print(curr_time) + print("#######################") + msg = "\nNon-dictionary standard names found in the following" msg += " metadata files:" print(msg) + #Loop over dictionary keys, which should be + #paths to metadata files: for metafile in missing_names_dict: print("\n--------------------------\n") From 5f9ad9cf40a33c8a4b67b527b68d297ed5b9e043 Mon Sep 17 00:00:00 2001 From: Jesse Nusbaumer Date: Mon, 2 Oct 2023 16:11:16 -0600 Subject: [PATCH 11/11] Improve script usage docstring. --- tools/meta_stdname_check.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/meta_stdname_check.py b/tools/meta_stdname_check.py index 3a046a9..afcb066 100755 --- a/tools/meta_stdname_check.py +++ b/tools/meta_stdname_check.py @@ -16,9 +16,10 @@ ./meta_stdname_check --metafile-loc /path/to/file.meta --stdname-dict /path/to/dict.xml 2. A path to a directory is passed, in - which case the directory is searched - for metadata files, and all found - files' standard names are checked, e.g.: + which case the directory is searched, + along with any subdirectories, for + metadata files, and all found files' + standard names are checked, e.g.: ./meta_stdname_check --metafile-loc /meta/path/ --stdname-dict /path/to/dict.xml