From b4105391dfa2077d20b12be6a57e3d258a55a187 Mon Sep 17 00:00:00 2001 From: "Ed (ODSC)" Date: Wed, 8 Jan 2025 12:27:40 +0000 Subject: [PATCH 1/3] workflows/lint.yml: Add black and isort --- .github/workflows/lint.yml | 2 + .isort.cfg | 2 + cove_bods/apps.py | 2 +- cove_bods/forms.py | 12 +- cove_bods/process.py | 202 ++++++++++++++++----------- cove_bods/tests/test_page_content.py | 2 - cove_bods/views.py | 30 ++-- cove_project/settings.py | 102 +++++++------- cove_project/urls.py | 9 +- cove_project/wsgi.py | 2 +- requirements_dev.in | 4 +- requirements_dev.txt | 15 +- 12 files changed, 220 insertions(+), 164 deletions(-) create mode 100644 .isort.cfg diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 23019c5..cbb1684 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -18,4 +18,6 @@ jobs: restore-keys: | ${{ runner.os }}-pip- - run: pip install -r requirements_dev.txt + - run: isort --check-only cove_project/ cove_bods/ + - run: black --check cove_project/ cove_bods/ - run: flake8 cove_project/ cove_bods/ diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000..9caba16 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,2 @@ +[isort] +profile=black diff --git a/cove_bods/apps.py b/cove_bods/apps.py index 4b6764b..f15dad6 100644 --- a/cove_bods/apps.py +++ b/cove_bods/apps.py @@ -2,4 +2,4 @@ class CoveBodsConfig(AppConfig): - name = 'cove_bods' + name = "cove_bods" diff --git a/cove_bods/forms.py b/cove_bods/forms.py index b1eb862..3f2ab70 100644 --- a/cove_bods/forms.py +++ b/cove_bods/forms.py @@ -17,16 +17,22 @@ class NewUploadForm(forms.Form): ), label="", ) - sample_mode = forms.BooleanField(label="Process using Sample mode (see information above)", required=False) + sample_mode = forms.BooleanField( + label="Process using Sample mode (see information above)", required=False + ) class NewTextForm(forms.Form): file_field_names = [] paste = forms.CharField(label="Paste (JSON only)", widget=forms.Textarea) - 
sample_mode = forms.BooleanField(label="Process using Sample mode (see information above)", required=False) + sample_mode = forms.BooleanField( + label="Process using Sample mode (see information above)", required=False + ) class NewURLForm(forms.Form): file_field_names = [] url = forms.URLField(label="URL") - sample_mode = forms.BooleanField(label="Process using Sample mode (see information above)", required=False) + sample_mode = forms.BooleanField( + label="Process using Sample mode (see information above)", required=False + ) diff --git a/cove_bods/process.py b/cove_bods/process.py index 66a8663..06bfb1a 100644 --- a/cove_bods/process.py +++ b/cove_bods/process.py @@ -1,33 +1,29 @@ -from django.core.files.base import ContentFile -from django.core.files.storage import default_storage - -from libcovebods.schema import SchemaBODS -from libcovebods.config import LibCoveBODSConfig -from libcovebods.jsonschemavalidate import JSONSchemaValidator -from libcovebods.additionalfields import AdditionalFields -import libcovebods.run_tasks -import libcovebods.data_reader -from typing import List - import json import os.path +from typing import List import flattentool -from sentry_sdk import capture_exception - -from libcoveweb2.models import SuppliedDataFile, SuppliedData +import libcovebods.data_reader +import libcovebods.run_tasks +from django.core.files.base import ContentFile +from django.core.files.storage import default_storage +from libcovebods.additionalfields import AdditionalFields +from libcovebods.config import LibCoveBODSConfig +from libcovebods.jsonschemavalidate import JSONSchemaValidator +from libcovebods.schema import SchemaBODS +from libcoveweb2.models import SuppliedData, SuppliedDataFile from libcoveweb2.process.base import ProcessDataTask from libcoveweb2.process.common_tasks.task_with_state import TaskWithState # from libcove.lib.converters import convert_json, convert_spreadsheet -from libcoveweb2.utils import get_file_type_for_flatten_tool -from 
libcoveweb2.utils import group_data_list_by +from libcoveweb2.utils import get_file_type_for_flatten_tool, group_data_list_by +from sentry_sdk import capture_exception def create_error_file(directory: str, name: str, data: dict): """Create temporary error file""" filename = os.path.join(directory, f"{name}-error.json") - return default_storage.save(filename, ContentFile(json.dumps(data).encode('utf-8'))) + return default_storage.save(filename, ContentFile(json.dumps(data).encode("utf-8"))) def error_file_exists(directory: str, name: str) -> bool: @@ -39,7 +35,7 @@ def error_file_exists(directory: str, name: str) -> bool: def read_error_file(directory: str, name: str) -> dict: """Read data from error file""" filename = os.path.join(directory, f"{name}-error.json") - return json.loads(default_storage.open(filename).read().decode('utf-8')) + return json.loads(default_storage.open(filename).read().decode("utf-8")) def delete_error_file(directory: str, name: str): @@ -63,9 +59,9 @@ def get_context(self): class SetOrTestSuppliedDataFormat(ProcessDataTask): map_file_type_to_format = { - 'json': 'json', - 'xlsx': 'spreadsheet', - 'ods': 'spreadsheet' + "json": "json", + "xlsx": "spreadsheet", + "ods": "spreadsheet", } def is_processing_applicable(self) -> bool: @@ -80,10 +76,14 @@ def process(self, process_data: dict) -> dict: supplied_data_files = SuppliedDataFile.objects.filter( supplied_data=self.supplied_data ) - all_file_types = [get_file_type_for_flatten_tool(i) for i in supplied_data_files] + all_file_types = [ + get_file_type_for_flatten_tool(i) for i in supplied_data_files + ] file_types_reduced = list(set([i for i in all_file_types if i])) if len(file_types_reduced) == 1: - self.supplied_data.format = self.map_file_type_to_format[file_types_reduced[0]] + self.supplied_data.format = self.map_file_type_to_format[ + file_types_reduced[0] + ] self.supplied_data.save() elif len(file_types_reduced) == 0: @@ -218,9 +218,7 @@ def __init__( self, supplied_data: 
SuppliedData, supplied_data_files: List[SuppliedDataFile] ): super().__init__(supplied_data, supplied_data_files) - self.data_filename = os.path.join( - self.supplied_data.data_dir(), "schema.json" - ) + self.data_filename = os.path.join(self.supplied_data.data_dir(), "schema.json") def is_processing_applicable(self) -> bool: return True @@ -230,16 +228,16 @@ def is_processing_needed(self) -> bool: def process(self, process_data: dict) -> dict: # Make things and set info for later in processing - process_data['data_reader'] = libcovebods.data_reader.DataReader( - process_data["json_data_filename"], sample_mode=process_data['sample_mode'] + process_data["data_reader"] = libcovebods.data_reader.DataReader( + process_data["json_data_filename"], sample_mode=process_data["sample_mode"] + ) + process_data["config"] = LibCoveBODSConfig() + process_data["schema"] = SchemaBODS( + process_data["data_reader"], process_data["config"] ) - process_data['config'] = LibCoveBODSConfig() - process_data['schema'] = SchemaBODS(process_data['data_reader'], process_data['config']) # Save some to disk for templates if not os.path.exists(self.data_filename): - save_data = { - "schema_version_used": process_data['schema'].schema_version - } + save_data = {"schema_version_used": process_data["schema"].schema_version} with open(self.data_filename, "w") as fp: json.dump(save_data, fp, indent=4) # return @@ -282,7 +280,9 @@ def is_processing_applicable(self) -> bool: def is_processing_needed(self) -> bool: if os.path.exists(self.xlsx_filename): return False - if error_file_exists(self.supplied_data.storage_dir(), "ConvertJSONIntoSpreadsheets"): + if error_file_exists( + self.supplied_data.storage_dir(), "ConvertJSONIntoSpreadsheets" + ): return False return True @@ -300,16 +300,21 @@ def process(self, process_data: dict) -> dict: "root_id": "statementID", "id_name": "statementID", "root_is_list": True, - "schema": process_data['schema'].pkg_schema_url, + "schema": 
process_data["schema"].pkg_schema_url, } try: flattentool.flatten(process_data["json_data_filename"], **flatten_kwargs) except Exception as err: capture_exception(err) - create_error_file(self.supplied_data.storage_dir(), "ConvertJSONIntoSpreadsheets", - {"type": type(err).__name__, - "filename": process_data["json_data_filename"].split('/')[-1]}) + create_error_file( + self.supplied_data.storage_dir(), + "ConvertJSONIntoSpreadsheets", + { + "type": type(err).__name__, + "filename": process_data["json_data_filename"].split("/")[-1], + }, + ) return process_data @@ -326,10 +331,15 @@ def get_context(self): context["download_xlsx_size"] = os.stat(self.xlsx_filename).st_size else: context["can_download_xlsx"] = False - if error_file_exists(self.supplied_data.storage_dir(), "ConvertJSONIntoSpreadsheets"): - context["xlsx_error"] = read_error_file(self.supplied_data.storage_dir(), - "ConvertJSONIntoSpreadsheets") - delete_error_file(self.supplied_data.storage_dir(), "ConvertJSONIntoSpreadsheets") + if error_file_exists( + self.supplied_data.storage_dir(), "ConvertJSONIntoSpreadsheets" + ): + context["xlsx_error"] = read_error_file( + self.supplied_data.storage_dir(), "ConvertJSONIntoSpreadsheets" + ) + delete_error_file( + self.supplied_data.storage_dir(), "ConvertJSONIntoSpreadsheets" + ) else: context["xlsx_error"] = False # done! 
@@ -342,11 +352,12 @@ class PythonValidateTask(TaskWithState): def process_get_state(self, process_data: dict) -> dict: context = libcovebods.run_tasks.process_additional_checks( - process_data['data_reader'], - process_data['config'], - process_data['schema'], - task_classes=libcovebods.run_tasks.TASK_CLASSES_IN_SAMPLE_MODE if - process_data["sample_mode"] else libcovebods.run_tasks.TASK_CLASSES + process_data["data_reader"], + process_data["config"], + process_data["schema"], + task_classes=libcovebods.run_tasks.TASK_CLASSES_IN_SAMPLE_MODE + if process_data["sample_mode"] + else libcovebods.run_tasks.TASK_CLASSES, ) # counts @@ -354,46 +365,72 @@ def process_get_state(self, process_data: dict) -> dict: # We need to calculate some stats for showing in the view total_ownership_or_control_interest_statements = 0 - for key, count in \ - context['statistics']['count_ownership_or_control_statement_interest_statement_types'].items(): + for key, count in context["statistics"][ + "count_ownership_or_control_statement_interest_statement_types" + ].items(): total_ownership_or_control_interest_statements += count - context['statistics'][ - 'count_ownership_or_control_interest_statement'] = total_ownership_or_control_interest_statements # noqa + context["statistics"][ + "count_ownership_or_control_interest_statement" + ] = total_ownership_or_control_interest_statements # noqa # The use of r_e_type is to stop flake8 complaining about line length - r_e_type = 'registeredEntity' - context['statistics']['count_entities_registeredEntity_legalEntity_with_any_identifier'] = ( - context['statistics']['count_entity_statements_types_with_any_identifier'][r_e_type] + - context['statistics']['count_entity_statements_types_with_any_identifier']['legalEntity']) - context['statistics']['count_entities_registeredEntity_legalEntity_with_any_identifier_with_id_and_scheme'] = ( - context['statistics']['count_entity_statements_types_with_any_identifier_with_id_and_scheme'][ - r_e_type] + - 
context['statistics']['count_entity_statements_types_with_any_identifier_with_id_and_scheme'][ - 'legalEntity']) - context['statistics']['count_entities_registeredEntity_legalEntity'] = ( - context['statistics']['count_entity_statements_types'][r_e_type] + - context['statistics']['count_entity_statements_types']['legalEntity']) - unknown_schema_version_used = \ - [i for i in context['additional_checks'] if i['type'] == 'unknown_schema_version_used'] - context['unknown_schema_version_used'] = unknown_schema_version_used[0] \ - if unknown_schema_version_used else None - context['inconsistent_schema_version_used_count'] = \ - len([i for i in context['additional_checks'] if i['type'] == 'inconsistent_schema_version_used']) - - context['checks_not_run_in_sample_mode'] = [] + r_e_type = "registeredEntity" + context["statistics"][ + "count_entities_registeredEntity_legalEntity_with_any_identifier" + ] = ( + context["statistics"]["count_entity_statements_types_with_any_identifier"][ + r_e_type + ] + + context["statistics"][ + "count_entity_statements_types_with_any_identifier" + ]["legalEntity"] + ) + context["statistics"][ + "count_entities_registeredEntity_legalEntity_with_any_identifier_with_id_and_scheme" + ] = ( + context["statistics"][ + "count_entity_statements_types_with_any_identifier_with_id_and_scheme" + ][r_e_type] + + context["statistics"][ + "count_entity_statements_types_with_any_identifier_with_id_and_scheme" + ]["legalEntity"] + ) + context["statistics"]["count_entities_registeredEntity_legalEntity"] = ( + context["statistics"]["count_entity_statements_types"][r_e_type] + + context["statistics"]["count_entity_statements_types"]["legalEntity"] + ) + unknown_schema_version_used = [ + i + for i in context["additional_checks"] + if i["type"] == "unknown_schema_version_used" + ] + context["unknown_schema_version_used"] = ( + unknown_schema_version_used[0] if unknown_schema_version_used else None + ) + context["inconsistent_schema_version_used_count"] = len( + [ 
+ i + for i in context["additional_checks"] + if i["type"] == "inconsistent_schema_version_used" + ] + ) + + context["checks_not_run_in_sample_mode"] = [] if process_data["sample_mode"]: classes_not_run_in_sample_mode = [ - x for x in libcovebods.run_tasks.TASK_CLASSES + x + for x in libcovebods.run_tasks.TASK_CLASSES if x not in libcovebods.run_tasks.TASK_CLASSES_IN_SAMPLE_MODE ] for class_not_run_in_sample_mode in classes_not_run_in_sample_mode: - context['checks_not_run_in_sample_mode'].extend( + context["checks_not_run_in_sample_mode"].extend( class_not_run_in_sample_mode.get_additional_check_types_possible( - process_data['config'], - process_data['schema'] + process_data["config"], process_data["schema"] ) ) - context['checks_not_run_in_sample_mode'] = list(set(context['checks_not_run_in_sample_mode'])) + context["checks_not_run_in_sample_mode"] = list( + set(context["checks_not_run_in_sample_mode"]) + ) return context, process_data @@ -403,18 +440,19 @@ class JsonSchemaValidateTask(TaskWithState): state_filename: str = "jsonschema_validate.json" def process_get_state(self, process_data: dict) -> dict: - worker = JSONSchemaValidator(process_data['schema']) + worker = JSONSchemaValidator(process_data["schema"]) # Get list of validation errors - validation_errors = worker.validate(process_data['data_reader']) + validation_errors = worker.validate(process_data["data_reader"]) validation_errors = [i.json() for i in validation_errors] # Context context = { "validation_errors_count": len(validation_errors), "validation_errors": group_data_list_by( - validation_errors, lambda i: i["validator"] + str(i['path_ending']) + i["message"] - ) + validation_errors, + lambda i: i["validator"] + str(i["path_ending"]) + i["message"], + ), } return context, process_data @@ -425,9 +463,9 @@ class AdditionalFieldsChecksTask(TaskWithState): state_filename: str = "additional_fields.json" def process_get_state(self, process_data: dict) -> dict: - worker = 
AdditionalFields(process_data['schema']) + worker = AdditionalFields(process_data["schema"]) - output = worker.process(process_data['data_reader']) + output = worker.process(process_data["data_reader"]) context = {"additional_fields": output} context["any_additional_fields_exist"] = len(output) > 0 diff --git a/cove_bods/tests/test_page_content.py b/cove_bods/tests/test_page_content.py index c76bceb..dd4ed19 100644 --- a/cove_bods/tests/test_page_content.py +++ b/cove_bods/tests/test_page_content.py @@ -1,7 +1,5 @@ import pytest - from libcoveweb2.tests.lib_functional import browser, server_url # noqa - from selenium.webdriver.common.by import By diff --git a/cove_bods/views.py b/cove_bods/views.py index f6804b0..39e5e13 100644 --- a/cove_bods/views.py +++ b/cove_bods/views.py @@ -1,31 +1,32 @@ import logging -from cove_project import settings from django.shortcuts import render -from libcoveweb2.views import ( - ExploreDataView, - InputDataView -) from libcoveweb2.models import SuppliedDataFile -from cove_bods.forms import NewTextForm, NewUploadForm, NewURLForm +from libcoveweb2.views import ExploreDataView, InputDataView +from cove_bods.forms import NewTextForm, NewUploadForm, NewURLForm +from cove_project import settings logger = logging.getLogger(__name__) JSON_FORM_CLASSES = { - "upload_form": NewUploadForm, - "text_form": NewTextForm, - "url_form": NewURLForm, - } + "upload_form": NewUploadForm, + "text_form": NewTextForm, + "url_form": NewURLForm, +} class NewInput(InputDataView): form_classes = JSON_FORM_CLASSES input_template = "cove_bods/index.html" - allowed_content_types = settings.ALLOWED_JSON_CONTENT_TYPES + settings.ALLOWED_SPREADSHEET_CONTENT_TYPES + allowed_content_types = ( + settings.ALLOWED_JSON_CONTENT_TYPES + settings.ALLOWED_SPREADSHEET_CONTENT_TYPES + ) content_type_incorrect_message = "This does not appear to be a supported file." 
- allowed_file_extensions = settings.ALLOWED_JSON_EXTENSIONS + settings.ALLOWED_SPREADSHEET_EXTENSIONS + allowed_file_extensions = ( + settings.ALLOWED_JSON_EXTENSIONS + settings.ALLOWED_SPREADSHEET_EXTENSIONS + ) file_extension_incorrect_message = "This does not appear to be a supported file." supplied_data_format = "unknown" @@ -47,10 +48,7 @@ def save_file_content_to_supplied_data( supplied_data.save_file(request.FILES["file_upload"]) elif form_name == "text_form": supplied_data.save_file_contents( - "input.json", - form.cleaned_data["paste"], - "application/json", - None + "input.json", form.cleaned_data["paste"], "application/json", None ) elif form_name == "url_form": supplied_data.save_file_from_source_url( diff --git a/cove_project/settings.py b/cove_project/settings.py index d532a8e..a620fa6 100644 --- a/cove_project/settings.py +++ b/cove_project/settings.py @@ -11,35 +11,33 @@ """ import os -from libcoveweb2 import settings + import environ +from libcoveweb2 import settings # Build paths inside the project like this: os.path.join(BASE_DIR, ...) 
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) env = environ.Env( # set default values and casting - DB_NAME=(str, os.path.join(BASE_DIR, 'db.sqlite3')), - SENTRY_DSN=(str, ''), + DB_NAME=(str, os.path.join(BASE_DIR, "db.sqlite3")), + SENTRY_DSN=(str, ""), CELERY_BROKER_URL=(str, ""), REDIS_URL=(str, ""), ) # We use the setting to choose whether to show the section about Sentry in the # terms and conditions -SENTRY_DSN = env('SENTRY_DSN') +SENTRY_DSN = env("SENTRY_DSN") if SENTRY_DSN: import sentry_sdk from sentry_sdk.integrations.django import DjangoIntegration from sentry_sdk.integrations.logging import ignore_logger - ignore_logger('django.security.DisallowedHost') - sentry_sdk.init( - dsn=env('SENTRY_DSN'), - integrations=[DjangoIntegration()] - ) + ignore_logger("django.security.DisallowedHost") + sentry_sdk.init(dsn=env("SENTRY_DSN"), integrations=[DjangoIntegration()]) -DEALER_TYPE = 'git' +DEALER_TYPE = "git" PIWIK = settings.PIWIK GOOGLE_ANALYTICS_ID = settings.GOOGLE_ANALYTICS_ID @@ -47,8 +45,8 @@ # We can't take MEDIA_ROOT and MEDIA_URL from cove settings, # ... otherwise the files appear under the BASE_DIR that is the Cove library install. # That could get messy. We want them to appear in our directory. 
-MEDIA_ROOT = os.path.join(BASE_DIR, 'media') -MEDIA_URL = '/media/' +MEDIA_ROOT = os.path.join(BASE_DIR, "media") +MEDIA_URL = "/media/" SECRET_KEY = settings.SECRET_KEY DEBUG = settings.DEBUG @@ -57,45 +55,45 @@ # Application definition INSTALLED_APPS = [ - 'django.contrib.admin', - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.messages', - 'django.contrib.staticfiles', - 'bootstrap3', + "django.contrib.admin", + "django.contrib.auth", + "django.contrib.contenttypes", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.staticfiles", + "bootstrap3", "libcoveweb2", - 'cove_bods', + "cove_bods", ] MIDDLEWARE = ( - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.locale.LocaleMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', - 'django.middleware.security.SecurityMiddleware', - 'dealer.contrib.django.Middleware', + "django.contrib.sessions.middleware.SessionMiddleware", + "django.middleware.locale.LocaleMiddleware", + "django.middleware.common.CommonMiddleware", + "django.middleware.csrf.CsrfViewMiddleware", + "django.contrib.auth.middleware.AuthenticationMiddleware", + "django.contrib.messages.middleware.MessageMiddleware", + "django.middleware.clickjacking.XFrameOptionsMiddleware", + "django.middleware.security.SecurityMiddleware", + "dealer.contrib.django.Middleware", "libcoveweb2.middleware.CoveConfigCurrentApp", ) -ROOT_URLCONF = 'cove_project.urls' +ROOT_URLCONF = "cove_project.urls" TEMPLATES = settings.TEMPLATES -WSGI_APPLICATION = 'cove_project.wsgi.application' +WSGI_APPLICATION = "cove_project.wsgi.application" # We can't take DATABASES from cove settings, # ... 
otherwise the files appear under the BASE_DIR that is the Cove library install. # That could get messy. We want them to appear in our directory. DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': env('DB_NAME'), + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": env("DB_NAME"), } } @@ -104,16 +102,16 @@ AUTH_PASSWORD_VALIDATORS = [ { - 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', + "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", }, { - 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', + "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", }, { - 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', + "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", }, { - 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', + "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", }, ] @@ -129,7 +127,7 @@ LANGUAGES = settings.LANGUAGES -LOCALE_PATHS = (os.path.join(BASE_DIR, 'cove_bods', 'locale'),) +LOCALE_PATHS = (os.path.join(BASE_DIR, "cove_bods", "locale"),) # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/2.1/howto/static-files/ @@ -137,8 +135,8 @@ # We can't take STATIC_URL and STATIC_ROOT from cove settings, # ... otherwise the files appear under the BASE_DIR that is the Cove library install. # and that doesn't work with our standard Apache setup. 
-STATIC_URL = '/static/' -STATIC_ROOT = os.path.join(BASE_DIR, 'static') +STATIC_URL = "/static/" +STATIC_ROOT = os.path.join(BASE_DIR, "static") # Misc @@ -147,17 +145,17 @@ # BODS Config COVE_CONFIG = { - 'app_name': 'cove_bods', - 'app_base_template': 'cove_bods/base.html', - 'app_verbose_name': 'BODS Data Review Tool', - 'app_strapline': 'Review your BODS data.', - 'root_list_path': 'there-is-no-root-list-path', - 'root_id': 'statementID', - 'id_name': 'statementID', - 'root_is_list': True, - 'convert_titles': False, - 'input_methods': ['upload', 'url', 'text'], - 'support_email': 'data@open-contracting.org' + "app_name": "cove_bods", + "app_base_template": "cove_bods/base.html", + "app_verbose_name": "BODS Data Review Tool", + "app_strapline": "Review your BODS data.", + "root_list_path": "there-is-no-root-list-path", + "root_id": "statementID", + "id_name": "statementID", + "root_is_list": True, + "convert_titles": False, + "input_methods": ["upload", "url", "text"], + "support_email": "data@open-contracting.org", } # https://github.com/OpenDataServices/cove/issues/1098 diff --git a/cove_project/urls.py b/cove_project/urls.py index 7d56bb5..6dd42ca 100644 --- a/cove_project/urls.py +++ b/cove_project/urls.py @@ -1,12 +1,15 @@ +from django.conf import settings from django.conf.urls import url from django.conf.urls.static import static -from django.conf import settings +from django.urls import re_path from libcoveweb2.urls import urlpatterns + import cove_bods.views -from django.urls import re_path urlpatterns += [re_path(r"^$", cove_bods.views.NewInput.as_view(), name="index")] -urlpatterns += [url(r'^data/(.+)$', cove_bods.views.ExploreBODSView.as_view(), name='explore')] +urlpatterns += [ + url(r"^data/(.+)$", cove_bods.views.ExploreBODSView.as_view(), name="explore") +] urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) diff --git a/cove_project/wsgi.py b/cove_project/wsgi.py index d27b945..e4bf27d 100644 --- a/cove_project/wsgi.py 
+++ b/cove_project/wsgi.py @@ -11,6 +11,6 @@ from django.core.wsgi import get_wsgi_application -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'cove_project.settings') +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cove_project.settings") application = get_wsgi_application() diff --git a/requirements_dev.in b/requirements_dev.in index 0adbf26..f5fa6ff 100644 --- a/requirements_dev.in +++ b/requirements_dev.in @@ -2,7 +2,9 @@ pytest pytest-django flake8 +black==22.3.0 +isort pytest-localserver selenium transifex-client -pip-tools \ No newline at end of file +pip-tools diff --git a/requirements_dev.txt b/requirements_dev.txt index fcb0703..2bfb09e 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -17,9 +17,7 @@ async-generator==1.10 # trio # trio-websocket async-timeout==4.0.2 - # via - # -r requirements.txt - # redis + # via -r requirements.txt attrs==23.1.0 # via # -r requirements.txt @@ -35,6 +33,8 @@ billiard==3.6.4.0 # via # -r requirements.txt # celery +black==22.3.0 + # via -r requirements_dev.in btrees==5.0 # via # -r requirements.txt @@ -62,6 +62,7 @@ charset-normalizer==3.1.0 click==8.1.3 # via # -r requirements.txt + # black # celery # click-didyoumean # click-plugins @@ -126,6 +127,8 @@ ijson==3.2.0.post0 # libcovebods iniconfig==2.0.0 # via pytest +isort==5.13.2 + # via -r requirements_dev.in jsonref==1.1.0 # via # -r requirements.txt @@ -152,6 +155,8 @@ lxml==4.9.2 # flattentool mccabe==0.7.0 # via flake8 +mypy-extensions==1.0.0 + # via black odfpy==1.4.1 # via # -r requirements.txt @@ -168,6 +173,8 @@ packaging==23.1 # build # libcovebods # pytest +pathspec==0.12.1 + # via black persistent==5.0 # via # -r requirements.txt @@ -175,6 +182,8 @@ persistent==5.0 # zodb pip-tools==6.12.2 # via -r requirements_dev.in +platformdirs==4.3.6 + # via black pluggy==1.0.0 # via pytest prompt-toolkit==3.0.38 From 71b2dae7d101a6ca907ab00b6585aec98136626b Mon Sep 17 00:00:00 2001 From: "Ed (ODSC)" Date: Wed, 8 Jan 2025 12:31:28 +0000 Subject: [PATCH 
2/3] .github/workflows: Update from Python 3.7 --- .github/workflows/branch-deploy.yml | 2 +- .github/workflows/branch-destroy.yml | 2 +- .github/workflows/live-deploy.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/branch-deploy.yml b/.github/workflows/branch-deploy.yml index 6bd65a8..8b2eb0d 100644 --- a/.github/workflows/branch-deploy.yml +++ b/.github/workflows/branch-deploy.yml @@ -19,7 +19,7 @@ jobs: - name: Setup python uses: actions/setup-python@v2 with: - python-version: 3.7 + python-version: 3.11 architecture: x64 - run: pip install dokkusd - uses: oNaiPs/secrets-to-env-action@v1 diff --git a/.github/workflows/branch-destroy.yml b/.github/workflows/branch-destroy.yml index 7f4947d..615ef73 100644 --- a/.github/workflows/branch-destroy.yml +++ b/.github/workflows/branch-destroy.yml @@ -16,7 +16,7 @@ jobs: - name: Setup python uses: actions/setup-python@v2 with: - python-version: 3.7 + python-version: 3.11 architecture: x64 - run: pip install dokkusd - run: python -m dokkusd.cli destroy --appname ${{ vars.DOKKU_APP_NAME_PREFIX }}-${{ github.event.ref }} diff --git a/.github/workflows/live-deploy.yml b/.github/workflows/live-deploy.yml index 49f656d..951c3c9 100644 --- a/.github/workflows/live-deploy.yml +++ b/.github/workflows/live-deploy.yml @@ -20,7 +20,7 @@ jobs: - name: Setup python uses: actions/setup-python@v2 with: - python-version: 3.7 + python-version: 3.11 architecture: x64 - run: pip install dokkusd - uses: oNaiPs/secrets-to-env-action@v1 From 3897668e2535dd3604257fcd80120bfc3f7c7c6f Mon Sep 17 00:00:00 2001 From: "Ed (ODSC)" Date: Wed, 8 Jan 2025 12:49:52 +0000 Subject: [PATCH 3/3] cove_bods: Add support for BODS 0.4 data --- README.md | 6 +- cove_bods/process.py | 71 +- .../cove_bods/additional_checks_table.html | 676 ++++++++++++++++-- .../cove_bods/additional_fields_table.html | 84 +++ cove_bods/templates/cove_bods/base.html | 2 +- cove_bods/templates/cove_bods/explore.html | 8 +- 
.../templates/cove_bods/validation_table.html | 18 +- cove_project/urls.py | 3 +- requirements.in | 10 +- requirements.txt | 158 ++-- requirements_dev.in | 2 +- requirements_dev.txt | 213 +++--- 12 files changed, 1019 insertions(+), 232 deletions(-) create mode 100644 cove_bods/templates/cove_bods/additional_fields_table.html diff --git a/README.md b/README.md index 17ab9aa..c16e968 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,8 @@ -# openownership-cove-bods-alpha +# Open Ownership BODS Cove + +Checks data complies with the Beneficial Ownership Data Standard (BODS) versions 0.1-0.4, +and displays any errors. Also converts uploaded data between JSON and XLSX formats. +Based on: https://github.com/OpenDataServices/cove ## Dev installation diff --git a/cove_bods/process.py b/cove_bods/process.py index 06bfb1a..5009f1d 100644 --- a/cove_bods/process.py +++ b/cove_bods/process.py @@ -5,21 +5,40 @@ import flattentool import libcovebods.data_reader import libcovebods.run_tasks +import pandas from django.core.files.base import ContentFile from django.core.files.storage import default_storage from libcovebods.additionalfields import AdditionalFields from libcovebods.config import LibCoveBODSConfig from libcovebods.jsonschemavalidate import JSONSchemaValidator from libcovebods.schema import SchemaBODS +from libcovebods.schema_dir import schema_registry from libcoveweb2.models import SuppliedData, SuppliedDataFile from libcoveweb2.process.base import ProcessDataTask from libcoveweb2.process.common_tasks.task_with_state import TaskWithState # from libcove.lib.converters import convert_json, convert_spreadsheet from libcoveweb2.utils import get_file_type_for_flatten_tool, group_data_list_by +from packaging import version as packaging_version from sentry_sdk import capture_exception +def check_table_file_new(input_file): + if get_file_type_for_flatten_tool(input_file) == "xlsx": + data = pandas.read_excel(input_file.upload_dir_and_filename()) + if "statementID" in 
data.head(): + return False + else: + return True + else: + with open(input_file.upload_dir_and_filename()) as file: + head = file.readline() + if "statementID" in head: + return False + else: + return True + + def create_error_file(directory: str, name: str, data: dict): """Create temporary error file""" filename = os.path.join(directory, f"{name}-error.json") @@ -179,14 +198,25 @@ def process(self, process_data: dict) -> dict: # We don't know what schema version the spreadsheet is in. Use default schema. schema = SchemaBODS() + config = LibCoveBODSConfig().config + + if check_table_file_new(supplied_data_json_file): + statement_id_name = "statementID" + schema = config["schema_versions"]["0.2"]["schema_url"] + else: + statement_id_name = "statementId" + schema = schema_registry( + config["schema_versions"][config["schema_latest_version"]]["schema_url"] + ).contents("urn:statement") + unflatten_kwargs = { "output_name": os.path.join(output_dir, "unflattened.json"), "root_list_path": "there-is-no-root-list-path", - "root_id": "statementID", - "id_name": "statementID", + "root_id": statement_id_name, + "id_name": statement_id_name, "root_is_list": True, "input_format": get_file_type_for_flatten_tool(supplied_data_json_file), - "schema": schema.pkg_schema_url, + "schema": schema, } flattentool.unflatten(input_filename, **unflatten_kwargs) @@ -232,12 +262,21 @@ def process(self, process_data: dict) -> dict: process_data["json_data_filename"], sample_mode=process_data["sample_mode"] ) process_data["config"] = LibCoveBODSConfig() - process_data["schema"] = SchemaBODS( - process_data["data_reader"], process_data["config"] - ) + try: + process_data["schema"] = SchemaBODS( + process_data["data_reader"], process_data["config"] + ) + except json.decoder.JSONDecodeError: + raise ValueError("JSON: Data parsing error") # Save some to disk for templates if not os.path.exists(self.data_filename): save_data = {"schema_version_used": process_data["schema"].schema_version} + if 
packaging_version.parse( + process_data["schema"].schema_version + ) < packaging_version.parse("0.4"): + save_data["record_schema_used"] = False + else: + save_data["record_schema_used"] = True with open(self.data_filename, "w") as fp: json.dump(save_data, fp, indent=4) # return @@ -294,13 +333,27 @@ def process(self, process_data: dict) -> dict: os.makedirs(self.output_dir, exist_ok=True) + if os.path.isdir(process_data["schema"].pkg_schema_url): + schema = schema_registry(process_data["schema"].pkg_schema_url).contents( + "urn:statement" + ) + else: + schema = process_data["schema"].pkg_schema_url + + if packaging_version.parse( + process_data["schema"].schema_version + ) < packaging_version.parse("0.4"): + statement_id_name = "statementID" + else: + statement_id_name = "statementId" + flatten_kwargs = { "output_name": self.output_dir, "root_list_path": "there-is-no-root-list-path", - "root_id": "statementID", - "id_name": "statementID", + "root_id": statement_id_name, + "id_name": statement_id_name, "root_is_list": True, - "schema": process_data["schema"].pkg_schema_url, + "schema": schema, } try: diff --git a/cove_bods/templates/cove_bods/additional_checks_table.html b/cove_bods/templates/cove_bods/additional_checks_table.html index 5931edc..96a42ab 100644 --- a/cove_bods/templates/cove_bods/additional_checks_table.html +++ b/cove_bods/templates/cove_bods/additional_checks_table.html @@ -13,10 +13,10 @@ {% if additional_check.type == 'entity_identifier_scheme_not_known' %} - {% trans 'The statement has an identifier scheme which is not valid.' %} + {% blocktrans %}scheme is not valid. Check the BODS documentation for guidance on identifiers. 
{% endblocktrans %} - {% trans 'Invalid Scheme' %}: {{ additional_check.scheme }} + scheme: {{ additional_check.scheme }} {{ additional_check.entity_statement }} @@ -25,10 +25,10 @@ {% elif additional_check.type == 'entity_statement_out_of_order' %} - {% trans 'This statement references an entity but that entity is defined after this statement.' %} + {% blocktrans %}Entity statement not in correct order. Check that the Entity statement is placed in the array before any statement referencing it.{% endblocktrans %} - {% trans 'Entity that is out of order' %}: {{ additional_check.entity_statement_out_of_order }} + {% trans 'Entity statement' %}: {{ additional_check.entity_statement_out_of_order }} {{ additional_check.seen_in_ownership_or_control_statement }} @@ -37,10 +37,10 @@ {% elif additional_check.type == 'person_statement_out_of_order' %} - {% trans 'This statement references a person but that person is defined after this statement.' %} + {% blocktrans %}Person statement not in correct order. Check that the Person statement is placed in the array before any statement referencing it.{% endblocktrans %} - {% trans 'Entity that is out of order' %}: {{ additional_check.person_statement_out_of_order }} + {% trans 'Person statement' %}: {{ additional_check.person_statement_out_of_order }} {{ additional_check.seen_in_ownership_or_control_statement }} @@ -49,7 +49,7 @@ {% elif additional_check.type == 'entity_statement_not_used_in_ownership_or_control_statement' %} - {% trans 'This Entity Statement is not used in any ownership or control statements.' %} + {% blocktrans %}Entity statement is not referenced from any Relationship statements. Check whether it should be the subject or interestedParty of a relationship.{% endblocktrans %} @@ -60,7 +60,7 @@ {% elif additional_check.type == 'person_statement_not_used_in_ownership_or_control_statement' %} - {% trans 'This Person Statement is not used in any ownership or control statements.' 
%} + {% blocktrans %}Person statement is not referenced from any Relationship statements. Check whether it should be the interestedParty of a relationship.{% endblocktrans %} @@ -71,10 +71,10 @@ {% elif additional_check.type == 'entity_statement_missing' %} - {% trans 'This Entity Statement is referenced from an ownership or control statement, but it is missing.' %} + {% blocktrans %}Entity statement is missing. Check whether an Entity statement is incorrectly referenced from interestedParty or subject, or whether an Entity statement is missing.{% endblocktrans %} - {% trans 'Entity that is missing' %}: {{ additional_check.entity_statement_missing }} + {% trans 'Entity statement' %}: {{ additional_check.entity_statement_missing }} {{ additional_check.seen_in_ownership_or_control_statement }} @@ -83,10 +83,10 @@ {% elif additional_check.type == 'person_statement_missing' %} - {% trans 'This Person Statement is referenced from an ownership or control statement, but it is missing.' %} + {% blocktrans %}Person statement is missing. Check whether a Person statement is incorrectly referenced from interestedParty, or whether a Person statement is missing.{% endblocktrans %} - {% trans 'Person that is missing' %}: {{ additional_check.person_statement_missing }} + {% trans 'Person statement' %}: {{ additional_check.person_statement_missing }} {{ additional_check.seen_in_ownership_or_control_statement }} @@ -95,10 +95,10 @@ {% elif additional_check.type == 'duplicate_statement_id' %} - {% trans 'This statement ID has been used more than once.' %} + {% blocktrans %}statementId value used in multiple statements. Different statements should not have the same statementId value.{% endblocktrans %} - {% trans 'Statement ID' %}: {{ additional_check.id }} + statementId: {{ additional_check.id }}   @@ -107,10 +107,10 @@ {% elif additional_check.type == 'person_birth_year_too_early' %} - {% trans 'This Person Statement has a birthday that is to early.' 
%} + {% blocktrans %}birthDate value is invalid. The year is too far in the past. Check that the date is correct and well formatted.{% endblocktrans %} - {% trans 'Year' %}: {{ additional_check.year }} + birthDate {% trans 'year' %}: {{ additional_check.year }} {{ additional_check.person_statement }} @@ -119,10 +119,10 @@ {% elif additional_check.type == 'person_birth_year_too_late' %} - {% trans 'This Person Statement has a birthday that is to late.' %} + {% blocktrans %}birthDate value is invalid. The date is in the future. Check that the date is correct and well formatted.{% endblocktrans %} - {% trans 'Year' %}: {{ additional_check.year }} + birthDate {% trans 'year' %}: {{ additional_check.year }} {{ additional_check.person_statement }} @@ -131,10 +131,10 @@ {% elif additional_check.type == 'wrong_address_type_used' and additional_check.statement_type == 'entity' %} - {% trans 'This Entity Statement has an address type that is not allowed in entity statements.' %} + {% blocktrans %}type of address is invalid in an Entity statement. Check that the address type is correct.{% endblocktrans %} - {% trans 'Type' %}: {{ additional_check.address_type }} + type: {{ additional_check.address_type }} {{ additional_check.statement }} @@ -143,10 +143,10 @@ {% elif additional_check.type == 'wrong_address_type_used' and additional_check.statement_type == 'person' %} - {% trans 'This Person Statement has an address type that is not allowed in person statements.' %} + {% blocktrans %}type of address is invalid in a Person statement. Check that the address type is correct.{% endblocktrans %} - {% trans 'Type' %}: {{ additional_check.address_type }} + type: {{ additional_check.address_type }} {{ additional_check.statement }} @@ -155,7 +155,7 @@ {% elif additional_check.type == 'alternative_address_with_no_other_address_types' and additional_check.statement_type == 'entity' %} - {% trans 'This Entity Statement has an alternate address but no other addresses.' 
%} + {% blocktrans %}type of address is 'alternative' when no other addresses are published. Check that the address type is correct.{% endblocktrans %} @@ -167,7 +167,7 @@ {% elif additional_check.type == 'alternative_address_with_no_other_address_types' and additional_check.statement_type == 'person' %} - {% trans 'This Person Statement has an alternate address but no other addresses.' %} + {% blocktrans %}type of address is 'alternative' when no other addresses are published. Check that the address type is correct.{% endblocktrans %} @@ -179,10 +179,10 @@ {% elif additional_check.type == 'component_statement_id_not_in_package' %} - {% trans 'This Ownership-or-control Statement has a component statement that is not in this package.' %} + {% blocktrans %}componentStatementIDs contains a statementID not included in this dataset. Check that this is expected.{% endblocktrans %} - {% trans 'Component Statement ID' %}: {{ additional_check.component_statement_id }} + statementID: {{ additional_check.component_statement_id }} {{ additional_check.seen_in_ownership_or_control_statement }} @@ -191,7 +191,7 @@ {% elif additional_check.type == 'ownership_or_control_statement_has_is_compontent_and_component_statement_ids' %} - {% trans 'An Ownership-or-control Statement cannot both be a component statement (isComponent) and have component statements (componentStatementIDs).' 
%} + {% blocktrans %}Ownership-or-control statement has an isComponent value ('true') incompatible with having its own components in componentStatementIDs.{% endblocktrans %} @@ -202,7 +202,7 @@ {% elif additional_check.type == 'statement_is_component_but_not_used_in_component_statement_ids' and additional_check.statement_type == 'person' %} - {% trans 'This Person Statement is a component (isComponent) but no primary Ownership-or-control Statement references it (from componentStatementIDs)' %} + {% blocktrans %}Person statement has an isComponent value of 'true' but does not appear in any componentStatementIDs list. Check that this is expected.{% endblocktrans %} @@ -213,7 +213,7 @@ {% elif additional_check.type == 'statement_is_component_but_not_used_in_component_statement_ids' and additional_check.statement_type == 'entity' %} - {% trans 'This Entity Statement is a component (isComponent) but no primary Ownership-or-control Statement references it (from componentStatementIDs)' %} + {% blocktrans %}Entity statement has an isComponent value of 'true' but does not appear in any componentStatementIDs list. Check that this is expected.{% endblocktrans %} @@ -224,7 +224,7 @@ {% elif additional_check.type == 'statement_is_component_but_not_used_in_component_statement_ids' and additional_check.statement_type == 'ownership_or_control' %} - {% trans 'This Ownership-or-control Statement is a component (isComponent) but no primary Ownership-or-control Statement references it (from componentStatementIDs)' %} + {% blocktrans %}Ownership-or-control statement has an isComponent value of 'true' but does not appear in any componentStatementIDs list. 
Check that this is expected.{% endblocktrans %} @@ -235,7 +235,7 @@ {% elif additional_check.type == 'statement_is_component_but_is_after_use_in_component_statement_id' and additional_check.statement_type == 'person' %} - {% blocktrans %}This Person Statement is a component (isComponent) and should appear before the primary Ownership-or-control Statement that references it (from componentStatementIDs).{%endblocktrans%} + {% blocktrans %}Person statement not in the correct position. As a component (isComponent 'true'), it must appear before the primary Ownership-or-control statement that references it (from componentStatementIDs @@ -246,7 +246,7 @@ {% elif additional_check.type == 'statement_is_component_but_is_after_use_in_component_statement_id' and additional_check.statement_type == 'entity' %} - {% blocktrans %}This Entity Statement is a component (isComponent) and should appear before the primary Ownership-or-control Statement that references it (from componentStatementIDs).{%endblocktrans%} + {% blocktrans %}Entity statement not in the correct position. As a component (isComponent 'true'), it must appear before the primary Ownership-or-control statement that references it (from componentStatementIDs @@ -257,7 +257,7 @@ {% elif additional_check.type == 'statement_is_component_but_is_after_use_in_component_statement_id' and additional_check.statement_type == 'ownership_or_control' %} - {% blocktrans %}This Ownership-or-control Statement is a component (isComponent) and should appear before the primary Ownership-or-control Statement that references it (from componentStatementIDs).{%endblocktrans%} + {% blocktrans %}Ownership-or-control statement not in correct order. 
As a component (isComponent 'true'), it must appear before the primary Ownership-or-control statement that references it (from componentStatementIDs @@ -268,10 +268,10 @@ {% elif additional_check.type == 'inconsistent_schema_version_used' and additional_check.statement_type == 'person' %} - {% blocktrans %}This Person Statement and the first statement of the submitted data reference different BODS versions.{%endblocktrans%} + {% blocktrans %}bodsVersion is different than that in the first statement of the dataset. Check that the schema versions are compatible.{% endblocktrans %} - {% trans 'Schema Version Used' %}: {{ additional_check.schema_version }} + bodsVersion: {{ additional_check.schema_version }} {{ additional_check.statement }} @@ -280,7 +280,7 @@ {% elif additional_check.type == 'inconsistent_schema_version_used' and additional_check.statement_type == 'entity' %} - {% blocktrans %}This Entity Statement and the first statement of the submitted data reference different BODS versions.{%endblocktrans%} + {% blocktrans %}bodsVersion is different than that in the first statement of the dataset. Check that the schema versions are compatible.{% endblocktrans %} {% trans 'Schema Version Used' %}: {{ additional_check.schema_version }} @@ -292,7 +292,7 @@ {% elif additional_check.type == 'inconsistent_schema_version_used' and additional_check.statement_type == 'ownership_or_control' %} - {% blocktrans %}This Ownership-or-control Statement and the first statement of the submitted data reference different BODS versions.{%endblocktrans%} + {% blocktrans %}bodsVersion is different than that in the first statement of the dataset. 
Check that the schema versions are compatible.{% endblocktrans %} {% trans 'Schema Version Used' %}: {{ additional_check.schema_version }} @@ -304,10 +304,10 @@ {% elif additional_check.type == 'unknown_schema_version_used' %} - {% blocktrans %}This data attempted to use a schema version that was not recognised.{%endblocktrans%} + {% blocktrans %}bodsVersion not valid. Check that the value is correctly formatted.{% endblocktrans %} - {{ additional_check.schema_version }} + bodsVersion: {{ additional_check.schema_version }} @@ -315,7 +315,7 @@ {% elif additional_check.type == 'statement_is_beneficialOwnershipOrControl_but_no_person_specified' %} - {% blocktrans %}This Ownership-or-control Statement contains interests where beneficialOwnershipOrControl is true. Therefore interestedParty must reference a Person Statement.{%endblocktrans%} + {% blocktrans %}Ownership-or-control statement asserts beneficialOwnershipOrControl is 'true' but interestedParty does not reference a Person statement. Check that information is correctly represented.{% endblocktrans %} @@ -326,7 +326,7 @@ {% elif additional_check.type == 'statement_entity_type_and_entity_sub_type_do_not_align' %} - {% blocktrans %}The specified entitySubtype is not valid for the specified entityType.{%endblocktrans%} + {% blocktrans %}entitySubtype is not valid for the specified entityType.{% endblocktrans %} @@ -337,7 +337,7 @@ {% elif additional_check.type == 'has_public_listing_information_but_has_public_listing_is_false' %} - {% blocktrans %}This Entity Statement hasPublicListing that does not exist or is false. Information has been provided under companyFilingsURLs or securitiesListings so hasPublicListing must be true.{%endblocktrans%} + {% blocktrans %}hasPublicListing has incorrect value. 
Value of companyFilingsURLs or securitiesListings suggests hasPublicListing must be 'true'.{% endblocktrans %} @@ -348,7 +348,7 @@ {% elif additional_check.type == 'entity_security_listing_market_identifier_code_set_but_not_operating_market_identifier_code' %} - {% blocktrans %}This Entity Statement has a security listing where marketIdentifierCode is set but operatingMarketIdentifierCode is not set.{%endblocktrans%} + {% blocktrans %}operatingMarketIdentifierCode should be set alongside marketIdentifierCode.{% endblocktrans %} @@ -359,7 +359,7 @@ {% elif additional_check.type == 'entity_security_listing_operating_market_identifier_code_set_but_not_market_identifier_code' %} - {% blocktrans %}This Entity Statement has a security listing where operatingMarketIdentifierCode is set but marketIdentifierCode is not set.{%endblocktrans%} + {% blocktrans %}marketIdentifierCode should be set alongside operatingMarketIdentifierCode.{% endblocktrans %} @@ -371,7 +371,7 @@ {# Currently this applies to 0.2 only #} - {% blocktrans %}This Person Statement has some PEP details without missing info but their PEP status has not been declared as True.{%endblocktrans%} + {% blocktrans %}hasPepStatus has incorrect value. pepStatusDetails are substantive, suggesting that hasPepStatus should be 'true'.{% endblocktrans %} @@ -383,7 +383,7 @@ {# Currently this applies to 0.3+ only #} - {% blocktrans %}This Person Statement has some PEP details but their PEP status is missing or has been declared as 'isNotPep'.{%endblocktrans%} + {% blocktrans %}politicalExposure.status has incorrect value. politicalExposure.details are substantive, suggesting that politicalExposure.status should be 'isPep'.{% endblocktrans %} @@ -396,9 +396,9 @@ {% if schema_version_used == '0.2' %} - {% blocktrans %}This Person Statement has some PEP details with missing info but their status has been declared as True.{%endblocktrans%} + {% blocktrans %}hasPepStatus has incorrect value. 
pepStatusDetails contains missingInfoReason, suggesting that hasPepStatus should have no value.{% endblocktrans %} {% else %} - {% blocktrans %}This Person Statement has a missingInfoReason for PEP status details, so PEP status should be declared as 'unknown'.{%endblocktrans%} + {% blocktrans %}politicalExposure.status has incorrect value. politicalExposure.details contains missingInfoReason, suggesting that status should be 'unknown'.{% endblocktrans %} {% endif %} @@ -407,78 +407,634 @@ {{ additional_check.statement }} + {% elif additional_check.type == "statement_annotation_creation_date_is_future_date" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}creationDate of an annotation is in the future. Check that dates are correctly generated and well formatted.{% endblocktrans %} + + + creationDate: {{ additional_check.creation_date }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_publication_date_is_future_date" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}publicationDate is in the future. Check that dates are correctly generated and well formatted.{% endblocktrans %} + + + publicationDate: {{ additional_check.publication_date }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_entity_is_component_not_in_component_details" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}Entity statement has an isComponent value of 'true' but does not appear in the componentRecords list of a later Relationship statement in the dataset. 
Check that component records are correctly listed and that statements are in the correct order.{% endblocktrans %} + + + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_person_is_component_not_in_component_details" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}Person statement has an isComponent value of 'true' but appears in the componentRecords list of no later Relationship statement in the dataset. Check that component records are correctly listed and that statements are in the correct order.{% endblocktrans %} + + + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_relationship_is_component_not_in_component_details" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}Relationship statement has an isComponent value of 'true' but does not appear in the componentRecords list of a later Relationship statement in the dataset. Check that component records are correctly listed and that statements are in the correct order.{% endblocktrans %} + + + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_source_retrieved_at_future_date" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}retrievedAt is in the future. Check that dates are correctly generated and well formatted.{% endblocktrans %} + + + retrievedAt: {{ additional_check.retrieval_date }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_date_is_future_date" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}statementDate is in the future. Check that dates are correctly generated and well formatted.{% endblocktrans %} + + + statementDate: {{ additional_check.statement_date }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_person_birth_date_in_future" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}birthDate is in the future. 
Check that dates are correctly generated and well formatted.{% endblocktrans %} + + + birthDate: {{ additional_check.birth_date }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_person_birth_date_too_far_in_past" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}birthDate is before 1800. Check that dates are correctly generated and well formatted.{% endblocktrans %} + + + birthDate: {{ additional_check.birth_date }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_entity_dissolution_before_founding_date" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}foundingDate is later than dissolutionDate. Check that dates are correctly generated and well formatted.{% endblocktrans %} + + + foundingDate: {{ additional_check.founding_date }}
+ dissolutionDate: {{ additional_check.dissolution_date }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_person_death_date_not_sensible_value" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}deathDate is incorrect. Check that the date is not before the birthDate, in the future, or more than 120 years after the birthDate.{% endblocktrans %} + + + deathDate: {{ additional_check.death_date }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_relationship_interests_start_after_end_date" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}startDate is later than endDate in an Interest. Check that dates are correctly generated and well formatted.{% endblocktrans %} + + + startDate: {{ additional_check.start_date }}
+ endDate: {{ additional_check.end_date }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_entity_securities_listings_haspubliclisting_is_false" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}hasPublicListing has incorrect value. Value of securitiesListings suggests hasPublicListing must be 'true'.{% endblocktrans %} + + + securitiesListings: {{ additional_check.securities_listings }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_relationship_interests_exact_has_min_max" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}share.exact is provided, alongside a range (minimum and maximum values). Provide either an exact value, or a range.{% endblocktrans %} + + + share.exact: {{ additional_check.share_exact }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_relationship_interests_not_exact_max_greater_than_min" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}share is an invalid range. The maximum value is less than the minimum value.{% endblocktrans %} + + + share {% trans '(exclusive) minimum' %}: {{ additional_check.minval }}
+ share {% trans '(exclusive) maximum' %}: {{ additional_check.maxval }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_relationship_interests_exact_max_equals_min" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}share is an invalid range. The maximum value is the same as the minimum value.{% endblocktrans %} + + + share {% trans '(exclusive) minimum' %}: {{ additional_check.minval }}
+ share {% trans '(exclusive) maximum' %}: {{ additional_check.maxval }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_relationship_interests_share_min_and_exclusivemin" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}share is an invalid range. Only one of minimum and exclusiveMinimum must be provided.{% endblocktrans %} + + + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_relationship_interests_share_max_and_exclusivemax" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}share is an invalid range. Only one of maximum and exclusiveMaximum must be provided.{% endblocktrans %} + + + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_declaration_subject_not_exist" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}declarationSubject does not appear in the dataset. Check that the dataset is complete, or that this is expected.{% endblocktrans %} + + + declarationSubject: {{ additional_check.declaration_subject }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_declaration_subject_not_entity_person" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}declarationSubject must reference an entity or person. Check that recordId values are being correctly generated and used.{% endblocktrans %} + + + declarationSubject: {{ additional_check.record_id }}
+ recordType of {{ additional_check.record_id }}: {{ additional_check.record_type }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "multiple_statements_in_series_with_record_status_new" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}recordStatus reported as 'new' in multiple Statements for a single Record. Check that recordStatus is ‘new’ only the first time a Statement is published for the record.{% endblocktrans %} + + + recordId: {{ additional_check.record_id }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_with_record_status_new_must_be_first" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}statementDate is too early. The recordStatus is 'updated' or 'closed' but statementDate is earlier than that of the corresponding 'new' Statement.{% endblocktrans %} + + + recordId: {{ additional_check.record_id }}
+ statementId: {{ additional_check.statement_id }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "multiple_statements_in_series_with_record_status_closed" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}recordStatus reported as 'closed' in multiple Statements for a single Record. Check that recordStatus is 'closed' only the final time a Statement is published for a record.{% endblocktrans %} + + + recordId: {{ additional_check.record_id }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statement_with_record_status_closed_must_be_last" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}statementDate is too late. recordStatus is 'new' or 'updated' but statementDate is later than that of the corresponding 'closed' Statement.{% endblocktrans %} + + + recordId: {{ additional_check.record_id }}
+ statementId: {{ additional_check.statement_id }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "statements_in_series_with_different_record_types" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}recordType varies across Statements for the same record. Check that Statements relating to the same record all have the same type.{% endblocktrans %} + + + recordId: {{ additional_check.record_id }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "component_record_is_statement_id" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}componentRecords contains a statement ID value. componentRecords entries must be record IDs.{% endblocktrans %} + + + componentRecords: {{ additional_check.component_id }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "component_record_id_not_in_dataset" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}componentRecords contains a recordId not included in this dataset. Check that this is expected.{% endblocktrans %} + + + componentRecords: {{ additional_check.component_id }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "subject_must_be_record_id" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}subject has unexpected value. subject must be either a record ID for a Statement in the dataset or an Unspecified Record object.{% endblocktrans %} + + + subject: {{ additional_check.subject }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "subject_can_only_refer_to_entity" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}subject must be the recordId of an entity (not a person or relationship). 
Check that recordId values are being correctly generated and used.{% endblocktrans %} + + + subject: {{ additional_check.subject }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "interested_party_must_be_record_id" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}interestedParty not recognised. Check that the value matches a recordId in the dataset or is an Unspecified Record object.{% endblocktrans %} + + + interestedParty: {{ additional_check.interested_party }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "interested_party_can_only_refer_to_entity_or_person" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}interestedParty is invalid. The value should be a recordId for a person or an entity in the dataset (not a relationship).{% endblocktrans %} + + + interestedParty: {{ additional_check.interested_party }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "interest_beneficial_ownership_interested_party_not_person" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}beneficialOwnershipOrControl is 'true' but interestedParty is not a person. Check that the interested party is correct and that beneficialOwnershipOrControl is used correctly.{% endblocktrans %} + + + interestedParty: {{ additional_check.interested_party }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "annotation_statement_pointer_target_invalid" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}statementPointerTarget is invalid. 
Check that it is a valid JSON pointer and that it points to an existing field in the Statement.{% endblocktrans %} + + + statementPointerTarget: {{ additional_check.pointer }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "relationship_interests_subject_should_be_entity_nomination_arrangement" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}subject has unexpected type. Interests in this Relationship statement suggest that the subject's entityType.subtype should be 'nomination'.{% endblocktrans %} + + + entityType.subtype: {{ additional_check.subject_record_subtype }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "relationship_interests_subject_should_be_entity_trust" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}subject has unexpected type. Interests in this Relationship statement suggest that the subject's entityType.subtype should be 'trust'.{% endblocktrans %} + + + entityType.subtype: {{ additional_check.subject_record_subtype }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "relationship_subject_not_before_relationship_in_dataset" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}subject not found. The subject must match the recordId of at least one prior Statement in the dataset. Check that Statements are ordered correctly.{% endblocktrans %} + + + subject: {{ additional_check.subject_id }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "relationship_interested_party_not_before_relationship_in_dataset" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}interestedParty not found. The interested party must match the recordId of at least one prior Statement in the dataset. 
Check that Statements are ordered correctly.{% endblocktrans %} + + + interestedParty: {{ additional_check.interested_party_id }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "person_identifiers_invalid_composition" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}scheme has incorrect formatting. Check the field description for guidance.{% endblocktrans %} + + + scheme: {{ additional_check.scheme }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "person_identifiers_no_valid_iso_3166_1_alpha_3_code" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}scheme contains an unrecognised jurisdiction. An ISO 3166-1 3-letter country code is expected. Check the field description for guidance.{% endblocktrans %} + + + scheme: {{ additional_check.scheme }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "person_identifiers_not_passport_taxid_idcard" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}scheme has unrecognised type. 'PASSPORT', 'TAXID' or 'IDCARD' is expected. Check the field description for guidance.{% endblocktrans %} + + + scheme: {{ additional_check.scheme }} + + + {{ additional_check.statement }} + + + {% elif additional_check.type == "entity_identifiers_not_known_scheme" %} + {# Currently this applies to 0.4+ #} + + + {% blocktrans %}scheme is unrecognised. A code from org-id.guide is expected. Check the field description for guidance.{% endblocktrans %} + + + scheme: {{ additional_check.scheme }} + + + {{ additional_check.statement }} + + + {% endif %} {% endfor %} {% if not statistics.count_ownership_or_control_statement_with_at_least_one_interest_beneficial %} - {% blocktrans %}No individuals are disclosed as beneficial owners. 
beneficialOwnershipOrControl must be set to true within an Interest object to indicate that the interested party is a beneficial owner.{%endblocktrans%} + {% blocktrans %}beneficialOwnershipOrControl expected to be 'true' in at least one Relationship statement with a person as an interested party. If this dataset contains beneficial owners, check that beneficialOwnershipOrControl is correctly used.{% endblocktrans %} - {% blocktrans %}All Ownership-or-control statements{%endblocktrans%} + {% blocktrans %}All Relationship statements.{% endblocktrans %} {% endif %} {% for check_not_run_in_sample_mode in checks_not_run_in_sample_mode %} {% if check_not_run_in_sample_mode == "entity_statement_missing" %} - {% trans 'This Entity Statement is referenced from an ownership or control statement, but it is missing.' %} + + {% blocktrans %}Entity statement is missing. Check whether an Entity statement is incorrectly referenced from interestedParty or subject, or whether an Entity statement is missing.{% endblocktrans %} + This check is not carried out in Sample mode. {% elif check_not_run_in_sample_mode == "person_statement_out_of_order" %} - {% trans 'This statement references a person but that person is defined after this statement.' %} + + {% blocktrans %}Person statement not in correct order. Check that the Person statement is placed in the array before any statement referencing it.{% endblocktrans %} + This check is not carried out in Sample mode. {% elif check_not_run_in_sample_mode == "statement_is_component_but_is_after_use_in_component_statement_id" %} - {% trans 'This Statement is a component (isComponent) and should appear before the primary Ownership-or-control Statement that references it (from componentStatementIDs).' %} + + {% blocktrans %}Ownership-or-control statement not in correct order. 
As a component (isComponent 'true'), it must appear before the primary Ownership-or-control statement that references it (from componentStatementIDs).{% endblocktrans %} + This check is not carried out in Sample mode. {% elif check_not_run_in_sample_mode == "statement_is_component_but_not_used_in_component_statement_ids" %} - {% trans 'This Statement is a component (isComponent) but no primary Ownership-or-control Statement references it (from componentStatementIDs)' %} + + {% blocktrans %}Ownership-or-control statement has an isComponent value of 'true' but appears in no componentStatementIDs list. Check that this is expected.{% endblocktrans %} + This check is not carried out in Sample mode. {% elif check_not_run_in_sample_mode == "person_statement_missing" %} - {% trans 'This Person Statement is referenced from an ownership or control statement, but it is missing.' %} + + {% blocktrans %}Person statement is missing. Check whether a Person statement is incorrectly referenced from interestedParty, or whether a Person statement is missing.{% endblocktrans %} + This check is not carried out in Sample mode. {% elif check_not_run_in_sample_mode == "person_statement_not_used_in_ownership_or_control_statement" %} - {% trans 'This Person Statement is not used in any ownership or control statements.' %} + + {% blocktrans %}Person statement is not referenced from any Relationship statements. Check whether it should be the interestedParty of a relationship.{% endblocktrans %} + This check is not carried out in Sample mode. {% elif check_not_run_in_sample_mode == "entity_statement_not_used_in_ownership_or_control_statement" %} - {% trans 'This Entity Statement is not used in any ownership or control statements.' %} + + {% blocktrans %}Entity statement is not referenced from any Relationship statements. Check whether it should be the subject or interestedParty of a relationship.{% endblocktrans %} + This check is not carried out in Sample mode. 
{% elif check_not_run_in_sample_mode == "duplicate_statement_id" %} - {% trans 'This statement ID has been used more than once.' %} + + {% blocktrans %}statementId value used in multiple statements. Different statements should not have the same statementId value.{% endblocktrans %} + This check is not carried out in Sample mode. {% elif check_not_run_in_sample_mode == "entity_statement_out_of_order" %} - {% trans 'This statement references an entity but that entity is defined after this statement.' %} + + {% blocktrans %}Entity statement not in correct order. Check that the Entity statement is placed in the array before any statement referencing it.{% endblocktrans %} + This check is not carried out in Sample mode. {% elif check_not_run_in_sample_mode == "component_statement_id_not_in_package" %} - {% trans 'This Ownership-or-control Statement has a component statement that is not in this package.' %} + + {% blocktrans %}componentStatementIDs contains a statementID not included in this dataset. Check that this is expected.{% endblocktrans %} + This check is not carried out in Sample mode. diff --git a/cove_bods/templates/cove_bods/additional_fields_table.html b/cove_bods/templates/cove_bods/additional_fields_table.html new file mode 100644 index 0000000..849b890 --- /dev/null +++ b/cove_bods/templates/cove_bods/additional_fields_table.html @@ -0,0 +1,84 @@ +{% load i18n %} + + + + + + + + + + + + + {% for full_path, info in additional_fields.items %} + {% if info.root_additional_field %} + + + + + + + + {% endif %} + {% endfor %} + +
{% trans 'Field Name' %}{% trans 'Field Path' %}{% trans 'Usage Count' %}{% trans 'First 3 Values' %}{% trans 'Child Fields' %}
+ {{ info.field_name }} + + {{ full_path }} + + {{ info.count }} + +
    + {% for example in info.examples|slice:":3" %} +
  • + {{ example }} +
  • + {% endfor %} +
+
+ {% if info.additional_field_descendance %} + {{info.additional_field_descendance|length}} + {% trans "(See child fields)" %} + {% endif %} +
+ +{% for parent_full_path, parent_info in additional_fields.items %} + {% if parent_info.root_additional_field and parent_info.additional_field_descendance %} + + {% endif %} +{% endfor %} + diff --git a/cove_bods/templates/cove_bods/base.html b/cove_bods/templates/cove_bods/base.html index 5b883f2..fcda516 100644 --- a/cove_bods/templates/cove_bods/base.html +++ b/cove_bods/templates/cove_bods/base.html @@ -21,7 +21,7 @@