Skip to content

Commit

Permalink
[infra] python-v1.6.2 (#1089)
Browse files (browse the repository at this point in the history)
* [infra] fix dataset_config.yaml folder path (#1067)

* feat(infra) merge master

* [infra] conform Metadata to new metadata changes (#1093)

* [dados-bot] br_ms_vacinacao_covid19 (2022-01-23) (#1086)

Co-authored-by: terminal_name <github_email>

* [dados] br_bd_diretorios_brasil.etnia_indigena (#1087)

* Sobe diretorio etnia_indigena

* Update table_config.yaml

* Update table_config.yaml

* feat: conform Metadata's schema to new one

* fix: conform yaml generation to new schema

* fix: delete test_dataset folder

Co-authored-by: Lucas Moreira <[email protected]>
Co-authored-by: Gustavo Aires Tiago <[email protected]>

Co-authored-by: Ricardo Dahis <[email protected]>
Co-authored-by: Lucas Moreira <[email protected]>
Co-authored-by: Gustavo Aires Tiago <[email protected]>
  • Loading branch information
4 people authored Feb 12, 2022
1 parent 25ba135 commit d5577ae
Show file tree
Hide file tree
Showing 7 changed files with 18 additions and 254 deletions.
3 changes: 2 additions & 1 deletion bases/br_bd_diretorios_brasil/dataset_config.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -45,4 +45,5 @@ github_url:

# Não altere esse campo.
# Data da última modificação dos metadados gerada automaticamente pelo CKAN.
metadata_modified: '2022-02-09T21:59:32.440801'

metadata_modified: '2022-02-09T21:59:32.440801'
7 changes: 0 additions & 7 deletions bases/test_dataset/README.md

This file was deleted.

42 changes: 0 additions & 42 deletions bases/test_dataset/dataset_config.yaml

This file was deleted.

26 changes: 0 additions & 26 deletions bases/test_dataset/test_table/publish.sql

This file was deleted.

1 change: 0 additions & 1 deletion bases/test_dataset/test_table/schema-staging.json

This file was deleted.

160 changes: 0 additions & 160 deletions bases/test_dataset/test_table/table_config.yaml

This file was deleted.

33 changes: 16 additions & 17 deletions python-package/basedosdados/upload/metadata.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -121,7 +121,8 @@ def ckan_data_dict(self) -> dict:
"private": ckan_dataset.get("private") or False,
"owner_org": self.owner_org,
"resources": ckan_dataset.get("resources", [])
or [{"resource_type": "external_link", "name": ""}],
or [{"resource_type": "external_link", "name": ""}]
or [{"resource_type": "information_request", "name": ""}],
"groups": [
{"name": group} for group in self.local_metadata.get("groups", []) or []
],
Expand All @@ -133,6 +134,7 @@ def ckan_data_dict(self) -> dict:
{
"key": "dataset_args",
"value": {
"short_description": self.local_metadata.get("short_description"),
"description": self.local_metadata.get("description"),
"ckan_url": self.local_metadata.get("ckan_url"),
"github_url": self.local_metadata.get("github_url"),
Expand All @@ -154,30 +156,27 @@ def ckan_data_dict(self) -> dict:
"spatial_coverage": self.local_metadata.get("spatial_coverage"),
"temporal_coverage": self.local_metadata.get("temporal_coverage"),
"update_frequency": self.local_metadata.get("update_frequency"),
"entity": self.local_metadata.get("entity"),
"time_unit": self.local_metadata.get("time_unit"),
"identifying_columns": self.local_metadata.get(
"identifying_columns"
),
"observation_level": self.local_metadata.get("observation_level"),
"last_updated": self.local_metadata.get("last_updated"),
"version": self.local_metadata.get("version"),
"published_by": self.local_metadata.get("published_by"),
"data_cleaned_by": self.local_metadata.get("data_cleaned_by"),
"data_cleaning_description": self.local_metadata.get(
"data_cleaning_description"
),
"data_cleaning_code_url": self.local_metadata.get("data_cleaning_code_url"),
"partner_organization": self.local_metadata.get("partner_organization"),
"raw_files_url": self.local_metadata.get("raw_files_url"),
"auxiliary_files_url": self.local_metadata.get(
"auxiliary_files_url"
),
"architecture_url": self.local_metadata.get("architecture_url"),
"covered_by_dictionary": self.local_metadata.get(
"covered_by_dictionary"
),
"source_bucket_name": self.local_metadata.get("source_bucket_name"),
"project_id_prod": self.local_metadata.get("project_id_prod"),
"project_id_staging": self.local_metadata.get("project_id_staging"),
"partitions": self.local_metadata.get("partitions"),
"bdm_file_size": self.local_metadata.get("bdm_file_size"),
"uncompressed_file_size": self.local_metadata.get("uncompressed_file_size"),
"compressed_file_size": self.local_metadata.get("compressed_file_size"),
"columns": self.local_metadata.get("columns"),
"metadata_modified": self.local_metadata.get("metadata_modified"),
"package_id": ckan_dataset.get("id"),
Expand Down Expand Up @@ -208,7 +207,7 @@ def metadata_schema(self) -> dict:

dataset_url = f"{self.CKAN_URL}/api/3/action/bd_dataset_schema"
dataset_schema = requests.get(dataset_url).json().get("result")

return dataset_schema

def exists_in_ckan(self) -> bool:
Expand Down Expand Up @@ -324,7 +323,7 @@ def create(
# if `dataset_config.yaml` doesn't exist but user wants to create
# it alongside `table_config.yaml`
dataset_config_exists = (
self.metadata_path / "dataset_config.yaml"
self.metadata_path / self.dataset_id / "dataset_config.yaml"
).exists()
if self.table_id and not table_only and not dataset_config_exists:
self.dataset_metadata_obj.create(if_exists=if_exists)
Expand All @@ -348,7 +347,7 @@ def validate(self) -> bool:

ckan = RemoteCKAN(self.CKAN_URL, user_agent="", apikey=None)
response = ckan.action.bd_dataset_validate(**self.ckan_data_dict)

if response.get("errors"):
error = {self.ckan_data_dict.get("name"): response["errors"]}
message = f"{self.filepath} has validation errors: {error}"
Expand Down Expand Up @@ -414,14 +413,14 @@ def publish(
self.validate()

assert self.is_updated(), (
f"Could not publish metadata due to out of date config file. "
f"Could not publish metadata due to out-of-date config file. "
f"Please run `basedosdados metadata create {self.dataset_id} "
f"{self.table_id or ''}` to get the most recently updated met"
f"adata and apply your changes to it."
)

data_dict = self.ckan_data_dict.copy()

if self.table_id:

# publish dataset metadata first if user wants to publish both
Expand Down Expand Up @@ -690,9 +689,9 @@ def build_yaml_object(
if yaml.get("partitions") == "":
yaml["partitions"] = None

# Add dataset_id and table_id
yaml["dataset_id"] = dataset_id
if table_id:
# Add dataset_id and table_id
yaml["dataset_id"] = dataset_id
yaml["table_id"] = table_id

# Add gcloud config variables
Expand Down

0 comments on commit d5577ae

Please sign in to comment.