diff --git a/pipelines/datasets/br_ms_sinan/flows.py b/pipelines/datasets/br_ms_sinan/flows.py
index f9e41aa86..93f7357ad 100644
--- a/pipelines/datasets/br_ms_sinan/flows.py
+++ b/pipelines/datasets/br_ms_sinan/flows.py
@@ -7,7 +7,7 @@
 from pipelines.datasets.br_ms_sinan.schedules import (
     everyday_sinan_microdados
 )
-
+# Pipeline Sinan
 br_ms_sinan__microdados_dengue = deepcopy(flow_sinan)
 br_ms_sinan__microdados_dengue.name = "br_ms_sinan.microdados_dengue"
 br_ms_sinan__microdados_dengue.code_owners = ["tricktx"]
diff --git a/pipelines/datasets/br_ms_sinan/schedules.py b/pipelines/datasets/br_ms_sinan/schedules.py
index faf3afadc..93aa1491b 100644
--- a/pipelines/datasets/br_ms_sinan/schedules.py
+++ b/pipelines/datasets/br_ms_sinan/schedules.py
@@ -1,12 +1,11 @@
 # -*- coding: utf-8 -*-
-"""
-Schedules for br_ms_sinan
-"""
 from datetime import datetime
 from prefect.schedules import Schedule, adjustments, filters
 from prefect.schedules.clocks import CronClock
 from pipelines.constants import constants
-
+"""
+Schedules for br_ms_sinan
+"""
 everyday_sinan_microdados = Schedule(
     clocks=[
         CronClock(
@@ -27,4 +26,4 @@
     ],
     filters=[filters.is_weekday],
     adjustments=[adjustments.next_weekday],
-)
\ No newline at end of file
+)
diff --git a/pipelines/utils/crawler_datasus/flows.py b/pipelines/utils/crawler_datasus/flows.py
index 9c6515302..6ea356cf5 100644
--- a/pipelines/utils/crawler_datasus/flows.py
+++ b/pipelines/utils/crawler_datasus/flows.py
@@ -372,7 +372,6 @@
 
 with Flow(name="DATASUS-SINAN", code_owners=["trick"]) as flow_sinan:
 
-    # Parameters
     dataset_id = Parameter("dataset_id", default ="br_ms_sinan", required=True)
     table_id = Parameter("table_id", default="microdados_dengue", required=True)
     update_metadata = Parameter("update_metadata", default=True, required=False)
diff --git a/pipelines/utils/crawler_datasus/tasks.py b/pipelines/utils/crawler_datasus/tasks.py
index bd4b8a1e1..2cfec58fe 100644
--- a/pipelines/utils/crawler_datasus/tasks.py
+++ b/pipelines/utils/crawler_datasus/tasks.py
@@ -44,8 +44,6 @@
 from pipelines.utils.metadata.utils import get_api_most_recent_date, get_url
 from pipelines.utils.utils import log
 
-
-
 @task(
     max_retries=2,
     retry_delay=timedelta(seconds=constants.TASK_RETRY_DELAY.value),
diff --git a/pipelines/utils/crawler_datasus/utils.py b/pipelines/utils/crawler_datasus/utils.py
index 22738d69a..0b2243ec2 100644
--- a/pipelines/utils/crawler_datasus/utils.py
+++ b/pipelines/utils/crawler_datasus/utils.py
@@ -82,6 +82,7 @@ def dbf_to_parquet(dbf: str, table_id: str, counter: int, chunk_size:int) -> st
         counter_chunk += 1
 
     if table_id == "microdados_dengue":
+        log(f'---- post processing {table_id=}')
         df = pd.read_parquet(parquet_filepath)
         df = post_process_microdados_dengue(df)
 