Skip to content

Commit

Permalink
Add an `if` branch on table_id in the task to also handle the densidade tables
Browse files Browse the repository at this point in the history
  • Loading branch information
tricktx committed Nov 13, 2024
1 parent f2b7fb9 commit add5326
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 13 deletions.
2 changes: 1 addition & 1 deletion pipelines/utils/crawler_anatel/banda_larga_fixa/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@

new_ano = get_year_and_unzip(day=ano)

update_tables = get_max_date_in_table_microdados(ano=new_ano)
update_tables = get_max_date_in_table_microdados(ano=new_ano, table_id=table_id, upstream_tasks=[new_ano])

get_max_date = check_if_data_is_outdated(
dataset_id = dataset_id,
Expand Down
36 changes: 27 additions & 9 deletions pipelines/utils/crawler_anatel/banda_larga_fixa/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,39 @@ def join_tables_in_function(table_id: str, ano):
max_retries=constants.TASK_MAX_RETRIES.value,
retry_delay=timedelta(seconds=constants.TASK_RETRY_DELAY.value),
)
def get_max_date_in_table_microdados(ano: int):
log("Obtendo a data máxima do arquivo microdados da Anatel")
df = pd.read_csv(
f"{anatel_constants.INPUT_PATH.value}Acessos_Banda_Larga_Fixa_{ano}.csv",
def get_max_date_in_table_microdados(table_id: str, ano: int) -> pd.datetime:
if table_id == "microdados":
log("Obtendo a data máxima do arquivo microdados da Anatel")
df = pd.read_csv(
f"{anatel_constants.INPUT_PATH.value}Acessos_Banda_Larga_Fixa_{ano}.csv",
sep=";",
encoding="utf-8",
dtype=str
)
df['data'] = df['Ano'] + '-' + df['Mês']

df['data'] = pd.to_datetime(df['data'], format="%Y-%m")

log(df['data'].max())

return df['data'].max()

else:
log(f"{anatel_constants.INPUT_PATH.value}Densidade_Telefonia_Movel.csv")

df = pd.read_csv(
f"{anatel_constants.INPUT_PATH.value}Densidade_Telefonia_Movel.csv",
sep=";",
encoding="utf-8",
dtype=str
)
df['data'] = df['Ano'] + '-' + df['Mês']
)
df['data'] = df['Ano'] + '-' + df['Mês']

df['data'] = pd.to_datetime(df['data'], format="%Y-%m")
df['data'] = pd.to_datetime(df['data'], format="%Y-%m")

log(df['data'].max())
log(df['data'].max())

return df['data'].max()
return df['data'].max()


@task
Expand Down
2 changes: 1 addition & 1 deletion pipelines/utils/crawler_anatel/telefonia_movel/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
new_semester = get_semester(semestre, upstream_tasks=[new_year])

update_tables = get_max_date_in_table_microdados(
ano=new_year, semestre=new_semester, upstream_tasks=[new_year, new_semester]
table_id = table_id, ano=new_year, semestre=new_semester, upstream_tasks=[new_year, new_semester]
)

get_max_date = check_if_data_is_outdated(
Expand Down
23 changes: 21 additions & 2 deletions pipelines/utils/crawler_anatel/telefonia_movel/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ def join_tables_in_function(table_id, semestre, ano):
max_retries=constants.TASK_MAX_RETRIES.value,
retry_delay=timedelta(seconds=constants.TASK_RETRY_DELAY.value),
)
def get_max_date_in_table_microdados(ano, semestre):
def get_max_date_in_table_microdados(table_id, ano, semestre):

if table_id == 'microdados':
log("Obtendo a data máxima da tabela microdados...")
log(
f"{anatel_constants.INPUT_PATH.value}Acessos_Telefonia_Movel_{ano}_{semestre}S.csv"
Expand All @@ -65,7 +67,24 @@ def get_max_date_in_table_microdados(ano, semestre):

log(df['data'].max())

return df['data'].max()
return df['data'].max()

else:
log(f"{anatel_constants.INPUT_PATH.value}Densidade_Telefonia_Movel.csv")

df = pd.read_csv(
f"{anatel_constants.INPUT_PATH.value}Densidade_Telefonia_Movel.csv",
sep=";",
encoding="utf-8",
dtype=str
)
df['data'] = df['Ano'] + '-' + df['Mês']

df['data'] = pd.to_datetime(df['data'], format="%Y-%m")

log(df['data'].max())

return df['data'].max()


@task(
Expand Down

0 comments on commit add5326

Please sign in to comment.