Skip to content

Commit

Permalink
[infra] Feedback messages in upload methods [issue #1059] (#1085)
Browse files Browse the repository at this point in the history
* Creating dataclass config

* Success messages - create and update (table.py) using loguru

* feat: improve log level control

* refa: move logger config to Base.__init__

* Improving log level control

* Adjusting log level control function in base.py

* Fixing repeated 'DELETE' messages every time Table is replaced.

* Importing 'dataclass' from 'dataclasses' to make config work.

* Fixing repeated 'UPDATE' messages inside other functions.

* Defining a new script message format.

* Defining standard log messages for 'dataset.py' functions

* Defining standard log messages for 'storage.py' functions

* Defining standard log messages for 'table.py' functions

* Defining standard log messages for 'metadata.py' functions

* Adds standard configuration to billing_project_id in download.py

* Configuring billing_project_id in download.py

* Configuring config_path in base.py

Co-authored-by: Guilherme Salustiano <[email protected]>
Co-authored-by: Isadora Bugarin <[email protected]>
  • Loading branch information
3 people authored Mar 14, 2022
1 parent 564e671 commit 05f9621
Show file tree
Hide file tree
Showing 7 changed files with 182 additions and 10 deletions.
10 changes: 9 additions & 1 deletion python-package/basedosdados/constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
__all__ = ["constants"]
__all__ = ["config", "constants"]

from dataclasses import dataclass
from enum import Enum
from typing import Optional


@dataclass
class config:
    """Package-wide default settings.

    The attributes are read directly from the class (e.g. ``config.verbose``),
    so assigning to a class attribute changes the default used by later calls.
    """

    # When True, upload classes log at INFO level; otherwise only ERROR.
    verbose: bool = True
    # Default project to bill when a call passes ``billing_project_id=None``.
    billing_project_id: Optional[str] = None
    # Default configuration path used when none is passed explicitly.
    project_config_path: Optional[str] = None


class constants(Enum):
Expand Down
25 changes: 19 additions & 6 deletions python-package/basedosdados/download/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
BaseDosDadosInvalidProjectIDException,
BaseDosDadosNoBillingProjectIDException,
)
from basedosdados.constants import config, constants
from pandas_gbq.gbq import GenericGBQException


Expand Down Expand Up @@ -49,6 +50,10 @@ def read_sql(
Query result
"""

# Fall back to the package-wide default billing project when none is given.
# (Assignment, not comparison: `==` would discard the configured value.)
if billing_project_id is None:
    billing_project_id = config.billing_project_id

try:
# Set a two hours timeout
bigquery_storage_v1.client.BigQueryReadClient.read_rows = partialmethod(
Expand Down Expand Up @@ -86,8 +91,8 @@ def read_sql(
def read_table(
dataset_id,
table_id,
query_project_id="basedosdados",
billing_project_id=None,
query_project_id="basedosdados",
limit=None,
from_file=False,
reauth=False,
Expand All @@ -101,10 +106,10 @@ def read_table(
table_id (str): Optional.
Table id available in basedosdados.dataset_id.
It should always come with dataset_id.
query_project_id (str): Optional.
Project where the table lives. You can change this if you want to query a different project.
billing_project_id (str): Optional.
Project that will be billed. Find your Project ID here https://console.cloud.google.com/projectselector2/home/dashboard
query_project_id (str): Optional.
Project where the table lives. You can change this if you want to query a different project.
limit (int): Optional.
Number of rows to read from table.
from_file (boolean): Optional.
Expand All @@ -122,6 +127,10 @@ def read_table(
Query result
"""

# Fall back to the package-wide default billing project when none is given.
# (Assignment, not comparison: `==` would discard the configured value.)
if billing_project_id is None:
    billing_project_id = config.billing_project_id

if (dataset_id is not None) and (table_id is not None):
query = f"""
SELECT *
Expand All @@ -147,8 +156,8 @@ def download(
query=None,
dataset_id=None,
table_id=None,
query_project_id="basedosdados",
billing_project_id=None,
query_project_id="basedosdados",
limit=None,
from_file=False,
reauth=False,
Expand Down Expand Up @@ -180,10 +189,10 @@ def download(
table_id (str): Optional.
Table id available in basedosdados.dataset_id.
It should always come with dataset_id.
query_project_id (str): Optional.
Project where the table lives. You can change this if you want to query a different project.
billing_project_id (str): Optional.
Project that will be billed. Find your Project ID here https://console.cloud.google.com/projectselector2/home/dashboard
query_project_id (str): Optional.
Project where the table lives. You can change this if you want to query a different project.
limit (int): Optional
Number of rows.
from_file (boolean): Optional.
Expand All @@ -201,6 +210,10 @@ def download(
"Either table_id, dataset_id or query should be filled."
)

# Fall back to the package-wide default billing project when none is given.
# (Assignment, not comparison: `==` would discard the configured value.)
if billing_project_id is None:
    billing_project_id = config.billing_project_id

client = google_client(query_project_id, billing_project_id, from_file, reauth)

# makes sure that savepath is a filepath and not a folder
Expand Down
18 changes: 17 additions & 1 deletion python-package/basedosdados/upload/base.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
from google.cloud import bigquery, storage
from google.oauth2 import service_account
from loguru import logger
import yaml
from jinja2 import Template
from pathlib import Path
import shutil
import tomlkit
import warnings
import json
import sys
import base64
from os import getenv
from basedosdados import constants
from basedosdados.constants import config, constants

from functools import lru_cache

Expand All @@ -26,9 +28,14 @@ def __init__(
overwrite_cli_config=False,
):

# Fall back to the package-wide default config path when none is given.
# The dataclass field is `project_config_path`; `config.config_path` does
# not exist, and `==` would only compare instead of assigning.
# NOTE(review): if both values are None, the `Path.home() / config_path`
# below still fails; confirm whether a hard-coded fallback is wanted.
if config_path is None:
    config_path = config.project_config_path

self.config_path = Path.home() / config_path
self._init_config(force=overwrite_cli_config)
self.config = self._load_config()
self._config_log(config.verbose)

self.templates = Path(templates or self.config["templates_path"])
self.metadata_path = Path(metadata_path or self.config["metadata_path"])
Expand Down Expand Up @@ -295,6 +302,15 @@ def _init_config(self, force):

config_file.open("w", encoding="utf-8").write(tomlkit.dumps(c_file))

def _config_log(self, verbose: bool):
    """Reconfigure the loguru logger to write to stderr.

    Args:
        verbose (bool): When true the sink is registered at "INFO" level,
            otherwise at "ERROR" level.
    """
    # Drop the handlers registered so far (including loguru's default one)
    # so the sink added below is the only one left.
    logger.remove()

    if verbose:
        threshold = "INFO"
    else:
        threshold = "ERROR"

    message_layout = "<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>"
    logger.add(sys.stderr, level=threshold, format=message_layout)

def _load_config(self):

if getenv(constants.ENV_CONFIG.value):
Expand Down
31 changes: 31 additions & 0 deletions python-package/basedosdados/upload/dataset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pathlib import Path
from google.cloud import bigquery
from loguru import logger

from google.api_core.exceptions import Conflict

Expand Down Expand Up @@ -142,6 +143,13 @@ def publicize(self, mode="all", dataset_is_public=True):
dataset.access_entries = entries

m["client"].update_dataset(dataset, ["access_entries"])
logger.success(
" {object} {object_id}_{mode} was {action}!",
object_id=self.dataset_id,
mode=mode,
object="Dataset",
action="publicized",
)

def create(
self, mode="all", if_exists="raise", dataset_is_public=True, location=None
Expand Down Expand Up @@ -192,6 +200,14 @@ def create(
# exists within the project.
try:
job = m["client"].create_dataset(dataset_obj) # Make an API request.
logger.success(
" {object} {object_id}_{mode} was {action}!",
object_id=self.dataset_id,
mode=mode,
object="Dataset",
action="created",
)

except Conflict:

if if_exists == "pass":
Expand All @@ -212,6 +228,13 @@ def delete(self, mode="all"):
for m in self._loop_modes(mode):

m["client"].delete_dataset(m["id"], delete_contents=True, not_found_ok=True)
logger.info(
" {object} {object_id}_{mode} was {action}!",
object_id=self.dataset_id,
mode=mode,
object="Dataset",
action="deleted",
)

def update(self, mode="all", location=None):
"""Update dataset description. Toggle mode to choose which dataset to update.
Expand All @@ -235,3 +258,11 @@ def update(self, mode="all", location=None):
),
fields=["description"],
) # Make an API request.

logger.success(
" {object} {object_id}_{mode} was {action}!",
object_id=self.dataset_id,
mode=mode,
object="Dataset",
action="updated",
)
22 changes: 22 additions & 0 deletions python-package/basedosdados/upload/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from copy import deepcopy
from functools import lru_cache
from loguru import logger

import requests
import ruamel.yaml as ryaml
Expand Down Expand Up @@ -338,6 +339,13 @@ def create(
if self.table_id and not table_only and not dataset_config_exists:
self.dataset_metadata_obj.create(if_exists=if_exists)

logger.success(
" {object} {object_id} was {action}!",
object_id=self.table_id,
object="Metadata",
action="created",
)

return self

def validate(self) -> bool:
Expand All @@ -363,6 +371,13 @@ def validate(self) -> bool:
message = f"{self.filepath} has validation errors: {error}"
raise BaseDosDadosException(message)

logger.success(
" {object} {object_id} was {action}!",
object_id=self.table_id,
object="Metadata",
action="validated",
)

return True

def publish(
Expand Down Expand Up @@ -461,6 +476,13 @@ def publish(
self.create(if_exists="replace")
self.dataset_metadata_obj.create(if_exists="replace")

logger.success(
" {object} {object_id} was {action}!",
object_id=data_dict,
object="Metadata",
action="published",
)

return published

except (BaseDosDadosException, ValidationError) as e:
Expand Down
39 changes: 39 additions & 0 deletions python-package/basedosdados/upload/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from basedosdados.upload.base import Base
from basedosdados.exceptions import BaseDosDadosException
from loguru import logger

from google.api_core import exceptions
from google.api_core.retry import Retry
Expand Down Expand Up @@ -217,6 +218,14 @@ def upload(
"to 'replace' to overwrite data."
)

logger.success(
" {object} {filename}_{mode} was {action}!",
filename=filepath.name,
mode=mode,
object="File",
action="uploaded",
)

def download(
self,
filename="*",
Expand Down Expand Up @@ -300,6 +309,14 @@ def download(
# download blob to savepath
blob.download_to_filename(filename=f"{savepath}/{blob.name}")

logger.success(
" {object} {object_id}_{mode} was {action}!",
object_id=self.dataset_id,
mode=mode,
object="File",
action="downloaded",
)

def delete_file(self, filename, mode, partitions=None, not_found_ok=False):
"""Deletes file from path `<bucket_name>/<mode>/<dataset_id>/<table_id>/<partitions>/<filename>`.
Expand Down Expand Up @@ -335,6 +352,14 @@ def delete_file(self, filename, mode, partitions=None, not_found_ok=False):
else:
return

logger.success(
" {object} {filename}_{mode} was {action}!",
filename=filename,
mode=mode,
object="File",
action="deleted",
)

def delete_table(self, mode="staging", bucket_name=None, not_found_ok=False):
"""Deletes a table from storage, sends request in batches.
Expand Down Expand Up @@ -396,6 +421,13 @@ def delete_table(self, mode="staging", bucket_name=None, not_found_ok=False):
time.sleep(5)
counter += 1
traceback.print_exc(file=sys.stderr)
logger.success(
" {object} {object_id}_{mode} was {action}!",
object_id=self.table_id,
mode=mode,
object="Table",
action="deleted",
)

def copy_table(
self,
Expand Down Expand Up @@ -465,3 +497,10 @@ def copy_table(
counter += 1
time.sleep(5)
traceback.print_exc(file=sys.stderr)
logger.success(
" {object} {object_id}_{mode} was {action}!",
object_id=self.table_id,
mode=mode,
object="Table",
action="copied",
)
Loading

0 comments on commit 05f9621

Please sign in to comment.