Skip to content

Commit

Permalink
[infra] Adiciona testes e corrige o traceback da função download (#626
Browse files Browse the repository at this point in the history
) (#649)

* Fix download traceback try-except order, add tests

* Fix invalid billing project id test, add syntax error test

* Remove placeholders and unused objects

* Move query and billing_id tests to test_read_sql_*

* Add specific tutorial exceptions to possible download errors

* Fix read_sql exceptions

* Restructure Exception system

* Fix Exception new structure

Co-authored-by: Vítor Mussa <[email protected]>
  • Loading branch information
JoaoCarabetta and vmussa authored Jul 29, 2021
1 parent 4807121 commit bc75b48
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 53 deletions.
57 changes: 25 additions & 32 deletions python-package/basedosdados/download/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,19 @@
import pandas_gbq
from pathlib import Path
import pydata_google_auth
from pydata_google_auth.exceptions import PyDataCredentialsError
from google.cloud import bigquery
from google.cloud import bigquery_storage_v1
from functools import partialmethod
import re
import pandas as pd
from basedosdados.upload.base import Base
from functools import partialmethod
from basedosdados.validation.exceptions import BaseDosDadosException
from basedosdados.exceptions import (
BaseDosDadosException, BaseDosDadosAccessDeniedException,
BaseDosDadosAuthorizationException, BaseDosDadosInvalidProjectIDException,
BaseDosDadosNoBillingProjectIDException
)
from pandas_gbq.gbq import GenericGBQException


Expand Down Expand Up @@ -149,7 +155,6 @@ def read_sql(query, billing_project_id=None, from_file=False, reauth=False):
"""

try:

# Set a two hours timeout
bigquery_storage_v1.client.BigQueryReadClient.read_rows = partialmethod(
bigquery_storage_v1.client.BigQueryReadClient.read_rows,
Expand All @@ -161,38 +166,26 @@ def read_sql(query, billing_project_id=None, from_file=False, reauth=False):
credentials=credentials(from_file=from_file, reauth=reauth),
project_id=billing_project_id,
)
except (OSError, ValueError) as e:
msg = (
"\nWe are not sure which Google Cloud project should be billed.\n"
"First, you should make sure that you have a Google Cloud project.\n"
"If you don't have one, set one up following these steps: \n"
"\t1. Go to this link https://console.cloud.google.com/projectselector2/home/dashboard\n"
"\t2. Agree with Terms of Service if asked\n"
"\t3. Click in Create Project\n"
"\t4. Put a cool name in your project\n"
"\t5. Hit create\n"
"\n"
"Copy the Project ID, (notice that it is not the Project Name)\n"
"Now, you have two options:\n"
"1. Add an argument to your function poiting to the billing project id.\n"
" Like `bd.read_table('br_ibge_pib', 'municipios', billing_project_id=<YOUR_PROJECT_ID>)`\n"
"2. You can set a project_id in the environment by running the following command in your terminal: `gcloud config set project <YOUR_PROJECT_ID>`.\n"
" Bear in mind that you need `gcloud` installed."
)
raise BaseDosDadosException(msg) from e

except GenericGBQException as e:
if "Reason: 403" in str(e):
raise BaseDosDadosException(
"\nYou still don't have a Google Cloud Project.\n"
"Set one up following these steps: \n"
"1. Go to this link https://console.cloud.google.com/projectselector2/home/dashboard\n"
"2. Agree with Terms of Service if asked\n"
"3. Click in Create Project\n"
"4. Put a cool name in your project\n"
"5. Hit create\n"
"6. Rerun this command with the flag `reauth=True`. \n"
" Like `read_table('br_ibge_pib', 'municipios', reauth=True)`"
)
raise BaseDosDadosAccessDeniedException

elif re.match("Reason: 400 POST .* [Pp]roject[ ]*I[Dd]", str(e)):
raise BaseDosDadosInvalidProjectIDException

raise

except PyDataCredentialsError as e:
raise BaseDosDadosAuthorizationException

except (OSError, ValueError) as e:
exc_from_no_billing_id = (
"Could not determine project ID" in str(e) or \
"reading from stdin while output is captured" in str(e)
)
if exc_from_no_billing_id:
raise BaseDosDadosNoBillingProjectIDException
raise


Expand Down
75 changes: 75 additions & 0 deletions python-package/basedosdados/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
class BaseDosDadosException(Exception):
    """Root of the exception hierarchy specific to Base dos Dados.

    Catching this class also catches every more specific exception that
    derives from it.
    """


class BaseDosDadosAccessDeniedException(BaseDosDadosException):
    """Exception raised if the user provides a wrong GCP project ID.

    Args:
        message: Optional text replacing the default help message. When
            omitted (the usual case, e.g. a bare ``raise`` of the class),
            the default step-by-step instructions are used.

    Attributes:
        message: The text carried by the exception (also ``args[0]``).
    """

    def __init__(self, message=None):
        # ``message`` must be accepted positionally: pickle and copy rebuild
        # an exception as ``cls(*exc.args)``, and ``args`` holds the message.
        # A zero-argument ``__init__`` makes that reconstruction fail.
        if message is None:
            message = (
                "\nAre you sure you are using the right `billing_project_id`?"
                "\nYou must use the Project ID available in your Google Cloud"
                " console home page at https://console.cloud.google.com/home/dashboard"
                "\nIf you still don't have a Google Cloud Project, you have to "
                "create one.\n"
                "You can set one up by following these steps: \n"
                "1. Go to this link https://console.cloud.google.com/projectselector2/home/dashboard\n"
                "2. Agree with Terms of Service if asked\n"
                "3. Click in Create Project\n"
                "4. Put a cool name in your project\n"
                "5. Hit create\n"
                "6. Rerun this command with the flag `reauth=True`. \n"
                " Like `read_table('br_ibge_pib', 'municipios', "
                "billing_project_id=<YOUR_PROJECT_ID>, reauth=True)`"
            )
        self.message = message
        super().__init__(self.message)


class BaseDosDadosInvalidProjectIDException(BaseDosDadosException):
    """Exception raised if the user provides an invalid GCP project ID.

    Args:
        message: Optional text replacing the default help message. When
            omitted (the usual case, e.g. a bare ``raise`` of the class),
            the default explanation is used.

    Attributes:
        message: The text carried by the exception (also ``args[0]``).
    """

    def __init__(self, message=None):
        # ``message`` must be accepted positionally: pickle and copy rebuild
        # an exception as ``cls(*exc.args)``, and ``args`` holds the message.
        # A zero-argument ``__init__`` makes that reconstruction fail.
        if message is None:
            message = (
                "\nYou are using an invalid `billing_project_id`.\nMake sure "
                "you set it to the Project ID available in your Google Cloud"
                " console home page at https://console.cloud.google.com/home/dashboard"
            )
        self.message = message
        super().__init__(self.message)


class BaseDosDadosNoBillingProjectIDException(BaseDosDadosException):
    """Exception raised if the user provides no GCP billing project ID.

    Args:
        message: Optional text replacing the default help message. When
            omitted (the usual case, e.g. a bare ``raise`` of the class),
            the default step-by-step instructions are used.

    Attributes:
        message: The text carried by the exception (also ``args[0]``).
    """

    def __init__(self, message=None):
        # ``message`` must be accepted positionally: pickle and copy rebuild
        # an exception as ``cls(*exc.args)``, and ``args`` holds the message.
        # A zero-argument ``__init__`` makes that reconstruction fail.
        if message is None:
            message = (
                "\nWe are not sure which Google Cloud project should be billed.\n"
                "First, you should make sure that you have a Google Cloud project.\n"
                "If you don't have one, set one up following these steps: \n"
                "\t1. Go to this link https://console.cloud.google.com/projectselector2/home/dashboard\n"
                "\t2. Agree with Terms of Service if asked\n"
                "\t3. Click in Create Project\n"
                "\t4. Put a cool name in your project\n"
                "\t5. Hit create\n"
                "\n"
                "Copy the Project ID, (notice that it is not the Project Name)\n"
                "Now, you have two options:\n"
                "1. Add an argument to your function pointing to the billing project id.\n"
                " Like `bd.read_table('br_ibge_pib', 'municipios', billing_project_id=<YOUR_PROJECT_ID>)`\n"
                "2. You can set a project_id in the environment by running the following command in your terminal: `gcloud config set project <YOUR_PROJECT_ID>`.\n"
                " Bear in mind that you need `gcloud` installed."
            )
        self.message = message
        super().__init__(self.message)


class BaseDosDadosAuthorizationException(BaseDosDadosException):
    """Exception raised if the user doesn't complete the authorization
    process correctly.

    Args:
        message: Optional text replacing the default help message. When
            omitted (the usual case, e.g. a bare ``raise`` of the class),
            the default retry instructions are used.

    Attributes:
        message: The text carried by the exception (also ``args[0]``).
    """

    def __init__(self, message=None):
        # ``message`` must be accepted positionally: pickle and copy rebuild
        # an exception as ``cls(*exc.args)``, and ``args`` holds the message.
        # A zero-argument ``__init__`` makes that reconstruction fail.
        if message is None:
            message = (
                "\nAre you sure you did the authorization process correctly?\n"
                "If you were given the option to enter an authorization code, "
                "please try again and make sure you are entering the right one."
                "\nYou can try again by rerunning this command with the flag "
                "`reauth=True`. \n\tLike `read_table('br_ibge_pib', 'municipios',"
                " billing_project_id=<YOUR_PROJECT_ID>, reauth=True)`"
                "\nThen you can click at the provided link and get the right "
                "authorization code."
            )
        self.message = message
        super().__init__(self.message)
2 changes: 1 addition & 1 deletion python-package/basedosdados/upload/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from basedosdados.upload.storage import Storage
from basedosdados.upload.dataset import Dataset
from basedosdados.upload.datatypes import Datatype
from basedosdados.validation.exceptions import BaseDosDadosException
from basedosdados.exceptions import BaseDosDadosException


class Table(Base):
Expand Down
2 changes: 0 additions & 2 deletions python-package/basedosdados/validation/exceptions.py

This file was deleted.

96 changes: 79 additions & 17 deletions python-package/tests/test_download.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from os import read
import pytest
from pathlib import Path
import pandas as pd
from pandas_gbq.gbq import GenericGBQException
import shutil

from basedosdados import (
Expand All @@ -14,7 +16,10 @@
get_table_columns,
get_table_size,
)
from basedosdados.validation.exceptions import BaseDosDadosException
from basedosdados.exceptions import (
BaseDosDadosException, BaseDosDadosNoBillingProjectIDException,
BaseDosDadosInvalidProjectIDException
)


TEST_PROJECT_ID = "basedosdados-dev"
Expand All @@ -35,13 +40,6 @@ def test_download_by_query():

assert SAVEFILE.exists()

# No billing
with pytest.raises(BaseDosDadosException):
download(
SAVEFILE,
query="select * from `basedosdados.br_ibge_pib.municipio` limit 10",
)


def test_download_by_table():

Expand All @@ -57,15 +55,6 @@ def test_download_by_table():

assert SAVEFILE.exists()

# No billing
with pytest.raises(BaseDosDadosException):
download(
SAVEFILE,
dataset_id="br_ibge_pib",
table_id="municipio",
limit=10,
)


def test_download_save_to_path():

Expand Down Expand Up @@ -119,6 +108,79 @@ def test_read_sql():
)


def test_read_sql_no_billing_project_id():
    """Calling read_sql without a billing project raises the dedicated error."""
    sql = "select * from `basedosdados.br_ibge_pib.municipio` limit 10"
    expected_text = "We are not sure which Google Cloud project should be billed."

    with pytest.raises(BaseDosDadosNoBillingProjectIDException) as raised:
        read_sql(query=sql)

    # The help message must be the text shown to the user.
    assert expected_text in str(raised.value)


def test_read_sql_invalid_billing_project_id():
    """A malformed billing project id raises the invalid-project-ID error."""
    sql = "select * from `basedosdados.br_ibge_pib.municipio` limit 10"

    with pytest.raises(
        BaseDosDadosInvalidProjectIDException,
        match=r"You are using an invalid `billing_project_id`",
    ):
        read_sql(
            query=sql,
            billing_project_id="inexistent_project_id",
            from_file=True,
        )


def test_read_sql_inexistent_project():
    """Querying an unknown project raises GenericGBQException (404, Project)."""
    sql = "select * from `asedosdados.br_ibge_pib.municipio` limit 10"

    with pytest.raises(GenericGBQException) as raised:
        read_sql(query=sql, billing_project_id=TEST_PROJECT_ID, from_file=True)

    error_text = str(raised.value)
    assert "Reason: 404 Not found: Project" in error_text


def test_read_sql_inexistent_dataset():
    """Querying an unknown dataset raises GenericGBQException (404, Dataset)."""
    sql = "select * from `basedosdados.br_ibge_inexistent.municipio` limit 10"

    with pytest.raises(GenericGBQException) as raised:
        read_sql(query=sql, billing_project_id=TEST_PROJECT_ID, from_file=True)

    error_text = str(raised.value)
    assert "Reason: 404 Not found: Dataset" in error_text


def test_read_sql_inexistent_table():
    """Querying an unknown table raises GenericGBQException (404, Table)."""
    sql = "select * from `basedosdados.br_ibge_pib.inexistent` limit 10"

    with pytest.raises(GenericGBQException) as raised:
        read_sql(query=sql, billing_project_id=TEST_PROJECT_ID, from_file=True)

    error_text = str(raised.value)
    assert "Reason: 404 Not found: Table" in error_text


def test_read_sql_syntax_error():
    """An invalid SQL statement raises GenericGBQException (400 Syntax error)."""
    sql = "invalid_statement * from `basedosdados.br_ibge_pib.municipio` limit 10"

    with pytest.raises(GenericGBQException) as raised:
        read_sql(query=sql, billing_project_id=TEST_PROJECT_ID, from_file=True)

    error_text = str(raised.value)
    assert "Reason: 400 Syntax error" in error_text


def test_read_table():

assert isinstance(
Expand Down
2 changes: 1 addition & 1 deletion python-package/tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from google.api_core.exceptions import NotFound

from basedosdados import Dataset, Table, Storage
from basedosdados.validation.exceptions import BaseDosDadosException
from basedosdados.exceptions import BaseDosDadosException

DATASET_ID = "pytest"
TABLE_ID = "pytest"
Expand Down

0 comments on commit bc75b48

Please sign in to comment.