Skip to content

Commit

Permalink
fix(load): update error message in case of dataset not found locally …
Browse files Browse the repository at this point in the history
…and missing api keys (#1589)

* tests: add tests for config, smart dataframe and smart datalake

* fix(dataset): update exception message

---------

Co-authored-by: Gabriele Venturi <[email protected]>
  • Loading branch information
ArslanSaleem and gventuri authored Feb 5, 2025
1 parent 6e8b3f9 commit f3f698e
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 6 deletions.
21 changes: 18 additions & 3 deletions pandasai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,17 @@ def load(dataset_path: str) -> DataFrame:
raise ValueError("The path must be in the format 'organization/dataset'.")

dataset_full_path = os.path.join(find_project_root(), "datasets", dataset_path)
if not os.path.exists(dataset_full_path):

local_dataset_exists = os.path.exists(dataset_full_path)

if not local_dataset_exists:
api_key = os.environ.get("PANDABI_API_KEY", None)
api_url = os.environ.get("PANDABI_API_URL", DEFAULT_API_URL)

if not api_url or not api_key:
raise PandaAIApiKeyError()
raise PandaAIApiKeyError(
f'The dataset "{dataset_path}" does not exist in your local datasets directory. In addition, no API Key has been provided. Set an API key with valid permits if you want to fetch the dataset from the remote server.'
)

request_session = get_pandaai_session()

Expand All @@ -232,7 +238,16 @@ def load(dataset_path: str) -> DataFrame:
zip_file.extractall(dataset_full_path)

loader = DatasetLoader.create_loader_from_path(dataset_path)
return loader.load()
df = loader.load()

message = (
"Dataset loaded successfully."
if local_dataset_exists
else "Dataset fetched successfully from the remote server."
)
print(message)

return df


def read_csv(filepath: str) -> DataFrame:
Expand Down
25 changes: 22 additions & 3 deletions tests/unit_tests/test_pandasai_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,20 +137,39 @@ def test_load_dataset_not_found(self, mockenviron, mock_bytes_io, mock_zip_file)
with pytest.raises(DatasetNotFound):
pandasai.load(dataset_path)

@patch("pandasai.os.path.exists")
@patch("pandasai.os.environ", {})
@patch("pandasai.get_pandaai_session")
def test_load_missing_not_found_locally_and_no_remote_key(
self, mock_session, mock_exists
):
"""Test loading a dataset that is missing locally when no API key is set.

Expects ``pandasai.load`` to raise ``PandaAIApiKeyError`` with a message
that names the requested dataset.
"""
# Pretend the dataset directory does not exist on disk.
mock_exists.return_value = False
# The environment is patched to an empty dict, so no PANDABI_API_KEY is set.
# NOTE(review): the 404 session mock looks like a safety net only; the
# error appears to be raised before any request is made -- confirm.
mock_response = MagicMock()
mock_response.status_code = 404
mock_session.return_value.get.return_value = mock_response
dataset_path = "org/dataset_name"

# `match` is applied as a regular expression search against the message.
with pytest.raises(
PandaAIApiKeyError,
match='The dataset "org/dataset_name" does not exist in your local datasets directory. In addition, no API Key has been provided. Set an API key with valid permits if you want to fetch the dataset from the remote server.',
):
pandasai.load(dataset_path)

@patch("pandasai.os.path.exists")
@patch("pandasai.os.environ", {"PANDABI_API_KEY": "key"})
def test_load_missing_api_url(self, mock_exists):
"""Test loading when API URL is missing."""
mock_exists.return_value = False
dataset_path = "org/dataset_name"

with pytest.raises(PandaAIApiKeyError):
with pytest.raises(DatasetNotFound):
pandasai.load(dataset_path)

@patch("pandasai.os.path.exists")
@patch("pandasai.os.environ", {"PANDABI_API_KEY": "key"})
@patch("pandasai.get_pandaai_session")
def test_load_missing_api_url(self, mock_session, mock_exists):
def test_load_missing_not_found(self, mock_session, mock_exists):
"""Test loading when API URL is missing."""
mock_exists.return_value = False
mock_response = MagicMock()
Expand Down Expand Up @@ -202,7 +221,7 @@ def test_load_without_api_credentials(
pandasai.load("test/dataset")
assert (
str(exc_info.value)
== "PandaAI API key not found. Please set your API key using PandaAI.set_api_key() or by setting the PANDASAI_API_KEY environment variable."
== 'The dataset "test/dataset" does not exist in your local datasets directory. In addition, no API Key has been provided. Set an API key with valid permits if you want to fetch the dataset from the remote server.'
)

def test_clear_cache(self):
Expand Down

0 comments on commit f3f698e

Please sign in to comment.