Skip to content

Commit

Permalink
debugging - why is test_list_private_datasets failing?
Browse files (browse the repository at this point in the history)
  • Loading branch information
daverigby committed Feb 9, 2024
1 parent c4b7251 commit 62c1e0e
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 1 deletion.
3 changes: 2 additions & 1 deletion pinecone_datasets/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ def load(**kwargs) -> "Catalog":
)
collected_datasets = []
try:
metadata_files = fs.cat(public_datasets_base_path + "/*/metadata.json")
prefix = "gs" if isinstance(fs, gcsfs.GCSFileSystem) else "s3"
metadata_files = fs.cat(prefix + "://" + public_datasets_base_path + "/*/metadata.json")
for path, value in metadata_files.items():
name = path.split("/")[1]
try:
Expand Down
1 change: 1 addition & 0 deletions pinecone_datasets/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def get_cloud_fs(
fs: Union[gcsfs.GCSFileSystem, s3fs.S3FileSystem] - the filesystem object
"""
is_anon = path == cfg.Storage.endpoint
print(f"get_cloud_fs(path:{path}, kwargs:{kwargs}, is_anon:{is_anon}")
if path.startswith("gs://") or "storage.googleapis.com" in path:
fs = gcsfs.GCSFileSystem(token="anon" if is_anon else None, **kwargs)
elif path.startswith("s3://") or "s3.amazonaws.com" in path:
Expand Down
2 changes: 2 additions & 0 deletions tests/unit/test_private_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

def test_list_private_datasets():
os.environ["DATASETS_CATALOG_BASEPATH"] = "s3://ram-datasets"
os.environ["S3FS_LOGGING_LEVEL"] = "DEBUG"
os.environ["GCSFS_DEBUG"] = "DEBUG"
lst = list_datasets(endpoint_url="https://storage.googleapis.com")
print(lst)
del os.environ["DATASETS_CATALOG_BASEPATH"]
Expand Down

0 comments on commit 62c1e0e

Please sign in to comment.