Skip to content

Commit

Permalink
fix(listing): ignore special 'dir' files
Browse files Browse the repository at this point in the history
  • Loading branch information
shcheklein committed Jan 1, 2025
1 parent 508414a commit 9d137b5
Showing 1 changed file with 16 additions and 1 deletion.
17 changes: 16 additions & 1 deletion src/datachain/lib/listing.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,21 @@ def _file_c(name: str) -> Column:
return dc.filter(pathfunc.parent(_file_c("path")) == path.lstrip("/").rstrip("/*"))


def _isfile(client: "Client", path: str) -> bool:
    """
    Returns True if ``path`` points to a file.

    Looks ``path`` up through ``client.fs.info`` and reports a file only when
    the returned entry has ``type == "file"`` and its ``name`` does not end
    with ``/``. Entries whose name ends with a slash are the special 'dir'
    entries and are never treated as files.

    This is a best-effort check: any error raised while fetching or reading
    the info results in False rather than an exception.
    """
    try:
        info = client.fs.info(path)

        # The info entry is read by key (same as the "type" lookup below).
        # Attribute access (``info.name``) fails on a dict and, because errors
        # are swallowed here, would make every path look like "not a file".
        if str(info.get("name") or "").endswith("/"):
            return False

        return info.get("type") == "file"
    except Exception:  # noqa: BLE001 - best effort: any failure means "not a file"
        return False


def parse_listing_uri(uri: str, cache, client_config) -> tuple[Optional[str], str, str]:
"""
Parsing uri and returns listing dataset name, listing uri and listing path
Expand All @@ -94,7 +109,7 @@ def parse_listing_uri(uri: str, cache, client_config) -> tuple[Optional[str], st
storage_uri, path = Client.parse_url(uri)
telemetry.log_param("client", client.PREFIX)

if not uri.endswith("/") and client.fs.isfile(uri):
if not uri.endswith("/") and _isfile(client, uri):
return None, f'{storage_uri}/{path.lstrip("/")}', path
if uses_glob(path):
lst_uri_path = posixpath.dirname(path)
Expand Down

0 comments on commit 9d137b5

Please sign in to comment.