Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(listing): ignore special 'dir' files #767

Merged
merged 1 commit into from
Jan 3, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion src/datachain/lib/listing.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,24 @@ def _file_c(name: str) -> Column:
return dc.filter(pathfunc.parent(_file_c("path")) == path.lstrip("/").rstrip("/*"))


def _isfile(client: "Client", path: str) -> bool:
"""
Returns True if uri points to a file
"""
try:
info = client.fs.info(path)
name = info.get("name")
# case for special simulated directories on some clouds
# e.g. Google creates a zero byte file with the same name as the
# directory with a trailing slash at the end
if not name or name.endswith("/"):
return False

return info["type"] == "file"
except: # noqa: E722
return False


def parse_listing_uri(uri: str, cache, client_config) -> tuple[Optional[str], str, str]:
"""
Parsing uri and returns listing dataset name, listing uri and listing path
Expand All @@ -94,7 +112,7 @@ def parse_listing_uri(uri: str, cache, client_config) -> tuple[Optional[str], st
storage_uri, path = Client.parse_url(uri)
telemetry.log_param("client", client.PREFIX)

if not uri.endswith("/") and client.fs.isfile(uri):
if not uri.endswith("/") and _isfile(client, uri):
return None, f'{storage_uri}/{path.lstrip("/")}', path
if uses_glob(path):
lst_uri_path = posixpath.dirname(path)
Expand Down
Loading