def _isfile(client: "Client", path: str) -> bool:
    """
    Returns True if uri points to a file.

    This is a best-effort check: the filesystem is asked for the entry's
    metadata via ``client.fs.info(path)`` and any ordinary failure of that
    lookup is reported as "not a file" instead of being raised.

    Args:
        client: storage client whose ``fs`` attribute exposes ``info(path)``.
        path: uri / path of the entry to inspect.

    Returns:
        True only when the metadata reports ``type == "file"`` and the
        reported name does not end with ``/``; False otherwise, including
        when the metadata lookup fails.
    """
    try:
        info = client.fs.info(path)

        # ``info`` is a mapping, so the name must be read by key; attribute
        # access (``info.name``) raises AttributeError, which used to be
        # swallowed below and made this helper return False for every path.
        # NOTE(review): a trailing "/" presumably marks a directory
        # placeholder object that a backend reports as a file - confirm.
        if str(info.get("name", "")).endswith("/"):
            return False

        return info.get("type") == "file"
    except Exception:  # noqa: BLE001
        # Deliberately broad: backends raise many different error types for
        # missing or inaccessible paths. ``Exception`` (not a bare except)
        # still lets KeyboardInterrupt / SystemExit propagate.
        return False