From 3ae0a9cc241f00344fafc44f2c523be79a88a0e4 Mon Sep 17 00:00:00 2001 From: painebot Date: Fri, 21 Feb 2025 18:59:11 -0500 Subject: [PATCH] Small fixes for fsspec backend, Enable fsspec tests for s3fs and smb --- .github/workflows/build.yml | 2 +- js/src/treefinder.ts | 2 - jupyterfs/manager/fsspec.py | 101 +++++++++++++++++++++--------- jupyterfs/metamanager.py | 4 ++ jupyterfs/tests/test_fsmanager.py | 94 ++++++++++++++++----------- pyproject.toml | 2 + 6 files changed, 136 insertions(+), 69 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1e077b5..761d25c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -85,7 +85,7 @@ jobs: - name: Test run: make coverage - if: matrix.os == 'ubuntu-latest' + if: matrix.os != 'windows-latest' - name: Teardown Linux testing infra run: make teardown-infra-ubuntu DOCKER_COMPOSE="docker compose" diff --git a/js/src/treefinder.ts b/js/src/treefinder.ts index 29adb7a..2fed3a6 100644 --- a/js/src/treefinder.ts +++ b/js/src/treefinder.ts @@ -745,7 +745,6 @@ export namespace TreeFinderSidebar { columns, settings, preferredDir, - rootPath = "", caption = "TreeFinder", id = "jupyterlab-tree-finder", @@ -780,7 +779,6 @@ export namespace TreeFinderSidebar { tooltip: "Refresh", }); - widget.toolbar.addItem("new file", new_file_button); widget.toolbar.addItem("upload", uploader_button); widget.toolbar.addItem("refresh", refresh_button); diff --git a/jupyterfs/manager/fsspec.py b/jupyterfs/manager/fsspec.py index b264e7e..c57f65c 100644 --- a/jupyterfs/manager/fsspec.py +++ b/jupyterfs/manager/fsspec.py @@ -8,6 +8,7 @@ import mimetypes from base64 import decodebytes, encodebytes from datetime import datetime +from pathlib import PurePosixPath import nbformat from jupyter_server.services.contents.filemanager import FileContentsManager @@ -55,6 +56,38 @@ def create(*args, **kwargs): def _checkpoints_class_default(self): return NullCheckpoints + def _normalize_path(self, path): + if path and self._fs.root_marker and not path.startswith(self._fs.root_marker): + path = f"{self._fs.root_marker}{path}" + if path and not path.startswith(self.root): + path = f"{self.root}/{path}" + path = path or self.root + + # Fix any differences due to root markers + path = path.replace("//", "/") + + # TODO better carveouts + # S3 doesn't like spaces in paths + if self._fs.__class__.__name__.startswith("S3"): + path = path.replace(" ", "_") + + return path + + def _is_path_hidden(self, path): + """Does the specific API style path correspond to a hidden node? + Args: + path (str): The path to check. + Returns: + hidden (bool): Whether the path is hidden. + """ + # We treat entries with leading . in the name as hidden (unix convention) + # We can (and should) check this even if the path does not exist + if PurePosixPath(path).name.startswith("."): + return True + + # TODO PyFilesystem implementation does more, perhaps we should as well + return False + def is_hidden(self, path): """Does the API style path correspond to a hidden directory or file? Args: @@ -62,7 +95,16 @@ def is_hidden(self, path): Returns: hidden (bool): Whether the path exists and is hidden. """ - return path.rsplit("/", 1)[-1].startswith(".") + path = self._normalize_path(path) + ppath = PurePosixPath(path) + # Path checks are quick, so we do it first to avoid unnecessary stat calls + if any(part.startswith(".") for part in ppath.parts): + return True + while ppath.parents: + if self._is_path_hidden(str(path)): + return True + ppath = ppath.parent + return False def file_exists(self, path): """Returns True if the file exists, else returns False. @@ -71,6 +113,7 @@ def file_exists(self, path): Returns: exists (bool): Whether the file exists. """ + path = self._normalize_path(path) return self._fs.isfile(path) def dir_exists(self, path): @@ -80,6 +123,7 @@ def dir_exists(self, path): Returns: exists (bool): Whether the path is indeed a directory. """ + path = self._normalize_path(path) return self._fs.isdir(path) def exists(self, path): @@ -89,12 +133,16 @@ def exists(self, path): Returns: exists (bool): Whether the target exists. """ + path = self._normalize_path(path) return self._fs.exists(path) def _base_model(self, path): """Build the common base of a contents model""" - model = self._fs.info(path).copy() + try: + model = self._fs.info(path).copy() + except FileNotFoundError: + model = {"type": "file", "size": 0} model["name"] = path.rstrip("/").rsplit("/", 1)[-1] model["path"] = path.replace(self.root, "", 1) model["last_modified"] = datetime.fromtimestamp(model["mtime"]).isoformat() if "mtime" in model else EPOCH_START @@ -116,6 +164,13 @@ def _dir_model(self, path, content=True): if content is requested, will include a listing of the directory """ model = self._base_model(path) + + four_o_four = "directory does not exist: %r" % path + + if not self.allow_hidden and self.is_hidden(path): + self.log.debug("Refusing to serve hidden directory %r, via 404 Error", path) + raise web.HTTPError(404, four_o_four) + if content: files = self._fs.ls(path, detail=True, refresh=True) model["content"] = [self._base_model(f["name"]) for f in files if self.allow_hidden or not self.is_hidden(f["name"])] @@ -133,16 +188,8 @@ def _read_file(self, path, format): """ try: bcontent = self._fs.cat(path) - except OSError: - # sometimes this causes errors: - # e.g. fir s3fs it causes - # OSError: [Errno 22] Unsupported header 'x-amz-checksum-mode' received for this API call. - # So try non-binary - try: - bcontent = self._fs.open(path, mode="r").read() - return bcontent, "text" - except OSError as e: - raise web.HTTPError(400, path, reason=str(e)) + except OSError as e: + raise web.HTTPError(400, path, reason=str(e)) if format is None or format == "text": # Try to interpret as unicode if format is unknown or if unicode @@ -172,6 +219,7 @@ def _file_model(self, path, content=True, format=None): If not specified, try to decode as UTF-8, and fall back to base64 """ model = self._base_model(path) + model["type"] = "file" model["mimetype"] = mimetypes.guess_type(path)[0] if content: @@ -216,14 +264,6 @@ def _notebook_model(self, path, content=True): self.validate_notebook_model(model) return model - def _normalize_path(self, path): - if path and self._fs.root_marker and not path.startswith(self._fs.root_marker): - path = f"{self._fs.root_marker}{path}" - if path and not path.startswith(self.root): - path = f"{self.root}/{path}" - path = path or self.root - return path - def get(self, path, content=True, type=None, format=None): """Takes a path for an entity and returns its model Args: @@ -250,8 +290,17 @@ def get(self, path, content=True, type=None, format=None): def _save_directory(self, path, model): """create a directory""" - if not self._fs.exists(path): - self._fs.mkdir(path) + if not self.allow_hidden and self.is_hidden(path): + raise web.HTTPError(400, f"Cannot create directory {path!r}") + + if not self.exists(path): + # TODO better carveouts + if self._fs.__class__.__name__.startswith("S3"): + # need to make a file temporarily + # use the convention of a hidden file + self._fs.touch(f"{path}/.s3fskeep") + else: + self._fs.mkdir(path) elif not self._fs.isdir(path): raise web.HTTPError(400, "Not a directory: %s" % (path)) else: @@ -277,11 +326,7 @@ def _save_file(self, path, content, format): bcontent = decodebytes(b64_bytes) except Exception as e: raise web.HTTPError(400, "Encoding error saving %s: %s" % (path, e)) - - if format == "text": - self._fs.pipe(path, bcontent) - else: - self._fs.pipe(path, bcontent) + self._fs.pipe(path, bcontent) def save(self, model, path=""): """Save the file model and return the model with no content.""" @@ -333,7 +378,7 @@ def rename_file(self, old_path, new_path): return # Should we proceed with the move? - if self._fs.exists(new_path): # TODO and not samefile(old_os_path, new_os_path): + if self.exists(new_path): # TODO and not samefile(old_os_path, new_os_path): raise web.HTTPError(409, "File already exists: %s" % new_path) # Move the file diff --git a/jupyterfs/metamanager.py b/jupyterfs/metamanager.py index 6635117..02460fc 100644 --- a/jupyterfs/metamanager.py +++ b/jupyterfs/metamanager.py @@ -281,4 +281,8 @@ async def post(self): else: resources = valid_resources + for resource in resources: + if not isinstance(resource, dict): + raise web.HTTPError(400, f"Resources must be a list of dicts, got: {resource}") + self.finish(json.dumps(self.contents_manager.initResource(*resources, options=options))) diff --git a/jupyterfs/tests/test_fsmanager.py b/jupyterfs/tests/test_fsmanager.py index dd6f6a5..4178687 100644 --- a/jupyterfs/tests/test_fsmanager.py +++ b/jupyterfs/tests/test_fsmanager.py @@ -9,7 +9,9 @@ import shutil import socket from contextlib import nullcontext +from itertools import product from pathlib import Path +from shutil import which import pytest import tornado.web @@ -23,7 +25,8 @@ test_root_osfs = "osfs_local" -test_url_s3 = "http://127.0.0.1/" +# test_url_s3 = "http://127.0.0.1/" +test_url_s3 = f"http://{socket.gethostname().replace('.local', '')}" test_port_s3 = "9000" test_host_smb_docker_share = socket.gethostbyname(socket.gethostname()) @@ -66,17 +69,14 @@ class _TestBase: """Contains tests universal to all PyFilesystemContentsManager flavors""" - @pytest.fixture - def resource_uri(self): - raise NotImplementedError - - @pytest.mark.parametrize("jp_server_config", configs) - async def test_write_read(self, jp_fetch, resource_uri, jp_server_config): + @pytest.mark.parametrize("jp_server_config,resource_type", list(product(configs, ["pyfs", "fsspec"]))) + async def test_write_read(self, jp_fetch, jp_server_config, resource_type, tmp_path): allow_hidden = jp_server_config["ContentsManager"]["allow_hidden"] cc = ContentsClient(jp_fetch) - resources = await cc.set_resources([{"url": resource_uri}]) + resource = self.get_resource(resource_type, tmp_path) + resources = await cc.set_resources([resource]) drive = resources[0]["drive"] fpaths = [ @@ -131,11 +131,14 @@ def setup_method(self, method): def teardown_method(self, method): shutil.rmtree(self._test_dir, ignore_errors=True) - @pytest.fixture - def resource_uri(self, tmp_path): - yield f"osfs://{tmp_path}" + def get_resource(self, resource_type, tmp_path=None): + if resource_type == "pyfs": + return {"url": f"osfs://{tmp_path}", "type": resource_type} + elif resource_type == "fsspec": + return {"url": f"file://{tmp_path}", "type": resource_type} +@pytest.mark.skipif(which("docker") is None, reason="requires docker") class Test_FSManager_s3(_TestBase): """Tests on an instance of s3proxy running in a docker Manual startup of equivalent docker: @@ -159,20 +162,33 @@ def setup_method(self, method): def teardown_method(self, method): self._rootDirUtil.delete() - @pytest.fixture - def resource_uri(self): - uri = "s3://{id}:{key}@{bucket}?endpoint_url={url}:{port}".format( - id=s3.aws_access_key_id, - key=s3.aws_secret_access_key, - bucket=test_dir, - url=test_url_s3.strip("/"), - port=test_port_s3, - ) - yield uri + def get_resource(self, resource_type, tmp_path=None): + if resource_type == "pyfs": + uri = "s3://{id}:{key}@{bucket}?endpoint_url={url}:{port}".format( + id=s3.aws_access_key_id, + key=s3.aws_secret_access_key, + bucket=test_dir, + url=test_url_s3.strip("/"), + port=test_port_s3, + ) + return {"url": uri, "type": resource_type} + elif resource_type == "fsspec": + uri = f"s3://{test_dir}" + return { + "url": uri, + "type": resource_type, + "kwargs": { + "key": s3.aws_access_key_id, + "secret": s3.aws_secret_access_key, + "endpoint_url": f"{test_url_s3}:{test_port_s3}", + "default_cache_type": "none", + }, + } @pytest.mark.darwin @pytest.mark.linux +@pytest.mark.skipif(which("docker") is None, reason="requires docker") class Test_FSManager_smb_docker_share(_TestBase): """(mac/linux only. future: windows) runs its own samba server via py-docker. Automatically creates and exposes a share from a docker @@ -195,12 +211,6 @@ class Test_FSManager_smb_docker_share(_TestBase): smb_port=test_name_port_smb_docker_share, ) - # direct_tcp=False, - # host=None, - # hostname=None, - # my_name="local", - # name_port=137, - # smb_port=None, @classmethod def setup_class(cls): # delete any existing root @@ -217,17 +227,25 @@ def teardown_method(self, method): # delete any existing root self._rootDirUtil.delete() - @pytest.fixture - def resource_uri(self): - uri = "smb://{username}:{passwd}@{host}:{smb_port}/{share}?name-port={name_port}".format( - username=samba.smb_user, - passwd=samba.smb_passwd, - host=test_host_smb_docker_share, - share=test_hostname_smb_docker_share, - smb_port=test_name_port_smb_docker_share, - name_port=test_name_port_smb_docker_nameport, - ) - yield uri + def get_resource(self, resource_type, tmp_path=None): + if resource_type == "pyfs": + uri = "smb://{username}:{passwd}@{host}:{smb_port}/{share}?name-port={name_port}".format( + username=samba.smb_user, + passwd=samba.smb_passwd, + host=test_host_smb_docker_share, + share=test_hostname_smb_docker_share, + smb_port=test_name_port_smb_docker_share, + name_port=test_name_port_smb_docker_nameport, + ) + elif resource_type == "fsspec": + uri = "smb://{username}:{passwd}@{host}:{smb_port}/{share}".format( + username=samba.smb_user, + passwd=samba.smb_passwd, + host=test_host_smb_docker_share, + share=test_hostname_smb_docker_share, + smb_port=test_name_port_smb_docker_share, + ) + return {"url": uri, "type": resource_type} # @pytest.mark.darwin diff --git a/pyproject.toml b/pyproject.toml index 8b538c3..a6a78b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,8 @@ develop = [ "fs.smbfs>=0.6.3", # fsspec "fsspec>=2023.6.0", + "s3fs>=2024", + "smbprotocol", ] fs = [ "fs>=2.4.11",