Overhaul linting and formatting #156

Merged: 16 commits, Apr 8, 2024
18 changes: 14 additions & 4 deletions .pre-commit-config.yaml
@@ -6,23 +6,33 @@ repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: check-added-large-files
- id: check-docstring-first
- id: check-executables-have-shebangs
- id: check-case-conflict
- id: check-merge-conflict
- id: check-symlinks
- id: check-yaml
- id: check-toml
- id: debug-statements
- id: end-of-file-fixer
- id: mixed-line-ending
args: [--fix=lf]
- id: name-tests-test
args: ["--pytest-test-first"]
- id: requirements-txt-fixer
- id: trailing-whitespace
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.10.0
hooks:
- id: rst-backticks
- id: rst-directive-colons
- id: rst-inline-touching-normal
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.4
hooks:
- id: ruff
- repo: https://github.com/psf/black
rev: 24.3.0
hooks:
- id: black
- id: ruff-format
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.9.0
hooks:
29 changes: 24 additions & 5 deletions CONTRIBUTING.md
@@ -67,17 +67,15 @@ A typical PR workflow would be:
### Formatting and pre-commit hooks

Running `pre-commit install` will set up [pre-commit hooks](https://pre-commit.com/) to ensure a consistent formatting style. Currently, these include:
* [ruff](https://github.com/astral-sh/ruff) does a number of jobs, including enforcing PEP8 and sorting imports
* [black](https://black.readthedocs.io/en/stable/) for auto-formatting
* [mypy](https://mypy.readthedocs.io/en/stable/index.html) as a static type checker
* [ruff](https://github.com/astral-sh/ruff) does a number of jobs, including code linting and auto-formatting.
* [mypy](https://mypy.readthedocs.io/en/stable/index.html) as a static type checker.
* [check-manifest](https://github.com/mgedmin/check-manifest) to ensure that the right files are included in the pip package.
* [codespell](https://github.com/codespell-project/codespell) to check for common misspellings.

These will prevent code from being committed if any of these hooks fail. To run them individually (from the root of the repository), you can use:

```sh
ruff .
black ./
mypy -p movement
check-manifest
codespell
@@ -90,7 +88,28 @@ pre-commit run # for staged files
pre-commit run -a # for all files in the repository
```

For docstrings, we adhere to the [numpydoc](https://numpydoc.readthedocs.io/en/latest/format.html) style.
Some problems will be automatically fixed by the hooks. In this case, you should
stage the auto-fixed changes and run the hooks again:

```sh
git add .
pre-commit run
```

If a problem cannot be auto-fixed, the corresponding tool will provide
information on what the issue is and how to fix it. For example, `ruff` might
output something like:

```sh
movement/io/load_poses.py:551:80: E501 Line too long (90 > 79)
```

This pinpoints the problem to a single code line and a specific [ruff rule](https://docs.astral.sh/ruff/rules/) violation.
Sometimes you may have good reasons to ignore a particular rule for a specific line of code. You can do this by adding an inline comment, e.g. `# noqa: E501`. Replace `E501` with the code of the rule you want to ignore.
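
For instance, a hypothetical line that has to exceed the 79-character limit could be exempted like this (the variable name and URL are made up for illustration):

```python
# Hypothetical example: the trailing comment tells ruff to skip E501 here
DATA_URL = "https://example.com/a/deliberately/long/url/that/cannot/be/wrapped/sensibly"  # noqa: E501
```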

For docstrings, we adhere to the [numpydoc](https://numpydoc.readthedocs.io/en/latest/format.html) style.
Make sure to provide docstrings for all public functions, classes, and methods.
This is important as it allows for [automatic generation of the API reference](#updating-the-api-reference).
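
As a rough sketch (the function below is hypothetical, not part of `movement`), a numpydoc-style docstring looks like this:

```python
import numpy as np


def compute_displacement(position: np.ndarray) -> np.ndarray:
    """Compute frame-to-frame displacement vectors.

    Parameters
    ----------
    position : numpy.ndarray
        Position array of shape ``(n_frames, 2)``, holding x and y
        coordinates per frame.

    Returns
    -------
    numpy.ndarray
        Displacement array of shape ``(n_frames - 1, 2)``.
    """
    return np.diff(position, axis=0)
```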

### Testing

3 changes: 1 addition & 2 deletions README.md
@@ -1,8 +1,7 @@
[![License](https://img.shields.io/badge/License-BSD_3--Clause-orange.svg)](https://opensource.org/licenses/BSD-3-Clause)
![CI](https://img.shields.io/github/actions/workflow/status/neuroinformatics-unit/movement/test_and_deploy.yml?label=CI)
[![codecov](https://codecov.io/gh/neuroinformatics-unit/movement/branch/main/graph/badge.svg?token=P8CCH3TI8K)](https://codecov.io/gh/neuroinformatics-unit/movement)
[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v0.json)](https://github.com/astral-sh/ruff)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black)
[![Code style: Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/format.json)](https://github.com/astral-sh/ruff)
[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
[![project chat](https://img.shields.io/badge/zulip-join_chat-brightgreen.svg)](https://neuroinformatics.zulipchat.com/#narrow/stream/406001-Movement/topic/Welcome!)

5 changes: 2 additions & 3 deletions docs/source/conf.py
@@ -103,8 +103,7 @@
"branch": "gh-pages",
"binderhub_url": "https://mybinder.org",
"dependencies": ["environment.yml"],

},
},
}

# -- Options for HTML output -------------------------------------------------
@@ -152,7 +151,7 @@
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ["_static"]
html_css_files = [
("css/custom.css", {"priority": 100}),
]
2 changes: 1 addition & 1 deletion examples/README.rst
@@ -3,4 +3,4 @@
Examples
========

Below is a gallery of examples using `movement`.
Below is a gallery of examples using ``movement``.
2 changes: 1 addition & 1 deletion movement/filtering.py
@@ -27,7 +27,7 @@ def wrapper(*args, **kwargs):

# Append the log entry to the result's attributes
if result is not None and hasattr(result, "attrs"):
if "log" not in result.attrs.keys():
if "log" not in result.attrs:
result.attrs["log"] = []
result.attrs["log"].append(log_entry)

13 changes: 5 additions & 8 deletions movement/io/load_poses.py
@@ -364,7 +364,7 @@ def _load_from_sleap_analysis_file(
)
# If present, read the point-wise scores,
# and transpose to shape: (n_frames, n_tracks, n_keypoints)
if "point_scores" in f.keys():
if "point_scores" in f:
scores = f["point_scores"][:].transpose((2, 0, 1))
return ValidPosesDataset(
position_array=tracks.astype(np.float32),
@@ -502,7 +502,7 @@ def _parse_dlc_csv_to_df(file_path: Path) -> pd.DataFrame:
file = ValidPosesCSV(file_path)

possible_level_names = ["scorer", "individuals", "bodyparts", "coords"]
with open(file.path, "r") as f:
with open(file.path) as f:
# if line starts with a possible level name, split it into a list
# of strings, and add it to the list of header lines
header_lines = [
@@ -543,12 +543,9 @@ def _load_df_from_dlc_h5(file_path: Path) -> pd.DataFrame:
"""

file = ValidHDF5(file_path, expected_datasets=["df_with_missing"])

try:
# pd.read_hdf does not always return a DataFrame
df = pd.DataFrame(pd.read_hdf(file.path, key="df_with_missing"))
except Exception as error:
raise log_error(error, f"Could not load a dataframe from {file.path}.")
# pd.read_hdf does not always return a DataFrame but we assume it does
# in this case (since we know what's in the "df_with_missing" dataset)
df = pd.DataFrame(pd.read_hdf(file.path, key="df_with_missing"))
return df


2 changes: 1 addition & 1 deletion movement/io/save_poses.py
@@ -54,7 +54,7 @@ def _auto_split_individuals(ds: xr.Dataset) -> bool:
else returns False."""

n_individuals = ds.sizes["individuals"]
return True if n_individuals == 1 else False
return n_individuals == 1


def _save_dlc_df(filepath: Path, df: pd.DataFrame) -> None:
15 changes: 7 additions & 8 deletions movement/io/validators.py
@@ -93,13 +93,12 @@ def file_has_access_permissions(self, attribute, value):
@path.validator
def file_has_expected_suffix(self, attribute, value):
"""Ensures that the file has one of the expected suffix(es)."""
if self.expected_suffix: # list is not empty
if value.suffix not in self.expected_suffix:
raise log_error(
ValueError,
f"Expected file with suffix(es) {self.expected_suffix} "
f"but got suffix {value.suffix} instead.",
)
if self.expected_suffix and value.suffix not in self.expected_suffix:
raise log_error(
ValueError,
f"Expected file with suffix(es) {self.expected_suffix} "
f"but got suffix {value.suffix} instead.",
)


@define
@@ -175,7 +174,7 @@ def csv_file_contains_expected_levels(self, attribute, value):
among its top rows."""
expected_levels = ["scorer", "bodyparts", "coords"]

with open(value, "r") as f:
with open(value) as f:
top4_row_starts = [f.readline().split(",")[0] for _ in range(4)]

if top4_row_starts[3].isdigit():
6 changes: 3 additions & 3 deletions movement/logging.py
@@ -6,12 +6,13 @@
"%(asctime)s - %(levelname)s - "
"%(processName)s %(filename)s:%(lineno)s - %(message)s"
)
DEFAULT_LOG_DIRECTORY = Path.home() / ".movement"


def configure_logging(
log_level: int = logging.DEBUG,
logger_name: str = "movement",
log_directory: Path = Path.home() / ".movement",
log_directory: Path = DEFAULT_LOG_DIRECTORY,
):
"""Configure the logging module.
This function sets up a circular log file with a rotating file handler.
@@ -41,8 +42,7 @@ def configure_logging(
# Logger needs to be (re)configured if unconfigured or
# if configured but the log file path has changed
configure_logger = (
not logger_configured
or log_file != logger.handlers[0].baseFilename # type: ignore
not logger_configured or log_file != logger.handlers[0].baseFilename # type: ignore
)

if configure_logger:
6 changes: 3 additions & 3 deletions movement/sample_data.py
@@ -99,7 +99,7 @@ def _fetch_metadata(file_name: str, data_dir: Path = DATA_DIR) -> list[dict]:
else:
raise log_error(RequestException, failed_msg) from exc_info

with open(local_file_path, "r") as metadata_file:
with open(local_file_path) as metadata_file:
metadata = yaml.safe_load(metadata_file)
return metadata

@@ -145,12 +145,12 @@ def fetch_sample_data_path(filename: str) -> Path:
"""
try:
return Path(SAMPLE_DATA.fetch(filename, progressbar=True))
except ValueError:
except ValueError as error:
raise log_error(
ValueError,
f"File '{filename}' is not in the registry. Valid "
f"filenames are: {list_sample_data()}",
)
) from error


def fetch_sample_data(
21 changes: 14 additions & 7 deletions pyproject.toml
@@ -47,7 +47,6 @@ dev = [
"pytest-cov",
"coverage",
"tox",
"black",
"mypy",
"pre-commit",
"ruff",
@@ -75,11 +74,6 @@ exclude = ["tests", "docs*"]
[tool.pytest.ini_options]
addopts = "--cov=movement"

[tool.black]
target-version = ['py39', 'py310', 'py311']
skip-string-normalization = false
line-length = 79

[tool.setuptools_scm]

[tool.check-manifest]
@@ -100,9 +94,22 @@ ignore_missing_imports = true
[tool.ruff]
line-length = 79
exclude = ["__init__.py", "build", ".eggs"]
lint.select = ["I", "E", "F"]
# See https://docs.astral.sh/ruff/rules/
lint.select = [
"E", # pycodestyle errors
"F", # Pyflakes
"UP", # pyupgrade
"I", # isort
"B", # flake8 bugbear
"SIM", # flake8 simplify
"C90", # McCabe complexity
#"D", # pydocstyle
]
fix = true

[tool.ruff.format]
docstring-code-format = true # Also format code in docstrings

[tool.cibuildwheel]
build = "cp39-* cp310-* cp311-*"

2 changes: 1 addition & 1 deletion tests/test_unit/test_logging.py
@@ -19,7 +19,7 @@ def test_logfile_contains_message(level, message):
logger = logging.getLogger("movement")
eval(f"logger.{level.lower()}('{message}')")
log_file = logger.handlers[0].baseFilename
with open(log_file, "r") as f:
with open(log_file) as f:
last_line = f.readlines()[-1]
assert level in last_line
assert message in last_line
2 changes: 1 addition & 1 deletion tests/test_unit/test_save_poses.py
@@ -186,7 +186,7 @@ def test_to_dlc_df_split_individuals(
# this should produce a dict of dfs in single-animal DLC format
assert isinstance(df, dict)
for ind in ind_names:
assert ind in df.keys()
assert ind in df
assert isinstance(df[ind], pd.DataFrame)
assert df[ind].columns.names == [
"scorer",