neuroinformatics-unit · sfmig · Jun 28, 2024 · Jun 20, 2024 · Jun 20, 2024 · Jun 20, 2024
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -322,17 +322,20 @@ To add a new file, you will need to:
 2. Ask to be added as a collaborator on the [movement data repository](gin:neuroinformatics/movement-test-data) (if not already)
 3. Download the [GIN CLI](gin:G-Node/Info/wiki/GIN+CLI+Setup#quickstart) and set it up with your GIN credentials, by running `gin login` in a terminal.
 4. Clone the movement data repository to your local machine, by running `gin get neuroinformatics/movement-test-data` in a terminal.
-5. Add your new files to the `poses`, `videos`, and/or `frames` folders as appropriate. Follow the existing file naming conventions as closely as possible.
-6. Determine the sha256 checksum hash of each new file by running `sha256sum <filename>` in a terminal. For convenience, we've included a `get_sha256_hashes.py` script in the [movement data repository](gin:neuroinformatics/movement-test-data). If you run this from the root of the data repository, within a Python environment with `movement` installed, it will calculate the sha256 hashes for all files in the `poses`, `videos`, and `frames` folders and write them to files named `poses_hashes.txt`, `videos_hashes.txt`, and `frames_hashes.txt`, respectively.
+5. Add your new files to the `poses`, `videos`, `frames` and/or `bboxes` folders as appropriate. Follow the existing file naming conventions as closely as possible.
+6. Determine the sha256 checksum hash of each new file. You can do this in a terminal by running `sha256sum <filename>` on Ubuntu, or `shasum -a 256 <filename>` on macOS. For convenience, we've included a `get_sha256_hashes.py` script in the [movement data repository](gin:neuroinformatics/movement-test-data). If you run this from the root of the data repository, within a Python environment with `movement` installed, it will calculate the sha256 hashes for all files in the `poses`, `videos`, and `frames` folders and write them to files named `poses_hashes.txt`, `videos_hashes.txt`, and `frames_hashes.txt`, respectively.
 7. Add metadata for your new files to `metadata.yaml`, including their sha256 hashes you've calculated. See the example entry below for guidance.
 8. Commit a specific file with `gin commit -m <message> <filename>`, or `gin commit -m <message> .` to commit all changes.
 9. Upload the committed changes to the GIN repository by running `gin upload`. Latest changes to the repository can be pulled via `gin download`. `gin sync` will synchronise the latest changes bidirectionally.
 
+
+
 ### `metadata.yaml` example entry
 ```yaml
 "SLEAP_three-mice_Aeon_proofread.analysis.h5":
   sha256sum: "82ebd281c406a61536092863bc51d1a5c7c10316275119f7daf01c1ff33eac2a"
   source_software: "SLEAP"
+  type: "poses"  # "poses" or "bboxes" depending on the type of tracked data
   fps: 50
   species: "mouse"
   number_of_individuals: 3

diff --git a/movement/sample_data.py b/movement/sample_data.py
@@ -116,7 +116,8 @@
 def _generate_file_registry(metadata: dict[str, dict]) -> dict[str, str]:
     """Generate a file registry based on the contents of the metadata.
 
-    This includes files containing poses, frames, or entire videos.
+    This includes files containing poses, frames, videos, or bounding box
+    data.
 
     Parameters
     ----------
@@ -131,7 +132,7 @@
     """
     file_registry = {}
     for ds, val in metadata.items():
-        file_registry[f"poses/{ds}"] = val["sha256sum"]
+        file_registry[f"{val['type']}/{ds}"] = val["sha256sum"]
         for key in ["video", "frame"]:
             file_name = val[key]["file_name"]
             if file_name:
@@ -178,14 +179,15 @@
     -------
     paths : dict
         Dictionary mapping file types to their respective paths. The possible
-        file types are: "poses", "frame", "video". If "frame" or "video" are
-        not available, the corresponding value is None.
+        file types are: "poses", "frame", "video" or "bboxes". If "frame" or
+        "video" are not available, the corresponding value is None.
 
     Examples
     --------
     >>> from movement.sample_data import fetch_dataset_paths
     >>> paths = fetch_dataset_paths("DLC_single-mouse_EPM.predictions.h5")
-    >>> poses_path = paths["poses"]
+    >>> poses_path = paths["poses"]  # if the data is "poses" data
+    >>> bboxes_path = paths["bboxes"]  # if the data is "bboxes" data
     >>> frame_path = paths["frame"]
     >>> video_path = paths["video"]
 
@@ -205,21 +207,42 @@
     frame_file_name = metadata[filename]["frame"]["file_name"]
     video_file_name = metadata[filename]["video"]["file_name"]
 
-    return {
-        "poses": Path(
-            SAMPLE_DATA.fetch(f"poses/{filename}", progressbar=True)
-        ),
-        "frame": None
+    # add trajectory data
+    # if no type is specified, assume "poses" for backwards compatibility
+    paths_dict: dict[str, Path | None] = {}
+    if ("type" not in metadata[filename]) or (
+        metadata[filename]["type"] == "poses"
+    ):
+        paths_dict = {
+            "poses": Path(
+                SAMPLE_DATA.fetch(f"poses/{filename}", progressbar=True)
+            )
+        }
+    elif metadata[filename]["type"] == "bboxes":
+        paths_dict = {
+            "bboxes": Path(
+                SAMPLE_DATA.fetch(f"bboxes/{filename}", progressbar=True)
+            )
+        }
+
+    # add frame and video data if available
+    paths_dict["frame"] = (
+        None
         if not frame_file_name
         else Path(
             SAMPLE_DATA.fetch(f"frames/{frame_file_name}", progressbar=True)
-        ),
-        "video": None
+        )
+    )
+
+    paths_dict["video"] = (
+        None
         if not video_file_name
         else Path(
             SAMPLE_DATA.fetch(f"videos/{video_file_name}", progressbar=True)
-        ),
-    }
+        )
+    )
+
+    return paths_dict
 
 
 def fetch_dataset(
@@ -257,11 +280,20 @@
     """
     file_paths = fetch_dataset_paths(filename)
 
-    ds = load_poses.from_file(
-        file_paths["poses"],
-        source_software=metadata[filename]["source_software"],
-        fps=metadata[filename]["fps"],
-    )
+    if "poses" in file_paths:
+        ds = load_poses.from_file(
+            file_paths["poses"],
+            source_software=metadata[filename]["source_software"],
+            fps=metadata[filename]["fps"],
+        )
+    elif "bboxes" in file_paths:
+        pass
+        # TO BE IMPLEMENTED IN PR 229 (NOTE: until then `ds` is unassigned in this branch): https://github.com/neuroinformatics-unit/movement/pull/229
+        # ds = load_bboxes.from_file(
+        #     file_paths["bboxes"],
+        #     source_software=metadata[filename]["source_software"],
+        #     fps=metadata[filename]["fps"],
+        # )
     ds.attrs["frame_path"] = file_paths["frame"]
     ds.attrs["video_path"] = file_paths["video"]
 

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -20,10 +20,13 @@ def pytest_configure():
     """Perform initial configuration for pytest.
     Fetches pose data file paths as a dictionary for tests.
     """
-    pytest.POSE_DATA_PATHS = {
-        file_name: fetch_dataset_paths(file_name)["poses"]
-        for file_name in list_datasets()
-    }
+    pytest.DATA_PATHS = {}
+    for file_name in list_datasets():
+        paths_dict = fetch_dataset_paths(file_name)
+        if "poses" in paths_dict:
+            pytest.DATA_PATHS[file_name] = paths_dict["poses"]
+        elif "bboxes" in paths_dict:
+            pytest.DATA_PATHS[file_name] = paths_dict["bboxes"]
 
 
 @pytest.fixture(autouse=True)
@@ -194,9 +197,7 @@ def new_csv_file(tmp_path):
 @pytest.fixture
 def dlc_style_df():
     """Return a valid DLC-style DataFrame."""
-    return pd.read_hdf(
-        pytest.POSE_DATA_PATHS.get("DLC_single-wasp.predictions.h5")
-    )
+    return pd.read_hdf(pytest.DATA_PATHS.get("DLC_single-wasp.predictions.h5"))
 
 
 @pytest.fixture(
@@ -211,7 +212,7 @@ def dlc_style_df():
 )
 def sleap_file(request):
     """Return the file path for a SLEAP .h5 or .slp file."""
-    return pytest.POSE_DATA_PATHS.get(request.param)
+    return pytest.DATA_PATHS.get(request.param)
 
 
 @pytest.fixture

diff --git a/tests/test_integration/test_io.py b/tests/test_integration/test_io.py
@@ -2,7 +2,7 @@
 import numpy as np
 import pytest
 import xarray as xr
-from pytest import POSE_DATA_PATHS
+from pytest import DATA_PATHS
 
 from movement.io import load_poses, save_poses
 
@@ -62,7 +62,7 @@ def test_to_sleap_analysis_file_returns_same_h5_file_content(
         file) to a SLEAP-style .h5 analysis file returns the same file
         contents.
         """
-        sleap_h5_file_path = POSE_DATA_PATHS.get(sleap_h5_file)
+        sleap_h5_file_path = DATA_PATHS.get(sleap_h5_file)
         ds = load_poses.from_sleap_file(sleap_h5_file_path, fps=fps)
         save_poses.to_sleap_analysis_file(ds, new_h5_file)
 
@@ -93,7 +93,7 @@ def test_to_sleap_analysis_file_source_file(self, file, new_h5_file):
         to a SLEAP-style .h5 analysis file stores the .slp labels path
         only when the source file is a .slp file.
         """
-        file_path = POSE_DATA_PATHS.get(file)
+        file_path = DATA_PATHS.get(file)
         if file.startswith("DLC"):
             ds = load_poses.from_dlc_file(file_path)
         else:

diff --git a/tests/test_unit/test_load_poses.py b/tests/test_unit/test_load_poses.py
@@ -4,7 +4,7 @@
 import numpy as np
 import pytest
 import xarray as xr
-from pytest import POSE_DATA_PATHS
+from pytest import DATA_PATHS
 from sleap_io.io.slp import read_labels, write_labels
 from sleap_io.model.labels import LabeledFrame, Labels
 
@@ -18,9 +18,7 @@ class TestLoadPoses:
     @pytest.fixture
     def sleap_slp_file_without_tracks(self, tmp_path):
         """Mock and return the path to a SLEAP .slp file without tracks."""
-        sleap_file = POSE_DATA_PATHS.get(
-            "SLEAP_single-mouse_EPM.predictions.slp"
-        )
+        sleap_file = DATA_PATHS.get("SLEAP_single-mouse_EPM.predictions.slp")
         labels = read_labels(sleap_file)
         file_path = tmp_path / "track_is_none.slp"
         lfs = []
@@ -48,7 +46,7 @@ def sleap_slp_file_without_tracks(self, tmp_path):
     @pytest.fixture
     def sleap_h5_file_without_tracks(self, tmp_path):
         """Mock and return the path to a SLEAP .h5 file without tracks."""
-        sleap_file = POSE_DATA_PATHS.get("SLEAP_single-mouse_EPM.analysis.h5")
+        sleap_file = DATA_PATHS.get("SLEAP_single-mouse_EPM.analysis.h5")
         file_path = tmp_path / "track_is_none.h5"
         with h5py.File(sleap_file, "r") as f1, h5py.File(file_path, "w") as f2:
             for key in list(f1.keys()):
@@ -120,7 +118,7 @@ def test_load_from_sleap_file_without_tracks(
             sleap_file_without_tracks
         )
         ds_from_tracked = load_poses.from_sleap_file(
-            POSE_DATA_PATHS.get("SLEAP_single-mouse_EPM.analysis.h5")
+            DATA_PATHS.get("SLEAP_single-mouse_EPM.analysis.h5")
         )
         # Check if the "individuals" coordinate matches
         # the assigned default "individuals_0"
@@ -153,8 +151,8 @@ def test_load_from_sleap_slp_file_or_h5_file_returns_same(
         """Test that loading pose tracks from SLEAP .slp and .h5 files
         return the same Dataset.
         """
-        slp_file_path = POSE_DATA_PATHS.get(slp_file)
-        h5_file_path = POSE_DATA_PATHS.get(h5_file)
+        slp_file_path = DATA_PATHS.get(slp_file)
+        h5_file_path = DATA_PATHS.get(h5_file)
         ds_from_slp = load_poses.from_sleap_file(slp_file_path)
         ds_from_h5 = load_poses.from_sleap_file(h5_file_path)
         xr.testing.assert_allclose(ds_from_h5, ds_from_slp)
@@ -171,7 +169,7 @@ def test_load_from_dlc_file(self, file_name):
         """Test that loading pose tracks from valid DLC files
         returns a proper Dataset.
         """
-        file_path = POSE_DATA_PATHS.get(file_name)
+        file_path = DATA_PATHS.get(file_name)
         ds = load_poses.from_dlc_file(file_path)
         self.assert_dataset(ds, file_path, "DeepLabCut")
 
@@ -191,8 +189,8 @@ def test_load_from_dlc_file_csv_or_h5_file_returns_same(self):
         """Test that loading pose tracks from DLC .csv and .h5 files
         return the same Dataset.
         """
-        csv_file_path = POSE_DATA_PATHS.get("DLC_single-wasp.predictions.csv")
-        h5_file_path = POSE_DATA_PATHS.get("DLC_single-wasp.predictions.h5")
+        csv_file_path = DATA_PATHS.get("DLC_single-wasp.predictions.csv")
+        h5_file_path = DATA_PATHS.get("DLC_single-wasp.predictions.h5")
         ds_from_csv = load_poses.from_dlc_file(csv_file_path)
         ds_from_h5 = load_poses.from_dlc_file(h5_file_path)
         xr.testing.assert_allclose(ds_from_h5, ds_from_csv)
@@ -210,7 +208,7 @@ def test_load_from_dlc_file_csv_or_h5_file_returns_same(self):
     def test_fps_and_time_coords(self, fps, expected_fps, expected_time_unit):
         """Test that time coordinates are set according to the provided fps."""
         ds = load_poses.from_sleap_file(
-            POSE_DATA_PATHS.get("SLEAP_three-mice_Aeon_proofread.analysis.h5"),
+            DATA_PATHS.get("SLEAP_three-mice_Aeon_proofread.analysis.h5"),
             fps=fps,
         )
         assert ds.time_unit == expected_time_unit
@@ -234,7 +232,7 @@ def test_load_from_lp_file(self, file_name):
         """Test that loading pose tracks from valid LightningPose (LP) files
         returns a proper Dataset.
         """
-        file_path = POSE_DATA_PATHS.get(file_name)
+        file_path = DATA_PATHS.get(file_name)
         ds = load_poses.from_lp_file(file_path)
         self.assert_dataset(ds, file_path, "LightningPose")
 
@@ -243,7 +241,7 @@ def test_load_from_lp_or_dlc_file_returns_same(self):
         using either the `from_lp_file` or `from_dlc_file` function
         returns the same Dataset (except for the source_software).
         """
-        file_path = POSE_DATA_PATHS.get("LP_mouse-face_AIND.predictions.csv")
+        file_path = DATA_PATHS.get("LP_mouse-face_AIND.predictions.csv")
         ds_drom_lp = load_poses.from_lp_file(file_path)
         ds_from_dlc = load_poses.from_dlc_file(file_path)
         xr.testing.assert_allclose(ds_from_dlc, ds_drom_lp)
@@ -254,7 +252,7 @@ def test_load_multi_individual_from_lp_file_raises(self):
         """Test that loading a multi-individual .csv file using the
         `from_lp_file` function raises a ValueError.
         """
-        file_path = POSE_DATA_PATHS.get("DLC_two-mice.predictions.csv")
+        file_path = DATA_PATHS.get("DLC_two-mice.predictions.csv")
         with pytest.raises(ValueError):
             load_poses.from_lp_file(file_path)
 

diff --git a/tests/test_unit/test_sample_data.py b/tests/test_unit/test_sample_data.py
@@ -38,6 +38,7 @@ def validate_metadata(metadata: dict[str, dict]) -> None:
     """Assert that the metadata is in the expected format."""
     metadata_fields = [
         "sha256sum",
+        "type",
         "source_software",
         "fps",
         "species",
@@ -61,7 +62,7 @@ def validate_metadata(metadata: dict[str, dict]) -> None:
         set(val.keys()) == set(metadata_fields) for val in metadata.values()
     ), f"Found issues with the names of medatada fields. {check_yaml_msg}"
 
-    # check that metadata keys (pose file names) are unique
+    # check that metadata keys (file names) are unique
     assert len(metadata.keys()) == len(set(metadata.keys()))
 
     # check that the first 2 fields are present and are strings

diff --git a/tests/test_unit/test_save_poses.py b/tests/test_unit/test_save_poses.py
@@ -5,7 +5,7 @@
 import pandas as pd
 import pytest
 import xarray as xr
-from pytest import POSE_DATA_PATHS
+from pytest import DATA_PATHS
 
 from movement.io import load_poses, save_poses
 
@@ -66,33 +66,33 @@ def output_file_params(self, request):
             (np.array([1, 2, 3]), pytest.raises(ValueError)),  # incorrect type
             (
                 load_poses.from_dlc_file(
-                    POSE_DATA_PATHS.get("DLC_single-wasp.predictions.h5")
+                    DATA_PATHS.get("DLC_single-wasp.predictions.h5")
                 ),
                 does_not_raise(),
             ),  # valid dataset
             (
                 load_poses.from_dlc_file(
-                    POSE_DATA_PATHS.get("DLC_two-mice.predictions.csv")
+                    DATA_PATHS.get("DLC_two-mice.predictions.csv")
                 ),
                 does_not_raise(),
             ),  # valid dataset
             (
                 load_poses.from_sleap_file(
-                    POSE_DATA_PATHS.get("SLEAP_single-mouse_EPM.analysis.h5")
+                    DATA_PATHS.get("SLEAP_single-mouse_EPM.analysis.h5")
                 ),
                 does_not_raise(),
             ),  # valid dataset
             (
                 load_poses.from_sleap_file(
-                    POSE_DATA_PATHS.get(
+                    DATA_PATHS.get(
                         "SLEAP_three-mice_Aeon_proofread.predictions.slp"
                     )
                 ),
                 does_not_raise(),
             ),  # valid dataset
             (
                 load_poses.from_lp_file(
-                    POSE_DATA_PATHS.get("LP_mouse-face_AIND.predictions.csv")
+                    DATA_PATHS.get("LP_mouse-face_AIND.predictions.csv")
                 ),
                 does_not_raise(),
             ),  # valid dataset