[TEST] Add non-regression tests for AIBL that compare the inside of …

…all metadata TSV files (#1424) * WIP 1611 * Remove test TODOs * Enum as suggested + error message from assert_... upgraded * Remove .AIBL in test... .py * Catch AssertionErrors for one conclusion message * Add unit tests for testing tools * Modify unit tests
aramis-lab · Feb 7, 2025 · a89fcff · a89fcff
1 parent 3e294d9
commit a89fcff
Show file tree

Hide file tree

Showing 3 changed files with 205 additions and 10 deletions.
diff --git a/test/nonregression/iotools/test_run_converters.py b/test/nonregression/iotools/test_run_converters.py
@@ -4,7 +4,11 @@
 """
 
 from pathlib import Path
-from test.nonregression.testing_tools import compare_folders, configure_paths
+from test.nonregression.testing_tools import (
+    compare_bids_tsv,
+    compare_folders,
+    configure_paths,
+)
 
 import pytest
 
@@ -31,3 +35,5 @@ def test_converters(cmdopt, tmp_path, study: StudyName):
     )
 
     compare_folders(output_dir, ref_dir / "bids", output_dir)
+    if study == StudyName.AIBL:
+        compare_bids_tsv(output_dir, ref_dir / "bids")
diff --git a/test/nonregression/testing_tools.py b/test/nonregression/testing_tools.py
@@ -1,9 +1,10 @@
 # coding: utf8
 
 import os
+from enum import Enum
 from os import PathLike
 from pathlib import Path
-from typing import Dict, List, Tuple
+from typing import Callable, Dict, Union
 
 import numpy as np
 import pandas as pd
@@ -13,7 +14,7 @@ def configure_paths(
     base_dir: Path,
     tmp_path: Path,
     name: str,
-) -> Tuple[Path, Path, Path]:
+) -> tuple[Path, Path, Path]:
     """Configure paths for tests."""
     input_dir = base_dir / name / "in"
     ref_dir = base_dir / name / "ref"
@@ -26,8 +27,8 @@ def configure_paths(
 def likeliness_measure(
     file1: PathLike,
     file2: PathLike,
-    threshold1: Tuple,
-    threshold2: Tuple,
+    threshold1: tuple,
+    threshold2: tuple,
     display: bool = False,
 ) -> bool:
     """Compares 2 Nifti inputs, with 2 different thresholds.
@@ -245,7 +246,7 @@ def _is_included(sub_ses_tsv_1: PathLike, sub_ses_tsv_2: PathLike) -> bool:
     return True
 
 
-def _sort_subject_field(subjects: List, fields: List) -> List:
+def _sort_subject_field(subjects: list, fields: list) -> list:
     """Helper function for `same_missing_modality_tsv`.
     Returns a sorted list of fields. The list is sorted by corresponding
     subject_id and by field_id if the subject_ids are equal.
@@ -361,8 +362,8 @@ def clean_folder(path: PathLike, recreate: bool = True):
 
 def list_files_with_extensions(
     path_folder: PathLike,
-    extensions_to_keep: Tuple[str, ...],
-) -> List[str]:
+    extensions_to_keep: tuple[str, ...],
+) -> list[str]:
     """List all the files with the provided extensions
     in the path_folder.
 
@@ -387,7 +388,7 @@ def list_files_with_extensions(
 
 def create_list_hashes(
     path_folder: PathLike,
-    extensions_to_keep: Tuple[str, ...] = (".nii.gz", ".tsv", ".json"),
+    extensions_to_keep: tuple[str, ...] = (".nii.gz", ".tsv", ".json"),
 ) -> Dict:
     """Computes a dictionary of files with their corresponding hashes.
 
@@ -483,3 +484,67 @@ def compare_folders_structures(
             if key not in hashes_check:
                 error_message2 += f"{key}'s creation was not expected !\n"
         raise ValueError(error_message1 + error_message2)
+
+
+class Level(str, Enum):
+    """Levels of BIDS metadata TSV files handled by the comparison tools.
+
+    The value of each member is the suffix used to look for the matching
+    files (``*<value>.tsv``) inside a BIDS folder.
+    """
+
+    PARTICIPANTS = "participants"
+    SESSIONS = "sessions"
+    SCANS = "scans"
+
+
+def _load_participants_tsv(bids_dir: Path, _: Path) -> pd.DataFrame:
+    """Read the ``participants.tsv`` file found at the root of a BIDS folder.
+
+    Parameters
+    ----------
+    bids_dir : Path
+        Folder containing the ``participants.tsv`` file.
+
+    _ : Path
+        Unused. Kept so that the signature matches the other TSV loaders.
+
+    Returns
+    -------
+    pd.DataFrame
+        Content of the file, with rows ordered by ``participant_id`` and
+        the index rebuilt after sorting.
+    """
+    participants = pd.read_csv(bids_dir / "participants.tsv", sep="\t")
+    return participants.sort_values(by="participant_id", ignore_index=True)
+
+
+def _load_sessions_tsv(bids_dir: Path, ref_tsv: Path) -> pd.DataFrame:
+    """Read, from ``bids_dir``, the sessions TSV file matching ``ref_tsv``.
+
+    Parameters
+    ----------
+    bids_dir : Path
+        BIDS folder to read the file from.
+
+    ref_tsv : Path
+        Path to a sessions TSV file. Only its name and the name of its
+        parent folder are used to locate the file inside ``bids_dir``.
+
+    Returns
+    -------
+    pd.DataFrame
+        Content of the file, with rows ordered by ``session_id`` and
+        the index rebuilt after sorting.
+    """
+    subject_folder = ref_tsv.parent.name
+    sessions = pd.read_csv(bids_dir / subject_folder / ref_tsv.name, sep="\t")
+    return sessions.sort_values(by="session_id", ignore_index=True)
+
+
+def _load_scans_tsv(bids_dir: Path, ref_tsv: Path) -> pd.DataFrame:
+    """Read, from ``bids_dir``, the scans TSV file matching ``ref_tsv``.
+
+    Parameters
+    ----------
+    bids_dir : Path
+        BIDS folder to read the file from.
+
+    ref_tsv : Path
+        Path to a scans TSV file. Only its name and the names of its two
+        closest parent folders are used to locate the file inside ``bids_dir``.
+
+    Returns
+    -------
+    pd.DataFrame
+        Content of the file, with rows ordered by ``filename`` and
+        the index rebuilt after sorting.
+    """
+    session_dir = ref_tsv.parent
+    subject_folder = session_dir.parent.name
+    session_folder = session_dir.name
+    scans = pd.read_csv(
+        bids_dir / subject_folder / session_folder / ref_tsv.name, sep="\t"
+    )
+    return scans.sort_values(by="filename", ignore_index=True)
+
+
+# Signature shared by the TSV loaders: (BIDS folder, reference TSV path) -> frame.
+LoaderInterface = Callable[[Path, Path], pd.DataFrame]
+
+
+def _loader_factory(level: Union[str, Level]) -> LoaderInterface:
+    """Return the TSV loader associated with a metadata level.
+
+    Parameters
+    ----------
+    level : str or Level
+        Level of the metadata file. Strings are converted to ``Level``.
+
+    Returns
+    -------
+    LoaderInterface
+        Function reading the TSV file of the requested level.
+
+    Raises
+    ------
+    ValueError
+        If ``level`` is not a valid ``Level`` (raised by the conversion), or
+        if no loader is implemented for this level.
+    """
+    level = Level(level)
+    if level == Level.PARTICIPANTS:
+        return _load_participants_tsv
+    if level == Level.SESSIONS:
+        return _load_sessions_tsv
+    if level == Level.SCANS:
+        return _load_scans_tsv
+    # Only reachable when a Level member has no loader listed above.
+    # The exception must be instantiated: raising a tuple is a TypeError.
+    raise ValueError(f"TSV metadata file loader not implemented for level {level}.")
+
+
+def _compare_frames(df1: pd.DataFrame, df2: pd.DataFrame, object: str) -> None:
+    """Assert that two dataframes are equal, ignoring row and column order.
+
+    Parameters
+    ----------
+    df1 : pd.DataFrame
+        First dataframe.
+
+    df2 : pd.DataFrame
+        Second dataframe.
+
+    object : str
+        Name given to the compared object in the assertion message.
+
+    Raises
+    ------
+    AssertionError
+        If the two dataframes differ.
+    """
+    # check_like=True: the order of the index and of the columns is ignored.
+    pd.testing.assert_frame_equal(df1, df2, check_like=True, obj=object)
+
+
+def _iteratively_compare_frames(bids_ref: Path, bids_out: Path, level: Level):
+    """Compare every TSV file of a given level between two BIDS folders.
+
+    The files are searched recursively in ``bids_ref``; for each of them, the
+    file at the same relative location is loaded from both folders and the
+    resulting dataframes are compared.
+
+    Parameters
+    ----------
+    bids_ref : Path
+        Reference BIDS folder, used to list the files to compare.
+
+    bids_out : Path
+        BIDS folder to check against the reference.
+
+    level : Level
+        Level of the metadata files to compare.
+
+    Raises
+    ------
+    AssertionError
+        At the first file whose content differs between the two folders.
+    """
+    load = _loader_factory(level)
+    pattern = f"*{level.value}.tsv"
+    for ref_tsv in bids_ref.rglob(pattern):
+        out_frame = load(bids_out, ref_tsv)
+        ref_frame = load(bids_ref, ref_tsv)
+        _compare_frames(out_frame, ref_frame, ref_tsv.name)
+
+
+def compare_bids_tsv(bids_out: Path, bids_ref: Path):
+    """Compare the content of the metadata TSV files of two BIDS folders.
+
+    Each level of ``Level`` is checked in turn. The assertion errors are
+    collected (one per level, as the check of a level stops at its first
+    mismatch) and reported together in a single error.
+
+    Parameters
+    ----------
+    bids_out : Path
+        BIDS folder to check.
+
+    bids_ref : Path
+        Reference BIDS folder.
+
+    Raises
+    ------
+    AssertionError
+        If at least one level has a TSV file differing from the reference.
+    """
+    failures = []
+    for level in Level:
+        try:
+            _iteratively_compare_frames(bids_ref, bids_out, level)
+        except AssertionError as mismatch:
+            # Compact each report so that blank lines only separate reports.
+            failures.append(str(mismatch).replace("\n\n", "\n"))
+    if not failures:
+        return
+    raise AssertionError("\n\n".join(failures))
diff --git a/test/unittests/test_testing_tools.py b/test/unittests/test_testing_tools.py
@@ -1,7 +1,12 @@
 import os
 from os import PathLike
-from pathlib import PurePath
+from pathlib import Path, PurePath
 from test.nonregression.testing_tools import (
+    Level,
+    _load_participants_tsv,
+    _load_scans_tsv,
+    _load_sessions_tsv,
+    compare_bids_tsv,
     compare_folders_structures,
     compare_folders_with_hashes,
     create_list_hashes,
@@ -10,7 +15,9 @@
 
 import nibabel as nib
 import numpy as np
+import pandas as pd
 import pytest
+from pandas.testing import assert_frame_equal
 
 
 def test_likeliness_measure(tmp_path: PurePath):
@@ -221,3 +228,120 @@ def test_compare_folders_structures(
     shutil.rmtree(tmp_path / "sub-02")
     with pytest.raises(ValueError, match="/sub-02/bar.tsv not found !"):
         compare_func(tmp_path, tmp_path / "hashes.pl")
+
+
+def build_bids_tsv(tmp_path: Path) -> Path:
+    """Create a small BIDS-like tree with one TSV file per metadata level.
+
+    The tree is written in ``tmp_path / "BIDS"`` and holds a participants
+    file, a sessions file for ``sub-001`` and a scans file for
+    ``sub-001/ses-M016``. Rows are deliberately written unsorted.
+
+    Returns
+    -------
+    Path
+        Path to the created ``BIDS`` folder.
+    """
+    bids_path = tmp_path / "BIDS"
+    sub_path = bids_path / "sub-001"
+    ses_path = sub_path / "ses-M016"
+    # (folder to create, name of the TSV written in it, content of the TSV)
+    tree = (
+        (
+            bids_path,
+            "participants.tsv",
+            {"participant_id": ["sub-002", "sub-001"], "age": [20, 26]},
+        ),
+        (
+            sub_path,
+            "sub-001_sessions.tsv",
+            {"session_id": ["ses-M012", "ses-M006"], "age": [20, 20]},
+        ),
+        (
+            ses_path,
+            "sub-001_ses-M016_scans.tsv",
+            {
+                "filename": ["pet/foo.json", "anat/foo.json"],
+                "acq_time": ["00:00:00", "00:00:00"],
+            },
+        ),
+    )
+    for folder, filename, columns in tree:
+        folder.mkdir()
+        pd.DataFrame(columns).to_csv(folder / filename, sep="\t", index=False)
+    return bids_path
+
+
+def test_loader_participants(tmp_path):
+    """The participants loader returns the rows sorted by participant_id."""
+    bids_path = build_bids_tsv(tmp_path)
+    expected = pd.DataFrame(
+        {"participant_id": ["sub-001", "sub-002"], "age": [26, 20]}
+    )
+
+    loaded = _load_participants_tsv(bids_path, Path(""))
+
+    assert_frame_equal(expected, loaded)
+
+
+def test_loader_sessions(tmp_path):
+    """The sessions loader returns the rows sorted by session_id."""
+    bids_path = build_bids_tsv(tmp_path)
+    sessions_tsv = bids_path / "sub-001" / "sub-001_sessions.tsv"
+    expected = pd.DataFrame({"session_id": ["ses-M006", "ses-M012"], "age": [20, 20]})
+
+    loaded = _load_sessions_tsv(bids_path, sessions_tsv)
+
+    assert_frame_equal(expected, loaded)
+
+
+def test_loader_scans(tmp_path):
+    """The scans loader returns the rows sorted by filename."""
+    bids_path = build_bids_tsv(tmp_path)
+    scans_tsv = bids_path / "sub-001" / "ses-M016" / "sub-001_ses-M016_scans.tsv"
+    expected = pd.DataFrame(
+        {
+            "filename": ["anat/foo.json", "pet/foo.json"],
+            "acq_time": ["00:00:00", "00:00:00"],
+        }
+    )
+
+    loaded = _load_scans_tsv(bids_path, scans_tsv)
+
+    assert_frame_equal(expected, loaded)
+
+
+@pytest.mark.parametrize(
+    "level, expected",
+    [
+        ("participants", _load_participants_tsv),
+        (Level.SESSIONS, _load_sessions_tsv),
+        (Level.SCANS, _load_scans_tsv),
+    ],
+)
+def test_loader_factory(level, expected):
+    """The factory accepts a string or a Level and returns the matching loader."""
+    from test.nonregression.testing_tools import _loader_factory
+
+    loader = _loader_factory(level)
+
+    assert loader == expected
+
+
+def test_loader_factory_error():
+    """The factory raises a ValueError when given an unknown level."""
+    from test.nonregression.testing_tools import _loader_factory
+
+    with pytest.raises(ValueError):
+        _loader_factory("foo")
+
+
+def test_compare_bids_tsv_success(tmp_path):
+    """Comparing a BIDS folder with itself does not raise."""
+    bids_path = build_bids_tsv(tmp_path)
+    compare_bids_tsv(bids_path, bids_path)
+
+
+@pytest.mark.parametrize(
+    "modified_frame, frame_path, error_message",
+    [
+        (
+            pd.DataFrame({"participant_id": ["sub-001"], "age": [26]}),
+            "participants.tsv",
+            "participants.tsv shape mismatch",
+        ),
+        (
+            pd.DataFrame({"session_id": ["ses-M012", "ses-M006"], "age": [20, 25]}),
+            "sub-001/sub-001_sessions.tsv",
+            r"sub-001_sessions.tsv.* values are different",
+        ),
+        (
+            pd.DataFrame(
+                {
+                    "filename": ["pet/foo.nii.gz", "anat/foo.json"],
+                    "acq_time": ["00:00:00", "00:00:00"],
+                }
+            ),
+            "sub-001/ses-M016/sub-001_ses-M016_scans.tsv",
+            r"sub-001_ses-M016_scans.tsv.* values are different",
+        ),
+    ],
+)
+def test_compare_bids_tsv_error(tmp_path, modified_frame, frame_path, error_message):
+    """A TSV file differing in one of the two folders is reported by name.
+
+    The BIDS folder is duplicated, one TSV file of the duplicate is
+    overwritten with ``modified_frame``, and the comparison of the two
+    folders must fail with a message matching ``error_message``.
+    """
+    import shutil
+
+    bids_path = build_bids_tsv(tmp_path)
+    altered_bids = tmp_path / "BIDS_copy"
+    shutil.copytree(bids_path, altered_bids)
+    modified_frame.to_csv(altered_bids / frame_path, sep="\t", index=False)
+
+    with pytest.raises(AssertionError, match=error_message):
+        compare_bids_tsv(bids_path, altered_bids)