Skip to content

Commit

Permalink
[TEST] Add non-regression tests for AIBL that compare the inside of …
Browse files Browse the repository at this point in the history
…all metadata tsv files (#1424)

* WIP 1611

* Remove test todos

* Enum as suggested + error message from assert_... upgraded

* Remove .AIBL in test... .py

* Catch AssertionErrors to report a single summary message

* Add unit tests for testing tools

* Modify unit tests
  • Loading branch information
AliceJoubert authored Feb 7, 2025
1 parent 3e294d9 commit a89fcff
Show file tree
Hide file tree
Showing 3 changed files with 205 additions and 10 deletions.
8 changes: 7 additions & 1 deletion test/nonregression/iotools/test_run_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
"""

from pathlib import Path
from test.nonregression.testing_tools import compare_folders, configure_paths
from test.nonregression.testing_tools import (
compare_bids_tsv,
compare_folders,
configure_paths,
)

import pytest

Expand All @@ -31,3 +35,5 @@ def test_converters(cmdopt, tmp_path, study: StudyName):
)

compare_folders(output_dir, ref_dir / "bids", output_dir)
if study == StudyName.AIBL:
compare_bids_tsv(output_dir, ref_dir / "bids")
81 changes: 73 additions & 8 deletions test/nonregression/testing_tools.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# coding: utf8

import os
from enum import Enum
from os import PathLike
from pathlib import Path
from typing import Dict, List, Tuple
from typing import Callable, Dict, Union

import numpy as np
import pandas as pd
Expand All @@ -13,7 +14,7 @@ def configure_paths(
base_dir: Path,
tmp_path: Path,
name: str,
) -> Tuple[Path, Path, Path]:
) -> tuple[Path, Path, Path]:
"""Configure paths for tests."""
input_dir = base_dir / name / "in"
ref_dir = base_dir / name / "ref"
Expand All @@ -26,8 +27,8 @@ def configure_paths(
def likeliness_measure(
file1: PathLike,
file2: PathLike,
threshold1: Tuple,
threshold2: Tuple,
threshold1: tuple,
threshold2: tuple,
display: bool = False,
) -> bool:
"""Compares 2 Nifti inputs, with 2 different thresholds.
Expand Down Expand Up @@ -245,7 +246,7 @@ def _is_included(sub_ses_tsv_1: PathLike, sub_ses_tsv_2: PathLike) -> bool:
return True


def _sort_subject_field(subjects: List, fields: List) -> List:
def _sort_subject_field(subjects: list, fields: list) -> list:
"""Helper function for `same_missing_modality_tsv`.
Returns a sorted list of fields. The list is sorted by corresponding
subject_id and by field_id if the subject_ids are equal.
Expand Down Expand Up @@ -361,8 +362,8 @@ def clean_folder(path: PathLike, recreate: bool = True):

def list_files_with_extensions(
path_folder: PathLike,
extensions_to_keep: Tuple[str, ...],
) -> List[str]:
extensions_to_keep: tuple[str, ...],
) -> list[str]:
"""List all the files with the provided extensions
in the path_folder.
Expand All @@ -387,7 +388,7 @@ def list_files_with_extensions(

def create_list_hashes(
path_folder: PathLike,
extensions_to_keep: Tuple[str, ...] = (".nii.gz", ".tsv", ".json"),
extensions_to_keep: tuple[str, ...] = (".nii.gz", ".tsv", ".json"),
) -> Dict:
"""Computes a dictionary of files with their corresponding hashes.
Expand Down Expand Up @@ -483,3 +484,67 @@ def compare_folders_structures(
if key not in hashes_check:
error_message2 += f"{key}'s creation was not expected !\n"
raise ValueError(error_message1 + error_message2)


class Level(str, Enum):
    """BIDS hierarchy levels that carry a TSV metadata file.

    The value of each member is the file-name suffix used to look up the
    corresponding TSV files (``*participants.tsv``, ``*sessions.tsv`` and
    ``*scans.tsv``).
    """

    PARTICIPANTS = "participants"
    SESSIONS = "sessions"
    SCANS = "scans"


def _load_participants_tsv(
bids_dir: Path,
_: Path,
) -> pd.DataFrame:
return pd.read_csv(bids_dir / "participants.tsv", sep="\t").sort_values(
by="participant_id", ignore_index=True
)


def _load_sessions_tsv(bids_dir: Path, ref_tsv: Path) -> pd.DataFrame:
return pd.read_csv(
bids_dir / ref_tsv.parent.name / ref_tsv.name, sep="\t"
).sort_values(by="session_id", ignore_index=True)


def _load_scans_tsv(bids_dir: Path, ref_tsv: Path) -> pd.DataFrame:
return pd.read_csv(
bids_dir / ref_tsv.parent.parent.name / ref_tsv.parent.name / ref_tsv.name,
sep="\t",
).sort_values(by="filename", ignore_index=True)


# Signature shared by the TSV loaders: (bids_dir, ref_tsv) -> DataFrame.
LoaderInterface = Callable[[Path, Path], pd.DataFrame]


def _loader_factory(level: Union[str, Level]) -> LoaderInterface:
    """Return the TSV loader associated with a BIDS metadata level.

    Parameters
    ----------
    level : str or Level
        Level of the TSV metadata file, either as a `Level` member or
        as the corresponding string value (e.g. ``"participants"``).

    Returns
    -------
    LoaderInterface
        The function loading TSV files of the requested level.

    Raises
    ------
    ValueError
        If `level` is not a valid `Level` value, or if no loader is
        implemented for this level.
    """
    # Converting through the enum rejects unknown level names with a ValueError.
    level = Level(level)
    loaders = {
        Level.PARTICIPANTS: _load_participants_tsv,
        Level.SESSIONS: _load_sessions_tsv,
        Level.SCANS: _load_scans_tsv,
    }
    if level not in loaders:
        # Only reachable if a member is added to Level without a matching
        # loader. Raise a real exception instance: raising a tuple would
        # fail with a TypeError instead of the intended ValueError.
        raise ValueError(
            f"TSV metadata file loader not implemented for level {level}."
        )
    return loaders[level]


def _compare_frames(df1: pd.DataFrame, df2: pd.DataFrame, object: str):
from pandas.testing import assert_frame_equal

assert_frame_equal(df1, df2, check_like=True, obj=object)


def _iteratively_compare_frames(bids_ref: Path, bids_out: Path, level: Level):
    """Compare every TSV file of a given level between two BIDS folders.

    The reference folder is searched recursively for files whose name ends
    with ``<level value>.tsv``. Each of them is loaded from both folders and
    the resulting dataframes are compared.

    Parameters
    ----------
    bids_ref : Path
        Reference BIDS directory; drives the list of files to compare.

    bids_out : Path
        BIDS directory to check against the reference.

    level : Level
        Level of the TSV metadata files to compare.

    Raises
    ------
    AssertionError
        On the first pair of files whose content differs.
    """
    load_tsv = _loader_factory(level)
    pattern = f"*{level.value}.tsv"
    for ref_tsv in bids_ref.rglob(pattern):
        produced = load_tsv(bids_out, ref_tsv)
        expected = load_tsv(bids_ref, ref_tsv)
        _compare_frames(produced, expected, ref_tsv.name)


def compare_bids_tsv(bids_out: Path, bids_ref: Path):
    """Compare the content of the TSV metadata files of two BIDS folders.

    Every level defined in `Level` is checked. Assertion failures are
    collected per level and reported together in a single error.

    Parameters
    ----------
    bids_out : Path
        BIDS directory to check.

    bids_ref : Path
        Reference BIDS directory.

    Raises
    ------
    AssertionError
        If at least one level has a TSV file differing from the reference.
        The message concatenates the failure reported for each level.
    """
    failures = []
    for level in Level:
        try:
            _iteratively_compare_frames(bids_ref, bids_out, level)
        except AssertionError as mismatch:
            # Compact pandas' message so that blank lines only separate levels.
            failures.append(str(mismatch).replace("\n\n", "\n"))
    if not failures:
        return
    raise AssertionError("\n\n".join(failures))
126 changes: 125 additions & 1 deletion test/unittests/test_testing_tools.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import os
from os import PathLike
from pathlib import PurePath
from pathlib import Path, PurePath
from test.nonregression.testing_tools import (
Level,
_load_participants_tsv,
_load_scans_tsv,
_load_sessions_tsv,
compare_bids_tsv,
compare_folders_structures,
compare_folders_with_hashes,
create_list_hashes,
Expand All @@ -10,7 +15,9 @@

import nibabel as nib
import numpy as np
import pandas as pd
import pytest
from pandas.testing import assert_frame_equal


def test_likeliness_measure(tmp_path: PurePath):
Expand Down Expand Up @@ -221,3 +228,120 @@ def test_compare_folders_structures(
shutil.rmtree(tmp_path / "sub-02")
with pytest.raises(ValueError, match="/sub-02/bar.tsv not found !"):
compare_func(tmp_path, tmp_path / "hashes.pl")


def build_bids_tsv(tmp_path: Path) -> Path:
    """Create a small BIDS tree holding one TSV file per metadata level.

    The rows of each file are deliberately written unsorted.

    Parameters
    ----------
    tmp_path : Path
        Existing directory in which the ``BIDS`` folder is created.

    Returns
    -------
    Path
        Path to the created ``BIDS`` folder.
    """

    def _dump(frame: pd.DataFrame, destination: Path) -> None:
        frame.to_csv(destination, sep="\t", index=False)

    bids_root = tmp_path / "BIDS"
    bids_root.mkdir()
    _dump(
        pd.DataFrame({"participant_id": ["sub-002", "sub-001"], "age": [20, 26]}),
        bids_root / "participants.tsv",
    )

    subject_dir = bids_root / "sub-001"
    subject_dir.mkdir()
    _dump(
        pd.DataFrame({"session_id": ["ses-M012", "ses-M006"], "age": [20, 20]}),
        subject_dir / "sub-001_sessions.tsv",
    )

    session_dir = subject_dir / "ses-M016"
    session_dir.mkdir()
    _dump(
        pd.DataFrame(
            {
                "filename": ["pet/foo.json", "anat/foo.json"],
                "acq_time": ["00:00:00", "00:00:00"],
            }
        ),
        session_dir / "sub-001_ses-M016_scans.tsv",
    )
    return bids_root


def test_loader_participants(tmp_path):
    """The participants loader returns rows sorted by participant_id."""
    bids_root = build_bids_tsv(tmp_path)

    expected = pd.DataFrame(
        {"participant_id": ["sub-001", "sub-002"], "age": [26, 20]}
    )
    loaded = _load_participants_tsv(bids_root, Path(""))

    assert_frame_equal(expected, loaded)


def test_loader_sessions(tmp_path):
    """The sessions loader returns rows sorted by session_id."""
    bids_root = build_bids_tsv(tmp_path)

    expected = pd.DataFrame({"session_id": ["ses-M006", "ses-M012"], "age": [20, 20]})
    sessions_tsv = bids_root / "sub-001" / "sub-001_sessions.tsv"
    loaded = _load_sessions_tsv(bids_root, sessions_tsv)

    assert_frame_equal(expected, loaded)


def test_loader_scans(tmp_path):
    """The scans loader returns rows sorted by filename."""
    bids_root = build_bids_tsv(tmp_path)

    expected = pd.DataFrame(
        {
            "filename": ["anat/foo.json", "pet/foo.json"],
            "acq_time": ["00:00:00", "00:00:00"],
        }
    )
    scans_tsv = bids_root / "sub-001" / "ses-M016" / "sub-001_ses-M016_scans.tsv"
    loaded = _load_scans_tsv(bids_root, scans_tsv)

    assert_frame_equal(expected, loaded)


@pytest.mark.parametrize(
    "level, expected",
    [
        ("participants", _load_participants_tsv),
        (Level.SESSIONS, _load_sessions_tsv),
        (Level.SCANS, _load_scans_tsv),
    ],
)
def test_loader_factory(level, expected):
    """The factory maps each level, given as str or Level, to its loader."""
    from test.nonregression.testing_tools import _loader_factory

    selected = _loader_factory(level)

    assert expected == selected


def test_loader_factory_error():
    """The factory raises a ValueError for an unknown level name."""
    from test.nonregression.testing_tools import _loader_factory

    unknown_level = "foo"
    with pytest.raises(ValueError):
        _loader_factory(unknown_level)


def test_compare_bids_tsv_success(tmp_path):
    """Comparing a BIDS folder with itself does not raise."""
    bids_root = build_bids_tsv(tmp_path)

    compare_bids_tsv(bids_out=bids_root, bids_ref=bids_root)


@pytest.mark.parametrize(
    "modified_frame, frame_path, error_message",
    [
        (
            pd.DataFrame({"participant_id": ["sub-001"], "age": [26]}),
            "participants.tsv",
            "participants.tsv shape mismatch",
        ),
        (
            pd.DataFrame({"session_id": ["ses-M012", "ses-M006"], "age": [20, 25]}),
            "sub-001/sub-001_sessions.tsv",
            r"sub-001_sessions.tsv.* values are different",
        ),
        (
            pd.DataFrame(
                {
                    "filename": ["pet/foo.nii.gz", "anat/foo.json"],
                    "acq_time": ["00:00:00", "00:00:00"],
                }
            ),
            "sub-001/ses-M016/sub-001_ses-M016_scans.tsv",
            r"sub-001_ses-M016_scans.tsv.* values are different",
        ),
    ],
)
def test_compare_bids_tsv_error(tmp_path, modified_frame, frame_path, error_message):
    """Altering one TSV file of a copied BIDS folder is reported as an error."""
    import shutil

    original = build_bids_tsv(tmp_path)
    altered = tmp_path / "BIDS_copy"
    shutil.copytree(original, altered)
    # Overwrite a single TSV file of the copy with the modified content.
    modified_frame.to_csv(altered / frame_path, sep="\t", index=False)

    with pytest.raises(AssertionError, match=error_message):
        compare_bids_tsv(original, altered)

0 comments on commit a89fcff

Please sign in to comment.