From bab7ff52f075f2bd22bba5f6249ec232f5db3cca Mon Sep 17 00:00:00 2001 From: mzouink Date: Thu, 9 Jan 2025 14:50:57 -0500 Subject: [PATCH 1/8] remove fibsem: --- .../datasplits/datasets/arrays/__init__.py | 1 - .../arrays/missing_annotations_mask_config.py | 62 ------------------- pyproject.toml | 2 - 3 files changed, 65 deletions(-) delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py diff --git a/dacapo/experiments/datasplits/datasets/arrays/__init__.py b/dacapo/experiments/datasplits/datasets/arrays/__init__.py index d8e6d6d7b..7b16453b3 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/__init__.py +++ b/dacapo/experiments/datasplits/datasets/arrays/__init__.py @@ -6,7 +6,6 @@ from .binarize_array_config import BinarizeArrayConfig # noqa from .resampled_array_config import ResampledArrayConfig # noqa from .intensity_array_config import IntensitiesArrayConfig # noqa -from .missing_annotations_mask_config import MissingAnnotationsMaskConfig # noqa from .ones_array_config import OnesArrayConfig # noqa from .concat_array_config import ConcatArrayConfig # noqa from .logical_or_array_config import LogicalOrArrayConfig # noqa diff --git a/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py b/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py deleted file mode 100644 index ba805b645..000000000 --- a/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py +++ /dev/null @@ -1,62 +0,0 @@ -import attr - -from .array_config import ArrayConfig - -from typing import List, Tuple -from funlib.persistence import Array -from fibsem_tools.metadata.groundtruth import LabelList - -import dask.array as da - - -@attr.s -class MissingAnnotationsMaskConfig(ArrayConfig): - """ - This config class provides the necessary configuration for turning an Annotated dataset into a - multi class binary classification problem - - Attributes: - source_array_config : ArrayConfig - The Array from which to pull annotated data. Is expected to contain a volume with uint64 voxels and no channel dimension - groupings : List[Tuple[str, List[int]]] - List of id groups with a symantic name. Each id group is a List of ids. - Group i found in groupings[i] will be binarized and placed in channel i. - Note: - The output array will have a channel dimension equal to the number of groups. - Each channel will be a binary mask of the ids in the groupings list. - """ - - source_array_config: ArrayConfig = attr.ib( - metadata={ - "help_text": "The Array from which to pull annotated data. Is expected to contain a volume with uint64 voxels and no channel dimension" - } - ) - - groupings: List[Tuple[str, List[int]]] = attr.ib( - metadata={ - "help_text": "List of id groups with a symantic name. Each id group is a List of ids. " - "Group i found in groupings[i] will be binarized and placed in channel i." - } - ) - - def array(self, mode: str = "r") -> Array: - labels = self.source_array_config.array(mode) - grouped = da.ones((len(self.groupings), *labels.shape), dtype=bool) - grouped[:] = labels.data > 0 - labels_list = LabelList.parse_obj( - {"labels": labels._source_data.attrs["labels"]} - ).labels - present_not_annotated = set( - [ - label.value - for label in labels_list - if label.annotationState.present and not label.annotationState.annotated - ] - ) - for i, (_, ids) in enumerate(self.groupings): - if any([id in present_not_annotated for id in ids]): - grouped[i] = 0 - - return Array( - grouped, labels.offset, labels.voxel_size, labels.axis_names, labels.units - ) diff --git a/pyproject.toml b/pyproject.toml index 9070e4b37..cd65da233 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,6 @@ dependencies = [ "lazy-property", "neuroglancer", "torch", - "fibsem_tools >= 6.0, <=6.3", "attrs", "bokeh", "numpy-indexed>=0.3.7", @@ -183,7 +182,6 @@ module = [ "daisy.*", "lazy_property.*", "skimage.*", - "fibsem_tools.*", "neuroglancer.*", "tqdm.*", "zarr.*", From 0ad972d427655ca54f7e8f8f36588401e7b085cd Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Sun, 19 Jan 2025 13:33:55 -0500 Subject: [PATCH 2/8] retreive mask --- .../datasplits/datasets/arrays/__init__.py | 1 + .../arrays/missing_annotations_mask_config.py | 62 +++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py diff --git a/dacapo/experiments/datasplits/datasets/arrays/__init__.py b/dacapo/experiments/datasplits/datasets/arrays/__init__.py index 7b16453b3..d8e6d6d7b 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/__init__.py +++ b/dacapo/experiments/datasplits/datasets/arrays/__init__.py @@ -6,6 +6,7 @@ from .binarize_array_config import BinarizeArrayConfig # noqa from .resampled_array_config import ResampledArrayConfig # noqa from .intensity_array_config import IntensitiesArrayConfig # noqa +from .missing_annotations_mask_config import MissingAnnotationsMaskConfig # noqa from .ones_array_config import OnesArrayConfig # noqa from .concat_array_config import ConcatArrayConfig # noqa from .logical_or_array_config import LogicalOrArrayConfig # noqa diff --git a/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py b/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py new file mode 100644 index 000000000..92539aa16 --- /dev/null +++ b/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py @@ -0,0 +1,62 @@ +import attr + +from .array_config import ArrayConfig + +from typing import List, Tuple +from funlib.persistence import Array +from cellmap_schemas.annotation import LabelList + +import dask.array as da + + +@attr.s +class MissingAnnotationsMaskConfig(ArrayConfig): + """ + This config class provides the necessary configuration for turning an Annotated dataset into a + multi class binary classification problem + + Attributes: + source_array_config : ArrayConfig + The Array from which to pull annotated data. Is expected to contain a volume with uint64 voxels and no channel dimension + groupings : List[Tuple[str, List[int]]] + List of id groups with a symantic name. Each id group is a List of ids. + Group i found in groupings[i] will be binarized and placed in channel i. + Note: + The output array will have a channel dimension equal to the number of groups. + Each channel will be a binary mask of the ids in the groupings list. + """ + + source_array_config: ArrayConfig = attr.ib( + metadata={ + "help_text": "The Array from which to pull annotated data. Is expected to contain a volume with uint64 voxels and no channel dimension" + } + ) + + groupings: List[Tuple[str, List[int]]] = attr.ib( + metadata={ + "help_text": "List of id groups with a symantic name. Each id group is a List of ids. " + "Group i found in groupings[i] will be binarized and placed in channel i." + } + ) + + def array(self, mode: str = "r") -> Array: + labels = self.source_array_config.array(mode) + grouped = da.ones((len(self.groupings), *labels.shape), dtype=bool) + grouped[:] = labels.data > 0 + labels_list = LabelList.parse_obj( + {"labels": labels._source_data.attrs["labels"]} + ).labels + present_not_annotated = set( + [ + label.value + for label in labels_list + if label.annotationState.present and not label.annotationState.annotated + ] + ) + for i, (_, ids) in enumerate(self.groupings): + if any([id in present_not_annotated for id in ids]): + grouped[i] = 0 + + return Array( + grouped, labels.offset, labels.voxel_size, labels.axis_names, labels.units + ) From 7b810450fccb7393b42c2021437f98c6055b8d29 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Thu, 23 Jan 2025 13:50:49 -0500 Subject: [PATCH 3/8] fix dependeny, mypy, black --- .../datasplits/datasets/arrays/binarize_array_config.py | 8 +++++--- pyproject.toml | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/dacapo/experiments/datasplits/datasets/arrays/binarize_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/binarize_array_config.py index 4df39abb4..dbbb9102f 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/binarize_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/binarize_array_config.py @@ -56,9 +56,11 @@ def array(self, mode="r") -> Array: def group_array(data): groups = [ - da.isin(data, group_ids) - if len(group_ids) > 0 - else data != self.background + ( + da.isin(data, group_ids) + if len(group_ids) > 0 + else data != self.background + ) for _, group_ids in self.groupings ] out = da.stack(groups, axis=0) diff --git a/pyproject.toml b/pyproject.toml index cd65da233..3c145fc0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ dependencies = [ "boto3", "matplotlib", "xarray-multiscale", + "cellmap-schemas", ] # extras @@ -200,7 +201,8 @@ module = [ "napari.*", "empanada.*", "IPython.*", - "xarray_multiscale.*" + "xarray_multiscale.*", + "cellmap_schemas.*", ] ignore_missing_imports = true From 09a39186ddb099761960b877a65d50413432b380 Mon Sep 17 00:00:00 2001 From: mzouink Date: Thu, 23 Jan 2025 19:03:22 +0000 Subject: [PATCH 4/8] :art: Format Python code with psf/black --- tests/operations/test_array.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/operations/test_array.py b/tests/operations/test_array.py index a6656ff44..ff8bca191 100644 --- a/tests/operations/test_array.py +++ b/tests/operations/test_array.py @@ -19,10 +19,21 @@ def test_array(array_config): array = array_config.array() - assert array.offset == (12, 12, 12), f"offset is not correct, expected (12, 12, 12), got {array.offset}" - assert array.voxel_size == (1, 2, 4), f"resolution is not correct, expected (1, 2, 4), got {array.voxel_size}" - assert array.axis_names == ["z", "y", "x"], f"axis names are not correct, expected ['z', 'y', 'x'], got {array.axis_names}" + assert array.offset == ( + 12, + 12, + 12, + ), f"offset is not correct, expected (12, 12, 12), got {array.offset}" + assert array.voxel_size == ( + 1, + 2, + 4, + ), f"resolution is not correct, expected (1, 2, 4), got {array.voxel_size}" + assert array.axis_names == [ + "z", + "y", + "x", + ], f"axis names are not correct, expected ['z', 'y', 'x'], got {array.axis_names}" # offset = array.attrs["offset"] # resolution = array.attrs["resolution"] - From 72f894ba133e69113e7d330efd2fc49e7b391025 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Thu, 23 Jan 2025 14:18:05 -0500 Subject: [PATCH 5/8] Update pyproject.toml --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fcb1ed4b1..132dff3c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -200,7 +200,6 @@ module = [ "napari.*", "empanada.*", "IPython.*", - "xarray_multiscale.*", "cellmap_schemas.*", ] ignore_missing_imports = true From 302f4c4c4308f29b2daf96c17602af0fda39aa7a Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Tue, 18 Feb 2025 13:44:52 -0500 Subject: [PATCH 6/8] add dep --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 132dff3c1..8db109552 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,8 @@ dependencies = [ "boto3", "matplotlib", "cellmap-schemas", - "funlib.persistence @ git+https://github.com/funkelab/funlib.persistence.git@ome-ngff" + "funlib.persistence @ git+https://github.com/funkelab/funlib.persistence.git@ome-ngff", + "xarray-multiscale", ] # extras From 5fb69a0967201ea9e0e65e563106e9366592a905 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Tue, 18 Feb 2025 13:47:40 -0500 Subject: [PATCH 7/8] fix boto3 version for faster pypi --- pyproject.toml | 2 +- remove_all.py | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 remove_all.py diff --git a/pyproject.toml b/pyproject.toml index 8db109552..5dfb3356e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ dependencies = [ "pyyaml", "scipy", "upath", - "boto3", + "boto3==1.35.81", "matplotlib", "cellmap-schemas", "funlib.persistence @ git+https://github.com/funkelab/funlib.persistence.git@ome-ngff", diff --git a/remove_all.py b/remove_all.py new file mode 100644 index 000000000..baa01286d --- /dev/null +++ b/remove_all.py @@ -0,0 +1,26 @@ +import os +import re + +def remove_docstrings_from_file(file_path): + with open(file_path, 'r') as file: + content = file.read() + + # Pattern to match single-line and multi-line docstrings + docstring_pattern = r'(""".*?"""|\'\'\'.*?\'\'\')' + updated_content = re.sub(docstring_pattern, '', content, flags=re.DOTALL) + + with open(file_path, 'w') as file: + file.write(updated_content) + +def process_directory(directory_path): + for root, _, files in os.walk(directory_path): + for file in files: + if file.endswith('.py'): + file_path = os.path.join(root, file) + print(f"Removing docstrings from {file_path}") + remove_docstrings_from_file(file_path) + +if __name__ == "__main__": + directory_path = input("Enter the path to the directory: ") + process_directory(directory_path) + From f5b4206a2a25722b8ce99e839a02c369989d8dfb Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Tue, 18 Feb 2025 13:48:20 -0500 Subject: [PATCH 8/8] remove tmp --- remove_all.py | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 remove_all.py diff --git a/remove_all.py b/remove_all.py deleted file mode 100644 index baa01286d..000000000 --- a/remove_all.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -import re - -def remove_docstrings_from_file(file_path): - with open(file_path, 'r') as file: - content = file.read() - - # Pattern to match single-line and multi-line docstrings - docstring_pattern = r'(""".*?"""|\'\'\'.*?\'\'\')' - updated_content = re.sub(docstring_pattern, '', content, flags=re.DOTALL) - - with open(file_path, 'w') as file: - file.write(updated_content) - -def process_directory(directory_path): - for root, _, files in os.walk(directory_path): - for file in files: - if file.endswith('.py'): - file_path = os.path.join(root, file) - print(f"Removing docstrings from {file_path}") - remove_docstrings_from_file(file_path) - -if __name__ == "__main__": - directory_path = input("Enter the path to the directory: ") - process_directory(directory_path) -