From bab7ff52f075f2bd22bba5f6249ec232f5db3cca Mon Sep 17 00:00:00 2001
From: mzouink <zouinkhi.marwan@gmail.com>
Date: Thu, 9 Jan 2025 14:50:57 -0500
Subject: [PATCH 1/8] remove fibsem_tools dependency and MissingAnnotationsMaskConfig

---
 .../datasplits/datasets/arrays/__init__.py    |  1 -
 .../arrays/missing_annotations_mask_config.py | 62 -------------------
 pyproject.toml                                |  2 -
 3 files changed, 65 deletions(-)
 delete mode 100644 dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py

diff --git a/dacapo/experiments/datasplits/datasets/arrays/__init__.py b/dacapo/experiments/datasplits/datasets/arrays/__init__.py
index d8e6d6d7b..7b16453b3 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/__init__.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/__init__.py
@@ -6,7 +6,6 @@
 from .binarize_array_config import BinarizeArrayConfig  # noqa
 from .resampled_array_config import ResampledArrayConfig  # noqa
 from .intensity_array_config import IntensitiesArrayConfig  # noqa
-from .missing_annotations_mask_config import MissingAnnotationsMaskConfig  # noqa
 from .ones_array_config import OnesArrayConfig  # noqa
 from .concat_array_config import ConcatArrayConfig  # noqa
 from .logical_or_array_config import LogicalOrArrayConfig  # noqa
diff --git a/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py b/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py
deleted file mode 100644
index ba805b645..000000000
--- a/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import attr
-
-from .array_config import ArrayConfig
-
-from typing import List, Tuple
-from funlib.persistence import Array
-from fibsem_tools.metadata.groundtruth import LabelList
-
-import dask.array as da
-
-
-@attr.s
-class MissingAnnotationsMaskConfig(ArrayConfig):
-    """
-    This config class provides the necessary configuration for turning an Annotated dataset into a
-    multi class binary classification problem
-
-    Attributes:
-        source_array_config : ArrayConfig
-            The Array from which to pull annotated data. Is expected to contain a volume with uint64 voxels and no channel dimension
-        groupings : List[Tuple[str, List[int]]]
-            List of id groups with a symantic name. Each id group is a List of ids.
-            Group i found in groupings[i] will be binarized and placed in channel i.
-    Note:
-        The output array will have a channel dimension equal to the number of groups.
-        Each channel will be a binary mask of the ids in the groupings list.
-    """
-
-    source_array_config: ArrayConfig = attr.ib(
-        metadata={
-            "help_text": "The Array from which to pull annotated data. Is expected to contain a volume with uint64 voxels and no channel dimension"
-        }
-    )
-
-    groupings: List[Tuple[str, List[int]]] = attr.ib(
-        metadata={
-            "help_text": "List of id groups with a symantic name. Each id group is a List of ids. "
-            "Group i found in groupings[i] will be binarized and placed in channel i."
-        }
-    )
-
-    def array(self, mode: str = "r") -> Array:
-        labels = self.source_array_config.array(mode)
-        grouped = da.ones((len(self.groupings), *labels.shape), dtype=bool)
-        grouped[:] = labels.data > 0
-        labels_list = LabelList.parse_obj(
-            {"labels": labels._source_data.attrs["labels"]}
-        ).labels
-        present_not_annotated = set(
-            [
-                label.value
-                for label in labels_list
-                if label.annotationState.present and not label.annotationState.annotated
-            ]
-        )
-        for i, (_, ids) in enumerate(self.groupings):
-            if any([id in present_not_annotated for id in ids]):
-                grouped[i] = 0
-
-        return Array(
-            grouped, labels.offset, labels.voxel_size, labels.axis_names, labels.units
-        )
diff --git a/pyproject.toml b/pyproject.toml
index 9070e4b37..cd65da233 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,7 +38,6 @@ dependencies = [
         "lazy-property",
         "neuroglancer",
         "torch",
-        "fibsem_tools >= 6.0, <=6.3",
         "attrs",
         "bokeh",
         "numpy-indexed>=0.3.7",
@@ -183,7 +182,6 @@ module = [
     "daisy.*",
     "lazy_property.*",
     "skimage.*",
-    "fibsem_tools.*",
     "neuroglancer.*",
     "tqdm.*",
     "zarr.*",

From 0ad972d427655ca54f7e8f8f36588401e7b085cd Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi <zouinkhi.marwan@gmail.com>
Date: Sun, 19 Jan 2025 13:33:55 -0500
Subject: [PATCH 2/8] retrieve mask

---
 .../datasplits/datasets/arrays/__init__.py    |  1 +
 .../arrays/missing_annotations_mask_config.py | 62 +++++++++++++++++++
 2 files changed, 63 insertions(+)
 create mode 100644 dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py

diff --git a/dacapo/experiments/datasplits/datasets/arrays/__init__.py b/dacapo/experiments/datasplits/datasets/arrays/__init__.py
index 7b16453b3..d8e6d6d7b 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/__init__.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/__init__.py
@@ -6,6 +6,7 @@
 from .binarize_array_config import BinarizeArrayConfig  # noqa
 from .resampled_array_config import ResampledArrayConfig  # noqa
 from .intensity_array_config import IntensitiesArrayConfig  # noqa
+from .missing_annotations_mask_config import MissingAnnotationsMaskConfig  # noqa
 from .ones_array_config import OnesArrayConfig  # noqa
 from .concat_array_config import ConcatArrayConfig  # noqa
 from .logical_or_array_config import LogicalOrArrayConfig  # noqa
diff --git a/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py b/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py
new file mode 100644
index 000000000..92539aa16
--- /dev/null
+++ b/dacapo/experiments/datasplits/datasets/arrays/missing_annotations_mask_config.py
@@ -0,0 +1,62 @@
+import attr
+
+from .array_config import ArrayConfig
+
+from typing import List, Tuple
+from funlib.persistence import Array
+from cellmap_schemas.annotation import LabelList
+
+import dask.array as da
+
+
+@attr.s
+class MissingAnnotationsMaskConfig(ArrayConfig):
+    """
+    This config class provides the necessary configuration for turning an Annotated dataset into a
+    multi class binary classification problem
+
+    Attributes:
+        source_array_config : ArrayConfig
+            The Array from which to pull annotated data. Is expected to contain a volume with uint64 voxels and no channel dimension
+        groupings : List[Tuple[str, List[int]]]
+            List of id groups with a symantic name. Each id group is a List of ids.
+            Group i found in groupings[i] will be binarized and placed in channel i.
+    Note:
+        The output array will have a channel dimension equal to the number of groups.
+        Each channel will be a binary mask of the ids in the groupings list.
+    """
+
+    source_array_config: ArrayConfig = attr.ib(
+        metadata={
+            "help_text": "The Array from which to pull annotated data. Is expected to contain a volume with uint64 voxels and no channel dimension"
+        }
+    )
+
+    groupings: List[Tuple[str, List[int]]] = attr.ib(
+        metadata={
+            "help_text": "List of id groups with a symantic name. Each id group is a List of ids. "
+            "Group i found in groupings[i] will be binarized and placed in channel i."
+        }
+    )
+
+    def array(self, mode: str = "r") -> Array:
+        labels = self.source_array_config.array(mode)
+        grouped = da.ones((len(self.groupings), *labels.shape), dtype=bool)
+        grouped[:] = labels.data > 0
+        labels_list = LabelList.parse_obj(
+            {"labels": labels._source_data.attrs["labels"]}
+        ).labels
+        present_not_annotated = set(
+            [
+                label.value
+                for label in labels_list
+                if label.annotationState.present and not label.annotationState.annotated
+            ]
+        )
+        for i, (_, ids) in enumerate(self.groupings):
+            if any([id in present_not_annotated for id in ids]):
+                grouped[i] = 0
+
+        return Array(
+            grouped, labels.offset, labels.voxel_size, labels.axis_names, labels.units
+        )

From 7b810450fccb7393b42c2021437f98c6055b8d29 Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi <zouinkhi.marwan@gmail.com>
Date: Thu, 23 Jan 2025 13:50:49 -0500
Subject: [PATCH 3/8] fix dependency, mypy, black

---
 .../datasplits/datasets/arrays/binarize_array_config.py   | 8 +++++---
 pyproject.toml                                            | 4 +++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/dacapo/experiments/datasplits/datasets/arrays/binarize_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/binarize_array_config.py
index 4df39abb4..dbbb9102f 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/binarize_array_config.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/binarize_array_config.py
@@ -56,9 +56,11 @@ def array(self, mode="r") -> Array:
 
         def group_array(data):
             groups = [
-                da.isin(data, group_ids)
-                if len(group_ids) > 0
-                else data != self.background
+                (
+                    da.isin(data, group_ids)
+                    if len(group_ids) > 0
+                    else data != self.background
+                )
                 for _, group_ids in self.groupings
             ]
             out = da.stack(groups, axis=0)
diff --git a/pyproject.toml b/pyproject.toml
index cd65da233..3c145fc0d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -59,6 +59,7 @@ dependencies = [
         "boto3",
         "matplotlib",
         "xarray-multiscale",
+        "cellmap-schemas",
         ]
 
 # extras
@@ -200,7 +201,8 @@ module = [
     "napari.*",
     "empanada.*",
     "IPython.*",
-    "xarray_multiscale.*"
+    "xarray_multiscale.*",
+    "cellmap_schemas.*",
 ]
 ignore_missing_imports = true
 

From 09a39186ddb099761960b877a65d50413432b380 Mon Sep 17 00:00:00 2001
From: mzouink <mzouink@users.noreply.github.com>
Date: Thu, 23 Jan 2025 19:03:22 +0000
Subject: [PATCH 4/8] :art: Format Python code with psf/black

---
 tests/operations/test_array.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/tests/operations/test_array.py b/tests/operations/test_array.py
index a6656ff44..ff8bca191 100644
--- a/tests/operations/test_array.py
+++ b/tests/operations/test_array.py
@@ -19,10 +19,21 @@
 def test_array(array_config):
     array = array_config.array()
 
-    assert array.offset == (12, 12, 12), f"offset is not correct, expected (12, 12, 12), got {array.offset}"
-    assert array.voxel_size == (1, 2, 4), f"resolution is not correct, expected (1, 2, 4), got {array.voxel_size}"
-    assert array.axis_names == ["z", "y", "x"], f"axis names are not correct, expected ['z', 'y', 'x'], got {array.axis_names}"
+    assert array.offset == (
+        12,
+        12,
+        12,
+    ), f"offset is not correct, expected (12, 12, 12), got {array.offset}"
+    assert array.voxel_size == (
+        1,
+        2,
+        4,
+    ), f"resolution is not correct, expected (1, 2, 4), got {array.voxel_size}"
+    assert array.axis_names == [
+        "z",
+        "y",
+        "x",
+    ], f"axis names are not correct, expected ['z', 'y', 'x'], got {array.axis_names}"
 
     # offset = array.attrs["offset"]
     # resolution = array.attrs["resolution"]
-

From 72f894ba133e69113e7d330efd2fc49e7b391025 Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi <zouinkhi.marwan@gmail.com>
Date: Thu, 23 Jan 2025 14:18:05 -0500
Subject: [PATCH 5/8] Update pyproject.toml

---
 pyproject.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index fcb1ed4b1..132dff3c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -200,7 +200,6 @@ module = [
     "napari.*",
     "empanada.*",
     "IPython.*",
-    "xarray_multiscale.*",
     "cellmap_schemas.*",
 ]
 ignore_missing_imports = true

From 302f4c4c4308f29b2daf96c17602af0fda39aa7a Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi <zouinkhi.marwan@gmail.com>
Date: Tue, 18 Feb 2025 13:44:52 -0500
Subject: [PATCH 6/8] add dep

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 132dff3c1..8db109552 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,7 +58,8 @@ dependencies = [
         "boto3",
         "matplotlib",
         "cellmap-schemas",
-        "funlib.persistence @ git+https://github.com/funkelab/funlib.persistence.git@ome-ngff"
+        "funlib.persistence @ git+https://github.com/funkelab/funlib.persistence.git@ome-ngff",
+        "xarray-multiscale",
         ]
 
 # extras

From 5fb69a0967201ea9e0e65e563106e9366592a905 Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi <zouinkhi.marwan@gmail.com>
Date: Tue, 18 Feb 2025 13:47:40 -0500
Subject: [PATCH 7/8] fix boto3 version for faster pypi

---
 pyproject.toml |  2 +-
 remove_all.py  | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 remove_all.py

diff --git a/pyproject.toml b/pyproject.toml
index 8db109552..5dfb3356e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -55,7 +55,7 @@ dependencies = [
         "pyyaml",
         "scipy",
         "upath",
-        "boto3",
+        "boto3==1.35.81",
         "matplotlib",
         "cellmap-schemas",
         "funlib.persistence @ git+https://github.com/funkelab/funlib.persistence.git@ome-ngff",
diff --git a/remove_all.py b/remove_all.py
new file mode 100644
index 000000000..baa01286d
--- /dev/null
+++ b/remove_all.py
@@ -0,0 +1,26 @@
+import os
+import re
+
+def remove_docstrings_from_file(file_path):
+    with open(file_path, 'r') as file:
+        content = file.read()
+
+    # Pattern to match single-line and multi-line docstrings
+    docstring_pattern = r'(""".*?"""|\'\'\'.*?\'\'\')'
+    updated_content = re.sub(docstring_pattern, '', content, flags=re.DOTALL)
+
+    with open(file_path, 'w') as file:
+        file.write(updated_content)
+
+def process_directory(directory_path):
+    for root, _, files in os.walk(directory_path):
+        for file in files:
+            if file.endswith('.py'):
+                file_path = os.path.join(root, file)
+                print(f"Removing docstrings from {file_path}")
+                remove_docstrings_from_file(file_path)
+
+if __name__ == "__main__":
+    directory_path = input("Enter the path to the directory: ")
+    process_directory(directory_path)
+

From f5b4206a2a25722b8ce99e839a02c369989d8dfb Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi <zouinkhi.marwan@gmail.com>
Date: Tue, 18 Feb 2025 13:48:20 -0500
Subject: [PATCH 8/8] remove tmp

---
 remove_all.py | 26 --------------------------
 1 file changed, 26 deletions(-)
 delete mode 100644 remove_all.py

diff --git a/remove_all.py b/remove_all.py
deleted file mode 100644
index baa01286d..000000000
--- a/remove_all.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import os
-import re
-
-def remove_docstrings_from_file(file_path):
-    with open(file_path, 'r') as file:
-        content = file.read()
-
-    # Pattern to match single-line and multi-line docstrings
-    docstring_pattern = r'(""".*?"""|\'\'\'.*?\'\'\')'
-    updated_content = re.sub(docstring_pattern, '', content, flags=re.DOTALL)
-
-    with open(file_path, 'w') as file:
-        file.write(updated_content)
-
-def process_directory(directory_path):
-    for root, _, files in os.walk(directory_path):
-        for file in files:
-            if file.endswith('.py'):
-                file_path = os.path.join(root, file)
-                print(f"Removing docstrings from {file_path}")
-                remove_docstrings_from_file(file_path)
-
-if __name__ == "__main__":
-    directory_path = input("Enter the path to the directory: ")
-    process_directory(directory_path)
-