chore: merge dev into release

dattalab · Apr 5, 2023 · 4ba3db1 · 4ba3db1
2 parents 7fbef6a + 88f0e47
commit 4ba3db1
Show file tree

Hide file tree

Showing 16 changed files with 1,557 additions and 826 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -17,6 +17,7 @@ jobs:
       stage: pip-install
       before_install:
         - pip install -U pip
+        - pip install numpy==1.18.3
         - pip install pytest==5.4.1 codecov pytest-cov
         - export PYTHONPATH=$PYTHONPATH:$(pwd)
         - if [ "$TRAVIS_OS_NAME" == "linux" ]; then sudo add-apt-repository -y ppa:mc3man/xerus-media; fi

diff --git a/moseq2_app/__init__.py b/moseq2_app/__init__.py
@@ -1 +1 @@
-__version__ = 'v1.2.3'
+__version__ = 'v1.3.0'
diff --git a/moseq2_app/flip/controller.py b/moseq2_app/flip/controller.py
@@ -26,8 +26,7 @@
 
 class FlipRangeTool(FlipClassifierWidgets):
 
-    def __init__(self, input_dir, max_frames, output_file,
-                 tail_filter_iters, prefilter_kernel_size,
+    def __init__(self, input_dir, max_frames, output_file, clean_parameters,
                  launch_gui=True, continuous_slider_update=True):
         """
         Find all the extracted sessions within the given input path, and prepare for GUI display.
@@ -36,8 +35,7 @@ def __init__(self, input_dir, max_frames, output_file,
         input_dir (str): Path to base directory containing extraction session folders
         max_frames (int): Maximum number of frames to include in the dataset.
         output_file (str): Path to save the outputted flip classifier.
-        tail_filter_iters (int): Number of tail filtering iterations
-        prefilter_kernel_size (int): Size of the median spatial filter.
+        clean_parameters (dict): Parameters passed to moseq2_extract.extract.proc.clean_frames 
         launch_gui (bool): Indicates whether to launch the labeling gui or just create the FlipClassifier instance.
         continuous_slider_update (bool): Indicates whether to continuously update the view upon slider edits.
         
@@ -74,11 +72,7 @@ def __init__(self, input_dir, max_frames, output_file,
                              launch_gui=launch_gui)
 
             # initialize frame cleaning parameter dict
-            self.clean_parameters = {
-                'iters_tail': tail_filter_iters,
-                'strel_tail': cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9)),
-                'prefilter_space': (prefilter_kernel_size,)
-            }
+            self.clean_parameters = clean_parameters
 
     def load_sessions(self):
         """
@@ -413,4 +407,4 @@ def apply_flip_classifier(self, chunk_size=4000, chunk_overlap=0,
                     # Check if video is done writing. If not, wait.
                     if video_pipe is not None:
                         video_pipe.communicate()
-                        video_pipe = None
+                        video_pipe = None
diff --git a/moseq2_app/main.py b/moseq2_app/main.py
@@ -41,9 +41,8 @@ def validate_inputs(inputs, progress_paths):
 @filter_warnings
 def flip_classifier_tool(input_dir,
                          output_file,
+                         clean_parameters,
                          max_frames=1e6,
-                         tail_filter_iters=1,
-                         space_filter_size=3,
                          continuous_slider_update=True,
                          launch_gui=True):
     """
@@ -54,8 +53,7 @@ def flip_classifier_tool(input_dir,
     input_dir (str): Path to base directory containing extraction session folders
     max_frames (int): Maximum number of frames to include in the dataset.
     output_file (str): Path to save the outputted flip classifier.
-    tail_filter_iters (int): Number of tail filtering iterations
-    prefilter_kernel_size (int): Size of the median spatial filter.
+    clean_parameters (dict): Parameters passed to moseq2_extract.extract.proc.clean_frames 
     continuous_slider_update (bool): Indicates whether to continuously update the view upon slider widget interactions.
     launch_gui (bool): Indicates whether to launch the labeling gui or just create the FlipClassifier instance.
 
@@ -66,8 +64,7 @@ def flip_classifier_tool(input_dir,
     flip_finder = FlipRangeTool(input_dir=input_dir,
                                 max_frames=max_frames,
                                 output_file=output_file,
-                                tail_filter_iters=tail_filter_iters,
-                                prefilter_kernel_size=space_filter_size,
+                                clean_parameters=clean_parameters,
                                 launch_gui=launch_gui,
                                 continuous_slider_update=continuous_slider_update)
 

diff --git a/moseq2_app/viz/controller.py b/moseq2_app/viz/controller.py
@@ -181,9 +181,9 @@ def get_mean_syllable_info(self):
             # sorted/relabeled syllable usage and duration information from [0, max_syllable) inclusive
             df, scalar_df = merge_labels_with_scalars(self.sorted_index, self.model_path)
             df = df.astype(dict(SubjectName=str, SessionName=str))
-            print('Writing main syllable info to parquet')
-            df.to_parquet(self.df_output_file, engine='fastparquet', compression='gzip')
-            scalar_df.to_parquet(self.scalar_df_output, compression='gzip')
+            # print('Writing main syllable info to parquet')
+            # df.to_parquet(self.df_output_file, engine='fastparquet', compression='gzip')
+            # scalar_df.to_parquet(self.scalar_df_output, compression='gzip')
         else:
             print('Loading parquet files')
             df = pd.read_parquet(self.df_output_file, engine='fastparquet')
@@ -524,13 +524,13 @@ def get_session_mean_syllable_info_df(self):
             df = pd.read_parquet(self.df_path, engine='fastparquet')
             if not os.path.exists(self.scalar_df_path):
                 self.scalar_df = scalars_to_dataframe(self.sorted_index, model_path=self.model_path)
-                self.scalar_df.to_parquet(self.scalar_df_path, compression='gzip')
+                # self.scalar_df.to_parquet(self.scalar_df_path, compression='gzip')
             else:
                 self.scalar_df = pd.read_parquet(self.scalar_df_path)
         else:
             print('Syllable DataFrame not found. Computing and saving syllable statistics...')
             df, self.scalar_df = merge_labels_with_scalars(self.sorted_index, self.model_path)
-            self.scalar_df.to_parquet(self.scalar_df_path, compression='gzip')
+            # self.scalar_df.to_parquet(self.scalar_df_path, compression='gzip')
 
         if self.get_pdfs:
             # Compute syllable position PDFs

diff --git a/notebooks/Flip-Classifier-Training-Notebook.ipynb b/notebooks/Flip-Classifier-Training-Notebook.ipynb
@@ -43,22 +43,33 @@
    "outputs": [],
    "source": [
     "from moseq2_app.main import flip_classifier_tool\n",
+    "from moseq2_extract.util import read_yaml, get_strels\n",
     "\n",
     "input_dir = './' # Specify the data folder\n",
+    "config_path = './config.yaml' # Specify the config file\n",
     "model_path = './flip-classifier-xx-1.pkl' ## e.g. ./flip-classifier-azure-ephys.pkl\n",
     "\n",
     "max_frames = 1e5 # max number of frames to use (performance anecdotally saturates around 1e5)\n",
-    "tail_filter_iters = 1 # number of tail filter iterations\n",
-    "space_filter_size = 3 # size of the spatial median blur filter kernel size\n",
+    "\n",
+    "config_data = read_yaml(config_path) # load config data\n",
+    "\n",
+    "strels = get_strels(config_data)# get structuring elements\n",
+    "\n",
+    "clean_parameters = {'prefilter_space': config_data['spatial_filter_size'], # median filter kernel sizes \n",
+    "                    'prefilter_time': config_data['temporal_filter_size'], # temporal filter kernel sizes\n",
+    "                    'strel_tail': strels['strel_tail'], # struc. element for filtering tail\n",
+    "                    'iters_tail': config_data['tail_filter_iters'], # number of iters for morph. opening to filter tail\n",
+    "                    'frame_dtype': config_data['frame_dtype'], # frame dtype\n",
+    "                    'strel_min':strels['strel_min'], # structuring element for erosion\n",
+    "                    'iters_min': config_data['cable_filter_iters']}# number of iterations for erosion\n",
     "\n",
     "continuous_slider_update = True # update the view as the slider values are updated\n",
     "launch_gui = True # launches the frame selector gui\n",
     "\n",
     "FF = flip_classifier_tool(input_dir=input_dir,\n",
     "                          output_file=model_path,\n",
     "                          max_frames=max_frames,\n",
-    "                          tail_filter_iters=tail_filter_iters,\n",
-    "                          space_filter_size=space_filter_size,\n",
+    "                          clean_parameters=clean_parameters,\n",
     "                          continuous_slider_update=continuous_slider_update,\n",
     "                          launch_gui=launch_gui)"
    ]
@@ -228,7 +239,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.7"
+   "version": "3.7.12"
   },
   "pycharm": {
    "stem_cell": {

diff --git a/notebooks/MoSeq2-Analysis-Visualization-Notebook.ipynb b/notebooks/MoSeq2-Analysis-Visualization-Notebook.ipynb
@@ -33,6 +33,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -41,24 +42,25 @@
     "## Files and Directory Structure\n",
     "To run this notebook, you need the following files and folders in your data directory:\n",
     "- `progress.yaml` - this file stores all the required MoSeq paths used throughout the notebooks\n",
-    "- `model.p` - at least one trained AR-HMM file. It is usually saved in this format within a model folder that lives in your data directory. In the example below, it is saved in `base_model_path`.\n",
-    "- `moseq2-index.yaml` - contains paths to extracted sessions and is used to generate syllable crowd movies\n",
     "- `config.yaml` - configuration file that stores parameters used throughout the MoSeq pipeline\n",
-    "- `_pca/` - folder that contains data generated from the PCA section of the extraction notebook or CLI\n",
     "- `aggregate_results/` - folder that contains aggregated and extracted session data\n",
+    "- `moseq2-index.yaml` - contains paths to extracted sessions and is used to generate syllable crowd movies\n",
+    "- `_pca/` - folder that contains data generated from the PCA section of the extraction notebook or CLI\n",
+    "- `base_model_path` - the folder that keeps all the trained models or all the output model objects after applying pre-trained model(s) to data.\n",
+    "- `model.p` - the trained AR-HMM or the output model object after applying a pre-trained model. There should be at least one trained AR-HMM or one output applied model object in the `base_model_path`.\n",
     "\n",
     "At this stage, your data directory should look something like what's shown below:\n",
     "```\n",
     ".\n",
-    "└── <base_dir>/\n",
-    "    ├── progress.yaml\n",
+    "└── <data_dir>/\n",
     "    ├── config.yaml\n",
+    "    ├── aggregate_results/\n",
     "    ├── moseq2-index.yaml\n",
+    "    ├── _pca/\n",
     "    ├── base_model_path/\n",
     "    │   └── model.p\n",
     "    ...\n",
-    "    ├── _pca/\n",
-    "    └── aggregate_results/\n",
+    "    └── progress.yaml\n",
     "\n",
     "```\n",
     "For more information about how MoSeq organizes data, check out our [wiki](https://github.com/dattalab/moseq2-app/wiki/Directory-Structures-and-yaml-Files-in-MoSeq-Pipeline).\n",
@@ -678,7 +680,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.12"
+   "version": "3.8.9"
   },
   "pycharm": {
    "stem_cell": {