From 6d79cc2484ea2e83fd20e12f6164a232e415d443 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 11 Sep 2024 17:37:13 -0400 Subject: [PATCH 1/5] JP-3743: Make outlier detection respect weights for in-memory models --- .../tests/test_outlier_detection.py | 16 +++++++++++++--- jwst/outlier_detection/utils.py | 16 +++++++++++----- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py index ebd732437b..5478aca6e3 100644 --- a/jwst/outlier_detection/tests/test_outlier_detection.py +++ b/jwst/outlier_detection/tests/test_outlier_detection.py @@ -600,8 +600,18 @@ def test_create_median(three_sci_as_asn, tmp_cwd): lib_on_disk = ModelLibrary(three_sci_as_asn, on_disk=True) lib_in_memory = ModelLibrary(three_sci_as_asn, on_disk=False) - median_on_disk = create_median(lib_on_disk, 0.7) - median_in_memory = create_median(lib_in_memory, 0.7) + # make this test meaningful w.r.t. handling of weights + with (lib_on_disk, lib_in_memory): + for lib in [lib_on_disk, lib_in_memory]: + for model in lib: + model.wht = np.ones_like(model.data) + model.wht[4,9] = 0.5 + lib.shelve(model, modify=True) + + median_on_disk = create_median(lib_on_disk, 0.7, on_disk=True) + median_in_memory = create_median(lib_in_memory, 0.7, on_disk=False) + + assert np.isnan(median_in_memory[4,9]) # Make sure the median library is the same for on-disk and in-memory - assert np.allclose(median_on_disk, median_in_memory) + assert np.allclose(median_on_disk, median_in_memory, equal_nan=True) \ No newline at end of file diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index 4abdfbad1b..0165cdccea 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -66,13 +66,19 @@ def create_median(resampled_models, maskpt, on_disk=True, buffer_size=10.0): # compute median over all models if not on_disk: # easier case: all models in library can be loaded into memory at once - model_list = [] with resampled_models: - for resampled in resampled_models: - model_list.append(resampled.data) - resampled_models.shelve(resampled, modify=False) + for i, resampled in enumerate(resampled_models): + # convert to a CubeModel to pass into create_cube_median + if i == 0: + cube_shape = (len(resampled_models), resampled.data.shape[0], resampled.data.shape[1]) + cube = datamodels.CubeModel(data=np.zeros(cube_shape)) + cube.wht = np.zeros(cube_shape) + + cube.data[i] = resampled.data + cube.wht[i] = resampled.wht + resampled_models.shelve(resampled, i, modify=False) del resampled - return np.nanmedian(np.array(model_list), axis=0) + return create_cube_median(cube, maskpt) else: # set up buffered access to all input models with resampled_models: From f32668f612ca2a74233ee03dd5b4e3606e7b4dc4 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 11 Sep 2024 17:42:43 -0400 Subject: [PATCH 2/5] added changelog entry --- CHANGES.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 3e5d7a4927..a17ab3914b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -520,6 +520,9 @@ outlier_detection - Re-enabled saving of blot models when `save_intermediate_results` is True. [#8758] +- Fixed a bug that caused different results from the median calculation when the + `in_memory` parameter was set to `True` vs `False`. 
[#8777] + pathloss -------- From 681de1bbc78ba73f10132c3b6440cd2d8daab836 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 12 Sep 2024 09:34:19 -0400 Subject: [PATCH 3/5] improvements per @braingram comments --- jwst/outlier_detection/utils.py | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index 0165cdccea..dc323df2eb 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -57,28 +57,28 @@ def create_median(resampled_models, maskpt, on_disk=True, buffer_size=10.0): """ # Compute the weight threshold for each input model weight_thresholds = [] + model_list = [] with resampled_models: for resampled in resampled_models: weight_threshold = compute_weight_threshold(resampled.wht, maskpt) - weight_thresholds.append(weight_threshold) + if not on_disk: + # handle weights right away for in-memory case + data = resampled.data.copy() + data[resampled.wht < weight_threshold] = np.nan + model_list.append(data) + del data + else: + weight_thresholds.append(weight_threshold) resampled_models.shelve(resampled, modify=False) - - # compute median over all models + del resampled + + # easier case: all models in library can be loaded into memory at once if not on_disk: - # easier case: all models in library can be loaded into memory at once - with resampled_models: - for i, resampled in enumerate(resampled_models): - # convert to a CubeModel to pass into create_cube_median - if i == 0: - cube_shape = (len(resampled_models), resampled.data.shape[0], resampled.data.shape[1]) - cube = datamodels.CubeModel(data=np.zeros(cube_shape)) - cube.wht = np.zeros(cube_shape) - - cube.data[i] = resampled.data - cube.wht[i] = resampled.wht - resampled_models.shelve(resampled, i, modify=False) - del resampled - return create_cube_median(cube, maskpt) + with warnings.catch_warnings(): + warnings.filterwarnings(action="ignore", + message="All-NaN slice encountered", + category=RuntimeWarning) + return np.nanmedian(np.array(model_list), axis=0) else: # set up buffered access to all input models with resampled_models: From 8457a61a7d0b5d996f7ee16c9f45101f2249843e Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 12 Sep 2024 12:47:37 -0400 Subject: [PATCH 4/5] read create_median on_disk behavior from input library and add regtest --- jwst/outlier_detection/imaging.py | 3 +-- jwst/outlier_detection/tests/test_outlier_detection.py | 4 ++-- jwst/outlier_detection/utils.py | 3 ++- jwst/regtest/test_nircam_mtimage.py | 5 +++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/jwst/outlier_detection/imaging.py b/jwst/outlier_detection/imaging.py index 9f1aabf597..5678b45802 100644 --- a/jwst/outlier_detection/imaging.py +++ b/jwst/outlier_detection/imaging.py @@ -100,14 +100,13 @@ def detect_outliers( input_models.shelve(model, modify=True) # Perform median combination on set of drizzled mosaics - median_data = create_median(drizzled_models, maskpt, on_disk=not in_memory) + median_data = create_median(drizzled_models, maskpt) if save_intermediate_results: # make a median model with drizzled_models: example_model = drizzled_models.borrow(0) drizzled_models.shelve(example_model, modify=False) - #with datamodels.open(example_model) as dm0: median_model = datamodels.ImageModel(median_data) median_model.update(example_model) median_model.meta.wcs = median_wcs diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py 
b/jwst/outlier_detection/tests/test_outlier_detection.py index 5478aca6e3..ec469f4fb4 100644 --- a/jwst/outlier_detection/tests/test_outlier_detection.py +++ b/jwst/outlier_detection/tests/test_outlier_detection.py @@ -608,8 +608,8 @@ def test_create_median(three_sci_as_asn, tmp_cwd): model.wht[4,9] = 0.5 lib.shelve(model, modify=True) - median_on_disk = create_median(lib_on_disk, 0.7, on_disk=True) - median_in_memory = create_median(lib_in_memory, 0.7, on_disk=False) + median_on_disk = create_median(lib_on_disk, 0.7) + median_in_memory = create_median(lib_in_memory, 0.7) assert np.isnan(median_in_memory[4,9]) diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index dc323df2eb..601e3f2bd2 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -32,7 +32,7 @@ def create_cube_median(cube_model, maskpt): return median -def create_median(resampled_models, maskpt, on_disk=True, buffer_size=10.0): +def create_median(resampled_models, maskpt, buffer_size=10.0): """Create a median image from the singly resampled images. Parameters @@ -56,6 +56,7 @@ def create_median(resampled_models, maskpt, on_disk=True, buffer_size=10.0): The median image. """ # Compute the weight threshold for each input model + on_disk = resampled_models._on_disk weight_thresholds = [] model_list = [] with resampled_models: diff --git a/jwst/regtest/test_nircam_mtimage.py b/jwst/regtest/test_nircam_mtimage.py index 961d34914d..21b5013042 100644 --- a/jwst/regtest/test_nircam_mtimage.py +++ b/jwst/regtest/test_nircam_mtimage.py @@ -8,11 +8,12 @@ @pytest.mark.bigdata -def test_nircam_image_moving_target_i2d(rtdata, fitsdiff_default_kwargs): +@pytest.mark.parametrize("in_memory", [True, False]) +def test_nircam_image_moving_target_i2d(rtdata, fitsdiff_default_kwargs, in_memory): """Test resampled i2d of moving target exposures for NIRCam imaging""" rtdata.get_asn("nircam/image/mt_asn.json") rtdata.output = "mt_assoc_i2d.fits" - args = ["calwebb_image3", rtdata.input] + args = ["calwebb_image3", rtdata.input, "--in_memory=" + str(in_memory)] Step.from_cmdline(args) rtdata.get_truth("truth/test_nircam_mtimage/mt_assoc_i2d.fits") From f6671fd7911dcbe9d05b0465524aca0735ef2994 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 12 Sep 2024 12:52:56 -0400 Subject: [PATCH 5/5] updated docstring to reflect create_median change --- jwst/outlier_detection/utils.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index 601e3f2bd2..2bc6b2ff75 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -43,20 +43,19 @@ def create_median(resampled_models, maskpt, buffer_size=10.0): maskpt : float The weight threshold for masking out low weight pixels. - on_disk : bool - If True, the input models are on disk and will be read in chunks. - buffer_size : float The size of chunk in MB, per input model, that will be read into memory. - This parameter has no effect if on_disk is False. + This parameter has no effect if the input library has its on_disk attribute + set to False. Returns ------- median_image : ndarray The median image. """ - # Compute the weight threshold for each input model on_disk = resampled_models._on_disk + + # Compute the weight threshold for each input model weight_thresholds = [] model_list = [] with resampled_models:
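
For reference, the behavior these patches converge on (replace each input's low-weight pixels with NaN, then take a nanmedian across the stack) can be reproduced with plain NumPy. The sketch below is illustrative only, not the pipeline's implementation: simple_threshold is a hypothetical stand-in for compute_weight_threshold (here simply maskpt times the mean nonzero weight), and the masked pixel mirrors the one used in the updated unit test.

import warnings
import numpy as np


def simple_threshold(wht, maskpt):
    # Hypothetical stand-in for compute_weight_threshold: mask pixels whose
    # weight falls below maskpt times the mean nonzero weight.
    return maskpt * np.mean(wht[wht > 0])


def median_with_weights(datas, whts, maskpt):
    # Replace low-weight pixels with NaN in each input, then median-combine,
    # ignoring the expected all-NaN-slice warning where every input is masked.
    masked = []
    for data, wht in zip(datas, whts):
        d = data.astype(float, copy=True)
        d[wht < simple_threshold(wht, maskpt)] = np.nan
        masked.append(d)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message="All-NaN slice encountered",
                                category=RuntimeWarning)
        return np.nanmedian(np.array(masked), axis=0)


# Three 10x10 inputs; pixel [4, 9] has weight 0.5 in every input, so with
# maskpt=0.7 it is masked everywhere and the combined median is NaN there.
datas = [np.full((10, 10), float(i)) for i in range(3)]
whts = [np.ones((10, 10)) for _ in range(3)]
for w in whts:
    w[4, 9] = 0.5
median = median_with_weights(datas, whts, 0.7)
assert np.isnan(median[4, 9])
assert median[0, 0] == 1.0

With maskpt=0.7 and a mean weight close to 1, the threshold lands near 0.7, so the 0.5-weight pixel is excluded from the median regardless of whether the inputs are processed in memory or from disk, which is what the equal_nan comparison in the updated test verifies.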