spacetelescope · emolter · Sep 12, 2024 · Sep 11, 2024 · Sep 11, 2024 · Sep 12, 2024
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -520,6 +520,9 @@ outlier_detection
 
 - Re-enabled saving of blot models when `save_intermediate_results` is True. [#8758]
 
+- Fixed a bug that caused different results from the median calculation when the
+  `in_memory` parameter was set to `True` vs `False`. [#8777]
+
 pathloss
 --------
 

diff --git a/jwst/outlier_detection/imaging.py b/jwst/outlier_detection/imaging.py
@@ -100,14 +100,13 @@ def detect_outliers(
                 input_models.shelve(model, modify=True)
 
     # Perform median combination on set of drizzled mosaics
-    median_data = create_median(drizzled_models, maskpt, on_disk=not in_memory)
+    median_data = create_median(drizzled_models, maskpt)
 
     if save_intermediate_results:
         # make a median model
         with drizzled_models:
             example_model = drizzled_models.borrow(0)
             drizzled_models.shelve(example_model, modify=False)
-            #with datamodels.open(example_model) as dm0:
             median_model = datamodels.ImageModel(median_data)
             median_model.update(example_model)
             median_model.meta.wcs = median_wcs

diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py
@@ -600,8 +600,18 @@ def test_create_median(three_sci_as_asn, tmp_cwd):
     lib_on_disk = ModelLibrary(three_sci_as_asn, on_disk=True)
     lib_in_memory = ModelLibrary(three_sci_as_asn, on_disk=False)
 
+    # make this test meaningful w.r.t. handling of weights
+    with (lib_on_disk, lib_in_memory):
+        for lib in [lib_on_disk, lib_in_memory]:
+            for model in lib:
+                model.wht = np.ones_like(model.data)
+                model.wht[4,9] = 0.5
+                lib.shelve(model, modify=True)
+
     median_on_disk = create_median(lib_on_disk, 0.7)
     median_in_memory = create_median(lib_in_memory, 0.7)
 
+    assert np.isnan(median_in_memory[4,9])
+
     # Make sure the median library is the same for on-disk and in-memory
-    assert np.allclose(median_on_disk, median_in_memory)
+    assert np.allclose(median_on_disk, median_in_memory, equal_nan=True)
diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py
@@ -32,7 +32,7 @@ def create_cube_median(cube_model, maskpt):
     return median
 
 
-def create_median(resampled_models, maskpt, on_disk=True, buffer_size=10.0):
+def create_median(resampled_models, maskpt, buffer_size=10.0):
     """Create a median image from the singly resampled images.
 
     Parameters
@@ -43,36 +43,42 @@ def create_median(resampled_models, maskpt, on_disk=True, buffer_size=10.0):
     maskpt : float
         The weight threshold for masking out low weight pixels.
 
-    on_disk : bool
-        If True, the input models are on disk and will be read in chunks.
-
     buffer_size : float
         The size of chunk in MB, per input model, that will be read into memory.
-        This parameter has no effect if on_disk is False.
+        This parameter has no effect if the input library has its on_disk attribute
+        set to False.
 
     Returns
     -------
     median_image : ndarray
         The median image.
     """
+    on_disk = resampled_models._on_disk
+
     # Compute the weight threshold for each input model
     weight_thresholds = []
+    model_list = []
     with resampled_models:
         for resampled in resampled_models:
             weight_threshold = compute_weight_threshold(resampled.wht, maskpt)
-            weight_thresholds.append(weight_threshold)
+            if not on_disk:
+                # handle weights right away for in-memory case
+                data = resampled.data.copy()
+                data[resampled.wht < weight_threshold] = np.nan
+                model_list.append(data)
+                del data
+            else:
+                weight_thresholds.append(weight_threshold)
             resampled_models.shelve(resampled, modify=False)
-
-    # compute median over all models
+        del resampled
+
+    # easier case: all models in library can be loaded into memory at once
     if not on_disk:
-        # easier case: all models in library can be loaded into memory at once
-        model_list = []
-        with resampled_models:
-            for resampled in resampled_models:
-                model_list.append(resampled.data)
-                resampled_models.shelve(resampled, modify=False)
-                del resampled
-        return np.nanmedian(np.array(model_list), axis=0)
+        with warnings.catch_warnings():
+            warnings.filterwarnings(action="ignore",
+                                    message="All-NaN slice encountered",
+                                    category=RuntimeWarning)
+            return np.nanmedian(np.array(model_list), axis=0)
     else:
         # set up buffered access to all input models
         with resampled_models:

diff --git a/jwst/regtest/test_nircam_mtimage.py b/jwst/regtest/test_nircam_mtimage.py
@@ -8,11 +8,12 @@
 
 
 @pytest.mark.bigdata
-def test_nircam_image_moving_target_i2d(rtdata, fitsdiff_default_kwargs):
+@pytest.mark.parametrize("in_memory", [True, False])
+def test_nircam_image_moving_target_i2d(rtdata, fitsdiff_default_kwargs, in_memory):
     """Test resampled i2d of moving target exposures for NIRCam imaging"""
     rtdata.get_asn("nircam/image/mt_asn.json")
     rtdata.output = "mt_assoc_i2d.fits"
-    args = ["calwebb_image3", rtdata.input]
+    args = ["calwebb_image3", rtdata.input, "--in_memory=" + str(in_memory)]
     Step.from_cmdline(args)
     rtdata.get_truth("truth/test_nircam_mtimage/mt_assoc_i2d.fits")