From 6d79cc2484ea2e83fd20e12f6164a232e415d443 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 11 Sep 2024 17:37:13 -0400 Subject: [PATCH 1/5] JP-3743: Make outlier detection respect weights for in-memory models --- .../tests/test_outlier_detection.py | 16 +++++++++++++--- jwst/outlier_detection/utils.py | 16 +++++++++++----- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py index ebd732437b..5478aca6e3 100644 --- a/jwst/outlier_detection/tests/test_outlier_detection.py +++ b/jwst/outlier_detection/tests/test_outlier_detection.py @@ -600,8 +600,18 @@ def test_create_median(three_sci_as_asn, tmp_cwd): lib_on_disk = ModelLibrary(three_sci_as_asn, on_disk=True) lib_in_memory = ModelLibrary(three_sci_as_asn, on_disk=False) - median_on_disk = create_median(lib_on_disk, 0.7) - median_in_memory = create_median(lib_in_memory, 0.7) + # make this test meaningful w.r.t. handling of weights + with (lib_on_disk, lib_in_memory): + for lib in [lib_on_disk, lib_in_memory]: + for model in lib: + model.wht = np.ones_like(model.data) + model.wht[4,9] = 0.5 + lib.shelve(model, modify=True) + + median_on_disk = create_median(lib_on_disk, 0.7, on_disk=True) + median_in_memory = create_median(lib_in_memory, 0.7, on_disk=False) + + assert np.isnan(median_in_memory[4,9]) # Make sure the median library is the same for on-disk and in-memory - assert np.allclose(median_on_disk, median_in_memory) + assert np.allclose(median_on_disk, median_in_memory, equal_nan=True) \ No newline at end of file diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index 4abdfbad1b..0165cdccea 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -66,13 +66,19 @@ def create_median(resampled_models, maskpt, on_disk=True, buffer_size=10.0): # compute median over all models if not on_disk: # easier case: all models in library can be loaded into memory at once - model_list = [] with resampled_models: - for resampled in resampled_models: - model_list.append(resampled.data) - resampled_models.shelve(resampled, modify=False) + for i, resampled in enumerate(resampled_models): + # convert to a CubeModel to pass into create_cube_median + if i == 0: + cube_shape = (len(resampled_models), resampled.data.shape[0], resampled.data.shape[1]) + cube = datamodels.CubeModel(data=np.zeros(cube_shape)) + cube.wht = np.zeros(cube_shape) + + cube.data[i] = resampled.data + cube.wht[i] = resampled.wht + resampled_models.shelve(resampled, i, modify=False) del resampled - return np.nanmedian(np.array(model_list), axis=0) + return create_cube_median(cube, maskpt) else: # set up buffered access to all input models with resampled_models: From f32668f612ca2a74233ee03dd5b4e3606e7b4dc4 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 11 Sep 2024 17:42:43 -0400 Subject: [PATCH 2/5] added changelog entry --- CHANGES.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 3e5d7a4927..a17ab3914b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -520,6 +520,9 @@ outlier_detection - Re-enabled saving of blot models when `save_intermediate_results` is True. [#8758] +- Fixed a bug that caused different results from the median calculation when the + `in_memory` parameter was set to `True` vs `False`. 
[#8777] + pathloss -------- From 681de1bbc78ba73f10132c3b6440cd2d8daab836 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 12 Sep 2024 09:34:19 -0400 Subject: [PATCH 3/5] improvements per @braingram comments --- jwst/outlier_detection/utils.py | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index 0165cdccea..dc323df2eb 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -57,28 +57,28 @@ def create_median(resampled_models, maskpt, on_disk=True, buffer_size=10.0): """ # Compute the weight threshold for each input model weight_thresholds = [] + model_list = [] with resampled_models: for resampled in resampled_models: weight_threshold = compute_weight_threshold(resampled.wht, maskpt) - weight_thresholds.append(weight_threshold) + if not on_disk: + # handle weights right away for in-memory case + data = resampled.data.copy() + data[resampled.wht < weight_threshold] = np.nan + model_list.append(data) + del data + else: + weight_thresholds.append(weight_threshold) resampled_models.shelve(resampled, modify=False) - - # compute median over all models + del resampled + + # easier case: all models in library can be loaded into memory at once if not on_disk: - # easier case: all models in library can be loaded into memory at once - with resampled_models: - for i, resampled in enumerate(resampled_models): - # convert to a CubeModel to pass into create_cube_median - if i == 0: - cube_shape = (len(resampled_models), resampled.data.shape[0], resampled.data.shape[1]) - cube = datamodels.CubeModel(data=np.zeros(cube_shape)) - cube.wht = np.zeros(cube_shape) - - cube.data[i] = resampled.data - cube.wht[i] = resampled.wht - resampled_models.shelve(resampled, i, modify=False) - del resampled - return create_cube_median(cube, maskpt) + with warnings.catch_warnings(): + warnings.filterwarnings(action="ignore", + message="All-NaN slice encountered", + category=RuntimeWarning) + return np.nanmedian(np.array(model_list), axis=0) else: # set up buffered access to all input models with resampled_models: From 8457a61a7d0b5d996f7ee16c9f45101f2249843e Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 12 Sep 2024 12:47:37 -0400 Subject: [PATCH 4/5] read create_median on_disk behavior from input library and add regtest --- jwst/outlier_detection/imaging.py | 3 +-- jwst/outlier_detection/tests/test_outlier_detection.py | 4 ++-- jwst/outlier_detection/utils.py | 3 ++- jwst/regtest/test_nircam_mtimage.py | 5 +++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/jwst/outlier_detection/imaging.py b/jwst/outlier_detection/imaging.py index 9f1aabf597..5678b45802 100644 --- a/jwst/outlier_detection/imaging.py +++ b/jwst/outlier_detection/imaging.py @@ -100,14 +100,13 @@ def detect_outliers( input_models.shelve(model, modify=True) # Perform median combination on set of drizzled mosaics - median_data = create_median(drizzled_models, maskpt, on_disk=not in_memory) + median_data = create_median(drizzled_models, maskpt) if save_intermediate_results: # make a median model with drizzled_models: example_model = drizzled_models.borrow(0) drizzled_models.shelve(example_model, modify=False) - #with datamodels.open(example_model) as dm0: median_model = datamodels.ImageModel(median_data) median_model.update(example_model) median_model.meta.wcs = median_wcs diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py 
b/jwst/outlier_detection/tests/test_outlier_detection.py index 5478aca6e3..ec469f4fb4 100644 --- a/jwst/outlier_detection/tests/test_outlier_detection.py +++ b/jwst/outlier_detection/tests/test_outlier_detection.py @@ -608,8 +608,8 @@ def test_create_median(three_sci_as_asn, tmp_cwd): model.wht[4,9] = 0.5 lib.shelve(model, modify=True) - median_on_disk = create_median(lib_on_disk, 0.7, on_disk=True) - median_in_memory = create_median(lib_in_memory, 0.7, on_disk=False) + median_on_disk = create_median(lib_on_disk, 0.7) + median_in_memory = create_median(lib_in_memory, 0.7) assert np.isnan(median_in_memory[4,9]) diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index dc323df2eb..601e3f2bd2 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -32,7 +32,7 @@ def create_cube_median(cube_model, maskpt): return median -def create_median(resampled_models, maskpt, on_disk=True, buffer_size=10.0): +def create_median(resampled_models, maskpt, buffer_size=10.0): """Create a median image from the singly resampled images. Parameters @@ -56,6 +56,7 @@ def create_median(resampled_models, maskpt, on_disk=True, buffer_size=10.0): The median image. """ # Compute the weight threshold for each input model + on_disk = resampled_models._on_disk weight_thresholds = [] model_list = [] with resampled_models: diff --git a/jwst/regtest/test_nircam_mtimage.py b/jwst/regtest/test_nircam_mtimage.py index 961d34914d..21b5013042 100644 --- a/jwst/regtest/test_nircam_mtimage.py +++ b/jwst/regtest/test_nircam_mtimage.py @@ -8,11 +8,12 @@ @pytest.mark.bigdata -def test_nircam_image_moving_target_i2d(rtdata, fitsdiff_default_kwargs): +@pytest.mark.parametrize("in_memory", [True, False]) +def test_nircam_image_moving_target_i2d(rtdata, fitsdiff_default_kwargs, in_memory): """Test resampled i2d of moving target exposures for NIRCam imaging""" rtdata.get_asn("nircam/image/mt_asn.json") rtdata.output = "mt_assoc_i2d.fits" - args = ["calwebb_image3", rtdata.input] + args = ["calwebb_image3", rtdata.input, "--in_memory=" + str(in_memory)] Step.from_cmdline(args) rtdata.get_truth("truth/test_nircam_mtimage/mt_assoc_i2d.fits") From f6671fd7911dcbe9d05b0465524aca0735ef2994 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 12 Sep 2024 12:52:56 -0400 Subject: [PATCH 5/5] updated docstring to reflect create_median change --- jwst/outlier_detection/utils.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index 601e3f2bd2..2bc6b2ff75 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -43,20 +43,19 @@ def create_median(resampled_models, maskpt, buffer_size=10.0): maskpt : float The weight threshold for masking out low weight pixels. - on_disk : bool - If True, the input models are on disk and will be read in chunks. - buffer_size : float The size of chunk in MB, per input model, that will be read into memory. - This parameter has no effect if on_disk is False. + This parameter has no effect if the input library has its on_disk attribute + set to False. Returns ------- median_image : ndarray The median image. """ - # Compute the weight threshold for each input model on_disk = resampled_models._on_disk + + # Compute the weight threshold for each input model weight_thresholds = [] model_list = [] with resampled_models:
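
For reference, the behavior these patches converge on (replace each input's low-weight pixels with NaN, then take a nanmedian across the stack) can be reproduced with plain NumPy. The sketch below is illustrative only, not the pipeline's implementation: simple_threshold is a hypothetical stand-in for compute_weight_threshold (here simply maskpt times the mean nonzero weight), and the masked pixel mirrors the one used in the updated unit test.

import warnings
import numpy as np


def simple_threshold(wht, maskpt):
    # Hypothetical stand-in for compute_weight_threshold: mask pixels whose
    # weight falls below maskpt times the mean nonzero weight.
    return maskpt * np.mean(wht[wht > 0])


def median_with_weights(datas, whts, maskpt):
    # Replace low-weight pixels with NaN in each input, then median-combine,
    # ignoring the expected all-NaN-slice warning where every input is masked.
    masked = []
    for data, wht in zip(datas, whts):
        d = data.astype(float, copy=True)
        d[wht < simple_threshold(wht, maskpt)] = np.nan
        masked.append(d)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message="All-NaN slice encountered",
                                category=RuntimeWarning)
        return np.nanmedian(np.array(masked), axis=0)


# Three 10x10 inputs; pixel [4, 9] has weight 0.5 in every input, so with
# maskpt=0.7 it is masked everywhere and the combined median is NaN there.
datas = [np.full((10, 10), float(i)) for i in range(3)]
whts = [np.ones((10, 10)) for _ in range(3)]
for w in whts:
    w[4, 9] = 0.5
median = median_with_weights(datas, whts, 0.7)
assert np.isnan(median[4, 9])
assert median[0, 0] == 1.0

With maskpt=0.7 and a mean weight close to 1, the threshold lands near 0.7, so the 0.5-weight pixel is excluded from the median regardless of whether the inputs are processed in memory or from disk, which is what the equal_nan comparison in the updated test verifies.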