Skip to content

Commit

Permalink
make Image and Measurement creation atomic together (#648)
Browse files Browse the repository at this point in the history
* make Image and Measurement creation atomic together
Fixes #647

* updated changelog

* add test for invalid catalog

* added missing test config; renamed new test data to avoid inclusion in existing glob tests
  • Loading branch information
marxide authored Apr 11, 2022
1 parent 358a663 commit 0ded985
Show file tree
Hide file tree
Showing 8 changed files with 1,397 additions and 33 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

#### Fixed

- Ensure Image models are not created if the catalogue ingest fails [#648](https://github.com/askap-vast/vast-pipeline/pull/648).
- Fixed run failures caused by attempting to force fit images with empty catalogues [#653](https://github.com/askap-vast/vast-pipeline/pull/653).
- Fixed a Bokeh regression that requires LabelSet values to be strings [#652](https://github.com/askap-vast/vast-pipeline/pull/652).
- Fixed deprecation warning on astroquery Ned import [#644](https://github.com/askap-vast/vast-pipeline/pull/644).
Expand Down Expand Up @@ -96,6 +97,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

#### List of PRs

- [#648](https://github.com/askap-vast/vast-pipeline/pull/648): fix: make Image and Measurement creation atomic together.
- [#653](https://github.com/askap-vast/vast-pipeline/pull/653): fix: Allow forced fitting on images with empty catalogues.
- [#652](https://github.com/askap-vast/vast-pipeline/pull/652): dep, fix: Bump bokeh 2.4.2.
- [#644](https://github.com/askap-vast/vast-pipeline/pull/644): fix: Fix astroquery Ned import deprecation.
Expand Down
50 changes: 24 additions & 26 deletions vast_pipeline/pipeline/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,35 +110,33 @@ def make_upload_images(
img, exists_f = get_create_img(band.id, image)
skyreg = img.skyreg

# add image and skyregion to respective lists
images.append(img)
if skyreg not in skyregions:
skyregions.append(skyreg)

if exists_f:
logger.info('Image %s already processed', img.name)
continue

# 1.3 get the image measurements and save them in DB
measurements = image.read_selavy(img)
logger.info(
'Processed measurements dataframe of shape: (%i, %i)',
measurements.shape[0], measurements.shape[1]
)
# add image and skyregion to respective lists
images.append(img)
if skyreg not in skyregions:
skyregions.append(skyreg)

if exists_f:
logger.info("Image %s already processed", img.name)
continue

# 1.3 get the image measurements and save them in DB
measurements = image.read_selavy(img)
logger.info(
"Processed measurements dataframe of shape: (%i, %i)",
measurements.shape[0],
measurements.shape[1],
)

# upload measurements, a column with the db is added to the df
measurements = make_upload_measurements(measurements)
# upload measurements, a column with the db is added to the df
measurements = make_upload_measurements(measurements)

# save measurements to parquet file in pipeline run folder
base_folder = os.path.dirname(img.measurements_path)
if not os.path.exists(base_folder):
os.makedirs(base_folder)
# save measurements to parquet file in pipeline run folder
base_folder = os.path.dirname(img.measurements_path)
if not os.path.exists(base_folder):
os.makedirs(base_folder)

measurements.to_parquet(
img.measurements_path,
index=False
)
del measurements, image, band, img
measurements.to_parquet(img.measurements_path, index=False)
del measurements, image, band, img

logger.info(
'Total images upload/loading time: %.2f seconds',
Expand Down
846 changes: 846 additions & 0 deletions vast_pipeline/tests/data/epoch-invalid.fits

Large diffs are not rendered by default.

238 changes: 238 additions & 0 deletions vast_pipeline/tests/data/epoch-invalid.meanMap.fits

Large diffs are not rendered by default.

201 changes: 201 additions & 0 deletions vast_pipeline/tests/data/epoch-invalid.noiseMap.fits

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions vast_pipeline/tests/data/epoch-invalid.selavy.components.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#island_idcomponent_idcomponent_namera_hms_contdec_dms_contra_deg_contdec_deg_contra_errdec_errfreqflux_peakflux_peak_errflux_intflux_int_errmaj_axismin_axispos_angmaj_axis_errmin_axis_errpos_ang_errmaj_axis_deconvmin_axis_deconvpos_ang_deconvmaj_axis_deconv_errmin_axis_deconv_errpos_ang_deconv_errchi_squared_fitrms_fit_gaussspectral_indexspectral_curvaturespectral_index_errspectral_curvature_errrms_imagehas_siblingsfit_is_estimatespectral_index_from_TTflag_c4comment
#----[deg][deg][arcsec][arcsec][MHz][mJy/beam][mJy/beam][mJy][mJy][arcsec][arcsec][deg][arcsec][arcsec][deg][arcsec][arcsec][deg][arcsec][arcsec][deg]--[mJy/beam]--------[mJy/beam]
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
run:
path: vast_pipeline/tests/pipeline-runs/basic-association-invalid-catalog
suppress_astropy_warnings: true
inputs:
image:
- vast_pipeline/tests/data/epoch01.fits
- vast_pipeline/tests/data/epoch02.fits
- vast_pipeline/tests/data/epoch03.fits
- vast_pipeline/tests/data/epoch04.fits
- vast_pipeline/tests/data/epoch-invalid.fits
selavy:
- vast_pipeline/tests/data/epoch01.selavy.components.txt
- vast_pipeline/tests/data/epoch02.selavy.components.txt
- vast_pipeline/tests/data/epoch03.selavy.components.txt
- vast_pipeline/tests/data/epoch04.selavy.components.txt
- vast_pipeline/tests/data/epoch-invalid.selavy.components.txt
noise:
- vast_pipeline/tests/data/epoch01.noiseMap.fits
- vast_pipeline/tests/data/epoch02.noiseMap.fits
- vast_pipeline/tests/data/epoch03.noiseMap.fits
- vast_pipeline/tests/data/epoch04.noiseMap.fits
- vast_pipeline/tests/data/epoch-invalid.noiseMap.fits
background:
- vast_pipeline/tests/data/epoch01.meanMap.fits
- vast_pipeline/tests/data/epoch02.meanMap.fits
- vast_pipeline/tests/data/epoch03.meanMap.fits
- vast_pipeline/tests/data/epoch04.meanMap.fits
- vast_pipeline/tests/data/epoch-invalid.meanMap.fits
source_monitoring:
monitor: false
source_association:
method: basic
radius: 10.0
new_sources:
min_sigma: 5.0
measurements:
source_finder: selavy
flux_fractional_error: 0.0
condon_errors: true
selavy_local_rms_fill_value: 0.2
ra_uncertainty: 1
dec_uncertainty: 1
variability:
source_aggregate_pair_metrics_min_abs_vs: 4.3
47 changes: 40 additions & 7 deletions vast_pipeline/tests/test_runpipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

from django.conf import settings as s
from django.test import TestCase, override_settings
from django.core.management import call_command
from django.core.management import call_command, CommandError

from vast_pipeline.models import Image


TEST_ROOT = os.path.join(s.BASE_DIR, 'vast_pipeline', 'tests')
Expand All @@ -14,17 +16,48 @@
RAW_IMAGE_DIR=os.path.join(TEST_ROOT, 'data'),
)
class RunPipelineTest(TestCase):
basic_assoc_run: str
run_dir: str

@classmethod
def setUpTestData(cls):
cls.basic_assoc_run = os.path.join(s.PIPELINE_WORKING_DIR, 'basic-association')
def setUpClass(cls):
super().setUpClass()
cls.run_dir = os.path.join(s.PIPELINE_WORKING_DIR, 'basic-association')

def setUp(self):
# TODO: replace with a load images function and call 'runpipeline'
# from each tests (e.g. test_basic_assoc, text_advanced_assoc, etc.)
call_command('runpipeline', self.basic_assoc_run)
call_command('runpipeline', self.run_dir)
self.run_logs = glob.glob(os.path.join(self.run_dir, '*_log.txt'))

def tearDown(self):
"""Clean up the run directory after the test by removing the backup config and
all run logs.
"""
call_command('clearpiperun', self.run_dir)
os.remove(os.path.join(self.run_dir, "config_prev.yaml"))
for run_log in self.run_logs:
os.remove(run_log)

def test_check_run(self):
run_logs = glob.glob(os.path.join(self.basic_assoc_run, '*_log.txt'))
self.assertEqual(len(run_logs), 1)
self.assertEqual(len(self.run_logs), 1)


@override_settings(
PIPELINE_WORKING_DIR=os.path.join(TEST_ROOT, 'pipeline-runs'),
RAW_IMAGE_DIR=os.path.join(TEST_ROOT, 'data'),
)
class RunPipelineInvalidCatalogTest(TestCase):
def test_invalid_catalog(self):
run_dir = os.path.join(s.PIPELINE_WORKING_DIR, 'basic-association-invalid-catalog')
with self.assertRaises(CommandError) as context:
call_command('runpipeline', run_dir)
# check the original exception raised during runpipeline command is a KeyError
# (caused by attempting to read the invalid catalog)
original_exception = context.exception.__context__
self.assertIsInstance(original_exception, KeyError)
# check that only the image for the invalid catalog was not saved
for epoch in range(1, 5):
image = Image.objects.get(name=f"epoch{epoch:02d}.fits")
self.assertIsInstance(image, Image)
with self.assertRaises(Image.DoesNotExist):
Image.objects.get(name="epoch05.fits")

0 comments on commit 0ded985

Please sign in to comment.