Add migrate_data script to make older x1d files readable by later versions of jwst #6055

Merged
8 changes: 7 additions & 1 deletion CHANGES.rst
@@ -11,7 +11,7 @@ combine_1d

- Added SRCTYPE to COMBINE1D output extension headers, propagated from
EXTRACT1D inputs [#6079]

cube_build
----------

@@ -40,6 +40,12 @@ lib
- Updated set_telescope_pointing to populate ENGQLPTG keyword with new
allowed values [#6088]

scripts
-------

- Added migrate_data command with support for migrating spec_table in
  x1d files produced with versions <= 1.1.0 of this package. [#6055]

1.2.0 (2021-05-24)
==================

4 changes: 2 additions & 2 deletions docs/jwst/data_products/index.rst
@@ -5,12 +5,12 @@ Data Products Information

.. toctree::
:maxdepth: 3

stages.rst
file_naming.rst
product_types.rst
common_features.rst
science_products.rst
nonscience_products.rst
guidestar_products.rst

migrating.rst
30 changes: 30 additions & 0 deletions docs/jwst/data_products/migrating.rst
@@ -0,0 +1,30 @@
Migrating deprecated products
-----------------------------

On rare occasions, the model schemas change in a way that breaks
compatibility with data products produced by earlier versions
of this package. When such older files are opened, the software
reports validation errors:

.. doctest-skip::

>>> from jwst import datamodels
>>> datamodels.open("jw95115001001_02102_00001_nrs1_x1d.fits")
...
ValueError: Column names don't match schema...

In some cases it will be possible to update the file to the
new format using the ``migrate_data`` tool included with this package:
::

$ migrate_data jw95115001001_02102_00001_nrs1_x1d.fits --in-place

It can also be run on multiple files:
::

$ migrate_data *_x1d.fits --in-place

Or configured to write updated files to a separate output directory:
::

$ migrate_data *_x1d.fits --output-dir some/other/directory
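
Once migrated, a file can be checked programmatically the same way the
failure was demonstrated above. A minimal sketch, assuming the model's
``validate()`` method (illustrative, not part of this changeset):

.. doctest-skip::

    >>> from jwst import datamodels
    >>> with datamodels.open("jw95115001001_02102_00001_nrs1_x1d.fits") as model:
    ...     model.validate()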
10 changes: 4 additions & 6 deletions jwst/regtest/conftest.py
@@ -182,20 +182,18 @@ def generate_upload_schema(pattern, target, recursive=False):
def _rtdata_fixture_implementation(artifactory_repos, envopt, request):
"""Provides the RemoteResource class"""
inputs_root, results_root = artifactory_repos
-    rtdata = RegtestData(env=envopt, inputs_root=inputs_root,
-                         results_root=results_root)
-
-    yield rtdata
+    return RegtestData(env=envopt, inputs_root=inputs_root,
+                       results_root=results_root)


@pytest.fixture(scope='function')
def rtdata(artifactory_repos, envopt, request, _jail):
-    yield from _rtdata_fixture_implementation(artifactory_repos, envopt, request)
+    return _rtdata_fixture_implementation(artifactory_repos, envopt, request)


@pytest.fixture(scope='module')
def rtdata_module(artifactory_repos, envopt, request, jail):
-    yield from _rtdata_fixture_implementation(artifactory_repos, envopt, request)
+    return _rtdata_fixture_implementation(artifactory_repos, envopt, request)


@pytest.fixture
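A note on the fixture change above: a pytest fixture needs ``yield`` (or
``yield from``) only when teardown must run after the test; when there is
nothing to clean up, a plain ``return`` is equivalent. A minimal sketch,
illustrative rather than part of this diff:

    import pytest

    @pytest.fixture
    def plain_resource():
        # No teardown required, so a plain return suffices.
        return {"key": "value"}

    @pytest.fixture
    def resource_with_teardown(tmp_path):
        # Teardown must run after the test, so yield is required.
        handle = open(tmp_path / "data.txt", "w")
        yield handle
        handle.close()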
20 changes: 10 additions & 10 deletions jwst/regtest/regtestdata.py
@@ -34,7 +34,7 @@ def __init__(self, env="dev", inputs_root="jwst-pipeline",
input=None, input_remote=None, output=None, truth=None,
truth_remote=None, remote_results_path=None, test_name=None,
traceback=None, **kwargs):
-        self._env = env
+        self.env = env
self._inputs_root = inputs_root
self._results_root = results_root
self._bigdata_root = get_bigdata_root()
@@ -142,9 +142,9 @@ def get_data(self, path=None, docopy=None):
self.input_remote = path
if docopy is None:
docopy = self.docopy
-        self.input = get_bigdata(self._inputs_root, self._env, path,
+        self.input = get_bigdata(self._inputs_root, self.env, path,
                                  docopy=docopy)
-        self.input_remote = os.path.join(self._inputs_root, self._env, path)
+        self.input_remote = os.path.join(self._inputs_root, self.env, path)

return self.input

@@ -161,13 +161,13 @@ def data_glob(self, path=None, glob='*', docopy=None):
# is a local path or URL.
root = self.bigdata_root
if op.exists(root):
-            root_path = op.join(root, self._inputs_root, self._env)
+            root_path = op.join(root, self._inputs_root, self.env)
root_len = len(root_path) + 1
path = op.join(root_path, path)
file_paths = _data_glob_local(path, glob)
elif check_url(root):
-            root_len = len(self._env) + 1
-            file_paths = _data_glob_url(self._inputs_root, self._env, path, glob, root=root)
+            root_len = len(self.env) + 1
+            file_paths = _data_glob_url(self._inputs_root, self.env, path, glob, root=root)
else:
raise BigdataError('Path cannot be found: {}'.format(path))

@@ -192,9 +192,9 @@ def get_truth(self, path=None, docopy=None):
os.makedirs('truth', exist_ok=True)
os.chdir('truth')
try:
-            self.truth = get_bigdata(self._inputs_root, self._env, path,
+            self.truth = get_bigdata(self._inputs_root, self.env, path,
                                      docopy=docopy)
-            self.truth_remote = os.path.join(self._inputs_root, self._env, path)
+            self.truth_remote = os.path.join(self._inputs_root, self.env, path)
except BigdataError:
os.chdir('..')
raise
@@ -232,7 +232,7 @@ def get_asn(self, path=None, docopy=True, get_members=True):
docopy = self.docopy

# Get the association JSON file
-        self.input = get_bigdata(self._inputs_root, self._env, path,
+        self.input = get_bigdata(self._inputs_root, self.env, path,
                                  docopy=docopy)
with open(self.input) as fp:
asn = load_asn(fp)
@@ -245,7 +245,7 @@ def get_asn(self, path=None, docopy=True, get_members=True):
fullpath = os.path.join(
os.path.dirname(self.input_remote),
member['expname'])
-                get_bigdata(self._inputs_root, self._env, fullpath,
+                get_bigdata(self._inputs_root, self.env, fullpath,
                             docopy=self.docopy)

def to_asdf(self, path):
43 changes: 43 additions & 0 deletions jwst/regtest/test_migrate_data.py
@@ -0,0 +1,43 @@
"""
Tests of the migrate_data script, which attempts to update
files that have become invalid due to changes in model schemas
between versions of this package.

Example files are obtained from Artifactory truth data for
older releases.
"""
import subprocess

from astropy.io import fits
import pytest

from jwst import datamodels


@pytest.fixture(autouse=True)
def strict_validation(monkeypatch):
monkeypatch.setenv("STRICT_VALIDATION", "true")
yield


@pytest.mark.bigdata
@pytest.mark.parametrize("truth_path", [
"truth/test_miri_lrs_slit_spec2/jw00623032001_03102_00001_mirimage_x1d.fits",
"truth/test_nirspec_mos_spec2/f170lp-g235m_mos_observation-6-c0e0_001_dn_nrs1_mod_x1d.fits",
], ids=["miri-lrs-x1d", "nirspec-mos-x1d"])
def test_x1d_spec_table(truth_path, rtdata):
rtdata.env = "1.1.0"
rtdata.get_truth(truth_path)

# Confirm that the file doesn't initially validate
# (open with fits first so that the failed call to open doesn't leave behind an open file)
with fits.open(rtdata.truth, memmap=False) as hdu:
with pytest.raises(ValueError, match="Column names don't match schema"):
with datamodels.open(hdu):
pass

subprocess.check_call(["migrate_data", rtdata.truth, "--in-place"])

# Now the model should validate
with datamodels.open(rtdata.truth):
pass
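
As a usage note (based on jwst regression-test conventions rather than
anything in this diff), ``bigdata``-marked tests are skipped unless the
corresponding pytest option is supplied, for example:

    $ pytest jwst/regtest/test_migrate_data.py --bigdata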
142 changes: 142 additions & 0 deletions scripts/migrate_data
@@ -0,0 +1,142 @@
#!/usr/bin/env python3
"""
Migrate .fits files whose format has changed between jwst package versions.
"""
import argparse
from datetime import datetime
import os
import re
import traceback
import warnings

import asdf
from astropy.io import fits
import numpy as np
from packaging.specifiers import SpecifierSet

import jwst
from jwst import datamodels


def parse_args():
    parser = argparse.ArgumentParser(
        prog="migrate_data",
        description="migrate .fits files whose format has changed between jwst package versions",
    )

parser.add_argument("files", nargs="+", help="one or more .fits files")

output_group = parser.add_mutually_exclusive_group(required=True)
output_group.add_argument("--output-dir", help="write modified files to an output directory")
output_group.add_argument("--in-place", help="modify files in-place", action="store_true")

return parser.parse_args()


# If there get to be many of these we may want to move
# them to jwst.datamodels somewhere:

def migrate_spec_table_1_1_0(hdul):
"""
spectable.schema added additional columns and renamed
two columns.
"""
schema = asdf.schema.load_schema("http://stsci.edu/schemas/jwst_datamodel/spectable.schema")
dtype = asdf.tags.core.ndarray.asdf_datatype_to_numpy_dtype(schema["datatype"])
renamed_columns = {
"ERROR": "FLUX_ERROR",
"BERROR": "BKGD_ERROR",
}

for hdu in hdul:
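        # Rebuild each EXTRACT1D table as a zero-filled record array with the
        # current schema's dtype, copying existing columns across and applying
        # the two renames; columns new to the schema stay zero-filled.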
if hdu.name == "EXTRACT1D":
new_data = np.zeros(hdu.data.shape, dtype=dtype)
for column_name in hdu.data.dtype.names:
new_data[renamed_columns.get(column_name, column_name)] = hdu.data[column_name]
hdu.data = new_data


# The first key is a model class name, the second
# a jwst package version specifier. The value
# is a method that accepts an HDUList and modifies
# it in-place.
_MIGRATE_METHODS = {
"SpecModel": {
"> 0.13.1, <= 1.1.0": migrate_spec_table_1_1_0,
},
"MultiSpecModel": {
"> 0.13.1, <= 1.1.0": migrate_spec_table_1_1_0,
},
}
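
# For example, an x1d file written by jwst 1.1.0 with DATAMODL = "SpecModel"
# falls within "> 0.13.1, <= 1.1.0" and is handled by migrate_spec_table_1_1_0.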


def migrate_file(filename, args):
if args.in_place:
mode = "update"
else:
mode = "readonly"

with fits.open(filename, memmap=False, mode=mode) as hdul:
model_type = hdul[0].header.get("DATAMODL")
jwst_version = hdul[0].header.get("CAL_VER")

if not (model_type and jwst_version):
print(f"Unable to migrate {filename}: DATAMODL and CAL_VER keywords are required")
return

match = re.match(r'^[0-9]+\.[0-9]+\.[0-9]+', jwst_version)
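        # CAL_VER may carry a suffix (e.g. a dev release); keep only the
        # leading MAJOR.MINOR.PATCH portion for comparison against the
        # version specifiers in _MIGRATE_METHODS.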
if match is None:
print(f"Unable to migrate {filename}: CAL_VER not understood")
return
jwst_version = match.group(0)

if model_type not in _MIGRATE_METHODS:
print(f"Migration for {filename} DATAMODL {model_type} not implemented")
return

with warnings.catch_warnings():
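            # If the model already validates against the current schemas there
            # is nothing to migrate; ignore warnings so that only a raised
            # exception marks the file as invalid.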
warnings.simplefilter("ignore")
exception_raised = False
try:
getattr(datamodels, model_type)(hdul, strict_validation=True)
except Exception:
exception_raised = True
if not exception_raised:
print(f"{filename} is already valid")
return

migrate_method = next((m for s, m in _MIGRATE_METHODS[model_type].items() if jwst_version in SpecifierSet(s)), None)
if migrate_method is None:
print(f"Migration for {filename} CAL_VER {jwst_version} not implemented")
return

migrate_method(hdul)
hdul[0].header["HISTORY"] = f"Migrated with jwst {jwst.__version__} migrate_data script {datetime.utcnow().isoformat()}"

try:
getattr(datamodels, model_type)(hdul, strict_validation=True)
except Exception:
print(f"Migration for {filename} failed to produce a valid model:\n")
traceback.print_exc()
return

if args.in_place:
hdul.flush()
else:
output_filename = os.path.join(args.output_dir, os.path.basename(filename))
hdul.writeto(output_filename, checksum=True, overwrite=True)


def main():
args = parse_args()

if args.output_dir:
os.makedirs(args.output_dir, exist_ok=True)

for file in args.files:
try:
migrate_file(file, args)
except Exception:
print(f"Error migrating {file}:\n")
traceback.print_exc()


if __name__ == "__main__":
main()