From 9db6a9097e3f6594de29d60ee964f21354d3088e Mon Sep 17 00:00:00 2001 From: davidh-ssec Date: Tue, 12 Jun 2018 15:29:05 -0500 Subject: [PATCH 01/16] Update MultiScene to work with xarray/dask --- satpy/multiscene.py | 76 +++++++++++++++------------------------------ 1 file changed, 25 insertions(+), 51 deletions(-) diff --git a/satpy/multiscene.py b/satpy/multiscene.py index 6d3a1094c4..0b05697c2b 100644 --- a/satpy/multiscene.py +++ b/satpy/multiscene.py @@ -31,74 +31,48 @@ def stack(datasets): """First dataset at the bottom.""" - - base = Dataset(datasets[0], copy=True) + base = datasets[0].copy() for dataset in datasets[1:]: - base_mask = np.ma.getmaskarray(base) - other_mask = np.ma.getmaskarray(dataset) - base.mask = np.logical_and(base_mask, other_mask) - not_masked = np.logical_not(other_mask) - base[not_masked] = dataset[not_masked] - + base = base.where(dataset.isnull(), dataset) return base -def stack_time(datasets): - """Oldest time at the bottom.""" - - return stack(sorted(datasets, key=lambda x: x.info['start_time'])) - - class MultiScene(object): + """Container for multiple `Scene` objects.""" def __init__(self, layers): - self.layers = layers + """Initialize MultiScene and validate sub-scenes""" + self.scenes = layers + + @property + def loaded_dataset_ids(self): + """Union of all Dataset IDs loaded by all children.""" + return set(ds_id for scene in self.scenes for ds_id in scene.keys()) + + @property + def shared_dataset_ids(self): + """Dataset IDs shared by all children.""" + shared_ids = set(self.scenes[0].keys()) + for scene in self.scenes[1:]: + shared_ids &= set(scene.keys()) + return shared_ids def load(self, *args, **kwargs): """Load the required datasets from the multiple scenes.""" - for layer in self.layers: + for layer in self.scenes: layer.load(*args, **kwargs) def resample(self, destination, **kwargs): """Resample the multiscene.""" - return MultiScene([layer.resample(destination, **kwargs) for layer in self.layers]) + return self.__class__([scn.resample(destination, **kwargs) + for scn in self.scenes]) def blend(self, blend_function=stack): """Blend the datasets into one scene.""" scn = Scene() - common_datasets = None - for layer in self.layers: - if common_datasets is None: - common_datasets = set( - [dataset.id.name for dataset in layer]) - else: - common_datasets &= set( - [dataset.id.name for dataset in layer]) - for dataset_id in common_datasets: - datasets = [layer[dataset_id] for layer in self.layers] - scn[dataset_id] = blend_function(datasets) + common_datasets = self.shared_dataset_ids + for ds_id in common_datasets: + datasets = [scn[ds_id] for scn in self.scenes if ds_id in scn] + scn[ds_id] = blend_function(datasets) return scn - - -if __name__ == '__main__': - from datetime import datetime - from satpy.utils import debug_on - - debug_on() - scenes = [ - Scene(platform_name="Meteosat-10", sensor="seviri", - start_time=datetime(2016, 9, 6, 11, 0), - base_dir="/home/a001673/data/satellite/Meteosat-10/seviri/lvl1.5/2015/04/20/HRIT"), - - Scene(platform_name="SNPP", sensor="viirs", - start_time=datetime(2016, 9, 6, 10, 51), - end_time=datetime(2016, 9, 6, 11, 0), - base_dir="/home/a001673/data/satellite/Suomi-NPP/viirs/lvl1b/2015/03/11/SDR") - ] - - mscn = MultiScene(scenes) - mscn.load(['overview_sun']) - mscn = mscn.resample('eurol') - scn = mscn.blend() - scn.save_dataset('overview_sun', '/tmp/blend.png') From 3944ac48d1ec8d8bbde013e6c1a48cedc177d44f Mon Sep 17 00:00:00 2001 From: davidh-ssec Date: Tue, 12 Jun 2018 15:48:15 -0500 Subject: [PATCH 
02/16] Fix redefined 'scn' variable to make stickler happy --- satpy/multiscene.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/satpy/multiscene.py b/satpy/multiscene.py index 0b05697c2b..8db9ff02d7 100644 --- a/satpy/multiscene.py +++ b/satpy/multiscene.py @@ -69,10 +69,10 @@ def resample(self, destination, **kwargs): def blend(self, blend_function=stack): """Blend the datasets into one scene.""" - scn = Scene() + new_scn = Scene() common_datasets = self.shared_dataset_ids for ds_id in common_datasets: datasets = [scn[ds_id] for scn in self.scenes if ds_id in scn] - scn[ds_id] = blend_function(datasets) + new_scn[ds_id] = blend_function(datasets) - return scn + return new_scn From 5f89a7d383cec2e259ce70cae57e62f29f962649 Mon Sep 17 00:00:00 2001 From: davidh-ssec Date: Tue, 12 Jun 2018 17:06:57 -0500 Subject: [PATCH 03/16] Add fixes for saving a multiscene to a gif --- satpy/__init__.py | 1 + satpy/multiscene.py | 78 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 76 insertions(+), 3 deletions(-) diff --git a/satpy/__init__.py b/satpy/__init__.py index 884f5ba96b..f9fad8bec2 100644 --- a/satpy/__init__.py +++ b/satpy/__init__.py @@ -55,5 +55,6 @@ available_readers) # noqa from satpy.writers import available_writers # noqa from satpy.scene import Scene # noqa +from satpy.multiscene import MultiScene # noqa log = get_logger('satpy') diff --git a/satpy/multiscene.py b/satpy/multiscene.py index 8db9ff02d7..a8f9b48ed3 100644 --- a/satpy/multiscene.py +++ b/satpy/multiscene.py @@ -23,10 +23,12 @@ """MultiScene object to blend satellite data. """ -import numpy as np - -from satpy.dataset import Dataset +import logging +import dask.array as da from satpy.scene import Scene +from satpy.writers import get_enhanced_image + +log = logging.getLogger(__name__) def stack(datasets): @@ -57,6 +59,13 @@ def shared_dataset_ids(self): shared_ids &= set(scene.keys()) return shared_ids + @property + def all_same_area(self): + all_areas = [ds.attrs.get('area', None) + for scn in self.scenes for ds in scn] + all_areas = [area for area in all_areas if area is not None] + return all(all_areas[0] == area for area in all_areas[1:]) + def load(self, *args, **kwargs): """Load the required datasets from the multiple scenes.""" for layer in self.scenes: @@ -76,3 +85,66 @@ def blend(self, blend_function=stack): new_scn[ds_id] = blend_function(datasets) return new_scn + + def _get_animation_info(self, all_datasets, filename, fill_value=None): + """Determine filename and shape of animation to be created.""" + first_dataset = [ds for ds in all_datasets if ds is not None][0] + first_img = get_enhanced_image(first_dataset) + first_img_data = first_img._finalize(fill_value=fill_value)[0] + shape = tuple(first_img_data.sizes.get(dim_name) + for dim_name in ('y', 'x', 'bands')) + if fill_value is None and filename.endswith('gif'): + log.warning("Forcing fill value to '0' for GIF Luminance images") + fill_value = 0 + shape = shape[:2] + + this_fn = filename.format(**first_dataset.attrs) + return this_fn, shape, fill_value + + def save(self, filename, datasets=None, fps=10, fill_value=None, **kwargs): + """Helper method for saving to movie or GIF formats. + + Supported formats are dependent on the `imageio` library and are + determined by filename extension by default. + + By default all datasets available will be saved to individual files + using the first Scene's datasets metadata to format the filename + provided. 
If a dataset is not available from a Scene then a black + array is used instead (np.zeros(shape)). + + Args: + filename (str): Filename to save to. Can include python string + formatting keys from dataset ``.attrs`` + (ex. "{name}_{start_time:%Y%m%d_%H%M%S}.gif") + datasets (list): DatasetIDs to save (default: all datasets) + fill_value (int): Value to use instead of creating an alpha band. + fps (int): Frames per second for produced animation + **kwargs: Additional keyword arguments to pass to + `imageio.get_writer`. + + """ + import imageio + if not self.all_same_area: + raise ValueError("Sub-scenes must all be on the same area " + "(see the 'resample' method).") + + dataset_ids = datasets or self.loaded_dataset_ids + for dataset_id in dataset_ids: + all_datasets = [scn[dataset_id] for scn in self.scenes] + this_fn, shape, fill_value = self._get_animation_info( + all_datasets, filename, fill_value=fill_value) + writer = imageio.get_writer(this_fn, fps=fps, **kwargs) + + for ds in all_datasets: + if ds is None: + data = da.zeros(shape) + else: + img = get_enhanced_image(ds) + data, mode = img._finalize(fill_value=fill_value) + if data.ndim == 3: + # assume all other shapes are (y, x) + # we need arrays grouped by pixel so + # transpose if needed + data = data.transpose('y', 'x', 'bands') + writer.append_data(data.values) + writer.close() From 1bf8dd932e14ddb016142a672e138cc8c3821ed7 Mon Sep 17 00:00:00 2001 From: davidh-ssec Date: Tue, 12 Jun 2018 22:35:04 -0500 Subject: [PATCH 04/16] Add initial multiscene tests --- .travis.yml | 2 +- appveyor.yml | 2 +- satpy/multiscene.py | 21 ++-- satpy/readers/__init__.py | 8 ++ satpy/scene.py | 4 + satpy/tests/__init__.py | 3 +- satpy/tests/test_multiscene.py | 181 +++++++++++++++++++++++++++++++++ satpy/tests/test_scene.py | 6 +- setup.py | 2 + 9 files changed, 217 insertions(+), 12 deletions(-) create mode 100644 satpy/tests/test_multiscene.py diff --git a/.travis.yml b/.travis.yml index 8306b9ad26..496bd4b6cd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ env: - MAIN_CMD='python setup.py' # 'krb5' is added as a workaround for an issue with gdal # see: https://github.com/conda-forge/gdal-feedstock/issues/205 - - CONDA_DEPENDENCIES='xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coveralls coverage codecov behave netcdf4 h5py h5netcdf gdal rasterio pyhdf mock krb5' + - CONDA_DEPENDENCIES='xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coveralls coverage codecov behave netcdf4 h5py h5netcdf gdal rasterio imageio pyhdf mock krb5' - PIP_DEPENDENCIES='trollsift trollimage pyspectral pyorbital' - SETUP_XVFB=False - EVENT_TYPE='push pull_request' diff --git a/appveyor.yml b/appveyor.yml index 13ba638350..842cd86903 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -3,7 +3,7 @@ environment: PYTHON: "C:\\conda" MINICONDA_VERSION: "latest" CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci-helpers\\appveyor\\windows_sdk.cmd" - CONDA_DEPENDENCIES: "xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coverage netcdf4 h5py h5netcdf gdal rasterio pyhdf mock" + CONDA_DEPENDENCIES: "xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coverage netcdf4 h5py h5netcdf gdal rasterio imageio pyhdf mock" PIP_DEPENDENCIES: "trollsift trollimage pyspectral pyorbital" CONDA_CHANNELS: "conda-forge" diff --git a/satpy/multiscene.py b/satpy/multiscene.py index a8f9b48ed3..0b05697c2b 100644 --- a/satpy/multiscene.py +++
b/satpy/multiscene.py @@ -25,6 +25,7 @@ import logging import dask.array as da +import xarray as xr from satpy.scene import Scene from satpy.writers import get_enhanced_image @@ -42,9 +43,9 @@ def stack(datasets): class MultiScene(object): """Container for multiple `Scene` objects.""" - def __init__(self, layers): + def __init__(self, scenes=None): """Initialize MultiScene and validate sub-scenes""" - self.scenes = layers + self.scenes = scenes or [] @property def loaded_dataset_ids(self): @@ -101,7 +102,8 @@ def _get_animation_info(self, all_datasets, filename, fill_value=None): this_fn = filename.format(**first_dataset.attrs) return this_fn, shape, fill_value - def save(self, filename, datasets=None, fps=10, fill_value=None, **kwargs): + def save(self, filename, datasets=None, fps=10, fill_value=None, + ignore_missing=False, **kwargs): """Helper method for saving to movie or GIF formats. Supported formats are dependent on the `imageio` library and are @@ -117,8 +119,10 @@ def save(self, filename, datasets=None, fps=10, fill_value=None, **kwargs): formatting keys from dataset ``.attrs`` (ex. "{name}_{start_time:%Y%m%d_%H%M%S}.gif") datasets (list): DatasetIDs to save (default: all datasets) - fill_value (int): Value to use instead of creating an alpha band. fps (int): Frames per second for produced animation + fill_value (int): Value to use instead of creating an alpha band. + ignore_missing (bool): Don't include a black frame when a dataset + is missing from a child scene. **kwargs: Additional keyword arguments to pass to `imageio.get_writer`. @@ -130,14 +134,17 @@ def save(self, filename, datasets=None, fps=10, fill_value=None, **kwargs): dataset_ids = datasets or self.loaded_dataset_ids for dataset_id in dataset_ids: - all_datasets = [scn[dataset_id] for scn in self.scenes] + all_datasets = [scn.datasets.get(dataset_id) for scn in self.scenes] this_fn, shape, fill_value = self._get_animation_info( all_datasets, filename, fill_value=fill_value) writer = imageio.get_writer(this_fn, fps=fps, **kwargs) for ds in all_datasets: - if ds is None: - data = da.zeros(shape) + if ds is None and ignore_missing: + continue + elif ds is None: + data = da.zeros(shape, chunks=shape) + data = xr.DataArray(data) else: img = get_enhanced_image(ds) data, mode = img._finalize(fill_value=fill_value) diff --git a/satpy/readers/__init__.py b/satpy/readers/__init__.py index 97f96e45b0..c66da4fb06 100644 --- a/satpy/readers/__init__.py +++ b/satpy/readers/__init__.py @@ -296,6 +296,14 @@ def __getitem__(self, item): key = self.get_key(item) return super(DatasetDict, self).__getitem__(key) + def get(self, key, default=None): + """Get value with optional default.""" + try: + key = self.get_key(key) + except KeyError: + return default + return super(DatasetDict, self).get(key, default) + def __setitem__(self, key, value): """Support assigning 'Dataset' objects or dictionaries of metadata.
""" diff --git a/satpy/scene.py b/satpy/scene.py index f41fcab65d..39c6feb6f3 100644 --- a/satpy/scene.py +++ b/satpy/scene.py @@ -561,6 +561,10 @@ def crop(self, area=None, ll_bbox=None, xy_bbox=None, dataset_ids=None): return new_scn + def get(self, key, default=None): + """Return value from DatasetDict with optional default.""" + return self.datasets.get(key, default) + def __getitem__(self, key): """Get a dataset or create a new 'slice' of the Scene.""" if isinstance(key, tuple) and not isinstance(key, DatasetID): diff --git a/satpy/tests/__init__.py b/satpy/tests/__init__.py index 1197d4971b..b0d3d462e4 100644 --- a/satpy/tests/__init__.py +++ b/satpy/tests/__init__.py @@ -29,7 +29,7 @@ test_readers, test_resample, test_scene, test_utils, test_writers, test_yaml_reader, writer_tests, - test_enhancements, compositor_tests) + test_enhancements, compositor_tests, test_multiscene) if sys.version_info < (2, 7): @@ -56,6 +56,7 @@ def suite(): mysuite.addTests(test_utils.suite()) mysuite.addTests(test_enhancements.suite()) mysuite.addTests(compositor_tests.suite()) + mysuite.addTests(test_multiscene.suite()) return mysuite diff --git a/satpy/tests/test_multiscene.py b/satpy/tests/test_multiscene.py new file mode 100644 index 0000000000..d71fdc03fd --- /dev/null +++ b/satpy/tests/test_multiscene.py @@ -0,0 +1,181 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2018. +# +# Author(s): +# +# David Hoese +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +"""Unit tests for multiscene.py. +""" + +import os +import sys +import tempfile +import shutil + +if sys.version_info < (2, 7): + import unittest2 as unittest +else: + import unittest + +try: + from unittest import mock +except ImportError: + import mock + +DEFAULT_SHAPE = (5, 10) + + +def _fake_get_enhanced_image(img): + from trollimage.xrimage import XRImage + return XRImage(img) + + +def _create_test_area(proj_str=None, shape=DEFAULT_SHAPE, extents=None): + """Create a test area definition.""" + from pyresample.geometry import AreaDefinition + from pyresample.utils import proj4_str_to_dict + if proj_str is None: + proj_str = '+proj=lcc +datum=WGS84 +ellps=WGS84 +lon_0=-95. ' \ + '+lat_0=25 +lat_1=25 +units=m +no_defs' + proj_dict = proj4_str_to_dict(proj_str) + extents = extents or (-1000., -1500., 1000., 1500.) 
+ + return AreaDefinition( + 'test', + 'test', + 'test', + proj_dict, + x_size=shape[1], + y_size=shape[0], + area_extent=extents + ) + + +def _create_test_dataset(name, shape=DEFAULT_SHAPE, area=None): + """Create a test DataArray object.""" + import xarray as xr + import dask.array as da + import numpy as np + + return xr.DataArray( + da.zeros(shape, dtype=np.float32, chunks=shape), dims=('y', 'x'), + attrs={'name': name, 'area': area}) + + +def _create_test_scenes(num_scenes=2, shape=DEFAULT_SHAPE, area=None): + """Helper to create some test scenes.""" + from satpy import Scene + ds1 = _create_test_dataset('ds1', shape=shape, area=area) + ds2 = _create_test_dataset('ds2', shape=shape, area=area) + scenes = [] + for scn_idx in range(num_scenes): + scn = Scene() + scn['ds1'] = ds1 + scn['ds2'] = ds2 + scenes.append(scn) + return scenes + + +class TestMultiScene(unittest.TestCase): + """Test basic functionality of MultiScene.""" + + def test_init_empty(self): + """Test creating a multiscene with no children.""" + from satpy import MultiScene + mscn = MultiScene() + + def test_init_children(self): + """Test creating a multiscene with children.""" + from satpy import MultiScene + scenes = _create_test_scenes() + mscn = MultiScene(scenes) + + def test_properties(self): + """Test basic properties/attributes of the MultiScene.""" + from satpy import MultiScene, DatasetID + + area = _create_test_area() + scenes = _create_test_scenes(area=area) + ds1_id = DatasetID(name='ds1') + ds2_id = DatasetID(name='ds2') + ds3_id = DatasetID(name='ds3') + ds4_id = DatasetID(name='ds4') + + # Add a dataset to only one of the Scenes + scenes[1]['ds3'] = _create_test_dataset('ds3') + mscn = MultiScene(scenes) + + self.assertSetEqual(mscn.loaded_dataset_ids, + {ds1_id, ds2_id, ds3_id}) + self.assertSetEqual(mscn.shared_dataset_ids, {ds1_id, ds2_id}) + self.assertTrue(mscn.all_same_area) + + bigger_area = _create_test_area(shape=(20, 40)) + scenes[0]['ds4'] = _create_test_dataset('ds4', shape=(20, 40), + area=bigger_area) + + self.assertSetEqual(mscn.loaded_dataset_ids, + {ds1_id, ds2_id, ds3_id, ds4_id}) + self.assertSetEqual(mscn.shared_dataset_ids, {ds1_id, ds2_id}) + self.assertFalse(mscn.all_same_area) + + +class TestMultiSceneSave(unittest.TestCase): + """Test saving a MultiScene to various formats.""" + + def setUp(self): + """Create temporary directory to save files to""" + self.base_dir = tempfile.mkdtemp() + + def tearDown(self): + """Remove the temporary directory created for a test""" + try: + shutil.rmtree(self.base_dir, ignore_errors=True) + except OSError: + pass + + @mock.patch('satpy.multiscene.get_enhanced_image', _fake_get_enhanced_image) + def test_save_mp4(self): + """Save a series of fake scenes to an mp4 video.""" + from satpy import MultiScene, DatasetID + + area = _create_test_area() + scenes = _create_test_scenes(area=area) + ds1_id = DatasetID(name='ds1') + ds2_id = DatasetID(name='ds2') + ds3_id = DatasetID(name='ds3') + ds4_id = DatasetID(name='ds4') + + # Add a dataset to only one of the Scenes + scenes[1]['ds3'] = _create_test_dataset('ds3') + mscn = MultiScene(scenes) + fn = os.path.join(self.base_dir, 'test_save_mp4.mp4') + mscn.save(fn) + + +def suite(): + """The test suite for test_multiscene.""" + loader = unittest.TestLoader() + mysuite = unittest.TestSuite() + mysuite.addTest(loader.loadTestsFromTestCase(TestMultiScene)) + mysuite.addTest(loader.loadTestsFromTestCase(TestMultiSceneSave)) + + return mysuite + + +if __name__ == "__main__": + unittest.main() diff --git 
a/satpy/tests/test_scene.py b/satpy/tests/test_scene.py index 9202d8963b..b7558d3ff2 100644 --- a/satpy/tests/test_scene.py +++ b/satpy/tests/test_scene.py @@ -229,6 +229,8 @@ def test_getitem(self): self.assertIs(scene['2'], ds2) self.assertIs(scene['3'], ds3) self.assertRaises(KeyError, scene.__getitem__, '4') + self.assertIs(scene.get('3'), ds3) + self.assertIs(scene.get('4'), None) def test_getitem_modifiers(self): """Test __getitem__ with names and modifiers""" @@ -1657,8 +1659,7 @@ def test_save_dataset_default(self): def suite(): - """The test suite for test_scene. - """ + """The test suite for test_scene.""" loader = unittest.TestLoader() mysuite = unittest.TestSuite() mysuite.addTest(loader.loadTestsFromTestCase(TestScene)) @@ -1668,5 +1669,6 @@ def suite(): return mysuite + if __name__ == "__main__": unittest.main() diff --git a/setup.py b/setup.py index a1f3f4d990..acad93a36f 100644 --- a/setup.py +++ b/setup.py @@ -70,6 +70,8 @@ # Writers: 'scmi': ['netCDF4 >= 1.1.8'], 'geotiff': ['gdal', 'trollimage[geotiff]'], + # MultiScene: + 'animations': ['imageio'], } all_extras = [] for extra_deps in extras_require.values(): From d68ba1771dc98e937b6ca9476dfdf1b5b237ec46 Mon Sep 17 00:00:00 2001 From: davidh-ssec Date: Tue, 12 Jun 2018 22:39:12 -0500 Subject: [PATCH 05/16] Fix unused variables in multiscene tests --- satpy/tests/test_multiscene.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/satpy/tests/test_multiscene.py b/satpy/tests/test_multiscene.py index d71fdc03fd..87d6222638 100644 --- a/satpy/tests/test_multiscene.py +++ b/satpy/tests/test_multiscene.py @@ -96,13 +96,13 @@ class TestMultiScene(unittest.TestCase): def test_init_empty(self): """Test creating a multiscene with no children.""" from satpy import MultiScene - mscn = MultiScene() + MultiScene() def test_init_children(self): """Test creating a multiscene with children.""" from satpy import MultiScene scenes = _create_test_scenes() - mscn = MultiScene(scenes) + MultiScene(scenes) def test_properties(self): """Test basic properties/attributes of the MultiScene.""" @@ -126,7 +126,7 @@ def test_properties(self): bigger_area = _create_test_area(shape=(20, 40)) scenes[0]['ds4'] = _create_test_dataset('ds4', shape=(20, 40), - area=bigger_area) + area=bigger_area) self.assertSetEqual(mscn.loaded_dataset_ids, {ds1_id, ds2_id, ds3_id, ds4_id}) @@ -155,10 +155,6 @@ def test_save_mp4(self): area = _create_test_area() scenes = _create_test_scenes(area=area) - ds1_id = DatasetID(name='ds1') - ds2_id = DatasetID(name='ds2') - ds3_id = DatasetID(name='ds3') - ds4_id = DatasetID(name='ds4') # Add a dataset to only one of the Scenes scenes[1]['ds3'] = _create_test_dataset('ds3') From 4db359a2f8bf10d4dca659e0b049cb74970c8216 Mon Sep 17 00:00:00 2001 From: davidh-ssec Date: Tue, 12 Jun 2018 22:40:17 -0500 Subject: [PATCH 06/16] Fix unused import --- satpy/tests/test_multiscene.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/satpy/tests/test_multiscene.py b/satpy/tests/test_multiscene.py index 87d6222638..0c8b5d860f 100644 --- a/satpy/tests/test_multiscene.py +++ b/satpy/tests/test_multiscene.py @@ -151,8 +151,7 @@ def tearDown(self): @mock.patch('satpy.multiscene.get_enhanced_image', _fake_get_enhanced_image) def test_save_mp4(self): """Save a series of fake scenes to an mp4 video.""" - from satpy import MultiScene, DatasetID - + from satpy import MultiScene area = _create_test_area() scenes = _create_test_scenes(area=area) From 3da41a7dbf8c4275102fe1401373f4686c909797 Mon Sep 17 
00:00:00 2001 From: davidh-ssec Date: Tue, 12 Jun 2018 22:55:52 -0500 Subject: [PATCH 07/16] Add ffmpeg to test requirements Will likely mock it in the future --- .travis.yml | 2 +- appveyor.yml | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 496bd4b6cd..61f5773245 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ env: - MAIN_CMD='python setup.py' # 'krb5' is added as a workaround for an issue with gdal # see: https://github.com/conda-forge/gdal-feedstock/issues/205 - - CONDA_DEPENDENCIES='xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coveralls coverage codecov behave netcdf4 h5py h5netcdf gdal rasterio imageio pyhdf mock krb5' + - CONDA_DEPENDENCIES='xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coveralls coverage codecov behave netcdf4 h5py h5netcdf gdal rasterio imageio ffmpeg pyhdf mock krb5' - PIP_DEPENDENCIES='trollsift trollimage pyspectral pyorbital' - SETUP_XVFB=False - EVENT_TYPE='push pull_request' diff --git a/appveyor.yml b/appveyor.yml index 842cd86903..3bfaa4bdcb 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -3,7 +3,7 @@ environment: PYTHON: "C:\\conda" MINICONDA_VERSION: "latest" CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci-helpers\\appveyor\\windows_sdk.cmd" - CONDA_DEPENDENCIES: "xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coverage netcdf4 h5py h5netcdf gdal rasterio imageio pyhdf mock" + CONDA_DEPENDENCIES: "xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coverage netcdf4 h5py h5netcdf gdal rasterio imageio ffmpeg pyhdf mock" PIP_DEPENDENCIES: "trollsift trollimage pyspectral pyorbital" CONDA_CHANNELS: "conda-forge" diff --git a/setup.py b/setup.py index acad93a36f..0d5e72118f 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ 'dask[array] >=0.17.1'] # pyhdf (conda) == python-hdf4 (pip) -test_requires = ['behave', 'h5py', 'netCDF4', 'pyhdf'] +test_requires = ['behave', 'h5py', 'netCDF4', 'pyhdf', 'imageio', 'ffmpeg'] if sys.version < '3.0': test_requires.append('mock') From 713167a67b1543c65febe0de9b07d6fb2522ef24 Mon Sep 17 00:00:00 2001 From: davidh-ssec Date: Tue, 12 Jun 2018 23:08:55 -0500 Subject: [PATCH 08/16] Remove ffmpeg from setup.py requirements (we want the command line tool) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0d5e72118f..c0621a2094 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ 'dask[array] >=0.17.1'] # pyhdf (conda) == python-hdf4 (pip) -test_requires = ['behave', 'h5py', 'netCDF4', 'pyhdf', 'imageio', 'ffmpeg'] +test_requires = ['behave', 'h5py', 'netCDF4', 'pyhdf', 'imageio'] if sys.version < '3.0': test_requires.append('mock') From c738b8a61c12dc5f09ec17beedff00fc17204ce4 Mon Sep 17 00:00:00 2001 From: davidh-ssec Date: Tue, 12 Jun 2018 23:16:38 -0500 Subject: [PATCH 09/16] Remove ffmpeg from test environment --- .travis.yml | 2 +- appveyor.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 61f5773245..496bd4b6cd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ env: - MAIN_CMD='python setup.py' # 'krb5' is added as a workaround for an issue with gdal # see: https://github.com/conda-forge/gdal-feedstock/issues/205 - - CONDA_DEPENDENCIES='xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coveralls coverage codecov behave netcdf4 h5py 
h5netcdf gdal rasterio imageio ffmpeg pyhdf mock krb5' + - CONDA_DEPENDENCIES='xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coveralls coverage codecov behave netcdf4 h5py h5netcdf gdal rasterio imageio pyhdf mock krb5' - PIP_DEPENDENCIES='trollsift trollimage pyspectral pyorbital' - SETUP_XVFB=False - EVENT_TYPE='push pull_request' diff --git a/appveyor.yml b/appveyor.yml index 3bfaa4bdcb..842cd86903 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -3,7 +3,7 @@ environment: PYTHON: "C:\\conda" MINICONDA_VERSION: "latest" CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci-helpers\\appveyor\\windows_sdk.cmd" - CONDA_DEPENDENCIES: "xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coverage netcdf4 h5py h5netcdf gdal rasterio imageio ffmpeg pyhdf mock" + CONDA_DEPENDENCIES: "xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coverage netcdf4 h5py h5netcdf gdal rasterio imageio pyhdf mock" PIP_DEPENDENCIES: "trollsift trollimage pyspectral pyorbital" CONDA_CHANNELS: "conda-forge" From f8c264686d7b3b5eb7591901e0780cbdaad33dc8 Mon Sep 17 00:00:00 2001 From: davidh-ssec Date: Tue, 12 Jun 2018 23:17:13 -0500 Subject: [PATCH 10/16] Add cascaded_compute dask utility function --- satpy/multiscene.py | 52 ++++++++++++++++++++++++++++++++-- satpy/tests/test_multiscene.py | 9 +++++- 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/satpy/multiscene.py b/satpy/multiscene.py index c7f0b751c7..ad31695cba 100644 --- a/satpy/multiscene.py +++ b/satpy/multiscene.py @@ -24,14 +24,55 @@ """ import logging +import dask import dask.array as da import xarray as xr from satpy.scene import Scene from satpy.writers import get_enhanced_image +try: + import imageio +except ImportError: + imageio = None + log = logging.getLogger(__name__) +def cascaded_compute(callback, arrays, optimize=True): + """Dask helper function for iterating over computed dask arrays. + + Args: + callback (callable): Called with a single numpy array computed from + the provided dask arrays. + arrays (list, tuple): Dask arrays to pass to callback. + optimize (bool): Whether to try to optimize the dask graphs of the + provided arrays. + + Returns: `dask.Delayed` object to be computed + + """ + if optimize: + # optimize Dask graph over all objects + dsk = da.Array.__dask_optimize__( + # combine all Dask Array graphs + dask.sharedict.merge(*[e.__dask_graph__() for e in arrays]), + # get Dask Array keys in result + list(dask.core.flatten([e.__dask_keys__() for e in arrays])) + ) + # rebuild Dask Arrays + arrays = [da.Array(dsk, e.name, e.chunks, e.dtype) for e in arrays] + + def _callback_wrapper(arr, cb=callback, previous_call=None): + del previous_call # used only for task ordering + return cb(arr) + + current_write = None + for dask_arr in arrays: + current_write = dask.delayed(_callback_wrapper)( + dask_arr, previous_call=current_write) + return current_write + + def stack(datasets): """First dataset at the bottom.""" base = datasets[0].copy() @@ -127,18 +168,24 @@ def save(self, filename, datasets=None, fps=10, fill_value=None, `imageio.get_writer`. 
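Example (an illustrative call only; ``mscn`` is assumed to be an
already-loaded MultiScene, and the filename pattern and ``fps``
value are arbitrary)::

    >>> mscn.save('{name}_{start_time:%Y%m%d_%H%M%S}.mp4', fps=2)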
""" - import imageio if not self.all_same_area: raise ValueError("Sub-scenes must all be on the same area " "(see the 'resample' method).") + if imageio is None: + raise ImportError("Missing required 'imageio' library") dataset_ids = datasets or self.loaded_dataset_ids + for dataset_id in dataset_ids: all_datasets = [scn.datasets.get(dataset_id) for scn in self.scenes] this_fn, shape, fill_value = self._get_animation_info( all_datasets, filename, fill_value=fill_value) writer = imageio.get_writer(this_fn, fps=fps, **kwargs) + def _append_wrapper(data): + writer.append_data(data) + + data_to_write = [] for ds in all_datasets: if ds is None and ignore_missing: continue @@ -153,5 +200,6 @@ def save(self, filename, datasets=None, fps=10, fill_value=None, # we need arrays grouped by pixel so # transpose if needed data = data.transpose('y', 'x', 'bands') - writer.append_data(data.values) + data_to_write.append(data.data) + cascaded_compute(_append_wrapper, data_to_write).compute() writer.close() diff --git a/satpy/tests/test_multiscene.py b/satpy/tests/test_multiscene.py index 0c8b5d860f..1652972922 100644 --- a/satpy/tests/test_multiscene.py +++ b/satpy/tests/test_multiscene.py @@ -159,7 +159,14 @@ def test_save_mp4(self): scenes[1]['ds3'] = _create_test_dataset('ds3') mscn = MultiScene(scenes) fn = os.path.join(self.base_dir, 'test_save_mp4.mp4') - mscn.save(fn) + writer_mock = mock.MagicMock() + with mock.patch('satpy.multiscene.imageio.get_writer') as get_writer: + get_writer.return_value = writer_mock + mscn.save(fn) + + # 2 saves for the first scene + 1 black frame + # 3 for the second scene + self.assertEqual(writer_mock.append_data.call_count, 3 + 3) def suite(): From 3eace71c5b2332653d3011e2a38715a00ffbe5f6 Mon Sep 17 00:00:00 2001 From: davidh-ssec Date: Wed, 13 Jun 2018 10:06:37 -0500 Subject: [PATCH 11/16] Cleanup multiscene animation saving --- satpy/multiscene.py | 54 +++++++++++++++++++--------------- satpy/tests/test_multiscene.py | 29 +++++++++++++++--- 2 files changed, 55 insertions(+), 28 deletions(-) diff --git a/satpy/multiscene.py b/satpy/multiscene.py index ad31695cba..6657f281ae 100644 --- a/satpy/multiscene.py +++ b/satpy/multiscene.py @@ -130,7 +130,9 @@ def blend(self, blend_function=stack): def _get_animation_info(self, all_datasets, filename, fill_value=None): """Determine filename and shape of animation to be created.""" - first_dataset = [ds for ds in all_datasets if ds is not None][0] + valid_datasets = [ds for ds in all_datasets if ds is not None] + first_dataset = valid_datasets[0] + last_dataset = valid_datasets[-1] first_img = get_enhanced_image(first_dataset) first_img_data = first_img._finalize(fill_value=fill_value)[0] shape = tuple(first_img_data.sizes.get(dim_name) @@ -140,9 +142,31 @@ def _get_animation_info(self, all_datasets, filename, fill_value=None): fill_value = 0 shape = shape[:2] - this_fn = filename.format(**first_dataset.attrs) + attrs = first_dataset.attrs.copy() + if 'end_time' in last_dataset.attrs: + attrs['end_time'] = last_dataset.attrs['end_time'] + this_fn = filename.format(**attrs) return this_fn, shape, fill_value + def _get_animation_frames(self, all_datasets, shape, fill_value=None, + ignore_missing=False): + """Create enhanced image frames to save to a file.""" + for ds in all_datasets: + if ds is None and ignore_missing: + continue + elif ds is None: + data = da.zeros(shape, chunks=shape) + data = xr.DataArray(data) + else: + img = get_enhanced_image(ds) + data, mode = img._finalize(fill_value=fill_value) + if data.ndim == 3: + # 
assume all other shapes are (y, x) + # we need arrays grouped by pixel so + # transpose if needed + data = data.transpose('y', 'x', 'bands') + yield data.data + def save(self, filename, datasets=None, fps=10, fill_value=None, ignore_missing=False, **kwargs): """Helper method for saving to movie or GIF formats. @@ -175,31 +199,13 @@ def save(self, filename, datasets=None, fps=10, fill_value=None, raise ImportError("Missing required 'imageio' library") dataset_ids = datasets or self.loaded_dataset_ids - for dataset_id in dataset_ids: all_datasets = [scn.datasets.get(dataset_id) for scn in self.scenes] this_fn, shape, fill_value = self._get_animation_info( all_datasets, filename, fill_value=fill_value) - writer = imageio.get_writer(this_fn, fps=fps, **kwargs) + data_to_write = list(self._get_animation_frames( + all_datasets, shape, fill_value, ignore_missing)) - def _append_wrapper(data): - writer.append_data(data) - - data_to_write = [] - for ds in all_datasets: - if ds is None and ignore_missing: - continue - elif ds is None: - data = da.zeros(shape, chunks=shape) - data = xr.DataArray(data) - else: - img = get_enhanced_image(ds) - data, mode = img._finalize(fill_value=fill_value) - if data.ndim == 3: - # assume all other shapes are (y, x) - # we need arrays grouped by pixel so - # transpose if needed - data = data.transpose('y', 'x', 'bands') - data_to_write.append(data.data) - cascaded_compute(_append_wrapper, data_to_write).compute() + writer = imageio.get_writer(this_fn, fps=fps, **kwargs) + cascaded_compute(writer.append_data, data_to_write).compute() writer.close() diff --git a/satpy/tests/test_multiscene.py b/satpy/tests/test_multiscene.py index 1652972922..b64d0928eb 100644 --- a/satpy/tests/test_multiscene.py +++ b/satpy/tests/test_multiscene.py @@ -25,6 +25,7 @@ import sys import tempfile import shutil +from datetime import datetime if sys.version_info < (2, 7): import unittest2 as unittest @@ -84,8 +85,8 @@ def _create_test_scenes(num_scenes=2, shape=DEFAULT_SHAPE, area=None): scenes = [] for scn_idx in range(num_scenes): scn = Scene() - scn['ds1'] = ds1 - scn['ds2'] = ds2 + scn['ds1'] = ds1.copy() + scn['ds2'] = ds2.copy() scenes.append(scn) return scenes @@ -157,16 +158,36 @@ def test_save_mp4(self): # Add a dataset to only one of the Scenes scenes[1]['ds3'] = _create_test_dataset('ds3') + # Add a start and end time + for ds_id in ['ds1', 'ds2', 'ds3']: + scenes[1][ds_id].attrs['start_time'] = datetime(2018, 1, 2) + scenes[1][ds_id].attrs['end_time'] = datetime(2018, 1, 2, 12) + if ds_id == 'ds3': + continue + scenes[0][ds_id].attrs['start_time'] = datetime(2018, 1, 1) + scenes[0][ds_id].attrs['end_time'] = datetime(2018, 1, 1, 12) + mscn = MultiScene(scenes) - fn = os.path.join(self.base_dir, 'test_save_mp4.mp4') + fn = os.path.join( + self.base_dir, + 'test_save_mp4_{name}_{start_time:%Y%m%d_%H}_' + '{end_time:%Y%m%d_%H}.mp4') writer_mock = mock.MagicMock() with mock.patch('satpy.multiscene.imageio.get_writer') as get_writer: get_writer.return_value = writer_mock - mscn.save(fn) + # force order of datasets by specifying them + mscn.save(fn, datasets=['ds1', 'ds2', 'ds3']) # 2 saves for the first scene + 1 black frame # 3 for the second scene self.assertEqual(writer_mock.append_data.call_count, 3 + 3) + filenames = [os.path.basename(args[0][0]) for args in get_writer.call_args_list] + self.assertEqual(filenames[0], + 'test_save_mp4_ds1_20180101_00_20180102_12.mp4') + self.assertEqual(filenames[1], + 'test_save_mp4_ds2_20180101_00_20180102_12.mp4') + 
self.assertEqual(filenames[2], + 'test_save_mp4_ds3_20180102_00_20180102_12.mp4') def suite(): From 2eeeab07d9344704277ff0e706dd1345701e9532 Mon Sep 17 00:00:00 2001 From: davidh-ssec Date: Wed, 13 Jun 2018 12:09:03 -0500 Subject: [PATCH 12/16] Add multiscene documentation and fix a few bugs --- doc/source/conf.py | 5 ++-- doc/source/index.rst | 1 + doc/source/multiscene.rst | 58 +++++++++++++++++++++++++++++++++++ doc/source/quickstart.rst | 31 ++++++++++----------- satpy/multiscene.py | 45 ++++++++++++++++++++++-------- 5 files changed, 109 insertions(+), 31 deletions(-) create mode 100644 doc/source/multiscene.rst diff --git a/doc/source/conf.py b/doc/source/conf.py index 77a004c335..11095cadd9 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -235,6 +235,7 @@ def __getattr__(cls, name): 'https://docs.python.org/3.6': None, 'https://docs.scipy.org/doc/numpy': None, 'https://docs.scipy.org/doc/scipy/reference': None, - 'http://xarray.pydata.org/en/stable/': None, - 'http://pyresample.readthedocs.io/en/stable': None, + 'https://xarray.pydata.org/en/stable/': None, + 'https://dask.pydata.org/en/latest/': None, + 'https://pyresample.readthedocs.io/en/stable': None, } diff --git a/doc/source/index.rst b/doc/source/index.rst index 2528c8ccc9..9fb7387346 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -38,6 +38,7 @@ installation. composites resample writers + multiscene dev_guide/index SatPy API diff --git a/doc/source/multiscene.rst b/doc/source/multiscene.rst new file mode 100644 index 0000000000..8af212cc99 --- /dev/null +++ b/doc/source/multiscene.rst @@ -0,0 +1,58 @@ +MultiScene (Experimental) +========================= + +Scene objects in SatPy are meant to represent a single geographic region at +a specific single instant in time or range of time. This means they are not +suited for handling multiple orbits of polar-orbiting satellite data, +multiple time steps of geostationary satellite data, or other special data +cases. To handle these cases SatPy provides the `MultiScene` class. The below +examples will walk through some basic use cases of the MultiScene. + +.. warning:: + + These features are still early in development and may change over time as + more user feedback is received and more features are added. + +Stacking scenes +--------------- + +The MultiScene makes it easy to take multiple Scenes and stack them on top of +each other. The code below takes two separate orbits from a VIIRS sensor and +stacks them on top of each other. + + >>> from satpy import Scene, MultiScene + >>> from glob import glob + >>> from pyresample.geometry import AreaDefinition + >>> my_area = AreaDefinition(...) + >>> scenes = [ + ... Scene(reader='viirs_sdr', filenames=glob('/data/viirs/day_1/*t180*.h5')), + ... Scene(reader='viirs_sdr', filenames=glob('/data/viirs/day_2/*t180*.h5')) + ... ] + >>> mscn = MultiScene(scenes) + >>> mscn.load(['I04']) + >>> new_mscn = mscn.resample(my_area) + >>> blended_scene = new_mscn.blend() + >>> blended_scene.save_datasets() + +Saving frames of an animation +----------------------------- + +The MultiScene can take "frames" of data and join them together in a single +animation movie file. Saving animations requires the `imageio` python library +and for most available formats the ``ffmpeg`` command line tool suite should +also be installed. The below example saves a series of GOES-EAST ABI channel +1 and channel 2 frames to MP4 movie files. + + >>> from satpy import Scene, MultiScene + >>> from glob import glob + >>> scenes = [ ...
Scene(reader='abi_l1b', filenames=[fn]) for fn in sorted(glob('/data/abi/day_1/*C0[12]*.nc')) + ... ] + >>> mscn = MultiScene(scenes) + >>> mscn.load(['C01', 'C02']) + >>> mscn.save('{name}_{start_time:%Y%m%d_%H%M%S}.mp4', fps=2) + +.. warning:: + + GIF images, although supported, are not recommended due to the large file + sizes that can be produced from only a few frames. diff --git a/doc/source/quickstart.rst b/doc/source/quickstart.rst index ae2e2befd8..9833c3f587 100644 --- a/doc/source/quickstart.rst +++ b/doc/source/quickstart.rst @@ -11,18 +11,17 @@ Loading data >>> sys.setdefaultencoding('utf8') To work with weather satellite data you must create a :class:`~satpy.scene.Scene` object. In order for SatPy to -get access to the data it must be told what files to read and what :ref:`SatPy Reader ` should read them:: +get access to the data it must be told what files to read and what :ref:`SatPy Reader ` should read them: >>> from satpy import Scene >>> from glob import glob >>> filenames = glob("/home/a001673/data/satellite/Meteosat-10/seviri/lvl1.5/2015/04/20/HRIT/*201504201000*") >>> global_scene = Scene(reader="hrit_msg", filenames=filenames) -To load data from the files use the :meth:`Scene.load ` method:: +To load data from the files use the :meth:`Scene.load ` method: >>> global_scene.load([0.6, 0.8, 10.8]) >>> print(global_scene) - seviri/IR_108: area: On-the-fly area start_time: 2015-04-20 10:00:00 @@ -47,10 +46,9 @@ SatPy allows loading file data by wavelengths in micrometers (shown above) or by >>> global_scene.load(["VIS006", "VIS008", "IR_108"]) To have a look at the available channels for loading from your :class:`~satpy.scene.Scene` object use the -:meth:`available_datasets ` method:: +:meth:`available_datasets ` method: >>> global_scene.available_dataset_names() - ['HRV', 'IR_108', 'IR_120', @@ -65,15 +63,15 @@ To have a look at the available channels for loading from your :class:`~satpy.sc 'WV_073'] -To access the loaded data use the wavelength or name:: +To access the loaded data use the wavelength or name: >>> print(global_scene[0.6]) -To visualize loaded data in a pop-up window:: +To visualize loaded data in a pop-up window: >>> global_scene.show(0.6) -To combine datasets and make a new dataset:: +To combine datasets and make a new dataset: >>> global_scene["ndvi"] = (global_scene[0.8] - global_scene[0.6]) / (global_scene[0.8] + global_scene[0.6]) >>> global_scene.show("ndvi") @@ -84,14 +82,13 @@ advanced loading methods see the :doc:`readers` documentation. Generating composites ===================== -SatPy comes with many composite recipes built-in and makes them loadable like any other dataset:: +SatPy comes with many composite recipes built-in and makes them loadable like any other dataset: >>> global_scene.load(['overview']) -To get a list of all available composites for the current scene:: +To get a list of all available composites for the current scene: >>> global_scene.available_composite_names() - ['overview_sun', 'airmass', 'natural', @@ -126,7 +123,7 @@ to a uniform grid, limiting input data to an area of interest, changing from one projection to another, or for preparing datasets to be combined in a composite (see above). For more details on resampling, different resampling algorithms, and creating your own area of interest see the -:doc:`resample` documentation.
To resample a SatPy Scene: >>> local_scene = global_scene.resample("eurol") @@ -134,7 +131,7 @@ This creates a copy of the original ``global_scene`` with all loaded datasets resampled to the built-in "eurol" area. Any composites that were requested, but could not be generated are automatically generated after resampling. The new ``local_scene`` can now be used like the original ``global_scene`` for -working with datasets, saving them to disk or showing them on screen:: +working with datasets, saving them to disk or showing them on screen: >>> local_scene.show('overview') >>> local_scene.save_dataset('overview', './local_overview.tif') @@ -142,15 +139,15 @@ working with datasets, saving them to disk or showing them on screen:: Saving to disk ============== -To save all loaded datasets to disk as geotiff images:: +To save all loaded datasets to disk as geotiff images: >>> global_scene.save_datasets() -To save all loaded datasets to disk as PNG images:: +To save all loaded datasets to disk as PNG images: >>> global_scene.save_datasets(writer='simple_image') -Or to save an individual dataset:: +Or to save an individual dataset: >>> global_scene.save_dataset('VIS006', 'my_nice_image.png') @@ -165,7 +162,7 @@ Troubleshooting Due to the way SatPy works, producing as many datasets as possible, there are times that behavior can be unexpected but with no exceptions raised. To help troubleshoot these situations log messages can be turned on. To do this run -the following code before running any other SatPy code:: +the following code before running any other SatPy code: >>> from satpy.utils import debug_on >>> debug_on() diff --git a/satpy/multiscene.py b/satpy/multiscene.py index 6657f281ae..d83d82a04f 100644 --- a/satpy/multiscene.py +++ b/satpy/multiscene.py @@ -24,6 +24,7 @@ """ import logging +import numpy as np import dask import dask.array as da import xarray as xr @@ -101,13 +102,22 @@ def shared_dataset_ids(self): shared_ids &= set(scene.keys()) return shared_ids - @property - def all_same_area(self): - all_areas = [ds.attrs.get('area', None) - for scn in self.scenes for ds in scn] + def _all_same_area(self, dataset_ids): + """Return True if all areas for the provided IDs are equal.""" + all_areas = [] + for ds_id in dataset_ids: + for scn in self.scenes: + ds = scn.get(ds_id) + if ds is None: + continue + all_areas.append(ds.attrs.get('area')) all_areas = [area for area in all_areas if area is not None] return all(all_areas[0] == area for area in all_areas[1:]) + @property + def all_same_area(self): + return self._all_same_area(self.loaded_dataset_ids) + def load(self, *args, **kwargs): """Load the required datasets from the multiple scenes.""" for layer in self.scenes: @@ -151,11 +161,12 @@ def _get_animation_info(self, all_datasets, filename, fill_value=None): def _get_animation_frames(self, all_datasets, shape, fill_value=None, ignore_missing=False): """Create enhanced image frames to save to a file.""" - for ds in all_datasets: + for idx, ds in enumerate(all_datasets): if ds is None and ignore_missing: continue elif ds is None: - data = da.zeros(shape, chunks=shape) + log.debug("Missing frame: %d", idx) + data = da.zeros(shape, dtype=np.uint8, chunks=shape) data = xr.DataArray(data) else: img = get_enhanced_image(ds) @@ -192,20 +203,30 @@ def save(self, filename, datasets=None, fps=10, fill_value=None, `imageio.get_writer`. 
""" - if not self.all_same_area: - raise ValueError("Sub-scenes must all be on the same area " - "(see the 'resample' method).") if imageio is None: raise ImportError("Missing required 'imageio' library") dataset_ids = datasets or self.loaded_dataset_ids + writers = [] + delayeds = [] for dataset_id in dataset_ids: + if not self._all_same_area([dataset_id]): + raise ValueError("Sub-scene datasets must all be on the same " + "area (see the 'resample' method).") + all_datasets = [scn.datasets.get(dataset_id) for scn in self.scenes] - this_fn, shape, fill_value = self._get_animation_info( + this_fn, shape, this_fill = self._get_animation_info( all_datasets, filename, fill_value=fill_value) data_to_write = list(self._get_animation_frames( - all_datasets, shape, fill_value, ignore_missing)) + all_datasets, shape, this_fill, ignore_missing)) writer = imageio.get_writer(this_fn, fps=fps, **kwargs) - cascaded_compute(writer.append_data, data_to_write).compute() + delayed = cascaded_compute(writer.append_data, data_to_write) + # Save delayeds and writers to compute and close later + delayeds.append(delayed) + writers.append(writer) + # compute all the datasets at once to combine any computations that + # can be shared + dask.compute(delayeds) + for writer in writers: writer.close() From 41d009addef0eb74067afae15c14117bfaff5867 Mon Sep 17 00:00:00 2001 From: davidh-ssec Date: Thu, 14 Jun 2018 09:49:49 -0500 Subject: [PATCH 13/16] Update documentation to remove mention of .info Addresses #178 --- doc/source/overview.rst | 43 ++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/doc/source/overview.rst b/doc/source/overview.rst index 4f43b66e0c..34d1aa68ce 100644 --- a/doc/source/overview.rst +++ b/doc/source/overview.rst @@ -27,28 +27,39 @@ single continuous time range. It is possible to combine Scenes to form a Scene with multiple regions or multiple time observations, but it is not guaranteed that all functionality works in these situations. -Datasets -======== - -SatPy's lowest-level container for data is the -:class:`~satpy.dataset.Dataset`. ``Dataset`` is a subclass of NumPy's -:class:`~numpy.ma.MaskedArray` with an additional ``.info`` dictionary -attribute for various metadata. In most use cases these objects can be -operated on like normal NumPy arrays with special care taken to make -sure the metadata dictionary contains expected values. - -.. warning:: - - Starting with version 0.9, SatPy will replace ``Dataset`` with the - :class:`xarray.DataArray` object. The main difference will be that the - ``.info`` metadata dictionary will be accessed via ``.attrs``. +DataArrays +========== -``Datasets`` are identified throughout SatPy by a ``DatasetID``. A +SatPy's lower-level container for data is the +:class:`xarray.DataArray`. For historical reasons DataArrays are often +referred to as "Datasets" in SatPy. These objects act similar to normal +numpy arrays, but add additional metadata and attributes for describing the +data. Metadata is stored in a ``.attrs`` dictionary and named dimensions can +be accessed in a ``.dims`` attribute, along with other attributes. +In most use cases these objects can be operated on like normal NumPy arrays +with special care taken to make sure the metadata dictionary contains +expected values. See the XArray documentation for more info on handling +:class:`xarray.DataArray` objects. 
+ +Additionally, SatPy uses a special form of DataArrays where data is stored +in :class:`dask.array.Array` objects which allows SatPy to perform +multi-threaded lazy operations vastly improving the performance of processing. +For help on developing with dask and xarray see +:doc:`dev_guide/xarray_migration` or the documentation for the specific +project. + +To uniquely identify ``DataArray`` objects SatPy uses `DatasetID`. A ``DatasetID`` consists of various pieces of available metadata. This usually includes `name` and `wavelength` as identifying metadata, but also includes `resolution`, `calibration`, `polarization`, and additional `modifiers` to further distinguish one dataset from another. +.. warning:: + + XArray includes other object types called "Datasets". These are different + from the "Datasets" mentioned in SatPy. + + Reading ======= From 9f6b7dee3c02d19c9f851863bb0a99ce3258d15e Mon Sep 17 00:00:00 2001 From: davidh-ssec Date: Thu, 14 Jun 2018 09:51:00 -0500 Subject: [PATCH 14/16] Add check in DatasetID to make sure modifiers are tuples See #323 --- satpy/dataset.py | 7 +++++++ satpy/tests/test_dataset.py | 21 ++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/satpy/dataset.py b/satpy/dataset.py index 46f40ab4e7..f4c042d630 100644 --- a/satpy/dataset.py +++ b/satpy/dataset.py @@ -138,6 +138,13 @@ class DatasetID(DatasetID): etc). `None` or empty tuple if not applicable. """ + def __new__(cls, *args, **kwargs): + ret = super(DatasetID, cls).__new__(cls, *args, **kwargs) + if ret.modifiers is not None and not isinstance(ret.modifiers, tuple): + raise TypeError("'DatasetID' modifiers must be a tuple or None, " + "not {}".format(type(ret.modifiers))) + return ret + @staticmethod def name_match(a, b): """Return if two string names are equal. 
diff --git a/satpy/tests/test_dataset.py b/satpy/tests/test_dataset.py index df5ee19b5b..803ea453ff 100644 --- a/satpy/tests/test_dataset.py +++ b/satpy/tests/test_dataset.py @@ -27,8 +27,27 @@ class TestDatasetID(unittest.TestCase): + + def test_basic_init(self): + """Test basic ways of creating a DatasetID.""" + from satpy.dataset import DatasetID + DatasetID(name="a") + DatasetID(name="a", wavelength=0.86) + DatasetID(name="a", resolution=1000) + DatasetID(name="a", calibration='radiance') + DatasetID(name="a", wavelength=0.86, resolution=250, + calibration='radiance') + DatasetID(name="a", wavelength=0.86, resolution=250, + calibration='radiance', modifiers=('sunz_corrected',)) + DatasetID(wavelength=0.86) + + def test_init_bad_modifiers(self): + """Test that modifiers are a tuple.""" + from satpy.dataset import DatasetID + self.assertRaises(TypeError, DatasetID, name="a", modifiers="str") + def test_compare_no_wl(self): - """Compare fully qualified wavelength ID to no wavelength ID""" + """Compare fully qualified wavelength ID to no wavelength ID.""" from satpy.dataset import DatasetID d1 = DatasetID(name="a", wavelength=(0.1, 0.2, 0.3)) d2 = DatasetID(name="a", wavelength=None) From fee0f87cf16145cba23ecdb193b318b4320c9e92 Mon Sep 17 00:00:00 2001 From: davidh-ssec Date: Thu, 14 Jun 2018 13:21:37 -0500 Subject: [PATCH 15/16] Add xarray migration document to sphinx docs --- doc/source/conf.py | 12 +- doc/source/dev_guide/xarray_migration.rst | 300 +++++++++++++++++++++- 2 files changed, 296 insertions(+), 16 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 11095cadd9..778ca41d96 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -232,10 +232,10 @@ def __getattr__(cls, name): # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { - 'https://docs.python.org/3.6': None, - 'https://docs.scipy.org/doc/numpy': None, - 'https://docs.scipy.org/doc/scipy/reference': None, - 'https://xarray.pydata.org/en/stable/': None, - 'https://dask.pydata.org/en/latest/': None, - 'https://pyresample.readthedocs.io/en/stable': None, + 'python': ('https://docs.python.org/3', None), + 'numpy': ('https://docs.scipy.org/doc/numpy', None), + 'scipy': ('https://docs.scipy.org/doc/scipy/reference', None), + 'xarray': ('https://xarray.pydata.org/en/stable', None), + 'dask': ('https://dask.pydata.org/en/latest', None), + 'pyresample': ('https://pyresample.readthedocs.io/en/stable', None), } diff --git a/doc/source/dev_guide/xarray_migration.rst b/doc/source/dev_guide/xarray_migration.rst index 5782ab9b10..2491d3b486 100644 --- a/doc/source/dev_guide/xarray_migration.rst +++ b/doc/source/dev_guide/xarray_migration.rst @@ -18,22 +18,302 @@ these libraries allow SatPy to easily distribute operations over multiple workers, lazy evaluate operations, and keep track additional metadata and coordinate information. -Lazy Operations -=============== +XArray +------ -.. todo:: +.. code-block:: python - Mention no inplace operations, compute multiple things at a time, etc. + import xarray as xr -Indexing -======== +:class:`XArray's DataArray ` is now the standard data +structure for arrays in satpy. They allow the array to define dimensions, +coordinates, and attributes (that we use for metadata). +To create such an array, you can do for example -Masks and fill values -===================== +.. 
code-block:: python + my_dataarray = xr.DataArray(my_data, dims=['y', 'x'], + coords={'x': np.arange(...)}, + attrs={'sensor': 'olci'}) where ``my_data`` can be a regular numpy array, a numpy memmap, or, if you want to keep things lazy, a dask array (more on dask later). SatPy uses dask arrays with all of its DataArrays. Dimensions ********** In satpy, the dimensions of the arrays should include: - `x` for the x or column or pixel dimension - `y` for the y or row or line dimension - `bands` for composites - `time` can also be provided, but we have limited support for it at the moment. Use metadata for common cases (`start_time`, `end_time`) Dimensions are accessible through :attr:`my_dataarray.dims <xarray.DataArray.dims>`. To get the size of a given dimension, use :attr:`~xarray.DataArray.sizes`: .. code-block:: python my_dataarray.sizes['x'] Coordinates *********** Coordinates can be defined for those dimensions when it makes sense: - `x` and `y`: Usually defined when the data's area is an :class:`~pyresample.geometry.AreaDefinition`, and they contain the projection coordinates in x and y. - `bands`: Contain the letter of the color they represent, e.g. ``['R', 'G', 'B']`` for an RGB composite. This then allows you to select, for example, a single band like this: .. code-block:: python red = my_composite.sel(bands='R') or even multiple bands: .. code-block:: python red_and_blue = my_composite.sel(bands=['R', 'B']) To access the coordinates of the data array, use the following syntax: .. code-block:: python x_coords = my_dataarray['x'] my_dataarray['y'] = np.arange(...) Most of the time, satpy will fill the coordinates for you, so you just need to provide the dimension names. Attributes ********** To save metadata, we use the :attr:`~xarray.DataArray.attrs` dictionary. .. code-block:: python my_dataarray.attrs['platform_name'] = 'Sentinel-3A' Some metadata that should always be present in our dataarrays: - ``area``: the area of the dataset. This should be handled in the reader. - ``start_time``, ``end_time`` - ``sensor`` Operations on DataArrays ************************ DataArrays work with regular arithmetic operations as one would expect of e.g. numpy arrays, with the exception that using an operator on two DataArrays requires both arrays to share the same dimensions, and coordinates if those are defined. For mathematical functions like cos or log, use the :mod:`xarray.ufuncs` module: .. code-block:: python import xarray.ufuncs as xu cos_zen = xu.cos(zen_xarray) Note that the ``xu.something`` functions also work on numpy arrays. Masking data ************ In DataArrays, masked data is represented with NaN values. Hence the default type is ``float64``, but ``float32`` also works in this case. XArray can't handle masked data for integer data types, but in satpy we try to use the special ``_FillValue`` attribute (in ``.attrs``) to handle this case. If you come across a case where this isn't handled properly, contact us. Masking data from a condition can be done with: .. code-block:: python result = my_dataarray.where(my_dataarray > 5) The result is then analogous to ``my_dataarray``, with values lower than or equal to 5 replaced by NaNs. Further reading *************** http://xarray.pydata.org/en/stable/generated/xarray.DataArray.html#xarray.DataArray Dask ---- -..
+
+Further reading
+***************
+
+http://xarray.pydata.org/en/stable/generated/xarray.DataArray.html#xarray.DataArray
+
+Dask
+----
+
+.. code-block:: python
+
+    import dask.array as da
+
+The data part of the DataArrays we use in satpy is mostly a dask array. This
+allows lazy and chunked operations for efficient processing.
+
+Creation
+********
+
+From a numpy array
+++++++++++++++++++
+
+To create a dask array from a numpy array, one can call the
+:func:`~dask.array.from_array` function:
+
+.. code-block:: python
+
+    darr = da.from_array(my_numpy_array, chunks=4096)
+
+The *chunks* keyword tells dask the size of a chunk of data. If the numpy
+array is 3-dimensional, the chunk size provided above means that one chunk
+will be 4096x4096x4096 elements. To prevent this, one can provide a tuple:
+
+.. code-block:: python
+
+    darr = da.from_array(my_numpy_array, chunks=(4096, 1024, 2))
+
+meaning a chunk will be 4096x1024x2 elements in size.
+
+Even more detailed sizes for the chunks can be provided if needed; see the
+dask documentation.
+
+From memmaps or other lazy objects
+++++++++++++++++++++++++++++++++++
+
+To avoid loading the data into memory when creating a dask array, other
+kinds of arrays can be passed to :func:`~dask.array.from_array`. For
+example, a numpy memmap lets dask know where the data is, and the data will
+only be loaded when the actual values need to be computed. Another example
+is an hdf5 variable read with h5py.
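+
+As a minimal sketch of the h5py case (the file name and the variable path
+below are hypothetical):
+
+.. code-block:: python
+
+    import h5py
+    import dask.array as da
+
+    h5f = h5py.File('my_granule.h5', 'r')
+    # dask only records where the data lives here; nothing is read
+    # from disk until the values are actually computed
+    darr = da.from_array(h5f['observation_data/counts'], chunks=4096)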
+
+Procedural generation of data
++++++++++++++++++++++++++++++
+
+Some procedural generation functions are available in dask, e.g.
+:func:`~dask.array.meshgrid`, :func:`~dask.array.arange`, or
+:func:`random.random <dask.array.random.random>`.
+
+From XArray to Dask and back
+****************************
+
+Certain operations are easiest to perform on dask arrays by themselves,
+especially when certain functions are only available from the dask library.
+In these cases you can operate on the dask array beneath the DataArray and
+create a new DataArray when done. Note that dask arrays do not support
+in-place operations. In-place operations on xarray DataArrays will reassign
+the dask array automatically.
+
+.. code-block:: python
+
+    dask_arr = my_dataarray.data
+    dask_arr = dask_arr + 1
+    # ... other non-xarray operations ...
+    new_dataarr = xr.DataArray(dask_arr,
+                               dims=my_dataarray.dims,
+                               attrs=my_dataarray.attrs.copy())
+
+Or if the operation should be assigned back to the original DataArray (if
+and only if the data is the same size):
+
+.. code-block:: python
+
+    my_dataarray.data = dask_arr
+
+Operations and how to get actual results
+****************************************
+
+Regular arithmetic operations are provided, and generate another dask array.
+
+    >>> arr1 = da.random.uniform(0, 1000, size=(1000, 1000), chunks=100)
+    >>> arr2 = da.random.uniform(0, 1000, size=(1000, 1000), chunks=100)
+    >>> arr1 + arr2
+    dask.array<add, shape=(1000, 1000), dtype=float64, chunksize=(100, 100)>
+
+In order to compute the actual data during testing, use the
+:meth:`~dask.array.Array.compute` method. In normal SatPy operations you
+will want the data to be evaluated as late as possible to improve
+performance, so ``compute`` should only be used when needed.
+
+    >>> (arr1 + arr2).compute()
+    array([[  898.08811639,  1236.96107629,  1154.40255292, ...,
+             1537.50752674,  1563.89278664,   433.92598566],
+           [ 1657.43843608,  1063.82390257,  1265.08687916, ...,
+             1103.90421234,  1721.73564104,  1276.5424228 ],
+           [ 1620.11393216,   212.45816261,   771.99348555, ...,
+             1675.6561068 ,   585.89123159,   935.04366354],
+           ...,
+           [ 1533.93265862,  1103.33725432,   191.30794159, ...,
+              520.00434673,   426.49238283,  1090.61323471],
+           [  816.6108554 ,  1526.36292498,   412.91953023, ...,
+              982.71285721,   699.087645  ,  1511.67447362],
+           [ 1354.6127365 ,  1671.24591983,  1144.64848757, ...,
+             1247.37586051,  1656.50487092,   978.28184726]])
+
+Dask also provides ``cos``, ``log`` and other mathematical functions, which
+you can use with :func:`da.cos <dask.array.cos>` and
+:func:`da.log <dask.array.log>`. However, since satpy uses xarrays as its
+standard data structure, prefer the xarray functions when possible (they in
+turn call the dask counterparts when possible).
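+
+As a sketch of what "prefer the xarray functions" means in practice (the
+array contents here are hypothetical):
+
+.. code-block:: python
+
+    import dask.array as da
+    import xarray as xr
+    import xarray.ufuncs as xu
+
+    # hypothetical zenith angles in radians, backed by a dask array
+    zen_xarray = xr.DataArray(
+        da.random.uniform(0, 1.5, size=(100, 100), chunks=50),
+        dims=['y', 'x'])
+
+    # operating on the dask array directly returns a plain dask array
+    # and drops the dims and coords ...
+    cos_zen_dask = da.cos(zen_xarray.data)
+    # ... while the xarray ufunc keeps them and is just as lazy
+    cos_zen = xu.cos(zen_xarray)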
+
+Wrapping non-dask friendly functions
+************************************
+
+Some operations are not supported by dask yet or are difficult to convert
+to take full advantage of dask's multithreaded operations. In these cases
+you can wrap a function to run on an entire dask array when it is being
+computed and pass on the result. Note that this requires fully computing
+all of the dask inputs to the function; the inputs are passed to it as
+numpy arrays or, in the case of an XArray DataArray, as a DataArray with a
+numpy array underneath. You should *NOT* use dask functions inside the
+delayed function.
+
+.. code-block:: python
+
+    import dask
+    import dask.array as da
+
+    def _complex_operation(my_arr1, my_arr2):
+        return my_arr1 + my_arr2
+
+    delayed_result = dask.delayed(_complex_operation)(my_dask_arr1, my_dask_arr2)
+    # to create a dask array to use in the future
+    my_new_arr = da.from_delayed(delayed_result, dtype=my_dask_arr1.dtype,
+                                 shape=my_dask_arr1.shape)
+
+Dask Delayed objects can also be computed with ``delayed_result.compute()``
+if the array is not needed or if the function doesn't return an array.
+
+http://dask.pydata.org/en/latest/array-api.html#dask.array.from_delayed
+
+Map dask blocks to non-dask friendly functions
+**********************************************
+
+If the complicated operation you need to perform can be vectorized and does
+not need the entire data array to do its operations, you can use
+:func:`da.map_blocks <dask.array.map_blocks>` to get better performance
+than creating a delayed function. Similar to delayed functions, the inputs
+to the function are fully computed DataArrays or numpy arrays, but only
+individual chunks of the dask array at a time. Note that ``map_blocks``
+must be provided dask arrays and won't function properly on XArray
+DataArrays.
+
+.. code-block:: python
+
+    my_new_arr = da.map_blocks(_complex_operation, my_dask_arr1,
+                               my_dask_arr2, dtype=my_dask_arr1.dtype)
+
+http://dask.pydata.org/en/latest/array-api.html#dask.array.core.map_blocks
+
+Helpful functions
+*****************
+
+- :func:`~dask.array.core.map_blocks`
+- :func:`~dask.array.map_overlap`
+- :func:`~dask.array.core.atop`
+- :func:`~dask.array.store`
+- :func:`~dask.array.tokenize`
+- :func:`~dask.compute`
+- :doc:`delayed`
+- :func:`~dask.array.rechunk`
+- :attr:`~dask.array.Array.vindex`

From 68f7d671b7df050b045f167e6f04ef055fccc338 Mon Sep 17 00:00:00 2001
From: davidh-ssec
Date: Thu, 14 Jun 2018 13:58:27 -0500
Subject: [PATCH 16/16] Rename MultiScene 'save' method to 'save_animation'

---
 doc/source/multiscene.rst      | 14 +++++++++++++-
 satpy/multiscene.py            |  4 ++--
 satpy/tests/test_multiscene.py |  2 +-
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/doc/source/multiscene.rst b/doc/source/multiscene.rst
index 8af212cc99..192400bc72 100644
--- a/doc/source/multiscene.rst
+++ b/doc/source/multiscene.rst
@@ -47,10 +47,22 @@ also be installed. The below example saves a series of GOES-EAST ABI channel
     >>> from glob import glob
     >>> scenes = [
     ...     Scene(reader='abi_l1b', filenames=[fn]) for fn in sorted(glob('/data/abi/day_1/*C0[12]*.nc'))
     ...     ]
     >>> mscn = MultiScene(scenes)
     >>> mscn.load(['C01', 'C02'])
-    >>> mscn.save('{name}_{start_time:%Y%m%d_%H%M%S}.mp4', fps=2)
+    >>> mscn.save_animation('{name}_{start_time:%Y%m%d_%H%M%S}.mp4', fps=2)

 .. warning::

diff --git a/satpy/multiscene.py b/satpy/multiscene.py
index d83d82a04f..4865c00e81 100644
--- a/satpy/multiscene.py
+++ b/satpy/multiscene.py
@@ -178,8 +178,8 @@ def _get_animation_frames(self, all_datasets, shape, fill_value=None,
             data = data.transpose('y', 'x', 'bands')
             yield data.data

-    def save(self, filename, datasets=None, fps=10, fill_value=None,
-             ignore_missing=False, **kwargs):
+    def save_animation(self, filename, datasets=None, fps=10, fill_value=None,
+                       ignore_missing=False, **kwargs):
         """Helper method for saving to movie or GIF formats.

         Supported formats are dependent on the `imageio` library and are
diff --git a/satpy/tests/test_multiscene.py b/satpy/tests/test_multiscene.py
index b64d0928eb..977905f28a 100644
--- a/satpy/tests/test_multiscene.py
+++ b/satpy/tests/test_multiscene.py
@@ -176,7 +176,7 @@ def test_save_mp4(self):
         with mock.patch('satpy.multiscene.imageio.get_writer') as get_writer:
             get_writer.return_value = writer_mock
             # force order of datasets by specifying them
-            mscn.save(fn, datasets=['ds1', 'ds2', 'ds3'])
+            mscn.save_animation(fn, datasets=['ds1', 'ds2', 'ds3'])
             # 2 saves for the first scene + 1 black frame
             # 3 for the second scene