diff --git a/.travis.yml b/.travis.yml
index 8306b9ad26..496bd4b6cd 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,7 +7,7 @@ env:
         - MAIN_CMD='python setup.py'
         # 'krb5' is added as a workaround for an issue with gdal
         # see: https://github.com/conda-forge/gdal-feedstock/issues/205
-        - CONDA_DEPENDENCIES='xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coveralls coverage codecov behave netcdf4 h5py h5netcdf gdal rasterio pyhdf mock krb5'
+        - CONDA_DEPENDENCIES='xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coveralls coverage codecov behave netcdf4 h5py h5netcdf gdal rasterio imageio pyhdf mock krb5'
         - PIP_DEPENDENCIES='trollsift trollimage pyspectral pyorbital'
         - SETUP_XVFB=False
         - EVENT_TYPE='push pull_request'
diff --git a/appveyor.yml b/appveyor.yml
index 13ba638350..842cd86903 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -3,7 +3,7 @@ environment:
     PYTHON: "C:\\conda"
     MINICONDA_VERSION: "latest"
     CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci-helpers\\appveyor\\windows_sdk.cmd"
-    CONDA_DEPENDENCIES: "xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coverage netcdf4 h5py h5netcdf gdal rasterio pyhdf mock"
+    CONDA_DEPENDENCIES: "xarray dask toolz Cython sphinx cartopy pillow matplotlib scipy pyyaml pyproj pyresample coverage netcdf4 h5py h5netcdf gdal rasterio imageio pyhdf mock"
     PIP_DEPENDENCIES: "trollsift trollimage pyspectral pyorbital"
     CONDA_CHANNELS: "conda-forge"
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 77a004c335..778ca41d96 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -232,9 +232,10 @@ def __getattr__(cls, name):
 
 # Example configuration for intersphinx: refer to the Python standard library.
 intersphinx_mapping = {
-    'https://docs.python.org/3.6': None,
-    'https://docs.scipy.org/doc/numpy': None,
-    'https://docs.scipy.org/doc/scipy/reference': None,
-    'http://xarray.pydata.org/en/stable/': None,
-    'http://pyresample.readthedocs.io/en/stable': None,
+    'python': ('https://docs.python.org/3', None),
+    'numpy': ('https://docs.scipy.org/doc/numpy', None),
+    'scipy': ('https://docs.scipy.org/doc/scipy/reference', None),
+    'xarray': ('https://xarray.pydata.org/en/stable', None),
+    'dask': ('https://dask.pydata.org/en/latest', None),
+    'pyresample': ('https://pyresample.readthedocs.io/en/stable', None),
 }
diff --git a/doc/source/dev_guide/xarray_migration.rst b/doc/source/dev_guide/xarray_migration.rst
index 5782ab9b10..2491d3b486 100644
--- a/doc/source/dev_guide/xarray_migration.rst
+++ b/doc/source/dev_guide/xarray_migration.rst
@@ -18,22 +18,302 @@ these libraries allow SatPy to easily distribute operations over multiple
 workers, lazy evaluate operations, and keep track additional metadata and
 coordinate information.
 
-Lazy Operations
-===============
+XArray
+------
 
-.. todo::
+.. code-block:: python
 
-    Mention no inplace operations, compute multiple things at a time, etc.
+    import xarray as xr
 
-Indexing
-========
+:class:`XArray's DataArray <xarray.DataArray>` is now the standard data
+structure for arrays in satpy. It lets an array define dimensions,
+coordinates, and attributes (which we use for metadata).
 
-Masks and fill values
-=====================
+To create such an array, you can do, for example:
 
-Chunks
-======
+.. code-block:: python
+
+    my_dataarray = xr.DataArray(my_data, dims=['y', 'x'],
+                                coords={'x': np.arange(...)},
+                                attrs={'sensor': 'olci'})
+
+where ``my_data`` can be a regular numpy array, a numpy memmap, or, if you
+want to keep things lazy, a dask array (more on dask later). SatPy uses dask
+arrays with all of its DataArrays.
+
+Dimensions
+**********
+
+In satpy, the dimensions of the arrays should include:
+
+- `x` for the x or column or pixel dimension
+- `y` for the y or row or line dimension
+- `bands` for composites
+- `time` can also be provided, but we have limited support for it at the
+  moment. Use metadata for the common cases (`start_time`, `end_time`).
+
+Dimensions are accessible through
+:attr:`my_dataarray.dims <xarray.DataArray.dims>`. To get the size of a
+given dimension, use :attr:`~xarray.DataArray.sizes`:
+
+.. code-block:: python
+
+    my_dataarray.sizes['x']
+
+Coordinates
+***********
+
+Coordinates can be defined for those dimensions when it makes sense:
+
+- `x` and `y`: Usually defined when the data's area is an
+  :class:`~pyresample.geometry.AreaDefinition`, and they contain
+  the projection coordinates in x and y.
+- `bands`: Contains the letters of the colors they represent, e.g.
+  ``['R', 'G', 'B']`` for an RGB composite.
+
+This then allows you to select, for example, a single band:
+
+.. code-block:: python
+
+    red = my_composite.sel(bands='R')
+
+or even multiple bands:
+
+.. code-block:: python
+
+    red_and_blue = my_composite.sel(bands=['R', 'B'])
+
+To access the coordinates of the data array, use the following syntax:
+
+.. code-block:: python
+
+    x_coords = my_dataarray['x']
+    my_dataarray['y'] = np.arange(...)
+
+Most of the time, satpy will fill in the coordinates for you, so you just
+need to provide the dimension names.
+
+Attributes
+**********
+
+To save metadata, we use the :attr:`~xarray.DataArray.attrs` dictionary.
+
+.. code-block:: python
+
+    my_dataarray.attrs['platform_name'] = 'Sentinel-3A'
+
+Some metadata that should always be present in our dataarrays:
+
+- ``area``: the area of the dataset. This should be handled in the reader.
+- ``start_time``, ``end_time``
+- ``sensor``
+
+Operations on DataArrays
+************************
+
+DataArrays work with regular arithmetic operations as one would expect of
+e.g. numpy arrays, with the exception that using an operator on two
+DataArrays requires both arrays to share the same dimensions, and
+coordinates if those are defined.
+
+For mathematical functions like cos or log, use the
+:mod:`xarray.ufuncs` module:
+
+.. code-block:: python
+
+    import xarray.ufuncs as xu
+    cos_zen = xu.cos(zen_xarray)
+
+Note that the ``xu.something`` functions also work on numpy arrays.
+
+Masking data
+************
+
+In DataArrays, masked data is represented with NaN values. Hence the default
+type is ``float64``, but ``float32`` also works in this case. XArray can't
+handle masked data for integer arrays, but in satpy we try to use the
+special ``_FillValue`` attribute (in ``.attrs``) to handle this case. If you
+come across a case where this isn't handled properly, contact us.
+
+Masking data from a condition can be done with:
+
+.. code-block:: python
+
+    result = my_dataarray.where(my_dataarray > 5)
+
+The result is analogous to ``my_dataarray``, with values lower than or equal
+to 5 replaced by NaNs.
+
+Further reading
+***************
+
+http://xarray.pydata.org/en/stable/generated/xarray.DataArray.html#xarray.DataArray
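+
+Putting these pieces together, here is a minimal sketch of building a
+satpy-style DataArray from scratch (the sensor name, sizes, and threshold
+below are purely illustrative):
+
+.. code-block:: python
+
+    import numpy as np
+    import xarray as xr
+    import dask.array as da
+
+    # lazy data with named dimensions and metadata attributes
+    data = da.from_array(np.random.random((5, 10)), chunks=(5, 10))
+    my_dataarray = xr.DataArray(data, dims=['y', 'x'],
+                                attrs={'sensor': 'olci',
+                                       'platform_name': 'Sentinel-3A'})
+
+    # values failing the condition become NaN, everything else is kept
+    masked = my_dataarray.where(my_dataarray > 0.5)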
+
+Dask
+----
+
+.. code-block:: python
+
+    import dask.array as da
+
+The data of the DataArrays we use in satpy is mostly stored in dask Arrays.
+This allows lazy, chunked operations for efficient processing.
+
+Creation
+********
+
+From a numpy array
+++++++++++++++++++
+
+To create a dask array from a numpy array, one can call the
+:func:`~dask.array.from_array` function:
+
+.. code-block:: python
+
+    darr = da.from_array(my_numpy_array, chunks=4096)
+
+The *chunks* keyword tells dask the size of a chunk of data. If the numpy
+array is 3-dimensional, the chunk size provided above means that one chunk
+will be 4096x4096x4096 elements. To prevent this, one can provide a tuple:
+
+.. code-block:: python
+
+    darr = da.from_array(my_numpy_array, chunks=(4096, 1024, 2))
+
+meaning a chunk will be 4096x1024x2 elements in size.
+
+Even more detailed sizes for the chunks can be provided if needed; see the
+dask documentation: http://dask.pydata.org/en/latest/array-creation.html
+
+From memmaps or other lazy objects
+++++++++++++++++++++++++++++++++++
+
+To avoid loading the data into memory when creating a dask array, other
+kinds of arrays can be passed to :func:`~dask.array.from_array`. For
+example, a numpy memmap lets dask know where the data is, and the data will
+only be loaded when the actual values need to be computed. Another example
+is an HDF5 variable read with h5py.
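+
+A minimal sketch of both cases (the file names, dtype, and shapes are
+illustrative):
+
+.. code-block:: python
+
+    import h5py
+    import numpy as np
+    import dask.array as da
+
+    # numpy memmap: the data stays on disk until chunks are computed
+    mm = np.memmap('my_data.dat', dtype=np.float32, mode='r',
+                   shape=(10000, 10000))
+    darr = da.from_array(mm, chunks=4096)
+
+    # h5py variable: also read lazily, chunk by chunk
+    h5f = h5py.File('my_data.h5', 'r')
+    darr2 = da.from_array(h5f['my_variable'], chunks=4096)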
+
+Procedural generation of data
++++++++++++++++++++++++++++++
+
+Some procedural generation functions are available in dask, e.g.
+:func:`~dask.array.meshgrid`, :func:`~dask.array.arange`, or
+:func:`random.random <dask.array.random.random>`.
+
+From XArray to Dask and back
+****************************
+
+Certain operations are easiest to perform on dask arrays by themselves,
+especially when certain functions are only available from the dask library.
+In these cases you can operate on the dask array beneath the DataArray and
+create a new DataArray when done. Note dask arrays do not support in-place
+operations. In-place operations on xarray DataArrays will reassign the dask
+array automatically.
+
+.. code-block:: python
+
+    dask_arr = my_dataarray.data
+    dask_arr = dask_arr + 1
+    # ... other non-xarray operations ...
+    new_dataarr = xr.DataArray(dask_arr, dims=my_dataarray.dims, attrs=my_dataarray.attrs.copy())
+
+Or if the operation should be assigned back to the original DataArray (if and
+only if the data is the same size):
+
+.. code-block:: python
+
+    my_dataarray.data = dask_arr
+
+Operations and how to get actual results
+****************************************
+
+Regular arithmetic operations are provided, and generate another dask array.
+
+    >>> arr1 = da.random.uniform(0, 1000, size=(1000, 1000), chunks=100)
+    >>> arr2 = da.random.uniform(0, 1000, size=(1000, 1000), chunks=100)
+    >>> arr1 + arr2
+    dask.array<add, shape=(1000, 1000), dtype=float64, chunksize=(100, 100)>
+
+In order to compute the actual data during testing, use the
+:meth:`compute <dask.array.Array.compute>` method.
+In normal SatPy operations you will want the data to be evaluated as late as
+possible to improve performance, so ``compute`` should only be used when
+needed.
+
+    >>> (arr1 + arr2).compute()
+    array([[  898.08811639,  1236.96107629,  1154.40255292, ...,
+             1537.50752674,  1563.89278664,   433.92598566],
+           [ 1657.43843608,  1063.82390257,  1265.08687916, ...,
+             1103.90421234,  1721.73564104,  1276.5424228 ],
+           [ 1620.11393216,   212.45816261,   771.99348555, ...,
+             1675.6561068 ,   585.89123159,   935.04366354],
+           ...,
+           [ 1533.93265862,  1103.33725432,   191.30794159, ...,
+             520.00434673,   426.49238283,  1090.61323471],
+           [  816.6108554 ,  1526.36292498,   412.91953023, ...,
+             982.71285721,   699.087645  ,  1511.67447362],
+           [ 1354.6127365 ,  1671.24591983,  1144.64848757, ...,
+             1247.37586051,  1656.50487092,   978.28184726]])
+
+Dask also provides ``cos``, ``log`` and other mathematical functions, which
+you can use with :func:`da.cos <dask.array.cos>` and
+:func:`da.log <dask.array.log>`. However, since satpy uses xarray as its
+standard data structure, prefer the xarray functions when possible (they in
+turn call the dask counterparts when possible).
+
+Wrapping non-dask friendly functions
+************************************
+
+Some operations are not supported by dask yet or are difficult to convert to
+take full advantage of dask's multithreaded operations. In these cases you
+can wrap a function to run on an entire dask array when it is being computed
+and pass on the result. Note that this requires fully computing all of the
+dask inputs to the function; they are passed in as numpy arrays, or, in the
+case of an XArray DataArray, as a DataArray backed by a numpy array. You
+should *NOT* use dask functions inside the delayed function.
+
+.. code-block:: python
+
+    import dask
+    import dask.array as da
+
+    def _complex_operation(my_arr1, my_arr2):
+        return my_arr1 + my_arr2
+
+    delayed_result = dask.delayed(_complex_operation)(my_dask_arr1, my_dask_arr2)
+    # to create a dask array to use in the future
+    my_new_arr = da.from_delayed(delayed_result, dtype=my_dask_arr1.dtype, shape=my_dask_arr1.shape)
+
+Dask Delayed objects can also be computed with ``delayed_result.compute()``
+if the array is not needed or if the function doesn't return an array.
+
+http://dask.pydata.org/en/latest/array-api.html#dask.array.from_delayed
+
+Map dask blocks to non-dask friendly functions
+**********************************************
+
+If the complicated operation you need to perform can be vectorized and does
+not need the entire data array to do its operations, you can use
+:func:`da.map_blocks <dask.array.core.map_blocks>` to get better performance
+than creating a delayed function. Similar to delayed functions, the inputs to
+the function are fully computed, but only the individual chunks of the dask
+array at a time. Note that ``map_blocks`` must be provided dask arrays and
+won't function properly on XArray DataArrays.
+
+.. code-block:: python
+
+    my_new_arr = da.map_blocks(_complex_operation, my_dask_arr1, my_dask_arr2, dtype=my_dask_arr1.dtype)
+
+http://dask.pydata.org/en/latest/array-api.html#dask.array.core.map_blocks
+
+Helpful functions
+*****************
+
+- :func:`~dask.array.core.map_blocks`
+- :func:`~dask.array.map_overlap`
+- :func:`~dask.array.core.atop`
+- :func:`~dask.array.store`
+- :func:`~dask.array.tokenize`
+- :func:`~dask.compute` (see the sketch after this list)
+- :doc:`delayed`
+- :func:`~dask.array.rechunk`
+- :attr:`~dask.array.Array.vindex`
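+
+As an example of :func:`~dask.compute`, several dask results can be
+evaluated in one pass, letting dask share any common intermediate results.
+A small sketch (the array and the statistics computed are arbitrary):
+
+.. code-block:: python
+
+    import dask
+    import dask.array as da
+
+    arr = da.random.uniform(0, 1000, size=(1000, 1000), chunks=100)
+    low = arr.min()
+    high = arr.max()
+    # one pass over the chunks; shared intermediates are computed only once
+    low_val, high_val = dask.compute(low, high)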
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 2528c8ccc9..9fb7387346 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -38,6 +38,7 @@ installation.
     composites
     resample
     writers
+    multiscene
     dev_guide/index
     SatPy API <api/satpy>
diff --git a/doc/source/multiscene.rst b/doc/source/multiscene.rst
new file mode 100644
index 0000000000..192400bc72
--- /dev/null
+++ b/doc/source/multiscene.rst
@@ -0,0 +1,70 @@
+MultiScene (Experimental)
+=========================
+
+Scene objects in SatPy are meant to represent a single geographic region at
+a single instant in time or over a single range of time. This means they are
+not suited for handling multiple orbits of polar-orbiting satellite data,
+multiple time steps of geostationary satellite data, or other special data
+cases. To handle these cases SatPy provides the `MultiScene` class. The
+examples below walk through some basic use cases of the MultiScene.
+
+.. warning::
+
+    These features are still early in development and may change over time
+    as user feedback is received and more features are added.
+
+Stacking scenes
+---------------
+
+The MultiScene makes it easy to take multiple Scenes and stack them on top of
+each other. The code below takes two separate orbits from a VIIRS sensor and
+stacks them on top of each other.
+
+    >>> from satpy import Scene, MultiScene
+    >>> from glob import glob
+    >>> from pyresample.geometry import AreaDefinition
+    >>> my_area = AreaDefinition(...)
+    >>> scenes = [
+    ...     Scene(reader='viirs_sdr', filenames=glob('/data/viirs/day_1/*t180*.h5')),
+    ...     Scene(reader='viirs_sdr', filenames=glob('/data/viirs/day_2/*t180*.h5'))
+    ... ]
+    >>> mscn = MultiScene(scenes)
+    >>> mscn.load(['I04'])
+    >>> new_mscn = mscn.resample(my_area)
+    >>> blended_scene = new_mscn.blend()
+    >>> blended_scene.save_datasets()
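+
+The ``blend`` method accepts an optional ``blend_function`` argument. The
+default is :func:`~satpy.multiscene.stack`, which places the first Scene on
+the bottom and fills its invalid values from the Scenes above it, so the
+call above is equivalent to this sketch:
+
+    >>> from satpy.multiscene import stack
+    >>> blended_scene = new_mscn.blend(blend_function=stack)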
+
+Saving frames of an animation
+-----------------------------
+
+The MultiScene can take "frames" of data and join them together in a single
+animation movie file. Saving animations requires the `imageio` python
+library, and for most available formats the ``ffmpeg`` command line tool
+suite should also be installed. The below example saves a series of
+GOES-EAST ABI channel 1 and channel 2 frames to MP4 movie files.
+
+    >>> from satpy import Scene, MultiScene
+    >>> from glob import glob
+    >>> scenes = [
+    ...     Scene(reader='abi_l1b', filenames=[fn]) for fn in sorted(glob('/data/abi/day_1/*C0[12]*.nc'))
+    ... ]
+    >>> mscn = MultiScene(scenes)
+    >>> mscn.load(['C01', 'C02'])
+    >>> mscn.save_animation('{name}_{start_time:%Y%m%d_%H%M%S}.mp4', fps=2)
+
+.. warning::
+
+    GIF images, although supported, are not recommended due to the large file
+    sizes that can be produced from only a few frames.
diff --git a/doc/source/overview.rst b/doc/source/overview.rst
index 4f43b66e0c..34d1aa68ce 100644
--- a/doc/source/overview.rst
+++ b/doc/source/overview.rst
@@ -27,28 +27,39 @@ single continuous time range. It is possible to combine Scenes to form a
 Scene with multiple regions or multiple time observations, but it is not
 guaranteed that all functionality works in these situations.
 
-Datasets
-========
-
-SatPy's lowest-level container for data is the
-:class:`~satpy.dataset.Dataset`. ``Dataset`` is a subclass of NumPy's
-:class:`~numpy.ma.MaskedArray` with an additional ``.info`` dictionary
-attribute for various metadata. In most use cases these objects can be
-operated on like normal NumPy arrays with special care taken to make
-sure the metadata dictionary contains expected values.
-
-.. warning::
-
-    Starting with version 0.9, SatPy will replace ``Dataset`` with the
-    :class:`xarray.DataArray` object. The main difference will be that the
-    ``.info`` metadata dictionary will be accessed via ``.attrs``.
+DataArrays
+==========
 
-``Datasets`` are identified throughout SatPy by a ``DatasetID``. A
+SatPy's lowest-level container for data is the
+:class:`xarray.DataArray`. For historical reasons DataArrays are often
+referred to as "Datasets" in SatPy. These objects act similarly to normal
+numpy arrays, but carry additional metadata and attributes for describing
+the data. Metadata is stored in a ``.attrs`` dictionary and named dimensions
+can be accessed through a ``.dims`` attribute, along with other attributes.
+In most use cases these objects can be operated on like normal NumPy arrays
+with special care taken to make sure the metadata dictionary contains
+expected values. See the XArray documentation for more info on handling
+:class:`xarray.DataArray` objects.
+
+Additionally, SatPy uses a special form of DataArrays where data is stored
+in :class:`dask.array.Array` objects, which allows SatPy to perform
+multi-threaded lazy operations, vastly improving processing performance.
+For help on developing with dask and xarray see
+:doc:`dev_guide/xarray_migration` or the documentation for the specific
+project.
+
+To uniquely identify ``DataArray`` objects, SatPy uses the `DatasetID`. A
 ``DatasetID`` consists of various pieces of available metadata. This usually
 includes `name` and `wavelength` as identifying metadata, but also
 includes `resolution`, `calibration`, `polarization`, and additional
 `modifiers` to further distinguish one dataset from another.
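+
+For illustration, a few ways a ``DatasetID`` can be built (mirroring the
+combinations exercised in SatPy's test suite; the values are examples only,
+and note that ``modifiers`` must be a tuple or ``None``):
+
+    >>> from satpy import DatasetID
+    >>> ds_id = DatasetID(name='a')
+    >>> ds_id = DatasetID(name='a', wavelength=0.86)
+    >>> ds_id = DatasetID(name='a', wavelength=0.86, resolution=250,
+    ...                   calibration='radiance', modifiers=('sunz_corrected',))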
+
+.. warning::
+
+    XArray includes other object types called "Datasets". These are different
+    from the "Datasets" mentioned in SatPy.
+
 
 Reading
 =======
diff --git a/doc/source/quickstart.rst b/doc/source/quickstart.rst
index ae2e2befd8..9833c3f587 100644
--- a/doc/source/quickstart.rst
+++ b/doc/source/quickstart.rst
@@ -11,18 +11,17 @@ Loading data
     >>> sys.setdefaultencoding('utf8')
 
 To work with weather satellite data you must create a
 :class:`~satpy.scene.Scene` object. In order for SatPy to
-get access to the data it must be told what files to read and what :doc:`SatPy Reader <readers>` should read them::
+get access to the data it must be told what files to read and what :doc:`SatPy Reader <readers>` should read them:
 
     >>> from satpy import Scene
    >>> from glob import glob
     >>> filenames = glob("/home/a001673/data/satellite/Meteosat-10/seviri/lvl1.5/2015/04/20/HRIT/*201504201000*")
     >>> global_scene = Scene(reader="hrit_msg", filenames=filenames)
 
-To load data from the files use the :meth:`Scene.load <satpy.scene.Scene.load>` method::
+To load data from the files use the :meth:`Scene.load <satpy.scene.Scene.load>` method:
 
     >>> global_scene.load([0.6, 0.8, 10.8])
     >>> print(global_scene)
-
     seviri/IR_108:
             area: On-the-fly area
             start_time: 2015-04-20 10:00:00
@@ -47,10 +46,9 @@ SatPy allows loading file data by wavelengths in micrometers (shown above) or by
 
     >>> global_scene.load(["VIS006", "VIS008", "IR_108"])
 
 To have a look at the available channels for loading from your :class:`~satpy.scene.Scene` object use the
-:meth:`available_dataset_names <satpy.scene.Scene.available_dataset_names>` method::
+:meth:`available_dataset_names <satpy.scene.Scene.available_dataset_names>` method:
 
     >>> global_scene.available_dataset_names()
-
     ['HRV',
     'IR_108',
     'IR_120',
@@ -65,15 +63,15 @@ To have a look at the available channels for loading from your :class:`~satpy.sc
     'WV_073']
 
 
-To access the loaded data use the wavelength or name::
+To access the loaded data use the wavelength or name:
 
     >>> print(global_scene[0.6])
 
-To visualize loaded data in a pop-up window::
+To visualize loaded data in a pop-up window:
 
     >>> global_scene.show(0.6)
 
-To make combine datasets and make a new dataset::
+To combine datasets and make a new dataset:
 
     >>> global_scene["ndvi"] = (global_scene[0.8] - global_scene[0.6]) / (global_scene[0.8] + global_scene[0.6])
     >>> global_scene.show("ndvi")
@@ -84,14 +82,13 @@ advanced loading methods see the :doc:`readers` documentation.
 Generating composites
 =====================
 
-SatPy comes with many composite recipes built-in and makes them loadable like any other dataset::
+SatPy comes with many composite recipes built-in and makes them loadable like any other dataset:
 
     >>> global_scene.load(['overview'])
 
-To get a list of all available composites for the current scene::
+To get a list of all available composites for the current scene:
 
     >>> global_scene.available_composite_names()
-
     ['overview_sun',
     'airmass',
     'natural',
@@ -126,7 +123,7 @@ to a uniform grid, limiting input data to an area of
 interest, changing from one projection to another, or for preparing datasets
 to be combined in a composite (see above). For more details on resampling,
 different resampling algorithms, and creating your own area of interest see
 the
-:doc:`resample` documentation. To resample a SatPy Scene::
+:doc:`resample` documentation. To resample a SatPy Scene:
 
     >>> local_scene = global_scene.resample("eurol")
 
@@ -134,7 +131,7 @@ This creates a copy of the original ``global_scene`` with all loaded
 datasets resampled to the built-in "eurol" area. Any composites that were
 requested, but could not be generated are automatically generated after
 resampling.
The new ``local_scene`` can now be used like the original ``global_scene`` for -working with datasets, saving them to disk or showing them on screen:: +working with datasets, saving them to disk or showing them on screen: >>> local_scene.show('overview') >>> local_scene.save_dataset('overview', './local_overview.tif') @@ -142,15 +139,15 @@ working with datasets, saving them to disk or showing them on screen:: Saving to disk ============== -To save all loaded datasets to disk as geotiff images:: +To save all loaded datasets to disk as geotiff images: >>> global_scene.save_datasets() -To save all loaded datasets to disk as PNG images:: +To save all loaded datasets to disk as PNG images: >>> global_scene.save_datasets(writer='simple_image') -Or to save an individual dataset:: +Or to save an individual dataset: >>> global_scene.save_dataset('VIS006', 'my_nice_image.png') @@ -165,7 +162,7 @@ Troubleshooting Due to the way SatPy works, producing as many datasets as possible, there are times that behavior can be unexpected but with no exceptions raised. To help troubleshoot these situations log messages can be turned on. To do this run -the following code before running any other SatPy code:: +the following code before running any other SatPy code: >>> from satpy.utils import debug_on >>> debug_on() diff --git a/satpy/__init__.py b/satpy/__init__.py index 884f5ba96b..f9fad8bec2 100644 --- a/satpy/__init__.py +++ b/satpy/__init__.py @@ -55,5 +55,6 @@ available_readers) # noqa from satpy.writers import available_writers # noqa from satpy.scene import Scene # noqa +from satpy.multiscene import MultiScene # noqa log = get_logger('satpy') diff --git a/satpy/dataset.py b/satpy/dataset.py index 46f40ab4e7..f4c042d630 100644 --- a/satpy/dataset.py +++ b/satpy/dataset.py @@ -138,6 +138,13 @@ class DatasetID(DatasetID): etc). `None` or empty tuple if not applicable. """ + def __new__(cls, *args, **kwargs): + ret = super(DatasetID, cls).__new__(cls, *args, **kwargs) + if ret.modifiers is not None and not isinstance(ret.modifiers, tuple): + raise TypeError("'DatasetID' modifiers must be a tuple or None, " + "not {}".format(type(ret.modifiers))) + return ret + @staticmethod def name_match(a, b): """Return if two string names are equal. diff --git a/satpy/multiscene.py b/satpy/multiscene.py index 6d3a1094c4..4865c00e81 100644 --- a/satpy/multiscene.py +++ b/satpy/multiscene.py @@ -23,82 +23,210 @@ """MultiScene object to blend satellite data. """ +import logging import numpy as np - -from satpy.dataset import Dataset +import dask +import dask.array as da +import xarray as xr from satpy.scene import Scene +from satpy.writers import get_enhanced_image +try: + import imageio +except ImportError: + imageio = None -def stack(datasets): - """First dataset at the bottom.""" +log = logging.getLogger(__name__) - base = Dataset(datasets[0], copy=True) - for dataset in datasets[1:]: - base_mask = np.ma.getmaskarray(base) - other_mask = np.ma.getmaskarray(dataset) - base.mask = np.logical_and(base_mask, other_mask) - not_masked = np.logical_not(other_mask) - base[not_masked] = dataset[not_masked] - return base +def cascaded_compute(callback, arrays, optimize=True): + """Dask helper function for iterating over computed dask arrays. + Args: + callback (callable): Called with a single numpy array computed from + the provided dask arrays. + arrays (list, tuple): Dask arrays to pass to callback. + optimize (bool): Whether to try to optimize the dask graphs of the + provided arrays. 
-def stack_time(datasets): - """Oldest time at the bottom.""" + Returns: `dask.Delayed` object to be computed - return stack(sorted(datasets, key=lambda x: x.info['start_time'])) + """ + if optimize: + # optimize Dask graph over all objects + dsk = da.Array.__dask_optimize__( + # combine all Dask Array graphs + dask.sharedict.merge(*[e.__dask_graph__() for e in arrays]), + # get Dask Array keys in result + list(dask.core.flatten([e.__dask_keys__() for e in arrays])) + ) + # rebuild Dask Arrays + arrays = [da.Array(dsk, e.name, e.chunks, e.dtype) for e in arrays] + def _callback_wrapper(arr, cb=callback, previous_call=None): + del previous_call # used only for task ordering + return cb(arr) + + current_write = None + for dask_arr in arrays: + current_write = dask.delayed(_callback_wrapper)( + dask_arr, previous_call=current_write) + return current_write -class MultiScene(object): - def __init__(self, layers): - self.layers = layers +def stack(datasets): + """First dataset at the bottom.""" + base = datasets[0].copy() + for dataset in datasets[1:]: + base = base.where(dataset.isnull(), dataset) + return base + + +class MultiScene(object): + """Container for multiple `Scene` objects.""" + + def __init__(self, scenes=None): + """Initialize MultiScene and validate sub-scenes""" + self.scenes = scenes or [] + + @property + def loaded_dataset_ids(self): + """Union of all Dataset IDs loaded by all children.""" + return set(ds_id for scene in self.scenes for ds_id in scene.keys()) + + @property + def shared_dataset_ids(self): + """Dataset IDs shared by all children.""" + shared_ids = set(self.scenes[0].keys()) + for scene in self.scenes[1:]: + shared_ids &= set(scene.keys()) + return shared_ids + + def _all_same_area(self, dataset_ids): + """Return True if all areas for the provided IDs are equal.""" + all_areas = [] + for ds_id in dataset_ids: + for scn in self.scenes: + ds = scn.get(ds_id) + if ds is None: + continue + all_areas.append(ds.attrs.get('area')) + all_areas = [area for area in all_areas if area is not None] + return all(all_areas[0] == area for area in all_areas[1:]) + + @property + def all_same_area(self): + return self._all_same_area(self.loaded_dataset_ids) def load(self, *args, **kwargs): """Load the required datasets from the multiple scenes.""" - for layer in self.layers: + for layer in self.scenes: layer.load(*args, **kwargs) def resample(self, destination, **kwargs): """Resample the multiscene.""" - return MultiScene([layer.resample(destination, **kwargs) for layer in self.layers]) + return self.__class__([scn.resample(destination, **kwargs) + for scn in self.scenes]) def blend(self, blend_function=stack): """Blend the datasets into one scene.""" - scn = Scene() - common_datasets = None - for layer in self.layers: - if common_datasets is None: - common_datasets = set( - [dataset.id.name for dataset in layer]) + new_scn = Scene() + common_datasets = self.shared_dataset_ids + for ds_id in common_datasets: + datasets = [scn[ds_id] for scn in self.scenes if ds_id in scn] + new_scn[ds_id] = blend_function(datasets) + + return new_scn + + def _get_animation_info(self, all_datasets, filename, fill_value=None): + """Determine filename and shape of animation to be created.""" + valid_datasets = [ds for ds in all_datasets if ds is not None] + first_dataset = valid_datasets[0] + last_dataset = valid_datasets[-1] + first_img = get_enhanced_image(first_dataset) + first_img_data = first_img._finalize(fill_value=fill_value)[0] + shape = tuple(first_img_data.sizes.get(dim_name) + for dim_name 
in ('y', 'x', 'bands'))
+        if fill_value is None and filename.endswith('gif'):
+            log.warning("Forcing fill value to '0' for GIF Luminance images")
+            fill_value = 0
+            shape = shape[:2]
+
+        attrs = first_dataset.attrs.copy()
+        if 'end_time' in last_dataset.attrs:
+            attrs['end_time'] = last_dataset.attrs['end_time']
+        this_fn = filename.format(**attrs)
+        return this_fn, shape, fill_value
+
+    def _get_animation_frames(self, all_datasets, shape, fill_value=None,
+                              ignore_missing=False):
+        """Create enhanced image frames to save to a file."""
+        for idx, ds in enumerate(all_datasets):
+            if ds is None and ignore_missing:
+                continue
+            elif ds is None:
+                log.debug("Missing frame: %d", idx)
+                data = da.zeros(shape, dtype=np.uint8, chunks=shape)
+                data = xr.DataArray(data)
             else:
-                common_datasets &= set(
-                    [dataset.id.name for dataset in layer])
-        for dataset_id in common_datasets:
-            datasets = [layer[dataset_id] for layer in self.layers]
-            scn[dataset_id] = blend_function(datasets)
-
-        return scn
-
-
-if __name__ == '__main__':
-    from datetime import datetime
-    from satpy.utils import debug_on
-
-    debug_on()
-    scenes = [
-        Scene(platform_name="Meteosat-10", sensor="seviri",
-              start_time=datetime(2016, 9, 6, 11, 0),
-              base_dir="/home/a001673/data/satellite/Meteosat-10/seviri/lvl1.5/2015/04/20/HRIT"),
-
-        Scene(platform_name="SNPP", sensor="viirs",
-              start_time=datetime(2016, 9, 6, 10, 51),
-              end_time=datetime(2016, 9, 6, 11, 0),
-              base_dir="/home/a001673/data/satellite/Suomi-NPP/viirs/lvl1b/2015/03/11/SDR")
-    ]
-
-    mscn = MultiScene(scenes)
-    mscn.load(['overview_sun'])
-    mscn = mscn.resample('eurol')
-    scn = mscn.blend()
-    scn.save_dataset('overview_sun', '/tmp/blend.png')
+                img = get_enhanced_image(ds)
+                data, mode = img._finalize(fill_value=fill_value)
+                if data.ndim == 3:
+                    # assume all other shapes are (y, x)
+                    # we need arrays grouped by pixel so
+                    # transpose if needed
+                    data = data.transpose('y', 'x', 'bands')
+            yield data.data
+
+    def save_animation(self, filename, datasets=None, fps=10, fill_value=None,
+                       ignore_missing=False, **kwargs):
+        """Helper method for saving to movie or GIF formats.
+
+        Supported formats are dependent on the `imageio` library and are
+        determined by filename extension by default.
+
+        By default all datasets available will be saved to individual files
+        using the first Scene's datasets metadata to format the filename
+        provided. If a dataset is not available from a Scene then a black
+        array is used instead (np.zeros(shape)).
+
+        Args:
+            filename (str): Filename to save to. Can include python string
+                            formatting keys from dataset ``.attrs``
+                            (ex. "{name}_{start_time:%Y%m%d_%H%M%S}.gif")
+            datasets (list): DatasetIDs to save (default: all datasets)
+            fps (int): Frames per second for produced animation
+            fill_value (int): Value to use instead of creating an alpha band.
+            ignore_missing (bool): Don't include a black frame when a dataset
+                                   is missing from a child scene.
+            **kwargs: Additional keyword arguments to pass to
+                       `imageio.get_writer`.
+ + """ + if imageio is None: + raise ImportError("Missing required 'imageio' library") + + dataset_ids = datasets or self.loaded_dataset_ids + writers = [] + delayeds = [] + for dataset_id in dataset_ids: + if not self._all_same_area([dataset_id]): + raise ValueError("Sub-scene datasets must all be on the same " + "area (see the 'resample' method).") + + all_datasets = [scn.datasets.get(dataset_id) for scn in self.scenes] + this_fn, shape, this_fill = self._get_animation_info( + all_datasets, filename, fill_value=fill_value) + data_to_write = list(self._get_animation_frames( + all_datasets, shape, this_fill, ignore_missing)) + + writer = imageio.get_writer(this_fn, fps=fps, **kwargs) + delayed = cascaded_compute(writer.append_data, data_to_write) + # Save delayeds and writers to compute and close later + delayeds.append(delayed) + writers.append(writer) + # compute all the datasets at once to combine any computations that + # can be shared + dask.compute(delayeds) + for writer in writers: + writer.close() diff --git a/satpy/readers/__init__.py b/satpy/readers/__init__.py index 997f782c68..bb2e4f7176 100644 --- a/satpy/readers/__init__.py +++ b/satpy/readers/__init__.py @@ -296,6 +296,14 @@ def __getitem__(self, item): key = self.get_key(item) return super(DatasetDict, self).__getitem__(key) + def get(self, key, default=None): + """Get value with optional default.""" + try: + key = self.get_key(key) + except KeyError: + return default + return super(DatasetDict, self).get(key, default) + def __setitem__(self, key, value): """Support assigning 'Dataset' objects or dictionaries of metadata. """ diff --git a/satpy/scene.py b/satpy/scene.py index f41fcab65d..39c6feb6f3 100644 --- a/satpy/scene.py +++ b/satpy/scene.py @@ -561,6 +561,10 @@ def crop(self, area=None, ll_bbox=None, xy_bbox=None, dataset_ids=None): return new_scn + def get(self, key, default=None): + """Return value from DatasetDict with optional default.""" + return self.datasets.get(key, default) + def __getitem__(self, key): """Get a dataset or create a new 'slice' of the Scene.""" if isinstance(key, tuple) and not isinstance(key, DatasetID): diff --git a/satpy/tests/__init__.py b/satpy/tests/__init__.py index 1197d4971b..b0d3d462e4 100644 --- a/satpy/tests/__init__.py +++ b/satpy/tests/__init__.py @@ -29,7 +29,7 @@ test_readers, test_resample, test_scene, test_utils, test_writers, test_yaml_reader, writer_tests, - test_enhancements, compositor_tests) + test_enhancements, compositor_tests, test_multiscene) if sys.version_info < (2, 7): @@ -56,6 +56,7 @@ def suite(): mysuite.addTests(test_utils.suite()) mysuite.addTests(test_enhancements.suite()) mysuite.addTests(compositor_tests.suite()) + mysuite.addTests(test_multiscene.suite()) return mysuite diff --git a/satpy/tests/test_dataset.py b/satpy/tests/test_dataset.py index df5ee19b5b..803ea453ff 100644 --- a/satpy/tests/test_dataset.py +++ b/satpy/tests/test_dataset.py @@ -27,8 +27,27 @@ class TestDatasetID(unittest.TestCase): + + def test_basic_init(self): + """Test basic ways of creating a DatasetID.""" + from satpy.dataset import DatasetID + DatasetID(name="a") + DatasetID(name="a", wavelength=0.86) + DatasetID(name="a", resolution=1000) + DatasetID(name="a", calibration='radiance') + DatasetID(name="a", wavelength=0.86, resolution=250, + calibration='radiance') + DatasetID(name="a", wavelength=0.86, resolution=250, + calibration='radiance', modifiers=('sunz_corrected',)) + DatasetID(wavelength=0.86) + + def test_init_bad_modifiers(self): + """Test that modifiers are a tuple.""" 
+ from satpy.dataset import DatasetID + self.assertRaises(TypeError, DatasetID, name="a", modifiers="str") + def test_compare_no_wl(self): - """Compare fully qualified wavelength ID to no wavelength ID""" + """Compare fully qualified wavelength ID to no wavelength ID.""" from satpy.dataset import DatasetID d1 = DatasetID(name="a", wavelength=(0.1, 0.2, 0.3)) d2 = DatasetID(name="a", wavelength=None) diff --git a/satpy/tests/test_multiscene.py b/satpy/tests/test_multiscene.py new file mode 100644 index 0000000000..977905f28a --- /dev/null +++ b/satpy/tests/test_multiscene.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2018. +# +# Author(s): +# +# David Hoese +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +"""Unit tests for multiscene.py. +""" + +import os +import sys +import tempfile +import shutil +from datetime import datetime + +if sys.version_info < (2, 7): + import unittest2 as unittest +else: + import unittest + +try: + from unittest import mock +except ImportError: + import mock + +DEFAULT_SHAPE = (5, 10) + + +def _fake_get_enhanced_image(img): + from trollimage.xrimage import XRImage + return XRImage(img) + + +def _create_test_area(proj_str=None, shape=DEFAULT_SHAPE, extents=None): + """Create a test area definition.""" + from pyresample.geometry import AreaDefinition + from pyresample.utils import proj4_str_to_dict + if proj_str is None: + proj_str = '+proj=lcc +datum=WGS84 +ellps=WGS84 +lon_0=-95. ' \ + '+lat_0=25 +lat_1=25 +units=m +no_defs' + proj_dict = proj4_str_to_dict(proj_str) + extents = extents or (-1000., -1500., 1000., 1500.) 
+ + return AreaDefinition( + 'test', + 'test', + 'test', + proj_dict, + x_size=shape[1], + y_size=shape[0], + area_extent=extents + ) + + +def _create_test_dataset(name, shape=DEFAULT_SHAPE, area=None): + """Create a test DataArray object.""" + import xarray as xr + import dask.array as da + import numpy as np + + return xr.DataArray( + da.zeros(shape, dtype=np.float32, chunks=shape), dims=('y', 'x'), + attrs={'name': name, 'area': area}) + + +def _create_test_scenes(num_scenes=2, shape=DEFAULT_SHAPE, area=None): + """Helper to create some test scenes.""" + from satpy import Scene + ds1 = _create_test_dataset('ds1', shape=shape, area=area) + ds2 = _create_test_dataset('ds2', shape=shape, area=area) + scenes = [] + for scn_idx in range(num_scenes): + scn = Scene() + scn['ds1'] = ds1.copy() + scn['ds2'] = ds2.copy() + scenes.append(scn) + return scenes + + +class TestMultiScene(unittest.TestCase): + """Test basic functionality of MultiScene.""" + + def test_init_empty(self): + """Test creating a multiscene with no children.""" + from satpy import MultiScene + MultiScene() + + def test_init_children(self): + """Test creating a multiscene with children.""" + from satpy import MultiScene + scenes = _create_test_scenes() + MultiScene(scenes) + + def test_properties(self): + """Test basic properties/attributes of the MultiScene.""" + from satpy import MultiScene, DatasetID + + area = _create_test_area() + scenes = _create_test_scenes(area=area) + ds1_id = DatasetID(name='ds1') + ds2_id = DatasetID(name='ds2') + ds3_id = DatasetID(name='ds3') + ds4_id = DatasetID(name='ds4') + + # Add a dataset to only one of the Scenes + scenes[1]['ds3'] = _create_test_dataset('ds3') + mscn = MultiScene(scenes) + + self.assertSetEqual(mscn.loaded_dataset_ids, + {ds1_id, ds2_id, ds3_id}) + self.assertSetEqual(mscn.shared_dataset_ids, {ds1_id, ds2_id}) + self.assertTrue(mscn.all_same_area) + + bigger_area = _create_test_area(shape=(20, 40)) + scenes[0]['ds4'] = _create_test_dataset('ds4', shape=(20, 40), + area=bigger_area) + + self.assertSetEqual(mscn.loaded_dataset_ids, + {ds1_id, ds2_id, ds3_id, ds4_id}) + self.assertSetEqual(mscn.shared_dataset_ids, {ds1_id, ds2_id}) + self.assertFalse(mscn.all_same_area) + + +class TestMultiSceneSave(unittest.TestCase): + """Test saving a MultiScene to various formats.""" + + def setUp(self): + """Create temporary directory to save files to""" + self.base_dir = tempfile.mkdtemp() + + def tearDown(self): + """Remove the temporary directory created for a test""" + try: + shutil.rmtree(self.base_dir, ignore_errors=True) + except OSError: + pass + + @mock.patch('satpy.multiscene.get_enhanced_image', _fake_get_enhanced_image) + def test_save_mp4(self): + """Save a series of fake scenes to an mp4 video.""" + from satpy import MultiScene + area = _create_test_area() + scenes = _create_test_scenes(area=area) + + # Add a dataset to only one of the Scenes + scenes[1]['ds3'] = _create_test_dataset('ds3') + # Add a start and end time + for ds_id in ['ds1', 'ds2', 'ds3']: + scenes[1][ds_id].attrs['start_time'] = datetime(2018, 1, 2) + scenes[1][ds_id].attrs['end_time'] = datetime(2018, 1, 2, 12) + if ds_id == 'ds3': + continue + scenes[0][ds_id].attrs['start_time'] = datetime(2018, 1, 1) + scenes[0][ds_id].attrs['end_time'] = datetime(2018, 1, 1, 12) + + mscn = MultiScene(scenes) + fn = os.path.join( + self.base_dir, + 'test_save_mp4_{name}_{start_time:%Y%m%d_%H}_' + '{end_time:%Y%m%d_%H}.mp4') + writer_mock = mock.MagicMock() + with mock.patch('satpy.multiscene.imageio.get_writer') as 
get_writer: + get_writer.return_value = writer_mock + # force order of datasets by specifying them + mscn.save_animation(fn, datasets=['ds1', 'ds2', 'ds3']) + + # 2 saves for the first scene + 1 black frame + # 3 for the second scene + self.assertEqual(writer_mock.append_data.call_count, 3 + 3) + filenames = [os.path.basename(args[0][0]) for args in get_writer.call_args_list] + self.assertEqual(filenames[0], + 'test_save_mp4_ds1_20180101_00_20180102_12.mp4') + self.assertEqual(filenames[1], + 'test_save_mp4_ds2_20180101_00_20180102_12.mp4') + self.assertEqual(filenames[2], + 'test_save_mp4_ds3_20180102_00_20180102_12.mp4') + + +def suite(): + """The test suite for test_multiscene.""" + loader = unittest.TestLoader() + mysuite = unittest.TestSuite() + mysuite.addTest(loader.loadTestsFromTestCase(TestMultiScene)) + mysuite.addTest(loader.loadTestsFromTestCase(TestMultiSceneSave)) + + return mysuite + + +if __name__ == "__main__": + unittest.main() diff --git a/satpy/tests/test_scene.py b/satpy/tests/test_scene.py index 9202d8963b..b7558d3ff2 100644 --- a/satpy/tests/test_scene.py +++ b/satpy/tests/test_scene.py @@ -229,6 +229,8 @@ def test_getitem(self): self.assertIs(scene['2'], ds2) self.assertIs(scene['3'], ds3) self.assertRaises(KeyError, scene.__getitem__, '4') + self.assertIs(scene.get('3'), ds3) + self.assertIs(scene.get('4'), None) def test_getitem_modifiers(self): """Test __getitem__ with names and modifiers""" @@ -1657,8 +1659,7 @@ def test_save_dataset_default(self): def suite(): - """The test suite for test_scene. - """ + """The test suite for test_scene.""" loader = unittest.TestLoader() mysuite = unittest.TestSuite() mysuite.addTest(loader.loadTestsFromTestCase(TestScene)) @@ -1668,5 +1669,6 @@ def suite(): return mysuite + if __name__ == "__main__": unittest.main() diff --git a/setup.py b/setup.py index a1f3f4d990..c0621a2094 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ 'dask[array] >=0.17.1'] # pyhdf (conda) == python-hdf4 (pip) -test_requires = ['behave', 'h5py', 'netCDF4', 'pyhdf'] +test_requires = ['behave', 'h5py', 'netCDF4', 'pyhdf', 'imageio'] if sys.version < '3.0': test_requires.append('mock') @@ -70,6 +70,8 @@ # Writers: 'scmi': ['netCDF4 >= 1.1.8'], 'geotiff': ['gdal', 'trollimage[geotiff]'], + # MultiScene: + 'animations': ['imageio'], } all_extras = [] for extra_deps in extras_require.values():