From e182debe126f48c078e7a5337bf15eea8286b973 Mon Sep 17 00:00:00 2001 From: gbrener Date: Mon, 6 Nov 2017 13:32:34 -0600 Subject: [PATCH] Change load_array->load_layers, MLDataset->xr.Dataset Name changes, plus having load_layers() return an xr.Dataset object instead of an MLDataset object. --- conda.recipe/meta.yaml | 1 + earthio/__init__.py | 3 +- earthio/hdf4.py | 7 +-- earthio/hdf5.py | 9 ++- earthio/{load_array.py => load_layers.py} | 12 ++-- earthio/netcdf.py | 7 +-- earthio/reshape.py | 62 +++++++++---------- ...test_load_array.py => test_load_layers.py} | 4 +- earthio/tests/test_readers_general.py | 4 +- earthio/tests/util.py | 3 + earthio/tif.py | 9 ++- earthio/util.py | 4 +- environment.yml | 1 + 13 files changed, 63 insertions(+), 63 deletions(-) rename earthio/{load_array.py => load_layers.py} (90%) rename earthio/tests/{test_load_array.py => test_load_layers.py} (86%) diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index 137a4b9..65b5d84 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -68,3 +68,4 @@ test: requires: - pytest + - xarray_filters diff --git a/earthio/__init__.py b/earthio/__init__.py index 7cbff0d..3f97ca0 100644 --- a/earthio/__init__.py +++ b/earthio/__init__.py @@ -1,12 +1,11 @@ '''Package of readers from common satellite and weather data formats''' # The modules below use __all__ -from xarray_filters.mldataset import * from earthio.hdf4 import * from earthio.hdf5 import * from earthio.netcdf import * from earthio.tif import * from earthio.util import * from earthio.reshape import * -from earthio.load_array import * +from earthio.load_layers import * from earthio.local_file_iterators import * diff --git a/earthio/hdf4.py b/earthio/hdf4.py index 2432b44..b226ecc 100644 --- a/earthio/hdf4.py +++ b/earthio/hdf4.py @@ -6,7 +6,7 @@ Tools for reading HDF4 files. Typically use the interface through - - :func:`earthio.load_array` + - :func:`earthio.load_layers` - :func:`earthio.load_meta` ''' @@ -62,7 +62,7 @@ def load_hdf4_meta(datafile): def load_hdf4_array(datafile, meta, layer_specs=None): - '''Return an MLDataset where each subdataset is a DataArray + '''Return an xr.Dataset where each subdataset is a DataArray Parameters: :datafile: filename @@ -76,7 +76,6 @@ def load_hdf4_array(datafile, meta, layer_specs=None): ''' import gdal from gdalconst import GA_ReadOnly - from earthio import MLDataset from earthio.metadata_selection import match_meta logger.debug('load_hdf4_array: {}'.format(datafile)) f = gdal.Open(datafile, GA_ReadOnly) @@ -138,4 +137,4 @@ def load_hdf4_array(datafile, meta, layer_specs=None): attrs = copy.deepcopy(attrs) attrs['layer_order'] = layer_order gc.collect() - return MLDataset(elm_store_data, attrs=attrs) + return xr.Dataset(elm_store_data, attrs=attrs) diff --git a/earthio/hdf5.py b/earthio/hdf5.py index 8319856..9b47299 100644 --- a/earthio/hdf5.py +++ b/earthio/hdf5.py @@ -6,7 +6,7 @@ Tools for reading HDF5 files. Typically use the interface through - - :func:`earthio.load_array` + - :func:`earthio.load_layers` - :func:`earthio.load_meta` ''' @@ -32,7 +32,6 @@ window_to_gdal_read_kwargs, meta_strings_to_dict) -from earthio import MLDataset from earthio.metadata_selection import match_meta __all__ = [ @@ -97,7 +96,7 @@ def load_subdataset(subdataset, attrs, layer_spec, **reader_kwargs): def load_hdf5_array(datafile, meta, layer_specs): - '''Return an MLDataset where each subdataset is a DataArray + '''Return an xr.Dataset where each subdataset is a DataArray Parameters: :datafile: filename @@ -107,7 +106,7 @@ def load_hdf5_array(datafile, meta, layer_specs): as layers Returns: - :es: An MLDataset + :es: An xr.Dataset ''' import gdal from gdalconst import GA_ReadOnly @@ -151,4 +150,4 @@ def load_hdf5_array(datafile, meta, layer_specs): attrs = copy.deepcopy(attrs) attrs['layer_order'] = layer_order gc.collect() - return MLDataset(elm_store_data, attrs=attrs) + return xr.Dataset(elm_store_data, attrs=attrs) diff --git a/earthio/load_array.py b/earthio/load_layers.py similarity index 90% rename from earthio/load_array.py rename to earthio/load_layers.py index 212e635..a4ef5ba 100644 --- a/earthio/load_array.py +++ b/earthio/load_layers.py @@ -1,9 +1,9 @@ ''' ------------------ -``earthio.load_array`` +``earthio.load_layers`` ++++++++++++++++++++++++++ -load_array returns an MLDataset for HDF, NetCDF, GeoTiff files +load_layers returns an xr.Dataset for HDF, NetCDF, GeoTiff files ''' from __future__ import absolute_import, division, print_function, unicode_literals @@ -18,7 +18,7 @@ from earthio.hdf5 import load_hdf5_array, load_hdf5_meta from earthio.tif import load_dir_of_tifs_meta,load_dir_of_tifs_array -__all__ = ['load_array', 'load_meta'] +__all__ = ['load_layers', 'load_meta'] EXT = OrderedDict([ ('netcdf', ('nc', 'nc\d',)), @@ -45,8 +45,8 @@ def _find_file_type(filename): return ftype -def load_array(filename, meta=None, layer_specs=None, reader=None): - '''Create MLDataset from HDF4 / 5 or NetCDF files or TIF directories +def load_layers(filename, meta=None, layer_specs=None, reader=None): + '''Create xr.Dataset from HDF4 / 5 or NetCDF files or TIF directories Parameters: :filename: filename (HDF4 / 5 or NetCDF) or directory name (TIF) @@ -55,7 +55,7 @@ def load_array(filename, meta=None, layer_specs=None, reader=None): :reader: named reader from earthio - one of: ('tif', 'hdf4', 'hdf5', 'netcdf') Returns: - :es: MLDataset (xarray.Dataset) with layers specified by layer_specs as DataArrays in "data_vars" attribute + :es: xr.Dataset with layers specified by layer_specs as DataArrays in "data_vars" attribute ''' ftype = reader or _find_file_type(filename) if meta is None: diff --git a/earthio/netcdf.py b/earthio/netcdf.py index 13d64aa..9438a07 100644 --- a/earthio/netcdf.py +++ b/earthio/netcdf.py @@ -6,7 +6,7 @@ Tools for reading NetCDF files. Typically use the interface through - - :func:`earthio.load_array` + - :func:`earthio.load_layers` - :func:`earthio.load_meta` ''' @@ -24,7 +24,6 @@ VALID_X_NAMES, VALID_Y_NAMES, take_geo_transform_from_meta, meta_strings_to_dict) -from earthio import MLDataset from earthio.metadata_selection import match_meta from six import string_types @@ -114,7 +113,7 @@ def load_netcdf_array(datafile, meta, layer_specs=None): :variables: dict, list: list of variables to load Returns: - :new_es: MLDataset xarray.Dataset + :new_es: xr.Dataset ''' logger.debug('load_netcdf_array: {}'.format(datafile)) ds = xr.open_dataset(datafile) @@ -137,7 +136,7 @@ def load_netcdf_array(datafile, meta, layer_specs=None): for b, sub_dataset_name in zip(meta['layer_meta'], data): b['geo_transform'] = meta['geo_transform'] = geo_transform b['sub_dataset_name'] = sub_dataset_name - new_es = MLDataset(data, + new_es = xr.Dataset(data, coords=_normalize_coords(ds), attrs=meta) return new_es diff --git a/earthio/reshape.py b/earthio/reshape.py index e1c9e3d..829389d 100644 --- a/earthio/reshape.py +++ b/earthio/reshape.py @@ -20,7 +20,7 @@ import scipy.interpolate as spi import xarray as xr -from earthio import MLDataset, Canvas +from earthio import Canvas from earthio.util import (canvas_to_coords, VALID_X_NAMES, VALID_Y_NAMES, @@ -41,12 +41,12 @@ ] def transpose(es, new_dims): - '''Transpose an MLDataset - elm.pipeline.steps.Transpose + '''Transpose an xr.Dataset - elm.pipeline.steps.Transpose Parameters: :new_dims: passed to xarray.DataArray.transpose Returns: - :MLDataset transposed + :Dataset: transposed ''' trans = OrderedDict() for layer in es.data_vars: @@ -57,11 +57,11 @@ def transpose(es, new_dims): canvas = attr.asdict(trans[layer].canvas) canvas['dims'] = new_dims trans[layer].attrs['canvas'] = Canvas(**canvas) - return MLDataset(trans, attrs=es.attrs) + return xr.Dataset(trans, attrs=es.attrs) def aggregate_simple(es, **kwargs): - '''aggregate MLDataset - elm.pipeline.steps.Agg + '''aggregate xr.Dataset - elm.pipeline.steps.Agg Parameters: :kwargs: Keywords may contain @@ -70,7 +70,7 @@ def aggregate_simple(es, **kwargs): - :axis: dimension integer Returns: - :MLDataset: aggregated + :Dataset: aggregated ''' func = kwargs['func'] @@ -92,23 +92,23 @@ def aggregate_simple(es, **kwargs): lost_axes.append(data_arr.dims.index(dim) if dim else axis) agged[layer] = getattr(data_arr, func)(**kw) if len(set(lost_axes)) != 1: - raise ValueError('Cannot aggregate when the axis (dim) of aggregation is not the same for all DataArrays in MLDataset') - return MLDataset(agged, attrs=es.attrs, add_canvas=False, lost_axis=lost_axes[0]) + raise ValueError('Cannot aggregate when the axis (dim) of aggregation is not the same for all DataArrays in xr.Dataset') + return xr.Dataset(agged, attrs=es.attrs, add_canvas=False, lost_axis=lost_axes[0]) def select_canvas(es, new_canvas): - '''reindex_like new_canvas for every layer (DataArray) in MLDataset + '''reindex_like new_canvas for every layer (DataArray) in xr.Dataset Parameters: - :es: MLDataset + :es: xr.Dataset :new_canvas: an earthio.Canvas object Returns: - :es: MLDataset where every layer (DataArray) has the same + :es: xr.Dataset where every layer (DataArray) has the same coordinates - those of new_canvas ''' if getattr(es, '_dummy_canvas', False): - raise ValueError('This MLDataset cannot be run through select_canvas because geo transform was not read correctly from input data') + raise ValueError('This xr.Dataset cannot be run through select_canvas because geo transform was not read correctly from input data') es_new_dict = OrderedDict() for layer in es.data_vars: data_arr = getattr(es, layer) @@ -132,13 +132,13 @@ def select_canvas(es, new_canvas): es_new_dict[layer] = data_arr attrs = copy.deepcopy(es.attrs) attrs['canvas'] = new_canvas - es_new = MLDataset(es_new_dict, attrs=attrs) + es_new = xr.Dataset(es_new_dict, attrs=attrs) return es_new def drop_na_rows(flat): - '''Drop any NA rows from MLDataset flat''' + '''Drop any NA rows from xr.Dataset flat''' check_is_flat(flat) flat_dropped = flat.flat.dropna(dim='space') flat_dropped.attrs.update(flat.attrs) @@ -146,20 +146,20 @@ def drop_na_rows(flat): attrs = copy.deepcopy(flat.attrs) attrs.update(flat_dropped.attrs) attrs['shape_before_drop_na_rows'] = flat.flat.values.shape - no_na = MLDataset({'flat': flat_dropped}, attrs=attrs) + no_na = xr.Dataset({'flat': flat_dropped}, attrs=attrs) return no_na def flatten(es, ravel_order='C'): - '''Given an MLDataset with different rasters (DataArray) as layers, + '''Given an xr.Dataset with different rasters (DataArray) as layers, flatten the rasters into a single 2-D DataArray called "flat" - in a new MLDataset. + in a new xr.Dataset. Params: - :elm_store: 3-d MLDataset (layer, y, x) + :elm_store: 3-d xr.Dataset (layer, y, x) Returns: - :elm_store: 2-d MLDataset (space, layer) + :elm_store: 2-d xr.Dataset (space, layer) ''' if check_is_flat(es, raise_err=False): return es @@ -193,7 +193,7 @@ def flatten(es, ravel_order='C'): attrs['old_dims'] = old_dims attrs['flatten_data_array'] = True attrs.update(copy.deepcopy(es.attrs)) - flat = MLDataset({'flat': xr.DataArray(store, + flat = xr.Dataset({'flat': xr.DataArray(store, coords=[('space', np.arange(store.shape[0])), ('layer', layer_names)], dims=('space', @@ -217,7 +217,7 @@ def filled_flattened(na_dropped): attrs.pop('shape_before_drop_na_rows', None) attrs['notnull_shape'] = na_dropped.flat.values.shape layer = attrs['layer_order'] - filled_es = MLDataset({'flat': xr.DataArray(filled, + filled_es = xr.Dataset({'flat': xr.DataArray(filled, coords=[('space', np.arange(shp[0])), ('layer', layer)], dims=('space', 'layer'), @@ -228,17 +228,17 @@ def filled_flattened(na_dropped): def check_is_flat(flat, raise_err=True): - '''Check if an MLDataset has a DataArray called flat with dimensions (space, layer) + '''Check if an xr.Dataset has a DataArray called flat with dimensions (space, layer) Parameters: - :flat: an MLDataset + :flat: an xr.Dataset :raise_err: raise or not Returns: :bool: ``True`` if flat ``False`` or ``ValueError`` if not flat (raise_err=True) ''' if not hasattr(flat, 'flat') or not all(hasattr(flat.flat, at) for at in ('space', 'layer')): - msg = 'Expected an MLDataset/Dataset with attribute "flat" and dims ("space", "layer")' + msg = 'Expected an xr.Dataset with attribute "flat" and dims ("space", "layer")' if raise_err: raise ValueError(msg) else: @@ -247,16 +247,16 @@ def check_is_flat(flat, raise_err=True): def inverse_flatten(flat, add_canvas=False, **attrs): - '''Given an MLDataset that has been flattened to (space, layer) dims, - return a 3-d MLDataset with dims (layer, y, x). Requires that metadata - about x,y dims were preserved when the 2-d input MLDataset was created + '''Given an xr.Dataset that has been flattened to (space, layer) dims, + return a 3-d xr.Dataset with dims (layer, y, x). Requires that metadata + about x,y dims were preserved when the 2-d input xr.Dataset was created Params: - :flat: a 2-d MLDataset (space, layer) - :attrs: attribute dict to update the dict of the returned MLDataset + :flat: a 2-d xr.Dataset (space, layer) + :attrs: attribute dict to update the dict of the returned xr.Dataset Returns: - :es: MLDataset (layer, y, x) + :es: xr.Dataset (layer, y, x) ''' flat = filled_flattened(flat) attrs2 = copy.deepcopy(flat.attrs) @@ -284,4 +284,4 @@ def inverse_flatten(flat, add_canvas=False, **attrs): dims=dims, attrs=attrs) es_new_dict[layer] = data_arr - return MLDataset(es_new_dict, attrs=attrs, add_canvas=add_canvas) + return xr.Dataset(es_new_dict, attrs=attrs, add_canvas=add_canvas) diff --git a/earthio/tests/test_load_array.py b/earthio/tests/test_load_layers.py similarity index 86% rename from earthio/tests/test_load_array.py rename to earthio/tests/test_load_layers.py index 6d523e0..abc2ba0 100644 --- a/earthio/tests/test_load_array.py +++ b/earthio/tests/test_load_layers.py @@ -20,11 +20,11 @@ TRIALS['netcdf'] = NETCDF_FILES[0] @pytest.mark.parametrize('ftype,filename', sorted(TRIALS.items())) -def test_load_array(ftype, filename): +def test_load_layers(ftype, filename): if ftype == 'tif': # avoid memory trouble layer_specs = tif_layer_specs[:3] else: layer_specs = None - assert isinstance(load_array(filename, layer_specs=layer_specs), MLDataset) + assert isinstance(load_layers(filename, layer_specs=layer_specs), xr.Dataset) diff --git a/earthio/tests/test_readers_general.py b/earthio/tests/test_readers_general.py index a8b2a97..7e74c34 100644 --- a/earthio/tests/test_readers_general.py +++ b/earthio/tests/test_readers_general.py @@ -28,7 +28,7 @@ def test_flatten_no_meta(): - '''Tests MLDataset can be flattened / inverse even with no attrs''' + '''Tests xr.Dataset can be flattened / inverse even with no attrs''' es = random_raster() flat = flatten(es) inv = inverse_flatten(flat) @@ -38,7 +38,7 @@ def test_flatten_no_meta(): def test_na_drop_no_meta(): - '''Tests MLDataset can be flattened / inverse even with NaNs + '''Tests xr.Dataset can be flattened / inverse even with NaNs dropped and no attrs''' es = random_raster() flat = flatten(es) diff --git a/earthio/tests/util.py b/earthio/tests/util.py index d4e4fe7..9771a97 100644 --- a/earthio/tests/util.py +++ b/earthio/tests/util.py @@ -2,6 +2,9 @@ import glob import os + +from xarray_filters.tests.test_data import ts_clustering_example + EARTHIO_EXAMPLE_DATA_PATH = os.environ.get('EARTHIO_EXAMPLE_DATA_PATH') if not EARTHIO_EXAMPLE_DATA_PATH: EARTHIO_EXAMPLE_DATA_PATH = os.environ.get('ELM_EXAMPLE_DATA_PATH') diff --git a/earthio/tif.py b/earthio/tif.py index 753de71..b950505 100644 --- a/earthio/tif.py +++ b/earthio/tif.py @@ -6,7 +6,7 @@ Tools for reading GeoTiff files. Typically use the interface through - - :func:`earthio.load_array` + - :func:`earthio.load_layers` - :func:`earthio.`load_meta` ''' @@ -32,7 +32,6 @@ LayerSpec, meta_strings_to_dict) -from earthio import MLDataset from six import string_types logger = logging.getLogger(__name__) @@ -156,7 +155,7 @@ def open_prefilter(filename, meta, **reader_kwargs): raise def load_dir_of_tifs_array(dir_of_tiffs, meta, layer_specs=None): - '''Return an MLDataset where each subdataset is a DataArray + '''Return an xr.Dataset where each subdataset is a DataArray Parameters: :dir_of_tiffs: directory of GeoTiff files where each is a @@ -166,7 +165,7 @@ def load_dir_of_tifs_array(dir_of_tiffs, meta, layer_specs=None): defaulting to reading all subdatasets as layers Returns: - :X: MLDataset + :X: xr.Dataset ''' @@ -213,4 +212,4 @@ def load_dir_of_tifs_array(dir_of_tiffs, meta, layer_specs=None): attrs['layer_order'].append(layer_name) gc.collect() - return MLDataset(elm_store_dict, attrs=attrs) + return xr.Dataset(elm_store_dict, attrs=attrs) diff --git a/earthio/util.py b/earthio/util.py index bbc6a51..0565e49 100644 --- a/earthio/util.py +++ b/earthio/util.py @@ -476,10 +476,10 @@ def _set_na_from_valid_range(values, valid_range): def set_na_from_meta(es, **kwargs): '''Set NaNs based on "valid_range" "invalid_range" and/or "missing" - in MLDataset attrs or DataArray attrs + in xr.Dataset attrs or xr.DataArray attrs Parameters: - :es: earthio.MLDataset + :es: xr.Dataset :kwargs: ignored Recursively searches es's attrs for keys loosely matching: diff --git a/environment.yml b/environment.yml index 90198e9..9f45616 100644 --- a/environment.yml +++ b/environment.yml @@ -42,3 +42,4 @@ dependencies: - six - pip: - cachey + - git+https://github.com/ContinuumIO/xarray_filters.git # for testing