Skip to content

Commit

Permalink
Change load_array->load_layers, MLDataset->xr.Dataset
Browse files (browse the repository at this point in the history)
Name changes, plus having load_layers() return an xr.Dataset object instead of an MLDataset object.
  • Loading branch information
gbrener committed Nov 6, 2017
1 parent 274d9e1 commit e182deb
Show file tree
Hide file tree
Showing 13 changed files with 63 additions and 63 deletions.
1 change: 1 addition & 0 deletions conda.recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,4 @@ test:

requires:
- pytest
- xarray_filters
3 changes: 1 addition & 2 deletions earthio/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
'''Package of readers from common satellite and weather data formats'''
# The modules below use __all__
from xarray_filters.mldataset import *
from earthio.hdf4 import *
from earthio.hdf5 import *
from earthio.netcdf import *
from earthio.tif import *
from earthio.util import *
from earthio.reshape import *
from earthio.load_array import *
from earthio.load_layers import *
from earthio.local_file_iterators import *

7 changes: 3 additions & 4 deletions earthio/hdf4.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
Tools for reading HDF4 files. Typically use the interface through
- :func:`earthio.load_array`
- :func:`earthio.load_layers`
- :func:`earthio.load_meta`
'''
Expand Down Expand Up @@ -62,7 +62,7 @@ def load_hdf4_meta(datafile):


def load_hdf4_array(datafile, meta, layer_specs=None):
'''Return an MLDataset where each subdataset is a DataArray
'''Return an xr.Dataset where each subdataset is a DataArray
Parameters:
:datafile: filename
Expand All @@ -76,7 +76,6 @@ def load_hdf4_array(datafile, meta, layer_specs=None):
'''
import gdal
from gdalconst import GA_ReadOnly
from earthio import MLDataset
from earthio.metadata_selection import match_meta
logger.debug('load_hdf4_array: {}'.format(datafile))
f = gdal.Open(datafile, GA_ReadOnly)
Expand Down Expand Up @@ -138,4 +137,4 @@ def load_hdf4_array(datafile, meta, layer_specs=None):
attrs = copy.deepcopy(attrs)
attrs['layer_order'] = layer_order
gc.collect()
return MLDataset(elm_store_data, attrs=attrs)
return xr.Dataset(elm_store_data, attrs=attrs)
9 changes: 4 additions & 5 deletions earthio/hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
Tools for reading HDF5 files. Typically use the interface through
- :func:`earthio.load_array`
- :func:`earthio.load_layers`
- :func:`earthio.load_meta`
'''
Expand All @@ -32,7 +32,6 @@
window_to_gdal_read_kwargs,
meta_strings_to_dict)

from earthio import MLDataset
from earthio.metadata_selection import match_meta

__all__ = [
Expand Down Expand Up @@ -97,7 +96,7 @@ def load_subdataset(subdataset, attrs, layer_spec, **reader_kwargs):


def load_hdf5_array(datafile, meta, layer_specs):
'''Return an MLDataset where each subdataset is a DataArray
'''Return an xr.Dataset where each subdataset is a DataArray
Parameters:
:datafile: filename
Expand All @@ -107,7 +106,7 @@ def load_hdf5_array(datafile, meta, layer_specs):
as layers
Returns:
:es: An MLDataset
:es: An xr.Dataset
'''
import gdal
from gdalconst import GA_ReadOnly
Expand Down Expand Up @@ -151,4 +150,4 @@ def load_hdf5_array(datafile, meta, layer_specs):
attrs = copy.deepcopy(attrs)
attrs['layer_order'] = layer_order
gc.collect()
return MLDataset(elm_store_data, attrs=attrs)
return xr.Dataset(elm_store_data, attrs=attrs)
12 changes: 6 additions & 6 deletions earthio/load_array.py → earthio/load_layers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
'''
------------------
``earthio.load_array``
``earthio.load_layers``
++++++++++++++++++++++++++
load_array returns an MLDataset for HDF, NetCDF, GeoTiff files
load_layers returns an xr.Dataset for HDF, NetCDF, GeoTiff files
'''

from __future__ import absolute_import, division, print_function, unicode_literals
Expand All @@ -18,7 +18,7 @@
from earthio.hdf5 import load_hdf5_array, load_hdf5_meta
from earthio.tif import load_dir_of_tifs_meta,load_dir_of_tifs_array

__all__ = ['load_array', 'load_meta']
__all__ = ['load_layers', 'load_meta']

EXT = OrderedDict([
('netcdf', ('nc', 'nc\d',)),
Expand All @@ -45,8 +45,8 @@ def _find_file_type(filename):
return ftype


def load_array(filename, meta=None, layer_specs=None, reader=None):
'''Create MLDataset from HDF4 / 5 or NetCDF files or TIF directories
def load_layers(filename, meta=None, layer_specs=None, reader=None):
'''Create xr.Dataset from HDF4 / 5 or NetCDF files or TIF directories
Parameters:
:filename: filename (HDF4 / 5 or NetCDF) or directory name (TIF)
Expand All @@ -55,7 +55,7 @@ def load_array(filename, meta=None, layer_specs=None, reader=None):
:reader: named reader from earthio - one of: ('tif', 'hdf4', 'hdf5', 'netcdf')
Returns:
:es: MLDataset (xarray.Dataset) with layers specified by layer_specs as DataArrays in "data_vars" attribute
:es: xr.Dataset with layers specified by layer_specs as DataArrays in "data_vars" attribute
'''
ftype = reader or _find_file_type(filename)
if meta is None:
Expand Down
7 changes: 3 additions & 4 deletions earthio/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
Tools for reading NetCDF files. Typically use the interface through
- :func:`earthio.load_array`
- :func:`earthio.load_layers`
- :func:`earthio.load_meta`
'''
Expand All @@ -24,7 +24,6 @@
VALID_X_NAMES, VALID_Y_NAMES,
take_geo_transform_from_meta,
meta_strings_to_dict)
from earthio import MLDataset
from earthio.metadata_selection import match_meta
from six import string_types

Expand Down Expand Up @@ -114,7 +113,7 @@ def load_netcdf_array(datafile, meta, layer_specs=None):
:variables: dict<str:str>, list<str>: list of variables to load
Returns:
:new_es: MLDataset xarray.Dataset
:new_es: xr.Dataset
'''
logger.debug('load_netcdf_array: {}'.format(datafile))
ds = xr.open_dataset(datafile)
Expand All @@ -137,7 +136,7 @@ def load_netcdf_array(datafile, meta, layer_specs=None):
for b, sub_dataset_name in zip(meta['layer_meta'], data):
b['geo_transform'] = meta['geo_transform'] = geo_transform
b['sub_dataset_name'] = sub_dataset_name
new_es = MLDataset(data,
new_es = xr.Dataset(data,
coords=_normalize_coords(ds),
attrs=meta)
return new_es
62 changes: 31 additions & 31 deletions earthio/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import scipy.interpolate as spi
import xarray as xr

from earthio import MLDataset, Canvas
from earthio import Canvas
from earthio.util import (canvas_to_coords,
VALID_X_NAMES,
VALID_Y_NAMES,
Expand All @@ -41,12 +41,12 @@
]

def transpose(es, new_dims):
'''Transpose an MLDataset - elm.pipeline.steps.Transpose
'''Transpose an xr.Dataset - elm.pipeline.steps.Transpose
Parameters:
:new_dims: passed to xarray.DataArray.transpose
Returns:
:MLDataset transposed
:Dataset: transposed
'''
trans = OrderedDict()
for layer in es.data_vars:
Expand All @@ -57,11 +57,11 @@ def transpose(es, new_dims):
canvas = attr.asdict(trans[layer].canvas)
canvas['dims'] = new_dims
trans[layer].attrs['canvas'] = Canvas(**canvas)
return MLDataset(trans, attrs=es.attrs)
return xr.Dataset(trans, attrs=es.attrs)


def aggregate_simple(es, **kwargs):
'''aggregate MLDataset - elm.pipeline.steps.Agg
'''aggregate xr.Dataset - elm.pipeline.steps.Agg
Parameters:
:kwargs: Keywords may contain
Expand All @@ -70,7 +70,7 @@ def aggregate_simple(es, **kwargs):
- :axis: dimension integer
Returns:
:MLDataset: aggregated
:Dataset: aggregated
'''
func = kwargs['func']
Expand All @@ -92,23 +92,23 @@ def aggregate_simple(es, **kwargs):
lost_axes.append(data_arr.dims.index(dim) if dim else axis)
agged[layer] = getattr(data_arr, func)(**kw)
if len(set(lost_axes)) != 1:
raise ValueError('Cannot aggregate when the axis (dim) of aggregation is not the same for all DataArrays in MLDataset')
return MLDataset(agged, attrs=es.attrs, add_canvas=False, lost_axis=lost_axes[0])
raise ValueError('Cannot aggregate when the axis (dim) of aggregation is not the same for all DataArrays in xr.Dataset')
return xr.Dataset(agged, attrs=es.attrs, add_canvas=False, lost_axis=lost_axes[0])


def select_canvas(es, new_canvas):
'''reindex_like new_canvas for every layer (DataArray) in MLDataset
'''reindex_like new_canvas for every layer (DataArray) in xr.Dataset
Parameters:
:es: MLDataset
:es: xr.Dataset
:new_canvas: an earthio.Canvas object
Returns:
:es: MLDataset where every layer (DataArray) has the same
:es: xr.Dataset where every layer (DataArray) has the same
coordinates - those of new_canvas
'''
if getattr(es, '_dummy_canvas', False):
raise ValueError('This MLDataset cannot be run through select_canvas because geo transform was not read correctly from input data')
raise ValueError('This xr.Dataset cannot be run through select_canvas because geo transform was not read correctly from input data')
es_new_dict = OrderedDict()
for layer in es.data_vars:
data_arr = getattr(es, layer)
Expand All @@ -132,34 +132,34 @@ def select_canvas(es, new_canvas):
es_new_dict[layer] = data_arr
attrs = copy.deepcopy(es.attrs)
attrs['canvas'] = new_canvas
es_new = MLDataset(es_new_dict, attrs=attrs)
es_new = xr.Dataset(es_new_dict, attrs=attrs)

return es_new


def drop_na_rows(flat):
'''Drop any NA rows from MLDataset flat'''
'''Drop any NA rows from xr.Dataset flat'''
check_is_flat(flat)
flat_dropped = flat.flat.dropna(dim='space')
flat_dropped.attrs.update(flat.attrs)
flat_dropped.attrs['drop_na_rows'] = flat.flat.values.shape[0] - flat_dropped.shape[0]
attrs = copy.deepcopy(flat.attrs)
attrs.update(flat_dropped.attrs)
attrs['shape_before_drop_na_rows'] = flat.flat.values.shape
no_na = MLDataset({'flat': flat_dropped}, attrs=attrs)
no_na = xr.Dataset({'flat': flat_dropped}, attrs=attrs)
return no_na


def flatten(es, ravel_order='C'):
'''Given an MLDataset with different rasters (DataArray) as layers,
'''Given an xr.Dataset with different rasters (DataArray) as layers,
flatten the rasters into a single 2-D DataArray called "flat"
in a new MLDataset.
in a new xr.Dataset.
Params:
:elm_store: 3-d MLDataset (layer, y, x)
:elm_store: 3-d xr.Dataset (layer, y, x)
Returns:
:elm_store: 2-d MLDataset (space, layer)
:elm_store: 2-d xr.Dataset (space, layer)
'''
if check_is_flat(es, raise_err=False):
return es
Expand Down Expand Up @@ -193,7 +193,7 @@ def flatten(es, ravel_order='C'):
attrs['old_dims'] = old_dims
attrs['flatten_data_array'] = True
attrs.update(copy.deepcopy(es.attrs))
flat = MLDataset({'flat': xr.DataArray(store,
flat = xr.Dataset({'flat': xr.DataArray(store,
coords=[('space', np.arange(store.shape[0])),
('layer', layer_names)],
dims=('space',
Expand All @@ -217,7 +217,7 @@ def filled_flattened(na_dropped):
attrs.pop('shape_before_drop_na_rows', None)
attrs['notnull_shape'] = na_dropped.flat.values.shape
layer = attrs['layer_order']
filled_es = MLDataset({'flat': xr.DataArray(filled,
filled_es = xr.Dataset({'flat': xr.DataArray(filled,
coords=[('space', np.arange(shp[0])),
('layer', layer)],
dims=('space', 'layer'),
Expand All @@ -228,17 +228,17 @@ def filled_flattened(na_dropped):


def check_is_flat(flat, raise_err=True):
'''Check if an MLDataset has a DataArray called flat with dimensions (space, layer)
'''Check if an xr.Dataset has a DataArray called flat with dimensions (space, layer)
Parameters:
:flat: an MLDataset
:flat: an xr.Dataset
:raise_err: raise or not
Returns:
:bool: ``True`` if flat; if not flat, ``False`` or raises ``ValueError`` (raise_err=True)
'''
if not hasattr(flat, 'flat') or not all(hasattr(flat.flat, at) for at in ('space', 'layer')):
msg = 'Expected an MLDataset/Dataset with attribute "flat" and dims ("space", "layer")'
msg = 'Expected an xr.Dataset with attribute "flat" and dims ("space", "layer")'
if raise_err:
raise ValueError(msg)
else:
Expand All @@ -247,16 +247,16 @@ def check_is_flat(flat, raise_err=True):


def inverse_flatten(flat, add_canvas=False, **attrs):
'''Given an MLDataset that has been flattened to (space, layer) dims,
return a 3-d MLDataset with dims (layer, y, x). Requires that metadata
about x,y dims were preserved when the 2-d input MLDataset was created
'''Given an xr.Dataset that has been flattened to (space, layer) dims,
return a 3-d xr.Dataset with dims (layer, y, x). Requires that metadata
about x,y dims were preserved when the 2-d input xr.Dataset was created
Params:
:flat: a 2-d MLDataset (space, layer)
:attrs: attribute dict to update the dict of the returned MLDataset
:flat: a 2-d xr.Dataset (space, layer)
:attrs: attribute dict to update the dict of the returned xr.Dataset
Returns:
:es: MLDataset (layer, y, x)
:es: xr.Dataset (layer, y, x)
'''
flat = filled_flattened(flat)
attrs2 = copy.deepcopy(flat.attrs)
Expand Down Expand Up @@ -284,4 +284,4 @@ def inverse_flatten(flat, add_canvas=False, **attrs):
dims=dims,
attrs=attrs)
es_new_dict[layer] = data_arr
return MLDataset(es_new_dict, attrs=attrs, add_canvas=add_canvas)
return xr.Dataset(es_new_dict, attrs=attrs, add_canvas=add_canvas)
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
TRIALS['netcdf'] = NETCDF_FILES[0]

@pytest.mark.parametrize('ftype,filename', sorted(TRIALS.items()))
def test_load_array(ftype, filename):
def test_load_layers(ftype, filename):
if ftype == 'tif':
# avoid memory trouble
layer_specs = tif_layer_specs[:3]
else:
layer_specs = None
assert isinstance(load_array(filename, layer_specs=layer_specs), MLDataset)
assert isinstance(load_layers(filename, layer_specs=layer_specs), xr.Dataset)

4 changes: 2 additions & 2 deletions earthio/tests/test_readers_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@


def test_flatten_no_meta():
'''Tests MLDataset can be flattened / inverse even with no attrs'''
'''Tests xr.Dataset can be flattened / inverse even with no attrs'''
es = random_raster()
flat = flatten(es)
inv = inverse_flatten(flat)
Expand All @@ -38,7 +38,7 @@ def test_flatten_no_meta():


def test_na_drop_no_meta():
'''Tests MLDataset can be flattened / inverse even with NaNs
'''Tests xr.Dataset can be flattened / inverse even with NaNs
dropped and no attrs'''
es = random_raster()
flat = flatten(es)
Expand Down
3 changes: 3 additions & 0 deletions earthio/tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import glob
import os

from xarray_filters.tests.test_data import ts_clustering_example

EARTHIO_EXAMPLE_DATA_PATH = os.environ.get('EARTHIO_EXAMPLE_DATA_PATH')
if not EARTHIO_EXAMPLE_DATA_PATH:
EARTHIO_EXAMPLE_DATA_PATH = os.environ.get('ELM_EXAMPLE_DATA_PATH')
Expand Down
Loading

0 comments on commit e182deb

Please sign in to comment.