From f979f0244ddd96f1b9409edda44b33cd13a3ed0d Mon Sep 17 00:00:00 2001 From: Leonardo Uieda Date: Mon, 9 Apr 2018 18:31:17 -1000 Subject: [PATCH] Create LibGMT.grid_to_vfile for xarray input support (#159) Implements a new context manager `LibGMT.grid_to_vfile` that takes an `xarray.DataArray`, creates a `GMT_MATRIX` to store the data, passes it along to a virtual file, and yields the virtual file name. This is the main building block for supporting the `grd*` commands (#124). --- gmt/clib/core.py | 212 +++++++++++++++++++++++++++++------------ gmt/clib/utils.py | 108 ++++++++++++++++++++- gmt/tests/test_clib.py | 32 ++++++- 3 files changed, 284 insertions(+), 68 deletions(-) diff --git a/gmt/clib/core.py b/gmt/clib/core.py index 0516fb345a5..10836bc323a 100644 --- a/gmt/clib/core.py +++ b/gmt/clib/core.py @@ -9,7 +9,8 @@ import numpy as np from ..exceptions import GMTCLibError, GMTCLibNoSessionError, GMTInvalidInput -from .utils import load_libgmt, kwargs_to_ctypes_array, vectors_to_arrays +from .utils import load_libgmt, kwargs_to_ctypes_array, vectors_to_arrays, \ + dataarray_to_matrix, as_c_contiguous class LibGMT(): # pylint: disable=too-many-instance-attributes @@ -77,6 +78,11 @@ class LibGMT(): # pylint: disable=too-many-instance-attributes 'GMT_OUTPUT', ] + grid_registrations = [ + 'GMT_GRID_PIXEL_REG', + 'GMT_GRID_NODE_REG', + ] + # Map numpy dtypes to GMT types _dtypes = { 'float64': 'GMT_DOUBLE', @@ -377,9 +383,9 @@ def create_data(self, family, geometry, mode, **kwargs): The increments between points of the dataset. See the C function documentation. registration : int - The node registration (what the coordinates mean). Also a very - complex argument. See the C function documentation. Defaults to - ``GMT_GRID_NODE_REG``. + The node registration (what the coordinates mean). Can be + ``'GMT_GRID_PIXEL_REG'`` or ``'GMT_GRID_NODE_REG'``. Defaults to + ``'GMT_GRID_NODE_REG'``. pad : int The grid padding. Defaults to ``GMT_PAD_DEFAULT``. @@ -405,33 +411,35 @@ def create_data(self, family, geometry, mode, **kwargs): ] c_create_data.restype = ctypes.c_void_p - # Parse and check input arguments family_int = self._parse_constant(family, valid=self.data_families, valid_modifiers=self.data_vias) - if mode not in self.data_modes: - raise GMTInvalidInput( - "Invalid data creation mode '{}'.".format(mode)) - geometry_int = self._parse_constant(geometry, - valid=self.data_geometries) - # dim is required (get a segmentation fault if passing it as None - if 'dim' not in kwargs: - kwargs['dim'] = [0]*4 - # Convert dim, ranges, and inc to ctypes arrays if given + mode_int = self._parse_constant(mode, valid=self.data_modes) + geometry_int = self._parse_constant( + geometry, valid=self.data_geometries) + registration_int = self._parse_constant( + kwargs.get('registration', 'GMT_GRID_NODE_REG'), + valid=self.grid_registrations) + + # Convert dim, ranges, and inc to ctypes arrays if given (will be None + # if not given to represent NULL pointers) dim = kwargs_to_ctypes_array('dim', kwargs, ctypes.c_uint64*4) ranges = kwargs_to_ctypes_array('ranges', kwargs, ctypes.c_double*4) inc = kwargs_to_ctypes_array('inc', kwargs, ctypes.c_double*2) - pad = self._parse_pad(family, kwargs) - - # Use the GMT defaults if no value is given - registration = kwargs.get('registration', - self.get_constant('GMT_GRID_NODE_REG')) # Use a NULL pointer (None) for existing data to indicate that the # container should be created empty. Fill it in later using put_vector # and put_matrix. - data_ptr = c_create_data(self.current_session, family_int, - geometry_int, self.get_constant(mode), dim, - ranges, inc, registration, pad, None) + data_ptr = c_create_data( + self.current_session, + family_int, + geometry_int, + mode_int, + dim, + ranges, + inc, + registration_int, + self._parse_pad(family, kwargs), + None) if data_ptr is None: raise GMTCLibError("Failed to create an empty GMT data pointer.") @@ -534,6 +542,11 @@ def _check_dtype_and_dim(self, array, ndim): ... gmttype = lib._check_dtype_and_dim(data, ndim=1) ... gmttype == lib.get_constant('GMT_DOUBLE') True + >>> data = np.ones((5, 2), dtype='float32') + >>> with LibGMT() as lib: + ... gmttype = lib._check_dtype_and_dim(data, ndim=2) + ... gmttype == lib.get_constant('GMT_FLOAT') + True """ if array.dtype.name not in self._dtypes: @@ -746,6 +759,7 @@ def open_virtual_file(self, family, geometry, direction, data): Examples -------- + >>> from gmt.helpers import GMTTempFile >>> import os >>> import numpy as np >>> x = np.array([0, 1, 2, 3, 4]) @@ -764,15 +778,12 @@ def open_virtual_file(self, family, geometry, direction, data): ... # Add the dataset to a virtual file ... vfargs = (family, geometry, 'GMT_IN', dataset) ... with lib.open_virtual_file(*vfargs) as vfile: - ... # Send the output to a file so that we can read it - ... ofile = 'virtual_file_example.txt' - ... lib.call_module('info', '{} ->{}'.format(vfile, ofile)) - >>> with open(ofile) as f: - ... # Replace tabs with spaces - ... print(f.read().strip().replace('\\t', ' ')) + ... # Send the output to a temp file so that we can read it + ... with GMTTempFile() as ofile: + ... args = '{} ->{}'.format(vfile, ofile.name) + ... lib.call_module('info', args) + ... print(ofile.read().strip()) : N = 5 <0/4> <5/9> - >>> # Clean up the output file - >>> os.remove(ofile) """ c_open_virtualfile = self._libgmt.GMT_Open_VirtualFile @@ -789,9 +800,9 @@ def open_virtual_file(self, family, geometry, direction, data): valid_modifiers=self.data_vias) geometry_int = self._parse_constant(geometry, valid=self.data_geometries) - if direction not in ('GMT_IN', 'GMT_OUT'): - raise GMTInvalidInput("Invalid direction '{}'.".format(direction)) - direction_int = self.get_constant(direction) + direction_int = self._parse_constant( + direction, valid=['GMT_IN', 'GMT_OUT'], + valid_modifiers=['GMT_IS_REFERENCE']) buff = ctypes.create_string_buffer(self.get_constant('GMT_STR16')) @@ -814,7 +825,7 @@ def open_virtual_file(self, family, geometry, direction, data): @contextmanager def vectors_to_vfile(self, *vectors): """ - Store 1d arrays in a GMT virtual file to pass them to a module. + Store 1d arrays in a GMT virtual file to use as a module input. Context manager (use in a ``with`` block). Yields the virtual file name that you can pass as an argument to a GMT module call. Closes the @@ -825,10 +836,11 @@ def vectors_to_vfile(self, *vectors): :meth:`~gmt.clib.LibGMT.put_vector`, and :meth:`~gmt.clib.LibGMT.open_virtual_file` - The virtual file will contain the arrays as a GMT Dataset (via - vectors). If the arrays are C contiguous blocks of memory, they will be - passed without copying to GMT. If they are not (e.g., they are columns - of a 2D array), they will need to be copied to a contiguous block. + The virtual file will contain the arrays as ``GMT Vector`` structures. + + If the arrays are C contiguous blocks of memory, they will be passed + without copying to GMT. If they are not (e.g., they are columns of a 2D + array), they will need to be copied to a contiguous block. Parameters ---------- @@ -845,6 +857,7 @@ def vectors_to_vfile(self, *vectors): Examples -------- + >>> from gmt.helpers import GMTTempFile >>> import numpy as np >>> import pandas as pd >>> x = [1, 2, 3] @@ -853,16 +866,20 @@ def vectors_to_vfile(self, *vectors): >>> with LibGMT() as lib: ... with lib.vectors_to_vfile(x, y, z) as vfile: ... # Send the output to a file so that we can read it - ... ofile = 'vectors_to_vfile_example.txt' - ... lib.call_module('info', '{} ->{}'.format(vfile, ofile)) - >>> with open(ofile) as f: - ... # Replace tabs with spaces - ... print(f.read().strip().replace('\\t', ' ')) + ... with GMTTempFile() as ofile: + ... args = '{} ->{}'.format(vfile, ofile.name) + ... lib.call_module('info', args) + ... print(ofile.read().strip()) : N = 3 <1/3> <4/6> <7/9> - >>> # Clean up the output file - >>> os.remove(ofile) """ + # Conversion to a C-contiguous array needs to be done here and not in + # put_matrix because we need to maintain a reference to the copy while + # it is being used by the C API. Otherwise, the array would be garbage + # collected and the memory freed. Creating it in this context manager + # guarantees that the copy will be around until the virtual file is + # closed. + # The conversion is implicit in vectors_to_arrays. arrays = vectors_to_arrays(vectors) columns = len(arrays) @@ -886,14 +903,13 @@ def vectors_to_vfile(self, *vectors): @contextmanager def matrix_to_vfile(self, matrix): """ - Store a 2d array in a GMT virtual file Dataset to pass it to a module. + Store a 2d array in a GMT virtual file to use as a module input. Context manager (use in a ``with`` block). Yields the virtual file name that you can pass as an argument to a GMT module call. Closes the virtual file upon exit of the ``with`` block. - The virtual file will contain the array as a GMT Dataset (via matrix). - + The virtual file will contain the array as a ``GMT_MATRIX``. **Not meant for creating GMT Grids**. The grid requires more metadata than just the data matrix. This creates a Dataset (table). @@ -925,6 +941,7 @@ def matrix_to_vfile(self, matrix): Examples -------- + >>> from gmt.helpers import GMTTempFile >>> import numpy as np >>> data = np.arange(12).reshape((4, 3)) >>> print(data) @@ -935,21 +952,21 @@ def matrix_to_vfile(self, matrix): >>> with LibGMT() as lib: ... with lib.matrix_to_vfile(data) as vfile: ... # Send the output to a file so that we can read it - ... ofile = 'matrix_to_vfile_example.txt' - ... lib.call_module('info', '{} ->{}'.format(vfile, ofile)) - >>> with open(ofile) as f: - ... # Replace tabs with spaces - ... print(f.read().strip().replace('\\t', ' ')) + ... with GMTTempFile() as ofile: + ... args = '{} ->{}'.format(vfile, ofile.name) + ... lib.call_module('info', args) + ... print(ofile.read().strip()) : N = 4 <0/9> <1/10> <2/11> - >>> # Clean up the output file - >>> os.remove(ofile) """ + # Conversion to a C-contiguous array needs to be done here and not in + # put_matrix because we need to maintain a reference to the copy while + # it is being used by the C API. Otherwise, the array would be garbage + # collected and the memory freed. Creating it in this context manager + # guarantees that the copy will be around until the virtual file is + # closed. + matrix = as_c_contiguous(matrix) rows, columns = matrix.shape - # Array must be in C contiguous order to pass its memory pointer to - # GMT. - if not matrix.flags.c_contiguous: - matrix = matrix.copy(order='C') family = 'GMT_IS_DATASET|GMT_VIA_MATRIX' geometry = 'GMT_IS_POINT' @@ -963,6 +980,83 @@ def matrix_to_vfile(self, matrix): with self.open_virtual_file(*vf_args) as vfile: yield vfile + @contextmanager + def grid_to_vfile(self, grid): + """ + Store a grid in a GMT virtual file to use as a module input. + + Used to pass grid data into GMT modules. Grids must be + ``xarray.DataArray`` instances. + + Context manager (use in a ``with`` block). Yields the virtual file name + that you can pass as an argument to a GMT module call. Closes the + virtual file upon exit of the ``with`` block. + + The virtual file will contain the grid as a ``GMT_MATRIX``. + + Use this instead of creating ``GMT_GRID`` and virtual files by hand + with :meth:`~gmt.clib.LibGMT.create_data`, + :meth:`~gmt.clib.LibGMT.put_matrix`, and + :meth:`~gmt.clib.LibGMT.open_virtual_file` + + The grid data matrix must be C contiguous in memory. If it is not + (e.g., it is a slice of a larger array), the array will be copied to + make sure it is. + + Parameters + ---------- + grid : xarray.DataArraw + The grid that will be included in the virtual file. + + Yields + ------ + vfile : str + The name of virtual file. Pass this as a file name argument to a + GMT module. + + Examples + -------- + + >>> from gmt.datasets import load_earth_relief + >>> from gmt.helpers import GMTTempFile + >>> data = load_earth_relief(resolution='60m') + >>> print(data.shape) + (181, 361) + >>> print(data.lon.values.min(), data.lon.values.max()) + -180.0 180.0 + >>> print(data.lat.values.min(), data.lat.values.max()) + -90.0 90.0 + >>> print(data.values.min(), data.values.max()) + -8425.0 5551.0 + >>> with LibGMT() as lib: + ... with lib.grid_to_vfile(data) as vfile: + ... # Send the output to a file so that we can read it + ... with GMTTempFile() as ofile: + ... args = '{} -L0 -Cn ->{}'.format(vfile, ofile.name) + ... lib.call_module('grdinfo', args) + ... print(ofile.read().strip()) + -180 180 -90 90 -8425 5551 1 1 361 181 + >>> # The output is: w e s n z0 z1 dx dy n_columns n_rows + + """ + # Conversion to a C-contiguous array needs to be done here and not in + # put_matrix because we need to maintain a reference to the copy while + # it is being used by the C API. Otherwise, the array would be garbage + # collected and the memory freed. Creating it in this context manager + # guarantees that the copy will be around until the virtual file is + # closed. + # The conversion is implicit in dataarray_to_matrix. + matrix, region, inc = dataarray_to_matrix(grid) + family = 'GMT_IS_GRID|GMT_VIA_MATRIX' + geometry = 'GMT_IS_SURFACE' + gmt_grid = self.create_data(family, geometry, + mode='GMT_CONTAINER_ONLY', + ranges=region, inc=inc) + self.put_matrix(gmt_grid, matrix) + args = (family, geometry, 'GMT_IN|GMT_IS_REFERENCE', gmt_grid) + with self.open_virtual_file(*args) as vfile: + yield vfile + def extract_region(self): """ Extract the WESN bounding box of the currently active figure. diff --git a/gmt/clib/utils.py b/gmt/clib/utils.py index 7561aaa0b04..cb555ebbe22 100644 --- a/gmt/clib/utils.py +++ b/gmt/clib/utils.py @@ -7,7 +7,105 @@ import numpy as np import pandas -from ..exceptions import GMTOSError, GMTCLibError, GMTCLibNotFoundError +from ..exceptions import GMTOSError, GMTCLibError, GMTCLibNotFoundError, \ + GMTInvalidInput + + +def dataarray_to_matrix(grid): + """ + Transform a xarray.DataArray into a data 2D array and metadata. + + Use this to extract the underlying numpy array of data and the region and + increment for the grid. + + Only allows grids with two dimensions and constant grid spacing (GMT + doesn't allow variable grid spacing). + + If the underlying data array is not C contiguous, for example if it's a + slice of a larger grid, a copy will need to be generated. + + Parameters + ---------- + grid : xarray.DataArray + The input grid as a DataArray instance. Information is retrieved from + the coordinate arrays, not from headers. + + Returns + ------- + matrix : 2d-array + The 2D array of data from the grid. + region : list + The West, East, South, North boundaries of the grid. + inc : list + The grid spacing in East-West and North-South, respectively. + + Raises + ------ + GMTInvalidInput + If the grid has more than two dimensions or variable grid spacing. + + Examples + -------- + + >>> from gmt.datasets import load_earth_relief + >>> # Use the global Earth relief grid with 1 degree spacing (60') + >>> grid = load_earth_relief(resolution='60m') + >>> matrix, region, inc = dataarray_to_matrix(grid) + >>> print(region) + [-180.0, 180.0, -90.0, 90.0] + >>> print(inc) + [1.0, 1.0] + >>> type(matrix) + + >>> print(matrix.shape) + (181, 361) + >>> matrix.flags.c_contiguous + True + >>> # Using a slice of the grid, the matrix will be copied to guarantee + >>> # that it's C-contiguous in memory. The increment should be unchanged. + >>> matrix, region, inc = dataarray_to_matrix(grid[10:41,30:101]) + >>> matrix.flags.c_contiguous + True + >>> print(matrix.shape) + (31, 71) + >>> print(region) + [-150.0, -80.0, -80.0, -50.0] + >>> print(inc) + [1.0, 1.0] + >>> # but not if only taking every other grid point. + >>> matrix, region, inc = dataarray_to_matrix(grid[10:41:2,30:101:2]) + >>> matrix.flags.c_contiguous + True + >>> print(matrix.shape) + (16, 36) + >>> print(region) + [-150.0, -80.0, -80.0, -50.0] + >>> print(inc) + [2.0, 2.0] + + """ + if len(grid.dims) != 2: + raise GMTInvalidInput( + "Invalid number of grid dimensions '{}'. Must be 2." + .format(len(grid.dims))) + # Extract region and inc from the grid + region = [] + inc = [] + # Reverse the dims because it is rows, columns ordered. In geographic + # grids, this would be North-South, East-West. GMT's region and inc are + # East-West, North-South. + for dim in grid.dims[::-1]: + coord = grid.coords[dim].values + coord_incs = coord[1:] - coord[0:-1] + coord_inc = coord_incs[0] + if not np.allclose(coord_incs, coord_inc): + raise GMTInvalidInput( + "Grid appears to have irregular spacing in the '{}' dimension." + .format(dim)) + region.extend([coord.min(), coord.max()]) + inc.append(coord_inc) + matrix = as_c_contiguous(grid.values[:]) + return matrix, region, inc def vectors_to_arrays(vectors): @@ -53,11 +151,11 @@ def vectors_to_arrays(vectors): True """ - arrays = [_as_c_contiguous(_as_array(i)) for i in vectors] + arrays = [as_c_contiguous(_as_array(i)) for i in vectors] return arrays -def _as_c_contiguous(array): +def as_c_contiguous(array): """ Ensure a numpy array is C contiguous in memory. @@ -83,7 +181,7 @@ def _as_c_contiguous(array): array([1, 3, 5]) >>> x.flags.c_contiguous False - >>> new_x = _as_c_contiguous(x) + >>> new_x = as_c_contiguous(x) >>> new_x array([1, 3, 5]) >>> new_x.flags.c_contiguous @@ -91,7 +189,7 @@ def _as_c_contiguous(array): >>> x = np.array([8, 9, 10]) >>> x.flags.c_contiguous True - >>> _as_c_contiguous(x).flags.c_contiguous + >>> as_c_contiguous(x).flags.c_contiguous True """ diff --git a/gmt/tests/test_clib.py b/gmt/tests/test_clib.py index c847dd8fdfb..19dedfbac83 100644 --- a/gmt/tests/test_clib.py +++ b/gmt/tests/test_clib.py @@ -8,10 +8,12 @@ import pytest import numpy as np import numpy.testing as npt -import pandas +import pandas as pd +import xarray as xr from ..clib.core import LibGMT -from ..clib.utils import clib_extension, load_libgmt, check_libgmt +from ..clib.utils import clib_extension, load_libgmt, check_libgmt, \ + dataarray_to_matrix from ..exceptions import GMTCLibError, GMTOSError, GMTCLibNotFoundError, \ GMTCLibNoSessionError, GMTInvalidInput from ..helpers import GMTTempFile @@ -439,7 +441,7 @@ def test_put_matrix_grid(): mode='GMT_CONTAINER_ONLY', ranges=wesn[:4], inc=inc, - registration=lib.get_constant('GMT_GRID_NODE_REG'), + registration='GMT_GRID_NODE_REG', ) data = np.arange(shape[0]*shape[1], dtype=dtype).reshape(shape) lib.put_matrix(grid, matrix=data) @@ -610,7 +612,7 @@ def test_vectors_to_vfile_pandas(): dtypes = 'float32 float64 int32 int64 uint32 uint64'.split() size = 13 for dtype in dtypes: - data = pandas.DataFrame( + data = pd.DataFrame( data=dict(x=np.arange(size, dtype=dtype), y=np.arange(size, size*2, 1, dtype=dtype), z=np.arange(size*2, size*3, 1, dtype=dtype)) @@ -690,3 +692,25 @@ def test_write_data_fails(): with pytest.raises(GMTCLibError): lib.write_data('GMT_IS_VECTOR', 'GMT_IS_POINT', 'GMT_WRITE_SET', [1]*6, 'some-file-name', None) + + +def test_dataarray_to_matrix_dims_fails(): + "Check that it fails for > 2 dims" + # Make a 3D regular grid + data = np.ones((10, 12, 11), dtype='float32') + x = np.arange(11) + y = np.arange(12) + z = np.arange(10) + grid = xr.DataArray(data, coords=[('z', z), ('y', y), ('x', x)]) + with pytest.raises(GMTInvalidInput): + dataarray_to_matrix(grid) + + +def test_dataarray_to_matrix_inc_fails(): + "Check that it fails for variable increments" + data = np.ones((4, 5), dtype='float64') + x = np.linspace(0, 1, 5) + y = np.logspace(2, 3, 4) + grid = xr.DataArray(data, coords=[('y', y), ('x', x)]) + with pytest.raises(GMTInvalidInput): + dataarray_to_matrix(grid)