From c811e650eea385355f367131557712309e759b7b Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Mon, 13 Jul 2020 14:47:23 +1200 Subject: [PATCH 01/27] Move test_put_* from test_clib to test_clib_put --- pygmt/tests/test_clib.py | 153 -------------------------------- pygmt/tests/test_clib_put.py | 164 +++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+), 153 deletions(-) create mode 100644 pygmt/tests/test_clib_put.py diff --git a/pygmt/tests/test_clib.py b/pygmt/tests/test_clib.py index 5fa8a1ae88d..b593208af36 100644 --- a/pygmt/tests/test_clib.py +++ b/pygmt/tests/test_clib.py @@ -304,159 +304,6 @@ def test_create_data_fails(): ) -def test_put_vector(): - "Check that assigning a numpy array to a dataset works" - dtypes = "float32 float64 int32 int64 uint32 uint64".split() - for dtype in dtypes: - with clib.Session() as lib: - dataset = lib.create_data( - family="GMT_IS_DATASET|GMT_VIA_VECTOR", - geometry="GMT_IS_POINT", - mode="GMT_CONTAINER_ONLY", - dim=[3, 5, 1, 0], # columns, rows, layers, dtype - ) - x = np.array([1, 2, 3, 4, 5], dtype=dtype) - y = np.array([6, 7, 8, 9, 10], dtype=dtype) - z = np.array([11, 12, 13, 14, 15], dtype=dtype) - lib.put_vector(dataset, column=lib["GMT_X"], vector=x) - lib.put_vector(dataset, column=lib["GMT_Y"], vector=y) - lib.put_vector(dataset, column=lib["GMT_Z"], vector=z) - # Turns out wesn doesn't matter for Datasets - wesn = [0] * 6 - # Save the data to a file to see if it's being accessed correctly - with GMTTempFile() as tmp_file: - lib.write_data( - "GMT_IS_VECTOR", - "GMT_IS_POINT", - "GMT_WRITE_SET", - wesn, - tmp_file.name, - dataset, - ) - # Load the data and check that it's correct - newx, newy, newz = tmp_file.loadtxt(unpack=True, dtype=dtype) - npt.assert_allclose(newx, x) - npt.assert_allclose(newy, y) - npt.assert_allclose(newz, z) - - -def test_put_vector_invalid_dtype(): - "Check that it fails with an exception for invalid data types" - with clib.Session() as lib: - dataset = lib.create_data( - 
family="GMT_IS_DATASET|GMT_VIA_VECTOR", - geometry="GMT_IS_POINT", - mode="GMT_CONTAINER_ONLY", - dim=[2, 3, 1, 0], # columns, rows, layers, dtype - ) - data = np.array([37, 12, 556], dtype="complex128") - with pytest.raises(GMTInvalidInput): - lib.put_vector(dataset, column=1, vector=data) - - -def test_put_vector_wrong_column(): - "Check that it fails with an exception when giving an invalid column" - with clib.Session() as lib: - dataset = lib.create_data( - family="GMT_IS_DATASET|GMT_VIA_VECTOR", - geometry="GMT_IS_POINT", - mode="GMT_CONTAINER_ONLY", - dim=[1, 3, 1, 0], # columns, rows, layers, dtype - ) - data = np.array([37, 12, 556], dtype="float32") - with pytest.raises(GMTCLibError): - lib.put_vector(dataset, column=1, vector=data) - - -def test_put_vector_2d_fails(): - "Check that it fails with an exception for multidimensional arrays" - with clib.Session() as lib: - dataset = lib.create_data( - family="GMT_IS_DATASET|GMT_VIA_VECTOR", - geometry="GMT_IS_POINT", - mode="GMT_CONTAINER_ONLY", - dim=[1, 6, 1, 0], # columns, rows, layers, dtype - ) - data = np.array([[37, 12, 556], [37, 12, 556]], dtype="int32") - with pytest.raises(GMTInvalidInput): - lib.put_vector(dataset, column=0, vector=data) - - -def test_put_matrix(): - "Check that assigning a numpy 2d array to a dataset works" - dtypes = "float32 float64 int32 int64 uint32 uint64".split() - shape = (3, 4) - for dtype in dtypes: - with clib.Session() as lib: - dataset = lib.create_data( - family="GMT_IS_DATASET|GMT_VIA_MATRIX", - geometry="GMT_IS_POINT", - mode="GMT_CONTAINER_ONLY", - dim=[shape[1], shape[0], 1, 0], # columns, rows, layers, dtype - ) - data = np.arange(shape[0] * shape[1], dtype=dtype).reshape(shape) - lib.put_matrix(dataset, matrix=data) - # wesn doesn't matter for Datasets - wesn = [0] * 6 - # Save the data to a file to see if it's being accessed correctly - with GMTTempFile() as tmp_file: - lib.write_data( - "GMT_IS_MATRIX", - "GMT_IS_POINT", - "GMT_WRITE_SET", - wesn, - 
tmp_file.name, - dataset, - ) - # Load the data and check that it's correct - newdata = tmp_file.loadtxt(dtype=dtype) - npt.assert_allclose(newdata, data) - - -def test_put_matrix_fails(): - "Check that put_matrix raises an exception if return code is not zero" - # It's hard to make put_matrix fail on the C API level because of all the - # checks on input arguments. Mock the C API function just to make sure it - # works. - with clib.Session() as lib: - with mock(lib, "GMT_Put_Matrix", returns=1): - with pytest.raises(GMTCLibError): - lib.put_matrix(dataset=None, matrix=np.empty((10, 2)), pad=0) - - -def test_put_matrix_grid(): - "Check that assigning a numpy 2d array to a grid works" - dtypes = "float32 float64 int32 int64 uint32 uint64".split() - wesn = [10, 15, 30, 40, 0, 0] - inc = [1, 1] - shape = ((wesn[3] - wesn[2]) // inc[1] + 1, (wesn[1] - wesn[0]) // inc[0] + 1) - for dtype in dtypes: - with clib.Session() as lib: - grid = lib.create_data( - family="GMT_IS_GRID|GMT_VIA_MATRIX", - geometry="GMT_IS_SURFACE", - mode="GMT_CONTAINER_ONLY", - ranges=wesn[:4], - inc=inc, - registration="GMT_GRID_NODE_REG", - ) - data = np.arange(shape[0] * shape[1], dtype=dtype).reshape(shape) - lib.put_matrix(grid, matrix=data) - # Save the data to a file to see if it's being accessed correctly - with GMTTempFile() as tmp_file: - lib.write_data( - "GMT_IS_MATRIX", - "GMT_IS_SURFACE", - "GMT_CONTAINER_AND_DATA", - wesn, - tmp_file.name, - grid, - ) - # Load the data and check that it's correct - newdata = tmp_file.loadtxt(dtype=dtype) - npt.assert_allclose(newdata, data) - - def test_virtual_file(): "Test passing in data via a virtual file with a Dataset" dtypes = "float32 float64 int32 int64 uint32 uint64".split() diff --git a/pygmt/tests/test_clib_put.py b/pygmt/tests/test_clib_put.py new file mode 100644 index 00000000000..b2f97450585 --- /dev/null +++ b/pygmt/tests/test_clib_put.py @@ -0,0 +1,164 @@ +""" +Test the functions that put data into GMT. 
+""" +import numpy as np +import numpy.testing as npt +import pytest + +from .test_clib import mock +from .. import clib +from ..exceptions import GMTCLibError, GMTInvalidInput +from ..helpers import GMTTempFile + + +def test_put_vector(): + "Check that assigning a numpy array to a dataset works" + dtypes = "float32 float64 int32 int64 uint32 uint64".split() + for dtype in dtypes: + with clib.Session() as lib: + dataset = lib.create_data( + family="GMT_IS_DATASET|GMT_VIA_VECTOR", + geometry="GMT_IS_POINT", + mode="GMT_CONTAINER_ONLY", + dim=[3, 5, 1, 0], # columns, rows, layers, dtype + ) + x = np.array([1, 2, 3, 4, 5], dtype=dtype) + y = np.array([6, 7, 8, 9, 10], dtype=dtype) + z = np.array([11, 12, 13, 14, 15], dtype=dtype) + lib.put_vector(dataset, column=lib["GMT_X"], vector=x) + lib.put_vector(dataset, column=lib["GMT_Y"], vector=y) + lib.put_vector(dataset, column=lib["GMT_Z"], vector=z) + # Turns out wesn doesn't matter for Datasets + wesn = [0] * 6 + # Save the data to a file to see if it's being accessed correctly + with GMTTempFile() as tmp_file: + lib.write_data( + "GMT_IS_VECTOR", + "GMT_IS_POINT", + "GMT_WRITE_SET", + wesn, + tmp_file.name, + dataset, + ) + # Load the data and check that it's correct + newx, newy, newz = tmp_file.loadtxt(unpack=True, dtype=dtype) + npt.assert_allclose(newx, x) + npt.assert_allclose(newy, y) + npt.assert_allclose(newz, z) + + +def test_put_vector_invalid_dtype(): + "Check that it fails with an exception for invalid data types" + with clib.Session() as lib: + dataset = lib.create_data( + family="GMT_IS_DATASET|GMT_VIA_VECTOR", + geometry="GMT_IS_POINT", + mode="GMT_CONTAINER_ONLY", + dim=[2, 3, 1, 0], # columns, rows, layers, dtype + ) + data = np.array([37, 12, 556], dtype="complex128") + with pytest.raises(GMTInvalidInput): + lib.put_vector(dataset, column=1, vector=data) + + +def test_put_vector_wrong_column(): + "Check that it fails with an exception when giving an invalid column" + with clib.Session() as lib: + 
dataset = lib.create_data( + family="GMT_IS_DATASET|GMT_VIA_VECTOR", + geometry="GMT_IS_POINT", + mode="GMT_CONTAINER_ONLY", + dim=[1, 3, 1, 0], # columns, rows, layers, dtype + ) + data = np.array([37, 12, 556], dtype="float32") + with pytest.raises(GMTCLibError): + lib.put_vector(dataset, column=1, vector=data) + + +def test_put_vector_2d_fails(): + "Check that it fails with an exception for multidimensional arrays" + with clib.Session() as lib: + dataset = lib.create_data( + family="GMT_IS_DATASET|GMT_VIA_VECTOR", + geometry="GMT_IS_POINT", + mode="GMT_CONTAINER_ONLY", + dim=[1, 6, 1, 0], # columns, rows, layers, dtype + ) + data = np.array([[37, 12, 556], [37, 12, 556]], dtype="int32") + with pytest.raises(GMTInvalidInput): + lib.put_vector(dataset, column=0, vector=data) + + +def test_put_matrix(): + "Check that assigning a numpy 2d array to a dataset works" + dtypes = "float32 float64 int32 int64 uint32 uint64".split() + shape = (3, 4) + for dtype in dtypes: + with clib.Session() as lib: + dataset = lib.create_data( + family="GMT_IS_DATASET|GMT_VIA_MATRIX", + geometry="GMT_IS_POINT", + mode="GMT_CONTAINER_ONLY", + dim=[shape[1], shape[0], 1, 0], # columns, rows, layers, dtype + ) + data = np.arange(shape[0] * shape[1], dtype=dtype).reshape(shape) + lib.put_matrix(dataset, matrix=data) + # wesn doesn't matter for Datasets + wesn = [0] * 6 + # Save the data to a file to see if it's being accessed correctly + with GMTTempFile() as tmp_file: + lib.write_data( + "GMT_IS_MATRIX", + "GMT_IS_POINT", + "GMT_WRITE_SET", + wesn, + tmp_file.name, + dataset, + ) + # Load the data and check that it's correct + newdata = tmp_file.loadtxt(dtype=dtype) + npt.assert_allclose(newdata, data) + + +def test_put_matrix_fails(): + "Check that put_matrix raises an exception if return code is not zero" + # It's hard to make put_matrix fail on the C API level because of all the + # checks on input arguments. Mock the C API function just to make sure it + # works. 
+ with clib.Session() as lib: + with mock(lib, "GMT_Put_Matrix", returns=1): + with pytest.raises(GMTCLibError): + lib.put_matrix(dataset=None, matrix=np.empty((10, 2)), pad=0) + + +def test_put_matrix_grid(): + "Check that assigning a numpy 2d array to a grid works" + dtypes = "float32 float64 int32 int64 uint32 uint64".split() + wesn = [10, 15, 30, 40, 0, 0] + inc = [1, 1] + shape = ((wesn[3] - wesn[2]) // inc[1] + 1, (wesn[1] - wesn[0]) // inc[0] + 1) + for dtype in dtypes: + with clib.Session() as lib: + grid = lib.create_data( + family="GMT_IS_GRID|GMT_VIA_MATRIX", + geometry="GMT_IS_SURFACE", + mode="GMT_CONTAINER_ONLY", + ranges=wesn[:4], + inc=inc, + registration="GMT_GRID_NODE_REG", + ) + data = np.arange(shape[0] * shape[1], dtype=dtype).reshape(shape) + lib.put_matrix(grid, matrix=data) + # Save the data to a file to see if it's being accessed correctly + with GMTTempFile() as tmp_file: + lib.write_data( + "GMT_IS_MATRIX", + "GMT_IS_SURFACE", + "GMT_CONTAINER_AND_DATA", + wesn, + tmp_file.name, + grid, + ) + # Load the data and check that it's correct + newdata = tmp_file.loadtxt(dtype=dtype) + npt.assert_allclose(newdata, data) From 5c5f15260c8d0aa87842efc751f5a869dc2d99af Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Mon, 13 Jul 2020 14:47:57 +1200 Subject: [PATCH 02/27] Initial wrapper for GMT_Put_Strings C API function Include a test called test_put_strings that doesn't actually work yet. --- pygmt/clib/session.py | 55 ++++++++++++++++++++++++++++++++++-- pygmt/tests/test_clib_put.py | 28 ++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index ce3ad66e11b..e6d0354580c 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -722,7 +722,7 @@ def put_vector(self, dataset, column, vector): """ Attach a numpy 1D array as a column on a GMT dataset. 
- Use this functions to attach numpy array data to a GMT dataset and pass + Use this function to attach numpy array data to a GMT dataset and pass it to GMT modules. Wraps ``GMT_Put_Vector``. The dataset must be created by :meth:`~gmt.clib.Session.create_data` @@ -776,11 +776,62 @@ def put_vector(self, dataset, column, vector): ) ) + def put_strings(self, dataset, column, strings): + """ + Attach a numpy 1D array of dtype str as a column on a GMT dataset. + + Use this function to attach string type numpy array data to a GMT + dataset and pass it to GMT modules. Wraps ``GMT_Put_Strings``. + + The dataset must be created by :meth:`~gmt.clib.Session.create_data` + first. Use ``family='GMT_IS_DATASET|GMT_VIA_VECTOR'``. + + .. warning:: + The numpy array must be C contiguous in memory. If it comes from a + column slice of a 2d array, for example, you will have to make a + copy. Use :func:`numpy.ascontiguousarray` to make sure your vector + is contiguous (it won't copy if it already is). + + Parameters + ---------- + dataset : :class:`ctypes.c_void_p` + The ctypes void pointer to a ``GMT_Dataset``. Create it with + :meth:`~gmt.clib.Session.create_data`. + column : int + The column number of this vector in the dataset (starting from 0). + strings : numpy 1d-array + The array that will be attached to the dataset. Must be a 1d C + contiguous array. + + Raises + ------ + GMTCLibError + If given invalid input or ``GMT_Put_Strings`` exits with status != + 0. 
+ + """ + c_put_strings = self.get_libgmt_func( + "GMT_Put_Strings", + argtypes=[ctp.c_void_p, ctp.c_uint, ctp.c_void_p, ctp.c_wchar_p], + restype=ctp.c_int, + ) + + gmt_type = self._check_dtype_and_dim(strings, ndim=1) + strings_pointer = strings.ctypes.data_as(ctp.c_void_p) + status = c_put_strings( + self.session_pointer, dataset, column, gmt_type, strings_pointer + ) + if status != 0: + raise GMTCLibError( + f"Failed to put strings of type {strings.dtype}", + f"in column {column} of dataset.", + ) + def put_matrix(self, dataset, matrix, pad=0): """ Attach a numpy 2D array to a GMT dataset. - Use this functions to attach numpy array data to a GMT dataset and pass + Use this function to attach numpy array data to a GMT dataset and pass it to GMT modules. Wraps ``GMT_Put_Matrix``. The dataset must be created by :meth:`~gmt.clib.Session.create_data` diff --git a/pygmt/tests/test_clib_put.py b/pygmt/tests/test_clib_put.py index b2f97450585..8755096afa1 100644 --- a/pygmt/tests/test_clib_put.py +++ b/pygmt/tests/test_clib_put.py @@ -11,6 +11,34 @@ from ..helpers import GMTTempFile +def test_put_strings(): + "Check that assigning a numpy array of dtype str to a dataset works" + with clib.Session() as lib: + dataset = lib.create_data( + family="GMT_IS_DATASET|GMT_VIA_VECTOR", + geometry="GMT_IS_POINT", + mode="GMT_CONTAINER_ONLY", + dim=[1, 5, 1, 0], # columns, rows, layers, dtype + ) + s = np.array(["a", "b", "c", "d", "e"], dtype=np.str) + lib.put_strings(dataset, column=lib["GMT_S"], strings=s) + # Turns out wesn doesn't matter for Datasets + wesn = [0] * 6 + # Save the data to a file to see if it's being accessed correctly + with GMTTempFile() as tmp_file: + lib.write_data( + "GMT_IS_VECTOR", + "GMT_IS_POINT", + "GMT_WRITE_SET", + wesn, + tmp_file.name, + dataset, + ) + # Load the data and check that it's correct + news = tmp_file.loadtxt(unpack=True, dtype=np.str) + npt.assert_allclose(news, s) + + def test_put_vector(): "Check that assigning a numpy array to a 
dataset works" dtypes = "float32 float64 int32 int64 uint32 uint64".split() From fccedae79ab73fdbf9ca617ac2d09dc7f73970e8 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Tue, 14 Jul 2020 14:43:33 +1200 Subject: [PATCH 03/27] Try using GMT_TEXT for numpy string types --- pygmt/clib/session.py | 5 +++-- pygmt/tests/test_clib_put.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 4571d2db9e6..2ab82623b23 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -56,6 +56,7 @@ np.uint64: "GMT_ULONG", np.uint32: "GMT_UINT", np.datetime64: "GMT_DATETIME", + np.str_: "GMT_TEXT", } @@ -235,7 +236,7 @@ def __getitem__(self, name): value = c_get_enum(session, name.encode()) if value is None or value == -99999: - raise GMTCLibError("Constant '{}' doesn't exits in libgmt.".format(name)) + raise GMTCLibError(f"Constant '{name}' doesn't exist in libgmt.") return value @@ -827,7 +828,7 @@ def put_strings(self, dataset, column, strings): """ c_put_strings = self.get_libgmt_func( "GMT_Put_Strings", - argtypes=[ctp.c_void_p, ctp.c_uint, ctp.c_void_p, ctp.c_wchar_p], + argtypes=[ctp.c_void_p, ctp.c_uint, ctp.c_void_p, ctp.c_uint], restype=ctp.c_int, ) diff --git a/pygmt/tests/test_clib_put.py b/pygmt/tests/test_clib_put.py index d2a2268d1b6..07642b7e980 100644 --- a/pygmt/tests/test_clib_put.py +++ b/pygmt/tests/test_clib_put.py @@ -21,7 +21,7 @@ def test_put_strings(): dim=[1, 5, 1, 0], # columns, rows, layers, dtype ) s = np.array(["a", "b", "c", "d", "e"], dtype=np.str) - lib.put_strings(dataset, column=lib["GMT_S"], strings=s) + lib.put_strings(dataset, column=lib["GMT_TEXT"], strings=s) # Turns out wesn doesn't matter for Datasets wesn = [0] * 6 # Save the data to a file to see if it's being accessed correctly From 07b95edfcee998f437a7e9dc2d37d5afcda3b124 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Tue, 14 Jul 2020 15:00:16 +1200 Subject: [PATCH 04/27] Remove column argument from put_strings function 
--- pygmt/clib/session.py | 13 ++++--------- pygmt/tests/test_clib_put.py | 2 +- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 2ab82623b23..8c011f4927c 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -792,7 +792,7 @@ def put_vector(self, dataset, column, vector): ) ) - def put_strings(self, dataset, column, strings): + def put_strings(self, dataset, strings): """ Attach a numpy 1D array of dtype str as a column on a GMT dataset. @@ -813,8 +813,6 @@ def put_strings(self, dataset, column, strings): dataset : :class:`ctypes.c_void_p` The ctypes void pointer to a ``GMT_Dataset``. Create it with :meth:`~gmt.clib.Session.create_data`. - column : int - The column number of this vector in the dataset (starting from 0). strings : numpy 1d-array The array that will be attached to the dataset. Must be a 1d C contiguous array. @@ -828,19 +826,16 @@ def put_strings(self, dataset, column, strings): """ c_put_strings = self.get_libgmt_func( "GMT_Put_Strings", - argtypes=[ctp.c_void_p, ctp.c_uint, ctp.c_void_p, ctp.c_uint], + argtypes=[ctp.c_void_p, ctp.c_uint, ctp.c_void_p, ctp.c_void_p], restype=ctp.c_int, ) gmt_type = self._check_dtype_and_dim(strings, ndim=1) strings_pointer = strings.ctypes.data_as(ctp.c_void_p) - status = c_put_strings( - self.session_pointer, dataset, column, gmt_type, strings_pointer - ) + status = c_put_strings(self.session_pointer, dataset, gmt_type, strings_pointer) if status != 0: raise GMTCLibError( - f"Failed to put strings of type {strings.dtype}", - f"in column {column} of dataset.", + f"Failed to put strings of type {strings.dtype} into dataset" ) def put_matrix(self, dataset, matrix, pad=0): diff --git a/pygmt/tests/test_clib_put.py b/pygmt/tests/test_clib_put.py index 07642b7e980..48590a9ff0c 100644 --- a/pygmt/tests/test_clib_put.py +++ b/pygmt/tests/test_clib_put.py @@ -21,7 +21,7 @@ def test_put_strings(): dim=[1, 5, 1, 0], # columns, rows, layers, dtype ) 
s = np.array(["a", "b", "c", "d", "e"], dtype=np.str) - lib.put_strings(dataset, column=lib["GMT_TEXT"], strings=s) + lib.put_strings(dataset, strings=s) # Turns out wesn doesn't matter for Datasets wesn = [0] * 6 # Save the data to a file to see if it's being accessed correctly From 88eef3ab98a0bb19af0859a8e19935b5b79fe13a Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Tue, 23 Jun 2020 11:13:51 +1200 Subject: [PATCH 05/27] Set valid GMT data mode as GMT_IS_OUTPUT Should be GMT_IS_OUTPUT instead of GMT_OUTPUT. Also update some docstrings that were missed in the #210 refactor PR. --- pygmt/clib/session.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 8c011f4927c..dad7f7653bf 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -44,7 +44,7 @@ "GMT_IS_SURFACE", ] -MODES = ["GMT_CONTAINER_ONLY", "GMT_OUTPUT"] +MODES = ["GMT_CONTAINER_ONLY", "GMT_IS_OUTPUT"] REGISTRATIONS = ["GMT_GRID_PIXEL_REG", "GMT_GRID_NODE_REG"] @@ -512,13 +512,13 @@ def create_data(self, family, geometry, mode, **kwargs): ---------- family : str A valid GMT data family name (e.g., ``'GMT_IS_DATASET'``). See the - ``data_families`` attribute for valid names. + ``FAMILIES`` attribute for valid names. geometry : str A valid GMT data geometry name (e.g., ``'GMT_IS_POINT'``). See the - ``data_geometries`` attribute for valid names. + ``GEOMETRIES`` attribute for valid names. mode : str - A valid GMT data mode (e.g., ``'GMT_OUTPUT'``). See the - ``data_modes`` attribute for valid names. + A valid GMT data mode (e.g., ``'GMT_IS_OUTPUT'``). See the + ``MODES`` attribute for valid names. dim : list of 4 integers The dimensions of the dataset. See the documentation for the GMT C API function ``GMT_Create_Data`` (``src/gmt_api.c``) for the full @@ -903,12 +903,12 @@ def write_data(self, family, geometry, mode, wesn, output, data): ---------- family : str A valid GMT data family name (e.g., ``'GMT_IS_DATASET'``). 
See the - ``data_families`` attribute for valid names. Don't use the + ``FAMILIES`` attribute for valid names. Don't use the ``GMT_VIA_VECTOR`` or ``GMT_VIA_MATRIX`` constructs for this. Use ``GMT_IS_VECTOR`` and ``GMT_IS_MATRIX`` instead. geometry : str A valid GMT data geometry name (e.g., ``'GMT_IS_POINT'``). See the - ``data_geometries`` attribute for valid names. + ``GEOMETRIES`` attribute for valid names. mode : str How the data is to be written to the file. This option varies depending on the given family. See the GMT API documentation for From 6390e040b7d19e6f88152cd341f5e47a31ca7aea Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Tue, 14 Jul 2020 16:06:05 +1200 Subject: [PATCH 06/27] Try passing "GMT_IS_VECTOR" family type to put_strings --- pygmt/clib/session.py | 15 +++++++++------ pygmt/tests/test_clib_put.py | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index dad7f7653bf..8025a096bcb 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -56,7 +56,6 @@ np.uint64: "GMT_ULONG", np.uint32: "GMT_UINT", np.datetime64: "GMT_DATETIME", - np.str_: "GMT_TEXT", } @@ -792,7 +791,7 @@ def put_vector(self, dataset, column, vector): ) ) - def put_strings(self, dataset, strings): + def put_strings(self, dataset, family, strings): """ Attach a numpy 1D array of dtype str as a column on a GMT dataset. @@ -800,7 +799,7 @@ def put_strings(self, dataset, strings): dataset and pass it to GMT modules. Wraps ``GMT_Put_Strings``. The dataset must be created by :meth:`~gmt.clib.Session.create_data` - first. Use ``family='GMT_IS_DATASET|GMT_VIA_VECTOR'``. + first. .. warning:: The numpy array must be C contiguous in memory. If it comes from a @@ -813,6 +812,9 @@ def put_strings(self, dataset, strings): dataset : :class:`ctypes.c_void_p` The ctypes void pointer to a ``GMT_Dataset``. Create it with :meth:`~gmt.clib.Session.create_data`. + family : str + The family type of the dataset. 
Can be either ``GMT_IS_VECTOR`` or + ``GMT_IS_MATRIX``. strings : numpy 1d-array The array that will be attached to the dataset. Must be a 1d C contiguous array. @@ -830,9 +832,10 @@ def put_strings(self, dataset, strings): restype=ctp.c_int, ) - gmt_type = self._check_dtype_and_dim(strings, ndim=1) - strings_pointer = strings.ctypes.data_as(ctp.c_void_p) - status = c_put_strings(self.session_pointer, dataset, gmt_type, strings_pointer) + strings_pointer = strings.ctypes.data_as(ctp.c_char_p) + status = c_put_strings( + self.session_pointer, self[family], dataset, strings_pointer + ) if status != 0: raise GMTCLibError( f"Failed to put strings of type {strings.dtype} into dataset" diff --git a/pygmt/tests/test_clib_put.py b/pygmt/tests/test_clib_put.py index 48590a9ff0c..820de3a05ef 100644 --- a/pygmt/tests/test_clib_put.py +++ b/pygmt/tests/test_clib_put.py @@ -21,7 +21,7 @@ def test_put_strings(): dim=[1, 5, 1, 0], # columns, rows, layers, dtype ) s = np.array(["a", "b", "c", "d", "e"], dtype=np.str) - lib.put_strings(dataset, strings=s) + lib.put_strings(dataset, family="GMT_IS_VECTOR", strings=s) # Turns out wesn doesn't matter for Datasets wesn = [0] * 6 # Save the data to a file to see if it's being accessed correctly From 42af00ef8b16afd6c844d648860a6435338dd864 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Tue, 14 Jul 2020 16:54:49 +1200 Subject: [PATCH 07/27] Do `put_vector` x and y before `put_strings` s, dim is 2 only --- pygmt/tests/test_clib_put.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pygmt/tests/test_clib_put.py b/pygmt/tests/test_clib_put.py index 820de3a05ef..37198cd87dd 100644 --- a/pygmt/tests/test_clib_put.py +++ b/pygmt/tests/test_clib_put.py @@ -18,9 +18,13 @@ def test_put_strings(): family="GMT_IS_DATASET|GMT_VIA_VECTOR", geometry="GMT_IS_POINT", mode="GMT_CONTAINER_ONLY", - dim=[1, 5, 1, 0], # columns, rows, layers, dtype + dim=[2, 5, 1, 0], # columns, rows, layers, dtype ) + x = np.array([1, 2, 3, 4, 5], 
dtype=np.int32) + y = np.array([6, 7, 8, 9, 10], dtype=np.int32) s = np.array(["a", "b", "c", "d", "e"], dtype=np.str) + lib.put_vector(dataset, column=lib["GMT_X"], vector=x) + lib.put_vector(dataset, column=lib["GMT_Y"], vector=y) lib.put_strings(dataset, family="GMT_IS_VECTOR", strings=s) # Turns out wesn doesn't matter for Datasets wesn = [0] * 6 @@ -36,6 +40,7 @@ def test_put_strings(): ) # Load the data and check that it's correct news = tmp_file.loadtxt(unpack=True, dtype=np.str) + print(news) npt.assert_allclose(news, s) From 9ef04b0434c326083d20046239f9aa1c55f0f4cc Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Tue, 14 Jul 2020 20:39:59 +1200 Subject: [PATCH 08/27] Refactor text to use virtualfile_from_vectors instead of pandas tempfile Modified virtualfile_from_vectors to use put_strings on last column instead of put_vectors if it has a string data type. In theory this should work for `text`, but in reality, a segmentation fault happens. --- pygmt/base_plotting.py | 35 ++++++++++------------------------- pygmt/clib/session.py | 9 ++++++++- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/pygmt/base_plotting.py b/pygmt/base_plotting.py index a9ca7dd26b8..78b1c49d87b 100644 --- a/pygmt/base_plotting.py +++ b/pygmt/base_plotting.py @@ -3,9 +3,7 @@ Does not define any special non-GMT methods (savefig, show, etc). 
""" import contextlib -import csv import numpy as np -import pandas as pd from .clib import Session from .exceptions import GMTInvalidInput @@ -14,7 +12,6 @@ dummy_context, data_kind, fmt_docstring, - GMTTempFile, use_alias, kwargs_to_strings, ) @@ -970,27 +967,15 @@ def text( if position is not None and isinstance(position, str): kwargs["F"] += f'+c{position}+t"{text}"' - with GMTTempFile(suffix=".txt") as tmpfile: - with Session() as lib: - fname = textfiles if kind == "file" else "" - if kind == "vectors": - if position is not None: - fname = "" - else: - pd.DataFrame.from_dict( - { - "x": np.atleast_1d(x), - "y": np.atleast_1d(y), - "text": np.atleast_1d(text), - } - ).to_csv( - tmpfile.name, - sep="\t", - header=False, - index=False, - quoting=csv.QUOTE_NONE, - ) - fname = tmpfile.name - + with Session() as lib: + file_context = dummy_context(textfiles) if kind == "file" else "" + if kind == "vectors": + if position is not None: + file_context = dummy_context("") + else: + file_context = lib.virtualfile_from_vectors( + np.atleast_1d(x), np.atleast_1d(y), np.atleast_1d(text) + ) + with file_context as fname: arg_str = " ".join([fname, build_arg_string(kwargs)]) lib.call_module("text", arg_str) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 8025a096bcb..9aa5392833e 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1135,6 +1135,9 @@ def virtualfile_from_vectors(self, *vectors): arrays = vectors_to_arrays(vectors) columns = len(arrays) + if np.issubdtype(arrays[-1].dtype, np.str_): + columns -= 1 + rows = len(arrays[0]) if not all(len(i) == rows for i in arrays): raise GMTInvalidInput("All arrays must have same size.") @@ -1146,8 +1149,12 @@ def virtualfile_from_vectors(self, *vectors): family, geometry, mode="GMT_CONTAINER_ONLY", dim=[columns, rows, 1, 0] ) - for col, array in enumerate(arrays): + # Use put_vector for first n columns with numerical type data + for col, array in enumerate(arrays[:columns]): 
self.put_vector(dataset, column=col, vector=array) + # Use put_strings for last column with string type data + for array in arrays[columns:]: + self.put_strings(dataset, family="GMT_IS_VECTOR", strings=array) with self.open_virtual_file( family, geometry, "GMT_IN|GMT_IS_REFERENCE", dataset From ab1e04170849da95c58d390120839c8587b2fa64 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Wed, 15 Jul 2020 08:49:19 +1200 Subject: [PATCH 09/27] Split test_clib_put into separate _strings, _matrix and _vector files --- pygmt/tests/test_clib_put.py | 197 --------------------------- pygmt/tests/test_clib_put_matrix.py | 87 ++++++++++++ pygmt/tests/test_clib_put_strings.py | 44 ++++++ pygmt/tests/test_clib_put_vector.py | 88 ++++++++++++ 4 files changed, 219 insertions(+), 197 deletions(-) delete mode 100644 pygmt/tests/test_clib_put.py create mode 100644 pygmt/tests/test_clib_put_matrix.py create mode 100644 pygmt/tests/test_clib_put_strings.py create mode 100644 pygmt/tests/test_clib_put_vector.py diff --git a/pygmt/tests/test_clib_put.py b/pygmt/tests/test_clib_put.py deleted file mode 100644 index 37198cd87dd..00000000000 --- a/pygmt/tests/test_clib_put.py +++ /dev/null @@ -1,197 +0,0 @@ -""" -Test the functions that put data into GMT. -""" -import numpy as np -import numpy.testing as npt -import pytest - -from .test_clib import mock -from .. 
import clib -from ..exceptions import GMTCLibError, GMTInvalidInput -from ..helpers import GMTTempFile - - -def test_put_strings(): - "Check that assigning a numpy array of dtype str to a dataset works" - with clib.Session() as lib: - dataset = lib.create_data( - family="GMT_IS_DATASET|GMT_VIA_VECTOR", - geometry="GMT_IS_POINT", - mode="GMT_CONTAINER_ONLY", - dim=[2, 5, 1, 0], # columns, rows, layers, dtype - ) - x = np.array([1, 2, 3, 4, 5], dtype=np.int32) - y = np.array([6, 7, 8, 9, 10], dtype=np.int32) - s = np.array(["a", "b", "c", "d", "e"], dtype=np.str) - lib.put_vector(dataset, column=lib["GMT_X"], vector=x) - lib.put_vector(dataset, column=lib["GMT_Y"], vector=y) - lib.put_strings(dataset, family="GMT_IS_VECTOR", strings=s) - # Turns out wesn doesn't matter for Datasets - wesn = [0] * 6 - # Save the data to a file to see if it's being accessed correctly - with GMTTempFile() as tmp_file: - lib.write_data( - "GMT_IS_VECTOR", - "GMT_IS_POINT", - "GMT_WRITE_SET", - wesn, - tmp_file.name, - dataset, - ) - # Load the data and check that it's correct - news = tmp_file.loadtxt(unpack=True, dtype=np.str) - print(news) - npt.assert_allclose(news, s) - - -def test_put_vector(): - "Check that assigning a numpy array to a dataset works" - dtypes = "float32 float64 int32 int64 uint32 uint64".split() - for dtype in dtypes: - with clib.Session() as lib: - dataset = lib.create_data( - family="GMT_IS_DATASET|GMT_VIA_VECTOR", - geometry="GMT_IS_POINT", - mode="GMT_CONTAINER_ONLY", - dim=[3, 5, 1, 0], # columns, rows, layers, dtype - ) - x = np.array([1, 2, 3, 4, 5], dtype=dtype) - y = np.array([6, 7, 8, 9, 10], dtype=dtype) - z = np.array([11, 12, 13, 14, 15], dtype=dtype) - lib.put_vector(dataset, column=lib["GMT_X"], vector=x) - lib.put_vector(dataset, column=lib["GMT_Y"], vector=y) - lib.put_vector(dataset, column=lib["GMT_Z"], vector=z) - # Turns out wesn doesn't matter for Datasets - wesn = [0] * 6 - # Save the data to a file to see if it's being accessed correctly - 
with GMTTempFile() as tmp_file: - lib.write_data( - "GMT_IS_VECTOR", - "GMT_IS_POINT", - "GMT_WRITE_SET", - wesn, - tmp_file.name, - dataset, - ) - # Load the data and check that it's correct - newx, newy, newz = tmp_file.loadtxt(unpack=True, dtype=dtype) - npt.assert_allclose(newx, x) - npt.assert_allclose(newy, y) - npt.assert_allclose(newz, z) - - -def test_put_vector_invalid_dtype(): - "Check that it fails with an exception for invalid data types" - with clib.Session() as lib: - dataset = lib.create_data( - family="GMT_IS_DATASET|GMT_VIA_VECTOR", - geometry="GMT_IS_POINT", - mode="GMT_CONTAINER_ONLY", - dim=[2, 3, 1, 0], # columns, rows, layers, dtype - ) - data = np.array([37, 12, 556], dtype="object") - with pytest.raises(GMTInvalidInput): - lib.put_vector(dataset, column=1, vector=data) - - -def test_put_vector_wrong_column(): - "Check that it fails with an exception when giving an invalid column" - with clib.Session() as lib: - dataset = lib.create_data( - family="GMT_IS_DATASET|GMT_VIA_VECTOR", - geometry="GMT_IS_POINT", - mode="GMT_CONTAINER_ONLY", - dim=[1, 3, 1, 0], # columns, rows, layers, dtype - ) - data = np.array([37, 12, 556], dtype="float32") - with pytest.raises(GMTCLibError): - lib.put_vector(dataset, column=1, vector=data) - - -def test_put_vector_2d_fails(): - "Check that it fails with an exception for multidimensional arrays" - with clib.Session() as lib: - dataset = lib.create_data( - family="GMT_IS_DATASET|GMT_VIA_VECTOR", - geometry="GMT_IS_POINT", - mode="GMT_CONTAINER_ONLY", - dim=[1, 6, 1, 0], # columns, rows, layers, dtype - ) - data = np.array([[37, 12, 556], [37, 12, 556]], dtype="int32") - with pytest.raises(GMTInvalidInput): - lib.put_vector(dataset, column=0, vector=data) - - -def test_put_matrix(): - "Check that assigning a numpy 2d array to a dataset works" - dtypes = "float32 float64 int32 int64 uint32 uint64".split() - shape = (3, 4) - for dtype in dtypes: - with clib.Session() as lib: - dataset = lib.create_data( - 
family="GMT_IS_DATASET|GMT_VIA_MATRIX", - geometry="GMT_IS_POINT", - mode="GMT_CONTAINER_ONLY", - dim=[shape[1], shape[0], 1, 0], # columns, rows, layers, dtype - ) - data = np.arange(shape[0] * shape[1], dtype=dtype).reshape(shape) - lib.put_matrix(dataset, matrix=data) - # wesn doesn't matter for Datasets - wesn = [0] * 6 - # Save the data to a file to see if it's being accessed correctly - with GMTTempFile() as tmp_file: - lib.write_data( - "GMT_IS_MATRIX", - "GMT_IS_POINT", - "GMT_WRITE_SET", - wesn, - tmp_file.name, - dataset, - ) - # Load the data and check that it's correct - newdata = tmp_file.loadtxt(dtype=dtype) - npt.assert_allclose(newdata, data) - - -def test_put_matrix_fails(): - "Check that put_matrix raises an exception if return code is not zero" - # It's hard to make put_matrix fail on the C API level because of all the - # checks on input arguments. Mock the C API function just to make sure it - # works. - with clib.Session() as lib: - with mock(lib, "GMT_Put_Matrix", returns=1): - with pytest.raises(GMTCLibError): - lib.put_matrix(dataset=None, matrix=np.empty((10, 2)), pad=0) - - -def test_put_matrix_grid(): - "Check that assigning a numpy 2d array to a grid works" - dtypes = "float32 float64 int32 int64 uint32 uint64".split() - wesn = [10, 15, 30, 40, 0, 0] - inc = [1, 1] - shape = ((wesn[3] - wesn[2]) // inc[1] + 1, (wesn[1] - wesn[0]) // inc[0] + 1) - for dtype in dtypes: - with clib.Session() as lib: - grid = lib.create_data( - family="GMT_IS_GRID|GMT_VIA_MATRIX", - geometry="GMT_IS_SURFACE", - mode="GMT_CONTAINER_ONLY", - ranges=wesn[:4], - inc=inc, - registration="GMT_GRID_NODE_REG", - ) - data = np.arange(shape[0] * shape[1], dtype=dtype).reshape(shape) - lib.put_matrix(grid, matrix=data) - # Save the data to a file to see if it's being accessed correctly - with GMTTempFile() as tmp_file: - lib.write_data( - "GMT_IS_MATRIX", - "GMT_IS_SURFACE", - "GMT_CONTAINER_AND_DATA", - wesn, - tmp_file.name, - grid, - ) - # Load the data and check 
that it's correct - newdata = tmp_file.loadtxt(dtype=dtype) - npt.assert_allclose(newdata, data) diff --git a/pygmt/tests/test_clib_put_matrix.py b/pygmt/tests/test_clib_put_matrix.py new file mode 100644 index 00000000000..8e754ecf109 --- /dev/null +++ b/pygmt/tests/test_clib_put_matrix.py @@ -0,0 +1,87 @@ +""" +Test the functions that put matrix data into GMT. +""" +import numpy as np +import numpy.testing as npt +import pytest + + +from .test_clib import mock +from .. import clib +from ..exceptions import GMTCLibError +from ..helpers import GMTTempFile + + +def test_put_matrix(): + "Check that assigning a numpy 2d array to a dataset works" + dtypes = "float32 float64 int32 int64 uint32 uint64".split() + shape = (3, 4) + for dtype in dtypes: + with clib.Session() as lib: + dataset = lib.create_data( + family="GMT_IS_DATASET|GMT_VIA_MATRIX", + geometry="GMT_IS_POINT", + mode="GMT_CONTAINER_ONLY", + dim=[shape[1], shape[0], 1, 0], # columns, rows, layers, dtype + ) + data = np.arange(shape[0] * shape[1], dtype=dtype).reshape(shape) + lib.put_matrix(dataset, matrix=data) + # wesn doesn't matter for Datasets + wesn = [0] * 6 + # Save the data to a file to see if it's being accessed correctly + with GMTTempFile() as tmp_file: + lib.write_data( + "GMT_IS_MATRIX", + "GMT_IS_POINT", + "GMT_WRITE_SET", + wesn, + tmp_file.name, + dataset, + ) + # Load the data and check that it's correct + newdata = tmp_file.loadtxt(dtype=dtype) + npt.assert_allclose(newdata, data) + + +def test_put_matrix_fails(): + "Check that put_matrix raises an exception if return code is not zero" + # It's hard to make put_matrix fail on the C API level because of all the + # checks on input arguments. Mock the C API function just to make sure it + # works. 
+ with clib.Session() as lib: + with mock(lib, "GMT_Put_Matrix", returns=1): + with pytest.raises(GMTCLibError): + lib.put_matrix(dataset=None, matrix=np.empty((10, 2)), pad=0) + + +def test_put_matrix_grid(): + "Check that assigning a numpy 2d array to a grid works" + dtypes = "float32 float64 int32 int64 uint32 uint64".split() + wesn = [10, 15, 30, 40, 0, 0] + inc = [1, 1] + shape = ((wesn[3] - wesn[2]) // inc[1] + 1, (wesn[1] - wesn[0]) // inc[0] + 1) + for dtype in dtypes: + with clib.Session() as lib: + grid = lib.create_data( + family="GMT_IS_GRID|GMT_VIA_MATRIX", + geometry="GMT_IS_SURFACE", + mode="GMT_CONTAINER_ONLY", + ranges=wesn[:4], + inc=inc, + registration="GMT_GRID_NODE_REG", + ) + data = np.arange(shape[0] * shape[1], dtype=dtype).reshape(shape) + lib.put_matrix(grid, matrix=data) + # Save the data to a file to see if it's being accessed correctly + with GMTTempFile() as tmp_file: + lib.write_data( + "GMT_IS_MATRIX", + "GMT_IS_SURFACE", + "GMT_CONTAINER_AND_DATA", + wesn, + tmp_file.name, + grid, + ) + # Load the data and check that it's correct + newdata = tmp_file.loadtxt(dtype=dtype) + npt.assert_allclose(newdata, data) diff --git a/pygmt/tests/test_clib_put_strings.py b/pygmt/tests/test_clib_put_strings.py new file mode 100644 index 00000000000..9efe0b6e63e --- /dev/null +++ b/pygmt/tests/test_clib_put_strings.py @@ -0,0 +1,44 @@ +""" +Test the functions that put string data into GMT. +""" +import numpy as np +import numpy.testing as npt + + +from .. 
import clib +from ..helpers import GMTTempFile + + +def test_put_strings(): + "Check that assigning a numpy array of dtype str to a dataset works" + with clib.Session() as lib: + dataset = lib.create_data( + family="GMT_IS_DATASET|GMT_VIA_VECTOR", + geometry="GMT_IS_POINT", + mode="GMT_CONTAINER_ONLY", + dim=[2, 5, 1, 0], # columns, rows, layers, dtype + ) + x = np.array([1, 2, 3, 4, 5], dtype=np.int32) + y = np.array([6, 7, 8, 9, 10], dtype=np.int32) + strings = np.array(["a", "b", "c", "d", "e"], dtype=np.str) + lib.put_vector(dataset, column=lib["GMT_X"], vector=x) + lib.put_vector(dataset, column=lib["GMT_Y"], vector=y) + lib.put_strings(dataset, family="GMT_IS_VECTOR", strings=strings) + # Turns out wesn doesn't matter for Datasets + wesn = [0] * 6 + # Save the data to a file to see if it's being accessed correctly + with GMTTempFile() as tmp_file: + lib.write_data( + "GMT_IS_VECTOR", + "GMT_IS_POINT", + "GMT_WRITE_SET", + wesn, + tmp_file.name, + dataset, + ) + print(tmp_file.read()) + # Load the data and check that it's correct + newstrings = tmp_file.loadtxt(unpack=True, dtype=np.str) + print(newstrings) + # npt.assert_string_equal(news, s) + npt.assert_allclose(newstrings, strings) diff --git a/pygmt/tests/test_clib_put_vector.py b/pygmt/tests/test_clib_put_vector.py new file mode 100644 index 00000000000..4c78945ef24 --- /dev/null +++ b/pygmt/tests/test_clib_put_vector.py @@ -0,0 +1,88 @@ +""" +Test the functions that put vector data into GMT. +""" +import numpy as np +import numpy.testing as npt +import pytest + +from .. 
import clib +from ..exceptions import GMTCLibError, GMTInvalidInput +from ..helpers import GMTTempFile + + +def test_put_vector(): + "Check that assigning a numpy array to a dataset works" + dtypes = "float32 float64 int32 int64 uint32 uint64".split() + for dtype in dtypes: + with clib.Session() as lib: + dataset = lib.create_data( + family="GMT_IS_DATASET|GMT_VIA_VECTOR", + geometry="GMT_IS_POINT", + mode="GMT_CONTAINER_ONLY", + dim=[3, 5, 1, 0], # columns, rows, layers, dtype + ) + x = np.array([1, 2, 3, 4, 5], dtype=dtype) + y = np.array([6, 7, 8, 9, 10], dtype=dtype) + z = np.array([11, 12, 13, 14, 15], dtype=dtype) + lib.put_vector(dataset, column=lib["GMT_X"], vector=x) + lib.put_vector(dataset, column=lib["GMT_Y"], vector=y) + lib.put_vector(dataset, column=lib["GMT_Z"], vector=z) + # Turns out wesn doesn't matter for Datasets + wesn = [0] * 6 + # Save the data to a file to see if it's being accessed correctly + with GMTTempFile() as tmp_file: + lib.write_data( + "GMT_IS_VECTOR", + "GMT_IS_POINT", + "GMT_WRITE_SET", + wesn, + tmp_file.name, + dataset, + ) + # Load the data and check that it's correct + newx, newy, newz = tmp_file.loadtxt(unpack=True, dtype=dtype) + npt.assert_allclose(newx, x) + npt.assert_allclose(newy, y) + npt.assert_allclose(newz, z) + + +def test_put_vector_invalid_dtype(): + "Check that it fails with an exception for invalid data types" + with clib.Session() as lib: + dataset = lib.create_data( + family="GMT_IS_DATASET|GMT_VIA_VECTOR", + geometry="GMT_IS_POINT", + mode="GMT_CONTAINER_ONLY", + dim=[2, 3, 1, 0], # columns, rows, layers, dtype + ) + data = np.array([37, 12, 556], dtype="object") + with pytest.raises(GMTInvalidInput): + lib.put_vector(dataset, column=1, vector=data) + + +def test_put_vector_wrong_column(): + "Check that it fails with an exception when giving an invalid column" + with clib.Session() as lib: + dataset = lib.create_data( + family="GMT_IS_DATASET|GMT_VIA_VECTOR", + geometry="GMT_IS_POINT", + 
mode="GMT_CONTAINER_ONLY", + dim=[1, 3, 1, 0], # columns, rows, layers, dtype + ) + data = np.array([37, 12, 556], dtype="float32") + with pytest.raises(GMTCLibError): + lib.put_vector(dataset, column=1, vector=data) + + +def test_put_vector_2d_fails(): + "Check that it fails with an exception for multidimensional arrays" + with clib.Session() as lib: + dataset = lib.create_data( + family="GMT_IS_DATASET|GMT_VIA_VECTOR", + geometry="GMT_IS_POINT", + mode="GMT_CONTAINER_ONLY", + dim=[1, 6, 1, 0], # columns, rows, layers, dtype + ) + data = np.array([[37, 12, 556], [37, 12, 556]], dtype="int32") + with pytest.raises(GMTInvalidInput): + lib.put_vector(dataset, column=0, vector=data) From 01754fcadd252a254ce325003c33e3fb3408eec7 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Wed, 15 Jul 2020 10:49:52 +1200 Subject: [PATCH 10/27] Try using ctypes.POINTER in argtypes of c_put_strings --- pygmt/clib/session.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 9aa5392833e..ded19109b6e 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -828,11 +828,18 @@ def put_strings(self, dataset, family, strings): """ c_put_strings = self.get_libgmt_func( "GMT_Put_Strings", - argtypes=[ctp.c_void_p, ctp.c_uint, ctp.c_void_p, ctp.c_void_p], + argtypes=[ + ctp.c_void_p, + ctp.c_uint, + ctp.c_void_p, + ctp.POINTER(ctp.c_char_p), + ], restype=ctp.c_int, ) - strings_pointer = strings.ctypes.data_as(ctp.c_char_p) + strings_pointer = (ctp.c_char_p * len(strings))() + strings_pointer[:] = np.char.encode(strings) + status = c_put_strings( self.session_pointer, self[family], dataset, strings_pointer ) From 2485437302e8224e8325e5d956d89f7c3ed6e7e3 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 27 Jul 2020 01:45:58 -0400 Subject: [PATCH 11/27] Pass strings using "GMT_IS_VECTOR|GMT_IS_DUPLICATE" --- pygmt/clib/session.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff 
--git a/pygmt/clib/session.py b/pygmt/clib/session.py index 9588066c4b8..2dc44d8c9de 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -44,6 +44,8 @@ "GMT_IS_SURFACE", ] +METHODS = ["GMT_IS_DUPLICATE", "GMT_IS_REFERENCE"] + MODES = ["GMT_CONTAINER_ONLY", "GMT_IS_OUTPUT"] REGISTRATIONS = ["GMT_GRID_PIXEL_REG", "GMT_GRID_NODE_REG"] @@ -842,8 +844,12 @@ def put_strings(self, dataset, family, strings): strings_pointer = (ctp.c_char_p * len(strings))() strings_pointer[:] = np.char.encode(strings) + family_int = self._parse_constant( + family, valid=FAMILIES, valid_modifiers=METHODS + ) + status = c_put_strings( - self.session_pointer, self[family], dataset, strings_pointer + self.session_pointer, family_int, dataset, strings_pointer ) if status != 0: raise GMTCLibError( @@ -1059,9 +1065,7 @@ def open_virtual_file(self, family, geometry, direction, data): family_int = self._parse_constant(family, valid=FAMILIES, valid_modifiers=VIAS) geometry_int = self._parse_constant(geometry, valid=GEOMETRIES) direction_int = self._parse_constant( - direction, - valid=["GMT_IN", "GMT_OUT"], - valid_modifiers=["GMT_IS_REFERENCE", "GMT_IS_DUPLICATE"], + direction, valid=["GMT_IN", "GMT_OUT"], valid_modifiers=METHODS, ) buff = ctp.create_string_buffer(self["GMT_VF_LEN"]) @@ -1162,8 +1166,11 @@ def virtualfile_from_vectors(self, *vectors): for col, array in enumerate(arrays[:columns]): self.put_vector(dataset, column=col, vector=array) # Use put_strings for last column with string type data + # Have to use modifier "GMT_IS_DUPLICATE" to duplicate the strings for array in arrays[columns:]: - self.put_strings(dataset, family="GMT_IS_VECTOR", strings=array) + self.put_strings( + dataset, family="GMT_IS_VECTOR|GMT_IS_DUPLICATE", strings=array + ) with self.open_virtual_file( family, geometry, "GMT_IN|GMT_IS_REFERENCE", dataset From e8e7768e2cf27514500d48319f6f91a8e771ce8c Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 30 Jul 2020 14:59:58 -0400 Subject: [PATCH 12/27] 
Use GMT_IS_VECTOR|GMT_IS_DUPLICATE when calling put_strings --- pygmt/tests/test_clib_put_strings.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pygmt/tests/test_clib_put_strings.py b/pygmt/tests/test_clib_put_strings.py index 9efe0b6e63e..9fb35779e27 100644 --- a/pygmt/tests/test_clib_put_strings.py +++ b/pygmt/tests/test_clib_put_strings.py @@ -23,7 +23,9 @@ def test_put_strings(): strings = np.array(["a", "b", "c", "d", "e"], dtype=np.str) lib.put_vector(dataset, column=lib["GMT_X"], vector=x) lib.put_vector(dataset, column=lib["GMT_Y"], vector=y) - lib.put_strings(dataset, family="GMT_IS_VECTOR", strings=strings) + lib.put_strings( + dataset, family="GMT_IS_VECTOR|GMT_IS_DUPLICATE", strings=strings + ) # Turns out wesn doesn't matter for Datasets wesn = [0] * 6 # Save the data to a file to see if it's being accessed correctly From 2f0c798ed21a671e02e4b5c4886617ef8a75b91d Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 30 Jul 2020 22:41:13 -0400 Subject: [PATCH 13/27] Improve the put_strings test --- pygmt/tests/test_clib_put_strings.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pygmt/tests/test_clib_put_strings.py b/pygmt/tests/test_clib_put_strings.py index 9fb35779e27..5f0226c1133 100644 --- a/pygmt/tests/test_clib_put_strings.py +++ b/pygmt/tests/test_clib_put_strings.py @@ -38,9 +38,10 @@ def test_put_strings(): tmp_file.name, dataset, ) - print(tmp_file.read()) # Load the data and check that it's correct - newstrings = tmp_file.loadtxt(unpack=True, dtype=np.str) - print(newstrings) - # npt.assert_string_equal(news, s) - npt.assert_allclose(newstrings, strings) + newx, newy, newstrings = tmp_file.loadtxt( + unpack=True, dtype=[("x", np.int32), ("y", np.int32), ("text", " Date: Wed, 5 Aug 2020 09:42:54 +1200 Subject: [PATCH 14/27] Add back import pandas as pd --- pygmt/base_plotting.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pygmt/base_plotting.py b/pygmt/base_plotting.py index 
a396da0a297..a04a08bc0d1 100644 --- a/pygmt/base_plotting.py +++ b/pygmt/base_plotting.py @@ -3,7 +3,9 @@ Does not define any special non-GMT methods (savefig, show, etc). """ import contextlib + import numpy as np +import pandas as pd from .clib import Session from .exceptions import GMTError, GMTInvalidInput From ed0dce6281778ea3b8f9473ed3fed445b59737c4 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Mon, 10 Aug 2020 09:05:29 +1200 Subject: [PATCH 15/27] Revert refactor text to use virtualfile_from_vectors This reverts commit 9ef04b0434c326083d20046239f9aa1c55f0f4cc. I.e. Use pd.to_csv again. --- pygmt/base_plotting.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/pygmt/base_plotting.py b/pygmt/base_plotting.py index a04a08bc0d1..f978e9c9cf5 100644 --- a/pygmt/base_plotting.py +++ b/pygmt/base_plotting.py @@ -3,7 +3,7 @@ Does not define any special non-GMT methods (savefig, show, etc). """ import contextlib - +import csv import numpy as np import pandas as pd @@ -14,6 +14,7 @@ dummy_context, data_kind, fmt_docstring, + GMTTempFile, use_alias, kwargs_to_strings, ) @@ -969,16 +970,28 @@ def text( if position is not None and isinstance(position, str): kwargs["F"] += f'+c{position}+t"{text}"' - with Session() as lib: - file_context = dummy_context(textfiles) if kind == "file" else "" - if kind == "vectors": - if position is not None: - file_context = dummy_context("") - else: - file_context = lib.virtualfile_from_vectors( - np.atleast_1d(x), np.atleast_1d(y), np.atleast_1d(text) - ) - with file_context as fname: + with GMTTempFile(suffix=".txt") as tmpfile: + with Session() as lib: + fname = textfiles if kind == "file" else "" + if kind == "vectors": + if position is not None: + fname = "" + else: + pd.DataFrame.from_dict( + { + "x": np.atleast_1d(x), + "y": np.atleast_1d(y), + "text": np.atleast_1d(text), + } + ).to_csv( + tmpfile.name, + sep="\t", + header=False, + index=False, + quoting=csv.QUOTE_NONE, + ) + 
fname = tmpfile.name + arg_str = " ".join([fname, build_arg_string(kwargs)]) lib.call_module("text", arg_str) From aeabd8e8f041898a533e6d44152293d86ac2038c Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Mon, 10 Aug 2020 11:53:26 +1200 Subject: [PATCH 16/27] Add test for passing in one string column to virtualfile_from_vectors --- pygmt/tests/test_clib.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pygmt/tests/test_clib.py b/pygmt/tests/test_clib.py index b593208af36..6f1a88b5bd6 100644 --- a/pygmt/tests/test_clib.py +++ b/pygmt/tests/test_clib.py @@ -399,6 +399,22 @@ def test_virtualfile_from_vectors(): assert output == expected +def test_virtual_from_vectors_one_string_column(): + "Test passing in one column with string dtype into virtual file dataset" + size = 5 + x = np.arange(size, dtype=np.int32) + y = np.arange(size, size * 2, 1, dtype=np.int32) + strings = np.array(["a", "b", "c", "d", "e"], dtype=np.str) + with clib.Session() as lib: + with lib.virtualfile_from_vectors(x, y, strings) as vfile: + with GMTTempFile() as outfile: + lib.call_module("gmtinfo", f"{vfile} ->{outfile.name}") + output = outfile.read(keep_tabs=True) + bounds = "\t".join([f"<{i.min():.0f}/{i.max():.0f}>" for i in (x, y)]) + expected = f": N = {size}\t{bounds}\n" + assert output == expected + + def test_virtualfile_from_vectors_transpose(): "Test transforming matrix columns to virtual file dataset" dtypes = "float32 float64 int32 int64 uint32 uint64".split() From abcbcf73af8fa8b144d3ff5f4acf63616675cccc Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Mon, 10 Aug 2020 13:25:17 +1200 Subject: [PATCH 17/27] Refactor virtualfile_from_vectors to handle up to 2 string columns --- pygmt/clib/session.py | 33 +++++++++++++++++++++++++-------- pygmt/tests/test_clib.py | 17 +++++++++++++++++ 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 2dc44d8c9de..4c1cc6f6596 100644 --- a/pygmt/clib/session.py +++ 
b/pygmt/clib/session.py @@ -1148,8 +1148,12 @@ def virtualfile_from_vectors(self, *vectors): arrays = vectors_to_arrays(vectors) columns = len(arrays) - if np.issubdtype(arrays[-1].dtype, np.str_): - columns -= 1 + str_cols = [ + col + for col, array in enumerate(arrays) + if np.issubdtype(array.dtype, np.str_) + ] + columns -= len(str_cols) rows = len(arrays[0]) if not all(len(i) == rows for i in arrays): @@ -1162,14 +1166,27 @@ def virtualfile_from_vectors(self, *vectors): family, geometry, mode="GMT_CONTAINER_ONLY", dim=[columns, rows, 1, 0] ) - # Use put_vector for first n columns with numerical type data - for col, array in enumerate(arrays[:columns]): - self.put_vector(dataset, column=col, vector=array) - # Use put_strings for last column with string type data + # Use put_vector for columns with numerical type data + for col, array in enumerate(arrays): + if col not in str_cols: + self.put_vector(dataset, column=col, vector=array) + + # Use put_strings for last column(s) with string type data # Have to use modifier "GMT_IS_DUPLICATE" to duplicate the strings - for array in arrays[columns:]: + if str_cols: + if len(str_cols) == 1: + strings = arrays[str_cols[0]] + elif len(str_cols) == 2: + strings = np.char.add( + np.char.add(arrays[str_cols[0]], " "), arrays[str_cols[1]] + ) + else: + raise NotImplementedError( + f"Unable to handle {len(str_cols)} columns of string arrays. " + "Please use only 1 or 2 instead." 
+ ) self.put_strings( - dataset, family="GMT_IS_VECTOR|GMT_IS_DUPLICATE", strings=array + dataset, family="GMT_IS_VECTOR|GMT_IS_DUPLICATE", strings=strings ) with self.open_virtual_file( diff --git a/pygmt/tests/test_clib.py b/pygmt/tests/test_clib.py index 6f1a88b5bd6..b42398d495a 100644 --- a/pygmt/tests/test_clib.py +++ b/pygmt/tests/test_clib.py @@ -415,6 +415,23 @@ def test_virtual_from_vectors_one_string_column(): assert output == expected +def test_virtual_from_vectors_two_string_columns(): + "Test passing in two columns of string dtype into virtual file dataset" + size = 5 + x = np.arange(size, dtype=np.int32) + y = np.arange(size, size * 2, 1, dtype=np.int32) + strings1 = np.array(["a", "b", "c", "d", "e"], dtype=np.str) + strings2 = np.array(["f", "g", "h", "i", "j"], dtype=np.str) + with clib.Session() as lib: + with lib.virtualfile_from_vectors(x, y, strings1, strings2) as vfile: + with GMTTempFile() as outfile: + lib.call_module("gmtinfo", f"{vfile} ->{outfile.name}") + output = outfile.read(keep_tabs=True) + bounds = "\t".join([f"<{i.min():.0f}/{i.max():.0f}>" for i in (x, y)]) + expected = f": N = {size}\t{bounds}\n" + assert output == expected + + def test_virtualfile_from_vectors_transpose(): "Test transforming matrix columns to virtual file dataset" dtypes = "float32 float64 int32 int64 uint32 uint64".split() From 129135e6488efb77226e4d542e48eeb8db9ec502 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Mon, 10 Aug 2020 13:29:42 +1200 Subject: [PATCH 18/27] Fix test_plot_datetime by not passing first two columns into put_strings --- pygmt/clib/session.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 4c1cc6f6596..45088063dcc 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1148,9 +1148,12 @@ def virtualfile_from_vectors(self, *vectors): arrays = vectors_to_arrays(vectors) columns = len(arrays) + # Find arrays that are of string dtype from column 3 onwards + # 
Assumes that first 2 columns contains coordinates like longitude + # latitude, or datetime string types. str_cols = [ - col - for col, array in enumerate(arrays) + col + 2 + for col, array in enumerate(arrays[2:]) if np.issubdtype(array.dtype, np.str_) ] columns -= len(str_cols) From 4f1ccda5a736ff807f68be1c6462dc7eda0a30cd Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Mon, 10 Aug 2020 14:07:06 +1200 Subject: [PATCH 19/27] Refactor to handle any number of string type columns --- pygmt/clib/session.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 45088063dcc..8f616bd04a1 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1177,17 +1177,8 @@ def virtualfile_from_vectors(self, *vectors): # Use put_strings for last column(s) with string type data # Have to use modifier "GMT_IS_DUPLICATE" to duplicate the strings if str_cols: - if len(str_cols) == 1: - strings = arrays[str_cols[0]] - elif len(str_cols) == 2: - strings = np.char.add( - np.char.add(arrays[str_cols[0]], " "), arrays[str_cols[1]] - ) - else: - raise NotImplementedError( - f"Unable to handle {len(str_cols)} columns of string arrays. " - "Please use only 1 or 2 instead." 
- ) + string_arrays = [arrays[col] for col in str_cols] + strings = np.apply_along_axis(func1d=" ".join, axis=0, arr=string_arrays) self.put_strings( dataset, family="GMT_IS_VECTOR|GMT_IS_DUPLICATE", strings=strings ) From 30c18eb4bc3958738101a4e39c46487ca71e2e1d Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Mon, 10 Aug 2020 14:14:24 +1200 Subject: [PATCH 20/27] Test for put_strings failing to increase code coverage --- pygmt/tests/test_clib_put_strings.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pygmt/tests/test_clib_put_strings.py b/pygmt/tests/test_clib_put_strings.py index 5f0226c1133..8a04266221c 100644 --- a/pygmt/tests/test_clib_put_strings.py +++ b/pygmt/tests/test_clib_put_strings.py @@ -3,9 +3,10 @@ """ import numpy as np import numpy.testing as npt - +import pytest from .. import clib +from ..exceptions import GMTCLibError from ..helpers import GMTTempFile @@ -45,3 +46,14 @@ def test_put_strings(): npt.assert_array_equal(newx, x) npt.assert_array_equal(newy, y) npt.assert_array_equal(newstrings, strings) + + +def test_put_strings_fails(): + "Check that put_strings raises an exception if return code is not zero" + with clib.Session() as lib: + with pytest.raises(GMTCLibError): + lib.put_strings( + dataset=None, + family="GMT_IS_VECTOR|GMT_IS_DUPLICATE", + strings=np.empty(shape=(3,), dtype=np.str), + ) From 62469cb5b5105dddef28563af4e4cb8b7d241bba Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Mon, 10 Aug 2020 14:31:30 +1200 Subject: [PATCH 21/27] Expect failures for tests using GMT_Put_strings on GMT < 6.1.1 --- pygmt/tests/test_clib.py | 11 +++++++++++ pygmt/tests/test_clib_put_strings.py | 8 ++++++++ 2 files changed, 19 insertions(+) diff --git a/pygmt/tests/test_clib.py b/pygmt/tests/test_clib.py index b42398d495a..3218e5c5e12 100644 --- a/pygmt/tests/test_clib.py +++ b/pygmt/tests/test_clib.py @@ -27,6 +27,9 @@ TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data") +with clib.Session() as _lib: + 
gmt_version = Version(_lib.info["version"]) + @contextmanager def mock(session, func, returns=None, mock_func=None): @@ -399,6 +402,10 @@ def test_virtualfile_from_vectors(): assert output == expected +@pytest.mark.xfail( + condition=gmt_version < Version("6.1.1"), + reason="GMT_Put_Strings only works for GMT 6.1.1 and above", +) def test_virtual_from_vectors_one_string_column(): "Test passing in one column with string dtype into virtual file dataset" size = 5 @@ -415,6 +422,10 @@ def test_virtual_from_vectors_one_string_column(): assert output == expected +@pytest.mark.xfail( + condition=gmt_version < Version("6.1.1"), + reason="GMT_Put_Strings only works for GMT 6.1.1 and above", +) def test_virtual_from_vectors_two_string_columns(): "Test passing in two columns of string dtype into virtual file dataset" size = 5 diff --git a/pygmt/tests/test_clib_put_strings.py b/pygmt/tests/test_clib_put_strings.py index 8a04266221c..ceaa8c99d5a 100644 --- a/pygmt/tests/test_clib_put_strings.py +++ b/pygmt/tests/test_clib_put_strings.py @@ -4,12 +4,20 @@ import numpy as np import numpy.testing as npt import pytest +from packaging.version import Version from .. 
import clib from ..exceptions import GMTCLibError from ..helpers import GMTTempFile +with clib.Session() as _lib: + gmt_version = Version(_lib.info["version"]) + +@pytest.mark.xfail( + condition=gmt_version < Version("6.1.1"), + reason="GMT_Put_Strings only works for GMT 6.1.1 and above", +) def test_put_strings(): "Check that assigning a numpy array of dtype str to a dataset works" with clib.Session() as lib: From 26f51c933bc699821322aa253cb8b19c7d73cad7 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Mon, 10 Aug 2020 15:07:44 +1200 Subject: [PATCH 22/27] Fix an incorrect note in virtualfile_from_vectors --- pygmt/clib/session.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 8f616bd04a1..39fe87162fc 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1140,11 +1140,12 @@ def virtualfile_from_vectors(self, *vectors): """ # Conversion to a C-contiguous array needs to be done here and not in - # put_matrix because we need to maintain a reference to the copy while - # it is being used by the C API. Otherwise, the array would be garbage - # collected and the memory freed. Creating it in this context manager - # guarantees that the copy will be around until the virtual file is - # closed. The conversion is implicit in vectors_to_arrays. + # put_vector or put_strings because we need to maintain a reference to + # the copy while it is being used by the C API. Otherwise, the array + # would be garbage collected and the memory freed. Creating it in this + # context manager guarantees that the copy will be around until the + # virtual file is closed. The conversion is implicit in + # vectors_to_arrays. 
arrays = vectors_to_arrays(vectors) columns = len(arrays) From 726de4489ff0a38a00c4a4eb50c436c75ce139f8 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Mon, 10 Aug 2020 15:43:14 +1200 Subject: [PATCH 23/27] Concatenate last string columns instead of allowing arbitrary positions --- pygmt/clib/session.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 39fe87162fc..f2a212da3a9 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1152,12 +1152,10 @@ def virtualfile_from_vectors(self, *vectors): # Find arrays that are of string dtype from column 3 onwards # Assumes that first 2 columns contains coordinates like longitude # latitude, or datetime string types. - str_cols = [ - col + 2 - for col, array in enumerate(arrays[2:]) - if np.issubdtype(array.dtype, np.str_) - ] - columns -= len(str_cols) + for col, array in enumerate(arrays[2:]): + if np.issubdtype(array.dtype, np.str_): + columns = col + 2 + break rows = len(arrays[0]) if not all(len(i) == rows for i in arrays): @@ -1171,14 +1169,13 @@ def virtualfile_from_vectors(self, *vectors): ) # Use put_vector for columns with numerical type data - for col, array in enumerate(arrays): - if col not in str_cols: - self.put_vector(dataset, column=col, vector=array) + for col, array in enumerate(arrays[:columns]): + self.put_vector(dataset, column=col, vector=array) # Use put_strings for last column(s) with string type data # Have to use modifier "GMT_IS_DUPLICATE" to duplicate the strings - if str_cols: - string_arrays = [arrays[col] for col in str_cols] + string_arrays = arrays[columns:] + if string_arrays: strings = np.apply_along_axis(func1d=" ".join, axis=0, arr=string_arrays) self.put_strings( dataset, family="GMT_IS_VECTOR|GMT_IS_DUPLICATE", strings=strings From 4c994775e03f01e09701e36b889a24509f818bda Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Mon, 10 Aug 2020 16:43:23 +1200 Subject: [PATCH 24/27] Test variable length 
strings Co-Authored-By: Dongdong Tian --- pygmt/tests/test_clib.py | 6 +++--- pygmt/tests/test_clib_put_strings.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pygmt/tests/test_clib.py b/pygmt/tests/test_clib.py index 3218e5c5e12..1f0cf3cbedc 100644 --- a/pygmt/tests/test_clib.py +++ b/pygmt/tests/test_clib.py @@ -411,7 +411,7 @@ def test_virtual_from_vectors_one_string_column(): size = 5 x = np.arange(size, dtype=np.int32) y = np.arange(size, size * 2, 1, dtype=np.int32) - strings = np.array(["a", "b", "c", "d", "e"], dtype=np.str) + strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=np.str) with clib.Session() as lib: with lib.virtualfile_from_vectors(x, y, strings) as vfile: with GMTTempFile() as outfile: @@ -431,8 +431,8 @@ def test_virtual_from_vectors_two_string_columns(): size = 5 x = np.arange(size, dtype=np.int32) y = np.arange(size, size * 2, 1, dtype=np.int32) - strings1 = np.array(["a", "b", "c", "d", "e"], dtype=np.str) - strings2 = np.array(["f", "g", "h", "i", "j"], dtype=np.str) + strings1 = np.array(["a", "bc", "def", "ghij", "klmno"], dtype=np.str) + strings2 = np.array(["pqrst", "uvwx", "yz!", "@#", "$"], dtype=np.str) with clib.Session() as lib: with lib.virtualfile_from_vectors(x, y, strings1, strings2) as vfile: with GMTTempFile() as outfile: diff --git a/pygmt/tests/test_clib_put_strings.py b/pygmt/tests/test_clib_put_strings.py index ceaa8c99d5a..6fc7885901a 100644 --- a/pygmt/tests/test_clib_put_strings.py +++ b/pygmt/tests/test_clib_put_strings.py @@ -29,7 +29,7 @@ def test_put_strings(): ) x = np.array([1, 2, 3, 4, 5], dtype=np.int32) y = np.array([6, 7, 8, 9, 10], dtype=np.int32) - strings = np.array(["a", "b", "c", "d", "e"], dtype=np.str) + strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=np.str) lib.put_vector(dataset, column=lib["GMT_X"], vector=x) lib.put_vector(dataset, column=lib["GMT_Y"], vector=y) lib.put_strings( @@ -49,7 +49,7 @@ def test_put_strings(): ) # Load the 
data and check that it's correct newx, newy, newstrings = tmp_file.loadtxt( - unpack=True, dtype=[("x", np.int32), ("y", np.int32), ("text", " Date: Mon, 10 Aug 2020 22:05:55 +1200 Subject: [PATCH 25/27] Replace gmt info with select in test_virtualfile_from_vectors_str_cols --- pygmt/tests/test_clib.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pygmt/tests/test_clib.py b/pygmt/tests/test_clib.py index 1f0cf3cbedc..72f274703a5 100644 --- a/pygmt/tests/test_clib.py +++ b/pygmt/tests/test_clib.py @@ -406,7 +406,7 @@ def test_virtualfile_from_vectors(): condition=gmt_version < Version("6.1.1"), reason="GMT_Put_Strings only works for GMT 6.1.1 and above", ) -def test_virtual_from_vectors_one_string_column(): +def test_virtualfile_from_vectors_one_string_column(): "Test passing in one column with string dtype into virtual file dataset" size = 5 x = np.arange(size, dtype=np.int32) @@ -415,10 +415,9 @@ def test_virtual_from_vectors_one_string_column(): with clib.Session() as lib: with lib.virtualfile_from_vectors(x, y, strings) as vfile: with GMTTempFile() as outfile: - lib.call_module("gmtinfo", f"{vfile} ->{outfile.name}") + lib.call_module("select", f"{vfile} -Vw ->{outfile.name}") output = outfile.read(keep_tabs=True) - bounds = "\t".join([f"<{i.min():.0f}/{i.max():.0f}>" for i in (x, y)]) - expected = f": N = {size}\t{bounds}\n" + expected = "".join(f"{i}\t{j}\t{k}\n" for i, j, k in zip(x, y, strings)) assert output == expected @@ -426,7 +425,7 @@ def test_virtual_from_vectors_one_string_column(): condition=gmt_version < Version("6.1.1"), reason="GMT_Put_Strings only works for GMT 6.1.1 and above", ) -def test_virtual_from_vectors_two_string_columns(): +def test_virtualfile_from_vectors_two_string_columns(): "Test passing in two columns of string dtype into virtual file dataset" size = 5 x = np.arange(size, dtype=np.int32) @@ -436,10 +435,11 @@ def test_virtual_from_vectors_two_string_columns(): with clib.Session() as lib: with 
lib.virtualfile_from_vectors(x, y, strings1, strings2) as vfile: with GMTTempFile() as outfile: - lib.call_module("gmtinfo", f"{vfile} ->{outfile.name}") + lib.call_module("select", f"{vfile} -Vw ->{outfile.name}") output = outfile.read(keep_tabs=True) - bounds = "\t".join([f"<{i.min():.0f}/{i.max():.0f}>" for i in (x, y)]) - expected = f": N = {size}\t{bounds}\n" + expected = "".join( + f"{h}\t{i}\t{j}\t{k}\n" for h, i, j, k in zip(x, y, strings1, strings2) + ) assert output == expected From fe794c946f50640d85aa64f832895a4d3f439760 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Mon, 10 Aug 2020 22:52:21 +1200 Subject: [PATCH 26/27] Fix truncated strings and an incorrect test --- pygmt/clib/session.py | 7 ++++++- pygmt/tests/test_clib.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index f2a212da3a9..ee538f7a50e 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1176,7 +1176,12 @@ def virtualfile_from_vectors(self, *vectors): # Have to use modifier "GMT_IS_DUPLICATE" to duplicate the strings string_arrays = arrays[columns:] if string_arrays: - strings = np.apply_along_axis(func1d=" ".join, axis=0, arr=string_arrays) + if len(string_arrays) == 1: + strings = string_arrays[0] + elif len(string_arrays) > 1: + strings = np.apply_along_axis( + func1d=" ".join, axis=0, arr=string_arrays + ) self.put_strings( dataset, family="GMT_IS_VECTOR|GMT_IS_DUPLICATE", strings=strings ) diff --git a/pygmt/tests/test_clib.py b/pygmt/tests/test_clib.py index 72f274703a5..23264f27e48 100644 --- a/pygmt/tests/test_clib.py +++ b/pygmt/tests/test_clib.py @@ -438,7 +438,7 @@ def test_virtualfile_from_vectors_two_string_columns(): lib.call_module("select", f"{vfile} -Vw ->{outfile.name}") output = outfile.read(keep_tabs=True) expected = "".join( - f"{h}\t{i}\t{j}\t{k}\n" for h, i, j, k in zip(x, y, strings1, strings2) + f"{h}\t{i}\t{j} {k}\n" for h, i, j, k in zip(x, y, strings1, strings2) ) assert output == 
expected From f59d970d2fc1950a59e3a132d8bc16e75c063a1b Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Tue, 11 Aug 2020 08:24:40 +1200 Subject: [PATCH 27/27] Replace gmt select with convert in test_virtualfile_from_vectors_str_cols --- pygmt/tests/test_clib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pygmt/tests/test_clib.py b/pygmt/tests/test_clib.py index 23264f27e48..a94c2c578bf 100644 --- a/pygmt/tests/test_clib.py +++ b/pygmt/tests/test_clib.py @@ -415,7 +415,7 @@ def test_virtualfile_from_vectors_one_string_column(): with clib.Session() as lib: with lib.virtualfile_from_vectors(x, y, strings) as vfile: with GMTTempFile() as outfile: - lib.call_module("select", f"{vfile} -Vw ->{outfile.name}") + lib.call_module("convert", f"{vfile} ->{outfile.name}") output = outfile.read(keep_tabs=True) expected = "".join(f"{i}\t{j}\t{k}\n" for i, j, k in zip(x, y, strings)) assert output == expected @@ -435,7 +435,7 @@ def test_virtualfile_from_vectors_two_string_columns(): with clib.Session() as lib: with lib.virtualfile_from_vectors(x, y, strings1, strings2) as vfile: with GMTTempFile() as outfile: - lib.call_module("select", f"{vfile} -Vw ->{outfile.name}") + lib.call_module("convert", f"{vfile} ->{outfile.name}") output = outfile.read(keep_tabs=True) expected = "".join( f"{h}\t{i}\t{j} {k}\n" for h, i, j, k in zip(x, y, strings1, strings2)