Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split tests related to virtualfiles into multiple test files #3512

Merged
merged 6 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions pygmt/tests/test_clib_inquire_virtualfile.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we keep test_inquire_virtualfile in test_clib_virtualfiles.py? I'd like to keep that file so that we can still look back at the git history more easily.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I've renamed test_clib_open_virtualfiles.py back to test_clib_virtualfiles.py.

For inquire_virtualfile, I plan to add more tests about GMT_IN in the future.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, that works too, thanks!

Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""
Test the Session.inquire_virtualfile method.
"""

from pygmt import clib


def test_inquire_virtualfile():
    """
    Check that Session.inquire_virtualfile reports the family of a virtual file.

    Only output (GMT_OUT) virtual files are exercised for now.
    """
    # Dataset flavours: each of them is expected to be reported as GMT_IS_DATASET,
    # including the ones opened via a matrix or via vectors.
    dataset_families = (
        "GMT_IS_DATASET",
        "GMT_IS_DATASET|GMT_VIA_MATRIX",
        "GMT_IS_DATASET|GMT_VIA_VECTOR",
    )
    # Remaining families, each paired with the geometry it is opened with; they
    # are expected to be reported unchanged.
    family_geometry_pairs = (
        ("GMT_IS_GRID", "GMT_IS_SURFACE"),
        ("GMT_IS_IMAGE", "GMT_IS_SURFACE"),
        ("GMT_IS_CUBE", "GMT_IS_VOLUME"),
        ("GMT_IS_PALETTE", "GMT_IS_NONE"),
        ("GMT_IS_POSTSCRIPT", "GMT_IS_NONE"),
    )

    with clib.Session() as lib:
        for family in dataset_families:
            with lib.open_virtualfile(
                family, "GMT_IS_PLP", "GMT_OUT|GMT_IS_REFERENCE", None
            ) as vfile:
                reported = lib.inquire_virtualfile(vfile)
                assert reported == lib["GMT_IS_DATASET"]

        for family, geometry in family_geometry_pairs:
            with lib.open_virtualfile(family, geometry, "GMT_OUT", None) as vfile:
                reported = lib.inquire_virtualfile(vfile)
                assert reported == lib[family]
54 changes: 54 additions & 0 deletions pygmt/tests/test_clib_virtualfile_from_matrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
Test the Session.virtualfile_from_matrix method.
"""

import numpy as np
import pytest
from pygmt import clib
from pygmt.helpers import GMTTempFile


@pytest.fixture(scope="module", name="dtypes")
def fixture_dtypes():
    """
    Names of the numpy dtypes the tests iterate over, as a list of strings.
    """
    supported = "int8 int16 int32 int64 uint8 uint16 uint32 uint64 float32 float64"
    return supported.split()


@pytest.mark.benchmark
def test_virtualfile_from_matrix(dtypes):
    """
    Pass a 2-D array as a virtual file and check what the ``info`` module reports.

    The check is repeated for every dtype supplied by the ``dtypes`` fixture.
    """
    nrows, ncols = 7, 5
    for dtype in dtypes:
        matrix = np.arange(nrows * ncols, dtype=dtype).reshape((nrows, ncols))
        # Expected per-column "<min/max>" ranges, tab-separated.
        ranges = "\t".join(
            [f"<{column.min():.0f}/{column.max():.0f}>" for column in matrix.T]
        )
        expected = f"<matrix memory>: N = {nrows}\t{ranges}\n"

        with (
            clib.Session() as lib,
            lib.virtualfile_from_matrix(matrix) as vfile,
            GMTTempFile() as outfile,
        ):
            lib.call_module("info", [vfile, f"->{outfile.name}"])
            result = outfile.read(keep_tabs=True)

        assert result == expected


def test_virtualfile_from_matrix_slice(dtypes):
    """
    Pass a slice of a larger 2-D array as a virtual file.

    Only the top-left ``rows`` x ``cols`` corner of the full array is handed
    over, and the ``info`` output must describe that corner only.
    """
    full_shape = (10, 6)
    rows, cols = 5, 3
    for dtype in dtypes:
        whole = np.arange(full_shape[0] * full_shape[1], dtype=dtype).reshape(
            full_shape
        )
        corner = whole[:rows, :cols]
        # Expected per-column "<min/max>" ranges of the sliced data.
        ranges = "\t".join(
            [f"<{column.min():.0f}/{column.max():.0f}>" for column in corner.T]
        )
        expected = f"<matrix memory>: N = {rows}\t{ranges}\n"

        with (
            clib.Session() as lib,
            lib.virtualfile_from_matrix(corner) as vfile,
            GMTTempFile() as outfile,
        ):
            lib.call_module("info", [vfile, f"->{outfile.name}"])
            result = outfile.read(keep_tabs=True)

        assert result == expected
106 changes: 106 additions & 0 deletions pygmt/tests/test_clib_virtualfile_from_stringio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""
Test the Session.virtualfile_from_stringio method.
"""

import io

import numpy as np
from pygmt import clib


def _stringio_to_dataset(data: io.StringIO):
    """
    Helper for checking the virtualfile_from_stringio method.

    Steps performed:

    1. Create a virtual file from the input StringIO object.
    2. Pass that virtual file to the ``read`` module, which writes it to a
       second (output) virtual file.
    3. Read the output virtual file back as a dataset.
    4. Collect, for every segment of the first table, a header line followed by
       the segment's text rows, and return them joined as a single
       newline-terminated string.
    """
    with (
        clib.Session() as lib,
        lib.virtualfile_from_stringio(data) as vintbl,
        lib.virtualfile_out(kind="dataset") as vouttbl,
    ):
        lib.call_module("read", args=[vintbl, vouttbl, "-Td"])
        dataset = lib.read_virtualfile(vouttbl, kind="dataset").contents

        # Walk the segments while the session and virtual files are still open.
        lines = []
        first_table = dataset.table[0].contents
        for segment_ptr in first_table.segment[: first_table.n_segments]:
            segment = segment_ptr.contents
            # A segment without a header is rendered as a bare ">" marker.
            if segment.header:
                lines.append(f"> {segment.header.decode()}")
            else:
                lines.append(">")
            lines.extend(np.char.decode(segment.text[: segment.n_rows]))

    return "\n".join(lines) + "\n"


def test_virtualfile_from_stringio():
    """
    Round-trip a legend-style text block that has no segment header.

    The comment line is expected to be dropped and a bare ">" segment marker to
    appear in front of the text rows.
    """
    body = (
        "H 24p Legend\n"
        "N 2\n"
        "S 0.1i c 0.15i p300/12 0.25p 0.3i My circle\n"
    )
    source = io.StringIO("# Comment\n" + body)
    expected = ">\n" + body
    assert _stringio_to_dataset(source) == expected


def test_one_segment():
    """
    Round-trip a single segment that has an explicit header.

    Everything except the leading comment line is expected to come back
    unchanged.
    """
    segment = (
        "> Segment 1\n"
        "1 2 3 ABC\n"
        "4 5 DE\n"
        "6 7 8 9 FGHIJK LMN OPQ\n"
        "RSTUVWXYZ\n"
    )
    source = io.StringIO("# Comment\n" + segment)
    assert _stringio_to_dataset(source) == segment


def test_multiple_segments():
    """
    Round-trip two segments with comment lines before and between them.

    The comment lines are expected to be dropped; both segments are expected to
    come back unchanged and in order.
    """
    rows = (
        "1 2 3 ABC\n"
        "4 5 DE\n"
        "6 7 8 9 FG\n"
    )
    first_segment = "> Segment 1\n" + rows
    second_segment = "> Segment 2\n" + rows

    source = io.StringIO(
        "# Comment line 1\n"
        "# Comment line 2\n"
        + first_segment
        + "# Comment line 3\n"
        + second_segment
    )
    expected = first_segment + second_segment
    assert _stringio_to_dataset(source) == expected
172 changes: 172 additions & 0 deletions pygmt/tests/test_clib_virtualfile_from_vectors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
"""
Test the Session.virtualfile_from_vectors method.
"""

from importlib.util import find_spec

import numpy as np
import pandas as pd
import pytest
from pygmt import clib
from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import GMTTempFile


@pytest.fixture(scope="module", name="dtypes")
def fixture_dtypes():
    """
    Names of the numpy dtypes the tests iterate over, as a list of strings.
    """
    supported = "int8 int16 int32 int64 uint8 uint16 uint32 uint64 float32 float64"
    return supported.split()


@pytest.fixture(scope="module", name="dtypes_pandas")
def fixture_dtypes_pandas(dtypes):
    """
    Names of the pandas dtypes the tests iterate over, as a tuple of strings.

    Always contains the numpy dtype names from the ``dtypes`` fixture; when
    pyarrow can be found, the matching ``<dtype>[pyarrow]`` names are appended.
    """
    numpy_names = list(dtypes)

    if find_spec("pyarrow") is None:
        return tuple(numpy_names)

    arrow_names = [f"{dtype}[pyarrow]" for dtype in numpy_names]
    return tuple(numpy_names + arrow_names)


@pytest.mark.benchmark
def test_virtualfile_from_vectors(dtypes):
    """
    Pass three 1-D arrays as a virtual file and check what ``info`` reports.

    The check is repeated for every dtype supplied by the ``dtypes`` fixture.
    """
    npoints = 10
    for dtype in dtypes:
        # Three consecutive ranges: [0, n), [n, 2n) and [2n, 3n).
        xcol = np.arange(npoints, dtype=dtype)
        ycol = np.arange(npoints, npoints * 2, 1, dtype=dtype)
        zcol = np.arange(npoints * 2, npoints * 3, 1, dtype=dtype)
        ranges = "\t".join(
            [f"<{col.min():.0f}/{col.max():.0f}>" for col in (xcol, ycol, zcol)]
        )
        expected = f"<vector memory>: N = {npoints}\t{ranges}\n"

        with (
            clib.Session() as lib,
            lib.virtualfile_from_vectors(xcol, ycol, zcol) as vfile,
            GMTTempFile() as outfile,
        ):
            lib.call_module("info", [vfile, f"->{outfile.name}"])
            result = outfile.read(keep_tabs=True)

        assert result == expected


@pytest.mark.benchmark
@pytest.mark.parametrize("dtype", [str, object])
def test_virtualfile_from_vectors_one_string_or_object_column(dtype):
    """
    Pass two numeric columns plus one string/object column as a virtual file.

    The ``convert`` output is expected to hold one tab-separated line per row.
    """
    nrows = 5
    xcol = np.arange(nrows, dtype=np.int32)
    ycol = np.arange(nrows, nrows * 2, 1, dtype=np.int32)
    labels = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=dtype)
    expected = "".join(
        f"{i}\t{j}\t{k}\n" for i, j, k in zip(xcol, ycol, labels, strict=True)
    )

    with (
        clib.Session() as lib,
        lib.virtualfile_from_vectors(xcol, ycol, labels) as vfile,
        GMTTempFile() as outfile,
    ):
        lib.call_module("convert", [vfile, f"->{outfile.name}"])
        result = outfile.read(keep_tabs=True)

    assert result == expected


@pytest.mark.parametrize("dtype", [str, object])
def test_virtualfile_from_vectors_two_string_or_object_columns(dtype):
    """
    Pass two numeric columns plus two string/object columns as a virtual file.

    In the expected ``convert`` output the two text columns are separated by a
    single space, while the numeric columns are tab-separated.
    """
    nrows = 5
    xcol = np.arange(nrows, dtype=np.int32)
    ycol = np.arange(nrows, nrows * 2, 1, dtype=np.int32)
    # Catch bug in https://github.com/GenericMappingTools/pygmt/pull/2719
    first_text = np.array(["a", "bc", "def", "ghij", "klmnolooong"], dtype=dtype)
    second_text = np.array(["pqrst", "uvwx", "yz!", "@#", "$"], dtype=dtype)
    expected = "".join(
        f"{h}\t{i}\t{j} {k}\n"
        for h, i, j, k in zip(xcol, ycol, first_text, second_text, strict=True)
    )

    with (
        clib.Session() as lib,
        lib.virtualfile_from_vectors(xcol, ycol, first_text, second_text) as vfile,
        GMTTempFile() as outfile,
    ):
        lib.call_module("convert", [vfile, f"->{outfile.name}"])
        result = outfile.read(keep_tabs=True)

    assert result == expected


def test_virtualfile_from_vectors_transpose(dtypes):
    """
    Pass the columns of a 2-D array as separate vectors of a virtual file.

    ``info -C`` output is expected to list each column's min and max,
    tab-separated on a single line.
    """
    nrows, ncols = 7, 5
    for dtype in dtypes:
        matrix = np.arange(nrows * ncols, dtype=dtype).reshape((nrows, ncols))
        ranges = "\t".join(
            [f"{column.min():.0f}\t{column.max():.0f}" for column in matrix.T]
        )
        expected = f"{ranges}\n"

        with (
            clib.Session() as lib,
            lib.virtualfile_from_vectors(*matrix.T) as vfile,
            GMTTempFile() as outfile,
        ):
            lib.call_module("info", [vfile, "-C", f"->{outfile.name}"])
            result = outfile.read(keep_tabs=True)

        assert result == expected


def test_virtualfile_from_vectors_diff_size():
    """
    Vectors of unequal length must be rejected with GMTInvalidInput.
    """
    shorter = np.arange(5)
    longer = np.arange(6)
    with (
        clib.Session() as lib,
        pytest.raises(GMTInvalidInput),
        lib.virtualfile_from_vectors(shorter, longer),
    ):
        pass


def test_virtualfile_from_vectors_pandas(dtypes_pandas):
    """
    Pass pandas.Series columns as the vectors of a virtual file.

    The check is repeated for every dtype supplied by the ``dtypes_pandas``
    fixture, which covers numpy dtypes and, when available, pyarrow dtypes.
    """
    npoints = 13

    for dtype in dtypes_pandas:
        frame = pd.DataFrame(
            data={
                "x": np.arange(npoints),
                "y": np.arange(npoints, npoints * 2, 1),
                "z": np.arange(npoints * 2, npoints * 3, 1),
            },
            dtype=dtype,
        )
        series = (frame.x, frame.y, frame.z)
        ranges = "\t".join([f"<{col.min():.0f}/{col.max():.0f}>" for col in series])
        expected = f"<vector memory>: N = {npoints}\t{ranges}\n"

        with (
            clib.Session() as lib,
            lib.virtualfile_from_vectors(frame.x, frame.y, frame.z) as vfile,
            GMTTempFile() as outfile,
        ):
            lib.call_module("info", [vfile, f"->{outfile.name}"])
            result = outfile.read(keep_tabs=True)

        assert result == expected


def test_virtualfile_from_vectors_arraylike():
    """
    Pass plain Python sequences (list, tuple, range) as the vectors.
    """
    npoints = 13
    # One container type per column: list, tuple and range.
    as_list = list(range(0, npoints, 1))
    as_tuple = tuple(range(npoints, npoints * 2, 1))
    as_range = range(npoints * 2, npoints * 3, 1)
    ranges = "\t".join(
        [f"<{min(col):.0f}/{max(col):.0f}>" for col in (as_list, as_tuple, as_range)]
    )
    expected = f"<vector memory>: N = {npoints}\t{ranges}\n"

    with (
        clib.Session() as lib,
        lib.virtualfile_from_vectors(as_list, as_tuple, as_range) as vfile,
        GMTTempFile() as outfile,
    ):
        lib.call_module("info", [vfile, f"->{outfile.name}"])
        result = outfile.read(keep_tabs=True)

    assert result == expected
Loading