From 4b8b14e77aaea5828635f0b8aee8617cf2d6e965 Mon Sep 17 00:00:00 2001 From: Elias <110238618+ESadek-MO@users.noreply.github.com> Date: Mon, 22 Aug 2022 16:51:31 +0100 Subject: [PATCH] Tilde's and wildcard recognition within iris.save. (#4913) * Draft: Tilde's and wildcard can now be read by iris.save. Modifications and tests provisionally done, docstrings still need editing, with examples and doctests needing being added/editing. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Docstrings updated for expand_filespecs and save * docstrings for save and expand_filespecs updated to numpy formatting * Reviewed: Added expansion and relative path tests to expand_filespecs. Ensured list indexing returns single item lists. Minor comment changes. * Updated latest What's New, and added ESadek-MO to common links * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fixed isort conflicts Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/src/common_links.inc | 1 + docs/src/whatsnew/latest.rst | 6 +- lib/iris/io/__init__.py | 153 ++++++++++-------- .../tests/unit/io/test_expand_filespecs.py | 24 +++ lib/iris/tests/unit/io/test_save.py | 6 + 5 files changed, 117 insertions(+), 73 deletions(-) diff --git a/docs/src/common_links.inc b/docs/src/common_links.inc index 7ae2463ca9..ec7e1efd6d 100644 --- a/docs/src/common_links.inc +++ b/docs/src/common_links.inc @@ -53,6 +53,7 @@ .. _@cpelley: https://github.com/cpelley .. _@djkirkham: https://github.com/djkirkham .. _@DPeterK: https://github.com/DPeterK +.. _@ESadek-MO: https://github.com/ESadek-MO .. _@esc24: https://github.com/esc24 .. _@jamesp: https://github.com/jamesp .. _@jonseddon: https://github.com/jonseddon diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index 215e7d34f0..b94cd11517 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -25,13 +25,15 @@ This document explains the changes made to Iris for this release 📢 Announcements ================ -#. N/A +#. Welcome to `@ESadek-MO`_ who made their first contribution to Iris 🎉 ✨ Features =========== -#. N/A +#. `@ESadek-MO`_ edited :func:`~iris.io.expand_filespecs` to allow expansion of + non-existing paths, and added expansion functionality to :func:`~iris.io.save`. + (:issue:`4772`, :pull:`4913`) 🐛 Bugs Fixed diff --git a/lib/iris/io/__init__.py b/lib/iris/io/__init__.py index 8d5a2e05d2..4659f70ae3 100644 --- a/lib/iris/io/__init__.py +++ b/lib/iris/io/__init__.py @@ -131,20 +131,26 @@ def decode_uri(uri, default="file"): return scheme, part -def expand_filespecs(file_specs): +def expand_filespecs(file_specs, files_expected=True): """ Find all matching file paths from a list of file-specs. - Args: - - * file_specs (iterable of string): - File paths which may contain '~' elements or wildcards. - - Returns: - A well-ordered list of matching absolute file paths. - If any of the file-specs match no existing files, an - exception is raised. - + Parameters + ---------- + file_specs : iterable of str + File paths which may contain ``~`` elements or wildcards. + files_expected : bool, default=True + Whether file is expected to exist (i.e. for load). + + Returns + ------- + list of str + if files_expected is ``True``: + A well-ordered list of matching absolute file paths. + If any of the file-specs match no existing files, an + exception is raised. + if files_expected is ``False``: + A list of expanded file paths. """ # Remove any hostname component - currently unused filenames = [ @@ -154,26 +160,30 @@ def expand_filespecs(file_specs): for fn in file_specs ] - # Try to expand all filenames as globs - glob_expanded = OrderedDict( - [[fn, sorted(glob.glob(fn))] for fn in filenames] - ) - - # If any of the specs expanded to an empty list then raise an error - all_expanded = glob_expanded.values() - - if not all(all_expanded): - msg = "One or more of the files specified did not exist:" - for pattern, expanded in glob_expanded.items(): - if expanded: - msg += '\n - "{}" matched {} file(s)'.format( - pattern, len(expanded) - ) - else: - msg += '\n * "{}" didn\'t match any files'.format(pattern) - raise IOError(msg) + if files_expected: + # Try to expand all filenames as globs + glob_expanded = OrderedDict( + [[fn, sorted(glob.glob(fn))] for fn in filenames] + ) - return [fname for fnames in all_expanded for fname in fnames] + # If any of the specs expanded to an empty list then raise an error + all_expanded = glob_expanded.values() + if not all(all_expanded): + msg = "One or more of the files specified did not exist:" + for pattern, expanded in glob_expanded.items(): + if expanded: + msg += '\n - "{}" matched {} file(s)'.format( + pattern, len(expanded) + ) + else: + msg += '\n * "{}" didn\'t match any files'.format( + pattern + ) + raise IOError(msg) + result = [fname for fnames in all_expanded for fname in fnames] + else: + result = filenames + return result def load_files(filenames, callback, constraints=None): @@ -356,65 +366,64 @@ def save(source, target, saver=None, **kwargs): A custom saver can be provided to the function to write to a different file format. - Args: - - * source: - :class:`iris.cube.Cube`, :class:`iris.cube.CubeList` or - sequence of cubes. - * target: - A filename (or writeable, depending on file format). + Parameters + ---------- + source : :class:`iris.cube.Cube` or :class:`iris.cube.CubeList` + target : str or pathlib.PurePath or io.TextIOWrapper When given a filename or file, Iris can determine the - file format. Filename can be given as a string or - :class:`pathlib.PurePath`. - - Kwargs: - - * saver: - Optional. Specifies the file format to save. + file format. + saver : str or function, optional + Specifies the file format to save. If omitted, Iris will attempt to determine the format. - If a string, this is the recognised filename extension (where the actual filename may not have it). + Otherwise the value is a saver function, of the form: ``my_saver(cube, target)`` plus any custom keywords. It is assumed that a saver will accept an ``append`` keyword - if it's file format can handle multiple cubes. See also + if its file format can handle multiple cubes. See also :func:`iris.io.add_saver`. + **kwargs : dict, optional + All other keywords are passed through to the saver function; see the + relevant saver documentation for more information on keyword arguments. - All other keywords are passed through to the saver function; see the - relevant saver documentation for more information on keyword arguments. - - Examples:: + Warnings + -------- + Saving a cube whose data has been loaded lazily + (if `cube.has_lazy_data()` returns `True`) to the same file it expects + to load data from will cause both the data in-memory and the data on + disk to be lost. - # Save a cube to PP - iris.save(my_cube, "myfile.pp") + .. code-block:: python - # Save a cube list to a PP file, appending to the contents of the file - # if it already exists - iris.save(my_cube_list, "myfile.pp", append=True) + cube = iris.load_cube("somefile.nc") + # The next line causes data loss in 'somefile.nc' and the cube. + iris.save(cube, "somefile.nc") - # Save a cube to netCDF, defaults to NETCDF4 file format - iris.save(my_cube, "myfile.nc") + In general, overwriting a file which is the source for any lazily loaded + data can result in corruption. Users should proceed with caution when + attempting to overwrite an existing file. - # Save a cube list to netCDF, using the NETCDF3_CLASSIC storage option - iris.save(my_cube_list, "myfile.nc", netcdf_format="NETCDF3_CLASSIC") + Examples + -------- + >>> # Setting up + >>> import iris + >>> my_cube = iris.load_cube(iris.sample_data_path('air_temp.pp')) + >>> my_cube_list = iris.load(iris.sample_data_path('space_weather.nc')) - .. warning:: + >>> # Save a cube to PP + >>> iris.save(my_cube, "myfile.pp") - Saving a cube whose data has been loaded lazily - (if `cube.has_lazy_data()` returns `True`) to the same file it expects - to load data from will cause both the data in-memory and the data on - disk to be lost. + >>> # Save a cube list to a PP file, appending to the contents of the file + >>> # if it already exists + >>> iris.save(my_cube_list, "myfile.pp", append=True) - .. code-block:: python + >>> # Save a cube to netCDF, defaults to NETCDF4 file format + >>> iris.save(my_cube, "myfile.nc") - cube = iris.load_cube("somefile.nc") - # The next line causes data loss in 'somefile.nc' and the cube. - iris.save(cube, "somefile.nc") + >>> # Save a cube list to netCDF, using the NETCDF3_CLASSIC storage option + >>> iris.save(my_cube_list, "myfile.nc", netcdf_format="NETCDF3_CLASSIC") - In general, overwriting a file which is the source for any lazily loaded - data can result in corruption. Users should proceed with caution when - attempting to overwrite an existing file. """ from iris.cube import Cube, CubeList @@ -423,6 +432,8 @@ def save(source, target, saver=None, **kwargs): if isinstance(target, pathlib.PurePath): target = str(target) if isinstance(target, str) and saver is None: + # Converts tilde or wildcards to absolute path + (target,) = expand_filespecs([str(target)], False) saver = find_saver(target) elif hasattr(target, "name") and saver is None: saver = find_saver(target.name) diff --git a/lib/iris/tests/unit/io/test_expand_filespecs.py b/lib/iris/tests/unit/io/test_expand_filespecs.py index 0299a415b4..8720478153 100644 --- a/lib/iris/tests/unit/io/test_expand_filespecs.py +++ b/lib/iris/tests/unit/io/test_expand_filespecs.py @@ -10,6 +10,7 @@ import iris.tests as tests # isort:skip import os +from pathlib import Path import shutil import tempfile import textwrap @@ -96,6 +97,29 @@ def test_files_and_none(self): self.assertMultiLineEqual(str(err.exception), expected) + def test_false_bool_absolute(self): + tempdir = self.tmpdir + msg = os.path.join(tempdir, "no_exist.txt") + (result,) = iio.expand_filespecs([msg], False) + self.assertEqual(result, msg) + + def test_false_bool_home(self): + # ensure that not only does files_expected not error, + # but that the path is still expanded from a ~ + msg = str(Path().home() / "no_exist.txt") + (result,) = iio.expand_filespecs(["~/no_exist.txt"], False) + self.assertEqual(result, msg) + + def test_false_bool_relative(self): + cwd = os.getcwd() + try: + os.chdir(self.tmpdir) + item_out = iio.expand_filespecs(["no_exist.txt"], False) + item_in = [os.path.join(self.tmpdir, "no_exist.txt")] + self.assertEqual(item_out, item_in) + finally: + os.chdir(cwd) + if __name__ == "__main__": tests.main() diff --git a/lib/iris/tests/unit/io/test_save.py b/lib/iris/tests/unit/io/test_save.py index b92e26f2d1..623cf417f2 100755 --- a/lib/iris/tests/unit/io/test_save.py +++ b/lib/iris/tests/unit/io/test_save.py @@ -26,6 +26,12 @@ def test_pathlib_save(self): "iris.io.find_saver", return_value=(lambda *args, **kwargs: None) ) + def replace_expand(file_specs, files_expected=True): + return file_specs + + # does not expand filepaths due to patch + self.patch("iris.io.expand_filespecs", replace_expand) + test_variants = [ ("string", "string"), (Path("string/string"), "string/string"),