From fcc4c13169cd81baa9a6205b9f895d5d9c5c65c5 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 28 Mar 2022 15:47:25 +0100 Subject: [PATCH 1/3] get_filenames --- cf/data/__init__.py | 2 ++ cf/data/data.py | 45 +++++++++++++++++++++++++------------------- cf/test/test_Data.py | 3 +++ 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/cf/data/__init__.py b/cf/data/__init__.py index 6a28aa2c46..4d80e10a5e 100644 --- a/cf/data/__init__.py +++ b/cf/data/__init__.py @@ -1,3 +1,5 @@ +from .abstract import FileArray + from .cachedarray import CachedArray from .netcdfarray import NetCDFArray from .umarray import UMArray diff --git a/cf/data/data.py b/cf/data/data.py index c93d48f7d4..8bece7e1fa 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -47,6 +47,7 @@ from ..mixin_container import Container from ..units import Units from . import ( # GatheredSubarray,; RaggedContiguousSubarray,; RaggedIndexedContiguousSubarray,; RaggedIndexedSubarray, + FileArray, NetCDFArray, UMArray, ) @@ -9466,37 +9467,43 @@ def insert_dimension(self, position=0, inplace=False): return d + @daskified(_DASKIFIED_VERBOSE) def get_filenames(self): """Return the names of files containing parts of the data array. :Returns: `set` - The file names in normalized, absolute form. If the data - is are memory then an empty `set` is returned. + The file names in normalized, absolute form. If the + data is are memory then an empty `set` is returned. - **Examples:** + **Examples** + + >>> f = cf.NetCDFArray(TODODASK) + >>> d = cf.Data(f) + >>> d.get_filenames() + {TODODASK} - >>> f = cf.read('../file[123]')[0] - >>> f.get_filenames() - {'/data/user/file1', - '/data/user/file2', - '/data/user/file3'} - >>> a = f.array - >>> f.get_filenames() + >>> d = cf.Data([1, 2, 3]) + >>> d.get_filenames() set() """ - print("TODODASK - is this still possible?") - out = set( - [ - abspath(p.subarray.get_filename()) - for p in self.partitions.matrix.flat - if p.in_file - ] - ) - out.discard(None) + out = set() + dx = self._get_dask() + hlg = dx.dask + dsk = hlg.to_dict() + for key, value in hlg.get_all_dependencies().items(): + if value: + continue + + # This key has no dependencies, and so is raw data. + a = dsk[key] + if isinstance(a, FileArray): + out.add(abspath(a.get_filename())) + + out.discard(None) return out @daskified(_DASKIFIED_VERBOSE) diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 7e991b8f68..90994d321a 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -3927,6 +3927,9 @@ def test_Data_set_units(self): with self.assertRaises(ValueError): d.set_units("km") + def test_Data_get_filenames(self): + raise ValueError("need updated NetCDFArray to test") + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) From 2d960c74835fa270b9852b37c5fbf2787d8c3cb6 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 1 Apr 2022 15:51:22 +0100 Subject: [PATCH 2/3] Typo Co-authored-by: Sadie L. Bartholomew --- cf/data/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cf/data/data.py b/cf/data/data.py index 8bece7e1fa..9f011b74be 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -9475,7 +9475,7 @@ def get_filenames(self): `set` The file names in normalized, absolute form. If the - data is are memory then an empty `set` is returned. + data is in memory then an empty `set` is returned. **Examples** From bb8739ceb41cb5b8371df260ab912b48a712b055 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 1 Apr 2022 15:55:01 +0100 Subject: [PATCH 3/3] skip get_filenames test --- cf/test/test_Data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 90994d321a..a3d873690f 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -3927,8 +3927,9 @@ def test_Data_set_units(self): with self.assertRaises(ValueError): d.set_units("km") + @unittest.skipIf(TEST_DASKIFIED_ONLY, "Needs updated NetCDFArray to test") def test_Data_get_filenames(self): - raise ValueError("need updated NetCDFArray to test") + pass if __name__ == "__main__":