Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DO NOT MERGE before cfdm 1.9.1.0] dask: compression refactor #354

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@

# Check the version of cfdm
_minimum_vn = "1.9.0.1"
_maximum_vn = "1.9.1.0"
_maximum_vn = "1.9.2.0"
_cfdm_version = LooseVersion(cfdm.__version__)
if not LooseVersion(_minimum_vn) <= _cfdm_version < LooseVersion(_maximum_vn):
raise RuntimeError(
Expand Down Expand Up @@ -225,6 +225,7 @@
RaggedContiguousArray,
RaggedIndexedArray,
RaggedIndexedContiguousArray,
SubsampledArray,
)

from .aggregate import aggregate
Expand Down
2 changes: 1 addition & 1 deletion cf/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2528,7 +2528,7 @@ def _get_hfl(
if d._pmsize == 1:
partition = d.partitions.matrix.item()
if not partition.part:
key = getattr(partition.subarray, "file_pointer", None)
key = getattr(partition.subarray, "file_address", None)
if key is not None:
hash_value = hfl_cache.hash.get(key, None)
create_hash = hash_value is None
Expand Down
9 changes: 5 additions & 4 deletions cf/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
from .raggedcontiguousarray import RaggedContiguousArray
from .raggedindexedarray import RaggedIndexedArray
from .raggedindexedcontiguousarray import RaggedIndexedContiguousArray
from .subsampledarray import SubsampledArray

from .gatheredsubarray import GatheredSubarray
from .raggedcontiguoussubarray import RaggedContiguousSubarray
from .raggedindexedsubarray import RaggedIndexedSubarray
from .raggedindexedcontiguoussubarray import RaggedIndexedContiguousSubarray
# from .gatheredsubarray import GatheredSubarray
# from .raggedcontiguoussubarray import RaggedContiguousSubarray
# from .raggedindexedsubarray import RaggedIndexedSubarray
# from .raggedindexedcontiguoussubarray import RaggedIndexedContiguousSubarray

from .data import Data
3 changes: 0 additions & 3 deletions cf/data/abstract/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,2 @@
from .array import Array

# from .compressedarray import CompressedArray
from .compressedsubarray import CompressedSubarray
from .filearray import FileArray
19 changes: 3 additions & 16 deletions cf/data/abstract/array.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import cfdm

from ..mixin import ArrayMixin

class Array(cfdm.Array):

class Array(ArrayMixin, cfdm.Array):
"""Abstract base class for a container of an underlying array.

The form of the array is defined by the initialization parameters
Expand All @@ -10,18 +12,3 @@ class Array(cfdm.Array):
.. versionadded:: 3.0.0

"""

def __repr__(self):
"""Called by the `repr` built-in function.

x.__repr__() <==> repr(x)

.. versionadded:: 3.0.0

"""
return super().__repr__().replace("<", "<CF ", 1)

@property
def dask_asarray(self):
"""TODODASK."""
return False
166 changes: 33 additions & 133 deletions cf/data/abstract/filearray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,149 +3,38 @@


class FileArray(Array):
"""A sub-array stored in a file.

.. note:: Subclasses must define the following methods:
`!__getitem__`, `!__str__`, `!close` and `!open`.

"""
"""An array stored in a file."""

def __getitem__(self, indices):
"""TODO."""
pass

def __str__(self):
"""x.__str__() <==> str(x)"""
return f"{self.shape} in {self.file}"

# ----------------------------------------------------------------
# Dask attributes
# ----------------------------------------------------------------
@property
def dask_lock(self):
return getattr(self._get_Array(), "dask_lock", False)

@property
def dask_asarray(self):
return False

# ----------------------------------------------------------------
# Attributes
# ----------------------------------------------------------------
@property
def dtype(self):
"""Data-type of the data elements.
"""Return a subspace of the array.

**Examples:**
x.__getitem__(indices) <==> x[indices]

>>> a.dtype
dtype('float64')
>>> print(type(a.dtype))
<type 'numpy.dtype'>
Returns a subspace of the array as an independent numpy array.

"""
return self._get_component("dtype")
raise NotImplementedError(
f"Must implement {self.__class__.__name__}.__getitem__"
) # pragma: no cover

@property
def ndim(self):
"""Number of array dimensions.

**Examples:**

>>> a.shape
(73, 96)
>>> a.ndim
2
>>> a.size
7008

>>> a.shape
(1, 1, 1)
>>> a.ndim
3
>>> a.size
1

>>> a.shape
()
>>> a.ndim
0
>>> a.size
1

"""
return self._get_component("ndim")
def __str__(self):
"""x.__str__() <==> str(x)"""
return f"<{self.__class__.__name__}: {self.shape} in {self.file}>"

@property
def shape(self):
"""Tuple of array dimension sizes.

**Examples:**

>>> a.shape
(73, 96)
>>> a.ndim
2
>>> a.size
7008

>>> a.shape
(1, 1, 1)
>>> a.ndim
3
>>> a.size
1

>>> a.shape
()
>>> a.ndim
0
>>> a.size
1
def _lock(self):
"""TODODASK.

"""
return self._get_component("shape")
Concurrent reads are assumed to be supported.

@property
def size(self):
"""Number of elements in the array.

**Examples:**

>>> a.shape
(73, 96)
>>> a.size
7008
>>> a.ndim
2

>>> a.shape
(1, 1, 1)
>>> a.ndim
3
>>> a.size
1

>>> a.shape
()
>>> a.ndim
0
>>> a.size
1
.. versionadded:: (cfdm) 1.9.TODO.0

"""
return self._get_component("size")
return False

@property
def filename(self):
"""The name of the file containing the array.

**Examples:**

>>> a.filename()
'file.nc'

"""
"""The name of the file containing the array."""
return self._get_component("filename")

@property
Expand All @@ -157,7 +46,7 @@ def array(self):
`numpy.ndarray`
An independent numpy array of the data.

**Examples:**
**Examples**

>>> n = numpy.asanyarray(a)
>>> isinstance(n, numpy.ndarray)
Expand All @@ -166,6 +55,12 @@ def array(self):
"""
return self[...]

def close(self):
"""Close the dataset containing the data."""
raise NotImplementedError(
f"Must implement {self.__class__.__name__}.close"
) # pragma: no cover

def inspect(self):
"""Inspect the object for debugging.

Expand All @@ -181,16 +76,21 @@ def inspect(self):
def get_filename(self):
"""Return the name of the file containing the array.

**Examples:**
:Returns:

`str`
The file name.

**Examples**

>>> a.get_filename()
'file.nc'

"""
return self._get_component("filename")

def close(self):
pass

def open(self):
pass
"""Returns an open dataset containing the data array."""
raise NotImplementedError(
f"Must implement {self.__class__.__name__}.open"
) # pragma: no cover
Loading