diff --git a/doc/install.rst b/doc/install.rst
index 9831ca4f..25df62f5 100644
--- a/doc/install.rst
+++ b/doc/install.rst
@@ -23,6 +23,7 @@ Dependencies
* `xarray `__
* `pandas `__
* `rasterio `__
+* `dask <https://dask.org/>`__
Most of the examples in the :ref:`gallery` also use:
diff --git a/environment.yml b/environment.yml
index f42dc853..db830d26 100644
--- a/environment.yml
+++ b/environment.yml
@@ -9,6 +9,7 @@ dependencies:
- xarray
- pandas
- rasterio
+ - dask
# Development requirements
- matplotlib
- cmocean
diff --git a/requirements.txt b/requirements.txt
index 7adef2e1..26e0598c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@ pooch>=0.5
xarray
pandas
rasterio
+dask
diff --git a/rockhound/bedmap2.py b/rockhound/bedmap2.py
index 865df880..6c4f2eb6 100644
--- a/rockhound/bedmap2.py
+++ b/rockhound/bedmap2.py
@@ -24,7 +24,7 @@
}
-def fetch_bedmap2(datasets, *, load=True):
+def fetch_bedmap2(datasets, *, load=True, chunks=1000, **kwargs):
"""
Fetch the Bedmap2 datasets for Antarctica.
@@ -55,8 +55,11 @@ def fetch_bedmap2(datasets, *, load=True):
relative to EIGEN-GL04C geoid (to convert back to WGS84, add this grid)
.. warning ::
- Loading a great number of datasets may require a fair amount of memory that
- could crash your system. We recommend loading only the needed datasets.
+ Loading entire datasets into memory may require a large amount of RAM.
+ To prevent this, the function loads the datasets as Dask arrays if
+ ``chunks`` is not ``None``.
+ Be careful with operations that load the entire datasets into memory,
+ such as plotting or performing some computations.
.. warning ::
Loading any dataset along with ``thickness_uncertainty_5km`` would modify the
@@ -70,6 +73,14 @@ def fetch_bedmap2(datasets, *, load=True):
Wether to load the data into an :class:`xarray.Dataset` or just return the
path to the downloaded data tiff files. If False, will return a list with the
paths to the files corresponding to *datasets*.
+ chunks : int, tuple, dict or None
+ Chunk sizes along each dimension. This argument is passed to the
+ :func:`xarray.open_rasterio` function in order to obtain
+ `Dask arrays <https://docs.dask.org/en/latest/array.html>`_ inside the
+ returned :class:`xarray.Dataset`.
+ This helps to read the dataset without loading it entirely into memory.
+ **kwargs
+ Extra parameters passed to the :func:`xarray.open_rasterio` function.
Returns
-------
@@ -88,7 +99,7 @@ def fetch_bedmap2(datasets, *, load=True):
return [get_fname(dataset, fnames) for dataset in datasets]
arrays = []
for dataset in datasets:
- array = xr.open_rasterio(get_fname(dataset, fnames))
+ array = xr.open_rasterio(get_fname(dataset, fnames), chunks=chunks, **kwargs)
# Replace no data values with nans
array = array.where(array != array.nodatavals)
# Remove "band" dimension and coordinate