From 9deadb73064e6a11999bed0ae53d548206934cf9 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 1 Aug 2022 20:48:06 -0600 Subject: [PATCH 01/20] Add Kvikio backend entrypoint --- cupy_xarray/kvikio.py | 200 ++++++++++++++++++++++++++++++++++++++++++ setup.py | 3 + 2 files changed, 203 insertions(+) create mode 100644 cupy_xarray/kvikio.py diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py new file mode 100644 index 0000000..313a249 --- /dev/null +++ b/cupy_xarray/kvikio.py @@ -0,0 +1,200 @@ +import os + +import cupy as cp +import numpy as np +import zarr +from xarray import Variable +from xarray.backends import zarr as zarr_backend +from xarray.backends.common import _normalize_path # TODO: can this be public +from xarray.backends.store import StoreBackendEntrypoint +from xarray.backends.zarr import ZarrArrayWrapper, ZarrBackendEntrypoint, ZarrStore +from xarray.core import indexing +from xarray.core.utils import close_on_error # TODO: can this be public. + +try: + import kvikio.zarr + + has_kvikio = True +except ImportError: + has_kvikio = False + + +class CupyZarrArrayWrapper(ZarrArrayWrapper): + def __array__(self): + return self.get_array() + + +class EagerCupyZarrArrayWrapper(ZarrArrayWrapper): + """Used to wrap dimension coordinates.""" + + def __array__(self): + return self.datastore.zarr_group[self.variable_name][:].get() + + def get_array(self): + return np.asarray(self) + + +class GDSZarrStore(ZarrStore): + @classmethod + def open_group( + cls, + store, + mode="r", + synchronizer=None, + group=None, + consolidated=False, + consolidate_on_close=False, + chunk_store=None, + storage_options=None, + append_dim=None, + write_region=None, + safe_chunks=True, + stacklevel=2, + ): + + # zarr doesn't support pathlib.Path objects yet. zarr-python#601 + if isinstance(store, os.PathLike): + store = os.fspath(store) + + open_kwargs = dict( + mode=mode, + synchronizer=synchronizer, + path=group, + ########## NEW STUFF + meta_array=cp.empty(()), + ) + open_kwargs["storage_options"] = storage_options + + # TODO: handle consolidated + assert not consolidated + + if chunk_store: + open_kwargs["chunk_store"] = chunk_store + if consolidated is None: + consolidated = False + + store = kvikio.zarr.GDSStore(store) + + if consolidated is None: + try: + zarr_group = zarr.open_consolidated(store, **open_kwargs) + except KeyError: + warnings.warn( + "Failed to open Zarr store with consolidated metadata, " + "falling back to try reading non-consolidated metadata. " + "This is typically much slower for opening a dataset. " + "To silence this warning, consider:\n" + "1. Consolidating metadata in this existing store with " + "zarr.consolidate_metadata().\n" + "2. Explicitly setting consolidated=False, to avoid trying " + "to read consolidate metadata, or\n" + "3. Explicitly setting consolidated=True, to raise an " + "error in this case instead of falling back to try " + "reading non-consolidated metadata.", + RuntimeWarning, + stacklevel=stacklevel, + ) + zarr_group = zarr.open_group(store, **open_kwargs) + elif consolidated: + # TODO: an option to pass the metadata_key keyword + zarr_group = zarr.open_consolidated(store, **open_kwargs) + else: + zarr_group = zarr.open_group(store, **open_kwargs) + + return cls( + zarr_group, + mode, + consolidate_on_close, + append_dim, + write_region, + safe_chunks, + ) + + def open_store_variable(self, name, zarr_array): + + try_nczarr = self._mode == "r" + dimensions, attributes = zarr_backend._get_zarr_dims_and_attrs( + zarr_array, zarr_backend.DIMENSION_KEY, try_nczarr + ) + + #### Changed from zarr array wrapper + if name in dimensions: + # we want indexed dimensions to be loaded eagerly + # Right now we load in to device and then transfer to host + # But these should be small-ish arrays + # TODO: can we tell GDSStore to load as numpy array directly + # not cupy array? + array_wrapper = EagerCupyZarrArrayWrapper + else: + array_wrapper = CupyZarrArrayWrapper + data = indexing.LazilyIndexedArray(array_wrapper(name, self)) + + attributes = dict(attributes) + encoding = { + "chunks": zarr_array.chunks, + "preferred_chunks": dict(zip(dimensions, zarr_array.chunks)), + "compressor": zarr_array.compressor, + "filters": zarr_array.filters, + } + # _FillValue needs to be in attributes, not encoding, so it will get + # picked up by decode_cf + if getattr(zarr_array, "fill_value") is not None: + attributes["_FillValue"] = zarr_array.fill_value + + return Variable(dimensions, data, attributes, encoding) + + +class KvikioBackendEntrypoint(ZarrBackendEntrypoint): + available = has_kvikio + + # disabled by default + # We need to provide this because of the subclassing from + # ZarrBackendEntrypoint + def guess_can_open(self, filename_or_obj): + return False + + def open_dataset( + self, + filename_or_obj, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + group=None, + mode="r", + synchronizer=None, + consolidated=None, + chunk_store=None, + storage_options=None, + stacklevel=3, + ): + + filename_or_obj = _normalize_path(filename_or_obj) + store = GDSZarrStore.open_group( + filename_or_obj, + group=group, + mode=mode, + synchronizer=synchronizer, + consolidated=consolidated, + consolidate_on_close=False, + chunk_store=chunk_store, + storage_options=storage_options, + stacklevel=stacklevel + 1, + ) + + store_entrypoint = StoreBackendEntrypoint() + with close_on_error(store): + ds = store_entrypoint.open_dataset( + store, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + return ds diff --git a/setup.py b/setup.py index 8f8d92a..0fc6cef 100644 --- a/setup.py +++ b/setup.py @@ -22,4 +22,7 @@ ], python_requires=">=3.6", install_requires=requirements, + entry_points={ + "xarray.backends": ["kvikio=cupy_xarray.kvikio:KvikioBackendEntrypoint"], + }, ) From aa2dc91649663d3f6732011230d8dc77ecb5ad61 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 2 Aug 2022 15:47:32 -0600 Subject: [PATCH 02/20] Add demo notebook --- docs/kvikio.ipynb | 2360 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2360 insertions(+) create mode 100644 docs/kvikio.ipynb diff --git a/docs/kvikio.ipynb b/docs/kvikio.ipynb new file mode 100644 index 0000000..2847780 --- /dev/null +++ b/docs/kvikio.ipynb @@ -0,0 +1,2360 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5920bb97-1d76-4363-9aee-d1c5cd395409", + "metadata": {}, + "source": [ + "# Kvikio experiment\n", + "\n", + "To get this to work we need\n", + "1. https://github.com/zarr-developers/zarr-python/pull/934\n", + "2. https://github.com/dcherian/xarray/tree/kvikio" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "c9ee3a73-6f7b-4875-b5a6-2e6d48fade44", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The watermark extension is already loaded. To reload it, use:\n", + " %reload_ext watermark\n", + "zarr : 2.12.1.dev38\n", + "sys : 3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:10) \n", + "[GCC 10.3.0]\n", + "numpy : 1.23.1\n", + "json : 2.0.9\n", + "xarray: 2022.6.1.dev7+g3f7cc2da3\n", + "pandas: 1.4.3\n", + "\n" + ] + } + ], + "source": [ + "%load_ext watermark\n", + "\n", + "# These imports are currently unnecessary.\n", + "# cupy_xarray registers the kvikio entrypoint on install.\n", + "#import cupy as cp\n", + "#import cupy_xarray\n", + "#import kvikio.zarr\n", + "\n", + "import numpy as np\n", + "import xarray as xr\n", + "import zarr\n", + "\n", + "store = \"./air-temperature.zarr\"\n", + "\n", + "%watermark -iv" + ] + }, + { + "cell_type": "markdown", + "id": "6d301bec-e64b-4a8f-9c20-5dab56721561", + "metadata": {}, + "source": [ + "## Create example dataset\n", + "\n", + "at the momment this needs xarray released version + pooch + netCDF4\n", + "\n", + "- Something is broken on the dcherian/kvikio branch\n", + "- cannot be compressed\n", + "- must read with consolidated=False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d481cc3b-420e-4b7c-8c5e-77d874128b12", + "metadata": {}, + "outputs": [], + "source": [ + "airt = xr.tutorial.open_dataset(\"air_temperature\", engine=\"netcdf4\")\n", + "for var in airt.variables:\n", + " airt[var].encoding[\"compressor\"] = None\n", + "airt.to_zarr(store, mode=\"w\", consolidated=False)" + ] + }, + { + "cell_type": "markdown", + "id": "883d5507-988f-453a-b576-87bb563b540f", + "metadata": {}, + "source": [ + "## Test opening\n", + "\n", + "### Standard usage" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "58063142-b69b-46a5-9e4d-a83944e57857", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/glade/u/home/dcherian/python/xarray/xarray/backends/plugins.py:117: RuntimeWarning: 'netcdf4' fails while guessing\n", + " warnings.warn(f\"{engine!r} fails while guessing\", RuntimeWarning)\n", + "/glade/scratch/dcherian/tmp/ipykernel_172003/3542870433.py:1: RuntimeWarning: Failed to open Zarr store with consolidated metadata, falling back to try reading non-consolidated metadata. This is typically much slower for opening a dataset. To silence this warning, consider:\n", + "1. Consolidating metadata in this existing store with zarr.consolidate_metadata().\n", + "2. Explicitly setting consolidated=False, to avoid trying to read consolidate metadata, or\n", + "3. Explicitly setting consolidated=True, to raise an error in this case instead of falling back to try reading non-consolidated metadata.\n", + " xr.open_dataset(store).air\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
+       "[3869000 values with dtype=float32]\n",
+       "Coordinates:\n",
+       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
+       "Dimensions without coordinates: time, lon\n",
+       "Attributes:\n",
+       "    GRIB_id:       11\n",
+       "    GRIB_name:     TMP\n",
+       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
+       "    dataset:       NMC Reanalysis\n",
+       "    level_desc:    Surface\n",
+       "    long_name:     4xDaily Air temperature at sigma level 995\n",
+       "    parent_stat:   Other\n",
+       "    precision:     2\n",
+       "    statistic:     Individual Obs\n",
+       "    units:         degK\n",
+       "    var_desc:      Air temperature
" + ], + "text/plain": [ + "\n", + "[3869000 values with dtype=float32]\n", + "Coordinates:\n", + " * lat (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n", + "Dimensions without coordinates: time, lon\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xr.open_dataset(store).air" + ] + }, + { + "cell_type": "markdown", + "id": "95161182-6b58-4dbd-9752-9961c251be1a", + "metadata": {}, + "source": [ + "### Now with kvikio!" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8fd27bdf-e317-4de3-891e-41d38d06dcaf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:  (time: 2920, lat: 25, lon: 53)\n",
+       "Coordinates:\n",
+       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
+       "Dimensions without coordinates: time, lon\n",
+       "Data variables:\n",
+       "    air      (time, lat, lon) float32 ...\n",
+       "Attributes:\n",
+       "    Conventions:  COARDS\n",
+       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
+       "    platform:     Model\n",
+       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n",
+       "    title:        4x daily NMC reanalysis (1948)
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 2920, lat: 25, lon: 53)\n", + "Coordinates:\n", + " * lat (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n", + "Dimensions without coordinates: time, lon\n", + "Data variables:\n", + " air (time, lat, lon) float32 ...\n", + "Attributes:\n", + " Conventions: COARDS\n", + " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", + " platform: Model\n", + " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", + " title: 4x daily NMC reanalysis (1948)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Consolidated must be False\n", + "ds = xr.open_dataset(store, engine=\"kvikio\", consolidated=False)\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", + "metadata": {}, + "source": [ + "## Lazy reading" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "1ecc39b1-b788-4831-9160-5b35afb83598", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
+       "[3869000 values with dtype=float32]\n",
+       "Coordinates:\n",
+       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
+       "Dimensions without coordinates: time, lon\n",
+       "Attributes:\n",
+       "    GRIB_id:       11\n",
+       "    GRIB_name:     TMP\n",
+       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
+       "    dataset:       NMC Reanalysis\n",
+       "    level_desc:    Surface\n",
+       "    long_name:     4xDaily Air temperature at sigma level 995\n",
+       "    parent_stat:   Other\n",
+       "    precision:     2\n",
+       "    statistic:     Individual Obs\n",
+       "    units:         degK\n",
+       "    var_desc:      Air temperature
" + ], + "text/plain": [ + "\n", + "[3869000 values with dtype=float32]\n", + "Coordinates:\n", + " * lat (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n", + "Dimensions without coordinates: time, lon\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.air" + ] + }, + { + "cell_type": "markdown", + "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", + "metadata": {}, + "source": [ + "## Data load for repr" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "00205e73-9b43-4254-9cba-f75435251391", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'air' (lon: 53)>\n",
+       "array([277.29   , 277.4    , 277.79   , 278.6    , 279.5    , 280.1    ,\n",
+       "       280.6    , 280.9    , 280.79   , 280.69998, 280.79   , 281.     ,\n",
+       "       280.29   , 277.69998, 273.5    , 269.     , 265.5    , 264.     ,\n",
+       "       265.19998, 268.1    , 269.79   , 267.9    , 263.     , 258.1    ,\n",
+       "       254.59999, 251.79999, 249.59999, 249.89   , 252.29999, 254.     ,\n",
+       "       254.29999, 255.89   , 260.     , 263.     , 261.5    , 257.29   ,\n",
+       "       255.5    , 258.29   , 264.     , 268.69998, 270.5    , 270.6    ,\n",
+       "       271.19998, 272.9    , 274.79   , 276.4    , 278.19998, 280.5    ,\n",
+       "       282.9    , 284.69998, 286.1    , 286.9    , 286.6    ], dtype=float32)\n",
+       "Coordinates:\n",
+       "    lat      float32 nan\n",
+       "Dimensions without coordinates: lon\n",
+       "Attributes:\n",
+       "    GRIB_id:       11\n",
+       "    GRIB_name:     TMP\n",
+       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
+       "    dataset:       NMC Reanalysis\n",
+       "    level_desc:    Surface\n",
+       "    long_name:     4xDaily Air temperature at sigma level 995\n",
+       "    parent_stat:   Other\n",
+       "    precision:     2\n",
+       "    statistic:     Individual Obs\n",
+       "    units:         degK\n",
+       "    var_desc:      Air temperature
" + ], + "text/plain": [ + "\n", + "array([277.29 , 277.4 , 277.79 , 278.6 , 279.5 , 280.1 ,\n", + " 280.6 , 280.9 , 280.79 , 280.69998, 280.79 , 281. ,\n", + " 280.29 , 277.69998, 273.5 , 269. , 265.5 , 264. ,\n", + " 265.19998, 268.1 , 269.79 , 267.9 , 263. , 258.1 ,\n", + " 254.59999, 251.79999, 249.59999, 249.89 , 252.29999, 254. ,\n", + " 254.29999, 255.89 , 260. , 263. , 261.5 , 257.29 ,\n", + " 255.5 , 258.29 , 264. , 268.69998, 270.5 , 270.6 ,\n", + " 271.19998, 272.9 , 274.79 , 276.4 , 278.19998, 280.5 ,\n", + " 282.9 , 284.69998, 286.1 , 286.9 , 286.6 ], dtype=float32)\n", + "Coordinates:\n", + " lat float32 nan\n", + "Dimensions without coordinates: lon\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds[\"air\"].isel(time=0, lat=10)" + ] + }, + { + "cell_type": "markdown", + "id": "d34a5cce-7bbc-408f-b643-05da1e121c78", + "metadata": {}, + "source": [ + "## Load to host" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "eeb9ad78-1353-464f-8419-4c44ea499f17", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " \n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
+       "array([[[241.2    , 242.5    , 243.5    , ..., 232.79999, 235.5    ,\n",
+       "         238.59999],\n",
+       "        [243.79999, 244.5    , 244.7    , ..., 232.79999, 235.29999,\n",
+       "         239.29999],\n",
+       "        [250.     , 249.79999, 248.89   , ..., 233.2    , 236.39   ,\n",
+       "         241.7    ],\n",
+       "        ...,\n",
+       "        [296.6    , 296.19998, 296.4    , ..., 295.4    , 295.1    ,\n",
+       "         294.69998],\n",
+       "        [295.9    , 296.19998, 296.79   , ..., 295.9    , 295.9    ,\n",
+       "         295.19998],\n",
+       "        [296.29   , 296.79   , 297.1    , ..., 296.9    , 296.79   ,\n",
+       "         296.6    ]],\n",
+       "\n",
+       "       [[242.09999, 242.7    , 243.09999, ..., 232.     , 233.59999,\n",
+       "         235.79999],\n",
+       "        [243.59999, 244.09999, 244.2    , ..., 231.     , 232.5    ,\n",
+       "         235.7    ],\n",
+       "        [253.2    , 252.89   , 252.09999, ..., 230.79999, 233.39   ,\n",
+       "         238.5    ],\n",
+       "...\n",
+       "        [293.69   , 293.88998, 295.38998, ..., 295.09   , 294.69   ,\n",
+       "         294.29   ],\n",
+       "        [296.29   , 297.19   , 297.59   , ..., 295.29   , 295.09   ,\n",
+       "         294.38998],\n",
+       "        [297.79   , 298.38998, 298.49   , ..., 295.69   , 295.49   ,\n",
+       "         295.19   ]],\n",
+       "\n",
+       "       [[245.09   , 244.29   , 243.29   , ..., 241.68999, 241.48999,\n",
+       "         241.79   ],\n",
+       "        [249.89   , 249.29   , 248.39   , ..., 239.59   , 240.29   ,\n",
+       "         241.68999],\n",
+       "        [262.99   , 262.19   , 261.38998, ..., 239.89   , 242.59   ,\n",
+       "         246.29   ],\n",
+       "        ...,\n",
+       "        [293.79   , 293.69   , 295.09   , ..., 295.29   , 295.09   ,\n",
+       "         294.69   ],\n",
+       "        [296.09   , 296.88998, 297.19   , ..., 295.69   , 295.69   ,\n",
+       "         295.19   ],\n",
+       "        [297.69   , 298.09   , 298.09   , ..., 296.49   , 296.19   ,\n",
+       "         295.69   ]]], dtype=float32)\n",
+       "Coordinates:\n",
+       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
+       "Dimensions without coordinates: time, lon\n",
+       "Attributes:\n",
+       "    GRIB_id:       11\n",
+       "    GRIB_name:     TMP\n",
+       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
+       "    dataset:       NMC Reanalysis\n",
+       "    level_desc:    Surface\n",
+       "    long_name:     4xDaily Air temperature at sigma level 995\n",
+       "    parent_stat:   Other\n",
+       "    precision:     2\n",
+       "    statistic:     Individual Obs\n",
+       "    units:         degK\n",
+       "    var_desc:      Air temperature
" + ], + "text/plain": [ + "\n", + "array([[[241.2 , 242.5 , 243.5 , ..., 232.79999, 235.5 ,\n", + " 238.59999],\n", + " [243.79999, 244.5 , 244.7 , ..., 232.79999, 235.29999,\n", + " 239.29999],\n", + " [250. , 249.79999, 248.89 , ..., 233.2 , 236.39 ,\n", + " 241.7 ],\n", + " ...,\n", + " [296.6 , 296.19998, 296.4 , ..., 295.4 , 295.1 ,\n", + " 294.69998],\n", + " [295.9 , 296.19998, 296.79 , ..., 295.9 , 295.9 ,\n", + " 295.19998],\n", + " [296.29 , 296.79 , 297.1 , ..., 296.9 , 296.79 ,\n", + " 296.6 ]],\n", + "\n", + " [[242.09999, 242.7 , 243.09999, ..., 232. , 233.59999,\n", + " 235.79999],\n", + " [243.59999, 244.09999, 244.2 , ..., 231. , 232.5 ,\n", + " 235.7 ],\n", + " [253.2 , 252.89 , 252.09999, ..., 230.79999, 233.39 ,\n", + " 238.5 ],\n", + "...\n", + " [293.69 , 293.88998, 295.38998, ..., 295.09 , 294.69 ,\n", + " 294.29 ],\n", + " [296.29 , 297.19 , 297.59 , ..., 295.29 , 295.09 ,\n", + " 294.38998],\n", + " [297.79 , 298.38998, 298.49 , ..., 295.69 , 295.49 ,\n", + " 295.19 ]],\n", + "\n", + " [[245.09 , 244.29 , 243.29 , ..., 241.68999, 241.48999,\n", + " 241.79 ],\n", + " [249.89 , 249.29 , 248.39 , ..., 239.59 , 240.29 ,\n", + " 241.68999],\n", + " [262.99 , 262.19 , 261.38998, ..., 239.89 , 242.59 ,\n", + " 246.29 ],\n", + " ...,\n", + " [293.79 , 293.69 , 295.09 , ..., 295.29 , 295.09 ,\n", + " 294.69 ],\n", + " [296.09 , 296.88998, 297.19 , ..., 295.69 , 295.69 ,\n", + " 295.19 ],\n", + " [297.69 , 298.09 , 298.09 , ..., 296.49 , 296.19 ,\n", + " 295.69 ]]], dtype=float32)\n", + "Coordinates:\n", + " * lat (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n", + "Dimensions without coordinates: time, lon\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(type(ds.air.data), type(ds.air.as_numpy().data))\n", + "ds.air.as_numpy()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:miniconda3-kvikio_env]", + "language": "python", + "name": "conda-env-miniconda3-kvikio_env-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 7fb4b946534ae9730375ba368349d4fd037550de Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 16 Aug 2022 11:52:27 -0600 Subject: [PATCH 03/20] Update kvikio notebook --- docs/kvikio.ipynb | 878 ++++++++++++++++++++++++++++++---------------- 1 file changed, 577 insertions(+), 301 deletions(-) diff --git a/docs/kvikio.ipynb b/docs/kvikio.ipynb index 2847780..1ddd5e0 100644 --- a/docs/kvikio.ipynb +++ b/docs/kvikio.ipynb @@ -5,16 +5,16 @@ "id": "5920bb97-1d76-4363-9aee-d1c5cd395409", "metadata": {}, "source": [ - "# Kvikio experiment\n", + "# Kvikio demo\n", "\n", "To get this to work we need\n", "1. https://github.com/zarr-developers/zarr-python/pull/934\n", - "2. https://github.com/dcherian/xarray/tree/kvikio" + "2. https://github.com/pydata/xarray/pull/6874" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 1, "id": "c9ee3a73-6f7b-4875-b5a6-2e6d48fade44", "metadata": { "tags": [] @@ -24,15 +24,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "The watermark extension is already loaded. To reload it, use:\n", - " %reload_ext watermark\n", - "zarr : 2.12.1.dev38\n", - "sys : 3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:10) \n", + "flox : 0.5.10.dev5+g44f3851.d20220816\n", + "cupy : 11.0.0\n", + "json : 2.0.9\n", + "cupy_xarray : 0.1.0+11.gaa2dc91.dirty\n", + "numpy : 1.22.4\n", + "zarr : 2.12.1.dev68\n", + "numpy_groupies: 0.9.19+1.g8f14bbf\n", + "kvikio : 22.10.0a0+22.gd063a3b\n", + "xarray : 2022.6.1.dev51+g5a9a51ba1\n", + "sys : 3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:18) \n", "[GCC 10.3.0]\n", - "numpy : 1.23.1\n", - "json : 2.0.9\n", - "xarray: 2022.6.1.dev7+g3f7cc2da3\n", - "pandas: 1.4.3\n", "\n" ] } @@ -40,12 +42,15 @@ "source": [ "%load_ext watermark\n", "\n", - "# These imports are currently unnecessary.\n", + "# These imports are currently unnecessary. I import them to show versions\n", "# cupy_xarray registers the kvikio entrypoint on install.\n", - "#import cupy as cp\n", - "#import cupy_xarray\n", - "#import kvikio.zarr\n", + "import cupy as cp\n", + "#import cudf\n", + "import cupy_xarray\n", + "import kvikio.zarr\n", "\n", + "import flox\n", + "import numpy_groupies\n", "import numpy as np\n", "import xarray as xr\n", "import zarr\n", @@ -58,28 +63,50 @@ { "cell_type": "markdown", "id": "6d301bec-e64b-4a8f-9c20-5dab56721561", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## Create example dataset\n", "\n", - "at the momment this needs xarray released version + pooch + netCDF4\n", - "\n", - "- Something is broken on the dcherian/kvikio branch\n", - "- cannot be compressed\n", - "- must read with consolidated=False" + "- cannot be compressed" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "d481cc3b-420e-4b7c-8c5e-77d874128b12", - "metadata": {}, - "outputs": [], + "metadata": { + "jupyter": { + "source_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/glade/u/home/dcherian/python/xarray/xarray/core/dataset.py:2066: SerializationWarning: saving variable None with floating point data as an integer dtype without any _FillValue to use for NaNs\n", + " return to_zarr( # type: ignore\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "airt = xr.tutorial.open_dataset(\"air_temperature\", engine=\"netcdf4\")\n", "for var in airt.variables:\n", " airt[var].encoding[\"compressor\"] = None\n", - "airt.to_zarr(store, mode=\"w\", consolidated=False)" + "airt.to_zarr(store, mode=\"w\", consolidated=True)" ] }, { @@ -94,23 +121,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "58063142-b69b-46a5-9e4d-a83944e57857", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/glade/u/home/dcherian/python/xarray/xarray/backends/plugins.py:117: RuntimeWarning: 'netcdf4' fails while guessing\n", - " warnings.warn(f\"{engine!r} fails while guessing\", RuntimeWarning)\n", - "/glade/scratch/dcherian/tmp/ipykernel_172003/3542870433.py:1: RuntimeWarning: Failed to open Zarr store with consolidated metadata, falling back to try reading non-consolidated metadata. This is typically much slower for opening a dataset. To silence this warning, consider:\n", - "1. Consolidating metadata in this existing store with zarr.consolidate_metadata().\n", - "2. Explicitly setting consolidated=False, to avoid trying to read consolidate metadata, or\n", - "3. Explicitly setting consolidated=True, to raise an error in this case instead of falling back to try reading non-consolidated metadata.\n", - " xr.open_dataset(store).air\n" - ] - }, { "data": { "text/html": [ @@ -469,8 +483,9 @@ "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
        "[3869000 values with dtype=float32]\n",
        "Coordinates:\n",
-       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
-       "Dimensions without coordinates: time, lon\n",
+       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
+       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
+       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
        "Attributes:\n",
        "    GRIB_id:       11\n",
        "    GRIB_name:     TMP\n",
@@ -482,15 +497,25 @@
        "    precision:     2\n",
        "    statistic:     Individual Obs\n",
        "    units:         degK\n",
-       "    var_desc:      Air temperature
" + " var_desc: Air temperature" ], "text/plain": [ "\n", "[3869000 values with dtype=float32]\n", "Coordinates:\n", - " * lat (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n", - "Dimensions without coordinates: time, lon\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", "Attributes:\n", " GRIB_id: 11\n", " GRIB_name: TMP\n", @@ -505,13 +530,13 @@ " var_desc: Air temperature" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "xr.open_dataset(store).air" + "xr.open_dataset(store, engine=\"zarr\").air" ] }, { @@ -519,14 +544,38 @@ "id": "95161182-6b58-4dbd-9752-9961c251be1a", "metadata": {}, "source": [ - "### Now with kvikio!" + "### Now with kvikio!\n", + "\n", + " - must read with `consolidated=False` (https://github.com/rapidsai/kvikio/issues/119)\n", + " - dask.from_zarr to GDSStore / open_mfdataset" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "8fd27bdf-e317-4de3-891e-41d38d06dcaf", "metadata": {}, + "outputs": [], + "source": [ + "# Consolidated must be False\n", + "ds = xr.open_dataset(store, engine=\"kvikio\", consolidated=False)\n", + "print(ds.air._variable._data)\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", + "metadata": {}, + "source": [ + "## Lazy reading" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1ecc39b1-b788-4831-9160-5b35afb83598", + "metadata": {}, "outputs": [ { "data": { @@ -883,61 +932,77 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
<xarray.Dataset>\n",
-       "Dimensions:  (time: 2920, lat: 25, lon: 53)\n",
+       "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
+       "[3869000 values with dtype=float32]\n",
        "Coordinates:\n",
-       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
-       "Dimensions without coordinates: time, lon\n",
-       "Data variables:\n",
-       "    air      (time, lat, lon) float32 ...\n",
+       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
+       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
+       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
        "Attributes:\n",
-       "    Conventions:  COARDS\n",
-       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
-       "    platform:     Model\n",
-       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n",
-       "    title:        4x daily NMC reanalysis (1948)
" + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature
" ], "text/plain": [ - "\n", - "Dimensions: (time: 2920, lat: 25, lon: 53)\n", + "\n", + "[3869000 values with dtype=float32]\n", "Coordinates:\n", - " * lat (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n", - "Dimensions without coordinates: time, lon\n", - "Data variables:\n", - " air (time, lat, lon) float32 ...\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", "Attributes:\n", - " Conventions: COARDS\n", - " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", - " platform: Model\n", - " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", - " title: 4x daily NMC reanalysis (1948)" + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Consolidated must be False\n", - "ds = xr.open_dataset(store, engine=\"kvikio\", consolidated=False)\n", - "ds" + "ds.air" ] }, { "cell_type": "markdown", - "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", + "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", "metadata": {}, "source": [ - "## Lazy reading" + "## Data load for repr" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "1ecc39b1-b788-4831-9160-5b35afb83598", + "execution_count": 4, + "id": "00205e73-9b43-4254-9cba-f75435251391", "metadata": {}, "outputs": [ { @@ -1295,11 +1360,20 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
-       "[3869000 values with dtype=float32]\n",
+       "
<xarray.DataArray 'air' (lon: 53)>\n",
+       "array([277.29   , 277.4    , 277.79   , 278.6    , 279.5    , 280.1    ,\n",
+       "       280.6    , 280.9    , 280.79   , 280.69998, 280.79   , 281.     ,\n",
+       "       280.29   , 277.69998, 273.5    , 269.     , 265.5    , 264.     ,\n",
+       "       265.19998, 268.1    , 269.79   , 267.9    , 263.     , 258.1    ,\n",
+       "       254.59999, 251.79999, 249.59999, 249.89   , 252.29999, 254.     ,\n",
+       "       254.29999, 255.89   , 260.     , 263.     , 261.5    , 257.29   ,\n",
+       "       255.5    , 258.29   , 264.     , 268.69998, 270.5    , 270.6    ,\n",
+       "       271.19998, 272.9    , 274.79   , 276.4    , 278.19998, 280.5    ,\n",
+       "       282.9    , 284.69998, 286.1    , 286.9    , 286.6    ], dtype=float32)\n",
        "Coordinates:\n",
-       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
-       "Dimensions without coordinates: time, lon\n",
+       "    lat      float32 50.0\n",
+       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
+       "    time     datetime64[ns] 2013-01-01\n",
        "Attributes:\n",
        "    GRIB_id:       11\n",
        "    GRIB_name:     TMP\n",
@@ -1311,15 +1385,36 @@
        "    precision:     2\n",
        "    statistic:     Individual Obs\n",
        "    units:         degK\n",
-       "    var_desc:      Air temperature
" + " var_desc: Air temperature
" ], "text/plain": [ - "\n", - "[3869000 values with dtype=float32]\n", + "\n", + "array([277.29 , 277.4 , 277.79 , 278.6 , 279.5 , 280.1 ,\n", + " 280.6 , 280.9 , 280.79 , 280.69998, 280.79 , 281. ,\n", + " 280.29 , 277.69998, 273.5 , 269. , 265.5 , 264. ,\n", + " 265.19998, 268.1 , 269.79 , 267.9 , 263. , 258.1 ,\n", + " 254.59999, 251.79999, 249.59999, 249.89 , 252.29999, 254. ,\n", + " 254.29999, 255.89 , 260. , 263. , 261.5 , 257.29 ,\n", + " 255.5 , 258.29 , 264. , 268.69998, 270.5 , 270.6 ,\n", + " 271.19998, 272.9 , 274.79 , 276.4 , 278.19998, 280.5 ,\n", + " 282.9 , 284.69998, 286.1 , 286.9 , 286.6 ], dtype=float32)\n", "Coordinates:\n", - " * lat (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n", - "Dimensions without coordinates: time, lon\n", + " lat float32 50.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " time datetime64[ns] 2013-01-01\n", "Attributes:\n", " GRIB_id: 11\n", " GRIB_name: TMP\n", @@ -1334,27 +1429,77 @@ " var_desc: Air temperature" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.air" + "ds[\"air\"].isel(time=0, lat=10)" ] }, { "cell_type": "markdown", - "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", + "id": "d0ea31d2-6c52-4346-b489-fc1e43200213", "metadata": {}, "source": [ - "## Data load for repr" + "## CuPy array on load" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "00205e73-9b43-4254-9cba-f75435251391", + "execution_count": 8, + "id": "1b34a68a-a6b3-4273-bf7c-28814ebfce11", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "cupy._core.core.ndarray" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(ds[\"air\"].isel(time=0, lat=10).data)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "db69559c-1fde-4b3b-914d-87d8437ec256", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "cupy._core.core.ndarray" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(ds[\"air\"].isel(time=0, lat=10).load().data)" + ] + }, + { + "cell_type": "markdown", + "id": "d34a5cce-7bbc-408f-b643-05da1e121c78", + "metadata": {}, + "source": [ + "## Load to host" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "457a612e-04cb-4ffa-8cda-f4371b33bda8", "metadata": {}, "outputs": [ { @@ -1712,19 +1857,52 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
<xarray.DataArray 'air' (lon: 53)>\n",
-       "array([277.29   , 277.4    , 277.79   , 278.6    , 279.5    , 280.1    ,\n",
-       "       280.6    , 280.9    , 280.79   , 280.69998, 280.79   , 281.     ,\n",
-       "       280.29   , 277.69998, 273.5    , 269.     , 265.5    , 264.     ,\n",
-       "       265.19998, 268.1    , 269.79   , 267.9    , 263.     , 258.1    ,\n",
-       "       254.59999, 251.79999, 249.59999, 249.89   , 252.29999, 254.     ,\n",
-       "       254.29999, 255.89   , 260.     , 263.     , 261.5    , 257.29   ,\n",
-       "       255.5    , 258.29   , 264.     , 268.69998, 270.5    , 270.6    ,\n",
-       "       271.19998, 272.9    , 274.79   , 276.4    , 278.19998, 280.5    ,\n",
-       "       282.9    , 284.69998, 286.1    , 286.9    , 286.6    ], dtype=float32)\n",
+       "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
+       "array([[[241.2    , 242.5    , 243.5    , ..., 232.79999, 235.5    ,\n",
+       "         238.59999],\n",
+       "        [243.79999, 244.5    , 244.7    , ..., 232.79999, 235.29999,\n",
+       "         239.29999],\n",
+       "        [250.     , 249.79999, 248.89   , ..., 233.2    , 236.39   ,\n",
+       "         241.7    ],\n",
+       "        ...,\n",
+       "        [296.6    , 296.19998, 296.4    , ..., 295.4    , 295.1    ,\n",
+       "         294.69998],\n",
+       "        [295.9    , 296.19998, 296.79   , ..., 295.9    , 295.9    ,\n",
+       "         295.19998],\n",
+       "        [296.29   , 296.79   , 297.1    , ..., 296.9    , 296.79   ,\n",
+       "         296.6    ]],\n",
+       "\n",
+       "       [[242.09999, 242.7    , 243.09999, ..., 232.     , 233.59999,\n",
+       "         235.79999],\n",
+       "        [243.59999, 244.09999, 244.2    , ..., 231.     , 232.5    ,\n",
+       "         235.7    ],\n",
+       "        [253.2    , 252.89   , 252.09999, ..., 230.79999, 233.39   ,\n",
+       "         238.5    ],\n",
+       "...\n",
+       "        [293.69   , 293.88998, 295.38998, ..., 295.09   , 294.69   ,\n",
+       "         294.29   ],\n",
+       "        [296.29   , 297.19   , 297.59   , ..., 295.29   , 295.09   ,\n",
+       "         294.38998],\n",
+       "        [297.79   , 298.38998, 298.49   , ..., 295.69   , 295.49   ,\n",
+       "         295.19   ]],\n",
+       "\n",
+       "       [[245.09   , 244.29   , 243.29   , ..., 241.68999, 241.48999,\n",
+       "         241.79   ],\n",
+       "        [249.89   , 249.29   , 248.39   , ..., 239.59   , 240.29   ,\n",
+       "         241.68999],\n",
+       "        [262.99   , 262.19   , 261.38998, ..., 239.89   , 242.59   ,\n",
+       "         246.29   ],\n",
+       "        ...,\n",
+       "        [293.79   , 293.69   , 295.09   , ..., 295.29   , 295.09   ,\n",
+       "         294.69   ],\n",
+       "        [296.09   , 296.88998, 297.19   , ..., 295.69   , 295.69   ,\n",
+       "         295.19   ],\n",
+       "        [297.69   , 298.09   , 298.09   , ..., 296.49   , 296.19   ,\n",
+       "         295.69   ]]], dtype=float32)\n",
        "Coordinates:\n",
-       "    lat      float32 nan\n",
-       "Dimensions without coordinates: lon\n",
+       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
+       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
+       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
        "Attributes:\n",
        "    GRIB_id:       11\n",
        "    GRIB_name:     TMP\n",
@@ -1736,36 +1914,111 @@
        "    precision:     2\n",
        "    statistic:     Individual Obs\n",
        "    units:         degK\n",
-       "    var_desc:      Air temperature
" - ], - "text/plain": [ - "\n", - "array([277.29 , 277.4 , 277.79 , 278.6 , 279.5 , 280.1 ,\n", - " 280.6 , 280.9 , 280.79 , 280.69998, 280.79 , 281. ,\n", - " 280.29 , 277.69998, 273.5 , 269. , 265.5 , 264. ,\n", - " 265.19998, 268.1 , 269.79 , 267.9 , 263. , 258.1 ,\n", - " 254.59999, 251.79999, 249.59999, 249.89 , 252.29999, 254. ,\n", - " 254.29999, 255.89 , 260. , 263. , 261.5 , 257.29 ,\n", - " 255.5 , 258.29 , 264. , 268.69998, 270.5 , 270.6 ,\n", - " 271.19998, 272.9 , 274.79 , 276.4 , 278.19998, 280.5 ,\n", - " 282.9 , 284.69998, 286.1 , 286.9 , 286.6 ], dtype=float32)\n", - "Coordinates:\n", - " lat float32 nan\n", - "Dimensions without coordinates: lon\n", - "Attributes:\n", - " GRIB_id: 11\n", - " GRIB_name: TMP\n", - " actual_range: [185.16000366210938, 322.1000061035156]\n", - " dataset: NMC Reanalysis\n", - " level_desc: Surface\n", + " var_desc: Air temperature
" + ], + "text/plain": [ + "\n", + "array([[[241.2 , 242.5 , 243.5 , ..., 232.79999, 235.5 ,\n", + " 238.59999],\n", + " [243.79999, 244.5 , 244.7 , ..., 232.79999, 235.29999,\n", + " 239.29999],\n", + " [250. , 249.79999, 248.89 , ..., 233.2 , 236.39 ,\n", + " 241.7 ],\n", + " ...,\n", + " [296.6 , 296.19998, 296.4 , ..., 295.4 , 295.1 ,\n", + " 294.69998],\n", + " [295.9 , 296.19998, 296.79 , ..., 295.9 , 295.9 ,\n", + " 295.19998],\n", + " [296.29 , 296.79 , 297.1 , ..., 296.9 , 296.79 ,\n", + " 296.6 ]],\n", + "\n", + " [[242.09999, 242.7 , 243.09999, ..., 232. , 233.59999,\n", + " 235.79999],\n", + " [243.59999, 244.09999, 244.2 , ..., 231. , 232.5 ,\n", + " 235.7 ],\n", + " [253.2 , 252.89 , 252.09999, ..., 230.79999, 233.39 ,\n", + " 238.5 ],\n", + "...\n", + " [293.69 , 293.88998, 295.38998, ..., 295.09 , 294.69 ,\n", + " 294.29 ],\n", + " [296.29 , 297.19 , 297.59 , ..., 295.29 , 295.09 ,\n", + " 294.38998],\n", + " [297.79 , 298.38998, 298.49 , ..., 295.69 , 295.49 ,\n", + " 295.19 ]],\n", + "\n", + " [[245.09 , 244.29 , 243.29 , ..., 241.68999, 241.48999,\n", + " 241.79 ],\n", + " [249.89 , 249.29 , 248.39 , ..., 239.59 , 240.29 ,\n", + " 241.68999],\n", + " [262.99 , 262.19 , 261.38998, ..., 239.89 , 242.59 ,\n", + " 246.29 ],\n", + " ...,\n", + " [293.79 , 293.69 , 295.09 , ..., 295.29 , 295.09 ,\n", + " 294.69 ],\n", + " [296.09 , 296.88998, 297.19 , ..., 295.69 , 295.69 ,\n", + " 295.19 ],\n", + " [297.69 , 298.09 , 298.09 , ..., 296.49 , 296.19 ,\n", + " 295.69 ]]], dtype=float32)\n", + "Coordinates:\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", " long_name: 4xDaily Air temperature at sigma level 995\n", " parent_stat: Other\n", " precision: 2\n", @@ -1774,36 +2027,76 @@ " var_desc: Air temperature" ] }, - "execution_count": 6, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds[\"air\"].isel(time=0, lat=10)" + "ds.air.as_numpy()" ] }, { - "cell_type": "markdown", - "id": "d34a5cce-7bbc-408f-b643-05da1e121c78", + "cell_type": "code", + "execution_count": 11, + "id": "eeb9ad78-1353-464f-8419-4c44ea499f17", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.ndarray" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "## Load to host" + "type(ds.air.as_numpy().data)" ] }, { "cell_type": "code", - "execution_count": 11, - "id": "eeb9ad78-1353-464f-8419-4c44ea499f17", + "execution_count": 12, + "id": "140fe3e2-ea9b-445d-8401-5c624384c182", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " \n" - ] - }, + "data": { + "text/plain": [ + "cupy._core.core.ndarray" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(ds.air.mean(\"time\").load().data)" + ] + }, + { + "cell_type": "markdown", + "id": "1c07c449-bc43-490a-ac38-11e93200133d", + "metadata": {}, + "source": [ + "## GroupBy with flox\n", + "\n", + "Requires\n", + "\n", + "1. flox main branch?\n", + "2. https://github.com/ml31415/numpy-groupies/pull/63" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "c292cf77-c99e-40fa-8cad-d8914c346b29", + "metadata": {}, + "outputs": [ { "data": { "text/html": [ @@ -2159,181 +2452,164 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
<xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
-       "array([[[241.2    , 242.5    , 243.5    , ..., 232.79999, 235.5    ,\n",
-       "         238.59999],\n",
-       "        [243.79999, 244.5    , 244.7    , ..., 232.79999, 235.29999,\n",
-       "         239.29999],\n",
-       "        [250.     , 249.79999, 248.89   , ..., 233.2    , 236.39   ,\n",
-       "         241.7    ],\n",
+       "
<xarray.DataArray 'air' (month: 12, lat: 25, lon: 53)>\n",
+       "array([[[246.34975, 246.38591, 246.21495, ..., 243.06096, 244.08774,\n",
+       "         245.64653],\n",
+       "        [248.8575 , 248.90729, 248.71028, ..., 241.52846, 243.50845,\n",
+       "         246.7545 ],\n",
+       "        [251.57713, 251.19649, 250.71451, ..., 243.39871, 246.78438,\n",
+       "         251.56555],\n",
        "        ...,\n",
-       "        [296.6    , 296.19998, 296.4    , ..., 295.4    , 295.1    ,\n",
-       "         294.69998],\n",
-       "        [295.9    , 296.19998, 296.79   , ..., 295.9    , 295.9    ,\n",
-       "         295.19998],\n",
-       "        [296.29   , 296.79   , 297.1    , ..., 296.9    , 296.79   ,\n",
-       "         296.6    ]],\n",
-       "\n",
-       "       [[242.09999, 242.7    , 243.09999, ..., 232.     , 233.59999,\n",
-       "         235.79999],\n",
-       "        [243.59999, 244.09999, 244.2    , ..., 231.     , 232.5    ,\n",
-       "         235.7    ],\n",
-       "        [253.2    , 252.89   , 252.09999, ..., 230.79999, 233.39   ,\n",
-       "         238.5    ],\n",
+       "        [295.8504 , 295.24423, 295.2271 , ..., 295.1864 , 294.65726,\n",
+       "         294.04868],\n",
+       "        [296.54468, 296.47   , 296.16025, ..., 295.35614, 295.0814 ,\n",
+       "         294.53015],\n",
+       "        [297.15448, 297.23843, 297.04913, ..., 296.01816, 295.7758 ,\n",
+       "         295.63678]],\n",
+       "\n",
+       "       [[246.6771 , 246.40562, 245.94829, ..., 241.85826, 243.00186,\n",
+       "         244.44365],\n",
+       "        [247.79994, 247.75986, 247.4774 , ..., 240.64687, 242.26611,\n",
+       "         245.06642],\n",
+       "        [249.07076, 248.57227, 247.94246, ..., 242.42851, 245.33334,\n",
+       "         249.72244],\n",
        "...\n",
-       "        [293.69   , 293.88998, 295.38998, ..., 295.09   , 294.69   ,\n",
-       "         294.29   ],\n",
-       "        [296.29   , 297.19   , 297.59   , ..., 295.29   , 295.09   ,\n",
-       "         294.38998],\n",
-       "        [297.79   , 298.38998, 298.49   , ..., 295.69   , 295.49   ,\n",
-       "         295.19   ]],\n",
-       "\n",
-       "       [[245.09   , 244.29   , 243.29   , ..., 241.68999, 241.48999,\n",
-       "         241.79   ],\n",
-       "        [249.89   , 249.29   , 248.39   , ..., 239.59   , 240.29   ,\n",
-       "         241.68999],\n",
-       "        [262.99   , 262.19   , 261.38998, ..., 239.89   , 242.59   ,\n",
-       "         246.29   ],\n",
+       "        [297.8426 , 297.14062, 296.98776, ..., 297.96893, 297.56882,\n",
+       "         297.16125],\n",
+       "        [298.58783, 298.4203 , 297.96912, ..., 298.16418, 298.194  ,\n",
+       "         297.90833],\n",
+       "        [298.81146, 298.8566 , 298.6211 , ..., 298.7296 , 298.7519 ,\n",
+       "         298.81894]],\n",
+       "\n",
+       "       [[247.97087, 248.02097, 247.91281, ..., 239.77168, 241.02357,\n",
+       "         242.62805],\n",
+       "        [249.73338, 250.16028, 250.48564, ..., 238.78947, 240.96451,\n",
+       "         244.11601],\n",
+       "        [252.02939, 251.53108, 251.36617, ..., 238.07532, 241.91273,\n",
+       "         247.06967],\n",
        "        ...,\n",
-       "        [293.79   , 293.69   , 295.09   , ..., 295.29   , 295.09   ,\n",
-       "         294.69   ],\n",
-       "        [296.09   , 296.88998, 297.19   , ..., 295.69   , 295.69   ,\n",
-       "         295.19   ],\n",
-       "        [297.69   , 298.09   , 298.09   , ..., 296.49   , 296.19   ,\n",
-       "         295.69   ]]], dtype=float32)\n",
+       "        [296.76517, 295.97668, 295.88925, ..., 296.456  , 296.09137,\n",
+       "         295.65768],\n",
+       "        [297.46817, 297.38034, 297.0443 , ..., 296.85565, 296.84668,\n",
+       "         296.52142],\n",
+       "        [297.88092, 297.98676, 297.7755 , ..., 297.60034, 297.5654 ,\n",
+       "         297.53772]]], dtype=float32)\n",
        "Coordinates:\n",
-       "  * lat      (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n",
-       "Dimensions without coordinates: time, lon\n",
-       "Attributes:\n",
-       "    GRIB_id:       11\n",
-       "    GRIB_name:     TMP\n",
-       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
-       "    dataset:       NMC Reanalysis\n",
-       "    level_desc:    Surface\n",
-       "    long_name:     4xDaily Air temperature at sigma level 995\n",
-       "    parent_stat:   Other\n",
-       "    precision:     2\n",
-       "    statistic:     Individual Obs\n",
-       "    units:         degK\n",
-       "    var_desc:      Air temperature
    • lat
      (lat)
      float32
      75.0 72.5 70.0 ... 20.0 17.5 15.0
      array([75. , 72.5, 70. , 67.5, 65. , 62.5, 60. , 57.5, 55. , 52.5, 50. , 47.5,\n",
      +       "       45. , 42.5, 40. , 37.5, 35. , 32.5, 30. , 27.5, 25. , 22.5, 20. , 17.5,\n",
      +       "       15. ], dtype=float32)
    • lon
      (lon)
      float32
      200.0 202.5 205.0 ... 327.5 330.0
      array([200. , 202.5, 205. , 207.5, 210. , 212.5, 215. , 217.5, 220. , 222.5,\n",
      +       "       225. , 227.5, 230. , 232.5, 235. , 237.5, 240. , 242.5, 245. , 247.5,\n",
      +       "       250. , 252.5, 255. , 257.5, 260. , 262.5, 265. , 267.5, 270. , 272.5,\n",
      +       "       275. , 277.5, 280. , 282.5, 285. , 287.5, 290. , 292.5, 295. , 297.5,\n",
      +       "       300. , 302.5, 305. , 307.5, 310. , 312.5, 315. , 317.5, 320. , 322.5,\n",
      +       "       325. , 327.5, 330. ], dtype=float32)
    • month
      (month)
      int64
      1 2 3 4 5 6 7 8 9 10 11 12
      array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])
  • " ], "text/plain": [ - "\n", - "array([[[241.2 , 242.5 , 243.5 , ..., 232.79999, 235.5 ,\n", - " 238.59999],\n", - " [243.79999, 244.5 , 244.7 , ..., 232.79999, 235.29999,\n", - " 239.29999],\n", - " [250. , 249.79999, 248.89 , ..., 233.2 , 236.39 ,\n", - " 241.7 ],\n", + "\n", + "array([[[246.34975, 246.38591, 246.21495, ..., 243.06096, 244.08774,\n", + " 245.64653],\n", + " [248.8575 , 248.90729, 248.71028, ..., 241.52846, 243.50845,\n", + " 246.7545 ],\n", + " [251.57713, 251.19649, 250.71451, ..., 243.39871, 246.78438,\n", + " 251.56555],\n", " ...,\n", - " [296.6 , 296.19998, 296.4 , ..., 295.4 , 295.1 ,\n", - " 294.69998],\n", - " [295.9 , 296.19998, 296.79 , ..., 295.9 , 295.9 ,\n", - " 295.19998],\n", - " [296.29 , 296.79 , 297.1 , ..., 296.9 , 296.79 ,\n", - " 296.6 ]],\n", - "\n", - " [[242.09999, 242.7 , 243.09999, ..., 232. , 233.59999,\n", - " 235.79999],\n", - " [243.59999, 244.09999, 244.2 , ..., 231. , 232.5 ,\n", - " 235.7 ],\n", - " [253.2 , 252.89 , 252.09999, ..., 230.79999, 233.39 ,\n", - " 238.5 ],\n", + " [295.8504 , 295.24423, 295.2271 , ..., 295.1864 , 294.65726,\n", + " 294.04868],\n", + " [296.54468, 296.47 , 296.16025, ..., 295.35614, 295.0814 ,\n", + " 294.53015],\n", + " [297.15448, 297.23843, 297.04913, ..., 296.01816, 295.7758 ,\n", + " 295.63678]],\n", + "\n", + " [[246.6771 , 246.40562, 245.94829, ..., 241.85826, 243.00186,\n", + " 244.44365],\n", + " [247.79994, 247.75986, 247.4774 , ..., 240.64687, 242.26611,\n", + " 245.06642],\n", + " [249.07076, 248.57227, 247.94246, ..., 242.42851, 245.33334,\n", + " 249.72244],\n", "...\n", - " [293.69 , 293.88998, 295.38998, ..., 295.09 , 294.69 ,\n", - " 294.29 ],\n", - " [296.29 , 297.19 , 297.59 , ..., 295.29 , 295.09 ,\n", - " 294.38998],\n", - " [297.79 , 298.38998, 298.49 , ..., 295.69 , 295.49 ,\n", - " 295.19 ]],\n", - "\n", - " [[245.09 , 244.29 , 243.29 , ..., 241.68999, 241.48999,\n", - " 241.79 ],\n", - " [249.89 , 249.29 , 248.39 , ..., 239.59 , 240.29 ,\n", - " 241.68999],\n", - " [262.99 , 262.19 , 261.38998, ..., 239.89 , 242.59 ,\n", - " 246.29 ],\n", + " [297.8426 , 297.14062, 296.98776, ..., 297.96893, 297.56882,\n", + " 297.16125],\n", + " [298.58783, 298.4203 , 297.96912, ..., 298.16418, 298.194 ,\n", + " 297.90833],\n", + " [298.81146, 298.8566 , 298.6211 , ..., 298.7296 , 298.7519 ,\n", + " 298.81894]],\n", + "\n", + " [[247.97087, 248.02097, 247.91281, ..., 239.77168, 241.02357,\n", + " 242.62805],\n", + " [249.73338, 250.16028, 250.48564, ..., 238.78947, 240.96451,\n", + " 244.11601],\n", + " [252.02939, 251.53108, 251.36617, ..., 238.07532, 241.91273,\n", + " 247.06967],\n", " ...,\n", - " [293.79 , 293.69 , 295.09 , ..., 295.29 , 295.09 ,\n", - " 294.69 ],\n", - " [296.09 , 296.88998, 297.19 , ..., 295.69 , 295.69 ,\n", - " 295.19 ],\n", - " [297.69 , 298.09 , 298.09 , ..., 296.49 , 296.19 ,\n", - " 295.69 ]]], dtype=float32)\n", + " [296.76517, 295.97668, 295.88925, ..., 296.456 , 296.09137,\n", + " 295.65768],\n", + " [297.46817, 297.38034, 297.0443 , ..., 296.85565, 296.84668,\n", + " 296.52142],\n", + " [297.88092, 297.98676, 297.7755 , ..., 297.60034, 297.5654 ,\n", + " 297.53772]]], dtype=float32)\n", "Coordinates:\n", - " * lat (lat) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan\n", - "Dimensions without coordinates: time, lon\n", - "Attributes:\n", - " GRIB_id: 11\n", - " GRIB_name: TMP\n", - " actual_range: [185.16000366210938, 322.1000061035156]\n", - " dataset: NMC Reanalysis\n", - " level_desc: Surface\n", - " long_name: 4xDaily Air temperature at sigma level 995\n", - " parent_stat: Other\n", - " precision: 2\n", - " statistic: Individual Obs\n", - " units: degK\n", - " var_desc: Air temperature" + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * month (month) int64 1 2 3 4 5 6 7 8 9 10 11 12" ] }, - "execution_count": 11, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "print(type(ds.air.data), type(ds.air.as_numpy().data))\n", - "ds.air.as_numpy()" + "ds.air.groupby(\"time.month\").mean(engine=\"numpy\")" ] } ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:miniconda3-kvikio_env]", + "display_name": "Python [conda env:miniconda3-kvikio_nightly]", "language": "python", - "name": "conda-env-miniconda3-kvikio_env-py" + "name": "conda-env-miniconda3-kvikio_nightly-py" }, "language_info": { "codemirror_mode": { From facf5f73c52e1aecb7061715ef46d76ae551a4f6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:27:13 +0000 Subject: [PATCH 04/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/kvikio.ipynb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/kvikio.ipynb b/docs/kvikio.ipynb index 1ddd5e0..72723ea 100644 --- a/docs/kvikio.ipynb +++ b/docs/kvikio.ipynb @@ -45,7 +45,8 @@ "# These imports are currently unnecessary. I import them to show versions\n", "# cupy_xarray registers the kvikio entrypoint on install.\n", "import cupy as cp\n", - "#import cudf\n", + "\n", + "# import cudf\n", "import cupy_xarray\n", "import kvikio.zarr\n", "\n", From f3f51891b79f1e46f7855c5708d55b271313da32 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 17 Aug 2022 11:56:39 -0600 Subject: [PATCH 05/20] Update cupy_xarray/kvikio.py --- cupy_xarray/kvikio.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index 313a249..e806865 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -1,4 +1,5 @@ import os +import warnings import cupy as cp import numpy as np From d2da1e4c7b23923aca8969c25531a1817b498de1 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 20 Jan 2023 17:03:07 -0700 Subject: [PATCH 06/20] Add url, description. --- cupy_xarray/kvikio.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index e806865..e6c7c63 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -147,6 +147,8 @@ def open_store_variable(self, name, zarr_array): class KvikioBackendEntrypoint(ZarrBackendEntrypoint): available = has_kvikio + description = "Open zarr files (.zarr) using Kvikio" + url = "https://docs.rapids.ai/api/kvikio/nightly/api.html#zarr" # disabled by default # We need to provide this because of the subclassing from From b87c3c2d6295b05b4416d5aa2e0f93440547c90d Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 18 Aug 2023 16:28:40 -0600 Subject: [PATCH 07/20] Working --- cupy_xarray/kvikio.py | 24 +- docs/kvikio.ipynb | 2216 ++++++++++++++++++++++++++++++++++------- 2 files changed, 1874 insertions(+), 366 deletions(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index e6c7c63..669978f 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -20,6 +20,24 @@ has_kvikio = False +class DummyZarrArrayWrapper(ZarrArrayWrapper): + def __init__(self, array: np.ndarray): + assert isinstance(array, np.ndarray) + self._array = array + self.filters = None + self.dtype = array.dtype + self.shape = array.shape + + def __array__(self): + return self._array + + def get_array(self): + return self._array + + def __getitem__(self, key): + return self._array[key] + + class CupyZarrArrayWrapper(ZarrArrayWrapper): def __array__(self): return self.get_array() @@ -32,7 +50,8 @@ def __array__(self): return self.datastore.zarr_group[self.variable_name][:].get() def get_array(self): - return np.asarray(self) + # total hack: make a numpy array look like a Zarr array + return DummyZarrArrayWrapper(self.datastore.zarr_group[self.variable_name][:].get()) class GDSZarrStore(ZarrStore): @@ -52,7 +71,6 @@ def open_group( safe_chunks=True, stacklevel=2, ): - # zarr doesn't support pathlib.Path objects yet. zarr-python#601 if isinstance(store, os.PathLike): store = os.fspath(store) @@ -112,7 +130,6 @@ def open_group( ) def open_store_variable(self, name, zarr_array): - try_nczarr = self._mode == "r" dimensions, attributes = zarr_backend._get_zarr_dims_and_attrs( zarr_array, zarr_backend.DIMENSION_KEY, try_nczarr @@ -174,7 +191,6 @@ def open_dataset( storage_options=None, stacklevel=3, ): - filename_or_obj = _normalize_path(filename_or_obj) store = GDSZarrStore.open_group( filename_or_obj, diff --git a/docs/kvikio.ipynb b/docs/kvikio.ipynb index 72723ea..4867878 100644 --- a/docs/kvikio.ipynb +++ b/docs/kvikio.ipynb @@ -5,11 +5,7 @@ "id": "5920bb97-1d76-4363-9aee-d1c5cd395409", "metadata": {}, "source": [ - "# Kvikio demo\n", - "\n", - "To get this to work we need\n", - "1. https://github.com/zarr-developers/zarr-python/pull/934\n", - "2. https://github.com/pydata/xarray/pull/6874" + "# Kvikio demo" ] }, { @@ -24,17 +20,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "flox : 0.5.10.dev5+g44f3851.d20220816\n", - "cupy : 11.0.0\n", + "flox : 0.7.3.dev12+g796dcd2\n", "json : 2.0.9\n", - "cupy_xarray : 0.1.0+11.gaa2dc91.dirty\n", - "numpy : 1.22.4\n", - "zarr : 2.12.1.dev68\n", - "numpy_groupies: 0.9.19+1.g8f14bbf\n", - "kvikio : 22.10.0a0+22.gd063a3b\n", - "xarray : 2022.6.1.dev51+g5a9a51ba1\n", - "sys : 3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:18) \n", - "[GCC 10.3.0]\n", + "xarray : 2023.7.0\n", + "cupy_xarray : 0.1.1+21.gd2da1e4.dirty\n", + "kvikio : 23.2.0\n", + "zarr : 2.16.0\n", + "numpy : 1.24.4\n", + "sys : 3.9.17 | packaged by conda-forge | (main, Aug 10 2023, 07:02:31) \n", + "[GCC 12.3.0]\n", + "numpy_groupies: 0.9.22+2.gd148074\n", "\n" ] } @@ -44,10 +39,9 @@ "\n", "# These imports are currently unnecessary. I import them to show versions\n", "# cupy_xarray registers the kvikio entrypoint on install.\n", - "import cupy as cp\n", - "\n", + "# import cupy as cp\n", "# import cudf\n", - "import cupy_xarray\n", + "import cupy_xarray # registers cupy accessor\n", "import kvikio.zarr\n", "\n", "import flox\n", @@ -62,68 +56,1482 @@ ] }, { - "cell_type": "markdown", - "id": "6d301bec-e64b-4a8f-9c20-5dab56721561", - "metadata": { - "tags": [] - }, + "cell_type": "code", + "execution_count": 2, + "id": "83b1b514-eeb8-4a81-a3e8-3a7dc82ffce4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'kvikio': \n", + " Open zarr files (.zarr) using Kvikio\n", + " Learn more at https://docs.rapids.ai/api/kvikio/nightly/api.html#zarr,\n", + " 'store': \n", + " Open AbstractDataStore instances in Xarray\n", + " Learn more at https://docs.xarray.dev/en/stable/generated/xarray.backends.StoreBackendEntrypoint.html,\n", + " 'zarr': \n", + " Open zarr files (.zarr) using zarr in Xarray\n", + " Learn more at https://docs.xarray.dev/en/stable/generated/xarray.backends.ZarrBackendEntrypoint.html}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xr.backends.list_engines()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "81b2e5cb-4b2d-4a31-b7a0-961aadbc321d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None))))))\n" + ] + }, + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset>\n",
    +       "Dimensions:  (time: 2920, lat: 25, lon: 53)\n",
    +       "Coordinates:\n",
    +       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    +       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    +       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "Data variables:\n",
    +       "    air      (time, lat, lon) float32 ...\n",
    +       "    scalar   float64 ...\n",
    +       "Attributes:\n",
    +       "    Conventions:  COARDS\n",
    +       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
    +       "    platform:     Model\n",
    +       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n",
    +       "    title:        4x daily NMC reanalysis (1948)
    " + ], + "text/plain": [ + "\n", + "Dimensions: (time: 2920, lat: 25, lon: 53)\n", + "Coordinates:\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + "Data variables:\n", + " air (time, lat, lon) float32 ...\n", + " scalar float64 ...\n", + "Attributes:\n", + " Conventions: COARDS\n", + " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", + " platform: Model\n", + " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", + " title: 4x daily NMC reanalysis (1948)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%autoreload\n", + "\n", + "# Consolidated must be False\n", + "ds = xr.open_dataset(store, engine=\"kvikio\", consolidated=False)\n", + "print(ds.air._variable._data)\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "6d301bec-e64b-4a8f-9c20-5dab56721561", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## Create example dataset\n", + "\n", + "- cannot be compressed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d481cc3b-420e-4b7c-8c5e-77d874128b12", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "airt = xr.tutorial.open_dataset(\"air_temperature\", engine=\"netcdf4\")\n", + "for var in airt.variables:\n", + " airt[var].encoding[\"compressor\"] = None\n", + "airt[\"scalar\"] = 12.0\n", + "airt.to_zarr(store, mode=\"w\", consolidated=True)" + ] + }, + { + "cell_type": "markdown", + "id": "883d5507-988f-453a-b576-87bb563b540f", + "metadata": {}, + "source": [ + "## Test opening\n", + "\n", + "### Standard usage" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "58063142-b69b-46a5-9e4d-a83944e57857", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    +       "[3869000 values with dtype=float32]\n",
    +       "Coordinates:\n",
    +       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    +       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    +       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "Attributes:\n",
    +       "    GRIB_id:       11\n",
    +       "    GRIB_name:     TMP\n",
    +       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    +       "    dataset:       NMC Reanalysis\n",
    +       "    level_desc:    Surface\n",
    +       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    +       "    parent_stat:   Other\n",
    +       "    precision:     2\n",
    +       "    statistic:     Individual Obs\n",
    +       "    units:         degK\n",
    +       "    var_desc:      Air temperature
    " + ], + "text/plain": [ + "\n", + "[3869000 values with dtype=float32]\n", + "Coordinates:\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xr.open_dataset(store, engine=\"zarr\").air" + ] + }, + { + "cell_type": "markdown", + "id": "95161182-6b58-4dbd-9752-9961c251be1a", + "metadata": {}, + "source": [ + "### Now with kvikio!\n", + "\n", + " - must read with `consolidated=False` (https://github.com/rapidsai/kvikio/issues/119)\n", + " - dask.from_zarr to GDSStore / open_mfdataset" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "8fd27bdf-e317-4de3-891e-41d38d06dcaf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None))))))\n" + ] + }, + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset>\n",
    +       "Dimensions:  (time: 2920, lat: 25, lon: 53)\n",
    +       "Coordinates:\n",
    +       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    +       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    +       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "Data variables:\n",
    +       "    air      (time, lat, lon) float32 ...\n",
    +       "    scalar   float64 ...\n",
    +       "Attributes:\n",
    +       "    Conventions:  COARDS\n",
    +       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
    +       "    platform:     Model\n",
    +       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n",
    +       "    title:        4x daily NMC reanalysis (1948)
    " + ], + "text/plain": [ + "\n", + "Dimensions: (time: 2920, lat: 25, lon: 53)\n", + "Coordinates:\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + "Data variables:\n", + " air (time, lat, lon) float32 ...\n", + " scalar float64 ...\n", + "Attributes:\n", + " Conventions: COARDS\n", + " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", + " platform: Model\n", + " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", + " title: 4x daily NMC reanalysis (1948)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "## Create example dataset\n", - "\n", - "- cannot be compressed" + "# Consolidated must be False\n", + "ds = xr.open_dataset(store, engine=\"kvikio\", consolidated=False)\n", + "print(ds.air._variable._data)\n", + "ds" ] }, { "cell_type": "code", - "execution_count": 11, - "id": "d481cc3b-420e-4b7c-8c5e-77d874128b12", - "metadata": { - "jupyter": { - "source_hidden": true - }, - "tags": [] - }, + "execution_count": 26, + "id": "6c939a04-1588-4693-9483-c6ad7152951a", + "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/glade/u/home/dcherian/python/xarray/xarray/core/dataset.py:2066: SerializationWarning: saving variable None with floating point data as an integer dtype without any _FillValue to use for NaNs\n", - " return to_zarr( # type: ignore\n" - ] - }, { "data": { "text/plain": [ - "" + "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=, key=BasicIndexer(()))))" ] }, - "execution_count": 11, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "airt = xr.tutorial.open_dataset(\"air_temperature\", engine=\"netcdf4\")\n", - "for var in airt.variables:\n", - " airt[var].encoding[\"compressor\"] = None\n", - "airt.to_zarr(store, mode=\"w\", consolidated=True)" + "ds.scalar.variable._data" ] }, { "cell_type": "markdown", - "id": "883d5507-988f-453a-b576-87bb563b540f", + "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", "metadata": {}, "source": [ - "## Test opening\n", - "\n", - "### Standard usage" + "## Lazy reading" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "58063142-b69b-46a5-9e4d-a83944e57857", + "execution_count": 27, + "id": "1ecc39b1-b788-4831-9160-5b35afb83598", "metadata": {}, "outputs": [ { @@ -390,6 +1798,11 @@ " grid-column: 4;\n", "}\n", "\n", + ".xr-index-preview {\n", + " grid-column: 2 / 5;\n", + " color: var(--xr-font-color2);\n", + "}\n", + "\n", ".xr-var-name,\n", ".xr-var-dims,\n", ".xr-var-dtype,\n", @@ -411,14 +1824,16 @@ "}\n", "\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " display: none;\n", " background-color: var(--xr-background-color) !important;\n", " padding-bottom: 5px !important;\n", "}\n", "\n", ".xr-var-attrs-in:checked ~ .xr-var-attrs,\n", - ".xr-var-data-in:checked ~ .xr-var-data {\n", + ".xr-var-data-in:checked ~ .xr-var-data,\n", + ".xr-index-data-in:checked ~ .xr-index-data {\n", " display: block;\n", "}\n", "\n", @@ -428,13 +1843,16 @@ "\n", ".xr-var-name span,\n", ".xr-var-data,\n", + ".xr-index-name div,\n", + ".xr-index-data,\n", ".xr-attrs {\n", " padding-left: 25px !important;\n", "}\n", "\n", ".xr-attrs,\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " grid-column: 1 / -1;\n", "}\n", "\n", @@ -472,7 +1890,8 @@ "}\n", "\n", ".xr-icon-database,\n", - ".xr-icon-file-text2 {\n", + ".xr-icon-file-text2,\n", + ".xr-no-icon {\n", " display: inline-block;\n", " vertical-align: middle;\n", " width: 1em;\n", @@ -498,17 +1917,37 @@ " precision: 2\n", " statistic: Individual Obs\n", " units: degK\n", - " var_desc: Air temperature
    • lat
      PandasIndex
      PandasIndex(Index([75.0, 72.5, 70.0, 67.5, 65.0, 62.5, 60.0, 57.5, 55.0, 52.5, 50.0, 47.5,\n",
      +       "       45.0, 42.5, 40.0, 37.5, 35.0, 32.5, 30.0, 27.5, 25.0, 22.5, 20.0, 17.5,\n",
      +       "       15.0],\n",
      +       "      dtype='float32', name='lat'))
    • lon
      PandasIndex
      PandasIndex(Index([200.0, 202.5, 205.0, 207.5, 210.0, 212.5, 215.0, 217.5, 220.0, 222.5,\n",
      +       "       225.0, 227.5, 230.0, 232.5, 235.0, 237.5, 240.0, 242.5, 245.0, 247.5,\n",
      +       "       250.0, 252.5, 255.0, 257.5, 260.0, 262.5, 265.0, 267.5, 270.0, 272.5,\n",
      +       "       275.0, 277.5, 280.0, 282.5, 285.0, 287.5, 290.0, 292.5, 295.0, 297.5,\n",
      +       "       300.0, 302.5, 305.0, 307.5, 310.0, 312.5, 315.0, 317.5, 320.0, 322.5,\n",
      +       "       325.0, 327.5, 330.0],\n",
      +       "      dtype='float32', name='lon'))
    • time
      PandasIndex
      PandasIndex(DatetimeIndex(['2013-01-01 00:00:00', '2013-01-01 06:00:00',\n",
      +       "               '2013-01-01 12:00:00', '2013-01-01 18:00:00',\n",
      +       "               '2013-01-02 00:00:00', '2013-01-02 06:00:00',\n",
      +       "               '2013-01-02 12:00:00', '2013-01-02 18:00:00',\n",
      +       "               '2013-01-03 00:00:00', '2013-01-03 06:00:00',\n",
      +       "               ...\n",
      +       "               '2014-12-29 12:00:00', '2014-12-29 18:00:00',\n",
      +       "               '2014-12-30 00:00:00', '2014-12-30 06:00:00',\n",
      +       "               '2014-12-30 12:00:00', '2014-12-30 18:00:00',\n",
      +       "               '2014-12-31 00:00:00', '2014-12-31 06:00:00',\n",
      +       "               '2014-12-31 12:00:00', '2014-12-31 18:00:00'],\n",
      +       "              dtype='datetime64[ns]', name='time', length=2920, freq=None))
  • GRIB_id :
    11
    GRIB_name :
    TMP
    actual_range :
    [185.16000366210938, 322.1000061035156]
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    long_name :
    4xDaily Air temperature at sigma level 995
    parent_stat :
    Other
    precision :
    2
    statistic :
    Individual Obs
    units :
    degK
    var_desc :
    Air temperature
  • " ], "text/plain": [ "\n", @@ -531,51 +1970,27 @@ " var_desc: Air temperature" ] }, - "execution_count": 4, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "xr.open_dataset(store, engine=\"zarr\").air" - ] - }, - { - "cell_type": "markdown", - "id": "95161182-6b58-4dbd-9752-9961c251be1a", - "metadata": {}, - "source": [ - "### Now with kvikio!\n", - "\n", - " - must read with `consolidated=False` (https://github.com/rapidsai/kvikio/issues/119)\n", - " - dask.from_zarr to GDSStore / open_mfdataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8fd27bdf-e317-4de3-891e-41d38d06dcaf", - "metadata": {}, - "outputs": [], - "source": [ - "# Consolidated must be False\n", - "ds = xr.open_dataset(store, engine=\"kvikio\", consolidated=False)\n", - "print(ds.air._variable._data)\n", - "ds" + "ds.air" ] }, { "cell_type": "markdown", - "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", + "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", "metadata": {}, "source": [ - "## Lazy reading" + "## Data load for repr" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "1ecc39b1-b788-4831-9160-5b35afb83598", + "execution_count": 31, + "id": "00205e73-9b43-4254-9cba-f75435251391", "metadata": {}, "outputs": [ { @@ -842,6 +2257,11 @@ " grid-column: 4;\n", "}\n", "\n", + ".xr-index-preview {\n", + " grid-column: 2 / 5;\n", + " color: var(--xr-font-color2);\n", + "}\n", + "\n", ".xr-var-name,\n", ".xr-var-dims,\n", ".xr-var-dtype,\n", @@ -863,14 +2283,16 @@ "}\n", "\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " display: none;\n", " background-color: var(--xr-background-color) !important;\n", " padding-bottom: 5px !important;\n", "}\n", "\n", ".xr-var-attrs-in:checked ~ .xr-var-attrs,\n", - ".xr-var-data-in:checked ~ .xr-var-data {\n", + ".xr-var-data-in:checked ~ .xr-var-data,\n", + ".xr-index-data-in:checked ~ .xr-index-data {\n", " display: block;\n", "}\n", "\n", @@ -880,13 +2302,16 @@ "\n", ".xr-var-name span,\n", ".xr-var-data,\n", + ".xr-index-name div,\n", + ".xr-index-data,\n", ".xr-attrs {\n", " padding-left: 25px !important;\n", "}\n", "\n", ".xr-attrs,\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " grid-column: 1 / -1;\n", "}\n", "\n", @@ -924,7 +2349,8 @@ "}\n", "\n", ".xr-icon-database,\n", - ".xr-icon-file-text2 {\n", + ".xr-icon-file-text2,\n", + ".xr-no-icon {\n", " display: inline-block;\n", " vertical-align: middle;\n", " width: 1em;\n", @@ -933,12 +2359,21 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    -       "[3869000 values with dtype=float32]\n",
    +       "
    <xarray.DataArray 'air' (lon: 53)>\n",
    +       "array([277.29   , 277.4    , 277.79   , 278.6    , 279.5    , 280.1    ,\n",
    +       "       280.6    , 280.9    , 280.79   , 280.69998, 280.79   , 281.     ,\n",
    +       "       280.29   , 277.69998, 273.5    , 269.     , 265.5    , 264.     ,\n",
    +       "       265.19998, 268.1    , 269.79   , 267.9    , 263.     , 258.1    ,\n",
    +       "       254.59999, 251.79999, 249.59999, 249.89   , 252.29999, 254.     ,\n",
    +       "       254.29999, 255.89   , 260.     , 263.     , 261.5    , 257.29   ,\n",
    +       "       255.5    , 258.29   , 264.     , 268.69998, 270.5    , 270.6    ,\n",
    +       "       271.19998, 272.9    , 274.79   , 276.4    , 278.19998, 280.5    ,\n",
    +       "       282.9    , 284.69998, 286.1    , 286.9    , 286.6    ],\n",
    +       "      dtype=float32)\n",
            "Coordinates:\n",
    -       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    +       "    lat      float32 50.0\n",
            "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "    time     datetime64[ns] 2013-01-01\n",
            "Attributes:\n",
            "    GRIB_id:       11\n",
            "    GRIB_name:     TMP\n",
    @@ -950,25 +2385,44 @@
            "    precision:     2\n",
            "    statistic:     Individual Obs\n",
            "    units:         degK\n",
    -       "    var_desc:      Air temperature
  • time
    ()
    datetime64[ns]
    2013-01-01
    long_name :
    Time
    standard_name :
    time
    array('2013-01-01T00:00:00.000000000', dtype='datetime64[ns]')
    • lon
      PandasIndex
      PandasIndex(Index([200.0, 202.5, 205.0, 207.5, 210.0, 212.5, 215.0, 217.5, 220.0, 222.5,\n",
      +       "       225.0, 227.5, 230.0, 232.5, 235.0, 237.5, 240.0, 242.5, 245.0, 247.5,\n",
      +       "       250.0, 252.5, 255.0, 257.5, 260.0, 262.5, 265.0, 267.5, 270.0, 272.5,\n",
      +       "       275.0, 277.5, 280.0, 282.5, 285.0, 287.5, 290.0, 292.5, 295.0, 297.5,\n",
      +       "       300.0, 302.5, 305.0, 307.5, 310.0, 312.5, 315.0, 317.5, 320.0, 322.5,\n",
      +       "       325.0, 327.5, 330.0],\n",
      +       "      dtype='float32', name='lon'))
  • GRIB_id :
    11
    GRIB_name :
    TMP
    actual_range :
    [185.16000366210938, 322.1000061035156]
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    long_name :
    4xDaily Air temperature at sigma level 995
    parent_stat :
    Other
    precision :
    2
    statistic :
    Individual Obs
    units :
    degK
    var_desc :
    Air temperature
  • " ], "text/plain": [ - "\n", - "[3869000 values with dtype=float32]\n", + "\n", + "array([277.29 , 277.4 , 277.79 , 278.6 , 279.5 , 280.1 ,\n", + " 280.6 , 280.9 , 280.79 , 280.69998, 280.79 , 281. ,\n", + " 280.29 , 277.69998, 273.5 , 269. , 265.5 , 264. ,\n", + " 265.19998, 268.1 , 269.79 , 267.9 , 263. , 258.1 ,\n", + " 254.59999, 251.79999, 249.59999, 249.89 , 252.29999, 254. ,\n", + " 254.29999, 255.89 , 260. , 263. , 261.5 , 257.29 ,\n", + " 255.5 , 258.29 , 264. , 268.69998, 270.5 , 270.6 ,\n", + " 271.19998, 272.9 , 274.79 , 276.4 , 278.19998, 280.5 ,\n", + " 282.9 , 284.69998, 286.1 , 286.9 , 286.6 ],\n", + " dtype=float32)\n", "Coordinates:\n", - " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " lat float32 50.0\n", " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + " time datetime64[ns] 2013-01-01\n", "Attributes:\n", " GRIB_id: 11\n", " GRIB_name: TMP\n", @@ -983,27 +2437,19 @@ " var_desc: Air temperature" ] }, - "execution_count": 3, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.air" - ] - }, - { - "cell_type": "markdown", - "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", - "metadata": {}, - "source": [ - "## Data load for repr" + "ds[\"air\"].isel(time=0, lat=10).load()" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "00205e73-9b43-4254-9cba-f75435251391", + "execution_count": 29, + "id": "80aa6892-8c7f-44b3-bd52-9795ec4ea6f3", "metadata": {}, "outputs": [ { @@ -1270,6 +2716,11 @@ " grid-column: 4;\n", "}\n", "\n", + ".xr-index-preview {\n", + " grid-column: 2 / 5;\n", + " color: var(--xr-font-color2);\n", + "}\n", + "\n", ".xr-var-name,\n", ".xr-var-dims,\n", ".xr-var-dtype,\n", @@ -1291,14 +2742,16 @@ "}\n", "\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " display: none;\n", " background-color: var(--xr-background-color) !important;\n", " padding-bottom: 5px !important;\n", "}\n", "\n", ".xr-var-attrs-in:checked ~ .xr-var-attrs,\n", - ".xr-var-data-in:checked ~ .xr-var-data {\n", + ".xr-var-data-in:checked ~ .xr-var-data,\n", + ".xr-index-data-in:checked ~ .xr-index-data {\n", " display: block;\n", "}\n", "\n", @@ -1308,13 +2761,16 @@ "\n", ".xr-var-name span,\n", ".xr-var-data,\n", + ".xr-index-name div,\n", + ".xr-index-data,\n", ".xr-attrs {\n", " padding-left: 25px !important;\n", "}\n", "\n", ".xr-attrs,\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " grid-column: 1 / -1;\n", "}\n", "\n", @@ -1352,7 +2808,8 @@ "}\n", "\n", ".xr-icon-database,\n", - ".xr-icon-file-text2 {\n", + ".xr-icon-file-text2,\n", + ".xr-no-icon {\n", " display: inline-block;\n", " vertical-align: middle;\n", " width: 1em;\n", @@ -1361,82 +2818,21 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (lon: 53)>\n",
    -       "array([277.29   , 277.4    , 277.79   , 278.6    , 279.5    , 280.1    ,\n",
    -       "       280.6    , 280.9    , 280.79   , 280.69998, 280.79   , 281.     ,\n",
    -       "       280.29   , 277.69998, 273.5    , 269.     , 265.5    , 264.     ,\n",
    -       "       265.19998, 268.1    , 269.79   , 267.9    , 263.     , 258.1    ,\n",
    -       "       254.59999, 251.79999, 249.59999, 249.89   , 252.29999, 254.     ,\n",
    -       "       254.29999, 255.89   , 260.     , 263.     , 261.5    , 257.29   ,\n",
    -       "       255.5    , 258.29   , 264.     , 268.69998, 270.5    , 270.6    ,\n",
    -       "       271.19998, 272.9    , 274.79   , 276.4    , 278.19998, 280.5    ,\n",
    -       "       282.9    , 284.69998, 286.1    , 286.9    , 286.6    ], dtype=float32)\n",
    -       "Coordinates:\n",
    -       "    lat      float32 50.0\n",
    -       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "    time     datetime64[ns] 2013-01-01\n",
    -       "Attributes:\n",
    -       "    GRIB_id:       11\n",
    -       "    GRIB_name:     TMP\n",
    -       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    -       "    dataset:       NMC Reanalysis\n",
    -       "    level_desc:    Surface\n",
    -       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    -       "    parent_stat:   Other\n",
    -       "    precision:     2\n",
    -       "    statistic:     Individual Obs\n",
    -       "    units:         degK\n",
    -       "    var_desc:      Air temperature
    " + "
    <xarray.DataArray 'scalar' ()>\n",
    +       "[1 values with dtype=float64]
    " ], "text/plain": [ - "\n", - "array([277.29 , 277.4 , 277.79 , 278.6 , 279.5 , 280.1 ,\n", - " 280.6 , 280.9 , 280.79 , 280.69998, 280.79 , 281. ,\n", - " 280.29 , 277.69998, 273.5 , 269. , 265.5 , 264. ,\n", - " 265.19998, 268.1 , 269.79 , 267.9 , 263. , 258.1 ,\n", - " 254.59999, 251.79999, 249.59999, 249.89 , 252.29999, 254. ,\n", - " 254.29999, 255.89 , 260. , 263. , 261.5 , 257.29 ,\n", - " 255.5 , 258.29 , 264. , 268.69998, 270.5 , 270.6 ,\n", - " 271.19998, 272.9 , 274.79 , 276.4 , 278.19998, 280.5 ,\n", - " 282.9 , 284.69998, 286.1 , 286.9 , 286.6 ], dtype=float32)\n", - "Coordinates:\n", - " lat float32 50.0\n", - " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " time datetime64[ns] 2013-01-01\n", - "Attributes:\n", - " GRIB_id: 11\n", - " GRIB_name: TMP\n", - " actual_range: [185.16000366210938, 322.1000061035156]\n", - " dataset: NMC Reanalysis\n", - " level_desc: Surface\n", - " long_name: 4xDaily Air temperature at sigma level 995\n", - " parent_stat: Other\n", - " precision: 2\n", - " statistic: Individual Obs\n", - " units: degK\n", - " var_desc: Air temperature" + "\n", + "[1 values with dtype=float64]" ] }, - "execution_count": 4, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds[\"air\"].isel(time=0, lat=10)" + "ds.scalar" ] }, { @@ -1449,17 +2845,17 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 32, "id": "1b34a68a-a6b3-4273-bf7c-28814ebfce11", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "cupy._core.core.ndarray" + "cupy.ndarray" ] }, - "execution_count": 8, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -1470,17 +2866,17 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 33, "id": "db69559c-1fde-4b3b-914d-87d8437ec256", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "cupy._core.core.ndarray" + "cupy.ndarray" ] }, - "execution_count": 9, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1499,7 +2895,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 34, "id": "457a612e-04cb-4ffa-8cda-f4371b33bda8", "metadata": {}, "outputs": [ @@ -1767,6 +3163,11 @@ " grid-column: 4;\n", "}\n", "\n", + ".xr-index-preview {\n", + " grid-column: 2 / 5;\n", + " color: var(--xr-font-color2);\n", + "}\n", + "\n", ".xr-var-name,\n", ".xr-var-dims,\n", ".xr-var-dtype,\n", @@ -1788,14 +3189,16 @@ "}\n", "\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " display: none;\n", " background-color: var(--xr-background-color) !important;\n", " padding-bottom: 5px !important;\n", "}\n", "\n", ".xr-var-attrs-in:checked ~ .xr-var-attrs,\n", - ".xr-var-data-in:checked ~ .xr-var-data {\n", + ".xr-var-data-in:checked ~ .xr-var-data,\n", + ".xr-index-data-in:checked ~ .xr-index-data {\n", " display: block;\n", "}\n", "\n", @@ -1805,13 +3208,16 @@ "\n", ".xr-var-name span,\n", ".xr-var-data,\n", + ".xr-index-name div,\n", + ".xr-index-data,\n", ".xr-attrs {\n", " padding-left: 25px !important;\n", "}\n", "\n", ".xr-attrs,\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " grid-column: 1 / -1;\n", "}\n", "\n", @@ -1849,7 +3255,8 @@ "}\n", "\n", ".xr-icon-database,\n", - ".xr-icon-file-text2 {\n", + ".xr-icon-file-text2,\n", + ".xr-no-icon {\n", " display: inline-block;\n", " vertical-align: middle;\n", " width: 1em;\n", @@ -1915,7 +3322,7 @@ " precision: 2\n", " statistic: Individual Obs\n", " units: degK\n", - " var_desc: Air temperature
    • lat
      PandasIndex
      PandasIndex(Index([75.0, 72.5, 70.0, 67.5, 65.0, 62.5, 60.0, 57.5, 55.0, 52.5, 50.0, 47.5,\n",
      +       "       45.0, 42.5, 40.0, 37.5, 35.0, 32.5, 30.0, 27.5, 25.0, 22.5, 20.0, 17.5,\n",
      +       "       15.0],\n",
      +       "      dtype='float32', name='lat'))
    • lon
      PandasIndex
      PandasIndex(Index([200.0, 202.5, 205.0, 207.5, 210.0, 212.5, 215.0, 217.5, 220.0, 222.5,\n",
      +       "       225.0, 227.5, 230.0, 232.5, 235.0, 237.5, 240.0, 242.5, 245.0, 247.5,\n",
      +       "       250.0, 252.5, 255.0, 257.5, 260.0, 262.5, 265.0, 267.5, 270.0, 272.5,\n",
      +       "       275.0, 277.5, 280.0, 282.5, 285.0, 287.5, 290.0, 292.5, 295.0, 297.5,\n",
      +       "       300.0, 302.5, 305.0, 307.5, 310.0, 312.5, 315.0, 317.5, 320.0, 322.5,\n",
      +       "       325.0, 327.5, 330.0],\n",
      +       "      dtype='float32', name='lon'))
    • time
      PandasIndex
      PandasIndex(DatetimeIndex(['2013-01-01 00:00:00', '2013-01-01 06:00:00',\n",
      +       "               '2013-01-01 12:00:00', '2013-01-01 18:00:00',\n",
      +       "               '2013-01-02 00:00:00', '2013-01-02 06:00:00',\n",
      +       "               '2013-01-02 12:00:00', '2013-01-02 18:00:00',\n",
      +       "               '2013-01-03 00:00:00', '2013-01-03 06:00:00',\n",
      +       "               ...\n",
      +       "               '2014-12-29 12:00:00', '2014-12-29 18:00:00',\n",
      +       "               '2014-12-30 00:00:00', '2014-12-30 06:00:00',\n",
      +       "               '2014-12-30 12:00:00', '2014-12-30 18:00:00',\n",
      +       "               '2014-12-31 00:00:00', '2014-12-31 06:00:00',\n",
      +       "               '2014-12-31 12:00:00', '2014-12-31 18:00:00'],\n",
      +       "              dtype='datetime64[ns]', name='time', length=2920, freq=None))
  • GRIB_id :
    11
    GRIB_name :
    TMP
    actual_range :
    [185.16000366210938, 322.1000061035156]
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    long_name :
    4xDaily Air temperature at sigma level 995
    parent_stat :
    Other
    precision :
    2
    statistic :
    Individual Obs
    units :
    degK
    var_desc :
    Air temperature
  • " ], "text/plain": [ "\n", @@ -2028,7 +3455,7 @@ " var_desc: Air temperature" ] }, - "execution_count": 10, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -2039,7 +3466,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 35, "id": "eeb9ad78-1353-464f-8419-4c44ea499f17", "metadata": {}, "outputs": [ @@ -2049,7 +3476,7 @@ "numpy.ndarray" ] }, - "execution_count": 11, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -2060,17 +3487,17 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 36, "id": "140fe3e2-ea9b-445d-8401-5c624384c182", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "cupy._core.core.ndarray" + "cupy.ndarray" ] }, - "execution_count": 12, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -2081,21 +3508,16 @@ }, { "cell_type": "markdown", - "id": "1c07c449-bc43-490a-ac38-11e93200133d", + "id": "cab539a7-d952-4b38-b515-712c52c62501", "metadata": {}, "source": [ - "## GroupBy with flox\n", - "\n", - "Requires\n", - "\n", - "1. flox main branch?\n", - "2. https://github.com/ml31415/numpy-groupies/pull/63" + "## Chunk with dask" ] }, { "cell_type": "code", - "execution_count": 25, - "id": "c292cf77-c99e-40fa-8cad-d8914c346b29", + "execution_count": 37, + "id": "68f93bfe-fe56-488a-a10b-dc4f48029367", "metadata": {}, "outputs": [ { @@ -2362,6 +3784,11 @@ " grid-column: 4;\n", "}\n", "\n", + ".xr-index-preview {\n", + " grid-column: 2 / 5;\n", + " color: var(--xr-font-color2);\n", + "}\n", + "\n", ".xr-var-name,\n", ".xr-var-dims,\n", ".xr-var-dtype,\n", @@ -2383,14 +3810,16 @@ "}\n", "\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " display: none;\n", " background-color: var(--xr-background-color) !important;\n", " padding-bottom: 5px !important;\n", "}\n", "\n", ".xr-var-attrs-in:checked ~ .xr-var-attrs,\n", - ".xr-var-data-in:checked ~ .xr-var-data {\n", + ".xr-var-data-in:checked ~ .xr-var-data,\n", + ".xr-index-data-in:checked ~ .xr-index-data {\n", " display: block;\n", "}\n", "\n", @@ -2400,13 +3829,16 @@ "\n", ".xr-var-name span,\n", ".xr-var-data,\n", + ".xr-index-name div,\n", + ".xr-index-data,\n", ".xr-attrs {\n", " padding-left: 25px !important;\n", "}\n", "\n", ".xr-attrs,\n", ".xr-var-attrs,\n", - ".xr-var-data {\n", + ".xr-var-data,\n", + ".xr-index-data {\n", " grid-column: 1 / -1;\n", "}\n", "\n", @@ -2444,7 +3876,8 @@ "}\n", "\n", ".xr-icon-database,\n", - ".xr-icon-file-text2 {\n", + ".xr-icon-file-text2,\n", + ".xr-no-icon {\n", " display: inline-block;\n", " vertical-align: middle;\n", " width: 1em;\n", @@ -2453,164 +3886,223 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (month: 12, lat: 25, lon: 53)>\n",
    -       "array([[[246.34975, 246.38591, 246.21495, ..., 243.06096, 244.08774,\n",
    -       "         245.64653],\n",
    -       "        [248.8575 , 248.90729, 248.71028, ..., 241.52846, 243.50845,\n",
    -       "         246.7545 ],\n",
    -       "        [251.57713, 251.19649, 250.71451, ..., 243.39871, 246.78438,\n",
    -       "         251.56555],\n",
    -       "        ...,\n",
    -       "        [295.8504 , 295.24423, 295.2271 , ..., 295.1864 , 294.65726,\n",
    -       "         294.04868],\n",
    -       "        [296.54468, 296.47   , 296.16025, ..., 295.35614, 295.0814 ,\n",
    -       "         294.53015],\n",
    -       "        [297.15448, 297.23843, 297.04913, ..., 296.01816, 295.7758 ,\n",
    -       "         295.63678]],\n",
    -       "\n",
    -       "       [[246.6771 , 246.40562, 245.94829, ..., 241.85826, 243.00186,\n",
    -       "         244.44365],\n",
    -       "        [247.79994, 247.75986, 247.4774 , ..., 240.64687, 242.26611,\n",
    -       "         245.06642],\n",
    -       "        [249.07076, 248.57227, 247.94246, ..., 242.42851, 245.33334,\n",
    -       "         249.72244],\n",
    -       "...\n",
    -       "        [297.8426 , 297.14062, 296.98776, ..., 297.96893, 297.56882,\n",
    -       "         297.16125],\n",
    -       "        [298.58783, 298.4203 , 297.96912, ..., 298.16418, 298.194  ,\n",
    -       "         297.90833],\n",
    -       "        [298.81146, 298.8566 , 298.6211 , ..., 298.7296 , 298.7519 ,\n",
    -       "         298.81894]],\n",
    -       "\n",
    -       "       [[247.97087, 248.02097, 247.91281, ..., 239.77168, 241.02357,\n",
    -       "         242.62805],\n",
    -       "        [249.73338, 250.16028, 250.48564, ..., 238.78947, 240.96451,\n",
    -       "         244.11601],\n",
    -       "        [252.02939, 251.53108, 251.36617, ..., 238.07532, 241.91273,\n",
    -       "         247.06967],\n",
    -       "        ...,\n",
    -       "        [296.76517, 295.97668, 295.88925, ..., 296.456  , 296.09137,\n",
    -       "         295.65768],\n",
    -       "        [297.46817, 297.38034, 297.0443 , ..., 296.85565, 296.84668,\n",
    -       "         296.52142],\n",
    -       "        [297.88092, 297.98676, 297.7755 , ..., 297.60034, 297.5654 ,\n",
    -       "         297.53772]]], dtype=float32)\n",
    +       "
    <xarray.Dataset>\n",
    +       "Dimensions:  (time: 2920, lat: 25, lon: 53)\n",
            "Coordinates:\n",
            "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
            "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "  * month    (month) int64 1 2 3 4 5 6 7 8 9 10 11 12
  • time
    (time)
    datetime64[ns]
    2013-01-01 ... 2014-12-31T18:00:00
    long_name :
    Time
    standard_name :
    time
    array(['2013-01-01T00:00:00.000000000', '2013-01-01T06:00:00.000000000',\n",
    +       "       '2013-01-01T12:00:00.000000000', ..., '2014-12-31T06:00:00.000000000',\n",
    +       "       '2014-12-31T12:00:00.000000000', '2014-12-31T18:00:00.000000000'],\n",
    +       "      dtype='datetime64[ns]')
    • air
      (time, lat, lon)
      float32
      dask.array<chunksize=(10, 25, 53), meta=np.ndarray>
      GRIB_id :
      11
      GRIB_name :
      TMP
      actual_range :
      [185.16000366210938, 322.1000061035156]
      dataset :
      NMC Reanalysis
      level_desc :
      Surface
      long_name :
      4xDaily Air temperature at sigma level 995
      parent_stat :
      Other
      precision :
      2
      statistic :
      Individual Obs
      units :
      degK
      var_desc :
      Air temperature
      \n", + " \n", + " \n", + " \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      Array Chunk
      Bytes 14.76 MiB 51.76 kiB
      Shape (2920, 25, 53) (10, 25, 53)
      Dask graph 292 chunks in 2 graph layers
      Data type float32 numpy.ndarray
      \n", + "
      \n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + " \n", + " 53\n", + " 25\n", + " 2920\n", + "\n", + "
    • scalar
      ()
      float64
      ...
      [1 values with dtype=float64]
    • lat
      PandasIndex
      PandasIndex(Index([75.0, 72.5, 70.0, 67.5, 65.0, 62.5, 60.0, 57.5, 55.0, 52.5, 50.0, 47.5,\n",
      +       "       45.0, 42.5, 40.0, 37.5, 35.0, 32.5, 30.0, 27.5, 25.0, 22.5, 20.0, 17.5,\n",
      +       "       15.0],\n",
      +       "      dtype='float32', name='lat'))
    • lon
      PandasIndex
      PandasIndex(Index([200.0, 202.5, 205.0, 207.5, 210.0, 212.5, 215.0, 217.5, 220.0, 222.5,\n",
      +       "       225.0, 227.5, 230.0, 232.5, 235.0, 237.5, 240.0, 242.5, 245.0, 247.5,\n",
      +       "       250.0, 252.5, 255.0, 257.5, 260.0, 262.5, 265.0, 267.5, 270.0, 272.5,\n",
      +       "       275.0, 277.5, 280.0, 282.5, 285.0, 287.5, 290.0, 292.5, 295.0, 297.5,\n",
      +       "       300.0, 302.5, 305.0, 307.5, 310.0, 312.5, 315.0, 317.5, 320.0, 322.5,\n",
      +       "       325.0, 327.5, 330.0],\n",
      +       "      dtype='float32', name='lon'))
    • time
      PandasIndex
      PandasIndex(DatetimeIndex(['2013-01-01 00:00:00', '2013-01-01 06:00:00',\n",
      +       "               '2013-01-01 12:00:00', '2013-01-01 18:00:00',\n",
      +       "               '2013-01-02 00:00:00', '2013-01-02 06:00:00',\n",
      +       "               '2013-01-02 12:00:00', '2013-01-02 18:00:00',\n",
      +       "               '2013-01-03 00:00:00', '2013-01-03 06:00:00',\n",
      +       "               ...\n",
      +       "               '2014-12-29 12:00:00', '2014-12-29 18:00:00',\n",
      +       "               '2014-12-30 00:00:00', '2014-12-30 06:00:00',\n",
      +       "               '2014-12-30 12:00:00', '2014-12-30 18:00:00',\n",
      +       "               '2014-12-31 00:00:00', '2014-12-31 06:00:00',\n",
      +       "               '2014-12-31 12:00:00', '2014-12-31 18:00:00'],\n",
      +       "              dtype='datetime64[ns]', name='time', length=2920, freq=None))
  • Conventions :
    COARDS
    description :
    Data is from NMC initialized reanalysis\n", + "(4x/day). These are the 0.9950 sigma level values.
    platform :
    Model
    references :
    http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.html
    title :
    4x daily NMC reanalysis (1948)
  • " ], "text/plain": [ - "\n", - "array([[[246.34975, 246.38591, 246.21495, ..., 243.06096, 244.08774,\n", - " 245.64653],\n", - " [248.8575 , 248.90729, 248.71028, ..., 241.52846, 243.50845,\n", - " 246.7545 ],\n", - " [251.57713, 251.19649, 250.71451, ..., 243.39871, 246.78438,\n", - " 251.56555],\n", - " ...,\n", - " [295.8504 , 295.24423, 295.2271 , ..., 295.1864 , 294.65726,\n", - " 294.04868],\n", - " [296.54468, 296.47 , 296.16025, ..., 295.35614, 295.0814 ,\n", - " 294.53015],\n", - " [297.15448, 297.23843, 297.04913, ..., 296.01816, 295.7758 ,\n", - " 295.63678]],\n", - "\n", - " [[246.6771 , 246.40562, 245.94829, ..., 241.85826, 243.00186,\n", - " 244.44365],\n", - " [247.79994, 247.75986, 247.4774 , ..., 240.64687, 242.26611,\n", - " 245.06642],\n", - " [249.07076, 248.57227, 247.94246, ..., 242.42851, 245.33334,\n", - " 249.72244],\n", - "...\n", - " [297.8426 , 297.14062, 296.98776, ..., 297.96893, 297.56882,\n", - " 297.16125],\n", - " [298.58783, 298.4203 , 297.96912, ..., 298.16418, 298.194 ,\n", - " 297.90833],\n", - " [298.81146, 298.8566 , 298.6211 , ..., 298.7296 , 298.7519 ,\n", - " 298.81894]],\n", - "\n", - " [[247.97087, 248.02097, 247.91281, ..., 239.77168, 241.02357,\n", - " 242.62805],\n", - " [249.73338, 250.16028, 250.48564, ..., 238.78947, 240.96451,\n", - " 244.11601],\n", - " [252.02939, 251.53108, 251.36617, ..., 238.07532, 241.91273,\n", - " 247.06967],\n", - " ...,\n", - " [296.76517, 295.97668, 295.88925, ..., 296.456 , 296.09137,\n", - " 295.65768],\n", - " [297.46817, 297.38034, 297.0443 , ..., 296.85565, 296.84668,\n", - " 296.52142],\n", - " [297.88092, 297.98676, 297.7755 , ..., 297.60034, 297.5654 ,\n", - " 297.53772]]], dtype=float32)\n", + "\n", + "Dimensions: (time: 2920, lat: 25, lon: 53)\n", "Coordinates:\n", " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " * month (month) int64 1 2 3 4 5 6 7 8 9 10 11 12" + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + "Data variables:\n", + " air (time, lat, lon) float32 dask.array\n", + " scalar float64 ...\n", + "Attributes:\n", + " Conventions: COARDS\n", + " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", + " platform: Model\n", + " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", + " title: 4x daily NMC reanalysis (1948)" ] }, - "execution_count": 25, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.air.groupby(\"time.month\").mean(engine=\"numpy\")" + "ds.chunk(time=10)" + ] + }, + { + "cell_type": "markdown", + "id": "1c07c449-bc43-490a-ac38-11e93200133d", + "metadata": {}, + "source": [ + "## GroupBy with flox\n", + "\n", + "Requires\n", + "\n", + "1. flox main branch?\n", + "2. https://github.com/ml31415/numpy-groupies/pull/63" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c292cf77-c99e-40fa-8cad-d8914c346b29", + "metadata": {}, + "outputs": [], + "source": [ + "ds.air.groupby(\"time.month\").mean()" ] } ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:miniconda3-kvikio_nightly]", + "display_name": "miniconda3-kvikio_env", "language": "python", - "name": "conda-env-miniconda3-kvikio_nightly-py" + "name": "conda-env-miniconda3-kvikio_env-py" }, "language_info": { "codemirror_mode": { @@ -2622,7 +4114,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.16" }, "widgets": { "application/vnd.jupyter.widget-state+json": { From 87cb74eaad7aff14d6ec74a0962d2143967875f6 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 22 Aug 2023 12:09:57 -0600 Subject: [PATCH 08/20] Updated notebook --- docs/kvikio.ipynb | 1648 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 1275 insertions(+), 373 deletions(-) diff --git a/docs/kvikio.ipynb b/docs/kvikio.ipynb index 4867878..09c5394 100644 --- a/docs/kvikio.ipynb +++ b/docs/kvikio.ipynb @@ -5,7 +5,11 @@ "id": "5920bb97-1d76-4363-9aee-d1c5cd395409", "metadata": {}, "source": [ - "# Kvikio demo" + "# Kvikio demo\n", + "\n", + "Requires\n", + "- [ ] https://github.com/pydata/xarray/pull/8100\n", + "- [ ] Some updates to `dask.array.core.getter`" ] }, { @@ -20,22 +24,25 @@ "name": "stdout", "output_type": "stream", "text": [ - "flox : 0.7.3.dev12+g796dcd2\n", - "json : 2.0.9\n", - "xarray : 2023.7.0\n", - "cupy_xarray : 0.1.1+21.gd2da1e4.dirty\n", + "Exception reporting mode: Minimal\n", "kvikio : 23.2.0\n", - "zarr : 2.16.0\n", + "xarray : 2022.6.1.dev458+g83c2919b2\n", + "numpy_groupies: 0.9.22+2.gd148074\n", + "json : 2.0.9\n", "numpy : 1.24.4\n", + "flox : 0.7.3.dev12+g796dcd2\n", + "zarr : 2.16.1\n", + "dask : 2023.8.1\n", + "cupy_xarray : 0.1.1+21.gd2da1e4.dirty\n", "sys : 3.9.17 | packaged by conda-forge | (main, Aug 10 2023, 07:02:31) \n", "[GCC 12.3.0]\n", - "numpy_groupies: 0.9.22+2.gd148074\n", "\n" ] } ], "source": [ "%load_ext watermark\n", + "%xmode minimal\n", "\n", "# These imports are currently unnecessary. I import them to show versions\n", "# cupy_xarray registers the kvikio entrypoint on install.\n", @@ -50,6 +57,9 @@ "import xarray as xr\n", "import zarr\n", "\n", + "import dask\n", + "dask.config.set(scheduler=\"sync\")\n", + "\n", "store = \"./air-temperature.zarr\"\n", "\n", "%watermark -iv" @@ -86,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 3, "id": "81b2e5cb-4b2d-4a31-b7a0-961aadbc321d", "metadata": {}, "outputs": [ @@ -94,7 +104,43 @@ "name": "stdout", "output_type": "stream", "text": [ - "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None))))))\n" + "> \u001b[0;32m/glade/u/home/dcherian/python/xarray/xarray/core/indexing.py\u001b[0m(485)\u001b[0;36m__array__\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32m 484 \u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m--> 485 \u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_duck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m 486 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\n" + ] + }, + { + "name": "stdin", + "output_type": "stream", + "text": [ + "ipdb> c\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> \u001b[0;32m/glade/u/home/dcherian/python/xarray/xarray/core/indexing.py\u001b[0m(485)\u001b[0;36m__array__\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32m 484 \u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m--> 485 \u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_duck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m 486 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\n" + ] + }, + { + "name": "stdin", + "output_type": "stream", + "text": [ + "ipdb> c\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None))))))\n" ] }, { @@ -477,26 +523,26 @@ " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", " platform: Model\n", " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", - " title: 4x daily NMC reanalysis (1948)" ], "text/plain": [ @@ -528,7 +574,7 @@ " title: 4x daily NMC reanalysis (1948)" ] }, - "execution_count": 23, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -574,7 +620,10 @@ { "cell_type": "markdown", "id": "883d5507-988f-453a-b576-87bb563b540f", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ "## Test opening\n", "\n", @@ -583,7 +632,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 4, "id": "58063142-b69b-46a5-9e4d-a83944e57857", "metadata": {}, "outputs": [ @@ -970,26 +1019,26 @@ " precision: 2\n", " statistic: Individual Obs\n", " units: degK\n", - " var_desc: Air temperature
  • GRIB_id :
    11
    GRIB_name :
    TMP
    actual_range :
    [185.16000366210938, 322.1000061035156]
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    long_name :
    4xDaily Air temperature at sigma level 995
    parent_stat :
    Other
    precision :
    2
    statistic :
    Individual Obs
    units :
    degK
    var_desc :
    Air temperature
  • " ], "text/plain": [ "\n", @@ -1023,7 +1072,7 @@ " var_desc: Air temperature" ] }, - "execution_count": 24, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -1045,7 +1094,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 5, "id": "8fd27bdf-e317-4de3-891e-41d38d06dcaf", "metadata": {}, "outputs": [ @@ -1053,7 +1102,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None))))))\n" + "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None))))))\n" ] }, { @@ -1436,26 +1485,26 @@ " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", " platform: Model\n", " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", - " title: 4x daily NMC reanalysis (1948)" ], "text/plain": [ @@ -1487,7 +1536,7 @@ " title: 4x daily NMC reanalysis (1948)" ] }, - "execution_count": 25, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -1501,38 +1550,9 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 6, "id": "6c939a04-1588-4693-9483-c6ad7152951a", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=, key=BasicIndexer(()))))" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds.scalar.variable._data" - ] - }, - { - "cell_type": "markdown", - "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", - "metadata": {}, - "source": [ - "## Lazy reading" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "1ecc39b1-b788-4831-9160-5b35afb83598", - "metadata": {}, "outputs": [ { "data": { @@ -1900,97 +1920,38 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    -       "[3869000 values with dtype=float32]\n",
    -       "Coordinates:\n",
    -       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    -       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    -       "Attributes:\n",
    -       "    GRIB_id:       11\n",
    -       "    GRIB_name:     TMP\n",
    -       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    -       "    dataset:       NMC Reanalysis\n",
    -       "    level_desc:    Surface\n",
    -       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    -       "    parent_stat:   Other\n",
    -       "    precision:     2\n",
    -       "    statistic:     Individual Obs\n",
    -       "    units:         degK\n",
    -       "    var_desc:      Air temperature
    " + "
    <xarray.DataArray 'scalar' ()>\n",
    +       "[1 values with dtype=float64]
    " ], "text/plain": [ - "\n", - "[3869000 values with dtype=float32]\n", - "Coordinates:\n", - " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", - " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", - "Attributes:\n", - " GRIB_id: 11\n", - " GRIB_name: TMP\n", - " actual_range: [185.16000366210938, 322.1000061035156]\n", - " dataset: NMC Reanalysis\n", - " level_desc: Surface\n", - " long_name: 4xDaily Air temperature at sigma level 995\n", - " parent_stat: Other\n", - " precision: 2\n", - " statistic: Individual Obs\n", - " units: degK\n", - " var_desc: Air temperature" + "\n", + "[1 values with dtype=float64]" ] }, - "execution_count": 27, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.air" + "ds.scalar" ] }, { "cell_type": "markdown", - "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", - "metadata": {}, + "id": "bb84a7ad-84dc-4bb3-8636-3f9416953089", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "## Data load for repr" + "## Lazy reading" ] }, { "cell_type": "code", - "execution_count": 31, - "id": "00205e73-9b43-4254-9cba-f75435251391", + "execution_count": 7, + "id": "1ecc39b1-b788-4831-9160-5b35afb83598", "metadata": {}, "outputs": [ { @@ -2359,21 +2320,12 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'air' (lon: 53)>\n",
    -       "array([277.29   , 277.4    , 277.79   , 278.6    , 279.5    , 280.1    ,\n",
    -       "       280.6    , 280.9    , 280.79   , 280.69998, 280.79   , 281.     ,\n",
    -       "       280.29   , 277.69998, 273.5    , 269.     , 265.5    , 264.     ,\n",
    -       "       265.19998, 268.1    , 269.79   , 267.9    , 263.     , 258.1    ,\n",
    -       "       254.59999, 251.79999, 249.59999, 249.89   , 252.29999, 254.     ,\n",
    -       "       254.29999, 255.89   , 260.     , 263.     , 261.5    , 257.29   ,\n",
    -       "       255.5    , 258.29   , 264.     , 268.69998, 270.5    , 270.6    ,\n",
    -       "       271.19998, 272.9    , 274.79   , 276.4    , 278.19998, 280.5    ,\n",
    -       "       282.9    , 284.69998, 286.1    , 286.9    , 286.6    ],\n",
    -       "      dtype=float32)\n",
    +       "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    +       "[3869000 values with dtype=float32]\n",
            "Coordinates:\n",
    -       "    lat      float32 50.0\n",
    +       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
            "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    -       "    time     datetime64[ns] 2013-01-01\n",
    +       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
            "Attributes:\n",
            "    GRIB_id:       11\n",
            "    GRIB_name:     TMP\n",
    @@ -2385,44 +2337,45 @@
            "    precision:     2\n",
            "    statistic:     Individual Obs\n",
            "    units:         degK\n",
    -       "    var_desc:      Air temperature
  • time
    PandasIndex
    PandasIndex(DatetimeIndex(['2013-01-01 00:00:00', '2013-01-01 06:00:00',\n",
    +       "               '2013-01-01 12:00:00', '2013-01-01 18:00:00',\n",
    +       "               '2013-01-02 00:00:00', '2013-01-02 06:00:00',\n",
    +       "               '2013-01-02 12:00:00', '2013-01-02 18:00:00',\n",
    +       "               '2013-01-03 00:00:00', '2013-01-03 06:00:00',\n",
    +       "               ...\n",
    +       "               '2014-12-29 12:00:00', '2014-12-29 18:00:00',\n",
    +       "               '2014-12-30 00:00:00', '2014-12-30 06:00:00',\n",
    +       "               '2014-12-30 12:00:00', '2014-12-30 18:00:00',\n",
    +       "               '2014-12-31 00:00:00', '2014-12-31 06:00:00',\n",
    +       "               '2014-12-31 12:00:00', '2014-12-31 18:00:00'],\n",
    +       "              dtype='datetime64[ns]', name='time', length=2920, freq=None))
  • GRIB_id :
    11
    GRIB_name :
    TMP
    actual_range :
    [185.16000366210938, 322.1000061035156]
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    long_name :
    4xDaily Air temperature at sigma level 995
    parent_stat :
    Other
    precision :
    2
    statistic :
    Individual Obs
    units :
    degK
    var_desc :
    Air temperature
  • " ], "text/plain": [ - "\n", - "array([277.29 , 277.4 , 277.79 , 278.6 , 279.5 , 280.1 ,\n", - " 280.6 , 280.9 , 280.79 , 280.69998, 280.79 , 281. ,\n", - " 280.29 , 277.69998, 273.5 , 269. , 265.5 , 264. ,\n", - " 265.19998, 268.1 , 269.79 , 267.9 , 263. , 258.1 ,\n", - " 254.59999, 251.79999, 249.59999, 249.89 , 252.29999, 254. ,\n", - " 254.29999, 255.89 , 260. , 263. , 261.5 , 257.29 ,\n", - " 255.5 , 258.29 , 264. , 268.69998, 270.5 , 270.6 ,\n", - " 271.19998, 272.9 , 274.79 , 276.4 , 278.19998, 280.5 ,\n", - " 282.9 , 284.69998, 286.1 , 286.9 , 286.6 ],\n", - " dtype=float32)\n", + "\n", + "[3869000 values with dtype=float32]\n", "Coordinates:\n", - " lat float32 50.0\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " time datetime64[ns] 2013-01-01\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", "Attributes:\n", " GRIB_id: 11\n", " GRIB_name: TMP\n", @@ -2437,19 +2390,30 @@ " var_desc: Air temperature" ] }, - "execution_count": 31, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds[\"air\"].isel(time=0, lat=10).load()" + "ds.air" + ] + }, + { + "cell_type": "markdown", + "id": "7d366864-a2b3-4573-9bf7-41d1f6ee457c", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## Data load for repr" ] }, { "cell_type": "code", - "execution_count": 29, - "id": "80aa6892-8c7f-44b3-bd52-9795ec4ea6f3", + "execution_count": 8, + "id": "00205e73-9b43-4254-9cba-f75435251391", "metadata": {}, "outputs": [ { @@ -2818,55 +2782,968 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.DataArray 'scalar' ()>\n",
    -       "[1 values with dtype=float64]
    " + "
    <xarray.DataArray 'air' (lon: 53)>\n",
    +       "array([277.29   , 277.4    , 277.79   , 278.6    , 279.5    , 280.1    ,\n",
    +       "       280.6    , 280.9    , 280.79   , 280.69998, 280.79   , 281.     ,\n",
    +       "       280.29   , 277.69998, 273.5    , 269.     , 265.5    , 264.     ,\n",
    +       "       265.19998, 268.1    , 269.79   , 267.9    , 263.     , 258.1    ,\n",
    +       "       254.59999, 251.79999, 249.59999, 249.89   , 252.29999, 254.     ,\n",
    +       "       254.29999, 255.89   , 260.     , 263.     , 261.5    , 257.29   ,\n",
    +       "       255.5    , 258.29   , 264.     , 268.69998, 270.5    , 270.6    ,\n",
    +       "       271.19998, 272.9    , 274.79   , 276.4    , 278.19998, 280.5    ,\n",
    +       "       282.9    , 284.69998, 286.1    , 286.9    , 286.6    ],\n",
    +       "      dtype=float32)\n",
    +       "Coordinates:\n",
    +       "    lat      float32 50.0\n",
    +       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    +       "    time     datetime64[ns] 2013-01-01\n",
    +       "Attributes:\n",
    +       "    GRIB_id:       11\n",
    +       "    GRIB_name:     TMP\n",
    +       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    +       "    dataset:       NMC Reanalysis\n",
    +       "    level_desc:    Surface\n",
    +       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    +       "    parent_stat:   Other\n",
    +       "    precision:     2\n",
    +       "    statistic:     Individual Obs\n",
    +       "    units:         degK\n",
    +       "    var_desc:      Air temperature
    " ], "text/plain": [ - "\n", - "[1 values with dtype=float64]" + "\n", + "array([277.29 , 277.4 , 277.79 , 278.6 , 279.5 , 280.1 ,\n", + " 280.6 , 280.9 , 280.79 , 280.69998, 280.79 , 281. ,\n", + " 280.29 , 277.69998, 273.5 , 269. , 265.5 , 264. ,\n", + " 265.19998, 268.1 , 269.79 , 267.9 , 263. , 258.1 ,\n", + " 254.59999, 251.79999, 249.59999, 249.89 , 252.29999, 254. ,\n", + " 254.29999, 255.89 , 260. , 263. , 261.5 , 257.29 ,\n", + " 255.5 , 258.29 , 264. , 268.69998, 270.5 , 270.6 ,\n", + " 271.19998, 272.9 , 274.79 , 276.4 , 278.19998, 280.5 ,\n", + " 282.9 , 284.69998, 286.1 , 286.9 , 286.6 ],\n", + " dtype=float32)\n", + "Coordinates:\n", + " lat float32 50.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " time datetime64[ns] 2013-01-01\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds[\"air\"].isel(time=0, lat=10).load()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "80aa6892-8c7f-44b3-bd52-9795ec4ea6f3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.DataArray 'scalar' ()>\n",
    +       "[1 values with dtype=float64]
    " + ], + "text/plain": [ + "\n", + "[1 values with dtype=float64]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.scalar" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ba48a2c0-96e0-41d7-9e07-381e05e8dc33", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    +       "[3869000 values with dtype=float32]\n",
    +       "Coordinates:\n",
    +       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
    +       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    +       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    +       "Attributes:\n",
    +       "    GRIB_id:       11\n",
    +       "    GRIB_name:     TMP\n",
    +       "    actual_range:  [185.16000366210938, 322.1000061035156]\n",
    +       "    dataset:       NMC Reanalysis\n",
    +       "    level_desc:    Surface\n",
    +       "    long_name:     4xDaily Air temperature at sigma level 995\n",
    +       "    parent_stat:   Other\n",
    +       "    precision:     2\n",
    +       "    statistic:     Individual Obs\n",
    +       "    units:         degK\n",
    +       "    var_desc:      Air temperature
    " + ], + "text/plain": [ + "\n", + "[3869000 values with dtype=float32]\n", + "Coordinates:\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + "Attributes:\n", + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" ] }, - "execution_count": 29, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.scalar" + "ds.air" ] }, { "cell_type": "markdown", "id": "d0ea31d2-6c52-4346-b489-fc1e43200213", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ "## CuPy array on load" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 11, "id": "1b34a68a-a6b3-4273-bf7c-28814ebfce11", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "cupy.ndarray" + "MemoryCachedArray(array=CopyOnWriteArray(array=LazilyIndexedArray(array=_ElementwiseFunctionArray(LazilyIndexedArray(array=, key=BasicIndexer((slice(None, None, None), slice(None, None, None), slice(None, None, None)))), func=functools.partial(, scale_factor=0.01, add_offset=None, dtype=), dtype=dtype('float32')), key=BasicIndexer((0, 10, slice(None, None, None))))))" ] }, - "execution_count": 32, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "type(ds[\"air\"].isel(time=0, lat=10).data)" + "ds[\"air\"].isel(time=0, lat=10).variable._data" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 12, "id": "db69559c-1fde-4b3b-914d-87d8437ec256", "metadata": {}, "outputs": [ @@ -2876,7 +3753,7 @@ "cupy.ndarray" ] }, - "execution_count": 33, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -2888,15 +3765,18 @@ { "cell_type": "markdown", "id": "d34a5cce-7bbc-408f-b643-05da1e121c78", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ "## Load to host" ] }, { "cell_type": "code", - "execution_count": 34, - "id": "457a612e-04cb-4ffa-8cda-f4371b33bda8", + "execution_count": 13, + "id": "09b40d7d-ed38-4a50-af11-c2e5f0242a97", "metadata": {}, "outputs": [ { @@ -3266,47 +4146,7 @@ " fill: currentColor;\n", "}\n", "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    -       "array([[[241.2    , 242.5    , 243.5    , ..., 232.79999, 235.5    ,\n",
    -       "         238.59999],\n",
    -       "        [243.79999, 244.5    , 244.7    , ..., 232.79999, 235.29999,\n",
    -       "         239.29999],\n",
    -       "        [250.     , 249.79999, 248.89   , ..., 233.2    , 236.39   ,\n",
    -       "         241.7    ],\n",
    -       "        ...,\n",
    -       "        [296.6    , 296.19998, 296.4    , ..., 295.4    , 295.1    ,\n",
    -       "         294.69998],\n",
    -       "        [295.9    , 296.19998, 296.79   , ..., 295.9    , 295.9    ,\n",
    -       "         295.19998],\n",
    -       "        [296.29   , 296.79   , 297.1    , ..., 296.9    , 296.79   ,\n",
    -       "         296.6    ]],\n",
    -       "\n",
    -       "       [[242.09999, 242.7    , 243.09999, ..., 232.     , 233.59999,\n",
    -       "         235.79999],\n",
    -       "        [243.59999, 244.09999, 244.2    , ..., 231.     , 232.5    ,\n",
    -       "         235.7    ],\n",
    -       "        [253.2    , 252.89   , 252.09999, ..., 230.79999, 233.39   ,\n",
    -       "         238.5    ],\n",
    -       "...\n",
    -       "        [293.69   , 293.88998, 295.38998, ..., 295.09   , 294.69   ,\n",
    -       "         294.29   ],\n",
    -       "        [296.29   , 297.19   , 297.59   , ..., 295.29   , 295.09   ,\n",
    -       "         294.38998],\n",
    -       "        [297.79   , 298.38998, 298.49   , ..., 295.69   , 295.49   ,\n",
    -       "         295.19   ]],\n",
    -       "\n",
    -       "       [[245.09   , 244.29   , 243.29   , ..., 241.68999, 241.48999,\n",
    -       "         241.79   ],\n",
    -       "        [249.89   , 249.29   , 248.39   , ..., 239.59   , 240.29   ,\n",
    -       "         241.68999],\n",
    -       "        [262.99   , 262.19   , 261.38998, ..., 239.89   , 242.59   ,\n",
    -       "         246.29   ],\n",
    -       "        ...,\n",
    -       "        [293.79   , 293.69   , 295.09   , ..., 295.29   , 295.09   ,\n",
    -       "         294.69   ],\n",
    -       "        [296.09   , 296.88998, 297.19   , ..., 295.69   , 295.69   ,\n",
    -       "         295.19   ],\n",
    -       "        [297.69   , 298.09   , 298.09   , ..., 296.49   , 296.19   ,\n",
    -       "         295.69   ]]], dtype=float32)\n",
    +       "[3869000 values with dtype=float32]\n",
            "Coordinates:\n",
            "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
            "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
    @@ -3322,66 +4162,26 @@
            "    precision:     2\n",
            "    statistic:     Individual Obs\n",
            "    units:         degK\n",
    -       "    var_desc:      Air temperature
  • GRIB_id :
    11
    GRIB_name :
    TMP
    actual_range :
    [185.16000366210938, 322.1000061035156]
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    long_name :
    4xDaily Air temperature at sigma level 995
    parent_stat :
    Other
    precision :
    2
    statistic :
    Individual Obs
    units :
    degK
    var_desc :
    Air temperature
  • " ], "text/plain": [ "\n", - "array([[[241.2 , 242.5 , 243.5 , ..., 232.79999, 235.5 ,\n", - " 238.59999],\n", - " [243.79999, 244.5 , 244.7 , ..., 232.79999, 235.29999,\n", - " 239.29999],\n", - " [250. , 249.79999, 248.89 , ..., 233.2 , 236.39 ,\n", - " 241.7 ],\n", - " ...,\n", - " [296.6 , 296.19998, 296.4 , ..., 295.4 , 295.1 ,\n", - " 294.69998],\n", - " [295.9 , 296.19998, 296.79 , ..., 295.9 , 295.9 ,\n", - " 295.19998],\n", - " [296.29 , 296.79 , 297.1 , ..., 296.9 , 296.79 ,\n", - " 296.6 ]],\n", - "\n", - " [[242.09999, 242.7 , 243.09999, ..., 232. , 233.59999,\n", - " 235.79999],\n", - " [243.59999, 244.09999, 244.2 , ..., 231. , 232.5 ,\n", - " 235.7 ],\n", - " [253.2 , 252.89 , 252.09999, ..., 230.79999, 233.39 ,\n", - " 238.5 ],\n", - "...\n", - " [293.69 , 293.88998, 295.38998, ..., 295.09 , 294.69 ,\n", - " 294.29 ],\n", - " [296.29 , 297.19 , 297.59 , ..., 295.29 , 295.09 ,\n", - " 294.38998],\n", - " [297.79 , 298.38998, 298.49 , ..., 295.69 , 295.49 ,\n", - " 295.19 ]],\n", - "\n", - " [[245.09 , 244.29 , 243.29 , ..., 241.68999, 241.48999,\n", - " 241.79 ],\n", - " [249.89 , 249.29 , 248.39 , ..., 239.59 , 240.29 ,\n", - " 241.68999],\n", - " [262.99 , 262.19 , 261.38998, ..., 239.89 , 242.59 ,\n", - " 246.29 ],\n", - " ...,\n", - " [293.79 , 293.69 , 295.09 , ..., 295.29 , 295.09 ,\n", - " 294.69 ],\n", - " [296.09 , 296.88998, 297.19 , ..., 295.69 , 295.69 ,\n", - " 295.19 ],\n", - " [297.69 , 298.09 , 298.09 , ..., 296.49 , 296.19 ,\n", - " 295.69 ]]], dtype=float32)\n", + "[3869000 values with dtype=float32]\n", "Coordinates:\n", " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", @@ -3455,18 +4215,18 @@ " var_desc: Air temperature" ] }, - "execution_count": 34, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.air.as_numpy()" + "ds.air" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 14, "id": "eeb9ad78-1353-464f-8419-4c44ea499f17", "metadata": {}, "outputs": [ @@ -3476,7 +4236,7 @@ "numpy.ndarray" ] }, - "execution_count": 35, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -3487,7 +4247,28 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 15, + "id": "615efd76-2194-4604-9ab8-61499e7d725d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "cupy.ndarray" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(ds.air.data)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, "id": "140fe3e2-ea9b-445d-8401-5c624384c182", "metadata": {}, "outputs": [ @@ -3497,7 +4278,7 @@ "cupy.ndarray" ] }, - "execution_count": 36, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -3509,14 +4290,24 @@ { "cell_type": "markdown", "id": "cab539a7-d952-4b38-b515-712c52c62501", + "metadata": { + "tags": [] + }, + "source": [ + "## Doesn't work: Chunk with dask" + ] + }, + { + "cell_type": "markdown", + "id": "62c084eb-8df4-4b7f-a187-a736d68d430d", "metadata": {}, "source": [ - "## Chunk with dask" + "`meta` is wrong" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 4, "id": "68f93bfe-fe56-488a-a10b-dc4f48029367", "metadata": {}, "outputs": [ @@ -3886,31 +4677,24 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
    <xarray.Dataset>\n",
    -       "Dimensions:  (time: 2920, lat: 25, lon: 53)\n",
    +       "
    <xarray.DataArray 'air' (time: 2920, lat: 25, lon: 53)>\n",
    +       "dask.array<xarray-air, shape=(2920, 25, 53), dtype=float32, chunksize=(10, 25, 53), chunktype=numpy.ndarray>\n",
            "Coordinates:\n",
            "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
            "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
            "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
    -       "Data variables:\n",
    -       "    air      (time, lat, lon) float32 dask.array<chunksize=(10, 25, 53), meta=np.ndarray>\n",
    -       "    scalar   float64 ...\n",
            "Attributes:\n",
    -       "    Conventions:  COARDS\n",
    -       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
    -       "    platform:     Model\n",
    -       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n",
    -       "    title:        4x daily NMC reanalysis (1948)
    " + " dtype='datetime64[ns]', name='time', length=2920, freq=None))
  • GRIB_id :
    11
    GRIB_name :
    TMP
    actual_range :
    [185.16000366210938, 322.1000061035156]
    dataset :
    NMC Reanalysis
    level_desc :
    Surface
    long_name :
    4xDaily Air temperature at sigma level 995
    parent_stat :
    Other
    precision :
    2
    statistic :
    Individual Obs
    units :
    degK
    var_desc :
    Air temperature
  • " ], "text/plain": [ - "\n", - "Dimensions: (time: 2920, lat: 25, lon: 53)\n", + "\n", + "dask.array\n", "Coordinates:\n", " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", - "Data variables:\n", - " air (time, lat, lon) float32 dask.array\n", - " scalar float64 ...\n", "Attributes:\n", - " Conventions: COARDS\n", - " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", - " platform: Model\n", - " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...\n", - " title: 4x daily NMC reanalysis (1948)" + " GRIB_id: 11\n", + " GRIB_name: TMP\n", + " actual_range: [185.16000366210938, 322.1000061035156]\n", + " dataset: NMC Reanalysis\n", + " level_desc: Surface\n", + " long_name: 4xDaily Air temperature at sigma level 995\n", + " parent_stat: Other\n", + " precision: 2\n", + " statistic: Individual Obs\n", + " units: degK\n", + " var_desc: Air temperature" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.chunk(time=10).air" + ] + }, + { + "cell_type": "markdown", + "id": "3f4c72f6-22e7-4e99-9f4e-2524d6ab4226", + "metadata": {}, + "source": [ + "`dask.array.core.getter` calls `np.asarray` on each chunk.\n", + "\n", + "This calls `ImplicitToExplicitIndexingAdapter.__array__` which calls `np.asarray(cupy.array)` which raises.\n", + "\n", + "Xarray uses `.get_duck_array` internally to remove these adapters. We might need to add\n", + "```python\n", + "# handle xarray internal classes that might wrap cupy\n", + "if hasattr(c, \"get_duck_array\"):\n", + " c = c.get_duck_array()\n", + "else:\n", + " c = np.asarray(c)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "e1256d03-9701-433a-8291-80dc8dccffce", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" ] }, - "execution_count": 37, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds.chunk(time=10)" + "from dask.utils import is_arraylike\n", + "\n", + "data = ds.air.variable._data\n", + "is_arraylike(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "308affa5-9fb9-4638-989b-97aac2604c16", + "metadata": {}, + "outputs": [], + "source": [ + "from xarray.core.indexing import ImplicitToExplicitIndexingAdapter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "985cd2f8-406e-4e9e-8017-42efb16aa40e", + "metadata": {}, + "outputs": [], + "source": [ + "ImplicitToExplicitIndexingAdapter(data).get_duck_array()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8ef4f7-5014-476f-b4c0-ec2f9abdb6e2", + "metadata": {}, + "outputs": [], + "source": [ + "ds.chunk(time=10).air.compute()" + ] + }, + { + "cell_type": "markdown", + "id": "17dc1bf6-7548-4eee-a5f3-ebcc20d41567", + "metadata": {}, + "source": [ + "### explicit meta" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cdd4b4e6-d69a-4898-964a-0e6096ca1942", + "metadata": {}, + "outputs": [], + "source": [ + "import cupy as cp\n", + "\n", + "chunked = ds.chunk(time=10, from_array_kwargs={\"meta\": cp.array([])})\n", + "chunked.air" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74f80d94-ebb6-43c3-9411-79e0442d894e", + "metadata": {}, + "outputs": [], + "source": [ + "%autoreload\n", + "\n", + "chunked.compute()" ] }, { @@ -4114,7 +5016,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.9.17" }, "widgets": { "application/vnd.jupyter.widget-state+json": { From d7394ef7bdc941df7e418918ef2aabb7fea5839e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 22 Aug 2023 18:10:33 +0000 Subject: [PATCH 09/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/kvikio.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/kvikio.ipynb b/docs/kvikio.ipynb index 09c5394..6117eb2 100644 --- a/docs/kvikio.ipynb +++ b/docs/kvikio.ipynb @@ -58,6 +58,7 @@ "import zarr\n", "\n", "import dask\n", + "\n", "dask.config.set(scheduler=\"sync\")\n", "\n", "store = \"./air-temperature.zarr\"\n", From ca0cf45d1c2cca5820b0c1ee3f6813bd1519ff10 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 2 Nov 2023 21:49:07 -0600 Subject: [PATCH 10/20] Add tests --- cupy_xarray/kvikio.py | 12 ++++--- cupy_xarray/tests/test_kvikio.py | 54 ++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 5 deletions(-) create mode 100644 cupy_xarray/tests/test_kvikio.py diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index 669978f..410dee4 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -20,6 +20,10 @@ has_kvikio = False +# TODO: minimum kvikio version for supporting consolidated +# TODO: minimum xarray version for ZarrArrayWrapper._array 2023.10.0? + + class DummyZarrArrayWrapper(ZarrArrayWrapper): def __init__(self, array: np.ndarray): assert isinstance(array, np.ndarray) @@ -47,11 +51,12 @@ class EagerCupyZarrArrayWrapper(ZarrArrayWrapper): """Used to wrap dimension coordinates.""" def __array__(self): - return self.datastore.zarr_group[self.variable_name][:].get() + return self._array[:].get() def get_array(self): # total hack: make a numpy array look like a Zarr array - return DummyZarrArrayWrapper(self.datastore.zarr_group[self.variable_name][:].get()) + # this gets us through Xarray's backend layers + return DummyZarrArrayWrapper(self._array[:].get()) class GDSZarrStore(ZarrStore): @@ -84,9 +89,6 @@ def open_group( ) open_kwargs["storage_options"] = storage_options - # TODO: handle consolidated - assert not consolidated - if chunk_store: open_kwargs["chunk_store"] = chunk_store if consolidated is None: diff --git a/cupy_xarray/tests/test_kvikio.py b/cupy_xarray/tests/test_kvikio.py new file mode 100644 index 0000000..de98c09 --- /dev/null +++ b/cupy_xarray/tests/test_kvikio.py @@ -0,0 +1,54 @@ +import cupy as cp +import numpy as np +import pytest +import xarray as xr + +kvikio = pytest.importorskip("kvikio") +zarr = pytest.importorskip("zarr") + +import kvikio.zarr # noqa +import xarray.core.indexing # noqa +from xarray.core.indexing import ExplicitlyIndexedNDArrayMixin + + +@pytest.fixture +def store(tmp_path): + ds = xr.Dataset( + { + "a": ("x", np.arange(10), {"foo": "bar"}), + "scalar": np.array(1), + }, + coords={"x": ("x", np.arange(-5, 5))}, + ) + + for var in ds.variables: + ds[var].encoding["compressor"] = None + + store_path = tmp_path / "kvikio.zarr" + ds.to_zarr(store_path, consolidated=True) + return store_path + + +def test_entrypoint(): + assert "kvikio" in xr.backends.list_engines() + + +@pytest.mark.parametrize("consolidated", [True, False]) +def test_lazy_load(consolidated, store): + with xr.open_dataset(store, engine="kvikio", consolidated=consolidated) as ds: + for _, da in ds.data_vars.items(): + assert isinstance(da.variable._data, ExplicitlyIndexedNDArrayMixin) + + +@pytest.mark.parametrize("indexer", [slice(None), slice(2, 4), 2, [2, 3, 5]]) +def test_lazy_indexing(indexer, store): + with xr.open_dataset(store, engine="kvikio") as ds: + ds = ds.isel(x=indexer) + for _, da in ds.data_vars.items(): + assert isinstance(da.variable._data, ExplicitlyIndexedNDArrayMixin) + + loaded = ds.compute() + for _, da in loaded.data_vars.items(): + if da.ndim == 0: + continue + assert isinstance(da.data, cp.ndarray) From 5d27b261c3880e0f93f78fba23ca2397fcf75819 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 21 Jun 2024 15:29:28 +1200 Subject: [PATCH 11/20] Move kvikio notebook under docs/source Allow it to be rendered under the User Guide section. --- docs/index.md | 1 + docs/{ => source}/kvikio.ipynb | 0 2 files changed, 1 insertion(+) rename docs/{ => source}/kvikio.ipynb (100%) diff --git a/docs/index.md b/docs/index.md index 3bbd9a0..6cc05e9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -57,6 +57,7 @@ Large parts of this documentations comes from [SciPy 2023 Xarray on GPUs tutoria source/high-level-api source/apply-ufunc source/real-example-1 + source/kvikio **Tutorials & Presentations**: diff --git a/docs/kvikio.ipynb b/docs/source/kvikio.ipynb similarity index 100% rename from docs/kvikio.ipynb rename to docs/source/kvikio.ipynb From 85491d70ccc40d6c954beb5ab28bd6f46ed57490 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 22 Jun 2024 12:13:14 +1200 Subject: [PATCH 12/20] Add zarr as a dependency in ci/doc.yml Will need it for the kvikio.zarr docs later. --- ci/doc.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/doc.yml b/ci/doc.yml index 983d20f..945f2a7 100644 --- a/ci/doc.yml +++ b/ci/doc.yml @@ -15,6 +15,7 @@ dependencies: - furo - myst-nb - xarray + - zarr - pip: # relative to this file. Needs to be editable to be accepted. - --editable .. From c470b975beffd5fd53ad563542b17d943ffae41e Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 22 Jun 2024 15:56:34 +1200 Subject: [PATCH 13/20] Add entry for KvikioBackendEntrypoint in API docs Create new section in the API documentation page for the kvikIO engine. Added more docstrings to the kvikio.py file, and fixed some imports so things render nicely on the API page. Also added an intersphinx link to the kvikio docs at https://docs.rapids.ai/api/kvikio/stable. --- cupy_xarray/__init__.py | 3 ++- cupy_xarray/kvikio.py | 16 ++++++++++++++-- docs/api.rst | 13 +++++++++++++ docs/conf.py | 1 + 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/cupy_xarray/__init__.py b/cupy_xarray/__init__.py index 5c3a06c..0bb96aa 100644 --- a/cupy_xarray/__init__.py +++ b/cupy_xarray/__init__.py @@ -1,4 +1,5 @@ from . import _version -from .accessors import CupyDataArrayAccessor, CupyDatasetAccessor # noqa +from .accessors import CupyDataArrayAccessor, CupyDatasetAccessor # noqa: F401 +from .kvikio import KvikioBackendEntrypoint # noqa: F401 __version__ = _version.get_versions()["version"] diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index 410dee4..871bc27 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -1,9 +1,13 @@ +""" +:doc:`kvikIO ` backend for xarray to read Zarr stores directly into CuPy +arrays in GPU memory. +""" + import os import warnings import cupy as cp import numpy as np -import zarr from xarray import Variable from xarray.backends import zarr as zarr_backend from xarray.backends.common import _normalize_path # TODO: can this be public @@ -14,6 +18,7 @@ try: import kvikio.zarr + import zarr has_kvikio = True except ImportError: @@ -165,9 +170,16 @@ def open_store_variable(self, name, zarr_array): class KvikioBackendEntrypoint(ZarrBackendEntrypoint): + """ + Xarray backend to read Zarr stores using 'kvikio' engine. + + For more information about the underlying library, visit + :doc:`kvikIO's Zarr page`. + """ + available = has_kvikio description = "Open zarr files (.zarr) using Kvikio" - url = "https://docs.rapids.ai/api/kvikio/nightly/api.html#zarr" + url = "https://docs.rapids.ai/api/kvikio/stable/api/#zarr" # disabled by default # We need to provide this because of the subclassing from diff --git a/docs/api.rst b/docs/api.rst index c1ad4c3..67c1cba 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -51,3 +51,16 @@ Methods Dataset.cupy.as_cupy Dataset.cupy.as_numpy + + +KvikIO engine +------------- + +.. currentmodule:: cupy_xarray + +.. automodule:: cupy_xarray.kvikio + +.. autosummary:: + :toctree: generated/ + + KvikioBackendEntrypoint diff --git a/docs/conf.py b/docs/conf.py index 1ba6a75..3e2d81b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -57,6 +57,7 @@ "python": ("https://docs.python.org/3/", None), "dask": ("https://docs.dask.org/en/latest", None), "cupy": ("https://docs.cupy.dev/en/latest", None), + "kvikio": ("https://docs.rapids.ai/api/kvikio/stable", None), "xarray": ("http://docs.xarray.dev/en/latest/", None), } From 95efa180be992ab877f22781ad240222d438af00 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 22 Jun 2024 19:50:32 +1200 Subject: [PATCH 14/20] Fix input argument into CupyZarrArrayWrapper Fixes error like `TypeError: ZarrArrayWrapper.__init__() takes 2 positional arguments but 3 were given`. --- cupy_xarray/kvikio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index 871bc27..078d1f3 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -152,7 +152,7 @@ def open_store_variable(self, name, zarr_array): array_wrapper = EagerCupyZarrArrayWrapper else: array_wrapper = CupyZarrArrayWrapper - data = indexing.LazilyIndexedArray(array_wrapper(name, self)) + data = indexing.LazilyIndexedArray(array_wrapper(zarr_array)) attributes = dict(attributes) encoding = { From ae2a7f1c205358d459a90eae364708f12b4bb918 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 14 Dec 2024 15:37:35 +0000 Subject: [PATCH 15/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cupy_xarray/kvikio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index 078d1f3..fd6cb29 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -163,7 +163,7 @@ def open_store_variable(self, name, zarr_array): } # _FillValue needs to be in attributes, not encoding, so it will get # picked up by decode_cf - if getattr(zarr_array, "fill_value") is not None: + if zarr_array.fill_value is not None: attributes["_FillValue"] = zarr_array.fill_value return Variable(dimensions, data, attributes, encoding) From 15fbafd9410057b19015bc04f5c98883058961a7 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 14 Dec 2024 11:06:07 -0500 Subject: [PATCH 16/20] Re-add kvikio backend entrypoint to pyproject.toml Fix improper merge handling on d684dadc160f997ae7a6c72c73c61839d257f002 --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index d98b3fe..2d5094e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,9 @@ test = [ "pytest", ] +[project.entry-points."xarray.backends"] +kvikio = "cupy_xarray.kvikio:KvikioBackendEntrypoint" + [tool.ruff] line-length = 100 # E501 (line-too-long) exclude = [ From f3df115f2db7516cac43c9cb8d32ac817a3630cb Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 14 Dec 2024 11:20:25 -0500 Subject: [PATCH 17/20] Fix C408 and E402 --- cupy_xarray/kvikio.py | 12 ++++++------ cupy_xarray/tests/test_kvikio.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index fd6cb29..f82a07f 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -85,13 +85,13 @@ def open_group( if isinstance(store, os.PathLike): store = os.fspath(store) - open_kwargs = dict( - mode=mode, - synchronizer=synchronizer, - path=group, + open_kwargs = { + "mode": mode, + "synchronizer": synchronizer, + "path": group, ########## NEW STUFF - meta_array=cp.empty(()), - ) + "meta_array": cp.empty(()), + } open_kwargs["storage_options"] = storage_options if chunk_store: diff --git a/cupy_xarray/tests/test_kvikio.py b/cupy_xarray/tests/test_kvikio.py index de98c09..ba64fbb 100644 --- a/cupy_xarray/tests/test_kvikio.py +++ b/cupy_xarray/tests/test_kvikio.py @@ -2,13 +2,13 @@ import numpy as np import pytest import xarray as xr +from xarray.core.indexing import ExplicitlyIndexedNDArrayMixin kvikio = pytest.importorskip("kvikio") zarr = pytest.importorskip("zarr") import kvikio.zarr # noqa import xarray.core.indexing # noqa -from xarray.core.indexing import ExplicitlyIndexedNDArrayMixin @pytest.fixture From 4e1857ac88b10062d8ae9aa95a112b34c3b29a84 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 14 Dec 2024 15:26:02 -0500 Subject: [PATCH 18/20] Use get_duck_array instead of get_array Fix `TypeError: Implicit conversion to a NumPy array is not allowed. Please use `.get()` to construct a NumPy array explicitly` on https://github.com/pydata/xarray/blob/v2024.11.0/xarray/core/indexing.py#L578 --- cupy_xarray/kvikio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index f82a07f..aa2e846 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -58,7 +58,7 @@ class EagerCupyZarrArrayWrapper(ZarrArrayWrapper): def __array__(self): return self._array[:].get() - def get_array(self): + def get_duck_array(self): # total hack: make a numpy array look like a Zarr array # this gets us through Xarray's backend layers return DummyZarrArrayWrapper(self._array[:].get()) From 7345b617092a24f9af81f6cbfcbcca5cd63ad2fc Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Tue, 17 Dec 2024 11:24:14 +1300 Subject: [PATCH 19/20] Fix SIM108 Use ternary operator --- cupy_xarray/kvikio.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index aa2e846..e5b7c6d 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -143,15 +143,14 @@ def open_store_variable(self, name, zarr_array): ) #### Changed from zarr array wrapper - if name in dimensions: - # we want indexed dimensions to be loaded eagerly - # Right now we load in to device and then transfer to host - # But these should be small-ish arrays - # TODO: can we tell GDSStore to load as numpy array directly - # not cupy array? - array_wrapper = EagerCupyZarrArrayWrapper - else: - array_wrapper = CupyZarrArrayWrapper + # we want indexed dimensions to be loaded eagerly + # Right now we load in to device and then transfer to host + # But these should be small-ish arrays + # TODO: can we tell GDSStore to load as numpy array directly + # not cupy array? + array_wrapper = ( + EagerCupyZarrArrayWrapper if name in dimensions else CupyZarrArrayWrapper + ) data = indexing.LazilyIndexedArray(array_wrapper(zarr_array)) attributes = dict(attributes) From e2b410e9c992c4901a43f69e4fe5b582862016b0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 22:24:47 +0000 Subject: [PATCH 20/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cupy_xarray/kvikio.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cupy_xarray/kvikio.py b/cupy_xarray/kvikio.py index e5b7c6d..3004ef1 100644 --- a/cupy_xarray/kvikio.py +++ b/cupy_xarray/kvikio.py @@ -148,9 +148,7 @@ def open_store_variable(self, name, zarr_array): # But these should be small-ish arrays # TODO: can we tell GDSStore to load as numpy array directly # not cupy array? - array_wrapper = ( - EagerCupyZarrArrayWrapper if name in dimensions else CupyZarrArrayWrapper - ) + array_wrapper = EagerCupyZarrArrayWrapper if name in dimensions else CupyZarrArrayWrapper data = indexing.LazilyIndexedArray(array_wrapper(zarr_array)) attributes = dict(attributes)