Skip to content

Commit

Permalink
Merge pull request #403 from gerritholl/remote-netcdf
Browse files Browse the repository at this point in the history
Add filehandler for remote netcdf reading
  • Loading branch information
olemke authored Apr 25, 2022
2 parents c8f4b55 + f0b163b commit 822352e
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 1 deletion.
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ dependencies:
- cython
- fsspec
- gdal
- h5netcdf
- keras
- matplotlib>=1.4
- netCDF4>=1.1.1
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ cartopy
cython
fsspec
gdal
h5netcdf
keras
matplotlib>=1.4
nbsphinx
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
install_requires=[
"docutils",
"fsspec",
"h5netcdf",
"imageio",
"matplotlib>=1.4",
"netCDF4>=1.1.1",
Expand Down
50 changes: 50 additions & 0 deletions typhon/files/handlers/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,56 @@ def _split_path(path):
return None, path
return path.rsplit("/", 1)

def _ensure_local_filesystem(self, file_info):
    """Reject file infos that do not live on a local file system.

    Args:
        file_info: Object whose ``file_system`` attribute is checked
            against ``LocalFileSystem``.

    Raises:
        NotImplementedError: If ``file_info.file_system`` is not a
            ``LocalFileSystem`` instance.  The message names this
            handler class and the offending file system class, and
            points the user to ``FSNetCDF``.
    """
    if isinstance(file_info.file_system, LocalFileSystem):
        # Local file system: nothing to complain about.
        return

    message = (
        f"File handler {type(self).__name__:s} can only "
        "read from local file system, not from "
        f"{str(type(file_info.file_system).__name__)}. "
        "Use FSNetCDF instead.")
    raise NotImplementedError(message)


class FSNetCDF(FileHandler):
    """File handler for reading NetCDF files via alternate file systems.

    Alternative to the NetCDF file handler for reading files from
    alternative file systems, such as remote file systems.  Does not
    support writing or multi-group files.

    Every dataset returned by :meth:`read` (and the file object it was
    opened from) is remembered by the handler and stays open until
    :meth:`close_all` is called or the handler is deleted.

    Usage example with fileset::

        fs = FileSet(
            path=(
                "noaa-goes16/GLM-L2-LCFA/{year}/{doy}/{hour}/"
                "OR_GLM-L2-LCFA_G16_s{year}{doy}{hour}{minute}{second}*_"
                "e{end_year}{end_doy}{end_hour}{end_minute}{end_second}*_c*.nc"),
            fs=s3fs.S3FileSystem(anon=True),
            handler=FSNetCDF())
        finf = fs.find_closest(datetime.datetime(2021, 11, 10, 10))
        fs.read(finf)
    """

    def __init__(self, *args, **kwargs):
        """Initialise the handler; all arguments go to ``FileHandler``."""
        super().__init__(*args, **kwargs)
        # Datasets handed out by read(), kept so close_all() can close them.
        self._open = []
        # File objects those datasets were opened from.  Closing a dataset
        # is not guaranteed to close the file object it was given, so they
        # are tracked and closed explicitly.
        self._open_files = []

    @expects_file_info()
    def read(self, file_info, fields=None, mapping=None, **kwargs):
        """Read possibly remote NetCDF file.

        Args:
            file_info: File info whose ``file_system`` is used to open
                ``file_info.path``.
            fields: Accepted for interface compatibility; currently
                ignored.
            mapping: Accepted for interface compatibility; currently
                ignored.
            **kwargs: Accepted for interface compatibility; currently
                ignored.

        Returns:
            The dataset returned by ``xr.open_dataset`` with the
            ``h5netcdf`` engine.  It remains open until
            :meth:`close_all` is called.
        """
        fp = file_info.file_system.open(file_info.path)
        try:
            ds = xr.open_dataset(fp, engine="h5netcdf")
        except Exception:
            # Do not leak the (possibly remote) file object when the
            # content cannot be opened as a dataset.
            fp.close()
            raise
        self._open.append(ds)
        self._open_files.append(fp)
        return ds

    def close_all(self):
        """Close all open files.

        Closes every dataset handed out by :meth:`read` and then the
        file objects they were opened from, oldest first.
        """
        # getattr: __del__ may run on an instance whose __init__ failed
        # before the tracking lists were created.
        datasets = getattr(self, "_open", [])
        files = getattr(self, "_open_files", [])
        while datasets:
            datasets.pop(0).close()
        # Datasets first, then the file objects they read from.
        while files:
            files.pop(0).close()

    def __del__(self):
        """Release everything still open when the handler is deleted."""
        self.close_all()


class Plotter(FileHandler):
"""File handler that can save matplotlib.figure objects to a file.
Expand Down
34 changes: 33 additions & 1 deletion typhon/tests/files/handlers/test_netcdf4.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import datetime
import os
import tempfile

import numpy as np
import pytest
import xarray as xr

from typhon.files import NetCDF4
Expand Down Expand Up @@ -71,7 +73,7 @@ def test_scalar_masked(self):
before["a"].encoding = {"_FillValue": 42}
fh.write(before, tfile)
after = fh.read(tfile)
assert np.isnan(after["a"]) # fill value should become nan
assert np.isnan(after["a"]) # fill value should become nan

def test_times(self):
"""Test if times are read correctly
Expand Down Expand Up @@ -113,3 +115,33 @@ def test_scalefactor(self):
fh.write(before, tfile)
after = fh.read(tfile)
assert np.allclose(before["a"], after["a"])


class TestFSNetCDF:
    """Test filesystem-NetCDF file handler."""

    @pytest.fixture
    def fake_info(self, tmp_path):
        """Create a fake NetCDF file and return associated file info.

        Writes a 5x5 integer variable ``soy`` with dimensions
        ``("y", "x")`` to ``test.nc`` inside pytest's ``tmp_path`` and
        wraps its path in a ``FileInfo`` bound to a local file system.
        """
        from typhon.files.handlers.common import FileInfo
        from fsspec.implementations.local import LocalFileSystem

        path = tmp_path / "test.nc"
        ds = xr.Dataset(
            {"soy": xr.DataArray(
                np.arange(25).reshape(5, 5),
                dims=("y", "x"))})
        ds.to_netcdf(path)
        # Start and end time are the same instant.
        now = datetime.datetime.now()
        return FileInfo(
            os.fspath(path),
            times=[now, now],
            fs=LocalFileSystem())

    def test_fsnetcdf_handler(self, fake_info):
        """Test that the filehandler reads and closes."""
        from typhon.files.handlers.common import FSNetCDF

        handler = FSNetCDF()
        try:
            ds = handler.read(fake_info)
            np.testing.assert_array_equal(
                ds["soy"].data,
                np.arange(25).reshape(5, 5))
        finally:
            # Close even when the comparison fails, so a failing test
            # does not leave the temporary file open.
            handler.close_all()
        # After close_all() the handler must not track any open dataset.
        assert not handler._open

0 comments on commit 822352e

Please sign in to comment.