Skip to content

Commit e80ca2d

Browse files
authored
adds deprecated streamflow module for backwards compatibility. corrects documentation generation (#32)
1 parent 24e2c10 commit e80ca2d

File tree

8 files changed

+501
-81
lines changed

8 files changed

+501
-81
lines changed

docs/api-documentation.rst

+1-16
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,4 @@ There are 3 modules in the geoglows package.
1111
api-documentation/bias
1212
api-documentation/plots
1313
api-documentation/analyze
14-
15-
16-
FAQ
17-
~~~
18-
19-
How do I save streamflow data to csv?
20-
-------------------------------------
21-
By default, the results of most of the `geoglows.data` functions return a pandas DataFrame. You can save those to
22-
a csv, json, pickle, or other file. For example, save to csv with the dataframe's ``.to_csv()`` method.
23-
24-
.. code-block:: python
25-
26-
# get some data from the geoglows streamflow model
27-
data = geoglows.streamflow.forecast_stats(12341234)
28-
# save it to a csv
29-
data.to_csv('/path/to/save/the/csv/file.csv')
14+
api-documentation/streamflow

docs/api-documentation/data.rst

+5
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@ To find a LINKNO (river ID number), please refer to https://data.geoglows.org an
1313
Forecasted Streamflow
1414
---------------------
1515

16+
.. automodule:: geoglows.data
17+
:members:
18+
forecast, forecast_stats, forecast_ensembles, forecast_records
19+
:noindex:
20+
1621
Historical Simulation
1722
---------------------
1823

docs/api-documentation/plots.rst

+3-17
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,7 @@ Plots for Streamflow Data
77

88
.. automodule:: geoglows.plots
99
:members:
10-
hydroviewer, forecast_stats, forecast_records, forecast_ensembles, historic_simulation, flow_duration_curve
11-
:noindex:
12-
13-
Tables for Streamflow Data
14-
~~~~~~~~~~~~~~~~~~~~~~~~~~
15-
16-
.. automodule:: geoglows.plots
17-
:members:
18-
probabilities_table, return_periods_table
19-
:noindex:
20-
21-
Plots for Bias Corrected Data
22-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
23-
24-
.. automodule:: geoglows.plots
25-
:members:
26-
corrected_historical, corrected_scatterplots, corrected_day_average, corrected_month_average, corrected_volume_compare
10+
forecast, forecast_stats, forecast_ensembles,
11+
retrospective, annual_averages, monthly_averages, daily_averages,
12+
daily_variance, flow_duration_curve
2713
:noindex:

docs/api-documentation/streamflow.rst

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
===================
2+
geoglows.streamflow
3+
===================
4+
5+
THIS MODULE IS DEPRECATED. Please update your code to use the new GEOGLOWS model and data services. Analogous functions
6+
to everything in this module are found in the `geoglows.data` or `geoglows.streams` modules.
7+
8+
The streamflow module provides a series of functions for requesting forecasted and historical data from the GEOGloWS
9+
ECMWF Streamflow Service for Model and Data Services Version 1.
10+
11+
Forecasted Streamflow
12+
---------------------
13+
14+
.. automodule:: geoglows.streamflow
15+
:members:
16+
forecast_stats, forecast_ensembles, forecast_warnings, forecast_records
17+
18+
Historically Simulated Streamflow
19+
---------------------------------
20+
21+
.. automodule:: geoglows.streamflow
22+
:members:
23+
historic_simulation, return_periods, daily_averages, monthly_averages
24+
:noindex:
25+
26+
GEOGloWS Model Utilities
27+
------------------------
28+
29+
.. automodule:: geoglows.streamflow
30+
:members:
31+
available_dates
32+
:noindex:

geoglows/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44
import geoglows.analyze
55
import geoglows.streams
66
import geoglows.tables
7+
import geoglows.streamflow
78

89
from ._constants import METADATA_TABLE_PATH
910

1011
__all__ = [
1112
'bias', 'plots', 'data', 'analyze', 'streams', 'tables',
1213
'METADATA_TABLE_PATH'
1314
]
14-
__version__ = '1.0.4'
15+
__version__ = '1.1.0'
1516
__author__ = 'Riley Hales'
1617
__license__ = 'BSD 3-Clause Clear License'

geoglows/data.py

+52-43
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ def from_aws(*args, **kwargs):
4747
warnings.warn('forecast_records are not available from the AWS Open Data Program.')
4848
return from_rest(*args, **kwargs)
4949

50-
reach_id = kwargs.get('reach_id', '')
51-
reach_id = args[0] if len(args) > 0 else None
50+
river_id = kwargs.get('river_id', '')
51+
river_id = args[0] if len(args) > 0 else None
5252

5353
s3 = s3fs.S3FileSystem(anon=True, client_kwargs=dict(region_name=ODP_S3_BUCKET_REGION))
5454
if kwargs.get('date', '') and not product_name == 'dates':
@@ -68,14 +68,14 @@ def from_aws(*args, **kwargs):
6868
date = dates[-1]
6969
s3store = s3fs.S3Map(root=f'{ODP_FORECAST_S3_BUCKET_URI}/{date}', s3=s3, check=False)
7070

71-
df = xr.open_zarr(s3store).sel(rivid=reach_id).to_dataframe().round(2).reset_index()
71+
df = xr.open_zarr(s3store).sel(rivid=river_id).to_dataframe().round(2).reset_index()
7272

7373
# rename columns to match the REST API
74-
if isinstance(reach_id, int):
74+
if isinstance(river_id, int):
7575
df = df.pivot(index='time', columns='ensemble', values='Qout')
7676
else:
7777
df = df.pivot(index=['time', 'rivid'], columns='ensemble', values='Qout')
78-
df.index.names = ['time', 'LINKNO']
78+
df.index.names = ['time', 'river_id']
7979
df = df[sorted(df.columns)]
8080
df.columns = [f'ensemble_{str(x).zfill(2)}' for x in df.columns]
8181

@@ -102,17 +102,24 @@ def from_rest(*args, **kwargs):
102102
endpoint = f'https://{endpoint}' if not endpoint.startswith(('https://', 'http://')) else endpoint
103103

104104
version = kwargs.get('version', DEFAULT_REST_ENDPOINT_VERSION)
105+
assert version in ('v1', 'v2', ), ValueError(f'Unrecognized model version parameter: {version}')
105106

106107
product_name = function.__name__.replace("_", "").lower()
107108

108-
reach_id = args[0] if len(args) > 0 else None
109-
reach_id = kwargs.get('reach_id', '') if not reach_id else reach_id
109+
river_id = args[0] if len(args) > 0 else None
110+
river_id = kwargs.get('river_id', '') if not river_id else river_id
111+
if isinstance(river_id, list):
112+
raise ValueError('Multiple river_ids are not available via REST API or on v1. '
113+
'Use data_source="aws" and version="v2" for multiple river_ids.')
114+
river_id = int(river_id) if river_id else None
115+
if river_id and version == 'v2':
116+
assert river_id < 1_000_000_000 and river_id >= 110_000_000, ValueError('River ID must be a 9 digit integer')
110117

111118
return_format = kwargs.get('return_format', 'csv')
112119
assert return_format in ('csv', 'json', 'url'), f'Unsupported return format requested: {return_format}'
113120

114121
# request parameter validation before submitting
115-
for key in ('endpoint', 'version', 'reach_id'):
122+
for key in ('endpoint', 'version', 'river_id'):
116123
if key in kwargs:
117124
del kwargs[key]
118125
for key, value in kwargs.items():
@@ -129,7 +136,7 @@ def from_rest(*args, **kwargs):
129136

130137
# piece together the request url
131138
request_url = f'{endpoint}/{version}/{product_name}' # build the base url
132-
request_url = f'{request_url}/{reach_id}' if reach_id else request_url # add the reach_id if it exists
139+
request_url = f'{request_url}/{river_id}' if river_id else request_url # add the river_id if it exists
133140
request_url = f'{request_url}?{params}' # add the query parameters
134141

135142
if return_url:
@@ -158,6 +165,7 @@ def main(*args, **kwargs):
158165
return from_rest(*args, **kwargs)
159166
else:
160167
return from_aws(*args, **kwargs)
168+
main.__doc__ = function.__doc__ # necessary for code documentation auto generators
161169
return main
162170

163171

@@ -181,16 +189,17 @@ def dates(**kwargs) -> dict or str:
181189

182190

183191
@_forecast_endpoint_decorator
184-
def forecast(*, reach_id: int, date: str, return_format: str, data_source: str,
192+
def forecast(*, river_id: int, date: str, return_format: str, data_source: str,
185193
**kwargs) -> pd.DataFrame or dict or str:
186194
"""
187-
Gets the average forecasted flow for a certain reach_id on a certain date
195+
Gets the average forecasted flow for a certain river_id on a certain date
188196
189197
Keyword Args:
190-
reach_id: the ID of a stream, should be a 9 digit integer
191-
date: a string specifying the date to request in YYYYMMDD format, returns the latest available if not specified
192-
return_format: csv, json, or url, default csv
193-
data_source: location to query for data, either 'rest' or 'aws'. default is aws.
198+
river_id (int): the ID of a stream, should be a 9 digit integer
199+
date (str): a string specifying the date to request in YYYYMMDD format, returns the latest available if not specified
200+
return_format (str): csv, json, or url, default csv
201+
data_source (str): location to query for data, either 'rest' or 'aws'. default is aws.
202+
version (str): the version of the API and model data to retrieve. default is 'v2'. should be 'v1' or 'v2'
194203
195204
Returns:
196205
pd.DataFrame or dict or str
@@ -199,14 +208,14 @@ def forecast(*, reach_id: int, date: str, return_format: str, data_source: str,
199208

200209

201210
@_forecast_endpoint_decorator
202-
def forecast_stats(*, reach_id: int, date: str, return_format: str, data_source: str,
211+
def forecast_stats(*, river_id: int, date: str, return_format: str, data_source: str,
203212
**kwargs) -> pd.DataFrame or dict or str:
204213
"""
205-
Retrieves the min, 25%, mean, median, 75%, and max river discharge of the 51 ensembles members for a reach_id
214+
Retrieves the min, 25%, mean, median, 75%, and max river discharge of the 51 ensemble members for a river_id
206215
The 52nd higher resolution member is excluded
207216
208217
Keyword Args:
209-
reach_id: the ID of a stream, should be a 9 digit integer
218+
river_id: the ID of a stream, should be a 9 digit integer
210219
date: a string specifying the date to request in YYYYMMDD format, returns the latest available if not specified
211220
return_format: csv, json, or url, default csv
212221
data_source: location to query for data, either 'rest' or 'aws'. default is aws.
@@ -218,13 +227,13 @@ def forecast_stats(*, reach_id: int, date: str, return_format: str, data_source:
218227

219228

220229
@_forecast_endpoint_decorator
221-
def forecast_ensembles(*, reach_id: int, date: str, return_format: str, data_source: str,
230+
def forecast_ensembles(*, river_id: int, date: str, return_format: str, data_source: str,
222231
**kwargs) -> pd.DataFrame or dict or str:
223232
"""
224-
Retrieves each of 52 time series of forecasted discharge for a reach_id on a certain date
233+
Retrieves each of 52 time series of forecasted discharge for a river_id on a certain date
225234
226235
Keyword Args:
227-
reach_id: the ID of a stream, should be a 9 digit integer
236+
river_id: the ID of a stream, should be a 9 digit integer
228237
date: a string specifying the date to request in YYYYMMDD format, returns the latest available if not specified
229238
return_format: csv, json, or url, default csv
230239
data_source: location to query for data, either 'rest' or 'aws'. default is aws.
@@ -236,13 +245,13 @@ def forecast_ensembles(*, reach_id: int, date: str, return_format: str, data_sou
236245

237246

238247
@_forecast_endpoint_decorator
239-
def forecast_records(*, reach_id: int, start_date: str, end_date: str, return_format: str, data_source: str,
248+
def forecast_records(*, river_id: int, start_date: str, end_date: str, return_format: str, data_source: str,
240249
**kwargs) -> pd.DataFrame or dict or str:
241250
"""
242251
Retrieves a csv showing the ensemble average forecasted flow for the year from January 1 to the current date
243252
244253
Keyword Args:
245-
reach_id: the ID of a stream, should be a 9 digit integer
254+
river_id: the ID of a stream, should be a 9 digit integer
246255
start_date: a YYYYMMDD string giving the earliest date this year to include, defaults to 14 days ago.
247256
end_date: a YYYYMMDD string giving the latest date this year to include, defaults to latest available
248257
data_source: location to query for data, either 'rest' or 'aws'. default is aws.
@@ -255,20 +264,20 @@ def forecast_records(*, reach_id: int, start_date: str, end_date: str, return_fo
255264

256265

257266
# Retrospective simulation and derived products
258-
def retrospective(reach_id: int or list) -> pd.DataFrame:
267+
def retrospective(river_id: int or list) -> pd.DataFrame:
259268
"""
260-
Retrieves the retrospective simulation of streamflow for a given reach_id from the
269+
Retrieves the retrospective simulation of streamflow for a given river_id from the
261270
AWS Open Data Program GEOGloWS V2 S3 bucket
262271
263272
Args:
264-
reach_id: the ID of a stream, should be a 9 digit integer
273+
river_id: the ID of a stream, should be a 9 digit integer
265274
266275
Returns:
267276
pd.DataFrame
268277
"""
269278
s3 = s3fs.S3FileSystem(anon=True, client_kwargs=dict(region_name=ODP_S3_BUCKET_REGION))
270279
s3store = s3fs.S3Map(root=f'{ODP_RETROSPECTIVE_S3_BUCKET_URI}/retrospective.zarr', s3=s3, check=False)
271-
return (xr.open_zarr(s3store).sel(rivid=reach_id).to_dataframe().reset_index().set_index('time')
280+
return (xr.open_zarr(s3store).sel(rivid=river_id).to_dataframe().reset_index().set_index('time')
272281
.pivot(columns='rivid', values='Qout'))
273282

274283

@@ -277,61 +286,61 @@ def historical(*args, **kwargs):
277286
return retrospective(*args, **kwargs)
278287

279288

280-
def daily_averages(reach_id: int or list) -> pd.DataFrame:
289+
def daily_averages(river_id: int or list) -> pd.DataFrame:
281290
"""
282-
Retrieves daily average streamflow for a given reach_id
291+
Retrieves daily average streamflow for a given river_id
283292
284293
Args:
285-
reach_id: the ID of a stream, should be a 9 digit integer
294+
river_id: the ID of a stream, should be a 9 digit integer
286295
287296
Returns:
288297
pd.DataFrame
289298
"""
290-
df = retrospective(reach_id)
299+
df = retrospective(river_id)
291300
return calc_daily_averages(df)
292301

293302

294-
def monthly_averages(reach_id: int or list) -> pd.DataFrame:
303+
def monthly_averages(river_id: int or list) -> pd.DataFrame:
295304
"""
296-
Retrieves monthly average streamflow for a given reach_id
305+
Retrieves monthly average streamflow for a given river_id
297306
298307
Args:
299-
reach_id: the ID of a stream, should be a 9 digit integer
308+
river_id: the ID of a stream, should be a 9 digit integer
300309
301310
Returns:
302311
pd.DataFrame
303312
"""
304-
df = retrospective(reach_id)
313+
df = retrospective(river_id)
305314
return calc_monthly_averages(df)
306315

307316

308-
def annual_averages(reach_id: int or list) -> pd.DataFrame:
317+
def annual_averages(river_id: int or list) -> pd.DataFrame:
309318
"""
310-
Retrieves annual average streamflow for a given reach_id
319+
Retrieves annual average streamflow for a given river_id
311320
312321
Args:
313-
reach_id: the ID of a stream, should be a 9 digit integer
322+
river_id: the ID of a stream, should be a 9 digit integer
314323
315324
Returns:
316325
pd.DataFrame
317326
"""
318-
df = retrospective(reach_id)
327+
df = retrospective(river_id)
319328
return calc_annual_averages(df)
320329

321330

322-
def return_periods(reach_id: int or list) -> pd.DataFrame:
331+
def return_periods(river_id: int or list) -> pd.DataFrame:
323332
"""
324-
Retrieves the return period thresholds based on a specified historic simulation forcing on a certain reach_id.
333+
Retrieves the return period thresholds based on a specified historic simulation forcing on a certain river_id.
325334
326335
Args:
327-
reach_id: the ID of a stream, should be a 9 digit integer
336+
river_id: the ID of a stream, should be a 9 digit integer
328337
329338
Returns:
330339
pd.DataFrame
331340
"""
332341
s3 = s3fs.S3FileSystem(anon=True, client_kwargs=dict(region_name=ODP_S3_BUCKET_REGION))
333342
s3store = s3fs.S3Map(root=f'{ODP_RETROSPECTIVE_S3_BUCKET_URI}/return-periods.zarr', s3=s3, check=False)
334-
return (xr.open_zarr(s3store).sel(rivid=reach_id)['return_period_flow'].to_dataframe().reset_index()
343+
return (xr.open_zarr(s3store).sel(rivid=river_id)['return_period_flow'].to_dataframe().reset_index()
335344
.pivot(index='rivid', columns='return_period', values='return_period_flow'))
336345

337346

0 commit comments

Comments
 (0)