@@ -47,8 +47,8 @@ def from_aws(*args, **kwargs):
47
47
warnings .warn ('forecast_records are not available from the AWS Open Data Program.' )
48
48
return from_rest (* args , ** kwargs )
49
49
50
- reach_id = kwargs .get ('reach_id ' , '' )
51
- reach_id = args [0 ] if len (args ) > 0 else None
50
+ river_id = kwargs .get ('river_id ' , '' )
51
+ river_id = args [0 ] if len (args ) > 0 else None
52
52
53
53
s3 = s3fs .S3FileSystem (anon = True , client_kwargs = dict (region_name = ODP_S3_BUCKET_REGION ))
54
54
if kwargs .get ('date' , '' ) and not product_name == 'dates' :
@@ -68,14 +68,14 @@ def from_aws(*args, **kwargs):
68
68
date = dates [- 1 ]
69
69
s3store = s3fs .S3Map (root = f'{ ODP_FORECAST_S3_BUCKET_URI } /{ date } ' , s3 = s3 , check = False )
70
70
71
- df = xr .open_zarr (s3store ).sel (rivid = reach_id ).to_dataframe ().round (2 ).reset_index ()
71
+ df = xr .open_zarr (s3store ).sel (rivid = river_id ).to_dataframe ().round (2 ).reset_index ()
72
72
73
73
# rename columns to match the REST API
74
- if isinstance (reach_id , int ):
74
+ if isinstance (river_id , int ):
75
75
df = df .pivot (index = 'time' , columns = 'ensemble' , values = 'Qout' )
76
76
else :
77
77
df = df .pivot (index = ['time' , 'rivid' ], columns = 'ensemble' , values = 'Qout' )
78
- df .index .names = ['time' , 'LINKNO ' ]
78
+ df .index .names = ['time' , 'river_id ' ]
79
79
df = df [sorted (df .columns )]
80
80
df .columns = [f'ensemble_{ str (x ).zfill (2 )} ' for x in df .columns ]
81
81
@@ -102,17 +102,24 @@ def from_rest(*args, **kwargs):
102
102
endpoint = f'https://{ endpoint } ' if not endpoint .startswith (('https://' , 'http://' )) else endpoint
103
103
104
104
version = kwargs .get ('version' , DEFAULT_REST_ENDPOINT_VERSION )
105
+ assert version in ('v1' , 'v2' , ), ValueError (f'Unrecognized model version parameter: { version } ' )
105
106
106
107
product_name = function .__name__ .replace ("_" , "" ).lower ()
107
108
108
- reach_id = args [0 ] if len (args ) > 0 else None
109
- reach_id = kwargs .get ('reach_id' , '' ) if not reach_id else reach_id
109
+ river_id = args [0 ] if len (args ) > 0 else None
110
+ river_id = kwargs .get ('river_id' , '' ) if not river_id else river_id
111
+ if isinstance (river_id , list ):
112
+ raise ValueError ('Multiple river_ids are not available via REST API or on v1. '
113
+ 'Use data_source="aws" and version="v2" for multiple river_ids.' )
114
+ river_id = int (river_id ) if river_id else None
115
+ if river_id and version == 'v2' :
116
+ assert river_id < 1_000_000_000 and river_id >= 110_000_000 , ValueError ('River ID must be a 9 digit integer' )
110
117
111
118
return_format = kwargs .get ('return_format' , 'csv' )
112
119
assert return_format in ('csv' , 'json' , 'url' ), f'Unsupported return format requested: { return_format } '
113
120
114
121
# request parameter validation before submitting
115
- for key in ('endpoint' , 'version' , 'reach_id ' ):
122
+ for key in ('endpoint' , 'version' , 'river_id ' ):
116
123
if key in kwargs :
117
124
del kwargs [key ]
118
125
for key , value in kwargs .items ():
@@ -129,7 +136,7 @@ def from_rest(*args, **kwargs):
129
136
130
137
# piece together the request url
131
138
request_url = f'{ endpoint } /{ version } /{ product_name } ' # build the base url
132
- request_url = f'{ request_url } /{ reach_id } ' if reach_id else request_url # add the reach_id if it exists
139
+ request_url = f'{ request_url } /{ river_id } ' if river_id else request_url # add the river_id if it exists
133
140
request_url = f'{ request_url } ?{ params } ' # add the query parameters
134
141
135
142
if return_url :
@@ -158,6 +165,7 @@ def main(*args, **kwargs):
158
165
return from_rest (* args , ** kwargs )
159
166
else :
160
167
return from_aws (* args , ** kwargs )
168
+ main .__doc__ = function .__doc__ # necessary for code documentation auto generators
161
169
return main
162
170
163
171
@@ -181,16 +189,17 @@ def dates(**kwargs) -> dict or str:
181
189
182
190
183
191
@_forecast_endpoint_decorator
184
- def forecast (* , reach_id : int , date : str , return_format : str , data_source : str ,
192
+ def forecast (* , river_id : int , date : str , return_format : str , data_source : str ,
185
193
** kwargs ) -> pd .DataFrame or dict or str :
186
194
"""
187
- Gets the average forecasted flow for a certain reach_id on a certain date
195
+ Gets the average forecasted flow for a certain river_id on a certain date
188
196
189
197
Keyword Args:
190
- reach_id: the ID of a stream, should be a 9 digit integer
191
- date: a string specifying the date to request in YYYYMMDD format, returns the latest available if not specified
192
- return_format: csv, json, or url, default csv
193
- data_source: location to query for data, either 'rest' or 'aws'. default is aws.
198
+ river_id (str): the ID of a stream, should be a 9 digit integer
199
+ date (str): a string specifying the date to request in YYYYMMDD format, returns the latest available if not specified
200
+ return_format (str): csv, json, or url, default csv
201
+ data_source (str): location to query for data, either 'rest' or 'aws'. default is aws.
202
+ version (str): the version of the API and model data to retrieve. default is 'v2'. should be 'v1' or 'v2'
194
203
195
204
Returns:
196
205
pd.DataFrame or dict or str
@@ -199,14 +208,14 @@ def forecast(*, reach_id: int, date: str, return_format: str, data_source: str,
199
208
200
209
201
210
@_forecast_endpoint_decorator
202
- def forecast_stats (* , reach_id : int , date : str , return_format : str , data_source : str ,
211
+ def forecast_stats (* , river_id : int , date : str , return_format : str , data_source : str ,
203
212
** kwargs ) -> pd .DataFrame or dict or str :
204
213
"""
205
- Retrieves the min, 25%, mean, median, 75%, and max river discharge of the 51 ensembles members for a reach_id
214
+ Retrieves the min, 25%, mean, median, 75%, and max river discharge of the 51 ensembles members for a river_id
206
215
The 52nd higher resolution member is excluded
207
216
208
217
Keyword Args:
209
- reach_id : the ID of a stream, should be a 9 digit integer
218
+ river_id : the ID of a stream, should be a 9 digit integer
210
219
date: a string specifying the date to request in YYYYMMDD format, returns the latest available if not specified
211
220
return_format: csv, json, or url, default csv
212
221
data_source: location to query for data, either 'rest' or 'aws'. default is aws.
@@ -218,13 +227,13 @@ def forecast_stats(*, reach_id: int, date: str, return_format: str, data_source:
218
227
219
228
220
229
@_forecast_endpoint_decorator
221
- def forecast_ensembles (* , reach_id : int , date : str , return_format : str , data_source : str ,
230
+ def forecast_ensembles (* , river_id : int , date : str , return_format : str , data_source : str ,
222
231
** kwargs ) -> pd .DataFrame or dict or str :
223
232
"""
224
- Retrieves each of 52 time series of forecasted discharge for a reach_id on a certain date
233
+ Retrieves each of 52 time series of forecasted discharge for a river_id on a certain date
225
234
226
235
Keyword Args:
227
- reach_id : the ID of a stream, should be a 9 digit integer
236
+ river_id : the ID of a stream, should be a 9 digit integer
228
237
date: a string specifying the date to request in YYYYMMDD format, returns the latest available if not specified
229
238
return_format: csv, json, or url, default csv
230
239
data_source: location to query for data, either 'rest' or 'aws'. default is aws.
@@ -236,13 +245,13 @@ def forecast_ensembles(*, reach_id: int, date: str, return_format: str, data_sou
236
245
237
246
238
247
@_forecast_endpoint_decorator
239
- def forecast_records (* , reach_id : int , start_date : str , end_date : str , return_format : str , data_source : str ,
248
+ def forecast_records (* , river_id : int , start_date : str , end_date : str , return_format : str , data_source : str ,
240
249
** kwargs ) -> pd .DataFrame or dict or str :
241
250
"""
242
251
Retrieves a csv showing the ensemble average forecasted flow for the year from January 1 to the current date
243
252
244
253
Keyword Args:
245
- reach_id : the ID of a stream, should be a 9 digit integer
254
+ river_id : the ID of a stream, should be a 9 digit integer
246
255
start_date: a YYYYMMDD string giving the earliest date this year to include, defaults to 14 days ago.
247
256
end_date: a YYYYMMDD string giving the latest date this year to include, defaults to latest available
248
257
data_source: location to query for data, either 'rest' or 'aws'. default is aws.
@@ -255,20 +264,20 @@ def forecast_records(*, reach_id: int, start_date: str, end_date: str, return_fo
255
264
256
265
257
266
# Retrospective simulation and derived products
258
- def retrospective (reach_id : int or list ) -> pd .DataFrame :
267
+ def retrospective (river_id : int or list ) -> pd .DataFrame :
259
268
"""
260
- Retrieves the retrospective simulation of streamflow for a given reach_id from the
269
+ Retrieves the retrospective simulation of streamflow for a given river_id from the
261
270
AWS Open Data Program GEOGloWS V2 S3 bucket
262
271
263
272
Args:
264
- reach_id : the ID of a stream, should be a 9 digit integer
273
+ river_id : the ID of a stream, should be a 9 digit integer
265
274
266
275
Returns:
267
276
pd.DataFrame
268
277
"""
269
278
s3 = s3fs .S3FileSystem (anon = True , client_kwargs = dict (region_name = ODP_S3_BUCKET_REGION ))
270
279
s3store = s3fs .S3Map (root = f'{ ODP_RETROSPECTIVE_S3_BUCKET_URI } /retrospective.zarr' , s3 = s3 , check = False )
271
- return (xr .open_zarr (s3store ).sel (rivid = reach_id ).to_dataframe ().reset_index ().set_index ('time' )
280
+ return (xr .open_zarr (s3store ).sel (rivid = river_id ).to_dataframe ().reset_index ().set_index ('time' )
272
281
.pivot (columns = 'rivid' , values = 'Qout' ))
273
282
274
283
@@ -277,61 +286,61 @@ def historical(*args, **kwargs):
277
286
return retrospective (* args , ** kwargs )
278
287
279
288
280
- def daily_averages (reach_id : int or list ) -> pd .DataFrame :
289
+ def daily_averages (river_id : int or list ) -> pd .DataFrame :
281
290
"""
282
- Retrieves daily average streamflow for a given reach_id
291
+ Retrieves daily average streamflow for a given river_id
283
292
284
293
Args:
285
- reach_id : the ID of a stream, should be a 9 digit integer
294
+ river_id : the ID of a stream, should be a 9 digit integer
286
295
287
296
Returns:
288
297
pd.DataFrame
289
298
"""
290
- df = retrospective (reach_id )
299
+ df = retrospective (river_id )
291
300
return calc_daily_averages (df )
292
301
293
302
294
- def monthly_averages (reach_id : int or list ) -> pd .DataFrame :
303
+ def monthly_averages (river_id : int or list ) -> pd .DataFrame :
295
304
"""
296
- Retrieves monthly average streamflow for a given reach_id
305
+ Retrieves monthly average streamflow for a given river_id
297
306
298
307
Args:
299
- reach_id : the ID of a stream, should be a 9 digit integer
308
+ river_id : the ID of a stream, should be a 9 digit integer
300
309
301
310
Returns:
302
311
pd.DataFrame
303
312
"""
304
- df = retrospective (reach_id )
313
+ df = retrospective (river_id )
305
314
return calc_monthly_averages (df )
306
315
307
316
308
- def annual_averages (reach_id : int or list ) -> pd .DataFrame :
317
+ def annual_averages (river_id : int or list ) -> pd .DataFrame :
309
318
"""
310
- Retrieves annual average streamflow for a given reach_id
319
+ Retrieves annual average streamflow for a given river_id
311
320
312
321
Args:
313
- reach_id : the ID of a stream, should be a 9 digit integer
322
+ river_id : the ID of a stream, should be a 9 digit integer
314
323
315
324
Returns:
316
325
pd.DataFrame
317
326
"""
318
- df = retrospective (reach_id )
327
+ df = retrospective (river_id )
319
328
return calc_annual_averages (df )
320
329
321
330
322
- def return_periods (reach_id : int or list ) -> pd .DataFrame :
331
+ def return_periods (river_id : int or list ) -> pd .DataFrame :
323
332
"""
324
- Retrieves the return period thresholds based on a specified historic simulation forcing on a certain reach_id .
333
+ Retrieves the return period thresholds based on a specified historic simulation forcing on a certain river_id .
325
334
326
335
Args:
327
- reach_id : the ID of a stream, should be a 9 digit integer
336
+ river_id : the ID of a stream, should be a 9 digit integer
328
337
329
338
Returns:
330
339
pd.DataFrame
331
340
"""
332
341
s3 = s3fs .S3FileSystem (anon = True , client_kwargs = dict (region_name = ODP_S3_BUCKET_REGION ))
333
342
s3store = s3fs .S3Map (root = f'{ ODP_RETROSPECTIVE_S3_BUCKET_URI } /return-periods.zarr' , s3 = s3 , check = False )
334
- return (xr .open_zarr (s3store ).sel (rivid = reach_id )['return_period_flow' ].to_dataframe ().reset_index ()
343
+ return (xr .open_zarr (s3store ).sel (rivid = river_id )['return_period_flow' ].to_dataframe ().reset_index ()
335
344
.pivot (index = 'rivid' , columns = 'return_period' , values = 'return_period_flow' ))
336
345
337
346
0 commit comments