6
6
import requests
7
7
import s3fs
8
8
import xarray as xr
9
+ import numpy as np
9
10
10
11
from ._constants import METADATA_TABLE_PATH
11
12
from .analyze import (
@@ -85,7 +86,7 @@ def from_aws(*args, **kwargs):
85
86
df = ds .to_dataframe ().round (2 ).reset_index ()
86
87
87
88
# rename columns to match the REST API
88
- if isinstance (river_id , int ):
89
+ if isinstance (river_id , int ) or isinstance ( river_id , np . int64 ) :
89
90
df = df .pivot (index = 'time' , columns = 'ensemble' , values = 'Qout' )
90
91
else :
91
92
df = df .pivot (index = ['time' , 'rivid' ], columns = 'ensemble' , values = 'Qout' )
@@ -120,7 +121,7 @@ def from_rest(*args, **kwargs):
120
121
endpoint = f'https://{ endpoint } ' if not endpoint .startswith (('https://' , 'http://' )) else endpoint
121
122
122
123
version = kwargs .get ('version' , DEFAULT_REST_ENDPOINT_VERSION )
123
- assert version in ('v1' , ' v2' , ), ValueError (f'Unrecognized model version parameter: { version } ' )
124
+ assert version in ('v2' , ), ValueError (f'Unrecognized model version parameter: { version } ' )
124
125
125
126
product_name = function .__name__ .replace ("_" , "" ).lower ()
126
127
@@ -131,7 +132,7 @@ def from_rest(*args, **kwargs):
131
132
'Use data_source="aws" and version="v2" for multiple river_ids.' )
132
133
river_id = int (river_id ) if river_id else None
133
134
if river_id and version == 'v2' :
134
- assert river_id < 1_000_000_000 and river_id >= 110_000_000 , ValueError ('River ID must be a 9 digit integer' )
135
+ assert 1_000_000_000 > river_id >= 110_000_000 , ValueError ('River ID must be a 9 digit integer' )
135
136
136
137
# request parameter validation before submitting
137
138
for key in ('endpoint' , 'version' , 'river_id' ):
@@ -178,8 +179,7 @@ def main(*args, **kwargs):
178
179
assert source in ('rest' , 'aws' ), ValueError (f'Unrecognized data source requested: { source } ' )
179
180
if source == 'rest' :
180
181
return from_rest (* args , ** kwargs )
181
- else :
182
- return from_aws (* args , ** kwargs )
182
+ return from_aws (* args , ** kwargs )
183
183
main .__doc__ = function .__doc__ # necessary for code documentation auto generators
184
184
return main
185
185
@@ -191,7 +191,7 @@ def dates(**kwargs) -> dict or str:
191
191
Gets a list of available forecast product dates
192
192
193
193
Keyword Args:
194
- data_source: location to query for data, either 'rest' or 'aws'. default is aws.
194
+ data_source (str) : location to query for data, either 'rest' or 'aws'. default is aws.
195
195
196
196
Returns:
197
197
dict or str
@@ -204,14 +204,14 @@ def dates(**kwargs) -> dict or str:
204
204
205
205
@_forecast_endpoint_decorator
206
206
def forecast (* , river_id : int , date : str , format : str , data_source : str ,
207
- ** kwargs ) -> pd .DataFrame or dict or str :
207
+ ** kwargs ) -> pd .DataFrame or xr . Dataset :
208
208
"""
209
209
Gets the average forecasted flow for a certain river_id on a certain date
210
210
211
211
Keyword Args:
212
- river_id (str ): the ID of a stream, should be a 9 digit integer
212
+ river_id (int ): the ID of a stream, should be a 9 digit integer
213
213
date (str): a string specifying the date to request in YYYYMMDD format, returns the latest available if not specified
214
- format (str) : csv, json, or url, default csv
214
+ format (str) : if data_source=="rest": csv, json, or url, default csv. if data_source=="aws": df or xarray
215
215
data_source (str): location to query for data, either 'rest' or 'aws'. default is aws.
216
216
217
217
Returns:
@@ -222,16 +222,16 @@ def forecast(*, river_id: int, date: str, format: str, data_source: str,
222
222
223
223
@_forecast_endpoint_decorator
224
224
def forecast_stats (* , river_id : int , date : str , format : str , data_source : str ,
225
- ** kwargs ) -> pd .DataFrame or dict or str :
225
+ ** kwargs ) -> pd .DataFrame or xr . Dataset :
226
226
"""
227
227
Retrieves the min, 25%, mean, median, 75%, and max river discharge of the 51 ensembles members for a river_id
228
228
The 52nd higher resolution member is excluded
229
229
230
230
Keyword Args:
231
- river_id: the ID of a stream, should be a 9 digit integer
232
- date: a string specifying the date to request in YYYYMMDD format, returns the latest available if not specified
233
- format: if data_source=="rest": csv, json, or url, default csv. if data_source=="aws": df or xarray
234
- data_source: location to query for data, either 'rest' or 'aws'. default is aws.
231
+ river_id (int) : the ID of a stream, should be a 9 digit integer
232
+ date (str) : a string specifying the date to request in YYYYMMDD format, returns the latest available if not specified
233
+ format (str) : if data_source=="rest": csv, json, or url, default csv. if data_source=="aws": df or xarray
234
+ data_source (str) : location to query for data, either 'rest' or 'aws'. default is aws.
235
235
236
236
Returns:
237
237
pd.DataFrame or dict or str
@@ -241,15 +241,15 @@ def forecast_stats(*, river_id: int, date: str, format: str, data_source: str,
241
241
242
242
@_forecast_endpoint_decorator
243
243
def forecast_ensembles (* , river_id : int , date : str , format : str , data_source : str ,
244
- ** kwargs ) -> pd .DataFrame or dict or str :
244
+ ** kwargs ) -> pd .DataFrame or xr . Dataset :
245
245
"""
246
246
Retrieves each of 52 time series of forecasted discharge for a river_id on a certain date
247
247
248
248
Keyword Args:
249
- river_id: the ID of a stream, should be a 9 digit integer
250
- date: a string specifying the date to request in YYYYMMDD format, returns the latest available if not specified
251
- format: if data_source=="rest": csv, json, or url, default csv. if data_source=="aws": df or xarray
252
- data_source: location to query for data, either 'rest' or 'aws'. default is aws.
249
+ river_id (int) : the ID of a stream, should be a 9 digit integer
250
+ date (str) : a string specifying the date to request in YYYYMMDD format, returns the latest available if not specified
251
+ format (str) : if data_source=="rest": csv, json, or url, default csv. if data_source=="aws": df or xarray
252
+ data_source (str) : location to query for data, either 'rest' or 'aws'. default is aws.
253
253
254
254
Returns:
255
255
pd.DataFrame or dict or str
@@ -258,17 +258,16 @@ def forecast_ensembles(*, river_id: int, date: str, format: str, data_source: st
258
258
259
259
260
260
@_forecast_endpoint_decorator
261
- def forecast_records (* , river_id : int , start_date : str , end_date : str , format : str , data_source : str ,
261
+ def forecast_records (* , river_id : int , start_date : str , end_date : str , format : str ,
262
262
** kwargs ) -> pd .DataFrame or dict or str :
263
263
"""
264
264
Retrieves a csv showing the ensemble average forecasted flow for the year from January 1 to the current date
265
265
266
266
Keyword Args:
267
- river_id: the ID of a stream, should be a 9 digit integer
268
- start_date: a YYYYMMDD string giving the earliest date this year to include, defaults to 14 days ago.
269
- end_date: a YYYYMMDD string giving the latest date this year to include, defaults to latest available
270
- data_source: location to query for data, either 'rest' or 'aws'. default is aws.
271
- format: if data_source=="rest": csv, json, or url, default csv. if data_source=="aws": df or xarray
267
+ river_id (int): the ID of a stream, should be a 9 digit integer
268
+ start_date (str): a YYYYMMDD string giving the earliest date this year to include, defaults to 14 days ago.
269
+ end_date (str): a YYYYMMDD string giving the latest date this year to include, defaults to latest available
270
+ format (str): csv, json, or url, default csv.
272
271
273
272
Returns:
274
273
pd.DataFrame or dict or str
@@ -280,11 +279,11 @@ def forecast_records(*, river_id: int, start_date: str, end_date: str, format: s
280
279
def retrospective (river_id : int or list , format : str = 'df' ) -> pd .DataFrame or xr .Dataset :
281
280
"""
282
281
Retrieves the retrospective simulation of streamflow for a given river_id from the
283
- AWS Open Data Program GEOGloWS V2 S3 bucket
282
+ AWS Open Data Program GEOGLOWS V2 S3 bucket
284
283
285
284
Args:
286
- river_id: the ID of a stream, should be a 9 digit integer
287
- format: the format to return the data, either 'df' or 'xarray'. default is 'df'
285
+ river_id (int or list) : the ID of a stream, or a list of IDs; each should be a 9 digit integer
286
+ format (str) : the format to return the data, either 'df' or 'xarray'. default is 'df'
288
287
289
288
Returns:
290
289
pd.DataFrame
@@ -302,12 +301,12 @@ def historical(*args, **kwargs):
302
301
return retrospective (* args , ** kwargs )
303
302
304
303
305
- def daily_averages (river_id : int or list ) -> pd .DataFrame or xr . Dataset :
304
+ def daily_averages (river_id : int or list ) -> pd .DataFrame :
306
305
"""
307
306
Retrieves daily average streamflow for a given river_id
308
307
309
308
Args:
310
- river_id: the ID of a stream, should be a 9 digit integer
309
+ river_id (int or list) : the ID of a stream, or a list of IDs; each should be a 9 digit integer
311
310
312
311
Returns:
313
312
pd.DataFrame
@@ -321,7 +320,7 @@ def monthly_averages(river_id: int or list) -> pd.DataFrame:
321
320
Retrieves monthly average streamflow for a given river_id
322
321
323
322
Args:
324
- river_id: the ID of a stream, should be a 9 digit integer
323
+ river_id (int or list) : the ID of a stream, or a list of IDs; each should be a 9 digit integer
325
324
326
325
Returns:
327
326
pd.DataFrame
@@ -335,7 +334,7 @@ def annual_averages(river_id: int or list) -> pd.DataFrame:
335
334
Retrieves annual average streamflow for a given river_id
336
335
337
336
Args:
338
- river_id: the ID of a stream, should be a 9 digit integer
337
+ river_id (int or list) : the ID of a stream, or a list of IDs; each should be a 9 digit integer
339
338
340
339
Returns:
341
340
pd.DataFrame
@@ -344,13 +343,13 @@ def annual_averages(river_id: int or list) -> pd.DataFrame:
344
343
return calc_annual_averages (df )
345
344
346
345
347
- def return_periods (river_id : int or list , format : str = 'df' ) -> pd .DataFrame :
346
+ def return_periods (river_id : int or list , format : str = 'df' ) -> pd .DataFrame or xr . Dataset :
348
347
"""
349
348
Retrieves the return period thresholds based on a specified historic simulation forcing on a certain river_id.
350
349
351
350
Args:
352
- river_id: the ID of a stream, should be a 9 digit integer
353
- format: the format to return the data, either 'df' or 'xarray'. default is 'df'
351
+ river_id (int or list) : the ID of a stream, or a list of IDs; each should be a 9 digit integer
352
+ format (str) : the format to return the data, either 'df' or 'xarray'. default is 'df'
354
353
355
354
Returns:
356
355
pd.DataFrame
@@ -369,7 +368,7 @@ def metadata_tables(columns: list = None) -> pd.DataFrame:
369
368
"""
370
369
Retrieves the master table of rivers metadata and properties as a pandas DataFrame
371
370
Args:
372
- columns: optional subset of columns names to read from the parquet
371
+ columns (list) : optional subset of column names to read from the parquet
373
372
374
373
Returns:
375
374
pd.DataFrame
@@ -379,6 +378,7 @@ def metadata_tables(columns: list = None) -> pd.DataFrame:
379
378
warn = f"""
380
379
Local copy of geoglows v2 metadata table not found. You should download a copy for optimal performance and
381
380
to make the data available when you are offline. A copy of the table will be cached at { METADATA_TABLE_PATH } .
381
+ Alternatively, set the environment variable PYGEOGLOWS_METADATA_TABLE_PATH to the path of the table.
382
382
"""
383
383
warnings .warn (warn )
384
384
df = pd .read_parquet ('https://geoglows-v2.s3-website-us-west-2.amazonaws.com/tables/package-metadata-table.parquet' )
0 commit comments