From f843cd10fed280b7d728bc5f0e98b6a17328f12e Mon Sep 17 00:00:00 2001 From: James McVey <53623232+jmcvey3@users.noreply.github.com> Date: Mon, 15 Jul 2024 09:10:52 -0700 Subject: [PATCH] Update NOAA Request function (#332) Fixes #223 --- mhkit/river/graphics.py | 2 +- mhkit/river/io/usgs.py | 8 +++--- mhkit/tidal/io/noaa.py | 55 +++++++++++++++++++++++++++++------------ mhkit/wave/io/ndbc.py | 6 ++--- 4 files changed, 47 insertions(+), 24 deletions(-) diff --git a/mhkit/river/graphics.py b/mhkit/river/graphics.py index 396ce1271..7272ded81 100644 --- a/mhkit/river/graphics.py +++ b/mhkit/river/graphics.py @@ -118,7 +118,7 @@ def plot_velocity_duration_curve(V, F, label=None, ax=None): """ # Sort by F temp = xr.Dataset(data_vars={"V": V, "F": F}) - temp.sortby("F", ascending=False) + temp = temp.sortby("F", ascending=False) ax = _xy_plot( temp["V"], diff --git a/mhkit/river/io/usgs.py b/mhkit/river/io/usgs.py index 54c97966c..9b104f826 100644 --- a/mhkit/river/io/usgs.py +++ b/mhkit/river/io/usgs.py @@ -82,7 +82,7 @@ def request_usgs_data( station : str USGS station number (e.g. '08313000') parameter : str - USGS paramter ID (e.g. '00060' for Discharge, cubic feet per second) + USGS parameter ID (e.g. '00060' for Discharge, cubic feet per second) start_date : str Start date in the format 'YYYY-MM-DD' (e.g. '2018-01-01') end_date : str @@ -91,8 +91,8 @@ def request_usgs_data( Data type, options include 'Daily' (return the mean daily value) and 'Instantaneous'. proxy : dict or None - To request data from behind a firewall, define a dictionary of proxy settings, - for example {"http": 'localhost:8080'} + To request data from behind a firewall, define a dictionary of proxy settings, + for example {"http": 'localhost:8080'} write_json : str or None Name of json file to write data clear_cache : bool @@ -106,7 +106,7 @@ def request_usgs_data( Data indexed by datetime with columns named according to the parameter's variable description """ - if not data_type in ["Daily", "Instantaneous"]: + if data_type not in ["Daily", "Instantaneous"]: raise ValueError(f"data_type must be Daily or Instantaneous. Got: {data_type}") if not isinstance(to_pandas, bool): diff --git a/mhkit/tidal/io/noaa.py b/mhkit/tidal/io/noaa.py index 2ea2ad708..d0aadc861 100644 --- a/mhkit/tidal/io/noaa.py +++ b/mhkit/tidal/io/noaa.py @@ -4,14 +4,14 @@ This module provides functions to fetch, process, and read NOAA (National Oceanic and Atmospheric Administration) current data directly from the NOAA Tides and Currents API (https://api.tidesandcurrents.noaa.gov/api/prod/). It -supports loading data into a pandas DataFrame, handling data in XML and +supports loading data into a pandas DataFrame, handling data in XML and JSON formats, and writing data to a JSON file. Functions: ---------- -request_noaa_data(station, parameter, start_date, end_date, proxy=None, +request_noaa_data(station, parameter, start_date, end_date, proxy=None, write_json=None): - Loads NOAA current data from the API into a pandas DataFrame, + Loads NOAA current data from the API into a pandas DataFrame, with optional support for proxy settings and writing data to a JSON file. @@ -56,16 +56,18 @@ def request_noaa_data( Parameters ---------- station : str - NOAA current station number (e.g. 'cp0101') + NOAA current station number (e.g. 'cp0101', "s08010", "9446484") parameter : str - NOAA paramter (e.g. '' for Discharge, cubic feet per second) + NOAA parameter (e.g. "currents", "salinity", "water_level", "water_temperature", + "air_temperature", "wind", "air_pressure") + https://api.tidesandcurrents.noaa.gov/api/prod/ start_date : str Start date in the format yyyyMMdd end_date : str End date in the format yyyyMMdd proxy : dict or None - To request data from behind a firewall, define a dictionary of proxy - settings, for example {"http": 'localhost:8080'} + To request data from behind a firewall, define a dictionary of proxy + settings, for example {"http": 'localhost:8080'} write_json : str or None Name of json file to write data clear_cache : bool @@ -158,26 +160,42 @@ def request_noaa_data( end_date = date_list[i + 1].strftime("%Y%m%d") api_query = f"begin_date={start_date}&end_date={end_date}&station={station}&product={parameter}&units=metric&time_zone=gmt&application=web_services&format=xml" + # Add datum to water level inquiries + if parameter == "water_level": + api_query += "&datum=MLLW" data_url = f"https://tidesandcurrents.noaa.gov/api/datagetter?{api_query}" - print("Data request URL: ", data_url) + print(f"Data request URL: {data_url}\n") # Get response try: response = requests.get(url=data_url, proxies=proxy) response.raise_for_status() - except requests.exceptions.HTTPError as err: - print(f"HTTP error occurred: {err}") - continue - except requests.exceptions.RequestException as err: - print(f"Error occurred: {err}") - continue + # Catch non-exception errors + if "error" in response.content.decode(): + raise Exception(response.content.decode()) + except Exception as err: + if err.__class__ == requests.exceptions.HTTPError: + print(f"HTTP error occurred: {err}") + print(f"Error message: {response.content.decode()}\n") + continue + elif err.__class__ == requests.exceptions.RequestException: + print(f"Requests error occurred: {err}") + print(f"Error message: {response.content.decode()}\n") + continue + else: + print(f"Requests error occurred: {err}\n") + continue + # Convert to DataFrame and save in data_frames list df, metadata = _xml_to_dataframe(response) data_frames.append(df) # Concatenate all DataFrames - data = pd.concat(data_frames, ignore_index=False) + if data_frames: + data = pd.concat(data_frames, ignore_index=False) + else: + raise ValueError("No data retrieved.") # Remove duplicated date values data = data.loc[~data.index.duplicated()] @@ -236,7 +254,12 @@ def _xml_to_dataframe(response): df.drop_duplicates(inplace=True) # Convert data to float - df[["d", "s"]] = df[["d", "s"]].apply(pd.to_numeric) + cols = list(df.columns) + for var in cols: + try: + df[var] = df[var].apply(pd.to_numeric) + except ValueError: + pass return df, metadata diff --git a/mhkit/wave/io/ndbc.py b/mhkit/wave/io/ndbc.py index 268c3390e..12ad3e9a7 100644 --- a/mhkit/wave/io/ndbc.py +++ b/mhkit/wave/io/ndbc.py @@ -329,10 +329,10 @@ def request_data(parameter, filenames, proxy=None, clear_cache=False, to_pandas= 'cwind' : 'Continuous Winds Current Year Historical Data' filenames: pandas Series, pandas DataFrame, xarray DataArray, or xarray Dataset - Data filenames on https://www.ndbc.noaa.gov/data/historical/{parameter}/ + Data filenames on https://www.ndbc.noaa.gov/data/historical/{parameter}/ proxy: dict - Proxy dict passed to python requests, + Proxy dict passed to python requests, (e.g. proxy_dict= {"http": 'http:wwwproxy.yourProxy:80/'}) to_pandas: bool (optional) @@ -631,7 +631,7 @@ def parameter_units(parameter=""): If no parameter is passed then an ordered dictionary of all NDBC parameterz specified unites is returned. If a parameter is specified then only the units associated with that parameter are returned. - Note that many NDBC paramters report multiple measurements and in + Note that many NDBC parameters report multiple measurements and in that case the returned dictionary will contain the NDBC measurement name and associated unit for all the measurements associated with the specified parameter. Optional parameter values are given below.