diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a9e63ce..a2ff450 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,7 +14,7 @@ repos:
     files: requirements-dev.txt
 
 - repo: https://github.com/psf/black
-  rev: 23.12.1
+  rev: 24.1.1
   hooks:
   - id: black
     language_version: python3
@@ -33,10 +33,10 @@ repos:
   - id: blackdoc
 
 - repo: https://github.com/econchick/interrogate
-  rev: 1.5.0
+  rev: 237be78f9c6135fc1a620d211cdfdc5d3885082b
   hooks:
   - id: interrogate
-    exclude: ^(docs|setup.py|tests)
+    exclude: ^(docs|tests)
     args: [--config=pyproject.toml]
 
 - repo: https://github.com/codespell-project/codespell
@@ -56,12 +56,12 @@ repos:
   - id: add-trailing-comma
 
 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.1.9
+  rev: v0.1.15
   hooks:
   - id: ruff
 
 - repo: https://github.com/tox-dev/pyproject-fmt
-  rev: 1.5.3
+  rev: 1.7.0
   hooks:
   - id: pyproject-fmt
diff --git a/gliderpy/fetchers.py b/gliderpy/fetchers.py
index f5b0f83..c47df35 100644
--- a/gliderpy/fetchers.py
+++ b/gliderpy/fetchers.py
@@ -30,10 +30,7 @@ def _to_pandas_multiple(glider_grab):
     glider_grab_copy = copy(glider_grab)
     for dataset_id in glider_grab_copy.datasets["Dataset ID"]:
         glider_grab_copy.fetcher.dataset_id = dataset_id
-        df = glider_grab_copy.fetcher.to_pandas(
-            index_col="time (UTC)",
-            parse_dates=True,
-        )
+        df = glider_grab_copy.fetcher.to_pandas()
         dataset_url = glider_grab_copy.fetcher.get_download_url().split("?")[0]
         df = standardise_df(df, dataset_url)
         df_all.update({dataset_id: df})
@@ -45,8 +42,11 @@ def standardise_df(df, dataset_url):
     """
     Standardise variable names in a dataset and add column for url
     """
     df.columns = df.columns.str.lower()
-    df.rename(columns=dict(server_parameter_rename), inplace=True)
-    df.index.rename("time", inplace=True)
+    df = df.set_index("time (utc)")
+    df = df.rename(columns=server_parameter_rename)
+    df.index = pd.to_datetime(df.index)
+    # We need to sort b/c of the non-sequential submission of files due to the nature of glider data transmission.
+    df = df.sort_index()
     df["dataset_url"] = dataset_url
     return df
@@ -79,10 +79,7 @@ def to_pandas(self):
         :return: pandas dataframe with datetime UTC as index, multiple dataset_ids dataframes are stored in a dictionary
         """
         if self.fetcher.dataset_id:
-            df = self.fetcher.to_pandas(
-                index_col="time (UTC)",
-                parse_dates=True,
-            )
+            df = self.fetcher.to_pandas()
         elif not self.fetcher.dataset_id and self.datasets is not None:
             df_all = _to_pandas_multiple(self)
             # We need to reset to avoid fetching a single dataset_id when making multiple requests.
@@ -93,7 +90,7 @@ def to_pandas(self):
                 f"Must provide a {self.fetcher.dataset_id} or `query` terms to download data.",
             )
 
-        # Standardize variable names.
+        # Standardize variable names for the single dataset_id.
         dataset_url = self.fetcher.get_download_url().split("?")[0]
         df = standardise_df(df, dataset_url)
         return df
diff --git a/gliderpy/servers.py b/gliderpy/servers.py
index b33d3de..eae8b4f 100644
--- a/gliderpy/servers.py
+++ b/gliderpy/servers.py
@@ -3,7 +3,6 @@
 """
 
 
-
 server_vars = {
     "https://gliders.ioos.us/erddap": [
         "latitude",
@@ -30,4 +29,5 @@
     "salinity (1)": "salinity",
     "temp (degree_celsius)": "temperature",
     "temperature (celsius)": "temperature",
+    "time (utc)": "time",
 }
diff --git a/pyproject.toml b/pyproject.toml
index 23b6634..7223fce 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -86,7 +86,6 @@ ignore = [
   "tests",
   "tests/*",
 ]
-##
 
 [tool.pytest.ini_options]
 filterwarnings = [
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 77d68bc..c4c6685 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,3 +1,5 @@
+# Pyarrow will be required in pandas 3.0,
+# added here for better performance and to avoid a deprecation warning.
 cartopy
 check-manifest
 jupyter
@@ -6,6 +8,7 @@ nbconvert
 nbsphinx
 palettable
 pre-commit
+pyarrow
 pytest
 pytest-cov
 pytest-flake8
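A minimal sketch of the new `standardise_df` steps, using hypothetical values (not part of the patch): glider files are transmitted non-sequentially, so rows can arrive out of chronological order, and the explicit `sort_index()` restores it once the time strings are parsed into a `DatetimeIndex`.

```python
import pandas as pd

# Toy stand-in for a raw ERDDAP response; the second row is
# earlier in time than the first, mimicking out-of-order files.
raw = pd.DataFrame(
    {
        "time (UTC)": ["2016-09-02T19:00:00Z", "2016-09-02T17:00:00Z"],
        "temp (degree_celsius)": [28.1, 28.4],
    },
)

df = raw.copy()
df.columns = df.columns.str.lower()  # "time (UTC)" -> "time (utc)"
df = df.set_index("time (utc)")
df.index = pd.to_datetime(df.index)  # parse strings into a DatetimeIndex
df = df.sort_index()  # restore chronological order

assert df.index.is_monotonic_increasing
```

This mirrors the patched `standardise_df` minus the column rename and `dataset_url` bookkeeping; the old `index_col`/`parse_dates` arguments are no longer needed at the `to_pandas()` call sites because the indexing, parsing, and sorting now happen in one place.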