From dff78f1e5c7ea0fae5c14c6064b2beeb70c968cb Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 25 Jan 2023 15:27:28 +0000 Subject: [PATCH 1/8] implement pyleo.Series.resample --- .gitattributes | 1 + pyleoclim/core/series.py | 42 +++++++++++++++++++++++++++++ pyleoclim/tests/test_core_Series.py | 32 ++++++++++++++++++++++ pyleoclim/utils/tsutils.py | 2 +- 4 files changed, 76 insertions(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index 0e235841..e38939d5 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ environment.yml merge=ours +*.py diff=python diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 1631784b..55086474 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3951,3 +3951,45 @@ def map(self, projection='Orthographic', proj_default=True, scatter_kwargs=scatter_kwargs, legend=legend, lgd_kwargs=lgd_kwargs, savefig_settings=savefig_settings) return res + + def resample(self, rule, **kwargs): + import re + search = re.search(r'(\d*)([a-zA-Z]+)', rule) + if search is None: + raise ValueError(f"Invalid rule provided: got {rule}") + multiplier = search.group(1) + if multiplier == '': + multiplier = 1 + else: + multiplier = int(multiplier) + unit = search.group(2) + print('multiplier: ', multiplier) + print('unit: ', unit) + if unit.lower() in tsutils.MATCH_A: + pass + elif unit.lower() in tsutils.MATCH_KA: + multiplier *= 1_000 + elif unit.lower() in tsutils.MATCH_MA: + multiplier *= 1_000_000 + elif unit.lower() in tsutils.MATCH_GA: + multiplier *= 1_000_000_000 + else: + raise ValueError(f'Invalid unit received, got: {unit}') + ser = self.to_pandas() + return _SeriesResample(f'{multiplier}Y', ser, self.metadata, kwargs) + + +class _SeriesResample: + def __init__(self, rule, series, metadata, kwargs): + self.rule = rule + self.series = series + self.metadata = metadata + self.kwargs = kwargs + + def __getattr__(self, attr): + attr = getattr(self.series.resample(self.rule, 
**self.kwargs), attr) + def foo(*args, **kwargs): + series = attr(*args, **kwargs) + from_pandas = Series.from_pandas(series, metadata=self.metadata) + return from_pandas + return foo diff --git a/pyleoclim/tests/test_core_Series.py b/pyleoclim/tests/test_core_Series.py index aee21092..f55c8c62 100644 --- a/pyleoclim/tests/test_core_Series.py +++ b/pyleoclim/tests/test_core_Series.py @@ -1019,3 +1019,35 @@ def test_sort_t1(self): ts = pyleo.Series(t,v) ts.sort() assert np.all(np.diff(ts.time) >= 0) + + +class TestResample: + @pytest.mark.parametrize('rule', pyleo.utils.tsutils.MATCH_A) + def test_resample_simple(self, rule, dataframe_dt): + # note: resample with large ranges is still not supported, + # so for now we're only testing 'years' as the rule + metadata = {'time_unit': 'years CE', + 'time_name': 'Time', + 'value_unit': 'mb', + 'value_name': 'SOI', + 'label': 'Southern Oscillation Index', + 'lat': None, + 'lon': None, + 'archiveType': None, + 'importedFrom': None, + 'log': ({0: 'clean_ts', 'applied': True, 'verbose': False},) + } + ser = dataframe_dt.loc[:, 0] + with pytest.warns(UserWarning, match='Time unit years CE not recognized. 
Defaulting to years CE'): + ts = pyleo.Series.from_pandas(ser, metadata) + result =ts.resample(rule).mean() + result_ser = result.to_pandas() + expected_values = np.array([0., 1., 2., 3., 4.]) + expected_idx = pd.DatetimeIndex(['2018-12-30 23:59:59', '2019-12-30 23:59:59', + '2020-12-30 23:59:59', '2021-12-30 23:59:59', + '2022-12-30 23:59:59'], name='datetime').as_unit('s') + expected_ser = pd.Series(expected_values, expected_idx, name='SOI') + expected_metadata = {'time_unit': 'years CE', 'time_name': 'Time', 'value_unit': 'mb', 'value_name': 'SOI', 'label': 'Southern Oscillation Index', 'lat': None, 'lon': None, 'archiveType': None, 'importedFrom': None, 'log': ({0: 'clean_ts', 'applied': True, 'verbose': False}, {2: 'clean_ts', 'applied': True, 'verbose': False}, {3: 'clean_ts', 'applied': True, 'verbose': False})} + pd.testing.assert_series_equal(result_ser, expected_ser) + assert result.metadata == expected_metadata + \ No newline at end of file diff --git a/pyleoclim/utils/tsutils.py b/pyleoclim/utils/tsutils.py index db00c6b6..275ac0b4 100644 --- a/pyleoclim/utils/tsutils.py +++ b/pyleoclim/utils/tsutils.py @@ -76,7 +76,7 @@ def time_unit_to_datum_exp_dir(time_unit, time_name=None): exponent = 9 direction = 'retrograde' else: - warnings.warn(f'Time unit {time_unit} not recognized. Defaulting to years CE') + warnings.warn(f'Time unit {time_unit} not recognized. Defaulting to years CE', stacklevel=4) # deal with statements about datum/direction tu = time_unit.lower().strip('.') # make lowercase + strip stops, so "B.P." 
--> "bp" From 67e1c31f6dd7fb8e3ad62e20050ce439f7d8f985 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 27 Jan 2023 17:15:17 +0000 Subject: [PATCH 2/8] test invalid rule --- pyleoclim/core/series.py | 6 +++--- pyleoclim/tests/test_core_Series.py | 23 ++++++++++++++++++++++- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 55086474..e48cbc6b 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -8,6 +8,7 @@ """ import operator +import re from ..utils import tsutils, plotting, tsmodel, tsbase, mapping, lipdutils, jsonutils from ..utils import wavelet as waveutils @@ -3953,10 +3954,9 @@ def map(self, projection='Orthographic', proj_default=True, return res def resample(self, rule, **kwargs): - import re search = re.search(r'(\d*)([a-zA-Z]+)', rule) if search is None: - raise ValueError(f"Invalid rule provided: got {rule}") + raise ValueError(f"Invalid rule provided, got: {rule}") multiplier = search.group(1) if multiplier == '': multiplier = 1 @@ -3974,7 +3974,7 @@ def resample(self, rule, **kwargs): elif unit.lower() in tsutils.MATCH_GA: multiplier *= 1_000_000_000 else: - raise ValueError(f'Invalid unit received, got: {unit}') + raise ValueError(f'Invalid unit provided, got: {unit}') ser = self.to_pandas() return _SeriesResample(f'{multiplier}Y', ser, self.metadata, kwargs) diff --git a/pyleoclim/tests/test_core_Series.py b/pyleoclim/tests/test_core_Series.py index f55c8c62..44a4336c 100644 --- a/pyleoclim/tests/test_core_Series.py +++ b/pyleoclim/tests/test_core_Series.py @@ -1050,4 +1050,25 @@ def test_resample_simple(self, rule, dataframe_dt): expected_metadata = {'time_unit': 'years CE', 'time_name': 'Time', 'value_unit': 'mb', 'value_name': 'SOI', 'label': 'Southern Oscillation Index', 'lat': None, 'lon': None, 'archiveType': None, 'importedFrom': None, 'log': ({0: 'clean_ts', 'applied': True, 'verbose': False}, {2: 'clean_ts', 'applied': True, 'verbose': False}, 
{3: 'clean_ts', 'applied': True, 'verbose': False})} pd.testing.assert_series_equal(result_ser, expected_ser) assert result.metadata == expected_metadata - \ No newline at end of file + + def test_resample_invalid(self, dataframe_dt): + # note: resample with large ranges is still not supported, + # so for now we're only testing 'years' as the rule + metadata = {'time_unit': 'years CE', + 'time_name': 'Time', + 'value_unit': 'mb', + 'value_name': 'SOI', + 'label': 'Southern Oscillation Index', + 'lat': None, + 'lon': None, + 'archiveType': None, + 'importedFrom': None, + 'log': ({0: 'clean_ts', 'applied': True, 'verbose': False},) + } + ser = dataframe_dt.loc[:, 0] + with pytest.warns(UserWarning, match='Time unit years CE not recognized. Defaulting to years CE'): + ts = pyleo.Series.from_pandas(ser, metadata) + with pytest.raises(ValueError, match='Invalid unit provided, got: foo'): + ts.resample('foo') + with pytest.raises(ValueError, match='Invalid rule provided, got: 412'): + ts.resample('412') From 8ce478e1d99206134392c781ac83239cf90d25b3 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 27 Jan 2023 17:25:59 +0000 Subject: [PATCH 3/8] document --- pyleoclim/core/series.py | 44 ++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index e48cbc6b..cb341242 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3954,6 +3954,28 @@ def map(self, projection='Orthographic', proj_default=True, return res def resample(self, rule, **kwargs): + """ + Run analogue to pandas.Series.resample. + + Parameters + ---------- + rule + The offset string or object representing target conversion. + Can also accept pyleoclim units, such as 'Ka' (1000 years), + 'Ma' (1 million years), and 'Ga' (1 billion years). + kwargs + Any other arguments which will be passed to pandas.Series.resample. 
+ + Returns + ------- + SeriesResampler + Resampler object, not meant to be used directly. Instead, + an aggregation should be called on it, see examples below. + + Examples + -------- + >>> ts.resample('Ka').mean() # doctest: +SKIP + """ search = re.search(r'(\d*)([a-zA-Z]+)', rule) if search is None: raise ValueError(f"Invalid rule provided, got: {rule}") @@ -3963,8 +3985,6 @@ def resample(self, rule, **kwargs): else: multiplier = int(multiplier) unit = search.group(2) - print('multiplier: ', multiplier) - print('unit: ', unit) if unit.lower() in tsutils.MATCH_A: pass elif unit.lower() in tsutils.MATCH_KA: @@ -3974,7 +3996,7 @@ def resample(self, rule, **kwargs): elif unit.lower() in tsutils.MATCH_GA: multiplier *= 1_000_000_000 else: raise ValueError(f'Invalid unit provided, got: {unit}') ser = self.to_pandas() - return _SeriesResample(f'{multiplier}Y', ser, self.metadata, kwargs) + return SeriesResampler(f'{multiplier}Y', ser, self.metadata, kwargs) -class _SeriesResample: +class SeriesResampler: + """ + This is only meant to be used internally, and is not meant to + be public-facing or to be used directly by users. + + If users call + + ts.resample('1Y').mean() + + then they will get back a pyleoclim.Series, and `SeriesResampler` + will only be used in an intermediate step. Think of it as an + implementation detail. 
+ """ def __init__(self, rule, series, metadata, kwargs): self.rule = rule self.series = series @@ -3988,8 +4020,8 @@ def __init__(self, rule, series, metadata, kwargs): def __getattr__(self, attr): attr = getattr(self.series.resample(self.rule, **self.kwargs), attr) - def foo(*args, **kwargs): + def func(*args, **kwargs): series = attr(*args, **kwargs) from_pandas = Series.from_pandas(series, metadata=self.metadata) return from_pandas - return foo + return func From c3cb29cf9ff0e0fa8094dda9ebbe2e5eb244eb4b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 27 Jan 2023 17:28:34 +0000 Subject: [PATCH 4/8] move metadata to fixture --- pyleoclim/tests/conftest.py | 16 +++++++++++++++- pyleoclim/tests/test_core_Series.py | 27 +++------------------------ 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/pyleoclim/tests/conftest.py b/pyleoclim/tests/conftest.py index e79e2056..54da0a23 100644 --- a/pyleoclim/tests/conftest.py +++ b/pyleoclim/tests/conftest.py @@ -16,4 +16,18 @@ def dataframe(): """Pandas dataframe with a non-datetime index and random values""" length = 5 df = pd.DataFrame(np.ones(length)) - return df \ No newline at end of file + return df + +@pytest.fixture +def metadata(): + return {'time_unit': 'years CE', + 'time_name': 'Time', + 'value_unit': 'mb', + 'value_name': 'SOI', + 'label': 'Southern Oscillation Index', + 'lat': None, + 'lon': None, + 'archiveType': None, + 'importedFrom': None, + 'log': ({0: 'clean_ts', 'applied': True, 'verbose': False},) + } \ No newline at end of file diff --git a/pyleoclim/tests/test_core_Series.py b/pyleoclim/tests/test_core_Series.py index 44a4336c..47da3974 100644 --- a/pyleoclim/tests/test_core_Series.py +++ b/pyleoclim/tests/test_core_Series.py @@ -1023,20 +1023,10 @@ def test_sort_t1(self): class TestResample: @pytest.mark.parametrize('rule', pyleo.utils.tsutils.MATCH_A) - def test_resample_simple(self, rule, dataframe_dt): + def test_resample_simple(self, rule, dataframe_dt, metadata): # 
note: resample with large ranges is still not supported, # so for now we're only testing 'years' as the rule - metadata = {'time_unit': 'years CE', - 'time_name': 'Time', - 'value_unit': 'mb', - 'value_name': 'SOI', - 'label': 'Southern Oscillation Index', - 'lat': None, - 'lon': None, - 'archiveType': None, - 'importedFrom': None, - 'log': ({0: 'clean_ts', 'applied': True, 'verbose': False},) - } + # https://github.com/pandas-dev/pandas/issues/51024 ser = dataframe_dt.loc[:, 0] with pytest.warns(UserWarning, match='Time unit years CE not recognized. Defaulting to years CE'): ts = pyleo.Series.from_pandas(ser, metadata) @@ -1051,20 +1041,9 @@ def test_resample_simple(self, rule, dataframe_dt): pd.testing.assert_series_equal(result_ser, expected_ser) assert result.metadata == expected_metadata - def test_resample_invalid(self, dataframe_dt): + def test_resample_invalid(self, dataframe_dt, metadata): # note: resample with large ranges is still not supported, # so for now we're only testing 'years' as the rule - metadata = {'time_unit': 'years CE', - 'time_name': 'Time', - 'value_unit': 'mb', - 'value_name': 'SOI', - 'label': 'Southern Oscillation Index', - 'lat': None, - 'lon': None, - 'archiveType': None, - 'importedFrom': None, - 'log': ({0: 'clean_ts', 'applied': True, 'verbose': False},) - } ser = dataframe_dt.loc[:, 0] with pytest.warns(UserWarning, match='Time unit years CE not recognized. 
Defaulting to years CE'): ts = pyleo.Series.from_pandas(ser, metadata) From d5aa3121fced4a5756a434e2cf16b2aa152e7b26 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 31 Jan 2023 19:39:24 +0000 Subject: [PATCH 5/8] post-merge fixup; --- pyleoclim/tests/test_core_Series.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pyleoclim/tests/test_core_Series.py b/pyleoclim/tests/test_core_Series.py index 08f91b67..d8f96355 100644 --- a/pyleoclim/tests/test_core_Series.py +++ b/pyleoclim/tests/test_core_Series.py @@ -1004,10 +1004,9 @@ def test_resample_simple(self, rule, dataframe_dt, metadata): # so for now we're only testing 'years' as the rule # https://github.com/pandas-dev/pandas/issues/51024 ser = dataframe_dt.loc[:, 0] - with pytest.warns(UserWarning, match='Time unit years CE not recognized. Defaulting to years CE'): - ts = pyleo.Series.from_pandas(ser, metadata) - result =ts.resample(rule).mean() - result_ser = result.to_pandas() + ts = pyleo.Series.from_pandas(ser, metadata) + result =ts.resample(rule).mean() + result_ser = result.to_pandas() expected_values = np.array([0., 1., 2., 3., 4.]) expected_idx = pd.DatetimeIndex(['2018-12-30 23:59:59', '2019-12-30 23:59:59', '2020-12-30 23:59:59', '2021-12-30 23:59:59', @@ -1021,8 +1020,7 @@ def test_resample_invalid(self, dataframe_dt, metadata): # note: resample with large ranges is still not supported, # so for now we're only testing 'years' as the rule ser = dataframe_dt.loc[:, 0] - with pytest.warns(UserWarning, match='Time unit years CE not recognized. 
Defaulting to years CE'): - ts = pyleo.Series.from_pandas(ser, metadata) + ts = pyleo.Series.from_pandas(ser, metadata) with pytest.raises(ValueError, match='Invalid unit provided, got: foo'): ts.resample('foo') with pytest.raises(ValueError, match='Invalid rule provided, got: 412'): From 7363ec9d7f7614651ccf5c11d0557874424732d3 Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Wed, 1 Feb 2023 12:23:22 -0800 Subject: [PATCH 6/8] clean up docstring and loaded packages --- pyleoclim/core/series.py | 13 +++++++++++-- pyleoclim/utils/tsutils.py | 6 +++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index cb341242..2a48eb9b 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3961,7 +3961,7 @@ def resample(self, rule, **kwargs): ---------- rule The offset string or object representing target conversion. - Can also accept pyleoclim units, such as 'Ka' (1000 years), + Can also accept pyleoclim units, such as 'ka' (1000 years), 'Ma' (1 million years), and 'Ga' (1 billion years). kwargs Any other arguments which will be passed to pandas.Series.resample. @@ -3974,7 +3974,16 @@ def resample(self, rule, **kwargs): Examples -------- - >>> ts.resample('Ka').mean() # doctest: +SKIP + >>> ts.resample('ka').mean() # doctest: +SKIP + .. 
ipython:: python + :okwarning: + :okexcept: + + import pyleoclim as pyleo + ts = pyleo.utils.load_dataset('nino3') + fig, ax = ts.plot() + ts.resample('5y').mean().plot(ax=ax) + """ search = re.search(r'(\d*)([a-zA-Z]+)', rule) if search is None: diff --git a/pyleoclim/utils/tsutils.py b/pyleoclim/utils/tsutils.py index bde2569e..04571be5 100644 --- a/pyleoclim/utils/tsutils.py +++ b/pyleoclim/utils/tsutils.py @@ -33,13 +33,13 @@ from sklearn.cluster import DBSCAN from sklearn.cluster import KMeans from sklearn.metrics import silhouette_score -import matplotlib.pyplot as plt +#import matplotlib.pyplot as plt import statsmodels.tsa.stattools as sms import math -from sys import exit -from .plotting import plot_scatter_xy, plot_xy, savefig +#from sys import exit +#from .plotting import plot_scatter_xy, plot_xy, savefig from .filter import savitzky_golay from .tsbase import ( From 45c8e928417626ac26954dd6f8d846d550a8334b Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Wed, 1 Feb 2023 12:46:25 -0800 Subject: [PATCH 7/8] Series.resample() add resample rule information to series label if present --- pyleoclim/core/series.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 2a48eb9b..b8f05314 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3959,11 +3959,11 @@ def resample(self, rule, **kwargs): Parameters ---------- - rule + rule : str The offset string or object representing target conversion. Can also accept pyleoclim units, such as 'ka' (1000 years), 'Ma' (1 million years), and 'Ga' (1 billion years). - kwargs + kwargs : dict Any other arguments which will be passed to pandas.Series.resample. 
Returns @@ -4004,8 +4004,13 @@ def resample(self, rule, **kwargs): multiplier *= 1_000_000_000 else: raise ValueError(f'Invalid unit provided, got: {unit}') + + md = self.metadata + if md['label'] is not None: + md['label'] = md['label'] + ' (' + rule + ' resampling)' + ser = self.to_pandas() - return SeriesResampler(f'{multiplier}Y', ser, self.metadata, kwargs) + return SeriesResampler(f'{multiplier}Y', ser, md, kwargs) class SeriesResampler: From 744860d20afb2467de42302e2c15a0d161558ee7 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 2 Feb 2023 09:55:23 +0000 Subject: [PATCH 8/8] fixup test --- pyleoclim/tests/test_core_Series.py | 17 ++++++++++++++++- pyleoclim/utils/tsutils.py | 2 -- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/pyleoclim/tests/test_core_Series.py b/pyleoclim/tests/test_core_Series.py index d8f96355..ade0a12c 100644 --- a/pyleoclim/tests/test_core_Series.py +++ b/pyleoclim/tests/test_core_Series.py @@ -1012,7 +1012,22 @@ def test_resample_simple(self, rule, dataframe_dt, metadata): '2020-12-30 23:59:59', '2021-12-30 23:59:59', '2022-12-30 23:59:59'], name='datetime').as_unit('s') expected_ser = pd.Series(expected_values, expected_idx, name='SOI') - expected_metadata = {'time_unit': 'years CE', 'time_name': 'Time', 'value_unit': 'mb', 'value_name': 'SOI', 'label': 'Southern Oscillation Index', 'lat': None, 'lon': None, 'archiveType': None, 'importedFrom': None, 'log': ({0: 'clean_ts', 'applied': True, 'verbose': False}, {2: 'clean_ts', 'applied': True, 'verbose': False}, {3: 'clean_ts', 'applied': True, 'verbose': False})} + expected_metadata = { + 'time_unit': 'years CE', + 'time_name': 'Time', + 'value_unit': 'mb', + 'value_name': 'SOI', + 'label': f'Southern Oscillation Index ({rule} resampling)', + 'lat': None, + 'lon': None, + 'archiveType': None, + 'importedFrom': None, + 'log': ( + {0: 'clean_ts', 'applied': True, 'verbose': False}, + {2: 'clean_ts', 'applied': True, 'verbose': False}, + {3: 'clean_ts', 
'applied': True, 'verbose': False} + ) + } pd.testing.assert_series_equal(result_ser, expected_ser) assert result.metadata == expected_metadata diff --git a/pyleoclim/utils/tsutils.py b/pyleoclim/utils/tsutils.py index 04571be5..f8321c0b 100644 --- a/pyleoclim/utils/tsutils.py +++ b/pyleoclim/utils/tsutils.py @@ -38,8 +38,6 @@ import statsmodels.tsa.stattools as sms import math -#from sys import exit -#from .plotting import plot_scatter_xy, plot_xy, savefig from .filter import savitzky_golay from .tsbase import (