Skip to content

Commit

Permalink
Merge pull request #348 from LinkedEarth/make_time_axis
Browse files: browse the repository at this point in the history
Make time axis
  • Loading branch information
CommonClimate authored Feb 28, 2023
2 parents 4e27bb5 + 0c32694 commit 0da854c
Show file tree
Hide file tree
Showing 4 changed files with 530 additions and 128 deletions.
87 changes: 53 additions & 34 deletions pyleoclim/core/multipleseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ..core.psds import MultiplePSD
from ..core.spatialdecomp import SpatialDecomp

import warnings
import numpy as np
from copy import deepcopy

Expand Down Expand Up @@ -472,7 +473,7 @@ def increments(self, step_style='median', verbose=False):

return gp

def common_time(self, method='interp', step = None, start = None, stop = None, step_style = None, **kwargs):
def common_time(self, method='interp', step = None, start = None, stop = None, step_style = None, time_axis = None, **kwargs):
''' Aligns the time axes of a MultipleSeries object
The alignment is achieved via binning, interpolation, or Gaussian kernel. Alignment is critical for workflows
Expand Down Expand Up @@ -514,6 +515,9 @@ def common_time(self, method='interp', step = None, start = None, stop = None, s
Method to obtain a representative step among all Series (using tsutils.increments).
Default value is None, so that it will be chosen according to the method: 'max' for bin and gkernel, 'mean' for interp.
time_axis : array
Time axis onto which all the series will be aligned. Will override step,start,stop, and step_style if they are passed.
kwargs: dict
keyword arguments (dictionary) of the bin, gkernel or interp methods
Expand All @@ -525,6 +529,14 @@ def common_time(self, method='interp', step = None, start = None, stop = None, s
The MultipleSeries objects with all series aligned to the same time axis.
Notes
-----
`start`, `stop`, `step`, and `step_style` are interpreted differently depending on the method used.
Interp uses these to specify the `time_axis` onto which interpolation will be applied.
Bin and gkernel use these to specify the `bin_edges` which define the "buckets" used for the
respective methods.
See also
--------
Expand Down Expand Up @@ -588,62 +600,69 @@ def common_time(self, method='interp', step = None, start = None, stop = None, s
pyleo.closefig(fig)
'''

# specify stepping style
if step_style == None: # if step style isn't specified, pick a robust choice according to method
if method == 'bin' or method == 'gkernel':
step_style = 'max'
elif method == 'interp':
step_style = 'mean'

# obtain grid properties with given step_style
gp = self.increments(step_style=step_style)

# define grid step
if step is not None and step > 0:
common_step = step
if time_axis is not None:
if start is not None or stop is not None or step is not None or step_style is not None:
warnings.warn('The time axis has been passed with other time axis relevant arguments {start,stop,step,step_style}. Time_axis takes priority and will be used.')
even_axis=None
else:
if step_style == 'mean':
common_step = gp[:,2].mean()
elif step_style == 'max':
common_step = gp[:,2].max()
elif step_style == 'mode':
common_step = stats.mode(gp[:,2])[0][0]
# specify stepping style
if step_style is None: # if step style isn't specified, pick a robust choice according to method
if method == 'bin' or method == 'gkernel':
step_style = 'max'
elif method == 'interp':
step_style = 'mean'

# obtain grid properties with given step_style
gp = self.increments(step_style=step_style)

# define grid step
if step is not None and step > 0:
common_step = step
else:
common_step = np.median(gp[:,2])

# define start and stop
if start is None:
start = gp[:,0].max() # pick the latest of the start times

if stop is None:
stop = gp[:,1].min() # pick the earliest of the stop times
if step_style == 'mean':
common_step = gp[:,2].mean()
elif step_style == 'max':
common_step = gp[:,2].max()
elif step_style == 'mode':
common_step = stats.mode(gp[:,2])[0][0]
else:
common_step = np.median(gp[:,2])
# define start and stop
if start is None:
start = gp[:,0].max() # pick the latest of the start times
if stop is None:
stop = gp[:,1].min() # pick the earliest of the stop times
if start > stop:
raise ValueError('At least one series has no common time interval with others. Please check the time axis of the series.')

even_axis = tsutils.make_even_axis(start=start,stop=stop,step=common_step)

if start > stop:
raise ValueError('At least one series has no common time interval with others. Please check the time axis of the series.')

ms = self.copy()

# apply each method
if method == 'bin':
for idx,item in enumerate(self.series_list):
ts = item.copy()
d = tsutils.bin(ts.time, ts.value, bin_size=common_step, start=start, stop=stop, evenly_spaced = False, **kwargs)
d = tsutils.bin(ts.time, ts.value, bin_edges=even_axis, time_axis=time_axis, no_nans=False, **kwargs)
ts.time = d['bins']
ts.value = d['binned_values']
ms.series_list[idx] = ts

elif method == 'interp':

if time_axis is None:
time_axis = even_axis
for idx,item in enumerate(self.series_list):
ts = item.copy()
ti, vi = tsutils.interp(ts.time, ts.value, step=common_step, start=start, stop=stop,**kwargs)
ti, vi = tsutils.interp(ts.time, ts.value, time_axis=time_axis, **kwargs)
ts.time = ti
ts.value = vi
ms.series_list[idx] = ts

elif method == 'gkernel':
for idx,item in enumerate(self.series_list):
ts = item.copy()
ti, vi = tsutils.gkernel(ts.time,ts.value,step=common_step, start=start, stop=stop, **kwargs)
ti, vi = tsutils.gkernel(ts.time,ts.value,bin_edges=even_axis, time_axis=time_axis, no_nans=False,**kwargs)
ts.time = ti
ts.value = vi
ms.series_list[idx] = ts.clean() # remove NaNs
Expand Down
25 changes: 23 additions & 2 deletions pyleoclim/tests/test_core_MultipleSeries.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,27 @@ def test_common_time_t1(self):
ms1 = ms.common_time(method='interp', start=1910, stop=2010, step=1/12)

assert (np.diff(ms1.series_list[0].time)[0] - 1/12) < 1e-3

def test_common_time_t2(self):
    """Check that ``common_time`` with an explicit ``time_axis`` puts the
    first series on exactly that axis when ``method='interp'``."""
    monthly_time = np.arange(1900, 2020, step=1/12)
    n_removed = 200
    n_points = 100
    members = []
    for j in range(4):
        full = gen_ts(model='colored_noise', nt=n_points, alpha=1, t=monthly_time)
        # drop a random subset of samples from each generated series
        dropped = np.random.choice(range(np.size(monthly_time)), n_removed, replace=False)
        members.append(
            pyleo.Series(
                time=np.delete(monthly_time.copy(), dropped),
                value=np.delete(full.value, dropped),
                value_name='Series_' + str(j + 1),
            )
        )

    ms = pyleo.MultipleSeries(members)
    target_axis = np.arange(1950, 2000, 1)
    aligned = ms.common_time(method='interp', time_axis=target_axis)

    assert_array_equal(target_axis, aligned.series_list[0].time)

class TestMultipleSeriesStackPlot():
''' Test for MultipleSeries.Stackplot
Expand Down Expand Up @@ -501,10 +522,10 @@ def test_to_pandas(self):
ms = pyleo.MultipleSeries([ts1, ts2])
result = ms.to_pandas()
expected_index = pd.DatetimeIndex(
np.array(['0000-12-31 05:48:45', '0002-07-02 02:31:54'], dtype='datetime64[s]'),
np.array(['0000-12-31 05:48:45', '0002-07-02 02:31:54','0003-12-31 23:15:03'], dtype='datetime64[s]'),
name='datetime',
)
expected = pd.DataFrame({'foo': [7, 5.25], 'bar': [7, 7.75]}, index=expected_index)
expected = pd.DataFrame({'foo': [7, 5.25,9.00], 'bar': [7, 7.75,1.00]}, index=expected_index)
pd.testing.assert_frame_equal(result, expected)

def test_to_pandas_args_kwargs(self):
Expand Down
49 changes: 40 additions & 9 deletions pyleoclim/tests/test_utils_tsutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,33 @@ def test_bin_t0(unevenly_spaced_series):
assert isinstance(v,np.ndarray)

def test_bin_t1(unevenly_spaced_series):
    """Check that binning with ``no_nans=True`` returns evenly spaced bins."""
    # Only the ``no_nans`` call is kept: the earlier ``evenly_spaced=True``
    # call was immediately overwritten and used the superseded keyword.
    res_dict = tsutils.bin(
        unevenly_spaced_series.time,
        unevenly_spaced_series.value,
        no_nans=True,
    )
    t = res_dict['bins']
    assert tsbase.is_evenly_spaced(t)

def test_bin_t2(unevenly_spaced_series):
    """Check that the returned bins are the midpoints of the supplied ``bin_edges``."""
    # Uses the ``bin_edges`` keyword only; the duplicate ``bins=`` call and its
    # redundant assertion were dropped.
    bin_edges = np.arange(0, 100, 10)
    res_dict = tsutils.bin(
        unevenly_spaced_series.time,
        unevenly_spaced_series.value,
        bin_edges=bin_edges,
    )
    t = res_dict['bins']
    assert_array_equal(t, (bin_edges[1:] + bin_edges[:-1]) / 2)

def test_bin_t3(unevenly_spaced_series):
    """Check that passing ``time_axis`` to ``bin`` returns that axis under ``'bins'``."""
    target_axis = np.arange(0, 100, 10)
    binned = tsutils.bin(
        unevenly_spaced_series.time,
        unevenly_spaced_series.value,
        time_axis=target_axis,
    )
    assert_array_equal(target_axis, binned['bins'])

@pytest.mark.parametrize('statistic',['mean','std','median','count','sum','min','max'])
def test_bin_t4(unevenly_spaced_series,statistic):
    """Smoke test: ``bin`` runs for each parametrized ``statistic`` value."""
    # A single definition named ``test_bin_t4`` carries the parametrization;
    # a second ``test_bin_t3`` here would shadow the ``time_axis`` test of the
    # same name.
    tsutils.bin(unevenly_spaced_series.time,unevenly_spaced_series.value,statistic=statistic)

@pytest.mark.parametrize('start',[None,10])
@pytest.mark.parametrize('stop',[None,90])
@pytest.mark.parametrize('bin_size',[None,20])
@pytest.mark.parametrize('step_style',[None,'median'])
@pytest.mark.parametrize('no_nans',[False,True])
def test_bin_t5(unevenly_spaced_series,start,stop,bin_size,step_style,no_nans):
    """Smoke test: ``bin`` runs for every combination of the parametrized
    ``start``/``stop``/``bin_size``/``step_style``/``no_nans`` values."""
    series = unevenly_spaced_series
    tsutils.bin(
        series.time,
        series.value,
        start=start,
        stop=stop,
        bin_size=bin_size,
        step_style=step_style,
        no_nans=no_nans,
    )

def test_gkernel_t0(unevenly_spaced_series):
t,v = tsutils.gkernel(unevenly_spaced_series.time,unevenly_spaced_series.value)
Expand All @@ -38,6 +52,23 @@ def test_gkernel_t1(unevenly_spaced_series):
assert tsbase.is_evenly_spaced(t)

def test_gkernel_t2(unevenly_spaced_series):
    """Check that ``gkernel`` returns the midpoints of the supplied ``bin_edges`` as its time axis."""
    # Uses the ``bin_edges`` keyword only; the duplicate ``bins=`` call and its
    # redundant assertion were dropped.
    bin_edges = np.arange(0, 100, 10)
    t, v = tsutils.gkernel(
        unevenly_spaced_series.time,
        unevenly_spaced_series.value,
        bin_edges=bin_edges,
    )
    assert_array_equal(t, (bin_edges[1:] + bin_edges[:-1]) / 2)

def test_interp_t0(unevenly_spaced_series):
    """Check that ``interp`` with default arguments returns two NumPy arrays."""
    series = unevenly_spaced_series
    t, v = tsutils.interp(series.time, series.value)
    # check the time output first, then the value output
    for out in (t, v):
        assert isinstance(out, np.ndarray)

def test_interp_t1(unevenly_spaced_series):
    """Check that ``interp`` returns the supplied ``time_axis`` as its time output."""
    target_axis = np.arange(1, 100, 10)
    t, v = tsutils.interp(
        unevenly_spaced_series.time,
        unevenly_spaced_series.value,
        time_axis=target_axis,
    )
    assert_array_equal(target_axis, t)

@pytest.mark.parametrize('start',[None,10])
@pytest.mark.parametrize('stop',[None,90])
@pytest.mark.parametrize('step',[None,20])
@pytest.mark.parametrize('step_style',[None,'median'])
def test_interp_t2(unevenly_spaced_series,start,stop,step,step_style):
    """Smoke test: ``interp`` runs for every combination of the parametrized
    ``start``/``stop``/``step``/``step_style`` values."""
    series = unevenly_spaced_series
    tsutils.interp(
        series.time,
        series.value,
        start=start,
        stop=stop,
        step=step,
        step_style=step_style,
    )
Loading

0 comments on commit 0da854c

Please sign in to comment.