Skip to content

Commit

Permalink
Merge pull request #507 from LinkedEarth/age-quantile
Browse files Browse the repository at this point in the history
quantiles calculations
  • Loading branch information
CommonClimate authored Jan 25, 2024
2 parents 8ba0fef + 7a4a837 commit fbacdf3
Show file tree
Hide file tree
Showing 2 changed files with 306 additions and 18 deletions.
230 changes: 212 additions & 18 deletions pyleoclim/core/ensembleseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ def slice(self, timespan):



def quantiles(self, qs=[0.05, 0.5, 0.95]):
'''Calculate quantiles of an EnsembleSeries object
def quantiles(self, qs=[0.05, 0.5, 0.95], axis = 'value'):
'''Calculate quantiles of an EnsembleSeries object. If axis is 'value', the calculation requires for the time axis to be the same. You can use the common_time method to do so. In essence, it transforms the time uncertainty into a y-axis uncertainty. If axis is 'time', the values should be the same for all members of the emsemble.
Reuses [scipy.stats.mstats.mquantiles](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mstats.mquantiles.html) function.
Expand All @@ -142,13 +142,22 @@ def quantiles(self, qs=[0.05, 0.5, 0.95]):
qs : list, optional
List of quantiles to consider for the calculation. The default is [0.05, 0.5, 0.95].
axis : ['time', 'value']
Whether to calculate the quantiles over the values or time. Default is 'value'.
Returns
-------
ens_qs : EnsembleSeries
EnsembleSeries object containing empirical quantiles of original
See also
--------
pyleoclim.core.multipleseries.MultipleSeries.common_time : A method to align axes
Examples
--------
Expand All @@ -170,24 +179,68 @@ def quantiles(self, qs=[0.05, 0.5, 0.95]):
ts_ens = pyleo.EnsembleSeries(series_list)
ens_qs = ts_ens.quantiles()
To calculate in the time dimension:
.. jupyter-execute::
nn = 30 #number of age models
time = np.arange(1,20000,100) #create a time vector
std_dev = 20 # Noise to be considered
t,v = pyleo.utils.gen_ts(model='colored_noise',nt=len(time),alpha=1.0)
series_list = []
for i in range(nn):
noise = np.random.normal(0,std_dev,len(time))
ts=pyleo.Series(time=np.sort(time+noise),value=v,verbose=False)
series_list.append(ts)
time_ens = pyleo.EnsembleSeries(series_list)
ens_qs = time_ens.quantiles(axis='time')
'''
time = np.copy(self.series_list[0].time)
vals = []
for ts in self.series_list:
if not np.array_equal(ts.time, time):
raise ValueError('Time axis not consistent across the ensemble!')

vals.append(ts.value)

vals = np.array(vals)
# ens_qs = mquantiles(vals, qs, axis=0)
ens_qs = np.nanquantile(vals, qs, axis=0)

ts_list = []
for i, quant in enumerate(ens_qs):
ts = Series(time=time, value=quant, label=f'{qs[i]*100:g}%', verbose=False)
ts_list.append(ts)
if axis == 'value':
time = np.copy(self.series_list[0].time)
vals = []
for ts in self.series_list:
if not np.array_equal(ts.time, time):
raise ValueError('Time axis not consistent across the ensemble!')

vals.append(ts.value)

vals = np.array(vals)
# ens_qs = mquantiles(vals, qs, axis=0)
ens_qs = np.nanquantile(vals, qs, axis=0)

ts_list = []
for i, quant in enumerate(ens_qs):
ts = Series(time=time, value=quant, label=f'{qs[i]*100:g}%', verbose=False)
ts_list.append(ts)

elif axis == 'time':

value = np.copy(self.series_list[0].value)
vals = []
for ts in self.series_list:
if not np.array_equal(ts.value, value):
raise ValueError('Value axis not consistent across the ensemble!')
vals.append(ts.time)

vals = np.array(vals)

# ens_qs = mquantiles(vals, qs, axis=0)
ens_qs = np.nanquantile(vals, qs, axis=0)

ts_list = []
for i, quant in enumerate(ens_qs):
ts = Series(time=quant, value=value, label=f'{qs[i]*100:g}%', verbose=False)
ts_list.append(ts)

else:
raise ValueError("Axis should be either 'value' or 'time'")

ens_qs = EnsembleSeries(series_list=ts_list)

Expand Down Expand Up @@ -999,3 +1052,144 @@ def histplot(self, figsize=[10, 4], title=None, savefig_settings=None,
return fig, ax
else:
return ax

def to_dataframe(self, axis = 'value'):
'''
Export the ensemble as a Pandas DataFrame, with members of the ensemble as columns. The columns are labeled according to the label in the individual series or numbered if 'label' is None.
Parameters
----------
axis : str, ['time', 'value']
Whether the return the ensemble from value or time. each The default is 'value'.
Raises
------
ValueError
Axis should be either 'time' or 'value'
Returns
-------
df : pandas.DataFrame
A Pandas DataFrame containing members of the ensemble as columns.
.. jupyter-execute::
nn = 30 #number of age models
time = np.arange(1,20000,100) #create a time vector
std_dev = 20 # Noise to be considered
t,v = pyleo.utils.gen_ts(model='colored_noise',nt=len(time),alpha=1.0)
series_list = []
for i in range(nn):
noise = np.random.normal(0,std_dev,len(time))
ts=pyleo.Series(time=np.sort(time+noise),value=v,verbose=False)
series_list.append(ts)
time_ens = pyleo.EnsembleSeries(series_list)
ens_qs = time_ens.quantiles(axis='time')
df=ens_qs.to_dataframe(axis='time')
'''

df_dict = {}
idx = 0

if axis == 'value':
for ts in self.series_list:
if ts.label is None:
df_dict[idx]=ts.value
else:
df_dict[ts.label]=ts.value
idx+=1

elif axis == 'time':
for ts in self.series_list:
if ts.label is None:
df_dict[idx]=ts.time
else:
df_dict[ts.label]=ts.time
idx+=1

else:
raise ValueError('Axis should be either "time" or "value"')

df = pd.DataFrame(df_dict)

return df

def to_array(self, axis='value', labels=True):
'''
Returns an ensemble as a numpy array with an optional list for labels. Each column in the array corresponds to an ensemble member.
Parameters
----------
axis : str, ['time', 'value'], optional
Whether the return the ensemble from value or time. The default is 'value'.
labels : bool, [True,False], optional
Whether to retrun a separate list with the timseries labels. The default is True.
Raises
------
ValueError
Axis should be either 'time' or 'value'
Returns
-------
vals: numpy.array
An array where each column corresponds to an ensemble member
headers: list
A list of corresponding labels for each columm
Example
-------
.. jupyter-execute::
nn = 30 #number of age models
time = np.arange(1,20000,100) #create a time vector
std_dev = 20 # Noise to be considered
t,v = pyleo.utils.gen_ts(model='colored_noise',nt=len(time),alpha=1.0)
series_list = []
for i in range(nn):
noise = np.random.normal(0,std_dev,len(time))
ts=pyleo.Series(time=np.sort(time+noise),value=v,verbose=False)
series_list.append(ts)
time_ens = pyleo.EnsembleSeries(series_list)
ens_qs = time_ens.quantiles(axis='time')
vals,headers=ens_qs.to_dataframe(axis='time')
'''

vals=np.empty((len(self.series_list[0].value),len(self.series_list)))
headers=[]

if axis == 'value':
for i, ts in enumerate(self.series_list):
headers.append(ts.label)
vals[:,i]=ts.value

elif axis == 'time':
for i, ts in enumerate(self.series_list):
headers.append(ts.label)
vals[:,i]=ts.time

else:
raise ValueError('Axis should be either "time" or "value"')

if labels == True:
return vals, headers
else:
return vals



94 changes: 94 additions & 0 deletions pyleoclim/tests/test_core_EnsembleSeries.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,100 @@ def test_histplot_t0(self):
ts_ens.histplot()
pyleo.closefig()

class TestUIEnsembleSeriesQuantiles():
def test_quantiles_t0(self):
nn = 30 # number of noise realizations
nt = 500
series_list = []

t,v = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=1.0)
signal = pyleo.Series(t,v)

for idx in range(nn): # noise
noise = np.random.randn(nt,nn)*100
ts = pyleo.Series(time=signal.time, value=signal.value+noise[:,idx], verbose=False)
series_list.append(ts)

ts_ens = pyleo.EnsembleSeries(series_list)

ens_qs = ts_ens.quantiles()

def test_quantiles_t1(self):

nn = 30 #number of age models
time = np.arange(1,20000,100) #create a time vector
std_dev = 20 # Noise to be considered

t,v = pyleo.utils.gen_ts(model='colored_noise',nt=len(time),alpha=1.0)

series_list = []

for i in range(nn):
noise = np.random.normal(0,std_dev,len(time))
ts=pyleo.Series(time=np.sort(time+noise),value=v,verbose=False)
series_list.append(ts)

time_ens = pyleo.EnsembleSeries(series_list)

ens_qs = time_ens.quantiles(axis='time')

class TestUIEnsembleSeriesDataFrame():
@pytest.mark.parametrize('axis',['time','value'])
def test_to_dataframe_t0(self, axis):
nn = 30 #number of age models
time = np.arange(1,20000,100) #create a time vector
std_dev = 20 # Noise to be considered

t,v = pyleo.utils.gen_ts(model='colored_noise',nt=len(time),alpha=1.0)

series_list = []

for i in range(nn):
noise = np.random.normal(0,std_dev,len(time))
ts=pyleo.Series(time=np.sort(time+noise),value=v,verbose=False)
series_list.append(ts)

time_ens = pyleo.EnsembleSeries(series_list)
ens_qs = time_ens.quantiles(axis='time')

if axis == 'time':
df=ens_qs.to_dataframe(axis='time')
elif axis == 'value':
df=time_ens.to_dataframe(axis='value')

class TestUIEnsembleSeriesArray():

@pytest.mark.parametrize(
('labels', 'mode'),
[
(True,'time'),
(True,'value'),
(False,'time'),
(False,'value'),
]
)
def test_to_array_t0(self,labels,mode):
nn = 30 #number of age models
time = np.arange(1,20000,100) #create a time vector
std_dev = 20 # Noise to be considered

t,v = pyleo.utils.gen_ts(model='colored_noise',nt=len(time),alpha=1.0)

series_list = []

for i in range(nn):
noise = np.random.normal(0,std_dev,len(time))
ts=pyleo.Series(time=np.sort(time+noise),value=v,verbose=False)
series_list.append(ts)

time_ens = pyleo.EnsembleSeries(series_list)
ens_qs = time_ens.quantiles(axis='time')

if labels == True:
vals,headers=ens_qs.to_array(axis=mode)
else:
vals = ens_qs.to_array(axis=mode)

# class TestUIEnsembleSeriesDistplot():
# def test_histplot_t0(self):
# '''Test for EnsembleSeries.distplot()
Expand Down

0 comments on commit fbacdf3

Please sign in to comment.