baseflow_separation.py
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import baseflow_separation_functions as fun
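
# `baseflow_separation_functions` is a companion module (not shown here). Judging from how it
# is called below, it is assumed to provide:
#   baseflow_filter(flow)              -> baseflow series from a one-parameter recursive digital filter
#   baseflow_UKIH(flow)                -> baseflow series from the UKIH smoothed-minima method
#   recession_constant(flow)           -> recession constant k of the flow record
#   BFImax(flow, k)                    -> maximum baseflow index used by the Eckhardt filter
#   baseflow_Eckhardt(flow, bfimax, k) -> baseflow series from the Eckhardt two-parameter filter
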
data_dir = os.path.normpath('river_flow_daily')
export_dir = os.path.normpath('baseflow_data')
filenames = os.listdir(data_dir)
# create a timeseries plot with the estimated baseflow from each method,
# and their average
create_plot = True
# period to create the plot for
period_start = '2009-06-01'
period_end = '2012-09-01'

if not os.path.exists(export_dir): os.makedirs(export_dir)

for filename in filenames:
    df = pd.read_csv(os.path.join(data_dir, filename))
    # keep only the relevant columns
    df = df[['date', 'value', 'quality']]
    df = df.rename(columns={'value': 'flow'})
    # treat any data that are not of 'Good' or 'Estimated' quality as missing
    idx = df[(df.quality != 'Good') & (df.quality != 'Estimated')].index
    df.loc[idx, 'flow'] = np.nan

    # temporary dataframe (tempdf and df will have the same index values)
    tempdf = df.copy()
    # group each run of NaNs under one group id, and non-NaNs into separate groups
    tempdf['group'] = df.flow.notnull().astype(int).cumsum()
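    # e.g. flow = [1, NaN, NaN, 5, NaN] -> notnull().astype(int) = [1, 0, 0, 1, 0] -> cumsum = [1, 1, 1, 2, 2],
    # so each run of NaNs shares one group id once the non-NaN rows are dropped below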
    # keep only the NaN values
    tempdf = tempdf[tempdf.flow.isnull()]
    # maximum number of consecutive NaNs to interpolate
    N = 7
    tempdf = tempdf[tempdf.group.isin(tempdf.group.value_counts()[tempdf.group.value_counts() <= N].index)]
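    # value_counts() gives the number of NaNs in each gap, so only gaps of N or fewer days survive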
    # interpolate gaps of N or fewer NaNs and ignore the rest
    for group in tempdf.group.unique():
        idx = tempdf[tempdf.group == group].index
        df.loc[idx, 'flow'] = df.flow.interpolate(method='spline', order=3)[idx]
        df.loc[idx, 'quality'] = 'Interpolated'

    # now do the inverse:
    tempdf = df.copy()
    # group each period of non-NaNs under one group id, and the NaNs into separate groups
    tempdf['group'] = tempdf.flow.isnull().astype(int).cumsum()
    # keep the non-NaN values; this way, every period of valid data is a separate group
    tempdf = tempdf[tempdf.flow.notnull()]

    # find the periods with consecutive valid flow data
    df['bf_filter'] = np.nan
    dates = []
    for group in tempdf.group.unique():
        idx = tempdf[tempdf.group == group].index
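        # separate baseflow only for runs with at least a full year (365 days) of continuous data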
        if len(idx) >= 365:
            # filter method
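            # (presumably the Lyne-Hollick one-parameter filter: quickflow
            #  q_f(t) = a*q_f(t-1) + 0.5*(1 + a)*(Q(t) - Q(t-1)), baseflow = Q - q_f capped at Q;
            #  the exact form lives in the companion module)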
            df.loc[idx, 'bf_filter'] = np.array(fun.baseflow_filter(df.flow[idx]))
            bfi_filter = df.bf_filter[idx].sum() / df.flow[idx].sum()
            # UKIH method
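            # (smoothed-minima approach: block minima over 5-day windows, turning points joined
            #  by straight lines; assumed, the exact form lives in the companion module)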
            df.loc[idx, 'bf_UKIH'] = np.array(fun.baseflow_UKIH(df.flow[idx]))
            bfi_UKIH = df.bf_UKIH[idx].sum() / df.flow[idx].sum()
            # Eckhardt method
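            # standard Eckhardt (2005) two-parameter filter:
            #   b(t) = ((1 - BFImax)*k*b(t-1) + (1 - k)*BFImax*Q(t)) / (1 - k*BFImax), with b(t) <= Q(t)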
            k = fun.recession_constant(df.flow[idx])
            bfimax = fun.BFImax(df.flow[idx], k)
            df.loc[idx, 'bf_Eckhardt'] = np.array(fun.baseflow_Eckhardt(df.flow[idx], bfimax, k))
            bfi_Eckhardt = df.bf_Eckhardt[idx].sum() / df.flow[idx].sum()
            dates.append([df.date.loc[idx[0]], df.date.loc[idx[-1]],
                          len(df.date[idx]), bfi_filter, bfi_UKIH, bfi_Eckhardt])

    # remove the irrelevant columns
    df = df.drop(columns=['flow', 'quality'])
    dates = pd.DataFrame(dates, columns=['start', 'end', 'duration',
                                         'bfi_filter', 'bfi_UKIH', 'bfi_Eckhardt'])

    # prepare the export filename for the baseflow
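    # (filenames are assumed to be of the form '<id>-<station>_<...>.csv';
    #  keep the first two '-'-separated parts for the output name)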
    export_filename = filename.split('-')[:2]
    export_filename = '-'.join(export_filename)
    export_filename = export_filename + '_baseflow.csv'
    export_sub_dir = os.path.normpath('baseflow_daily')
    if not os.path.exists(os.path.join(export_dir, export_sub_dir)):
        os.makedirs(os.path.join(export_dir, export_sub_dir))
    export_filename = os.path.join(export_dir, export_sub_dir, export_filename)
    # export the baseflow
    df.to_csv(export_filename, index=False)

    # prepare the export filename for the dates with continuous data
    export_filename = filename.split('-')[:2]
    export_filename = '-'.join(export_filename)
    export_filename = export_filename + '_data_periods.csv'
    export_sub_dir = os.path.normpath('data_periods_daily')
    if not os.path.exists(os.path.join(export_dir, export_sub_dir)):
        os.makedirs(os.path.join(export_dir, export_sub_dir))
    export_filename = os.path.join(export_dir, export_sub_dir, export_filename)
    # export the dates
    dates.to_csv(export_filename, index=False)

    if create_plot:
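        # note: this assumes period_start and period_end both appear in the station's record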
        idx_start = df[df.date == period_start].index[0]
        idx_end = df[df.date == period_end].index[0]
        df = df[idx_start:idx_end]
        df.index = range(len(df))
        df['date'] = pd.to_datetime(df.date).dt.date
        # take the average of all baseflow estimation methods and put it in a new column
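        # (after the drop above, columns 1:4 are bf_filter, bf_UKIH and bf_Eckhardt)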
        df['bf_mean'] = df.iloc[:, 1:4].mean(axis=1)
        df.set_index('date', inplace=True)

        stationname = filename.split('-')[1]
        stationname = stationname.split('_')[0]
        if stationname == 'Craghall': stationname = 'Crag Hall'

        fig, ax = plt.subplots(1, 1, figsize=(10, 6), constrained_layout=False)
        ax.set_title(stationname)
        ax.plot(df.bf_filter, label='One-parameter RDF')
        ax.plot(df.bf_UKIH, label='UKIH')
        ax.plot(df.bf_Eckhardt, label='Eckhardt')
        ax.plot(df.bf_mean, label='Average')
        ax.set_xlabel('Date')
        ax.set_ylabel('Baseflow ($m^3$/s)')
        ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%Y"))
        ax.tick_params(axis='x', labelrotation=45)
        ax.legend()

        export_filename = filename.split('-')[:2]
        export_filename = '-'.join(export_filename)
        export_filename = export_filename + '_baseflow.png'
        export_sub_dir = os.path.normpath('plots_daily')
        if not os.path.exists(os.path.join(export_dir, export_sub_dir)):
            os.makedirs(os.path.join(export_dir, export_sub_dir))
        export_filename = os.path.join(export_dir, export_sub_dir, export_filename)
        plt.savefig(export_filename, bbox_inches='tight')
        # close the figure so figures do not pile up across stations
        plt.close(fig)