Skip to content

Commit efad292

Browse files
rzinkerzinke
and
rzinke
authored
GNSS functions to gnss_utils and reuse filtered gnss data (#60)
Co-authored-by: rzinke <[email protected]>
1 parent 2cba73d commit efad292

File tree

2 files changed

+398
-358
lines changed

2 files changed

+398
-358
lines changed

methods/secular/Secular_Requirement_Validation.ipynb

+248-358
Large diffs are not rendered by default.

solid_utils/gnss_utils.py

+150
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
import numpy as np
2+
import math
3+
import warnings
4+
5+
from solid_utils.variogram import remove_trend
6+
from mintpy.utils import time_func, utils as ut
7+
8+
def retrieve_gnss_component(gnss_stn, component:str):
9+
"""Retrieve component of motion.
10+
11+
Parameters: gnss_stn - GNSS obj, GNSS data with displacement time-series
12+
component - str, component of motion (E, N, U, or LOS)
13+
Returns: dis - np.ndarray, displacement time-series
14+
"""
15+
# Check component specified
16+
component = component.upper()
17+
if component not in ['E', 'N', 'U', 'LOS']:
18+
raise Exception('Component of motion must be one of E, N, U, or LOS')
19+
20+
# Retrieve component of motion
21+
if component == 'E':
22+
return gnss_stn.dis_e
23+
elif component == 'N':
24+
return gnss_stn.dis_n
25+
elif component == 'U':
26+
return gnss_stn.dis_u
27+
elif component == 'LOS':
28+
return gnss_stn.dis_los
29+
30+
def model_gnss_timeseries(gnss_stn, component:str, model:dict):
31+
"""Model a GNSS displacement time-series.
32+
33+
Parameters: gnss_stn - GNSS obj, GNSS data with displacement time-series
34+
component - str, component of motion (E, N, U, or LOS)
35+
model - dict, time function model
36+
Returns: dis_hat - np.ndarray, array of predicted displacement values
37+
mhat - np.ndarray, model fit parameters
38+
mhat_se - np.ndarray, standard errors for model fit params
39+
"""
40+
# Construct design matrix from dates and model
41+
date_list = [date.strftime('%Y%m%d') for date in gnss_stn.dates]
42+
G = time_func.get_design_matrix4time_func(date_list, model)
43+
44+
# Invert for model parameters
45+
dis = retrieve_gnss_component(gnss_stn, component)
46+
m_hat = np.linalg.pinv(G).dot(dis)
47+
48+
# Predict displacements
49+
dis_hat = np.dot(G, m_hat)
50+
51+
# Quantify error on model parameters
52+
resids = dis - dis_hat
53+
sse = np.sum(resids**2)
54+
n = len(dis_hat)
55+
dof = len(m_hat)
56+
c = sse/(n-dof) * np.linalg.inv(np.dot(G.T, G))
57+
mhat_se = np.sqrt(np.diag(c))
58+
59+
return dis_hat, m_hat, mhat_se
60+
61+
def modify_gnss_series(gnss_stn, remove_ndxs):
62+
"""Remove dates from all components of a GNSS time-series based on a
63+
logical array.
64+
65+
Parameters: gnss_stn - GNSS obj, GNSS data with displacement time-series
66+
remove_ndxs - np.ndarray, boolean array where True indicates a
67+
value to remove
68+
Returns: gnss_stn - GNSS obj, modified GNSS data
69+
"""
70+
gnss_stn.dates = gnss_stn.dates[~remove_ndxs]
71+
gnss_stn.dis_e = gnss_stn.dis_e[~remove_ndxs]
72+
gnss_stn.dis_n = gnss_stn.dis_n[~remove_ndxs]
73+
gnss_stn.dis_u = gnss_stn.dis_u[~remove_ndxs]
74+
gnss_stn.std_e = gnss_stn.std_e[~remove_ndxs]
75+
gnss_stn.std_n = gnss_stn.std_n[~remove_ndxs]
76+
gnss_stn.std_u = gnss_stn.std_u[~remove_ndxs]
77+
if hasattr(gnss_stn, 'dis_los'):
78+
gnss_stn.dis_los = gnss_stn.dis_los[~remove_ndxs]
79+
gnss_stn.std_los = gnss_stn.std_los[~remove_ndxs]
80+
81+
return gnss_stn
82+
83+
def outliers_zscore(dis:np.ndarray, dis_hat:np.ndarray, threshold:float):
84+
"""Identify outliers using the z-score metric.
85+
86+
Compute the number of standard deviations the data are from the mean
87+
and return the indices of values greater than the specified threshold.
88+
89+
Parameters: dis - np.ndarray, array of displacement values
90+
dis_hat - np.ndarray, array of predicted displacement values
91+
threshold - float, z-score value (standard deviation)
92+
beyond which to exclude data
93+
Returns: outlier_ndxs - np.ndarray, boolean array where True
94+
indicates an outlier
95+
n_outliers - int, number of outliers
96+
"""
97+
zscores = (dis - dis_hat) / np.std(dis)
98+
outlier_ndxs = np.abs(zscores) > threshold
99+
n_outliers = np.sum(outlier_ndxs)
100+
101+
return outlier_ndxs, n_outliers
102+
103+
def remove_gnss_outliers(gnss_stn, component:str, model:dict,
104+
threshold=3, max_iter=2, verbose=False):
105+
"""Determine which data points are outliers based on the z-score
106+
metric and remove those points.
107+
108+
Parameters: gnss_stn - GNSS obj, GNSS data with displacement time-series
109+
component - str, component of motion (E, N, U, or LOS)
110+
model - dict, time function model
111+
threshold - float, standard deviations beyond which values
112+
are considered outliers
113+
max_iter - int, maximutm number of iterations before stopping
114+
Returns: gnss_stn - GNSS obj, GNSS data with outliers removed
115+
"""
116+
if verbose == True:
117+
print('Station {:s} original data set size: {:d}'.\
118+
format(gnss_stn.site, len(gnss_stn.dates)))
119+
120+
# Retrieve observed values and predict values based on model
121+
dis = retrieve_gnss_component(gnss_stn, component)
122+
dis_hat, _, _ = model_gnss_timeseries(gnss_stn, component, model)
123+
124+
# Determine outliers based on z-score
125+
outlier_ndxs, n_outliers = outliers_zscore(dis, dis_hat, threshold)
126+
127+
# Initialize counter
128+
i = 0
129+
130+
# Remove outliers from data set
131+
while (n_outliers > 0) or (i < max_iter):
132+
if verbose == True:
133+
print(f'nb outliers: {n_outliers:d}')
134+
135+
# Update time series
136+
modify_gnss_series(gnss_stn, outlier_ndxs)
137+
138+
# Update number of outliers
139+
dis = retrieve_gnss_component(gnss_stn, component)
140+
dis_hat, _, _ = model_gnss_timeseries(gnss_stn, component, model)
141+
outlier_ndxs, n_outliers = outliers_zscore(dis, dis_hat, threshold)
142+
143+
# Update counter
144+
i += 1
145+
146+
if verbose == True:
147+
print('Station {:s} final data set size: {:d}'.\
148+
format(gnss_stn.site, len(gnss_stn.dates)))
149+
150+
return gnss_stn

0 commit comments

Comments
 (0)