|
| 1 | +import numpy as np |
| 2 | +import math |
| 3 | +import warnings |
| 4 | + |
| 5 | +from solid_utils.variogram import remove_trend |
| 6 | +from mintpy.utils import time_func, utils as ut |
| 7 | + |
| 8 | +def retrieve_gnss_component(gnss_stn, component:str): |
| 9 | + """Retrieve component of motion. |
| 10 | +
|
| 11 | + Parameters: gnss_stn - GNSS obj, GNSS data with displacement time-series |
| 12 | + component - str, component of motion (E, N, U, or LOS) |
| 13 | + Returns: dis - np.ndarray, displacement time-series |
| 14 | + """ |
| 15 | + # Check component specified |
| 16 | + component = component.upper() |
| 17 | + if component not in ['E', 'N', 'U', 'LOS']: |
| 18 | + raise Exception('Component of motion must be one of E, N, U, or LOS') |
| 19 | + |
| 20 | + # Retrieve component of motion |
| 21 | + if component == 'E': |
| 22 | + return gnss_stn.dis_e |
| 23 | + elif component == 'N': |
| 24 | + return gnss_stn.dis_n |
| 25 | + elif component == 'U': |
| 26 | + return gnss_stn.dis_u |
| 27 | + elif component == 'LOS': |
| 28 | + return gnss_stn.dis_los |
| 29 | + |
| 30 | +def model_gnss_timeseries(gnss_stn, component:str, model:dict): |
| 31 | + """Model a GNSS displacement time-series. |
| 32 | +
|
| 33 | + Parameters: gnss_stn - GNSS obj, GNSS data with displacement time-series |
| 34 | + component - str, component of motion (E, N, U, or LOS) |
| 35 | + model - dict, time function model |
| 36 | + Returns: dis_hat - np.ndarray, array of predicted displacement values |
| 37 | + mhat - np.ndarray, model fit parameters |
| 38 | + mhat_se - np.ndarray, standard errors for model fit params |
| 39 | + """ |
| 40 | + # Construct design matrix from dates and model |
| 41 | + date_list = [date.strftime('%Y%m%d') for date in gnss_stn.dates] |
| 42 | + G = time_func.get_design_matrix4time_func(date_list, model) |
| 43 | + |
| 44 | + # Invert for model parameters |
| 45 | + dis = retrieve_gnss_component(gnss_stn, component) |
| 46 | + m_hat = np.linalg.pinv(G).dot(dis) |
| 47 | + |
| 48 | + # Predict displacements |
| 49 | + dis_hat = np.dot(G, m_hat) |
| 50 | + |
| 51 | + # Quantify error on model parameters |
| 52 | + resids = dis - dis_hat |
| 53 | + sse = np.sum(resids**2) |
| 54 | + n = len(dis_hat) |
| 55 | + dof = len(m_hat) |
| 56 | + c = sse/(n-dof) * np.linalg.inv(np.dot(G.T, G)) |
| 57 | + mhat_se = np.sqrt(np.diag(c)) |
| 58 | + |
| 59 | + return dis_hat, m_hat, mhat_se |
| 60 | + |
| 61 | +def modify_gnss_series(gnss_stn, remove_ndxs): |
| 62 | + """Remove dates from all components of a GNSS time-series based on a |
| 63 | + logical array. |
| 64 | +
|
| 65 | + Parameters: gnss_stn - GNSS obj, GNSS data with displacement time-series |
| 66 | + remove_ndxs - np.ndarray, boolean array where True indicates a |
| 67 | + value to remove |
| 68 | + Returns: gnss_stn - GNSS obj, modified GNSS data |
| 69 | + """ |
| 70 | + gnss_stn.dates = gnss_stn.dates[~remove_ndxs] |
| 71 | + gnss_stn.dis_e = gnss_stn.dis_e[~remove_ndxs] |
| 72 | + gnss_stn.dis_n = gnss_stn.dis_n[~remove_ndxs] |
| 73 | + gnss_stn.dis_u = gnss_stn.dis_u[~remove_ndxs] |
| 74 | + gnss_stn.std_e = gnss_stn.std_e[~remove_ndxs] |
| 75 | + gnss_stn.std_n = gnss_stn.std_n[~remove_ndxs] |
| 76 | + gnss_stn.std_u = gnss_stn.std_u[~remove_ndxs] |
| 77 | + if hasattr(gnss_stn, 'dis_los'): |
| 78 | + gnss_stn.dis_los = gnss_stn.dis_los[~remove_ndxs] |
| 79 | + gnss_stn.std_los = gnss_stn.std_los[~remove_ndxs] |
| 80 | + |
| 81 | + return gnss_stn |
| 82 | + |
| 83 | +def outliers_zscore(dis:np.ndarray, dis_hat:np.ndarray, threshold:float): |
| 84 | + """Identify outliers using the z-score metric. |
| 85 | +
|
| 86 | + Compute the number of standard deviations the data are from the mean |
| 87 | + and return the indices of values greater than the specified threshold. |
| 88 | +
|
| 89 | + Parameters: dis - np.ndarray, array of displacement values |
| 90 | + dis_hat - np.ndarray, array of predicted displacement values |
| 91 | + threshold - float, z-score value (standard deviation) |
| 92 | + beyond which to exclude data |
| 93 | + Returns: outlier_ndxs - np.ndarray, boolean array where True |
| 94 | + indicates an outlier |
| 95 | + n_outliers - int, number of outliers |
| 96 | + """ |
| 97 | + zscores = (dis - dis_hat) / np.std(dis) |
| 98 | + outlier_ndxs = np.abs(zscores) > threshold |
| 99 | + n_outliers = np.sum(outlier_ndxs) |
| 100 | + |
| 101 | + return outlier_ndxs, n_outliers |
| 102 | + |
| 103 | +def remove_gnss_outliers(gnss_stn, component:str, model:dict, |
| 104 | + threshold=3, max_iter=2, verbose=False): |
| 105 | + """Determine which data points are outliers based on the z-score |
| 106 | + metric and remove those points. |
| 107 | +
|
| 108 | + Parameters: gnss_stn - GNSS obj, GNSS data with displacement time-series |
| 109 | + component - str, component of motion (E, N, U, or LOS) |
| 110 | + model - dict, time function model |
| 111 | + threshold - float, standard deviations beyond which values |
| 112 | + are considered outliers |
| 113 | + max_iter - int, maximutm number of iterations before stopping |
| 114 | + Returns: gnss_stn - GNSS obj, GNSS data with outliers removed |
| 115 | + """ |
| 116 | + if verbose == True: |
| 117 | + print('Station {:s} original data set size: {:d}'.\ |
| 118 | + format(gnss_stn.site, len(gnss_stn.dates))) |
| 119 | + |
| 120 | + # Retrieve observed values and predict values based on model |
| 121 | + dis = retrieve_gnss_component(gnss_stn, component) |
| 122 | + dis_hat, _, _ = model_gnss_timeseries(gnss_stn, component, model) |
| 123 | + |
| 124 | + # Determine outliers based on z-score |
| 125 | + outlier_ndxs, n_outliers = outliers_zscore(dis, dis_hat, threshold) |
| 126 | + |
| 127 | + # Initialize counter |
| 128 | + i = 0 |
| 129 | + |
| 130 | + # Remove outliers from data set |
| 131 | + while (n_outliers > 0) or (i < max_iter): |
| 132 | + if verbose == True: |
| 133 | + print(f'nb outliers: {n_outliers:d}') |
| 134 | + |
| 135 | + # Update time series |
| 136 | + modify_gnss_series(gnss_stn, outlier_ndxs) |
| 137 | + |
| 138 | + # Update number of outliers |
| 139 | + dis = retrieve_gnss_component(gnss_stn, component) |
| 140 | + dis_hat, _, _ = model_gnss_timeseries(gnss_stn, component, model) |
| 141 | + outlier_ndxs, n_outliers = outliers_zscore(dis, dis_hat, threshold) |
| 142 | + |
| 143 | + # Update counter |
| 144 | + i += 1 |
| 145 | + |
| 146 | + if verbose == True: |
| 147 | + print('Station {:s} final data set size: {:d}'.\ |
| 148 | + format(gnss_stn.site, len(gnss_stn.dates))) |
| 149 | + |
| 150 | + return gnss_stn |
0 commit comments