Sp3 changes + unittesting #32

Merged: 30 commits, Jul 26, 2024
Changes shown from 16 of 30 commits.
fe3b989
testing new sp3reading
seballgeyer Jul 4, 2024
29be020
chore: mostly formating, extract read block sp3
seballgeyer Jul 18, 2024
0b56e60
Merge branch 'main' into sp3Changes Manual resolution in sp3
seballgeyer Jul 18, 2024
a10301a
fix: post merge fix
seballgeyer Jul 18, 2024
0c68aaf
chore: Handle file not found and generic exceptions in path2bytes fun…
seballgeyer Jul 18, 2024
bb07e60
feat: Add unit tests for GPSDate class
seballgeyer Jul 18, 2024
0c54473
chore: updating test_aux
seballgeyer Jul 18, 2024
948837a
refactor: Signature to use square brackets for tuple annotation
seballgeyer Jul 18, 2024
8401b57
chore: Refactor code to use float64 dtype for rotation matrix in gn_t…
seballgeyer Jul 18, 2024
8edaade
add: pipeline CICD unittesting
seballgeyer Jul 18, 2024
1518b1c
fix: quote
seballgeyer Jul 18, 2024
5c82dd6
fix: more dependencies
seballgeyer Jul 18, 2024
d89290d
fix: even more pip install
seballgeyer Jul 18, 2024
de7507e
fix: more pip packages
seballgeyer Jul 18, 2024
f46e274
test: adding some of my easy unittest for read sp3.
seballgeyer Jul 19, 2024
19e6834
clean: removing print statement
seballgeyer Jul 19, 2024
ce2b885
add: extra small unittest
seballgeyer Jul 22, 2024
eb74bb5
doc: adding doc
seballgeyer Jul 22, 2024
9523322
Merge remote-tracking branch 'origin/sp3Changes' into sp3Changes
seballgeyer Jul 22, 2024
fefbaba
fix: typo in type hint
seballgeyer Jul 22, 2024
e7a4747
doc: more doc and type hints.
seballgeyer Jul 22, 2024
0820e59
doc: more docs.
seballgeyer Jul 22, 2024
59c04f8
refac: reducing size of read_sp3.
seballgeyer Jul 24, 2024
a8bcf60
add: implementation of the requirements.txt file.
seballgeyer Jul 24, 2024
1631d32
fix: sp3 veloctity determination.
seballgeyer Jul 24, 2024
fe81af2
review: answering comments
seballgeyer Jul 24, 2024
9e96872
temp changes for Eugene
seballgeyer Jul 25, 2024
0f9fc27
fix: WIP better temporary solution, just fail if velocities are in SP3
seballgeyer Jul 25, 2024
f8e6493
fix: handling of velocities in SP3
seballgeyer Jul 25, 2024
1dc5be6
review: answering comment.
seballgeyer Jul 25, 2024
21 changes: 21 additions & 0 deletions .github/workflows/python-cicd-units.yaml
@@ -0,0 +1,21 @@
name: Python CICD pipelines - Unittesting

on: [push]

jobs:
build:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- name: Set up Python 3.10
uses: actions/setup-python@v2
with:
python-version: "3.10"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install boto3 click hatanaka matplotlib numpy pandas plotext==4.2 plotly pymongo pytest scipy tqdm unlzw3 typing_extensions
Collaborator comment:
This duplicates specification of our requirements. It's not as simple as just pointing at requirements.txt though, as there isn't one.

Requirements are specified in setup.py, though that could be modified to read them from a requirements.txt file for consistency... I prototyped that idea and it works, but it's hacky because I have to handle possible commenting. Time for me to read about the differences between setup.py and requirements.txt and how to do this properly...
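The setup.py-reads-requirements.txt idea mentioned above can be prototyped in a few lines. This is only a sketch of that approach: the helper name and file layout are assumptions, not code from this repository, and the comment-stripping is exactly the "hacky" part the comment alludes to.

```python
# Sketch: reading install_requires from a requirements.txt inside setup.py.
# parse_requirements is a hypothetical helper, not part of gnssanalysis.
from pathlib import Path


def parse_requirements(path: str = "requirements.txt") -> list[str]:
    """Return requirement specifiers, skipping comments and blank lines."""
    reqs = []
    for line in Path(path).read_text().splitlines():
        # Strip inline comments ("numpy  # pinned") and surrounding whitespace
        spec = line.split("#", 1)[0].strip()
        if spec:
            reqs.append(spec)
    return reqs
```

In setup.py this would feed `install_requires=parse_requirements()`, keeping the CI workflow and package metadata from drifting apart.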

- name: Run tests
run: |
python -m unittest discover -s tests -v
2 changes: 1 addition & 1 deletion .gitignore
@@ -12,4 +12,4 @@ __pycache__
.vscode
.idea
# Other files
scratch
scratch/
21 changes: 14 additions & 7 deletions gnssanalysis/gn_io/common.py
@@ -1,4 +1,5 @@
"""Base functions for file reading"""

import base64 as _base64
import gzip as _gzip
import hashlib as _hashlib
@@ -27,13 +28,19 @@ def path2bytes(path: _Union[_Path, str, bytes]) -> bytes:

if isinstance(path, _Path):
path = path.as_posix()

if path.endswith(".Z"):
databytes = _lzw2bytes(path)
elif path.endswith(".gz"):
databytes = _gz2bytes(path)
else:
databytes = _txt2bytes(path)
try:
if path.endswith(".Z"):
databytes = _lzw2bytes(path)
elif path.endswith(".gz"):
databytes = _gz2bytes(path)
else:
databytes = _txt2bytes(path)
except FileNotFoundError:
_logging.error(f"File {path} not found. Returning empty bytes.")
return None
except Exception as e:
_logging.error(f"Error reading file {path} with error {e}. Returning empty bytes.")
return None
return databytes
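The try/except dispatch added here can be illustrated with a minimal standalone sketch. `path2bytes_sketch` is illustrative only; the real function also handles `.Z` LZW files via `_lzw2bytes`.

```python
import gzip
import logging
from pathlib import Path
from typing import Optional, Union


def path2bytes_sketch(path: Union[Path, str]) -> Optional[bytes]:
    """Read a file as bytes, gunzipping by suffix; return None on failure.

    Mirrors the dispatch/except pattern above (.Z handling omitted).
    """
    path = str(path)
    try:
        if path.endswith(".gz"):
            with gzip.open(path, "rb") as f:
                return f.read()
        with open(path, "rb") as f:
            return f.read()
    except FileNotFoundError:
        logging.error("File %s not found. Returning None.", path)
        return None
    except Exception as e:
        logging.error("Error reading file %s: %s. Returning None.", path, e)
        return None
```

Note that the committed version logs "Returning empty bytes" while actually returning `None`, so callers must check for `None` explicitly rather than relying on falsy empty bytes.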


159 changes: 67 additions & 92 deletions gnssanalysis/gn_io/sp3.py
@@ -1,6 +1,4 @@
import logging

"""Ephemeris functions"""
import io as _io
import os as _os
import re as _re
@@ -14,7 +12,6 @@
from .. import gn_datetime as _gn_datetime
from .. import gn_io as _gn_io
from .. import gn_transform as _gn_transform

from .. import gn_const

logger = logging.getLogger(__name__)
@@ -44,9 +41,7 @@
SP3_POS_STD_NODATA = -100


def sp3_pos_nodata_to_nan(
sp3_df: _pd.DataFrame
) -> None:
def sp3_pos_nodata_to_nan(sp3_df: _pd.DataFrame) -> None:
"""
Converts the SP3 Positional column's nodata values (0.000000) to NaNs.
See https://files.igs.org/pub/data/format/sp3_docu.txt
@@ -59,12 +54,10 @@ def sp3_pos_nodata_to_nan(
& (sp3_df[("EST", "Y")] == SP3_POS_NODATA_NUMERIC)
& (sp3_df[("EST", "Z")] == SP3_POS_NODATA_NUMERIC)
)
sp3_df.loc[nan_mask, [("EST", "X"), ("EST", "Y"), ("EST", "Z")]] = _np.NAN
sp3_df.loc[nan_mask, [("EST", "X"), ("EST", "Y"), ("EST", "Z")]] = _np.nan


def sp3_clock_nodata_to_nan(
sp3_df: _pd.DataFrame
) -> None:
def sp3_clock_nodata_to_nan(sp3_df: _pd.DataFrame) -> None:
"""
Converts the SP3 Clock column's nodata values (999999 or 999999.999999 - the fractional component optional) to NaNs.
See https://files.igs.org/pub/data/format/sp3_docu.txt
@@ -73,7 +66,7 @@ def sp3_clock_nodata_to_nan(
:return None
"""
nan_mask = sp3_df[("EST", "CLK")] >= SP3_CLOCK_NODATA_NUMERIC
sp3_df.loc[nan_mask, ("EST", "CLK")] = _np.NAN
sp3_df.loc[nan_mask, ("EST", "CLK")] = _np.nan
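The masking pattern shared by both nodata helpers can be demonstrated on a toy frame with the same `(EST, CLK)` MultiIndex column layout; the values below are illustrative, not real SP3 data.

```python
import numpy as np
import pandas as pd

SP3_CLOCK_NODATA_NUMERIC = 999999  # same threshold as used above

# Toy frame: one valid clock, one nodata sentinel, another valid clock
df = pd.DataFrame({("EST", "CLK"): [12.5, 999999.999999, 3.1]})

# Mask everything at or above the sentinel, then overwrite with NaN in place
nan_mask = df[("EST", "CLK")] >= SP3_CLOCK_NODATA_NUMERIC
df.loc[nan_mask, ("EST", "CLK")] = np.nan
```

Operating in place on the caller's DataFrame is why both helpers return `None`.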


def mapparm(old, new):
@@ -85,104 +78,86 @@ def mapparm(old, new):
return off, scl


def _process_sp3_block(date, data, widths, names):
"""Process a single block of SP3 data"""
if not data or len(data) == 0:
return _pd.DataFrame()
epochs_dt = _pd.to_datetime(_pd.Series(date).str.slice(2, 21).values.astype(str), format=r"%Y %m %d %H %M %S")
temp_sp3 = _pd.read_fwf(_io.StringIO(data), widths=widths, names=names)
dt_index = _np.repeat(a=_gn_datetime.datetime2j2000(epochs_dt.values), repeats=len(temp_sp3))
temp_sp3.set_index(dt_index, inplace=True)
temp_sp3.index.name = "J2000"
temp_sp3.set_index(temp_sp3.PRN.astype(str), append=True, inplace=True)
temp_sp3.set_index(temp_sp3.PV_FLAG.astype(str), append=True, inplace=True)
return temp_sp3
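`_process_sp3_block` parses each epoch block with pandas' fixed-width reader. A trimmed sketch, using only the first six of the twenty fields and invented data lines, shows the mechanism:

```python
import io

import pandas as pd

# Two invented SP3 position lines (not real ephemeris data)
block = (
    "PG01  10661.181372 -20862.167796  12785.042738     45.597634\n"
    "PG02 -12593.593500  10170.327650 -20354.534400    123.456789\n"
)
# First six of the widths/names used in read_sp3 below
widths = [1, 3, 14, 14, 14, 14]
names = ["PV_FLAG", "PRN", "x_coordinate", "y_coordinate", "z_coordinate", "clock"]

df = pd.read_fwf(io.StringIO(block), widths=widths, names=names)
```

Each column is sliced purely by character position, which is why the full width list must account for every unused separator column in the SP3 record layout.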


def read_sp3(sp3_path, pOnly=True, nodata_to_nan=True):
Collaborator comment:

The output type-hint is missing here.
Also, I think @EugeneDu-GA should test any sp3 files he was having trouble with against this function

"""Read SP3 file
Returns STD values converted to proper units (mm/ps) also if present.
by default leaves only P* values (positions), removing the P key itself
"""
content = _gn_io.common.path2bytes(str(sp3_path))
header_end = content.find(b"/*")

header = content[:header_end]
content = content[header_end:]

parsed_header = parse_sp3_header(header)

counts = parsed_header.SV_INFO.count()
fline_b = header.find(b"%f") + 2 # TODO add to header parser
fline = header[fline_b : fline_b + 24].strip().split(b" ")
base_xyzc = _np.asarray([float(fline[0])] * 3 + [float(fline[1])]) # exponent base

_RE_SP3 = _re.compile(rb"^\*(.+)\n(.+).+", _re.MULTILINE)
data_blocks = _np.asarray(_RE_SP3.findall(string=content[: content.rfind(b"EOF")]))

dates = data_blocks[:, 0]
data = data_blocks[:, 1]
if not data[-1].endswith(b"\n"):
data[-1] += b"\n"

counts = _np.char.count(data, b"\n")

epochs_dt = _pd.to_datetime(_pd.Series(dates).str.slice(2, 21).values.astype(str), format=r"%Y %m %d %H %M %S")

dt_index = _np.repeat(a=_gn_datetime.datetime2j2000(epochs_dt.values), repeats=counts)
b_string = b"".join(data.tolist())

series = _pd.Series(b_string.splitlines())
data_width = series.str.len()
missing = b" " * (data_width.max() - data_width).values.astype(object)
series += missing # rows need to be of equal len
data_test = series.str[4:60].values.astype("S56").view(("S14")).reshape(series.shape[0], -1).astype(float)

if parsed_header.HEAD.ORB_TYPE in ["FIT", "INT"]:
sp3_df = _pd.DataFrame(data_test).astype(float)
sp3_df.columns = [
["EST", "EST", "EST", "EST"],
[
"X",
"Y",
"Z",
"CLK",
],
]

else: # parsed_header.HEAD.ORB_TYPE == 'HLM':
# might need to output log message
std = (series.str[60:69].values + series.str[70:73].values).astype("S12").view("S3").astype(object)
std[std == b" "] = None
std = std.astype(float).reshape(series.shape[0], -1)

ind = (series.str[75:76].values + series.str[79:80].values).astype("S2").view("S1")
ind[ind == b" "] = b""
ind = ind.reshape(series.shape[0], -1).astype(str)

sp3_df = _pd.DataFrame(
_np.column_stack([data_test, std, ind]),
).astype(
{
0: float,
1: float,
2: float,
3: float,
4: float,
5: float,
6: float,
7: float,
8: "category",
9: "category",
}
)
sp3_df.columns = [
["EST", "EST", "EST", "EST", "STD", "STD", "STD", "STD", "P_XYZ", "P_CLK"],
["X", "Y", "Z", "CLK", "X", "Y", "Z", "CLK", "", ""],
]
sp3_df.STD = base_xyzc**sp3_df.STD.values

# Compile the regular expression pattern
pattern = _re.compile(r"^\*(.+)$", _re.MULTILINE)
# Split the content by the lines starting with '*'
blocks = pattern.split(content[: content.rfind(b"EOF")].decode())
date_lines = blocks[1::2]
data_blocks = _np.asarray(blocks[2::2])
# print(data_blocks)
widths = [1, 3, 14, 14, 14, 14, 1, 2, 1, 2, 1, 2, 1, 3, 1, 1, 1, 1, 1, 1]
names = [
"PV_FLAG",
"PRN",
"x_coordinate",
"y_coordinate",
"z_coordinate",
"clock",
"Unused1",
"x_sdev",
"Unused2",
"y_sdev",
"Unused3",
"z_sdev",
"Unused4",
"c_sdev",
"Unused5",
"Clock_Event_Flag",
"Clock_Pred_Flag",
"Unused6",
"Maneuver_Flag",
"Orbit_Pred_Flag",
]
name_float = ["x_coordinate", "y_coordinate", "z_coordinate", "clock", "x_sdev", "y_sdev", "z_sdev", "c_sdev"]
sp3_df = _pd.concat([_process_sp3_block(date, data, widths, names) for date, data in zip(date_lines, data_blocks)])
sp3_df[name_float] = sp3_df[name_float].apply(_pd.to_numeric, errors="coerce")
sp3_df = sp3_df.loc[:, ~sp3_df.columns.str.startswith("Unused")]
# remove PRN and PV_FLAG columns
sp3_df = sp3_df.drop(columns=["PRN", "PV_FLAG"])
# rename columsn x_coordinate -> [EST, X], y_coordinate -> [EST, Y]
sp3_df.columns = [
["EST", "EST", "EST", "EST", "STD", "STD", "STD", "STD", "a1", "a2", "a3", "a4"],
["X", "Y", "Z", "CLK", "X", "Y", "Z", "CLK", "", "", "", ""],
]
if nodata_to_nan:
sp3_pos_nodata_to_nan(sp3_df) # Convert 0.000000 (which indicates nodata in the SP3 POS column) to NaN
sp3_clock_nodata_to_nan(sp3_df) # Convert 999999* (which indicates nodata in the SP3 CLK column) to NaN

# print(sp3_df.index.has_duplicates())
if pOnly or parsed_header.HEAD.loc["PV_FLAG"] == "P":
pMask = series.astype("S1") == b"P"
sp3_df = sp3_df[pMask].set_index([dt_index[pMask], series.str[1:4].values[pMask].astype(str).astype(object)])
sp3_df.index.names = ("J2000", "PRN")
else:
sp3_df = sp3_df.set_index(
[dt_index, series.values.astype("U1"), series.str[1:4].values.astype(str).astype(object)]
)
sp3_df.index.names = ("J2000", "PV_FLAG", "PRN")

sp3_df = sp3_df[sp3_df.index.get_level_values("PV_FLAG") == "P"]
sp3_df.attrs["HEADER"] = parsed_header # writing header data to dataframe attributes
sp3_df.attrs["path"] = sp3_path

# Check for duplicate epochs, dedupe and log warning
if sp3_df.index.has_duplicates: # a literaly free check
duplicated_indexes = sp3_df.index.duplicated() # Typically sub ms time. Marks all but first instance as duped.
@@ -193,7 +168,8 @@ def read_sp3(sp3_path, pOnly=True, nodata_to_nan=True):
f"SP3 path is: '{str(sp3_path)}'. Duplicates will be removed, keeping first."
)
sp3_df = sp3_df[~sp3_df.index.duplicated(keep="first")] # Now dedupe them, keeping the first of any clashes

sp3_df.attrs["HEADER"] = parsed_header # writing header data to dataframe attributes
sp3_df.attrs["path"] = sp3_path
return sp3_df
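The duplicated-epoch handling above boils down to pandas' `Index.duplicated`, which marks every occurrence after the first; a minimal illustration with toy labels:

```python
import pandas as pd

# Duplicate (epoch, PRN)-style labels; values are illustrative
s = pd.Series([1.0, 2.0, 3.0], index=["t1-G01", "t1-G01", "t2-G01"])

assert s.index.has_duplicates  # cheap property check before the full scan
deduped = s[~s.index.duplicated(keep="first")]  # keep first of each clash
```

`keep="first"` matches the behaviour described in the warning: all but the first instance of a clashing (J2000, PRN) pair are dropped.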


@@ -228,7 +204,7 @@ def parse_sp3_header(header):
def getVelSpline(sp3Df):
"""returns in the same units as intput, e.g. km/s (needs to be x10000 to be in cm as per sp3 standard"""
sp3dfECI = sp3Df.EST.unstack(1)[["X", "Y", "Z"]] # _ecef2eci(sp3df)
datetime = sp3dfECI.index.values
Collaborator comment:

Did this not work previously?

datetime = sp3dfECI.index.get_level_values("J2000")
spline = _interpolate.CubicSpline(datetime, sp3dfECI.values)
velDf = _pd.DataFrame(data=spline.derivative(1)(datetime), index=sp3dfECI.index, columns=sp3dfECI.columns).stack(1)
return _pd.concat([sp3Df, _pd.concat([velDf], keys=["VELi"], axis=1)], axis=1)
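`getVelSpline`'s derivative trick can be checked on a toy trajectory where the analytic velocity is known (x = t² gives v = 2t); the numbers are illustrative, not ephemeris data.

```python
import numpy as np
from scipy.interpolate import CubicSpline

t = np.array([0.0, 1.0, 2.0, 3.0, 4.0])  # epochs (e.g. J2000 seconds)
x = t ** 2                                # toy position series

# Fit the spline, then evaluate its first derivative at the same epochs,
# exactly as getVelSpline does per coordinate column
spline = CubicSpline(t, x)
vel = spline.derivative(1)(t)
```

Because the default not-a-knot spline reproduces low-degree polynomials exactly, the recovered velocity matches 2t here; on real orbits the accuracy depends on the epoch spacing.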
@@ -496,7 +472,7 @@ def sp3merge(sp3paths, clkpaths=None, nodata_to_nan=False):
return merged_sp3


def sp3_hlm_trans(a: _pd.DataFrame, b: _pd.DataFrame) -> tuple((_pd.DataFrame, list)):
def sp3_hlm_trans(a: _pd.DataFrame, b: _pd.DataFrame) -> tuple[_pd.DataFrame, list]:
"""Rotates sp3_b into sp3_a. Returns a tuple of updated sp3_b and HLM array with applied computed parameters and residuals"""
hlm = _gn_transform.get_helmert7(pt1=a.EST[["X", "Y", "Z"]].values, pt2=b.EST[["X", "Y", "Z"]].values)
b.iloc[:, :3] = _gn_transform.transform7(xyz_in=b.EST[["X", "Y", "Z"]].values, hlm_params=hlm[0])
@@ -539,7 +515,6 @@ def diff_sp3_rac(
sp3_baseline_eci_vel = getVelSpline(sp3Df=sp3_baseline_eci)
else:
sp3_baseline_eci_vel = getVelPoly(sp3Df=sp3_baseline_eci, deg=35)

nd_rac = diff_eci.values[:, _np.newaxis] @ _gn_transform.eci2rac_rot(sp3_baseline_eci_vel)
df_rac = _pd.DataFrame(
nd_rac.reshape(-1, 3),
20 changes: 10 additions & 10 deletions gnssanalysis/gn_transform.py
@@ -1,4 +1,5 @@
"""Helmert inversion and transformation functions"""

import numpy as _np
import pandas as _pd

@@ -25,20 +26,18 @@ def gen_helm_aux(pt1, pt2):
xyz_blk[:, 1, 0] = pt1[:, 2] # z[1,0]

xyz = pt1.reshape((-1, 1))
A = _np.column_stack(
[unity_blk, xyz_blk.reshape((-1, 3)), xyz]
)
A = _np.column_stack([unity_blk, xyz_blk.reshape((-1, 3)), xyz])
rhs = pt2.reshape((-1, 1)) - xyz # right-hand side
return A, rhs


def get_helmert7(pt1:_np.ndarray, pt2:_np.ndarray, scale_in_ppm:bool = True):
def get_helmert7(pt1: _np.ndarray, pt2: _np.ndarray, scale_in_ppm: bool = True):
"""inversion of 7 Helmert parameters between 2 sets of points. pt1@hlm -> pt2"""
A, rhs = gen_helm_aux(pt1, pt2)
sol = list(_np.linalg.lstsq(A, rhs, rcond=-1)) # parameters
sol[0] = sol[0].flatten()*-1.0 # flattening the HLM params arr to [Tx, Ty, Tz, Rx, Ry, Rz, Scale/mu]
sol[0] = sol[0].flatten() * -1.0 # flattening the HLM params arr to [Tx, Ty, Tz, Rx, Ry, Rz, Scale/mu]
if scale_in_ppm:
sol[0][-1] *= 1e6 # scale in ppm
sol[0][-1] *= 1e6 # scale in ppm
res = rhs - A @ sol[0] # computing residuals for pt2
sol.append(res.reshape(-1, 3)) # appending residuals
return sol
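The inversion in `get_helmert7` is an ordinary least-squares solve; the call pattern, with toy numbers standing in for the Helmert design matrix:

```python
import numpy as np

# Overdetermined A x = rhs, same np.linalg.lstsq call as get_helmert7
A = np.array([[1.0, 0.0], [0.0, 2.0], [1.0, 1.0]])
rhs = np.array([[1.0], [4.0], [3.0]])

sol = list(np.linalg.lstsq(A, rhs, rcond=-1))
params = sol[0].flatten()  # flattened parameter vector, as in get_helmert7
res = rhs - A @ sol[0]     # residuals, appended to sol in get_helmert7
```

This toy system is consistent, so the residuals vanish; for real station coordinates the residuals appended by `get_helmert7` carry the per-point fit error.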
@@ -55,7 +54,7 @@ def gen_rot_matrix(v):
return mat + _np.eye(3)


def transform7(xyz_in, hlm_params, scale_in_ppm:bool = True):
def transform7(xyz_in, hlm_params, scale_in_ppm: bool = True):
"""transformation of xyz vector with 7 helmert parameters. The helmert parameters array consists of the following:
Three transformations Tx, Ty, Tz usually in meters, or the coordinate units used for the computation of the hlm parameters.
Three rotations Rx, Ry and Rz in radians.
@@ -69,7 +68,7 @@ def transform7(xyz_in, hlm_params, scale_in_ppm:bool = True):
rotation = gen_rot_matrix(hlm_params[3:6])
scale = hlm_params[6]
if scale_in_ppm:
scale *= 1e-6 # scaling in ppm thus multiplied by 1e-6
scale *= 1e-6 # scaling in ppm thus multiplied by 1e-6
xyz_out = (xyz_in @ rotation) * (1 + scale) + translation
return xyz_out
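`transform7`'s formula with zero rotation and a 2 ppm scale makes the unit handling concrete; all numbers below are illustrative.

```python
import numpy as np

# xyz_out = (xyz_in @ R) * (1 + scale) + translation, as in transform7
xyz_in = np.array([[1000.0, 2000.0, 3000.0]])
translation = np.array([0.5, -0.2, 0.1])  # metres (illustrative)
rotation = np.eye(3)                      # zero rotation -> identity matrix
scale_ppm = 2.0
scale = scale_ppm * 1e-6                  # ppm -> unitless, the scale_in_ppm branch

xyz_out = (xyz_in @ rotation) * (1 + scale) + translation
```

Each coordinate is first scaled by (1 + 2e-6) and then shifted, which is why `scale_in_ppm` must agree between `get_helmert7` and `transform7` when round-tripping parameters.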

@@ -87,7 +86,8 @@ def _xyz2llh_larson(xyz_array: _np.ndarray, ellipsoid: _gn_const.Ellipsoid, tole
_n = ellipsoid.semimaj / (1 - ellipsoid.ecc1sq * _np.sin(phi0[unconverged_mask]) ** 2) ** (1 / 2)
height[unconverged_mask] = _r[unconverged_mask] / _np.cos(phi0[unconverged_mask]) - _n
phi[unconverged_mask] = _np.arctan(
(z_arr[unconverged_mask] / _r[unconverged_mask]) / (1 - ellipsoid.ecc1sq * _n / (_n + height[unconverged_mask]))
(z_arr[unconverged_mask] / _r[unconverged_mask])
/ (1 - ellipsoid.ecc1sq * _n / (_n + height[unconverged_mask]))
)
unconverged_mask = _np.abs(phi - phi0) > tolerance
if ~unconverged_mask.any(): # if all less than tolerance
@@ -228,7 +228,7 @@ def llh2rot(phi, lamb, enu_to_ecef=False):

assert phi.size == lamb.size, "phi and lambda arrays should be of the same size"

rot = _np.zeros((phi.size, 3, 3), dtype=_np.float_)
rot = _np.zeros((phi.size, 3, 3), dtype=_np.float64)
rot[:, 0, 0] = -sin_lamb
rot[:, 0, 1] = cos_lamb
# ^col