From 66ac029fbadf4040a945959ae85ba96db6c4d1fc Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Tue, 9 Jul 2024 08:00:15 +0000
Subject: [PATCH 01/17] NPI-3388 NPI-3389 replace deprecated delim_whitespace
 with sep keyword

---
 gnssanalysis/filenames.py    | 2 +-
 gnssanalysis/gn_frame.py     | 2 +-
 gnssanalysis/gn_io/blq.py    | 2 +-
 gnssanalysis/gn_io/clk.py    | 2 +-
 gnssanalysis/gn_io/discon.py | 2 +-
 gnssanalysis/gn_io/erp.py    | 2 +-
 gnssanalysis/gn_io/pea.py    | 2 +-
 gnssanalysis/gn_io/sinex.py  | 2 +-
 gnssanalysis/gn_io/trace.py  | 2 +-
 gnssanalysis/gn_io/trop.py   | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/gnssanalysis/filenames.py b/gnssanalysis/filenames.py
index 148afaa..f3089a9 100644
--- a/gnssanalysis/filenames.py
+++ b/gnssanalysis/filenames.py
@@ -539,7 +539,7 @@ def determine_snx_name_props(file_path: pathlib.Path) -> Dict[str, Any]:
             if blk:
                 soln_df = pd.read_csv(
                     io.BytesIO(blk[0]),
-                    delim_whitespace=True,
+                    sep="\\s+",  # delim_whitespace is deprecated
                     comment="*",
                     names=["CODE", "PT", "SOLN", "T", "START_EPOCH", "END_EPOCH", "MEAN_EPOCH"],
                     converters={
diff --git a/gnssanalysis/gn_frame.py b/gnssanalysis/gn_frame.py
index 2be8596..0bff02b 100644
--- a/gnssanalysis/gn_frame.py
+++ b/gnssanalysis/gn_frame.py
@@ -16,7 +16,7 @@ def _get_core_list(core_list_path):
     # need to check if solution numbers are consistent with discontinuities selection
     core_df = _pd.read_csv(
         core_list_path,
-        delim_whitespace=True,
+        sep="\\s+",  # delim_whitespace is deprecated
         skiprows=4,
         comment="-",
         usecols=[0, 1, 2, 3],
diff --git a/gnssanalysis/gn_io/blq.py b/gnssanalysis/gn_io/blq.py
index f08e7d4..2ae55db 100644
--- a/gnssanalysis/gn_io/blq.py
+++ b/gnssanalysis/gn_io/blq.py
@@ -28,7 +28,7 @@ def read_blq(path, as_complex=True):
     sites = blq_file_read[:, 0].astype("<U4")
     constituents = ["M2", "S2", "N2", "K2", "K1", "O1", "P1", "Q1", "MF", "MM", "SSA"]
 
-    blq_df = _pd.read_csv(_BytesIO(b"\n".join(blq_file_read[:, 1:].reshape((-1)))), delim_whitespace=True, header=None)
+    blq_df = _pd.read_csv(_BytesIO(b"\n".join(blq_file_read[:, 1:].reshape((-1)))), sep="\\s+", header=None)
     if as_complex:
         # convert extracted A and P to complex phasors X + jY so the comparison of several blq files could be done
         b = blq_df.values.reshape(-1, 11 * 3)
diff --git a/gnssanalysis/gn_io/clk.py b/gnssanalysis/gn_io/clk.py
index e68fb81..9f314ff 100644
--- a/gnssanalysis/gn_io/clk.py
+++ b/gnssanalysis/gn_io/clk.py
@@ -33,7 +33,7 @@ def read_clk(clk_path):
 
     clk_df = _pd.read_csv(
         _BytesIO(data),
-        delim_whitespace=True,
+        sep="\\s+",  # delim_whitespace is deprecated
         header=None,
         usecols=clk_cols,
         names=clk_names,  # type:ignore
diff --git a/gnssanalysis/gn_io/discon.py b/gnssanalysis/gn_io/discon.py
index 80d2f93..5112a6e 100644
--- a/gnssanalysis/gn_io/discon.py
+++ b/gnssanalysis/gn_io/discon.py
@@ -17,7 +17,7 @@ def _read_discontinuities(path):
     out_df = _pd.read_csv(
         filepath_or_buffer=_BytesIO(block),
         usecols=[0, 1, 2, 4, 5, 6],
-        delim_whitespace=True,
+        sep="\\s+",  # delim_whitespace is deprecated
         header=None,
         names=["CODE", "PT", "SOLN", "START", "END", "MODEL"],
         dtype={0: object, 1: object, 2: int, 4: object, 5: object, 6: MODEL_CATEGORY},
diff --git a/gnssanalysis/gn_io/erp.py b/gnssanalysis/gn_io/erp.py
index c7cec3e..0fee6c9 100644
--- a/gnssanalysis/gn_io/erp.py
+++ b/gnssanalysis/gn_io/erp.py
@@ -255,7 +255,7 @@ def read_erp(
     data_of_interest = content[start_of_data:]  # data block
     erp_df = _pd.read_csv(
         _BytesIO(data_of_interest),
-        delim_whitespace=True,
+        sep="\\s+",  # delim_whitespace is deprecated
         names=headers,
         index_col=False,
     )
diff --git a/gnssanalysis/gn_io/pea.py b/gnssanalysis/gn_io/pea.py
index 944c987..7ec3410 100644
--- a/gnssanalysis/gn_io/pea.py
+++ b/gnssanalysis/gn_io/pea.py
@@ -13,7 +13,7 @@ def read_pea_partials(path):
     df = _pd.read_csv(
         _BytesIO(partials[begin:]),
         header=None,
-        delim_whitespace=True,
+        sep="\\s+",  # delim_whitespace is deprecated
         usecols=[0, 1, 2, 9, 10, 11],
         names=[None, "MJD", "TOD", "X", "Y", "Z"],
     )
diff --git a/gnssanalysis/gn_io/sinex.py b/gnssanalysis/gn_io/sinex.py
index ed05abc..47dd884 100644
--- a/gnssanalysis/gn_io/sinex.py
+++ b/gnssanalysis/gn_io/sinex.py
@@ -457,7 +457,7 @@ def _get_snx_matrix(path_or_bytes, stypes=("APR", "EST"), verbose=True, snx_head
     else:
         return None  # not found
 
-    matrix_raw = _pd.read_csv(snx_buffer, delim_whitespace=True, dtype={0: _np.int16, 1: _np.int16})
+    matrix_raw = _pd.read_csv(snx_buffer, sep="\\s+", dtype={0: _np.int16, 1: _np.int16})
     # can be 4 and 5 columns; only 2 first int16
 
     output = []
diff --git a/gnssanalysis/gn_io/trace.py b/gnssanalysis/gn_io/trace.py
index 1bdaa83..30c46b1 100644
--- a/gnssanalysis/gn_io/trace.py
+++ b/gnssanalysis/gn_io/trace.py
@@ -167,7 +167,7 @@ def _find_trace(output_path: str) -> tuple:
 #     LC_bytes = b''.join(trace_LC_list)
 #     LC_bytes = LC_bytes.replace(b'=',b'') #getting rif of '='
 
-#     df_LC = _pd.read_csv(_BytesIO(LC_bytes),delim_whitespace=True,header=None,usecols=[1,2,4,6,8,9,10,11,12,13]).astype(
+#     df_LC = _pd.read_csv(_BytesIO(LC_bytes),sep="\\s+",header=None,usecols=[1,2,4,6,8,9,10,11,12,13]).astype(
 #         {
 #             1: _np.int16, 2:_np.int32, 4: '<U3',
 #             6: '<U1', 8: '<U4',
diff --git a/gnssanalysis/gn_io/trop.py b/gnssanalysis/gn_io/trop.py
index 172e4ee..4908bc7 100644
--- a/gnssanalysis/gn_io/trop.py
+++ b/gnssanalysis/gn_io/trop.py
@@ -22,7 +22,7 @@ def _read_tro_solution(path: str, recenter: bool = True) -> _pd.DataFrame:
     try:
         solution_df = _pd.read_csv(
             _BytesIO(tro_estimate),
-            delim_whitespace=True,
+            sep="\\s+",  # delim_whitespace is deprecated
             comment=b"*",
             index_col=False,
             header=None,

From 7e37f74b6cc1f949c0a53cd0e98fd86cae82e4bd Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Mon, 13 Jan 2025 06:04:55 +0000
Subject: [PATCH 02/17] NPI-3388 update some of the igs log parsing code based
 on Pandas deprecations

---
 gnssanalysis/gn_io/igslog.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/gnssanalysis/gn_io/igslog.py b/gnssanalysis/gn_io/igslog.py
index 86cf778..5d2fe7e 100644
--- a/gnssanalysis/gn_io/igslog.py
+++ b/gnssanalysis/gn_io/igslog.py
@@ -463,12 +463,13 @@ def gather_metadata(
         gather_id_loc, columns=["CODE", "DOMES_N", "CITY", "COUNTRY", "X", "Y", "Z", "LAT", "LON", "HEI", "PATH"]
     )
 
-    id_loc_df.CITY[id_loc_df.CITY == ""] = "N/A"
+    id_loc_df.loc[id_loc_df.CITY == "", "CITY"] = "N/A"
     id_loc_df.CITY = id_loc_df.CITY.str.rstrip().str.upper()
     id_loc_df.COUNTRY = translate_series(
         id_loc_df.COUNTRY.str.rstrip().str.upper(), _gn_io.aux_dicts.translation_country
     ).values
-    id_loc_df.DOMES_N[id_loc_df.DOMES_N == ""] = "---------"
+
+    id_loc_df.loc[id_loc_df.DOMES_N == "", "DOMES_N"] = "---------"
 
     xyz_array = (
         id_loc_df[["X", "Y", "Z"]].stack().str.replace(",", ".").replace({"": None}).unstack().values.astype(float)
@@ -504,7 +505,7 @@ def gather_metadata(
     ant_df.RADOME2 = ant_df.RADOME2.str.rstrip().str.upper()
 
     no_rad2_mask = ~ant_df.RADOME.isin(_gn_io.aux_dicts.atx_rad_tbl)
-    ant_df.RADOME[no_rad2_mask] = ant_df.RADOME2[no_rad2_mask]
+    ant_df.loc[no_rad2_mask, "RADOME"] = ant_df.RADOME2[no_rad2_mask]
     # translation_ant.index.name= None
     antennas = translate_series(ant_df.ANTENNA, _gn_io.aux_dicts.translation_ant)
     invalid_ant_mask = ~antennas.index.isin(_gn_io.aux_dicts.atx_ant_tbl)

From 32fe28c01160d7efc5eadab2ab27ba8741571f6d Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Mon, 13 Jan 2025 09:46:58 +0000
Subject: [PATCH 03/17] NPI-3388 add overloads for gen_sp3_content() to make
 type system happier. Failed on previous attempt but pulled it off this time.

---
 gnssanalysis/gn_io/sp3.py | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py
index 77cd643..0c0ecd3 100644
--- a/gnssanalysis/gn_io/sp3.py
+++ b/gnssanalysis/gn_io/sp3.py
@@ -3,7 +3,7 @@
 import io as _io
 import os as _os
 import re as _re
-from typing import Literal, Optional, Union, List, Tuple
+from typing import Callable, Literal, Mapping, Optional, Union, List, Tuple, overload
 from pathlib import Path
 
 import numpy as _np
@@ -917,12 +917,32 @@ def gen_sp3_header(sp3_df: _pd.DataFrame) -> str:
     return "".join(line1 + line2 + sats_header.tolist() + sv_orb_head.tolist() + head_c + head_fi + comment)
 
 
+# Option 1: don't provide a buffer, the data will be returned as a string
+@overload
 def gen_sp3_content(
     sp3_df: _pd.DataFrame,
+    buf: None = None,
+    sort_outputs: bool = ...,
+    continue_on_unhandled_velocity_data: bool = ...,
+) -> str: ...
+
+
+# Option 2: (not typically used) provide a buffer and have the data written there
+@overload
+def gen_sp3_content(
+    sp3_df: _pd.DataFrame,
+    buf: _io.StringIO,
+    sort_outputs: bool = ...,
+    continue_on_unhandled_velocity_data: bool = ...,
+) -> None: ...
+
+
+def gen_sp3_content(
+    sp3_df: _pd.DataFrame,
+    buf: Union[None, _io.StringIO] = None,
     sort_outputs: bool = False,
-    buf: Union[None, _io.TextIOBase] = None,
     continue_on_unhandled_velocity_data: bool = True,
-) -> str:
+) -> Union[str, None]:
     """
     Organises, formats (including nodata values), then writes out SP3 content to a buffer if provided, or returns
     it otherwise.
@@ -930,10 +950,11 @@ def gen_sp3_content(
     Args:
     :param _pd.DataFrame sp3_df: The DataFrame containing the SP3 data.
     :param bool sort_outputs: Whether to sort the outputs. Defaults to False.
-    :param _io.TextIOBase buf: The buffer to write the SP3 content to. Defaults to None.
+    :param Union[_io.StringIO, None] buf: The buffer to write the SP3 content to. Defaults to None.
     :param bool continue_on_unhandled_velocity_data: If (currently unsupported) velocity data exists in the DataFrame,
         log a warning and skip velocity data, but write out position data. Set to false to raise an exception instead.
-    :return str or None: The SP3 content if `buf` is None, otherwise None.
+    :return str or None: Return SP3 content as a string if `buf` is None, otherwise write SP3 content to `buf`,
+        and return None.
     """
 
     out_buf = buf if buf is not None else _io.StringIO()

From bb9066c9aee3b6b601050a940acb1df245869b6e Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Mon, 13 Jan 2025 09:48:07 +0000
Subject: [PATCH 04/17] NPI-3388 add clearer typing on sp3 output formatters
 map

---
 gnssanalysis/gn_io/sp3.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py
index 0c0ecd3..3ee8da6 100644
--- a/gnssanalysis/gn_io/sp3.py
+++ b/gnssanalysis/gn_io/sp3.py
@@ -1083,17 +1083,20 @@ def clk_std_formatter(x):
             return "   "
         return format(x, "3d")
 
-    formatters = {
-        "PRN": prn_formatter,
-        "X": pos_formatter,  # pos_formatter() can't handle nodata (Inf / NaN). Handled prior.
-        "Y": pos_formatter,
-        "Z": pos_formatter,
-        "CLK": clk_formatter,  # Can't handle CLK nodata (Inf or NaN). Handled prior to invoking DataFrame.to_string()
-        "STD_X": pos_std_formatter,  # Nodata is represented as an integer, so can be handled here.
-        "STD_Y": pos_std_formatter,
-        "STD_Z": pos_std_formatter,
-        "STD_CLK": clk_std_formatter,  # ditto above
-    }
+    formatters: Mapping[str, Callable] = dict(
+        {
+            "PRN": prn_formatter,
+            "X": pos_formatter,  # pos_formatter() can't handle nodata (Inf / NaN). Handled prior.
+            "Y": pos_formatter,
+            "Z": pos_formatter,
+            "CLK": clk_formatter,  # Can't handle CLK nodata (Inf or NaN). Handled prior to invoking DataFrame.to_string()
+            "STD_X": pos_std_formatter,  # Nodata is represented as an integer, so can be handled here.
+            "STD_Y": pos_std_formatter,
+            "STD_Z": pos_std_formatter,
+            "STD_CLK": clk_std_formatter,  # ditto above
+        }
+    )
+
     for epoch, epoch_vals in out_df.reset_index("PRN").groupby(level="J2000"):
         # Format and write out the epoch in the SP3 format
         epoch_datetime = _gn_datetime.j2000_to_pydatetime(epoch)

From 9603e9e5612ed7d2522c4423461f7d126c0a0f99 Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Mon, 13 Jan 2025 09:50:28 +0000
Subject: [PATCH 05/17] NPI-3388 rename some nodata constants and add extras
 for strings, to improve clarity and allow reuse

---
 gnssanalysis/gn_io/sp3.py | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py
index 3ee8da6..b0fb130 100644
--- a/gnssanalysis/gn_io/sp3.py
+++ b/gnssanalysis/gn_io/sp3.py
@@ -154,8 +154,12 @@
 SP3_CLOCK_NODATA_NUMERIC = 999999
 SP3_POS_NODATA_STRING = "     0.000000"
 SP3_POS_NODATA_NUMERIC = 0
-SP3_CLOCK_STD_NODATA = -1000
-SP3_POS_STD_NODATA = -100
+# The numeric values below are only relevant within this codebase, and signify nodata / NaN.
+# They are created by the functions pos_log() and clk_log()
+SP3_CLOCK_STD_NODATA_NUMERIC_INTERNAL = -1000
+SP3_CLOCK_STD_NODATA_STRING = "   "
+SP3_POS_STD_NODATA_NUMERIC_INTERNAL = -100
+SP3_POS_STD_NODATA_STRING = "  "
 
 
 def sp3_pos_nodata_to_nan(sp3_df: _pd.DataFrame) -> None:
@@ -322,6 +326,7 @@ def _process_sp3_block(
     epochs_dt = _pd.to_datetime(_pd.Series(date).str.slice(2, 21).values.astype(str), format=r"%Y %m %d %H %M %S")
     temp_sp3 = _pd.read_fwf(_io.StringIO(data), widths=widths, names=names)
     # TODO set datatypes per column in advance
+    # TODO maybe change this after updating everyting else to use actual NaNs ?
     temp_sp3["Clock_Event_Flag"] = temp_sp3["Clock_Event_Flag"].fillna(" ")
     temp_sp3["Clock_Pred_Flag"] = temp_sp3["Clock_Pred_Flag"].fillna(" ")
     temp_sp3["Maneuver_Flag"] = temp_sp3["Maneuver_Flag"].fillna(" ")
@@ -1021,14 +1026,16 @@ def gen_sp3_content(
         def pos_log(x):
             return _np.minimum(  # Cap value at 99
                 _np.nan_to_num(  # If there is data, use the following formula. Else return NODATA value.
-                    _np.rint(_np.log(x) / _np.log(pos_base)), nan=SP3_POS_STD_NODATA  # Rounded to nearest int
+                    _np.rint(_np.log(x) / _np.log(pos_base)),
+                    nan=SP3_POS_STD_NODATA_NUMERIC_INTERNAL,  # Rounded to nearest int
                 ),
                 99,
             ).astype(int)
 
         def clk_log(x):
             return _np.minimum(
-                _np.nan_to_num(_np.rint(_np.log(x) / _np.log(clk_base)), nan=SP3_CLOCK_STD_NODATA), 999  # Cap at 999
+                _np.nan_to_num(_np.rint(_np.log(x) / _np.log(clk_base)), nan=SP3_CLOCK_STD_NODATA_NUMERIC_INTERNAL),
+                999,  # Cap at 999
             ).astype(int)
 
         std_df = sp3_df["STD"]
@@ -1073,14 +1080,15 @@ def clk_formatter(x):
     # only representation.
     def pos_std_formatter(x):
         # We use -100 as our integer NaN/"missing" marker
-        if x <= SP3_POS_STD_NODATA:
-            return "  "
+        # NOTE: this could be NaN, except for the logic in the function that calculates this value.
+        if x <= SP3_POS_STD_NODATA_NUMERIC_INTERNAL:
+            return SP3_POS_STD_NODATA_STRING
         return format(x, "2d")
 
     def clk_std_formatter(x):
         # We use -1000 as our integer NaN/"missing" marker
-        if x <= SP3_CLOCK_STD_NODATA:
-            return "   "
+        if x <= SP3_CLOCK_STD_NODATA_NUMERIC_INTERNAL:
+            return SP3_CLOCK_STD_NODATA_STRING
         return format(x, "3d")
 
     formatters: Mapping[str, Callable] = dict(

From 85dd8cbf3f6b67fc4c861ff59f681577c29250ae Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Mon, 13 Jan 2025 10:18:07 +0000
Subject: [PATCH 06/17] NPI-3388 update pos_formatter() and clk_formatter() to
 handle inf values, reducing conversion steps for output formatting

---
 gnssanalysis/gn_io/sp3.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py
index b0fb130..8869a60 100644
--- a/gnssanalysis/gn_io/sp3.py
+++ b/gnssanalysis/gn_io/sp3.py
@@ -1033,6 +1033,7 @@ def pos_log(x):
             ).astype(int)
 
         def clk_log(x):
+            # Replace NaNs with SP3 clk_log nodata value (-1000). Round values and cap them at 999.
             return _np.minimum(
                 _np.nan_to_num(_np.rint(_np.log(x) / _np.log(clk_base)), nan=SP3_CLOCK_STD_NODATA_NUMERIC_INTERNAL),
                 999,  # Cap at 999
@@ -1062,15 +1063,16 @@ def prn_formatter(x):
     # Longer term we should maybe reimplement this again, maybe just processing groups line by line to format them?
 
     def pos_formatter(x):
-        if isinstance(x, str):  # Presume an inf/NaN value, already formatted as nodata string. Pass through.
-            return x  # Expected value "      0.000000"
+        # NaN values handled in df.style.format(), using na_rep; this formatter should not be invoked for them.
+        if x in [_np.inf, _np.NINF]:  # Treat infinite values as nodata
+            return SP3_POS_NODATA_STRING
         return format(x, "13.6f")  # Numeric value, format as usual
 
     def clk_formatter(x):
-        # If this value (nominally a numpy float64) is actually a string, moreover containing the mandated part of the
-        # clock nodata value (per the SP3 spec), we deduce nodata formatting has already been done, and return as is.
-        if isinstance(x, str) and x.strip(" ").startswith("999999."):  # TODO performance: could do just type check
-            return x
+        # NaN is handled by passing a na_rep value to df.style.format() before writing out with to_string().
+        # So we just handle Infinity and normal numeric formatting.
+        if x in [_np.inf, _np.NINF]:
+            return SP3_CLOCK_NODATA_STRING
         return format(x, "13.6f")  # Not infinite or NaN: proceed with normal formatting
 
     # NOTE: the following formatters are fine, as the nodata value is actually a *numeric value*,

From 4aa3da748d508f95bac90d9b3d8c6f33fecbd1a5 Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Mon, 13 Jan 2025 10:21:06 +0000
Subject: [PATCH 07/17] NPI-3388 switch to Pandas 3 compatible method for
 formatting entries for each sp3 epoch

---
 gnssanalysis/gn_io/sp3.py | 73 +++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 38 deletions(-)

diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py
index 8869a60..7d7922b 100644
--- a/gnssanalysis/gn_io/sp3.py
+++ b/gnssanalysis/gn_io/sp3.py
@@ -1118,45 +1118,42 @@ def clk_std_formatter(x):
         out_buf.write("\n")
 
         # Format this epoch's values in the SP3 format and write to buffer
-
-        # First, we fill NaN and infinity values with the standardised nodata value for each column.
-        # NOTE: DataFrame.to_string() as called below, takes formatter functions per column. It does not, however
-        # invoke them on NaN values!! As such, trying to handle NaNs in the formatter is a fool's errand.
-        # Instead, we do it here, and get the formatters to recognise and skip over the already processed nodata values
-
-        # POS nodata formatting
-        # Fill +/- infinity values with SP3 nodata value for POS columns
-        epoch_vals["X"].replace(to_replace=[_np.inf, -_np.inf], value=SP3_POS_NODATA_STRING, inplace=True)
-        epoch_vals["Y"].replace(to_replace=[_np.inf, -_np.inf], value=SP3_POS_NODATA_STRING, inplace=True)
-        epoch_vals["Z"].replace(to_replace=[_np.inf, -_np.inf], value=SP3_POS_NODATA_STRING, inplace=True)
-        # Now do the same for NaNs
-        epoch_vals["X"].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
-        epoch_vals["Y"].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
-        epoch_vals["Z"].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
-        # NOTE: we could use replace() for all this, though fillna() might be faster in some
-        # cases: https://stackoverflow.com/a/76225227
-        # replace() will also handle other types of nodata constants: https://stackoverflow.com/a/54738894
-
-        # CLK nodata formatting
-        # Throw both +/- infinity, and NaN values to the SP3 clock nodata value.
-        # See https://stackoverflow.com/a/17478495
-        epoch_vals["CLK"].replace(to_replace=[_np.inf, -_np.inf], value=SP3_CLOCK_NODATA_STRING, inplace=True)
-        epoch_vals["CLK"].fillna(value=SP3_CLOCK_NODATA_STRING, inplace=True)
-
-        # Now invoke DataFrame to_string() to write out the values, leveraging our formatting functions for the
-        # relevant columns.
-        # NOTE: NaN and infinity values do NOT invoke the formatter, though you can put a string in a primarily numeric
-        # column, so we format the nodata values ahead of time, above.
-        # NOTE: you CAN'T mix datatypes as described above, in Pandas 3 and above, so this approach will need to be
-        # updated to use chained calls to format().
-        epoch_vals.to_string(
-            buf=out_buf,
-            index=False,
-            header=False,
-            formatters=formatters,
+        # Notes:
+        # - DataFrame.to_string() doesn't call formatters on NaN values (nor presumably does DataFrame.Styler.format())
+        # - Mixing datatypes in a column is disallowed in Pandas >=3.
+        # - We consider +/-Infinity, and NaN to be nodata values, along with some column specific nodata values.
+        # Given all this, the following approach is taken:
+        # - Custom formatters are updated to render +/-Infninty values as the SP3 nodata value for that column.
+        # - Chained calls to DataFrame.style.format() are used to set the column's formatter, and string nodata value.
+
+        epoch_vals_styler = (
+            epoch_vals.style.hide(axis="columns", names=False)  # Get rid of column labels
+            .hide(axis="index", names=False)  # Get rid of index labels (i.e. j2000 times)
+            # Format columns, specify how NaN values should be represented (NaNs are NOT passed to formatters)
+            .format(subset=["PRN"], formatter=prn_formatter)
+            .format(subset=["X"], na_rep=SP3_POS_NODATA_STRING, formatter=pos_formatter)
+            .format(subset=["Y"], na_rep=SP3_POS_NODATA_STRING, formatter=pos_formatter)
+            .format(subset=["Z"], na_rep=SP3_POS_NODATA_STRING, formatter=pos_formatter)
+            .format(subset=["CLK"], na_rep=SP3_CLOCK_NODATA_STRING, formatter=clk_formatter)
+            # STD columns don't need NODATA handling: an internal integer nodata value is used by the formatter
+            .format(subset=["STD_X"], formatter=pos_std_formatter)
+            .format(subset=["STD_Y"], formatter=pos_std_formatter)
+            .format(subset=["STD_Z"], formatter=pos_std_formatter)
+            .format(subset=["STD_CLK"], formatter=clk_std_formatter)
+            # NOTE: Passing formatters to the formatter argument throws typing errors, but is valid, as
+            # per https://pandas.pydata.org/docs/reference/api/pandas.io.formats.style.Styler.format.html
+            # But we won't use it anyway, because the more verbose option above allows us to set na_rep as well,
+            # and lay it all out in one place.
+            # .format(formatter=formatters)
         )
-        out_buf.write("\n")
-    if buf is None:
+
+        # NOTE: styler.to_string()'s delimiter="": no space between columns!
+        # TODO to use this though, we need to update the formatters to make the columns appropirately wide.
+        # This has been switched for the 13.6f formatters (to 14.6f), but I'm not positive whether other updates
+        # will be needed - e.g. to flags columns, STD columns, etc.
+        out_buf.write(epoch_vals_styler.to_string(delimiter=" "))  # styler.to_string() adds a trailing newline
+
+    if buf is None:  # No buffer to write to, was passed in. Return a string.
         return out_buf.getvalue()
     return None
 

From 47d1de332bc1a0e5e20075c6b53cd9e37d96a4b6 Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Mon, 13 Jan 2025 10:23:26 +0000
Subject: [PATCH 08/17] NPI-3388 update input buffer parameter name to
 gen_sp3_content() for clarity

---
 gnssanalysis/gn_io/sp3.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py
index 7d7922b..d842c15 100644
--- a/gnssanalysis/gn_io/sp3.py
+++ b/gnssanalysis/gn_io/sp3.py
@@ -926,7 +926,7 @@ def gen_sp3_header(sp3_df: _pd.DataFrame) -> str:
 @overload
 def gen_sp3_content(
     sp3_df: _pd.DataFrame,
-    buf: None = None,
+    in_buf: None = None,
     sort_outputs: bool = ...,
     continue_on_unhandled_velocity_data: bool = ...,
 ) -> str: ...
@@ -936,7 +936,7 @@ def gen_sp3_content(
 @overload
 def gen_sp3_content(
     sp3_df: _pd.DataFrame,
-    buf: _io.StringIO,
+    in_buf: _io.StringIO,
     sort_outputs: bool = ...,
     continue_on_unhandled_velocity_data: bool = ...,
 ) -> None: ...
@@ -944,7 +944,7 @@ def gen_sp3_content(
 
 def gen_sp3_content(
     sp3_df: _pd.DataFrame,
-    buf: Union[None, _io.StringIO] = None,
+    in_buf: Union[None, _io.StringIO] = None,
     sort_outputs: bool = False,
     continue_on_unhandled_velocity_data: bool = True,
 ) -> Union[str, None]:
@@ -955,14 +955,14 @@ def gen_sp3_content(
     Args:
     :param _pd.DataFrame sp3_df: The DataFrame containing the SP3 data.
     :param bool sort_outputs: Whether to sort the outputs. Defaults to False.
-    :param Union[_io.StringIO, None] buf: The buffer to write the SP3 content to. Defaults to None.
+    :param Union[_io.StringIO, None] in_buf: The buffer to write the SP3 content to. Defaults to None.
     :param bool continue_on_unhandled_velocity_data: If (currently unsupported) velocity data exists in the DataFrame,
         log a warning and skip velocity data, but write out position data. Set to false to raise an exception instead.
-    :return str or None: Return SP3 content as a string if `buf` is None, otherwise write SP3 content to `buf`,
+    :return str or None: Return SP3 content as a string if `in_buf` is None, otherwise write SP3 content to `in_buf`,
         and return None.
     """
 
-    out_buf = buf if buf is not None else _io.StringIO()
+    out_buf = in_buf if in_buf is not None else _io.StringIO()
     if sort_outputs:
         # If we need to do particular sorting/ordering of satellites and constellations we can use some of the
         # options that .sort_index() provides
@@ -1153,7 +1153,7 @@ def clk_std_formatter(x):
         # will be needed - e.g. to flags columns, STD columns, etc.
         out_buf.write(epoch_vals_styler.to_string(delimiter=" "))  # styler.to_string() adds a trailing newline
 
-    if buf is None:  # No buffer to write to, was passed in. Return a string.
+    if in_buf is None:  # No buffer to write to, was passed in. Return a string.
         return out_buf.getvalue()
     return None
 

From 7c25eaee75370237da20c7ee9b96e4156a510c65 Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Mon, 13 Jan 2025 10:28:28 +0000
Subject: [PATCH 09/17] NPI-3388 enable full width (ie moon orbit capable?!)
 SP3 value rendering, thanks to updated formatting approach. Add some helpful
 comments and placeholders from an old stash.

---
 gnssanalysis/gn_io/sp3.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py
index d842c15..92225da 100644
--- a/gnssanalysis/gn_io/sp3.py
+++ b/gnssanalysis/gn_io/sp3.py
@@ -1054,8 +1054,9 @@ def clk_log(x):
     def prn_formatter(x):
         return f"P{x}"
 
-    # TODO NOTE
-    # This is technically incorrect but convenient. The SP3 standard doesn't include a space between the X, Y, Z, and
+    # NOTE This has been updated to full 14.6f format. The following description has been left for background and
+    # reference while testing the change.
+    # (Previously!) technically incorrect but convenient. SP3 standard doesn't include a space between the X, Y, Z, and
     # CLK values but pandas .to_string() put a space between every field. In practice most entries have spaces between
     # the X, Y, Z, and CLK values because the values are small enough that a 14.6f format specification gets padded
     # with spaces. So for now we will use a 13.6f specification and a space between entries, which will be equivalent
@@ -1066,14 +1067,14 @@ def pos_formatter(x):
         # NaN values handled in df.style.format(), using na_rep; this formatter should not be invoked for them.
         if x in [_np.inf, _np.NINF]:  # Treat infinite values as nodata
             return SP3_POS_NODATA_STRING
-        return format(x, "13.6f")  # Numeric value, format as usual
+        return format(x, "14.6f")  # Numeric value, format as usual
 
     def clk_formatter(x):
         # NaN is handled by passing a na_rep value to df.style.format() before writing out with to_string().
         # So we just handle Infinity and normal numeric formatting.
         if x in [_np.inf, _np.NINF]:
             return SP3_CLOCK_NODATA_STRING
-        return format(x, "13.6f")  # Not infinite or NaN: proceed with normal formatting
+        return format(x, "14.6f")  # Not infinite or NaN: proceed with normal formatting
 
     # NOTE: the following formatters are fine, as the nodata value is actually a *numeric value*,
     # so DataFrame.to_string() will invoke them for those values.
@@ -1107,6 +1108,7 @@ def clk_std_formatter(x):
         }
     )
 
+    # TODO maybe we want to set axis=0 in this groupby() (based on a previous experiment, not sure)
     for epoch, epoch_vals in out_df.reset_index("PRN").groupby(level="J2000"):
         # Format and write out the epoch in the SP3 format
         epoch_datetime = _gn_datetime.j2000_to_pydatetime(epoch)
@@ -1151,7 +1153,7 @@ def clk_std_formatter(x):
         # TODO to use this though, we need to update the formatters to make the columns appropirately wide.
         # This has been switched for the 13.6f formatters (to 14.6f), but I'm not positive whether other updates
         # will be needed - e.g. to flags columns, STD columns, etc.
-        out_buf.write(epoch_vals_styler.to_string(delimiter=" "))  # styler.to_string() adds a trailing newline
+        out_buf.write(epoch_vals_styler.to_string(delimiter=""))  # styler.to_string() adds a trailing newline
 
     if in_buf is None:  # No buffer to write to, was passed in. Return a string.
         return out_buf.getvalue()
@@ -1405,7 +1407,9 @@ def diff_sp3_rac(
     nd_rac = diff_eci.values[:, _np.newaxis] @ _gn_transform.eci2rac_rot(sp3_baseline_eci_vel)
     df_rac = _pd.DataFrame(
         nd_rac.reshape(-1, 3),
-        index=sp3_baseline.index,
+        index=sp3_baseline.index,  # Note that if the test and baseline have different SVs, this index will refer to
+        # data which is missing in the 'test' dataframe (and so is likely to be missing in
+        # the diff too).
         columns=[["EST_RAC"] * 3, ["Radial", "Along-track", "Cross-track"]],
     )
 

From 52bea613932568ae33f821bb2ccc8283e81fa6ff Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Mon, 13 Jan 2025 10:51:02 +0000
Subject: [PATCH 10/17] NPI-3388 add jinja2 to requirements. Needed for
 DataFrame.style

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 7f412e7..57665c7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 boto3
 click
 hatanaka
+jinja2
 matplotlib
 numpy
 pandas

From 7566f2a97adc78d3673378f051462ed6d3e258d0 Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Mon, 13 Jan 2025 10:58:40 +0000
Subject: [PATCH 11/17] NPI-3388 try unpinning remaining requirement after
 weird styles related crash on pipeline

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 57665c7..0640ae9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ jinja2
 matplotlib
 numpy
 pandas
-plotext==4.2
+plotext
 plotly
 pyfakefs
 pymongo

From 56bd0a8b1dada42f8c51de3536f593cb20682aa9 Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Mon, 13 Jan 2025 11:00:52 +0000
Subject: [PATCH 12/17] NPI-3388 revert unpinning of requirement after that
 appeared to make no difference to crash

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 0640ae9..57665c7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ jinja2
 matplotlib
 numpy
 pandas
-plotext
+plotext==4.2
 plotly
 pyfakefs
 pymongo

From bcb1f9cedcec3d7f07b17e84eb2289b953023fef Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Mon, 13 Jan 2025 11:36:20 +0000
Subject: [PATCH 13/17] NPI-3388 switch out mock file for bytes object based
 input reading, in test crashing during sp3 write

---
 tests/test_sp3.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_sp3.py b/tests/test_sp3.py
index dcb2739..029316f 100644
--- a/tests/test_sp3.py
+++ b/tests/test_sp3.py
@@ -142,13 +142,13 @@ def test_read_sp3_validation_sv_count_mismatch_header_vs_content(self, mock_file
     # TODO add tests for correctly reading the actual content of the SP3 in addition to the header.
     # TODO add tests for correctly generating sp3 output content with gen_sp3_content() and gen_sp3_header()
 
-    @patch("builtins.open", new_callable=mock_open, read_data=input_data)
-    def test_gen_sp3_content_velocity_exception_handling(self, mock_file):
+    def test_gen_sp3_content_velocity_exception_handling(self):
         """
         gen_sp3_content() velocity output should raise exception (currently unsupported).\
             If asked to continue with warning, it should remove velocity columns before output.
         """
-        sp3_df = sp3.read_sp3("mock_path", pOnly=False)
+        input_data_fresh = input_data + b""  # Lazy attempt at not passing a reference
+        sp3_df = sp3.read_sp3(bytes(input_data_fresh), pOnly=False)
         with self.assertRaises(NotImplementedError):
             generated_sp3_content = sp3.gen_sp3_content(sp3_df, continue_on_unhandled_velocity_data=False)
 

From 9e07df34e7ba55fb1aeba415e3e7b75bb5f40589 Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Mon, 13 Jan 2025 11:40:42 +0000
Subject: [PATCH 14/17] NPI-3388 remove use of NINF which has been deprecated.
 Not sure how this was missed before

---
 gnssanalysis/gn_io/sp3.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py
index 92225da..4ea88d0 100644
--- a/gnssanalysis/gn_io/sp3.py
+++ b/gnssanalysis/gn_io/sp3.py
@@ -1065,14 +1065,14 @@ def prn_formatter(x):
 
     def pos_formatter(x):
         # NaN values handled in df.style.format(), using na_rep; this formatter should not be invoked for them.
-        if x in [_np.inf, _np.NINF]:  # Treat infinite values as nodata
+        if x in [_np.inf, -_np.inf]:  # Treat infinite values as nodata
             return SP3_POS_NODATA_STRING
         return format(x, "14.6f")  # Numeric value, format as usual
 
     def clk_formatter(x):
         # NaN is handled by passing a na_rep value to df.style.format() before writing out with to_string().
         # So we just handle Infinity and normal numeric formatting.
-        if x in [_np.inf, _np.NINF]:
+        if x in [_np.inf, -_np.inf]:
             return SP3_CLOCK_NODATA_STRING
         return format(x, "14.6f")  # Not infinite or NaN: proceed with normal formatting
 

From 572ff8e9d296ee9d5f3ab232a585b5692b31eeea Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Mon, 13 Jan 2025 12:12:23 +0000
Subject: [PATCH 15/17] NPI-3388 update POS and CLOCK nodata strings to match
 new full width formatting. This is why the values had initially been
 misaligned

---
 gnssanalysis/gn_io/sp3.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py
index 4ea88d0..1d3c9e4 100644
--- a/gnssanalysis/gn_io/sp3.py
+++ b/gnssanalysis/gn_io/sp3.py
@@ -150,9 +150,9 @@
 # not 14 (the official width of the column i.e. F14.6), again because Pandas insists on adding a further space.
 # See comment in gen_sp3_content() line ~645 for further discussion.
 # Another related 'hack' can be found at line ~602, handling the FLAGS columns.
-SP3_CLOCK_NODATA_STRING = "999999.999999"
+SP3_CLOCK_NODATA_STRING = " 999999.999999" # This is currently formatted for full width (ie 14 chars)
 SP3_CLOCK_NODATA_NUMERIC = 999999
-SP3_POS_NODATA_STRING = "     0.000000"
+SP3_POS_NODATA_STRING = "      0.000000" # This is currently formatted for full width (ie 14 chars)
 SP3_POS_NODATA_NUMERIC = 0
 # The numeric values below are only relevant within this codebase, and signify nodata / NaN.
 # They are created by the functions pos_log() and clk_log()

From e31466deac9a25cd133299552eb27e4ddff3d772 Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Mon, 13 Jan 2025 12:13:30 +0000
Subject: [PATCH 16/17] NPI-3388 add clearer exception for failing to extract a
 V from a dataframe index while reading an sp3 file with velocities. I have
 never seen this in practice, but demonstrated it in testing.

---
 gnssanalysis/gn_io/sp3.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py
index 1d3c9e4..12e07e9 100644
--- a/gnssanalysis/gn_io/sp3.py
+++ b/gnssanalysis/gn_io/sp3.py
@@ -575,6 +575,14 @@ def read_sp3(
     else:
         # DF contains interlaced Position & Velocity measurements for each sat. Split the data based on this, and
         # recombine, turning Pos and Vel into separate columns.
+
+        pv_flag_values = sp3_df.index.get_level_values("PV_FLAG").unique().values
+        if "V" not in pv_flag_values:
+            raise ValueError(
+                "SP3 header PV flag was not P, but no V (velocity) index appears to exist! "
+                f"Unique PV flag values seen: {pv_flag_values}"
+            )
+
         position_df = sp3_df.xs("P", level="PV_FLAG")
         velocity_df = sp3_df.xs("V", level="PV_FLAG")
 

From 2c184c1dd75093a7d8bf55a8a1618db74519c39a Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Tue, 14 Jan 2025 03:08:05 +0000
Subject: [PATCH 17/17] NPI-3388 add notes on further unit tests we should
 have. Add clarification about why a test including gen_sp3_content() does not
 use a mock file for input.

---
 tests/test_sp3.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/test_sp3.py b/tests/test_sp3.py
index 029316f..0daa581 100644
--- a/tests/test_sp3.py
+++ b/tests/test_sp3.py
@@ -141,12 +141,22 @@ def test_read_sp3_validation_sv_count_mismatch_header_vs_content(self, mock_file
     # TODO Add test(s) for correctly reading header fundamentals (ACC, ORB_TYPE, etc.)
     # TODO add tests for correctly reading the actual content of the SP3 in addition to the header.
     # TODO add tests for correctly generating sp3 output content with gen_sp3_content() and gen_sp3_header()
+    # These tests should include:
+    # - Correct alignment of POS, CLK, STDPOS, STDCLK (not velocity yet), FLAGS
+    # - Correct alignment of the above when nodata and infinite values are present
+    # - Inclusion of HLM orbit_type in header, after applying Helmert transformation (if not covered elsewhere?
+    #   Probably should be covered elsewhere)
+    # - Not including column names (can just test that output matches expected format)
+    # - Not including any NaN value *anywhere*
 
     def test_gen_sp3_content_velocity_exception_handling(self):
         """
         gen_sp3_content() velocity output should raise exception (currently unsupported).\
             If asked to continue with warning, it should remove velocity columns before output.
         """
+        # Input data passed as bytes here, rather than using a mock file, because the mock file setup seems to break
+        # part of Pandas Styler, which is used by gen_sp3_content(). Specifically, some part of Styler's attempt to
+        # load style config files leads to a crash, despite some style config files appearing to read successfully.
         input_data_fresh = input_data + b""  # Lazy attempt at not passing a reference
         sp3_df = sp3.read_sp3(bytes(input_data_fresh), pOnly=False)
         with self.assertRaises(NotImplementedError):