From 66ac029fbadf4040a945959ae85ba96db6c4d1fc Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 9 Jul 2024 08:00:15 +0000 Subject: [PATCH 01/17] NPI-3388 NPI-3389 replace deprecated delim_whitespace with sep keyword --- gnssanalysis/filenames.py | 2 +- gnssanalysis/gn_frame.py | 2 +- gnssanalysis/gn_io/blq.py | 2 +- gnssanalysis/gn_io/clk.py | 2 +- gnssanalysis/gn_io/discon.py | 2 +- gnssanalysis/gn_io/erp.py | 2 +- gnssanalysis/gn_io/pea.py | 2 +- gnssanalysis/gn_io/sinex.py | 2 +- gnssanalysis/gn_io/trace.py | 2 +- gnssanalysis/gn_io/trop.py | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/gnssanalysis/filenames.py b/gnssanalysis/filenames.py index 148afaa..f3089a9 100644 --- a/gnssanalysis/filenames.py +++ b/gnssanalysis/filenames.py @@ -539,7 +539,7 @@ def determine_snx_name_props(file_path: pathlib.Path) -> Dict[str, Any]: if blk: soln_df = pd.read_csv( io.BytesIO(blk[0]), - delim_whitespace=True, + sep="\\s+", # delim_whitespace is deprecated comment="*", names=["CODE", "PT", "SOLN", "T", "START_EPOCH", "END_EPOCH", "MEAN_EPOCH"], converters={ diff --git a/gnssanalysis/gn_frame.py b/gnssanalysis/gn_frame.py index 2be8596..0bff02b 100644 --- a/gnssanalysis/gn_frame.py +++ b/gnssanalysis/gn_frame.py @@ -16,7 +16,7 @@ def _get_core_list(core_list_path): # need to check if solution numbers are consistent with discontinuities selection core_df = _pd.read_csv( core_list_path, - delim_whitespace=True, + sep="\\s+", # delim_whitespace is deprecated skiprows=4, comment="-", usecols=[0, 1, 2, 3], diff --git a/gnssanalysis/gn_io/blq.py b/gnssanalysis/gn_io/blq.py index f08e7d4..2ae55db 100644 --- a/gnssanalysis/gn_io/blq.py +++ b/gnssanalysis/gn_io/blq.py @@ -28,7 +28,7 @@ def read_blq(path, as_complex=True): sites = blq_file_read[:, 0].astype(" tuple: # LC_bytes = b''.join(trace_LC_list) # LC_bytes = LC_bytes.replace(b'=',b'') #getting rif of '=' -# df_LC = 
_pd.read_csv(_BytesIO(LC_bytes),delim_whitespace=True,header=None,usecols=[1,2,4,6,8,9,10,11,12,13]).astype( +# df_LC = _pd.read_csv(_BytesIO(LC_bytes),sep="\\s+",header=None,usecols=[1,2,4,6,8,9,10,11,12,13]).astype( # { # 1: _np.int16, 2:_np.int32, 4: ' _pd.DataFrame: try: solution_df = _pd.read_csv( _BytesIO(tro_estimate), - delim_whitespace=True, + sep="\\s+", # delim_whitespace is deprecated comment=b"*", index_col=False, header=None, From 7e37f74b6cc1f949c0a53cd0e98fd86cae82e4bd Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 13 Jan 2025 06:04:55 +0000 Subject: [PATCH 02/17] NPI-3388 update some of the igs log parsing code based on Pandas deprecations --- gnssanalysis/gn_io/igslog.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gnssanalysis/gn_io/igslog.py b/gnssanalysis/gn_io/igslog.py index 86cf778..5d2fe7e 100644 --- a/gnssanalysis/gn_io/igslog.py +++ b/gnssanalysis/gn_io/igslog.py @@ -463,12 +463,13 @@ def gather_metadata( gather_id_loc, columns=["CODE", "DOMES_N", "CITY", "COUNTRY", "X", "Y", "Z", "LAT", "LON", "HEI", "PATH"] ) - id_loc_df.CITY[id_loc_df.CITY == ""] = "N/A" + id_loc_df.loc[id_loc_df.CITY == "", "CITY"] = "N/A" id_loc_df.CITY = id_loc_df.CITY.str.rstrip().str.upper() id_loc_df.COUNTRY = translate_series( id_loc_df.COUNTRY.str.rstrip().str.upper(), _gn_io.aux_dicts.translation_country ).values - id_loc_df.DOMES_N[id_loc_df.DOMES_N == ""] = "---------" + + id_loc_df.loc[id_loc_df.DOMES_N == "", "DOMES_N"] = "---------" xyz_array = ( id_loc_df[["X", "Y", "Z"]].stack().str.replace(",", ".").replace({"": None}).unstack().values.astype(float) @@ -504,7 +505,7 @@ def gather_metadata( ant_df.RADOME2 = ant_df.RADOME2.str.rstrip().str.upper() no_rad2_mask = ~ant_df.RADOME.isin(_gn_io.aux_dicts.atx_rad_tbl) - ant_df.RADOME[no_rad2_mask] = ant_df.RADOME2[no_rad2_mask] + ant_df.loc[no_rad2_mask, "RADOME"] = ant_df.RADOME2[no_rad2_mask] # translation_ant.index.name= None antennas 
= translate_series(ant_df.ANTENNA, _gn_io.aux_dicts.translation_ant) invalid_ant_mask = ~antennas.index.isin(_gn_io.aux_dicts.atx_ant_tbl) From 32fe28c01160d7efc5eadab2ab27ba8741571f6d Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 13 Jan 2025 09:46:58 +0000 Subject: [PATCH 03/17] NPI-3388 add overloads for gen_sp3_content() to make type system happier. Failed on previous attempt but pulled it off this time. --- gnssanalysis/gn_io/sp3.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 77cd643..0c0ecd3 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -3,7 +3,7 @@ import io as _io import os as _os import re as _re -from typing import Literal, Optional, Union, List, Tuple +from typing import Callable, Literal, Mapping, Optional, Union, List, Tuple, overload from pathlib import Path import numpy as _np @@ -917,12 +917,32 @@ def gen_sp3_header(sp3_df: _pd.DataFrame) -> str: return "".join(line1 + line2 + sats_header.tolist() + sv_orb_head.tolist() + head_c + head_fi + comment) +# Option 1: don't provide a buffer, the data will be returned as a string +@overload def gen_sp3_content( sp3_df: _pd.DataFrame, + buf: None = None, + sort_outputs: bool = ..., + continue_on_unhandled_velocity_data: bool = ..., +) -> str: ... + + +# Option 2: (not typically used) provide a buffer and have the data written there +@overload +def gen_sp3_content( + sp3_df: _pd.DataFrame, + buf: _io.StringIO, + sort_outputs: bool = ..., + continue_on_unhandled_velocity_data: bool = ..., +) -> None: ...
+ + +def gen_sp3_content( + sp3_df: _pd.DataFrame, + buf: Union[None, _io.StringIO] = None, sort_outputs: bool = False, - buf: Union[None, _io.TextIOBase] = None, continue_on_unhandled_velocity_data: bool = True, -) -> str: +) -> Union[str, None]: """ Organises, formats (including nodata values), then writes out SP3 content to a buffer if provided, or returns it otherwise. @@ -930,10 +950,11 @@ def gen_sp3_content( Args: :param _pd.DataFrame sp3_df: The DataFrame containing the SP3 data. :param bool sort_outputs: Whether to sort the outputs. Defaults to False. - :param _io.TextIOBase buf: The buffer to write the SP3 content to. Defaults to None. + :param Union[_io.StringIO, None] buf: The buffer to write the SP3 content to. Defaults to None. :param bool continue_on_unhandled_velocity_data: If (currently unsupported) velocity data exists in the DataFrame, log a warning and skip velocity data, but write out position data. Set to false to raise an exception instead. - :return str or None: The SP3 content if `buf` is None, otherwise None. + :return str or None: Return SP3 content as a string if `buf` is None, otherwise write SP3 content to `buf`, + and return None. """ out_buf = buf if buf is not None else _io.StringIO() From bb9066c9aee3b6b601050a940acb1df245869b6e Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 13 Jan 2025 09:48:07 +0000 Subject: [PATCH 04/17] NPI-3388 add clearer typing on sp3 output formatters map --- gnssanalysis/gn_io/sp3.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 0c0ecd3..3ee8da6 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -1083,17 +1083,20 @@ def clk_std_formatter(x): return " " return format(x, "3d") - formatters = { - "PRN": prn_formatter, - "X": pos_formatter, # pos_formatter() can't handle nodata (Inf / NaN). Handled prior. 
- "Y": pos_formatter, - "Z": pos_formatter, - "CLK": clk_formatter, # Can't handle CLK nodata (Inf or NaN). Handled prior to invoking DataFrame.to_string() - "STD_X": pos_std_formatter, # Nodata is represented as an integer, so can be handled here. - "STD_Y": pos_std_formatter, - "STD_Z": pos_std_formatter, - "STD_CLK": clk_std_formatter, # ditto above - } + formatters: Mapping[str, Callable] = dict( + { + "PRN": prn_formatter, + "X": pos_formatter, # pos_formatter() can't handle nodata (Inf / NaN). Handled prior. + "Y": pos_formatter, + "Z": pos_formatter, + "CLK": clk_formatter, # Can't handle CLK nodata (Inf or NaN). Handled prior to invoking DataFrame.to_string() + "STD_X": pos_std_formatter, # Nodata is represented as an integer, so can be handled here. + "STD_Y": pos_std_formatter, + "STD_Z": pos_std_formatter, + "STD_CLK": clk_std_formatter, # ditto above + } + ) + for epoch, epoch_vals in out_df.reset_index("PRN").groupby(level="J2000"): # Format and write out the epoch in the SP3 format epoch_datetime = _gn_datetime.j2000_to_pydatetime(epoch) From 9603e9e5612ed7d2522c4423461f7d126c0a0f99 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 13 Jan 2025 09:50:28 +0000 Subject: [PATCH 05/17] NPI-3388 rename some nodata constants and add extras for strings, to improve clarity and allow reuse --- gnssanalysis/gn_io/sp3.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 3ee8da6..b0fb130 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -154,8 +154,12 @@ SP3_CLOCK_NODATA_NUMERIC = 999999 SP3_POS_NODATA_STRING = " 0.000000" SP3_POS_NODATA_NUMERIC = 0 -SP3_CLOCK_STD_NODATA = -1000 -SP3_POS_STD_NODATA = -100 +# The numeric values below are only relevant within this codebase, and signify nodata / NaN. 
+# They are created by the functions pos_log() and clk_log() +SP3_CLOCK_STD_NODATA_NUMERIC_INTERNAL = -1000 +SP3_CLOCK_STD_NODATA_STRING = " " +SP3_POS_STD_NODATA_NUMERIC_INTERNAL = -100 +SP3_POS_STD_NODATA_STRING = " " def sp3_pos_nodata_to_nan(sp3_df: _pd.DataFrame) -> None: @@ -322,6 +326,7 @@ def _process_sp3_block( epochs_dt = _pd.to_datetime(_pd.Series(date).str.slice(2, 21).values.astype(str), format=r"%Y %m %d %H %M %S") temp_sp3 = _pd.read_fwf(_io.StringIO(data), widths=widths, names=names) # TODO set datatypes per column in advance + # TODO maybe change this after updating everything else to use actual NaNs ? temp_sp3["Clock_Event_Flag"] = temp_sp3["Clock_Event_Flag"].fillna(" ") temp_sp3["Clock_Pred_Flag"] = temp_sp3["Clock_Pred_Flag"].fillna(" ") temp_sp3["Maneuver_Flag"] = temp_sp3["Maneuver_Flag"].fillna(" ") @@ -1021,14 +1026,16 @@ def gen_sp3_content( def pos_log(x): return _np.minimum( # Cap value at 99 _np.nan_to_num( # If there is data, use the following formula. Else return NODATA value. - _np.rint(_np.log(x) / _np.log(pos_base)), nan=SP3_POS_STD_NODATA # Rounded to nearest int + _np.rint(_np.log(x) / _np.log(pos_base)), + nan=SP3_POS_STD_NODATA_NUMERIC_INTERNAL, # Rounded to nearest int ), 99, ).astype(int) def clk_log(x): return _np.minimum( - _np.nan_to_num(_np.rint(_np.log(x) / _np.log(clk_base)), nan=SP3_CLOCK_STD_NODATA), 999 # Cap at 999 + _np.nan_to_num(_np.rint(_np.log(x) / _np.log(clk_base)), nan=SP3_CLOCK_STD_NODATA_NUMERIC_INTERNAL), + 999, # Cap at 999 ).astype(int) std_df = sp3_df["STD"] @@ -1073,14 +1080,15 @@ def clk_formatter(x): # only representation. def pos_std_formatter(x): # We use -100 as our integer NaN/"missing" marker - if x <= SP3_POS_STD_NODATA: - return " " + # NOTE: this could be NaN, except for the logic in the function that calculates this value.
+ if x <= SP3_POS_STD_NODATA_NUMERIC_INTERNAL: + return SP3_POS_STD_NODATA_STRING return format(x, "2d") def clk_std_formatter(x): # We use -1000 as our integer NaN/"missing" marker - if x <= SP3_CLOCK_STD_NODATA: - return " " + if x <= SP3_CLOCK_STD_NODATA_NUMERIC_INTERNAL: + return SP3_CLOCK_STD_NODATA_STRING return format(x, "3d") formatters: Mapping[str, Callable] = dict( From 85dd8cbf3f6b67fc4c861ff59f681577c29250ae Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 13 Jan 2025 10:18:07 +0000 Subject: [PATCH 06/17] NPI-3388 update pos_formatter() and clk_formatter() to handle inf values, reducing conversion steps for output formatting --- gnssanalysis/gn_io/sp3.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index b0fb130..8869a60 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -1033,6 +1033,7 @@ def pos_log(x): ).astype(int) def clk_log(x): + # Replace NaNs with SP3 clk_log nodata value (-1000). Round values and cap them at 999. return _np.minimum( _np.nan_to_num(_np.rint(_np.log(x) / _np.log(clk_base)), nan=SP3_CLOCK_STD_NODATA_NUMERIC_INTERNAL), 999, # Cap at 999 @@ -1062,15 +1063,16 @@ def prn_formatter(x): # Longer term we should maybe reimplement this again, maybe just processing groups line by line to format them? def pos_formatter(x): - if isinstance(x, str): # Presume an inf/NaN value, already formatted as nodata string. Pass through. - return x # Expected value " 0.000000" + # NaN values handled in df.style.format(), using na_rep; this formatter should not be invoked for them. 
+ if x in [_np.inf, _np.NINF]: # Treat infinite values as nodata + return SP3_POS_NODATA_STRING return format(x, "13.6f") # Numeric value, format as usual def clk_formatter(x): - # If this value (nominally a numpy float64) is actually a string, moreover containing the mandated part of the - # clock nodata value (per the SP3 spec), we deduce nodata formatting has already been done, and return as is. - if isinstance(x, str) and x.strip(" ").startswith("999999."): # TODO performance: could do just type check - return x + # NaN is handled by passing a na_rep value to df.style.format() before writing out with to_string(). + # So we just handle Infinity and normal numeric formatting. + if x in [_np.inf, _np.NINF]: + return SP3_CLOCK_NODATA_STRING return format(x, "13.6f") # Not infinite or NaN: proceed with normal formatting # NOTE: the following formatters are fine, as the nodata value is actually a *numeric value*, From 4aa3da748d508f95bac90d9b3d8c6f33fecbd1a5 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 13 Jan 2025 10:21:06 +0000 Subject: [PATCH 07/17] NPI-3388 switch to Pandas 3 compatible method for formatting entries for each sp3 epoch --- gnssanalysis/gn_io/sp3.py | 73 +++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 38 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 8869a60..7d7922b 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -1118,45 +1118,42 @@ def clk_std_formatter(x): out_buf.write("\n") # Format this epoch's values in the SP3 format and write to buffer - - # First, we fill NaN and infinity values with the standardised nodata value for each column. - # NOTE: DataFrame.to_string() as called below, takes formatter functions per column. It does not, however - # invoke them on NaN values!! As such, trying to handle NaNs in the formatter is a fool's errand. 
- # Instead, we do it here, and get the formatters to recognise and skip over the already processed nodata values - - # POS nodata formatting - # Fill +/- infinity values with SP3 nodata value for POS columns - epoch_vals["X"].replace(to_replace=[_np.inf, -_np.inf], value=SP3_POS_NODATA_STRING, inplace=True) - epoch_vals["Y"].replace(to_replace=[_np.inf, -_np.inf], value=SP3_POS_NODATA_STRING, inplace=True) - epoch_vals["Z"].replace(to_replace=[_np.inf, -_np.inf], value=SP3_POS_NODATA_STRING, inplace=True) - # Now do the same for NaNs - epoch_vals["X"].fillna(value=SP3_POS_NODATA_STRING, inplace=True) - epoch_vals["Y"].fillna(value=SP3_POS_NODATA_STRING, inplace=True) - epoch_vals["Z"].fillna(value=SP3_POS_NODATA_STRING, inplace=True) - # NOTE: we could use replace() for all this, though fillna() might be faster in some - # cases: https://stackoverflow.com/a/76225227 - # replace() will also handle other types of nodata constants: https://stackoverflow.com/a/54738894 - - # CLK nodata formatting - # Throw both +/- infinity, and NaN values to the SP3 clock nodata value. - # See https://stackoverflow.com/a/17478495 - epoch_vals["CLK"].replace(to_replace=[_np.inf, -_np.inf], value=SP3_CLOCK_NODATA_STRING, inplace=True) - epoch_vals["CLK"].fillna(value=SP3_CLOCK_NODATA_STRING, inplace=True) - - # Now invoke DataFrame to_string() to write out the values, leveraging our formatting functions for the - # relevant columns. - # NOTE: NaN and infinity values do NOT invoke the formatter, though you can put a string in a primarily numeric - # column, so we format the nodata values ahead of time, above. - # NOTE: you CAN'T mix datatypes as described above, in Pandas 3 and above, so this approach will need to be - # updated to use chained calls to format(). 
- epoch_vals.to_string( - buf=out_buf, - index=False, - header=False, - formatters=formatters, + # Notes: + # - DataFrame.to_string() doesn't call formatters on NaN values (nor presumably does DataFrame.Styler.format()) + # - Mixing datatypes in a column is disallowed in Pandas >=3. + # - We consider +/-Infinity, and NaN to be nodata values, along with some column specific nodata values. + # Given all this, the following approach is taken: + # - Custom formatters are updated to render +/-Infinity values as the SP3 nodata value for that column. + # - Chained calls to DataFrame.style.format() are used to set the column's formatter, and string nodata value. + + epoch_vals_styler = ( + epoch_vals.style.hide(axis="columns", names=False) # Get rid of column labels + .hide(axis="index", names=False) # Get rid of index labels (i.e. j2000 times) + # Format columns, specify how NaN values should be represented (NaNs are NOT passed to formatters) + .format(subset=["PRN"], formatter=prn_formatter) + .format(subset=["X"], na_rep=SP3_POS_NODATA_STRING, formatter=pos_formatter) + .format(subset=["Y"], na_rep=SP3_POS_NODATA_STRING, formatter=pos_formatter) + .format(subset=["Z"], na_rep=SP3_POS_NODATA_STRING, formatter=pos_formatter) + .format(subset=["CLK"], na_rep=SP3_CLOCK_NODATA_STRING, formatter=clk_formatter) + # STD columns don't need NODATA handling: an internal integer nodata value is used by the formatter + .format(subset=["STD_X"], formatter=pos_std_formatter) + .format(subset=["STD_Y"], formatter=pos_std_formatter) + .format(subset=["STD_Z"], formatter=pos_std_formatter) + .format(subset=["STD_CLK"], formatter=clk_std_formatter) + # NOTE: Passing formatters to the formatter argument throws typing errors, but is valid, as + # per https://pandas.pydata.org/docs/reference/api/pandas.io.formats.style.Styler.format.html + # But we won't use it anyway, because the more verbose option above allows us to set na_rep as well, + # and lay it all out in one place.
+ # .format(formatter=formatters) ) - out_buf.write("\n") - if buf is None: + + # NOTE: styler.to_string()'s delimiter="": no space between columns! + # TODO to use this though, we need to update the formatters to make the columns appropirately wide. + # This has been switched for the 13.6f formatters (to 14.6f), but I'm not positive whether other updates + # will be needed - e.g. to flags columns, STD columns, etc. + out_buf.write(epoch_vals_styler.to_string(delimiter=" ")) # styler.to_string() adds a trailing newline + + if buf is None: # No buffer to write to, was passed in. Return a string. return out_buf.getvalue() return None From 47d1de332bc1a0e5e20075c6b53cd9e37d96a4b6 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 13 Jan 2025 10:23:26 +0000 Subject: [PATCH 08/17] NPI-3388 update input buffer parameter name to gen_sp3_content() for clarity --- gnssanalysis/gn_io/sp3.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 7d7922b..d842c15 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -926,7 +926,7 @@ def gen_sp3_header(sp3_df: _pd.DataFrame) -> str: @overload def gen_sp3_content( sp3_df: _pd.DataFrame, - buf: None = None, + in_buf: None = None, sort_outputs: bool = ..., continue_on_unhandled_velocity_data: bool = ..., ) -> str: ... @@ -936,7 +936,7 @@ def gen_sp3_content( @overload def gen_sp3_content( sp3_df: _pd.DataFrame, - buf: _io.StringIO, + in_buf: _io.StringIO, sort_outputs: bool = ..., continue_on_unhandled_velocity_data: bool = ..., ) -> None: ... 
@@ -944,7 +944,7 @@ def gen_sp3_content( def gen_sp3_content( sp3_df: _pd.DataFrame, - buf: Union[None, _io.StringIO] = None, + in_buf: Union[None, _io.StringIO] = None, sort_outputs: bool = False, continue_on_unhandled_velocity_data: bool = True, ) -> Union[str, None]: @@ -955,14 +955,14 @@ def gen_sp3_content( Args: :param _pd.DataFrame sp3_df: The DataFrame containing the SP3 data. :param bool sort_outputs: Whether to sort the outputs. Defaults to False. - :param Union[_io.StringIO, None] buf: The buffer to write the SP3 content to. Defaults to None. + :param Union[_io.StringIO, None] in_buf: The buffer to write the SP3 content to. Defaults to None. :param bool continue_on_unhandled_velocity_data: If (currently unsupported) velocity data exists in the DataFrame, log a warning and skip velocity data, but write out position data. Set to false to raise an exception instead. - :return str or None: Return SP3 content as a string if `buf` is None, otherwise write SP3 content to `buf`, + :return str or None: Return SP3 content as a string if `in_buf` is None, otherwise write SP3 content to `in_buf`, and return None. """ - out_buf = buf if buf is not None else _io.StringIO() + out_buf = in_buf if in_buf is not None else _io.StringIO() if sort_outputs: # If we need to do particular sorting/ordering of satellites and constellations we can use some of the # options that .sort_index() provides @@ -1153,7 +1153,7 @@ def clk_std_formatter(x): # will be needed - e.g. to flags columns, STD columns, etc. out_buf.write(epoch_vals_styler.to_string(delimiter=" ")) # styler.to_string() adds a trailing newline - if buf is None: # No buffer to write to, was passed in. Return a string. + if in_buf is None: # No buffer to write to, was passed in. Return a string. 
return out_buf.getvalue() return None From 7c25eaee75370237da20c7ee9b96e4156a510c65 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 13 Jan 2025 10:28:28 +0000 Subject: [PATCH 09/17] NPI-3388 enable full width (ie moon orbit capable?!) SP3 value rendering, thanks to updated formatting approach. Add some helpful comments and placeholders from an old stash. --- gnssanalysis/gn_io/sp3.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index d842c15..92225da 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -1054,8 +1054,9 @@ def clk_log(x): def prn_formatter(x): return f"P{x}" - # TODO NOTE - # This is technically incorrect but convenient. The SP3 standard doesn't include a space between the X, Y, Z, and + # NOTE This has been updated to full 14.6f format. The following description has been left for background and + # reference while testing the change. + # (Previously!) technically incorrect but convenient. SP3 standard doesn't include a space between the X, Y, Z, and # CLK values but pandas .to_string() put a space between every field. In practice most entries have spaces between # the X, Y, Z, and CLK values because the values are small enough that a 14.6f format specification gets padded # with spaces. So for now we will use a 13.6f specification and a space between entries, which will be equivalent @@ -1066,14 +1067,14 @@ def pos_formatter(x): # NaN values handled in df.style.format(), using na_rep; this formatter should not be invoked for them. if x in [_np.inf, _np.NINF]: # Treat infinite values as nodata return SP3_POS_NODATA_STRING - return format(x, "13.6f") # Numeric value, format as usual + return format(x, "14.6f") # Numeric value, format as usual def clk_formatter(x): # NaN is handled by passing a na_rep value to df.style.format() before writing out with to_string(). 
# So we just handle Infinity and normal numeric formatting. if x in [_np.inf, _np.NINF]: return SP3_CLOCK_NODATA_STRING - return format(x, "13.6f") # Not infinite or NaN: proceed with normal formatting + return format(x, "14.6f") # Not infinite or NaN: proceed with normal formatting # NOTE: the following formatters are fine, as the nodata value is actually a *numeric value*, # so DataFrame.to_string() will invoke them for those values. @@ -1107,6 +1108,7 @@ def clk_std_formatter(x): } ) + # TODO maybe we want to set axis=0 in this groupby() (based on a previous experiment, not sure) for epoch, epoch_vals in out_df.reset_index("PRN").groupby(level="J2000"): # Format and write out the epoch in the SP3 format epoch_datetime = _gn_datetime.j2000_to_pydatetime(epoch) @@ -1151,7 +1153,7 @@ def clk_std_formatter(x): # TODO to use this though, we need to update the formatters to make the columns appropirately wide. # This has been switched for the 13.6f formatters (to 14.6f), but I'm not positive whether other updates # will be needed - e.g. to flags columns, STD columns, etc. - out_buf.write(epoch_vals_styler.to_string(delimiter=" ")) # styler.to_string() adds a trailing newline + out_buf.write(epoch_vals_styler.to_string(delimiter="")) # styler.to_string() adds a trailing newline if in_buf is None: # No buffer to write to, was passed in. Return a string. return out_buf.getvalue() @@ -1405,7 +1407,9 @@ def diff_sp3_rac( nd_rac = diff_eci.values[:, _np.newaxis] @ _gn_transform.eci2rac_rot(sp3_baseline_eci_vel) df_rac = _pd.DataFrame( nd_rac.reshape(-1, 3), - index=sp3_baseline.index, + index=sp3_baseline.index, # Note that if the test and baseline have different SVs, this index will refer to + # data which is missing in the 'test' dataframe (and so is likely to be missing in + # the diff too). 
columns=[["EST_RAC"] * 3, ["Radial", "Along-track", "Cross-track"]], ) From 52bea613932568ae33f821bb2ccc8283e81fa6ff Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 13 Jan 2025 10:51:02 +0000 Subject: [PATCH 10/17] NPI-3388 add jinja2 to requirements. Needed for DataFrame.style --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 7f412e7..57665c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ boto3 click hatanaka +jinja2 matplotlib numpy pandas From 7566f2a97adc78d3673378f051462ed6d3e258d0 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 13 Jan 2025 10:58:40 +0000 Subject: [PATCH 11/17] NPI-3388 try unpinning remaining requirement after weird styles related crash on pipeline --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 57665c7..0640ae9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ jinja2 matplotlib numpy pandas -plotext==4.2 +plotext plotly pyfakefs pymongo From 56bd0a8b1dada42f8c51de3536f593cb20682aa9 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 13 Jan 2025 11:00:52 +0000 Subject: [PATCH 12/17] NPI-3388 revert unpinning of requirement after that appeared to make no difference to crash --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0640ae9..57665c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ jinja2 matplotlib numpy pandas -plotext +plotext==4.2 plotly pyfakefs pymongo From bcb1f9cedcec3d7f07b17e84eb2289b953023fef Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 13 Jan 2025 11:36:20 +0000 Subject: [PATCH 13/17] NPI-3388 switch out mock file for bytes object based input reading, in test crashing 
during sp3 write --- tests/test_sp3.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_sp3.py b/tests/test_sp3.py index dcb2739..029316f 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -142,13 +142,13 @@ def test_read_sp3_validation_sv_count_mismatch_header_vs_content(self, mock_file # TODO add tests for correctly reading the actual content of the SP3 in addition to the header. # TODO add tests for correctly generating sp3 output content with gen_sp3_content() and gen_sp3_header() - @patch("builtins.open", new_callable=mock_open, read_data=input_data) - def test_gen_sp3_content_velocity_exception_handling(self, mock_file): + def test_gen_sp3_content_velocity_exception_handling(self): """ gen_sp3_content() velocity output should raise exception (currently unsupported).\ If asked to continue with warning, it should remove velocity columns before output. """ - sp3_df = sp3.read_sp3("mock_path", pOnly=False) + input_data_fresh = input_data + b"" # Lazy attempt at not passing a reference + sp3_df = sp3.read_sp3(bytes(input_data_fresh), pOnly=False) with self.assertRaises(NotImplementedError): generated_sp3_content = sp3.gen_sp3_content(sp3_df, continue_on_unhandled_velocity_data=False) From 9e07df34e7ba55fb1aeba415e3e7b75bb5f40589 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 13 Jan 2025 11:40:42 +0000 Subject: [PATCH 14/17] NPI-3388 remove use of NINF which has been deprecated. Not sure how this was missed before --- gnssanalysis/gn_io/sp3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 92225da..4ea88d0 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -1065,14 +1065,14 @@ def prn_formatter(x): def pos_formatter(x): # NaN values handled in df.style.format(), using na_rep; this formatter should not be invoked for them. 
- if x in [_np.inf, _np.NINF]: # Treat infinite values as nodata + if x in [_np.inf, -_np.inf]: # Treat infinite values as nodata return SP3_POS_NODATA_STRING return format(x, "14.6f") # Numeric value, format as usual def clk_formatter(x): # NaN is handled by passing a na_rep value to df.style.format() before writing out with to_string(). # So we just handle Infinity and normal numeric formatting. - if x in [_np.inf, _np.NINF]: + if x in [_np.inf, -_np.inf]: return SP3_CLOCK_NODATA_STRING return format(x, "14.6f") # Not infinite or NaN: proceed with normal formatting From 572ff8e9d296ee9d5f3ab232a585b5692b31eeea Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 13 Jan 2025 12:12:23 +0000 Subject: [PATCH 15/17] NPI-3388 update POS and CLOCK nodata strings to match new full width formatting. This is why the values had initially been misaligned --- gnssanalysis/gn_io/sp3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 4ea88d0..1d3c9e4 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -150,9 +150,9 @@ # not 14 (the official width of the column i.e. F14.6), again because Pandas insists on adding a further space. # See comment in gen_sp3_content() line ~645 for further discussion. # Another related 'hack' can be found at line ~602, handling the FLAGS columns. -SP3_CLOCK_NODATA_STRING = "999999.999999" +SP3_CLOCK_NODATA_STRING = " 999999.999999" # This is currently formatted for full width (ie 14 chars) SP3_CLOCK_NODATA_NUMERIC = 999999 -SP3_POS_NODATA_STRING = " 0.000000" +SP3_POS_NODATA_STRING = " 0.000000" # This is currently formatted for full width (ie 14 chars) SP3_POS_NODATA_NUMERIC = 0 # The numeric values below are only relevant within this codebase, and signify nodata / NaN. 
# They are created by the functions pos_log() and clk_log() From e31466deac9a25cd133299552eb27e4ddff3d772 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 13 Jan 2025 12:13:30 +0000 Subject: [PATCH 16/17] NPI-3388 add clearer exception for failing to extract a V from a dataframe index while reading an sp3 file with velocities. I have never seen this in practice, but demonstrated it in testing. --- gnssanalysis/gn_io/sp3.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 1d3c9e4..12e07e9 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -575,6 +575,14 @@ def read_sp3( else: # DF contains interlaced Position & Velocity measurements for each sat. Split the data based on this, and # recombine, turning Pos and Vel into separate columns. + + pv_flag_values = sp3_df.index.get_level_values("PV_FLAG").unique().values + if "V" not in pv_flag_values: + raise ValueError( + "SP3 header PV flag was not P, but no V (velocity) index appears to exist! " + f"Unique PV flag values seen: {pv_flag_values}" + ) + position_df = sp3_df.xs("P", level="PV_FLAG") velocity_df = sp3_df.xs("V", level="PV_FLAG") From 2c184c1dd75093a7d8bf55a8a1618db74519c39a Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 14 Jan 2025 03:08:05 +0000 Subject: [PATCH 17/17] NPI-3388 add notes on further unit tests we should have. Add clarification about why a test including gen_sp3_content() does not use a mock file for input. --- tests/test_sp3.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_sp3.py b/tests/test_sp3.py index 029316f..0daa581 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -141,12 +141,22 @@ def test_read_sp3_validation_sv_count_mismatch_header_vs_content(self, mock_file # TODO Add test(s) for correctly reading header fundamentals (ACC, ORB_TYPE, etc.) 
# TODO add tests for correctly reading the actual content of the SP3 in addition to the header. # TODO add tests for correctly generating sp3 output content with gen_sp3_content() and gen_sp3_header() + # These tests should include: + # - Correct alignment of POS, CLK, STDPOS STDCLK, (not velocity yet), FLAGS + # - Correct alignment of the above when nodata and infinite values are present + # - Inclusion of HLM orbit_type in header, after applying Helmert transformation (if not covered elsewhere? + # Probably should be covered elsewhere) + # - Not including column names (can just test that output matches expected format) + # - Not including any NaN value *anywhere* def test_gen_sp3_content_velocity_exception_handling(self): """ gen_sp3_content() velocity output should raise exception (currently unsupported).\ If asked to continue with warning, it should remove velocity columns before output. """ + # Input data passed as bytes here, rather than using a mock file, because the mock file setup seems to break + # part of Pandas Styler, which is used by gen_sp3_content(). Specifically, some part of Styler's attempt to + # load style config files leads to a crash, despite some style config files appearing to read successfully. input_data_fresh = input_data + b"" # Lazy attempt at not passing a reference sp3_df = sp3.read_sp3(bytes(input_data_fresh), pOnly=False) with self.assertRaises(NotImplementedError):