From 62342696dc964465f3e7dfbddcfb6c6c3877095d Mon Sep 17 00:00:00 2001
From: David Lougheed <david.lougheed@gmail.com>
Date: Tue, 18 Jun 2024 11:34:07 -0400
Subject: [PATCH] refact(call): use time.perf_counter for perf tracking

---
 strkit/call/call_locus.py         | 28 ++++++++++++++--------------
 strkit/call/call_sample.py        | 11 +++++------
 strkit/call/output/json_report.py |  5 ++---
 3 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/strkit/call/call_locus.py b/strkit/call/call_locus.py
index 5c1f7bc..80b593d 100644
--- a/strkit/call/call_locus.py
+++ b/strkit/call/call_locus.py
@@ -6,10 +6,10 @@
 import numpy as np
 import operator
 import threading
+import time
 
 from collections import Counter
 from collections.abc import Sequence
-from datetime import datetime
 from pysam import FastaFile
 from sklearn.cluster import AgglomerativeClustering
 from sklearn.mixture import GaussianMixture
@@ -787,7 +787,7 @@ def call_locus(
     read_file_has_chr: bool = True,
     ref_file_has_chr: bool = True,
 ) -> Optional[LocusResult]:
-    call_timer = datetime.now()
+    call_timer = time.perf_counter()
 
     # params de-structuring ------------
     consensus = params.consensus
@@ -836,7 +836,7 @@ def call_locus(
 
     # Get reference sequence and copy number ---------------------------------------------------------------------------
 
-    ref_timer = datetime.now()
+    ref_timer = time.perf_counter()
 
     ref_seq_offset_l = left_coord - left_flank_coord
     ref_seq_offset_r = right_coord - left_flank_coord
@@ -922,7 +922,7 @@ def call_locus(
         locus_result["start_adj"] = left_coord_adj
         locus_result["end_adj"] = right_coord_adj
 
-    ref_time = (datetime.now() - ref_timer).total_seconds()
+    ref_time = time.perf_counter() - ref_timer
 
     # Find the initial set of overlapping aligned segments with associated read lengths + whether we have in-locus
     # chimera reads (i.e., reads which aligned twice with different soft-clipping, likely due to a large indel.) -------
@@ -1110,7 +1110,7 @@ def call_locus(
             read_kmers.clear()
             read_kmers.update(tr_read_seq_wc[i:i+motif_size] for i in range(0, tr_len - motif_size + 1))
 
-        rc_timer = datetime.now()
+        rc_timer = time.perf_counter()
         # Set initial integer copy number guess based on aligned TR size, plus the previous read offset (how much the
         # last guess was wrong by, as a delta.)
         read_sc = round(tr_len / motif_size)
@@ -1126,7 +1126,7 @@ def call_locus(
         # Update using +=, since if we use an offset that was correct, the new returned offset will be 0, so we really
         # want to keep the old offset, not set it to 0.
         read_offset_frac_from_starting_guess += new_offset_from_starting_count / max(read_cn, 1)
-        rc_time = (datetime.now() - rc_timer).total_seconds()
+        rc_time = time.perf_counter() - rc_timer
 
         if n_read_cn_iters >= max_rc_iters:
             logger_.debug(f"{locus_log_str} - locus repeat counting exceeded maximum # iterations ({n_read_cn_iters})")
@@ -1153,7 +1153,7 @@ def call_locus(
                 **locus_result,
                 "peaks": None,
                 "read_peaks_called": False,
-                "time": (datetime.now() - call_timer).total_seconds(),
+                "time": time.perf_counter() - call_timer,
             }
 
         if read_adj_score < min_read_align_score:
@@ -1172,7 +1172,7 @@ def call_locus(
                         **locus_result,
                         "peaks": None,
                         "read_peaks_called": False,
-                        "time": (datetime.now() - call_timer).total_seconds(),
+                        "time": time.perf_counter() - call_timer,
                     }
 
             continue
@@ -1270,7 +1270,7 @@ def call_locus(
             **locus_result,
             "peaks": None,
             "read_peaks_called": False,
-            "time": (datetime.now() - call_timer).total_seconds(),
+            "time": time.perf_counter() - call_timer,
         }
 
     # Now, we know we have enough reads to maybe make a call -----------------------------------------------------------
@@ -1291,7 +1291,7 @@ def call_locus(
             have_rare_realigns = True
             break
 
-    allele_start_time = datetime.now()
+    allele_start_time = time.perf_counter()
 
     if params.use_hp:
         top_ps = phase_sets.most_common(1)
@@ -1379,7 +1379,7 @@ def call_locus(
     if single_or_dist_assign:  # Didn't use SNVs, so call the 'old-fashioned' way - using only copy number
         call_data = call_alleles_with_gmm(params, n_alleles, read_dict, assign_method, rng, logger_, locus_log_str)
 
-    allele_time = (datetime.now() - allele_start_time).total_seconds()
+    allele_time = time.perf_counter() - allele_start_time
 
     logger_.debug(f"{locus_log_str} - finished assigning alleles using {assign_method} method: took {allele_time:.4f}s")
 
@@ -1399,7 +1399,7 @@ def call_locus(
 
     # Assign reads to peaks and compute peak k-mers (and optionally consensus sequences) -------------------------------
 
-    assign_start_time = datetime.now()
+    assign_start_time = time.perf_counter()
 
     # We cannot call read-level cluster labels with >2 peaks using distance alone;
     # don't know how re-sampling has occurred.
@@ -1496,11 +1496,11 @@ def call_locus(
         **({"seqs": call_seqs} if consensus else {}),
     } if call_data else None
 
-    assign_time = (datetime.now() - assign_start_time).total_seconds()
+    assign_time = time.perf_counter() - assign_start_time
 
     # Calculate call time ----------------------------------------------------------------------------------------------
 
-    call_time = (datetime.now() - call_timer).total_seconds()
+    call_time = time.perf_counter() - call_timer
 
     if call_time > CALL_WARN_TIME:
         logger_.warning(
diff --git a/strkit/call/call_sample.py b/strkit/call/call_sample.py
index 66ddc9a..8f11095 100644
--- a/strkit/call/call_sample.py
+++ b/strkit/call/call_sample.py
@@ -14,7 +14,6 @@
 import time
 import traceback
 
-from datetime import datetime
 from operator import itemgetter
 from multiprocessing.synchronize import Event as EventClass  # For type hinting
 from typing import Iterable, Literal, Optional
@@ -196,7 +195,7 @@ def locus_worker(
 
 def progress_worker(
     sample_id: Optional[str],
-    start_time: datetime,
+    start_time: float,
     log_level: int,
     locus_queue: mp.Queue,
     locus_counter: mmg.ValueProxy,
@@ -215,7 +214,7 @@ def progress_worker(
     def _log():
         try:
             processed_loci = int(locus_counter.get())
-            n_seconds = (datetime.now() - start_time).total_seconds()
+            n_seconds = time.perf_counter() - start_time
             loci_per_second = processed_loci / n_seconds
             est_time_remaining = ((num_loci - processed_loci) / loci_per_second) if loci_per_second else float("inf")
             lg.info(
@@ -272,7 +271,7 @@ def call_sample(
     logger = get_main_logger()
 
     # Start the call timer
-    start_time = datetime.now()
+    start_time = time.perf_counter()
 
     logger.info(
         f"Starting STR genotyping; sample={params.sample_id}, hq={params.hq}, targeted={params.targeted}, "
@@ -474,9 +473,9 @@ def call_sample(
         finish_event.set()
         progress_job.join()
 
-    time_taken = datetime.now() - start_time
+    time_taken = time.perf_counter() - start_time
 
-    logger.info(f"Finished STR genotyping in {time_taken.total_seconds():.1f}s")
+    logger.info(f"Finished STR genotyping in {time_taken:.1f}s")
 
     if json_path:
         output_json_report_footer(time_taken, json_path, indent_json)
diff --git a/strkit/call/output/json_report.py b/strkit/call/output/json_report.py
index cedb00b..db148c2 100644
--- a/strkit/call/output/json_report.py
+++ b/strkit/call/output/json_report.py
@@ -1,5 +1,4 @@
 import sys
-from datetime import timedelta
 from typing import Callable, Literal
 
 from strkit import __version__
@@ -72,8 +71,8 @@ def output_json_report_results(results: tuple[LocusResult, ...], is_last: bool,
     _write_bytes(results_bytes, json_path, "ab")
 
 
-def output_json_report_footer(time_taken: timedelta, json_path: str, indent_json: bool):
-    runtime_bytes = dumps(time_taken.total_seconds())
+def output_json_report_footer(time_taken: float, json_path: str, indent_json: bool):
+    runtime_bytes = dumps(time_taken)
     if indent_json:
         footer_bytes = b'\n  ],\n  "runtime": ' + runtime_bytes + b'\n}\n'
     else: