Skip to content

Commit

Permalink
fix(call): impose max iters on ref cn getter + fix ref cn logic
Browse files Browse the repository at this point in the history
Was accidentally passing db_seq instead of tr_seq to get_repeat_count
and mis-calculating start_count, resulting in too many iterations and possibly
bad ref copy numbers.
  • Loading branch information
davidlougheed committed Jun 10, 2024
1 parent 5fe8195 commit 491ef33
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 8 deletions.
8 changes: 6 additions & 2 deletions strkit/call/call_locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,18 +815,22 @@ def call_locus(

# Get reference repeat count by our method, so we can calculate offsets from reference
ref_cn: Union[int, float]
(ref_cn, _), l_offset, r_offset = get_ref_repeat_count(
ref_max_iters: int = 100
(ref_cn, _), l_offset, r_offset, r_n_is = get_ref_repeat_count(
round(len(ref_seq) / motif_size), # Initial estimate of copy number based on coordinates + motif size
ref_seq,
ref_left_flank_seq,
ref_right_flank_seq,
motif,
ref_size=right_coord-left_coord, # reference size, in terms of coordinates (not TRF-recorded size)
max_iters=ref_max_iters,
respect_coords=respect_ref,
)
call_dict_base["ref_cn"] = ref_cn # tag call dictionary with ref_cn

logger_.debug(f"{locus_log_str} - got ref. copy number: {ref_cn} ({l_offset=}; {r_offset=})")
logger_.debug(
f"{locus_log_str} - got ref. copy number: {ref_cn} ({l_offset=}; {r_offset=}; iters={r_n_is}; "
f"slow_ref_count={any(x > 50 for x in r_n_is)})")

# If our reference repeat count getter has altered the TR boundaries a bit (which is done to allow for
# more spaces in which an indel could end up), adjust our coordinates to match.
Expand Down
18 changes: 12 additions & 6 deletions strkit/call/repeats.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,15 +126,18 @@ def get_ref_repeat_count(
flank_right_seq: str,
motif: str,
ref_size: int,
max_iters: int,
respect_coords: bool = False,
local_search_range: int = DEFAULT_LOCAL_SEARCH_RANGE, # TODO: Parametrize for user
) -> tuple[tuple[Union[int, float], int], int, int]:
) -> tuple[tuple[Union[int, float], int], int, int, tuple[int, int]]:
l_offset: int = 0
r_offset: int = 0

db_seq: str = f"{flank_left_seq}{tr_seq}{flank_right_seq}"
motif_size = len(motif)

n_offset_scores: int = 0

if not respect_coords: # Extend out coordinates from initial definition
to_explore: list[tuple[int, Literal[-1, 0, 1]]] = [
(start_count - 1, -1), (start_count + 1, 1), (start_count, 0)]
Expand Down Expand Up @@ -165,6 +168,8 @@ def get_ref_repeat_count(
fwd_sizes_scores_adj[i] = fwd_rs = res[0]
rev_sizes_scores_adj[i] = rev_rs = res[1]

n_offset_scores += 1

fwd_scores.append((i, fwd_rs, i))
rev_scores.append((i, rev_rs, i))

Expand Down Expand Up @@ -196,12 +201,13 @@ def get_ref_repeat_count(

# ------------------------------------------------------------------------------------------------------------------

final_res, _ = get_repeat_count(
round(start_count + (max(0, l_offset) + max(0, r_offset)) / motif_size), # always start with int here
db_seq,
final_res, n_iters_final_count = get_repeat_count(
# always start with int here:
round(((start_count * motif_size) + (max(0, l_offset) + max(0, r_offset))) / motif_size),
tr_seq,
flank_left_seq,
flank_right_seq,
motif,
max_iters=100)
max_iters=max_iters)

return final_res, l_offset, r_offset
return final_res, l_offset, r_offset, (n_offset_scores, n_iters_final_count)

0 comments on commit 491ef33

Please sign in to comment.