From 44f8731e31236a945bcfea75f2f979526fe4a2a0 Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Wed, 3 Apr 2024 15:02:53 +0200
Subject: [PATCH 01/27] tcrdist draft version implemented

---
 src/scirpy/ir_dist/__init__.py |   5 +-
 src/scirpy/ir_dist/metrics.py  | 164 +++++++++++++++++++++++++++++++++
 2 files changed, 168 insertions(+), 1 deletion(-)
diff --git a/src/scirpy/ir_dist/__init__.py b/src/scirpy/ir_dist/__init__.py
index 6b13a760b..0aad959c5 100644
--- a/src/scirpy/ir_dist/__init__.py
+++ b/src/scirpy/ir_dist/__init__.py
@@ -94,6 +94,8 @@ def _get_distance_calculator(metric: MetricType, cutoff: Union[int, None], *, n_
         dist_calc = metrics.LevenshteinDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs)
     elif metric == "hamming":
         dist_calc = metrics.HammingDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs)
+    elif metric == "tcrdist":
+        dist_calc = metrics.TCRdistDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs)
     else:
         raise ValueError("Invalid distance metric.")
 
@@ -117,6 +119,7 @@ def _ir_dist(
     airr_mod_ref: str = "airr",
     airr_key_ref: str = "airr",
     chain_idx_key_ref: str = "chain_indices",
+    **kwargs,
 ) -> Union[dict, None]:
     """\
     Computes a sequence-distance metric between all unique :term:`VJ <Chain locus>`
@@ -220,7 +223,7 @@ def _get_unique_seqs(tmp_adata, chain_type):
                 result[chain_type][tmp_key] = unique_seqs
 
     # compute distance matrices
-    dist_calc = _get_distance_calculator(metric, cutoff, n_jobs=n_jobs)
+    dist_calc = _get_distance_calculator(metric, cutoff, n_jobs=n_jobs, **kwargs)
     for chain_type in ["VJ", "VDJ"]:
         logging.info(f"Computing sequence x sequence distance matrix for {chain_type} sequences.")  # type: ignore
         result[chain_type]["distances"] = dist_calc.calc_dist_mat(
diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index 6f1a46d19..4adbb67e6 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -14,6 +14,9 @@
 
 from scirpy.util import _doc_params, deprecated, tqdm
 
+import numba as nb
+import multiprocessing
+
 _doc_params_parallel_distance_calculator = """\
 n_jobs
     Number of jobs to use for the pairwise distance calculation.
@@ -369,6 +372,167 @@ def _compute_block(self, seqs1, seqs2, origin):
 
         return result
 
+class TCRdistDistanceCalculator:
+
+    parasail_aa_alphabet = 'ARNDCQEGHILKMFPSTWYVBZX'
+    parasail_aa_alphabet_with_unknown = 'ARNDCQEGHILKMFPSTWYVBZX*'
+    tcr_dict_distance_matrix = {('A', 'A'): 0,  ('A', 'C'): 4,  ('A', 'D'): 4,  ('A', 'E'): 4,  ('A', 'F'): 4,  ('A', 'G'): 4,  ('A', 'H'): 4,  ('A', 'I'): 4,  ('A', 'K'): 4,  ('A', 'L'): 4,  ('A', 'M'): 4,  ('A', 'N'): 4,  ('A', 'P'): 4,  ('A', 'Q'): 4,  ('A', 'R'): 4,  ('A', 'S'): 3,  ('A', 'T'): 4,  ('A', 'V'): 4,  ('A', 'W'): 4,  ('A', 'Y'): 4,  ('C', 'A'): 4,  ('C', 'C'): 0,  ('C', 'D'): 4,  ('C', 'E'): 4,  ('C', 'F'): 4,  ('C', 'G'): 4,  ('C', 'H'): 4,  ('C', 'I'): 4,  ('C', 'K'): 4,  ('C', 'L'): 4,  ('C', 'M'): 4,  ('C', 'N'): 4,  ('C', 'P'): 4,  ('C', 'Q'): 4,  ('C', 'R'): 4,  ('C', 'S'): 4,  ('C', 'T'): 4,  ('C', 'V'): 4,  ('C', 'W'): 4,  ('C', 'Y'): 4,  ('D', 'A'): 4,  ('D', 'C'): 4,  ('D', 'D'): 0,  ('D', 'E'): 2,  ('D', 'F'): 4,  ('D', 'G'): 4,  ('D', 'H'): 4,  ('D', 'I'): 4,  ('D', 'K'): 4,  ('D', 'L'): 4,  ('D', 'M'): 4,  ('D', 'N'): 3,  ('D', 'P'): 4,  ('D', 'Q'): 4,  ('D', 'R'): 4,  ('D', 'S'): 4,  ('D', 'T'): 4,  ('D', 'V'): 4,  ('D', 'W'): 4,  ('D', 'Y'): 4,  ('E', 'A'): 4,  ('E', 'C'): 4,  ('E', 'D'): 2,  ('E', 'E'): 0,  ('E', 'F'): 4,  ('E', 'G'): 4,  ('E', 'H'): 4,  ('E', 'I'): 4,  ('E', 'K'): 3,  ('E', 'L'): 4,  ('E', 'M'): 4,  ('E', 'N'): 4,  ('E', 'P'): 4,  ('E', 'Q'): 2,  ('E', 'R'): 4,  ('E', 'S'): 4,  ('E', 'T'): 4,  ('E', 'V'): 4,  ('E', 'W'): 4,  ('E', 'Y'): 4,  ('F', 'A'): 4,  ('F', 'C'): 4,  ('F', 'D'): 4,  ('F', 'E'): 4,  ('F', 'F'): 0,  ('F', 'G'): 4,  ('F', 'H'): 4,  ('F', 'I'): 4,  ('F', 'K'): 4,  ('F', 'L'): 4,  ('F', 'M'): 4,  ('F', 'N'): 4,  ('F', 'P'): 4,  ('F', 'Q'): 4,  ('F', 'R'): 4,  ('F', 'S'): 4,  ('F', 'T'): 4,  ('F', 'V'): 4,  ('F', 'W'): 3,  ('F', 'Y'): 1,  ('G', 'A'): 4,  ('G', 'C'): 4,  ('G', 'D'): 4,  ('G', 'E'): 4,  ('G', 'F'): 4,  ('G', 'G'): 0,  ('G', 'H'): 4,  ('G', 'I'): 4,  ('G', 'K'): 4,  ('G', 'L'): 4,  ('G', 'M'): 4,  ('G', 'N'): 4,  ('G', 'P'): 4,  ('G', 'Q'): 4,  ('G', 'R'): 4,  ('G', 'S'): 4,  ('G', 'T'): 4,  ('G', 'V'): 4,  ('G', 'W'): 4,  ('G', 'Y'): 4,  ('H', 'A'): 4,  ('H', 'C'): 4,  ('H', 'D'): 4,  ('H', 'E'): 4,  ('H', 'F'): 4,  ('H', 'G'): 4,  ('H', 'H'): 0,  ('H', 'I'): 4,  ('H', 'K'): 4,  ('H', 'L'): 4,  ('H', 'M'): 4,  ('H', 'N'): 3,  ('H', 'P'): 4,  ('H', 'Q'): 4,  ('H', 'R'): 4,  ('H', 'S'): 4,  ('H', 'T'): 4,  ('H', 'V'): 4,  ('H', 'W'): 4,  ('H', 'Y'): 2,  ('I', 'A'): 4,  ('I', 'C'): 4,  ('I', 'D'): 4,  ('I', 'E'): 4,  ('I', 'F'): 4,  ('I', 'G'): 4,  ('I', 'H'): 4,  ('I', 'I'): 0,  ('I', 'K'): 4,  ('I', 'L'): 2,  ('I', 'M'): 3,  ('I', 'N'): 4,  ('I', 'P'): 4,  ('I', 'Q'): 4,  ('I', 'R'): 4,  ('I', 'S'): 4,  ('I', 'T'): 4,  ('I', 'V'): 1,  ('I', 'W'): 4,  ('I', 'Y'): 4,  ('K', 'A'): 4,  ('K', 'C'): 4,  ('K', 'D'): 4,  ('K', 'E'): 3,  ('K', 'F'): 4,  ('K', 'G'): 4,  ('K', 'H'): 4,  ('K', 'I'): 4,  ('K', 'K'): 0,  ('K', 'L'): 4,  ('K', 'M'): 4,  ('K', 'N'): 4,  ('K', 'P'): 4,  ('K', 'Q'): 3,  ('K', 'R'): 2,  ('K', 'S'): 4,  ('K', 'T'): 4,  ('K', 'V'): 4,  ('K', 'W'): 4,  ('K', 'Y'): 4,  ('L', 'A'): 4,  ('L', 'C'): 4,  ('L', 'D'): 4,  ('L', 'E'): 4,  ('L', 'F'): 4,  ('L', 'G'): 4,  ('L', 'H'): 4,  ('L', 'I'): 2,  ('L', 'K'): 4,  ('L', 'L'): 0,  ('L', 'M'): 2,  ('L', 'N'): 4,  ('L', 'P'): 4,  ('L', 'Q'): 4,  ('L', 'R'): 4,  ('L', 'S'): 4,  ('L', 'T'): 4,  ('L', 'V'): 3,  ('L', 'W'): 4,  ('L', 'Y'): 4,  ('M', 'A'): 4,  ('M', 'C'): 4,  ('M', 'D'): 4,  ('M', 'E'): 4,  ('M', 'F'): 4,  ('M', 'G'): 4,  ('M', 'H'): 4,  ('M', 'I'): 3,  ('M', 'K'): 4,  ('M', 'L'): 2,  ('M', 'M'): 0,  ('M', 'N'): 4,  ('M', 'P'): 4,  ('M', 'Q'): 4,  ('M', 'R'): 4,  ('M', 'S'): 4,  ('M', 'T'): 4,  ('M', 'V'): 3,  ('M', 'W'): 4,  ('M', 'Y'): 4,  ('N', 'A'): 4,  ('N', 'C'): 4,  ('N', 'D'): 3,  ('N', 'E'): 4,  ('N', 'F'): 4,  ('N', 'G'): 4,  ('N', 'H'): 3,  ('N', 'I'): 4,  ('N', 'K'): 4,  ('N', 'L'): 4,  ('N', 'M'): 4,  ('N', 'N'): 0,  ('N', 'P'): 4,  ('N', 'Q'): 4,  ('N', 'R'): 4,  ('N', 'S'): 3,  ('N', 'T'): 4,  ('N', 'V'): 4,  ('N', 'W'): 4,  ('N', 'Y'): 4,  ('P', 'A'): 4,  ('P', 'C'): 4,  ('P', 'D'): 4,  ('P', 'E'): 4,  ('P', 'F'): 4,  ('P', 'G'): 4,  ('P', 'H'): 4,  ('P', 'I'): 4,  ('P', 'K'): 4,  ('P', 'L'): 4,  ('P', 'M'): 4,  ('P', 'N'): 4,  ('P', 'P'): 0,  ('P', 'Q'): 4,  ('P', 'R'): 4,  ('P', 'S'): 4,  ('P', 'T'): 4,  ('P', 'V'): 4,  ('P', 'W'): 4,  ('P', 'Y'): 4,  ('Q', 'A'): 4,  ('Q', 'C'): 4,  ('Q', 'D'): 4,  ('Q', 'E'): 2,  ('Q', 'F'): 4,  ('Q', 'G'): 4,  ('Q', 'H'): 4,  ('Q', 'I'): 4,  ('Q', 'K'): 3,  ('Q', 'L'): 4,  ('Q', 'M'): 4,  ('Q', 'N'): 4,  ('Q', 'P'): 4,  ('Q', 'Q'): 0,  ('Q', 'R'): 3,  ('Q', 'S'): 4,  ('Q', 'T'): 4,  ('Q', 'V'): 4,  ('Q', 'W'): 4,  ('Q', 'Y'): 4,  ('R', 'A'): 4,  ('R', 'C'): 4,  ('R', 'D'): 4,  ('R', 'E'): 4,  ('R', 'F'): 4,  ('R', 'G'): 4,  ('R', 'H'): 4,  ('R', 'I'): 4,  ('R', 'K'): 2,  ('R', 'L'): 4,  ('R', 'M'): 4,  ('R', 'N'): 4,  ('R', 'P'): 4,  ('R', 'Q'): 3,  ('R', 'R'): 0,  ('R', 'S'): 4,  ('R', 'T'): 4,  ('R', 'V'): 4,  ('R', 'W'): 4,  ('R', 'Y'): 4,  ('S', 'A'): 3,  ('S', 'C'): 4,  ('S', 'D'): 4,  ('S', 'E'): 4,  ('S', 'F'): 4,  ('S', 'G'): 4,  ('S', 'H'): 4,  ('S', 'I'): 4,  ('S', 'K'): 4,  ('S', 'L'): 4,  ('S', 'M'): 4,  ('S', 'N'): 3,  ('S', 'P'): 4,  ('S', 'Q'): 4,  ('S', 'R'): 4,  ('S', 'S'): 0,  ('S', 'T'): 3,  ('S', 'V'): 4,  ('S', 'W'): 4,  ('S', 'Y'): 4,  ('T', 'A'): 4,  ('T', 'C'): 4,  ('T', 'D'): 4,  ('T', 'E'): 4,  ('T', 'F'): 4,  ('T', 'G'): 4,  ('T', 'H'): 4,  ('T', 'I'): 4,  ('T', 'K'): 4,  ('T', 'L'): 4,  ('T', 'M'): 4,  ('T', 'N'): 4,  ('T', 'P'): 4,  ('T', 'Q'): 4,  ('T', 'R'): 4,  ('T', 'S'): 3,  ('T', 'T'): 0,  ('T', 'V'): 4,  ('T', 'W'): 4,  ('T', 'Y'): 4,  ('V', 'A'): 4,  ('V', 'C'): 4,  ('V', 'D'): 4,  ('V', 'E'): 4,  ('V', 'F'): 4,  ('V', 'G'): 4,  ('V', 'H'): 4,  ('V', 'I'): 1,  ('V', 'K'): 4,  ('V', 'L'): 3,  ('V', 'M'): 3,  ('V', 'N'): 4,  ('V', 'P'): 4,  ('V', 'Q'): 4,  ('V', 'R'): 4,  ('V', 'S'): 4,  ('V', 'T'): 4,  ('V', 'V'): 0,  ('V', 'W'): 4,  ('V', 'Y'): 4,  ('W', 'A'): 4,  ('W', 'C'): 4,  ('W', 'D'): 4,  ('W', 'E'): 4,  ('W', 'F'): 3,  ('W', 'G'): 4,  ('W', 'H'): 4,  ('W', 'I'): 4,  ('W', 'K'): 4,  ('W', 'L'): 4,  ('W', 'M'): 4,  ('W', 'N'): 4,  ('W', 'P'): 4,  ('W', 'Q'): 4,  ('W', 'R'): 4,  ('W', 'S'): 4,  ('W', 'T'): 4,  ('W', 'V'): 4,  ('W', 'W'): 0,  ('W', 'Y'): 2,  ('Y', 'A'): 4,  ('Y', 'C'): 4,  ('Y', 'D'): 4,  ('Y', 'E'): 4,  ('Y', 'F'): 1,  ('Y', 'G'): 4,  ('Y', 'H'): 2,  ('Y', 'I'): 4,  ('Y', 'K'): 4,  ('Y', 'L'): 4,  ('Y', 'M'): 4,  ('Y', 'N'): 4,  ('Y', 'P'): 4,  ('Y', 'Q'): 4,  ('Y', 'R'): 4,  ('Y', 'S'): 4,  ('Y', 'T'): 4,  ('Y', 'V'): 4,  ('Y', 'W'): 2,  ('Y', 'Y'): 0}
+
+    def __init__(self, dist_weight=3, gap_penalty=4, ntrim=3, ctrim=2, fixed_gappos=True, cutoff: Union[None, int] = None, n_jobs: Union[None, int] = None):
+        if cutoff is None:
+            cutoff = 20
+        if n_jobs is None:
+            n_jobs = 1
+
+        self.dist_weight = dist_weight
+        self.gap_penalty = gap_penalty
+        self.ntrim = ntrim
+        self.ctrim = ctrim
+        self.fixed_gappos = fixed_gappos
+        self.cutoff = cutoff
+        self.n_jobs = n_jobs
+
+    @staticmethod
+    def _make_numba_matrix(distance_matrix, alphabet=parasail_aa_alphabet_with_unknown):
+        dm = np.zeros((len(alphabet), len(alphabet)), dtype=np.int32)
+        for (aa1, aa2), d in distance_matrix.items():
+            dm[alphabet.index(aa1), alphabet.index(aa2)] = d
+            dm[alphabet.index(aa2), alphabet.index(aa1)] = d
+        return dm
+    
+    tcr_nb_distance_matrix = _make_numba_matrix(tcr_dict_distance_matrix)
+
+    @staticmethod
+    def _seqs2mat(seqs, alphabet=parasail_aa_alphabet, max_len=None):
+        if max_len is None:
+            max_len = np.max([len(s) for s in seqs])
+        mat = -1 * np.ones((len(seqs), max_len), dtype=np.int8)
+        L = np.zeros(len(seqs), dtype=np.int8)
+        for si, s in enumerate(seqs):
+            L[si] = len(s)
+            for aai in range(max_len):
+                if aai >= len(s):
+                    break
+                try:
+                    mat[si, aai] = alphabet.index(s[aai])
+                except ValueError:
+                    #Unknown symbols given value for last column/row of matrix
+                    mat[si, aai] = len(alphabet)
+        return mat, L
+    
+    @staticmethod
+    @nb.jit(nopython=True, parallel=False, nogil=True)
+    def _nb_tcrdist_mat(seqs_mat1, seqs_mat2, seqs_L1, seqs_L2, distance_matrix=tcr_nb_distance_matrix, dist_weight=3, gap_penalty=4, ntrim=3, ctrim=2, fixed_gappos=True, cutoff=20):
+
+        assert seqs_mat1.shape[0] == seqs_L1.shape[0]
+        assert seqs_mat2.shape[0] == seqs_L2.shape[0]
+
+        data_rows = nb.typed.List()
+        indices_rows = nb.typed.List()
+        row_element_counts = np.zeros(seqs_mat1.shape[0])
+
+        empty_row = np.zeros(0)
+        for i in range(0,seqs_mat1.shape[0]):
+            data_rows.append(empty_row)
+            indices_rows.append(empty_row)
+
+        for row_index in range(seqs_mat1.shape[0]):
+            data_row = np.zeros(seqs_mat2.shape[0])
+            indices_row = np.zeros(seqs_mat2.shape[0])
+            row_end_index = 0
+
+            for col_index in range(seqs_mat2.shape[0]):
+                q_L = seqs_L1[row_index]
+                s_L = seqs_L2[col_index]
+                if q_L == s_L:
+                    distance = 1
+                    for i in range(ntrim, q_L - ctrim):
+                        distance += distance_matrix[seqs_mat1[row_index, i], seqs_mat2[col_index, i]] * dist_weight
+
+                    if(distance <= cutoff + 1):
+                        data_row[row_end_index] = distance
+                        indices_row[row_end_index] = col_index
+                        row_end_index += 1
+                    continue
+
+                short_len = min(q_L, s_L)
+                len_diff = abs(q_L - s_L)
+                if fixed_gappos:
+                    min_gappos = min(6, 3 + (short_len - 5) // 2)
+                    max_gappos = min_gappos
+                else:
+                    min_gappos = 5
+                    max_gappos = short_len - 1 - 4
+                    while min_gappos > max_gappos:
+                        min_gappos -= 1
+                        max_gappos += 1
+                min_dist = -1
+
+                for gappos in range(min_gappos, max_gappos + 1):
+                    tmp_dist = 0
+
+                    remainder = short_len - gappos
+                    for n_i in range(ntrim, gappos):
+                        tmp_dist += distance_matrix[seqs_mat1[row_index, n_i], seqs_mat2[col_index, n_i]]
+
+                    for c_i in range(ctrim, remainder):
+                        tmp_dist += distance_matrix[seqs_mat1[row_index, q_L - 1 - c_i], seqs_mat2[col_index, s_L - 1 - c_i]]
+
+                    if tmp_dist < min_dist or min_dist == -1:
+                        min_dist = tmp_dist
+
+                    if min_dist == 0:
+                        break
+
+                distance = min_dist * dist_weight + len_diff * gap_penalty + 1
+                if(distance <= cutoff + 1):
+                    data_row[row_end_index] = distance
+                    indices_row[row_end_index] = col_index
+                    row_end_index += 1
+
+            data_rows[row_index] = data_row[0:row_end_index]
+            indices_rows[row_index] = indices_row[0:row_end_index]
+            row_element_counts[row_index] = row_end_index
+
+        return data_rows, indices_rows, row_element_counts
+
+    def _calc_dist_mat_block(self, seqs: Sequence[str], seqs2: Optional[Sequence[str]] = None) -> csr_matrix:
+        
+        seqs_mat1, seqs_L1 = self._seqs2mat(seqs)
+        seqs_mat2, seqs_L2 = self._seqs2mat(seqs2)
+        
+        kwargs = {
+            'dist_weight': self.dist_weight,
+            'gap_penalty': self.gap_penalty,
+            'ntrim': self.ntrim,
+            'ctrim': self.ctrim, 
+            'fixed_gappos': self.fixed_gappos,
+            'cutoff': self.cutoff
+        }
+
+        data_rows, indices_rows, row_element_counts = self._nb_tcrdist_mat(seqs_mat1, seqs_mat2, seqs_L1, seqs_L2, **kwargs)
+
+        indptr = np.zeros(row_element_counts.shape[0] + 1)
+        indptr[1:] = np.cumsum(row_element_counts)
+        data, indices = np.concatenate(data_rows), np.concatenate(indices_rows)
+        sparse_distance_matrix = csr_matrix((data, indices, indptr), shape=(len(seqs), len(seqs2)))
+        return sparse_distance_matrix
+
+    def calc_dist_mat(self, seqs: Sequence[str], seqs2: Optional[Sequence[str]] = None) -> csr_matrix:  
+        if(seqs2 is None):
+            seqs2 = seqs
+
+        if(self.n_jobs>1):
+            split_seqs = np.array_split(seqs, self.n_jobs)
+            arguments = [(split_seqs[x],seqs2) for x in range(self.n_jobs)]
+            with multiprocessing.Pool(self.n_jobs) as p:
+                results = p.starmap(self._calc_dist_mat_block, arguments)
+            distance_matrix_csr = scipy.sparse.vstack(results)
+        else:
+            distance_matrix_csr = self._calc_dist_mat_block(seqs, seqs2)
+
+        return distance_matrix_csr
+
 
 @_doc_params(params=_doc_params_parallel_distance_calculator)
 class AlignmentDistanceCalculator(ParallelDistanceCalculator):

From 3081139c8492fec57cf69f03051bee35752258a2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 3 Apr 2024 19:46:13 +0000
Subject: [PATCH 02/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/scirpy/ir_dist/metrics.py | 483 +++++++++++++++++++++++++++++++---
 1 file changed, 453 insertions(+), 30 deletions(-)

diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index 8645ab557..d8e05fb44 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -1,10 +1,12 @@
 import abc
 import itertools
+import multiprocessing
 import warnings
 from collections.abc import Sequence
 from typing import Optional, Union
 
 import joblib
+import numba as nb
 import numpy as np
 import scipy.sparse
 import scipy.spatial
@@ -15,9 +17,6 @@
 
 from scirpy.util import _doc_params, _parallelize_with_joblib, deprecated
 
-import numba as nb
-import multiprocessing
-
 _doc_params_parallel_distance_calculator = """\
 n_jobs
     Number of jobs to use for the pairwise distance calculation, passed to
@@ -394,13 +393,423 @@ def _compute_block(self, seqs1, seqs2, origin):
 
         return result
 
-class TCRdistDistanceCalculator:
 
-    parasail_aa_alphabet = 'ARNDCQEGHILKMFPSTWYVBZX'
-    parasail_aa_alphabet_with_unknown = 'ARNDCQEGHILKMFPSTWYVBZX*'
-    tcr_dict_distance_matrix = {('A', 'A'): 0,  ('A', 'C'): 4,  ('A', 'D'): 4,  ('A', 'E'): 4,  ('A', 'F'): 4,  ('A', 'G'): 4,  ('A', 'H'): 4,  ('A', 'I'): 4,  ('A', 'K'): 4,  ('A', 'L'): 4,  ('A', 'M'): 4,  ('A', 'N'): 4,  ('A', 'P'): 4,  ('A', 'Q'): 4,  ('A', 'R'): 4,  ('A', 'S'): 3,  ('A', 'T'): 4,  ('A', 'V'): 4,  ('A', 'W'): 4,  ('A', 'Y'): 4,  ('C', 'A'): 4,  ('C', 'C'): 0,  ('C', 'D'): 4,  ('C', 'E'): 4,  ('C', 'F'): 4,  ('C', 'G'): 4,  ('C', 'H'): 4,  ('C', 'I'): 4,  ('C', 'K'): 4,  ('C', 'L'): 4,  ('C', 'M'): 4,  ('C', 'N'): 4,  ('C', 'P'): 4,  ('C', 'Q'): 4,  ('C', 'R'): 4,  ('C', 'S'): 4,  ('C', 'T'): 4,  ('C', 'V'): 4,  ('C', 'W'): 4,  ('C', 'Y'): 4,  ('D', 'A'): 4,  ('D', 'C'): 4,  ('D', 'D'): 0,  ('D', 'E'): 2,  ('D', 'F'): 4,  ('D', 'G'): 4,  ('D', 'H'): 4,  ('D', 'I'): 4,  ('D', 'K'): 4,  ('D', 'L'): 4,  ('D', 'M'): 4,  ('D', 'N'): 3,  ('D', 'P'): 4,  ('D', 'Q'): 4,  ('D', 'R'): 4,  ('D', 'S'): 4,  ('D', 'T'): 4,  ('D', 'V'): 4,  ('D', 'W'): 4,  ('D', 'Y'): 4,  ('E', 'A'): 4,  ('E', 'C'): 4,  ('E', 'D'): 2,  ('E', 'E'): 0,  ('E', 'F'): 4,  ('E', 'G'): 4,  ('E', 'H'): 4,  ('E', 'I'): 4,  ('E', 'K'): 3,  ('E', 'L'): 4,  ('E', 'M'): 4,  ('E', 'N'): 4,  ('E', 'P'): 4,  ('E', 'Q'): 2,  ('E', 'R'): 4,  ('E', 'S'): 4,  ('E', 'T'): 4,  ('E', 'V'): 4,  ('E', 'W'): 4,  ('E', 'Y'): 4,  ('F', 'A'): 4,  ('F', 'C'): 4,  ('F', 'D'): 4,  ('F', 'E'): 4,  ('F', 'F'): 0,  ('F', 'G'): 4,  ('F', 'H'): 4,  ('F', 'I'): 4,  ('F', 'K'): 4,  ('F', 'L'): 4,  ('F', 'M'): 4,  ('F', 'N'): 4,  ('F', 'P'): 4,  ('F', 'Q'): 4,  ('F', 'R'): 4,  ('F', 'S'): 4,  ('F', 'T'): 4,  ('F', 'V'): 4,  ('F', 'W'): 3,  ('F', 'Y'): 1,  ('G', 'A'): 4,  ('G', 'C'): 4,  ('G', 'D'): 4,  ('G', 'E'): 4,  ('G', 'F'): 4,  ('G', 'G'): 0,  ('G', 'H'): 4,  ('G', 'I'): 4,  ('G', 'K'): 4,  ('G', 'L'): 4,  ('G', 'M'): 4,  ('G', 'N'): 4,  ('G', 'P'): 4,  ('G', 'Q'): 4,  ('G', 'R'): 4,  ('G', 'S'): 4,  ('G', 'T'): 4,  ('G', 'V'): 4,  ('G', 'W'): 4,  ('G', 'Y'): 4,  ('H', 'A'): 4,  ('H', 'C'): 4,  ('H', 'D'): 4,  ('H', 'E'): 4,  ('H', 'F'): 4,  ('H', 'G'): 4,  ('H', 'H'): 0,  ('H', 'I'): 4,  ('H', 'K'): 4,  ('H', 'L'): 4,  ('H', 'M'): 4,  ('H', 'N'): 3,  ('H', 'P'): 4,  ('H', 'Q'): 4,  ('H', 'R'): 4,  ('H', 'S'): 4,  ('H', 'T'): 4,  ('H', 'V'): 4,  ('H', 'W'): 4,  ('H', 'Y'): 2,  ('I', 'A'): 4,  ('I', 'C'): 4,  ('I', 'D'): 4,  ('I', 'E'): 4,  ('I', 'F'): 4,  ('I', 'G'): 4,  ('I', 'H'): 4,  ('I', 'I'): 0,  ('I', 'K'): 4,  ('I', 'L'): 2,  ('I', 'M'): 3,  ('I', 'N'): 4,  ('I', 'P'): 4,  ('I', 'Q'): 4,  ('I', 'R'): 4,  ('I', 'S'): 4,  ('I', 'T'): 4,  ('I', 'V'): 1,  ('I', 'W'): 4,  ('I', 'Y'): 4,  ('K', 'A'): 4,  ('K', 'C'): 4,  ('K', 'D'): 4,  ('K', 'E'): 3,  ('K', 'F'): 4,  ('K', 'G'): 4,  ('K', 'H'): 4,  ('K', 'I'): 4,  ('K', 'K'): 0,  ('K', 'L'): 4,  ('K', 'M'): 4,  ('K', 'N'): 4,  ('K', 'P'): 4,  ('K', 'Q'): 3,  ('K', 'R'): 2,  ('K', 'S'): 4,  ('K', 'T'): 4,  ('K', 'V'): 4,  ('K', 'W'): 4,  ('K', 'Y'): 4,  ('L', 'A'): 4,  ('L', 'C'): 4,  ('L', 'D'): 4,  ('L', 'E'): 4,  ('L', 'F'): 4,  ('L', 'G'): 4,  ('L', 'H'): 4,  ('L', 'I'): 2,  ('L', 'K'): 4,  ('L', 'L'): 0,  ('L', 'M'): 2,  ('L', 'N'): 4,  ('L', 'P'): 4,  ('L', 'Q'): 4,  ('L', 'R'): 4,  ('L', 'S'): 4,  ('L', 'T'): 4,  ('L', 'V'): 3,  ('L', 'W'): 4,  ('L', 'Y'): 4,  ('M', 'A'): 4,  ('M', 'C'): 4,  ('M', 'D'): 4,  ('M', 'E'): 4,  ('M', 'F'): 4,  ('M', 'G'): 4,  ('M', 'H'): 4,  ('M', 'I'): 3,  ('M', 'K'): 4,  ('M', 'L'): 2,  ('M', 'M'): 0,  ('M', 'N'): 4,  ('M', 'P'): 4,  ('M', 'Q'): 4,  ('M', 'R'): 4,  ('M', 'S'): 4,  ('M', 'T'): 4,  ('M', 'V'): 3,  ('M', 'W'): 4,  ('M', 'Y'): 4,  ('N', 'A'): 4,  ('N', 'C'): 4,  ('N', 'D'): 3,  ('N', 'E'): 4,  ('N', 'F'): 4,  ('N', 'G'): 4,  ('N', 'H'): 3,  ('N', 'I'): 4,  ('N', 'K'): 4,  ('N', 'L'): 4,  ('N', 'M'): 4,  ('N', 'N'): 0,  ('N', 'P'): 4,  ('N', 'Q'): 4,  ('N', 'R'): 4,  ('N', 'S'): 3,  ('N', 'T'): 4,  ('N', 'V'): 4,  ('N', 'W'): 4,  ('N', 'Y'): 4,  ('P', 'A'): 4,  ('P', 'C'): 4,  ('P', 'D'): 4,  ('P', 'E'): 4,  ('P', 'F'): 4,  ('P', 'G'): 4,  ('P', 'H'): 4,  ('P', 'I'): 4,  ('P', 'K'): 4,  ('P', 'L'): 4,  ('P', 'M'): 4,  ('P', 'N'): 4,  ('P', 'P'): 0,  ('P', 'Q'): 4,  ('P', 'R'): 4,  ('P', 'S'): 4,  ('P', 'T'): 4,  ('P', 'V'): 4,  ('P', 'W'): 4,  ('P', 'Y'): 4,  ('Q', 'A'): 4,  ('Q', 'C'): 4,  ('Q', 'D'): 4,  ('Q', 'E'): 2,  ('Q', 'F'): 4,  ('Q', 'G'): 4,  ('Q', 'H'): 4,  ('Q', 'I'): 4,  ('Q', 'K'): 3,  ('Q', 'L'): 4,  ('Q', 'M'): 4,  ('Q', 'N'): 4,  ('Q', 'P'): 4,  ('Q', 'Q'): 0,  ('Q', 'R'): 3,  ('Q', 'S'): 4,  ('Q', 'T'): 4,  ('Q', 'V'): 4,  ('Q', 'W'): 4,  ('Q', 'Y'): 4,  ('R', 'A'): 4,  ('R', 'C'): 4,  ('R', 'D'): 4,  ('R', 'E'): 4,  ('R', 'F'): 4,  ('R', 'G'): 4,  ('R', 'H'): 4,  ('R', 'I'): 4,  ('R', 'K'): 2,  ('R', 'L'): 4,  ('R', 'M'): 4,  ('R', 'N'): 4,  ('R', 'P'): 4,  ('R', 'Q'): 3,  ('R', 'R'): 0,  ('R', 'S'): 4,  ('R', 'T'): 4,  ('R', 'V'): 4,  ('R', 'W'): 4,  ('R', 'Y'): 4,  ('S', 'A'): 3,  ('S', 'C'): 4,  ('S', 'D'): 4,  ('S', 'E'): 4,  ('S', 'F'): 4,  ('S', 'G'): 4,  ('S', 'H'): 4,  ('S', 'I'): 4,  ('S', 'K'): 4,  ('S', 'L'): 4,  ('S', 'M'): 4,  ('S', 'N'): 3,  ('S', 'P'): 4,  ('S', 'Q'): 4,  ('S', 'R'): 4,  ('S', 'S'): 0,  ('S', 'T'): 3,  ('S', 'V'): 4,  ('S', 'W'): 4,  ('S', 'Y'): 4,  ('T', 'A'): 4,  ('T', 'C'): 4,  ('T', 'D'): 4,  ('T', 'E'): 4,  ('T', 'F'): 4,  ('T', 'G'): 4,  ('T', 'H'): 4,  ('T', 'I'): 4,  ('T', 'K'): 4,  ('T', 'L'): 4,  ('T', 'M'): 4,  ('T', 'N'): 4,  ('T', 'P'): 4,  ('T', 'Q'): 4,  ('T', 'R'): 4,  ('T', 'S'): 3,  ('T', 'T'): 0,  ('T', 'V'): 4,  ('T', 'W'): 4,  ('T', 'Y'): 4,  ('V', 'A'): 4,  ('V', 'C'): 4,  ('V', 'D'): 4,  ('V', 'E'): 4,  ('V', 'F'): 4,  ('V', 'G'): 4,  ('V', 'H'): 4,  ('V', 'I'): 1,  ('V', 'K'): 4,  ('V', 'L'): 3,  ('V', 'M'): 3,  ('V', 'N'): 4,  ('V', 'P'): 4,  ('V', 'Q'): 4,  ('V', 'R'): 4,  ('V', 'S'): 4,  ('V', 'T'): 4,  ('V', 'V'): 0,  ('V', 'W'): 4,  ('V', 'Y'): 4,  ('W', 'A'): 4,  ('W', 'C'): 4,  ('W', 'D'): 4,  ('W', 'E'): 4,  ('W', 'F'): 3,  ('W', 'G'): 4,  ('W', 'H'): 4,  ('W', 'I'): 4,  ('W', 'K'): 4,  ('W', 'L'): 4,  ('W', 'M'): 4,  ('W', 'N'): 4,  ('W', 'P'): 4,  ('W', 'Q'): 4,  ('W', 'R'): 4,  ('W', 'S'): 4,  ('W', 'T'): 4,  ('W', 'V'): 4,  ('W', 'W'): 0,  ('W', 'Y'): 2,  ('Y', 'A'): 4,  ('Y', 'C'): 4,  ('Y', 'D'): 4,  ('Y', 'E'): 4,  ('Y', 'F'): 1,  ('Y', 'G'): 4,  ('Y', 'H'): 2,  ('Y', 'I'): 4,  ('Y', 'K'): 4,  ('Y', 'L'): 4,  ('Y', 'M'): 4,  ('Y', 'N'): 4,  ('Y', 'P'): 4,  ('Y', 'Q'): 4,  ('Y', 'R'): 4,  ('Y', 'S'): 4,  ('Y', 'T'): 4,  ('Y', 'V'): 4,  ('Y', 'W'): 2,  ('Y', 'Y'): 0}
+class TCRdistDistanceCalculator:
+    parasail_aa_alphabet = "ARNDCQEGHILKMFPSTWYVBZX"
+    parasail_aa_alphabet_with_unknown = "ARNDCQEGHILKMFPSTWYVBZX*"
+    tcr_dict_distance_matrix = {
+        ("A", "A"): 0,
+        ("A", "C"): 4,
+        ("A", "D"): 4,
+        ("A", "E"): 4,
+        ("A", "F"): 4,
+        ("A", "G"): 4,
+        ("A", "H"): 4,
+        ("A", "I"): 4,
+        ("A", "K"): 4,
+        ("A", "L"): 4,
+        ("A", "M"): 4,
+        ("A", "N"): 4,
+        ("A", "P"): 4,
+        ("A", "Q"): 4,
+        ("A", "R"): 4,
+        ("A", "S"): 3,
+        ("A", "T"): 4,
+        ("A", "V"): 4,
+        ("A", "W"): 4,
+        ("A", "Y"): 4,
+        ("C", "A"): 4,
+        ("C", "C"): 0,
+        ("C", "D"): 4,
+        ("C", "E"): 4,
+        ("C", "F"): 4,
+        ("C", "G"): 4,
+        ("C", "H"): 4,
+        ("C", "I"): 4,
+        ("C", "K"): 4,
+        ("C", "L"): 4,
+        ("C", "M"): 4,
+        ("C", "N"): 4,
+        ("C", "P"): 4,
+        ("C", "Q"): 4,
+        ("C", "R"): 4,
+        ("C", "S"): 4,
+        ("C", "T"): 4,
+        ("C", "V"): 4,
+        ("C", "W"): 4,
+        ("C", "Y"): 4,
+        ("D", "A"): 4,
+        ("D", "C"): 4,
+        ("D", "D"): 0,
+        ("D", "E"): 2,
+        ("D", "F"): 4,
+        ("D", "G"): 4,
+        ("D", "H"): 4,
+        ("D", "I"): 4,
+        ("D", "K"): 4,
+        ("D", "L"): 4,
+        ("D", "M"): 4,
+        ("D", "N"): 3,
+        ("D", "P"): 4,
+        ("D", "Q"): 4,
+        ("D", "R"): 4,
+        ("D", "S"): 4,
+        ("D", "T"): 4,
+        ("D", "V"): 4,
+        ("D", "W"): 4,
+        ("D", "Y"): 4,
+        ("E", "A"): 4,
+        ("E", "C"): 4,
+        ("E", "D"): 2,
+        ("E", "E"): 0,
+        ("E", "F"): 4,
+        ("E", "G"): 4,
+        ("E", "H"): 4,
+        ("E", "I"): 4,
+        ("E", "K"): 3,
+        ("E", "L"): 4,
+        ("E", "M"): 4,
+        ("E", "N"): 4,
+        ("E", "P"): 4,
+        ("E", "Q"): 2,
+        ("E", "R"): 4,
+        ("E", "S"): 4,
+        ("E", "T"): 4,
+        ("E", "V"): 4,
+        ("E", "W"): 4,
+        ("E", "Y"): 4,
+        ("F", "A"): 4,
+        ("F", "C"): 4,
+        ("F", "D"): 4,
+        ("F", "E"): 4,
+        ("F", "F"): 0,
+        ("F", "G"): 4,
+        ("F", "H"): 4,
+        ("F", "I"): 4,
+        ("F", "K"): 4,
+        ("F", "L"): 4,
+        ("F", "M"): 4,
+        ("F", "N"): 4,
+        ("F", "P"): 4,
+        ("F", "Q"): 4,
+        ("F", "R"): 4,
+        ("F", "S"): 4,
+        ("F", "T"): 4,
+        ("F", "V"): 4,
+        ("F", "W"): 3,
+        ("F", "Y"): 1,
+        ("G", "A"): 4,
+        ("G", "C"): 4,
+        ("G", "D"): 4,
+        ("G", "E"): 4,
+        ("G", "F"): 4,
+        ("G", "G"): 0,
+        ("G", "H"): 4,
+        ("G", "I"): 4,
+        ("G", "K"): 4,
+        ("G", "L"): 4,
+        ("G", "M"): 4,
+        ("G", "N"): 4,
+        ("G", "P"): 4,
+        ("G", "Q"): 4,
+        ("G", "R"): 4,
+        ("G", "S"): 4,
+        ("G", "T"): 4,
+        ("G", "V"): 4,
+        ("G", "W"): 4,
+        ("G", "Y"): 4,
+        ("H", "A"): 4,
+        ("H", "C"): 4,
+        ("H", "D"): 4,
+        ("H", "E"): 4,
+        ("H", "F"): 4,
+        ("H", "G"): 4,
+        ("H", "H"): 0,
+        ("H", "I"): 4,
+        ("H", "K"): 4,
+        ("H", "L"): 4,
+        ("H", "M"): 4,
+        ("H", "N"): 3,
+        ("H", "P"): 4,
+        ("H", "Q"): 4,
+        ("H", "R"): 4,
+        ("H", "S"): 4,
+        ("H", "T"): 4,
+        ("H", "V"): 4,
+        ("H", "W"): 4,
+        ("H", "Y"): 2,
+        ("I", "A"): 4,
+        ("I", "C"): 4,
+        ("I", "D"): 4,
+        ("I", "E"): 4,
+        ("I", "F"): 4,
+        ("I", "G"): 4,
+        ("I", "H"): 4,
+        ("I", "I"): 0,
+        ("I", "K"): 4,
+        ("I", "L"): 2,
+        ("I", "M"): 3,
+        ("I", "N"): 4,
+        ("I", "P"): 4,
+        ("I", "Q"): 4,
+        ("I", "R"): 4,
+        ("I", "S"): 4,
+        ("I", "T"): 4,
+        ("I", "V"): 1,
+        ("I", "W"): 4,
+        ("I", "Y"): 4,
+        ("K", "A"): 4,
+        ("K", "C"): 4,
+        ("K", "D"): 4,
+        ("K", "E"): 3,
+        ("K", "F"): 4,
+        ("K", "G"): 4,
+        ("K", "H"): 4,
+        ("K", "I"): 4,
+        ("K", "K"): 0,
+        ("K", "L"): 4,
+        ("K", "M"): 4,
+        ("K", "N"): 4,
+        ("K", "P"): 4,
+        ("K", "Q"): 3,
+        ("K", "R"): 2,
+        ("K", "S"): 4,
+        ("K", "T"): 4,
+        ("K", "V"): 4,
+        ("K", "W"): 4,
+        ("K", "Y"): 4,
+        ("L", "A"): 4,
+        ("L", "C"): 4,
+        ("L", "D"): 4,
+        ("L", "E"): 4,
+        ("L", "F"): 4,
+        ("L", "G"): 4,
+        ("L", "H"): 4,
+        ("L", "I"): 2,
+        ("L", "K"): 4,
+        ("L", "L"): 0,
+        ("L", "M"): 2,
+        ("L", "N"): 4,
+        ("L", "P"): 4,
+        ("L", "Q"): 4,
+        ("L", "R"): 4,
+        ("L", "S"): 4,
+        ("L", "T"): 4,
+        ("L", "V"): 3,
+        ("L", "W"): 4,
+        ("L", "Y"): 4,
+        ("M", "A"): 4,
+        ("M", "C"): 4,
+        ("M", "D"): 4,
+        ("M", "E"): 4,
+        ("M", "F"): 4,
+        ("M", "G"): 4,
+        ("M", "H"): 4,
+        ("M", "I"): 3,
+        ("M", "K"): 4,
+        ("M", "L"): 2,
+        ("M", "M"): 0,
+        ("M", "N"): 4,
+        ("M", "P"): 4,
+        ("M", "Q"): 4,
+        ("M", "R"): 4,
+        ("M", "S"): 4,
+        ("M", "T"): 4,
+        ("M", "V"): 3,
+        ("M", "W"): 4,
+        ("M", "Y"): 4,
+        ("N", "A"): 4,
+        ("N", "C"): 4,
+        ("N", "D"): 3,
+        ("N", "E"): 4,
+        ("N", "F"): 4,
+        ("N", "G"): 4,
+        ("N", "H"): 3,
+        ("N", "I"): 4,
+        ("N", "K"): 4,
+        ("N", "L"): 4,
+        ("N", "M"): 4,
+        ("N", "N"): 0,
+        ("N", "P"): 4,
+        ("N", "Q"): 4,
+        ("N", "R"): 4,
+        ("N", "S"): 3,
+        ("N", "T"): 4,
+        ("N", "V"): 4,
+        ("N", "W"): 4,
+        ("N", "Y"): 4,
+        ("P", "A"): 4,
+        ("P", "C"): 4,
+        ("P", "D"): 4,
+        ("P", "E"): 4,
+        ("P", "F"): 4,
+        ("P", "G"): 4,
+        ("P", "H"): 4,
+        ("P", "I"): 4,
+        ("P", "K"): 4,
+        ("P", "L"): 4,
+        ("P", "M"): 4,
+        ("P", "N"): 4,
+        ("P", "P"): 0,
+        ("P", "Q"): 4,
+        ("P", "R"): 4,
+        ("P", "S"): 4,
+        ("P", "T"): 4,
+        ("P", "V"): 4,
+        ("P", "W"): 4,
+        ("P", "Y"): 4,
+        ("Q", "A"): 4,
+        ("Q", "C"): 4,
+        ("Q", "D"): 4,
+        ("Q", "E"): 2,
+        ("Q", "F"): 4,
+        ("Q", "G"): 4,
+        ("Q", "H"): 4,
+        ("Q", "I"): 4,
+        ("Q", "K"): 3,
+        ("Q", "L"): 4,
+        ("Q", "M"): 4,
+        ("Q", "N"): 4,
+        ("Q", "P"): 4,
+        ("Q", "Q"): 0,
+        ("Q", "R"): 3,
+        ("Q", "S"): 4,
+        ("Q", "T"): 4,
+        ("Q", "V"): 4,
+        ("Q", "W"): 4,
+        ("Q", "Y"): 4,
+        ("R", "A"): 4,
+        ("R", "C"): 4,
+        ("R", "D"): 4,
+        ("R", "E"): 4,
+        ("R", "F"): 4,
+        ("R", "G"): 4,
+        ("R", "H"): 4,
+        ("R", "I"): 4,
+        ("R", "K"): 2,
+        ("R", "L"): 4,
+        ("R", "M"): 4,
+        ("R", "N"): 4,
+        ("R", "P"): 4,
+        ("R", "Q"): 3,
+        ("R", "R"): 0,
+        ("R", "S"): 4,
+        ("R", "T"): 4,
+        ("R", "V"): 4,
+        ("R", "W"): 4,
+        ("R", "Y"): 4,
+        ("S", "A"): 3,
+        ("S", "C"): 4,
+        ("S", "D"): 4,
+        ("S", "E"): 4,
+        ("S", "F"): 4,
+        ("S", "G"): 4,
+        ("S", "H"): 4,
+        ("S", "I"): 4,
+        ("S", "K"): 4,
+        ("S", "L"): 4,
+        ("S", "M"): 4,
+        ("S", "N"): 3,
+        ("S", "P"): 4,
+        ("S", "Q"): 4,
+        ("S", "R"): 4,
+        ("S", "S"): 0,
+        ("S", "T"): 3,
+        ("S", "V"): 4,
+        ("S", "W"): 4,
+        ("S", "Y"): 4,
+        ("T", "A"): 4,
+        ("T", "C"): 4,
+        ("T", "D"): 4,
+        ("T", "E"): 4,
+        ("T", "F"): 4,
+        ("T", "G"): 4,
+        ("T", "H"): 4,
+        ("T", "I"): 4,
+        ("T", "K"): 4,
+        ("T", "L"): 4,
+        ("T", "M"): 4,
+        ("T", "N"): 4,
+        ("T", "P"): 4,
+        ("T", "Q"): 4,
+        ("T", "R"): 4,
+        ("T", "S"): 3,
+        ("T", "T"): 0,
+        ("T", "V"): 4,
+        ("T", "W"): 4,
+        ("T", "Y"): 4,
+        ("V", "A"): 4,
+        ("V", "C"): 4,
+        ("V", "D"): 4,
+        ("V", "E"): 4,
+        ("V", "F"): 4,
+        ("V", "G"): 4,
+        ("V", "H"): 4,
+        ("V", "I"): 1,
+        ("V", "K"): 4,
+        ("V", "L"): 3,
+        ("V", "M"): 3,
+        ("V", "N"): 4,
+        ("V", "P"): 4,
+        ("V", "Q"): 4,
+        ("V", "R"): 4,
+        ("V", "S"): 4,
+        ("V", "T"): 4,
+        ("V", "V"): 0,
+        ("V", "W"): 4,
+        ("V", "Y"): 4,
+        ("W", "A"): 4,
+        ("W", "C"): 4,
+        ("W", "D"): 4,
+        ("W", "E"): 4,
+        ("W", "F"): 3,
+        ("W", "G"): 4,
+        ("W", "H"): 4,
+        ("W", "I"): 4,
+        ("W", "K"): 4,
+        ("W", "L"): 4,
+        ("W", "M"): 4,
+        ("W", "N"): 4,
+        ("W", "P"): 4,
+        ("W", "Q"): 4,
+        ("W", "R"): 4,
+        ("W", "S"): 4,
+        ("W", "T"): 4,
+        ("W", "V"): 4,
+        ("W", "W"): 0,
+        ("W", "Y"): 2,
+        ("Y", "A"): 4,
+        ("Y", "C"): 4,
+        ("Y", "D"): 4,
+        ("Y", "E"): 4,
+        ("Y", "F"): 1,
+        ("Y", "G"): 4,
+        ("Y", "H"): 2,
+        ("Y", "I"): 4,
+        ("Y", "K"): 4,
+        ("Y", "L"): 4,
+        ("Y", "M"): 4,
+        ("Y", "N"): 4,
+        ("Y", "P"): 4,
+        ("Y", "Q"): 4,
+        ("Y", "R"): 4,
+        ("Y", "S"): 4,
+        ("Y", "T"): 4,
+        ("Y", "V"): 4,
+        ("Y", "W"): 2,
+        ("Y", "Y"): 0,
+    }
 
-    def __init__(self, dist_weight=3, gap_penalty=4, ntrim=3, ctrim=2, fixed_gappos=True, cutoff: Union[None, int] = None, n_jobs: Union[None, int] = None):
+    def __init__(
+        self,
+        dist_weight=3,
+        gap_penalty=4,
+        ntrim=3,
+        ctrim=2,
+        fixed_gappos=True,
+        cutoff: Union[None, int] = None,
+        n_jobs: Union[None, int] = None,
+    ):
         if cutoff is None:
             cutoff = 20
         if n_jobs is None:
@@ -421,7 +830,7 @@ def _make_numba_matrix(distance_matrix, alphabet=parasail_aa_alphabet_with_unkno
             dm[alphabet.index(aa1), alphabet.index(aa2)] = d
             dm[alphabet.index(aa2), alphabet.index(aa1)] = d
         return dm
-    
+
     tcr_nb_distance_matrix = _make_numba_matrix(tcr_dict_distance_matrix)
 
     @staticmethod
@@ -438,14 +847,25 @@ def _seqs2mat(seqs, alphabet=parasail_aa_alphabet, max_len=None):
                 try:
                     mat[si, aai] = alphabet.index(s[aai])
                 except ValueError:
-                    #Unknown symbols given value for last column/row of matrix
+                    # Unknown symbols given value for last column/row of matrix
                     mat[si, aai] = len(alphabet)
         return mat, L
-    
+
     @staticmethod
     @nb.jit(nopython=True, parallel=False, nogil=True)
-    def _nb_tcrdist_mat(seqs_mat1, seqs_mat2, seqs_L1, seqs_L2, distance_matrix=tcr_nb_distance_matrix, dist_weight=3, gap_penalty=4, ntrim=3, ctrim=2, fixed_gappos=True, cutoff=20):
-
+    def _nb_tcrdist_mat(
+        seqs_mat1,
+        seqs_mat2,
+        seqs_L1,
+        seqs_L2,
+        distance_matrix=tcr_nb_distance_matrix,
+        dist_weight=3,
+        gap_penalty=4,
+        ntrim=3,
+        ctrim=2,
+        fixed_gappos=True,
+        cutoff=20,
+    ):
         assert seqs_mat1.shape[0] == seqs_L1.shape[0]
         assert seqs_mat2.shape[0] == seqs_L2.shape[0]
 
@@ -454,7 +874,7 @@ def _nb_tcrdist_mat(seqs_mat1, seqs_mat2, seqs_L1, seqs_L2, distance_matrix=tcr_
         row_element_counts = np.zeros(seqs_mat1.shape[0])
 
         empty_row = np.zeros(0)
-        for i in range(0,seqs_mat1.shape[0]):
+        for i in range(0, seqs_mat1.shape[0]):
             data_rows.append(empty_row)
             indices_rows.append(empty_row)
 
@@ -471,7 +891,7 @@ def _nb_tcrdist_mat(seqs_mat1, seqs_mat2, seqs_L1, seqs_L2, distance_matrix=tcr_
                     for i in range(ntrim, q_L - ctrim):
                         distance += distance_matrix[seqs_mat1[row_index, i], seqs_mat2[col_index, i]] * dist_weight
 
-                    if(distance <= cutoff + 1):
+                    if distance <= cutoff + 1:
                         data_row[row_end_index] = distance
                         indices_row[row_end_index] = col_index
                         row_end_index += 1
@@ -498,7 +918,9 @@ def _nb_tcrdist_mat(seqs_mat1, seqs_mat2, seqs_L1, seqs_L2, distance_matrix=tcr_
                         tmp_dist += distance_matrix[seqs_mat1[row_index, n_i], seqs_mat2[col_index, n_i]]
 
                     for c_i in range(ctrim, remainder):
-                        tmp_dist += distance_matrix[seqs_mat1[row_index, q_L - 1 - c_i], seqs_mat2[col_index, s_L - 1 - c_i]]
+                        tmp_dist += distance_matrix[
+                            seqs_mat1[row_index, q_L - 1 - c_i], seqs_mat2[col_index, s_L - 1 - c_i]
+                        ]
 
                     if tmp_dist < min_dist or min_dist == -1:
                         min_dist = tmp_dist
@@ -507,7 +929,7 @@ def _nb_tcrdist_mat(seqs_mat1, seqs_mat2, seqs_L1, seqs_L2, distance_matrix=tcr_
                         break
 
                 distance = min_dist * dist_weight + len_diff * gap_penalty + 1
-                if(distance <= cutoff + 1):
+                if distance <= cutoff + 1:
                     data_row[row_end_index] = distance
                     indices_row[row_end_index] = col_index
                     row_end_index += 1
@@ -519,20 +941,21 @@ def _nb_tcrdist_mat(seqs_mat1, seqs_mat2, seqs_L1, seqs_L2, distance_matrix=tcr_
         return data_rows, indices_rows, row_element_counts
 
     def _calc_dist_mat_block(self, seqs: Sequence[str], seqs2: Optional[Sequence[str]] = None) -> csr_matrix:
-        
         seqs_mat1, seqs_L1 = self._seqs2mat(seqs)
         seqs_mat2, seqs_L2 = self._seqs2mat(seqs2)
-        
+
         kwargs = {
-            'dist_weight': self.dist_weight,
-            'gap_penalty': self.gap_penalty,
-            'ntrim': self.ntrim,
-            'ctrim': self.ctrim, 
-            'fixed_gappos': self.fixed_gappos,
-            'cutoff': self.cutoff
+            "dist_weight": self.dist_weight,
+            "gap_penalty": self.gap_penalty,
+            "ntrim": self.ntrim,
+            "ctrim": self.ctrim,
+            "fixed_gappos": self.fixed_gappos,
+            "cutoff": self.cutoff,
         }
 
-        data_rows, indices_rows, row_element_counts = self._nb_tcrdist_mat(seqs_mat1, seqs_mat2, seqs_L1, seqs_L2, **kwargs)
+        data_rows, indices_rows, row_element_counts = self._nb_tcrdist_mat(
+            seqs_mat1, seqs_mat2, seqs_L1, seqs_L2, **kwargs
+        )
 
         indptr = np.zeros(row_element_counts.shape[0] + 1)
         indptr[1:] = np.cumsum(row_element_counts)
@@ -540,13 +963,13 @@ def _calc_dist_mat_block(self, seqs: Sequence[str], seqs2: Optional[Sequence[str
         sparse_distance_matrix = csr_matrix((data, indices, indptr), shape=(len(seqs), len(seqs2)))
         return sparse_distance_matrix
 
-    def calc_dist_mat(self, seqs: Sequence[str], seqs2: Optional[Sequence[str]] = None) -> csr_matrix:  
-        if(seqs2 is None):
+    def calc_dist_mat(self, seqs: Sequence[str], seqs2: Optional[Sequence[str]] = None) -> csr_matrix:
+        if seqs2 is None:
             seqs2 = seqs
 
-        if(self.n_jobs>1):
+        if self.n_jobs > 1:
             split_seqs = np.array_split(seqs, self.n_jobs)
-            arguments = [(split_seqs[x],seqs2) for x in range(self.n_jobs)]
+            arguments = [(split_seqs[x], seqs2) for x in range(self.n_jobs)]
             with multiprocessing.Pool(self.n_jobs) as p:
                 results = p.starmap(self._calc_dist_mat_block, arguments)
             distance_matrix_csr = scipy.sparse.vstack(results)

From 3b96e8a90a2de8f1c7ae83502c74b2b95292666c Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Mon, 8 Apr 2024 19:18:51 +0200
Subject: [PATCH 03/27] tcrdist tests added

---
 src/scirpy/tests/test_ir_dist_metrics.py | 111 ++++++++++++++++++++++-
 1 file changed, 109 insertions(+), 2 deletions(-)

diff --git a/src/scirpy/tests/test_ir_dist_metrics.py b/src/scirpy/tests/test_ir_dist_metrics.py
index ae1f9b32b..df19f18f1 100644
--- a/src/scirpy/tests/test_ir_dist_metrics.py
+++ b/src/scirpy/tests/test_ir_dist_metrics.py
@@ -14,6 +14,7 @@
     IdentityDistanceCalculator,
     LevenshteinDistanceCalculator,
     ParallelDistanceCalculator,
+    TCRdistDistanceCalculator,
 )
 
 from .util import _squarify
@@ -303,10 +304,10 @@ def test_fast_alignment_dist_with_two_seq_arrays():
 
     npt.assert_almost_equal(res.toarray(), np.array([[0, 1, 5], [0, 5, 10], [0, 0, 0], [1, 0, 0]]))
 
-
+"""
 @pytest.mark.parametrize("metric", ["alignment", "fastalignment", "identity", "hamming", "levenshtein"])
 def test_sequence_dist_all_metrics(metric):
-    """Smoke test, no assertions!"""
+    #Smoke test, no assertions!
     unique_seqs = np.array(["AAA", "ARA", "AFFFFFA", "FAFAFA", "FFF"])
     seqs2 = np.array(["RRR", "FAFA", "WWWWWWW"])
     dist_mat = ir.ir_dist.sequence_dist(unique_seqs, metric=metric, cutoff=8, n_jobs=2)
@@ -314,3 +315,109 @@ def test_sequence_dist_all_metrics(metric):
 
     dist_mat = ir.ir_dist.sequence_dist(unique_seqs, seqs2, metric=metric, cutoff=8, n_jobs=2)
     assert dist_mat.shape == (5, 3)
+"""
+
+@pytest.mark.parametrize("test_parameters,test_input,expected_result", [
+    
+    #test more complex strings with unequal length and set high cutoff such that cutoff is neglected
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
+     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+      np.array(["WEKFAPIQCMNR", "RDAIYTCCNKSWEQ", "CWWMFGHTVRI", "GWSZNNHI"])),
+      np.array([[57, 74, 65, 33],
+                [66, 68, 65, 33],
+                [53, 58, 49, 25]])),
+    
+    #test very small input sequences
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 0, "ctrim": 0, "fixed_gappos": True, "cutoff": None, "n_jobs": 1},
+     (np.array(["A"]),
+      np.array(["C"])),
+      np.array([[13]])),
+    
+    #test standard parameters with simple input sequences
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": None, "n_jobs": 1},
+     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+      np.array([[1, 0, 21],
+                [0, 1, 21],
+                [21, 21, 1]])),
+    
+    #test standard parameters with simple input sequences with second sequences array set to None
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": None, "n_jobs": 1},
+     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+      None),
+      np.array([[1, 0, 21],
+                [0, 1, 21],
+                [21, 21, 1]])),
+    
+    #test with dist_weight set to 0
+    ({"dist_weight": 0, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": None, "n_jobs": 1},
+     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+      np.array([[1, 1, 9],
+                [1, 1, 9],
+                [9, 9, 1]])),
+
+    #test with dist_weight set high and cutoff set high to neglect it
+    ({"dist_weight": 30, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
+     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+      np.array([[1, 241, 129],
+                [241, 1, 129],
+                [129, 129, 1]])),
+    
+    #test with gap_penalty set to 0
+    ({"dist_weight": 3, "gap_penalty": 0, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": None, "n_jobs": 1},
+     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+      np.array([[1, 0, 13],
+                [0, 1, 13],
+                [13, 13, 1]])),
+    
+    #test with gap_penalty set high and cutoff set high to neglect it
+    ({"dist_weight": 3, "gap_penalty": 40, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
+     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+      np.array([[1, 25, 93],
+                [25, 1, 93],
+                [93, 93, 1]])),
+    
+    #test with fixed_gappos = False and a high cutoff to neglect it
+    #AAAAA added at the beginning of the usual sequences to make to difference of min_gappos and max_gappos more significant
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": False, "cutoff": 1000, "n_jobs": 1},
+     (np.array(["AAAAAAAAAAAAAAA", "AAAAAAAAARRAAAA", "AAAAAAANDAAAA"]),
+      np.array(["AAAAAAAAAAAAAAA", "AAAAAAAAARRAAAA", "AAAAAAANDAAAA"])),
+      np.array([[1, 25, 33],
+                [25, 1, 33],
+                [33, 33, 1]])),
+
+    #test with cutoff set to 0
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 0, "n_jobs": 1},
+     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+      np.array([[1, 0, 0],
+                [0, 1, 0],
+                [0, 0, 1]])),
+    
+    #test with cutoff set high to neglect it
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
+     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+      np.array([[1, 25, 21],
+                [25, 1, 21],
+                [21, 21, 1]])),
+
+    #test with multiple cores by setting n_jobs = 3
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": None, "n_jobs": 3},
+     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+      np.array([[1, 0, 21],
+                [0, 1, 21],
+                [21, 21, 1]])),
+])
+def test_tcrdist(test_parameters, test_input, expected_result):
+    tcrdist_calculator = TCRdistDistanceCalculator(**test_parameters)
+    seq1, seq2 = test_input
+    res = tcrdist_calculator.calc_dist_mat(seq1, seq2)
+    assert isinstance(res, scipy.sparse.csr_matrix)
+    assert res.shape == expected_result.shape
+    assert np.array_equal(res.todense(), expected_result)
\ No newline at end of file

From d2c606dc860fe69aa2f8d8ab3c10de136924cabd Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Mon, 8 Apr 2024 19:38:24 +0200
Subject: [PATCH 04/27] fixed ir_dist _get_distance_calculator parameter
 handling

---
 src/scirpy/ir_dist/__init__.py           | 14 +++++---------
 src/scirpy/tests/test_ir_dist_metrics.py |  4 ++--
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/src/scirpy/ir_dist/__init__.py b/src/scirpy/ir_dist/__init__.py
index 9f2f2b2c6..18831a321 100644
--- a/src/scirpy/ir_dist/__init__.py
+++ b/src/scirpy/ir_dist/__init__.py
@@ -73,7 +73,7 @@ def _get_metric_key(metric: MetricType) -> str:
     return "custom" if isinstance(metric, metrics.DistanceCalculator) else metric  # type: ignore
 
 
-def _get_distance_calculator(metric: MetricType, cutoff: Union[int, None], *, n_jobs=-1, **kwargs):
+def _get_distance_calculator(metric: MetricType, cutoff: Union[int, None], *, n_jobs=None, **kwargs):
     """Returns an instance of :class:`~scirpy.ir_dist.metrics.DistanceCalculator`
     given a metric.
 
@@ -83,20 +83,16 @@ def _get_distance_calculator(metric: MetricType, cutoff: Union[int, None], *, n_
         metric = "identity"
         cutoff = 0
 
-    # Let's rely on the default set by the class if cutoff is None
-    if cutoff is not None:
-        kwargs["cutoff"] = cutoff
-
     if isinstance(metric, metrics.DistanceCalculator):
         dist_calc = metric
     elif metric == "alignment":
-        dist_calc = metrics.FastAlignmentDistanceCalculator(n_jobs=n_jobs, estimated_penalty=0, **kwargs)
+        dist_calc = metrics.FastAlignmentDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, estimated_penalty=0, **kwargs)
     elif metric == "fastalignment":
-        dist_calc = metrics.FastAlignmentDistanceCalculator(n_jobs=n_jobs, **kwargs)
+        dist_calc = metrics.FastAlignmentDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs)
     elif metric == "identity":
-        dist_calc = metrics.IdentityDistanceCalculator(**kwargs)
+        dist_calc = metrics.IdentityDistanceCalculator(cutoff=cutoff, **kwargs)
     elif metric == "levenshtein":
-        dist_calc = metrics.LevenshteinDistanceCalculator(n_jobs=n_jobs, **kwargs)
+        dist_calc = metrics.LevenshteinDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs)
     elif metric == "hamming":
         dist_calc = metrics.HammingDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs)
     elif metric == "tcrdist":
diff --git a/src/scirpy/tests/test_ir_dist_metrics.py b/src/scirpy/tests/test_ir_dist_metrics.py
index df19f18f1..0902d7779 100644
--- a/src/scirpy/tests/test_ir_dist_metrics.py
+++ b/src/scirpy/tests/test_ir_dist_metrics.py
@@ -304,7 +304,7 @@ def test_fast_alignment_dist_with_two_seq_arrays():
 
     npt.assert_almost_equal(res.toarray(), np.array([[0, 1, 5], [0, 5, 10], [0, 0, 0], [1, 0, 0]]))
 
-"""
+
 @pytest.mark.parametrize("metric", ["alignment", "fastalignment", "identity", "hamming", "levenshtein"])
 def test_sequence_dist_all_metrics(metric):
     #Smoke test, no assertions!
@@ -315,7 +315,7 @@ def test_sequence_dist_all_metrics(metric):
 
     dist_mat = ir.ir_dist.sequence_dist(unique_seqs, seqs2, metric=metric, cutoff=8, n_jobs=2)
     assert dist_mat.shape == (5, 3)
-"""
+    
 
 @pytest.mark.parametrize("test_parameters,test_input,expected_result", [
     

From 1d223ec6fdd08104f40ae7c6d4770b0f234dc2f5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 8 Apr 2024 17:39:30 +0000
Subject: [PATCH 05/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/scirpy/tests/test_ir_dist_metrics.py | 285 +++++++++++++++--------
 1 file changed, 184 insertions(+), 101 deletions(-)

diff --git a/src/scirpy/tests/test_ir_dist_metrics.py b/src/scirpy/tests/test_ir_dist_metrics.py
index 0902d7779..90e611f0f 100644
--- a/src/scirpy/tests/test_ir_dist_metrics.py
+++ b/src/scirpy/tests/test_ir_dist_metrics.py
@@ -307,7 +307,7 @@ def test_fast_alignment_dist_with_two_seq_arrays():
 
 @pytest.mark.parametrize("metric", ["alignment", "fastalignment", "identity", "hamming", "levenshtein"])
 def test_sequence_dist_all_metrics(metric):
-    #Smoke test, no assertions!
+    # Smoke test, no assertions!
     unique_seqs = np.array(["AAA", "ARA", "AFFFFFA", "FAFAFA", "FFF"])
     seqs2 = np.array(["RRR", "FAFA", "WWWWWWW"])
     dist_mat = ir.ir_dist.sequence_dist(unique_seqs, metric=metric, cutoff=8, n_jobs=2)
@@ -315,109 +315,192 @@ def test_sequence_dist_all_metrics(metric):
 
     dist_mat = ir.ir_dist.sequence_dist(unique_seqs, seqs2, metric=metric, cutoff=8, n_jobs=2)
     assert dist_mat.shape == (5, 3)
-    
-
-@pytest.mark.parametrize("test_parameters,test_input,expected_result", [
-    
-    #test more complex strings with unequal length and set high cutoff such that cutoff is neglected
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["WEKFAPIQCMNR", "RDAIYTCCNKSWEQ", "CWWMFGHTVRI", "GWSZNNHI"])),
-      np.array([[57, 74, 65, 33],
-                [66, 68, 65, 33],
-                [53, 58, 49, 25]])),
-    
-    #test very small input sequences
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 0, "ctrim": 0, "fixed_gappos": True, "cutoff": None, "n_jobs": 1},
-     (np.array(["A"]),
-      np.array(["C"])),
-      np.array([[13]])),
-    
-    #test standard parameters with simple input sequences
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": None, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 0, 21],
-                [0, 1, 21],
-                [21, 21, 1]])),
-    
-    #test standard parameters with simple input sequences with second sequences array set to None
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": None, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      None),
-      np.array([[1, 0, 21],
-                [0, 1, 21],
-                [21, 21, 1]])),
-    
-    #test with dist_weight set to 0
-    ({"dist_weight": 0, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": None, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 1, 9],
-                [1, 1, 9],
-                [9, 9, 1]])),
-
-    #test with dist_weight set high and cutoff set high to neglect it
-    ({"dist_weight": 30, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 241, 129],
-                [241, 1, 129],
-                [129, 129, 1]])),
-    
-    #test with gap_penalty set to 0
-    ({"dist_weight": 3, "gap_penalty": 0, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": None, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 0, 13],
-                [0, 1, 13],
-                [13, 13, 1]])),
-    
-    #test with gap_penalty set high and cutoff set high to neglect it
-    ({"dist_weight": 3, "gap_penalty": 40, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 25, 93],
-                [25, 1, 93],
-                [93, 93, 1]])),
-    
-    #test with fixed_gappos = False and a high cutoff to neglect it
-    #AAAAA added at the beginning of the usual sequences to make to difference of min_gappos and max_gappos more significant
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": False, "cutoff": 1000, "n_jobs": 1},
-     (np.array(["AAAAAAAAAAAAAAA", "AAAAAAAAARRAAAA", "AAAAAAANDAAAA"]),
-      np.array(["AAAAAAAAAAAAAAA", "AAAAAAAAARRAAAA", "AAAAAAANDAAAA"])),
-      np.array([[1, 25, 33],
-                [25, 1, 33],
-                [33, 33, 1]])),
-
-    #test with cutoff set to 0
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 0, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 0, 0],
-                [0, 1, 0],
-                [0, 0, 1]])),
-    
-    #test with cutoff set high to neglect it
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 25, 21],
-                [25, 1, 21],
-                [21, 21, 1]])),
-
-    #test with multiple cores by setting n_jobs = 3
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": None, "n_jobs": 3},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 0, 21],
-                [0, 1, 21],
-                [21, 21, 1]])),
-])
+
+
+@pytest.mark.parametrize(
+    "test_parameters,test_input,expected_result",
+    [
+        # test more complex strings with unequal length and set high cutoff such that cutoff is neglected
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 1000,
+                "n_jobs": 1,
+            },
+            (
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+                np.array(["WEKFAPIQCMNR", "RDAIYTCCNKSWEQ", "CWWMFGHTVRI", "GWSZNNHI"]),
+            ),
+            np.array([[57, 74, 65, 33], [66, 68, 65, 33], [53, 58, 49, 25]]),
+        ),
+        # test very small input sequences
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 0,
+                "ctrim": 0,
+                "fixed_gappos": True,
+                "cutoff": None,
+                "n_jobs": 1,
+            },
+            (np.array(["A"]), np.array(["C"])),
+            np.array([[13]]),
+        ),
+        # test standard parameters with simple input sequences
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": None,
+                "n_jobs": 1,
+            },
+            (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]), np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+            np.array([[1, 0, 21], [0, 1, 21], [21, 21, 1]]),
+        ),
+        # test standard parameters with simple input sequences with second sequences array set to None
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": None,
+                "n_jobs": 1,
+            },
+            (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]), None),
+            np.array([[1, 0, 21], [0, 1, 21], [21, 21, 1]]),
+        ),
+        # test with dist_weight set to 0
+        (
+            {
+                "dist_weight": 0,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": None,
+                "n_jobs": 1,
+            },
+            (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]), np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+            np.array([[1, 1, 9], [1, 1, 9], [9, 9, 1]]),
+        ),
+        # test with dist_weight set high and cutoff set high to neglect it
+        (
+            {
+                "dist_weight": 30,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 1000,
+                "n_jobs": 1,
+            },
+            (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]), np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+            np.array([[1, 241, 129], [241, 1, 129], [129, 129, 1]]),
+        ),
+        # test with gap_penalty set to 0
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 0,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": None,
+                "n_jobs": 1,
+            },
+            (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]), np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+            np.array([[1, 0, 13], [0, 1, 13], [13, 13, 1]]),
+        ),
+        # test with gap_penalty set high and cutoff set high to neglect it
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 40,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 1000,
+                "n_jobs": 1,
+            },
+            (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]), np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+            np.array([[1, 25, 93], [25, 1, 93], [93, 93, 1]]),
+        ),
+        # test with fixed_gappos = False and a high cutoff to neglect it
+        # AAAAA added at the beginning of the usual sequences to make to difference of min_gappos and max_gappos more significant
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": False,
+                "cutoff": 1000,
+                "n_jobs": 1,
+            },
+            (
+                np.array(["AAAAAAAAAAAAAAA", "AAAAAAAAARRAAAA", "AAAAAAANDAAAA"]),
+                np.array(["AAAAAAAAAAAAAAA", "AAAAAAAAARRAAAA", "AAAAAAANDAAAA"]),
+            ),
+            np.array([[1, 25, 33], [25, 1, 33], [33, 33, 1]]),
+        ),
+        # test with cutoff set to 0
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 0,
+                "n_jobs": 1,
+            },
+            (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]), np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+            np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]),
+        ),
+        # test with cutoff set high to neglect it
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 1000,
+                "n_jobs": 1,
+            },
+            (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]), np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+            np.array([[1, 25, 21], [25, 1, 21], [21, 21, 1]]),
+        ),
+        # test with multiple cores by setting n_jobs = 3
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": None,
+                "n_jobs": 3,
+            },
+            (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]), np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+            np.array([[1, 0, 21], [0, 1, 21], [21, 21, 1]]),
+        ),
+    ],
+)
 def test_tcrdist(test_parameters, test_input, expected_result):
     tcrdist_calculator = TCRdistDistanceCalculator(**test_parameters)
     seq1, seq2 = test_input
     res = tcrdist_calculator.calc_dist_mat(seq1, seq2)
     assert isinstance(res, scipy.sparse.csr_matrix)
     assert res.shape == expected_result.shape
-    assert np.array_equal(res.todense(), expected_result)
\ No newline at end of file
+    assert np.array_equal(res.todense(), expected_result)

From 4ba2c8d26615a478d551bc5dc330cfe180e4592a Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Wed, 10 Apr 2024 18:33:00 +0200
Subject: [PATCH 06/27] handling of empty input sequences fixed

---
 src/scirpy/ir_dist/metrics.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index d8e05fb44..77c915241 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -941,6 +941,10 @@ def _nb_tcrdist_mat(
         return data_rows, indices_rows, row_element_counts
 
     def _calc_dist_mat_block(self, seqs: Sequence[str], seqs2: Optional[Sequence[str]] = None) -> csr_matrix:
+        
+        if(len(seqs) == 0 or len(seqs2) == 0):
+            return csr_matrix((len(seqs), len(seqs2)))
+        
         seqs_mat1, seqs_L1 = self._seqs2mat(seqs)
         seqs_mat2, seqs_L2 = self._seqs2mat(seqs2)
 

From cf9090496e9ce7ae85e234342fb18c4489b0bb7d Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Wed, 10 Apr 2024 18:40:19 +0200
Subject: [PATCH 07/27] additional tests for tcrdist added

---
 src/scirpy/tests/test_ir_dist_metrics.py | 54 ++++++++++++++++++++----
 1 file changed, 46 insertions(+), 8 deletions(-)

diff --git a/src/scirpy/tests/test_ir_dist_metrics.py b/src/scirpy/tests/test_ir_dist_metrics.py
index 0902d7779..8b9994b84 100644
--- a/src/scirpy/tests/test_ir_dist_metrics.py
+++ b/src/scirpy/tests/test_ir_dist_metrics.py
@@ -326,15 +326,21 @@ def test_sequence_dist_all_metrics(metric):
       np.array([[57, 74, 65, 33],
                 [66, 68, 65, 33],
                 [53, 58, 49, 25]])),
+
+    #test empty input arrays
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
+     (np.array([]),
+      np.array([])),
+      np.empty((0,0))),
     
     #test very small input sequences
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 0, "ctrim": 0, "fixed_gappos": True, "cutoff": None, "n_jobs": 1},
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 0, "ctrim": 0, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
      (np.array(["A"]),
       np.array(["C"])),
       np.array([[13]])),
     
     #test standard parameters with simple input sequences
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": None, "n_jobs": 1},
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
      (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
       np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
       np.array([[1, 0, 21],
@@ -342,7 +348,7 @@ def test_sequence_dist_all_metrics(metric):
                 [21, 21, 1]])),
     
     #test standard parameters with simple input sequences with second sequences array set to None
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": None, "n_jobs": 1},
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
      (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
       None),
       np.array([[1, 0, 21],
@@ -350,7 +356,7 @@ def test_sequence_dist_all_metrics(metric):
                 [21, 21, 1]])),
     
     #test with dist_weight set to 0
-    ({"dist_weight": 0, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": None, "n_jobs": 1},
+    ({"dist_weight": 0, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
      (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
       np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
       np.array([[1, 1, 9],
@@ -366,7 +372,7 @@ def test_sequence_dist_all_metrics(metric):
                 [129, 129, 1]])),
     
     #test with gap_penalty set to 0
-    ({"dist_weight": 3, "gap_penalty": 0, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": None, "n_jobs": 1},
+    ({"dist_weight": 3, "gap_penalty": 0, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
      (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
       np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
       np.array([[1, 0, 13],
@@ -380,7 +386,39 @@ def test_sequence_dist_all_metrics(metric):
       np.array([[1, 25, 93],
                 [25, 1, 93],
                 [93, 93, 1]])),
-    
+
+    #test with ntrim = 0 and cutoff set high to neglect it
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 0, "ctrim": 2, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
+     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+      np.array([[1, 25, 33],
+                [25, 1, 33],
+                [33, 33, 1]])),
+
+    #test with high ntrim - trimmimg with ntrim is only possible until the beginning of the gap for sequences of unequal length
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 10, "ctrim": 2, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
+     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+      np.array([[1, 1, 9],
+                [1, 1, 9],
+                [9, 9, 1]])),
+
+    #test with ctrim = 0 and cutoff set high to neglect it
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 0, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
+     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+      np.array([[1, 25, 21],
+                [25, 1, 21],
+                [21, 21, 1]])),
+
+    #test with high ctrim - trimmimg with ctrim is only possible until the end of the gap for sequences of unequal length
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 10, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
+     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
+      np.array([[1, 1, 21],
+                [1, 1, 21],
+                [21, 21, 1]])),
+
     #test with fixed_gappos = False and a high cutoff to neglect it
     #AAAAA added at the beginning of the usual sequences to make to difference of min_gappos and max_gappos more significant
     ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": False, "cutoff": 1000, "n_jobs": 1},
@@ -406,8 +444,8 @@ def test_sequence_dist_all_metrics(metric):
                 [25, 1, 21],
                 [21, 21, 1]])),
 
-    #test with multiple cores by setting n_jobs = 3
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": None, "n_jobs": 3},
+    #test with multiple cores by setting n_jobs = 4
+    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 20, "n_jobs": 4},
      (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
       np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
       np.array([[1, 0, 21],

From 02a667a3a9206f60cafce9498a18519d77bd4542 Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Wed, 10 Apr 2024 18:44:08 +0200
Subject: [PATCH 08/27] tcrdist test with comparison against reference
 implementation added

---
 .../tcrdist_WU3k_csr_result.npz                | Bin 0 -> 6159 bytes
 .../tcrdist_test_data/tcrdist_WU3k_seqs.npy    | Bin 0 -> 136528 bytes
 src/scirpy/tests/test_ir_dist_metrics.py       |  15 ++++++++++++++-
 3 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 src/scirpy/tests/data/tcrdist_test_data/tcrdist_WU3k_csr_result.npz
 create mode 100644 src/scirpy/tests/data/tcrdist_test_data/tcrdist_WU3k_seqs.npy

diff --git a/src/scirpy/tests/data/tcrdist_test_data/tcrdist_WU3k_csr_result.npz b/src/scirpy/tests/data/tcrdist_test_data/tcrdist_WU3k_csr_result.npz
new file mode 100644
index 0000000000000000000000000000000000000000..75c88cc10ca3f06cb6d6014ccd5eefd805efaac6
GIT binary patch
literal 6159
zcmb`L2Rv2(|HrT5+L_tw;^G$RUaN~_Tzij-h-|V)Mn=h&dF@?he9K6YQudz7S5)>z
zsH}_|@w@km=;!;NzmLb^anC&;=bZQZ^LoFZuaB1MNqkxW06>H}K!7LD0~O>*000hV
z0Du%g3$S&xur;@IJLl-^jR&B_oyJ@Tz+QqKnZvi9U6vmM+*q9M2ZbT;1toAODHjJl
z%i{oD@8@{(4D&HCh(kvuS(pzD;uslu;@-IRg+hO$W-)urV8Ni4RmgfQb=aq8d!1Fg
zL}i%d??&UfO_gLaa^~lDeWG$ZdW<BZ-H9O8VzMa1Du=O3<G;k8d=#~(Wf3$km4BrW
zU1g_Js?N2=STXA@cr{7QK<Ad~+V0P$+s(X@&N-AadO1{?3{Bj84Cxn|z-*;Oo5c*-
z`1Ml6-kD-b2s31Cf(7@ix=t*zN=g1^e{52fvf^!6Y@Aw+oYm8%a|Z1#J{G)$@OF8}
zQC<q~c13>@tyqf9*cd*9X@pin3`D_2MGL8uP(YTgmE@XGc;a+XQ=wh~Nw#+4O}#>3
zwr>2jfr3-FiWL2r`eGG?XGqif60C%02~P?q(N!AuM+`ojQ2o#!z3%>iEAw;LRbO@b
zm;F)SzCYl7y%{q4{h{E`&%IS-H#o+iVfDVflp(OliO3t=xeVPY<BL}OQw+WM{uZ3^
zo8j%-U9<(TsJX6jdVfovij;1a8CVRO{|4y3J_~j7l40ArPEvWs>BknQ|1xM?d^{Z!
z`}Nji>yymrcy)U#QbY9pr&;$MCOtfs#3Y8B9oNf}{l3fs2;ohp(as6xEVKB$@MhZ{
z=Onj`*%S2DO_n9jiFQV_>^svyvFc`rZ_dd+pJwqWyqnA$o==5uCRpRs$m{E)s`(^^
zTo7T};R^0%l=1paJf~d3>AUEgcxCiaEXdgR-hl{i9%5x>1=mq-0*Lp0y*wUrTkmxD
zJl>Ox?M)IEJfw@rMEe>=57*duBh-xF%F^rb^Q9><+q4A$0yM{?(h>Fu6Np%3j6FRG
z#4sqwR}C7Wdk>kABW$^;JfhjaxtPA6k?kBK%W_;&Hs`$Zfp^4rtM!k~Od)}Qt-Rfb
zC9s37qK|lAoQC_UA9qXVWUsz_o5}V)kAAE#UFv&2`%iQd9vMmk2)*!m_d(2a+pr4>
zrexOmT#U^&ElYBJbe63Vxa6Rmrt}0S(dh7XUIM=!3;$Cki`$+xPEwg>ch0;^>4|(W
zhE7cFG81)6>6DzQxEsuG2CXIdIV+JIrfSgQdZi3``LPvw?JA+2LF;YNGAM(tXnCpt
z$i>dNwk+!F7ordC8Xo287r$7tZ^7$iPPt?>vP|%`%0_38a~EFdX6$i|UNsu$On%XI
zy2LeVVY{*R(>PDni*D9$t}$DZ<DeL?E}E<r{x6BUhLJ&Uqn&f`-N+7Q%Za)1g_)!=
zSc=#yDyT-oSd?!gBqgkYn}<MINyQbptlZ{*yW?f(C>bOhNmnC(MkhYxS^b#o%h;FM
z#akr{R%MyLRYb=Iuiewpw{a=3rjp@GT=ofCvU%VtHM=%nUX(mvM=Nda1u&m7L`9b;
zZ^RO!=zT^jPDMuIQI}-#6656}g<Ch5;37rx4)a=KUZ;vQPcl8dcOWQ=xv`{)xL&=5
zrM96s(fW;rkAlsW6LA~rKH@<M3kd5slXfrr5*^(O2`|nyi+wMk@S9WIUE0kO1fE~I
z6HRV*eoQFnA({D^M#g$21!x*{yH39annk?bhTvJIiI<z~RU<}^+NxOtYik&iO;X$W
zt%O3W1zWyarlR@1pg>ZCrpX7{1`QaIZ8z(`=6`iGZ*iA~o2)9!5^INAJUoIFd*1Gg
z3<a&+YCg$=q6~#MXh*wb&}5)!3*j`>y+;!VS|)GryJV)fY_fbHI7yTgh3q=}atf~`
zSW*Rb8=~~gxTTB_G#gG-?I@f>6PVJ(De4lYdXPUh@Kf?|B{ga!G7?pzOro<wSEQuQ
z)W0G+r=$slA;Ue})aXXRXNSFFzMwdGtJ}J_P#{5cn<gH~3Lg6kRWP3#Rm2LknNNdQ
zMJJ*^VS=VZCwfKi0cYZ7LWkajP_a(b{J^-|O`ZE61|FVUdxXhPe^`sJ3{6^p<jk`N
z;3qcf%y04pzS?LKiuZ^2_%^?He|UD}vj)ZO{`*d!)!Bda$L#oa0)Q(g|Gn15zBFfS
z_2OCLqDgJiRGYCbIG>2i#nM@!)Mopc^U2;_)_9m4nE9I%AE%om@&DhJI5x2t?E;Q&
zp|3!*i>r4-B`2YpUL8XB=%J=TG{HnU)KQ;7P#`p1-hPrY*{epymwJimaBHC$6wBiP
zJH<r|<bed7;-kM?aB!u1g?3pG55g_Lh6t3nlmxs@`JC&jKLW*)o$QM$#1yx?6oH>2
zWN8bC<k2Z+w1xO!h`enFKKydi>hr5*R%iFLK^15B^dR&^D-dk#G3TOQQuW@cGM+iT
zX3_){&ZCOr%9;fWcE09+7gQje3m33aK6{PuJ<%ZSSw8F6IcY`lxA4i@)t>*V$Z^6p
zSbHc#c6!&0vzYf3BICo@d7(uJ^qH3j2H}K_Wp?$dnch^&XmPhfAZBVRuyrjWOj0zo
zy6e7I)>aQDK_*`WG3Bj1D4*XPe%3g_p#991ijXmM;Pc1k7nz3`mL*%WHYT@B2FHGj
z<$so}=~%Dezl^5Dcb5eBll5szio&ani)}TiP?F0F!lH0NJB;JZ+b|6>GwS<8O~kIy
z$>lT4%5`?!s5K5kE~;>4Ssi3thM2O9$|&dQB4xP`kK)kA;{Al)r}ui1>cDKTcdVx+
zn0YZSlLf_1U!5yu(n`1pScyHz89Gds_Eh+hQWm1grwZ@T4T#X2)IFmc-1|sHq*ynw
zw@6vwrf%rVfm|ta8znBLRRVPz6`?f3#rw9H<h%<Jv{jY(@Gc<QJx9odz?`L9lFQ?G
zv9Ja*bZwBC@-;H_5GlYS-bRN17+ZWLSJnEvcm*z7km#mj(y~Na3yU`Zx$HNaG5t19
zx=<EFY}GnIgYEijgXL25t2GzBcFX=>x;dy+ztA9S3Bv`uY9w44d<aUBYpB<!u~bG^
z9xi9Jb5iRFe>SiIou)7-TEhSUknHiNv%Bj)eqzsJ2lfi=$Q&}SqSU7cjJH?%O?6i~
z80wD9TaDR&tbEemtyWo(#-mYb2eY2{`$4*BynuFI7dPo0PPy`2BXOQw_UFKAp?`ot
zpxiZgQho+X{TB}3>bmd(o``!cj0!x`eb+N`4Y=EuOg>LZ&ClM?^GLPK?FWnZ3aTvw
z<W_h0?yeZ;F1IXI+W8<b%S5;|f^&Jwj22rGe!bWvRW?31_x|Fd#g9ee7)M!=W3F9V
z0T!3aZvkc=ZkN$;jZ2qAf!WazX&Dmj6i(WFhGvM2vWj*ZEFqs|x=337jCSf7y?my>
zi)7SqYNv}2<g=~WO3N|5P06;E)lH*9xLxZ*-njNQO>|Gx*4S<r)Q3Zs*K=|vKi+2J
zky%g%GB#+1Kr^T!EogHY8*wd8&4M~UwLz}{n#tH@L06I5Xfy-OCg8WAx{qsT`wIiJ
zDzeD!QS?~;u(xFXI9y-)#Z1g+xYhrZbF3W843Q$dhgf5oun^v8u!CZ`<B~;cGzq)9
znU2-vaiW}p3wpI*s+kmV^6HcrT@8rwofI<J?5N#t6??Nfr5(yCY)Yhhp@Ak&3Bf~x
zff6z^DjdHrD@)5#$E9#{gBjZ~V%3RD6V3y(2H=<xVf8dN(f${%@9PqR1NDO<s)cP@
z)QLs1BDhwCiQ_Ws?f@3etubiMk@jvsj2BryEAfiI3oiLjg;B&#zf*j$A|PtJfk(6=
z(ZSxDaGwKMCeJHg=zvDM-HXqdq|k5+sn~9Z?@UV4zi6{8SrWTi-Ezy(Io|No%qh?6
zRzJ3-SVhSh;$E+M)t!mcrQVH(qsv(oz9^b(bi>u1Y34Dswv8NBT3Ha6BvqIaHz!T-
zBLtT?G)$S_j3)F&6PFaRk`gC%d{7=Y7aT&w0mg9va<4OCAmAx73P%9#a>h7F%cGQs
zC=g@T;?sme*yTqG<iC5oxqkt%x~&3bx2=roJzI6=11E#wWhz<E+kNrI-wUuF1^|SG
z)$i3Q9A8CRvP`Acaq|cT7pa+x3#IG<RhPLGseYSTQdxo}zcs;GgJ!0|rvd8rre{A6
z)ob`qdmdrk6fTx&hE{mJvW5#bT;AuL<5Z1iNREvaq}RJ%)p$8gtY-Qm`(_)j)M{|4
z@07$t;nIi1a{3KaSwskK>O<t~60Yb=9UHrEPjo1yad4Z*6-#EpO_SO#d*$h`CeSV?
zg)xM<PQ7OZ`xA089uQ$!tc5jzJkKAgU!T{`1`6j8cN66h2oM*DIM^H%_v@ck#AI;{
zwC5@q>!g9u6cCvu!#;@a1q#6fO@*jgGQCT!SF?hq!@DfmRt@T8>B>{U{FaQJ2K6de
z%F{$1Te2<;*D3g$(xeYJT&XP2Wc`A=>Ob{QVLK{T6m@cS!<f-D)BEarbw`(UiBHpP
z8{TztY*@maX6*GoMyz)*F}wuq(hA}sGbL8^;-uUc>u<mJXB__zg8gNy_d^QAc>tZ5
zAHoknQ8sE=zQm~ZUK0L0jG2D82bgbzOWe7Fa5nlET4Fpjrd1v?&<riG=w-DVobf5Y
zobwH=ZT?;SI$%i^_YxnQ9aU}c6MJ(BiJ^9<?VfWU6``+>bIJRj>ln#udbhY4FGaks
znGjg?>Lrv~3GeltIJ>*TKDOBtldr`mOA}It;NXl4Q#mGr?MfOXI!Ii(V{B96fyHA>
z9R}3Z_hlLkkv%ZL`_z#uF7oIP))2z7#yuMi(reUeSlFn1mV2ny1!jHE5%c@3dg+JI
z-2gJP^KuLw=R<_<#YLJ4n=@dEFCbja45pjXAzUC7_=j~>)&IAs*_#O&4t0oDRs?2l
zcPwIC#!#o!y4&4AT+3*2(QroUZr9sloNDcbUV?M3;hjbkPFS;goKuaQtr-XJ{!{!c
z9dPm6t9lkWl!-P7-UBdi;>9CY{<Yh~OMsWhjb*%R{ZvY~VR8S^GA0x9X!9X*1|8ZM
zddq>~9H`dOO@qVL53_5~m*nxR@FeRT;y=)S55;o#4?N0kbWfJYfKMRiz~YgWldFTN
z`#v6F&teDm3hYS#LO;(;+1{qrqV*&Tc`d^z&oTdjF^&D>lH3PbNhW$a2`Tj1CJ9Iq
zYERWJhz3Cfy)`#o-U&2QnmEaG5?Z_x)Fdb1D4!*>;ti`{{&g7UAjUj^n~kZn<^KJ#
zC$R&20d^dFf5&*+od53r3gUZD05~Tl3ItLD|9FUiuX>UACjfvt5$v`lEx^Lm-E{vM
zu;;J?`#pAiO?$1;c%HJgv!khQ2=tT^!Y3;)djiBQSstLzkWw60U?xoq4kK8mt>KH2
z#vAFQDXHyQmJ#gKbTMm{+Wji=(Yy9#b<LX*h8i;CPTO}Q&!=s*v${1piutTp*Q=kf
z^R9Y)@Xa?FptxS4wD@hI(8}eKxFj3<WuME-SAvYw0~Z&jBq=s-jV_V~`%!;?lj}X8
zxLH;79ai3>wxRd<`}~&P6mQi?@TE_K!i%%>SKsO~wBBBGOTWJCwiId@;#;&uT*bTY
zBf2%@75n0~EK@;$eyRVBZRt00*8Y7TDHG#AzVZ8*8PUi7Y0~%Vov#b;-zOH$+7`PI
zxu8oPO1@t^p>g%chI?PkUnWtkz51nn#`E6yiY4c7>CB_GsFHh|Yn#>s|H_(>*!0n_
z;(0YR!&tH^Qc?vQpqV`NZq-Pf{O>>q$sdD41HE^%?9oNDBHxx7t?V-fJ$<g<qEppZ
z9A85e|CBG@tg~QGAMxD~aP1YUUI5XOj0}5bw1}LeC6Qy8-*~a98rE9<_|7h=`Bw6M
zDcLnty5Ji8^%MUK*B|=4->I8r)jIWlr)i<<p+{$OdgacE2DE@<@|@qfsm6^OV6q*v
zX3134?J>SvaGnN#nrDijnL#1XGq1Zotx@$O#dmVoJ*l%v<l02O&A%GiyyR5k@lafG
z{Ed_2k42dmqml2Yd9_sWPS9f3$?==qxMhO-br%o&=g^k-pA(O|lfx75Vm8Dv6MuP?
zKj$5l?csUZG=M`Q{&U_@wH=<vfq9z)?fp6Js6Gx)%g0RHSBsV^0TBBg0p^De^X5}@
H*iZig5m?LE

literal 0
HcmV?d00001

diff --git a/src/scirpy/tests/data/tcrdist_test_data/tcrdist_WU3k_seqs.npy b/src/scirpy/tests/data/tcrdist_test_data/tcrdist_WU3k_seqs.npy
new file mode 100644
index 0000000000000000000000000000000000000000..fa41fa05dd198315bb44d74df718712778094da6
GIT binary patch
literal 136528
zcmcKAPp|aOcGdCAlvk121t|lFlnG*jfB=F-;lCK6hy*b}1SN<G;#Kg5*&p9$wcI|n
z%T?}9N&eZVkNvGxYp-4I^W6LY{^Z~P@t^*uzxA8{{hR;u+duz{zy2?O_1k~&d%yjM
z|NamD;J3g3d%yj!fB9Gc>#zRofBfga{Hs6z3%~fIKl|^0{TKE5U;np1`=5VNKmVP-
z|A&A0_kREH{{HX%-`{&U{`Noq&2N75kA5Bhy3YUX*Y&@u^S}G`v(NtI*U$g7&hG!C
zUq_vWBV5mV#u?MIuKz<F|K``v|D?|5GXH<71G^jH{)@WdU)F(vhie`zXTLMwozA-D
z7-z2MFzNZ{b(jOkXY|oJ(^$WLjVl*3>;AWO?ZD(O-G0BK`ON*(dL};57+<h3SG}!U
ze{<$y@{`7#qc>Xb>Q#>(<ID{Y!!^xk{XXNtiYHetrUO$ZaB2KhZ<sV;%5Oa3zs$(W
zT`op?lVzIK%o%$}_lTQ*Q-5>B>-4hE?&>$<rdIs5c6^$7wAyncE`9Ntw$yN?AL{mF
zbX+XD%|S1JpY}y`hJ}r#?^ZHfx4p(l?|-b{V&X@4d|A!Jm5URG?8++5K2yh4k4N0F
zal*!3-OK9Mw|&RD<q8j*&L>*yu^~51dD!V)yo{?4Sm|9_`D-5a^7GxkP`4c8%JnP`
zK0kKrRgWCw%*AxX<Mf4jc6EoFcV_k+oZSr6Bd-yzezI;_Wy`)0{o&v3VfDysgezn5
zGj(#_*&AQbx)^bJkv)Re-R~s$BW^N5uQ_)2vJn@{!4rP+0XH7+?ho-7GkS5u{)9`H
zLvC@*k=}D{4s)*b+}U3&Tx*v#Yru+D*DK7r?X7O%o_ko`Rz1bbxa!S1&pF_MId|qb
zbLrB*)=#q|OrP!ea^;34XSAo^!e7;V=6W`Hz<gS>di1SsxaWiRtex)UVx^8(y|}bG
zd!A-YIkwk1tAXFmM#rTy+IeFx%+(BExwOy~F5Wk|>MaYj@)wt{K3nm%Zac<RFYZ}u
zuCaH#7;)_l^jSaM(eFC?@8-C2ck@(J4ab$MX2}5$)Zfp}G~VT_ZaK!4oBbC5Np62(
zfApISq?h?jBdzJRr}@pk6EAl2RT7SI^?^Q2-%H{(t*iT3-Fl3Ut7g33&F$}~Kld|w
z<3;k|v)x`k>$Nv{KKny@E%WTu?#$(BdW**NoxH{ORj;pHdetZz{mu2H2S?oGKrZNe
zlC>)rCw;-+c#KUqZ>)zevz{Jt<L6VYHRW3$=WloGr|Vn!#XV;t-%&o@AHt=V$G)fY
zmoK!dr}OUn7*|a^D`#qzY<!wu++=`me9YY%Uh|MOW76#$&DiE$c`skN7&L|zFU+TY
z=5gN5Z#+=9T~F(g*Es7{e`^g-hQsH{>N}`zed`Nc>vUK@4*kyN+hJezhKrYM#G9Qx
zV#MVG)>EzZ=no&_1+CUD2XwCHpl&_JRd4b|bFMM*<MD2OX~m+|dxFk*E7Qr!O8u^S
z<7?^{o;g>0t#19{uS{X4hiRtMd1d`|+cD01|F}NEOdjGd?v7UAE7$Y!)OojFn8uZh
z6V|S$2k_T_&Ro2_i+Rb@r*mAnc)Z}VEW{H(n?pVM?mT-RUAfjSyWUA*<7JOqt>KkR
ztMy|#=>2p*qf6GX(_7fq@JW7gd3V;!k8kKrFQ^$lUsv{ny6tUT{l!g<c#xXy_VSfW
zC*GM&Z~R`>e&wov@{1qMna|JsHm|IAqvP_!`#8Ma_wt#Gb?z_PaD7JKPA<+|-qDp_
zP&2;zd}r^i+u!yx+GOjTU)Pws{IlM0v#0UlsrDIH-QqXBa<b~jh^u$wg)?USm}RSO
zdyNq{dm3K$hus>ExO|oY{$j88Lf!frBQ8&|<EJ`ij(m3K$Be5F%)tZt^^NoF{Zqd_
z#)z9e$C<vJKKmW@XJ1%z>SpaZcG*|G;qn(V`OQ9@eC+y*%SX)kf&C4=>1S*?!&fit
zJDqjQF-AV%;^FM}!vU@TbX;07v0XdU{i$9o`%dab|IQxK>AmLQQ?BP}b;wk_@cB;d
zu6o0LlC7HhHAdWPzeRU6tKD7wMqKO7zP`hHiLHwfmj_RC5O&9vRXzQr*RZc#Il#02
zZ}wZbyvXr{Khm45nZsTY{h#C)D@XR9CmxUV;wHmS>(_DREIsFXH^&{<UKT%ScTGF~
z%Dy(@;&}es`L%BPd{2<Ec;NbOU#MG-G1BY3X|3sHb6{J?%AQlV4f`#B52SA2(kuU;
zWVh3+?$$07YWKwBk-zz#{)AUDj&bFBH_)nX-^^M5O+d}g`^{<&>ak;7_2S~GTRfIy
z^T?f=u6z$3aq&_+*E_tr{TNqn_Axvf9Z&YO5to<o0aq>@=5jXAT|bYwo_EmBgI#7l
z@}rNjFw<}T9Vz?7C-0$fQ#V?1vq!AvZR7(_Y1SKhk7itRt!nPLVLPXGJG!r2+Vlk$
z&T1djt#9MX#d4_IxsUmLm#w<xHAdVpY1Xrtshew_-;EI$FLjfTCz-f%>9c+Zc4~Ik
zyW&U3#p8#wb(`apoY9LR2lDHh54*V->BWl1r&$|uX-+1b&Eve|@ra8d2k`+{zg+V+
zt6#@eF9&8a@@d`hvY+uc*L<*!l|AjMSKVnmd+?+;I&Ss|bEIA?I_vghjJRhePrlK|
zd`PCOU(I|*^Cxw~%-)ciVe<PEAI{t-IjHH6arHM`TH|f%#^+ft{Eqm$|Bkpga)3iK
zUv|8$+kbq2pcm5_KR@rXM|$PJVNauzC!g+<^qqTdXRoP8Ut^?Kz2qudozr*Lv68nD
zHypb0`9Sx%mUp?ha%oK-T$^LJX7$LAJYdm9tGXq}`k4nS{{~*SwcL2&@1tbST&uk?
z^1<_T(|6v7cRU$!Js&@4#ZH!2b8+QHQ+W8_=!-K~4%BHUUv>L2I=xT(Y{sR}!Hej3
z{v;2c$8^>o=?S0t`Dq{H>4EU0{nI|ivwl2$XX|&~@qARbb9SS9$AgmaqmSjEr}SlS
zpda^>y5VBrsu4ELKGSzAdvDz~)Qd6VnP+ue{q_GXiC%U3mcOa_NblV{;=Got-sCjd
z%KqbXb6Jbu-T8dx%9q;61K!hKt4H7J7A{TxU5d9pbHtz3tc~=_n1lB8o}bP04Q~3F
z{vBrBKJ+0yL9b)CW_MhASL=^;9-Y5>OHVt0Q!lM*Bn#1uzuFs}UQFIT$yhzs)=j>i
zYMpVj-{=qDrRa}0E8o#NAHvQ4BYS*S$Gg3uZfSkM<pYh*xS50gyBf~)%9-_ltKY8w
zc26(;s~M|Xhd!M5GQ8}UwBM~C(-`@nHrdPKt)Kau9=y{(;;I!6e)dT^?^t!qYjj);
z|5hXiIn*!TTkkSA;y%qlP5T-hmquQEvu^aJc4rTKzS3)T+b|EFcVMa)Fa5llvAT80
z%{x9>z?3gJ#?E}Re2n}Jj~6n#%BovmW5&g#b!UGJxZ~pSm3Fz<?d6WkJ6yiz8vm0#
z(1?e@3w(PKzdzyHYiNrX^t%2uXW4J%iEigzFGl`igq3~LXFQ27^v^u$xM$vO7HY~L
z{f6O4PpRQI{0$e+Vcoml)+0l`az$_6=c}ILWnBHGH{6{%g_rY)OKURh+WsKB=3CA2
zj4Ma<f2(eM)o-*qJ9f0z?Zdj^@*x?<PcB@W<8H4Q`QRDPTYuOxhqvm<h@1btINDZg
zRnxx4h>L@%ZuIfMXFKnW5!ae=9p|3-WS^9)v!~enPCi!W8tKK|t$#i8Hbz{Y#s{9t
zUA_~q)^NrRKehAj;EA8rbT&p@&&N+Sr1=~DhMRqXA1nP;x4n&#UjHU#z1cH&-|;5s
zxIE+qAM{_|%QF1LUu(v5c>c`vnf$HP@2WRkzT!k*KA*kY>-N{^xcqgb_i$IU3*K>M
z%X+b}!%rXPo!aHN<Kppx$8__<&+(<>)h);9xX~AO^1SNBh>Ia#>gJq-2e>QyaK}A+
zx|6S(_823s+FQS~UQ<8wxTaan?);c>J&U<pvwCEhGqtvMnsJk@o!%Jf#SKG^==Jm6
z+Uu6r=(uOCuIV@5U2X#&adBh~i^g4MJu>tc*J14rJ-6dQ-QIF#DjDXl+Qs7)KRO@m
z3D!^RZV%TZtzKLjX_g5;ul&1Y-FA#lZ|cS8^hSK(%Uv!yZuS})X~s`~-0i(}YwH6R
z4sUmK*6qjW^nQ{B;EpQ?*3QRo?K^nkI)5v_GUW)Lzr5bzk9<gWWo_l1Q@8yX9oPD4
z%)fo^&N1SKr61&&AG|Qv>i#q0T04d;<hx|}@|Qf%_MaIyI%%9S>eoNUh)bKjjK90S
zW`d4OYxK&;xAbD-(d`<S=c{{hr&ql&Wg_g%g|B|Ic{(nw4z)k)#&)OycGrU)SG~gE
zL2{OP?)>h!wBRPIbYZQI5jRY}-uc+gGvlUb)hF6{edmF>I&L)MB@f<pt9{aO&sj3Z
zt{)?A)}6IGt6R=~e^<kfo1F0=`^Ac{b^9?oZv2%qS{*x?yW`SoPxv;MZvSq;)73qv
zZXM<<S)g^7hiP;^;Hg>uZuwRo@Z{?0J7c6bd#;|f4?oe|acRww@1>vSEFL(^06ymG
zo>R9D>xP%Uz|8lF9e(EnuAHTxztI=;>IFG-Sii$uxxUNENH1-k$K<PX_V~MftZqF<
z$K`2`T@QdeZhVkmz4xhJ>$GmQ-|gLc<Tb8(!&MjF=6mYSo(|q|WsGJAA85~9Jh_`^
z#MNWrdIz6*_?UO&jw`d~+1VS+*>TT(EX#EAV?}G-HssPqV|sbl+Zk8A&i!^*C#>@r
z`9LqOdO6fz?>Vpj+eXLbrA+W5ePTXduV^1}J@>@Zdh9hu+<0s4^wO@5&a3;xjEm`@
z{TunEH(uHwR{X5n-bSZ4EP7>v2j)`4{H`wB(Q#=$?-RWCnPa8*W?cH7=3>T;#&71q
zdS%Kvd*-SK^~iAF@w|i1CmE~9T5jI+_<VMJqc7qCmhXMLS?lT+?zerw_3u)<I_r^R
zbb4h#Z_&Gxg9BRs>A3nA@BBWdeRUt|xa##ckuyx+SD(J)I=z^DNG`I+C(n29qY;;<
zI1ZW66Y7}$;Bnr4-M-Xu`H)Pg7bfj1dvDz~%!B^Iv}OnHZhwsQp7#dabAQlm4!UQf
z<0h-Vx4Y)cYR$~oan+1&tlf8fJu>8CTBo|J*OTw;BfZI3*1hU!NqCKpD-YI>efA=A
zJ?RVjtY593XsySF{$i#6_6K>;*EIV4E@O4ekQ)!u8&C9){5^ANl?UgYUY>FFggRkn
zKhJgY-F<)WxU^yN*ZCX!Lwrr`%yE~iy5((jdaaL6yu35a6CdvMkGQ;)B{{(MGe7TG
zb<1mXTwcUm8fneHcRkH7eYko-#-8+Erx*L|uj}OZ>AsE`uJt>1_u!dcKJao!XWf2`
z5tqJrfGu;o>={?>_`y$^2-_O&GBM)fV9IPVmb$I>-IeS4<kp!FyEUsve(b-vdH<Zh
zH}JqbD<0HsEteOmALi=%F^!QA@gnS9tuwCm;?tb<pZg{C?DmXKFJ^k~8#%+vAq&{F
z*)!$NxqJ82uU@0mE5G<W$dSIFf4489H9DQ|-jQ|7kJ0JPo<r+5{l$91i|~<N?+AY6
zof)5bzUuLai^qp=>s2Qj^~952tH*|YocDV)udW?0W8_16Aig?d@9aGzuKLKAee$Uv
z9oHV5p3__ULmgK=>A1YW#!){%<KFde-ExeMOWzY;e_@At$a^C1BRYMysug>rH{9&q
zyiiBnRd&ZczcZis(Q(robUw*i$4%X2jlbD*R<kkU=6}=E$Gg7PBgeRM^BvFH9q~c!
zcCtL;s(E~HrrY}E?x_#?JBUskS;O9a$Bpz_|M^{!Joqf@Ft0hS#pl*>&UVM8kKUcV
zrfz?Yj{C`;4nE@Ym|kn+0nN@keLdoOULVjbTY7J2Z>ZacdTCXc)W}({BxkgnFYhJu
zU&lx<?wQF4YuM2~;%3jGix!9Pxu0@*t7g&c=k&AB^Y1d|>fTQyy*TmHbEzW^Px*A0
z>k&6P;|Cw?^H}DTjb!dx!}Q#hOOv|$efZW+BPNgi+^xTUb&QTn2bOcZrkg+JbIfuw
z;?hJLW;Cy`>h{+daq;*Nt#qqbaz<x-tjpo(xR}wKKEsta%ul%S^)6F2&5zOPwQk1~
zrb#+3Pj~#pI**PkpS=9GUf$v+!)h1(pVSRkhOwQKW1p|&V&p^KL&+L;_FsFMdfsUs
zakFpH=D6Eq>Xsj4#HA^FS=i}~yZt5;bX=Y~czO2r6E6RF%U5S?2koD7vBOOs=$7Zy
z&>UU;Mm}I-(aP%`opt*$MqEsFayG}4o||#4+nTLc-#N?n-5y!D4EG&6)d`bUemL`E
zg;}??T+h?1cJX&76P*t<I`WQpjqi-l_bv<dt5z?q$ra9OZZVBcFKs-)q&ZrB#&@65
z?DLW5GcG31Y`yjC;TUnPKmJD3H};)yvsQEL=D5>~{iI$oWyDR*qI<PgVLEr*yo>bj
zlfD~q`I@@Pt!vtM_MeWM_q%iQmh(#PMqJ-<X-qFX*(*m}>&3$7N$MA`larb5E7$vh
z*6i!9&5`%V>V7%m$^tGXjnT`O)%|D0jb=Kr<t%HbceKyc$8TfA4UaZG;b&}feBuL+
zgGL9QBkS;)-|yBf*~Qvje?4-Hkq@}(Gg<TV`QH`Ib+<2fT)bp0ea%~W<F$;Kj~}CV
z|A?Ec;_mJX^~f<sTse)$z6;aCm-sW%SHFH6SFUyOj)(N_^g`YK8XZ@LqB-CF<b${7
zvW{7gblhY%npZMVQ@zHF%TqiWzvVBz$*^l{&Y2H)d&EqyyveRQVkXDdV9uRBP9AXR
za?Y_@vwGym`*_6l8I9k_Z}ej6IbLM1;K5yQ%=}HQWX(K&Ua48#*7_j#G-9PruJ3YC
zw;ZF>d-fSW{lSA3{)}tSwQl~>j3*P(|0HMhhRG9pcRcCzX3x3XziYbTm~r)+HOs>8
z9?@~LH{{*%t(=9+2TZzoKC4yz`ZZ>Hvp?+AE8vc6{c`J!|E>JuS@#pI^;p}-@l|%S
zKl0cbRy^)}2zU2;PEC7^J8o)6<GF|V<FjtexH#&S3}cyR#;ac+<BluGJMW2K*lFG3
zv9ot3t=<!#>}Obx_$za?WBSbX%;U}n&+_)$K18RpI?<av_`Y_xSIqRr-_&i@gPOLj
z8=bh`?a}@Xz3PvHxx44oBSXD7_5u5DdV;6sv0vO}p>Db5(u=+0!5x>s-`pqpz=L@4
zB*UYeg)3LG%LmshxvkrdG2`Zaxl*U_a=znczu9HaxcSaQ_ip`WTp3Gm@OSmy#*2<C
z|JfhxF_=5O(Q(fn(E2TZaif#J>PP>o=1#B7%CUN-Zv0+(XVz`4zu9kSezK>{^kQRX
zU%&g?s&0La8JDMWv724+JFZ&MoSwdWuVES;7f(;@?z@=Aj2pIda<}U7jO(4rV;<1G
z!|%8}#lduzTUz6b{_|Ov=Z>5H+TCwzs$<N!@<l6+^zG!h<6`FhCd29L)t;VlvE(&A
zn}>EDrN4Llji<CZ?rK!G{1_v>;YO!y@MSd%Ch55LNIuwaciumB`!PChJb0oNc*fNS
zyvVV$ug$pfMX$Qi&ku9>EH&tQZpMwbIC$UEOK0*c3oAP7wqwln(kK%=2siJKt{)C)
z{b$CdPjB%9o2Ndr4xY@|Gp=XxobhIy`t{rBxO~mNgRM`#(Qg<$aOU$j-a~xEp_67l
zIDEe1ZQa(^EnK`Onmd2xORf02nt{6YH99U&`QVT<`ggpTapT3#d$^`Q#*E90_>qj!
zE`!~Eb3p4q9hax^K@L*CWa24TcEjEAwH`Z0rx!PDdrfj2Kje6(x#OO{o8_Iples%C
zpLt5}Oke%_ZH%~{kGFDc?X;$5{C3Z5bX=LGRnD*-JDKRXJk=NS8qI3uGiUS8G|#y7
zI>L3$AAa2RM#rTK59eEXus*(GhP~r$JvQuP+^ku?^auV<-*rChFlx#lW5l(7Ivu=y
zGRKUI&jX(9^l;ri^f!9rvAoKbpIy^G%WnPpmYe-+bw9^6Iv=c?zchZ+U+d<F{`e+0
z9@ETA^Ef{BH{9JE)YQl5{0)~6PuO?dCmADn#-%ek!{_r|ZtIp~%(%SBp`X2%&6WDi
z=DFk2E5lgH#){?{H+#+#FJ|1Q|65@#ZCW?)jA;GV{-9oYU#3ng`xvdp%!hDgZ1w$q
zKo_`p_V)O(n&UgY+1q6d+Z=iC&+<0oV&*-sA9nW3j!RSez*!y5!-tjn-Eq^$&UmXn
z&$!82zLTsnYx*{4e88bGTKO=mRsH%kI=!FtF!+p{j5&X6KjVS@K^AtI^+>B%pW$Ki
zI<;HfD>@%=Q@2%CJ@OheE_V1~&)z*RXkE;>@pqNA%FW(@FT>yH3;O_8n7iLiIv>tl
z9{c(3Z??MSHSV}Od*cgQ7c(wD&&N(CW?bH?odZ98jjvZ`IqA5V`F@7WW2{w=XIy!}
z^S#V<-XAM_<sFyqyeD>a&$zVGnarhzEA{KRv{|>~$vb1l-R-fG@ES8NAFMg~O4e5X
zjeW-T-6mf0VkHN4+c7$BG^!h(T+pkA)^WFAtZ@As-R?ed$EA(-Z*pZ#@A6_d8#BG?
z=D^?awr+3tNjyD~eF57wKlpN&jZSa4>Yfa{Ud`K#8&7lo7T3DtgY!4~jSpC<Tlm;M
zlaraxGaqEv--F-wSMSQis>ZtYHD-FP)B8*Y_^_I>x^<dg`$G2joh;0_m^3=*cF?=J
zZ+1T5Vx9Hw)UV^lQ+j!@vgg)quQB7wLDp?QxZ4w6(7G6LWkByHXS9ZI9V`5fo0{Rv
zFt+*l?fULJRmb)0H48hwbliM@+FgIiImU?V`vDKF8JEr-t#$jbZu$Pm2X%6Mlbd&@
z{L-yYR{Od0fxpRGbnj>$aqYRuSu)J)?D2fLtLKbs&B@yCJFep<zqqvVdZ(8=?s@&#
z`I+t&pF1u;<FPZAJkYMM@3fD&)|~G;;q3Zx$Hh~BbJ6TNpV8m#ZFgM0hLddFc~FlG
z`<OQM!Are-=4Tq6?=sTq#ldu7$lcC9(Q&i>@Zte>=V|@wHD+9OO5T#QmA|pTJ8pRV
zNao@<&3F1oT>my17RG9=>ekojxU^;8*~vk`9oPDw=)B{~!`a(!?2~HG-<`ZhXU-!Z
zaL@ZMe;qPr9kL=1=I(mB<I<X5<1?1?svjdRt)9pAPV@cgF0XaV+vvDB)*j8y*6d8X
z`O=T0KIyoa$uEXpS^e%%w+{O_HKU6rJfAu9<mtW>oiwMP)$A?<b<1mvd^mGc{~ady
zjLU;voe@0a(jC37)qTZV>^p91M>oIuVc+QbtbTQj85dLCFnNNx`oGK0xTCxmdTDgr
z=;Q0%nnx3L+_Rs!`Zu-He|Mwf@<5%QW^Kf!E&G|e(Mi7}n(y*9<Hif~(2cpePjuY#
z-r(9CJH0&P(nu?wW7UJYb=YrM(TJTII@5jUZQasx^{OLzFb~ZuJzTd<xzWAD!!$a7
z=}I1Gj=!tD*Ky^EpHG<eSgY4w#?$P-Ij?4-(`ygF+UbS5{WV71?7Q^FPjl>ea>u3D
zJi8g5apg=V^4(^KJ>#Z!>5bLDSxU+<58>G(<H3%%cii+v{LmZbN-aI#jm`&sAp`WL
z*Z8}#=hSVj54hI8Q!BiTPOp5)Z}u^sto9mq$9=kfOk>8S^_-2JJ)-041>A4$54@D8
z_^Wo`$nTkJf2Ud2c4{~B;mqalj<0q5F=kv$b9|G_TOQ-?^l#mMj85;F`^4iJ7aNn_
zZ`Casi@)~XPx~0_%ylipKL2DtJ9F`#{D0hL{^~JVvuE;o^>5?4b-3?DZ@4t8+qe7;
zpTB(d^Xgtd^TD1Loh$FOy8SioxICbjPI_10^)qgm_J>{F9XB<5qP1kSdB}GLTw3L9
z$Ky_K-fRAT=Lh{eUfgk?es90y^20v-Exj^@$Jco5v(=vK^kO`D|I}m0=(zbVjjvv`
ze<KgL`R?~*PaEmQq>rx-I^*$bj#s$$-EZXKtk*S-xI65b-t+{2({rm{cU&3M6V9uw
zy7e_W?zvvc2EN1Z(u46y9@RHv#?@QVi^X5huk7tJE*~7>VDtH^tv>oOX57?jXMfBD
z9hXM>ctLaapH<Br7biVu-FQ3ijTx6#JT>>;P&eyX*(*D)?_`*?CR4aPU-6>j>NU?N
zXMX0*>UXp|t~DoTuJN7k_#OA0f#i$_=bow`+cD$nS^o0Ed8J-;+c7#WCY^NeGJ$7Y
zb>nCD1wXH5xZ~2e!>QZf#*C{Uu%CF`ar6JfOOCC7$LASW{ppQnY`wSgJz~U-Rvfud
zx1FA-+lP5LbM*rLPIm9O)^FWB-tpj$o4sAVzRAt^k@&DuFItUR-J+B3-3)i!d{5vp
ze|PKGap?=&-jjEodAyVE_PCCltUcLtYTBW{d51me2TWt67dKhpEkC|dH(Z|bGVf*m
z_G!*=X;dpZ@nzT=R@t2o=PZO9-<{28j&5{ZdpeEy4lH$C?Y%2Ee$vR(<m@i5b?e*c
zxY3BevhFu%{V?Ln!g>8!JO0emGke5M2CRMeJrQ=t#p8i~<}v+xWc9n>m5YJr`Rq0M
z&a+dy6>fSsKCkq~h#RfR82y-^^c$Av<8Rpc4*WC^IQ*n9OxL_#S%2MjjFAtx4qTay
zub<>M8h2`6Q~wxOy)tUe4%x-0clG~YQ@0+Y<Kpo`7E;3<KRPa+BR%cf-s$I+I?lLy
zH$CA@_iF7rt_&w{yUcp@$3B_5#Z&9|S#tM@56J*#_8#l^vorQx-;I30m18=cX?N{>
z_xEeva*U2^-L&$V#&78jH~q&0`C8r6W_q!{&CUCGr^iahYmD@!pRj3ulH-nxXD>I0
z>(zYKt;guN=?m;9{oHY_-66~L+B0ch^}FNdZ+YirST<I3JmTt68Xe*5-TWKc${x{i
zv;XKhYtP<mE<fM(N5_q~YDPb;t{uGKNxUAtr$=0x)XVb@>-Mv=ocQd?eunG0C%k&>
z7@c11c)6<a7jlV<sfO_;KReI%;+bAG&-=nT{M2jabH~-cxEOb}uUmeMjvKA&N7pX<
zj?340fxqHG-8OvBV@0oP$FAm?UTc4v;TiYbx2x+fDaYuz@@f6q!_NDHYnkI!m*E-L
zGklG|Jh|H!>Xu{7xPKEqWS?B^+vv=>(`(JvpX}yZM)Gc&`Le=|_8qTRxXHp!*6z63
zA9gcw$9<Z$JFfjQwc~?cu!g%F-*MIdZ^B<$h_~OY8}5@mZRUf$g$MLHqW4pO!}Og5
zYe)A?Z@4?Wn*JDf-1y6fop<LQ*B*l%Z|xCx*{xgJ_onaXdH`paS&t04dN<!6aUJIz
z&-TZ<^%x@`aPyv5I|ufs{@N3Gs8+P`!5*=a+q&%-BfYrk1^>?ct(@T|15Y)N^!m=A
zZmAWvpPl*qv~IZ3sb=bh_YE$dOxSzQIlzC~7s*)GkG0zyBY)3aTF-h{v(|Av|0G}a
z*fBaT4;-o6?)Rw~7c=i-XEnV0`>Jj^M#q(1KIk`f+pS&4#pFZ!j?UGL)vd#QQXX*m
z5VmuAZl#_hz1eH9yf5S|`sLWXcX=CelUZxW#{7oAxY=`L;L5$zT8|9<#pUhO>~>rl
ztv}hNJ=gL2?j17nH(d4iURJyKe8;O>k1^w7(2DO`9ap@q+lKx|uk&sWFpZhs_`2%p
z?})$q?+RDWR(%b7#>Mj;CEk9zKg3@-kS}#}{f)Ziy9uohIm7q4ez6umJCF9d&Ifx7
z7Us%6Q@6d1j>`)k#9QZ1+Z*uL4|iOe<Lx)N+0SqtyZzDW&AY-Lzw1Fg()Qb3y*)b{
zonBo1D+@eH{Z@Np#*G*J#Wqj)cl%n$&7Ld!n0}_+&nxT^H}6Y2`LUaYdSqBPTpDTP
zg~N67W-dO=MyJ=aIQq`_ck{S5r#<co7ef#8^N9!b*wBY~5WXDXf9k`Ti^X3a(7)nA
z-PY>G;{{%F%V+Gn?A9$)Zt|8I%I&9I?*)44&LL~&FyCE2k8;Lex|}hwq8Tr~;CG(P
zxazgSsM~&wj;mg@o;@~?&&)+XuR1?wTpS*UgSlF}88@DWojuLxPy5*V>5XPO@w+<J
zuii$dH$8{Dnt?O8<65h%C1>B#iz^e>j=9ncb=zyq^zs0k54-Fcmu5^}IIrZOZaYTD
z&3hATXJ7b*9qwnz7H`uFv%Mg!^LeBfHyY{wR=?rWDZ^LIe7@_ox~262Hy+1})jfU2
zwO6SbUsvC0cp06)wBgeFO}+XdKGT~y^v8-HonAHL0j=lS#hbhOb=+vBEnJ@a+3y_K
zU4M66x;&G2XE>|xyAc;tMzOG0*Iu{2M#trAdd_*a<`?XAG2<p*{B_VBAMU<W-Epyb
zjHhm^`}>T0&H{e4uIz0y?yjdb^=sU5v3GO^+;P=N4zhkf&%QT>mvhJU?`Cv<^SzmU
zSGLk~__X`XeC|<K>lp3xBfXx{Yo5Wy;WMst$Eshw#*7<HG@2)Sgt_i=JmT^eD>cKp
z>#w@yZFF2r`r@^pvtQh4H9^OXx6$n9Pjkix983qz->6%-e85fJayI9kCnF!C&GTXJ
zWT77W;jfy|B?EF#Z*sNzH$Xkoa`CQu^VyvTGj4kH>;taDnykURUFNR-#)rHk;`M6(
z)>OynxU^*5e5LPB;{p8jpBXoK!{>*3+Jjd1p&8fOF);0uJYW58*>R&ad*&{C#LfFh
z4SA3n@|?%~xXW_KwJ&-;^~2-C9j|UVM#sg#rPqPmWwn0wW6ZdC;qU66an;VAizyp4
z`)oyb$CWEQIdfjESKWGyj+-pVhKwc4@%gTP9apw6o%yUEb~MkpG^HO_JgwVbW5%UZ
z)-Y-1^D2MF%^tJrX-)eY9oPEP7f&)V<Ej-tKayWQ+-0F|8NT20{=(#GeqQzVj(gVV
zn$FeH`H=5rG|TXA&)spKYMyav%zmbJbg#}c;-+7-uYapAaAklt+5J{u;5e*X-5k4f
zbUyIY9!_`kVy|Yc<K}PdX#Pgd=t~aN&byh`^s_ncba#4jJ)cZqx>iR$lizpzj%)o6
z+A*`Ixz2hpce%7;IOJhBZ=K%!4HTx{@Q!v~`R><oQzyEyqdngEe70|nxEN{_*0=Uw
zJS^JyvFmL;GJId)CI`Gs4Oeo2X>>k>&BIxq4#?m?Gp_nS^`hgZkDt_x{5vi`=$5q|
zpI5jtp=L7eJj-qU`WQ1Vt?3JSPVa?1t7pfxb}ZU(?)s~4c^e%Uk6v|4PdnpxdszMI
zHSW0BbjRD(f0w%BX5Zxn{>;<*^=sU5`N0c3b42%DCT3ju!sNw{w{?5l7y9>{E!-V`
zrxzDT{dc{sM_RpkN38S!&UJKp=}f+M)*t?i8_mw~+I%Z}+>DE{^KYHe>fCXYF<dpI
zc{RIr>y)czyv5zkMW>gxr#)Pc4)uBm*(Z0_jddO)z4@-l1N#C$R`;>6@3_g8Id*zv
z#N|Qub-7K}cKTz)4HHN0{G7Q~zDw5a!@7mb({JvRxOD1;Z|lWWCv`KA%&ojH>$YQz
z{LP-8e&aJgKFve+bi5osb6AJZ?&cWj&9nN$^^?AvalgGkM6Yx7n&WQmb<1mXdU+rR
z@{oC=Km9k$N5{?gK7Z3k=T6o-E=F=e=Qna@?e-i0ALwMt&(=L#d;R(~MtX6reRt1&
zLF;0~_0FWvk(#;Yhu>x2pK0&7*3VP>;<xHXlRRL%{#L(XW&POcjUC;c57_bI$zD0)
zX5F}$&e6J>wT`RL@REznf$y_B{*|l8JQrr_ch)UuPq^}h7nXDOv(;W1>BWui-F=}R
zJ;oh3y6ufL+dF;cJo_#&<0c1u*nKD8akHoKV~0QE>J4j^XWE^yXZub4`WPcFMsnu7
zs};E8%4l+&InswKUUyv2hL@hxYd%}?V})x!(+_wn-qvlc5A^B{8gW0}ck&K)##8@q
z)lF}y8!zdUVZCM!KR?~i&Ro22yf3U@jcCK88QbU1t3HqN5D&0v&)&1+bH_dRg?`2k
z-+XtyG2&XUta=tB^)uJqd!lYR#)wN7R(cM5r-y+%u9~H8@!?Z{>BYp84cd4880ocs
z+2^xs^El7mJN4_gG2+UqHOFI_UCF>1+;Ow+)Xp4^l|2%F#7(`z_VYLLfX4^C^S-va
z=hS0Ez0s+5bnfg6n8wJ5aQPYCuIXLvxjXJ^-`13GW5%_92cKn_*3}HpxctD|%|boW
z`b%Tp@4Mc1T;J6(@pA0cY{ZRD90#t?cD=az8*bL_jLGZO?<95W4}Zgz1H5l@?KgUu
z&+>)udNt1@AM}`<W-r6q?d6V3Bkq$uy&fC-OD_+so!?J9?(}AV+|5_XIL3%8x9Q&}
zT7gGgb;8Rb2ktW;&#mDu%OfrxRx+^qw`)D}8XZ^do_YY@aWSo19(MQMdSuGwgX2k#
zXL|8`m!jLgvs$ydwbiTN)Q%t6bjv|%X^z=m(dos-<L{H+=(uVXy*s(B$Br@M^7M&T
z;Ev0`^at&HH4p99G4r;5eaq!B?yeV|UfSaClf81rjSo*ezT?^_WX^uP(;s(SJf6_*
z__S_V-krNSs7H=5)0=u>`?ms~m~UmD7;)*$y1&`S{)SIq<h?8twBEgc>Xu`S^x~#g
zGW@Nah06o{-CnClhJBpBiLZMw&-L_*{*e!79v1!SY1;k%$-d(`&!02l8e8V<Medo^
zvD1qaU+=Wltv^P`J$pglil=Azj7xKJZ2jrK6+b#IjeJOs&9S3-#H9^Krt-brHSK<0
z>A8-Z{^hfIXy5t%*Kw_XXaB0(-$uvfW&Ss!=;gy|=0;pzroZB|&s_6^H+TGw>v<e}
z*UopWy5%)GE(V^p^Tf|PqvO(wdF{1T%`@)Pp0KWtD+BcM0)N+!j+^@P0h2F$HphzB
z9aq2QZ=!g-!=7<zi*A{9#B=`4ay;V3SNiOY*chvOPTe}J8?JX+zMGx*UF_^hvs!j~
zv)-_G`)<U=!b$JayR)a)?L!}M)#Y4&KWCn&Ig5Wd>UGxoNgszRzq>l?kz;iJCS%Sy
z%%PsM930U4&y1_jQa}5}&U>Td@)gs&l167-x&A~iuIH_NXZ>~iF*?2HeIZ`puV@AC
zxar;W$l2SK_fp5@Y1q!V>213CG|Tdc8(y?N`F?W8#r#&k(VINziL<w}wb!k$(dqRa
zidKBS@?@o6b=xsUTpZb=6^~x|l0D~@_eRI16*GH7dc>S78Sc1vdRC_ReXiY#A2Y7?
zV|=S_$-n)K?r-f6xLNydz3S0pjC_dB)Qr#e$(_FIxN_?_=gmB;?2apAyhzq~!)NoX
z^xTM>Uc+?A!?(DpowZ|UZ|D6=kBs!nY}Svv+Y|N3kM{+gYD1s8@fh3B{Gc~`5udwp
z)f*r4e#2C+l|A7+$B2tXv-PJxcG(>_+5I;6{Qr|P$8PO9z5G<GU0ywMj2V~D_6Fzd
znXb*HzPowwEWMt^_{RQ=h3m|Ru)mdu@bz7oEBj5|evFa7JjKJ-U$QK3{NT^ZdwIl_
zEBknA6urEj)vA8|8XcD|TpH=NhEKWH8Gpm)13$6`zn^Iz>BYUi<9LIK?|iqvjkq-O
zlqMRTefFsjSvy@%{=bOz*l=H$vFvBq4)ycilas7vwuX@pxY7L7iy1eyI`0pw9(P<i
ztzVXK<v>o%_2m069!9j%$CH)bt=nGXs#k7hlh$Naoz%-*t9#Fk8!w*J?v9)NfquGI
zHFw<XXHT@&^gsF{{;Cs=xHRf#dev~n<4!N09(GQCcWO7{W<R7?HvH@?bJ2X)A05})
z?d7tz!>ikm(Q&i(RX@YadBn9=9?%<4lDF0OQO8v;`#?OVo5w3zsN04)3yVg%<F$D(
zSI0;%t^+UY=gl|t;_3~4(&}eybFFwW)9byFobBdg#+BRfc6zNIYwM=|*3N@(^#vYo
z-W%#gFYWyHyC;2&%iE{l9qQ562U;EGaL~sS{pWnAxvQIJtr;_!Sl!F&kybA*9&fWJ
zBpbBeWq77HT;HGY>2-YS!`WZ^#459Ho%%r6xi?nUk7;y1(C46;ujWugnpgIS5!Z7V
zG9|~jwC;SDnsKq{bgbsKZaqfF<pIq(uz9e$2amYcn~ZrEJ*nM@YyGLw?)vM|W6ZeW
z+kbYl3q0b=0G+(x0j4=zuf97wuIG8d>nH4vE5mA!?Hui^d(Vt(?Yq9#Bgg2t)*r3T
z-^j1JCBv?>N3QlnJ<{gkNv-fQMn0&KwP*j44Y}LN+l<Roe&FraujA6nTe>lKysg`Z
z{?eH&?CQicMtZ}=-tC72dc(y(d$F@ebb6Bsn!`5FDtpAu`;xb~d2i<X$;zH~<>r68
zkgxoWEW7x6h2~GV)=n>tSTy6V@;klp^y!|~acQJ)b-%4!-^Pf`KY4KAKY4eKxI9c2
zzUf1_wBoO3wr+imPA|5D7xo-;?ELN0aqVYVjwki&xYqv7zlZdj_0t)jdF<yMpF6$b
zW)AF~y?w^TmR<GQ)jZ<neMz6|?3sSnfA+i7wcCS6T+d?gz~S1l<3-1%5of1Xb^B|K
zxb`xK+{OcI;P>j@KH??|d_3=$_ImoC{Cl<hhMT`%{ha+$Hq=bLx|-Fme!S<g@_tjZ
z-SuM{S08wq?_$XVy}NsG$BnPpn3-!eA00P2h}SZ=;%nV@j1f2arx~ACx_M-tS-tAl
z#~5)j(r@|Ro*uc&!7uEvZt;{}TI0!1Hafl0jBV|X__LyU#>LKo>3Y@Uj;r=(X7+_u
z_KeHdyq8?ln)<n(<)Y(eKhOs&e%5VoqvOh#T7|#5-vr)q>ExvxpL_b#y5WBF`vX06
z@<30-hh3jLA8^kZSj|B_c8rc|Z_gf)p4ee`Tz;fp=@CDNeV4rvS8wq+`wSNCcdWYQ
zH9D^RCTBc}FRSd1t7h3FU7N#PtJ-H=ES}=6>a1IjG2*69bn;ksv7J}Glhkd)_XV!~
z0Egye+4U!V93OEU_}H%bYu-CwMn2$%AzzrQ^{QKsG2^B$u=&Dk*`}W#G<Rdhr4v)$
zb~Sfg`r^fkzPkNwjJP;-rjNs4saN0~7Yq9d6Z?+4>no-)<Lb9>b7f2p_<%20GQ3*D
zPA{F9-_%R1nq_~CW^*LVce+P<<3Tj9?g=&RYjj*19jp3Yz+V?LuDw8S@WWoDFFx6S
zv)|x7$wECg%r7o&-|k~v>kfB!@2y9NdaXS@an8n04M%mujmB^K8*ci|&vd7c@7`^7
zYwLq&)h=s~{uSnoo1WO+Z!nF{-}IY4a4j451UYfO>yM7B*D#XhygPi(6MM&O@9DVS
zoAGw`eBCx29aqhw@w{ic=85mE?xB11Jz~c7951kTHFsQpWAT=r-Hg>EZ64^916t`$
zPuzWfs#}lI=?(YW`Ax>09lQLQUi*S0e((Zc9^y;a*Bv+gAj8<6cV6wg85c**9O-L%
zyUf<FUSr1fJhmQKS^EoG7abRmMr?abxN_HdTEBXY85a*<jm*IVKd*Q^<6_W8BlfD+
zx^?P9{H0axWF&srFJ^vpdeurl<U0=!;>9O<@b6}{@q*9k1AIE~MyJ>FyR7qEBd&E?
zbB<ka@n>8fs}C>C!54G#{LYV#t7hjs$U@lhXEzUcIMHd2-CiE)#n1!sfj)EXyyHgP
z{M#(f6J|X&tXuv!7aDPQ`WMp}=~Zjl;zK-EvmN~%SFPy8a}M8m<$F-al?ObG<clvW
z*{$1JACgyW{lFW1XZ*W8qSKp9@R%o#=;p<p*JmyrS-WRxOK-Wp(^$8>#)wOoBR#iL
zrx)<o#fVE=)@=>v_4`}~cfPM<r2b*WhuI!ekNp@|y|^^d{E5d`E`9OWHIJRG<>|iT
z*)x}Je`oA6>yfv<ICDSw-$Z6yJw|W5;0@2sy^`arzn=H3n#E^4eEzQH;*P6s@`^{l
zxw2>Ialad_;mWmNc{Y0K40mPyb^9@{-2D5j9P`*5be+9$eK*I5%SSxx_I$KI{r<ow
zYtBC7n!osd_eo!bb?y(^cQRJD|LBWw=}Z0S{nTIFFme4HU#vmKc)nXVwZTeOdCFs-
ztKTa7ng`EgV$myiJN2sDk8$N<(j>FV)vCq=`0GC-E}afusuw;s-7DVKZ9no5UwIt8
z=D_vYr`%*>mse9CW8?#!*027qt=~GF@7}|8%P~e=S%_X9SpQDOfJfZyGxXt_V^{Zx
z8?Ex-T844$&F13KZoly{+)v(}Bfa@9$qxs8{BSnsoz}YLhrjVOzNR<Q7b_lg-1r~|
zuCt%5dOPCE7#^K8+b811j^?YsxN4{V(H(DdUh#Our4M&Ev-QYpjJRi=Pdos>a&got
z`?}286L`M5H`J{|E+4Ze(5_zUxVlHytu0sWWHmnUI(b;(k9?pPFU;^(d!im4>XrXw
z_?$O<pFh!?EaYqtJA1II`RW5+wDKdK<oZ0vO!tV(SM|qGKfTNwKA3l9pSW@{=%o+C
z9IoY=Co-HmuIk04j~-gn8&CH4v)=6IxY4+?_Im6!!nJ0eribIN&#|BOTey6nmB-$f
zPxkbB?9~_HrcQF|v!^|G_CbBs%5lx`uFq$#^?RO1TK()yyU$m0TeltK%8j@2d9}a7
z%X!4bie5T-GV6h`&gUyPI>VI*>;J@GJsPgF`kiwe|5x;%^(L=)^d-AHyn5^ySMD`m
z{0RSP9x!RnzZKA_hH~eO%OB^vck@{<E`4WD`LVk<)FW?wjH?H5XiZN)$=X@3eHPEb
z(@%1I<|bRXuIY50Ebr((bCXvr9^~+Iw5#dLJNe4R-~lGhPxhR8?5%F}$`GH`4ikSj
zWA(@nZoG^a-_jc{oq3O{*G{j^^yYg+=Gn=@l}jUElQ&GCrIyjXvKOE85N=pF=WG4A
z!mQig>V_d>o=w(LKiYlfyyEp$Z@6lf?^SZPn~M>bMw)pYPjP*=Iy$b}SugJD+Rt-#
zTr7H>`G9ZFU#;DU>sdXG$?qq+N8G$Od76EK=YBTViq9Pv#}Umkm}}>|tc|!B4t~g6
z{Kca6sorQ)r_{<E^sll<dim%`c2_mlBd>AgV$nsPnw>pf@wFZ~#)z9Whv}?O_`J#<
zaWTS-*LU@=N8ZMji<k9>y|a%6KH~C$&gka3xpwMz<@!En&9pjb&G!nQn`6b}D_36e
z&UpxbCwEtFvS7`4*)Q+Dm)5Os<I0sKyeEHapTSoyef-2$JG#%k=h|m?<H}8q)W+c*
zmp%BCJmBK+IeAN;S%cd7{BDksUR?X5LvHzj=kU8HIiuIw({q?S-Q9z)dU0rq&(^%E
zv2J~h5jVa@yY;)?(R}6d4#)R>d_R9fFK^=o{_1<DZhae9z2VX+S9DuH{*LZ5H>~WB
zGJ$EXc(T%Kb^F`*)Nqrn_^{f$cp1@~^<!H9`S3e)Jjp|P%d=t1mU-;SuCc9&*LQ1(
zUR-sFx9YX?eyH2u#+geeUCx-%y|ey$>=<V*jd?$WZN0qkxlF9SH_lu>@`7d=!;!1Z
zhi#73c9z4AOE0a-+TDIzx4w;4?p^)gp!LI*o8GYIWMY?Tjw@FW@F1Cs$2)t&mFwM3
zXKE*3w8m>|+1(%X1#a?7XWm8T*{$E%2l~!DIvslClRVJM2dwxitFBl3p>93KSucjQ
z^8=fo=Gyh39(l_{xY3Lqzg_dmT=I99y^-E47ss4f(Z0(~#&FI#xZ~9AZ{w;rHA{bF
z{kX23S8{yi^1$I)964}p{c5_Jw-Gl?e7yWEyUMIbe&pet;dnx)4D;Y=-Dt&5rkv?c
z-tf(L=k?VG_0qTD#OqJ}#g#D!P53+OuiK9?(#wmy_rFy)Jawlte<SjOe)H)SXI@!{
zdq?A}*Zt=jr9NTpc#NC==PhoYr&%8SYz`XrNj{tH%ea1@BRTLhUh;<P=j_Yf`s<cs
zT)BM2=1DSv#|wV0Xsz3he#42*)Mr;8)_Kqyed?cm>`ASzTpH~G4m?`TW!*d3s@q?K
z-ZNJoXpS#o-__{_{B?2VrdH0_crsuuGNy*JeATayapuN@aIkS4D?N4qfBomo^?de0
zbI{8ZUPPzs6;IAw_j@ku=QW*}eDM1fX5Dt=H+4&9F?g6>OXhs`L@%x*{<=<{Kk)%4
zdt+)ypU<6n^3(^<@_{#4yE^NUW1Q<|zoe57_}H|n<L*3Hu3F{$HE&b9-Ar7$vPGi<
zm-Z(fkGR&&kK{RfJa1OBcI9GTz2&>lc5-~?${4OZ@PQ}lwd-v?^44$RS}Se1shfH5
z`MuK5SG~#BZmw#&w{hmu!Ux*;={t&RnyrC;=as*Wu3XuQW^*`rlD=E<pl*NjS3l6^
zh;C=vKk?T(lLahwwC+`nb?a-KeZaM5+VJ8B-{XVcGtb?)ay^f4{dn~ACEQi*BQBjd
zyuhaall_;@99Z##cDh%!)~#=O2$Q$TfHUo%axwXtnmPL!-|wE*&6;Hb>&d=bj~(M$
zx9E(&_;m8fd8LQzwjch+SKO!Gc3hernM2*=?P(sYlg{)5pB=k3t4H2^z~!gf+1KdH
zUbO4Uxo*injq>^=xAoXDMqJt)v}4=XKJho%rS<HyYdLc^&x+qyz40J=qnY<l^ANq*
zuIY1DL)X?}o_r57x4A}Rq}N(;c#5l^eMa+>{ULg3@Es?f(!X23kzU;F6XtTP)~jwk
z#)!*<^TAVIVz2JQ9hXL$c@e#z`b%r}9p0|wwQhSGBfZ&o@SkWkLC3|U_31mK<JuGQ
zU5yXsQMc87V#LkAgJ)0s*1nT|pv^U%J6WjPhyI3(W&O!)&fT7Xmof4I7c;%Uhuy5r
zxY@_dbN;uImA$9qCIiVEk9X?Vaii7VfK8*hR=lm-hIQjDeR%pAbN9V5(u>PeIinGG
zH)|c&zL32Ee^>K}izll*pfld?>}?$v1E0sv`8SVMKW1Fs?r5yrU!&vFDN|40LEtm4
zdi_ms`KrgT`73{FxY8S)URq^>AC6qF_D9F1Ek4M=&fZYBxBh0|;e~7LPxoJ(WLG~w
zdFPLOu$QGTqMKg6sM*uH<=<b_%RAH@yL-=7FJ0*g>vzo)-qO3eZ;rU0&-$H{t({Da
zxN5~iT3zq-L*0Iij*A%&a;@j0{VsDOt{TyaXYD*)^|fw2#+4gw`EJH*`M~$CchG&8
zwGkJ~I(M^GkG#f+t4?&&BxAC-Te}fAdENbvQ`5gj$JJvzrTd8=GcKRM$<<@m9;V+M
ztNYJL@0p7)%lg5!-<iXCR?Ci?=fm3dc*NBc`94Wg_BiL&y=TPLU+H69hdx<Zf8BPB
zD;G}=vcF*JkGsCDTi(Wqt1oDK(r1BpT$!TP*}>PHc}Co1H)q+{$!^{L<NE`DW$N4h
z##?6^)ytXY%(arWs}JGX%lMFd_<1K2BW~&?v(fsgzx2}S_{8Ut-t;hy@shVZmbJTg
zaozGG4|ICYf%8=JRc~s=4@Y{z**v^A*If-q+~^E*g;P_#jVm|aV%j6(OY-OUK1-In
zeQCr^&g^TM$7izayprV+_w23uJ*nN9n>9Pj7=E%LZ`Qv0_tupgp0k4&>bUc+sN3K2
zODB$+Ia~kJOkDNi(V5!u0edIAb^A~+50fkWZ*XOaMw;>X`i(r`>QR1RCKJ25ul{Bq
zk7n$=Gj?>3xO$9EKDfr+&Bm3Br%(Biy<kUYJ$8%{H(B5b=B`%YD_2fsAw7~j_zd6g
z@Alrh<p<ZZsaZV6bohKV7gxQw)=w9%v-)AuxuUghd(#^aqRkw4`!~jDob`t5Ojqj1
zW1sQBTq~M8E=F>^TeEufhYxa_y1AyAMjn5n7fapn^atj*^d<)~?3&h3{q;N_FlqgU
zUUiDM&NAjR-XtS+xirpsz~Ci+WebmPKf7M}-cYw4W5iW=T50yT9Dnn@a<%8q-1EQ3
zsTb|mK=W=dU%7n5!}b2$<<%p{2p20ka7}ysrkA(oSnZ87*K_KVOyEW{-m15C>#L7x
zq$_pDG0$#4)Fa0@>y18hIP$+oV(<DKZWyu}?RZc2$a?JM173W^b1lP9wAN#9TsouI
zHRfvl>(;jsAJVVZ9Uq*Z)-PPz(qpd86aFXrI+jDuXg0_0o=}gxM)b;9>gKG!c#UtK
z-S=|GjSuk#JAJ&;cO&j6`KsxUG2-$$dxbfkyo;_}`wV}1%VR#ccC4(wZac=6D-T#S
z%bER>=1=P;7kUBH5l^h)sa`erEM9cG<}beAt&Xz~xO~Lot+Tv+(r?y(=Eh&^$4++5
zv7&pVSKWAQ9-5zceC1-&nBKrmmOrVRHG3W(^PG*H`d#&AUwBfhlJOd6uJ!U%#%Se-
zgJ$#j+<bm_r+vhgH4F!?y5a2f+?7i+ZSsppzcbC)D;cZXUhhr$#k9ugbnWw;&wB6X
zxa!472Jp|Ur#Yk7K8bDZj^xencKo>NRWr|H^B8Z}+j``OzsVD>EJS;L=IhE_XTALM
zd@?2r{KfLQv$d>b;>`6d7EkdozL7KBtdZs?I_t4xT=il*`1~|uz#}dnlCP(luiVs(
z-sFup*?-LQq;AoN8$W1Qzj(jGKI_#JdO^=cud~m0^LFL(A^ng{?C7k=k8$N@{n*(n
z<L}dbCwoJBA$g{Ihe_~NZ!$oy>nB>T+<1`pnKLf_?w)qmi<|dD{_e)jUa_P7%#E)2
z>daT3m@|F7^7qV{o9}2C@dOjk@6^xSyZz=_&+x$W(fw&2a2;XlS9<xXZaZ1y1CAqm
Qx<2-^_45Ni-mdchKR%uwzyJUM

literal 0
HcmV?d00001

diff --git a/src/scirpy/tests/test_ir_dist_metrics.py b/src/scirpy/tests/test_ir_dist_metrics.py
index 8b9994b84..1afd64056 100644
--- a/src/scirpy/tests/test_ir_dist_metrics.py
+++ b/src/scirpy/tests/test_ir_dist_metrics.py
@@ -458,4 +458,17 @@ def test_tcrdist(test_parameters, test_input, expected_result):
     res = tcrdist_calculator.calc_dist_mat(seq1, seq2)
     assert isinstance(res, scipy.sparse.csr_matrix)
     assert res.shape == expected_result.shape
-    assert np.array_equal(res.todense(), expected_result)
\ No newline at end of file
+    assert np.array_equal(res.todense(), expected_result)
+
+
+def test_tcrdist_reference():
+    #test tcrdist against reference implementation
+    seqs = np.load('data/tcrdist_test_data/tcrdist_WU3k_seqs.npy')
+    reference_result = scipy.sparse.load_npz('data/tcrdist_test_data/tcrdist_WU3k_csr_result.npz')
+    
+    tcrdist_calculator = TCRdistDistanceCalculator(dist_weight=3, gap_penalty=4, ntrim=3, ctrim=2, fixed_gappos=True, cutoff=15, n_jobs= 1)
+    res = tcrdist_calculator.calc_dist_mat(seqs, seqs)
+
+    assert np.array_equal(res.data, reference_result.data)
+    assert np.array_equal(res.indices, reference_result.indices)
+    assert np.array_equal(res.indptr, reference_result.indptr)
\ No newline at end of file

From ab6eb467f03914503b563c25283193ee164e5898 Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Wed, 10 Apr 2024 20:28:02 +0200
Subject: [PATCH 09/27] formatting of tcrdist tests improved

---
 src/scirpy/tests/test_ir_dist_metrics.py | 439 +++++++++++++++--------
 1 file changed, 299 insertions(+), 140 deletions(-)

diff --git a/src/scirpy/tests/test_ir_dist_metrics.py b/src/scirpy/tests/test_ir_dist_metrics.py
index 3bf5d1c2c..326d5b686 100644
--- a/src/scirpy/tests/test_ir_dist_metrics.py
+++ b/src/scirpy/tests/test_ir_dist_metrics.py
@@ -317,141 +317,292 @@ def test_sequence_dist_all_metrics(metric):
     assert dist_mat.shape == (5, 3)
 
 
-@pytest.mark.parametrize("test_parameters,test_input,expected_result", [
-    
-    #test more complex strings with unequal length and set high cutoff such that cutoff is neglected
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["WEKFAPIQCMNR", "RDAIYTCCNKSWEQ", "CWWMFGHTVRI", "GWSZNNHI"])),
-      np.array([[57, 74, 65, 33],
-                [66, 68, 65, 33],
-                [53, 58, 49, 25]])),
-
-    #test empty input arrays
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
-     (np.array([]),
-      np.array([])),
-      np.empty((0,0))),
-    
-    #test very small input sequences
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 0, "ctrim": 0, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
-     (np.array(["A"]),
-      np.array(["C"])),
-      np.array([[13]])),
-    
-    #test standard parameters with simple input sequences
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 0, 21],
-                [0, 1, 21],
-                [21, 21, 1]])),
-    
-    #test standard parameters with simple input sequences with second sequences array set to None
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      None),
-      np.array([[1, 0, 21],
-                [0, 1, 21],
-                [21, 21, 1]])),
-    
-    #test with dist_weight set to 0
-    ({"dist_weight": 0, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 1, 9],
-                [1, 1, 9],
-                [9, 9, 1]])),
-
-    #test with dist_weight set high and cutoff set high to neglect it
-    ({"dist_weight": 30, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 241, 129],
-                [241, 1, 129],
-                [129, 129, 1]])),
-    
-    #test with gap_penalty set to 0
-    ({"dist_weight": 3, "gap_penalty": 0, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 0, 13],
-                [0, 1, 13],
-                [13, 13, 1]])),
-    
-    #test with gap_penalty set high and cutoff set high to neglect it
-    ({"dist_weight": 3, "gap_penalty": 40, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 25, 93],
-                [25, 1, 93],
-                [93, 93, 1]])),
-
-    #test with ntrim = 0 and cutoff set high to neglect it
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 0, "ctrim": 2, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 25, 33],
-                [25, 1, 33],
-                [33, 33, 1]])),
-
-    #test with high ntrim - trimmimg with ntrim is only possible until the beginning of the gap for sequences of unequal length
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 10, "ctrim": 2, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 1, 9],
-                [1, 1, 9],
-                [9, 9, 1]])),
-
-    #test with ctrim = 0 and cutoff set high to neglect it
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 0, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 25, 21],
-                [25, 1, 21],
-                [21, 21, 1]])),
-
-    #test with high ctrim - trimmimg with ctrim is only possible until the end of the gap for sequences of unequal length
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 10, "fixed_gappos": True, "cutoff": 20, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 1, 21],
-                [1, 1, 21],
-                [21, 21, 1]])),
-
-    #test with fixed_gappos = False and a high cutoff to neglect it
-    #AAAAA added at the beginning of the usual sequences to make to difference of min_gappos and max_gappos more significant
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": False, "cutoff": 1000, "n_jobs": 1},
-     (np.array(["AAAAAAAAAAAAAAA", "AAAAAAAAARRAAAA", "AAAAAAANDAAAA"]),
-      np.array(["AAAAAAAAAAAAAAA", "AAAAAAAAARRAAAA", "AAAAAAANDAAAA"])),
-      np.array([[1, 25, 33],
-                [25, 1, 33],
-                [33, 33, 1]])),
-
-    #test with cutoff set to 0
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 0, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 0, 0],
-                [0, 1, 0],
-                [0, 0, 1]])),
-    
-    #test with cutoff set high to neglect it
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 1000, "n_jobs": 1},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 25, 21],
-                [25, 1, 21],
-                [21, 21, 1]])),
-
-    #test with multiple cores by setting n_jobs = 4
-    ({"dist_weight": 3, "gap_penalty": 4, "ntrim": 3, "ctrim": 2, "fixed_gappos": True, "cutoff": 20, "n_jobs": 4},
-     (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
-      np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"])),
-      np.array([[1, 0, 21],
-                [0, 1, 21],
-                [21, 21, 1]])),
-])
+@pytest.mark.parametrize(
+    "test_parameters,test_input,expected_result",
+    [
+        # test more complex strings with unequal length and set high cutoff such that cutoff is neglected
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 1000,
+                "n_jobs": 1,
+            },
+            (
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+                np.array(["WEKFAPIQCMNR", "RDAIYTCCNKSWEQ", "CWWMFGHTVRI", "GWSZNNHI"]),
+            ),
+            np.array([[57, 74, 65, 33], [66, 68, 65, 33], [53, 58, 49, 25]]),
+        ),
+        # test empty input arrays
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 20,
+                "n_jobs": 1,
+            },
+            (np.array([]), np.array([])),
+            np.empty((0, 0)),
+        ),
+        # test very small input sequences
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 0,
+                "ctrim": 0,
+                "fixed_gappos": True,
+                "cutoff": 20,
+                "n_jobs": 1,
+            },
+            (np.array(["A"]), np.array(["C"])),
+            np.array([[13]]),
+        ),
+        # test standard parameters with simple input sequences
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 20,
+                "n_jobs": 1,
+            },
+            (
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+            ),
+            np.array([[1, 0, 21], [0, 1, 21], [21, 21, 1]]),
+        ),
+        # test standard parameters with simple input sequences with second sequences array set to None
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 20,
+                "n_jobs": 1,
+            },
+            (np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]), None),
+            np.array([[1, 0, 21], [0, 1, 21], [21, 21, 1]]),
+        ),
+        # test with dist_weight set to 0
+        (
+            {
+                "dist_weight": 0,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 20,
+                "n_jobs": 1,
+            },
+            (
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+            ),
+            np.array([[1, 1, 9], [1, 1, 9], [9, 9, 1]]),
+        ),
+        # test with dist_weight set high and cutoff set high to neglect it
+        (
+            {
+                "dist_weight": 30,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 1000,
+                "n_jobs": 1,
+            },
+            (
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+            ),
+            np.array([[1, 241, 129], [241, 1, 129], [129, 129, 1]]),
+        ),
+        # test with gap_penalty set to 0
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 0,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 20,
+                "n_jobs": 1,
+            },
+            (
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+            ),
+            np.array([[1, 0, 13], [0, 1, 13], [13, 13, 1]]),
+        ),
+        # test with gap_penalty set high and cutoff set high to neglect it
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 40,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 1000,
+                "n_jobs": 1,
+            },
+            (
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+            ),
+            np.array([[1, 25, 93], [25, 1, 93], [93, 93, 1]]),
+        ),
+        # test with ntrim = 0 and cutoff set high to neglect it
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 0,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 1000,
+                "n_jobs": 1,
+            },
+            (
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+            ),
+            np.array([[1, 25, 33], [25, 1, 33], [33, 33, 1]]),
+        ),
+        # test with high ntrim - trimmimg with ntrim is only possible until the beginning of the gap for sequences of unequal length
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 10,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 20,
+                "n_jobs": 1,
+            },
+            (
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+            ),
+            np.array([[1, 1, 9], [1, 1, 9], [9, 9, 1]]),
+        ),
+        # test with ctrim = 0 and cutoff set high to neglect it
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 0,
+                "fixed_gappos": True,
+                "cutoff": 1000,
+                "n_jobs": 1,
+            },
+            (
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+            ),
+            np.array([[1, 25, 21], [25, 1, 21], [21, 21, 1]]),
+        ),
+        # test with high ctrim - trimmimg with ctrim is only possible until the end of the gap for sequences of unequal length
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 10,
+                "fixed_gappos": True,
+                "cutoff": 20,
+                "n_jobs": 1,
+            },
+            (
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+            ),
+            np.array([[1, 1, 21], [1, 1, 21], [21, 21, 1]]),
+        ),
+        # test with fixed_gappos = False and a high cutoff to neglect it
+        # AAAAA added at the beginning of the usual sequences to make to difference of min_gappos and max_gappos more significant
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": False,
+                "cutoff": 1000,
+                "n_jobs": 1,
+            },
+            (
+                np.array(["AAAAAAAAAAAAAAA", "AAAAAAAAARRAAAA", "AAAAAAANDAAAA"]),
+                np.array(["AAAAAAAAAAAAAAA", "AAAAAAAAARRAAAA", "AAAAAAANDAAAA"]),
+            ),
+            np.array([[1, 25, 33], [25, 1, 33], [33, 33, 1]]),
+        ),
+        # test with cutoff set to 0
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 0,
+                "n_jobs": 1,
+            },
+            (
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+            ),
+            np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]),
+        ),
+        # test with cutoff set high to neglect it
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 1000,
+                "n_jobs": 1,
+            },
+            (
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+            ),
+            np.array([[1, 25, 21], [25, 1, 21], [21, 21, 1]]),
+        ),
+        # test with multiple cores by setting n_jobs = 4
+        (
+            {
+                "dist_weight": 3,
+                "gap_penalty": 4,
+                "ntrim": 3,
+                "ctrim": 2,
+                "fixed_gappos": True,
+                "cutoff": 20,
+                "n_jobs": 4,
+            },
+            (
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+                np.array(["AAAAAAAAAA", "AAAARRAAAA", "AANDAAAA"]),
+            ),
+            np.array([[1, 0, 21], [0, 1, 21], [21, 21, 1]]),
+        ),
+    ],
+)
 def test_tcrdist(test_parameters, test_input, expected_result):
     tcrdist_calculator = TCRdistDistanceCalculator(**test_parameters)
     seq1, seq2 = test_input
@@ -462,11 +613,19 @@ def test_tcrdist(test_parameters, test_input, expected_result):
 
 
 def test_tcrdist_reference():
-    #test tcrdist against reference implementation
-    seqs = np.load('data/tcrdist_test_data/tcrdist_WU3k_seqs.npy')
-    reference_result = scipy.sparse.load_npz('data/tcrdist_test_data/tcrdist_WU3k_csr_result.npz')
-    
-    tcrdist_calculator = TCRdistDistanceCalculator(dist_weight=3, gap_penalty=4, ntrim=3, ctrim=2, fixed_gappos=True, cutoff=15, n_jobs= 1)
+    # test tcrdist against reference implementation
+    seqs = np.load("data/tcrdist_test_data/tcrdist_WU3k_seqs.npy")
+    reference_result = scipy.sparse.load_npz("data/tcrdist_test_data/tcrdist_WU3k_csr_result.npz")
+
+    tcrdist_calculator = TCRdistDistanceCalculator(
+        dist_weight=3,
+        gap_penalty=4,
+        ntrim=3,
+        ctrim=2,
+        fixed_gappos=True,
+        cutoff=15,
+        n_jobs=1,
+    )
     res = tcrdist_calculator.calc_dist_mat(seqs, seqs)
 
     assert np.array_equal(res.data, reference_result.data)

From a9e46b4878db69073c8181dc39bd7ee02bcad121 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 10 Apr 2024 18:29:41 +0000
Subject: [PATCH 10/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/scirpy/ir_dist/metrics.py            | 5 ++---
 src/scirpy/tests/test_ir_dist_metrics.py | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index 77c915241..855d6e2ff 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -941,10 +941,9 @@ def _nb_tcrdist_mat(
         return data_rows, indices_rows, row_element_counts
 
     def _calc_dist_mat_block(self, seqs: Sequence[str], seqs2: Optional[Sequence[str]] = None) -> csr_matrix:
-        
-        if(len(seqs) == 0 or len(seqs2) == 0):
+        if len(seqs) == 0 or len(seqs2) == 0:
             return csr_matrix((len(seqs), len(seqs2)))
-        
+
         seqs_mat1, seqs_L1 = self._seqs2mat(seqs)
         seqs_mat2, seqs_L2 = self._seqs2mat(seqs2)
 
diff --git a/src/scirpy/tests/test_ir_dist_metrics.py b/src/scirpy/tests/test_ir_dist_metrics.py
index 326d5b686..03a993d14 100644
--- a/src/scirpy/tests/test_ir_dist_metrics.py
+++ b/src/scirpy/tests/test_ir_dist_metrics.py
@@ -630,4 +630,4 @@ def test_tcrdist_reference():
 
     assert np.array_equal(res.data, reference_result.data)
     assert np.array_equal(res.indices, reference_result.indices)
-    assert np.array_equal(res.indptr, reference_result.indptr)
\ No newline at end of file
+    assert np.array_equal(res.indptr, reference_result.indptr)

From ba72d4122d6671b6301e056909049a76c00b3227 Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Tue, 16 Apr 2024 20:33:50 +0200
Subject: [PATCH 11/27] comments for TCRdist added

---
 src/scirpy/ir_dist/metrics.py | 156 ++++++++++++++++++++++++++++------
 1 file changed, 132 insertions(+), 24 deletions(-)

diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index 77c915241..8a00bd427 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -395,6 +395,57 @@ def _compute_block(self, seqs1, seqs2, origin):
 
 
 class TCRdistDistanceCalculator:
+
+    """Computes pairwise distances between TCR CDR3 sequences based on the "tcrdist" distance metric.
+
+    The code of this class is heavily based on: https://github.com/agartland/pwseqdist/blob/master/pwseqdist
+
+    Using default weight, gap penalty, ntrim and ctrim is equivalent to the
+    original distance published in Dash et al, (2017).
+
+    Parameters
+    ----------
+    dist_weight : int
+        Weight applied to the mismatch distances before summing with the gap penalties
+    gap_penalty : int
+        Distance penalty for the difference in the length of the two sequences
+    ntrim/ctrim : int
+        Positions trimmed off the N-terminus (0) and C-terminus (L-1) ends of the peptide sequence. These symbols will be ignored
+        in the distance calculation.
+    fixed_gappos : bool
+        If True, insert gaps at a fixed position after the cysteine residue statring the CDR3 (typically position 6).
+        If False, find the "optimal" position for inserting the gaps to make up the difference in length
+    cutoff:
+        Will eleminate distances > cutoff to make efficient
+        use of sparse matrices.
+    n_jobs:
+        Number of jobs (processes) to use for the pairwise distance calculation
+    """
+
+    def __init__(
+        self,
+        dist_weight=3,
+        gap_penalty=4,
+        ntrim=3,
+        ctrim=2,
+        fixed_gappos=True,
+        cutoff: Union[None, int] = None,
+        n_jobs: Union[None, int] = None,
+    ):
+        if cutoff is None:
+            cutoff = 20
+        if n_jobs is None:
+            n_jobs = 1
+
+        self.dist_weight = dist_weight
+        self.gap_penalty = gap_penalty
+        self.ntrim = ntrim
+        self.ctrim = ctrim
+        self.fixed_gappos = fixed_gappos
+        self.cutoff = cutoff
+        self.n_jobs = n_jobs
+
+
     parasail_aa_alphabet = "ARNDCQEGHILKMFPSTWYVBZX"
     parasail_aa_alphabet_with_unknown = "ARNDCQEGHILKMFPSTWYVBZX*"
     tcr_dict_distance_matrix = {
@@ -800,31 +851,20 @@ class TCRdistDistanceCalculator:
         ("Y", "Y"): 0,
     }
 
-    def __init__(
-        self,
-        dist_weight=3,
-        gap_penalty=4,
-        ntrim=3,
-        ctrim=2,
-        fixed_gappos=True,
-        cutoff: Union[None, int] = None,
-        n_jobs: Union[None, int] = None,
-    ):
-        if cutoff is None:
-            cutoff = 20
-        if n_jobs is None:
-            n_jobs = 1
-
-        self.dist_weight = dist_weight
-        self.gap_penalty = gap_penalty
-        self.ntrim = ntrim
-        self.ctrim = ctrim
-        self.fixed_gappos = fixed_gappos
-        self.cutoff = cutoff
-        self.n_jobs = n_jobs
-
     @staticmethod
     def _make_numba_matrix(distance_matrix, alphabet=parasail_aa_alphabet_with_unknown):
+        """Creates a numba compatible distance matrix from a dict of tuples.
+
+        Parameters
+        ----------
+        distance_matrix : dict
+            Keys are tuples like ('A', 'C') with values containing an integer.
+        alphabet : str
+
+        Returns
+        -------
+        distance_matrix : np.ndarray, dtype=np.int32"""
+
         dm = np.zeros((len(alphabet), len(alphabet)), dtype=np.int32)
         for (aa1, aa2), d in distance_matrix.items():
             dm[alphabet.index(aa1), alphabet.index(aa2)] = d
@@ -835,6 +875,30 @@ def _make_numba_matrix(distance_matrix, alphabet=parasail_aa_alphabet_with_unkno
 
     @staticmethod
     def _seqs2mat(seqs, alphabet=parasail_aa_alphabet, max_len=None):
+        """Convert a collection of gene sequences into a
+        numpy matrix of integers for fast comparison.
+
+        Parameters
+        ----------
+        seqs : list 
+            List of strings
+
+        Returns
+        -------
+        mat : np.array  
+
+        Examples
+        --------
+        >>> seqs2mat(["CAT","HAT"])
+        array([[ 4,  0, 16],
+            [ 8,  0, 16]], dtype=int8)
+
+        Notes
+        -----
+        Requires all seqs to have the same length, therefore shorter sequences
+        are filled up with -1 entries at the end.
+        """
+
         if max_len is None:
             max_len = np.max([len(s) for s in seqs])
         mat = -1 * np.ones((len(seqs), max_len), dtype=np.int8)
@@ -866,6 +930,46 @@ def _nb_tcrdist_mat(
         fixed_gappos=True,
         cutoff=20,
     ):
+        """Computes the pairwise TCRdist distances for sequences in seqs_mat1 and seqs_mat2.
+
+        Note: to use with non-CDR3 sequences set ntrim and ctrim to 0.
+
+        Parameters
+        ----------
+        seqs_mat1/2 : np.ndarray
+            Matrix containing sequences created by seqs2mat with padding to accomodate
+            sequences of different lengths (-1 padding)
+        seqs_L1/2 : np.ndarray
+            A vector containing the length of each sequence in the respective seqs_mat matrix,
+            without the padding in seqs_mat
+        distance_matrix : np.ndarray [alphabet, alphabet] dtype=int32
+            A square distance matrix (NOT a similarity matrix).
+            Matrix must match the alphabet that was used to create
+            seqs_mat, where each AA is represented by an index into the alphabet.
+        dist_weight : int
+            Weight applied to the mismatch distances before summing with the gap penalties
+        gap_penalty : int
+            Distance penalty for the difference in the length of the two sequences
+        ntrim/ctrim : int
+            Positions trimmed off the N-terminus (0) and C-terminus (L-1) ends of the peptide sequence. These symbols will be ignored
+            in the distance calculation.
+        fixed_gappos : bool
+            If True, insert gaps at a fixed position after the cysteine residue statring the CDR3 (typically position 6).
+            If False, find the "optimal" position for inserting the gaps to make up the difference in length
+
+        Returns
+        -------
+        data_rows : 
+            List with arrays containing the non-zero data values of the result matrix per row,
+            needed to create the final scipy CSR result matrix later
+        indices_rows :
+            List with arrays containing the non-zero entry column indeces of the result matrix per row,
+            needed to create the final scipy CSR result matrix later
+        row_element_counts :
+            Arraay with integers that indicate the amount of non-zero values of the result matrix per row,
+            needed to create the final scipy CSR result matrix later
+        """
+
         assert seqs_mat1.shape[0] == seqs_L1.shape[0]
         assert seqs_mat2.shape[0] == seqs_L2.shape[0]
 
@@ -941,7 +1045,8 @@ def _nb_tcrdist_mat(
         return data_rows, indices_rows, row_element_counts
 
     def _calc_dist_mat_block(self, seqs: Sequence[str], seqs2: Optional[Sequence[str]] = None) -> csr_matrix:
-        
+        "Computes a block of the final TCRdist distance matrix and returns it as CSR matrix"
+
         if(len(seqs) == 0 or len(seqs2) == 0):
             return csr_matrix((len(seqs), len(seqs2)))
         
@@ -968,6 +1073,9 @@ def _calc_dist_mat_block(self, seqs: Sequence[str], seqs2: Optional[Sequence[str
         return sparse_distance_matrix
 
     def calc_dist_mat(self, seqs: Sequence[str], seqs2: Optional[Sequence[str]] = None) -> csr_matrix:
+        """Calculates the pairwise distances between two vectors of gene sequences based on the TCRdist distance metric
+        and returns a CSR distance matrix"""
+        
         if seqs2 is None:
             seqs2 = seqs
 

From 8e8dfe888e09e822ad552c2ed6212005fc03c237 Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Tue, 16 Apr 2024 20:45:01 +0200
Subject: [PATCH 12/27] code formatting

---
 src/scirpy/ir_dist/metrics.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index c9450a9cb..2e43b42b6 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -395,7 +395,6 @@ def _compute_block(self, seqs1, seqs2, origin):
 
 
 class TCRdistDistanceCalculator:
-
     """Computes pairwise distances between TCR CDR3 sequences based on the "tcrdist" distance metric.
 
     The code of this class is heavily based on: https://github.com/agartland/pwseqdist/blob/master/pwseqdist

From 21702cb318f430c09f71a46903d652dec32e54ed Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 16 Apr 2024 18:46:05 +0000
Subject: [PATCH 13/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/scirpy/ir_dist/metrics.py | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index 2e43b42b6..9027570d9 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -444,7 +444,6 @@ def __init__(
         self.cutoff = cutoff
         self.n_jobs = n_jobs
 
-
     parasail_aa_alphabet = "ARNDCQEGHILKMFPSTWYVBZX"
     parasail_aa_alphabet_with_unknown = "ARNDCQEGHILKMFPSTWYVBZX*"
     tcr_dict_distance_matrix = {
@@ -862,8 +861,8 @@ def _make_numba_matrix(distance_matrix, alphabet=parasail_aa_alphabet_with_unkno
 
         Returns
         -------
-        distance_matrix : np.ndarray, dtype=np.int32"""
-
+        distance_matrix : np.ndarray, dtype=np.int32
+        """
         dm = np.zeros((len(alphabet), len(alphabet)), dtype=np.int32)
         for (aa1, aa2), d in distance_matrix.items():
             dm[alphabet.index(aa1), alphabet.index(aa2)] = d
@@ -879,16 +878,16 @@ def _seqs2mat(seqs, alphabet=parasail_aa_alphabet, max_len=None):
 
         Parameters
         ----------
-        seqs : list 
+        seqs : list
             List of strings
 
         Returns
         -------
-        mat : np.array  
+        mat : np.array
 
         Examples
         --------
-        >>> seqs2mat(["CAT","HAT"])
+        >>> seqs2mat(["CAT", "HAT"])
         array([[ 4,  0, 16],
             [ 8,  0, 16]], dtype=int8)
 
@@ -897,7 +896,6 @@ def _seqs2mat(seqs, alphabet=parasail_aa_alphabet, max_len=None):
         Requires all seqs to have the same length, therefore shorter sequences
         are filled up with -1 entries at the end.
         """
-
         if max_len is None:
             max_len = np.max([len(s) for s in seqs])
         mat = -1 * np.ones((len(seqs), max_len), dtype=np.int8)
@@ -958,7 +956,7 @@ def _nb_tcrdist_mat(
 
         Returns
         -------
-        data_rows : 
+        data_rows :
             List with arrays containing the non-zero data values of the result matrix per row,
             needed to create the final scipy CSR result matrix later
         indices_rows :
@@ -968,7 +966,6 @@ def _nb_tcrdist_mat(
             Arraay with integers that indicate the amount of non-zero values of the result matrix per row,
             needed to create the final scipy CSR result matrix later
         """
-
         assert seqs_mat1.shape[0] == seqs_L1.shape[0]
         assert seqs_mat2.shape[0] == seqs_L2.shape[0]
 
@@ -1044,9 +1041,8 @@ def _nb_tcrdist_mat(
         return data_rows, indices_rows, row_element_counts
 
     def _calc_dist_mat_block(self, seqs: Sequence[str], seqs2: Optional[Sequence[str]] = None) -> csr_matrix:
-        "Computes a block of the final TCRdist distance matrix and returns it as CSR matrix"
-
-        if(len(seqs) == 0 or len(seqs2) == 0):
+        """Computes a block of the final TCRdist distance matrix and returns it as CSR matrix"""
+        if len(seqs) == 0 or len(seqs2) == 0:
             return csr_matrix((len(seqs), len(seqs2)))
 
         seqs_mat1, seqs_L1 = self._seqs2mat(seqs)
@@ -1073,8 +1069,8 @@ def _calc_dist_mat_block(self, seqs: Sequence[str], seqs2: Optional[Sequence[str
 
     def calc_dist_mat(self, seqs: Sequence[str], seqs2: Optional[Sequence[str]] = None) -> csr_matrix:
         """Calculates the pairwise distances between two vectors of gene sequences based on the TCRdist distance metric
-        and returns a CSR distance matrix"""
-        
+        and returns a CSR distance matrix
+        """
         if seqs2 is None:
             seqs2 = seqs
 

From 51112d80aaf680d4cf413beddc7206905b26a383 Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Thu, 18 Apr 2024 13:09:13 +0200
Subject: [PATCH 14/27] handling of default values for cutoff and n_jobs in
 _get_distance_calculator adapted

---
 src/scirpy/ir_dist/__init__.py | 18 +++++++++++-------
 src/scirpy/ir_dist/metrics.py  |  9 ++-------
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/src/scirpy/ir_dist/__init__.py b/src/scirpy/ir_dist/__init__.py
index 18831a321..05820f0b6 100644
--- a/src/scirpy/ir_dist/__init__.py
+++ b/src/scirpy/ir_dist/__init__.py
@@ -73,7 +73,7 @@ def _get_metric_key(metric: MetricType) -> str:
     return "custom" if isinstance(metric, metrics.DistanceCalculator) else metric  # type: ignore
 
 
-def _get_distance_calculator(metric: MetricType, cutoff: Union[int, None], *, n_jobs=None, **kwargs):
+def _get_distance_calculator(metric: MetricType, cutoff: Union[int, None], *, n_jobs=-1, **kwargs):
     """Returns an instance of :class:`~scirpy.ir_dist.metrics.DistanceCalculator`
     given a metric.
 
@@ -83,20 +83,24 @@ def _get_distance_calculator(metric: MetricType, cutoff: Union[int, None], *, n_
         metric = "identity"
         cutoff = 0
 
+    # Let's rely on the default set by the class if cutoff is None
+    if cutoff is not None:
+        kwargs["cutoff"] = cutoff
+
     if isinstance(metric, metrics.DistanceCalculator):
         dist_calc = metric
     elif metric == "alignment":
-        dist_calc = metrics.FastAlignmentDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, estimated_penalty=0, **kwargs)
+        dist_calc = metrics.FastAlignmentDistanceCalculator(n_jobs=n_jobs, estimated_penalty=0, **kwargs)
     elif metric == "fastalignment":
-        dist_calc = metrics.FastAlignmentDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs)
+        dist_calc = metrics.FastAlignmentDistanceCalculator(n_jobs=n_jobs, **kwargs)
     elif metric == "identity":
-        dist_calc = metrics.IdentityDistanceCalculator(cutoff=cutoff, **kwargs)
+        dist_calc = metrics.IdentityDistanceCalculator(**kwargs)
     elif metric == "levenshtein":
-        dist_calc = metrics.LevenshteinDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs)
+        dist_calc = metrics.LevenshteinDistanceCalculator(n_jobs=n_jobs, **kwargs)
     elif metric == "hamming":
-        dist_calc = metrics.HammingDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs)
+        dist_calc = metrics.HammingDistanceCalculator(n_jobs=n_jobs, **kwargs)
     elif metric == "tcrdist":
-        dist_calc = metrics.TCRdistDistanceCalculator(cutoff=cutoff, n_jobs=n_jobs, **kwargs)
+        dist_calc = metrics.TCRdistDistanceCalculator(n_jobs=n_jobs, **kwargs)
     else:
         raise ValueError("Invalid distance metric.")
 
diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index 2e43b42b6..31571bd64 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -428,14 +428,9 @@ def __init__(
         ntrim=3,
         ctrim=2,
         fixed_gappos=True,
-        cutoff: Union[None, int] = None,
-        n_jobs: Union[None, int] = None,
+        cutoff: int = 20,
+        n_jobs: int = 1,
     ):
-        if cutoff is None:
-            cutoff = 20
-        if n_jobs is None:
-            n_jobs = 1
-
         self.dist_weight = dist_weight
         self.gap_penalty = gap_penalty
         self.ntrim = ntrim

From 101b4115e97d005968f5aef0d93cb1a01adaaa8f Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Thu, 18 Apr 2024 19:19:58 +0200
Subject: [PATCH 15/27] auto formatting disabled for tcr_dict_distance_matrix

---
 src/scirpy/ir_dist/metrics.py | 406 +---------------------------------
 1 file changed, 4 insertions(+), 402 deletions(-)

diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index 31571bd64..e3b7008e8 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -442,409 +442,11 @@ def __init__(
 
     parasail_aa_alphabet = "ARNDCQEGHILKMFPSTWYVBZX"
     parasail_aa_alphabet_with_unknown = "ARNDCQEGHILKMFPSTWYVBZX*"
-    tcr_dict_distance_matrix = {
-        ("A", "A"): 0,
-        ("A", "C"): 4,
-        ("A", "D"): 4,
-        ("A", "E"): 4,
-        ("A", "F"): 4,
-        ("A", "G"): 4,
-        ("A", "H"): 4,
-        ("A", "I"): 4,
-        ("A", "K"): 4,
-        ("A", "L"): 4,
-        ("A", "M"): 4,
-        ("A", "N"): 4,
-        ("A", "P"): 4,
-        ("A", "Q"): 4,
-        ("A", "R"): 4,
-        ("A", "S"): 3,
-        ("A", "T"): 4,
-        ("A", "V"): 4,
-        ("A", "W"): 4,
-        ("A", "Y"): 4,
-        ("C", "A"): 4,
-        ("C", "C"): 0,
-        ("C", "D"): 4,
-        ("C", "E"): 4,
-        ("C", "F"): 4,
-        ("C", "G"): 4,
-        ("C", "H"): 4,
-        ("C", "I"): 4,
-        ("C", "K"): 4,
-        ("C", "L"): 4,
-        ("C", "M"): 4,
-        ("C", "N"): 4,
-        ("C", "P"): 4,
-        ("C", "Q"): 4,
-        ("C", "R"): 4,
-        ("C", "S"): 4,
-        ("C", "T"): 4,
-        ("C", "V"): 4,
-        ("C", "W"): 4,
-        ("C", "Y"): 4,
-        ("D", "A"): 4,
-        ("D", "C"): 4,
-        ("D", "D"): 0,
-        ("D", "E"): 2,
-        ("D", "F"): 4,
-        ("D", "G"): 4,
-        ("D", "H"): 4,
-        ("D", "I"): 4,
-        ("D", "K"): 4,
-        ("D", "L"): 4,
-        ("D", "M"): 4,
-        ("D", "N"): 3,
-        ("D", "P"): 4,
-        ("D", "Q"): 4,
-        ("D", "R"): 4,
-        ("D", "S"): 4,
-        ("D", "T"): 4,
-        ("D", "V"): 4,
-        ("D", "W"): 4,
-        ("D", "Y"): 4,
-        ("E", "A"): 4,
-        ("E", "C"): 4,
-        ("E", "D"): 2,
-        ("E", "E"): 0,
-        ("E", "F"): 4,
-        ("E", "G"): 4,
-        ("E", "H"): 4,
-        ("E", "I"): 4,
-        ("E", "K"): 3,
-        ("E", "L"): 4,
-        ("E", "M"): 4,
-        ("E", "N"): 4,
-        ("E", "P"): 4,
-        ("E", "Q"): 2,
-        ("E", "R"): 4,
-        ("E", "S"): 4,
-        ("E", "T"): 4,
-        ("E", "V"): 4,
-        ("E", "W"): 4,
-        ("E", "Y"): 4,
-        ("F", "A"): 4,
-        ("F", "C"): 4,
-        ("F", "D"): 4,
-        ("F", "E"): 4,
-        ("F", "F"): 0,
-        ("F", "G"): 4,
-        ("F", "H"): 4,
-        ("F", "I"): 4,
-        ("F", "K"): 4,
-        ("F", "L"): 4,
-        ("F", "M"): 4,
-        ("F", "N"): 4,
-        ("F", "P"): 4,
-        ("F", "Q"): 4,
-        ("F", "R"): 4,
-        ("F", "S"): 4,
-        ("F", "T"): 4,
-        ("F", "V"): 4,
-        ("F", "W"): 3,
-        ("F", "Y"): 1,
-        ("G", "A"): 4,
-        ("G", "C"): 4,
-        ("G", "D"): 4,
-        ("G", "E"): 4,
-        ("G", "F"): 4,
-        ("G", "G"): 0,
-        ("G", "H"): 4,
-        ("G", "I"): 4,
-        ("G", "K"): 4,
-        ("G", "L"): 4,
-        ("G", "M"): 4,
-        ("G", "N"): 4,
-        ("G", "P"): 4,
-        ("G", "Q"): 4,
-        ("G", "R"): 4,
-        ("G", "S"): 4,
-        ("G", "T"): 4,
-        ("G", "V"): 4,
-        ("G", "W"): 4,
-        ("G", "Y"): 4,
-        ("H", "A"): 4,
-        ("H", "C"): 4,
-        ("H", "D"): 4,
-        ("H", "E"): 4,
-        ("H", "F"): 4,
-        ("H", "G"): 4,
-        ("H", "H"): 0,
-        ("H", "I"): 4,
-        ("H", "K"): 4,
-        ("H", "L"): 4,
-        ("H", "M"): 4,
-        ("H", "N"): 3,
-        ("H", "P"): 4,
-        ("H", "Q"): 4,
-        ("H", "R"): 4,
-        ("H", "S"): 4,
-        ("H", "T"): 4,
-        ("H", "V"): 4,
-        ("H", "W"): 4,
-        ("H", "Y"): 2,
-        ("I", "A"): 4,
-        ("I", "C"): 4,
-        ("I", "D"): 4,
-        ("I", "E"): 4,
-        ("I", "F"): 4,
-        ("I", "G"): 4,
-        ("I", "H"): 4,
-        ("I", "I"): 0,
-        ("I", "K"): 4,
-        ("I", "L"): 2,
-        ("I", "M"): 3,
-        ("I", "N"): 4,
-        ("I", "P"): 4,
-        ("I", "Q"): 4,
-        ("I", "R"): 4,
-        ("I", "S"): 4,
-        ("I", "T"): 4,
-        ("I", "V"): 1,
-        ("I", "W"): 4,
-        ("I", "Y"): 4,
-        ("K", "A"): 4,
-        ("K", "C"): 4,
-        ("K", "D"): 4,
-        ("K", "E"): 3,
-        ("K", "F"): 4,
-        ("K", "G"): 4,
-        ("K", "H"): 4,
-        ("K", "I"): 4,
-        ("K", "K"): 0,
-        ("K", "L"): 4,
-        ("K", "M"): 4,
-        ("K", "N"): 4,
-        ("K", "P"): 4,
-        ("K", "Q"): 3,
-        ("K", "R"): 2,
-        ("K", "S"): 4,
-        ("K", "T"): 4,
-        ("K", "V"): 4,
-        ("K", "W"): 4,
-        ("K", "Y"): 4,
-        ("L", "A"): 4,
-        ("L", "C"): 4,
-        ("L", "D"): 4,
-        ("L", "E"): 4,
-        ("L", "F"): 4,
-        ("L", "G"): 4,
-        ("L", "H"): 4,
-        ("L", "I"): 2,
-        ("L", "K"): 4,
-        ("L", "L"): 0,
-        ("L", "M"): 2,
-        ("L", "N"): 4,
-        ("L", "P"): 4,
-        ("L", "Q"): 4,
-        ("L", "R"): 4,
-        ("L", "S"): 4,
-        ("L", "T"): 4,
-        ("L", "V"): 3,
-        ("L", "W"): 4,
-        ("L", "Y"): 4,
-        ("M", "A"): 4,
-        ("M", "C"): 4,
-        ("M", "D"): 4,
-        ("M", "E"): 4,
-        ("M", "F"): 4,
-        ("M", "G"): 4,
-        ("M", "H"): 4,
-        ("M", "I"): 3,
-        ("M", "K"): 4,
-        ("M", "L"): 2,
-        ("M", "M"): 0,
-        ("M", "N"): 4,
-        ("M", "P"): 4,
-        ("M", "Q"): 4,
-        ("M", "R"): 4,
-        ("M", "S"): 4,
-        ("M", "T"): 4,
-        ("M", "V"): 3,
-        ("M", "W"): 4,
-        ("M", "Y"): 4,
-        ("N", "A"): 4,
-        ("N", "C"): 4,
-        ("N", "D"): 3,
-        ("N", "E"): 4,
-        ("N", "F"): 4,
-        ("N", "G"): 4,
-        ("N", "H"): 3,
-        ("N", "I"): 4,
-        ("N", "K"): 4,
-        ("N", "L"): 4,
-        ("N", "M"): 4,
-        ("N", "N"): 0,
-        ("N", "P"): 4,
-        ("N", "Q"): 4,
-        ("N", "R"): 4,
-        ("N", "S"): 3,
-        ("N", "T"): 4,
-        ("N", "V"): 4,
-        ("N", "W"): 4,
-        ("N", "Y"): 4,
-        ("P", "A"): 4,
-        ("P", "C"): 4,
-        ("P", "D"): 4,
-        ("P", "E"): 4,
-        ("P", "F"): 4,
-        ("P", "G"): 4,
-        ("P", "H"): 4,
-        ("P", "I"): 4,
-        ("P", "K"): 4,
-        ("P", "L"): 4,
-        ("P", "M"): 4,
-        ("P", "N"): 4,
-        ("P", "P"): 0,
-        ("P", "Q"): 4,
-        ("P", "R"): 4,
-        ("P", "S"): 4,
-        ("P", "T"): 4,
-        ("P", "V"): 4,
-        ("P", "W"): 4,
-        ("P", "Y"): 4,
-        ("Q", "A"): 4,
-        ("Q", "C"): 4,
-        ("Q", "D"): 4,
-        ("Q", "E"): 2,
-        ("Q", "F"): 4,
-        ("Q", "G"): 4,
-        ("Q", "H"): 4,
-        ("Q", "I"): 4,
-        ("Q", "K"): 3,
-        ("Q", "L"): 4,
-        ("Q", "M"): 4,
-        ("Q", "N"): 4,
-        ("Q", "P"): 4,
-        ("Q", "Q"): 0,
-        ("Q", "R"): 3,
-        ("Q", "S"): 4,
-        ("Q", "T"): 4,
-        ("Q", "V"): 4,
-        ("Q", "W"): 4,
-        ("Q", "Y"): 4,
-        ("R", "A"): 4,
-        ("R", "C"): 4,
-        ("R", "D"): 4,
-        ("R", "E"): 4,
-        ("R", "F"): 4,
-        ("R", "G"): 4,
-        ("R", "H"): 4,
-        ("R", "I"): 4,
-        ("R", "K"): 2,
-        ("R", "L"): 4,
-        ("R", "M"): 4,
-        ("R", "N"): 4,
-        ("R", "P"): 4,
-        ("R", "Q"): 3,
-        ("R", "R"): 0,
-        ("R", "S"): 4,
-        ("R", "T"): 4,
-        ("R", "V"): 4,
-        ("R", "W"): 4,
-        ("R", "Y"): 4,
-        ("S", "A"): 3,
-        ("S", "C"): 4,
-        ("S", "D"): 4,
-        ("S", "E"): 4,
-        ("S", "F"): 4,
-        ("S", "G"): 4,
-        ("S", "H"): 4,
-        ("S", "I"): 4,
-        ("S", "K"): 4,
-        ("S", "L"): 4,
-        ("S", "M"): 4,
-        ("S", "N"): 3,
-        ("S", "P"): 4,
-        ("S", "Q"): 4,
-        ("S", "R"): 4,
-        ("S", "S"): 0,
-        ("S", "T"): 3,
-        ("S", "V"): 4,
-        ("S", "W"): 4,
-        ("S", "Y"): 4,
-        ("T", "A"): 4,
-        ("T", "C"): 4,
-        ("T", "D"): 4,
-        ("T", "E"): 4,
-        ("T", "F"): 4,
-        ("T", "G"): 4,
-        ("T", "H"): 4,
-        ("T", "I"): 4,
-        ("T", "K"): 4,
-        ("T", "L"): 4,
-        ("T", "M"): 4,
-        ("T", "N"): 4,
-        ("T", "P"): 4,
-        ("T", "Q"): 4,
-        ("T", "R"): 4,
-        ("T", "S"): 3,
-        ("T", "T"): 0,
-        ("T", "V"): 4,
-        ("T", "W"): 4,
-        ("T", "Y"): 4,
-        ("V", "A"): 4,
-        ("V", "C"): 4,
-        ("V", "D"): 4,
-        ("V", "E"): 4,
-        ("V", "F"): 4,
-        ("V", "G"): 4,
-        ("V", "H"): 4,
-        ("V", "I"): 1,
-        ("V", "K"): 4,
-        ("V", "L"): 3,
-        ("V", "M"): 3,
-        ("V", "N"): 4,
-        ("V", "P"): 4,
-        ("V", "Q"): 4,
-        ("V", "R"): 4,
-        ("V", "S"): 4,
-        ("V", "T"): 4,
-        ("V", "V"): 0,
-        ("V", "W"): 4,
-        ("V", "Y"): 4,
-        ("W", "A"): 4,
-        ("W", "C"): 4,
-        ("W", "D"): 4,
-        ("W", "E"): 4,
-        ("W", "F"): 3,
-        ("W", "G"): 4,
-        ("W", "H"): 4,
-        ("W", "I"): 4,
-        ("W", "K"): 4,
-        ("W", "L"): 4,
-        ("W", "M"): 4,
-        ("W", "N"): 4,
-        ("W", "P"): 4,
-        ("W", "Q"): 4,
-        ("W", "R"): 4,
-        ("W", "S"): 4,
-        ("W", "T"): 4,
-        ("W", "V"): 4,
-        ("W", "W"): 0,
-        ("W", "Y"): 2,
-        ("Y", "A"): 4,
-        ("Y", "C"): 4,
-        ("Y", "D"): 4,
-        ("Y", "E"): 4,
-        ("Y", "F"): 1,
-        ("Y", "G"): 4,
-        ("Y", "H"): 2,
-        ("Y", "I"): 4,
-        ("Y", "K"): 4,
-        ("Y", "L"): 4,
-        ("Y", "M"): 4,
-        ("Y", "N"): 4,
-        ("Y", "P"): 4,
-        ("Y", "Q"): 4,
-        ("Y", "R"): 4,
-        ("Y", "S"): 4,
-        ("Y", "T"): 4,
-        ("Y", "V"): 4,
-        ("Y", "W"): 2,
-        ("Y", "Y"): 0,
-    }
+    # fmt: off
+    tcr_dict_distance_matrix = {('A', 'A'): 0,  ('A', 'C'): 4,  ('A', 'D'): 4,  ('A', 'E'): 4,  ('A', 'F'): 4,  ('A', 'G'): 4,  ('A', 'H'): 4,  ('A', 'I'): 4,  ('A', 'K'): 4,  ('A', 'L'): 4,  ('A', 'M'): 4,  ('A', 'N'): 4,  ('A', 'P'): 4,  ('A', 'Q'): 4,  ('A', 'R'): 4,  ('A', 'S'): 3,  ('A', 'T'): 4,  ('A', 'V'): 4,  ('A', 'W'): 4,  ('A', 'Y'): 4,  ('C', 'A'): 4,  ('C', 'C'): 0,  ('C', 'D'): 4,  ('C', 'E'): 4,  ('C', 'F'): 4,  ('C', 'G'): 4,  ('C', 'H'): 4,  ('C', 'I'): 4,  ('C', 'K'): 4,  ('C', 'L'): 4,  ('C', 'M'): 4,  ('C', 'N'): 4,  ('C', 'P'): 4,  ('C', 'Q'): 4,  ('C', 'R'): 4,  ('C', 'S'): 4,  ('C', 'T'): 4,  ('C', 'V'): 4,  ('C', 'W'): 4,  ('C', 'Y'): 4,  ('D', 'A'): 4,  ('D', 'C'): 4,  ('D', 'D'): 0,  ('D', 'E'): 2,  ('D', 'F'): 4,  ('D', 'G'): 4,  ('D', 'H'): 4,  ('D', 'I'): 4,  ('D', 'K'): 4,  ('D', 'L'): 4,  ('D', 'M'): 4,  ('D', 'N'): 3,  ('D', 'P'): 4,  ('D', 'Q'): 4,  ('D', 'R'): 4,  ('D', 'S'): 4,  ('D', 'T'): 4,  ('D', 'V'): 4,  ('D', 'W'): 4,  ('D', 'Y'): 4,  ('E', 'A'): 4,  ('E', 'C'): 4,  ('E', 'D'): 2,  ('E', 'E'): 0,  ('E', 'F'): 4,  ('E', 'G'): 4,  ('E', 'H'): 4,  ('E', 'I'): 4,  ('E', 'K'): 3,  ('E', 'L'): 4,  ('E', 'M'): 4,  ('E', 'N'): 4,  ('E', 'P'): 4,  ('E', 'Q'): 2,  ('E', 'R'): 4,  ('E', 'S'): 4,  ('E', 'T'): 4,  ('E', 'V'): 4,  ('E', 'W'): 4,  ('E', 'Y'): 4,  ('F', 'A'): 4,  ('F', 'C'): 4,  ('F', 'D'): 4,  ('F', 'E'): 4,  ('F', 'F'): 0,  ('F', 'G'): 4,  ('F', 'H'): 4,  ('F', 'I'): 4,  ('F', 'K'): 4,  ('F', 'L'): 4,  ('F', 'M'): 4,  ('F', 'N'): 4,  ('F', 'P'): 4,  ('F', 'Q'): 4,  ('F', 'R'): 4,  ('F', 'S'): 4,  ('F', 'T'): 4,  ('F', 'V'): 4,  ('F', 'W'): 3,  ('F', 'Y'): 1,  ('G', 'A'): 4,  ('G', 'C'): 4,  ('G', 'D'): 4,  ('G', 'E'): 4,  ('G', 'F'): 4,  ('G', 'G'): 0,  ('G', 'H'): 4,  ('G', 'I'): 4,  ('G', 'K'): 4,  ('G', 'L'): 4,  ('G', 'M'): 4,  ('G', 'N'): 4,  ('G', 'P'): 4,  ('G', 'Q'): 4,  ('G', 'R'): 4,  ('G', 'S'): 4,  ('G', 'T'): 4,  ('G', 'V'): 4,  ('G', 'W'): 4,  ('G', 'Y'): 4,  ('H', 'A'): 4,  ('H', 'C'): 4,  ('H', 'D'): 4,  ('H', 'E'): 4,  ('H', 'F'): 4,  ('H', 'G'): 4,  ('H', 'H'): 0,  ('H', 'I'): 4,  ('H', 'K'): 4,  ('H', 'L'): 4,  ('H', 'M'): 4,  ('H', 'N'): 3,  ('H', 'P'): 4,  ('H', 'Q'): 4,  ('H', 'R'): 4,  ('H', 'S'): 4,  ('H', 'T'): 4,  ('H', 'V'): 4,  ('H', 'W'): 4,  ('H', 'Y'): 2,  ('I', 'A'): 4,  ('I', 'C'): 4,  ('I', 'D'): 4,  ('I', 'E'): 4,  ('I', 'F'): 4,  ('I', 'G'): 4,  ('I', 'H'): 4,  ('I', 'I'): 0,  ('I', 'K'): 4,  ('I', 'L'): 2,  ('I', 'M'): 3,  ('I', 'N'): 4,  ('I', 'P'): 4,  ('I', 'Q'): 4,  ('I', 'R'): 4,  ('I', 'S'): 4,  ('I', 'T'): 4,  ('I', 'V'): 1,  ('I', 'W'): 4,  ('I', 'Y'): 4,  ('K', 'A'): 4,  ('K', 'C'): 4,  ('K', 'D'): 4,  ('K', 'E'): 3,  ('K', 'F'): 4,  ('K', 'G'): 4,  ('K', 'H'): 4,  ('K', 'I'): 4,  ('K', 'K'): 0,  ('K', 'L'): 4,  ('K', 'M'): 4,  ('K', 'N'): 4,  ('K', 'P'): 4,  ('K', 'Q'): 3,  ('K', 'R'): 2,  ('K', 'S'): 4,  ('K', 'T'): 4,  ('K', 'V'): 4,  ('K', 'W'): 4,  ('K', 'Y'): 4,  ('L', 'A'): 4,  ('L', 'C'): 4,  ('L', 'D'): 4,  ('L', 'E'): 4,  ('L', 'F'): 4,  ('L', 'G'): 4,  ('L', 'H'): 4,  ('L', 'I'): 2,  ('L', 'K'): 4,  ('L', 'L'): 0,  ('L', 'M'): 2,  ('L', 'N'): 4,  ('L', 'P'): 4,  ('L', 'Q'): 4,  ('L', 'R'): 4,  ('L', 'S'): 4,  ('L', 'T'): 4,  ('L', 'V'): 3,  ('L', 'W'): 4,  ('L', 'Y'): 4,  ('M', 'A'): 4,  ('M', 'C'): 4,  ('M', 'D'): 4,  ('M', 'E'): 4,  ('M', 'F'): 4,  ('M', 'G'): 4,  ('M', 'H'): 4,  ('M', 'I'): 3,  ('M', 'K'): 4,  ('M', 'L'): 2,  ('M', 'M'): 0,  ('M', 'N'): 4,  ('M', 'P'): 4,  ('M', 'Q'): 4,  ('M', 'R'): 4,  ('M', 'S'): 4,  ('M', 'T'): 4,  ('M', 'V'): 3,  ('M', 'W'): 4,  ('M', 'Y'): 4,  ('N', 'A'): 4,  ('N', 'C'): 4,  ('N', 'D'): 3,  ('N', 'E'): 4,  ('N', 'F'): 4,  ('N', 'G'): 4,  ('N', 'H'): 3,  ('N', 'I'): 4,  ('N', 'K'): 4,  ('N', 'L'): 4,  ('N', 'M'): 4,  ('N', 'N'): 0,  ('N', 'P'): 4,  ('N', 'Q'): 4,  ('N', 'R'): 4,  ('N', 'S'): 3,  ('N', 'T'): 4,  ('N', 'V'): 4,  ('N', 'W'): 4,  ('N', 'Y'): 4,  ('P', 'A'): 4,  ('P', 'C'): 4,  ('P', 'D'): 4,  ('P', 'E'): 4,  ('P', 'F'): 4,  ('P', 'G'): 4,  ('P', 'H'): 4,  ('P', 'I'): 4,  ('P', 'K'): 4,  ('P', 'L'): 4,  ('P', 'M'): 4,  ('P', 'N'): 4,  ('P', 'P'): 0,  ('P', 'Q'): 4,  ('P', 'R'): 4,  ('P', 'S'): 4,  ('P', 'T'): 4,  ('P', 'V'): 4,  ('P', 'W'): 4,  ('P', 'Y'): 4,  ('Q', 'A'): 4,  ('Q', 'C'): 4,  ('Q', 'D'): 4,  ('Q', 'E'): 2,  ('Q', 'F'): 4,  ('Q', 'G'): 4,  ('Q', 'H'): 4,  ('Q', 'I'): 4,  ('Q', 'K'): 3,  ('Q', 'L'): 4,  ('Q', 'M'): 4,  ('Q', 'N'): 4,  ('Q', 'P'): 4,  ('Q', 'Q'): 0,  ('Q', 'R'): 3,  ('Q', 'S'): 4,  ('Q', 'T'): 4,  ('Q', 'V'): 4,  ('Q', 'W'): 4,  ('Q', 'Y'): 4,  ('R', 'A'): 4,  ('R', 'C'): 4,  ('R', 'D'): 4,  ('R', 'E'): 4,  ('R', 'F'): 4,  ('R', 'G'): 4,  ('R', 'H'): 4,  ('R', 'I'): 4,  ('R', 'K'): 2,  ('R', 'L'): 4,  ('R', 'M'): 4,  ('R', 'N'): 4,  ('R', 'P'): 4,  ('R', 'Q'): 3,  ('R', 'R'): 0,  ('R', 'S'): 4,  ('R', 'T'): 4,  ('R', 'V'): 4,  ('R', 'W'): 4,  ('R', 'Y'): 4,  ('S', 'A'): 3,  ('S', 'C'): 4,  ('S', 'D'): 4,  ('S', 'E'): 4,  ('S', 'F'): 4,  ('S', 'G'): 4,  ('S', 'H'): 4,  ('S', 'I'): 4,  ('S', 'K'): 4,  ('S', 'L'): 4,  ('S', 'M'): 4,  ('S', 'N'): 3,  ('S', 'P'): 4,  ('S', 'Q'): 4,  ('S', 'R'): 4,  ('S', 'S'): 0,  ('S', 'T'): 3,  ('S', 'V'): 4,  ('S', 'W'): 4,  ('S', 'Y'): 4,  ('T', 'A'): 4,  ('T', 'C'): 4,  ('T', 'D'): 4,  ('T', 'E'): 4,  ('T', 'F'): 4,  ('T', 'G'): 4,  ('T', 'H'): 4,  ('T', 'I'): 4,  ('T', 'K'): 4,  ('T', 'L'): 4,  ('T', 'M'): 4,  ('T', 'N'): 4,  ('T', 'P'): 4,  ('T', 'Q'): 4,  ('T', 'R'): 4,  ('T', 'S'): 3,  ('T', 'T'): 0,  ('T', 'V'): 4,  ('T', 'W'): 4,  ('T', 'Y'): 4,  ('V', 'A'): 4,  ('V', 'C'): 4,  ('V', 'D'): 4,  ('V', 'E'): 4,  ('V', 'F'): 4,  ('V', 'G'): 4,  ('V', 'H'): 4,  ('V', 'I'): 1,  ('V', 'K'): 4,  ('V', 'L'): 3,  ('V', 'M'): 3,  ('V', 'N'): 4,  ('V', 'P'): 4,  ('V', 'Q'): 4,  ('V', 'R'): 4,  ('V', 'S'): 4,  ('V', 'T'): 4,  ('V', 'V'): 0,  ('V', 'W'): 4,  ('V', 'Y'): 4,  ('W', 'A'): 4,  ('W', 'C'): 4,  ('W', 'D'): 4,  ('W', 'E'): 4,  ('W', 'F'): 3,  ('W', 'G'): 4,  ('W', 'H'): 4,  ('W', 'I'): 4,  ('W', 'K'): 4,  ('W', 'L'): 4,  ('W', 'M'): 4,  ('W', 'N'): 4,  ('W', 'P'): 4,  ('W', 'Q'): 4,  ('W', 'R'): 4,  ('W', 'S'): 4,  ('W', 'T'): 4,  ('W', 'V'): 4,  ('W', 'W'): 0,  ('W', 'Y'): 2,  ('Y', 'A'): 4,  ('Y', 'C'): 4,  ('Y', 'D'): 4,  ('Y', 'E'): 4,  ('Y', 'F'): 1,  ('Y', 'G'): 4,  ('Y', 'H'): 2,  ('Y', 'I'): 4,  ('Y', 'K'): 4,  ('Y', 'L'): 4,  ('Y', 'M'): 4,  ('Y', 'N'): 4,  ('Y', 'P'): 4,  ('Y', 'Q'): 4,  ('Y', 'R'): 4,  ('Y', 'S'): 4,  ('Y', 'T'): 4,  ('Y', 'V'): 4,  ('Y', 'W'): 2,  ('Y', 'Y'): 0}
+    # fmt: on
 
+    
     @staticmethod
     def _make_numba_matrix(distance_matrix, alphabet=parasail_aa_alphabet_with_unknown):
         """Creates a numba compatible distance matrix from a dict of tuples.

From 7ac26ebfcfc8abe9e21994e73a3b24e9d1c53d6e Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Thu, 18 Apr 2024 19:48:56 +0200
Subject: [PATCH 16/27] added data type hints to functions and adapted function
 comments

---
 src/scirpy/ir_dist/metrics.py | 89 ++++++++++++++++++-----------------
 1 file changed, 47 insertions(+), 42 deletions(-)

diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index e3b7008e8..faa10fea0 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -400,18 +400,18 @@ class TCRdistDistanceCalculator:
     The code of this class is heavily based on: https://github.com/agartland/pwseqdist/blob/master/pwseqdist
 
     Using default weight, gap penalty, ntrim and ctrim is equivalent to the
-    original distance published in Dash et al, (2017).
+    original distance published in :cite:`TCRdist`.
 
     Parameters
     ----------
-    dist_weight : int
+    dist_weight:
         Weight applied to the mismatch distances before summing with the gap penalties
-    gap_penalty : int
+    gap_penalty:
         Distance penalty for the difference in the length of the two sequences
-    ntrim/ctrim : int
+    ntrim/ctrim:
         Positions trimmed off the N-terminus (0) and C-terminus (L-1) ends of the peptide sequence. These symbols will be ignored
         in the distance calculation.
-    fixed_gappos : bool
+    fixed_gappos:
         If True, insert gaps at a fixed position after the cysteine residue statring the CDR3 (typically position 6).
         If False, find the "optimal" position for inserting the gaps to make up the difference in length
     cutoff:
@@ -423,11 +423,11 @@ class TCRdistDistanceCalculator:
 
     def __init__(
         self,
-        dist_weight=3,
-        gap_penalty=4,
-        ntrim=3,
-        ctrim=2,
-        fixed_gappos=True,
+        dist_weight: int = 3,
+        gap_penalty: int = 4,
+        ntrim: int = 3,
+        ctrim: int = 2,
+        fixed_gappos: bool = True,
         cutoff: int = 20,
         n_jobs: int = 1,
     ):
@@ -446,20 +446,22 @@ def __init__(
     tcr_dict_distance_matrix = {('A', 'A'): 0,  ('A', 'C'): 4,  ('A', 'D'): 4,  ('A', 'E'): 4,  ('A', 'F'): 4,  ('A', 'G'): 4,  ('A', 'H'): 4,  ('A', 'I'): 4,  ('A', 'K'): 4,  ('A', 'L'): 4,  ('A', 'M'): 4,  ('A', 'N'): 4,  ('A', 'P'): 4,  ('A', 'Q'): 4,  ('A', 'R'): 4,  ('A', 'S'): 3,  ('A', 'T'): 4,  ('A', 'V'): 4,  ('A', 'W'): 4,  ('A', 'Y'): 4,  ('C', 'A'): 4,  ('C', 'C'): 0,  ('C', 'D'): 4,  ('C', 'E'): 4,  ('C', 'F'): 4,  ('C', 'G'): 4,  ('C', 'H'): 4,  ('C', 'I'): 4,  ('C', 'K'): 4,  ('C', 'L'): 4,  ('C', 'M'): 4,  ('C', 'N'): 4,  ('C', 'P'): 4,  ('C', 'Q'): 4,  ('C', 'R'): 4,  ('C', 'S'): 4,  ('C', 'T'): 4,  ('C', 'V'): 4,  ('C', 'W'): 4,  ('C', 'Y'): 4,  ('D', 'A'): 4,  ('D', 'C'): 4,  ('D', 'D'): 0,  ('D', 'E'): 2,  ('D', 'F'): 4,  ('D', 'G'): 4,  ('D', 'H'): 4,  ('D', 'I'): 4,  ('D', 'K'): 4,  ('D', 'L'): 4,  ('D', 'M'): 4,  ('D', 'N'): 3,  ('D', 'P'): 4,  ('D', 'Q'): 4,  ('D', 'R'): 4,  ('D', 'S'): 4,  ('D', 'T'): 4,  ('D', 'V'): 4,  ('D', 'W'): 4,  ('D', 'Y'): 4,  ('E', 'A'): 4,  ('E', 'C'): 4,  ('E', 'D'): 2,  ('E', 'E'): 0,  ('E', 'F'): 4,  ('E', 'G'): 4,  ('E', 'H'): 4,  ('E', 'I'): 4,  ('E', 'K'): 3,  ('E', 'L'): 4,  ('E', 'M'): 4,  ('E', 'N'): 4,  ('E', 'P'): 4,  ('E', 'Q'): 2,  ('E', 'R'): 4,  ('E', 'S'): 4,  ('E', 'T'): 4,  ('E', 'V'): 4,  ('E', 'W'): 4,  ('E', 'Y'): 4,  ('F', 'A'): 4,  ('F', 'C'): 4,  ('F', 'D'): 4,  ('F', 'E'): 4,  ('F', 'F'): 0,  ('F', 'G'): 4,  ('F', 'H'): 4,  ('F', 'I'): 4,  ('F', 'K'): 4,  ('F', 'L'): 4,  ('F', 'M'): 4,  ('F', 'N'): 4,  ('F', 'P'): 4,  ('F', 'Q'): 4,  ('F', 'R'): 4,  ('F', 'S'): 4,  ('F', 'T'): 4,  ('F', 'V'): 4,  ('F', 'W'): 3,  ('F', 'Y'): 1,  ('G', 'A'): 4,  ('G', 'C'): 4,  ('G', 'D'): 4,  ('G', 'E'): 4,  ('G', 'F'): 4,  ('G', 'G'): 0,  ('G', 'H'): 4,  ('G', 'I'): 4,  ('G', 'K'): 4,  ('G', 'L'): 4,  ('G', 'M'): 4,  ('G', 'N'): 4,  ('G', 'P'): 4,  ('G', 'Q'): 4,  ('G', 'R'): 4,  ('G', 'S'): 4,  ('G', 'T'): 4,  ('G', 'V'): 4,  ('G', 'W'): 4,  ('G', 'Y'): 4,  ('H', 'A'): 4,  ('H', 'C'): 4,  ('H', 'D'): 4,  ('H', 'E'): 4,  ('H', 'F'): 4,  ('H', 'G'): 4,  ('H', 'H'): 0,  ('H', 'I'): 4,  ('H', 'K'): 4,  ('H', 'L'): 4,  ('H', 'M'): 4,  ('H', 'N'): 3,  ('H', 'P'): 4,  ('H', 'Q'): 4,  ('H', 'R'): 4,  ('H', 'S'): 4,  ('H', 'T'): 4,  ('H', 'V'): 4,  ('H', 'W'): 4,  ('H', 'Y'): 2,  ('I', 'A'): 4,  ('I', 'C'): 4,  ('I', 'D'): 4,  ('I', 'E'): 4,  ('I', 'F'): 4,  ('I', 'G'): 4,  ('I', 'H'): 4,  ('I', 'I'): 0,  ('I', 'K'): 4,  ('I', 'L'): 2,  ('I', 'M'): 3,  ('I', 'N'): 4,  ('I', 'P'): 4,  ('I', 'Q'): 4,  ('I', 'R'): 4,  ('I', 'S'): 4,  ('I', 'T'): 4,  ('I', 'V'): 1,  ('I', 'W'): 4,  ('I', 'Y'): 4,  ('K', 'A'): 4,  ('K', 'C'): 4,  ('K', 'D'): 4,  ('K', 'E'): 3,  ('K', 'F'): 4,  ('K', 'G'): 4,  ('K', 'H'): 4,  ('K', 'I'): 4,  ('K', 'K'): 0,  ('K', 'L'): 4,  ('K', 'M'): 4,  ('K', 'N'): 4,  ('K', 'P'): 4,  ('K', 'Q'): 3,  ('K', 'R'): 2,  ('K', 'S'): 4,  ('K', 'T'): 4,  ('K', 'V'): 4,  ('K', 'W'): 4,  ('K', 'Y'): 4,  ('L', 'A'): 4,  ('L', 'C'): 4,  ('L', 'D'): 4,  ('L', 'E'): 4,  ('L', 'F'): 4,  ('L', 'G'): 4,  ('L', 'H'): 4,  ('L', 'I'): 2,  ('L', 'K'): 4,  ('L', 'L'): 0,  ('L', 'M'): 2,  ('L', 'N'): 4,  ('L', 'P'): 4,  ('L', 'Q'): 4,  ('L', 'R'): 4,  ('L', 'S'): 4,  ('L', 'T'): 4,  ('L', 'V'): 3,  ('L', 'W'): 4,  ('L', 'Y'): 4,  ('M', 'A'): 4,  ('M', 'C'): 4,  ('M', 'D'): 4,  ('M', 'E'): 4,  ('M', 'F'): 4,  ('M', 'G'): 4,  ('M', 'H'): 4,  ('M', 'I'): 3,  ('M', 'K'): 4,  ('M', 'L'): 2,  ('M', 'M'): 0,  ('M', 'N'): 4,  ('M', 'P'): 4,  ('M', 'Q'): 4,  ('M', 'R'): 4,  ('M', 'S'): 4,  ('M', 'T'): 4,  ('M', 'V'): 3,  ('M', 'W'): 4,  ('M', 'Y'): 4,  ('N', 'A'): 4,  ('N', 'C'): 4,  ('N', 'D'): 3,  ('N', 'E'): 4,  ('N', 'F'): 4,  ('N', 'G'): 4,  ('N', 'H'): 3,  ('N', 'I'): 4,  ('N', 'K'): 4,  ('N', 'L'): 4,  ('N', 'M'): 4,  ('N', 'N'): 0,  ('N', 'P'): 4,  ('N', 'Q'): 4,  ('N', 'R'): 4,  ('N', 'S'): 3,  ('N', 'T'): 4,  ('N', 'V'): 4,  ('N', 'W'): 4,  ('N', 'Y'): 4,  ('P', 'A'): 4,  ('P', 'C'): 4,  ('P', 'D'): 4,  ('P', 'E'): 4,  ('P', 'F'): 4,  ('P', 'G'): 4,  ('P', 'H'): 4,  ('P', 'I'): 4,  ('P', 'K'): 4,  ('P', 'L'): 4,  ('P', 'M'): 4,  ('P', 'N'): 4,  ('P', 'P'): 0,  ('P', 'Q'): 4,  ('P', 'R'): 4,  ('P', 'S'): 4,  ('P', 'T'): 4,  ('P', 'V'): 4,  ('P', 'W'): 4,  ('P', 'Y'): 4,  ('Q', 'A'): 4,  ('Q', 'C'): 4,  ('Q', 'D'): 4,  ('Q', 'E'): 2,  ('Q', 'F'): 4,  ('Q', 'G'): 4,  ('Q', 'H'): 4,  ('Q', 'I'): 4,  ('Q', 'K'): 3,  ('Q', 'L'): 4,  ('Q', 'M'): 4,  ('Q', 'N'): 4,  ('Q', 'P'): 4,  ('Q', 'Q'): 0,  ('Q', 'R'): 3,  ('Q', 'S'): 4,  ('Q', 'T'): 4,  ('Q', 'V'): 4,  ('Q', 'W'): 4,  ('Q', 'Y'): 4,  ('R', 'A'): 4,  ('R', 'C'): 4,  ('R', 'D'): 4,  ('R', 'E'): 4,  ('R', 'F'): 4,  ('R', 'G'): 4,  ('R', 'H'): 4,  ('R', 'I'): 4,  ('R', 'K'): 2,  ('R', 'L'): 4,  ('R', 'M'): 4,  ('R', 'N'): 4,  ('R', 'P'): 4,  ('R', 'Q'): 3,  ('R', 'R'): 0,  ('R', 'S'): 4,  ('R', 'T'): 4,  ('R', 'V'): 4,  ('R', 'W'): 4,  ('R', 'Y'): 4,  ('S', 'A'): 3,  ('S', 'C'): 4,  ('S', 'D'): 4,  ('S', 'E'): 4,  ('S', 'F'): 4,  ('S', 'G'): 4,  ('S', 'H'): 4,  ('S', 'I'): 4,  ('S', 'K'): 4,  ('S', 'L'): 4,  ('S', 'M'): 4,  ('S', 'N'): 3,  ('S', 'P'): 4,  ('S', 'Q'): 4,  ('S', 'R'): 4,  ('S', 'S'): 0,  ('S', 'T'): 3,  ('S', 'V'): 4,  ('S', 'W'): 4,  ('S', 'Y'): 4,  ('T', 'A'): 4,  ('T', 'C'): 4,  ('T', 'D'): 4,  ('T', 'E'): 4,  ('T', 'F'): 4,  ('T', 'G'): 4,  ('T', 'H'): 4,  ('T', 'I'): 4,  ('T', 'K'): 4,  ('T', 'L'): 4,  ('T', 'M'): 4,  ('T', 'N'): 4,  ('T', 'P'): 4,  ('T', 'Q'): 4,  ('T', 'R'): 4,  ('T', 'S'): 3,  ('T', 'T'): 0,  ('T', 'V'): 4,  ('T', 'W'): 4,  ('T', 'Y'): 4,  ('V', 'A'): 4,  ('V', 'C'): 4,  ('V', 'D'): 4,  ('V', 'E'): 4,  ('V', 'F'): 4,  ('V', 'G'): 4,  ('V', 'H'): 4,  ('V', 'I'): 1,  ('V', 'K'): 4,  ('V', 'L'): 3,  ('V', 'M'): 3,  ('V', 'N'): 4,  ('V', 'P'): 4,  ('V', 'Q'): 4,  ('V', 'R'): 4,  ('V', 'S'): 4,  ('V', 'T'): 4,  ('V', 'V'): 0,  ('V', 'W'): 4,  ('V', 'Y'): 4,  ('W', 'A'): 4,  ('W', 'C'): 4,  ('W', 'D'): 4,  ('W', 'E'): 4,  ('W', 'F'): 3,  ('W', 'G'): 4,  ('W', 'H'): 4,  ('W', 'I'): 4,  ('W', 'K'): 4,  ('W', 'L'): 4,  ('W', 'M'): 4,  ('W', 'N'): 4,  ('W', 'P'): 4,  ('W', 'Q'): 4,  ('W', 'R'): 4,  ('W', 'S'): 4,  ('W', 'T'): 4,  ('W', 'V'): 4,  ('W', 'W'): 0,  ('W', 'Y'): 2,  ('Y', 'A'): 4,  ('Y', 'C'): 4,  ('Y', 'D'): 4,  ('Y', 'E'): 4,  ('Y', 'F'): 1,  ('Y', 'G'): 4,  ('Y', 'H'): 2,  ('Y', 'I'): 4,  ('Y', 'K'): 4,  ('Y', 'L'): 4,  ('Y', 'M'): 4,  ('Y', 'N'): 4,  ('Y', 'P'): 4,  ('Y', 'Q'): 4,  ('Y', 'R'): 4,  ('Y', 'S'): 4,  ('Y', 'T'): 4,  ('Y', 'V'): 4,  ('Y', 'W'): 2,  ('Y', 'Y'): 0}
     # fmt: on
 
-    
+
     @staticmethod
-    def _make_numba_matrix(distance_matrix, alphabet=parasail_aa_alphabet_with_unknown):
+    def _make_numba_matrix(distance_matrix: dict, alphabet: str = parasail_aa_alphabet_with_unknown) -> np.ndarray:
         """Creates a numba compatible distance matrix from a dict of tuples.
 
         Parameters
         ----------
-        distance_matrix : dict
+        distance_matrix:
             Keys are tuples like ('A', 'C') with values containing an integer.
-        alphabet : str
+        alphabet:
 
         Returns
         -------
-        distance_matrix : np.ndarray, dtype=np.int32"""
+        distance_matrix:
+            distance lookup matrix
+        """
 
         dm = np.zeros((len(alphabet), len(alphabet)), dtype=np.int32)
         for (aa1, aa2), d in distance_matrix.items():
@@ -470,18 +472,21 @@ def _make_numba_matrix(distance_matrix, alphabet=parasail_aa_alphabet_with_unkno
     tcr_nb_distance_matrix = _make_numba_matrix(tcr_dict_distance_matrix)
 
     @staticmethod
-    def _seqs2mat(seqs, alphabet=parasail_aa_alphabet, max_len=None):
+    def _seqs2mat(seqs: Sequence[str], alphabet: str = parasail_aa_alphabet, max_len: Union[None, int] = None) -> tuple[np.ndarray, np.ndarray]:
         """Convert a collection of gene sequences into a
         numpy matrix of integers for fast comparison.
 
         Parameters
         ----------
-        seqs : list 
-            List of strings
+        seqs: 
+            Sequence of strings
 
         Returns
         -------
-        mat : np.array  
+        mat:
+            matrix with gene sequences encoded as integers
+        L:
+            vector with length values of the gene sequences in the matrix
 
         Examples
         --------
@@ -514,55 +519,55 @@ def _seqs2mat(seqs, alphabet=parasail_aa_alphabet, max_len=None):
     @staticmethod
     @nb.jit(nopython=True, parallel=False, nogil=True)
     def _nb_tcrdist_mat(
-        seqs_mat1,
-        seqs_mat2,
-        seqs_L1,
-        seqs_L2,
-        distance_matrix=tcr_nb_distance_matrix,
-        dist_weight=3,
-        gap_penalty=4,
-        ntrim=3,
-        ctrim=2,
-        fixed_gappos=True,
-        cutoff=20,
-    ):
+        seqs_mat1: np.ndarray,
+        seqs_mat2: np.ndarray,
+        seqs_L1: np.ndarray,
+        seqs_L2: np.ndarray,
+        distance_matrix: np.ndarray  = tcr_nb_distance_matrix,
+        dist_weight: int = 3,
+        gap_penalty: int = 4,
+        ntrim: int = 3,
+        ctrim: int = 2,
+        fixed_gappos: bool = True,
+        cutoff: int = 20,
+    ) -> tuple[list[np.ndarray], list[np.ndarray], np.ndarray]:
         """Computes the pairwise TCRdist distances for sequences in seqs_mat1 and seqs_mat2.
 
         Note: to use with non-CDR3 sequences set ntrim and ctrim to 0.
 
         Parameters
         ----------
-        seqs_mat1/2 : np.ndarray
+        seqs_mat1/2:
             Matrix containing sequences created by seqs2mat with padding to accomodate
             sequences of different lengths (-1 padding)
-        seqs_L1/2 : np.ndarray
+        seqs_L1/2:
             A vector containing the length of each sequence in the respective seqs_mat matrix,
             without the padding in seqs_mat
-        distance_matrix : np.ndarray [alphabet, alphabet] dtype=int32
+        distance_matrix:
             A square distance matrix (NOT a similarity matrix).
             Matrix must match the alphabet that was used to create
             seqs_mat, where each AA is represented by an index into the alphabet.
-        dist_weight : int
+        dist_weight:
             Weight applied to the mismatch distances before summing with the gap penalties
-        gap_penalty : int
+        gap_penalty:
             Distance penalty for the difference in the length of the two sequences
-        ntrim/ctrim : int
+        ntrim/ctrim:
             Positions trimmed off the N-terminus (0) and C-terminus (L-1) ends of the peptide sequence. These symbols will be ignored
             in the distance calculation.
-        fixed_gappos : bool
+        fixed_gappos:
             If True, insert gaps at a fixed position after the cysteine residue statring the CDR3 (typically position 6).
             If False, find the "optimal" position for inserting the gaps to make up the difference in length
 
         Returns
         -------
-        data_rows : 
+        data_rows: 
             List with arrays containing the non-zero data values of the result matrix per row,
             needed to create the final scipy CSR result matrix later
-        indices_rows :
+        indices_rows:
             List with arrays containing the non-zero entry column indeces of the result matrix per row,
             needed to create the final scipy CSR result matrix later
-        row_element_counts :
-            Arraay with integers that indicate the amount of non-zero values of the result matrix per row,
+        row_element_counts:
+            Array with integers that indicate the amount of non-zero values of the result matrix per row,
             needed to create the final scipy CSR result matrix later
         """
 

From cc173648abbfead367b4c0dc5c2c3f7cbcf40dae Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Thu, 18 Apr 2024 19:56:25 +0200
Subject: [PATCH 17/27] changed testdata import for test cases

---
 src/scirpy/tests/test_ir_dist_metrics.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/scirpy/tests/test_ir_dist_metrics.py b/src/scirpy/tests/test_ir_dist_metrics.py
index 03a993d14..ea4a48f0b 100644
--- a/src/scirpy/tests/test_ir_dist_metrics.py
+++ b/src/scirpy/tests/test_ir_dist_metrics.py
@@ -614,8 +614,9 @@ def test_tcrdist(test_parameters, test_input, expected_result):
 
 def test_tcrdist_reference():
     # test tcrdist against reference implementation
-    seqs = np.load("data/tcrdist_test_data/tcrdist_WU3k_seqs.npy")
-    reference_result = scipy.sparse.load_npz("data/tcrdist_test_data/tcrdist_WU3k_csr_result.npz")
+    from . import TESTDATA
+    seqs = np.load(TESTDATA / "tcrdist_test_data/tcrdist_WU3k_seqs.npy")
+    reference_result = scipy.sparse.load_npz(TESTDATA / "tcrdist_test_data/tcrdist_WU3k_csr_result.npz")
 
     tcrdist_calculator = TCRdistDistanceCalculator(
         dist_weight=3,

From e7fe4eb347ba83afa3fce42f008a966cc9b4e307 Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Thu, 18 Apr 2024 20:00:07 +0200
Subject: [PATCH 18/27] changed __init__ and _nb_tcrdist_mat in
 TCRdistDistanceCalculator to keywords only

---
 src/scirpy/ir_dist/metrics.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index faa10fea0..d38757e1a 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -423,6 +423,7 @@ class TCRdistDistanceCalculator:
 
     def __init__(
         self,
+        *,
         dist_weight: int = 3,
         gap_penalty: int = 4,
         ntrim: int = 3,
@@ -519,6 +520,7 @@ def _seqs2mat(seqs: Sequence[str], alphabet: str = parasail_aa_alphabet, max_len
     @staticmethod
     @nb.jit(nopython=True, parallel=False, nogil=True)
     def _nb_tcrdist_mat(
+        *,
         seqs_mat1: np.ndarray,
         seqs_mat2: np.ndarray,
         seqs_L1: np.ndarray,

From ac803e7904a4e8792d78a7e3b68e6832a3feb9d6 Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Thu, 18 Apr 2024 20:22:54 +0200
Subject: [PATCH 19/27] keywords only for _nb_tcrdist_mat removed, because it
 doesn't work with numba

---
 src/scirpy/ir_dist/metrics.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index d38757e1a..7eefc2c4b 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -520,7 +520,6 @@ def _seqs2mat(seqs: Sequence[str], alphabet: str = parasail_aa_alphabet, max_len
     @staticmethod
     @nb.jit(nopython=True, parallel=False, nogil=True)
     def _nb_tcrdist_mat(
-        *,
         seqs_mat1: np.ndarray,
         seqs_mat2: np.ndarray,
         seqs_L1: np.ndarray,
@@ -656,17 +655,17 @@ def _calc_dist_mat_block(self, seqs: Sequence[str], seqs2: Optional[Sequence[str
         seqs_mat1, seqs_L1 = self._seqs2mat(seqs)
         seqs_mat2, seqs_L2 = self._seqs2mat(seqs2)
 
-        kwargs = {
-            "dist_weight": self.dist_weight,
-            "gap_penalty": self.gap_penalty,
-            "ntrim": self.ntrim,
-            "ctrim": self.ctrim,
-            "fixed_gappos": self.fixed_gappos,
-            "cutoff": self.cutoff,
-        }
-
         data_rows, indices_rows, row_element_counts = self._nb_tcrdist_mat(
-            seqs_mat1, seqs_mat2, seqs_L1, seqs_L2, **kwargs
+            seqs_mat1 = seqs_mat1,
+            seqs_mat2 = seqs_mat2,
+            seqs_L1 = seqs_L1,
+            seqs_L2 = seqs_L2,
+            dist_weight = self.dist_weight,
+            gap_penalty = self.gap_penalty,
+            ntrim = self.ntrim,
+            ctrim = self.ctrim,
+            fixed_gappos = self.fixed_gappos,
+            cutoff = self.cutoff,
         )
 
         indptr = np.zeros(row_element_counts.shape[0] + 1)

From 3a3d53cc275e7f142753329191cb8a9e913e78c9 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 18 Apr 2024 18:30:20 +0000
Subject: [PATCH 20/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/scirpy/ir_dist/metrics.py            | 32 ++++++++++++------------
 src/scirpy/tests/test_ir_dist_metrics.py |  1 +
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index e95de0743..9248e1df1 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -446,7 +446,6 @@ def __init__(
     tcr_dict_distance_matrix = {('A', 'A'): 0,  ('A', 'C'): 4,  ('A', 'D'): 4,  ('A', 'E'): 4,  ('A', 'F'): 4,  ('A', 'G'): 4,  ('A', 'H'): 4,  ('A', 'I'): 4,  ('A', 'K'): 4,  ('A', 'L'): 4,  ('A', 'M'): 4,  ('A', 'N'): 4,  ('A', 'P'): 4,  ('A', 'Q'): 4,  ('A', 'R'): 4,  ('A', 'S'): 3,  ('A', 'T'): 4,  ('A', 'V'): 4,  ('A', 'W'): 4,  ('A', 'Y'): 4,  ('C', 'A'): 4,  ('C', 'C'): 0,  ('C', 'D'): 4,  ('C', 'E'): 4,  ('C', 'F'): 4,  ('C', 'G'): 4,  ('C', 'H'): 4,  ('C', 'I'): 4,  ('C', 'K'): 4,  ('C', 'L'): 4,  ('C', 'M'): 4,  ('C', 'N'): 4,  ('C', 'P'): 4,  ('C', 'Q'): 4,  ('C', 'R'): 4,  ('C', 'S'): 4,  ('C', 'T'): 4,  ('C', 'V'): 4,  ('C', 'W'): 4,  ('C', 'Y'): 4,  ('D', 'A'): 4,  ('D', 'C'): 4,  ('D', 'D'): 0,  ('D', 'E'): 2,  ('D', 'F'): 4,  ('D', 'G'): 4,  ('D', 'H'): 4,  ('D', 'I'): 4,  ('D', 'K'): 4,  ('D', 'L'): 4,  ('D', 'M'): 4,  ('D', 'N'): 3,  ('D', 'P'): 4,  ('D', 'Q'): 4,  ('D', 'R'): 4,  ('D', 'S'): 4,  ('D', 'T'): 4,  ('D', 'V'): 4,  ('D', 'W'): 4,  ('D', 'Y'): 4,  ('E', 'A'): 4,  ('E', 'C'): 4,  ('E', 'D'): 2,  ('E', 'E'): 0,  ('E', 'F'): 4,  ('E', 'G'): 4,  ('E', 'H'): 4,  ('E', 'I'): 4,  ('E', 'K'): 3,  ('E', 'L'): 4,  ('E', 'M'): 4,  ('E', 'N'): 4,  ('E', 'P'): 4,  ('E', 'Q'): 2,  ('E', 'R'): 4,  ('E', 'S'): 4,  ('E', 'T'): 4,  ('E', 'V'): 4,  ('E', 'W'): 4,  ('E', 'Y'): 4,  ('F', 'A'): 4,  ('F', 'C'): 4,  ('F', 'D'): 4,  ('F', 'E'): 4,  ('F', 'F'): 0,  ('F', 'G'): 4,  ('F', 'H'): 4,  ('F', 'I'): 4,  ('F', 'K'): 4,  ('F', 'L'): 4,  ('F', 'M'): 4,  ('F', 'N'): 4,  ('F', 'P'): 4,  ('F', 'Q'): 4,  ('F', 'R'): 4,  ('F', 'S'): 4,  ('F', 'T'): 4,  ('F', 'V'): 4,  ('F', 'W'): 3,  ('F', 'Y'): 1,  ('G', 'A'): 4,  ('G', 'C'): 4,  ('G', 'D'): 4,  ('G', 'E'): 4,  ('G', 'F'): 4,  ('G', 'G'): 0,  ('G', 'H'): 4,  ('G', 'I'): 4,  ('G', 'K'): 4,  ('G', 'L'): 4,  ('G', 'M'): 4,  ('G', 'N'): 4,  ('G', 'P'): 4,  ('G', 'Q'): 4,  ('G', 'R'): 4,  ('G', 'S'): 4,  ('G', 'T'): 4,  ('G', 'V'): 4,  ('G', 'W'): 4,  ('G', 'Y'): 4,  ('H', 'A'): 4,  ('H', 'C'): 4,  ('H', 'D'): 4,  ('H', 'E'): 4,  ('H', 'F'): 4,  ('H', 'G'): 4,  ('H', 'H'): 0,  ('H', 'I'): 4,  ('H', 'K'): 4,  ('H', 'L'): 4,  ('H', 'M'): 4,  ('H', 'N'): 3,  ('H', 'P'): 4,  ('H', 'Q'): 4,  ('H', 'R'): 4,  ('H', 'S'): 4,  ('H', 'T'): 4,  ('H', 'V'): 4,  ('H', 'W'): 4,  ('H', 'Y'): 2,  ('I', 'A'): 4,  ('I', 'C'): 4,  ('I', 'D'): 4,  ('I', 'E'): 4,  ('I', 'F'): 4,  ('I', 'G'): 4,  ('I', 'H'): 4,  ('I', 'I'): 0,  ('I', 'K'): 4,  ('I', 'L'): 2,  ('I', 'M'): 3,  ('I', 'N'): 4,  ('I', 'P'): 4,  ('I', 'Q'): 4,  ('I', 'R'): 4,  ('I', 'S'): 4,  ('I', 'T'): 4,  ('I', 'V'): 1,  ('I', 'W'): 4,  ('I', 'Y'): 4,  ('K', 'A'): 4,  ('K', 'C'): 4,  ('K', 'D'): 4,  ('K', 'E'): 3,  ('K', 'F'): 4,  ('K', 'G'): 4,  ('K', 'H'): 4,  ('K', 'I'): 4,  ('K', 'K'): 0,  ('K', 'L'): 4,  ('K', 'M'): 4,  ('K', 'N'): 4,  ('K', 'P'): 4,  ('K', 'Q'): 3,  ('K', 'R'): 2,  ('K', 'S'): 4,  ('K', 'T'): 4,  ('K', 'V'): 4,  ('K', 'W'): 4,  ('K', 'Y'): 4,  ('L', 'A'): 4,  ('L', 'C'): 4,  ('L', 'D'): 4,  ('L', 'E'): 4,  ('L', 'F'): 4,  ('L', 'G'): 4,  ('L', 'H'): 4,  ('L', 'I'): 2,  ('L', 'K'): 4,  ('L', 'L'): 0,  ('L', 'M'): 2,  ('L', 'N'): 4,  ('L', 'P'): 4,  ('L', 'Q'): 4,  ('L', 'R'): 4,  ('L', 'S'): 4,  ('L', 'T'): 4,  ('L', 'V'): 3,  ('L', 'W'): 4,  ('L', 'Y'): 4,  ('M', 'A'): 4,  ('M', 'C'): 4,  ('M', 'D'): 4,  ('M', 'E'): 4,  ('M', 'F'): 4,  ('M', 'G'): 4,  ('M', 'H'): 4,  ('M', 'I'): 3,  ('M', 'K'): 4,  ('M', 'L'): 2,  ('M', 'M'): 0,  ('M', 'N'): 4,  ('M', 'P'): 4,  ('M', 'Q'): 4,  ('M', 'R'): 4,  ('M', 'S'): 4,  ('M', 'T'): 4,  ('M', 'V'): 3,  ('M', 'W'): 4,  ('M', 'Y'): 4,  ('N', 'A'): 4,  ('N', 'C'): 4,  ('N', 'D'): 3,  ('N', 'E'): 4,  ('N', 'F'): 4,  ('N', 'G'): 4,  ('N', 'H'): 3,  ('N', 'I'): 4,  ('N', 'K'): 4,  ('N', 'L'): 4,  ('N', 'M'): 4,  ('N', 'N'): 0,  ('N', 'P'): 4,  ('N', 'Q'): 4,  ('N', 'R'): 4,  ('N', 'S'): 3,  ('N', 'T'): 4,  ('N', 'V'): 4,  ('N', 'W'): 4,  ('N', 'Y'): 4,  ('P', 'A'): 4,  ('P', 'C'): 4,  ('P', 'D'): 4,  ('P', 'E'): 4,  ('P', 'F'): 4,  ('P', 'G'): 4,  ('P', 'H'): 4,  ('P', 'I'): 4,  ('P', 'K'): 4,  ('P', 'L'): 4,  ('P', 'M'): 4,  ('P', 'N'): 4,  ('P', 'P'): 0,  ('P', 'Q'): 4,  ('P', 'R'): 4,  ('P', 'S'): 4,  ('P', 'T'): 4,  ('P', 'V'): 4,  ('P', 'W'): 4,  ('P', 'Y'): 4,  ('Q', 'A'): 4,  ('Q', 'C'): 4,  ('Q', 'D'): 4,  ('Q', 'E'): 2,  ('Q', 'F'): 4,  ('Q', 'G'): 4,  ('Q', 'H'): 4,  ('Q', 'I'): 4,  ('Q', 'K'): 3,  ('Q', 'L'): 4,  ('Q', 'M'): 4,  ('Q', 'N'): 4,  ('Q', 'P'): 4,  ('Q', 'Q'): 0,  ('Q', 'R'): 3,  ('Q', 'S'): 4,  ('Q', 'T'): 4,  ('Q', 'V'): 4,  ('Q', 'W'): 4,  ('Q', 'Y'): 4,  ('R', 'A'): 4,  ('R', 'C'): 4,  ('R', 'D'): 4,  ('R', 'E'): 4,  ('R', 'F'): 4,  ('R', 'G'): 4,  ('R', 'H'): 4,  ('R', 'I'): 4,  ('R', 'K'): 2,  ('R', 'L'): 4,  ('R', 'M'): 4,  ('R', 'N'): 4,  ('R', 'P'): 4,  ('R', 'Q'): 3,  ('R', 'R'): 0,  ('R', 'S'): 4,  ('R', 'T'): 4,  ('R', 'V'): 4,  ('R', 'W'): 4,  ('R', 'Y'): 4,  ('S', 'A'): 3,  ('S', 'C'): 4,  ('S', 'D'): 4,  ('S', 'E'): 4,  ('S', 'F'): 4,  ('S', 'G'): 4,  ('S', 'H'): 4,  ('S', 'I'): 4,  ('S', 'K'): 4,  ('S', 'L'): 4,  ('S', 'M'): 4,  ('S', 'N'): 3,  ('S', 'P'): 4,  ('S', 'Q'): 4,  ('S', 'R'): 4,  ('S', 'S'): 0,  ('S', 'T'): 3,  ('S', 'V'): 4,  ('S', 'W'): 4,  ('S', 'Y'): 4,  ('T', 'A'): 4,  ('T', 'C'): 4,  ('T', 'D'): 4,  ('T', 'E'): 4,  ('T', 'F'): 4,  ('T', 'G'): 4,  ('T', 'H'): 4,  ('T', 'I'): 4,  ('T', 'K'): 4,  ('T', 'L'): 4,  ('T', 'M'): 4,  ('T', 'N'): 4,  ('T', 'P'): 4,  ('T', 'Q'): 4,  ('T', 'R'): 4,  ('T', 'S'): 3,  ('T', 'T'): 0,  ('T', 'V'): 4,  ('T', 'W'): 4,  ('T', 'Y'): 4,  ('V', 'A'): 4,  ('V', 'C'): 4,  ('V', 'D'): 4,  ('V', 'E'): 4,  ('V', 'F'): 4,  ('V', 'G'): 4,  ('V', 'H'): 4,  ('V', 'I'): 1,  ('V', 'K'): 4,  ('V', 'L'): 3,  ('V', 'M'): 3,  ('V', 'N'): 4,  ('V', 'P'): 4,  ('V', 'Q'): 4,  ('V', 'R'): 4,  ('V', 'S'): 4,  ('V', 'T'): 4,  ('V', 'V'): 0,  ('V', 'W'): 4,  ('V', 'Y'): 4,  ('W', 'A'): 4,  ('W', 'C'): 4,  ('W', 'D'): 4,  ('W', 'E'): 4,  ('W', 'F'): 3,  ('W', 'G'): 4,  ('W', 'H'): 4,  ('W', 'I'): 4,  ('W', 'K'): 4,  ('W', 'L'): 4,  ('W', 'M'): 4,  ('W', 'N'): 4,  ('W', 'P'): 4,  ('W', 'Q'): 4,  ('W', 'R'): 4,  ('W', 'S'): 4,  ('W', 'T'): 4,  ('W', 'V'): 4,  ('W', 'W'): 0,  ('W', 'Y'): 2,  ('Y', 'A'): 4,  ('Y', 'C'): 4,  ('Y', 'D'): 4,  ('Y', 'E'): 4,  ('Y', 'F'): 1,  ('Y', 'G'): 4,  ('Y', 'H'): 2,  ('Y', 'I'): 4,  ('Y', 'K'): 4,  ('Y', 'L'): 4,  ('Y', 'M'): 4,  ('Y', 'N'): 4,  ('Y', 'P'): 4,  ('Y', 'Q'): 4,  ('Y', 'R'): 4,  ('Y', 'S'): 4,  ('Y', 'T'): 4,  ('Y', 'V'): 4,  ('Y', 'W'): 2,  ('Y', 'Y'): 0}
     # fmt: on
 
-
     @staticmethod
     def _make_numba_matrix(distance_matrix: dict, alphabet: str = parasail_aa_alphabet_with_unknown) -> np.ndarray:
         """Creates a numba compatible distance matrix from a dict of tuples.
@@ -462,7 +461,6 @@ def _make_numba_matrix(distance_matrix: dict, alphabet: str = parasail_aa_alphab
         distance_matrix:
             distance lookup matrix
         """
-
         dm = np.zeros((len(alphabet), len(alphabet)), dtype=np.int32)
         for (aa1, aa2), d in distance_matrix.items():
             dm[alphabet.index(aa1), alphabet.index(aa2)] = d
@@ -472,13 +470,15 @@ def _make_numba_matrix(distance_matrix: dict, alphabet: str = parasail_aa_alphab
     tcr_nb_distance_matrix = _make_numba_matrix(tcr_dict_distance_matrix)
 
     @staticmethod
-    def _seqs2mat(seqs: Sequence[str], alphabet: str = parasail_aa_alphabet, max_len: Union[None, int] = None) -> tuple[np.ndarray, np.ndarray]:
+    def _seqs2mat(
+        seqs: Sequence[str], alphabet: str = parasail_aa_alphabet, max_len: Union[None, int] = None
+    ) -> tuple[np.ndarray, np.ndarray]:
         """Convert a collection of gene sequences into a
         numpy matrix of integers for fast comparison.
 
         Parameters
         ----------
-        seqs: 
+        seqs:
             Sequence of strings
 
         Returns
@@ -522,7 +522,7 @@ def _nb_tcrdist_mat(
         seqs_mat2: np.ndarray,
         seqs_L1: np.ndarray,
         seqs_L2: np.ndarray,
-        distance_matrix: np.ndarray  = tcr_nb_distance_matrix,
+        distance_matrix: np.ndarray = tcr_nb_distance_matrix,
         dist_weight: int = 3,
         gap_penalty: int = 4,
         ntrim: int = 3,
@@ -559,7 +559,7 @@ def _nb_tcrdist_mat(
 
         Returns
         -------
-        data_rows: 
+        data_rows:
             List with arrays containing the non-zero data values of the result matrix per row,
             needed to create the final scipy CSR result matrix later
         indices_rows:
@@ -652,16 +652,16 @@ def _calc_dist_mat_block(self, seqs: Sequence[str], seqs2: Optional[Sequence[str
         seqs_mat2, seqs_L2 = self._seqs2mat(seqs2)
 
         data_rows, indices_rows, row_element_counts = self._nb_tcrdist_mat(
-            seqs_mat1 = seqs_mat1,
-            seqs_mat2 = seqs_mat2,
-            seqs_L1 = seqs_L1,
-            seqs_L2 = seqs_L2,
-            dist_weight = self.dist_weight,
-            gap_penalty = self.gap_penalty,
-            ntrim = self.ntrim,
-            ctrim = self.ctrim,
-            fixed_gappos = self.fixed_gappos,
-            cutoff = self.cutoff,
+            seqs_mat1=seqs_mat1,
+            seqs_mat2=seqs_mat2,
+            seqs_L1=seqs_L1,
+            seqs_L2=seqs_L2,
+            dist_weight=self.dist_weight,
+            gap_penalty=self.gap_penalty,
+            ntrim=self.ntrim,
+            ctrim=self.ctrim,
+            fixed_gappos=self.fixed_gappos,
+            cutoff=self.cutoff,
         )
 
         indptr = np.zeros(row_element_counts.shape[0] + 1)
diff --git a/src/scirpy/tests/test_ir_dist_metrics.py b/src/scirpy/tests/test_ir_dist_metrics.py
index ea4a48f0b..8db93e43b 100644
--- a/src/scirpy/tests/test_ir_dist_metrics.py
+++ b/src/scirpy/tests/test_ir_dist_metrics.py
@@ -615,6 +615,7 @@ def test_tcrdist(test_parameters, test_input, expected_result):
 def test_tcrdist_reference():
     # test tcrdist against reference implementation
     from . import TESTDATA
+
     seqs = np.load(TESTDATA / "tcrdist_test_data/tcrdist_WU3k_seqs.npy")
     reference_result = scipy.sparse.load_npz(TESTDATA / "tcrdist_test_data/tcrdist_WU3k_csr_result.npz")
 

From 05817d39031632463bb5e3077af7f84148ed6094 Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Thu, 18 Apr 2024 20:40:58 +0200
Subject: [PATCH 21/27] unused control variable changed to _

---
 src/scirpy/ir_dist/metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index e95de0743..de0982813 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -577,7 +577,7 @@ def _nb_tcrdist_mat(
         row_element_counts = np.zeros(seqs_mat1.shape[0])
 
         empty_row = np.zeros(0)
-        for i in range(0, seqs_mat1.shape[0]):
+        for _ in range(0, seqs_mat1.shape[0]):
             data_rows.append(empty_row)
             indices_rows.append(empty_row)
 

From f5b9857847bea6d39a77cb3cfefe1bc8f0b7b90c Mon Sep 17 00:00:00 2001
From: felixpetschko <felixpetschko@gmail.com>
Date: Fri, 19 Apr 2024 10:12:08 +0200
Subject: [PATCH 22/27] creation of numba lookup matrix changed

---
 src/scirpy/ir_dist/metrics.py | 58 ++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 31 deletions(-)

diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index de0982813..9fdc8a532 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -394,6 +394,27 @@ def _compute_block(self, seqs1, seqs2, origin):
         return result
 
 
+def _make_numba_matrix(distance_matrix: dict, alphabet: str = "ARNDCQEGHILKMFPSTWYVBZX*") -> np.ndarray:
+    """Creates a numba compatible distance matrix from a dict of tuples.
+
+    Parameters
+    ----------
+    distance_matrix:
+        Keys are tuples like ('A', 'C') with values containing an integer.
+    alphabet:
+
+    Returns
+    -------
+    distance_matrix:
+        distance lookup matrix
+    """
+
+    dm = np.zeros((len(alphabet), len(alphabet)), dtype=np.int32)
+    for (aa1, aa2), d in distance_matrix.items():
+        dm[alphabet.index(aa1), alphabet.index(aa2)] = d
+        dm[alphabet.index(aa2), alphabet.index(aa1)] = d
+    return dm
+
 class TCRdistDistanceCalculator:
     """Computes pairwise distances between TCR CDR3 sequences based on the "tcrdist" distance metric.
 
@@ -420,6 +441,12 @@ class TCRdistDistanceCalculator:
     n_jobs:
         Number of jobs (processes) to use for the pairwise distance calculation
     """
+    parasail_aa_alphabet = "ARNDCQEGHILKMFPSTWYVBZX"
+    parasail_aa_alphabet_with_unknown = "ARNDCQEGHILKMFPSTWYVBZX*"
+    # fmt: off
+    tcr_dict_distance_matrix = {('A', 'A'): 0,  ('A', 'C'): 4,  ('A', 'D'): 4,  ('A', 'E'): 4,  ('A', 'F'): 4,  ('A', 'G'): 4,  ('A', 'H'): 4,  ('A', 'I'): 4,  ('A', 'K'): 4,  ('A', 'L'): 4,  ('A', 'M'): 4,  ('A', 'N'): 4,  ('A', 'P'): 4,  ('A', 'Q'): 4,  ('A', 'R'): 4,  ('A', 'S'): 3,  ('A', 'T'): 4,  ('A', 'V'): 4,  ('A', 'W'): 4,  ('A', 'Y'): 4,  ('C', 'A'): 4,  ('C', 'C'): 0,  ('C', 'D'): 4,  ('C', 'E'): 4,  ('C', 'F'): 4,  ('C', 'G'): 4,  ('C', 'H'): 4,  ('C', 'I'): 4,  ('C', 'K'): 4,  ('C', 'L'): 4,  ('C', 'M'): 4,  ('C', 'N'): 4,  ('C', 'P'): 4,  ('C', 'Q'): 4,  ('C', 'R'): 4,  ('C', 'S'): 4,  ('C', 'T'): 4,  ('C', 'V'): 4,  ('C', 'W'): 4,  ('C', 'Y'): 4,  ('D', 'A'): 4,  ('D', 'C'): 4,  ('D', 'D'): 0,  ('D', 'E'): 2,  ('D', 'F'): 4,  ('D', 'G'): 4,  ('D', 'H'): 4,  ('D', 'I'): 4,  ('D', 'K'): 4,  ('D', 'L'): 4,  ('D', 'M'): 4,  ('D', 'N'): 3,  ('D', 'P'): 4,  ('D', 'Q'): 4,  ('D', 'R'): 4,  ('D', 'S'): 4,  ('D', 'T'): 4,  ('D', 'V'): 4,  ('D', 'W'): 4,  ('D', 'Y'): 4,  ('E', 'A'): 4,  ('E', 'C'): 4,  ('E', 'D'): 2,  ('E', 'E'): 0,  ('E', 'F'): 4,  ('E', 'G'): 4,  ('E', 'H'): 4,  ('E', 'I'): 4,  ('E', 'K'): 3,  ('E', 'L'): 4,  ('E', 'M'): 4,  ('E', 'N'): 4,  ('E', 'P'): 4,  ('E', 'Q'): 2,  ('E', 'R'): 4,  ('E', 'S'): 4,  ('E', 'T'): 4,  ('E', 'V'): 4,  ('E', 'W'): 4,  ('E', 'Y'): 4,  ('F', 'A'): 4,  ('F', 'C'): 4,  ('F', 'D'): 4,  ('F', 'E'): 4,  ('F', 'F'): 0,  ('F', 'G'): 4,  ('F', 'H'): 4,  ('F', 'I'): 4,  ('F', 'K'): 4,  ('F', 'L'): 4,  ('F', 'M'): 4,  ('F', 'N'): 4,  ('F', 'P'): 4,  ('F', 'Q'): 4,  ('F', 'R'): 4,  ('F', 'S'): 4,  ('F', 'T'): 4,  ('F', 'V'): 4,  ('F', 'W'): 3,  ('F', 'Y'): 1,  ('G', 'A'): 4,  ('G', 'C'): 4,  ('G', 'D'): 4,  ('G', 'E'): 4,  ('G', 'F'): 4,  ('G', 'G'): 0,  ('G', 'H'): 4,  ('G', 'I'): 4,  ('G', 'K'): 4,  ('G', 'L'): 4,  ('G', 'M'): 4,  ('G', 'N'): 4,  ('G', 'P'): 4,  ('G', 'Q'): 4,  ('G', 'R'): 4,  ('G', 'S'): 4,  ('G', 'T'): 4,  ('G', 'V'): 4,  ('G', 'W'): 4,  ('G', 'Y'): 4,  ('H', 'A'): 4,  ('H', 'C'): 4,  ('H', 'D'): 4,  ('H', 'E'): 4,  ('H', 'F'): 4,  ('H', 'G'): 4,  ('H', 'H'): 0,  ('H', 'I'): 4,  ('H', 'K'): 4,  ('H', 'L'): 4,  ('H', 'M'): 4,  ('H', 'N'): 3,  ('H', 'P'): 4,  ('H', 'Q'): 4,  ('H', 'R'): 4,  ('H', 'S'): 4,  ('H', 'T'): 4,  ('H', 'V'): 4,  ('H', 'W'): 4,  ('H', 'Y'): 2,  ('I', 'A'): 4,  ('I', 'C'): 4,  ('I', 'D'): 4,  ('I', 'E'): 4,  ('I', 'F'): 4,  ('I', 'G'): 4,  ('I', 'H'): 4,  ('I', 'I'): 0,  ('I', 'K'): 4,  ('I', 'L'): 2,  ('I', 'M'): 3,  ('I', 'N'): 4,  ('I', 'P'): 4,  ('I', 'Q'): 4,  ('I', 'R'): 4,  ('I', 'S'): 4,  ('I', 'T'): 4,  ('I', 'V'): 1,  ('I', 'W'): 4,  ('I', 'Y'): 4,  ('K', 'A'): 4,  ('K', 'C'): 4,  ('K', 'D'): 4,  ('K', 'E'): 3,  ('K', 'F'): 4,  ('K', 'G'): 4,  ('K', 'H'): 4,  ('K', 'I'): 4,  ('K', 'K'): 0,  ('K', 'L'): 4,  ('K', 'M'): 4,  ('K', 'N'): 4,  ('K', 'P'): 4,  ('K', 'Q'): 3,  ('K', 'R'): 2,  ('K', 'S'): 4,  ('K', 'T'): 4,  ('K', 'V'): 4,  ('K', 'W'): 4,  ('K', 'Y'): 4,  ('L', 'A'): 4,  ('L', 'C'): 4,  ('L', 'D'): 4,  ('L', 'E'): 4,  ('L', 'F'): 4,  ('L', 'G'): 4,  ('L', 'H'): 4,  ('L', 'I'): 2,  ('L', 'K'): 4,  ('L', 'L'): 0,  ('L', 'M'): 2,  ('L', 'N'): 4,  ('L', 'P'): 4,  ('L', 'Q'): 4,  ('L', 'R'): 4,  ('L', 'S'): 4,  ('L', 'T'): 4,  ('L', 'V'): 3,  ('L', 'W'): 4,  ('L', 'Y'): 4,  ('M', 'A'): 4,  ('M', 'C'): 4,  ('M', 'D'): 4,  ('M', 'E'): 4,  ('M', 'F'): 4,  ('M', 'G'): 4,  ('M', 'H'): 4,  ('M', 'I'): 3,  ('M', 'K'): 4,  ('M', 'L'): 2,  ('M', 'M'): 0,  ('M', 'N'): 4,  ('M', 'P'): 4,  ('M', 'Q'): 4,  ('M', 'R'): 4,  ('M', 'S'): 4,  ('M', 'T'): 4,  ('M', 'V'): 3,  ('M', 'W'): 4,  ('M', 'Y'): 4,  ('N', 'A'): 4,  ('N', 'C'): 4,  ('N', 'D'): 3,  ('N', 'E'): 4,  ('N', 'F'): 4,  ('N', 'G'): 4,  ('N', 'H'): 3,  ('N', 'I'): 4,  ('N', 'K'): 4,  ('N', 'L'): 4,  ('N', 'M'): 4,  ('N', 'N'): 0,  ('N', 'P'): 4,  ('N', 'Q'): 4,  ('N', 'R'): 4,  ('N', 'S'): 3,  ('N', 'T'): 4,  ('N', 'V'): 4,  ('N', 'W'): 4,  ('N', 'Y'): 4,  ('P', 'A'): 4,  ('P', 'C'): 4,  ('P', 'D'): 4,  ('P', 'E'): 4,  ('P', 'F'): 4,  ('P', 'G'): 4,  ('P', 'H'): 4,  ('P', 'I'): 4,  ('P', 'K'): 4,  ('P', 'L'): 4,  ('P', 'M'): 4,  ('P', 'N'): 4,  ('P', 'P'): 0,  ('P', 'Q'): 4,  ('P', 'R'): 4,  ('P', 'S'): 4,  ('P', 'T'): 4,  ('P', 'V'): 4,  ('P', 'W'): 4,  ('P', 'Y'): 4,  ('Q', 'A'): 4,  ('Q', 'C'): 4,  ('Q', 'D'): 4,  ('Q', 'E'): 2,  ('Q', 'F'): 4,  ('Q', 'G'): 4,  ('Q', 'H'): 4,  ('Q', 'I'): 4,  ('Q', 'K'): 3,  ('Q', 'L'): 4,  ('Q', 'M'): 4,  ('Q', 'N'): 4,  ('Q', 'P'): 4,  ('Q', 'Q'): 0,  ('Q', 'R'): 3,  ('Q', 'S'): 4,  ('Q', 'T'): 4,  ('Q', 'V'): 4,  ('Q', 'W'): 4,  ('Q', 'Y'): 4,  ('R', 'A'): 4,  ('R', 'C'): 4,  ('R', 'D'): 4,  ('R', 'E'): 4,  ('R', 'F'): 4,  ('R', 'G'): 4,  ('R', 'H'): 4,  ('R', 'I'): 4,  ('R', 'K'): 2,  ('R', 'L'): 4,  ('R', 'M'): 4,  ('R', 'N'): 4,  ('R', 'P'): 4,  ('R', 'Q'): 3,  ('R', 'R'): 0,  ('R', 'S'): 4,  ('R', 'T'): 4,  ('R', 'V'): 4,  ('R', 'W'): 4,  ('R', 'Y'): 4,  ('S', 'A'): 3,  ('S', 'C'): 4,  ('S', 'D'): 4,  ('S', 'E'): 4,  ('S', 'F'): 4,  ('S', 'G'): 4,  ('S', 'H'): 4,  ('S', 'I'): 4,  ('S', 'K'): 4,  ('S', 'L'): 4,  ('S', 'M'): 4,  ('S', 'N'): 3,  ('S', 'P'): 4,  ('S', 'Q'): 4,  ('S', 'R'): 4,  ('S', 'S'): 0,  ('S', 'T'): 3,  ('S', 'V'): 4,  ('S', 'W'): 4,  ('S', 'Y'): 4,  ('T', 'A'): 4,  ('T', 'C'): 4,  ('T', 'D'): 4,  ('T', 'E'): 4,  ('T', 'F'): 4,  ('T', 'G'): 4,  ('T', 'H'): 4,  ('T', 'I'): 4,  ('T', 'K'): 4,  ('T', 'L'): 4,  ('T', 'M'): 4,  ('T', 'N'): 4,  ('T', 'P'): 4,  ('T', 'Q'): 4,  ('T', 'R'): 4,  ('T', 'S'): 3,  ('T', 'T'): 0,  ('T', 'V'): 4,  ('T', 'W'): 4,  ('T', 'Y'): 4,  ('V', 'A'): 4,  ('V', 'C'): 4,  ('V', 'D'): 4,  ('V', 'E'): 4,  ('V', 'F'): 4,  ('V', 'G'): 4,  ('V', 'H'): 4,  ('V', 'I'): 1,  ('V', 'K'): 4,  ('V', 'L'): 3,  ('V', 'M'): 3,  ('V', 'N'): 4,  ('V', 'P'): 4,  ('V', 'Q'): 4,  ('V', 'R'): 4,  ('V', 'S'): 4,  ('V', 'T'): 4,  ('V', 'V'): 0,  ('V', 'W'): 4,  ('V', 'Y'): 4,  ('W', 'A'): 4,  ('W', 'C'): 4,  ('W', 'D'): 4,  ('W', 'E'): 4,  ('W', 'F'): 3,  ('W', 'G'): 4,  ('W', 'H'): 4,  ('W', 'I'): 4,  ('W', 'K'): 4,  ('W', 'L'): 4,  ('W', 'M'): 4,  ('W', 'N'): 4,  ('W', 'P'): 4,  ('W', 'Q'): 4,  ('W', 'R'): 4,  ('W', 'S'): 4,  ('W', 'T'): 4,  ('W', 'V'): 4,  ('W', 'W'): 0,  ('W', 'Y'): 2,  ('Y', 'A'): 4,  ('Y', 'C'): 4,  ('Y', 'D'): 4,  ('Y', 'E'): 4,  ('Y', 'F'): 1,  ('Y', 'G'): 4,  ('Y', 'H'): 2,  ('Y', 'I'): 4,  ('Y', 'K'): 4,  ('Y', 'L'): 4,  ('Y', 'M'): 4,  ('Y', 'N'): 4,  ('Y', 'P'): 4,  ('Y', 'Q'): 4,  ('Y', 'R'): 4,  ('Y', 'S'): 4,  ('Y', 'T'): 4,  ('Y', 'V'): 4,  ('Y', 'W'): 2,  ('Y', 'Y'): 0}
+    # fmt: on
+    tcr_nb_distance_matrix = _make_numba_matrix(tcr_dict_distance_matrix, parasail_aa_alphabet_with_unknown)
 
     def __init__(
         self,
@@ -440,37 +467,6 @@ def __init__(
         self.cutoff = cutoff
         self.n_jobs = n_jobs
 
-    parasail_aa_alphabet = "ARNDCQEGHILKMFPSTWYVBZX"
-    parasail_aa_alphabet_with_unknown = "ARNDCQEGHILKMFPSTWYVBZX*"
-    # fmt: off
-    tcr_dict_distance_matrix = {('A', 'A'): 0,  ('A', 'C'): 4,  ('A', 'D'): 4,  ('A', 'E'): 4,  ('A', 'F'): 4,  ('A', 'G'): 4,  ('A', 'H'): 4,  ('A', 'I'): 4,  ('A', 'K'): 4,  ('A', 'L'): 4,  ('A', 'M'): 4,  ('A', 'N'): 4,  ('A', 'P'): 4,  ('A', 'Q'): 4,  ('A', 'R'): 4,  ('A', 'S'): 3,  ('A', 'T'): 4,  ('A', 'V'): 4,  ('A', 'W'): 4,  ('A', 'Y'): 4,  ('C', 'A'): 4,  ('C', 'C'): 0,  ('C', 'D'): 4,  ('C', 'E'): 4,  ('C', 'F'): 4,  ('C', 'G'): 4,  ('C', 'H'): 4,  ('C', 'I'): 4,  ('C', 'K'): 4,  ('C', 'L'): 4,  ('C', 'M'): 4,  ('C', 'N'): 4,  ('C', 'P'): 4,  ('C', 'Q'): 4,  ('C', 'R'): 4,  ('C', 'S'): 4,  ('C', 'T'): 4,  ('C', 'V'): 4,  ('C', 'W'): 4,  ('C', 'Y'): 4,  ('D', 'A'): 4,  ('D', 'C'): 4,  ('D', 'D'): 0,  ('D', 'E'): 2,  ('D', 'F'): 4,  ('D', 'G'): 4,  ('D', 'H'): 4,  ('D', 'I'): 4,  ('D', 'K'): 4,  ('D', 'L'): 4,  ('D', 'M'): 4,  ('D', 'N'): 3,  ('D', 'P'): 4,  ('D', 'Q'): 4,  ('D', 'R'): 4,  ('D', 'S'): 4,  ('D', 'T'): 4,  ('D', 'V'): 4,  ('D', 'W'): 4,  ('D', 'Y'): 4,  ('E', 'A'): 4,  ('E', 'C'): 4,  ('E', 'D'): 2,  ('E', 'E'): 0,  ('E', 'F'): 4,  ('E', 'G'): 4,  ('E', 'H'): 4,  ('E', 'I'): 4,  ('E', 'K'): 3,  ('E', 'L'): 4,  ('E', 'M'): 4,  ('E', 'N'): 4,  ('E', 'P'): 4,  ('E', 'Q'): 2,  ('E', 'R'): 4,  ('E', 'S'): 4,  ('E', 'T'): 4,  ('E', 'V'): 4,  ('E', 'W'): 4,  ('E', 'Y'): 4,  ('F', 'A'): 4,  ('F', 'C'): 4,  ('F', 'D'): 4,  ('F', 'E'): 4,  ('F', 'F'): 0,  ('F', 'G'): 4,  ('F', 'H'): 4,  ('F', 'I'): 4,  ('F', 'K'): 4,  ('F', 'L'): 4,  ('F', 'M'): 4,  ('F', 'N'): 4,  ('F', 'P'): 4,  ('F', 'Q'): 4,  ('F', 'R'): 4,  ('F', 'S'): 4,  ('F', 'T'): 4,  ('F', 'V'): 4,  ('F', 'W'): 3,  ('F', 'Y'): 1,  ('G', 'A'): 4,  ('G', 'C'): 4,  ('G', 'D'): 4,  ('G', 'E'): 4,  ('G', 'F'): 4,  ('G', 'G'): 0,  ('G', 'H'): 4,  ('G', 'I'): 4,  ('G', 'K'): 4,  ('G', 'L'): 4,  ('G', 'M'): 4,  ('G', 'N'): 4,  ('G', 'P'): 4,  ('G', 'Q'): 4,  ('G', 'R'): 4,  ('G', 'S'): 4,  ('G', 'T'): 4,  ('G', 'V'): 4,  ('G', 'W'): 4,  ('G', 'Y'): 4,  ('H', 'A'): 4,  ('H', 'C'): 4,  ('H', 'D'): 4,  ('H', 'E'): 4,  ('H', 'F'): 4,  ('H', 'G'): 4,  ('H', 'H'): 0,  ('H', 'I'): 4,  ('H', 'K'): 4,  ('H', 'L'): 4,  ('H', 'M'): 4,  ('H', 'N'): 3,  ('H', 'P'): 4,  ('H', 'Q'): 4,  ('H', 'R'): 4,  ('H', 'S'): 4,  ('H', 'T'): 4,  ('H', 'V'): 4,  ('H', 'W'): 4,  ('H', 'Y'): 2,  ('I', 'A'): 4,  ('I', 'C'): 4,  ('I', 'D'): 4,  ('I', 'E'): 4,  ('I', 'F'): 4,  ('I', 'G'): 4,  ('I', 'H'): 4,  ('I', 'I'): 0,  ('I', 'K'): 4,  ('I', 'L'): 2,  ('I', 'M'): 3,  ('I', 'N'): 4,  ('I', 'P'): 4,  ('I', 'Q'): 4,  ('I', 'R'): 4,  ('I', 'S'): 4,  ('I', 'T'): 4,  ('I', 'V'): 1,  ('I', 'W'): 4,  ('I', 'Y'): 4,  ('K', 'A'): 4,  ('K', 'C'): 4,  ('K', 'D'): 4,  ('K', 'E'): 3,  ('K', 'F'): 4,  ('K', 'G'): 4,  ('K', 'H'): 4,  ('K', 'I'): 4,  ('K', 'K'): 0,  ('K', 'L'): 4,  ('K', 'M'): 4,  ('K', 'N'): 4,  ('K', 'P'): 4,  ('K', 'Q'): 3,  ('K', 'R'): 2,  ('K', 'S'): 4,  ('K', 'T'): 4,  ('K', 'V'): 4,  ('K', 'W'): 4,  ('K', 'Y'): 4,  ('L', 'A'): 4,  ('L', 'C'): 4,  ('L', 'D'): 4,  ('L', 'E'): 4,  ('L', 'F'): 4,  ('L', 'G'): 4,  ('L', 'H'): 4,  ('L', 'I'): 2,  ('L', 'K'): 4,  ('L', 'L'): 0,  ('L', 'M'): 2,  ('L', 'N'): 4,  ('L', 'P'): 4,  ('L', 'Q'): 4,  ('L', 'R'): 4,  ('L', 'S'): 4,  ('L', 'T'): 4,  ('L', 'V'): 3,  ('L', 'W'): 4,  ('L', 'Y'): 4,  ('M', 'A'): 4,  ('M', 'C'): 4,  ('M', 'D'): 4,  ('M', 'E'): 4,  ('M', 'F'): 4,  ('M', 'G'): 4,  ('M', 'H'): 4,  ('M', 'I'): 3,  ('M', 'K'): 4,  ('M', 'L'): 2,  ('M', 'M'): 0,  ('M', 'N'): 4,  ('M', 'P'): 4,  ('M', 'Q'): 4,  ('M', 'R'): 4,  ('M', 'S'): 4,  ('M', 'T'): 4,  ('M', 'V'): 3,  ('M', 'W'): 4,  ('M', 'Y'): 4,  ('N', 'A'): 4,  ('N', 'C'): 4,  ('N', 'D'): 3,  ('N', 'E'): 4,  ('N', 'F'): 4,  ('N', 'G'): 4,  ('N', 'H'): 3,  ('N', 'I'): 4,  ('N', 'K'): 4,  ('N', 'L'): 4,  ('N', 'M'): 4,  ('N', 'N'): 0,  ('N', 'P'): 4,  ('N', 'Q'): 4,  ('N', 'R'): 4,  ('N', 'S'): 3,  ('N', 'T'): 4,  ('N', 'V'): 4,  ('N', 'W'): 4,  ('N', 'Y'): 4,  ('P', 'A'): 4,  ('P', 'C'): 4,  ('P', 'D'): 4,  ('P', 'E'): 4,  ('P', 'F'): 4,  ('P', 'G'): 4,  ('P', 'H'): 4,  ('P', 'I'): 4,  ('P', 'K'): 4,  ('P', 'L'): 4,  ('P', 'M'): 4,  ('P', 'N'): 4,  ('P', 'P'): 0,  ('P', 'Q'): 4,  ('P', 'R'): 4,  ('P', 'S'): 4,  ('P', 'T'): 4,  ('P', 'V'): 4,  ('P', 'W'): 4,  ('P', 'Y'): 4,  ('Q', 'A'): 4,  ('Q', 'C'): 4,  ('Q', 'D'): 4,  ('Q', 'E'): 2,  ('Q', 'F'): 4,  ('Q', 'G'): 4,  ('Q', 'H'): 4,  ('Q', 'I'): 4,  ('Q', 'K'): 3,  ('Q', 'L'): 4,  ('Q', 'M'): 4,  ('Q', 'N'): 4,  ('Q', 'P'): 4,  ('Q', 'Q'): 0,  ('Q', 'R'): 3,  ('Q', 'S'): 4,  ('Q', 'T'): 4,  ('Q', 'V'): 4,  ('Q', 'W'): 4,  ('Q', 'Y'): 4,  ('R', 'A'): 4,  ('R', 'C'): 4,  ('R', 'D'): 4,  ('R', 'E'): 4,  ('R', 'F'): 4,  ('R', 'G'): 4,  ('R', 'H'): 4,  ('R', 'I'): 4,  ('R', 'K'): 2,  ('R', 'L'): 4,  ('R', 'M'): 4,  ('R', 'N'): 4,  ('R', 'P'): 4,  ('R', 'Q'): 3,  ('R', 'R'): 0,  ('R', 'S'): 4,  ('R', 'T'): 4,  ('R', 'V'): 4,  ('R', 'W'): 4,  ('R', 'Y'): 4,  ('S', 'A'): 3,  ('S', 'C'): 4,  ('S', 'D'): 4,  ('S', 'E'): 4,  ('S', 'F'): 4,  ('S', 'G'): 4,  ('S', 'H'): 4,  ('S', 'I'): 4,  ('S', 'K'): 4,  ('S', 'L'): 4,  ('S', 'M'): 4,  ('S', 'N'): 3,  ('S', 'P'): 4,  ('S', 'Q'): 4,  ('S', 'R'): 4,  ('S', 'S'): 0,  ('S', 'T'): 3,  ('S', 'V'): 4,  ('S', 'W'): 4,  ('S', 'Y'): 4,  ('T', 'A'): 4,  ('T', 'C'): 4,  ('T', 'D'): 4,  ('T', 'E'): 4,  ('T', 'F'): 4,  ('T', 'G'): 4,  ('T', 'H'): 4,  ('T', 'I'): 4,  ('T', 'K'): 4,  ('T', 'L'): 4,  ('T', 'M'): 4,  ('T', 'N'): 4,  ('T', 'P'): 4,  ('T', 'Q'): 4,  ('T', 'R'): 4,  ('T', 'S'): 3,  ('T', 'T'): 0,  ('T', 'V'): 4,  ('T', 'W'): 4,  ('T', 'Y'): 4,  ('V', 'A'): 4,  ('V', 'C'): 4,  ('V', 'D'): 4,  ('V', 'E'): 4,  ('V', 'F'): 4,  ('V', 'G'): 4,  ('V', 'H'): 4,  ('V', 'I'): 1,  ('V', 'K'): 4,  ('V', 'L'): 3,  ('V', 'M'): 3,  ('V', 'N'): 4,  ('V', 'P'): 4,  ('V', 'Q'): 4,  ('V', 'R'): 4,  ('V', 'S'): 4,  ('V', 'T'): 4,  ('V', 'V'): 0,  ('V', 'W'): 4,  ('V', 'Y'): 4,  ('W', 'A'): 4,  ('W', 'C'): 4,  ('W', 'D'): 4,  ('W', 'E'): 4,  ('W', 'F'): 3,  ('W', 'G'): 4,  ('W', 'H'): 4,  ('W', 'I'): 4,  ('W', 'K'): 4,  ('W', 'L'): 4,  ('W', 'M'): 4,  ('W', 'N'): 4,  ('W', 'P'): 4,  ('W', 'Q'): 4,  ('W', 'R'): 4,  ('W', 'S'): 4,  ('W', 'T'): 4,  ('W', 'V'): 4,  ('W', 'W'): 0,  ('W', 'Y'): 2,  ('Y', 'A'): 4,  ('Y', 'C'): 4,  ('Y', 'D'): 4,  ('Y', 'E'): 4,  ('Y', 'F'): 1,  ('Y', 'G'): 4,  ('Y', 'H'): 2,  ('Y', 'I'): 4,  ('Y', 'K'): 4,  ('Y', 'L'): 4,  ('Y', 'M'): 4,  ('Y', 'N'): 4,  ('Y', 'P'): 4,  ('Y', 'Q'): 4,  ('Y', 'R'): 4,  ('Y', 'S'): 4,  ('Y', 'T'): 4,  ('Y', 'V'): 4,  ('Y', 'W'): 2,  ('Y', 'Y'): 0}
-    # fmt: on
-
-
-    @staticmethod
-    def _make_numba_matrix(distance_matrix: dict, alphabet: str = parasail_aa_alphabet_with_unknown) -> np.ndarray:
-        """Creates a numba compatible distance matrix from a dict of tuples.
-
-        Parameters
-        ----------
-        distance_matrix:
-            Keys are tuples like ('A', 'C') with values containing an integer.
-        alphabet:
-
-        Returns
-        -------
-        distance_matrix:
-            distance lookup matrix
-        """
-
-        dm = np.zeros((len(alphabet), len(alphabet)), dtype=np.int32)
-        for (aa1, aa2), d in distance_matrix.items():
-            dm[alphabet.index(aa1), alphabet.index(aa2)] = d
-            dm[alphabet.index(aa2), alphabet.index(aa1)] = d
-        return dm
-
-    tcr_nb_distance_matrix = _make_numba_matrix(tcr_dict_distance_matrix)
-
     @staticmethod
     def _seqs2mat(seqs: Sequence[str], alphabet: str = parasail_aa_alphabet, max_len: Union[None, int] = None) -> tuple[np.ndarray, np.ndarray]:
         """Convert a collection of gene sequences into a

From 5d12de52c76bd9a373500e18b5459c5bdab9f0f4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 19 Apr 2024 08:14:36 +0000
Subject: [PATCH 23/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/scirpy/ir_dist/metrics.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index 615d3ae8f..08dd78b37 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -408,13 +408,13 @@ def _make_numba_matrix(distance_matrix: dict, alphabet: str = "ARNDCQEGHILKMFPST
     distance_matrix:
         distance lookup matrix
     """
-
     dm = np.zeros((len(alphabet), len(alphabet)), dtype=np.int32)
     for (aa1, aa2), d in distance_matrix.items():
         dm[alphabet.index(aa1), alphabet.index(aa2)] = d
         dm[alphabet.index(aa2), alphabet.index(aa1)] = d
     return dm
 
+
 class TCRdistDistanceCalculator:
     """Computes pairwise distances between TCR CDR3 sequences based on the "tcrdist" distance metric.
 
@@ -441,6 +441,7 @@ class TCRdistDistanceCalculator:
     n_jobs:
         Number of jobs (processes) to use for the pairwise distance calculation
     """
+
     parasail_aa_alphabet = "ARNDCQEGHILKMFPSTWYVBZX"
     parasail_aa_alphabet_with_unknown = "ARNDCQEGHILKMFPSTWYVBZX*"
     # fmt: off

From fcce8e7a3ab525110e529f92db61f472978db36a Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Sun, 21 Apr 2024 21:15:44 +0200
Subject: [PATCH 24/27] Update CHANGELOG

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 775981963..96ba991df 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,10 @@ and this project adheres to [Semantic Versioning][].
 [keep a changelog]: https://keepachangelog.com/en/1.0.0/
 [semantic versioning]: https://semver.org/spec/v2.0.0.html
 
+## Unreleased
+
+-   Add "TCRdist" as new metric ([#502](https://github.com/scverse/scirpy/pull/502))
+
 ## v0.16.1
 
 ### Fixes

From d6e7191d3fce8bd45873cd3346fa6f77dbd4e48f Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Sun, 21 Apr 2024 21:20:37 +0200
Subject: [PATCH 25/27] Update docstring

---
 docs/api.rst                  | 1 +
 src/scirpy/ir_dist/metrics.py | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index 958ee9293..1c4a41282 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -302,3 +302,4 @@ distance metrics
    ir_dist.metrics.HammingDistanceCalculator
    ir_dist.metrics.AlignmentDistanceCalculator
    ir_dist.metrics.FastAlignmentDistanceCalculator
+   ir_dist.metrics.TCRdistDistanceCalculator
diff --git a/src/scirpy/ir_dist/metrics.py b/src/scirpy/ir_dist/metrics.py
index 08dd78b37..157bb04b3 100644
--- a/src/scirpy/ir_dist/metrics.py
+++ b/src/scirpy/ir_dist/metrics.py
@@ -418,7 +418,8 @@ def _make_numba_matrix(distance_matrix: dict, alphabet: str = "ARNDCQEGHILKMFPST
 class TCRdistDistanceCalculator:
     """Computes pairwise distances between TCR CDR3 sequences based on the "tcrdist" distance metric.
 
-    The code of this class is heavily based on: https://github.com/agartland/pwseqdist/blob/master/pwseqdist
+    The code of this class is heavily based on `pwseqdist <https://github.com/agartland/pwseqdist/blob/master/pwseqdist>`_.
+    Reused under MIT license, Copyright (c) 2020 Andrew Fiore-Gartland.
 
     Using default weight, gap penalty, ntrim and ctrim is equivalent to the
     original distance published in :cite:`TCRdist`.
@@ -451,13 +452,13 @@ class TCRdistDistanceCalculator:
 
     def __init__(
         self,
+        cutoff: int = 20,
         *,
         dist_weight: int = 3,
         gap_penalty: int = 4,
         ntrim: int = 3,
         ctrim: int = 2,
         fixed_gappos: bool = True,
-        cutoff: int = 20,
         n_jobs: int = 1,
     ):
         self.dist_weight = dist_weight

From 1d83f8bd5350f1e634be2d7bcd8d06280e433db9 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Sun, 21 Apr 2024 21:40:54 +0200
Subject: [PATCH 26/27] Update ir_dist docstring

---
 src/scirpy/ir_dist/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/scirpy/ir_dist/__init__.py b/src/scirpy/ir_dist/__init__.py
index 05820f0b6..223fec74b 100644
--- a/src/scirpy/ir_dist/__init__.py
+++ b/src/scirpy/ir_dist/__init__.py
@@ -174,6 +174,8 @@ def _ir_dist(
         Like `airr_key`, but for `reference`.
     chain_idx_key_ref
         Like `chain_idx_key`, but for `reference`.
+    **kwargs
+        Arguments are passed to the respective :class:`~scirpy.ir_dist.metrics.DistanceCalculator` class.
 
     Returns
     -------

From 74bfce28edcf8d883b4d88cbd4d3f37fabf039bc Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Sun, 21 Apr 2024 21:54:06 +0200
Subject: [PATCH 27/27] Update description in tutorial

---
 docs/tutorials/tutorial_3k_tcr.ipynb | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/docs/tutorials/tutorial_3k_tcr.ipynb b/docs/tutorials/tutorial_3k_tcr.ipynb
index f40e8e404..dd1e91a29 100644
--- a/docs/tutorials/tutorial_3k_tcr.ipynb
+++ b/docs/tutorials/tutorial_3k_tcr.ipynb
@@ -1049,13 +1049,19 @@
     "For instance, a distance of `10` is equivalent to 2 Rs mutating into N.\n",
     "This appoach was initially proposed as *TCRdist* by Dash et al. {cite}`TCRdist`.\n",
     "\n",
-    ":::{tip}\n",
-    "You can use `metric=\"fastalignment\"` for a faster calculation at the cost of a few false-negatives (i.e. sequence pairs\n",
-    "that are actually below the distance cutoff, but are removed during a pre-filtering step). With default parameters, \n",
-    "the false-negative rate (of all sequence pairs actually below the cutoff) was ~2% on the {func}`scirpy.datasets.wu2020`\n",
-    "dataset. \n",
-    "\n",
-    "See also {class}`scirpy.ir_dist.metrics.FastAlignmentDistanceCalculator`. \n",
+    ":::{admonition} Speeding up TCR distance calculation\n",
+    ":class: tip\n",
+    "\n",
+    "Scirpy provides alternative distance metrics that are similar to `\"alignment\"`, but a lot faster: \n",
+    "\n",
+    "* `metric=\"tcrdist\"` is an implementation of [tcrdist3](https://github.com/kmayerb/tcrdist3) within scirpy. The scores\n",
+    "  are calculated differently, but it gives very similar results compared to `metric=\"alignment\"`.\n",
+    "  See also {class}`scirpy.ir_dist.metrics.TCRdistDistanceCalculator`.\n",
+    "* `metric=\"fastalignment\"` uses a heuristic to speed up the `\"alignment\"` metric at the cost of a few false-negatives (i.e. sequence pairs\n",
+    "  that are actually below the distance cutoff, but are removed during a pre-filtering step). With default parameters, \n",
+    "  the false-negative rate (of all sequence pairs actually below the cutoff) was ~2% on the {func}`scirpy.datasets.wu2020`\n",
+    "  dataset.  See also {class}`scirpy.ir_dist.metrics.FastAlignmentDistanceCalculator`.\n",
+    "   \n",
     ":::\n",
     "\n",
     "All cells with a distance between their CDR3 sequences lower than `cutoff` will be connected in the network.\n"