Skip to content

Commit

Permalink
🐛 Removed a print statement leftover from debugging
Browse files (browse the repository at this point in the history)
  • Loading branch information
jjmccollum committed Jan 15, 2025
1 parent cd249c5 commit fa4e6b2
Showing 1 changed file with 34 additions and 24 deletions.
58 changes: 34 additions & 24 deletions teiphy/collation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1749,7 +1749,7 @@ def to_distance_matrix(self, drop_constant: bool = False, proportion: bool = Fal
Default value is False.
proportion (bool, optional): An optional flag indicating whether or not to calculate distances as proportions over extant, unambiguous variation units.
Default value is False.
- show_ext: An optional flag indicating whether each cell in a distance or similarity matrix
+ show_ext: An optional flag indicating whether each cell in a distance or similarity matrix
should include the number of their extant, unambiguous variation units after the number of their disagreements.
Default value is False.
Expand All @@ -1772,16 +1772,20 @@ def to_distance_matrix(self, drop_constant: bool = False, proportion: bool = Fal
# The type of the matrix will depend on the input options:
matrix = None
if show_ext:
- matrix = np.full((len(witness_labels), len(witness_labels)), "NA", dtype=object) # strings of the form "disagreements/extant"
+ matrix = np.full(
+ (len(witness_labels), len(witness_labels)), "NA", dtype=object
+ ) # strings of the form "disagreements/extant"
elif proportion:
- matrix = np.full((len(witness_labels), len(witness_labels)), 0.0, dtype=float) # floats of the form disagreements/extant
+ matrix = np.full(
+ (len(witness_labels), len(witness_labels)), 0.0, dtype=float
+ ) # floats of the form disagreements/extant
else:
- matrix = np.full((len(witness_labels), len(witness_labels)), 0, dtype=int) # ints of the form disagreements
+ matrix = np.full((len(witness_labels), len(witness_labels)), 0, dtype=int) # ints of the form disagreements
for i, wit_1 in enumerate(witness_labels):
for j, wit_2 in enumerate(witness_labels):
extant_units = 0
disagreements = 0
- # If either of the cells for this pair of witnesses has been populated already,
+ # If either of the cells for this pair of witnesses has been populated already,
# then just copy the entry from the other side of the diagonal without recalculating:
if i > j:
matrix[i, j] = matrix[j, i]
Expand Down Expand Up @@ -1822,7 +1826,7 @@ def to_similarity_matrix(self, drop_constant: bool = False, proportion: bool = F
Default value is False.
proportion (bool, optional): An optional flag indicating whether or not to calculate similarities as proportions over extant, unambiguous variation units.
Default value is False.
- show_ext: An optional flag indicating whether each cell in a distance or similarity matrix
+ show_ext: An optional flag indicating whether each cell in a distance or similarity matrix
should include the number of their extant, unambiguous variation units after the number of agreements.
Default value is False.
Expand All @@ -1845,16 +1849,20 @@ def to_similarity_matrix(self, drop_constant: bool = False, proportion: bool = F
# The type of the matrix will depend on the input options:
matrix = None
if show_ext:
- matrix = np.full((len(witness_labels), len(witness_labels)), "NA", dtype=object) # strings of the form "agreements/extant"
+ matrix = np.full(
+ (len(witness_labels), len(witness_labels)), "NA", dtype=object
+ ) # strings of the form "agreements/extant"
elif proportion:
- matrix = np.full((len(witness_labels), len(witness_labels)), 0.0, dtype=float) # floats of the form agreements/extant
+ matrix = np.full(
+ (len(witness_labels), len(witness_labels)), 0.0, dtype=float
+ ) # floats of the form agreements/extant
else:
- matrix = np.full((len(witness_labels), len(witness_labels)), 0, dtype=int) # ints of the form agreements
+ matrix = np.full((len(witness_labels), len(witness_labels)), 0, dtype=int) # ints of the form agreements
for i, wit_1 in enumerate(witness_labels):
for j, wit_2 in enumerate(witness_labels):
extant_units = 0
agreements = 0
- # If either of the cells for this pair of witnesses has been populated already,
+ # If either of the cells for this pair of witnesses has been populated already,
# then just copy the entry from the other side of the diagonal without recalculating:
if i > j:
matrix[i, j] = matrix[j, i]
Expand All @@ -1870,8 +1878,6 @@ def to_similarity_matrix(self, drop_constant: bool = False, proportion: bool = F
wit_2_rdg_inds = [l for l, w in enumerate(wit_2_rdg_support) if w > 0]
if len(wit_1_rdg_inds) != 1 or len(wit_2_rdg_inds) != 1:
continue
- if i == 0 and j == 1:
- print(vu_id, wit_1_rdg_inds[0], wit_2_rdg_inds[0])
extant_units += 1
if wit_1_rdg_inds[0] == wit_2_rdg_inds[0]:
agreements += 1
Expand Down Expand Up @@ -2047,10 +2053,10 @@ def to_dataframe(
table_type (TableType, optional): A TableType option indicating which type of tabular output to generate.
Only applicable for tabular outputs.
Default value is "matrix".
- split_missing: An optional flag indicating whether or not to treat missing characters/variation units as having a contribution of 1 split over all states/readings;
- if False, then missing data is ignored (i.e., all states are 0).
+ split_missing: An optional flag indicating whether or not to treat missing characters/variation units as having a contribution of 1 split over all states/readings;
+ if False, then missing data is ignored (i.e., all states are 0).
Default value is True.
- show_ext: An optional flag indicating whether each cell in a distance or similarity matrix
+ show_ext: An optional flag indicating whether each cell in a distance or similarity matrix
should include the number of their extant, unambiguous variation units after the number of their disagreements/agreements.
Only applicable for tabular output formats of type \"distance\" or \"similarity\".
Default value is False.
Expand All @@ -2068,11 +2074,15 @@ def to_dataframe(
df = pd.DataFrame(matrix, index=reading_labels, columns=witness_labels)
elif table_type == TableType.distance:
# Convert the collation to a NumPy array and get its row and column labels first:
- matrix, witness_labels = self.to_distance_matrix(drop_constant=drop_constant, proportion=proportion, show_ext=show_ext)
+ matrix, witness_labels = self.to_distance_matrix(
+ drop_constant=drop_constant, proportion=proportion, show_ext=show_ext
+ )
df = pd.DataFrame(matrix, index=witness_labels, columns=witness_labels)
elif table_type == TableType.similarity:
# Convert the collation to a NumPy array and get its row and column labels first:
- matrix, witness_labels = self.to_similarity_matrix(drop_constant=drop_constant, proportion=proportion, show_ext=show_ext)
+ matrix, witness_labels = self.to_similarity_matrix(
+ drop_constant=drop_constant, proportion=proportion, show_ext=show_ext
+ )
df = pd.DataFrame(matrix, index=witness_labels, columns=witness_labels)
elif table_type == TableType.nexus:
# Convert the collation to a NumPy array and get its row and column labels first:
Expand Down Expand Up @@ -2112,10 +2122,10 @@ def to_csv(
table_type: A TableType option indicating which type of tabular output to generate.
Only applicable for tabular outputs.
Default value is "matrix".
- split_missing: An optional flag indicating whether or not to treat missing characters/variation units as having a contribution of 1 split over all states/readings;
- if False, then missing data is ignored (i.e., all states are 0).
+ split_missing: An optional flag indicating whether or not to treat missing characters/variation units as having a contribution of 1 split over all states/readings;
+ if False, then missing data is ignored (i.e., all states are 0).
Default value is True.
- show_ext: An optional flag indicating whether each cell in a distance or similarity matrix
+ show_ext: An optional flag indicating whether each cell in a distance or similarity matrix
should include the number of their extant, unambiguous variation units after the number of their disagreements/agreements.
Only applicable for tabular output formats of type \"distance\" or \"similarity\".
Default value is False.
Expand Down Expand Up @@ -2166,10 +2176,10 @@ def to_excel(
table_type: A TableType option indicating which type of tabular output to generate.
Only applicable for tabular outputs.
Default value is "matrix".
- split_missing: An optional flag indicating whether or not to treat missing characters/variation units as having a contribution of 1 split over all states/readings;
- if False, then missing data is ignored (i.e., all states are 0).
+ split_missing: An optional flag indicating whether or not to treat missing characters/variation units as having a contribution of 1 split over all states/readings;
+ if False, then missing data is ignored (i.e., all states are 0).
Default value is True.
- show_ext: An optional flag indicating whether each cell in a distance or similarity matrix
+ show_ext: An optional flag indicating whether each cell in a distance or similarity matrix
should include the number of their extant, unambiguous variation units after the number of their disagreements/agreements.
Only applicable for tabular output formats of type \"distance\" or \"similarity\".
Default value is False.
Expand Down Expand Up @@ -2426,7 +2436,7 @@ def to_file(
table_type (TableType, optional): A TableType option indicating which type of tabular output to generate.
Only applicable for tabular outputs.
Default value is "matrix".
- show_ext (bool, optional): An optional flag indicating whether each cell in a distance or similarity matrix
+ show_ext (bool, optional): An optional flag indicating whether each cell in a distance or similarity matrix
should include the number of variation units where both witnesses are extant after the number of their disagreements/agreements.
Only applicable for tabular output formats of type \"distance\" or \"similarity\".
Default value is False.
Expand Down

0 comments on commit fa4e6b2

Please sign in to comment.