Skip to content

Commit 770db8f

Browse files
committed
* Fixed bug where kallisto quantification would sometimes fail to sum transcripts to genes properly.
1 parent 8f5da01 commit 770db8f

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

HISTORY.rst

+6
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
History
33
=======
44

5+
4.?.? (2024-??-??)
6+
-------------------
7+
8+
Fixed
9+
******
10+
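* Fixed a bug where kallisto quantification would sometimes fail to sum transcripts to genes properly. Rows containing null values after the transcript-to-gene join (e.g. transcripts with no matching gene ID) are now dropped before summation.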
511

612
4.1.0 (2024-09-16)
713
-------------------

rnalysis/fastq.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -2016,16 +2016,15 @@ def _sum_transcripts_to_genes(tpm: pl.DataFrame, counts: pl.DataFrame, gtf_path:
20162016
pl.exclude(counts.columns[0]).sum().truediv(10 ** 6)).collect()
20172017
tpm_cpy = tpm.lazy().join(transcript2gene, left_on=tpm.columns[0], right_on='Transcript ID',
20182018
how='left')
2019-
tpm_by_gene = tpm_cpy.drop(cs.first()).group_by('Gene ID').sum()
2019+
tpm_by_gene = tpm_cpy.drop(cs.first()).drop_nulls().group_by('Gene ID').sum()
20202020
count_per_gene = tpm_by_gene.with_columns(
20212021
[(pl.col(col) * library_sizes[col][0]).alias(col) for col in tpm.columns[1:]]).collect()
20222022
elif summation_method == 'raw':
20232023
count_cpy = counts.lazy().join(transcript2gene, left_on=tpm.columns[0],
20242024
right_on='Transcript ID', how='left')
2025-
count_per_gene = count_cpy.drop(cs.first()).group_by('Gene ID').sum().collect()
2025+
count_per_gene = count_cpy.drop(cs.first()).drop_nulls().group_by('Gene ID').sum().collect()
20262026
else:
20272027
raise ValueError(f"Invalid value for 'summation_method': '{summation_method}'.")
2028-
20292028
if len(count_per_gene) == 0:
20302029
continue
20312030
pbar.update(8)

0 commit comments

Comments
 (0)