
Merge pull request #542 from fritzsedlazeck/master-pub
Merged version 2.6.0
hermannromanek authored Feb 11, 2025
2 parents 0b36db9 + 28d6fa5 commit fdf6e6d
Showing 14 changed files with 539 additions and 148 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/release.yaml
@@ -20,7 +20,7 @@ jobs:
- name: Build a binary wheel and a source tarball
run: python3 -m build
- name: Store the distribution packages
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: python-package-distributions
path: dist/
@@ -40,7 +40,7 @@ jobs:

steps:
- name: Download all the dists
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: python-package-distributions
path: dist/
4 changes: 2 additions & 2 deletions README.md
@@ -17,11 +17,11 @@ You can install Sniffles2 using pip or conda using:

or

`conda install sniffles=2.5.3`
`conda install sniffles=2.6.0`

If you previously installed Sniffles1 using conda and want to upgrade to Sniffles2, you can use:

`conda update sniffles=2.5.3`
`conda update sniffles=2.6.0`

## Requirements
* Python ==3.10.15
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,6 +1,6 @@
[metadata]
name = sniffles
version = 2.5.3
version = 2.6.0
author = Moritz Smolka, Hermann Romanek
author_email = [email protected], [email protected]
description = A fast structural variation caller for long-read sequencing data
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@

setup(
name='sniffles',
version='2.5.3',
version='2.6.0',
packages=find_packages(),
url='https://github.com/fritzsedlazeck/Sniffles',
license='MIT',
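The release bumps the same version string in three places: setup.cfg, setup.py (above), and the BUILD constant in src/sniffles/config.py (below). A quick sanity check that an installed copy picked up the bump — a hedged sketch only, assuming the package layout shown in this diff:

```python
# Verify the installed package matches the 2.6.0 release bumped in this commit.
# BUILD and SNF_VERSION are the constants defined in src/sniffles/config.py.
from sniffles.config import BUILD, SNF_VERSION

assert BUILD == "2.6.0", f"unexpected Sniffles build: {BUILD}"
print(BUILD, SNF_VERSION)  # expected: 2.6.0 S2_rc4
```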
50 changes: 40 additions & 10 deletions src/sniffles/config.py
@@ -13,6 +13,7 @@
import sys
import datetime
import argparse
import tempfile
from collections import defaultdict

from typing import Union, Optional
@@ -21,7 +22,7 @@
from sniffles.region import Region

VERSION = "Sniffles2"
BUILD = "2.5.3"
BUILD = "2.6.0"
SNF_VERSION = "S2_rc4"


@@ -41,6 +42,8 @@ def tobool(v):


class SnifflesConfig(argparse.Namespace):
GLOBAL: 'SnifflesConfig'

header = f"Sniffles2: A fast structural variant (SV) caller for long-read sequencing data\n Version {BUILD}\n Contact: [email protected]"
example = """ Usage example A - Call SVs for a single sample:
sniffles --input sorted_indexed_alignments.bam --vcf output.vcf
@@ -91,6 +94,7 @@ def sort(self):
threads: int
contig: Optional[str]
run_id: str
tmp_dir: str

@property
def vcf_output_bgz(self) -> Optional[bool]:
@@ -101,8 +105,8 @@ def vcf_output_bgz(self) -> Optional[bool]:
path, ext = os.path.splitext(self.vcf)
return ext == ".gz" or ext == ".bgz"


def add_main_args(self, parser):
@staticmethod
def add_main_args(parser):
main_args = parser.add_argument_group("Common parameters")
main_args.add_argument("-i", "--input", metavar="IN", type=str, help="For single-sample calling: A coordinate-sorted and indexed .bam/.cram (BAM/CRAM format) file containing aligned reads. - OR - For multi-sample calling: Multiple .snf files (generated before by running Sniffles2 for individual samples with --snf)", required=True, nargs="+")
main_args.add_argument("-v", "--vcf", metavar="OUT.vcf", type=str, help="VCF output filename to write the called and refined SVs to. If the given filename ends with .gz, the VCF file will be automatically bgzipped and a .tbi index built for it.", required=False)
@@ -113,12 +117,15 @@ def add_main_args(self, parser):
main_args.add_argument("-t", "--threads", metavar="N", type=int, help="Number of parallel threads to use (speed-up for multi-core CPUs)", default=4)
main_args.add_argument("-c", "--contig", default=None, type=str, help="(Optional) Only process the specified contigs. May be given more than once.", action="append")
main_args.add_argument("--regions", metavar="REGIONS.bed", type=str, help="(Optional) Only process the specified regions.", default=None)
main_args.add_argument("--tmp-dir", type=str, help="(Optional) Directory where temporary files are written, must exist. If it doesn't, default path is used", default="")

minsupport: Union[str, int]
minsvlen: int
minsvlen_screen_ratio: float
max_unknown_pct: float

def add_filter_args(self, parser):
@staticmethod
def add_filter_args(parser):
filter_args = parser.add_argument_group("SV Filtering parameters")
filter_args.add_argument("--minsupport", metavar="auto", type=str, help="Minimum number of supporting reads for a SV to be reported (default: automatically choose based on coverage)", default="auto")
filter_args.add_argument("--minsupport-auto-mult", metavar="0.1/0.025", type=float, help="Coverage based minimum support multiplier for germline mode (only for auto minsupport) ", default=None)
@@ -143,12 +150,14 @@ def add_filter_args(self, parser):
filter_args.add_argument("--min-alignment-length", metavar="N", type=int, help="Reads with alignments shorter than this length (in bp) will be ignored", default=argparse.SUPPRESS)
filter_args.add_argument("--phase-conflict-threshold", metavar="F", type=float, help="Maximum fraction of conflicting reads permitted for SV phase information to be labelled as PASS (only for --phase)", default=0.1)
filter_args.add_argument("--detect-large-ins", help="Infer insertions that are longer than most reads and therefore are spanned by few alignments only.", metavar="True", type=tobool, default=True)
filter_args.add_argument("--max-unknown-pct", help="Maximum percentage of N for an SV to be emitted.", metavar="0.5", type=float, default=0.5)
# filter_args.add_argument("--large-ins-threshold", metavar="N", type=int, help="Minimum clipping at read ends to be considered a potential large insertion (only with --detect-large-ins)", default=5000)

cluster_binsize: int
cluster_binsize_combine_mult: int

def add_cluster_args(self, parser):
@staticmethod
def add_cluster_args(parser):
cluster_args = parser.add_argument_group("SV Clustering parameters")
cluster_args.add_argument("--cluster-binsize", metavar="N", type=int, help="Initial screening bin size in bp", default=100)
cluster_args.add_argument("--cluster-r", metavar="R", type=float, help="Multiplier for SV start position standard deviation criterion in cluster merging", default=2.5)
@@ -161,7 +170,8 @@
genotype_ploidy: int
genotype_vcf: str

def add_genotype_args(self, parser):
@staticmethod
def add_genotype_args(parser):
genotype_args = parser.add_argument_group("SV Genotyping parameters")
genotype_args.add_argument("--genotype-ploidy", metavar="N", type=int, help="Sample ploidy (currently fixed at value 2)", default=2)
genotype_args.add_argument("--genotype-error", metavar="N", type=float, help="Estimated false positive rate for leads (relating to total coverage)", default=0.05)
@@ -182,6 +192,7 @@ def add_genotype_args(self, parser):
combine_pctseq: float
combine_max_inmemory_results: int
combine_support_threshold: int
combine_population: str

@classmethod
def add_multi_args(cls, parser):
@@ -203,6 +214,7 @@ def add_multi_args(cls, parser):
multi_args.add_argument("--combine-pctseq", default=0.7, type=float, help="Minimum alignment distance as percent of SV length to be merged. Set to 0 to disable alignments for merging.")
multi_args.add_argument("--combine-max-inmemory-results", default=20, type=int, help=argparse.SUPPRESS)
multi_args.add_argument("--combine-support-threshold", default=3, metavar="N", type=int, help="Minimum support for SVs to be considered for multi-sample calling.")
multi_args.add_argument("--combine-population", metavar="population.snf", type=str, help="Name of a population SNF to enable population annotation.")
multi_args.add_argument("--re-qc", metavar="auto", default="auto", type=str, help="Re-QC SVs from SNF files. Set to 0 to disable re-qc of SNF files. Set to 1 to force re-qc. Default of 'auto' will try to fix known errors in SNF files.")

# multi_args.add_argument("--combine-exhaustive", help="(DEV) Disable performance optimization in multi-calling", default=False, action="store_true")
@@ -211,7 +223,11 @@ def add_multi_args(cls, parser):

allow_overwrite: bool

def add_postprocess_args(self, parser):
@staticmethod
def add_postprocess_args(parser):
"""
Postprocessing arguments
"""
postprocess_args = parser.add_argument_group("SV Postprocessing, QC and output parameters")
postprocess_args.add_argument("--output-rnames", help="Output names of all supporting reads for each SV in the RNAMEs info field", default=False, action="store_true")
postprocess_args.add_argument("--no-consensus", help="Disable consensus sequence generation for insertion SV calls (may improve performance)", default=False, action="store_true")
@@ -228,7 +244,8 @@ def add_postprocess_args(self, parser):
mosaic_min_reads: int = 3
mosaic_use_strand_thresholds: int = 10

def add_mosaic_args(self, parser):
@staticmethod
def add_mosaic_args(parser):
mosaic_args = parser.add_argument_group("Mosaic calling mode parameters")
mosaic_args.add_argument("--mosaic", help="Set Sniffles run mode to detect rare, somatic and mosaic SVs", default=False, action="store_true")
mosaic_args.add_argument("--mosaic-af-max", help="Maximum allele frequency for which SVs are considered mosaic", metavar="F", default=0.218, type=float)
@@ -243,8 +260,15 @@ def add_mosaic_args(self, parser):
qc_nm: bool
combine_consensus: bool
low_memory: bool
dev_population_snf: str
dev_population_min_gt: float
dev_debug: int

def add_developer_args(self, parser):
@staticmethod
def add_developer_args(parser):
"""
Developer arguments
"""
developer_args = parser.add_argument_group("Developer parameters")

developer_args.add_argument("--dev-emit-sv-lengths", default=False, action="store_true", help=argparse.SUPPRESS)
@@ -275,13 +299,16 @@ def add_developer_args(self, parser):
developer_args.add_argument("--cluster-resplit-binsize", metavar="N", type=int, default=20, help=argparse.SUPPRESS)
developer_args.add_argument("--dev-trace-read", default=False, metavar="read_id", type=str, help=argparse.SUPPRESS)
developer_args.add_argument("--dev-split-max-query-distance-mult", metavar="N", type=int, default=5, help=argparse.SUPPRESS)
developer_args.add_argument("--dev-no-qc", default=False, action="store_true", help=argparse.SUPPRESS) # noqc + mapq0 + minAlnLen0
developer_args.add_argument("--dev-no-qc", default=False, action="store_true", help=argparse.SUPPRESS) # noqc + mapq0 + minAlnLen0
developer_args.add_argument("--dev-disable-interblock-threads", default=False, help=argparse.SUPPRESS, action="store_true")
developer_args.add_argument("--dev-combine-medians", default=False, help=argparse.SUPPRESS, action="store_true")
developer_args.add_argument("--dev-monitor-memory", metavar="N", type=int, default=0, help=argparse.SUPPRESS)
developer_args.add_argument("--dev-monitor-filename", metavar="memory.csv", type=str, help=argparse.SUPPRESS)
developer_args.add_argument("--dev-debug-log", default=False, action="store_true", help=argparse.SUPPRESS)
developer_args.add_argument("--dev-progress-log", default=False, action="store_true", help=argparse.SUPPRESS)
developer_args.add_argument("--dev-population-snf", metavar="population.snf", type=str, help=argparse.SUPPRESS)
developer_args.add_argument("--dev-population-min-gt", default=0.75, type=float, help=argparse.SUPPRESS) # min
developer_args.add_argument("--dev-debug", default=0, type=int, help=argparse.SUPPRESS) # Enable debug connection on the given port

# developer_args.add_argument("--qc-strand", help="(DEV)", default=False, action="store_true")

@@ -302,6 +329,9 @@ def __init__(self, *args, **kwargs):

parser.parse_args(args=args or None, namespace=self)

if not self.tmp_dir or not os.path.exists(self.tmp_dir):
self.tmp_dir = tempfile.gettempdir()

if self.quiet:
sys.stdout = open(os.devnull, "w")

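Two user-facing additions stand out in this file: the new --tmp-dir option (add_main_args) and --combine-population (add_multi_args). Note that --tmp-dir is only honoured if the directory already exists; otherwise __init__ silently falls back to the system temp directory. A minimal sketch of that fallback, mirroring the lines added to SnifflesConfig.__init__ (the helper name is illustrative, not part of the commit):

```python
# Illustrative helper mirroring the --tmp-dir fallback added to
# SnifflesConfig.__init__: keep the requested directory only if it exists,
# otherwise fall back to the system default from tempfile.gettempdir().
import os
import tempfile

def resolve_tmp_dir(requested: str = "") -> str:
    if not requested or not os.path.exists(requested):
        return tempfile.gettempdir()
    return requested

print(resolve_tmp_dir())                # e.g. /tmp
print(resolve_tmp_dir("/no/such/dir"))  # also falls back to the system temp dir
```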
42 changes: 37 additions & 5 deletions src/sniffles/genotyping.py
@@ -12,12 +12,14 @@
Genotyping
"""
import math
from dataclasses import dataclass
from typing import Any

from sniffles.postprocessing import rescale_support
from sniffles.sv import SVCall


class UnknownGenotype(Exception):
class UnknownGenotypeError(Exception):
"""
Unable to determine genotype
"""
@@ -41,6 +43,23 @@ def likelihood_ratio(q1, q2):
return 0


class UnknownGenotype:
...


@dataclass
class Genotype:
a: int
b: int
qual: int # GQ, 0-60
dr: int
dv: int
phase: Any

UNKNOWN = UnknownGenotype()



class Genotyper:
"""
Generic genotyping
@@ -85,9 +104,16 @@ def _get_coverage_from_list(self, coverage_list: list = None) -> int:
coverage_list = [each_coverage for each_coverage in coverage_list if each_coverage != 0]

if len(coverage_list) > 0:
return round(sum(coverage_list) / len(coverage_list))
if None in coverage_list:
new_coverage_list = [cov_value for cov_value in coverage_list if cov_value is not None]
if len(new_coverage_list) > 0:
return round(sum(new_coverage_list) / len(new_coverage_list))
else:
raise UnknownGenotypeError()
else:
return round(sum(coverage_list) / len(coverage_list))
else:
raise UnknownGenotype()
raise UnknownGenotypeError()

def _filter_by_z_score(self, z_score: float) -> bool:
"""
@@ -106,7 +132,7 @@ def calculate(self):
support = self._calculate_support()
try:
coverage = self._calculate_coverage(support)
except UnknownGenotype:
except UnknownGenotypeError:
return

if support > coverage:
@@ -191,7 +217,13 @@ def _calculate_coverage(self, support: int) -> int:


class DeletionGenotyper(Genotyper):
...

def _calculate_coverage(self, support: int) -> int:
svcall = self.svcall
if support_sa := svcall.get_info('SUPPORT_SA'):
return self._get_coverage_from_list([svcall.coverage_start + support_sa, svcall.coverage_center + support_sa, svcall.coverage_end + support_sa])
else:
return super()._calculate_coverage(support)


GENOTYPER_BY_TYPE = {
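The key behavioural changes in this file: the UnknownGenotype exception is renamed to UnknownGenotypeError (freeing the old name for the UNKNOWN sentinel next to the new Genotype dataclass), coverage averaging now tolerates missing (None) values, and DeletionGenotyper adds SUPPORT_SA to the coverage estimate when present. A hedged, standalone sketch of the None-tolerant averaging (the function name is illustrative):

```python
# Illustrative sketch of the None-tolerant coverage averaging added to
# _get_coverage_from_list: zeros and missing (None) entries are ignored and an
# error is raised only when no usable coverage value remains.
class UnknownGenotypeError(Exception):
    """Unable to determine genotype."""

def mean_coverage(coverage_list: list) -> int:
    usable = [c for c in coverage_list if c not in (0, None)]
    if not usable:
        raise UnknownGenotypeError()
    return round(sum(usable) / len(usable))

print(mean_coverage([12, None, 0, 16]))  # 14
```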
4 changes: 2 additions & 2 deletions src/sniffles/leadprov.py
@@ -28,7 +28,7 @@

@dataclass
class Lead:
read_id: int = None
read_id: int = None # or tuple[int, str, str] for phased reads, with (read_id, HP, PS)
read_qname: str = None
contig: str = None
ref_start: int = None
@@ -440,7 +440,7 @@ def read_itersplits(read_id, read, contig, config, read_nm):
if trace_read:
print(f"[DEV_TRACE_READ] [0c/4] [LeadProvider.read_itersplits] [{read.query_name}] all_leads: {all_leads}")

sv.classify_splits(read, all_leads, config, contig)
all_leads = sv.classify_splits(read, all_leads, config, contig)

if trace_read:
print(f"[DEV_TRACE_READ] [0c/4] [LeadProvider.read_itersplits] [{read.query_name}] classify_splits(all_leads): {all_leads}")
6 changes: 5 additions & 1 deletion src/sniffles/parallel.py
@@ -30,6 +30,7 @@
from sniffles import sv
from sniffles.region import Region
from sniffles.result import Result, ErrorResult, CallResult, GenotypeResult, CombineResult
from sniffles.snfp import PopulationSNF


@dataclass
Expand Down Expand Up @@ -93,7 +94,7 @@ def build_leadtab(self):
externals = self.lead_provider.build_leadtab(self.regions if self.regions else [Region(self.contig, self.start, self.end)], self.bam)
return externals, self.lead_provider.read_count

def call_candidates(self, keep_qc_fails, config):
def call_candidates(self, keep_qc_fails, config) -> list[sv.SVCall]:
candidates = []
for svtype in sv.TYPES:
for svcluster in cluster.resolve(svtype, self.lead_provider, config, self.tandem_repeats):
@@ -343,6 +344,9 @@ def execute(self, worker: 'SnifflesWorker' = None):
if self.config.combine_close_handles:
snf_in.close()

if self.config.combine_population:
self.config.combine_population = PopulationSNF.open(self.config.combine_population)

result = self.result_class(self, [], 0)

# block_groups_keep_threshold=5000
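This hunk wires the new --combine-population option (defined in config.py above) into multi-sample combining: when a population SNF path is given, the config value is replaced with an opened PopulationSNF handle before results are produced. A condensed sketch — PopulationSNF.open and its call shape are taken from the hunk above; anything beyond that is assumed:

```python
# Condensed sketch of the population-annotation hook added above: open the
# population SNF once, up front, so later combine steps can annotate calls
# against it. Only the names shown in the diff are used here.
from sniffles.snfp import PopulationSNF

def maybe_open_population(config):
    if config.combine_population:  # set via --combine-population population.snf
        config.combine_population = PopulationSNF.open(config.combine_population)
    return config
```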
