Skip to content

Commit

Permalink
Merge branch 'refs/heads/master-priv' into master-pub
Browse files Browse the repository at this point in the history
  • Loading branch information
Hermann Romanek committed Dec 16, 2024
2 parents e07575c + 695cf2b commit 23b810f
Show file tree
Hide file tree
Showing 10 changed files with 86 additions and 21 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ You can install Sniffles2 using pip or conda using:

or

`conda install sniffles=2.5.2`
`conda install sniffles=2.5.3`

If you previously installed Sniffles1 using conda and want to upgrade to Sniffles2, you can use:

`conda update sniffles=2.5.2`
`conda update sniffles=2.5.3`

## Requirements
* Python ==3.10.15
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = sniffles
version = 2.5.2
version = 2.5.3
author = Moritz Smolka, Hermann Romanek
author_email = [email protected], [email protected]
description = A fast structural variation caller for long-read sequencing data
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='sniffles',
version='2.5.2',
version='2.5.3',
packages=find_packages(),
url='https://github.com/fritzsedlazeck/Sniffles',
license='MIT',
Expand Down
33 changes: 31 additions & 2 deletions src/sniffles/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from sniffles.region import Region

VERSION = "Sniffles2"
BUILD = "2.5"
BUILD = "2.5.3"
SNF_VERSION = "S2_rc4"


Expand Down Expand Up @@ -168,7 +168,26 @@ def add_genotype_args(self, parser):
genotype_args.add_argument("--sample-id", type=str, help="Custom ID for this sample, used for later multi-sample calling (stored in .snf)", default=None)
genotype_args.add_argument("--genotype-vcf", metavar="IN.vcf", type=str, help="Determine the genotypes for all SVs in the given input .vcf file (forced calling). Re-genotyped .vcf will be written to the output file specified with --vcf.", default=None)

def add_multi_args(self, parser):
combine_high_confidence: float
combine_low_confidence: float
combine_low_confidence_abs: int
combine_null_min_coverage: int
combine_match: int
combine_match_max: int
combine_separate_intra: bool
combine_output_filtered: bool
combine_pair_relabel: bool
combine_pair_relabel_threshold: int
combine_close_handles: bool
combine_pctseq: float
combine_max_inmemory_results: int
combine_support_threshold: int

@classmethod
def add_multi_args(cls, parser):
"""
Arguments for multi-sample calling
"""
multi_args = parser.add_argument_group("Multi-Sample Calling / Combine parameters")
multi_args.add_argument("--combine-high-confidence", metavar="F", type=float, help="Minimum fraction of samples in which a SV needs to have individually passed QC for it to be reported in combined output (a value of zero will report all SVs that pass QC in at least one of the input samples)", default=0.0)
multi_args.add_argument("--combine-low-confidence", metavar="F", type=float, help="Minimum fraction of samples in which a SV needs to be present (failed QC) for it to be reported in combined output", default=0.2)
Expand All @@ -183,6 +202,9 @@ def add_multi_args(self, parser):
multi_args.add_argument("--combine-close-handles", help="Close .SNF file handles after each use. May lower performance, but may be required when maximum number of file handles supported by OS is reached when merging many samples.", default=False, action="store_true")
multi_args.add_argument("--combine-pctseq", default=0.7, type=float, help="Minimum alignment distance as percent of SV length to be merged. Set to 0 to disable alignments for merging.")
multi_args.add_argument("--combine-max-inmemory-results", default=20, type=int, help=argparse.SUPPRESS)
multi_args.add_argument("--combine-support-threshold", default=3, metavar="N", type=int, help="Minimum support for SVs to be considered for multi-sample calling.")
multi_args.add_argument("--re-qc", metavar="auto", default="auto", type=str, help="Re-QC SVs from SNF files. Set to 0 to disable re-qc of SNF files. Set to 1 to force re-qc. Default of 'auto' will try to fix known errors in SNF files.")

# multi_args.add_argument("--combine-exhaustive", help="(DEV) Disable performance optimization in multi-calling", default=False, action="store_true")
# multi_args.add_argument("--combine-relabel-rare", help="(DEV)", default=False, action="store_true")
# multi_args.add_argument("--combine-with-missing", help="(DEV)", default=False, action="store_true")
Expand Down Expand Up @@ -326,6 +348,13 @@ def __init__(self, *args, **kwargs):
if self.dev_no_qc:
self.no_qc = True

if self.re_qc == 'auto':
self.reqc = 'auto'
elif self.re_qc in ('0', '1'):
self.reqc = bool(int(self.re_qc))
else:
util.fatal_error('Invalid value for --re-qc, allowed values are: auto, 0, 1')

if not hasattr(self, 'mapq'):
self.mapq = 0 if self.dev_no_qc else 20
if not hasattr(self, 'min_alignment_length'):
Expand Down
13 changes: 11 additions & 2 deletions src/sniffles/genotyping.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,19 @@ class Genotyper:
_support: int
_coverage: float

def __init__(self, svcall: SVCall, config, phase):
def __init__(self, svcall: SVCall, config, phase: tuple | None):
self.svcall = svcall
self.config = config
self.phase = phase
self.phase = phase if phase is not None else self._get_phase()

def _get_phase(self) -> tuple | None:
"""
Get phasing information from the genotyped SV, or None if it can't be determined.
"""
try:
return self.svcall.genotypes[0][5]
except (KeyError, IndexError):
return None

def _calculate_support(self) -> int:
return self.svcall.support
Expand Down
13 changes: 10 additions & 3 deletions src/sniffles/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def call_candidates(self, keep_qc_fails, config):
self.coverage_average_total = self.coverage_average_fwd + self.coverage_average_rev
return candidates

def finalize_candidates(self, candidates, keep_qc_fails, config):
def finalize_candidates(self, candidates: list['SVCall'], keep_qc_fails, config):
passed = []
for svcall in candidates:
svcall.qc = svcall.qc and postprocessing.qc_sv(svcall, config)
Expand Down Expand Up @@ -350,6 +350,7 @@ def execute(self, worker: 'SnifflesWorker' = None):
bin_min_size = self.config.combine_min_size
bin_max_candidates = max(25, int(len(self.config.snf_input_info) * 0.5))
overlap_abs = self.config.combine_overlap_abs
support_threshold = self.config.combine_support_threshold

sample_internal_ids = set(samples_headers_snf.keys())

Expand All @@ -375,15 +376,21 @@ def execute(self, worker: 'SnifflesWorker' = None):

for svtype in sv.TYPES:
bins = {}
# svcandidates=[]
for sample_internal_id in samples_headers_snf.keys(): # fetch current block for each file
for sample_internal_id, sample_snf in samples_headers_snf.items(): # fetch current block for each file
blocks = samples_blocks[sample_internal_id]
reqc = sample_snf.reqc

if blocks is None:
continue
for block in blocks: # usually only 1 block
for cand in block[svtype]:
# if config.combine_pass_only and (cand.qc==False or cand.filter!="PASS"):
# continue
if cand.support < support_threshold:
continue

if reqc:
postprocessing.genotype_sv(cand, self.config)

cand.sample_internal_id = sample_internal_id

Expand Down
4 changes: 2 additions & 2 deletions src/sniffles/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def qc_sv(svcall: SVCall, config: SnifflesConfig):
elif svcall.coverage_upstream < svcall.coverage_downstream:
if (upstream_downstream_diff > svcall.coverage_upstream / svcall.coverage_downstream or
svcall.coverage_upstream > svcall.coverage_center):
svcall.filter = "COV_CHANGE_DEL"
svcall.filter = "COV_CHANGE_DUP"
return False
else:
pass
Expand Down Expand Up @@ -402,7 +402,7 @@ def binomial_coef(n, k):
return math.factorial(n) / (math.factorial(k) * math.factorial(n - k))


def genotype_sv(svcall: SVCall, config, phase):
def genotype_sv(svcall: SVCall, config, phase: tuple | None = None):
from sniffles.genotyping import GENOTYPER_BY_TYPE, Genotyper

GENOTYPER_BY_TYPE.get(svcall.svtype, Genotyper)(svcall, config, phase).calculate()
Expand Down
24 changes: 22 additions & 2 deletions src/sniffles/snf.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,22 @@
# Maintainer: Hermann Romanek
# Contact: [email protected]
#
import logging
import os
import pickle
import json
import gzip
import math
from collections import OrderedDict
from typing import Optional, Union
from functools import cached_property
from typing import Optional

from sniffles import sv
from sniffles.config import SnifflesConfig


log = logging.getLogger(__name__)


class SNFile:
header_length: int
_header: Optional[dict]
Expand All @@ -42,6 +46,22 @@ def index(self) -> dict:
def header(self) -> dict:
return self._header

@cached_property
def reqc(self) -> bool:
"""
Was this file created by an old Version of sniffles we want to redo qc?
"""
if self.config.reqc == 'auto':
try:
build, _, _ = self.header['config']['build'].partition('-')
except (KeyError, AttributeError):
log.warning(f'Unable to determine version of SNF file {self.filename} for auto-reqc')
return True
else:
return build < '2.5.3'
else:
return self.config.reqc

def is_open(self) -> bool:
return self.handle is not False

Expand Down
6 changes: 3 additions & 3 deletions src/sniffles/sniffles
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,7 @@ def Sniffles2_Main(processes: list[parallel.SnifflesWorker]):
else:
util.fatal_error_main("Failed to determine .snf files to be combined. Please specify either one or more .snf files OR a single .tsv file as input for multi-calling.")

log.info("The following samples will be processed in multi-calling:")
for snf_internal_id, (input_filename, sample_id) in enumerate(input_snfs_sample_ids):
snf_in = snf.SNFile(config, open(input_filename, "rb"), filename=input_filename)
snf_in.read_header()
Expand All @@ -378,7 +379,9 @@ def Sniffles2_Main(processes: list[parallel.SnifflesWorker]):
else:
sample_id, _ = os.path.splitext(os.path.basename(input_filename))
config.snf_input_info.append({"internal_id": snf_internal_id, "sample_id": sample_id, "filename": input_filename})
reqc_info = f' (Rerunning QC)' if snf_in.reqc else ''
snf_in.close()
log.info(f" {input_filename} (sample ID in output VCF='{sample_id}'{reqc_info})")

if not config.combine_consensus:
for info in config.snf_input_info:
Expand Down Expand Up @@ -422,9 +425,6 @@ def Sniffles2_Main(processes: list[parallel.SnifflesWorker]):
task_id = tasks[-1].id + 1

log.info(f"Verified headers for {len(input_snfs_sample_ids)} .snf files.")
log.info("The following samples will be processed in multi-calling:")
for info in config.snf_input_info:
log.info(f" {info['filename']} (sample ID in output VCF='{info['sample_id']}')")

if config.mode != "genotype_vcf" and config.vcf is not None:
vcf_out.write_header(contig_lengths)
Expand Down
6 changes: 3 additions & 3 deletions src/sniffles/sv.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,13 +203,13 @@ def call(self, config, task) -> Optional[SVCall]:
total_count >= config.combine_low_confidence_abs))

if not qc:
if not config.no_qc and n_samples == 1:
if config.no_qc and n_samples == 1:
pass
else:
return None

if not config.combine_output_filtered and not any(cand.qc and cand.filter == "PASS" for cand in self.candidates):
if not config.no_qc and n_samples == 1:
if config.no_qc and n_samples == 1:
pass
else:
return None
Expand Down Expand Up @@ -263,7 +263,7 @@ def call(self, config, task) -> Optional[SVCall]:
genotypes = {0: (cons_a, cons_b, int(sum(consensus_info["qual"]) / consensus_info["count"]),
sum(consensus_info["dr"]), sum(consensus_info["dv"]))}
if cons_a != 1 and cons_b != 1:
if not config.no_qc and n_samples == 1:
if config.no_qc and n_samples == 1:
pass
else:
return None
Expand Down

0 comments on commit 23b810f

Please sign in to comment.