Skip to content

Commit

Permalink
update cuteSV to 1.0.3
Browse files Browse the repository at this point in the history
  • Loading branch information
tjiangHIT committed Nov 28, 2019
1 parent 57632be commit d442666
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 20 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ For more detailed implementation of SV benchmarks, we show an example [here](htt
|--max_split_parts|Maximum number of split segments a read may be aligned before it is ignored.|7|
|--min_mapq|Minimum mapping quality value of alignment to be taken into account.|20|
|--min_read_len|Ignores reads that only report alignments no longer than this many bp.|500|
|--min_support|Minimum number of reads that support a SV to be reported.|3|
|--min_support|Minimum number of reads that support a SV to be reported.|10|
|--min_length|Minimum length of SV to be reported.|30|
|--max_cluster_bias_INS|Maximum distance to cluster read together for insertion.|100|
|--diff_ratio_merging_INS|Do not merge breakpoints with basepair identity more than the ratio of *default* for insertion.|0.2|
Expand All @@ -98,6 +98,10 @@ Please cite the manuscript of cuteSV before using these callsets.
---
### Changelog

cuteSV (v1.0.3):
1.Refine the genotyping model.
2.Adjust the threshold value of heterozygous alleles.

cuteSV (v1.0.2):
1.Improve the genotyping performance and enable it to be default option.
2.Make the description of parameters better.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setup(
name = "cuteSV",
version = "1.0.2",
version = "1.0.3",
description = "Long read based human genomic structural variation detection with cuteSV",
author = "Jiang Tao",
author_email = "[email protected]",
Expand Down
6 changes: 5 additions & 1 deletion src/benchmarks/cmp_NA19240.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def pase_base_info(seq):
except:
pass
if i.split('=')[0] == "SVTYPE":
info[i.split('=')[0]] = i.split('=')[1]
info[i.split('=')[0]] = i.split('=')[1][0:3]
return info


Expand All @@ -45,6 +45,8 @@ def load_base(base_path):
chr = seq[0]
pos = int(seq[1])
ALT = seq[4][1:4]
if ALT not in ["INS", "INV", "DEL", "DUP"]:
continue
if ALT == "DUP":
ALT = "INS"
info = pase_base_info(seq[7])
Expand Down Expand Up @@ -184,6 +186,8 @@ def cmp_callsets(base, call, flag, Bias, Offect):
total_base += 1
if i[3] == flag:
tp_base += 1
# else:
# print(flag, svtype, chr, i[0], i[1], i[2])
# logging.info("Base count: %d"%(total_base))
# logging.info("TP-base count: %d"%(tp_base))
logging.info("====%s===="%(callset[flag]))
Expand Down
64 changes: 50 additions & 14 deletions src/benchmarks/eva_trio.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,23 @@ def pase_info(seq):
info[i.split('=')[0]] = i.split('=')[1]
return info

def pase_info_2(seq, seq2):
    """Parse a ';'-separated INFO string, using the ALT string for BND records.

    Args:
        seq: INFO string made of ``KEY=VALUE`` items separated by ';'.
            Items without '=' (bare flags) are ignored.
        seq2: ALT string of the same record. It is only inspected when
            SVTYPE is 'BND', to extract the mate chromosome name.

    Returns:
        dict with keys 'SVLEN', 'END', 'SUPPORT' (absolute integer values,
        0 when missing or not an integer), 'SVTYPE' (string, '' when
        missing) and 'CHR2' (mate chromosome for BND records, else '').
    """
    info = {'SVLEN': 0, 'END': 0, "SVTYPE": '', "SUPPORT": 0, "CHR2": ''}
    for item in seq.split(';'):
        parts = item.split('=')
        if len(parts) < 2:
            # Bare flag with no value: nothing to record.
            continue
        key, value = parts[0], parts[1]
        if key in ("SVLEN", "END", "SUPPORT"):
            try:
                info[key] = abs(int(value))
            except ValueError:
                # Non-integer value: keep the default of 0.
                pass
        elif key == "SVTYPE":
            info[key] = value
            if value == 'BND':
                # The mate chromosome is the text before the first ':' of
                # the ALT string (seq2), not of the INFO string.
                mate = seq2.split(':')[0]
                if seq2.startswith('N'):
                    # Leading 'N' plus one bracket character, e.g. "N[chr2".
                    info['CHR2'] = mate[2:]
                else:
                    # Leading bracket character only, e.g. "]chr2".
                    info['CHR2'] = mate[1:]
    return info

def load_callset_cuteSV(path, filter, confbed):
callset = dict()
file = open(path, 'r')
Expand Down Expand Up @@ -148,29 +165,47 @@ def load_callset_svim(path, filter, confbed):

chr = seq[0]
pos = int(seq[1])
ALT = seq[4][1:4]

if ALT == "DUP":
ALT = "INS"
info = pase_info(seq[7])
info = pase_info_2(seq[7], seq[4])

if len(confbed) > 0:
if chr not in confbed:
continue
if judge_bed(pos, info["END"], confbed[chr]) == 0:
continue

if ALT not in base_call:
base_call[ALT] = dict()

if chr not in base_call[ALT]:
base_call[ALT][chr] = list()
svtype = info["SVTYPE"]
if svtype == "BND":
chr_2, pos_2 = parse_BND(seq[4])
if len(confbed) > 0:
if chr not in confbed:
continue
if judge_bed(pos, pos_2, confbed[chr]) == 0:
continue

if ALT == "INV":
base_call[ALT][chr].append([pos, info["END"] - pos + 1, info["END"], 0])
if svtype not in base_call:
base_call[svtype] = dict()
if chr not in base_call[svtype]:
base_call[svtype][chr] = dict()
if chr_2 not in base_call[svtype][chr]:
base_call[svtype][chr][chr_2] = list()

if info["SUPPORT"] >= filter:
base_call[svtype][chr][chr_2].append([pos, pos_2, 0])

elif info["SVTYPE"] == "INV":
if info["SVTYPE"] not in base_call:
base_call[info["SVTYPE"]] = dict()
if chr not in base_call[info["SVTYPE"]]:
base_call[info["SVTYPE"]][chr] = list()
if info["END"] - pos + 1 >= 50:
base_call[info["SVTYPE"]][chr].append([pos, info["END"] - pos + 1, info["END"], 0])
else:
if info["SVTYPE"] not in base_call:
base_call[info["SVTYPE"]] = dict()
if chr not in base_call[info["SVTYPE"]]:
base_call[info["SVTYPE"]][chr] = list()
if info["SVLEN"] >= 50:
base_call[ALT][chr].append([pos, info["SVLEN"], info["END"], 0])
base_call[info["SVTYPE"]][chr].append([pos, info["SVLEN"], info["END"], 0])
file.close()
return base_call

Expand Down Expand Up @@ -265,6 +300,7 @@ def eva_record(call_A, call_B, bias, offect):
else:
for i in call_A[svtype][chr]:
for j in call_B[svtype][chr]:
# if min(i[2], j[2]) >= max(i[0], j[0]):
if i[0] - offect <= j[0] <= i[2] + offect or i[0] - offect <= j[2] <= i[2] + offect or j[0] - offect <= i[0] <= j[2] + offect:
if min(i[1], j[1])*1.0/max(i[1], j[1]) >= bias:
i[3] = 1
Expand Down Expand Up @@ -377,7 +413,7 @@ def main_ctrl(args):
logging.info("Evaluate accuracy and sensitivity.")
eva_record(call_child, call_father, args.bias, args.offect)
eva_record(call_child, call_mother, args.bias, args.offect)
svtype = ["DEL", "INS", "INV"]
svtype = ["DEL", "INS", "INV", "BND"]
for i in svtype:
child_r, child_tr = statistics_true_possitive(call_child, i)
father_r, father_tr = statistics_true_possitive(call_father, i)
Expand Down
2 changes: 1 addition & 1 deletion src/cuteSV/cuteSV
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def main_ctrl(args):
para = [("%s%s.sigs"%(temporary_dir, svtype), chr, svtype, args.min_support,
args.max_cluster_bias_INV, args.min_size, args.input)]
result.append(analysis_pools.map_async(run_inv, para))
pass
# pass
if svtype == 'DEL':
para = [("%s%s.sigs"%(temporary_dir, svtype), chr, svtype, args.min_support,
args.diff_ratio_merging_DEL, args.max_cluster_bias_DEL, args.diff_ratio_filtering_DEL,
Expand Down
4 changes: 2 additions & 2 deletions src/cuteSV/cuteSV_Description.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
'''
import argparse

VERSION = '1.0.2'
VERSION = '1.0.3'

class cuteSVdp(object):
'''
Expand Down Expand Up @@ -97,7 +97,7 @@ def parseArgs(argv):
GroupSVCluster = parser.add_argument_group('Generation of SV clusters')
GroupSVCluster.add_argument('-s', '--min_support',
help = "Minimum number of reads that support a SV to be reported.[%(default)s]",
default = 3,
default = 10,
type = int)
GroupSVCluster.add_argument('-l', '--min_size',
help = "Minimum size of SV to be reported.[%(default)s]",
Expand Down

0 comments on commit d442666

Please sign in to comment.