From 5b5575ecb4c8017e4bf9c426dbb77faca17e89a7 Mon Sep 17 00:00:00 2001 From: Aleksander Jankowski Date: Fri, 17 Nov 2017 15:57:21 +0100 Subject: [PATCH 1/3] Fix a bug in handling supplementary alignments SAM format specification does not impose an order of alignment lines (i.e. supplementary alignments might appear before or after the representative alignment). Here in code, the first alignment is taken as "primary", all the following as "supplementary", hence the flag is_supplementary should be ignored. The decision regarding which alignment is representative is usually arbitrary. --- hicexplorer/hicBuildMatrix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hicexplorer/hicBuildMatrix.py b/hicexplorer/hicBuildMatrix.py index 88fba788..61ce991e 100644 --- a/hicexplorer/hicBuildMatrix.py +++ b/hicexplorer/hicBuildMatrix.py @@ -475,7 +475,7 @@ def get_supplementary_alignment(read, pysam_obj): supplementary_alignment = [] for i in range(len(other_alignments)): _sup = pysam_obj.next() - if _sup.is_supplementary and _sup.qname == read.qname: + if _sup.qname == read.qname: supplementary_alignment.append(_sup) return supplementary_alignment From 9b6adb93d40b98f6768caf43cb17763f95480221 Mon Sep 17 00:00:00 2001 From: Aleksander Jankowski Date: Fri, 17 Nov 2017 16:17:22 +0100 Subject: [PATCH 2/3] Fix a bug in choosing the 5'-most alignment in a chimeric alignment This should depend on the position of the first match (operation M in CIGAR string) in the read sequence. Currently, it depends on the number of CIGAR tuples before the first match. The fix makes e.g. 1H64M36H rank higher than 65S36M. --- hicexplorer/hicBuildMatrix.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hicexplorer/hicBuildMatrix.py b/hicexplorer/hicBuildMatrix.py index 61ce991e..94529bb9 100644 --- a/hicexplorer/hicBuildMatrix.py +++ b/hicexplorer/hicBuildMatrix.py @@ -5,6 +5,7 @@ import time from os import unlink import os +import itertools import pysam from six.moves import xrange @@ -530,7 +531,7 @@ def get_correct_map(primary, supplement_list): cigartuples = read.cigartuples[:] first_mapped.append( - [x for x, cig in enumerate(cigartuples) if cig[0] == 0][0]) + sum(count for op, count in itertools.takewhile(lambda (op, count): op != 0, cigartuples))) # find which read has a cigar string that maps first than any of the # others. idx_min = first_mapped.index(min(first_mapped)) From d2949563238eb4c4a903da2f1af224c09e420137 Mon Sep 17 00:00:00 2001 From: Aleksander Jankowski Date: Wed, 22 Nov 2017 17:33:47 +0100 Subject: [PATCH 3/3] Document the code, and use a predefined constant instead of enigmatic 0 --- hicexplorer/hicBuildMatrix.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hicexplorer/hicBuildMatrix.py b/hicexplorer/hicBuildMatrix.py index 94529bb9..5d3690a5 100644 --- a/hicexplorer/hicBuildMatrix.py +++ b/hicexplorer/hicBuildMatrix.py @@ -530,8 +530,11 @@ def get_correct_map(primary, supplement_list): else: cigartuples = read.cigartuples[:] + # For each read in read_list, calculate the position of the first match (operation M in CIGAR string) in the read sequence. + # The calculation is done by adding up the lengths of all the operations until the first match. + # CIGAR string is a list of tuples of (operation, length). Match is stored as CMATCH. first_mapped.append( - sum(count for op, count in itertools.takewhile(lambda (op, count): op != 0, cigartuples))) + sum(count for op, count in itertools.takewhile(lambda (op, count): op != pysam.CMATCH, cigartuples))) # find which read has a cigar string that maps first than any of the # others. idx_min = first_mapped.index(min(first_mapped))