Merge branch 'release/4.0.0.beta3'

mummer4 · Sep 30, 2020 · 8460d85 · 8460d85
2 parents 277dac5 + 21c199b
commit 8460d85
Show file tree

Hide file tree

Showing 15 changed files with 290 additions and 106 deletions.
diff --git a/Makefile.am b/Makefile.am
@@ -135,7 +135,7 @@ EXTRA_DIST += src/tigr/gaps.cc
 # what LIBTOOLS does.
 
 # List of scripts to install
-perl_scripts = mummerplot dnadiff promer
+perl_scripts = mummerplot dnadiff promer delta2vcf
 shell_scripts = exact-tandems
 all_scripts = $(perl_scripts) $(shell_scripts)
 
@@ -192,3 +192,8 @@ include swig/Makefile.am
 #########
 include unittests/Makefile.am
 include tests/Makefile.am
+
+###################################
+# Extra flags in development tree #
+###################################
+-include $(srcdir)/development.mk
diff --git a/configure.ac b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([MUMmer], [4.0.0beta2], [gmarcais@umd.edu])
+AC_INIT([MUMmer], [4.0.0beta3], [alekseyz@jhu.edu])
 AC_CANONICAL_HOST
 AC_CONFIG_MACRO_DIR([m4])
 AM_INIT_AUTOMAKE([subdir-objects foreign parallel-tests color-tests])

diff --git a/development.mk b/development.mk
@@ -0,0 +1,11 @@
+AM_CXXFLAGS += -Werror
+
+# Count lines of code
+.PHONY: cloc
+cloc:
+	cloc --force-lang="Ruby,yaggo" --force-lang="make,am" --force-lang="make,mk" \
+	  --exclude-dir="gtest" --not-match-d="^build-" --ignored=cloc_ignored_src_files $(srcdir) 
+
+# Print the value of a variable
+print-%:
+	@echo $($*)
diff --git a/examples/align_cpp/align.cpp b/examples/align_cpp/align.cpp
@@ -16,8 +16,7 @@ int main(int argc, char *argv[]) {
 
   mummer::nucmer::Options o;
   o.minmatch(10).mincluster(10);
-  auto aligns = mummer::nucmer::align_sequences(ref.c_str(), ref.size(),
-                                                qry.c_str(), qry.size(), o);
+  auto aligns = mummer::nucmer::align_sequences(ref, qry, o);
   for(const auto& a : aligns) {
     std::cout << a.sA << ' ' << a.eA << ' ' << a.sB << ' ' << a.eB << ' '
               << a.Errors << ' ' << a.SimErrors << ' ' << a.NonAlphas << '\n';

diff --git a/include/mummer/nucmer.hpp b/include/mummer/nucmer.hpp
@@ -174,7 +174,7 @@ class SequenceAligner {
 inline void align_sequences(const char* reference, size_t reference_len,
                             const char* query,  size_t query_len,
                             std::vector<postnuc::Alignment>& alignments,
-                            Options opts = Options()) {
+                            const Options opts = Options()) {
   SequenceAligner aligner(reference, reference_len, opts);
   aligner.align(query, query_len, alignments);
 }
@@ -185,6 +185,9 @@ inline std::vector<postnuc::Alignment> align_sequences(const char* reference, si
   SequenceAligner aligner(reference, reference_len, opts);
   return aligner.align(query, query_len);
 }
+inline std::vector<postnuc::Alignment> align_sequences(const std::string& reference, const std::string& query, const Options opts = Options()) {
+  return align_sequences(reference.c_str(), reference.size(), query.c_str(), query.size(), opts);
+}
 
 
 // Sequence concatenated and headers

diff --git a/include/mummer/redirect_to_pager.hpp b/include/mummer/redirect_to_pager.hpp
@@ -52,7 +52,8 @@ struct stdio_launch_pager {
 
   void stop_pager() {
     if(pager_handle) {
-      pclose(pager_handle);
+      fclose(stdout); // Close stdout for the pager to notice EOF
+      pclose(pager_handle); // Then wait for the pager to quit
       pager_handle = NULL;
     }
   }

diff --git a/include/mummer/sw_align.hh b/include/mummer/sw_align.hh
@@ -110,19 +110,13 @@ public:
   const_iterator cend() const { return m_vec.cend(); }
 };
 
-struct Score
-{
-  long int value;
-  char used;
-};
-
 struct Node
 {
-  Score S[3];
-  int   m_max;
+  long int values[3];
+  char used[3];
+  int m_max;
 
-  Score& max() { return S[m_max]; }
-  const Score& max() const { return S[m_max]; }
+  long int max() const { return values[m_max]; }
   void max(int i) { m_max = i; }
   int edit() const { return m_max; }
 };

diff --git a/scripts/delta2vcf.pl b/scripts/delta2vcf.pl
@@ -0,0 +1,154 @@
+#!@PERL@
+#this code converts delta file (stdin) to vcf file (stdout)
+$line=<STDIN>;
+chomp($line);
+my ($ref,$qry)=split(/\s+/,$line);
+$line=<STDIN>;
+chomp($line);
+die ("not a valid delta file") unless($line eq "NUCMER");
+
+#loading reference and querysequences
+open(FILE,$ref);
+while($line=<FILE>){
+  chomp($line);
+  if($line=~/^\>/){
+    my @f=split(/\s+/,$line);
+    if(not($seq eq "")){
+      $rseq{$ctg}=$seq;
+      $seq=reverse($seq);
+      $seq=~tr/ACGTNacgtn/TGCANtgcan/;
+      $rseq_rc{$ctg}=$seq;
+    }
+    $ctg=substr($f[0],1);
+    $seq="";
+  }else{
+    $seq.=$line;
+  }
+}
+if(not($seq eq "")){
+  $rseq{$ctg}=$seq;
+  $seq=reverse($seq);
+  $seq=~tr/ACGTNacgtn/TGCANtgcan/;
+  $rseq_rc{$ctg}=$seq;
+}
+open(FILE,$qry);
+while($line=<FILE>){
+  chomp($line);
+  if($line=~/^\>/){
+    my @f=split(/\s+/,$line);
+    if(not($seq eq "")){
+      $qseq{$ctg}=$seq;
+      $seq=reverse($seq);
+      $seq=~tr/ACGTNacgtn/TGCANtgcan/;
+      $qseq_rc{$ctg}=$seq;
+    }
+    $ctg=substr($f[0],1);
+    $seq="";
+  }else{
+    $seq.=$line;
+  }
+}
+if(not($seq eq "")){
+  $qseq{$ctg}=$seq;
+  $seq=reverse($seq);
+  $seq=~tr/ACGTNacgtn/TGCANtgcan/;
+  $qseq_rc{$ctg}=$seq;
+}
+
+print "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\n";
+@outarray=();
+#going through the delta file
+while($line=<STDIN>){
+  chomp($line);
+  my @f=split(/\s+/,$line);
+  print "$line $#f\n" if($DEBUG);
+  if($#f==3){#we found delta header
+    $refname=substr($f[0],1);
+    $qryname=$f[1];
+    $reflen=$f[2];
+    $qrylen=$f[3];
+  }elsif($#f==6){#we found new alignment header
+    $ref_start=$f[0];
+    $ref_end=$f[1];
+    $qry_start=$f[2];
+    $qry_end=$f[3];
+    @delta_begins=();
+    @delta_lengths=();
+    $last_indel=0;
+    while($line=<STDIN>){
+      print $line if($DEBUG);
+      chomp($line);
+      last if($line eq "0");
+      if($line>1||$line<-1){
+        if($line>0){
+          push(@delta_begins,$line+$last_indel);
+          push(@delta_lengths,1);
+          $last_indel+=$line;
+        }else{
+          push(@delta_begins,-(-$line+$last_indel));
+          push(@delta_lengths,1);
+          $last_indel+=-$line;
+        }
+      }else{
+        $delta_lengths[-1]++;
+        $last_indel++;
+      }
+    }
+#we now read the deltas for this alignment into the arrays.  Next line is either another delta or a new alignment
+#let's process the deltas
+    print "Found an alignment $refname $qryname $ref_start $ref_end $qry_start $qry_end\n" if($DEBUG);
+    print join(" ",@delta_begins),"\n" if($DEBUG);
+    print join(" ",@delta_lengths),"\n" if($DEBUG);
+    $delta_index=0;
+    $rseq=$rseq{$refname};
+    if($qry_start<$qry_end){
+      $qseq=$qseq{$qryname};
+    }else{
+      $qseq=$qseq_rc{$qryname};
+      $qry_start=length($qseq)-$qry_start+1;
+      $qry_end=length($qseq)-$qry_end-1;
+    }
+    $ref_pos=$ref_start;
+    $qry_pos=$qry_start;
+    $alignment_coord=1;
+    while($ref_pos<=$ref_end){
+      if($alignment_coord==abs($delta_begins[$delta_index])){#insertion or deletion
+        if($delta_begins[$delta_index]>0){
+          for($j=0;$j<$delta_lengths[$delta_index];$j++){
+            print substr($rseq,$ref_pos-1,1)," *"," $ref_pos $qry_pos $alignment_coord\n" if($DEBUG);
+            $ref_pos++;
+            $alignment_coord++;
+          }
+          push @outarray, {'refname'=>$refname, 'refpos'=>($ref_pos-$delta_lengths[$delta_index]-1),'ref'=>substr($rseq,$ref_pos-$delta_lengths[$delta_index]-2,$delta_lengths[$delta_index]+1),'qry'=>substr($qseq,$qry_pos-2,1) };
+          $delta_index++;
+        }else{#deletion
+          for($j=0;$j<$delta_lengths[$delta_index];$j++){
+            print "* ",substr($qseq,$qry_pos-1,1)," $ref_pos $qry_pos $alignment_coord\n" if($DEBUG);
+            $qry_pos++;
+            $alignment_coord++;
+          }
+          push @outarray,{'refname'=>$refname,'refpos'=>($ref_pos-1),'ref'=>substr($rseq,$ref_pos-2,1),'qry'=>substr($qseq,$qry_pos-$delta_lengths[$delta_index]-2,$delta_lengths[$delta_index]+1)};
+          $delta_index++;
+        }
+      }else{ 
+        print substr($rseq,$ref_pos-1,1)," ",substr($qseq,$qry_pos-1,1)," $ref_pos $qry_pos $alignment_coord\n" if($DEBUG);
+        push @outarray, {'refname'=>$refname,'refpos'=>$ref_pos,'ref'=>substr($rseq,$ref_pos-1,1),'qry'=>substr($qseq,$qry_pos-1,1)} if(not(uc(substr($rseq,$ref_pos-1,1)) eq uc(substr($qseq,$qry_pos-1,1))));
+        $ref_pos++;
+        $qry_pos++;
+        $alignment_coord++;
+      }
+    }#while
+  }#alignment
+}#top loop
+
+#sort output
+@outarray_sorted=sort { $a->{'refname'} cmp $b->{'refname'} || $a->{'refpos'} <=> $b->{'refpos'} } (@outarray);
+#print
+$last_refname="";
+$last_pos="";
+foreach $l(@outarray_sorted){
+print $l->{'refname'},"\t",$l->{'refpos'},"\t\.\t",$l->{'ref'},"\t",$l->{'qry'},"\t40\tPASS\t*\t*\t0:0:0:0:0:2:2:0\n" if(not($last_refname eq $l->{'refname'}) || not($last_pos==$l->{'refpos'}));
+$last_refname=$l->{'refname'};
+$last_pos=$l->{'refpos'};
+}
+