Skip to content

Commit

Permalink
Adding wrapper for samtools mpileup.
Browse files Browse the repository at this point in the history
  • Loading branch information
heuermh committed Jan 24, 2018
1 parent 6a705e5 commit 13c70cb
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ object Cannoli {
Bowtie2,
Bwa,
Freebayes,
Samtools,
SnpEff)),
CommandGroup("CANNOLI TOOLS", List(InterleaveFastq,
SampleReads)))
Expand Down
116 changes: 116 additions & 0 deletions cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/**
* Licensed to Big Data Genomics (BDG) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The BDG licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.bdgenomics.cannoli.cli

import htsjdk.samtools.ValidationStringency
import org.apache.spark.SparkContext
import org.bdgenomics.adam.models.VariantContext
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs
import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, BAMInFormatter }
import org.bdgenomics.adam.rdd.variant.{ VariantContextRDD, VCFOutFormatter }
import org.bdgenomics.utils.cli._
import org.bdgenomics.utils.misc.Logging
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }

/**
 * Companion for the samtools command: supplies the command name and
 * description, and builds a [[Samtools]] instance from raw command line
 * arguments.
 */
object Samtools extends BDGCommandCompanion {
  val commandName: String = "samtools"
  val commandDescription: String = "ADAM Pipe API wrapper for samtools mpileup."

  /**
   * Parses the command line into a SamtoolsArgs and wraps it in a command.
   *
   * @param cmdLine Raw command line arguments.
   * @return A Samtools command configured from the parsed arguments.
   */
  def apply(cmdLine: Array[String]) = {
    val parsedArgs = Args4j[SamtoolsArgs](cmdLine)
    new Samtools(parsedArgs)
  }
}

/**
 * Command line arguments for the samtools command.
 *
 * Fields are mutable vars (with null or literal defaults) carrying args4j
 * annotations; the Samtools companion hands this class to Args4j to be
 * populated from the command line.
 */
class SamtoolsArgs extends Args4jBase with ADAMSaveAnyArgs with ParquetArgs {
// First positional argument; loaded as alignments by the command.
@Argument(required = true, metaVar = "INPUT", usage = "Location to pipe from.", index = 0)
var inputPath: String = null

// Second positional argument.
// NOTE(review): presumably read by saveAsVcf through ADAMSaveAnyArgs -- confirm.
@Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe to, in VCF format.", index = 1)
var outputPath: String = null

// Executable used to launch samtools when -use_docker is not set.
@Args4jOption(required = false, name = "-samtools_path", usage = "Path to the samtools executable. Defaults to samtools.")
var samtoolsPath: String = "samtools"

// Required; passed to samtools mpileup as the value of --reference.
@Args4jOption(required = true, name = "-samtools_reference", usage = "Reference sequence for analysis. An index file (.fai) will be created if none exists.")
var referencePath: String = null

// Image name placed on the docker run command line when -use_docker is set.
@Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to heuermh/samtools.")
var dockerImage: String = "heuermh/samtools"

// Selects between the docker run command line and the plain executable.
@Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch samtools. If false, uses the samtools executable path.")
var useDocker: Boolean = false

// NOTE(review): the three output options below are not read directly by the
// command; presumably they are consumed by saveAsVcf via ADAMSaveAnyArgs -- confirm.
@Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.")
var asSingleFile: Boolean = false

@Args4jOption(required = false, name = "-defer_merging", usage = "Defers merging single file output.")
var deferMerging: Boolean = false

@Args4jOption(required = false, name = "-disable_fast_concat", usage = "Disables the parallel file concatenation engine.")
var disableFastConcat: Boolean = false

// Name of an htsjdk ValidationStringency constant; the command converts it
// with ValidationStringency.valueOf.
@Args4jOption(required = false, name = "-stringency", usage = "Stringency level for various checks; can be SILENT, LENIENT, or STRICT. Defaults to STRICT.")
var stringency: String = "STRICT"

// Not exposed as a command line option: it must be defined because of
// ADAMSaveAnyArgs, but it is unused by this command.
var sortFastqOutput: Boolean = false
}

/**
 * Cannoli command that loads alignments, pipes them through
 * samtools mpileup using the ADAM pipe API, and saves the piped output
 * as VCF.
 *
 * @param args Parsed command line arguments for this command.
 */
class Samtools(protected val args: SamtoolsArgs) extends BDGSparkCommand[SamtoolsArgs] with Logging {
  val companion = Samtools

  // Converted once from the -stringency argument; used for both load and save.
  val stringency: ValidationStringency = ValidationStringency.valueOf(args.stringency)

  /**
   * Runs the command: load INPUT, pipe it through samtools mpileup, and
   * save the resulting variant contexts via saveAsVcf.
   *
   * @param sc Spark context used to load the input alignments.
   */
  def run(sc: SparkContext): Unit = {
    val input: AlignmentRecordRDD = sc.loadAlignments(args.inputPath, stringency = stringency)

    // Formatters picked up implicitly by pipe: BAM in, VCF out.
    implicit val tFormatter = BAMInFormatter
    implicit val uFormatter = new VCFOutFormatter

    // Only the launcher prefix differs between the two modes. Under Docker the
    // executable name inside the image is the literal "samtools";
    // -samtools_path applies to the non-Docker launch only.
    val launcher: Seq[String] = if (args.useDocker) {
      Seq("docker", "run", "--interactive", "--rm", args.dockerImage, "samtools")
    } else {
      Seq(args.samtoolsPath)
    }

    // mpileup arguments shared by both modes, kept in one place so the
    // Docker and non-Docker command lines cannot drift apart.
    val mpileupArgs: Seq[String] = Seq(
      "mpileup",
      "-",
      "--reference",
      args.referencePath,
      "-v",
      "-u")

    // The command is handed to pipe as a single space-joined string.
    // NOTE(review): arguments containing spaces (e.g. the reference path) are
    // presumably not preserved as single tokens -- confirm against pipe.
    val samtoolsCommand = (launcher ++ mpileupArgs).mkString(" ")

    val output: VariantContextRDD = input.pipe[VariantContext, VariantContextRDD, BAMInFormatter](samtoolsCommand)
      .transform(_.cache())

    output.saveAsVcf(args, stringency)
  }
}

0 comments on commit 13c70cb

Please sign in to comment.