diff --git a/.github/workflows/docker_build_push.yml b/.github/workflows/docker_build_push.yml new file mode 100644 index 000000000..99d36d7f0 --- /dev/null +++ b/.github/workflows/docker_build_push.yml @@ -0,0 +1,42 @@ +name: build and push docker containers + +# Tigger building and testing container only on pull requests +on: + push: + branches: + - master + - develop + tags: + - "**" + +jobs: + main: + name: Docker image build push + runs-on: ubuntu-18.04 + strategy: + fail-fast: true + matrix: + container-name: [align_qc, annotate, coverage_qc, varcall_cnvkit, varcall_py27, varcall_py36] + steps: + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - name: Get branch name + id: get_branch_name + shell: bash + run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF##*/})" + - name: Login to DockerHub + id: docker_login + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + - name: Build and push + id: docker_build_push + uses: docker/build-push-action@v2 + with: + push: true + tags: clinicalgenomics/balsamic:${{ steps.get_branch_name.outputs.branch }}-${{ matrix.container-name }} + build-args: + CONTAINER_NAME=${{ matrix.container-name }} diff --git a/.github/workflows/docker_build_test.yml b/.github/workflows/docker_build_test.yml new file mode 100644 index 000000000..39f107b3f --- /dev/null +++ b/.github/workflows/docker_build_test.yml @@ -0,0 +1,45 @@ +name: build and test docker containers + +# Tigger building and testing container only on pull requests +on: + push: + branches-ignore: + - master + - develop + tags-ignore: + - "**" + paths: + - 'BALSAMIC/containers/**' + - 'Dockerfile' + + +jobs: + main: + name: Docker image build test + runs-on: ubuntu-18.04 + strategy: + fail-fast: false + matrix: + container-name: [align_qc, annotate, coverage_qc, varcall_cnvkit, varcall_py27, varcall_py36] + steps: + - name: 
Git checkout + id: git_checkout + uses: actions/checkout@v2 + - name: Get branch name + id: get_branch_name + shell: bash + run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF##*/})" + - name: Login to DockerHub + id: docker_login + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + - name: Build only + id: docker_build_only + uses: docker/build-push-action@v2 + with: + push: false + tags: clinicalgenomics/balsamic:${{ steps.get_branch_name.outputs.branch }}-${{ matrix.container-name }} + build-args: + CONTAINER_NAME=${{ matrix.container-name }} diff --git a/.github/workflows/pytest_and_coveralls.yml b/.github/workflows/pytest_and_coveralls.yml index d5cdeb385..962d175c6 100644 --- a/.github/workflows/pytest_and_coveralls.yml +++ b/.github/workflows/pytest_and_coveralls.yml @@ -30,7 +30,7 @@ jobs: with: activate-environment: balsamic environment-file: BALSAMIC/conda/balsamic.yaml - python-version: ${{ matrix.python-version }} + python-version: ${{ matrix.python-version }} # Install BALSAMIC - name: Install BALSAMIC diff --git a/.gitignore b/.gitignore index 4bef22b3b..ddffda13e 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,4 @@ demo/demo_run_balsamic/.* BALSAMIC_reference/ BALSAMIC/workflows/umi_workflow BALSAMIC/workflows/umi_workflow/.* +.idea/ diff --git a/BALSAMIC/assets/GenomeAnalysisTK.jar b/BALSAMIC/assets/GenomeAnalysisTK.jar deleted file mode 100644 index 98e0e08d9..000000000 Binary files a/BALSAMIC/assets/GenomeAnalysisTK.jar and /dev/null differ diff --git a/BALSAMIC/assets/gatk_3.8.tar.bz2 b/BALSAMIC/assets/gatk_3.8.tar.bz2 deleted file mode 100644 index 3c2011eb1..000000000 Binary files a/BALSAMIC/assets/gatk_3.8.tar.bz2 and /dev/null differ diff --git a/BALSAMIC/assets/scripts/FilterDuplexUMIconsensus.awk b/BALSAMIC/assets/scripts/FilterDuplexUMIconsensus.awk new file mode 100644 index 000000000..45f4d558b --- /dev/null +++ 
b/BALSAMIC/assets/scripts/FilterDuplexUMIconsensus.awk @@ -0,0 +1,29 @@ +#! /bin/awk -f + +function min(b) { + return b[1]>b[2]?b[2]:b[1] +} +function max(b) { + return b[1]>b[2]?b[1]:b[2] +} +function sum(b) { + return b[1]+b[2] +} +BEGIN {split(MinR,tmp,","); + mr1=tmp[1]; + mr2=tmp[2] "/dev/stderr" +} +{ if ($0~/^@/) {print;} + else { + for(i=NF;i>=12;i--){ if($i~/^XZ:Z:/) {split($i,a,":");split(a[3],b,","); break;}} + if (sum(b)>=mr1 && min(b)>=mr2 && max(b)>=mr3 ) {reads2++;print;} + reads++; + if (reads % 2000000 == 0) { + print "["strftime("Time = %m/%d/%Y %H:%M:%S", systime())"] Processed "reads" reads. Last read position: "$3":"$4> "/dev/stderr" + } + } +} +END {print "["strftime("Time = %m/%d/%Y %H:%M:%S", systime())"] Done. Processed "reads" reads. "reads2" reads passed the filter."> "/dev/stderr" } diff --git a/BALSAMIC/assets/vcfanno/vcfanno.toml b/BALSAMIC/assets/vcfanno/vcfanno.toml new file mode 100644 index 000000000..a49dba8ff --- /dev/null +++ b/BALSAMIC/assets/vcfanno/vcfanno.toml @@ -0,0 +1,5 @@ +[[annotation]] +file="gnomad.genomes.r2.1.1.sites.vcf.bgz" +fields = ["AF", "AF_popmax"] +ops=["self", "self"] +names=["GNOMADAF", "GNOMADAF_popmax"] \ No newline at end of file diff --git a/BALSAMIC/commands/base.py b/BALSAMIC/commands/base.py index 6cf83c3bf..44d4474a7 100755 --- a/BALSAMIC/commands/base.py +++ b/BALSAMIC/commands/base.py @@ -7,6 +7,7 @@ # Subcommands from BALSAMIC.commands.run.base import run as run_command +from BALSAMIC.commands.init.base import initialize as init_command from BALSAMIC.commands.report.base import report as report_command from BALSAMIC.commands.config.base import config as config_command from BALSAMIC.commands.plugins.base import plugins as plugins_command @@ -67,3 +68,4 @@ def cli(context, loglevel): cli.add_command(report_command) cli.add_command(config_command) cli.add_command(plugins_command) +cli.add_command(init_command) diff --git a/BALSAMIC/commands/config/base.py b/BALSAMIC/commands/config/base.py index 
fb2dd5de8..e44d300af 100644 --- a/BALSAMIC/commands/config/base.py +++ b/BALSAMIC/commands/config/base.py @@ -2,7 +2,6 @@ import click from BALSAMIC.commands.config.case import case_config as case_command -from BALSAMIC.commands.config.reference import reference as reference_command @click.group() @@ -13,4 +12,3 @@ def config(context): config.add_command(case_command) -config.add_command(reference_command) diff --git a/BALSAMIC/commands/config/case.py b/BALSAMIC/commands/config/case.py index 47c2051de..dba2b362a 100644 --- a/BALSAMIC/commands/config/case.py +++ b/BALSAMIC/commands/config/case.py @@ -1,17 +1,16 @@ +import os import json import logging from pathlib import Path import click -from BALSAMIC.utils.cli import ( - get_sample_dict, - get_panel_chrom, - get_bioinfo_tools_list, - create_fastq_symlink, - generate_graph, -) -from BALSAMIC.utils.constants import CONDA_ENV_PATH, VCF_DICT +from BALSAMIC import __version__ as balsamic_version +from BALSAMIC.utils.cli import (get_sample_dict, get_panel_chrom, + get_bioinfo_tools_version, + create_fastq_symlink, generate_graph) +from BALSAMIC.utils.constants import (CONTAINERS_CONDA_ENV_PATH, VCF_DICT, + BIOINFO_TOOL_ENV) from BALSAMIC.utils.models import BalsamicConfigModel LOG = logging.getLogger(__name__) @@ -19,110 +18,81 @@ @click.command("case", short_help="Create a sample config file from input sample data") -@click.option( - "--case-id", - required=True, - help="Sample id that is used for reporting, \ - naming the analysis jobs, and analysis path", -) -@click.option( - "--umi/--no-umi", - default=True, - show_default=True, - is_flag=True, - help="UMI processing steps for samples with UMI tags", -) -@click.option( - "--umi-trim-length", - default=5, - show_default=True, - type=int, - help="Trim N bases from reads in fastq", -) -@click.option( - "--quality-trim/--no-quality-trim", - default=True, - show_default=True, - is_flag=True, - help="Trim low quality reads in fastq", -) -@click.option( - 
"--adapter-trim/--no-adapter-trim", - default=True, - show_default=True, - is_flag=True, - help="Trim adapters from reads in fastq", -) -@click.option( - "-r", - "--reference-config", - required=True, - type=click.Path(exists=True, resolve_path=True), - help="Reference config file.", -) -@click.option( - "-p", - "--panel-bed", - type=click.Path(exists=True, resolve_path=True), - required=False, - help="Panel bed file for variant calling.", -) -@click.option( - "-b", - "--background-variants", - type=click.Path(exists=True, resolve_path=True), - required=False, - help="Background set of valid variants for UMI", -) -@click.option( - "--singularity", - type=click.Path(exists=True, resolve_path=True), - required=True, - help="Download singularity image for BALSAMIC", -) -@click.option( - "--analysis-dir", - type=click.Path(exists=True, resolve_path=True), - required=True, - help="Root analysis path to store analysis logs and results. \ - The final path will be analysis-dir/sample-id", -) -@click.option( - "-t", - "--tumor", - type=click.Path(exists=True, resolve_path=True), - required=True, - multiple=True, - help="Fastq files for tumor sample.", -) -@click.option( - "-n", - "--normal", - type=click.Path(exists=True, resolve_path=True), - required=False, - multiple=True, - help="Fastq files for normal sample.", -) +@click.option("--case-id", + required=True, + help="Sample id that is used for reporting, \ + naming the analysis jobs, and analysis path") +@click.option("--umi/--no-umi", + default=True, + show_default=True, + is_flag=True, + help=("UMI processing steps for samples with UMI tags." 
+ "For WGS cases, UMI is always disabled.")) +@click.option("--umi-trim-length", + default=5, + show_default=True, + type=int, + help="Trim N bases from reads in fastq") +@click.option("--quality-trim/--no-quality-trim", + default=True, + show_default=True, + is_flag=True, + help="Trim low quality reads in fastq") +@click.option("--adapter-trim/--no-adapter-trim", + default=True, + show_default=True, + is_flag=True, + help="Trim adapters from reads in fastq") +@click.option("-p", + "--panel-bed", + type=click.Path(exists=True, resolve_path=True), + required=False, + help="Panel bed file for variant calling.") +@click.option("-b", + "--background-variants", + type=click.Path(exists=True, resolve_path=True), + required=False, + help="Background set of valid variants for UMI") +@click.option("--balsamic-cache", + type=click.Path(exists=True, resolve_path=True), + required=True, + help="Path to BALSAMIC cache") +@click.option("--analysis-dir", + type=click.Path(exists=True, resolve_path=True), + required=True, + help="Root analysis path to store analysis logs and results. \ + The final path will be analysis-dir/sample-id" + ) +@click.option("-t", + "--tumor", + type=click.Path(exists=True, resolve_path=True), + required=True, + multiple=True, + help="Fastq files for tumor sample.") +@click.option("-n", + "--normal", + type=click.Path(exists=True, resolve_path=True), + required=False, + multiple=True, + help="Fastq files for normal sample.") +@click.option("--umiworkflow/--no-umiworkflow", + default=True, + show_default=True, + is_flag=True, + help="Enable running UMI workflow") @click.option("--tumor-sample-name", help="Tumor sample name") @click.option("--normal-sample-name", help="Normal sample name") +@click.option("-g", + "--genome-version", + default="hg19", + type=click.Choice(["hg19", "hg38"]), + help=("Genome version to prepare reference. 
Path to genome" + "will be /genome_version")) @click.pass_context -def case_config( - context, - case_id, - umi, - umi_trim_length, - adapter_trim, - quality_trim, - reference_config, - panel_bed, - background_variants, - singularity, - analysis_dir, - tumor, - normal, - tumor_sample_name, - normal_sample_name, -): +def case_config(context, case_id, umi, umi_trim_length, adapter_trim, + quality_trim, panel_bed, background_variants, analysis_dir, + tumor, normal, umiworkflow, tumor_sample_name, + normal_sample_name, genome_version, balsamic_cache): try: samples = get_sample_dict( @@ -137,19 +107,17 @@ def case_config( ) raise click.Abort() - try: - reference_dict = json.load(open(reference_config))["reference"] - except Exception as e: - LOG.error( - f"Reference config {reference_config} does not follow correct format: {e}" - ) - raise click.Abort() + reference_config = os.path.join(balsamic_cache, + balsamic_version, genome_version, + "reference.json") + with open(reference_config, 'r') as f: + reference_dict = json.load(f)["reference"] config_collection_dict = BalsamicConfigModel( QC={ "quality_trim": quality_trim, "adapter_trim": adapter_trim, - "umi_trim": umi, + "umi_trim": umi if panel_bed else False, "umi_trim_length": umi_trim_length, }, analysis={ @@ -157,13 +125,16 @@ def case_config( "analysis_dir": analysis_dir, "analysis_type": "paired" if normal else "single", "sequencing_type": "targeted" if panel_bed else "wgs", + "umiworkflow": umiworkflow }, reference=reference_dict, - singularity=singularity, + singularity=os.path.join(balsamic_cache, balsamic_version, "containers"), background_variants=background_variants, samples=samples, vcf=VCF_DICT, - bioinfo_tools=get_bioinfo_tools_list(CONDA_ENV_PATH), + bioinfo_tools=BIOINFO_TOOL_ENV, + bioinfo_tools_version=get_bioinfo_tools_version( + BIOINFO_TOOL_ENV, CONTAINERS_CONDA_ENV_PATH), panel={ "capture_kit": panel_bed, "chrom": get_panel_chrom(panel_bed), diff --git a/BALSAMIC/commands/config/reference.py 
b/BALSAMIC/commands/config/reference.py deleted file mode 100644 index 2f38d5062..000000000 --- a/BALSAMIC/commands/config/reference.py +++ /dev/null @@ -1,111 +0,0 @@ -#! /usr/bin/env python - -import os -import logging -import click -import graphviz -import snakemake -from pathlib import Path - -from BALSAMIC.utils.cli import write_json, merge_json -from BALSAMIC.utils.cli import get_snakefile, get_config -from BALSAMIC.utils.cli import CaptureStdout -from BALSAMIC import __version__ as bv - -LOG = logging.getLogger(__name__) - - -@click.command("reference", - short_help="config workflow for generate reference") -@click.option("-o", - "--outdir", - required=True, - help="output directory for ref files eg: reference") -@click.option("-c", - "--cosmic-key", - required=True, - help="cosmic db authentication key") -@click.option("-s", - "--snakefile", - default=get_snakefile('generate_ref'), - type=click.Path(), - show_default=True, - help="snakefile for reference generation") -@click.option("-d", - "--dagfile", - default="generate_ref_worflow_graph", - show_default=True, - help="DAG file for overview") -@click.option("--singularity", - type=click.Path(), - required=True, - help='Download singularity image for BALSAMIC') -@click.option("-g", - "--genome-version", - default="hg19", - type=click.Choice(["hg19", "hg38"]), - help=("Genome version to prepare reference. 
Path to genome" - "will be /genome_version")) -@click.pass_context -def reference(context, outdir, cosmic_key, snakefile, dagfile, singularity, - genome_version): - """ Configure workflow for reference generation """ - - LOG.info(f"BALSAMIC started with log level {context.obj['loglevel']}.") - config_path = Path(__file__).parents[2] / "config" - config_path = config_path.absolute() - - balsamic_env = config_path / "balsamic_env.yaml" - rule_directory = Path(__file__).parents[2] - - install_config = dict() - - install_config["conda_env_yaml"] = balsamic_env.as_posix() - install_config["rule_directory"] = rule_directory.as_posix() + "/" - - install_config["singularity"] = dict() - install_config["singularity"]["image"] = Path( - singularity).absolute().as_posix() - - config = dict() - outdir = os.path.abspath(outdir) - config_json = os.path.join(outdir, "config.json") - dagfile_path = os.path.join(outdir, dagfile) - - config["output"] = outdir - if cosmic_key: - config["cosmic_key"] = cosmic_key - - config["genome_version"] = genome_version - - config = merge_json(config, install_config) - - os.makedirs(outdir, exist_ok=True) - - write_json(config, config_json) - LOG.info( - f'Reference generation workflow configured successfully - {config_json}' - ) - - with CaptureStdout() as graph_dot: - snakemake.snakemake(snakefile=snakefile, - dryrun=True, - configfiles=[config_json], - printrulegraph=True) - - graph_title = "_".join(['BALSAMIC', bv, 'Generate reference']) - graph_dot = "".join(graph_dot).replace( - 'snakemake_dag {', - 'BALSAMIC { label="' + graph_title + '";labelloc="t";') - graph_obj = graphviz.Source(graph_dot, - filename=dagfile_path, - format="pdf", - engine="dot") - - try: - graph_pdf = graph_obj.render() - LOG.info( - f'Reference workflow graph generated successfully - {graph_pdf}') - except Exception: - LOG.error(f'Reference workflow graph generation failed') - raise click.Abort() diff --git a/BALSAMIC/commands/init/__init__.py 
b/BALSAMIC/commands/init/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/BALSAMIC/commands/init/base.py b/BALSAMIC/commands/init/base.py new file mode 100644 index 000000000..7198dc884 --- /dev/null +++ b/BALSAMIC/commands/init/base.py @@ -0,0 +1,201 @@ +import os +import re +import logging +import subprocess +from pathlib import Path + +import click +import graphviz +import snakemake + +from BALSAMIC.utils.constants import BIOINFO_TOOL_ENV, BALSAMIC_DOCKER_PATH, VALID_CONTAINER_CONDA_NAME +from BALSAMIC.utils.cli import write_json, merge_json, CaptureStdout, get_snakefile, SnakeMake +from BALSAMIC import __version__ as balsamic_version + +LOG = logging.getLogger(__name__) + + +@click.command( + "init", + short_help="Download matching version for container and build reference") +@click.option("-o", + "--outdir", + "--out-dir", + required=True, + help=("Output directory for ref files." + "This path will be used as base path for files")) +@click.option("-v", + "--container-version", + show_default=True, + default=balsamic_version, + type=click.Choice(["develop", "master", balsamic_version]), + help="Container for BALSAMIC version to download") +@click.option('-f', + '--force', + show_default=True, + default=False, + is_flag=True, + help="Force re-downloading all containers") +@click.option("-c", + "--cosmic-key", + required=True, + help="cosmic db authentication key") +@click.option("-s", + "--snakefile", + default=get_snakefile('generate_ref'), + type=click.Path(), + show_default=True, + help="snakefile for reference generation") +@click.option("-d", + "--dagfile", + default="generate_ref_worflow_graph", + show_default=True, + help="DAG file for overview") +@click.option("-g", + "--genome-version", + default="hg19", + type=click.Choice(["hg19", "hg38"]), + help=("Genome version to prepare reference. 
Path to genome" + "will be /genome_version")) +@click.option( + '-r', + '--run-analysis', + show_default=True, + default=False, + is_flag=True, + help=("By default balsamic run_analysis will run in dry run mode." + "Raise this flag to make the actual analysis")) +@click.option( + '-f', + '--force-all', + show_default=True, + default=False, + is_flag=True, + help='Force run all analysis. This is same as snakemake --forceall') +@click.option('--snakemake-opt', + multiple=True, + help='Pass these options directly to snakemake') +@click.option('-q', + '--quiet', + default=False, + is_flag=True, + help=('Instruct snakemake to be quiet!' + 'No output will be printed')) +@click.pass_context +def initialize(context, outdir, container_version, force, cosmic_key, + snakefile, dagfile, genome_version, run_analysis, force_all, + quiet, snakemake_opt): + """ + Initialize various resources after first installation. + - Pull container(s) for BALSAMIC according to matching version + - Download and build a reference + """ + LOG.info("BALSAMIC started with log level %s" % context.obj['loglevel']) + + # resolve outdir to absolute path + outdir = Path(outdir).resolve() + + container_outdir = Path(outdir, balsamic_version, "containers") + pattern = re.compile(r"^(\d+\.)?(\d+\.)?(\*|\d+)$") + if pattern.findall(container_version): + docker_image_base_name = "release_v{}".format(container_version) + else: + docker_image_base_name = container_version + + for image_suffix in VALID_CONTAINER_CONDA_NAME: + + container_stub_url = "{}:{}-{}".format( + BALSAMIC_DOCKER_PATH, docker_image_base_name, image_suffix) + + # Pull container + LOG.info("Singularity image source: {}".format(container_stub_url)) + + # Set container name according to above docker image name + Path(container_outdir).mkdir(parents=True, exist_ok=True) + image_name = Path(container_outdir, + "{}.sif".format(image_suffix)).as_posix() + LOG.info("Image will be downloaded to {}".format(image_name)) + LOG.info("Starting download. 
This process can take some time...") + + cmd = ["singularity", "pull", "--name", f"{image_name}"] + if force: + cmd.append("--force") + cmd.append(container_stub_url) + + LOG.info("The following command will run: {}".format(" ".join(cmd))) + if run_analysis: + subprocess.run(" ".join(cmd), shell=True) + + config_path = Path(__file__).parents[2] / "config" + config_path = config_path.absolute() + + rule_directory = Path(__file__).parents[2] + + config_dict = dict() + config_dict["bioinfo_tools"] = BIOINFO_TOOL_ENV + config_dict["rule_directory"] = rule_directory.as_posix() + "/" + config_dict["singularity"] = dict() + config_dict["singularity"]["image"] = container_outdir.as_posix() + + reference_outdir = Path(outdir, balsamic_version, genome_version) + Path(reference_outdir).mkdir(parents=True, exist_ok=True) + config_json = Path(reference_outdir, "config.json").as_posix() + dagfile_path = Path(reference_outdir, dagfile).as_posix() + + config_dict["output"] = reference_outdir.as_posix() + if cosmic_key: + config_dict["cosmic_key"] = cosmic_key + + config_dict["genome_version"] = genome_version + + write_json(config_dict, config_json) + LOG.info('Reference generation workflow configured successfully - %s' % + config_json) + + with CaptureStdout() as graph_dot: + snakemake.snakemake(snakefile=snakefile, + dryrun=True, + configfiles=[config_json], + printrulegraph=True) + + graph_title = "_".join( + ['BALSAMIC', balsamic_version, 'Generate reference']) + graph_dot = "".join(graph_dot).replace( + 'snakemake_dag {', + 'BALSAMIC { label="' + graph_title + '";labelloc="t";') + graph_obj = graphviz.Source(graph_dot, + filename=dagfile_path, + format="pdf", + engine="dot") + + try: + graph_pdf = graph_obj.render() + LOG.info('Reference workflow graph generated successfully - %s ' % + graph_pdf) + except Exception: + LOG.error('Reference workflow graph generation failed') + raise click.Abort() + + LOG.info("Reference generation workflow started") + + # Singularity bind path 
+ bind_path = list() + bind_path.append(config_dict['output']) + bind_path.append(config_dict['rule_directory']) + + # Construct snakemake command to run workflow + balsamic_run = SnakeMake() + balsamic_run.working_dir = config_dict['output'] + balsamic_run.snakefile = snakefile + balsamic_run.configfile = config_json + balsamic_run.run_mode = "local" + balsamic_run.forceall = force_all + balsamic_run.run_analysis = run_analysis + balsamic_run.quiet = quiet + balsamic_run.sm_opt = list(snakemake_opt) + ["--cores", "1"] + + # Always use singularity + balsamic_run.use_singularity = True + balsamic_run.singularity_bind = bind_path + + subprocess.run(balsamic_run.build_cmd(), shell=True) diff --git a/BALSAMIC/commands/plugins/base.py b/BALSAMIC/commands/plugins/base.py index 3c1895cfd..620817335 100644 --- a/BALSAMIC/commands/plugins/base.py +++ b/BALSAMIC/commands/plugins/base.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import click from BALSAMIC.commands.plugins.scout import scout as scout_command diff --git a/BALSAMIC/commands/plugins/cov_plot.py b/BALSAMIC/commands/plugins/cov_plot.py index 1e4e97729..bf33141aa 100644 --- a/BALSAMIC/commands/plugins/cov_plot.py +++ b/BALSAMIC/commands/plugins/cov_plot.py @@ -1,14 +1,6 @@ -import re -import os import logging -import glob -import json -import yaml import click -from BALSAMIC.utils.rule import get_result_dir -#from BALSAMIC.utils import plot_cov - LOG = logging.getLogger(__name__) diff --git a/BALSAMIC/commands/plugins/scout.py b/BALSAMIC/commands/plugins/scout.py index 5f2993260..eb5f45622 100644 --- a/BALSAMIC/commands/plugins/scout.py +++ b/BALSAMIC/commands/plugins/scout.py @@ -1,11 +1,8 @@ import os import logging -import glob import json import yaml import click -import shutil -import sys import datetime from BALSAMIC.utils.rule import get_result_dir diff --git a/BALSAMIC/commands/report/deliver.py b/BALSAMIC/commands/report/deliver.py index f7029267c..3c003dc6f 100644 --- a/BALSAMIC/commands/report/deliver.py +++ 
b/BALSAMIC/commands/report/deliver.py @@ -1,28 +1,21 @@ import os import sys import logging -import glob import json import yaml import click -import copy import snakemake import datetime import subprocess -from collections import defaultdict -from yapf.yapflib.yapf_api import FormatFile from pathlib import Path -from BALSAMIC.utils.cli import get_from_two_key -from BALSAMIC.utils.cli import merge_dict_on_key from BALSAMIC.utils.cli import get_file_extension -from BALSAMIC.utils.cli import find_file_index from BALSAMIC.utils.cli import write_json from BALSAMIC.utils.cli import get_snakefile -from BALSAMIC.utils.cli import CaptureStdout from BALSAMIC.utils.cli import SnakeMake from BALSAMIC.utils.cli import convert_deliverables_tags from BALSAMIC.utils.rule import get_result_dir +from BALSAMIC.utils.constants import VCF_DICT from BALSAMIC.utils.exc import BalsamicError from BALSAMIC.utils.qc_metrics import get_qc_metrics from BALSAMIC.utils.qc_report import render_html, report_data_population @@ -77,13 +70,18 @@ default="a", show_default=True, help=( - "a: append rules-to-deliver to current delivery " - "options. or r: reset current rules to delivery to only the ones specified" - ), + 'a: append rules-to-deliver to current delivery ' + 'options. or r: reset current rules to delivery to only the ones specified' + )) +@click.option( + '--disable-variant-caller', + help= + f'Run workflow with selected variant caller(s) disable. Use comma to remove multiple variant callers. Valid ' + f'values are: {list(VCF_DICT.keys())}', ) @click.pass_context def deliver(context, sample_config, analysis_type, rules_to_deliver, - delivery_mode, sample_id_map, case_id_map): + delivery_mode, disable_variant_caller, sample_id_map, case_id_map): """ cli for deliver sub-command. Writes .hk in result_directory. 
@@ -166,10 +164,8 @@ def deliver(context, sample_config, analysis_type, rules_to_deliver, report = SnakeMake() report.case_name = case_name report.working_dir = os.path.join( - sample_config_dict["analysis"]["analysis_dir"], - sample_config_dict["analysis"]["case_id"], - "BALSAMIC_run", - ) + sample_config_dict['analysis']['analysis_dir'], + sample_config_dict['analysis']['case_id'], 'BALSAMIC_run') report.report = report_file_name report.configfile = sample_config report.snakefile = snakefile @@ -177,6 +173,8 @@ def deliver(context, sample_config, analysis_type, rules_to_deliver, report.use_singularity = False report.run_analysis = True report.sm_opt = ["--quiet"] + if disable_variant_caller: + report.disable_variant_caller = disable_variant_caller cmd = sys.executable + " -m " + report.build_cmd() subprocess.check_output(cmd.split(), shell=False) LOG.info(f"Workflow report file {report_file_name}") diff --git a/BALSAMIC/commands/report/status.py b/BALSAMIC/commands/report/status.py index 384ce08c2..1de30530e 100644 --- a/BALSAMIC/commands/report/status.py +++ b/BALSAMIC/commands/report/status.py @@ -1,26 +1,16 @@ import os -import sys import logging -import glob import json -import yaml import click -import copy import snakemake + +from pathlib import Path from colorclass import Color -from collections import defaultdict -from yapf.yapflib.yapf_api import FormatFile -from BALSAMIC.utils.cli import get_from_two_key -from BALSAMIC.utils.cli import merge_dict_on_key -from BALSAMIC.utils.cli import get_file_extension -from BALSAMIC.utils.cli import find_file_index -from BALSAMIC.utils.cli import write_json from BALSAMIC.utils.cli import get_snakefile from BALSAMIC.utils.cli import CaptureStdout from BALSAMIC.utils.cli import get_file_status_string from BALSAMIC.utils.rule import get_result_dir -from BALSAMIC.utils.exc import BalsamicError LOG = logging.getLogger(__name__) @@ -64,6 +54,21 @@ def status(context, sample_config, show_only_missing, print_files): 
sequencing_type = sample_config_dict["analysis"]["sequencing_type"] snakefile = get_snakefile(analysis_type, sequencing_type) + if os.path.isfile(os.path.join(result_dir, "analysis_finish")): + snakemake.snakemake( + snakefile=snakefile, + config={ + "benchmark_plots": "True", + }, + dryrun=True, + configfiles=[sample_config], + quiet=True, + ) + else: + LOG.warning( + "analysis_finish file is missing. Analysis might be incomplete or running." + ) + with CaptureStdout() as summary: snakemake.snakemake( snakefile=snakefile, @@ -75,11 +80,6 @@ def status(context, sample_config, show_only_missing, print_files): summary = [i.split("\t") for i in summary] summary_dict = [dict(zip(summary[0], value)) for value in summary[1:]] - if not os.path.isfile(os.path.join(result_dir, "analysis_finish")): - LOG.warning( - "analysis_finish file is missing. Analysis might be incomplete or running." - ) - existing_files = set() missing_files = set() diff --git a/BALSAMIC/commands/run/analysis.py b/BALSAMIC/commands/run/analysis.py index 68973d4b1..8cefd2e77 100644 --- a/BALSAMIC/commands/run/analysis.py +++ b/BALSAMIC/commands/run/analysis.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import sys import os import logging @@ -11,8 +10,9 @@ # CLI commands and decorators from BALSAMIC.utils.cli import (createDir, get_schedulerpy, get_snakefile, - SnakeMake, get_config, get_fastq_bind_path) -from BALSAMIC.utils.constants import ANALYSIS_TYPES, VCF_DICT + SnakeMake, get_config, get_fastq_bind_path, + job_id_dump_to_yaml) +from BALSAMIC.utils.constants import ANALYSIS_TYPES, VCF_DICT, BALSAMIC_SCRIPTS LOG = logging.getLogger(__name__) @@ -23,60 +23,71 @@ '--analysis-type', required=False, type=click.Choice(ANALYSIS_TYPES), - help='Type of analysis to run from input config file.\ - By default it will read from config file, but it will override config file \ - if it is set here.') -@click.option( - '-S', - '--snake-file', - type=click.Path(), - show_default=True, - help='Input for a custom 
snakefile. WARNING: This is for internal testing,\ - and should not be used. Providing a snakefile supersedes analysis_type option.' -) + help=('Type of analysis to run from input config file.' + 'By default it will read from config file, but it' + 'will override config file if it is set here.')) +@click.option('-S', + '--snake-file', + type=click.Path(), + show_default=True, + help=('Input for a custom snakefile. WARNING: ' + 'This is for internal testing, and should ' + 'not be used. Providing a snakefile supersedes' + 'analysis_type option.')) @click.option('-s', '--sample-config', required=True, type=click.Path(), help='Sample json config file.') -@click.option( - '--run-mode', - show_default=True, - default='cluster', - type=click.Choice(["local", "cluster"]), - help='Run mode to use. By default SLURM will be used to run the analysis.\ - But local runner also available for local computing') +@click.option('--run-mode', + show_default=True, + default='cluster', + type=click.Choice(["local", "cluster"]), + help=('Run mode to use. By default SLURM will be used to ' + 'run the analysis. But local runner also available ' + 'for local computing')) @click.option('-c', '--cluster-config', show_default=True, default=get_config('cluster'), type=click.Path(), help='cluster config json file. (eg- SLURM, QSUB)') +@click.option('--dragen', + is_flag=True, + default=False, + help="Enable dragen variant caller") @click.option('-p', '--profile', default="slurm", type=click.Choice(["slurm", "qsub"]), help="cluster profile to submit jobs") @click.option( - '-r', - '--run-analysis', - show_default=True, + '--benchmark', default=False, is_flag=True, - help='By default balsamic run_analysis will run in dry run mode. \ - Raise thise flag to make the actual analysis') + help= + "Profile slurm jobs using the value of this option. Make sure you have slurm profiler enabled in your HPC." 
+) +@click.option('-r', + '--run-analysis', + show_default=True, + default=False, + is_flag=True, + help=('By default balsamic run_analysis will run in ' + 'dry run mode. Raise thise flag to make the ' + 'actual analysis')) @click.option('--qos', type=click.Choice(['low', 'normal', 'high']), show_default=True, default="low", help='QOS for sbatch jobs. Passed to ' + get_schedulerpy()) -@click.option( - '-f', - '--force-all', - show_default=True, - default=False, - is_flag=True, - help='Force run all analysis. This is same as snakemake --forceall') +@click.option('-f', + '--force-all', + show_default=True, + default=False, + is_flag=True, + help=('Force run all analysis. This is same as ' + 'snakemake --forceall')) @click.option('--snakemake-opt', multiple=True, help='Pass these options directly to snakemake') @@ -87,20 +98,33 @@ @click.option( '--mail-user', help='cluster mail user to send out email. e.g.: slurm_mail_user') -@click.option( - '--mail-type', - type=click.Choice( - ['NONE', 'BEGIN', 'END', 'FAIL', 'REQUEUE', 'ALL', 'TIME_LIMIT']), - help='cluster mail type to send out email. \ - This will be applied to all jobs and override snakemake settings.' -) +@click.option('-q', + '--quiet', + default=False, + is_flag=True, + help=('Instruct snakemake to be quiet!' + 'No output will be printed')) +@click.option('--mail-type', + type=click.Choice([ + 'NONE', + 'BEGIN', + 'END', + 'FAIL', + 'REQUEUE', + 'ALL', + 'TIME_LIMIT', + ]), + help=('cluster mail type to send out email. This will ' + 'be applied to all jobs and override snakemake settings.')) @click.option('--disable-variant-caller', - type=click.Choice(list(VCF_DICT.keys())), - help='Run workflow with selected variant caller disable.') + help=(f'Run workflow with selected variant caller(s) disable.' + f'Use comma to remove multiple variant callers. 
Valid ' + f'values are: {list(VCF_DICT.keys())}')) @click.pass_context def analysis(context, snake_file, sample_config, run_mode, cluster_config, run_analysis, force_all, snakemake_opt, mail_type, mail_user, - account, analysis_type, qos, profile, disable_variant_caller): + account, analysis_type, qos, profile, disable_variant_caller, + quiet, dragen, benchmark): """ Runs BALSAMIC workflow on the provided sample's config file """ @@ -153,6 +177,9 @@ def analysis(context, snake_file, sample_config, run_mode, cluster_config, bind_path.append(os.path.commonpath(sample_config['reference'].values())) if 'panel' in sample_config: bind_path.append(sample_config.get('panel').get('capture_kit')) + if 'background_variants' in sample_config: + bind_path.append(sample_config.get('background_variants')) + bind_path.append(BALSAMIC_SCRIPTS) bind_path.append(sample_config['analysis']['analysis_dir']) bind_path.extend( get_fastq_bind_path(sample_config["analysis"]["fastq_path"])) @@ -179,27 +206,24 @@ def analysis(context, snake_file, sample_config, run_mode, cluster_config, balsamic_run.mail_user = mail_user balsamic_run.forceall = force_all balsamic_run.run_analysis = run_analysis + balsamic_run.quiet = quiet # Always use singularity balsamic_run.use_singularity = True balsamic_run.singularity_bind = bind_path balsamic_run.sm_opt = snakemake_opt + balsamic_run.slurm_profiler = benchmark if disable_variant_caller: balsamic_run.disable_variant_caller = disable_variant_caller - try: - cmd = sys.executable + " -m " + balsamic_run.build_cmd() - subprocess.run(cmd, shell=True) - except Exception as e: - print(e) - raise click.Abort() + if dragen: + balsamic_run.dragen = dragen - if run_analysis: - jobid_file = os.path.join( + cmd = sys.executable + " -m " + balsamic_run.build_cmd() + subprocess.run(cmd, shell=True) + + if run_analysis and run_mode == 'cluster': + jobid_dump = os.path.join( logpath, sample_config["analysis"]["case_id"] + ".sacct") - jobid_dump = 
os.path.join(resultpath, profile + "_jobids.yaml") - with open(jobid_file, "r") as jobid_in, open(jobid_dump, - "w") as jobid_out: - jobid_list = jobid_in.read().splitlines() - yaml.dump({sample_config['analysis']['case_id']: jobid_list}, - jobid_out) + jobid_yaml = os.path.join(resultpath, profile + "_jobids.yaml") + job_id_dump_to_yaml(jobid_dump, jobid_yaml, case_name) diff --git a/BALSAMIC/commands/run/base.py b/BALSAMIC/commands/run/base.py index 752c8f2fa..c1c999398 100644 --- a/BALSAMIC/commands/run/base.py +++ b/BALSAMIC/commands/run/base.py @@ -1,9 +1,6 @@ -#! /usr/bin/env python - import click from BALSAMIC.commands.run.analysis import analysis as run_analysis_cmd -from BALSAMIC.commands.run.reference import reference as run_reference_cmd @click.group() @@ -14,4 +11,3 @@ def run(context): run.add_command(run_analysis_cmd) -run.add_command(run_reference_cmd) diff --git a/BALSAMIC/commands/run/reference.py b/BALSAMIC/commands/run/reference.py deleted file mode 100644 index ed3695404..000000000 --- a/BALSAMIC/commands/run/reference.py +++ /dev/null @@ -1,86 +0,0 @@ -#! /usr/bin/env python - -import json -import subprocess -import logging -import click - -from BALSAMIC.utils.cli import get_schedulerpy -from BALSAMIC.utils.cli import get_snakefile, SnakeMake, get_config - -LOG = logging.getLogger(__name__) - - -@click.command('reference', short_help="Run the GenerateRef workflow") -@click.option('-s', - "--snakefile", - default=get_snakefile('generate_ref'), - help="snakefile for reference generation") -@click.option('-c', - '--configfile', - required=True, - help="Config file to run the workflow") -@click.option('--run-mode', - default='local', - type=click.Choice(["local"]), - help="Run mode to use. 
Only local supported for this.") -@click.option('--cluster-config', - show_default=True, - default=get_config('cluster'), - type=click.Path(), - help='SLURM config json file.') -@click.option( - '-l', - '--log-file', - type=click.Path(), - help='Log file output for BALSAMIC. This is raw log output from snakemake.' -) -@click.option( - '-r', - '--run-analysis', - show_default=True, - default=False, - is_flag=True, - help='By default balsamic run_analysis will run in dry run mode. \ - Raise thise flag to make the actual analysis') -@click.option( - '-f', - '--force-all', - show_default=True, - default=False, - is_flag=True, - help='Force run all analysis. This is same as snakemake --forceall') -@click.option('--snakemake-opt', - multiple=True, - help='Pass these options directly to snakemake') -@click.pass_context -def reference(context, snakefile, configfile, run_mode, cluster_config, - log_file, run_analysis, force_all, snakemake_opt): - """ Run generate reference workflow """ - LOG.info(f"BALSAMIC started with log level {context.obj['loglevel']}.") - LOG.info("Reference generation workflow started") - - with open(configfile, "r") as config_fh: - config = json.load(config_fh) - - # Singularity bind path - bind_path = list() - bind_path.append(config['output']) - bind_path.append(config['conda_env_yaml']) - bind_path.append(config['rule_directory']) - - # Construct snakemake command to run workflow - balsamic_run = SnakeMake() - balsamic_run.working_dir = config['output'] - balsamic_run.snakefile = snakefile - balsamic_run.configfile = configfile - balsamic_run.run_mode = run_mode - balsamic_run.forceall = force_all - balsamic_run.run_analysis = run_analysis - balsamic_run.sm_opt = snakemake_opt - # Always use singularity - balsamic_run.use_singularity = True - balsamic_run.singularity_bind = bind_path - balsamic_run.sm_opt = snakemake_opt - - subprocess.run(balsamic_run.build_cmd(), shell=True) diff --git a/BALSAMIC/conda/annotate.yaml 
b/BALSAMIC/conda/annotate.yaml index c72955973..c792216ae 100644 --- a/BALSAMIC/conda/annotate.yaml +++ b/BALSAMIC/conda/annotate.yaml @@ -6,3 +6,4 @@ channels: dependencies: - ensembl-vep=100.2 - bcftools>=1.10 + - vcfanno=0.3.2 diff --git a/BALSAMIC/conda/varcall_py27.yaml b/BALSAMIC/conda/varcall_py27.yaml index b956b1f67..8ded41a94 100644 --- a/BALSAMIC/conda/varcall_py27.yaml +++ b/BALSAMIC/conda/varcall_py27.yaml @@ -6,7 +6,7 @@ channels: dependencies: - python=2.7 - strelka=2.8.4 - - manta=1.3.0 + - manta=1.6.0 - bcftools=1.10.2 - tabix=0.2.6 - samtools=1.9.0 diff --git a/BALSAMIC/config/balsamic_env.yaml b/BALSAMIC/config/balsamic_env.yaml index 71f14bd50..9837c5138 100644 --- a/BALSAMIC/config/balsamic_env.yaml +++ b/BALSAMIC/config/balsamic_env.yaml @@ -9,6 +9,7 @@ align_qc: - csvkit annotate: - ensembl-vep + - vcfanno coverage_qc: - sambamba - mosdepth diff --git a/BALSAMIC/config/cluster.json b/BALSAMIC/config/cluster.json index 640e7e660..dde7068ee 100644 --- a/BALSAMIC/config/cluster.json +++ b/BALSAMIC/config/cluster.json @@ -47,6 +47,11 @@ "time": "12:00:00", "n": 10 }, + "dragen_align_call_tumor_only": { + "time": "10:00:00", + "n": 24, + "partition": "dragen" + }, "cnvkit_single": { "time": "12:00:00", "n": 10 @@ -163,7 +168,7 @@ "time": "24:00:00", "n": 24 }, - "sentioen_filter_TNscope": { + "sentieon_filter_TNscope": { "time": "24:00:00", "n": 24 }, @@ -190,5 +195,45 @@ "vardict_tumor_only": { "time": "10:00:00", "n": 10 + }, + "sentieon_bwa_umiextract": { + "time": "8:00:00", + "n": 24 + }, + "sentieon_consensuscall_umi": { + "time": "6:00:00", + "n": 24 + }, + "sentieon_bwa_umiconsensus": { + "time": "8:00:00", + "n": 24 + }, + "sentieon_consensusfilter_umi": { + "time": "4:00:00", + "n": 10 + }, + "sentieon_tnscope_umi": { + "time": "4:00:00", + "n": 12 + }, + "vep_umi": { + "time":"4:00:00", + "n": 10 + }, + "picard_umiaware": { + "time": "4:00:00", + "n": 12 + }, + "bcftools_generatebackgroundaf_table": { + "time": "2:00:00", + "n": 8 + }, 
+ "samtools_view_calculatemeanfamilydepth_umi": { + "time": "2:00:00", + "n": 8 + }, + "bcftools_query_calculatenoiseAF_umi": { + "time": "2:00:00", + "n": 8 } } diff --git a/BALSAMIC/config/cluster_minimal.json b/BALSAMIC/config/cluster_minimal.json index c24ded51a..17c3407be 100644 --- a/BALSAMIC/config/cluster_minimal.json +++ b/BALSAMIC/config/cluster_minimal.json @@ -10,5 +10,10 @@ "mail_type": "END", "time": "00:15:00", "n": 1 + }, + "dragen_align_call_tumor_only": { + "time": "10:00:00", + "n": 24, + "partition": "dragen" } } diff --git a/BALSAMIC/containers/Dockerfile.develop b/BALSAMIC/containers/Dockerfile.develop new file mode 100644 index 000000000..19bb0b16b --- /dev/null +++ b/BALSAMIC/containers/Dockerfile.develop @@ -0,0 +1,46 @@ +FROM hassanf/miniconda3:version-4.6.14 + +LABEL maintainer="Hassan Foroughi hassan dot foroughi at scilifelab dot se" +LABEL description="Bioinformatic analysis pipeline for somatic mutations in cancer" +LABEL version="6.0.1" + +ARG GIT_BRANCH=develop + +# create necessary directories +# install balsamic and it's environments +# symlink libreadline for picard to function properly +RUN mkdir -p /git_repos; \ + export PATH=/usr/local/miniconda/bin:$PATH; \ + export LC_ALL=en_US.utf-8; \ + export LANG=en_US.utf-8; \ + conda clean -iy; \ + cd /git_repos && git clone https://github.com/Clinical-Genomics/BALSAMIC && cd BALSAMIC && git checkout ${GIT_BRANCH} && \ + conda env create --file BALSAMIC/conda/align.yaml -n align_qc && \ + source activate align_qc && \ + picard_jar=picard-2.23.2-201-g922891d-SNAPSHOT-all.jar && \ + picard_PATH=BALSAMIC/assets/${picard_jar} && \ + picard_destination=/usr/local/miniconda/envs/align_qc/share/ && \ + cp $picard_PATH ${picard_destination} && \ + ln -s ${picard_destination}/${picard_jar} ${picard_destination}/picard.jar; \ + ln -s /usr/local/miniconda/envs/align_qc/lib/libreadline.so.7.0 /usr/local/miniconda/envs/align_qc/lib/libreadline.so.6 && \ + ln -s 
/usr/local/miniconda/envs/align_qc/lib/libreadline.so.7.0 /usr/local/miniconda/envs/align_qc/lib/libreadline.so.6.0 && \ + source deactivate && \ + conda env create --file BALSAMIC/conda/annotate.yaml -n annotate && \ + conda env create --file BALSAMIC/conda/coverage.yaml -n coverage_qc && \ + conda env create --file BALSAMIC/conda/varcall_py27.yaml -n varcall_py27 && \ + conda env create --file BALSAMIC/conda/varcall_py36.yaml -n varcall_py36 && \ + source activate varcall_py36 && \ + ln -s /usr/local/miniconda/envs/varcall_py36/lib/libreadline.so.7.0 /usr/local/miniconda/envs/varcall_py36/lib/libreadline.so.6 && \ + ln -s /usr/local/miniconda/envs/varcall_py36/lib/libreadline.so.7.0 /usr/local/miniconda/envs/varcall_py36/lib/libreadline.so.6.0 && \ + source deactivate && \ + conda env create --file BALSAMIC/conda/varcall_cnvkit.yaml -n varcall_cnvkit && \ + conda clean --index-cache --lock --tarballs -y + +# The following fixes the error for Click +# RuntimeError: Click will abort further execution because Python 3 was +# configured to use ASCII as encoding for the environment. Consult +# https://click.palletsprojects.com/en/7.x/python3/ for mitigation steps. 
+ENV LC_ALL=en_US.utf-8 +ENV LANG=en_US.utf-8 +ENV PATH="/usr/local/miniconda/bin:${PATH}" + diff --git a/BALSAMIC/containers/align_qc/align_qc.sh b/BALSAMIC/containers/align_qc/align_qc.sh new file mode 100644 index 000000000..be3db9f0d --- /dev/null +++ b/BALSAMIC/containers/align_qc/align_qc.sh @@ -0,0 +1,5 @@ +conda env create -n ${1} --file ${1}.yaml +source activate ${1} +ENV_PATH=/opt/conda/envs +ln -s ${ENV_PATH}/${1}/lib/libreadline.so.7.0 ${ENV_PATH}/${1}/lib/libreadline.so.6 +ln -s ${ENV_PATH}/${1}/lib/libreadline.so.7.0 ${ENV_PATH}/${1}/lib/libreadline.so.6.0 diff --git a/BALSAMIC/containers/align_qc/align_qc.yaml b/BALSAMIC/containers/align_qc/align_qc.yaml new file mode 100644 index 000000000..fd1f02891 --- /dev/null +++ b/BALSAMIC/containers/align_qc/align_qc.yaml @@ -0,0 +1,18 @@ +channels: + - bioconda + - conda-forge + - defaults + +dependencies: + - bedtools=2.29.0 + - bwa=0.7.15 + - fastqc=0.11.9 + - samtools=1.9 + - tabix=0.2.5 + - picard=2.23.8 + - r-base + - multiqc=1.9 + - fastp=0.20.0 + - csvkit=1.0.4 + - libiconv + - fontconfig diff --git a/BALSAMIC/containers/annotate/annotate.sh b/BALSAMIC/containers/annotate/annotate.sh new file mode 100644 index 000000000..ef952f01d --- /dev/null +++ b/BALSAMIC/containers/annotate/annotate.sh @@ -0,0 +1,3 @@ +conda env create -n ${1} --file ${1}.yaml +source activate ${1} +pip install --no-cache-dir genmod==3.7.4 diff --git a/BALSAMIC/containers/annotate/annotate.yaml b/BALSAMIC/containers/annotate/annotate.yaml new file mode 100644 index 000000000..9421b9d8b --- /dev/null +++ b/BALSAMIC/containers/annotate/annotate.yaml @@ -0,0 +1,11 @@ +channels: + - anaconda + - bioconda + - defaults + - conda-forge + +dependencies: + - ensembl-vep=100.2 + - bcftools>=1.10 + - vcfanno=0.3.2 + - gxx_linux-64 diff --git a/BALSAMIC/containers/coverage_qc/coverage_qc.sh b/BALSAMIC/containers/coverage_qc/coverage_qc.sh new file mode 100644 index 000000000..b3890fcbf --- /dev/null +++ 
b/BALSAMIC/containers/coverage_qc/coverage_qc.sh @@ -0,0 +1 @@ +conda env create -n ${1} --file ${1}.yaml diff --git a/BALSAMIC/containers/coverage_qc/coverage_qc.yaml b/BALSAMIC/containers/coverage_qc/coverage_qc.yaml new file mode 100644 index 000000000..d7b6038ff --- /dev/null +++ b/BALSAMIC/containers/coverage_qc/coverage_qc.yaml @@ -0,0 +1,8 @@ +channels: + - bioconda + - conda-forge + - defaults + +dependencies: + - sambamba=0.6.6 + - mosdepth=0.2.9 diff --git a/BALSAMIC/containers/varcall_cnvkit/varcall_cnvkit.sh b/BALSAMIC/containers/varcall_cnvkit/varcall_cnvkit.sh new file mode 100644 index 000000000..5a658d7c8 --- /dev/null +++ b/BALSAMIC/containers/varcall_cnvkit/varcall_cnvkit.sh @@ -0,0 +1,3 @@ +conda env create -n ${1} --file ${1}.yaml +source activate ${1} +pip install --no-cache-dir cnvkit==0.9.4 biopython==1.76 diff --git a/BALSAMIC/containers/varcall_cnvkit/varcall_cnvkit.yaml b/BALSAMIC/containers/varcall_cnvkit/varcall_cnvkit.yaml new file mode 100644 index 000000000..5a1c05e5d --- /dev/null +++ b/BALSAMIC/containers/varcall_cnvkit/varcall_cnvkit.yaml @@ -0,0 +1,13 @@ +channels: + - bioconda + - conda-forge + - defaults + - r + +dependencies: + - python=3.6 + - pip=9.0.3 + - bcftools=1.10.2 + - tabix=0.2.6 + - r-base=3.6.1 + - bioconductor-dnacopy=1.60.0 diff --git a/BALSAMIC/containers/varcall_py27/varcall_py27.sh b/BALSAMIC/containers/varcall_py27/varcall_py27.sh new file mode 100644 index 000000000..b3890fcbf --- /dev/null +++ b/BALSAMIC/containers/varcall_py27/varcall_py27.sh @@ -0,0 +1 @@ +conda env create -n ${1} --file ${1}.yaml diff --git a/BALSAMIC/containers/varcall_py27/varcall_py27.yaml b/BALSAMIC/containers/varcall_py27/varcall_py27.yaml new file mode 100644 index 000000000..ee5884ba6 --- /dev/null +++ b/BALSAMIC/containers/varcall_py27/varcall_py27.yaml @@ -0,0 +1,12 @@ +channels: + - bioconda + - conda-forge + - defaults + +dependencies: + - python=2.7 + - strelka=2.8.4 + - manta=1.6.0 + - bcftools=1.10.2 + - tabix=0.2.6 + - 
samtools=1.9 diff --git a/BALSAMIC/containers/varcall_py36/varcall_py36.sh b/BALSAMIC/containers/varcall_py36/varcall_py36.sh new file mode 100644 index 000000000..be3db9f0d --- /dev/null +++ b/BALSAMIC/containers/varcall_py36/varcall_py36.sh @@ -0,0 +1,5 @@ +conda env create -n ${1} --file ${1}.yaml +source activate ${1} +ENV_PATH=/opt/conda/envs +ln -s ${ENV_PATH}/${1}/lib/libreadline.so.7.0 ${ENV_PATH}/${1}/lib/libreadline.so.6 +ln -s ${ENV_PATH}/${1}/lib/libreadline.so.7.0 ${ENV_PATH}/${1}/lib/libreadline.so.6.0 diff --git a/BALSAMIC/containers/varcall_py36/varcall_py36.yaml b/BALSAMIC/containers/varcall_py36/varcall_py36.yaml new file mode 100644 index 000000000..a9550b613 --- /dev/null +++ b/BALSAMIC/containers/varcall_py36/varcall_py36.yaml @@ -0,0 +1,14 @@ +channels: + - bioconda + - conda-forge + - defaults + +dependencies: + - python=3.6 + - bcftools=1.11 + - tabix=0.2.6 + - samtools=1.11 + - gatk=3.8 + - vardict=2019.06.04=pl526_0 + - vardict-java=1.7 + - libiconv diff --git a/BALSAMIC/snakemake_rules/align/bwa_mem.rule b/BALSAMIC/snakemake_rules/align/bwa_mem.rule index 712deaca3..4f7f5e4cb 100644 --- a/BALSAMIC/snakemake_rules/align/bwa_mem.rule +++ b/BALSAMIC/snakemake_rules/align/bwa_mem.rule @@ -1,37 +1,73 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_threads +def picard_flag(picarddup): + if picarddup == "mrkdup": + return "FALSE" + else: + return "TRUE" # Following rule will take input fastq files, align them using bwa mem, and convert the output to sam format rule bwa_mem: + input: + fa = config["reference"]["reference_genome"], + read1 = Path(fastq_dir, "{sample}_1.fp.fastq.gz").as_posix(), + read2 = Path(fastq_dir, "{sample}_2.fp.fastq.gz").as_posix(), + refidx = expand(config["reference"]["reference_genome"] + ".{prefix}", prefix=["amb","ann","bwt","pac","sa"]) + output: + bamout = temp(Path(bam_dir, "{sample}.sorted.bam").as_posix()) + params: + 
bam_header = "'@RG\\tID:" + "{sample}" + "\\tSM:" + "{sample}" + "\\tPL:ILLUMINAi'", + conda = config["bioinfo_tools"].get("bwa"), + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + threads: get_threads(cluster_config, "bwa_mem") + singularity: Path(singularity_image, config["bioinfo_tools"].get("bwa") + ".sif").as_posix() + benchmark: + Path(benchmark_dir, "bwa_mem_{sample}.bwa_mem.tsv").as_posix() + shell: + """ +source activate {params.conda}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +bwa mem \ +-t {threads} \ +-R {params.bam_header} \ +-M \ +-v 1 \ +{input.fa} {input.read1} {input.read2} \ +| samtools sort -T {params.tmpdir} --threads {threads} --output-fmt BAM -o {output.bamout} - ; +samtools index -@ {threads} {output.bamout}; +rm -rf {params.tmpdir}; + """ + +rule MarkDuplicates: input: - fa = config["reference"]["reference_genome"], - read1 = fastq_dir + "{sample}_1.fp.fastq.gz", - read2 = fastq_dir + "{sample}_2.fp.fastq.gz", - refidx = expand(config["reference"]["reference_genome"] + ".{prefix}", prefix=["amb","ann","bwt","pac","sa"]) + Path(bam_dir, "{sample}.sorted.bam").as_posix() output: - bamout = temp(bam_dir + "{sample}.sorted.bam") + mrkdup = Path(bam_dir, "{sample}.sorted." + picarddup + ".bam").as_posix(), + stats = Path(bam_dir, "{sample}.sorted." 
+ picarddup + ".txt").as_posix() params: - header_1 = "'@RG\\tID:" + "{sample}" + "\\tSM:" + "{sample}" + "\\tPL:ILLUMINAi'", - tmpdir = tmp_dir, - conda = get_conda_env(config["conda_env_yaml"],"bwa") - threads: get_threads(cluster_config, "bwa_mem") - singularity: singularity_image + conda = config["bioinfo_tools"].get("picard"), + mem = "16g", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + rm_dup = picard_flag(picarddup) + threads: get_threads(cluster_config, "MarkDuplicates") + singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() benchmark: - benchmark_dir + "bwa_mem_" + "{sample}.bwa_mem.tsv" + Path(benchmark_dir, "MarkDuplicates_{sample}.markduplicates.tsv").as_posix() shell: - "source activate {params.conda}; " - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "bwa mem " - "-t {threads} " - "-R {params.header_1} " - "-M " - "-v 1 " - "{input.fa} {input.read1} {input.read2} " - "| samtools sort -T ${{tmpdir}} --threads {threads} --output-fmt BAM -o {output.bamout} - ;" - "samtools index -@ {threads} {output.bamout}; " + """ +source activate {params.conda}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} \ +MarkDuplicates \ +INPUT={input} \ +OUTPUT={output.mrkdup} \ +VALIDATION_STRINGENCY=SILENT \ +MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 \ +REMOVE_DUPLICATES={params.rm_dup} \ +METRICS_FILE='{output.stats}'; +samtools index {output.mrkdup}; +rm -rf {params.tmpdir}; + """ diff --git a/BALSAMIC/snakemake_rules/sentieon/sentieon_alignment.rule b/BALSAMIC/snakemake_rules/align/sentieon_alignment.rule similarity index 54% rename from BALSAMIC/snakemake_rules/sentieon/sentieon_alignment.rule rename to BALSAMIC/snakemake_rules/align/sentieon_alignment.rule index dc0939b5c..6046a25c8 100644 --- a/BALSAMIC/snakemake_rules/sentieon/sentieon_alignment.rule +++ 
b/BALSAMIC/snakemake_rules/align/sentieon_alignment.rule @@ -1,67 +1,63 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_sample_type -from BALSAMIC.utils.rule import get_threads - - # Following rule will take input fastq files, align them using bwa mem, and convert the output to sam format rule sentieon_align_sort: input: ref = config["reference"]["reference_genome"], - read1 = fastq_dir + "{sample}_1.fp.fastq.gz", - read2 = fastq_dir + "{sample}_2.fp.fastq.gz", + read1 = Path(fastq_dir, "{sample}_1.fp.fastq.gz").as_posix(), + read2 = Path(fastq_dir, "{sample}_2.fp.fastq.gz").as_posix(), refidx = expand(config["reference"]["reference_genome"] + ".{prefix}", prefix=["amb","ann","bwt","pac","sa"]) output: - bamout = bam_dir + "{sample}.bam" + bamout = Path(bam_dir, "{sample}.bam").as_posix() params: - tmpdir = tmp_dir, + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), header = "'@RG\\tID:" + "{sample}" + "\\tSM:" + "{sample}" + "\\tPL:ILLUMINAi'", - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], threads: get_threads(cluster_config, 'sentieon_align_sort') log: - bam_dir + "{sample}.bam.log" + Path(bam_dir, "{sample}.bam.log").as_posix() benchmark: - benchmark_dir + 'sentieon_align_sort_' + "{sample}.align_sort.tsv" + Path(benchmark_dir, "sentieon_align_sort_{sample}.align_sort.tsv").as_posix() shell: """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p ${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; -{params.sentieon_exec} bwa mem -M -R {params.header} -t {threads} -K 50000000 {input.ref} {input.read1} {input.read2} | {params.sentieon_exec} util sort -o {output.bamout} 
-t {threads} --block_size 3G --sam2bam -i - +{params.sentieon_exec} bwa mem -M \ +-R {params.header} \ +-t {threads} \ +-K 50000000 \ +{input.ref} {input.read1} {input.read2} \ +| {params.sentieon_exec} util sort -o {output.bamout} -t {threads} --block_size 3G --sam2bam -i - """ rule sentieon_dedup: input: - bam = bam_dir + "{sample}.bam", + bam = Path(bam_dir, "{sample}.bam").as_posix(), output: - bam = bam_dir + "{sample}.dedup.bam", - score = bam_dir + "{sample}.dedup.score", - metrics = bam_dir + "{sample}.dedup.metrics" + bam = Path(bam_dir, "{sample}.dedup.bam").as_posix(), + score = Path(bam_dir, "{sample}.dedup.score").as_posix(), + metrics = Path(bam_dir, "{sample}.dedup.metrics").as_posix() params: - tmpdir = tmp_dir, + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), housekeeper_id = {"id": "{sample}", "tags": "scout"}, - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], threads: get_threads(cluster_config, 'sentieon_dedup') log: - bam_dir + "{sample}.dedup.bam.log" + Path(bam_dir, "{sample}.dedup.bam.log").as_posix() benchmark: - benchmark_dir + 'sentieon_dedup_' + "{sample}.dedup.tsv" + Path(benchmark_dir, "sentieon_dedup_{sample}.dedup.tsv").as_posix() shell: """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p ${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver -t {threads} -i {input.bam} --algo LocusCollector --fun score_info {output.score}; @@ -75,25 +71,23 @@ rule sentieon_realign: ref = config["reference"]["reference_genome"], mills = config["reference"]["mills_1kg"], indel_1kg = config["reference"]["1kg_known_indel"], - bam = bam_dir + "{sample}.dedup.bam", + bam = Path(bam_dir, "{sample}.dedup.bam").as_posix() output: 
- bam = bam_dir + "{sample}.dedup.realign.bam", + bam = Path(bam_dir, "{sample}.dedup.realign.bam").as_posix(), params: - tmpdir = tmp_dir, - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], threads: get_threads(cluster_config, 'sentieon_realign') log: - bam_dir + "{sample}.dedup.realign.bam.log" + Path(bam_dir, "{sample}.dedup.realign.bam.log").as_posix() benchmark: - benchmark_dir + 'sentieon_realign_' + "{sample}.dedup_realign.tsv" + Path(benchmark_dir, "sentieon_realign_{sample}.dedup_realign.tsv").as_posix() shell: """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p ${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver -r {input.ref} -t {threads} -i {input.bam} --algo Realigner -k {input.mills} -k {input.indel_1kg} {output} @@ -106,27 +100,25 @@ rule sentieon_base_calibration: mills = config["reference"]["mills_1kg"], indel_1kg = config["reference"]["1kg_known_indel"], dbsnp = config["reference"]["dbsnp"], - bam = bam_dir + "{sample}.dedup.realign.bam", + bam = Path(bam_dir, "{sample}.dedup.realign.bam").as_posix() output: - recal_data_table = bam_dir + "{sample}.dedup.realign.recal_data.table", - qual_recal = bam_dir + "{sample}.dedup.realign.recal.csv", - qual_recal_plot = bam_dir + "{sample}.dedup.realign.recal.pdf", + recal_data_table = Path(bam_dir, "{sample}.dedup.realign.recal_data.table").as_posix(), + qual_recal = Path(bam_dir, "{sample}.dedup.realign.recal.csv").as_posix(), + qual_recal_plot = Path(bam_dir, "{sample}.dedup.realign.recal.pdf").as_posix(), params: - tmpdir = tmp_dir, - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + tmpdir = 
tempfile.mkdtemp(prefix=tmp_dir), + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], threads: get_threads(cluster_config, 'sentieon_base_calibration') log: - bam_dir + "{sample}.dedup.realign.recal.log" + Path(bam_dir, "{sample}.dedup.realign.recal.log").as_posix() benchmark: - benchmark_dir + 'sentieon_base_calibration_' + "{sample}.base_recal.tsv" + Path(benchmark_dir, "sentieon_base_calibration_{sample}.base_recal.tsv").as_posix() shell: """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p ${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver -r {input.ref} -t {threads} -i {input.bam} --algo QualCal -k {input.dbsnp} -k {input.mills} -k {input.indel_1kg} {output.recal_data_table}; diff --git a/BALSAMIC/snakemake_rules/annotation/rankscore.rule b/BALSAMIC/snakemake_rules/annotation/rankscore.rule new file mode 100644 index 000000000..ca84d3629 --- /dev/null +++ b/BALSAMIC/snakemake_rules/annotation/rankscore.rule @@ -0,0 +1,23 @@ +# vim: syntax=python tabstop=4 expandtab +# coding: utf-8 +# Rank variants according to a rankscore model + +rule rankscore: + input: + vcf = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.vcf.gz", + rankscore = config["reference"]["rankscore"] + output: + vcf_ranked = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.ranked.vcf.gz", + params: + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"}, + conda = config["bioinfo_tools"].get("genmod"), + threads: get_threads(cluster_config, 'rankscore') + singularity: Path(singularity_image, config["bioinfo_tools"].get("genmod") + ".sif").as_posix() + benchmark: + benchmark_dir + 'rankscore_' + "{var_type}.somatic.{case_name}.vardict.vep.tsv" + shell: + """ +source 
activate {params.conda}; +genmod score -r -c {input.rankscore} {input.vcf} -o {output.vcf_ranked} +tabix -p vcf -f {output.vcf_ranked}; + """ diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_filter.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_filter.rule deleted file mode 100644 index 4dd23edc3..000000000 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_filter.rule +++ /dev/null @@ -1,41 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 -# NGS filters for various scenarios - -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_threads -from BALSAMIC.utils.models import VarCallerFilter -from BALSAMIC.utils.constants import VARDICT_SETTINGS - -VARDICT = VarCallerFilter.parse_obj(VARDICT_SETTINGS) - -rule ngs_filter_vardict: - input: - vcf = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.vcf.gz", - output: - vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.vcf.gz", - vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.vcf.gz", - params: - housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"}, - conda = get_conda_env(config["conda_env_yaml"],"ensembl-vep"), - MQ = [VARDICT.MQ.tag_value, VARDICT.MQ.filter_name], - AD = [VARDICT.AD.tag_value, VARDICT.AD.filter_name], - DP = [VARDICT.DP.tag_value, VARDICT.DP.filter_name], - AF_min = [VARDICT.AF_min.tag_value, VARDICT.AF_min.filter_name], - AF_max = [VARDICT.AF_max.tag_value, VARDICT.AF_max.filter_name], - threads: get_threads(cluster_config, 'vep') - singularity: singularity_image - benchmark: - benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.vardict.vep.tsv" - shell: - "source activate {params.conda}; " - "bcftools view {input.vcf} " - " | bcftools filter --include 'INFO/MQ >= {params.MQ[0]}' --soft-filter '{params.MQ[1]}' --mode '+' " - " | bcftools filter --include 'INFO/DP >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' " - " | bcftools 
filter --include 'INFO/VD >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' " - " | bcftools filter --include 'INFO/AF >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' " - " | bcftools filter --include 'INFO/AF < {params.AF_max[0]}' --soft-filter '{params.AF_max[1]}' --mode '+' " - " | bcftools view -o {output.vcf_filtered} -O z; " - "tabix -p vcf -f {output.vcf_filtered}; " - "bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered}; " - "tabix -p vcf -f {output.vcf_pass}; " diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal.rule new file mode 100644 index 000000000..0e9b88deb --- /dev/null +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal.rule @@ -0,0 +1,37 @@ +# vim: syntax=python tabstop=4 expandtab +# coding: utf-8 +# NGS filters for various scenarios + +rule ngs_filter_vardict: + input: + vcf = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.vcf.gz", + output: + vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.vcf.gz", + vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.vcf.gz", + params: + conda = config["bioinfo_tools"].get("bcftools"), + MQ = [VARDICT.MQ.tag_value, VARDICT.MQ.filter_name], + AD = [VARDICT.AD.tag_value, VARDICT.AD.filter_name], + DP = [VARDICT.DP.tag_value, VARDICT.DP.filter_name], + AF_min = [VARDICT.AF_min.tag_value, VARDICT.AF_min.filter_name], + AF_max = [VARDICT.AF_max.tag_value, VARDICT.AF_max.filter_name], + pop_freq = [VARDICT.pop_freq.tag_value, VARDICT.pop_freq.filter_name], + threads: get_threads(cluster_config, 'ngs_filter_vardict') + singularity: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() + benchmark: + benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.vardict.vep.tsv" + shell: + """ +source activate {params.conda}; +bcftools view {input.vcf} \ 
+| bcftools filter --include 'SMPL_MIN(FMT/MQ) >= {params.MQ[0]}' --soft-filter '{params.MQ[1]}' --mode + \ +| bcftools filter --include 'INFO/DP >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ +| bcftools filter --include 'INFO/VD >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \ +| bcftools filter --include 'INFO/AF >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \ +| bcftools filter --include 'INFO/AF < {params.AF_max[0]}' --soft-filter '{params.AF_max[1]}' --mode '+' \ +| bcftools filter --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' \ +| bcftools view -o {output.vcf_filtered} -O z; +tabix -p vcf -f {output.vcf_filtered}; +bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered}; +tabix -p vcf -f {output.vcf_pass}; + """ diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only.rule new file mode 100644 index 000000000..7fcd57373 --- /dev/null +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only.rule @@ -0,0 +1,37 @@ +# vim: syntax=python tabstop=4 expandtab +# coding: utf-8 +# NGS filters for various scenarios + +rule ngs_filter_vardict: + input: + vcf = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.vcf.gz", + output: + vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.vcf.gz", + vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.vcf.gz", + params: + conda = config["bioinfo_tools"].get("bcftools"), + MQ = [VARDICT.MQ.tag_value, VARDICT.MQ.filter_name], + AD = [VARDICT.AD.tag_value, VARDICT.AD.filter_name], + DP = [VARDICT.DP.tag_value, VARDICT.DP.filter_name], + AF_min = [VARDICT.AF_min.tag_value, VARDICT.AF_min.filter_name], + AF_max = [VARDICT.AF_max.tag_value, VARDICT.AF_max.filter_name], + pop_freq = [VARDICT.pop_freq.tag_value, 
VARDICT.pop_freq.filter_name], + threads: get_threads(cluster_config, 'ngs_filter_vardict') + singularity: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() + benchmark: + benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.vardict.vep.tsv" + shell: + """ +source activate {params.conda}; +bcftools view {input.vcf} \ +| bcftools filter --include 'INFO/MQ >= {params.MQ[0]}' --soft-filter '{params.MQ[1]}' --mode '+' \ +| bcftools filter --include 'INFO/DP >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ +| bcftools filter --include 'INFO/VD >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \ +| bcftools filter --include 'INFO/AF >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \ +| bcftools filter --include 'INFO/AF < {params.AF_max[0]}' --soft-filter '{params.AF_max[1]}' --mode '+' \ +| bcftools filter --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' \ +| bcftools view -o {output.vcf_filtered} -O z; +tabix -p vcf -f {output.vcf_filtered}; +bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered}; +tabix -p vcf -f {output.vcf_pass}; + """ diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule new file mode 100644 index 000000000..6eb6037e7 --- /dev/null +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule @@ -0,0 +1,36 @@ +# vim: syntax=python tabstop=4 expandtab +# coding: utf-8 +# NGS filters for various scenarios + +rule ngs_filter_tnscope: + input: + vcf = vep_dir + "{var_type}.somatic.{case_name}.tnscope.pass.vcf.gz", + output: + vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnscope.filtered.vcf.gz", + vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.tnscope.filtered.pass.vcf.gz", + params: + conda = 
config["bioinfo_tools"].get("bcftools"), + AD = [SENTIEON_CALLER.AD.tag_value, SENTIEON_CALLER.AD.filter_name], + DP = [SENTIEON_CALLER.DP.tag_value, SENTIEON_CALLER.DP.filter_name], + AF_min = [SENTIEON_CALLER.AF_min.tag_value, SENTIEON_CALLER.AF_min.filter_name], + AF_max = [SENTIEON_CALLER.AF_max.tag_value, SENTIEON_CALLER.AF_max.filter_name], + pop_freq = [SENTIEON_CALLER.pop_freq.tag_value, SENTIEON_CALLER.pop_freq.filter_name], + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"} + threads: get_threads(cluster_config, 'ngs_filter_tnscope') + singularity: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() + benchmark: + benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.tnscope.tsv" + shell: + """ +source activate {params.conda}; +bcftools view {input.vcf} \ +| bcftools filter --threads {threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]} || SUM(FORMAT/AD[1:0]+FORMAT/AD[1:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include 'FORMAT/AD[0:1] >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include 'FORMAT/AF[0] >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include 'FORMAT/AF[0] < {params.AF_max[0]}' --soft-filter '{params.AF_max[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' \ +| bcftools view -o {output.vcf_filtered} -O z; +tabix -p vcf -f {output.vcf_filtered}; +bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered}; +tabix -p vcf -f {output.vcf_pass}; + """ diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule new file mode 
100644 index 000000000..382b9ba30 --- /dev/null +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule @@ -0,0 +1,104 @@ +# vim: syntax=python tabstop=4 expandtab +# coding: utf-8 +# NGS filters for various scenarios + +rule ngs_filter_tnscope: + input: + vcf = vep_dir + "{var_type}.somatic.{case_name}.tnscope.pass.vcf.gz", + wgs_calling_file = config["reference"]["wgs_calling_interval"] + output: + vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnscope.filtered.vcf.gz", + params: + conda = config["bioinfo_tools"].get("bcftools"), + DP = [SENTIEON_CALLER.DP.tag_value, SENTIEON_CALLER.DP.filter_name], + AD = [SENTIEON_CALLER.AD.tag_value, SENTIEON_CALLER.AD.filter_name], + AF_min = [SENTIEON_CALLER.AF_min.tag_value, SENTIEON_CALLER.AF_min.filter_name], + AF_max = [SENTIEON_CALLER.AF_max.tag_value, SENTIEON_CALLER.AF_max.filter_name], + pop_freq = [SENTIEON_CALLER.pop_freq.tag_value, SENTIEON_CALLER.pop_freq.filter_name], + strand_reads = [SENTIEON_CALLER.strand_reads.tag_value, SENTIEON_CALLER.strand_reads.filter_name], + qss = [SENTIEON_CALLER.qss.tag_value, SENTIEON_CALLER.qss.filter_name], + sor = [SENTIEON_CALLER.sor.tag_value, SENTIEON_CALLER.sor.filter_name], + threads: + get_threads(cluster_config, 'ngs_filter_tnscope') + singularity: + Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() + benchmark: + benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.tnscope.tsv" + shell: + """ +source activate {params.conda}; +grep -v '^@' {input.wgs_calling_file} > {input.wgs_calling_file}.bed +bcftools filter --threads {threads} --regions-file {input.wgs_calling_file}.bed {input.vcf} \ +| bcftools filter --threads {threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include 'FORMAT/AD[0:1] > {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \ +| bcftools filter --threads 
{threads} --include 'FORMAT/AF > {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include 'FORMAT/AF < {params.AF_max[0]}' --soft-filter '{params.AF_max[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include 'SUM(FORMAT/QSS)/SUM(FORMAT/AD) >= {params.qss[0]}' --soft-filter '{params.qss[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include 'FORMAT/ALT_F1R2 > {params.strand_reads[0]} && (FORMAT/ALT_F1R2 > 0 && FORMAT/ALT_F2R1 > {params.strand_reads[0]} && FORMAT/REF_F1R2 > {params.strand_reads[0]} && FORMAT/REF_F2R1 > {params.strand_reads[0]})' --soft-filter '{params.strand_reads[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include "INFO/SOR < {params.sor[0]}" --soft-filter '{params.sor[1]}' --mode '+' \ +| bcftools view -o {output.vcf_filtered} -O z; +tabix -p vcf -f {output.vcf_filtered}; + """ + +rule ngs_filter_tnhaplotyper: + input: + vcf = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.pass.vcf.gz", + wgs_calling_file = config["reference"]["wgs_calling_interval"] + output: + vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.filtered.vcf.gz", + params: + conda = config["bioinfo_tools"].get("bcftools"), + DP = [SENTIEON_CALLER.DP.tag_value, SENTIEON_CALLER.DP.filter_name], + AD = [SENTIEON_CALLER.AD.tag_value, SENTIEON_CALLER.AD.filter_name], + AF_min = [SENTIEON_CALLER.AF_min.tag_value, SENTIEON_CALLER.AF_min.filter_name], + AF_max = [SENTIEON_CALLER.AF_max.tag_value, SENTIEON_CALLER.AF_max.filter_name], + pop_freq = [SENTIEON_CALLER.pop_freq.tag_value, SENTIEON_CALLER.pop_freq.filter_name], + strand_reads = [SENTIEON_CALLER.strand_reads.tag_value, SENTIEON_CALLER.strand_reads.filter_name], + qss = [SENTIEON_CALLER.qss.tag_value, SENTIEON_CALLER.qss.filter_name] 
+ threads: + get_threads(cluster_config, 'ngs_filter_tnhaplotyper') + singularity: + Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() + benchmark: + benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.tnhaplotyper.tsv" + shell: + """ +source activate {params.conda}; +grep -v '^@' {input.wgs_calling_file} > {input.wgs_calling_file}.bed +bcftools filter --threads {threads} --regions-file {input.wgs_calling_file}.bed {input.vcf} \ +| bcftools filter --threads {threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include 'FORMAT/AD[0:1] >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include 'FORMAT/AF >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include 'FORMAT/AF < {params.AF_max[0]}' --soft-filter '{params.AF_max[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include 'SUM(FORMAT/QSS)/SUM(FORMAT/AD) >= {params.qss[0]}' --soft-filter '{params.qss[1]}' --mode '+' \ +| bcftools filter --threads {threads} --include 'FORMAT/ALT_F1R2 > {params.strand_reads[0]} && (FORMAT/ALT_F1R2 > 0 && FORMAT/ALT_F2R1 > {params.strand_reads[0]} && FORMAT/REF_F1R2 > {params.strand_reads[0]} && FORMAT/REF_F2R1 > {params.strand_reads[0]})' --soft-filter '{params.strand_reads[1]}' --mode '+' \ +| bcftools view -o {output.vcf_filtered} -O z; +tabix -p vcf -f {output.vcf_filtered}; + """ + +rule ngs_filter_intersect: + input: + tnscope = vep_dir + "{var_type}.somatic.{case_name}.tnscope.filtered.vcf.gz", + tnhaplotyper = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.filtered.vcf.gz" + output: + vcf_name = vep_dir + 
"{var_type}.somatic.{case_name}.tnscope.filtered.pass.vcf.gz" + params: + conda = config["bioinfo_tools"].get("bcftools"), + vcf_dir = vep_dir + "sentieon_callers_intersect", + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"} + threads: + get_threads(cluster_config, 'ngs_filter_intersect') + singularity: + Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() + benchmark: + benchmark_dir + 'ngs_filter_intersect' + "{var_type}.somatic.{case_name}.tsv" + shell: + """ +source activate {params.conda}; +bcftools isec {input.tnscope} {input.tnhaplotyper} -p {params.vcf_dir} -O z -f PASS; +cp {params.vcf_dir}/0002.vcf.gz {output.vcf_name}; +tabix -p vcf -f {output.vcf_name}; +rm -r {params.vcf_dir} + """ diff --git a/BALSAMIC/snakemake_rules/annotation/vep.rule b/BALSAMIC/snakemake_rules/annotation/vep.rule index 04dce6f2b..32b650a7a 100644 --- a/BALSAMIC/snakemake_rules/annotation/vep.rule +++ b/BALSAMIC/snakemake_rules/annotation/vep.rule @@ -2,9 +2,6 @@ # coding: utf-8 # VEP annotation module. 
Annotate all VCFs generated through VEP -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_threads - rule vep_somatic: input: vcf = vcf_dir + "{var_type}.somatic.{case_name}.{var_caller}.vcf.gz", @@ -17,44 +14,49 @@ rule vep_somatic: params: housekeeper_id = {"id": "{case_name}", "tags": "annotated-somatic"}, tmpvcf = vep_dir + "{var_type}.somatic.{case_name}.{var_caller}.tmp.vcf.gz", - conda = get_conda_env(config["conda_env_yaml"],"ensembl-vep"), + ref_path = Path(config["reference"]["gnomad_variant"]).parent.as_posix(), + vcfanno_toml = VCFANNO_TOML, + conda = config["bioinfo_tools"].get("ensembl-vep"), vep_cache = config["reference"]["vep"] threads: get_threads(cluster_config, 'vep') - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix() benchmark: benchmark_dir + 'vep_' + "{var_type}.somatic.{case_name}.{var_caller}.vep.tsv" shell: - "source activate {params.conda}; " - "vep_path=$(dirname $(readlink -e $(which vep))); " - "tmpvcf={params.tmpvcf}; " - "export PERL5LIB=;" - "bcftools reheader -s {input.header} {input.vcf} | bcftools view -O z -o $tmpvcf ;" - "vep " - "--dir $vep_path " - "--dir_cache {params.vep_cache} " - "--dir_plugins $vep_path " - "--input_file $tmpvcf " - "--output_file {output.vcf_all} " - "--compress_output bgzip " - "--fork {threads} " - "--vcf " - "--everything " - "--allow_non_variant " - "--dont_skip " - "--buffer_size 10000 " - "--format vcf " - "--offline " - "--variant_class " - "--merged " - "--cache " - "--custom {input.cosmic},COSMIC,vcf,exact,0,CDS,GENE,STRAND,CNT,AA " - "--verbose " - "--force_overwrite; " - "tabix -p vcf -f {output.vcf_all}; " - "bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_all}; " - "tabix -p vcf -f {output.vcf_pass}; " - "rm $tmpvcf " - + """ +source activate {params.conda}; +vep_path=$(dirname $(readlink -f $(which vep))); +tmpvcf={params.tmpvcf}; +export PERL5LIB=; +vcfanno 
--base-path {params.ref_path} {params.vcfanno_toml} {input.vcf} \ +| bcftools reheader --threads {threads} -s {input.header} \ +| bcftools view -O z -o $tmpvcf ; +vep \ +--dir $vep_path \ +--dir_cache {params.vep_cache} \ +--dir_plugins $vep_path \ +--input_file $tmpvcf \ +--output_file {output.vcf_all} \ +--compress_output bgzip \ +--fork {threads} \ +--vcf \ +--everything \ +--allow_non_variant \ +--dont_skip \ +--buffer_size 10000 \ +--format vcf \ +--offline \ +--variant_class \ +--merged \ +--cache \ +--custom {input.cosmic},COSMIC,vcf,exact,0,CDS,GENE,STRAND,CNT,AA \ +--verbose \ +--force_overwrite; +tabix -p vcf -f {output.vcf_all}; +bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_all}; +tabix -p vcf -f {output.vcf_pass}; +rm $tmpvcf; + """ rule vep_stat: input: @@ -64,11 +66,11 @@ rule vep_stat: params: housekeeper_id = {"id": "{case_name}", "tags": "stat-somatic"}, bed = config["panel"]["capture_kit"] if "panel" in config else "", - conda = get_conda_env(config["conda_env_yaml"],"ensembl-vep"), + conda = config["bioinfo_tools"].get("ensembl-vep"), filter_vep_string = "'not COSMIC and not Existing_variation and synonymous_variant'", bcftools_filter_string = "INFO/DP>=50 && FORMAT/AD>=5 && FORMAT/AF>=0.1", threads: get_threads(cluster_config, 'vep') - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix() benchmark: benchmark_dir + "vep_stat_SNV.somatic.{case_name}.{var_caller}.pass.balsamic_stat.tsv" shell: @@ -90,35 +92,37 @@ rule vep_germline: vcf_summary = vep_dir + "{var_type}.germline.{sample}.{var_caller}.vcf.gz_summary.html", params: housekeeper_id = {"id": "{sample}", "tags": "annotated-germline"}, - conda = get_conda_env(config["conda_env_yaml"],"ensembl-vep"), + conda = config["bioinfo_tools"].get("ensembl-vep"), vep_cache = config["reference"]["vep"] threads: get_threads(cluster_config, 'vep') - singularity: singularity_image + singularity: 
Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix() benchmark: benchmark_dir + 'vep_' + "{var_type}.germline.{sample}.{var_caller}.vep.tsv" shell: - "source activate {params.conda}; " - "vep_path=$(dirname $(readlink -e $(which vep))); " - "export PERL5LIB=;" - "vep " - "--dir $vep_path " - "--dir_cache {params.vep_cache} " - "--dir_plugins $vep_path " - "--input_file {input.vcf} " - "--output_file {output.vcf_all} " - "--compress_output bgzip " - "--fork {threads} " - "--vcf " - "--everything " - "--allow_non_variant " - "--dont_skip " - "--buffer_size 10000 " - "--format vcf " - "--offline " - "--variant_class " - "--merged " - "--cache " - "--custom {input.cosmic},COSMIC,vcf,exact,0,CDS,GENE,STRAND,CNT,AA " - "--verbose " - "--force_overwrite; " - "tabix -p vcf -f {output.vcf_all}; " + """ +source activate {params.conda}; +vep_path=$(dirname $(readlink -f $(which vep))); +export PERL5LIB=; +vep \ +--dir $vep_path \ +--dir_cache {params.vep_cache} \ +--dir_plugins $vep_path \ +--input_file {input.vcf} \ +--output_file {output.vcf_all} \ +--compress_output bgzip \ +--fork {threads} \ +--vcf \ +--everything \ +--allow_non_variant \ +--dont_skip \ +--buffer_size 10000 \ +--format vcf \ +--offline \ +--variant_class \ +--merged \ +--cache \ +--custom {input.cosmic},COSMIC,vcf,exact,0,CDS,GENE,STRAND,CNT,AA \ +--verbose \ +--force_overwrite; +tabix -p vcf -f {output.vcf_all}; + """ diff --git a/BALSAMIC/snakemake_rules/dragen_suite/dragen_dna.rule b/BALSAMIC/snakemake_rules/dragen_suite/dragen_dna.rule new file mode 100644 index 000000000..05bd3f488 --- /dev/null +++ b/BALSAMIC/snakemake_rules/dragen_suite/dragen_dna.rule @@ -0,0 +1,66 @@ +# vim: syntax=python tabstop=4 expandtab +# coding: utf-8 + +rule dragen_align_call_tumor_only: + input: + reference = config["reference"]["reference_genome"], + read1 = Path(fastq_dir, "{mysample}_1.fp.fastq.gz".format(mysample=tumor_sample)).as_posix(), + read2 = Path(fastq_dir, 
"{mysample}_2.fp.fastq.gz".format(mysample=tumor_sample)).as_posix(), + output: + bam = Path(result_dir, "dragen", "SNV.somatic." + config["analysis"]["case_id"] + ".dragen_tumor.bam").as_posix(), + vcf = Path(result_dir, "dragen", "SNV.somatic." + config["analysis"]["case_id"] + ".dragen.vcf.gz").as_posix() + benchmark: + Path(benchmark_dir, "dragen_align_call_tumor_only" + config["analysis"]["case_id"] + ".tsv").as_posix() + params: + dragen_result_dir = Path(result_dir, "dragen", "result").as_posix(), + tmp_reference_dir = Path(result_dir, "dragen", "reference").as_posix(), + sample_name = tumor_sample, + output_file_prefix = "SNV.somatic." + config["analysis"]["case_id"] + ".dragen" + threads: + get_threads(cluster_config, "dragen_align_call_tumor_only") + message: + "DRAGEN align and variant call for {params.sample_name}" + shell: + """ +echo "Running DRAGEN" +echo "Host: " $HOSTNAME + +echo "Setting ulimit" +ulimit -n 65535 +ulimit -u 16384 + +echo "Setting PATH" +export PATH=$PATH:/opt/edico/bin/ + +echo "preparing reference and loading it to memory" +mkdir -p {params.tmp_reference_dir} + +# Build reference +dragen --build-hash-table true \ + --ht-reference {input.reference} \ + --output-directory {params.tmp_reference_dir} #--ht-alt-liftover /opt/edico/liftover/hg19_alt_liftover.sam + +# Make sure reference loads properly +dragen -l -r {params.tmp_reference_dir} + +echo "will run the following DRAGEN command" +mkdir -p {params.dragen_result_dir} +dragen -f \ +-r {params.tmp_reference_dir} \ +--tumor-fastq1 {input.read1} \ +--tumor-fastq2 {input.read2} \ +--enable-variant-caller true \ +--RGID-tumor {params.sample_name} \ +--RGSM-tumor {params.sample_name} \ +--RGPL-tumor ILLUMINAi \ +--output-directory {params.dragen_result_dir} \ +--output-file-prefix {params.output_file_prefix} \ +--enable-duplicate-marking true \ +--enable-map-align-output true \ +--vc-enable-homologous-mapping-filter True \ +--vc-enable-clustered-events-filter True \ 
+--vc-clustered-events-threshold 3 \ +--vc-enable-triallelic-filter True \ +--vc-enable-multi-event-alt-allele-in-normal-filter True \ +--vc-enable-af-filter True + """ diff --git a/BALSAMIC/snakemake_rules/obsolete_rules/bcftools.rule b/BALSAMIC/snakemake_rules/obsolete_rules/bcftools.rule deleted file mode 100644 index ca75fd6b5..000000000 --- a/BALSAMIC/snakemake_rules/obsolete_rules/bcftools.rule +++ /dev/null @@ -1,54 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -from BALSAMIC.utils.rule import get_vcf_files - -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_conda_env - - -var_type = "SNV" - -rule bcftools_SNV_update_variant_ID: - input: - vcf = vcf_dir + var_type + "/" + "{vcf}.vcf.gz" - output: - vcf_dir + var_type + "/" + "id.{vcf}.vcf.gz" - params: - ref_fa = config["reference"]["reference_genome"], - conda = get_conda_env(config["conda_env_yaml"],"bcftools"), - shell: - "source activate {params.conda};" - "bcftools view " - "--apply-filter .,PASS " - "{input.vcf} " - "| bcftools norm -Oz -m-any " - "| bcftools norm -Oz -f {params.ref_fa} " - "| bcftools annotate -Oz " - "-x 'ID' -I +'%CHROM:%POS:%REF:%ALT' " - "> {output} ; " - "tabix -p vcf -f {output}; " - - -var_type = "SV" - -rule bcftools_SV_update_variant_ID: - input: - vcf_dir + var_type + "/" + "{vcf}.vcf.gz" - output: - vcf_dir + var_type + "/" + "id.{vcf}.vcf.gz" - params: - ref_fa = config["reference"]["reference_genome"], - conda = get_conda_env(config["conda_env_yaml"],"bcftools"), - shell: - "source activate {params.conda};" - "bcftools view " - "--apply-filter .,PASS " - "{input} " - "| bcftools norm -Oz -m-any " - "| bcftools annotate -Oz " - "-x 'ID' -I +'%CHROM:%POS:%REF:%ALT' " - "> {output} ; " - "tabix -p vcf -f {output}; " - diff --git a/BALSAMIC/snakemake_rules/obsolete_rules/collectqc_paired.rule b/BALSAMIC/snakemake_rules/obsolete_rules/collectqc_paired.rule deleted file mode 100644 index 5e592bc4b..000000000 --- 
a/BALSAMIC/snakemake_rules/obsolete_rules/collectqc_paired.rule +++ /dev/null @@ -1,47 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_conda_env, get_picard_mrkdup -from BALSAMIC import __version__ as bv - -picarddup = get_picard_mrkdup(config) - -# Following rule will take input fastq files, align them using bwa mem, and convert the output to sam format -rule collectqc: - input: - contest = expand(bam_dir + "{ct}", ct=["normal_tumor.contest", "tumor_normal.contest"]), - fastqc = expand(fastqc_dir + "{mysample}_{read_num}_fastqc.zip", mysample=config["samples"], read_num=[1, 2]), -# fastqc_ca = expand(fastqc_dir + "{mysample}_{read_num}.ca_fastqc.zip", mysample=config["samples"], read_num=[1, 2]), - insertsize = expand(bam_dir + "{sample}.sorted.insertsizemetric", sample=config["samples"]), - alignmetric = expand(bam_dir + "{sample}.sorted.alignmetric", sample=config["samples"]), - hsmetric = expand(bam_dir + "{sample}.sorted.hsmetric", sample=config["samples"]), - sambamba = expand(bam_dir + "{sample}.sorted." + picarddup + ".cov.bed", sample=config["samples"]), - sambambaexon = expand(bam_dir + "{sample}.sorted." 
+ picarddup + ".exon.cov.bed", sample=config["samples"]), - output: - result_dir + "qc/" + "qc_report.pdf" - params: - # This is using same environment as gatk - resultdir = result_dir, - conda = get_conda_env(config["conda_env_yaml"],"gatk"), - singularity: singularity_image - shell: - "source activate {params.conda};" - "touch {output}; " - "unset FILELIST; " - "FILELIST=`find {params.resultdir}bam -type f`; " - "for i in $FILELIST; " - "do " - " [ `file -b --mime-encoding $i` != \"binary\" ]" - " && grep -qs \"METRICS CLASS\" $i " - " && m=`grep \"METRICS CLASS\" $i | rev | cut -d\".\" -f 1 | rev` " - " && f=`echo $i | rev | cut -d\"/\" -f 1 | rev` " - " && echo $m > {params.resultdir}qc/${{f}}.csv " - " && sed -n '/## METRICS CLASS/,/^\s*$/{{/METRICS/!p}}' $i " - " | awk -v OFS=\"\\t\" -v FS=\"\\t\" ' {{ for (i=1; i<=NF; i++) {{ a[NR,i] = $i }} }} " - "NF>p {{ p = NF }} END {{ for(j=1; j<=p; j++) {{ str=a[1,j]; for(i=2;" - "i<=NR; i++) {{ str=str\"\\t\"a[i,j]; }}; print str }} }}' " - "| awk 'NR==1 {{ v=NF }} NF==v {{ print }}' >> {params.resultdir}qc/${{f}}.csv ; " - "done; " - diff --git a/BALSAMIC/snakemake_rules/obsolete_rules/collectqc_single.rule b/BALSAMIC/snakemake_rules/obsolete_rules/collectqc_single.rule deleted file mode 100644 index f61a28046..000000000 --- a/BALSAMIC/snakemake_rules/obsolete_rules/collectqc_single.rule +++ /dev/null @@ -1,45 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_conda_env, get_picard_mrkdup -from BALSAMIC import __version__ as bv - -picarddup = get_picard_mrkdup(config) - -# Following rule will take input fastq files, align them using bwa mem, and convert the output to sam format -rule collectqc: - input: - fastqc = expand(fastqc_dir + "{mysample}_{read_num}_fastqc.zip", mysample=config["samples"], read_num=[1, 2]), -# fastqc_ca = expand(fastqc_dir + "{mysample}_{read_num}.ca_fastqc.zip", mysample=config["samples"], 
read_num=[1, 2]), - hsmetric = expand(bam_dir + "{sample}.sorted.hsmetric", sample=config["samples"]), - insertsize = expand(bam_dir + "{sample}.sorted.insertsizemetric", sample=config["samples"]), - alignmetric = expand(bam_dir + "{sample}.sorted.alignmetric", sample=config["samples"]), - sambamba = expand(bam_dir + "{sample}.sorted." + picarddup + ".cov.bed", sample=config["samples"]), - sambambaexon = expand(bam_dir + "{sample}.sorted." + picarddup + ".exon.cov.bed", sample=config["samples"]), - output: - result_dir + "qc/" + "qc_report.pdf" - params: - # This is using same environment as gatk - resultdir = result_dir, - conda = get_conda_env(config["conda_env_yaml"],"gatk"), - singularity: singularity_image - shell: - "source activate {params.conda};" - "touch {output}; " - "unset FILELIST; " - "FILELIST=`find {params.resultdir}bam -type f`; " - "for i in $FILELIST; " - "do " - " [ `file -b --mime-encoding $i` != \"binary\" ]" - " && grep -qs \"METRICS CLASS\" $i " - " && m=`grep \"METRICS CLASS\" $i | rev | cut -d\".\" -f 1 | rev` " - " && f=`echo $i | rev | cut -d\"/\" -f 1 | rev` " - " && echo $m > {params.resultdir}qc/${{f}}.csv " - " && sed -n '/## METRICS CLASS/,/^\s*$/{{/METRICS/!p}}' $i " - " | awk -v OFS=\"\\t\" -v FS=\"\\t\" ' {{ for (i=1; i<=NF; i++) {{ a[NR,i] = $i }} }} " - "NF>p {{ p = NF }} END {{ for(j=1; j<=p; j++) {{ str=a[1,j]; for(i=2;" - "i<=NR; i++) {{ str=str\"\\t\"a[i,j]; }}; print str }} }}' " - "| awk 'NR==1 {{ v=NF }} NF==v {{ print }}' >> {params.resultdir}qc/${{f}}.csv ; " - "done; source deactivate;" diff --git a/BALSAMIC/snakemake_rules/obsolete_rules/freebayes.rule b/BALSAMIC/snakemake_rules/obsolete_rules/freebayes.rule deleted file mode 100644 index 99f91aac3..000000000 --- a/BALSAMIC/snakemake_rules/obsolete_rules/freebayes.rule +++ /dev/null @@ -1,44 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_conda_env, get_picard_mrkdup - - 
-var_caller = 'freebayes' -freebayes_dir = var_caller + "/" - -chromlist = config["panel"]["chrom"] -picarddup = get_picard_mrkdup(config) -capture_kit = os.path.split(config["panel"]["capture_kit"])[1] - -rule freebayes_germline: - input: - fa = config["reference"]["reference_genome"], - bam = bam_dir + "{sample}.sorted." + picarddup + ".bam", - bed = vcf_dir + "split_bed/" + "{bedchrom}" + "." + capture_kit, - output: - vcf_dir + freebayes_dir + "split_vcf/" + "{sample}.{bedchrom}_" + config["vcf"][var_caller]["default"] - params: - conda = get_conda_env(config["conda_env_yaml"],"freebayes"), - threads: 4 - shell: - "source activate {params.conda}; " - "freebayes --bam {input.bam} --fasta-reference {input.fa} --targets {input.bed} " - "--use-duplicate-reads --min-mapping-quality 30 --min-base-quality 20 --min-alternate-fraction 0.01 " - "--genotype-qualities --use-mapping-quality | bgzip > {output}; " - - -rule freebayes_merge: - input: - expand(vcf_dir + freebayes_dir + "split_vcf/{{sample}}.{chrom}_" + config["vcf"][var_caller]["default"], chrom=chromlist) - output: - vcf_dir + config["vcf"][var_caller]["type"] + "." + config["vcf"][var_caller]["mutation"] + "." + "{sample}" + "." 
+ var_caller + ".vcf.gz" - params: - conda = get_conda_env(config["conda_env_yaml"], "freebayes"), - shell: - "source activate {params.conda}; " - "bcftools concat {input} | bcftools sort - | bgzip > {output}; " - "tabix -f -p vcf {output}; " - "source deactivate;" diff --git a/BALSAMIC/snakemake_rules/obsolete_rules/mergetype_paired_umi.rule b/BALSAMIC/snakemake_rules/obsolete_rules/mergetype_paired_umi.rule deleted file mode 100644 index 22b17adeb..000000000 --- a/BALSAMIC/snakemake_rules/obsolete_rules/mergetype_paired_umi.rule +++ /dev/null @@ -1,57 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_sample_type -from BALSAMIC.utils.rule import get_conda_env, get_picard_mrkdup - -picarddup = get_picard_mrkdup(config) - -rule mergeBam_normal_gatk: - input: - bamN = expand(bam_dir + "{mysample}.sorted." + picarddup + ".ralgn.bsrcl.bam", mysample=get_sample_type(config["samples"], "normal")), - output: - bamN = bam_dir + "normal.sorted." + picarddup + ".ralgn.bsrcl.merged.bam", - params: - conda = get_conda_env(config["conda_env_yaml"],"samtools") - shell: - "source activate {params.conda}; " - "samtools merge {output.bamN} {input.bamN}; samtools index {output.bamN}; " - - -rule mergeBam_normal: - input: - bamN = expand(bam_dir + "{mysample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bwa.map.fltr.clip.bam", mysample=get_sample_type(config["samples"], "normal")) - output: - bamN = bam_dir + "normal.merged.bam", - params: - conda = get_conda_env(config["conda_env_yaml"],"samtools") - shell: - "source activate {params.conda}; " - "samtools merge {output.bamN} {input.bamN}; samtools index {output.bamN}; " - - -rule mergeBam_tumor_gatk: - input: - bamT = expand(bam_dir + "{mysample}.sorted." + picarddup + ".ralgn.bsrcl.bam", mysample=get_sample_type(config["samples"], "tumor")) - output: - bamT = bam_dir + "tumor.sorted." 
+ picarddup + ".ralgn.bsrcl.merged.bam", - params: - conda = get_conda_env(config["conda_env_yaml"],"samtools") - shell: - "source activate {params.conda}; " - "samtools merge {output.bamT} {input.bamT}; samtools index {output.bamT}; " - - -rule mergeBam_tumor: - input: - bamT = expand(bam_dir + "{mysample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bwa.map.fltr.clip.bam", mysample=get_sample_type(config["samples"], "tumor")) - output: - bamT = bam_dir + "tumor.merged.bam", - params: - conda = get_conda_env(config["conda_env_yaml"],"samtools") - shell: - "source activate {params.conda}; " - "samtools merge {output.bamT} {input.bamT}; samtools index {output.bamT}; " - diff --git a/BALSAMIC/snakemake_rules/obsolete_rules/mergetype_single_umi.rule b/BALSAMIC/snakemake_rules/obsolete_rules/mergetype_single_umi.rule deleted file mode 100644 index 9c9ecab75..000000000 --- a/BALSAMIC/snakemake_rules/obsolete_rules/mergetype_single_umi.rule +++ /dev/null @@ -1,34 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_sample_type -from BALSAMIC.utils.rule import get_conda_env, get_picard_mrkdup - -picarddup = get_picard_mrkdup(config) - - -rule mergeBam_tumor_gatk: - input: - bamT = expand(bam_dir + "{mysample}.sorted." + picarddup + ".ralgn.bsrcl.bam", mysample=get_sample_type(config["samples"], "tumor")) - output: - bamT = bam_dir + "tumor.sorted." 
+ picarddup + ".ralgn.bsrcl.merged.bam", - params: - conda = get_conda_env(config["conda_env_yaml"],"samtools") - shell: - "source activate {params.conda}; " - "samtools merge {output.bamT} {input.bamT}; samtools index {output.bamT}; " - - -rule mergeBam_tumor: - input: - bamT = expand(bam_dir + "{mysample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bwa.map.fltr.clip.bam", mysample=get_sample_type(config["samples"], "tumor")) - output: - bamT = bam_dir + "tumor.merged.bam", - params: - conda = get_conda_env(config["conda_env_yaml"],"samtools") - shell: - "source activate {params.conda}; " - "samtools merge {output.bamT} {input.bamT}; samtools index {output.bamT}; " - diff --git a/BALSAMIC/snakemake_rules/obsolete_rules/pindel_single.rule b/BALSAMIC/snakemake_rules/obsolete_rules/pindel_single.rule deleted file mode 100644 index 22e67cc39..000000000 --- a/BALSAMIC/snakemake_rules/obsolete_rules/pindel_single.rule +++ /dev/null @@ -1,31 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_conda_env - - -var_caller = "pindel" -pindel_dir = vcf_dir + var_caller + "/" - -rule pindel_somatic_single: - input: - fa = config["reference"]["reference_genome"], - bamT = bam_dir + "tumor.merged.bam", - output: - final = vcf_dir + config["vcf"][var_caller]["type"] + "." + config["vcf"][var_caller]["mutation"] + "." + config["analysis"]["case_id"] + "." 
+ var_caller + ".vcf.gz", - params: - case_id = config["analysis"]["case_id"], - config = pindel_dir + config["analysis"]["case_id"] + ".config", - tmpdir = pindel_dir, - conda = get_conda_env(config["conda_env_yaml"],"pindel") - threads: 4 - shell: - "source activate {params.conda};" - "paste -d\"\\t\" " - "<( echo {input.bamT} ) " - "<( samtools stats {input.bamT} | grep ^SN | cut -f 2- | grep \"insert size average\" | awk \'{{print $NF}}\' ) " - "<( echo {params.case_id} ) > {params.config}; " - "pindel -T {threads} -i {params.config} -f {input.fa} -o {params.tmpdir} && touch {output.final};" - diff --git a/BALSAMIC/snakemake_rules/obsolete_rules/sentieon.rule b/BALSAMIC/snakemake_rules/obsolete_rules/sentieon.rule deleted file mode 100644 index 7fd788d30..000000000 --- a/BALSAMIC/snakemake_rules/obsolete_rules/sentieon.rule +++ /dev/null @@ -1,163 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_conda_env, get_sample_type - -# Following rule will take input fastq files, align them using bwa mem, and convert the output to sam format -rule sentieon_align_sort: - input: - ref = config["reference"]["reference_genome"], - read1 = fastq_dir + "{sample}_1.fp.fastq.gz", - read2 = fastq_dir + "{sample}_2.fp.fastq.gz", - refidx = expand(config["reference"]["reference_genome"] + ".{prefix}", prefix=["amb","ann","bwt","pac","sa"]) - output: - bamout = bam_dir + "{sample}.bam" - params: - header = "'@RG\\tID:" + "{sample}" + "\\tSM:" + "{sample}" + "\\tPL:ILLUMINAi'", - sentieon_exec = SENTIEON_INSTALL_DIR + "/bin/sentieon", - sentieon_lic = SENTIEON_LICENSE, - threads: 16 - log: - bam_dir + "{sample}.bam.log" - shell: - """ -export SENTIEON_LICENSE={params.sentieon_lic}; -{params.sentieon_exec} bwa mem -M -R {params.header} -t {threads} \ --K 50000000 {input.ref} {input.read1} {input.read2} \ -| {params.sentieon_exec} util sort -o {output.bamout} -t {threads} \ ---block_size 3G 
--sam2bam -i - - """ - -rule sentieon_dedup: - input: - bam = bam_dir + "{sample}.bam", - output: - bam = bam_dir + "{sample}.dedup.bam", - score = bam_dir + "{sample}.dedup.score", - metrics = bam_dir + "{sample}.dedup.metrics" - params: - sentieon_exec = SENTIEON_INSTALL_DIR + "/bin/sentieon", - sentieon_lic = SENTIEON_LICENSE, - threads: 16 - log: - bam_dir + "{sample}.dedup.bam.log" - shell: - """ -export SENTIEON_LICENSE={params.sentieon_lic}; -{params.sentieon_exec} driver -t {threads} -i {input.bam} --algo LocusCollector --fun score_info {output.score}; -{params.sentieon_exec} driver -t {threads} -i {input.bam} --algo Dedup --rmdup --score_info {output.score} --metrics {output.metrics} {output.bam}; - """ - -rule sentieon_realign: - input: - ref = config["reference"]["reference_genome"], - mills = config["reference"]["mills_1kg"], - highconf = config["reference"]["1kg_snps_high"], - bam = bam_dir + "{sample}.dedup.bam", - output: - bam = bam_dir + "{sample}.dedup.realign.bam", - params: - sentieon_exec = SENTIEON_INSTALL_DIR + "/bin/sentieon", - sentieon_lic = SENTIEON_LICENSE, - threads: 16 - log: - bam_dir + "{sample}.dedup.realign.bam.log" - shell: - """ -export SENTIEON_LICENSE={params.sentieon_lic}; -{params.sentieon_exec} driver -r {input.ref} -t {threads} -i {input.bam} --algo Realigner -k {input.mills} -k {input.highconf} {output} - """ - -rule sentieon_base_calibration: - input: - ref = config["reference"]["reference_genome"], - mills = config["reference"]["mills_1kg"], - highconf = config["reference"]["1kg_snps_high"], - dbsnp = config["reference"]["dbsnp"], - bam = bam_dir + "{sample}.dedup.realign.bam", - output: - recal_data_table = bam_dir + "{sample}.dedup.realign.recal_data.table", - qual_recal = bam_dir + "{sample}.dedup.realign.recal.csv", - qual_recal_plot = bam_dir + "{sample}.dedup.realign.recal.pdf", - params: - sentieon_exec = SENTIEON_INSTALL_DIR + "/bin/sentieon", - sentieon_lic = SENTIEON_LICENSE, - threads: 16 - log: - bam_dir + 
"{sample}.dedup.realign.recal.log" - shell: - """ -export SENTIEON_LICENSE={params.sentieon_lic}; -{params.sentieon_exec} driver -r {input.ref} -t {threads} -i {input.bam} --algo QualCal -k {input.dbsnp} -k {input.mills} -k {input.highconf} {output.recal_data_table}; -{params.sentieon_exec} driver -r {input.ref} -t {threads} -i {input.bam} -q {output.recal_data_table} --algo QualCal -k {input.dbsnp} -k {input.mills} -k {input.highconf} {output.recal_data_table}.post -{params.sentieon_exec} driver -t {threads} --algo QualCal --plot --before {output.recal_data_table} --after {output.recal_data_table}.post {output.qual_recal} -{params.sentieon_exec} plot QualCal -o {output.qual_recal_plot} {output.qual_recal} - """ - -rule sentieon_corealign: - input: - ref = config["reference"]["reference_genome"], - bamT = expand(bam_dir + "{mysample}.dedup.realign.bam", mysample=get_sample_type(config["samples"], "tumor")), - bamN = expand(bam_dir + "{mysample}.dedup.realign.bam", mysample=get_sample_type(config["samples"], "normal")), - recalT = expand(bam_dir + "{mysample}.dedup.realign.recal_data.table", mysample=get_sample_type(config["samples"], "tumor")), - recalN = expand(bam_dir + "{mysample}.dedup.realign.recal_data.table", mysample=get_sample_type(config["samples"], "normal")), - mills = config["reference"]["mills_1kg"], - highconf = config["reference"]["1kg_snps_high"], - output: - bam = bam_dir + config["analysis"]["case_id"] + ".corealign.bam" - params: - sentieon_exec = SENTIEON_INSTALL_DIR + "/bin/sentieon", - sentieon_lic = SENTIEON_LICENSE, - threads: 16 - log: - bam_dir + config["analysis"]["case_id"] + ".corealign.log" - shell: - """ -export SENTIEON_LICENSE={params.sentieon_lic}; -{params.sentieon_exec} driver -r {input.ref} -t {threads} -i {input.bamT} -i {input.bamN} -q {input.recalT} -q {input.recalN} --algo Realigner -k {input.mills} -k {input.highconf} {output.bam} -""" - -rule sentieon_TNsnv: - input: - bam = bam_dir + config["analysis"]["case_id"] + 
".corealign.bam", - ref = config["reference"]["reference_genome"], - dbsnp = config["reference"]["dbsnp"], - output: - vcf = vcf_dir + config["analysis"]["case_id"] + ".tnsnv.vcf.gz", - stats = vcf_dir + config["analysis"]["case_id"] + ".tnsnv.call_stats", - params: - tumor = get_sample_type(config["samples"], "tumor"), - normal = get_sample_type(config["samples"], "normal"), - sentieon_exec = SENTIEON_INSTALL_DIR + "/bin/sentieon", - sentieon_lic = SENTIEON_LICENSE, - threads: 16 - log: - bam_dir + config["analysis"]["case_id"] + ".tnsnv.log" - shell: - """ -export SENTIEON_LICENSE={params.sentieon_lic}; -{params.sentieon_exec} driver -r {input.ref} -t {threads} -i {input.bam} --algo TNsnv --tumor_sample {params.tumor} --normal_sample {params.normal} --dbsnp {input.dbsnp} --call_stats_out {output.stats} {output.vcf} - """ - -rule sentieon_TNhaplotyper: - input: - bam = bam_dir + config["analysis"]["case_id"] + ".corealign.bam", - ref = config["reference"]["reference_genome"], - dbsnp = config["reference"]["dbsnp"], - output: - vcf = vcf_dir + config["analysis"]["case_id"] + ".tnhaplotyper.vcf.gz", - params: - tumor = get_sample_type(config["samples"], "tumor"), - normal = get_sample_type(config["samples"], "normal"), - sentieon_exec = SENTIEON_INSTALL_DIR + "/bin/sentieon", - sentieon_lic = SENTIEON_LICENSE, - threads: 16 - log: - bam_dir + config["analysis"]["case_id"] + ".tnsnv.log" - shell: - """ -export SENTIEON_LICENSE={params.sentieon_lic}; -{params.sentieon_exec} driver -r {input.ref} -t {threads} -i {input.bam} --algo TNhaplotyper --tumor_sample {params.tumor} --normal_sample {params.normal} --dbsnp {input.dbsnp} {output.vcf} - """ diff --git a/BALSAMIC/snakemake_rules/obsolete_rules/umi/bwa_mem_umi.rule b/BALSAMIC/snakemake_rules/obsolete_rules/umi/bwa_mem_umi.rule deleted file mode 100644 index 305026c51..000000000 --- a/BALSAMIC/snakemake_rules/obsolete_rules/umi/bwa_mem_umi.rule +++ /dev/null @@ -1,41 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab 
-# coding: utf-8 - -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_conda_env - - -rule bwa_mem_umi_mrkadp: - input: - fa = config["path"]["genomefa"] + config["references"]["genomefa"], - refidx = expand(config["path"]["genomefa"] + config["references"]["genomefa"] + ".{prefix}", prefix=["amb","ann","bwt","pac","sa"]), - fastq = "{sample}.unalg.umi.mrkadp.fastq" - output: - "{sample}.unalg.umi.mrkadp.bwa.sam" - params: - header_1 = "'@RG\\tID:" + "{sample}" + "\\tSM:" + "{sample}" + "\\tPL:ILLUMINAi'", - conda = get_conda_env(config["conda_env_yaml"],"bwa") - threads: 4 - shell: - """ - source activate {params.conda}; - bwa mem -K 1000000 -p -t 8 {input.fa} {input.fastq} > {output} - """ - -rule bwa_mem_umi_cnsalg: - input: - fa = config["path"]["genomefa"] + config["references"]["genomefa"], - refidx = expand(config["path"]["genomefa"] + config["references"]["genomefa"] + ".{prefix}", prefix=["amb","ann","bwt","pac","sa"]), - fastq = "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.fastq" - output: - "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bwa.sam" - params: - header_1 = "'@RG\\tID:" + "{sample}" + "\\tSM:" + "{sample}" + "\\tPL:ILLUMINAi'", - conda = get_conda_env(config["conda_env_yaml"],"bwa") - threads: 4 - shell: - """ - source activate {params.conda}; - bwa mem -K 1000000 -p -t 8 {input.fa} {input.fastq} > {output} - """ diff --git a/BALSAMIC/snakemake_rules/obsolete_rules/umi/fgbio.rule b/BALSAMIC/snakemake_rules/obsolete_rules/umi/fgbio.rule deleted file mode 100644 index 594560a32..000000000 --- a/BALSAMIC/snakemake_rules/obsolete_rules/umi/fgbio.rule +++ /dev/null @@ -1,132 +0,0 @@ -#!python -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -from BALSAMIC.utils.rule import get_conda_env, get_picard_mrkdup - - -rule fgbio_ExtractUmisFromBam: - input: - bam_dir + "{sample}.unalg.bam" - output: - bam_dir + "{sample}.unalg.umi.bam" - params: - tmpdir=bam_dir, - conda = 
get_conda_env(config["conda_env_yaml"],"picard"), - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/fgbio/fgbio.jar \ -ExtractUmisFromBam \ ---input={input} \ ---output={output} \ ---read-structure=3M2S146T 3M2S146T \ ---molecular-index-tags=ZA ZB \ ---single-tag=RX - """ - -rule fgbio_GroupReadsByUmi: - input: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.bam" - output: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.bam" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/fgbio/fgbio.jar \ -GroupReadsByUmi \ ---strategy=paired \ ---input={input} \ ---output={output} \ ---raw-tag=RX \ ---assign-tag=MI \ ---min-map-q=10 \ ---edits=1 - """ - -rule fgbio_CallDuplexConsensusReads: - input: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.bam" - output: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bam" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/fgbio/fgbio.jar \ -CallDuplexConsensusReads \ ---input={input} \ ---output={output} \ ---min-reads=1 \ ---error-rate-pre-umi=45 \ ---error-rate-post-umi=30 \ ---min-input-base-quality=30 - """ - -rule fgbio_FilterConsensusReads: - input: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bwa.map.bam" - output: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bwa.map.fltr.bam" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - ref = config["path"]["genomefa"] + config["references"]["genomefa"] - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/fgbio/fgbio.jar \ 
-FilterConsensusReads \ --i {input} \ --o {output} \ ---ref {params.ref} \ ---min-reads 3 1 1 \ ---max-read-error-rate 0.05 \ ---max-base-error-rate 0.1 \ ---min-base-quality 50 \ ---max-no-call-fraction 0.05 - """ - -rule fgio_ClipBam: - input: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bwa.map.fltr.bam" - output: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bwa.map.fltr.clip.bam" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - ref = config["path"]["genomefa"] + config["references"]["genomefa"], - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/fgbio/fgbio.jar \ -ClipBam \ ---input={input} \ ---output={output} \ ---ref={params.ref} \ ---soft-clip=false \ ---clip-overlapping-reads=true - """ - -rule fgbio_CollectDuplexSeqMetrics: - input: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.bam" - output: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.bam.duplex_qc.pdf" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/fgbio/fgbio.jar \ -CollectDuplexSeqMetrics \ ---input={input} \ ---output={input} \ ---description={wildcards.sample} - """ diff --git a/BALSAMIC/snakemake_rules/obsolete_rules/umi/fgbio_v2.rule b/BALSAMIC/snakemake_rules/obsolete_rules/umi/fgbio_v2.rule deleted file mode 100644 index 460565f72..000000000 --- a/BALSAMIC/snakemake_rules/obsolete_rules/umi/fgbio_v2.rule +++ /dev/null @@ -1,267 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_conda_env, get_picard_mrkdup - -picarddup = get_picard_mrkdup(config) - -rule S_1_convert_Fastq_to_unaligned_bam: - input: - fq1 = config["analysis"]["fastq_path"] + "{sample}_1.fastq.gz", - fq2 = 
config["analysis"]["fastq_path"] + "{sample}_2.fastq.gz", - output: - bam_dir + "{sample}.unalg.bam" - params: - tmpdir = bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard") - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar FastqToSam \ -O={output} \ -F1={input.fq1} \ -F2={input.fq2} \ -SM={wildcards.sample} \ -LB=Library1 \ -PU=NovaSeq \ -PL=illumina - """ - -rule S_2_Extract_inline_umi: - input: - bam_dir + "{sample}.unalg.bam" - output: - bam_dir + "{sample}.unalg.umi.bam" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/fgbio/fgbio.jar \ -ExtractUmisFromBam \ ---input={input} \ ---output={output} \ ---read-structure=3M2S146T 3M2S146T \ ---molecular-index-tags=ZA ZB \ ---single-tag=RX - """ - - -rule S_3_mark_illumina_adapter: - input: - bam_dir + "{sample}.unalg.umi.bam" - output: - bam = bam_dir + "{sample}.unalg.umi.mrkadp.bam", - txt = bam_dir + "{sample}.unalg.umi.mrkadp.txt" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard") - shell: - """ -source activate {params.conda}; -#java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar MarkIlluminaAdapters \ -#I={input} \ -#O={output.bam} \ -#M={output.txt} -ln -s {input} {output.bam}; -touch {output.txt} - """ - -# CLIPPING_ATTRIBUTE=XT \ -# CLIPPING_ACTION=X \ -# CLIPPING_MIN_LENGTH=36 \ -# INCLUDE_NON_PF_READS=true \ - -rule S_4_align_the_reads: - input: - fa = config["path"]["genomefa"] + config["references"]["genomefa"], - refidx = expand(config["path"]["genomefa"] + config["references"]["genomefa"] + ".{prefix}", prefix=["amb","ann","bwt","pac","sa"]), - unaligned = bam_dir + "{sample}.unalg.umi.bam", - mrkadp = bam_dir + "{sample}.unalg.umi.mrkadp.bam" - output: - bam_dir + 
"{sample}.unalg.umi.mrkadp.bwa.map.bam" - params: - tmpdir=bam_dir, - ref = config["path"]["genomefa"] + config["references"]["genomefa"], - conda = get_conda_env(config["conda_env_yaml"],"picard") - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar SamToFastq \ - I={input.unaligned} \ - F=/dev/stdout \ - INTERLEAVE=true \ -| bwa mem -K 1000000 -p -t 8 {input.fa} /dev/stdin \ -| java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar MergeBamAlignment \ -UNMAPPED={input.unaligned} \ -ALIGNED=/dev/stdin \ -O={output} \ -R={params.ref} \ -CLIP_ADAPTERS=false \ -VALIDATION_STRINGENCY=SILENT \ -EXPECTED_ORIENTATIONS=FR \ -MAX_GAPS=-1 \ -SO=coordinate \ -ALIGNER_PROPER_PAIR_FLAGS=false \ -CREATE_INDEX=true - """ - -rule S_5_1_groupreadsbyumi: - input: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.bam" - output: - hist = bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.hist", - bam = bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.bam" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/fgbio/fgbio.jar \ -GroupReadsByUmi \ ---strategy=paired \ ---input={input} \ ---output={output.bam} \ ---raw-tag=RX \ ---assign-tag=MI \ ---family-size-histogram={output.hist} \ ---min-map-q=10 \ ---edits=1 - """ - -rule S_5_2_callduplexconsensusreads: - input: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.bam" - output: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bam" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/fgbio/fgbio.jar \ -CallDuplexConsensusReads \ ---input={input} \ ---output={output} \ ---min-reads=1 \ 
---error-rate-pre-umi=45 \ ---error-rate-post-umi=30 \ ---min-input-base-quality=30 - """ - -rule S_6_map_duplex_consensus_reads: - input: - fa = config["path"]["genomefa"] + config["references"]["genomefa"], - refidx = expand(config["path"]["genomefa"] + config["references"]["genomefa"] + ".{prefix}", prefix=["amb","ann","bwt","pac","sa"]), - unaligned = bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bam", - output: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bwa.map.bam" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - ref = config["path"]["genomefa"] + config["references"]["genomefa"] - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar SamToFastq \ -VALIDATION_STRINGENCY=SILENT \ -INPUT={input.unaligned} \ -F=/dev/stdout \ -INTERLEAVE=true \ -INCLUDE_NON_PF_READS=true \ -CREATE_INDEX=true \ -| bwa mem -K 1000000 -p -t 8 {input.fa} /dev/stdin \ -| java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar MergeBamAlignment \ -VALIDATION_STRINGENCY=SILENT \ -UNMAPPED={input.unaligned} \ -ALIGNED=/dev/stdin \ -OUTPUT={output} \ -REFERENCE_SEQUENCE={params.ref} \ -CLIP_ADAPTERS=false \ -ORIENTATIONS=FR \ -MAX_GAPS=-1 \ -SORT_ORDER=coordinate \ -ALIGNER_PROPER_PAIR_FLAGS=false \ -ATTRIBUTES_TO_RETAIN=X0 \ -ATTRIBUTES_TO_RETAIN=ZS \ -ATTRIBUTES_TO_RETAIN=ZI \ -ATTRIBUTES_TO_RETAIN=ZM \ -ATTRIBUTES_TO_RETAIN=ZC \ -ATTRIBUTES_TO_RETAIN=ZN \ -ATTRIBUTES_TO_REVERSE=ad \ -ATTRIBUTES_TO_REVERSE=bd \ -ATTRIBUTES_TO_REVERSE=cd \ -ATTRIBUTES_TO_REVERSE=ae \ -ATTRIBUTES_TO_REVERSE=be \ -ATTRIBUTES_TO_REVERSE=ce \ -CREATE_INDEX=true - """ - -rule S_7_filter_mapped_duplex_consensus_reads: - input: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bwa.map.bam" - output: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bwa.map.fltr.bam" - params: - tmpdir=bam_dir, - conda = 
get_conda_env(config["conda_env_yaml"],"picard"), - ref = config["path"]["genomefa"] + config["references"]["genomefa"] - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/fgbio/fgbio.jar \ -FilterConsensusReads \ --i {input} \ --o {output} \ ---ref {params.ref} \ ---min-reads 3 1 1 \ ---max-read-error-rate 0.05 \ ---max-base-error-rate 0.1 \ ---min-base-quality 50 \ ---max-no-call-fraction 0.05 - """ - -rule S_8_clip_overlapping_reads: - input: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bwa.map.fltr.bam" - output: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bwa.map.fltr.clip.bam" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - ref = config["path"]["genomefa"] + config["references"]["genomefa"], - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/fgbio/fgbio.jar \ -ClipBam \ ---input={input} \ ---output={output} \ ---ref={params.ref} \ ---soft-clip=false \ ---clip-overlapping-reads=true - """ - -rule S_9_get_duplex_seq_metrics: - input: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.bam" - output: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.bam.duplex_qc.pdf" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/fgbio/fgbio.jar \ -CollectDuplexSeqMetrics \ ---duplex-umi-counts=true \ ---input={input} \ ---output={input} \ ---description={wildcards.sample} - """ diff --git a/BALSAMIC/snakemake_rules/obsolete_rules/umi/picard.rule b/BALSAMIC/snakemake_rules/obsolete_rules/umi/picard.rule deleted file mode 100644 index 306bb315e..000000000 --- a/BALSAMIC/snakemake_rules/obsolete_rules/umi/picard.rule +++ /dev/null @@ -1,161 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# 
coding: utf-8 - -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_conda_env, get_picard_mrkdup - -picarddup = get_picard_mrkdup(config) - -def picard_flag(picarddup): - if picarddup == "mrkdup": - return "FALSE" - else: - return "TRUE" - -rule picard_FastqToSam_umi: - input: - fq1 = config["analysis"]["fastq_path"] + "{sample}_1.fastq.gz", - fq2 = config["analysis"]["fastq_path"] + "{sample}_2.fastq.gz", - output: - bam_dir + "{sample}.unalg.bam" - params: - tmpdir = bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard") - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar FastqToSam \ -O={output} \ -F1={input.fq1} \ -F2={input.fq2} \ -SM={wildcards.sample} \ -LB=Library1 \ -PU=NovaSeq \ -PL=illumina - """ - -rule picard_MarkIlluminaAdaptersi_umi: - input: - bam_dir + "{sample}.unalg.umi.bam" - output: - bam = bam_dir + "{sample}.unalg.umi.mrkadp.bam", - txt = bam_dir + "{sample}.unalg.umi.mrkadp.txt" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard") - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar MarkIlluminaAdapters \ -I={input} \ -O={output.bam} \ -M={output.txt} - """ - -rule picard_SamToFastq_umi: - input: - bam_dir + "{sample}.unalg.umi.mrkadp.bam" - output: - bam_dir + "{sample}.unalg.umi.mrkadp.fastq" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard") - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar SamToFastq \ - I={input} \ - CLIPPING_ATTRIBUTE=XT \ - CLIPPING_ACTION=X \ - CLIPPING_MIN_LENGTH=36 \ - INCLUDE_NON_PF_READS=true \ - F={output} \ - INTERLEAVE=true - """ - -rule picard_MergeBamAlignment_mrkadp_umi: - input: - aligned = bam_dir + "{sample}.unalg.umi.mrkadp.bwa.sam", - unaligned 
= bam_dir + "{sample}.unalg.umi.bam" - output: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.bam" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - ref = config["path"]["genomefa"] + config["references"]["genomefa"] - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar MergeBamAlignment \ -UNMAPPED={input.unaligned} \ -ALIGNED={input.aligned} \ -O={output} \ -R={params.ref} \ -CLIP_ADAPTERS=false \ -VALIDATION_STRINGENCY=SILENT \ -EXPECTED_ORIENTATIONS=FR \ -MAX_GAPS=-1 \ -SO=coordinate \ -ALIGNER_PROPER_PAIR_FLAGS=false \ -CREATE_INDEX=true - """ - -rule picard_SamToFastq_cnsalg_umi: - input: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bam" - output: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.fastq" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar SamToFastq \ -VALIDATION_STRINGENCY=SILENT \ -INPUT={input} \ -F={output} \ -INTERLEAVE=true \ -INCLUDE_NON_PF_READS=true \ -CREATE_INDEX=true - """ - -rule picard_MergeBamAlignment_cnsalg_umi: - input: - aligned = bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bwa.sam", - unaligned = bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bam" - output: - bam_dir + "{sample}.unalg.umi.mrkadp.bwa.map.umi.cnsunalg.bwa.map.bam" - params: - tmpdir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - ref = config["path"]["genomefa"] + config["references"]["genomefa"] - shell: - """ -source activate {params.conda}; -java -Djava.io.tmpdir={params.tmpdir} -jar -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar MergeBamAlignment \ -VALIDATION_STRINGENCY=SILENT \ -UNMAPPED={input.unaligned} \ -ALIGNED={input.aligned} \ -OUTPUT={output} \ -REFERENCE_SEQUENCE={params.ref} \ 
-CLIP_ADAPTERS=false \ -ORIENTATIONS=FR \ -MAX_GAPS=-1 \ -SORT_ORDER=coordinate \ -ALIGNER_PROPER_PAIR_FLAGS=false \ -ATTRIBUTES_TO_RETAIN=X0 \ -ATTRIBUTES_TO_RETAIN=ZS \ -ATTRIBUTES_TO_RETAIN=ZI \ -ATTRIBUTES_TO_RETAIN=ZM \ -ATTRIBUTES_TO_RETAIN=ZC \ -ATTRIBUTES_TO_RETAIN=ZN \ -ATTRIBUTES_TO_REVERSE=ad \ -ATTRIBUTES_TO_REVERSE=bd \ -ATTRIBUTES_TO_REVERSE=cd \ -ATTRIBUTES_TO_REVERSE=ae \ -ATTRIBUTES_TO_REVERSE=be \ -ATTRIBUTES_TO_REVERSE=ce \ -CREATE_INDEX=true - """ diff --git a/BALSAMIC/snakemake_rules/obsolete_rules/umi/vardict_single_umi.rule b/BALSAMIC/snakemake_rules/obsolete_rules/umi/vardict_single_umi.rule deleted file mode 100644 index f16302d40..000000000 --- a/BALSAMIC/snakemake_rules/obsolete_rules/umi/vardict_single_umi.rule +++ /dev/null @@ -1,50 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_conda_env, get_chrom - - -var_caller = "vardict" -vardict_dir = var_caller + "/" - -chromlist = get_chrom(config["path"]["panel"] + config["bed"]["capture_kit"]) - -rule vardict_single: - input: - fa = config["path"]["genomefa"] + config["references"]["genomefa"], - bamT = bam_dir + "tumor.merged.bam", - bed = vcf_dir + "split_bed/" + "{bedchrom}" + "." 
+ config["bed"]["capture_kit"], - output: - vcf_dir + vardict_dir + "split_vcf/" + "{bedchrom}_" + config["vcf"][var_caller]["default"] - params: - af = "0.0005", - vardict = "-c 1 -S 2 -E 3 -g 4 -r 1 -F 0", - var2vcf = "-E", - name = config["analysis"]["case_id"], - conda = get_conda_env(config["conda_env_yaml"],"vardict"), - shell: - "source activate {params.conda}; " - "vardict -G {input.fa} -f {params.af} -N {params.name} " - "-b {input.bamT} " - "{params.vardict} {input.bed} " - "| teststrandbias.R " - "| var2vcf_valid.pl {params.var2vcf} " - "-f {params.af} -N {params.name} " - "| bgzip > {output}; " - "tabix -p vcf {output}; " - "source deactivate;" - -rule vardict_merge: - input: - expand(vcf_dir + vardict_dir + "split_vcf/{chrom}_" + config["vcf"][var_caller]["default"], chrom=chromlist) - output: - vcf_dir + config["vcf"][var_caller]["type"] + "." + config["vcf"][var_caller]["mutation"] + "." + config["analysis"]["case_id"] + "." + var_caller + ".vcf.gz" - params: - conda = get_conda_env(config["conda_env_yaml"],"vardict"), - shell: - "source activate {params.conda} ; " - "bcftools concat {input} | bcftools sort - | bgzip > {output}; " - "tabix -f -p vcf {output}; " - "source deactivate;" diff --git a/BALSAMIC/snakemake_rules/quality_control/GATK.rule b/BALSAMIC/snakemake_rules/quality_control/GATK.rule index 0a6e7f991..eae6f8343 100644 --- a/BALSAMIC/snakemake_rules/quality_control/GATK.rule +++ b/BALSAMIC/snakemake_rules/quality_control/GATK.rule @@ -1,13 +1,6 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_conda_env, get_picard_mrkdup -from BALSAMIC import __version__ as bv - -picarddup = get_picard_mrkdup(config) - rule RealignerTargetCreator: input: bam = bam_dir + "{sample}" + ".sorted." + picarddup + ".bam", @@ -17,8 +10,8 @@ rule RealignerTargetCreator: bam_dir + "{sample}.sorted." 
+ picarddup + ".intervals", params: tmpdir = tmp_dir, - conda = get_conda_env(config["conda_env_yaml"],"gatk"), - singularity: singularity_image + conda = config["bioinfo_tools"].get("gatk"), + singularity: Path(singularity_image, config["bioinfo_tools"].get("gatk") + ".sif").as_posix() benchmark: benchmark_dir + "RealignerTargetCreator_" + "{sample}.realign_targetcreator.tsv" shell: @@ -44,8 +37,8 @@ rule BaseRecalibrator: bam_dir + "{sample}.sorted." + picarddup + ".bsrcl.bam", params: tmpdir = tmp_dir, - conda = get_conda_env(config["conda_env_yaml"],"gatk"), - singularity: singularity_image + conda = config["bioinfo_tools"].get("gatk"), + singularity: Path(singularity_image, config["bioinfo_tools"].get("gatk") + ".sif").as_posix() benchmark: benchmark_dir + "BaseRecalibrator_" + "{sample}.base_recalibrator.tsv" shell: @@ -75,15 +68,15 @@ rule PreparePopVCF: output: popvcf = result_dir + "popvcf.vcf" params: - conda = get_conda_env(config["conda_env_yaml"],"bcftools"), + conda = config["bioinfo_tools"].get("bcftools"), anno_str1 = "FORMAT/GT,FORMAT/GL,FORMAT/DS,^INFO/AC,^INFO/AF,^INFO/AN,^INFO/", popcode = "EUR" - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() benchmark: benchmark_dir + "PreparePopVCF_" + "tumor_prepare_pop_vcf.tsv" shell: "source activate {params.conda}; " - "readlink -e {input.bam}; " + "readlink -f {input.bam}; " "bcftools annotate " "-x {params.anno_str1}{params.popcode}_AF " "{input.ref1kg} " diff --git a/BALSAMIC/snakemake_rules/quality_control/contest.rule b/BALSAMIC/snakemake_rules/quality_control/contest.rule index a2195fa06..195b8dcca 100644 --- a/BALSAMIC/snakemake_rules/quality_control/contest.rule +++ b/BALSAMIC/snakemake_rules/quality_control/contest.rule @@ -1,12 +1,6 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC import __version__ as 
bv - - rule GATK_contest: input: bamN = bam_dir + "normal.merged.bam", @@ -17,11 +11,11 @@ rule GATK_contest: N_vs_T = bam_dir + "normal_tumor.contest", T_vs_N = bam_dir + "tumor_normal.contest", params: - conda = get_conda_env(config["conda_env_yaml"],"gatk"), + conda = config["bioinfo_tools"].get("gatk"), tmpdir = tmp_dir, min_genotype_ratio="0.95", popcode = "EUR" - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("gatk") + ".sif").as_posix() benchmark: benchmark_dir + 'GATK_contest_' + config["analysis"]["case_id"] + ".markduplicates.tsv" shell: diff --git a/BALSAMIC/snakemake_rules/quality_control/fastp.rule b/BALSAMIC/snakemake_rules/quality_control/fastp.rule index 920101e7c..b1c41d57d 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastp.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastp.rule @@ -1,9 +1,6 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_threads - if 'quality_trim' in config['QC'].keys(): fastp_param_qc = list() fastp_param_adapter = list() @@ -28,52 +25,79 @@ if 'quality_trim' in config['QC'].keys(): "--umi_len", config['QC']['umi_trim_length'], "--umi_prefix","UMI"]) -# Double pass to hard trim adapter and UMIs -rule fastp: +rule fastp_umi: input: read1=config["analysis"]["fastq_path"] + "{sample}" + "_1.fastq.gz", read2=config["analysis"]["fastq_path"] + "{sample}" + "_2.fastq.gz", output: - read1 = fastq_dir + "{sample}_1.fp.fastq.gz", - read2 = fastq_dir + "{sample}_2.fp.fastq.gz", - json = qc_dir + "fastp/{sample}_fastp.json", - html = qc_dir + "fastp/{sample}_fastp.html", + read1 = fastq_dir + "{sample}_1.umi_optimized.fastq.gz", + read2 = fastq_dir + "{sample}_2.umi_optimized.fastq.gz", + json = qc_dir + "fastp/{sample}_fastp_umi.json", + html = qc_dir + "fastp/{sample}_fastp_umi.html", + benchmark: + benchmark_dir + "fastp_umi" + "{sample}_fastp.tsv" + singularity: + 
Path(singularity_image, config["bioinfo_tools"].get("fastp") + ".sif").as_posix() params: - read1_interm = fastq_dir + "{sample}_1.interm.fastq.gz", - read2_interm = fastq_dir + "{sample}_2.interm.fastq.gz", - json_out_interm = fastq_dir + "{sample}_interm_fastp.json", - html_out_interm = fastq_dir + "{sample}_interm_fastp.html", - housekeeper_id = {"id": "{sample}", "tags": "quality-trimmed-fastq"}, tmpdir = tmp_dir, - fastq_dir = fastq_dir, qc = " ".join(fastp_param_qc), - umi = " ".join(fastp_param_umi), adapter = " ".join(fastp_param_adapter), - minimum_length = config["QC"]["min_seq_length"], - conda = get_conda_env(config["conda_env_yaml"],"fastp") - singularity: singularity_image + sample_name = "{sample}", + conda = config["bioinfo_tools"].get("fastp") threads: get_threads(cluster_config, 'fastp') - benchmark: - benchmark_dir + "fastp_" + "{sample}_fastp.tsv" + message: + "Quality control and trimming input fastq for {params.sample_name}" shell: """ source activate {params.conda}; export TMPDIR={params.tmpdir}; + fastp \ --thread {threads} \ --in1 {input.read1} \ --in2 {input.read2} \ ---out1 {params.read1_interm} \ ---out2 {params.read2_interm} \ ---json {params.json_out_interm} \ ---html {params.html_out_interm} \ +--out1 {output.read1} \ +--out2 {output.read2} \ +--json {output.json} \ +--html {output.html} \ --overrepresentation_analysis \ {params.qc} \ {params.adapter}; + """ + +# Double pass to hard trim adapter and UMIs +rule fastp: + input: + read1 = fastq_dir + "{sample}_1.umi_optimized.fastq.gz", + read2 = fastq_dir + "{sample}_2.umi_optimized.fastq.gz" + output: + read1 = fastq_dir + "{sample}_1.fp.fastq.gz", + read2 = fastq_dir + "{sample}_2.fp.fastq.gz", + json = qc_dir + "fastp/{sample}_fastp.json", + html = qc_dir + "fastp/{sample}_fastp.html" + benchmark: + benchmark_dir + "fastp_" + "{sample}_fastp.tsv" + singularity: + Path(singularity_image, config["bioinfo_tools"].get("fastp") + ".sif").as_posix() + params: + housekeeper_id = {"id": 
"{sample}", "tags": "quality-trimmed-fastq"}, + tmpdir = tmp_dir, + umi = " ".join(fastp_param_umi), + minimum_length = config["QC"]["min_seq_length"], + sample_name = "{sample}", + conda = config["bioinfo_tools"].get("fastp") + threads: get_threads(cluster_config, 'fastp') + message: + "Quality control and trimming of umi optimized fastq file for {params.sample_name}" + shell: + """ +source activate {params.conda}; +export TMPDIR={params.tmpdir}; + fastp \ --thread {threads} \ ---in1 {params.read1_interm} \ ---in2 {params.read2_interm} \ +--in1 {input.read1} \ +--in2 {input.read2} \ --out1 {output.read1} \ --out2 {output.read2} \ --json {output.json} \ @@ -84,8 +108,4 @@ fastp \ --disable_trim_poly_g \ --length_required {params.minimum_length} \ {params.umi}; -rm {params.read1_interm}; -rm {params.read2_interm}; -rm {params.json_out_interm}; -rm {params.html_out_interm}; """ diff --git a/BALSAMIC/snakemake_rules/quality_control/fastqc.rule b/BALSAMIC/snakemake_rules/quality_control/fastqc.rule index 3d9bedef8..2511691f1 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastqc.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastqc.rule @@ -1,12 +1,6 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC import __version__ as bv - - # Following rule will take input fastq files, align them using bwa mem, and convert the output to sam format rule fastqc: input: @@ -16,10 +10,10 @@ rule fastqc: read1 = fastqc_dir + "{sample}_1_fastqc.zip", read2 = fastqc_dir + "{sample}_2_fastqc.zip" params: - conda = get_conda_env(config["conda_env_yaml"],"fastqc"), + conda = config["bioinfo_tools"].get("fastqc"), tmpdir = tmp_dir, fastqc_dir = fastqc_dir - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("fastqc") + ".sif").as_posix() benchmark: benchmark_dir + "fastqc_" + "{sample}.fastqc.tsv" shell: diff --git 
a/BALSAMIC/snakemake_rules/quality_control/mosdepth.rule b/BALSAMIC/snakemake_rules/quality_control/mosdepth.rule index 33d0a2eb5..c9d69db50 100644 --- a/BALSAMIC/snakemake_rules/quality_control/mosdepth.rule +++ b/BALSAMIC/snakemake_rules/quality_control/mosdepth.rule @@ -1,11 +1,6 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -from BALSAMIC.utils.rule import get_conda_env, get_picard_mrkdup -from BALSAMIC import __version__ as bv - -picarddup = get_picard_mrkdup(config) - rule mosdepth_coverage: input: bam = bam_dir + "{sample}" + ".sorted." + picarddup + ".bam", @@ -22,9 +17,9 @@ rule mosdepth_coverage: quantize='0:1:50:150:', sample_name='{sample}', output_dir=bam_dir, - conda = get_conda_env(config["conda_env_yaml"],"mosdepth") + conda = config["bioinfo_tools"].get("mosdepth") threads: get_threads(cluster_config, "mosdepth") - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("mosdepth") + ".sif").as_posix() benchmark: benchmark_dir + "panel_depth_" + "{sample}.mosdepth.tsv" shell: diff --git a/BALSAMIC/snakemake_rules/quality_control/multiqc.rule b/BALSAMIC/snakemake_rules/quality_control/multiqc.rule index 60653d195..7d9a5aea3 100644 --- a/BALSAMIC/snakemake_rules/quality_control/multiqc.rule +++ b/BALSAMIC/snakemake_rules/quality_control/multiqc.rule @@ -1,17 +1,17 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_picard_mrkdup -from BALSAMIC import __version__ as bv - -picarddup = get_picard_mrkdup(config) - -picard_metrics_wildcard = ["alignment_summary_metrics", "base_distribution_by_cycle_metrics", "base_distribution_by_cycle.pdf", "insert_size_histogram.pdf", "insert_size_metrics", "quality_by_cycle_metrics", "quality_by_cycle.pdf", "quality_distribution_metrics", "quality_distribution.pdf"] multiqc_input = [] # Following rule will take input fastq files, align them using bwa mem, and convert the 
output to sam format if config["analysis"]["sequencing_type"] == 'wgs': + picard_metrics_wildcard = ["alignment_summary_metrics", "base_distribution_by_cycle_metrics", + "base_distribution_by_cycle.pdf", "insert_size_histogram.pdf", "insert_size_metrics", + "quality_by_cycle_metrics", + "quality_by_cycle.pdf", "quality_distribution_metrics", "quality_distribution.pdf"] + # fastqc metrics + multiqc_input.extend(expand(fastqc_dir + "{sample}_{read_num}_fastqc.zip", sample=config["samples"], read_num=[1, 2])) + # fastp metrics multiqc_input.extend(expand(qc_dir + "fastp/{sample}_fastp.json", sample=config["samples"])) multiqc_input.extend(expand(qc_dir + "fastp/{sample}_fastp.html", sample=config["samples"])) @@ -20,6 +20,10 @@ if config["analysis"]["sequencing_type"] == 'wgs': multiqc_input.extend(expand(qc_dir + "{sample}_picard_wgs_metrics.txt", sample=config["samples"])) multiqc_input.extend(expand(qc_dir + "{sample}.multiple_metrics.{metrics_wc}", sample=config["samples"], metrics_wc=picard_metrics_wildcard)) + # Sentieon metrics + multiqc_input.extend(expand(qc_dir + "{sample}_sentieon_wgs_metrics.txt", sample=config["samples"])) + multiqc_input.extend(expand(qc_dir + "{sample}_coverage.gz", sample=config["samples"])) + # dir list dir_list = [qc_dir] @@ -48,6 +52,11 @@ else: # dir list dir_list = [bam_dir, fastqc_dir] + if config["analysis"]["umiworkflow"]: + multiqc_input.extend(expand(umi_qc_dir + "{sample}.umi.collect_hsmetric", sample=config["samples"])) + + dir_list = [bam_dir, fastqc_dir, umi_qc_dir] + rule multiqc: input: @@ -59,8 +68,8 @@ rule multiqc: housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "multiqc"}, dir_list = "\n".join(dir_list), qc_dir = qc_dir, - conda = get_conda_env(config["conda_env_yaml"],"multiqc"), - singularity: singularity_image + conda = config["bioinfo_tools"].get("multiqc"), + singularity: Path(singularity_image, config["bioinfo_tools"].get("multiqc") + ".sif").as_posix() benchmark: benchmark_dir + "multiqc_" + 
config["analysis"]["case_id"] + ".multiqc.tsv" shell: diff --git a/BALSAMIC/snakemake_rules/quality_control/picard.rule b/BALSAMIC/snakemake_rules/quality_control/picard.rule index 86cfadbf5..81d9f812c 100644 --- a/BALSAMIC/snakemake_rules/quality_control/picard.rule +++ b/BALSAMIC/snakemake_rules/quality_control/picard.rule @@ -1,47 +1,12 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -from BALSAMIC.utils.rule import get_conda_env, get_picard_mrkdup -from BALSAMIC import __version__ as bv - -picarddup = get_picard_mrkdup(config) - def picard_flag(picarddup): if picarddup == "mrkdup": return "FALSE" else: return "TRUE" -rule MarkDuplicates: - input: - bam_dir + "{sample}.sorted.bam" - output: - mrkdup = bam_dir + "{sample}.sorted." + picarddup + ".bam", - stats = bam_dir + "{sample}.sorted." + picarddup + ".txt" - params: - tmpdir = tmp_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - rm_dup = picard_flag(picarddup) - singularity: singularity_image - benchmark: - benchmark_dir + "MarkDuplicates_" + "{sample}.markduplicates.tsv" - shell: - "source activate {params.conda};" - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "java -jar -Djava.io.tmpdir=${{tmpdir}} -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar " - "MarkDuplicates " - "INPUT={input} " - "OUTPUT={output.mrkdup} " - "VALIDATION_STRINGENCY=SILENT " - "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 " - "REMOVE_DUPLICATES={params.rm_dup} " - "METRICS_FILE='{output.stats}'; " - "samtools index {output.mrkdup}; " - - rule CollectHsMetrics: input: fadict = (config["reference"]["reference_genome"]).replace(".fasta",".dict"), @@ -51,24 +16,23 @@ rule CollectHsMetrics: output: mrkdup = bam_dir + "{sample}.sorted." 
+ picarddup + ".hsmetric" params: - tmpdir = tmp_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), + mem = "16g", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + conda = config["bioinfo_tools"].get("picard"), baitsetname = os.path.basename(config["panel"]["capture_kit"]) - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() benchmark: benchmark_dir + "CollectHsMetrics_" + "{sample}.collect_hsmetrics.tsv" shell: "source activate {params.conda};" - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "java -jar -Djava.io.tmpdir=${{tmpdir}} -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar " + "mkdir -p {params.tmpdir}; " + "export TMPDIR={params.tmpdir}; " + "picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} " "BedToIntervalList " "I={input.bed} " "O={input.bam}.picard.bedintervals " "SD={input.fadict}; " - "java -jar -Djava.io.tmpdir=${{tmpdir}} -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar " + "picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} " "CollectHsMetrics " "BI={input.bam}.picard.bedintervals " "TI={input.bam}.picard.bedintervals " @@ -87,19 +51,18 @@ rule CollectAlignmentSummaryMetrics: output: bam_dir + "{sample}.sorted.alignmetric" params: - tmpdir = tmp_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), + mem = "16g", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + conda = config["bioinfo_tools"].get("picard"), adapter = config["QC"]["adapter"] - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() benchmark: benchmark_dir + "CollectAlignmentSummaryMetrics_" + "{sample}.collect_alignment_summary.tsv" shell: "source activate {params.conda};" - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - 
"java -jar -Djava.io.tmpdir=${{tmpdir}} -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar " + "mkdir -p {params.tmpdir}; " + "export TMPDIR={params.tmpdir}; " + "picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} " "CollectAlignmentSummaryMetrics " "R={input.fa} " "I={input.bam} " @@ -116,18 +79,17 @@ rule CollectInsertSizeMetrics: pdf = bam_dir + "{sample}.sorted.insertsizemetric.pdf", txt = bam_dir + "{sample}.sorted.insertsizemetric" params: - tmpdir = tmp_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard") - singularity: singularity_image + mem = "16g", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + conda = config["bioinfo_tools"].get("picard") + singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() benchmark: benchmark_dir + "CollectInsertSizeMetrics_" + "{sample}.collect_insertsize_metrics.tsv" shell: "source activate {params.conda};" - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "java -jar -Djava.io.tmpdir=${{tmpdir}} -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar " + "mkdir -p {params.tmpdir}; " + "export TMPDIR={params.tmpdir}; " + "picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} " "CollectInsertSizeMetrics " "I={input.bam} " "H={output.pdf} " diff --git a/BALSAMIC/snakemake_rules/quality_control/picard_wgs.rule b/BALSAMIC/snakemake_rules/quality_control/picard_wgs.rule index 1409506cb..cc8d60a16 100644 --- a/BALSAMIC/snakemake_rules/quality_control/picard_wgs.rule +++ b/BALSAMIC/snakemake_rules/quality_control/picard_wgs.rule @@ -1,10 +1,10 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -from BALSAMIC.utils.rule import get_conda_env - -picard_metrics_wildcard = ["alignment_summary_metrics", "base_distribution_by_cycle_metrics", "base_distribution_by_cycle.pdf", "insert_size_histogram.pdf", "insert_size_metrics", "quality_by_cycle_metrics", "quality_by_cycle.pdf", 
"quality_distribution_metrics", "quality_distribution.pdf"] - +picard_metrics_wildcard = ["alignment_summary_metrics", "base_distribution_by_cycle_metrics", + "base_distribution_by_cycle.pdf", "insert_size_histogram.pdf", + "insert_size_metrics", "quality_by_cycle_metrics", + "quality_by_cycle.pdf", "quality_distribution_metrics", "quality_distribution.pdf"] rule CollectMultipleMetrics: input: @@ -13,19 +13,18 @@ rule CollectMultipleMetrics: output: expand(qc_dir + "{{sample}}.multiple_metrics.{metrics_wc}", sample=config["samples"], metrics_wc=picard_metrics_wildcard) params: - tmpdir = tmp_dir, + mem = "16g", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), output_prefix = qc_dir + "{sample}.multiple_metrics", - conda = get_conda_env(config["conda_env_yaml"],"picard"), - singularity: singularity_image + conda = config["bioinfo_tools"].get("picard"), + singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() benchmark: benchmark_dir + "CollectMultipleMetrics_" + "{sample}.picard_collect_multiple_metrics.tsv" shell: "source activate {params.conda};" - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "java -jar -Djava.io.tmpdir=${{tmpdir}} -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar " + "mkdir -p {params.tmpdir}; " + "export TMPDIR={params.tmpdir}; " + "picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} " " CollectMultipleMetrics " " I={input.bam} " " O={params.output_prefix} " @@ -40,18 +39,17 @@ rule CollectWgsMetrics: output: qc_dir + "{sample}_picard_wgs_metrics.txt" params: - tmpdir = tmp_dir, - conda = get_conda_env(config["conda_env_yaml"],"picard"), - singularity: singularity_image + mem = "16g", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + conda = config["bioinfo_tools"].get("picard"), + singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() benchmark: benchmark_dir + 
"CollectWgsMetrics_" + "{sample}.picard_collect_wgs_metrics.tsv" shell: "source activate {params.conda};" - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "java -jar -Djava.io.tmpdir=${{tmpdir}} -Xms8G -Xmx16G $CONDA_PREFIX/share/picard.jar " + "mkdir -p {params.tmpdir}; " + "export TMPDIR={params.tmpdir}; " + "picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} " " CollectWgsMetrics " " I={input.bam} " " O={output} " diff --git a/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule b/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule index 474cc00e7..0387e02ca 100644 --- a/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule +++ b/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule @@ -1,14 +1,7 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_conda_env, get_picard_mrkdup -from BALSAMIC import __version__ as bv - -picarddup = get_picard_mrkdup(config) - -rule panel_depth: +rule sambamba_panel_depth: input: bam = bam_dir + "{sample}" + ".sorted." + picarddup + ".bam", bed = config["panel"]["capture_kit"] @@ -20,8 +13,8 @@ rule panel_depth: cov_end=1000, cov_step=50, filter_string="'not (unmapped or mate_is_unmapped) and not duplicate and not failed_quality_control and mapping_quality > 10'", - conda = get_conda_env(config["conda_env_yaml"],"sambamba") - singularity: singularity_image + conda = config["bioinfo_tools"].get("sambamba") + singularity: Path(singularity_image, config["bioinfo_tools"].get("sambamba") + ".sif").as_posix() benchmark: benchmark_dir + "panel_depth_" + "{sample}.sambamba_panel_depth.tsv" shell: @@ -34,7 +27,7 @@ rule panel_depth: "`echo $covStr` {input.bam} > {output}; " -rule exon_depth: +rule sambamba_exon_depth: input: bam = bam_dir + "{sample}" + ".sorted." 
+ picarddup + ".bam", bed = config["reference"]["exon_bed"] @@ -48,8 +41,8 @@ rule exon_depth: cov_4="200", cov_5="250", filter_string="'not (unmapped or mate_is_unmapped) and not duplicate and not failed_quality_control and mapping_quality > 10'", - conda = get_conda_env(config["conda_env_yaml"],"sambamba") - singularity: singularity_image + conda = config["bioinfo_tools"].get("sambamba") + singularity: Path(singularity_image, config["bioinfo_tools"].get("sambamba") + ".sif").as_posix() benchmark: benchmark_dir + "exon_depth_" + "{sample}.sambamba_exon_depth.tsv" shell: diff --git a/BALSAMIC/snakemake_rules/sentieon/sentieon_qc_metrics.rule b/BALSAMIC/snakemake_rules/quality_control/sentieon_qc_metrics.rule similarity index 92% rename from BALSAMIC/snakemake_rules/sentieon/sentieon_qc_metrics.rule rename to BALSAMIC/snakemake_rules/quality_control/sentieon_qc_metrics.rule index 1acada204..c3dc99236 100644 --- a/BALSAMIC/snakemake_rules/sentieon/sentieon_qc_metrics.rule +++ b/BALSAMIC/snakemake_rules/quality_control/sentieon_qc_metrics.rule @@ -1,9 +1,6 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -from BALSAMIC.utils.rule import get_threads - - def repeat(param, values): param_values = [] @@ -25,7 +22,7 @@ rule sentieon_wgs_metrics: min_base_qual = '10', gene_list = config["reference"]["refGene"], cov_threshold = repeat("--cov_thresh", [50, 100, 150, 200, 250]), - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], threads: get_threads(cluster_config, 'sentieon_wgs_metrics') benchmark: diff --git a/BALSAMIC/snakemake_rules/sentieon/sentieon_germline.rule b/BALSAMIC/snakemake_rules/sentieon/sentieon_germline.rule deleted file mode 100644 index 03b4bcfa4..000000000 --- a/BALSAMIC/snakemake_rules/sentieon/sentieon_germline.rule +++ /dev/null @@ -1,60 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -from BALSAMIC.utils.rule import 
get_threads - -rule sentieon_DNAscope: - input: - ref = config["reference"]["reference_genome"], - dbsnp = config["reference"]["dbsnp"], - bam = bam_dir + "{sample}.dedup.realign.bam", - recal_table = bam_dir + "{sample}.dedup.realign.recal_data.table" - output: - vcf = vcf_dir + "SNV.germline.{sample}.dnascope.vcf.gz", - params: - tmpdir = tmp_dir, - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", - sentieon_lic = config["SENTIEON_LICENSE"], - sentieon_ml_dnascope = SENTIEON_DNASCOPE - threads: get_threads(cluster_config, 'sentieon_DNAscope') - log: - vcf_dir + "{sample}.dnascope.log" - benchmark: - benchmark_dir + 'sentieon_DNAscope_' + "{sample}.dnascope.tsv" - shell: - """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p ${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; -export SENTIEON_LICENSE={params.sentieon_lic}; -export SENTIEON_DNASCOPE={params.sentieon_ml_dnascope}; - -{params.sentieon_exec} driver -t {threads} -r {input.ref} -i {input.bam} -q {input.recal_table} --algo DNAscope -d {input.dbsnp} {output.vcf} - """ - - -# rule sentieon_filter_DNAscope: -# input: -# ref = config["reference"]["reference_genome"], -# dnascope_vcf = vcf_dir + "sentieon_dnascope/SNV.germline.{sample}.dnascope.vcf.gz" -# output: -# dnascope_filtered_vcf = vcf_dir + "SNV.germline.{sample}.dnascope.vcf.gz" -# params: -# sentieon_exec = SENTIEON_INSTALL_DIR + "/bin/sentieon", -# sentieon_lic = SENTIEON_LICENSE, -# sentieon_ml_dnascope = SENTIEON_DNASCOPE -# threads: 16 -# log: -# vcf_dir + "{sample}.dnascope.filtered.log" -# benchmark: -# benchmark_dir + "{sample}.dnascope_filter.tsv" -# shell: -# """ -# export SENTIEON_LICENSE={params.sentieon_lic}; -# export SENTIEON_DNASCOPE={params.sentieon_ml_dnascope}; - -# {params.sentieon_exec} driver -t {threads} -r {input.ref} --algo DNAModelApply --model {params.sentieon_ml_dnascope} -v {input.dnascope_vcf} {output.dnascope_filtered_vcf} -# """ - diff --git 
a/BALSAMIC/snakemake_rules/umi/annotate_vep.rule b/BALSAMIC/snakemake_rules/umi/annotate_vep.rule deleted file mode 100644 index 5ed40abd3..000000000 --- a/BALSAMIC/snakemake_rules/umi/annotate_vep.rule +++ /dev/null @@ -1,57 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_threads - -# Annotate vcf files with VEP -rule vep_somatic: - input: - vcf = vcf_dir + '{sample}.{var_caller}.umi.vcf.gz', - cosmic = config["reference"]["cosmic"] - output: - vcf_all = vep_dir + '{sample}.{var_caller}.umi.all.vcf.gz', - vcf_summary = vep_dir + '{sample}.{var_caller}.umi.all.vcf.gz_summary.html', - vcf_pass = vep_dir + '{sample}.{var_caller}.umi.pass.vcf.gz' - params: - conda = get_conda_env(config["conda_env_yaml"],"ensembl-vep"), - vep_cache = config["reference"]["vep"], - default_options = '--compress_output bgzip --vcf --everything --allow_non_variant --dont_skip --buffer_size 10000 --format vcf --offline --variant_class --merged --cache --verbose --force_overwrite', - sample_id = '{sample}', - var_caller = '{var_caller}' - threads: get_threads(cluster_config, 'vep_somatic') - singularity: singularity_image - log: - log_dir + '{sample}_{var_caller}_vep.log' - benchmark: - benchmark_dir + '{sample}_{var_caller}_vep.tsv' - message: - 'Annotating {params.var_caller} VCF file with VEP for sample {params.sample_id}' - shell: - "source activate {params.conda}\n" - "vep_path=$(dirname $(readlink -e $(which vep)))\n" - "export PERL5LIB=\n" - "vep " - "--dir $vep_path " - "--dir_cache {params.vep_cache} " - "--dir_plugins $vep_path " - "--input_file {input.vcf} " - "--output_file {output.vcf_all} " - "--compress_output bgzip " - "--fork {threads} " - "--vcf " - "--everything " - "--allow_non_variant " - "--dont_skip " - "--buffer_size 10000 " - "--format vcf " - "--offline " - "--variant_class " - "--merged " - "--cache " - "--custom 
{input.cosmic},COSMIC,vcf,exact,0,CDS,GENE,STRAND,CNT,AA " - "--verbose " - "--force_overwrite; " - "tabix -p vcf -f {output.vcf_all}; " - "bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_all}; " - "tabix -p vcf -f {output.vcf_pass}; " diff --git a/BALSAMIC/snakemake_rules/umi/generate_AF_tables.rule b/BALSAMIC/snakemake_rules/umi/generate_AF_tables.rule new file mode 100644 index 000000000..060ea2c8c --- /dev/null +++ b/BALSAMIC/snakemake_rules/umi/generate_AF_tables.rule @@ -0,0 +1,32 @@ +# vim: syntax=python tabstop=4 expandtab +# coding: utf-8 + +# Generate tables for AF scatterplots +rule bcftools_query_generatebackgroundaf_umitable: + input: + vcf = vcf_dir + "SNV.somatic.{case_name}.{var_caller}.vcf.gz" + output: + AF = umi_qc_dir + "{case_name}.{var_caller}.AFtable.txt" + benchmark: + Path(benchmark_dir + "bcftools_query_generatebackgroundaf_umitable_{case_name}_{var_caller}.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() + params: + validated_set= config["background_variants"], + conda = config["bioinfo_tools"].get("bcftools"), + sample_id = "{case_name}" + threads: + get_threads(cluster_config, "bcftools_query_generatebackgroundaf_umitable") + message: + "Creating Allelic frequency table from VCF file for sample {params.sample_id}" + shell: + """ +source activate {params.conda}; +bcftools query \ +--regions-file {params.validated_set} \ +-f \"%CHROM\\t%POS\\t%REF\\t%ALT\\t%FILTER\\t[%AF\\t%AD{{0}}\\t%AD{{1}}]\n\" \ +{input.vcf} | \ +awk -v file={params.sample_id} \ +\'{{print $1\":\"$2\"_\"$3\"->\"$4\"\\t\"$8/($7+$8)\"\\t\"file}}\' \ +> {output.AF}; + """ diff --git a/BALSAMIC/snakemake_rules/umi/generate_AF_tables.rules b/BALSAMIC/snakemake_rules/umi/generate_AF_tables.rules deleted file mode 100644 index 0260f46a1..000000000 --- a/BALSAMIC/snakemake_rules/umi/generate_AF_tables.rules +++ /dev/null @@ -1,30 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 
- - -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_threads - - -# Generate tables for AF scatterplots -rule calculate_AF: - input: - vcf = vcf_dir + '{sample}.{var_caller}.umi.vcf.gz' - output: - AF = table_dir + '{sample}.{var_caller}.umi.AFtable.txt' - params: - validated_set= config["background_variants"], - sample_id = '{sample}' - threads: get_threads(cluster_config, 'calculate_AF') - log: - log_dir + '{sample}.{var_caller}.AFcalculate.log' - benchmark: - benchmark_dir + '{sample}.{var_caller}.AFcalculate.tsv' - message: "Creating Allelic frequency table from VCF file for sample {params.sample_id}" - shell: - "bcftools query --regions-file {params.validated_set} " - "-f \"%CHROM\\t%POS\\t%REF\\t%ALT\\t%FILTER\\t[%AF\\t%AD{{0}}\\t%AD{{1}}]\\n\" " - "{input.vcf} | " - "awk -v file={params.sample_id} " - "\'{{print $1\":\"$2\"_\"$3\"->\"$4\"\\t\"$8/($7+$8)\"\\t\"file}}\' " - "> {output.AF}\n" diff --git a/BALSAMIC/snakemake_rules/umi/qc_umi.rule b/BALSAMIC/snakemake_rules/umi/qc_umi.rule new file mode 100644 index 000000000..330e9fdb7 --- /dev/null +++ b/BALSAMIC/snakemake_rules/umi/qc_umi.rule @@ -0,0 +1,107 @@ +# vim: syntax=python tabstop=4 expandtab +# coding: utf-8 + +## UmiAwareMarkDuplicatesWithMateCigar - umimetrics +rule picard_umiaware: + input: + bam = umi_dir + "{sample}.consensusfiltered.umi.bam" + output: + bam = umi_qc_dir + "{sample}.picard.umiaware.bam", + duplicates = umi_qc_dir + "{sample}.umi.duplicatemetrics", + umimetrics = umi_qc_dir + "{sample}.umi.metrics" + benchmark: + Path(benchmark_dir + "picard_umiaware_{sample}.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() + params: + conda = config["bioinfo_tools"].get("picard"), + sample_id = "{sample}" + threads: + get_threads(cluster_config, "picard_umiaware") + message: + "Picard Umiaware mark dups for sample {params.sample_id}" + shell: + """ +source activate {params.conda}; + +picard 
UmiAwareMarkDuplicatesWithMateCigar \ +I={input.bam} \ +O={output.bam} \ +M={output.duplicates} \ +UMI_METRICS={output.umimetrics}; + """ + +## CollectHSmetrics - median target coverage-required +rule picard_collecthsmetrics_umi: + input: + fadict = (config["reference"]["reference_genome"]).replace(".fasta",".dict"), + bed = config["panel"]["capture_kit"], + bam = umi_dir + "{sample}.consensusfiltered.umi.bam", + fa = config["reference"]["reference_genome"] + output: + mrkdup = umi_qc_dir + "{sample}.umi.collect_hsmetric" + benchmark: + Path(benchmark_dir + "picard_collecthsmetrics_umi_{sample}.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() + params: + conda = config["bioinfo_tools"].get("picard"), + baitsetname = os.path.basename(config["panel"]["capture_kit"]), + sample_id = "{sample}" + threads: + get_threads(cluster_config, "CollectHsMetrics") + message: + "Collect HSmetrics using Picardtools for {params.sample_id}" + shell: + """ +source activate {params.conda}; + +picard BedToIntervalList \ +I={input.bed} \ +O={input.bam}.picard.bedintervals \ +SD={input.fadict}; + +picard CollectHsMetrics \ +BI={input.bam}.picard.bedintervals \ +TI={input.bam}.picard.bedintervals \ +I={input.bam} \ +O={output.mrkdup}; + """ + +## SUM(Reads in each family)/ the number of families after correction, collapsing on supporting reads. 
+rule samtools_view_calculatemeanfamilydepth_umi: + input: + bam = umi_dir + "{sample}.consensusfiltered.umi.bam" + output: + temp_fl = temp (umi_qc_dir + "{sample}.umi.temp.fl"), + totalsum = umi_qc_dir + "{sample}.umi.mean_family_depth" + benchmark: + Path(benchmark_dir + "samtools_view_calculatemeanfamilydepth_umi_{sample}.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("samtools") + ".sif").as_posix() + params: + conda = config["bioinfo_tools"].get("samtools"), + sample_id = "{sample}" + threads: + get_threads(cluster_config, "samtools_view_calculatemeanfamilydepth_umi") + message: + "Calculating mean family depth using samtools and awk for {params.sample_id}" + shell: + """ +source activate {params.conda}; + +samtools view -@ {threads} {input.bam} | \ +grep 'RX:Z:' | \ +sed 's/.*RX:Z:\\([ACGT-].*\\).*/\\1/' | \ +cut -f1 | \ +grep -v 'N' | \ +sort | uniq -c | \ +sed -e 's/ */\\t/g' | \ +cut -f2,3 > {output.temp_fl}; + +awk -F'\\t' \ +'{{sum+=$1;}} \ +END{{printf(\"{params.sample_id}_meandepth: \"sum/NR)}}' \ +{output.temp_fl} > \ +{output.totalsum} + """ diff --git a/BALSAMIC/snakemake_rules/umi/sentieon_consensuscall.rule b/BALSAMIC/snakemake_rules/umi/sentieon_consensuscall.rule index 9f4333017..7a96c178c 100644 --- a/BALSAMIC/snakemake_rules/umi/sentieon_consensuscall.rule +++ b/BALSAMIC/snakemake_rules/umi/sentieon_consensuscall.rule @@ -1,57 +1,95 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 - -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_threads - - # UMI-consensus calling -rule sentieon_umi_consensus_call: +rule sentieon_consensuscall_umi: input: - sam_consensus = umi_dir + '{sample}.umialign.sam', + sam_consensus = umi_dir + "{sample}.align.umi.bam" output: - fastq_consensus = umi_dir + '{sample}.consensuscall.fastq.gz', + fastq_consensus = temp(umi_dir + "{sample}.consensuscall.umi.fastq.gz") + benchmark: + Path(benchmark_dir + 
"sentieon_consensuscall_umi_{sample}.tsv").as_posix() params: - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], - tag = 'XR', - ip_format = 'SAM', + tag = paramsumi.consensuscall.tag, + ip_format = paramsumi.consensuscall.align_format, sample_id = '{sample}' threads: - get_threads(cluster_config, 'sentieon_umi_consensus_call') - log: - log_dir + '{sample}.consensuscall.log' - benchmark : - benchmark_dir + '{sample}.consensuscall.tsv' + get_threads(cluster_config, "sentieon_consensuscall_umi") message: - "Consensus molecule creation for sample {params.sample_id}" + "Consensus molecule creation using sentieon for sample {params.sample_id}" shell: - "export SENTIEON_LICENSE={params.sentieon_lic}\n" - "{params.sentieon_exec} umi consensus -t {threads} -i {input.sam_consensus} -o {output.fastq_consensus} --input_format {params.ip_format} --umi_tag {params.tag} --read_name_prefix 'UMI-' &> {log}\n" + """ +{params.sentieon_exec} umi consensus \ +-t {threads} \ +-i {input.sam_consensus} \ +-o {output.fastq_consensus} \ +--input_format {params.ip_format} \ +--umi_tag {params.tag} \ +--read_name_prefix 'UMI-'; + """ # Alignment of consensus reads -rule sentieon_umi_consensus_align: +rule sentieon_bwa_umiconsensus: input: ref_fa = config["reference"]["reference_genome"], - fq_consensus = umi_dir + '{sample}.consensuscall.fastq.gz', + fq_consensus = umi_dir + "{sample}.consensuscall.umi.fastq.gz" output: - align_consensus = umi_dir + '{sample}.consensusalign.bam', + align_consensus = umi_dir + "{sample}.consensuscalled.umi.bam" + benchmark: + Path(benchmark_dir + "sentieon_bwa_umiconsensus_{sample}.tsv").as_posix() params: - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], - sheader = "'@RG\\tID:Group\\tSM:{sample}\\tLB:TargetPanel\\tPL:ILLUMINA'", - ip_bases = '1000000', - 
sample_id = '{sample}' + sheader = paramsumi.common.align_header, + ip_bases = paramsumi.common.align_intbases, + sample_id = "{sample}" threads: - get_threads(cluster_config, 'sentieon_umi_consensus_align') - log: - log_dir + '{sample}.consensusalign.log' - benchmark : - benchmark_dir + '{sample}.consensusalign.tsv' + get_threads(cluster_config, "sentieon_bwa_umiconsensus") message: - "Mapping of consensus reads with the bwa mem, sorting for sample {params.sample_id}" + "Mapping of consensus reads with the sentieon bwa mem, sorting for sample {params.sample_id}" shell: - "export SENTIEON_LICENSE={params.sentieon_lic}\n" - "{params.sentieon_exec} bwa mem -R {params.sheader} -t {threads} -K {params.ip_bases} -p -C {input.ref_fa} {input.fq_consensus} | {params.sentieon_exec} util sort -r {input.ref_fa} --sam2bam -o {output.align_consensus} -i - 2> {log}\n" + """ +{params.sentieon_exec} bwa mem \ +-R {params.sheader} \ +-t {threads} \ +-K {params.ip_bases} \ +-p -C {input.ref_fa} \ +{input.fq_consensus} | \ +{params.sentieon_exec} util sort \ +-r {input.ref_fa} \ +--sam2bam \ +-o {output.align_consensus} \ +-i - ; + """ + +# Filter consensus called reads based on 'XZ' filtering +rule sentieon_consensusfilter_umi: + input: + umi_dir + "{case_name}.consensuscalled.umi.bam" + output: + umi_dir + "{case_name}.consensusfiltered.umi.bam" + benchmark: + Path(benchmark_dir + "sentieon_consensusfilter_umi_{case_name}.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("samtools") + ".sif").as_posix() + params: + consensusfilter_script = get_script_path("FilterDuplexUMIconsensus.awk"), + minreads = paramsumi.consensuscall.filter_minreads, + sample_id = '{case_name}', + conda = config["bioinfo_tools"].get("samtools") + threads: + get_threads(cluster_config, "sentieon_consensusfilter_umi") + message: + "Filtering consensus reads based on XZ tag for sample {params.sample_id}" + shell: + """ +source activate {params.conda}; + +samtools view -h 
{input} | \ +awk -v MinR={params.minreads} -v OFS=\'\\t\' -f {params.consensusfilter_script} | \ +samtools view -bh - > {output}; +samtools index {output}; + """ diff --git a/BALSAMIC/snakemake_rules/umi/sentieon_umiextract.rule b/BALSAMIC/snakemake_rules/umi/sentieon_umiextract.rule index 38a5778af..86ef33bf9 100644 --- a/BALSAMIC/snakemake_rules/umi/sentieon_umiextract.rule +++ b/BALSAMIC/snakemake_rules/umi/sentieon_umiextract.rule @@ -1,57 +1,59 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 - -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_threads - # Extract umi tags using the defined read structure. -rule sentieon_umi_extract: +rule sentieon_umiextract: input: - read1 = fastq_dir + "{sample}_1.fastq.gz", - read2 = fastq_dir + "{sample}_2.fastq.gz" + read1 = Path(fastq_dir + "{sample}_1.umi_optimized.fastq.gz").as_posix(), + read2 = Path(fastq_dir + "{sample}_2.umi_optimized.fastq.gz").as_posix() output: - ds_umi = umi_dir + '{sample}.umiextract.fastq.gz' + ds_umi = temp(umi_dir + "{sample}.umiextract.fastq.gz") + benchmark: + Path(benchmark_dir + "sentieon_umiextract_{sample}.tsv").as_posix() params: - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], - ds_params= ['-d', '3M2S+T,3M2S+T'], - sample_id = '{sample}' - log: - log_dir + '{sample}.umiextract.log' - benchmark: - benchmark_dir + '{sample}.umiextract.tsv' + ds_params= paramsumi.umiextract.read_structure, + sample_id = "{sample}" + threads: + get_threads(cluster_config, "sentieon_umiextract") message: - "UMI tag extraction using Sentieon for sample {params.sample_id}" - threads: - get_threads(cluster_config, 'sentieon_umi_extract') + "UMI tag extraction using sentieon for sample {params.sample_id}" shell: - "export SENTIEON_LICENSE={params.sentieon_lic}\n" - "{params.sentieon_exec} umi extract {params.ds_params} {input.read1} {input.read2} -o 
{output.ds_umi} &> {log} \n" - - + """ +{params.sentieon_exec} umi extract \ +{params.ds_params} {input.read1} {input.read2} \ +-o {output.ds_umi}; + """ # Align the UMI-extracted reads -rule sentieon_umi_align: +rule sentieon_bwa_umiextract: input: - ref_fa = config['reference']['reference_genome'], - fastq_umi = umi_dir + '{sample}.umiextract.fastq.gz' + ref_fa = config["reference"]["reference_genome"], + fastq_umi = umi_dir + "{sample}.umiextract.fastq.gz" output: - align_umi = temp(umi_dir + '{sample}.umialign.sam') + align_umi = temp(umi_dir + "{sample}.align.umi.bam") + benchmark: + Path(benchmark_dir + "sentieon_bwa_umiextract_{sample}.tsv").as_posix() params: - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], - sheader = "'@RG\\tID:Group\\tSM:{sample}\\tLB:TargetPanel\\tPL:ILLUMINA'", - ip_bases = '1000000', - sample_id = '{sample}' + sample_id = '{sample}', + sheader = paramsumi.common.align_header, + ip_bases = paramsumi.common.align_intbases threads: - get_threads(cluster_config, 'sentieon_umi_align') - log: - log_dir + '{sample}.umialign.log' - benchmark: - benchmark_dir + '{sample}.umialign.benchmark' + get_threads(cluster_config, "sentieon_bwa_umiextract") message: - "Aligning of UMI extracted reads with bwa mem, sorting for sample {params.sample_id}" + "Aligning of UMI extracted reads with sentieon bwa mem, sorting for sample {params.sample_id}" shell: - "export SENTIEON_LICENSE={params.sentieon_lic}\n" - "{params.sentieon_exec} bwa mem -R {params.sheader} -K {params.ip_bases} -p -t {threads} -C {input.ref_fa} {input.fastq_umi} > {output.align_umi} 2> {log}\n" + """ +{params.sentieon_exec} bwa mem \ +-R {params.sheader} \ +-K {params.ip_bases} \ +-p -t {threads} \ +-C {input.ref_fa} {input.fastq_umi} | \ +{params.sentieon_exec} util sort \ +-r {input.ref_fa} \ +--sam2bam \ +-o {output.align_umi} -i - ; + """ diff --git 
a/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope.rule b/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope.rule index b72baf089..477c3b197 100644 --- a/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope.rule +++ b/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope.rule @@ -1,34 +1,50 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_threads - # Variant-calling using TNscope -rule sentieon_umi_tnscope: + +rule sentieon_tnscope_umi: input: - bam = umi_dir + '{sample}.consensusalign.bam', - ref_fa = config['reference']['reference_genome'], - bed = config['panel']['capture_kit'], - dbsnp = config['reference']['dbsnp'] + bam = expand(umi_dir + "{mysample}.consensusfiltered.umi.bam", mysample=config["samples"]), + ref_fa = config["reference"]["reference_genome"], + bed = config["panel"]["capture_kit"], + dbsnp = config["reference"]["dbsnp"] output: - vcf = vcf_dir + '{sample}.TNscope.umi.vcf.gz', + vcf = vcf_dir + "SNV.somatic."+ config["analysis"]["case_id"] + ".TNscope_umi.vcf.gz", + namemap = vcf_dir + "SNV.somatic." 
+ config["analysis"]["case_id"] + ".TNscope_umi.sample_name_map" + benchmark: + Path(benchmark_dir + "sentieon_tnscope_umi_"+ config["analysis"]["case_id"] +".tsv").as_posix() params: - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], - algo = 'TNscope', - tAF = '0.0005', - TL = '0.5', - detect = 'sv', - error_rate = '5', - prune_factor = '3', - sample_id = '{sample}' - threads: get_threads(cluster_config, 'sentieon_umi_tnscope') - log: - log_dir + '{sample}.varcall.TNscope.log' - benchmark : - benchmark_dir + '{sample}.varcall.TNscope.tsv' - message: "Calling SNVs using TNscope for sample {params.sample_id}" + tumor_af = paramsumi.common.filter_tumor_af, + algo = paramsumi.tnscope.algo, + disable_detect = paramsumi.tnscope.disable_detect, + tumor_lod = paramsumi.tnscope.min_tumorLOD, + error_rate = paramsumi.tnscope.error_rate, + prune_factor = paramsumi.tnscope.prunefactor, + tumor = get_sample_type(config["samples"], "tumor"), + threads: + get_threads(cluster_config, "sentieon_tnscope_umi") + message: + "Calling SNVs using TNscope for sample {params.tumor}" shell: - "export SENTIEON_LICENSE={params.sentieon_lic}\n" - "{params.sentieon_exec} driver -t {threads} -r {input.ref_fa} -i {input.bam} --algo {params.algo} --tumor_sample {params.sample_id} --dbsnp {input.dbsnp} --min_tumor_allele_frac {params.tAF} --filter_t_alt_frac {params.tAF} --min_init_tumor_lod {params.TL} --disable_detector {params.detect} --max_error_per_read {params.error_rate} --pcr_indel_model NONE --prune_factor {params.prune_factor} {output.vcf} &> {log}\n" + """ +{params.sentieon_exec} driver \ +-t {threads} \ +-r {input.ref_fa} \ +-i {input.bam} \ +--algo {params.algo} \ +--tumor_sample {params.tumor} \ +--dbsnp {input.dbsnp} \ +--min_tumor_allele_frac {params.tumor_af} \ +--filter_t_alt_frac {params.tumor_af} \ +--min_init_tumor_lod {params.tumor_lod} \ +--disable_detector 
{params.disable_detect} \ +--max_error_per_read {params.error_rate} \ +--pcr_indel_model NONE \ +--prune_factor {params.prune_factor} \ +{output.vcf}; + +echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap}; + """ diff --git a/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope_tn.rule b/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope_tn.rule new file mode 100644 index 000000000..d1a408aa5 --- /dev/null +++ b/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope_tn.rule @@ -0,0 +1,56 @@ +# vim: syntax=python tabstop=4 expandtab +# coding: utf-8 + +# Variant-calling using TNscope +rule sentieon_tnscope_umi_tn: + input: + bamT = expand(umi_dir + "{mysample}.consensusfiltered.umi.bam", + mysample=get_sample_type(config["samples"], "tumor")), + bamN = expand(umi_dir + "{mysample}.consensusfiltered.umi.bam", + mysample=get_sample_type(config["samples"], "normal")), + ref_fa = config["reference"]["reference_genome"], + bed = config["panel"]["capture_kit"], + dbsnp = config["reference"]["dbsnp"] + output: + vcf = vcf_dir + "SNV.somatic."+ config["analysis"]["case_id"] + ".TNscope_umi.vcf.gz", + namemap = vcf_dir + "SNV.somatic." 
+ config["analysis"]["case_id"] + ".TNscope_umi.sample_name_map" + benchmark: + Path(benchmark_dir + "sentieon_tnscope_umi_"+ config["analysis"]["case_id"] + ".tsv").as_posix() + params: + sentieon_exec = config["SENTIEON_EXEC"], + sentieon_lic = config["SENTIEON_LICENSE"], + tumor_af = paramsumi.common.filter_tumor_af, + algo = paramsumi.tnscope.algo, + disable_detect = paramsumi.tnscope.disable_detect, + tumor_lod = paramsumi.tnscope.min_tumorLOD, + error_rate = paramsumi.tnscope.error_rate, + prune_factor = paramsumi.tnscope.prunefactor, + tumor = get_sample_type(config["samples"], "tumor"), + normal = get_sample_type(config["samples"], "normal") + threads: + get_threads(cluster_config, "sentieon_tnscope_umi") + message: + "Calling SNVs using TNscope for tumor sample: {params.tumor}" + " versus normal sample: {params.normal}" + shell: + """ +{params.sentieon_exec} driver \ +-t {threads} \ +-r {input.ref_fa} \ +-i {input.bamT} \ +-i {input.bamN} \ +--algo {params.algo} \ +--tumor_sample {params.tumor} \ +--normal_sample {params.normal} \ +--dbsnp {input.dbsnp} \ +--min_tumor_allele_frac {params.tumor_af} \ +--filter_t_alt_frac {params.tumor_af} \ +--min_init_tumor_lod {params.tumor_lod} \ +--disable_detector {params.disable_detect} \ +--max_error_per_read {params.error_rate} \ +--pcr_indel_model NONE \ +--prune_factor {params.prune_factor} \ +{output.vcf}; + +echo -e \"{params.tumor}\\tTUMOR\n{params.normal}\\tNORMAL\" > {output.namemap}; + """ diff --git a/BALSAMIC/snakemake_rules/umi/varcall_vardict.rule b/BALSAMIC/snakemake_rules/umi/varcall_vardict.rule deleted file mode 100644 index e97e49ea9..000000000 --- a/BALSAMIC/snakemake_rules/umi/varcall_vardict.rule +++ /dev/null @@ -1,33 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_threads - -# Variant-calling using vardict -rule vardict: - input: - bam = umi_dir + '{sample}.consensusalign.bam', - ref_fa = 
config['reference']['reference_genome'], - bed = config['panel']['capture_kit'] - output: - vardict = vcf_dir + '{sample}.vardict.umi.vcf.gz', - params: - conda = get_conda_env(config["conda_env_yaml"],'vardict'), - af = "0.0005", - sample_id = '{sample}', - vardict = "-c 1 -S 2 -E 3 -g 4 -r 1 -F 0", - var2vcf = '-E' - singularity: singularity_image - threads: get_threads(cluster_config, 'vardict') - log: - log_dir + '{sample}.varcall.vardict.log' - benchmark: - benchmark_dir + '{sample}.varcall.vardict.tsv' - message: - 'Variant calling using Vardict for sample {params.sample_id}' - shell: - "source activate {params.conda}\n" - "vardict -G {input.ref_fa} -f {params.af} -N {params.sample_id} -b {input.bam} {params.vardict} {input.bed} | teststrandbias.R | var2vcf_valid.pl {params.var2vcf} -f {params.af} -N {params.sample_id} | bgzip > {output.vardict}\n" - "tabix -p vcf {output.vardict}\n" - "source deactivate" diff --git a/BALSAMIC/snakemake_rules/variant_calling/cnvkit_paired.rule b/BALSAMIC/snakemake_rules/variant_calling/cnvkit_paired.rule index 72c6cc218..acd78993e 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/cnvkit_paired.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/cnvkit_paired.rule @@ -1,15 +1,8 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_conda_env, get_chrom -from BALSAMIC import __version__ as bv - - fasta = config["reference"]["reference_genome"] refflat = config["reference"]["refflat"] -case_id = config["analysis"]["case_id"] if config["analysis"]["sequencing_type"] == 'wgs': normal_bam = "{normal}.dedup.realign".format(normal = get_sample_type(config["samples"], "normal")[0]) @@ -24,59 +17,80 @@ else: cnvkit_params += f" --targets {cnv_dir}/targets.bed " cnvkit_params += " --target-avg-size 50 " -rule cnvkit_paired: +rule cnvkit_batch_paired: input: - fasta = fasta, - refflat = refflat, - bamN = bam_dir + normal_bam + ".bam", - bamT = bam_dir + 
tumor_bam + ".bam" + fasta = config["reference"]["reference_genome"], + bamN = bam_dir + normal_bam + ".bam", + bamT = bam_dir + tumor_bam + ".bam", + refflat = config["reference"]["refflat"], output: - vcf = temp(vcf_dir + "CNV.somatic." + case_id + ".cnvkit.vcf.gz"), - namemap = temp(vcf_dir + "CNV.somatic." + case_id + ".cnvkit.sample_name_map"), - cns = cnv_dir + tumor_bam + ".cns", cnr = cnv_dir + tumor_bam + ".cnr", - scatter = cnv_dir + tumor_bam + "-scatter.pdf", + cns = cnv_dir + tumor_bam + ".cns", diagram = cnv_dir + tumor_bam + "-diagram.pdf", - gene_breaks = cnv_dir + case_id + ".gene_breaks", - gene_metrics = cnv_dir + case_id + ".gene_metrics", + gene_breaks = cnv_dir + config["analysis"]["case_id"] + ".gene_breaks", + gene_metrics = cnv_dir + config["analysis"]["case_id"] + ".gene_metrics", + namemap = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".cnvkit.sample_name_map"), + scatter = cnv_dir + tumor_bam + "-scatter.pdf", + vcf = temp(vcf_dir + "CNV.somatic." 
+ config["analysis"]["case_id"] + ".cnvkit.vcf.gz"), + singularity: + Path(singularity_image, config["bioinfo_tools"].get("cnvkit") + ".sif").as_posix() + benchmark: + benchmark_dir + "cnvkit_batch_paired_" + config["analysis"]["case_id"] + ".tsv" params: - housekeeper_id = {"id": case_id, "tags": "cnv"}, - tmpdir = tmp_dir, + cnv_dir = cnv_dir, + conda = config["bioinfo_tools"].get("cnvkit"), extra = cnvkit_params, - target = config["panel"]["capture_kit"] if "panel" in config else "None", - name = case_id, + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "cnv"}, + name = config["analysis"]["case_id"], normal_name = normal_bam, + target = config["panel"]["capture_kit"] if "panel" in config else "None", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), tumor_name = tumor_bam, - cnv_dir = cnv_dir, - conda = get_conda_env(config["conda_env_yaml"], "cnvkit"), - singularity: singularity_image - benchmark: - benchmark_dir + "cnvkit_paired_" + case_id + ".cnvkit_paired.tsv" shell: - "source activate {params.conda}; " - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "if [ {params.target} != None ]; then " - "cnvkit.py target {params.target} --annotate {input.refflat} --split -o {params.cnv_dir}/targets.bed; " - "fi; " - "cnvkit.py batch {input.bamT} " - "--normal {input.bamN} " - "{params.extra} " - "--output-reference {params.cnv_dir}/Reference.cnn " - "--scatter --diagram " - "--output-dir {params.cnv_dir}; " - "cnvkit.py genemetrics {output.cnr} " - "-s {output.cns} " - "--drop-low-coverage -y " - "--output {output.gene_metrics}; " - "cnvkit.py breaks {output.cnr} {output.cns} " - "| cut -f1 | sort -u > {output.gene_breaks}; " - "cnvkit.py export vcf {output.cns} --cnr {output.cnr} " - "-o {params.cnv_dir}/{params.tumor_name}.vcf --sample-id TUMOR; " - "bgzip -f {params.cnv_dir}/{params.tumor_name}.vcf; " - "tabix -p vcf -f 
{params.cnv_dir}/{params.tumor_name}.vcf.gz; " - "bcftools sort -o {output.vcf} --temp-dir ${{tmpdir}} -O z {params.cnv_dir}/{params.tumor_name}.vcf.gz; " - "tabix -p vcf -f {output.vcf}; " - "echo -e \"TUMOR\\tTUMOR\" > {output.namemap}; " + """ +source activate {params.conda}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; + +if [ {params.target} != None ]; then +cnvkit.py target {params.target} \ +--annotate {input.refflat} \ +--split \ +-o {params.cnv_dir}/targets.bed; +fi; + +cnvkit.py batch {input.bamT} \ +--normal {input.bamN} \ +{params.extra} \ +--output-reference {params.cnv_dir}/Reference.cnn \ +--scatter --diagram \ +--output-dir {params.cnv_dir}; + +cnvkit.py genemetrics {output.cnr} \ +-s {output.cns} \ +--drop-low-coverage -y \ +--output {output.gene_metrics}; + +cnvkit.py breaks {output.cnr} {output.cns} \ +| cut -f1 | sort -u > {output.gene_breaks}; + +cnvkit.py export vcf {output.cns} \ +--cnr {output.cnr} \ +-o {params.cnv_dir}/{params.tumor_name}.vcf \ +--sample-id TUMOR; + +bgzip -f {params.cnv_dir}/{params.tumor_name}.vcf; + +tabix -p vcf -f {params.cnv_dir}/{params.tumor_name}.vcf.gz; + +bcftools sort \ +-o {output.vcf} \ +--temp-dir {params.tmpdir} \ +-O z {params.cnv_dir}/{params.tumor_name}.vcf.gz; + +tabix -p vcf -f {output.vcf}; + +echo -e \"TUMOR\\tTUMOR\" > {output.namemap}; + +rm -rf {params.tmpdir}; + """ diff --git a/BALSAMIC/snakemake_rules/variant_calling/cnvkit_single.rule b/BALSAMIC/snakemake_rules/variant_calling/cnvkit_single.rule index 441e26451..5d9ed09ee 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/cnvkit_single.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/cnvkit_single.rule @@ -1,14 +1,6 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -from BALSAMIC.utils.rule import get_conda_env, get_chrom -from BALSAMIC import __version__ as bv - -fasta = config["reference"]["reference_genome"] -refflat = config["reference"]["refflat"] -wgs_calling_interval = 
config["reference"]["wgs_calling_interval"] -case_id = config["analysis"]["case_id"] - if config["analysis"]["sequencing_type"] == 'wgs': tumor_bam = "{tumor}.dedup.realign".format(tumor = get_sample_type(config["samples"], "tumor")[0]) cnvkit_params = " --method wgs " @@ -18,59 +10,85 @@ else: rule cnvkit_single: input: - fasta = fasta, - refflat = refflat, - wgs_calling_interval = wgs_calling_interval, + fasta = config["reference"]["reference_genome"], + refflat = config["reference"]["refflat"], + wgs_calling_interval = config["reference"]["wgs_calling_interval"], bamT = bam_dir + tumor_bam + ".bam", output: - vcf = temp(vcf_dir + "CNV.somatic." + case_id + ".cnvkit.vcf.gz"), - namemap = temp(vcf_dir + "CNV.somatic." + case_id + ".cnvkit.sample_name_map"), + vcf = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".cnvkit.vcf.gz"), + namemap = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".cnvkit.sample_name_map"), cns = cnv_dir + tumor_bam + ".cns", cnr = cnv_dir + tumor_bam + ".cnr", scatter = cnv_dir + tumor_bam + "-scatter.pdf", diagram = cnv_dir + tumor_bam + "-diagram.pdf", - gene_breaks = cnv_dir + case_id + ".gene_breaks", - gene_metrics = cnv_dir + case_id + ".gene_metrics", + gene_breaks = cnv_dir + config["analysis"]["case_id"] + ".gene_breaks", + gene_metrics = cnv_dir + config["analysis"]["case_id"] + ".gene_metrics", params: - housekeeper_id = {"id": case_id, "tags": "cnv"}, - tmpdir = tmp_dir, + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "cnv"}, + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), extra = cnvkit_params, refcnn = cnv_dir + "FlatReference.cnn", target = config["panel"]["capture_kit"] if "panel" in config else "None", - name = case_id, + name = config["analysis"]["case_id"], tumor_name = tumor_bam, cnv_dir = cnv_dir, - conda = get_conda_env(config["conda_env_yaml"], "cnvkit"), - singularity: singularity_image + conda = config["bioinfo_tools"].get("cnvkit"), + singularity: 
Path(singularity_image, config["bioinfo_tools"].get("cnvkit") + ".sif").as_posix() benchmark: - benchmark_dir + 'cnvkit_single_' + case_id + ".cnvkit_single.tsv" + benchmark_dir + 'cnvkit_single_' + config["analysis"]["case_id"] + ".cnvkit_single.tsv" shell: - "source activate {params.conda}; " - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "if [ {params.target} != None ]; then " - "cnvkit.py target {params.target} --annotate {input.refflat} --split -o {params.cnv_dir}/targets.bed; " - "cnvkit.py reference -o {params.refcnn} -f {input.fasta} -t {params.cnv_dir}/targets.bed; " - "else " - "cnvkit.py reference -o {params.refcnn} -f {input.fasta} -t {input.wgs_calling_interval}; " - "fi; " - "cnvkit.py batch {input.bamT} " - "{params.extra} " - " --reference {params.refcnn} " - " --scatter --diagram " - " --output-dir {params.cnv_dir}; " - "cnvkit.py genemetrics {output.cnr} " - "-s {output.cns} " - "--drop-low-coverage -y " - "--output {output.gene_metrics}; " - "cnvkit.py breaks {output.cnr} {output.cns} " - "| cut -f1 | sort -u > {output.gene_breaks}; " - "cnvkit.py export vcf {output.cns} --cnr {output.cnr} " - "-o {params.cnv_dir}/{params.tumor_name}.vcf --sample-id TUMOR; " - "bgzip -f {params.cnv_dir}/{params.tumor_name}.vcf; " - "tabix -p vcf -f {params.cnv_dir}/{params.tumor_name}.vcf.gz; " - "bcftools sort -o {output.vcf} --temp-dir ${{tmpdir}} -O z {params.cnv_dir}/{params.tumor_name}.vcf.gz; " - "tabix -p vcf -f {output.vcf}; " - "echo -e \"TUMOR\\tTUMOR\" > {output.namemap}; " + """ +source activate {params.conda}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; + +if [ {params.target} != None ]; then +cnvkit.py target {params.target} \ +--annotate {input.refflat} \ +--split \ +-o {params.cnv_dir}/targets.bed; +cnvkit.py reference \ +-o {params.refcnn} \ +-f {input.fasta} \ +-t {params.cnv_dir}/targets.bed; +else +cnvkit.py reference \ +-o 
{params.refcnn} \ +-f {input.fasta} \ +-t {input.wgs_calling_interval}; +fi; + +cnvkit.py batch {input.bamT} \ +{params.extra} \ +--reference {params.refcnn} \ +--scatter --diagram \ +--output-dir {params.cnv_dir}; + +cnvkit.py genemetrics {output.cnr} \ +-s {output.cns} \ +--drop-low-coverage -y \ +--output {output.gene_metrics}; + +cnvkit.py breaks {output.cnr} {output.cns} \ +| cut -f1 | sort -u > {output.gene_breaks}; + +cnvkit.py export vcf {output.cns} \ +--cnr {output.cnr} \ +-o {params.cnv_dir}/{params.tumor_name}.vcf \ +--sample-id TUMOR; + +bgzip -f {params.cnv_dir}/{params.tumor_name}.vcf; + +tabix -p vcf -f {params.cnv_dir}/{params.tumor_name}.vcf.gz; + +bcftools sort \ +-o {output.vcf} \ +--temp-dir {params.tmpdir} \ +-O z {params.cnv_dir}/{params.tumor_name}.vcf.gz; + +tabix -p vcf -f {output.vcf}; + +echo -e \"TUMOR\\tTUMOR\" > {output.namemap}; + +rm -rf {params.tmpdir}; + """ diff --git a/BALSAMIC/snakemake_rules/variant_calling/germline.rule b/BALSAMIC/snakemake_rules/variant_calling/germline.rule index 98e840ef8..df9779296 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/germline.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/germline.rule @@ -1,19 +1,6 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -__author__ = "Hassan Foroughi Asl" - -import os -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_picard_mrkdup -from BALSAMIC.utils.rule import get_threads -from BALSAMIC import __version__ as bv - - -chromlist = config["panel"]["chrom"] -picarddup = get_picard_mrkdup(config) -capture_kit = os.path.split(config["panel"]["capture_kit"])[1] - rule haplotypecaller: input: fa = config["reference"]["reference_genome"], @@ -22,25 +9,25 @@ rule haplotypecaller: output: vcf_dir + "haplotypecaller/split_vcf/{sample}.{bedchrom}_haplotypecaller.vcf.gz" params: - tmpdir = tmp_dir, - conda = get_conda_env(config["conda_env_yaml"],"gatk"), + conda = config["bioinfo_tools"].get("gatk"), + tmpdir = 
tempfile.mkdtemp(prefix=tmp_dir), threads: get_threads(cluster_config, 'haplotypecaller') - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("gatk") + ".sif").as_posix() benchmark: benchmark_dir + 'haplotypecaller_' + "{sample}.{bedchrom}.haplotypecaller.tsv" shell: - "source activate {params.conda}; " - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "java -jar -Djava.io.tmpdir=${{tmpdir}} -Xmx32G $CONDA_PREFIX/opt/gatk-3.8/GenomeAnalysisTK.jar " - "-T HaplotypeCaller " - "-R {input.fa} " - "-I {input.bam} " - "-L {input.bed} " - "| bgzip > {output}; " - + """ +source activate {params.conda}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +java -jar -Djava.io.tmpdir={params.tmpdir} -Xms8G -Xmx32G $CONDA_PREFIX/opt/gatk-3.8/GenomeAnalysisTK.jar \ +-T HaplotypeCaller \ +-R {input.fa} \ +-I {input.bam} \ +-L {input.bed} \ +| bgzip > {output}; +rm -rf {params.tmpdir}; + """ rule haplotypecaller_merge: @@ -49,50 +36,20 @@ rule haplotypecaller_merge: output: vcf_dir + "SNV.germline.{sample}.haplotypecaller.vcf.gz" params: - tmpdir = tmp_dir, - conda = get_conda_env(config["conda_env_yaml"], "gatk"), - singularity: singularity_image + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + conda = config["bioinfo_tools"].get("gatk"), + singularity: Path(singularity_image, config["bioinfo_tools"].get("gatk") + ".sif").as_posix() benchmark: - benchmark_dir + 'haplotypecaller_merge_' + "SNV.germline.{sample}.haplotypecaller.vcf.gz" + benchmark_dir + 'haplotypecaller_merge_' + "SNV.germline.{sample}.haplotypecaller.tsv" shell: - "source activate {params.conda}; " - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "bcftools concat {input} | bcftools sort --temp-dir ${{tmpdir}} - | bgzip > {output}; " - "tabix -f -p vcf {output}; " - - - -rule 
strelka_germline: - input: - fa = config["reference"]["reference_genome"], - bam = bam_dir + "{sample}.sorted." + picarddup + ".bam", - mantaindel = vcf_dir + "manta_germline_{sample}/results/variants/candidateSmallIndels.vcf.gz" - output: - final = vcf_dir + "SNV.germline.{sample}.strelka_germline.vcf.gz" - params: - tmpdir = vcf_dir + "strelka_germline_{sample}/", - runmode = "local", - conda = get_conda_env(config["conda_env_yaml"],"strelka"), - threads: get_threads(cluster_config, "strelka_germline") - singularity: singularity_image - benchmark: - benchmark_dir + 'strelka_germline_' + "{sample}.strelka_germline.tsv" - shell: - "source activate {params.conda};" - "rm -rf {params.tmpdir}; " - "configureStrelkaGermlineWorkflow.py " - "--bam={input.bam} " - "--referenceFasta={input.fa} " - "--indelCandidates {input.mantaindel} " - "--exome " - "--runDir={params.tmpdir}; " - "python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; " - "cp {params.tmpdir}/results/variants/variants.vcf.gz {output.final}; " - "tabix -p vcf -f {output.final}; " - + """ +source activate {params.conda}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +bcftools concat {input} | bcftools sort --temp-dir {params.tmpdir} - | bgzip > {output}; +tabix -f -p vcf {output}; +rm -rf {params.tmpdir}; + """ rule manta_germline: @@ -101,26 +58,26 @@ rule manta_germline: bam = bam_dir + "{sample}.sorted." 
+ picarddup + ".bam", output: final = vcf_dir + "SV.germline.{sample}.manta_germline.vcf.gz", - candidateindel = vcf_dir + "manta_germline_{sample}/results/variants/candidateSmallIndels.vcf.gz" params: - tmpdir = vcf_dir + "manta_germline_{sample}/", + conda = config["bioinfo_tools"].get("manta"), + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), runmode = "local", - conda = get_conda_env(config["conda_env_yaml"],"manta"), threads: get_threads(cluster_config, "manta_germline") - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("manta") + ".sif").as_posix() benchmark: benchmark_dir + 'manta_germline_' + "{sample}.manta_germline.tsv" shell: - "source activate {params.conda}; " - "rm -rf {params.tmpdir}; " - "configManta.py " - "--bam={input.bam} " - "--referenceFasta={input.fa} " - "--runDir={params.tmpdir}; " - "python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; " - "cp {params.tmpdir}/results/variants/diploidSV.vcf.gz {output.final}; " - "tabix -p vcf -f {output.final}; " - + """ +source activate {params.conda}; +configManta.py \ +--bam={input.bam} \ +--referenceFasta={input.fa} \ +--runDir={params.tmpdir}; +python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; +cp {params.tmpdir}/results/variants/diploidSV.vcf.gz {output.final}; +tabix -p vcf -f {output.final}; +rm -rf {params.tmpdir}; + """ rule sentieon_DNAscope: @@ -132,8 +89,8 @@ rule sentieon_DNAscope: output: vcf = vcf_dir + "SNV.germline.{sample}.dnascope.vcf.gz", params: - tmpdir = tmp_dir, - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"] threads: get_threads(cluster_config, 'sentieon_DNAscope') log: @@ -142,12 +99,11 @@ rule sentieon_DNAscope: benchmark_dir + "{sample}.dnascope.tsv" shell: """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p 
${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver -t {threads} -r {input.ref} -i {input.bam} --interval {input.interval} --algo DNAscope -d {input.dbsnp} {output.vcf} +rm -rf {params.tmpdir}; """ diff --git a/BALSAMIC/snakemake_rules/variant_calling/mergetype.rule b/BALSAMIC/snakemake_rules/variant_calling/mergetype.rule index a73d52d60..8d50f4430 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/mergetype.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/mergetype.rule @@ -1,20 +1,8 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -__author__ = "Hassan Foroughi Asl" - -from BALSAMIC.utils.rule import get_sample_type -from BALSAMIC.utils.rule import get_conda_env, get_picard_mrkdup -from BALSAMIC import __version__ as bv - -picarddup = get_picard_mrkdup(config) - picard_extra_normal=" ".join(["RGPU=ILLUMINAi", "RGID=NORMAL","RGSM=NORMAL", "RGPL=ILLUMINAi", "RGLB=ILLUMINAi"]) picard_extra_tumor=" ".join(["RGPU=ILLUMINAi", "RGID=TUMOR", "RGSM=TUMOR", "RGPL=ILLUMINAi", "RGLB=ILLUMINAi"]) -normal_sample = get_sample_type(config["samples"], "normal")[0] -tumor_sample = get_sample_type(config["samples"], "tumor")[0] -case_id = config["analysis"]["case_id"] - rule mergeBam_normal_gatk: input: @@ -23,9 +11,9 @@ rule mergeBam_normal_gatk: output: bamN = bam_dir + "normal.sorted." 
+ picarddup + ".bsrcl.merged.bam", params: - conda = get_conda_env(config["conda_env_yaml"],"picard"), + conda = config["bioinfo_tools"].get("picard"), picard = picard_extra_normal - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() benchmark: benchmark_dir + 'mergeBam_normal_gatk_' + "{mysample}.mergebam_normal_gatk.tsv".format(mysample = normal_sample) shell: @@ -44,10 +32,10 @@ rule mergeBam_normal: cram = bam_dir + "normal.merged.cram", params: housekeeper_id = {"id": normal_sample, "tags": "normal"}, - conda = get_conda_env(config["conda_env_yaml"],"picard"), + conda = config["bioinfo_tools"].get("picard"), picard = picard_extra_normal threads: get_threads(cluster_config, "mergeBam_normal") - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() benchmark: benchmark_dir + 'mergeBam_normal_' + "{mysample}.mergebam_normal.tsv".format(mysample = normal_sample) shell: @@ -68,10 +56,10 @@ rule mergeBam_tumor: cram = bam_dir + "tumor.merged.cram", params: housekeeper_id = {"id": tumor_sample, "tags": "tumor"}, - conda = get_conda_env(config["conda_env_yaml"],"picard"), + conda = config["bioinfo_tools"].get("picard"), picard = picard_extra_tumor threads: get_threads(cluster_config, "mergeBam_tumor") - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() benchmark: benchmark_dir + 'mergeBam_tumor_' + "{mysample}.mergebam_tumor.tsv".format(mysample = tumor_sample) shell: @@ -85,14 +73,14 @@ rule mergeBam_tumor: rule mergeBam_tumor_gatk: input: bamT = bam_dir + "{mysample}.sorted.{picardstr}.bsrcl.bam".format(mysample = tumor_sample, - picardstr = picarddup) + picardstr = picarddup) output: bamT = bam_dir + "tumor.sorted." 
+ picarddup + ".bsrcl.merged.bam", params: - conda = get_conda_env(config["conda_env_yaml"],"picard"), + conda = config["bioinfo_tools"].get("picard"), picard = picard_extra_tumor threads: get_threads(cluster_config, "samtools_sort_index") - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() benchmark: benchmark_dir + 'mergeBam_tumor_gatk_' + "{mysample}.mergebam_tumor_gatk.tsv".format(mysample = tumor_sample) shell: diff --git a/BALSAMIC/snakemake_rules/variant_calling/mergetype_tumor.rule b/BALSAMIC/snakemake_rules/variant_calling/mergetype_tumor.rule index d8c6d3fb8..1cafe0c41 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/mergetype_tumor.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/mergetype_tumor.rule @@ -1,14 +1,8 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -from BALSAMIC.utils.rule import get_sample_type -from BALSAMIC.utils.rule import get_conda_env, get_picard_mrkdup -from BALSAMIC import __version__ as bv - -picarddup = get_picard_mrkdup(config) picard_extra_tumor=" ".join(["RGPU=ILLUMINAi", "RGID=TUMOR", "RGSM=TUMOR", "RGPL=ILLUMINAi", "RGLB=ILLUMINAi"]) tumor_sample = get_sample_type(config["samples"], "tumor")[0] -case_id = config["analysis"]["case_id"] rule mergeBam_tumor: @@ -21,10 +15,10 @@ rule mergeBam_tumor: cram = bam_dir + "tumor.merged.cram", params: housekeeper_id = {"id": tumor_sample, "tags": "tumor"}, - conda = get_conda_env(config["conda_env_yaml"],"picard"), + conda = config["bioinfo_tools"].get("picard"), picard = picard_extra_tumor threads: get_threads(cluster_config, "mergeBam_tumor") - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() benchmark: benchmark_dir + 'mergeBam_tumor_' + "{mysample}.mergebam_tumor.tsv".format(mysample = tumor_sample) shell: @@ -42,9 +36,9 @@ rule mergeBam_tumor_gatk: output: bamT = bam_dir + "tumor.sorted." 
+ picarddup + ".bsrcl.merged.bam", params: - conda = get_conda_env(config["conda_env_yaml"],"picard"), + conda = config["bioinfo_tools"].get("picard"), picard = picard_extra_tumor - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() benchmark: benchmark_dir + "{mysample}.mergebam_tumor_gatk.tsv".format(mysample = tumor_sample) shell: diff --git a/BALSAMIC/snakemake_rules/variant_calling/sentieon_germline.rule b/BALSAMIC/snakemake_rules/variant_calling/sentieon_germline.rule new file mode 100644 index 000000000..c56cbc6ca --- /dev/null +++ b/BALSAMIC/snakemake_rules/variant_calling/sentieon_germline.rule @@ -0,0 +1,32 @@ +# vim: syntax=python tabstop=4 expandtab +# coding: utf-8 + +rule sentieon_DNAscope: + input: + ref = config["reference"]["reference_genome"], + dbsnp = config["reference"]["dbsnp"], + bam = bam_dir + "{sample}.dedup.realign.bam", + recal_table = bam_dir + "{sample}.dedup.realign.recal_data.table" + output: + vcf = vcf_dir + "SNV.germline.{sample}.dnascope.vcf.gz", + params: + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + sentieon_exec = config["SENTIEON_EXEC"], + sentieon_lic = config["SENTIEON_LICENSE"], + sentieon_ml_dnascope = config["SENTIEON_DNASCOPE"] + threads: get_threads(cluster_config, 'sentieon_DNAscope') + log: + vcf_dir + "{sample}.dnascope.log" + benchmark: + benchmark_dir + 'sentieon_DNAscope_' + "{sample}.dnascope.tsv" + shell: + """ +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export SENTIEON_TMPDIR={params.tmpdir}; +export SENTIEON_LICENSE={params.sentieon_lic}; +export SENTIEON_DNASCOPE={params.sentieon_ml_dnascope}; + +{params.sentieon_exec} driver -t {threads} -r {input.ref} -i {input.bam} -q {input.recal_table} --algo DNAscope -d {input.dbsnp} {output.vcf} +rm -rf {params.tmpdir}; + """ diff --git a/BALSAMIC/snakemake_rules/variant_calling/sentieon_split_snv_sv.rule b/BALSAMIC/snakemake_rules/variant_calling/sentieon_split_snv_sv.rule 
new file mode 100644 index 000000000..4971ed4ae --- /dev/null +++ b/BALSAMIC/snakemake_rules/variant_calling/sentieon_split_snv_sv.rule @@ -0,0 +1,31 @@ +# vim: syntax=python tabstop=4 expandtab +# coding: utf-8 + +rule bcftools_view_split_variant: + input: + ref = config["reference"]["reference_genome"], + vcf = vcf_dir + "sentieon_tnscope/ALL.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz", + output: + vcf_snv = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz", + vcf_sv = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz", + params: + conda = config["bioinfo_tools"].get("bcftools"), + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + threads: + get_threads(cluster_config, 'split_sentieon_sv_snv') + singularity: + Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() + benchmark: + benchmark_dir + 'split_sentieon_sv_snv_' + config["analysis"]["case_id"] + ".tsv" + shell: + """ +source activate {params.conda}; +export TMPDIR={params.tmpdir}; +mkdir -p {params.tmpdir}; + +bcftools view --include 'INFO/SVTYPE=="."' -O z -o {output.vcf_snv} {input.vcf}; +tabix -p vcf -f {output.vcf_snv}; + +bcftools view --include 'INFO/SVTYPE!="."' -O z -o {output.vcf_sv} {input.vcf}; +tabix -p vcf -f {output.vcf_sv}; + """ diff --git a/BALSAMIC/snakemake_rules/sentieon/sentieon_t_varcall.rule b/BALSAMIC/snakemake_rules/variant_calling/sentieon_t_varcall.rule similarity index 64% rename from BALSAMIC/snakemake_rules/sentieon/sentieon_t_varcall.rule rename to BALSAMIC/snakemake_rules/variant_calling/sentieon_t_varcall.rule index 10c49942c..4e94d0187 100644 --- a/BALSAMIC/snakemake_rules/sentieon/sentieon_t_varcall.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/sentieon_t_varcall.rule @@ -1,11 +1,7 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 - -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_sample_type -from BALSAMIC.utils.rule import 
get_threads - +from BALSAMIC.utils.constants import VARCALL_PARAMS def get_pon(config): """ return pon cli string, complete with file """ @@ -27,10 +23,10 @@ rule sentieon_TNsnv_tumor_only: stats = vcf_dir + config["analysis"]["case_id"] + ".tnsnv.call_stats", namemap = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnsnv.sample_name_map", params: - tmpdir = tmp_dir, + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), tumor = get_sample_type(config["samples"], "tumor"), pon = " " if get_pon(config) is None else " ".join(["--pon", get_pon(config)]), - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], threads: get_threads(cluster_config, 'sentieon_TNsnv_tumor_only') log: @@ -39,16 +35,15 @@ rule sentieon_TNsnv_tumor_only: benchmark_dir + config["analysis"]["case_id"] + ".tnsnv.tsv" shell: """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p ${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver -r {input.ref} -t {threads} -q {input.recal_data_table} -i {input.bam} --algo TNsnv --tumor_sample {params.tumor} {params.pon} --cosmic {input.cosmic} --dbsnp {input.dbsnp} --call_stats_out {output.stats} {output.vcf} echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap}; +rm -rf {params.tmpdir}; """ rule sentieon_TNhaplotyper_tumor_only: @@ -62,10 +57,10 @@ rule sentieon_TNhaplotyper_tumor_only: vcf = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnhaplotyper.vcf.gz", namemap = vcf_dir + "SNV.somatic." 
+ config["analysis"]["case_id"] + ".tnhaplotyper.sample_name_map", params: - tmpdir = tmp_dir, + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), tumor = get_sample_type(config["samples"], "tumor"), pon = " " if get_pon(config) is None else " ".join(["--pon", get_pon(config)]), - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], threads: get_threads(cluster_config, 'sentieon_TNhaplotyper_tumor_only') log: @@ -74,16 +69,15 @@ rule sentieon_TNhaplotyper_tumor_only: benchmark_dir + config["analysis"]["case_id"] + ".tnhaplotyper.tsv" shell: """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p ${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver -r {input.ref} -t {threads} -i {input.bam} -q {input.recal_data_table} --algo TNhaplotyper --tumor_sample {params.tumor} {params.pon} --cosmic {input.cosmic} --dbsnp {input.dbsnp} {output.vcf} echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap}; +rm -rf {params.tmpdir}; """ @@ -94,13 +88,16 @@ rule sentieon_TNscope_tumor_only: bam = expand(bam_dir + "{tumor}.dedup.realign.bam", tumor=get_sample_type(config["samples"], "tumor")), recal = expand(bam_dir + "{tumor}.dedup.realign.recal_data.table", tumor=get_sample_type(config["samples"], "tumor")), output: - vcf = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz", - namemap = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnscope.sample_name_map", + vcf = vcf_dir + "sentieon_tnscope/ALL.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz", + namemap_snv = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnscope.sample_name_map", + namemap_sv = vcf_dir + "SV.somatic." 
+ config["analysis"]["case_id"] + ".tnscope.sample_name_map", params: - tmpdir = tmp_dir, + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), tumor = get_sample_type(config["samples"], "tumor"), + tumor_options = VARCALL_PARAMS["tnscope"]["tumor"], pon = " " if get_pon(config) is None else " ".join(["--pon", get_pon(config)]), - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + sentieon_ml_tnscope = config["SENTIEON_TNSCOPE"], + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], threads: get_threads(cluster_config, 'sentieon_TNscope_tumor_only') log: @@ -109,39 +106,18 @@ rule sentieon_TNscope_tumor_only: benchmark_dir + config["analysis"]["case_id"] + ".tnscope.tsv" shell: """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p ${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; -{params.sentieon_exec} driver -t {threads} -r {input.ref} -i {input.bam} -q {input.recal} --algo TNscope --tumor_sample {params.tumor} {params.pon} --dbsnp {input.dbsnp} {output.vcf}; +{params.sentieon_exec} driver -t {threads} -r {input.ref} \ +-i {input.bam} -q {input.recal} --algo TNscope \ +--tumor_sample {params.tumor} {params.pon} \ +--dbsnp {input.dbsnp} \ +{params.tumor_options} {output.vcf}; -echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap}; +echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap_snv}; +cp {output.namemap_snv} {output.namemap_sv} +rm -rf {params.tmpdir}; """ - - -#rule sentioen_filter_TNscope_tumor_only: -# input: -# ref = config["reference"]["reference_genome"], -# tnscope_vcf = vcf_dir + "sentieon_tnscope/SNV.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz", -# output: -# tnscope_filtered_vcf = vcf_dir + "SNV.somatic." 
+ config["analysis"]["case_id"] + ".tnscope.vcf.gz", -# params: -# sentieon_exec = SENTIEON_INSTALL_DIR + "/bin/sentieon", -# sentieon_lic = SENTIEON_LICENSE, -# sentieon_ml_tnscope = SENTIEON_TNSCOPE -# log: -# vcf_dir + config["analysis"]["case_id"] + ".tnscope.filtered.log", -# benchmark: -# vcf_dir + "benchmarks/" + config["analysis"]["case_id"] + ".tnscope_filter.tsv" -# shell: -# """ -#export SENTIEON_LICENSE={params.sentieon_lic}; -#export SENTIEON_TNSCOPE={params.sentieon_ml_tnscope}; -# -#{params.sentieon_exec} driver -r {input.ref} --algo TNModelApply -m {params.sentieon_ml_tnscope} -v {input.tnscope_vcf} {output.tnscope_filtered_vcf} -# """ - - diff --git a/BALSAMIC/snakemake_rules/sentieon/sentieon_tn_varcall.rule b/BALSAMIC/snakemake_rules/variant_calling/sentieon_tn_varcall.rule similarity index 63% rename from BALSAMIC/snakemake_rules/sentieon/sentieon_tn_varcall.rule rename to BALSAMIC/snakemake_rules/variant_calling/sentieon_tn_varcall.rule index 490389af2..a0872d7f2 100644 --- a/BALSAMIC/snakemake_rules/sentieon/sentieon_tn_varcall.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/sentieon_tn_varcall.rule @@ -1,11 +1,7 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 - -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_sample_type -from BALSAMIC.utils.rule import get_threads - +from BALSAMIC.utils.constants import VARCALL_PARAMS rule sentieon_TN_corealign: input: @@ -19,8 +15,8 @@ rule sentieon_TN_corealign: output: bam = bam_dir + config["analysis"]["case_id"] + ".corealign.bam" params: - tmpdir = tmp_dir, - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], threads: get_threads(cluster_config, 'sentieon_TN_corealign') log: @@ -29,15 +25,14 @@ rule sentieon_TN_corealign: benchmark_dir + 'sentieon_TN_corealign_' + config["analysis"]["case_id"] + 
".corealign.tsv" shell: """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p ${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver -r {input.ref} -t {threads} -i {input.bamT} -i {input.bamN} -q {input.recalT} -q {input.recalN} --algo Realigner -k {input.mills} -k {input.indel_1kg} {output.bam} +rm -rf {params.tmpdir}; """ @@ -51,10 +46,10 @@ rule sentieon_TNsnv: stats = vcf_dir + config["analysis"]["case_id"] + ".tnsnv.call_stats", namemap = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnsnv.sample_name_map", params: - tmpdir = tmp_dir, + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), tumor = get_sample_type(config["samples"], "tumor"), normal = get_sample_type(config["samples"], "normal"), - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], threads: get_threads(cluster_config, 'sentieon_TNsnv') log: @@ -63,16 +58,15 @@ rule sentieon_TNsnv: benchmark_dir + 'sentieon_TNsnv_' + config["analysis"]["case_id"] + ".tnsnv.tsv" shell: """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p ${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver -r {input.ref} -t {threads} -i {input.bam} --algo TNsnv --tumor_sample {params.tumor} --normal_sample {params.normal} --dbsnp {input.dbsnp} --call_stats_out {output.stats} {output.vcf} echo -e \"{params.tumor}\\tTUMOR\n{params.normal}\\tNORMAL\" > {output.namemap}; +rm -rf {params.tmpdir}; """ @@ -85,10 +79,10 @@ rule sentieon_TNhaplotyper: vcf = vcf_dir + 
"SNV.somatic." + config["analysis"]["case_id"] + ".tnhaplotyper.vcf.gz", namemap = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnhaplotyper.sample_name_map", params: - tmpdir = tmp_dir, + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), tumor = get_sample_type(config["samples"], "tumor"), normal = get_sample_type(config["samples"], "normal"), - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], threads: get_threads(cluster_config, 'sentieon_TNhaplotyper') log: @@ -97,16 +91,15 @@ rule sentieon_TNhaplotyper: benchmark_dir + 'sentieon_TNhaplotyper_' + config["analysis"]["case_id"] + ".tnhaplotyper.tsv" shell: """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p ${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver -r {input.ref} -t {threads} -i {input.bam} --algo TNhaplotyper --tumor_sample {params.tumor} --normal_sample {params.normal} --dbsnp {input.dbsnp} {output.vcf} echo -e \"{params.tumor}\\tTUMOR\n{params.normal}\\tNORMAL\" > {output.namemap}; +rm -rf {params.tmpdir}; """ @@ -119,14 +112,17 @@ rule sentieon_TNscope: recalT = expand(bam_dir + "{tumor}.dedup.realign.recal_data.table", tumor=get_sample_type(config["samples"], "tumor")), recalN = expand(bam_dir + "{normal}.dedup.realign.recal_data.table", normal=get_sample_type(config["samples"], "normal")), output: - vcf = vcf_dir + "sentieon_tnscope/SNV.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz", - namemap = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnscope.sample_name_map", + vcf_all = vcf_dir + "sentieon_tnscope/ALL.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz", + namemap_snv = vcf_dir + "SNV.somatic." 
+ config["analysis"]["case_id"] + ".tnscope.sample_name_map", + namemap_sv = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".tnscope.sample_name_map", params: - tmpdir = tmp_dir, + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), tumor = get_sample_type(config["samples"], "tumor"), normal = get_sample_type(config["samples"], "normal"), - variant_setting = "--min_init_normal_lod 0.5 --min_normal_lod 1.0 --min_init_tumor_lod 1.0 --min_tumor_lod 8", - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + tumor_options = VARCALL_PARAMS["tnscope"]["tumor"], + normal_options = VARCALL_PARAMS["tnscope"]["normal"], + sentieon_ml_tnscope = config["SENTIEON_TNSCOPE"], + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], threads: get_threads(cluster_config, 'sentieon_TNscope') log: @@ -135,45 +131,23 @@ rule sentieon_TNscope: benchmark_dir + 'sentieon_TNscope_' + config["analysis"]["case_id"] + ".tnscope.tsv" shell: """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p ${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; -{params.sentieon_exec} driver -t {threads} -r {input.ref} -i {input.bamT} -q {input.recalT} -i {input.bamN} -q {input.recalN} --algo TNscope --tumor_sample {params.tumor} --normal_sample {params.normal} --dbsnp {input.dbsnp} {params.variant_setting} {output.vcf} - -echo -e \"{params.tumor}\\tTUMOR\n{params.normal}\\tNORMAL\" > {output.namemap}; - """ +intermediate_vcf={params.tmpdir}/tn_sentieon_varcall_file +{params.sentieon_exec} driver -t {threads} \ +-r {input.ref} -i {input.bamT} -q {input.recalT} -i {input.bamN} \ +-q {input.recalN} --algo TNscope --tumor_sample {params.tumor} \ +--normal_sample {params.normal} --dbsnp {input.dbsnp} \ +{params.tumor_options} {params.normal_options} $intermediate_vcf 
-rule sentioen_filter_TNscope: - input: - ref = config["reference"]["reference_genome"], - tnscope_vcf = vcf_dir + "sentieon_tnscope/SNV.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz", - output: - tnscope_filtered_vcf = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz", - params: - tmpdir = tmp_dir, - sentieon_ml_tnscope = SENTIEON_TNSCOPE, - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", - sentieon_lic = config["SENTIEON_LICENSE"], - threads: get_threads(cluster_config, 'sentieon_filter_TNscope') - log: - vcf_dir + config["analysis"]["case_id"] + ".tnscope.filtered.log", - benchmark: - benchmark_dir + 'sentioen_filter_TNscope_' + config["analysis"]["case_id"] + ".tnscope_filter.tsv" - shell: - """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p ${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; -export SENTIEON_LICENSE={params.sentieon_lic}; -export SENTIEON_TNSCOPE={params.sentieon_ml_tnscope} +{params.sentieon_exec} driver -r {input.ref} --algo TNModelApply \ +-m {params.sentieon_ml_tnscope} -v $intermediate_vcf {output.vcf_all} -{params.sentieon_exec} driver -r {input.ref} --algo TNModelApply -m {params.sentieon_ml_tnscope} -v {input.tnscope_vcf} {output.tnscope_filtered_vcf} +echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap_snv}; +cp {output.namemap_snv} {output.namemap_sv} +rm -rf {params.tmpdir}; """ - diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule index 740ebf2df..9c9a8337c 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule @@ -1,12 +1,6 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -from snakemake.shell import shell - -from BALSAMIC.utils.rule import get_conda_env -from 
BALSAMIC.utils.rule import get_sample_type -from BALSAMIC import __version__ as bv - normal_bam = "normal.merged.bam" tumor_bam = "tumor.merged.bam" @@ -21,27 +15,36 @@ rule manta_tumor_normal: input: fa = config["reference"]["reference_genome"], bamN = bam_dir + normal_bam, bamT = bam_dir + tumor_bam, output: - candidateindel = vcf_dir + "manta/results/variants/candidateSmallIndels.vcf.gz", final = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".manta.vcf.gz", namemap = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".manta.sample_name_map", params: - tmpdir = vcf_dir + "manta/", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), runmode = "local", - conda = get_conda_env(config["conda_env_yaml"],"manta") - singularity: singularity_image + tumor = get_sample_type(config["samples"], "tumor"), + normal = get_sample_type(config["samples"], "normal"), + conda = config["bioinfo_tools"].get("manta") + singularity: Path(singularity_image, config["bioinfo_tools"].get("manta") + ".sif").as_posix() benchmark: benchmark_dir + 'manta_tumor_normal_' + config["analysis"]["case_id"] + ".manta.tsv" threads: get_threads(cluster_config, "manta_tumor_normal") shell: - "source activate {params.conda};" - "rm -rf {params.tmpdir}; " - "configManta.py " - "--normalBam={input.bamN} " - "--tumorBam={input.bamT} " - "--referenceFasta={input.fa} " - "--runDir={params.tmpdir}; " - "python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads};" - "cp {params.tmpdir}/results/variants/somaticSV.vcf.gz {output.final}; " - "tabix -p vcf -f {output.final}; " - "echo -e \"TUMOR\\tTUMOR\nNORMAL\\tNORMAL\" > {output.namemap}; " - + """ +source activate {params.conda}; + +configManta.py \ +--normalBam={input.bamN} \ +--tumorBam={input.bamT} \ +--referenceFasta={input.fa} \ +--runDir={params.tmpdir}; + +python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; + +cp {params.tmpdir}/results/variants/somaticSV.vcf.gz {output.final}; + +tabix -p vcf -f {output.final}; + +echo -e 
\"{params.tumor}\\tTUMOR\n{params.normal}\\tNORMAL\" > {output.namemap}; + +rm -rf {params.tmpdir}; + """ + diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule index 558c941d5..8e3fa4606 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule @@ -1,10 +1,6 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_sample_type -from BALSAMIC import __version__ as bv - tumor_bam = "tumor.merged.bam" if config["analysis"]["sequencing_type"] == 'wgs': @@ -16,26 +12,34 @@ rule manta_tumor_only: fa = config["reference"]["reference_genome"], bamT = bam_dir + tumor_bam output: - candidateindel = vcf_dir + "manta/results/variants/candidateSmallIndels.vcf.gz", final = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".manta.vcf.gz", namemap = vcf_dir + "SV.somatic." 
+ config["analysis"]["case_id"] + ".manta.sample_name_map" params: - tmpdir = vcf_dir + "manta/", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), runmode = "local", - conda = get_conda_env(config["conda_env_yaml"],"manta") + tumor = get_sample_type(config["samples"], "tumor"), + conda = config["bioinfo_tools"].get("manta") threads: get_threads(cluster_config, "manta_tumor_only") - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("manta") + ".sif").as_posix() benchmark: benchmark_dir + 'manta_tumor_only_' + config["analysis"]["case_id"] + ".manta.tsv" shell: - "source activate {params.conda};" - "rm -rf {params.tmpdir}; " - "configManta.py " - "--tumorBam={input.bamT} " - "--referenceFasta={input.fa} " - "--runDir={params.tmpdir}; " - "python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads};" - "cp {params.tmpdir}/results/variants/tumorSV.vcf.gz {output.final}; " - "tabix -p vcf -f {output.final}; " - "echo -e \"TUMOR\\tTUMOR\" > {output.namemap}; " - + """ +source activate {params.conda}; + +configManta.py + "--tumorBam={input.bamT} + "--referenceFasta={input.fa} + "--runDir={params.tmpdir}; + +python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; + +cp {params.tmpdir}/results/variants/tumorSV.vcf.gz {output.final}; + +tabix -p vcf -f {output.final}; + +echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap}; + +rm -rf {params.tmpdir}; + """ + diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_normal.rule index 86becaf73..444cbba27 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_normal.rule @@ -1,18 +1,6 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -import os -from BALSAMIC.utils.rule import get_picard_mrkdup -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_chrom 
-from BALSAMIC.utils.rule import get_sample_type -from BALSAMIC.utils.rule import get_threads -from BALSAMIC import __version__ as bv - -picarddup = get_picard_mrkdup(config) -chromlist = config["panel"]["chrom"] -capture_kit = os.path.split(config["panel"]["capture_kit"])[1] - rule vardict_tumor_normal: input: fa = config["reference"]["reference_genome"], @@ -22,35 +10,35 @@ rule vardict_tumor_normal: output: temp(vcf_dir + "vardict/split_vcf/{bedchrom}_vardict.vcf.gz") params: - tmpdir = tmp_dir, + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), af = "0.005", max_pval = "0.9", max_mm = "4.5", col_info = "-c 1 -S 2 -E 3 -g 4", name = config["analysis"]["case_id"], - conda = get_conda_env(config["conda_env_yaml"], "vardict"), - singularity: singularity_image + conda = config["bioinfo_tools"].get("vardict"), threads: get_threads(cluster_config, "vardict_tumor_normal") + singularity: Path(singularity_image, config["bioinfo_tools"].get("vardict") + ".sif").as_posix() benchmark: benchmark_dir + 'vardict_tumor_normal_' + "{bedchrom}.vardict_tumor_normal.tsv" shell: - "source activate {params.conda}; " - "export PERL5LIB=;" - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "export VAR_DICT_OPTS='\"-Djava.io.tmpdir=${{tmpdir}}\" \"-Xmx48G\"'; " - "vardict-java -U -u -I 600 -G {input.fa} -f {params.af} -N {params.name} " - " -th {threads} " - "-b \"{input.bamT}|{input.bamN}\" " - "{params.col_info} {input.bed} " - "| testsomatic.R " - "| var2vcf_paired.pl -P {params.max_pval} " - "-m {params.max_mm} -M -f {params.af} -N {params.name} " - "| bgzip > {output}; " - "tabix -p vcf {output}; " - "source deactivate;" + """ +source activate {params.conda}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export VAR_DICT_OPTS='\"-Djava.io.tmpdir={params.tmpdir}\" \"-Xmx32G\"'; +vardict-java -U -u -I 600 -G {input.fa} -f {params.af} -N {params.name} \ +-b \"{input.bamT}|{input.bamN}\" \ 
+-th {threads} \ +{params.col_info} {input.bed} \ +| testsomatic.R \ +| var2vcf_paired.pl -P {params.max_pval} \ +-m {params.max_mm} -M -f {params.af} -N {params.name} \ +| bgzip > {output}; +tabix -p vcf {output}; +rm -rf {params.tmpdir}; + """ + rule vardict_merge: input: @@ -60,174 +48,24 @@ rule vardict_merge: yaml = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".vardict.yaml", namemap = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".vardict.sample_name_map" params: - tmpdir = tmp_dir, - conda = get_conda_env(config["conda_env_yaml"], "vardict"), + conda = config["bioinfo_tools"].get("vardict"), + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), name = config["analysis"]["case_id"], - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("vardict") + ".sif").as_posix() benchmark: benchmark_dir + 'vardict_merge_' + config["analysis"]["case_id"] + ".vardict_merge.tsv" shell: - "source activate {params.conda} ; " - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "bcftools concat {input} | bcftools sort --temp-dir ${{tmpdir}} - | bgzip > {output.vcf}; " - "tabix -f -p vcf {output.vcf}; " - "echo -e \"{params.name}\\tTUMOR\n{params.name}-match\\tNORMAL\" > {output.namemap}; " - "echo -e \"{params.name}\" > {output.namemap}.tumor; " - "echo -e \"{params.name}-match\" > {output.namemap}.normal; " - "echo '{{ vcf: {{ vardict: {{ name: vardict, path: {output.vcf} }} }} }}' > {output.yaml}; " - "source deactivate;" - - -rule mutect2_tumor_normal: - input: - fa = config["reference"]["reference_genome"], - dbsnp = config["reference"]["dbsnp"], - cosmic = config["reference"]["cosmic"], - bamT = bam_dir + "tumor.sorted." + picarddup + ".bsrcl.merged.bam", - bamN = bam_dir + "normal.sorted." + picarddup + ".bsrcl.merged.bam", - bed = vcf_dir + "split_bed/{bedchrom}." 
+ capture_kit, - output: - temp(vcf_dir + "mutect/split_vcf/{bedchrom}_mutect.vcf.gz") - params: - tmpdir = tmp_dir, - result_dir = vcf_dir + "mutect/", - conda = get_conda_env(config["conda_env_yaml"],"gatk") - threads: get_threads(cluster_config, "mutect2_tumor_normal") - singularity: singularity_image - benchmark: - benchmark_dir + 'mutect2_tumor_normal_' + "{bedchrom}.mutect2_tumor_normal.tsv" - shell: - "source activate {params.conda};" - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "mkdir -p {params.result_dir}; " - "java -jar -Djava.io.tmpdir=${{tmpdir}} -Xmx32G $CONDA_PREFIX/opt/gatk-3.8/GenomeAnalysisTK.jar " - "-T MuTect2 " - "-R {input.fa} " - "--cosmic {input.cosmic} " - "--dbsnp {input.dbsnp} " - "-I:normal {input.bamN} " - "-I:tumor {input.bamT} " - "--useNewAFCalculator " - "--disable_auto_index_creation_and_locking_when_reading_rods " - "-L {input.bed} " - " | bgzip > {output}; " - "tabix -p vcf {output}; " - - -rule mutect2_merge: - input: - expand(vcf_dir + "mutect/split_vcf/{chrom}_mutect.vcf.gz", chrom=chromlist) - output: - namemap = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".mutect.sample_name_map", - yaml = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".mutect.yaml", - vcf = vcf_dir + "SNV.somatic." 
+ config["analysis"]["case_id"] + ".mutect.vcf.gz" - params: - tmpdir = tmp_dir, - name = config["analysis"]["case_id"], - conda = get_conda_env(config["conda_env_yaml"],"bcftools"), - singularity: singularity_image - benchmark: - benchmark_dir + 'mutect2_merge_' + config["analysis"]["case_id"] + ".mutect2_merge.tsv" - shell: - "source activate {params.conda} ; " - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "bcftools concat {input} | bcftools sort --temp-dir ${{tmpdir}} - | bgzip > {output.vcf}; " - "tabix -f -p vcf {output.vcf}; " - "echo -e \"TUMOR\\tTUMOR\nNORMAL\\tNORMAL\" > {output.namemap}; " - "echo -e \"TUMOR\" > {output.namemap}.tumor; " - "echo -e \"NORMAL\" > {output.namemap}.normal; " - "echo '{{ vcf: {{ mutect: {{ name: mutect2, path: {output.vcf} }} }} }}' > {output.yaml}; " - "source deactivate;" - -rule strelka_tumor_normal: - input: - fa = config["reference"]["reference_genome"], - bamN = bam_dir + "normal.merged.bam", - bamT = bam_dir + "tumor.merged.bam", - mantaindel = vcf_dir + "manta/results/variants/candidateSmallIndels.vcf.gz" - output: - namemap = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".strelka.sample_name_map", - yaml = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".strelka.yaml", - final = vcf_dir + "SNV.somatic." 
+ config["analysis"]["case_id"] + ".strelka.vcf.gz", - default = expand(vcf_dir + "strelka/results/variants/{vcf_file}", vcf_file=["somatic.snvs.vcf.gz", "somatic.indels.vcf.gz"]) - params: - name = config["analysis"]["case_id"], - tmpdir = vcf_dir + "strelka/", - runmode = "local", - conda = get_conda_env(config["conda_env_yaml"],"strelka"), - threads: get_threads(cluster_config, "strelka_tumor_normal") - singularity: singularity_image - benchmark: - benchmark_dir + 'strelka_tumor_normal_' + config["analysis"]["case_id"] + ".strelka_tumor_normal.tsv" - shell: - "source activate {params.conda};" - "rm -rf {params.tmpdir}; " - "configureStrelkaSomaticWorkflow.py " - "--normalBam={input.bamN} " - "--tumorBam={input.bamT} " - "--referenceFasta={input.fa} " - "--indelCandidates {input.mantaindel} " - "--outputCallableRegions " - "--exome " - "--runDir={params.tmpdir}; " - "python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; " - "bcftools concat -a " - "-o {output.final} " - "-O z " - "{output.default}; " - "tabix -f -p vcf {output.final}; " - "echo -e \"TUMOR\\tTUMOR\nNORMAL\\tNORMAL\" > {output.namemap}; " - "echo -e \"TUMOR\" > {output.namemap}.tumor; " - "echo -e \"NORMAL\" > {output.namemap}.normal; " - "echo '{{ vcf: {{ strelka: {{ name: strelka , path: {output.final} }} }} }}' > {output.yaml}; " - - - -rule somatic_snv_indel_vcf_merge: - input: - name_map = expand(vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".{var_caller}.sample_name_map", var_caller=somatic_caller_snv), - varcall_yaml = expand(vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".{var_caller}.yaml", var_caller=somatic_caller_snv), - refdict = (config["reference"]["reference_genome"]).replace(".fasta",".dict"), - reffasta = config["reference"]["reference_genome"], - bamN = bam_dir + "normal.merged.bam", - bamT = bam_dir + "tumor.merged.bam", - output: - vcfmerge = vcf_dir + "vcfmerge/SNV.somatic." 
+ config["analysis"]["case_id"] + ".vcfmerge.vcf.gz", - vcf = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".vcfmerge.vcf.gz" - params: - workdir = vcf_dir + "/vcfmerge", - conda = get_conda_env(config["conda_env_yaml"],"vcfmerge"), - threads: get_threads(cluster_config, 'somatic_snv_indel_vcf_merge') - singularity: singularity_image - benchmark: - benchmark_dir + "somatic_snv_inde_vcf_merge.tsv" - shell: - "source activate {params.conda}; " - "mkdir -p {params.workdir}; " - "cat {input.name_map} > {params.workdir}/sample_name.map; " - "echo '{{bam: {{NORMAL: {input.bamN}, TUMOR: {input.bamT} }} }}' | " - " yq -s '{{ vcf: map(.vcf) | add }} * .[0]' - {input.varcall_yaml} " - " > {params.workdir}/vcf.yaml; " - "vcfmerge --sample-config {params.workdir}/vcf.yaml " - " --reference-dict {input.refdict} " - " --reference {input.reffasta} " - " --sample-names {params.workdir}/sample_name.map " - " --aggr-func max " - " --output-dir {params.workdir} " - " --mapq 10 " - " --include-optional " - " --output-vcf {output.vcfmerge}; " - "cp {output.vcfmerge} {output.vcf}; " - + """ +source activate {params.conda}; +mkdir -p {params.tmpdir}; +bcftools concat {input} | bcftools sort --temp-dir {params.tmpdir} - | bgzip > {output.vcf}; +tabix -f -p vcf {output.vcf}; +echo -e \"{params.name}\\tTUMOR\\n{params.name}-match\\tNORMAL\" > {output.namemap}; +echo -e \"{params.name}\" > {output.namemap}.tumor; +echo -e \"{params.name}-match\" > {output.namemap}.normal; +echo '{{ vcf: {{ vardict: {{ name: vardict, path: {output.vcf} }} }} }}' > {output.yaml}; +rm -rf {params.tmpdir}; + """ rule sentieon_TNhaplotyper: @@ -241,10 +79,10 @@ rule sentieon_TNhaplotyper: vcf = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnhaplotyper.vcf.gz", namemap = vcf_dir + "SNV.somatic." 
+ config["analysis"]["case_id"] + ".tnhaplotyper.sample_name_map", params: - tmpdir = tmp_dir, + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), tumor = get_sample_type(config["samples"], "tumor"), normal = get_sample_type(config["samples"], "normal"), - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], threads: get_threads(cluster_config, 'sentieon_TNhaplotyper') log: @@ -253,15 +91,13 @@ rule sentieon_TNhaplotyper: benchmark_dir + 'sentieon_TNhaplotyper_' + config["analysis"]["case_id"] + ".tnhaplotyper.tsv" shell: """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p ${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver -r {input.ref} -t {threads} -i {input.bamT} -i {input.bamN} --interval {input.interval} --algo TNhaplotyper --tumor_sample TUMOR --normal_sample NORMAL --dbsnp {input.dbsnp} {output.vcf} echo -e \"TUMOR\\tTUMOR\nNORMAL\\tNORMAL\" > {output.namemap}; +rm -rf {params.tmpdir}; """ - diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_only.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_only.rule index 9b8b37e6a..0f78838f3 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_only.rule @@ -1,20 +1,6 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -import os - -from BALSAMIC.utils.rule import get_picard_mrkdup -from BALSAMIC.utils.rule import get_conda_env -from BALSAMIC.utils.rule import get_chrom -from BALSAMIC.utils.rule import get_sample_type -from BALSAMIC.utils.rule import get_threads -from BALSAMIC import __version__ as bv - -picarddup = get_picard_mrkdup(config) -chromlist = 
config["panel"]["chrom"] -capture_kit = os.path.split(config["panel"]["capture_kit"])[1] - - def get_pon(config): """ return pon cli string, complete with file """ if "PON" in config["analysis"]: @@ -36,9 +22,9 @@ rule vardict_tumor_only: max_mm = "4.5", col_info = "-c 1 -S 2 -E 3 -g 4", name = config["analysis"]["case_id"], - conda = get_conda_env(config["conda_env_yaml"],"vardict"), - singularity: singularity_image + conda = config["bioinfo_tools"].get("vardict"), threads: get_threads(cluster_config, "vardict_tumor_only") + singularity: Path(singularity_image, config["bioinfo_tools"].get("vardict") + ".sif").as_posix() benchmark: benchmark_dir + 'vardict_tumor_only_' + '{bedchrom}.vardict.tsv' shell: @@ -52,6 +38,7 @@ rule vardict_tumor_only: "vardict-java -u -I 600 -G {input.fa} -f {params.af} -N {params.name} " " -th {threads} " "-b {input.bamT} " + " -th {threads} " "{params.col_info} {input.bed} " "| teststrandbias.R " "| var2vcf_valid.pl -P {params.max_pval} " @@ -69,9 +56,9 @@ rule vardict_merge: vcf = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".vardict.vcf.gz" params: tmpdir = tmp_dir, - conda = get_conda_env(config["conda_env_yaml"],"vardict"), + conda = config["bioinfo_tools"].get("vardict"), name = config["analysis"]["case_id"], - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("vardict") + ".sif").as_posix() benchmark: benchmark_dir + 'vardict_merge_' + config["analysis"]["case_id"] + ".vardict_merge.tsv" shell: @@ -87,106 +74,6 @@ rule vardict_merge: "echo '{{ vcf: {{ vardict: {{ name: vardict , path: {output.vcf} }} }} }}' > {output.yaml}; " "source deactivate;" - -rule mutect2_tumor_only: - input: - fa = config["reference"]["reference_genome"], - dbsnp = config["reference"]["dbsnp"], - cosmic = config["reference"]["cosmic"], - bamT = bam_dir + "tumor.sorted." + picarddup + ".bsrcl.merged.bam", - bed = vcf_dir + "split_bed/{bedchrom}." 
+ capture_kit, - output: - temp(vcf_dir + "mutect/split_vcf/{bedchrom}_mutect.vcf.gz") - params: - tmpdir = tmp_dir, - conda = get_conda_env(config["conda_env_yaml"],"gatk") - threads: get_threads(cluster_config, "mutect2_tumor_only") - singularity: singularity_image - benchmark: - benchmark_dir + 'mutect2_tumor_only_' + "{bedchrom}.mutect2_tumor_only.tsv" - shell: - "source activate {params.conda};" - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "java -jar -Djava.io.tmpdir=${{tmpdir}} -Xmx32G $CONDA_PREFIX/opt/gatk-3.8/GenomeAnalysisTK.jar " - "-T MuTect2 " - "-R {input.fa} " - "--cosmic {input.cosmic} " - "--dbsnp {input.dbsnp} " - "-I:tumor {input.bamT} " - "--useNewAFCalculator " - "--disable_auto_index_creation_and_locking_when_reading_rods " - "-L {input.bed} " - " | bgzip > {output}; " - "tabix -p vcf {output}; " - - -rule mutect2_merge: - input: - expand(vcf_dir + "mutect/split_vcf/{chrom}_mutect.vcf.gz", chrom=chromlist) - output: - namemap = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".mutect.sample_name_map", - yaml = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".mutect.yaml", - vcf = vcf_dir + "SNV.somatic." 
+ config["analysis"]["case_id"] + ".mutect.vcf.gz" - params: - tmpdir = tmp_dir, - conda = get_conda_env(config["conda_env_yaml"],"bcftools"), - singularity: singularity_image - benchmark: - benchmark_dir + 'mutect2_merge_' + config["analysis"]["case_id"] + ".mutect2_merge.tsv" - shell: - "source activate {params.conda} ; " - "rand_str=$(openssl rand -hex 5); " - "tmpdir={params.tmpdir}/${{rand_str}}; " - "mkdir -p ${{tmpdir}}; " - "export TMPDIR=${{tmpdir}}; " - "bcftools concat {input} | bcftools sort --temp-dir ${{tmpdir}} - | bgzip > {output.vcf}; " - "tabix -f -p vcf {output.vcf}; " - "echo -e \"TUMOR\\tTUMOR\" > {output.namemap}; " - "echo -e \"TUMOR\" > {output.namemap}.tumor; " - "echo '{{ vcf: {{ mutect: {{ name: mutect2 , path: {output.vcf} }} }} }}' > {output.yaml}; " - "source deactivate;" - - -rule somatic_snv_indel_vcf_merge: - input: - name_map = expand(vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".{var_caller}.sample_name_map", var_caller=somatic_caller_snv), - varcall_yaml = expand(vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".{var_caller}.yaml", var_caller=somatic_caller_snv), - refdict = (config["reference"]["reference_genome"]).replace(".fasta",".dict"), - reffasta = config["reference"]["reference_genome"], - bamT = bam_dir + "tumor.merged.bam", - output: - vcfmerge = vcf_dir + "vcfmerge/SNV.somatic." + config["analysis"]["case_id"] + ".vcfmerge.vcf.gz", - vcf = vcf_dir + "SNV.somatic." 
+ config["analysis"]["case_id"] + ".vcfmerge.vcf.gz" - params: - workdir = vcf_dir + "/vcfmerge", - conda = get_conda_env(config["conda_env_yaml"],"vcfmerge") - threads: get_threads(cluster_config, 'somatic_snv_indel_vcf_merge') - singularity: singularity_image - benchmark: - benchmark_dir + "somatic_snv_inde_vcf_merge.tsv" - shell: - "source activate {params.conda}; " - "mkdir -p {params.workdir}; " - "cat {input.name_map} > {params.workdir}/sample_name.map; " - "echo '{{bam: {{TUMOR: {input.bamT} }} }}' | " - " yq -s '{{ vcf: map(.vcf) | add }} * .[0]' - {input.varcall_yaml} " - " > {params.workdir}/vcf.yaml; " - "vcfmerge --sample-config {params.workdir}/vcf.yaml " - " --reference-dict {input.refdict} " - " --reference {input.reffasta} " - " --sample-names {params.workdir}/sample_name.map " - " --aggr-func max " - " --output-dir {params.workdir} " - " --mapq 10 " - " --include-optional " - " --output-vcf {output.vcf}; " - "cp {output.vcfmerge} {output.vcf}; " - - - rule sentieon_TNhaplotyper_tumor_only: input: bam = bam_dir + "tumor.sorted." 
+ picarddup + ".bsrcl.merged.bam", @@ -201,7 +88,7 @@ rule sentieon_TNhaplotyper_tumor_only: tmpdir = tmp_dir, tumor = get_sample_type(config["samples"], "tumor"), pon = " " if get_pon(config) is None else " ".join(["--pon", get_pon(config)]), - sentieon_exec = config["SENTIEON_INSTALL_DIR"] + "/bin/sentieon", + sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], threads: get_threads(cluster_config, 'sentieon_TNhaplotyper_tumor_only') log: @@ -216,8 +103,6 @@ mkdir -p ${{tmpdir}}; export TMPDIR=${{tmpdir}}; export SENTIEON_TMPDIR=${{tmpdir}}; export SENTIEON_LICENSE={params.sentieon_lic}; - {params.sentieon_exec} driver -r {input.ref} -t {threads} -i {input.bam} --interval {input.interval} --algo TNhaplotyper --tumor_sample TUMOR {params.pon} --cosmic {input.cosmic} --dbsnp {input.dbsnp} {output.vcf} - echo -e \"TUMOR\\tTUMOR\" > {output.namemap}; """ diff --git a/BALSAMIC/snakemake_rules/variant_calling/split_bed.rule b/BALSAMIC/snakemake_rules/variant_calling/split_bed.rule index 632c3b4b4..daa829814 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/split_bed.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/split_bed.rule @@ -1,15 +1,6 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -__author__ = "Hassan Foroughi Asl" - -import os -from BALSAMIC.utils.rule import get_conda_env, get_chrom, get_picard_mrkdup - -chromlist = config["panel"]["chrom"] -capture_kit = os.path.split(config["panel"]["capture_kit"])[1] -picarddup = get_picard_mrkdup(config) - rule split_bed_by_chrom: input: bed = config["panel"]["capture_kit"], @@ -21,10 +12,10 @@ rule split_bed_by_chrom: tmpdir = tmp_dir, split_bed_dir = vcf_dir + "split_bed/", origin_bed = capture_kit, - conda = get_conda_env(config["conda_env_yaml"],"bedtools"), + conda = config["bioinfo_tools"].get("bedtools"), benchmark: benchmark_dir + 'split_bed_by_chrom.tsv' - singularity: singularity_image + singularity: Path(singularity_image, 
config["bioinfo_tools"].get("bedtools") + ".sif").as_posix() shell: "source activate {params.conda}; " "tmpdir={params.tmpdir}/${{rand_str}}; " @@ -41,4 +32,4 @@ rule split_bed_by_chrom: " > {params.split_bed_dir}$c.{params.origin_bed}; " "done; " "unset chromlist; " - "readlink -e {input.bam}; " + "readlink -f {input.bam}; " diff --git a/BALSAMIC/utils/cli.py b/BALSAMIC/utils/cli.py index 6f770a3da..68f8fca4a 100644 --- a/BALSAMIC/utils/cli.py +++ b/BALSAMIC/utils/cli.py @@ -1,27 +1,25 @@ import os import json -import yaml +import shutil +import logging import sys import collections -import BALSAMIC -import snakemake import re -import shutil -import logging -import click -import graphviz - +import subprocess from pathlib import Path -from colorclass import Color from io import StringIO -from itertools import chain -from collections import defaultdict -from BALSAMIC.utils.constants import CONDA_ENV_PATH +from distutils.spawn import find_executable -LOG = logging.getLogger(__name__) +import yaml +import snakemake +import graphviz +from colorclass import Color +import BALSAMIC from BALSAMIC.utils.exc import BalsamicError +LOG = logging.getLogger(__name__) + class CaptureStdout(list): """ @@ -44,7 +42,6 @@ class SnakeMake: To build a snakemake command using cli options Params: - case_name - analysis case name working_dir - working directory for snakemake configfile - sample configuration file (json) output of balsamic-config-sample @@ -61,9 +58,12 @@ class SnakeMake: run_analysis - To run pipeline use_singularity - To use singularity singularity_bind- Singularity bind path + quiet - Quiet mode for snakemake singularity_arg - Singularity arguments to pass to snakemake sm_opt - snakemake additional options disable_variant_caller - Disable variant caller + dragen - enable/disable dragen suite + slurm_profiler - enable slurm profiler """ def __init__(self): @@ -84,20 +84,24 @@ def __init__(self): self.mail_user = str() self.forceall = False self.run_analysis = False + 
self.quiet = False self.report = str() self.use_singularity = True self.singularity_bind = str() self.singularity_arg = str() self.sm_opt = str() self.disable_variant_caller = str() + self.dragen = False + self.slurm_profiler = str() def build_cmd(self): forceall = str() + quiet_mode = str() sm_opt = str() cluster_cmd = str() dryrun = str() report = str() - snakemake_config_key_value = str() + snakemake_config_key_value = list() if self.forceall: forceall = "--forceall" @@ -105,6 +109,9 @@ def build_cmd(self): if self.report: report = "--report {}".format(self.report) + if self.quiet: + quiet_mode = " --quiet " + if self.sm_opt: sm_opt = " ".join(self.sm_opt) @@ -112,9 +119,14 @@ def build_cmd(self): dryrun = "--dryrun" if self.disable_variant_caller: - snakemake_config_key_value = ( - f" --config disable_variant_caller={self.disable_variant_caller} " - ) + snakemake_config_key_value.append( + f'disable_variant_caller={self.disable_variant_caller}') + + if self.dragen: + snakemake_config_key_value.append('dragen=True') + + if snakemake_config_key_value: + snakemake_config_key_value.insert(0, "--config") if self.use_singularity: self.singularity_arg = "--use-singularity --singularity-args ' --cleanenv " @@ -140,6 +152,10 @@ def build_cmd(self): self.result_path, )) + if self.slurm_profiler: + sbatch_cmd += " --slurm-profiler {}".format( + self.slurm_profiler) + if self.mail_user: sbatch_cmd += " --mail-user {} ".format(self.mail_user) @@ -154,22 +170,15 @@ def build_cmd(self): self.case_name, self.cluster_config, sbatch_cmd)) - sm_cmd = (" snakemake --notemp -p " - " --directory {} --snakefile {} --configfiles {} " - " {} {} {} {} {} {} {} {}".format( - self.working_dir, - self.snakefile, - self.configfile, - self.cluster_config, - self.singularity_arg, - forceall, - dryrun, - cluster_cmd, - report, - snakemake_config_key_value, - sm_opt, - )) + # Merge snakmake config key value list + snakemake_config_key_value = " ".join(snakemake_config_key_value) + sm_cmd = ( + 
f" snakemake --notemp -p " + f" --directory {self.working_dir} --snakefile {self.snakefile} --configfiles {self.configfile} " + f" {self.cluster_config} {self.singularity_arg} {quiet_mode} " + f" {forceall} {dryrun} {cluster_cmd} " + f" {report} {snakemake_config_key_value} {sm_opt}") return sm_cmd @@ -233,7 +242,7 @@ def get_schedulerpy(): """ p = Path(__file__).parents[1] - scheduler = str(Path(p, "commands/run/scheduler.py")) + scheduler = str(Path(p, "utils", "scheduler.py")) return scheduler @@ -244,14 +253,9 @@ def get_snakefile(analysis_type, sequencing_type="targeted"): """ p = Path(__file__).parents[1] - if analysis_type == "qc": - snakefile = Path(p, "workflows", "Alignment.smk") - elif analysis_type in ["single", "paired"]: - snakefile = Path(p, "workflows", "VariantCalling.smk") - if sequencing_type == "wgs": - snakefile = Path(p, "workflows", "VariantCalling_sentieon.smk") - elif analysis_type == "generate_ref": - snakefile = Path(p, "workflows", "GenerateRef") + snakefile = Path(p, "workflows", "balsamic.smk") + if analysis_type == "generate_ref": + snakefile = Path(p, 'workflows', 'reference.smk') elif analysis_type == "umi": snakefile = Path(p, "workflows", "UMIworkflow.smk") @@ -290,18 +294,6 @@ def convert_defaultdict_to_regular_dict(inputdict: dict): return inputdict -def merge_dict_on_key(dict_1, dict_2, by_key): - """ - Merge two list of dictionaries based on key - """ - merged_dict = defaultdict(dict) - for interm_list in (dict_1, dict_2): - for item in interm_list: - merged_dict[item[by_key]].update(item) - merged_dict_list = merged_dict.values() - return merged_dict_list - - def find_file_index(file_path): indexible_files = { ".bam": [".bam.bai", ".bai"], @@ -441,36 +433,29 @@ def get_panel_chrom(panel_bed) -> list: return {s.split("\t")[0] for s in lines} -def get_bioinfo_tools_list(conda_env_path) -> dict: +def get_bioinfo_tools_version(bioinfo_tools: dict, + container_conda_env_path: os.PathLike) -> dict: """Parses the names and versions 
of bioinfo tools used by BALSAMIC from config YAML into a dict """ - bioinfo_tools = {} - for yaml_file in Path(conda_env_path).rglob("*.yaml"): + bioinfo_tools_version = {} + for container_conda_env_name in set(bioinfo_tools.values()): + yaml_file = Path(container_conda_env_path, container_conda_env_name, + container_conda_env_name + ".yaml") with open(yaml_file, "r") as f: packages = yaml.safe_load(f).get("dependencies") for p in packages: - if isinstance(p, dict): - for pip_package in p["pip"]: - name, version = pip_package.split("==") - if name in bioinfo_tools: - bioinfo_tools[name] = ",".join( - set([bioinfo_tools[name], version])) - else: - bioinfo_tools[name] = version + name = p.split("=")[0] + version = "=".join(p.split("=")[1:]) + if name not in bioinfo_tools: + continue + if name in bioinfo_tools_version: + bioinfo_tools_version[name].append(version) + bioinfo_tools_version[name] = list( + set(bioinfo_tools_version[name])) else: - try: - name = p.split("=")[0] - version = "=".join(p.split("=")[1:]) - except ValueError: - name, version = p, None - finally: - if name in bioinfo_tools: - bioinfo_tools[name] = ",".join( - set([bioinfo_tools[name], version])) - else: - bioinfo_tools[name] = version - return bioinfo_tools + bioinfo_tools_version[name] = list([version]) + return bioinfo_tools_version def get_sample_dict(tumor: str, @@ -585,3 +570,36 @@ def convert_deliverables_tags(delivery_json: dict, file_tags.append(sample_name) file["tag"] = list(set(file_tags)) return delivery_json + + +def check_executable(exe_name: str) -> bool: + """Checks for executable exe_name in PATH""" + exe_exist = True + + if find_executable(exe_name) is None: + exe_exist = False + + return exe_exist + + +def generate_h5(job_name: str, job_id: str, file_path: str) -> str: + """Generates H5 file for a finished job. 
Returns None if it cannot generate H5 file""" + h5_file_name = Path(file_path, job_name + ".h5") + sh5util_output = subprocess.check_output( + ["sh5util", "-o", + h5_file_name.as_posix(), "-S", "-j", job_id], + stderr=subprocess.STDOUT) + + if "sh5util: No node-step files found for jobid" in sh5util_output.decode( + "utf-8"): + h5_file_name = None + + return h5_file_name + + +def job_id_dump_to_yaml(job_id_dump: Path, job_id_yaml: Path, case_name: str): + """Write an input job_id_sacct_file to yaml output""" + with open(job_id_dump, "r") as jobid_in, open(job_id_yaml, + "w") as jobid_out: + jobid_list = jobid_in.read().splitlines() + yaml.dump({case_name: jobid_list}, jobid_out) diff --git a/BALSAMIC/utils/constants.py b/BALSAMIC/utils/constants.py index 53d04f869..dea34d118 100644 --- a/BALSAMIC/utils/constants.py +++ b/BALSAMIC/utils/constants.py @@ -2,33 +2,57 @@ import sys from pathlib import Path -import BALSAMIC +# DOCKER hub path +BALSAMIC_DOCKER_PATH = "docker://clinicalgenomics/balsamic" -# Path to conda folder containing YAML files with verions of software usen un BALSAMIC workflow -CONDA_ENV_PATH = Path( - Path(sys.modules["BALSAMIC"].__file__).parent.resolve() / - "conda").as_posix() +# BALSAMIC base dir +BALSAMIC_BASE_DIR = Path(sys.modules["BALSAMIC"].__file__).parent.resolve() -# Path to config YAML file to be accessed by Snakemake -CONDA_ENV_YAML = Path( - Path(sys.modules["BALSAMIC"].__file__).parent.resolve() / "config" / - "balsamic_env.yaml").as_posix() +# BALSAMIC scripts dir +BALSAMIC_SCRIPTS = Path(BALSAMIC_BASE_DIR, "assets/scripts").as_posix() + +# Path to containers directory containing YAML files for conda installation for each one +CONTAINERS_CONDA_ENV_PATH = Path(BALSAMIC_BASE_DIR / "containers").as_posix() # Path to rule files to be accessed by Snakemake -RULE_DIRECTORY = ( - Path(sys.modules["BALSAMIC"].__file__).parent.resolve().as_posix() + "/") +RULE_DIRECTORY = BALSAMIC_BASE_DIR.as_posix() + +# Path to vcfanno toml files 
+VCFANNO_TOML = Path( + BALSAMIC_BASE_DIR / "assets" / "vcfanno" / "vcfanno.toml").as_posix() -# BALSAMIC version -BALSAMIC_VERSION = BALSAMIC.__version__ +# Sentieon specific +SENTIEON_DNASCOPE = Path( + BALSAMIC_BASE_DIR / + 'assets/sentieon_models/SentieonDNAscopeModelBeta0.4a-201808.05.model' +).as_posix() +SENTIEON_TNSCOPE = Path( + BALSAMIC_BASE_DIR / + 'assets/sentieon_models/SentieonTNscopeModel_GiAB_HighAF_LowFP-201711.05.model' +) # Analysis related constants MUTATION_CLASS = ["somatic", "germline"] MUTATION_TYPE = ["SNV", "SV", "CNV"] ANALYSIS_TYPES = ["paired", "single", "umi", "qc"] -WORKFLOW_SOLUTION = ["BALSAMIC", "Sentieon", "DRAGEN"] +WORKFLOW_SOLUTION = ["BALSAMIC", "Sentieon", "DRAGEN", "Sentieon_umi"] +# Variantcaller parameters +VARCALL_PARAMS = { +"tnscope": +{ +"tumor": "--min_init_tumor_lod 1.0 --min_tumor_lod 8", +"normal": "--min_init_normal_lod 0.5 --min_normal_lod 1.0", +} +} # Configuration of VCF settings VCF_DICT = { + "TNscope_umi": { + "mutation": "somatic", + "type": "SNV", + "analysis_type": ["single", "paired"], + "workflow_solution": ["Sentieon_umi"] + }, "tnsnv": { "mutation": "somatic", "type": "SNV", @@ -65,24 +89,12 @@ "analysis_type": ["paired", "single"], "workflow_solution": ["BALSAMIC"] }, - "mutect": { - "mutation": "somatic", - "type": "SNV", - "analysis_type": ["paired", "single"], - "workflow_solution": ["BALSAMIC"] - }, "vardict": { "mutation": "somatic", "type": "SNV", "analysis_type": ["paired", "single"], "workflow_solution": ["BALSAMIC"] }, - "strelka": { - "mutation": "somatic", - "type": "SNV", - "analysis_type": ["paired"], - "workflow_solution": ["BALSAMIC"] - }, "manta_germline": { "mutation": "germline", "type": "SV", @@ -95,15 +107,52 @@ "analysis_type": ["paired", "single"], "workflow_solution": ["BALSAMIC"] }, - "strelka_germline": { - "mutation": "germline", - "type": "SNV", - "analysis_type": ["paired", "single"], - "workflow_solution": ["BALSAMIC"] +} + +# Minimum required QC-values from HS metrics 
to be able to pass analysis +HSMETRICS_QC_CHECK = { + "gicfdna_3.1_hg19_design.bed": { + "MEAN_TARGET_COVERAGE": 500, + "FOLD_80_BASE_PENALTY": 1.5, + "PCT_OFF_BAIT": 0.35 + }, + "gmcksolid_4.1_hg19_design.bed": { + "MEAN_TARGET_COVERAGE": 500, + "FOLD_80_BASE_PENALTY": 1.7, + "PCT_OFF_BAIT": 0.3 + }, + "gmsmyeloid_5.2_hg19_design.bed": { + "MEAN_TARGET_COVERAGE": 1000, + "FOLD_80_BASE_PENALTY": 1.5, + "PCT_OFF_BAIT": 0.4 + }, + "lymphoma_6.1_hg19_design.bed": { + "MEAN_TARGET_COVERAGE": 1000, + "FOLD_80_BASE_PENALTY": 1.5, + "PCT_OFF_BAIT": 0.35 + }, + "gmslymphoid_7.1_hg19_design.bed": { + "MEAN_TARGET_COVERAGE": 1000, + "FOLD_80_BASE_PENALTY": 1.5, + "PCT_OFF_BAIT": 0.35 + }, + "twistexomerefseq_9.1_hg19_design.bed": { + "MEAN_TARGET_COVERAGE": 100, + "FOLD_80_BASE_PENALTY": 1.8, + "PCT_OFF_BAIT": 0.25 + }, + "wgs": { + "MEAN_TARGET_COVERAGE": 30 + }, + "METRIC_CRITERIA": { + "MEAN_TARGET_COVERAGE": "gt", + "FOLD_80_BASE_PENALTY": "lt", + "PCT_OFF_BAIT": "lt" } } # Configuration of VARDICT settings + VARDICT_SETTINGS = { "AD": { "tag_value": 5, @@ -130,12 +179,66 @@ "filter_name": "balsamic_low_af", "field": "INFO" }, + "pop_freq": { + "tag_value": 0.005, + "filter_name": "balsamic_high_pop_freq", + "field": "INFO" + }, "varcaller_name": "VarDict", "filter_type": "general", "analysis_type": "tumor_only", "description": "General purpose filters used for filtering VarDict", } +# Configuration for SENTIEON settings: + +SENTIEON_VARCALL_SETTINGS = { + "AD": { + "tag_value": 3, + "filter_name": "balsamic_low_tumor_ad", + "field": "FORMAT" + }, + "DP": { + "tag_value": 10, + "filter_name": "balsamic_low_tumor_dp", + "field": "FORMAT", + }, + "AF_max": { + "tag_value": 1, + "filter_name": "balsamic_af_one", + "field": "FORMAT" + }, + "AF_min": { + "tag_value": 0.05, + "filter_name": "balsamic_low_af", + "field": "FORMAT" + }, + "pop_freq": { + "tag_value": 0.001, + "filter_name": "balsamic_high_pop_freq", + "field": "INFO" + }, + "qss": { + "tag_value": 20 , + 
"filter_name": "balsamic_low_quality_scores", + "field": "FORMAT" + }, + "strand_reads": { + "tag_value": 0 , + "filter_name": "balsamic_low_strand_read_counts", + "field": "FORMAT" + }, + "sor": { + "tag_value": 3, + "filter_name": "balsamic_high_strand_oddsratio", + "field": "INFO" + }, + "varcaller_name": "sentieon", + "filter_type": "general", + "analysis_type": "tumor_only", + "description": "General purpose filters used for filtering tnscope and tnhaplotyper" +} + # reference related constants VALID_REF_FORMAT = ["fasta", "vcf", "text", "gtf", "gff"] VALID_GENOME_VER = ["hg19", "hg38"] @@ -150,7 +253,7 @@ "gzip": False, "genome_version": "hg38", "output_file": "Homo_sapiens_assembly38.fasta", - "output_path": "genome", + "output_path": "genome" }, "dbsnp": { "url": @@ -159,7 +262,7 @@ "gzip": False, "genome_version": "hg38", "output_file": "Homo_sapiens_assembly38.dbsnp138.vcf", - "output_path": "variants", + "output_path": "variants" }, "hc_vcf_1kg": { "url": @@ -168,7 +271,7 @@ "gzip": True, "genome_version": "hg38", "output_file": "1000G_phase1.snps.high_confidence.hg38.vcf", - "output_path": "variants", + "output_path": "variants" }, "mills_1kg": { "url": @@ -177,7 +280,7 @@ "gzip": True, "genome_version": "hg38", "output_file": "Mills_and_1000G_gold_standard.indels.hg38.vcf", - "output_path": "variants", + "output_path": "variants" }, "known_indel_1kg": { "url": @@ -186,7 +289,7 @@ "gzip": True, "genome_version": "hg38", "output_file": "Homo_sapiens_assembly38.known_indels.vcf", - "output_path": "variants", + "output_path": "variants" }, "vcf_1kg": { "url": @@ -196,7 +299,25 @@ "genome_version": "hg38", "output_file": "1000G.phase3.integrated.sites_only.no_MATCHED_REV.hg38.vcf", - "output_path": "variants", + "output_path": "variants" + }, + "gnomad_variant": { + "url": + "gs://gnomad-public/release/2.1.1/liftover_grch38/vcf/genomes/gnomad.genomes.r2.1.1.sites.liftover_grch38.vcf.bgz", + "file_type": "vcf", + "gzip": False, + "genome_version": "hg38", + 
"output_file": "gnomad.genomes.r2.1.1.sites.vcf.bgz", + "output_path": "variants" + }, + "gnomad_variant_index": { + "url": + "gs://gnomad-public/release/2.1.1/liftover_grch38/vcf/genomes/gnomad.genomes.r2.1.1.sites.liftover_grch38.vcf.bgz.tbi", + "file_type": "vcf", + "gzip": False, + "genome_version": "hg38", + "output_file": "gnomad.genomes.r2.1.1.sites.vcf.bgz.tbi", + "output_path": "variants" }, "cosmicdb": { "url": @@ -205,7 +326,7 @@ "gzip": True, "genome_version": "hg38", "output_file": "cosmic_coding_muts_v92.vcf", - "output_path": "variants", + "output_path": "variants" }, "wgs_calling": { "url": @@ -214,7 +335,7 @@ "gzip": False, "genome_version": "hg38", "output_file": "wgs_calling_regions.v1", - "output_path": "genome", + "output_path": "genome" }, "genome_chrom_size": { "url": @@ -223,7 +344,7 @@ "gzip": False, "genome_version": "hg38", "output_file": "hg38.chrom.sizes", - "output_path": "genome", + "output_path": "genome" }, "refgene_txt": { "url": @@ -232,7 +353,7 @@ "gzip": True, "genome_version": "hg38", "output_file": "refGene.txt", - "output_path": "genome", + "output_path": "genome" }, "refgene_sql": { "url": @@ -241,7 +362,16 @@ "gzip": False, "genome_version": "hg38", "output_file": "refGene.sql", - "output_path": "genome", + "output_path": "genome" + }, + "rankscore": { + "url": + "https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/cancer/rank_model/cancer_rank_model_-v0.1-.ini", + "file_type": "text", + "gzip": False, + "genome_version": "hg38", + "output_file": "cancer_rank_model_-v0.1-.ini", + "output_path": "genome" }, }, "hg19": { @@ -251,7 +381,7 @@ "gzip": True, "genome_version": "hg19", "output_file": "human_g1k_v37.fasta", - "output_path": "genome", + "output_path": "genome" }, "dbsnp": { "url": "gs://gatk-legacy-bundles/b37/dbsnp_138.b37.vcf.gz", @@ -259,7 +389,7 @@ "gzip": True, "genome_version": "hg19", "output_file": "dbsnp_grch37_b138.vcf", - "output_path": "variants", + "output_path": "variants" }, 
"hc_vcf_1kg": { "url": @@ -268,7 +398,7 @@ "gzip": True, "genome_version": "hg19", "output_file": "1kg_phase1_snps_high_confidence_b37.vcf", - "output_path": "variants", + "output_path": "variants" }, "mills_1kg": { "url": @@ -277,7 +407,7 @@ "gzip": True, "genome_version": "hg19", "output_file": "mills_1kg_index.vcf", - "output_path": "variants", + "output_path": "variants" }, "known_indel_1kg": { "url": @@ -285,8 +415,8 @@ "file_type": "vcf", "gzip": True, "genome_version": "hg19", - "output_file": "1kg_known_indels_b37.vcf.gz", - "output_path": "variants", + "output_file": "1kg_known_indels_b37.vcf", + "output_path": "variants" }, "vcf_1kg": { "url": @@ -295,7 +425,25 @@ "gzip": True, "genome_version": "hg19", "output_file": "1k_genome_wgs_p1_v3_all_sites.vcf", - "output_path": "variants", + "output_path": "variants" + }, + "gnomad_variant": { + "url": + "gs://gnomad-public/release/2.1.1/vcf/genomes/gnomad.genomes.r2.1.1.sites.vcf.bgz", + "file_type": "vcf", + "gzip": False, + "genome_version": "hg19", + "output_file": "gnomad.genomes.r2.1.1.sites.vcf.bgz", + "output_path": "variants" + }, + "gnomad_variant_index": { + "url": + "gs://gnomad-public/release/2.1.1/vcf/genomes/gnomad.genomes.r2.1.1.sites.vcf.bgz.tbi", + "file_type": "vcf", + "gzip": False, + "genome_version": "hg19", + "output_file": "gnomad.genomes.r2.1.1.sites.vcf.bgz.tbi", + "output_path": "variants" }, "cosmicdb": { "url": @@ -304,7 +452,7 @@ "gzip": True, "genome_version": "hg19", "output_file": "cosmic_coding_muts_v90.vcf", - "output_path": "variants", + "output_path": "variants" }, "wgs_calling": { "url": @@ -313,7 +461,7 @@ "gzip": False, "genome_version": "hg19", "output_file": "wgs_calling_regions.v1", - "output_path": "genome", + "output_path": "genome" }, "genome_chrom_size": { "url": @@ -322,7 +470,7 @@ "gzip": False, "genome_version": "hg19", "output_file": "hg19.chrom.sizes", - "output_path": "genome", + "output_path": "genome" }, "refgene_txt": { "url": @@ -331,7 +479,7 @@ "gzip": 
True, "genome_version": "hg19", "output_file": "refGene.txt", - "output_path": "genome", + "output_path": "genome" }, "refgene_sql": { "url": @@ -340,9 +488,78 @@ "gzip": False, "genome_version": "hg19", "output_file": "refGene.sql", - "output_path": "genome", + "output_path": "genome" + }, + "rankscore": { + "url": + "https://raw.githubusercontent.com/Clinical-Genomics/reference-files/master/cancer/rank_model/cancer_rank_model_-v0.1-.ini", + "file_type": "text", + "gzip": False, + "genome_version": "hg19", + "output_file": "cancer_rank_model_-v0.1-.ini", + "output_path": "genome" }, + } +} + +umiworkflow_params = { + "common": { + "align_header": + "'@RG\\tID:{sample}\\tSM:{sample}\\tLB:TargetPanel\\tPL:ILLUMINA'", + "align_intbases": 1000000, + "filter_tumor_af": 0.0005 + }, + "consensuscall": { + "align_format": "BAM", + "filter_minreads": "3,1,1", + "tag": "XR" + }, + "umiextract": { + "read_structure": "-d '3M2S+T,3M2S+T'" + }, + "tnscope": { + "algo": "TNscope", + "min_tumorLOD": 0.5, + "error_rate": 5, + "prunefactor": 3, + "disable_detect": "sv" + }, + "vardict": { + "vardict_filters": "-c 1 -S 2 -E 3 -g 4 -r 1 -F 0" }, + "vep": { + "vep_filters": + "--compress_output bgzip --vcf --everything --allow_non_variant --dont_skip --buffer_size 10000 --format vcf --offline --variant_class --merged --cache --verbose --force_overwrite" + } +} + +# list of bioinfo tools for each conda env +VALID_CONTAINER_CONDA_NAME = { + "align_qc", "annotate", "coverage_qc", "varcall_py36", "varcall_py27", + "varcall_cnvkit" +} + +BIOINFO_TOOL_ENV = { + "bedtools": "align_qc", + "bwa": "align_qc", + "fastqc": "align_qc", + "samtools": "align_qc", + "picard": "align_qc", + "multiqc": "align_qc", + "fastp": "align_qc", + "csvkit": "align_qc", + "ensembl-vep": "annotate", + "genmod": "annotate", + "vcfanno": "annotate", + "sambamba": "coverage_qc", + "mosdepth": "coverage_qc", + "bcftools": "varcall_py36", + "tabix": "varcall_py36", + "gatk": "varcall_py36", + "vardict": 
"varcall_py36", + "strelka": "varcall_py27", + "manta": "varcall_py27", + "cnvkit": "varcall_cnvkit", } REPORT_MODEL = { diff --git a/BALSAMIC/utils/models.py b/BALSAMIC/utils/models.py index 643e01e1d..7e151a5ac 100644 --- a/BALSAMIC/utils/models.py +++ b/BALSAMIC/utils/models.py @@ -6,17 +6,11 @@ from pydantic import BaseModel, validator, Field, AnyUrl from pydantic.types import DirectoryPath, FilePath +from BALSAMIC import __version__ as balsamic_version + from BALSAMIC.utils.constants import ( - CONDA_ENV_YAML, - ANALYSIS_TYPES, - WORKFLOW_SOLUTION, - MUTATION_CLASS, - MUTATION_TYPE, - RULE_DIRECTORY, - BALSAMIC_VERSION, - VALID_GENOME_VER, - VALID_REF_FORMAT, -) + BIOINFO_TOOL_ENV, ANALYSIS_TYPES, WORKFLOW_SOLUTION, MUTATION_CLASS, + MUTATION_TYPE, VALID_GENOME_VER, VALID_REF_FORMAT) class VCFAttributes(BaseModel): @@ -45,22 +39,30 @@ class VarCallerFilter(BaseModel): This class handles attributes and filter for variant callers Attributes: - AD: VCFAttributes (required); minimum allelic depth - AF_min: VCFAttributes (optional); minimum allelic fraction - AF_max: VCFAttributes (optional); maximum allelic fraction - MQ: VCFAttributes (optional); minimum mapping quality - DP: VCFAttributes (optional); minimum read depth - varcaller_name: str (required); variant caller name - filter_type: str (required); filter name for variant caller - analysis_type: str (required); analysis type e.g. 
tumor_normal or tumor_only - description: str (required); comment section for description + AD: VCFAttributes (required); minimum allelic depth + AF_min: VCFAttributes (optional); minimum allelic fraction + AF_max: VCFAttributes (optional); maximum allelic fraction + MQ: VCFAttributes (optional); minimum mapping quality + DP: VCFAttributes (optional); minimum read depth + pop_freq: VCFAttributes (optional); maximum gnomad_af + strand_reads: VCFAttributes (optional); minimum strand specific read counts + qss: VCFAttributes (optional); minimum sum of base quality scores + sor: VCFAttributes (optional); minimum symmetrical log-odds ratio + varcaller_name: str (required); variant caller name + filter_type: str (required); filter name for variant caller + analysis_type: str (required); analysis type e.g. tumor_normal or tumor_only + description: str (required); comment section for description """ AD: VCFAttributes AF_min: Optional[VCFAttributes] AF_max: Optional[VCFAttributes] MQ: Optional[VCFAttributes] - DP: VCFAttributes + DP: Optional[VCFAttributes] + pop_freq: VCFAttributes + strand_reads: Optional[VCFAttributes] + qss : Optional[VCFAttributes] + sor : Optional[VCFAttributes] varcaller_name: str filter_type: str analysis_type: str @@ -79,8 +81,8 @@ class QCModel(BaseModel): umi_trim_length : Field(str(int)); length of UMI to be trimmed from reads Raises: - ValueError: - When the input in min_seq_length and umi_trim_length cannot + ValueError: + When the input in min_seq_length and umi_trim_length cannot be interpreted as integer and coerced to string """ @@ -154,15 +156,13 @@ class VCFModel(BaseModel): tnsnv: VarcallerAttribute manta: VarcallerAttribute cnvkit: VarcallerAttribute - mutect: VarcallerAttribute vardict: VarcallerAttribute - strelka: VarcallerAttribute tnscope: VarcallerAttribute dnascope: VarcallerAttribute tnhaplotyper: VarcallerAttribute manta_germline: VarcallerAttribute haplotypecaller: VarcallerAttribute - strelka_germline: VarcallerAttribute + 
TNscope_umi: VarcallerAttribute class AnalysisModel(BaseModel): @@ -178,6 +178,7 @@ class AnalysisModel(BaseModel): targeted : if capture kit was used to enrich specific genomic regions wgs : if whole genome sequencing was performed analysis_dir : Field(required); existing path where to save files + umiworkflow : Field(bool); whether UMI workflow to run parallely fastq_path : Field(optional); Path where fastq files will be stored script : Field(optional); Path where snakemake scripts will be stored @@ -205,8 +206,9 @@ class AnalysisModel(BaseModel): result: Optional[DirectoryPath] benchmark: Optional[DirectoryPath] dag: Optional[FilePath] - BALSAMIC_version: str = BALSAMIC_VERSION + BALSAMIC_version: str = balsamic_version config_creation_date: Optional[str] + umiworkflow: bool = True class Config: validate_all = True @@ -259,9 +261,9 @@ def parse_analysis_to_benchmark_path(cls, value, values, **kwargs) -> str: @validator("dag") def parse_analysis_to_dag_path(cls, value, values, **kwargs) -> str: - return (Path(values.get("analysis_dir"), values.get("case_id"), - values.get("case_id")).as_posix() + - f"_BALSAMIC_{BALSAMIC_VERSION}_graph.pdf") + return Path(values.get("analysis_dir"), values.get("case_id"), + values.get("case_id")).as_posix( + ) + f'_BALSAMIC_{balsamic_version}_graph.pdf' @validator("config_creation_date") def datetime_as_string(cls, value): @@ -303,23 +305,6 @@ def set_sample_id_if_missing_value(cls, value, values, **kwargs): return values.get("file_prefix") -class BioinfoToolsModel(BaseModel): - """Holds versions of current bioinformatic tools used in analysis""" - - tabix: Optional[str] - bcftools: Optional[str] - fastqc: Optional[str] - manta: Optional[str] - picard: Optional[str] - bwa: Optional[str] - strelka: Optional[str] - gatk: Optional[str] - samtools: Optional[str] - sambamba: Optional[str] - vardict: Optional[str] - cutadapt: Optional[str] - - class PanelModel(BaseModel): """Holds attributes of PANEL BED file if provided Attributes: @@ 
-349,11 +334,12 @@ class BalsamicConfigModel(BaseModel): samples : Field(Dict); dictionary containing samples submitted for analysis reference : Field(Dict); dictionary containign paths to reference genome files panel : Field(PanelModel(optional)); variables relevant to PANEL BED if capture kit is used - bioinfo_tools : Field(BioinfoToolsModel); dictionary of bioinformatics software and their versions used for the analysis + bioinfo_tools : Field(dict); dictionary of bioinformatics software and which conda/container they are in + bioinfo_tools_version : Field(dict); dictionary of bioinformatics software and their versions used for the analysis singularity : Field(Path); path to singularity container of BALSAMIC background_variants: Field(Path(optional)); path to BACKGROUND VARIANTS for UMI - conda_env_yaml : Field(Path(CONVA_ENV_YAML)); path where Balsamic configs can be found rule_directory : Field(Path(RULE_DIRECTORY)); path where snakemake rules can be found + umiworkflow : Field(bool); whether UMI workflow to run parallely with balsamic workflow """ @@ -362,12 +348,12 @@ class BalsamicConfigModel(BaseModel): analysis: AnalysisModel samples: Dict[str, SampleInstanceModel] reference: Dict[str, Path] - singularity: FilePath + singularity: DirectoryPath background_variants: Optional[FilePath] - conda_env_yaml: FilePath = CONDA_ENV_YAML - rule_directory: DirectoryPath = RULE_DIRECTORY - bioinfo_tools: Optional[BioinfoToolsModel] + bioinfo_tools: dict + bioinfo_tools_version: dict panel: Optional[PanelModel] + umiworkflow: bool = True @validator("reference") def abspath_as_str(cls, value): @@ -461,9 +447,11 @@ class ReferenceMeta(BaseModel): vcf_1kg: ReferenceUrlsModel. Optional field for 1000Genome all SNPs wgs_calling: ReferenceUrlsModel. Optional field for wgs calling intervals genome_chrom_size: ReferenceUrlsModel. Optional field for geneome's chromosome sizes + gnomad_variant: ReferenceUrlsModel. 
Optional gnomad variants (non SV) as vcf cosmicdb: ReferenceUrlsModel. Optional COSMIC database's variants as vcf refgene_txt: ReferenceUrlsModel. Optional refseq's gene flat format from UCSC refgene_sql: ReferenceUrlsModel. Optional refseq's gene sql format from UCSC + rankscore: ReferenceUrlsModel. Optional rankscore model """ basedir: str = "" @@ -475,9 +463,12 @@ class ReferenceMeta(BaseModel): vcf_1kg: Optional[ReferenceUrlsModel] wgs_calling: Optional[ReferenceUrlsModel] genome_chrom_size: Optional[ReferenceUrlsModel] + gnomad_variant: Optional[ReferenceUrlsModel] + gnomad_variant_index: Optional[ReferenceUrlsModel] cosmicdb: Optional[ReferenceUrlsModel] refgene_txt: Optional[ReferenceUrlsModel] refgene_sql: Optional[ReferenceUrlsModel] + rankscore: Optional[ReferenceUrlsModel] @validator("*", pre=True) def validate_path(cls, value, values, **kwargs): @@ -493,3 +484,100 @@ def validate_path(cls, value, values, **kwargs): output_value = value return output_value + + +class UMIParamsCommon(BaseModel): + """This class defines the common params settings used as constants across various rules in UMI workflow. + + Attributes: + align_format: str (required); output alignment format. eg. 'BAM' + align_header: str (required); header line appended to the aligned BAM output + align_intbases: int; input bases in each batch regardless of threads, for reproducibility + filter_tumor_af: float (required); settings to filter minimum allelic frequency + """ + + align_header: str + align_intbases: int + filter_tumor_af: float + + +class UMIParamsUMIextract(BaseModel): + """This class defines the params settings used as constants in UMI workflow-rule umextract. + + Attributes: + read_structure: str (required); settings to define UMI read structure + """ + + read_structure: str = "-d, 'rs1,rs2'" + + +class UMIParamsConsensuscall(BaseModel): + """This class defines the params settings used as constants in UMI workflow-rule consensuscall. 
+ + Attributes: + align_format: str (required); output alignment format. eg. 'BAM' + filter_minreads: str (required); settings to filter consensus tags based on group size + tag: str; Logic UMI tag + """ + + align_format: str = 'BAM' + filter_minreads: str = '3,1,1' + tag: str = 'XR' + + +class UMIParamsTNscope(BaseModel): + """This class defines the params settings used as constants in UMI workflow- rule tnscope. + + Attributes: + algo: str; choice of sentieon varcall algorithm. eg. 'TNscope' + disable_detect: str; disable variant detector. eg 'sv' or 'snv_indel' + filter_tumor_af: float (required); minimum allelic frequency to detect + min_tumorLOD: float (required); Minimum tumorLOD value + error_rate: int (required); allow error-rate to consider in calling + prunefactor: int (required); pruning factor in the kmer graph + """ + + algo: str + min_tumorLOD: float + error_rate: int + prunefactor: int + disable_detect: str + + +class UMIParamsVardict(BaseModel): + """This class defines the params settings used as constants in UMIworkflow-rule vardict. + + Attributes: + vardict_filters: str (required); set of filters to apply for variant-calling using vardict + """ + vardict_filters: str + + +class UMIParamsVEP(BaseModel): + """This class defines the params settings used as constants in UMIworkflow-rule vep. + + Attributes: + vep_filters: str (required); set of filters to apply for variant-calling using vardict + """ + vep_filters: str + + +class UMIworkflowConfig(BaseModel): + """ Defines set of rules in UMI workflow. + + Handles attributes for corresponding rules. 
+ + Attributes: + common: global params defined across all rules in UMI workflow + umiextract: params defined in the rule sentieon_umiextract + consensuscall: params defined in the rule sentieon_consensuscall + tnscope: params defined in the rule sentieon_tnscope_umi + vardict: params defined in the rule vardict_umi + vep: params defined in the rule vep_umi + """ + common: UMIParamsCommon + umiextract: UMIParamsUMIextract + consensuscall: UMIParamsConsensuscall + tnscope: UMIParamsTNscope + vardict: UMIParamsVardict + vep: UMIParamsVEP diff --git a/BALSAMIC/utils/qc_check.py b/BALSAMIC/utils/qc_check.py new file mode 100644 index 000000000..520398215 --- /dev/null +++ b/BALSAMIC/utils/qc_check.py @@ -0,0 +1,238 @@ +import pandas as pd +import numpy as np +import json +import os +from BALSAMIC.utils.constants import HSMETRICS_QC_CHECK +from BALSAMIC.utils.rule import get_sample_type + + +def read_hs_metrics(hs_metrics_file: str): + """Reads the HS metrics (json-format) and returns it as a DataFrame + + Args: + hs_metrics_file: A string path to the file + + Returns: + metrics_df: DataFrame + + """ + with open(hs_metrics_file): + metrics_df = pd.read_json(hs_metrics_file) + return metrics_df + + +def read_qc_table(qc_table: dict): + """Reads the QC-table (json-format) and returns it as a DataFrame + + Args: + qc_table: Dictionary imported from constants + + Returns: + qc_df: DataFrame + + """ + qc_df = pd.DataFrame.from_dict(qc_table) + return qc_df + + +def get_bait_name(input_config: str): + """Get the bait name from case config + + Args: + input_config: Path to config + + Returns: + bait: string + + """ + with open(input_config) as f: + load_config = json.load(f) + + # Read the config file and return the bait name from the json file + bait = os.path.basename(load_config["panel"]["capture_kit"]) + + return bait + + +def get_sample_name(input_config: str): + """ Get the sample names from the config file + + Args: + input_config: Path to config + + Returns: + tumor, 
normal: string + + """ + with open(input_config) as f: + load_config = json.load(f) + + # get_sample_type returns a list, extracting the sample name with [0] + normal = get_sample_type(load_config["samples"], "normal")[0] + tumor = get_sample_type(load_config["samples"], "tumor")[0] + + return normal, tumor + + +def get_qc_criteria(input_df: pd.DataFrame, bait: str) -> pd.DataFrame: + """ Creates a new DataFrame with the QC criteria for only the desired bait set + + Args: + input_df: qc table as DataFrame + bait: desired bait as string + + Returns: + qc_df: DataFrame + + """ + # Copy the desired columns + qc_df = input_df[[bait, "METRIC_CRITERIA"]].copy() + + # Changing the column with the bait name + qc_df = qc_df.rename(columns={bait: bait + "_criteria"}) + + return qc_df + + +def check_qc_criteria(input_qc_df: pd.DataFrame, + input_hsmetrics_df: pd.DataFrame, normal_sample: str, + tumor_sample: str) -> pd.DataFrame: + """ This function can be divided in different parts: + 1) Merging intersected values for the df with the desired QC criteria and bait set, with the HS Metrics df + 2) Creating new columns with the QC-differences from the QC criteria + 3) Setting QC flags + 4) Extract the columns with the QC flag as a new DataFrame + + Args: + input_qc_df: DataFrame + input_hsmetrics_df: DataFrame + normal_sample: String + tumor_sample: String + + Returns: + qc_check_df: DataFrame + + """ + + # 1) Merge the two df by col (axis = 1) for those rows that are shared (intersected) by passing join='inner' + merged_df = pd.concat([input_hsmetrics_df, input_qc_df], + axis=1, + join='inner') + column_header = list(merged_df.columns) + + # 2) Adding new col with the calculated difference in the qc values + merged_df['qc_diff_' + normal_sample] = merged_df[ + column_header[2]] - merged_df[column_header[0]] + merged_df['qc_diff_' + tumor_sample] = merged_df[ + column_header[2]] - merged_df[column_header[1]] + + # 3) Desired conditions for normal and tumor sample to pass. 
Two different conditions are required + # since the conditions are different for the samples and should not overwrite each other. + conditions_normal = [(merged_df['qc_diff_' + normal_sample] <= 0) & + (merged_df['METRIC_CRITERIA'] == 'gt'), + (merged_df['qc_diff_' + normal_sample] >= 0) & + (merged_df['METRIC_CRITERIA'] == 'lt')] + + conditions_tumor = [(merged_df['qc_diff_' + tumor_sample] <= 0) & + (merged_df['METRIC_CRITERIA'] == 'gt'), + (merged_df['qc_diff_' + tumor_sample] >= 0) & + (merged_df['METRIC_CRITERIA'] == 'lt')] + + # If above conditions are "True", set them as "pass" + set_qc = ['Pass', 'Pass'] + + # Adding new column with qc flag. + merged_df['qc_check_' + normal_sample] = np.select(conditions_normal, + set_qc, + default="Fail") + merged_df['qc_check_' + tumor_sample] = np.select(conditions_tumor, + set_qc, + default="Fail") + + # 4) create a new df and copy the desired columns (separated by ','). + qc_check_df = merged_df[[ + 'qc_check_' + normal_sample, 'qc_diff_' + normal_sample, + 'qc_check_' + tumor_sample, 'qc_diff_' + tumor_sample + ]].copy() + + return qc_check_df + + +def failed_qc(input_df: pd.DataFrame, normal_sample: str, + tumor_sample: str) -> pd.DataFrame: + """ Outputs if the QC failed + + Args: + input_df: DataFrame with qc parameters and qc differences + normal_sample: String + tumor_sample: String + + Returns: + String + + """ + + # copy the columns with qc criteria + copy_qc_df = input_df[[ + 'qc_check_' + normal_sample, 'qc_check_' + tumor_sample + ]].copy() + + # Creating df which set "True" for "Fail" values and "False" for "Pass" values + qc_boolean = copy_qc_df.isin(["Fail"]) + + # Create a Series to check whether any element is set as True by .any() and convert to list with .tolist() + boolean_check = qc_boolean.any().tolist() + + # Loop trough the list to check for True booleans which indicates for failed qc criteria. 
+ for n in range(len(boolean_check)): + if boolean_check[n]: + qc = "QC failed" + return qc + + +def write_output(input_df: pd.DataFrame, output_path: str) -> pd.DataFrame: + """ Outputs the QC parameters as csv-file + + Args: + input_df: DataFrame with qc parameters and qc differences + output_path: String with the desired output path + + Returns: + CSV-file + + """ + + output_df = input_df.to_csv(output_path, sep='\t') + + return output_df + + +def get_qc_check(hs_metrics, output, config): + """ Runs all above functions to provide the desired outputs + + Args: + hs_metrics: Path to hs_metrics file + output: Path for output csv-file + config: Path to case config + + Returns: + CSV-file and prints if the QC-failed + + """ + # Read the HS metrics and qc table and convert to df + hs_metrics_df = read_hs_metrics(hs_metrics) + qc_table_df = read_qc_table(HSMETRICS_QC_CHECK) + + # Extract the bait name and create a new df with the desired qc criteria + bait_set = get_bait_name(config) + sample_names = get_sample_name(config) + qc_criteria_df = get_qc_criteria(qc_table_df, bait_set) + + # Create a df with qc-flag for each criteria for each sample + extract_criteria = check_qc_criteria(qc_criteria_df, hs_metrics_df, + sample_names[0], sample_names[1]) + + # Check if qc failed + failed_qc(extract_criteria, sample_names[0], sample_names[1]) + + write_output(extract_criteria, output) diff --git a/BALSAMIC/utils/rule.py b/BALSAMIC/utils/rule.py index 3f59822a8..6d5abcb7f 100644 --- a/BALSAMIC/utils/rule.py +++ b/BALSAMIC/utils/rule.py @@ -1,4 +1,3 @@ -import os import re import yaml from pathlib import Path @@ -102,30 +101,6 @@ def get_result_dir(config): return config['analysis']['result'] -def get_conda_env(yaml_file, pkg): - """ - Retrieve conda environment for package from a predefined yaml file - - input: balsamic_env - output: string of conda env where packge is in - """ - - with open(yaml_file, 'r') as file_in: - yaml_in = yaml.safe_load(file_in) - - conda_env_found = 
None - - for conda_env, pkgs in yaml_in.items(): - if pkg in pkgs: - conda_env_found = conda_env - break - - if conda_env_found is not None: - return conda_env_found - else: - raise KeyError(f'Installed package {pkg} was not found in {yaml_file}') - - def get_picard_mrkdup(config): """ input: sample config file output from BALSAMIC @@ -264,18 +239,22 @@ def get_delivery_id(id_candidate: str, file_to_store: str, tags: list, def get_reference_output_files(reference_files_dict: dict, - file_type: str) -> list: + file_type: str, + gzip: bool = None) -> list: """ Returns list of files matching a file_type from reference files Args: reference_files_dict: A validated dict model from reference file_type: a file type string, e.g. vcf, fasta - + gzip: a list of boolean + Returns: - ref_vcf_list: list of file_type files that are found in reference_files_dict + ref_vcf_list: list of file_type files that are found in reference_files_dict """ ref_vcf_list = [] for reference_key, reference_item in reference_files_dict.items(): if reference_item['file_type'] == file_type: + if gzip is not None and reference_item['gzip'] != gzip: + continue ref_vcf_list.append(reference_item['output_file']) return ref_vcf_list diff --git a/BALSAMIC/commands/run/scheduler.py b/BALSAMIC/utils/scheduler.py similarity index 65% rename from BALSAMIC/commands/run/scheduler.py rename to BALSAMIC/utils/scheduler.py index 92a6eadf4..c9e1c5985 100644 --- a/BALSAMIC/commands/run/scheduler.py +++ b/BALSAMIC/utils/scheduler.py @@ -1,7 +1,6 @@ -#!/usr/bin/env python3 -# import sys import os import re +import logging import subprocess import json import argparse @@ -10,7 +9,7 @@ class SbatchScheduler: - ''' + """ Builds sbatch command. Commands map to SLURM sbatch options. 
Params: ------ @@ -23,7 +22,7 @@ class SbatchScheduler: output - -o/--output qos - -q/--qos time - -t/--time - ''' + """ def __init__(self): self.account = None @@ -33,28 +32,41 @@ def __init__(self): self.mail_user = None self.ntasks = None self.output = None + self.partition = None self.qos = None self.script = None self.time = None + self.profile = None + self.acctg_freq = None def build_cmd(self): - ''' builds sbatch command matching its options ''' - sbatch_options = list() + """ builds sbatch command matching its options """ + sbatch = ['sbatch'] job_attributes = [ - 'account', 'dependency', 'error', 'output', 'mail_type', - 'mail_user', 'ntasks', 'qos', 'time' + 'account', + 'dependency', + 'error', + 'output', + 'mail_type', + 'mail_user', + 'ntasks', + 'qos', + 'time', + 'partition', + 'profile', + 'acctg_freq', ] for attribute in job_attributes: if getattr(self, attribute): attribute_value = getattr(self, attribute) - sbatch_options.append('--{} \"{}\"'.format( - attribute.replace("_", "-"), attribute_value)) + sbatch.append('--{} \"{}\"'.format(attribute.replace("_", "-"), + attribute_value)) - sbatch_options.append(self.script) + sbatch.append(self.script) - return 'sbatch' + ' ' + ' '.join(sbatch_options) + return ' '.join(sbatch) class QsubScheduler: @@ -81,15 +93,11 @@ def build_cmd(self): # Exclusive node resource_params += " -l excl=1 " - # if self.time: - # resource_params += " -l \"walltime={}\" ".format(str(self.time)) if self.ntasks: - # resource_params += "nodes=1:ppn={}\" ".format(str(self.ntasks)) resource_params += " -pe mpi {} ".format(str(self.ntasks)) if self.account: - # qsub_options.append(" -A " + str(self.account)) qsub_options.append(" -q " + str(self.account)) if self.error: @@ -99,8 +107,7 @@ def build_cmd(self): qsub_options.append(" -o " + str(self.output)) if self.mail_type: - # qsub_options.append(" -m " + str(self.mail_type)) - qsub_options.append(" -m s ") # + str(self.mail_type)) + qsub_options.append(" -m s ") if 
self.mail_user: qsub_options.append(" -M " + str(self.mail_user)) @@ -114,7 +121,6 @@ def build_cmd(self): if self.dependency: for jobid in self.dependency: depend = depend + ":" + jobid - #qsub_options.append(" -W \"depend=afterok" + str(depend) + "\"") qsub_options.append(" -hold_jid " + ",".join(self.dependency)) if self.script: @@ -130,28 +136,23 @@ def read_sample_config(input_json): with open(input_json) as f: return json.load(f) except Exception as e: + logging.exception("Can not load {} file".format(input_json)) raise e -def write_sacct_file(sacct_file, job_id): +def write_sacct_file(sacct_file, job_id, job_name=str()): ''' writes a yaml file with job ids ''' try: with open(sacct_file, 'a') as f: - f.write(job_id + "\n") + f.write(job_id) + if job_name: + f.write("," + job_name) + f.write("\n") except FileNotFoundError as e: + logging.exception("Can not write {} file".format(sacct_file)) raise e -# def write_scheduler_dump(scheduler_file, cmd): -# ''' writes sbatch dump for debuging purpose ''' -# try: -# with open(scheduler_file, 'a') as f: -# f.write(cmd + "\n") -# f.write(sys.executable + "\n") -# except OSError: -# raise - - def submit_job(sbatch_cmd, profile): ''' subprocess call for sbatch command ''' # run sbatch cmd @@ -161,6 +162,7 @@ def submit_job(sbatch_cmd, profile): shell=True, stdout=subprocess.PIPE) except subprocess.CalledProcessError as e: + logging.exception("Failed to submit {}".format(sbatch_cmd)) raise e # Get jobid @@ -177,35 +179,6 @@ def submit_job(sbatch_cmd, profile): return jobid -# def singularity_param(sample_config, script_dir, jobscript, sbatch_script): -# ''' write a modified sbatch script based on singularity parameters ''' -# if 'bind_path' not in sample_config['singularity']: -# raise KeyError("bind_path was not found in sample config.") - -# if 'main_env' not in sample_config['singularity']: -# raise KeyError("main_env was not found in sample config.") - -# if 'container_path' not in sample_config['singularity']: -# 
raise KeyError("container_path was not found sample config.") - -# try: -# bind_path = sample_config['singularity']['bind_path'] -# main_env = sample_config['singularity']['main_env'] -# container_path = sample_config['singularity']['container_path'] -# with open(sbatch_script, 'a') as f: -# f.write("#!/bin/bash" + "\n") -# f.write( -# f"function balsamic-run {{ singularity exec -B {bind_path} --app {main_env} {container_path} $@; }}" -# + "\n") -# f.write(f"# Snakemake original script {jobscript}" + "\n") -# f.write(f"balsamic-run bash {jobscript}" + "\n") -# sbatch_file = os.path.join( -# script_dir, sample_config["analysis"]["case_id"] + ".sbatch") -# return sbatch_file -# except OSError: -# raise - - def get_parser(): ''' argument parser ''' parser = argparse.ArgumentParser(description=''' @@ -220,6 +193,14 @@ def get_parser(): parser.add_argument("--sample-config", help='balsamic config sample output') parser.add_argument("--profile", help="profile to run jobs") + parser.add_argument( + "--slurm-profiler", + help= + "Slurm profiler type (e.g. task). 
Refer to your SLURM manual to adjust this value" + ) + parser.add_argument("--slurm-profiler-interval", + default="10", + help="Profiler interval in seconds") parser.add_argument("--account", required=True, help='cluster account name') @@ -251,6 +232,11 @@ def main(args=None): if args.dependencies: scheduler_cmd.dependency = ','.join( ["afterok:%s" % d for d in args.dependencies]) + if args.slurm_profiler: + scheduler_cmd.profile = args.slurm_profiler + scheduler_cmd.acct_freq = "{}={}".format( + args.slurm_profiler, args.slurm_profiler_interval + ) #"--profile task --acctg-freq=task=15" elif args.profile == 'qsub': jobid = '${JOB_ID}' scheduler_cmd = QsubScheduler() @@ -263,16 +249,8 @@ def main(args=None): sacct_file = os.path.join(args.log_dir, sample_config["analysis"]["case_id"] + ".sacct") - - balsamic_run_mode = os.getenv("BALSAMIC_STATUS", "conda") - # if balsamic_run_mode == 'container' and 'singularity' in sample_config: - # sbatch_script = os.path.join(args.script_dir, - # "sbatch." 
+ os.path.basename(jobscript)) - # sbatch_file = singularity_param(sample_config=sample_config, - # script_dir=args.script_dir, - # jobscript=jobscript, - # sbatch_script=sbatch_script) - # jobscript = sbatch_script + sacct_file_extended = os.path.join( + args.log_dir, sample_config["analysis"]["case_id"] + "_extended.sacct") scheduler_cmd.account = args.account scheduler_cmd.mail_type = mail_type @@ -287,14 +265,15 @@ def main(args=None): scheduler_cmd.time = job_properties["cluster"]["time"] scheduler_cmd.mail_user = args.mail_user scheduler_cmd.script = jobscript + if "partition" in job_properties["cluster"]: + scheduler_cmd.partition = job_properties["cluster"]["partition"] jobid = submit_job(scheduler_cmd.build_cmd(), args.profile) - # scheduler_file = os.path.join(args.script_dir, sample_config["analysis"]["case_id"] + ".scheduler_dump") - # if balsamic_run_mode == 'container' and 'singularity' in sample_config: - # write_scheduler_dump(scheduler_file=scheduler_file, cmd=scheduler_cmd.build_cmd()) - write_sacct_file(sacct_file=sacct_file, job_id=jobid) + write_sacct_file(sacct_file=sacct_file_extended, + job_id=jobid, + job_name=os.path.basename(jobscript)) if __name__ == '__main__': diff --git a/BALSAMIC/utils/workflowscripts.py b/BALSAMIC/utils/workflowscripts.py new file mode 100644 index 000000000..ae4411965 --- /dev/null +++ b/BALSAMIC/utils/workflowscripts.py @@ -0,0 +1,143 @@ +import os +import subprocess +import json +from pathlib import Path +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np +import h5py +import typing + +from BALSAMIC.utils.rule import get_threads +from BALSAMIC.utils.cli import get_config +from BALSAMIC.utils.cli import generate_h5 + + +def plot_analysis(log_file: Path, h5_file: Path, + fig_name: Path) -> typing.Union[None, Path]: + """ + plots analysis job. 
+ """ + + cluster_config = get_config('cluster') + with open(cluster_config, 'r') as f: + cluster_config = json.load(f) + + log_file_list = Path(log_file).name.split(".") + + job_name = ".".join(log_file_list[0:4]) + rule_name = log_file_list[2] + mem_per_core = 5222 + requested_cores = get_threads(cluster_config, rule_name) + case_name = log_file_list[1] + job_id = log_file_list[4].split("_")[1] + + # This is lazy and memory inefficient, but it gets the job done. + df_array = h5py.File(h5_file, 'r') + node_name = list(df_array["Steps"]["batch"]["Nodes"].keys())[0] + + if not "Tasks" in list(df_array["Steps"]["batch"]["Nodes"][node_name]): + return None + + df = pd.DataFrame( + np.array(df_array["Steps"]["batch"]["Nodes"][node_name]["Tasks"]["0"])) + + # Convert kilohurtz to gigahurtz + df["CPUFrequency"] = df["CPUFrequency"] / 1e6 + + # Convert kb to gb + df["RSS"] = df["RSS"] / 1e6 + df["VMSize"] = df["VMSize"] / 1e6 + + figure_title = "Case name: {}\nRule: {}\nRun time: {} seconds\nJob name: {}\nJob ID: {}".format( + case_name, rule_name, df["ElapsedTime"].iloc[-1], job_name, job_id) + + plt.rcParams['figure.figsize'] = [10, 10] + + fig, (cpu_ax, mem_ax, io_ax) = plt.subplots(nrows=3) + fig.suptitle(figure_title, fontsize=12, horizontalalignment="center") + + cpu_ax_color = 'b' + df.plot(y="CPUUtilization", + x="ElapsedTime", + ax=cpu_ax, + color=cpu_ax_color, + style='--') + cpu_ax.set_title("CPU statistics") + cpu_ax.set_xlabel("Wall seconds") + cpu_ax.set_ylabel("Core usage (max {}%)".format(requested_cores * 100)) + cpu_ax.yaxis.label.set_color(cpu_ax_color) + cpu_ax.yaxis.label.set_color(cpu_ax_color) + cpu_ax.tick_params(axis='y', colors=cpu_ax_color) + cpu_ax.legend(loc="best", frameon=False) + cpu_ax.spines['top'].set_visible(False) + cpu_ax.spines['right'].set_visible(False) + max_cpu_line = cpu_ax.axhline(requested_cores * 100, + color=cpu_ax_color, + ls='-') + max_cpu_line.set_label("Max available") + + mem_ax_color = 'g' + df.plot(y="VMSize", + 
x="ElapsedTime", + ax=mem_ax, + color=mem_ax_color, + style="--") + mem_ax.set_title("Memory statistics") + mem_ax.set_xlabel("Wall seconds") + mem_ax.set_ylabel("Memory usage GB (max {}GB)".format( + round(mem_per_core * requested_cores / 1024))) + mem_ax.yaxis.label.set_color(mem_ax_color) + mem_ax.yaxis.label.set_color(mem_ax_color) + mem_ax.tick_params(axis='y', colors=mem_ax_color) + mem_ax.legend(loc="best", frameon=False) + mem_ax.spines['top'].set_visible(False) + mem_ax.spines['right'].set_visible(False) + max_cpu_line = mem_ax.axhline(round(mem_per_core * requested_cores / 1024), + color=mem_ax_color, + ls='-') + max_cpu_line.set_label("Max available mem") + + read_io_ax_color = 'm' + read_io_ax = df.plot(y="ReadMB", + x="ElapsedTime", + color=read_io_ax_color, + style="--", + ax=io_ax, + legend=False) + read_io_ax.set_xlabel("Wall seconds") + read_io_ax.set_ylabel("Disk read (MANIFEST.inB)") + read_io_ax.yaxis.label.set_color(read_io_ax_color) + read_io_ax.yaxis.label.set_color(read_io_ax_color) + read_io_ax.tick_params(axis='y', colors=read_io_ax_color) + read_io_ax.spines['top'].set_visible(False) + + write_io_ax = read_io_ax.twinx() + write_io_ax_color = 'olive' + write_io_ax = df.plot(y="WriteMB", + x="ElapsedTime", + ax=write_io_ax, + color=write_io_ax_color, + style="--", + legend=False) + write_io_ax.set_title("Disk I/O statistics") + write_io_ax.set_xlabel("Wall seconds") + write_io_ax.set_ylabel("Disk write (MB)") + write_io_ax.yaxis.label.set_color(write_io_ax_color) + write_io_ax.yaxis.label.set_color(write_io_ax_color) + write_io_ax.tick_params(axis='y', colors=write_io_ax_color) + write_io_ax.yaxis.tick_right() + write_io_ax.spines['top'].set_visible(False) + + handles, labels = [], [] + for ax in [write_io_ax, read_io_ax]: + for h, l in zip(*ax.get_legend_handles_labels()): + handles.append(h) + labels.append(l) + + plt.legend(handles, labels, loc='best', ncol=len(handles), frameon=False) + + plt.tight_layout() + plt.savefig(fig_name, 
dpi=300) + plt.close() + return fig_name diff --git a/BALSAMIC/workflows/Alignment.smk b/BALSAMIC/workflows/Alignment.smk deleted file mode 100644 index d85f1e915..000000000 --- a/BALSAMIC/workflows/Alignment.smk +++ /dev/null @@ -1,94 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -import os -import logging - -from yapf.yapflib.yapf_api import FormatFile -from BALSAMIC.utils.cli import write_json -from BALSAMIC.utils.rule import get_rule_output -from BALSAMIC.utils.rule import get_result_dir - -LOG = logging.getLogger(__name__) - -shell.prefix("set -eo pipefail; ") - -# Set temporary dir environment variable -os.environ['TMPDIR'] = get_result_dir(config) - -tmp_dir = os.path.join(get_result_dir(config), "tmp") -rule_dir = config["rule_directory"] -benchmark_dir = config["analysis"]["benchmark"] -fastq_dir = get_result_dir(config) + "/fastq/" -bam_dir = get_result_dir(config) + "/bam/" -cnv_dir = get_result_dir(config) + "/cnv/" -cutadapt_dir = get_result_dir(config) + "/cutadapt/" -fastqc_dir = get_result_dir(config) + "/fastqc/" -result_dir = get_result_dir(config) + "/" -vcf_dir = get_result_dir(config) + "/vcf/" -vep_dir = get_result_dir(config) + "/vep/" -qc_dir = result_dir + "qc/" - -singularity_image = config['singularity']['image'] - -# explictly check if cluster_config dict has zero keys. 
-if len(cluster_config.keys()) == 0: - cluster_config = config - -# Use fastp_hard_trim for QC purpose -pre_align = ["snakemake_rules/quality_control/fastp.rule", - "snakemake_rules/quality_control/fastqc.rule"] - -align_qc = ["snakemake_rules/align/bwa_mem.rule", - "snakemake_rules/quality_control/picard.rule", - "snakemake_rules/quality_control/sambamba_depth.rule", - "snakemake_rules/quality_control/mosdepth.rule", - "snakemake_rules/quality_control/multiqc.rule", - "snakemake_rules/quality_control/GATK.rule"] - -config["rules"] = pre_align + align_qc - -for r in config["rules"]: - include: os.path.join(rule_dir + r) - -if 'delivery' in config: - wildcard_dict = { "sample": list(config["samples"].keys()), - "case_name": config["analysis"]["case_id"], - "allow_missing": True - } - - if 'rules_to_deliver' in config: - rules_to_deliver = config['rules_to_deliver'].split(",") - else: - rules_to_deliver = ['multiqc'] - - output_files_ready = [('path', 'path_index', 'step', 'tag', 'id', 'format')] - - for my_rule in set(rules_to_deliver): - try: - housekeeper_id = getattr(rules, my_rule).params.housekeeper_id - except (ValueError, AttributeError, RuleException, WorkflowError) as e: - LOG.warning("Cannot deliver step (rule) {}: {}".format(my_rule,e)) - continue - - LOG.info("Delivering step (rule) {}.".format(my_rule)) - output_files_ready.extend(get_rule_output(rules=rules, - rule_name=my_rule, - output_file_wildcards=wildcard_dict)) - - output_files_ready = [dict(zip(output_files_ready[0], value)) for value in output_files_ready[1:]] - delivery_ready = os.path.join(get_result_dir(config), - "delivery_report", - config["analysis"]["case_id"] + "_delivery_ready.hk" ) - write_json(output_files_ready, delivery_ready) - FormatFile(delivery_ready) - - -rule all: - input: - os.path.join(*([result_dir + "qc/" + "multiqc_report.html"])), - output: - os.path.join(get_result_dir(config), "analysis_finish") - shell: - "date +'%Y-%m-%d T%T %:z' > {output}" - diff --git 
a/BALSAMIC/workflows/UMIworkflow.smk b/BALSAMIC/workflows/UMIworkflow.smk index db53cc392..7a6a901b9 100644 --- a/BALSAMIC/workflows/UMIworkflow.smk +++ b/BALSAMIC/workflows/UMIworkflow.smk @@ -4,24 +4,26 @@ import os import logging -from BALSAMIC.utils.rule import get_result_dir +from BALSAMIC.utils.rule import (get_threads, get_result_dir, + get_sample_type, get_script_path, get_vcf) +from BALSAMIC.utils.models import UMIworkflowConfig +from BALSAMIC.utils.constants import RULE_DIRECTORY, VCFANNO_TOML, umiworkflow_params LOG = logging.getLogger(__name__) shell.prefix("set -eo pipefail; ") -rule_dir = config["rule_directory"] fastq_dir = get_result_dir(config) + "/fastq/" benchmark_dir = config["analysis"]["benchmark"] umi_dir = get_result_dir(config) + "/umi/" vcf_dir = get_result_dir(config) + "/vcf/" vep_dir = get_result_dir(config) + "/vep/" -log_dir = config["analysis"]["log"] -table_dir = get_result_dir(config) + "/tables/" -plot_dir = get_result_dir(config) + "/plots/" +umi_qc_dir = get_result_dir(config) + "/qc/" +qc_dir = get_result_dir(config) + "/qc/" +tmp_dir = os.path.join(get_result_dir(config), "tmp", "" ) +Path.mkdir(Path(tmp_dir), exist_ok=True) - -singularity_image = config['singularity']['image'] +singularity_image = config["singularity"]["image"] # Declare sentieon variables sentieon = True @@ -35,38 +37,51 @@ if len(cluster_config.keys()) == 0: try: config["SENTIEON_LICENSE"] = os.environ["SENTIEON_LICENSE"] - config["SENTIEON_INSTALL_DIR"] = os.environ["SENTIEON_INSTALL_DIR"] + config["SENTIEON_EXEC"] = Path(os.environ["SENTIEON_INSTALL_DIR"], "bin", "sentieon").as_posix() except Exception as error: LOG.error("ERROR: Set SENTIEON_LICENSE and SENTIEON_INSTALL_DIR environment variable to run this pipeline.") raise # Define umiworkflow rules +fastp_umi = ["snakemake_rules/quality_control/fastp.rule"] umi_call = [ "snakemake_rules/umi/sentieon_umiextract.rule", "snakemake_rules/umi/sentieon_consensuscall.rule" ] -variant_call = [ - 
"snakemake_rules/umi/sentieon_varcall_tnscope.rule", - "snakemake_rules/umi/varcall_vardict.rule" -] +if config["analysis"]["analysis_type"] == "single": + variant_call = ["snakemake_rules/umi/sentieon_varcall_tnscope.rule"] +else: + variant_call = ["snakemake_rules/umi/sentieon_varcall_tnscope_tn.rule"] + +annotate_vcf = ["snakemake_rules/annotation/vep.rule"] -annotate_vcf = ["snakemake_rules/umi/annotate_vep.rule"] +qc = ["snakemake_rules/umi/qc_umi.rule"] -generate_plots = ["snakemake_rules/umi/generate_AF_tables.rules"] +generate_tables = ["snakemake_rules/umi/generate_AF_tables.rule"] + +# parse parameters as workflow constants +paramsumi = UMIworkflowConfig.parse_obj(umiworkflow_params) # Define wildcards SAMPLES = config["samples"] -VAR_CALLER = ['TNscope','vardict'] +CASE_NAME = config["analysis"]["case_id"] # Define outputs -analysis_output = [expand(vcf_dir + "{sample}.{var_caller}.umi.vcf.gz", sample=SAMPLES, var_caller=VAR_CALLER), expand(vep_dir + "{sample}.{var_caller}.umi.{filler}.vcf.gz", sample=SAMPLES, var_caller=VAR_CALLER, filler=['all','pass']), expand(table_dir + "{sample}.{var_caller}.umi.AFtable.txt", sample=SAMPLES, var_caller=VAR_CALLER)] +analysis_output = [expand(vep_dir + "{var_type}.somatic.{case_name}.{var_caller}.pass.vcf.gz", var_type= "SNV", case_name=CASE_NAME, var_caller=["TNscope_umi"]), +expand(umi_qc_dir + "{sample}.umi.{metric}", sample=SAMPLES, metric = ["metrics", "mean_family_depth"])] + +config["rules"] = fastp_umi + umi_call + variant_call + annotate_vcf + qc -config["rules"] = umi_call + variant_call + annotate_vcf + generate_plots +if "background_variants" in config: + analysis_output.extend([expand(umi_qc_dir + "{case_name}.{var_caller}.AFtable.txt", + case_name = config["analysis"]["case_id"], + var_caller =["TNscope_umi"])]) + config["rules"] = config["rules"] + generate_tables for r in config["rules"]: - include: os.path.join(rule_dir + r) + include: Path(RULE_DIRECTORY, r).as_posix() rule all: input: diff --git 
a/BALSAMIC/workflows/VariantCalling.smk b/BALSAMIC/workflows/VariantCalling.smk deleted file mode 100644 index 152a7df73..000000000 --- a/BALSAMIC/workflows/VariantCalling.smk +++ /dev/null @@ -1,173 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -import os -import logging - -from yapf.yapflib.yapf_api import FormatFile - -from BALSAMIC.utils.cli import write_json -from BALSAMIC.utils.rule import get_variant_callers -from BALSAMIC.utils.rule import get_rule_output -from BALSAMIC.utils.rule import get_result_dir -from BALSAMIC.utils.rule import get_vcf - -shell.prefix("set -eo pipefail; ") - -LOG = logging.getLogger(__name__) - -# Set temporary dir environment variable -os.environ['TMPDIR'] = get_result_dir(config) - -tmp_dir = os.path.join(get_result_dir(config), "tmp") -rule_dir = config["rule_directory"] -benchmark_dir = config["analysis"]["benchmark"] -fastq_dir = get_result_dir(config) + "/fastq/" -bam_dir = get_result_dir(config) + "/bam/" -cnv_dir = get_result_dir(config) + "/cnv/" -cutadapt_dir = get_result_dir(config) + "/cutadapt/" -fastqc_dir = get_result_dir(config) + "/fastqc/" -result_dir = get_result_dir(config) + "/" -vcf_dir = get_result_dir(config) + "/vcf/" -vep_dir = get_result_dir(config) + "/vep/" -qc_dir = result_dir + "qc/" -delivery_dir = get_result_dir(config) + "/delivery/" - -singularity_image = config['singularity']['image'] - -# Declare sentieon variables -sentieon = True -SENTIEON_LICENSE = '' -SENTIEON_INSTALL_DIR = '' - -# explictly check if cluster_config dict has zero keys. 
-if len(cluster_config.keys()) == 0: - cluster_config = config - -try: - config["SENTIEON_LICENSE"] = os.environ["SENTIEON_LICENSE"] - config["SENTIEON_INSTALL_DIR"] = os.environ["SENTIEON_INSTALL_DIR"] -except Exception as error: - sentieon = False - LOG.warn("Set environment variables SENTIEON_LICENSE and SENTIEON_INSTALL_DIR to run SENTIEON variant callers") - -# Define set of rules -qc_rules = [ - "snakemake_rules/quality_control/fastp.rule", - "snakemake_rules/quality_control/fastqc.rule", - "snakemake_rules/quality_control/GATK.rule", - "snakemake_rules/quality_control/multiqc.rule", - "snakemake_rules/quality_control/picard.rule", - "snakemake_rules/quality_control/sambamba_depth.rule", - "snakemake_rules/quality_control/mosdepth.rule" - ] - -align_rules = [ - "snakemake_rules/align/bwa_mem.rule" - ] - -annotation_rules = [ - "snakemake_rules/annotation/vep.rule" - ] - -variantcalling_rules = [ - "snakemake_rules/variant_calling/germline.rule", - "snakemake_rules/variant_calling/split_bed.rule" - ] - -germline_caller = ["haplotypecaller", "strelka_germline", "manta_germline"] - -if sentieon: - germline_caller.append('dnascope') - -if config['analysis']['analysis_type'] == "paired": - - qc_rules.append("snakemake_rules/quality_control/contest.rule") - - variantcalling_rules.extend([ - "snakemake_rules/variant_calling/somatic_tumor_normal.rule", - "snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule", - "snakemake_rules/variant_calling/mergetype.rule", - "snakemake_rules/variant_calling/cnvkit_paired.rule" - ]) - - somatic_caller_snv = get_variant_callers(config=config, analysis_type="paired", workflow_solution="BALSAMIC", mutation_type="SNV", mutation_class="somatic") - sentieon_callers = ["tnhaplotyper"] if sentieon else [] - somatic_caller_sv = ["manta", "cnvkit"] - -else: - - annotation_rules.append("snakemake_rules/annotation/varcaller_filter.rule") - - variantcalling_rules.extend([ - "snakemake_rules/variant_calling/cnvkit_single.rule", - 
"snakemake_rules/variant_calling/mergetype_tumor.rule", - "snakemake_rules/variant_calling/somatic_tumor_only.rule", - "snakemake_rules/variant_calling/somatic_sv_tumor_only.rule" - ]) - - somatic_caller_snv = get_variant_callers(config=config, analysis_type="single", workflow_solution="BALSAMIC", mutation_type="SNV", mutation_class="somatic") - sentieon_callers = ["tnhaplotyper"] if sentieon else [] - somatic_caller_sv = ["manta", "cnvkit"] - -somatic_caller = somatic_caller_snv + somatic_caller_sv + sentieon_callers -if "disable_variant_caller" in config: - somatic_caller.remove(config["disable_variant_caller"]) - -config["rules"] = align_rules + qc_rules + variantcalling_rules + annotation_rules - -for r in config["rules"]: - include: os.path.join(rule_dir + r) - -# Define common and analysis specific outputs -common_output = [ result_dir + "qc/" + "multiqc_report.html", -expand(vep_dir + "{vcf}.vcf.gz", vcf=get_vcf(config, germline_caller, config["samples"])), -expand(vep_dir + "{vcf}.{filters}.vcf.gz", vcf=get_vcf(config, somatic_caller, [config["analysis"]["case_id"]]), filters = ["all", "pass"]), -expand(vep_dir + "{vcf}.pass.balsamic_stat", vcf=get_vcf(config, ["vardict"], [config["analysis"]["case_id"]]))] - -analysis_specific_output = [] -if config['analysis']['analysis_type'] == "single": - analysis_specific_output.extend(expand(vep_dir + "{vcf}.all.filtered.vcf.gz", vcf=get_vcf(config, ["vardict"], [config["analysis"]["case_id"]]))) - - -if 'delivery' in config: - wildcard_dict = { "sample": list(config["samples"].keys()), - "case_name": config["analysis"]["case_id"], - "var_type": ["CNV", "SNV", "SV"], - "var_class": ["somatic", "germline"], - "var_caller": somatic_caller + germline_caller, - "bedchrom": config["panel"]["chrom"] if "panel" in config else [], - "allow_missing": True - } - - - if 'rules_to_deliver' in config: - rules_to_deliver = config['rules_to_deliver'].split(",") - else: - rules_to_deliver = ['multiqc'] - - output_files_ready = 
[('path', 'path_index', 'step', 'tag', 'id', 'format')] - - for my_rule in set(rules_to_deliver): - try: - housekeeper_id = getattr(rules, my_rule).params.housekeeper_id - except (ValueError, AttributeError, RuleException, WorkflowError) as e: - LOG.warning("Cannot deliver step (rule) {}: {}".format(my_rule,e)) - continue - - LOG.info("Delivering step (rule) {}.".format(my_rule)) - output_files_ready.extend(get_rule_output(rules=rules, rule_name=my_rule, output_file_wildcards=wildcard_dict)) - - output_files_ready = [dict(zip(output_files_ready[0], value)) for value in output_files_ready[1:]] - delivery_ready = os.path.join(get_result_dir(config), "delivery_report", config["analysis"]["case_id"] + "_delivery_ready.hk" ) - write_json(output_files_ready, delivery_ready) - FormatFile(delivery_ready) - -rule all: - input: - common_output, - analysis_specific_output - output: - os.path.join(get_result_dir(config), "analysis_finish") - shell: - "date +'%Y-%m-%d T%T %:z' > {output}" diff --git a/BALSAMIC/workflows/VariantCalling_sentieon.smk b/BALSAMIC/workflows/VariantCalling_sentieon.smk deleted file mode 100644 index e253ef593..000000000 --- a/BALSAMIC/workflows/VariantCalling_sentieon.smk +++ /dev/null @@ -1,130 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - -import os -import logging - -from yapf.yapflib.yapf_api import FormatFile - -from BALSAMIC.utils.cli import write_json -from BALSAMIC.utils.rule import get_rule_output -from BALSAMIC.utils.rule import get_result_dir -from BALSAMIC.utils.rule import get_vcf - -shell.prefix("set -eo pipefail; ") - -LOG = logging.getLogger(__name__) - -# Set temporary dir environment variable -os.environ['TMPDIR'] = get_result_dir(config) - -tmp_dir = os.path.join(get_result_dir(config), "tmp") -rule_dir = config["rule_directory"] -benchmark_dir = config["analysis"]["benchmark"] -fastq_dir = get_result_dir(config) + "/fastq/" -bam_dir = get_result_dir(config) + "/bam/" -cnv_dir = get_result_dir(config) + 
"/cnv/" -cutadapt_dir = get_result_dir(config) + "/cutadapt/" -result_dir = get_result_dir(config) + "/" -qc_dir = get_result_dir(config) + "/qc/" -vcf_dir = get_result_dir(config) + "/vcf/" -vep_dir = get_result_dir(config) + "/vep/" - -singularity_image = config['singularity']['image'] - -try: - config["SENTIEON_LICENSE"] = os.environ["SENTIEON_LICENSE"] - config["SENTIEON_INSTALL_DIR"] = os.environ["SENTIEON_INSTALL_DIR"] -except Exception as error: - LOG.error("ERROR: Set SENTIEON_LICENSE and SENTIEON_INSTALL_DIR environment variable to run this pipeline.") - raise - -SENTIEON_DNASCOPE = rule_dir + 'assets/sentieon_models/SentieonDNAscopeModelBeta0.4a-201808.05.model' -SENTIEON_TNSCOPE = rule_dir + 'assets/sentieon_models/SentieonTNscopeModel_GiAB_HighAF_LowFP-201711.05.model' -os.environ["SENTIEON_TMPDIR"] = result_dir - -# explictly check if cluster_config dict has zero keys. -if len(cluster_config.keys()) == 0: - cluster_config = config - -# rules for pipeline -quality_check = ["snakemake_rules/quality_control/fastp.rule", \ - "snakemake_rules/sentieon/sentieon_qc_metrics.rule", \ - "snakemake_rules/quality_control/picard_wgs.rule", \ - "snakemake_rules/quality_control/multiqc.rule"] -preprocessing = ["snakemake_rules/sentieon/sentieon_alignment.rule"] - -if config['analysis']['analysis_type'] == "paired": - variant_calling = ["snakemake_rules/sentieon/sentieon_tn_varcall.rule", \ - "snakemake_rules/sentieon/sentieon_germline.rule", \ - "snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule", \ - "snakemake_rules/variant_calling/cnvkit_paired.rule"] - somatic_caller = ['tnhaplotyper','tnsnv', 'tnscope', 'manta', 'cnvkit'] - germline_caller = ['dnascope'] - -else: - variant_calling = ["snakemake_rules/sentieon/sentieon_t_varcall.rule", \ - "snakemake_rules/sentieon/sentieon_germline.rule", \ - "snakemake_rules/variant_calling/somatic_sv_tumor_only.rule", \ - "snakemake_rules/variant_calling/cnvkit_single.rule"] - somatic_caller = 
['tnhaplotyper','tnsnv', 'tnscope', 'manta', 'cnvkit'] - germline_caller = ['dnascope'] - -annotation = ["snakemake_rules/annotation/vep.rule"] - -pipeline = quality_check + preprocessing + variant_calling + annotation - - -for rule in pipeline: - include: os.path.join(rule_dir, rule) - -if 'delivery' in config: - wildcard_dict = { "sample": list(config["samples"].keys()), - "case_name": config["analysis"]["case_id"], - "var_type": ["CNV", "SNV", "SV"], - "var_class": ["somatic", "germline"], - "var_caller": somatic_caller + germline_caller, - "bedchrom": config["panel"]["chrom"] if "panel" in config else [], - "allow_missing": True - } - - - if 'rules_to_deliver' in config: - rules_to_deliver = config['rules_to_deliver'].split(",") - else: - rules_to_deliver = ['multiqc'] - - output_files_ready = [('path', 'path_index', 'step', 'tag', 'id', 'format')] - for my_rule in set(rules_to_deliver): - try: - housekeeper_id = getattr(rules, my_rule).params.housekeeper_id - except (ValueError, AttributeError, RuleException, WorkflowError) as e: - LOG.warning("Cannot deliver step (rule) {}: {}".format(my_rule,e)) - continue - - LOG.info("Delivering step (rule) {}.".format(my_rule)) - output_files_ready.extend(get_rule_output(rules=rules, rule_name=my_rule, output_file_wildcards=wildcard_dict)) - - output_files_ready = [dict(zip(output_files_ready[0], value)) for value in output_files_ready[1:]] - delivery_ready = os.path.join(get_result_dir(config), "delivery_report", config["analysis"]["case_id"] + "_delivery_ready.hk" ) - write_json(output_files_ready, delivery_ready) - FormatFile(delivery_ready) - - -rule all: - input: - expand(bam_dir + "{sample}.bam", sample=config["samples"]), - expand(bam_dir + "{sample}.dedup.bam", sample=config["samples"]), - expand(bam_dir + "{sample}.dedup.realign.bam", sample=config["samples"]), - expand(bam_dir + "{sample}.dedup.realign.recal_data.table", sample=config["samples"]), - expand(bam_dir + "{sample}.dedup.realign.recal.csv", 
sample=config["samples"]), - expand(bam_dir + "{sample}.dedup.realign.recal.pdf", sample=config["samples"]), - expand(vep_dir + "{vcf}.{filters}.vcf.gz", vcf=get_vcf(config, somatic_caller, [config["analysis"]["case_id"]]), filters = ["all", "pass"]), - expand(qc_dir + "{sample}_sentieon_wgs_metrics.txt", sample=config["samples"]), - expand(qc_dir + "{sample}_coverage.gz", sample=config["samples"]), - expand(qc_dir + "multiqc_report.html"), - output: - os.path.join(get_result_dir(config), "analysis_finish") - shell: - "date +'%Y-%m-%d T%T %:z' > {output}" - diff --git a/BALSAMIC/workflows/balsamic.smk b/BALSAMIC/workflows/balsamic.smk new file mode 100644 index 000000000..db3969db7 --- /dev/null +++ b/BALSAMIC/workflows/balsamic.smk @@ -0,0 +1,382 @@ +# vim: syntax=python tabstop=4 expandtab +# coding: utf-8 + +import os +import logging +import tempfile + +from pathlib import Path +from yapf.yapflib.yapf_api import FormatFile + +from snakemake.exceptions import RuleException, WorkflowError + +from PyPDF2 import PdfFileMerger + +from BALSAMIC.utils.exc import BalsamicError + +from BALSAMIC.utils.cli import write_json +from BALSAMIC.utils.cli import check_executable +from BALSAMIC.utils.cli import generate_h5 + +from BALSAMIC.utils.models import VarCallerFilter, UMIworkflowConfig + +from BALSAMIC.utils.workflowscripts import plot_analysis + +from BALSAMIC.utils.rule import (get_variant_callers, get_rule_output, get_result_dir, + get_vcf, get_picard_mrkdup, get_sample_type, + get_threads, get_script_path) + +from BALSAMIC.utils.constants import (SENTIEON_DNASCOPE, SENTIEON_TNSCOPE, RULE_DIRECTORY, + VARDICT_SETTINGS, SENTIEON_VARCALL_SETTINGS, VCFANNO_TOML, + umiworkflow_params) + +shell.executable("/bin/bash") +shell.prefix("set -eo pipefail; ") + +LOG = logging.getLogger(__name__) + +# Create a temporary directory with trailing / +tmp_dir = os.path.join(get_result_dir(config), "tmp", "" ) +Path.mkdir(Path(tmp_dir), exist_ok=True) + +benchmark_dir = 
config["analysis"]["benchmark"] +fastq_dir = get_result_dir(config) + "/fastq/" +bam_dir = get_result_dir(config) + "/bam/" +cnv_dir = get_result_dir(config) + "/cnv/" +fastqc_dir = get_result_dir(config) + "/fastqc/" +result_dir = get_result_dir(config) + "/" +vcf_dir = get_result_dir(config) + "/vcf/" +vep_dir = get_result_dir(config) + "/vep/" +qc_dir = get_result_dir(config) + "/qc/" +delivery_dir = get_result_dir(config) + "/delivery/" + +umi_dir = get_result_dir(config) + "/umi/" +umi_qc_dir = qc_dir + "umi_qc/" + +singularity_image = config['singularity']['image'] + +# picarddup flag +picarddup = get_picard_mrkdup(config) + +# Varcaller filter settings +VARDICT = VarCallerFilter.parse_obj(VARDICT_SETTINGS) +SENTIEON_CALLER = VarCallerFilter.parse_obj(SENTIEON_VARCALL_SETTINGS) + +# parse parameters as constants for umiworkflow +paramsumi = UMIworkflowConfig.parse_obj(umiworkflow_params) + +# Capture kit name +if config["analysis"]["sequencing_type"] != "wgs": + capture_kit = os.path.split(config["panel"]["capture_kit"])[1] + +# Sample names for tumor or normal +tumor_sample = get_sample_type(config["samples"], "tumor")[0] +if config['analysis']['analysis_type'] == "paired": + normal_sample = get_sample_type(config["samples"], "normal")[0] + +# Set case id/name +case_id = config["analysis"]["case_id"] + +# Declare sentieon variables +sentieon = True +SENTIEON_LICENSE = '' +SENTIEON_INSTALL_DIR = '' + +# explicitly check if cluster_config dict has zero keys. 
+if len(cluster_config.keys()) == 0: + cluster_config = config + +try: + config["SENTIEON_LICENSE"] = os.environ["SENTIEON_LICENSE"] + if os.getenv("SENTIEON_EXEC") is not None: + config["SENTIEON_EXEC"] = os.environ["SENTIEON_EXEC"] + else: + config["SENTIEON_EXEC"] = Path(os.environ["SENTIEON_INSTALL_DIR"], "bin", "sentieon").as_posix() + + config["SENTIEON_TNSCOPE"] = SENTIEON_TNSCOPE + config["SENTIEON_DNASCOPE"] = SENTIEON_DNASCOPE +except KeyError as error: + sentieon = False + LOG.warning("Set environment variables SENTIEON_LICENSE, SENTIEON_INSTALL_DIR, SENTIEON_EXEC " + "to run SENTIEON variant callers") + +if not Path(config["SENTIEON_EXEC"]).exists(): + LOG.error("Senteion exectuable not found {}".format(Path(config["SENTIEON_EXEC"]).as_posix())) + raise BalsamicError + +if config["analysis"]["sequencing_type"] == "wgs" and not sentieon: + LOG.error("Set environment variables SENTIEON_LICENSE, SENTIEON_INSTALL_DIR, SENTIEON_EXEC " + "to run SENTIEON variant callers") + raise BalsamicError + +# Set temporary dir environment variable +os.environ["SENTIEON_TMPDIR"] = result_dir +os.environ['TMPDIR'] = get_result_dir(config) + +# Define set of rules +qc_rules = [ + "snakemake_rules/quality_control/fastp.rule", + "snakemake_rules/quality_control/fastqc.rule", + "snakemake_rules/quality_control/multiqc.rule", +] + + +if config["analysis"]["sequencing_type"] == "wgs": + qc_rules.extend([ + "snakemake_rules/quality_control/sentieon_qc_metrics.rule", + "snakemake_rules/quality_control/picard_wgs.rule"]) + + align_rules = ["snakemake_rules/align/sentieon_alignment.rule"] +else: + chromlist = config["panel"]["chrom"] + qc_rules.extend([ + "snakemake_rules/quality_control/GATK.rule", + "snakemake_rules/quality_control/picard.rule", + "snakemake_rules/quality_control/sambamba_depth.rule", + "snakemake_rules/quality_control/mosdepth.rule" + ]) + + align_rules = [ + "snakemake_rules/align/bwa_mem.rule", + "snakemake_rules/umi/sentieon_umiextract.rule", + 
"snakemake_rules/umi/sentieon_consensuscall.rule" + ] + + +annotation_rules = [ "snakemake_rules/annotation/vep.rule"] + +umiqc_rules = [ "snakemake_rules/umi/qc_umi.rule" ] + +generatetable_umi_rules = [ "snakemake_rules/umi/generate_AF_tables.rule" ] + +if config["analysis"]["sequencing_type"] == "wgs": + variantcalling_rules = ["snakemake_rules/variant_calling/sentieon_germline.rule"] + germline_caller = ["dnascope"] +else: + variantcalling_rules = [ + "snakemake_rules/variant_calling/germline.rule", + "snakemake_rules/variant_calling/split_bed.rule" + ] + + germline_caller_SNV = get_variant_callers(config=config, analysis_type="paired", + workflow_solution="BALSAMIC", + mutation_type="SNV", + mutation_class="germline") + germline_caller_SV = get_variant_callers(config=config, analysis_type="paired", + workflow_solution="BALSAMIC", + mutation_type="SV", + mutation_class="germline") + + germline_caller = germline_caller_SNV + germline_caller_SV + + + if sentieon: + germline_caller.append("dnascope") + +somatic_caller_sv = ['manta', 'cnvkit'] +if config["analysis"]["sequencing_type"] == "wgs": + somatic_caller_snv = ['tnhaplotyper', 'tnsnv', 'tnscope'] + variantcalling_rules.append("snakemake_rules/variant_calling/sentieon_split_snv_sv.rule") + if config['analysis']['analysis_type'] == "paired": + variantcalling_rules.extend(["snakemake_rules/variant_calling/sentieon_tn_varcall.rule", + "snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule", + "snakemake_rules/variant_calling/cnvkit_paired.rule"]) + annotation_rules.append("snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule") + else: + variantcalling_rules.extend(["snakemake_rules/variant_calling/sentieon_t_varcall.rule", + "snakemake_rules/variant_calling/somatic_sv_tumor_only.rule", + "snakemake_rules/dragen_suite/dragen_dna.rule", + "snakemake_rules/variant_calling/cnvkit_single.rule"]) + annotation_rules.append("snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule") +else: + 
sentieon_callers = ["tnhaplotyper"] if sentieon else [] + annotation_rules.append("snakemake_rules/annotation/rankscore.rule") + + if config['analysis']['analysis_type'] == "paired": + annotation_rules.append("snakemake_rules/annotation/varcaller_filter_tumor_normal.rule") + + qc_rules.append("snakemake_rules/quality_control/contest.rule") + + variantcalling_rules.extend([ + "snakemake_rules/variant_calling/somatic_tumor_normal.rule", + "snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule", + "snakemake_rules/variant_calling/mergetype.rule", + "snakemake_rules/variant_calling/cnvkit_paired.rule", + "snakemake_rules/umi/sentieon_varcall_tnscope_tn.rule" + ]) + + somatic_caller_snv = get_variant_callers(config=config, + analysis_type="paired", + workflow_solution="BALSAMIC", + mutation_type="SNV", + mutation_class="somatic") + + somatic_caller_snv_umi = get_variant_callers(config=config, + analysis_type="paired", + workflow_solution="Sentieon_umi", + mutation_type="SNV", + mutation_class="somatic") + + somatic_caller_snv = somatic_caller_snv + sentieon_callers + somatic_caller_snv_umi + else: + + annotation_rules.append("snakemake_rules/annotation/varcaller_filter_tumor_only.rule") + + variantcalling_rules.extend([ + "snakemake_rules/variant_calling/cnvkit_single.rule", + "snakemake_rules/variant_calling/mergetype_tumor.rule", + "snakemake_rules/variant_calling/somatic_tumor_only.rule", + "snakemake_rules/variant_calling/somatic_sv_tumor_only.rule", + "snakemake_rules/umi/sentieon_varcall_tnscope.rule" + ]) + + somatic_caller_snv = get_variant_callers(config=config, + analysis_type="single", + workflow_solution="BALSAMIC", + mutation_type="SNV", + mutation_class="somatic") + + somatic_caller_snv_umi = get_variant_callers(config=config, + analysis_type="single", + workflow_solution="Sentieon_umi", + mutation_type="SNV", + mutation_class="somatic") + + somatic_caller_snv = somatic_caller_snv + sentieon_callers + somatic_caller_snv_umi + +somatic_caller = 
somatic_caller_snv + somatic_caller_sv + +# Remove variant callers from list of callers +if "disable_variant_caller" in config: + variant_callers_to_remove = config["disable_variant_caller"].split(",") + for var_caller in variant_callers_to_remove: + if var_caller in somatic_caller: + somatic_caller.remove(var_caller) + if var_caller in germline_caller: + germline_caller.remove(var_caller) + +config["rules"] = align_rules + qc_rules + +# Define common and analysis specific outputs +quality_control_results = [result_dir + "qc/" + "multiqc_report.html"] + +analysis_specific_results = [] +if config['analysis']["analysis_type"] in ["paired", "single"]: + config["rules"] = config["rules"] + variantcalling_rules + annotation_rules + analysis_specific_results = [expand(vep_dir + "{vcf}.vcf.gz", + vcf=get_vcf(config, germline_caller, config["samples"])), + expand(vep_dir + "{vcf}.{filters}.vcf.gz", + vcf=get_vcf(config, somatic_caller, [config["analysis"]["case_id"]]), + filters=["all", "pass"])] + +if config['analysis']["analysis_type"] in ["paired", "single"] and config["analysis"]["sequencing_type"] != "wgs" and config["analysis"]["umiworkflow"]: + analysis_specific_results.extend(expand(vep_dir + "{vcf}.pass.balsamic_stat", + vcf=get_vcf(config, ["vardict"], [config["analysis"]["case_id"]]))) + analysis_specific_results.extend([expand(vep_dir + "{vcf}.all.filtered.pass.ranked.vcf.gz", + vcf=get_vcf(config, ["vardict"], [config["analysis"]["case_id"]]))]) + + analysis_specific_results.extend([expand(vep_dir + "{vcf}.pass.vcf.gz", + vcf=get_vcf(config, ["TNscope_umi"], [config["analysis"]["case_id"]])), + expand(umi_qc_dir + "{sample}.umi.mean_family_depth", + sample = config["samples"])]) + config["rules"] = config["rules"] + umiqc_rules + + if "background_variants" in config: + analysis_specific_results.extend([expand(umi_qc_dir + "{case_name}.{var_caller}.AFtable.txt", + case_name = config["analysis"]["case_id"], + var_caller =["TNscope_umi"])]), + config["rules"] = 
config["rules"] + generatetable_umi_rules + +else: + analysis_specific_results.extend([expand(vep_dir + "{vcf}.filtered.pass.vcf.gz", + vcf=get_vcf(config, ["tnscope"], [config["analysis"]["case_id"]]))]) + +if config["analysis"]["sequencing_type"] == "wgs" and config['analysis']['analysis_type'] == "single": + if "dragen" in config: + analysis_specific_results.extend([Path(result_dir, "dragen", "SNV.somatic." + config["analysis"]["case_id"] + ".dragen_tumor.bam").as_posix(), + Path(result_dir, "dragen", "SNV.somatic." + config["analysis"]["case_id"] + ".dragen.vcf.gz").as_posix()]) + +for r in config["rules"]: + include: Path(RULE_DIRECTORY, r).as_posix() + +if 'benchmark_plots' in config: + log_dir = config["analysis"]["log"] + if not check_executable("sh5util"): + LOG.warning("sh5util executable does not exist. Won't be able to plot analysis") + else: + # Make individual plot per job + for log_file in Path(log_dir).glob("*.err"): + log_file_list = log_file.name.split(".") + job_name = ".".join(log_file_list[0:4]) + job_id = log_file_list[4].split("_")[1] + h5_file = generate_h5(job_name, job_id, log_file.parent) + benchmark_plot = Path(benchmark_dir, job_name + ".pdf") + + log_file_plot = plot_analysis(log_file, h5_file, benchmark_plot) + logging.debug("Plot file for {} available at: {}".format(log_file.as_posix(), log_file_plot)) + + # Merge plots into one based on rule name + for my_rule in vars(rules).keys(): + my_rule_pdf = PdfFileMerger() + my_rule_plots = list() + for plots in Path(benchmark_dir).glob(f"BALSAMIC*.{my_rule}.*.pdf"): + my_rule_pdf.append(plots.as_posix()) + my_rule_plots.append(plots) + my_rule_pdf.write(Path(benchmark_dir, my_rule+".pdf").as_posix()) + my_rule_pdf.close() + + # Delete previous plots after merging + for plots in my_rule_plots: + plots.unlink() + + + +if 'delivery' in config: + wildcard_dict = {"sample": list(config["samples"].keys()), + "case_name": config["analysis"]["case_id"], + "allow_missing": True + } + + if 
config['analysis']["analysis_type"] in ["paired", "single"]: + wildcard_dict.update({"var_type": ["CNV", "SNV", "SV"], + "var_class": ["somatic", "germline"], + "var_caller": somatic_caller + germline_caller, + "bedchrom": config["panel"]["chrom"] if "panel" in config else [], + }) + + if 'rules_to_deliver' in config: + rules_to_deliver = config['rules_to_deliver'].split(",") + else: + rules_to_deliver = ['multiqc'] + + output_files_ready = [('path', 'path_index', 'step', 'tag', 'id', 'format')] + + for my_rule in set(rules_to_deliver): + try: + housekeeper_id = getattr(rules, my_rule).params.housekeeper_id + except (ValueError, AttributeError, RuleException, WorkflowError) as e: + LOG.warning("Cannot deliver step (rule) {}: {}".format(my_rule, e)) + continue + + LOG.info("Delivering step (rule) {}.".format(my_rule)) + output_files_ready.extend(get_rule_output(rules=rules, + rule_name=my_rule, + output_file_wildcards=wildcard_dict)) + + output_files_ready = [dict(zip(output_files_ready[0], value)) for value in output_files_ready[1:]] + delivery_ready = os.path.join(get_result_dir(config), + "delivery_report", + config["analysis"]["case_id"] + "_delivery_ready.hk") + write_json(output_files_ready, delivery_ready) + FormatFile(delivery_ready) + +rule all: + input: + quality_control_results + analysis_specific_results + output: + os.path.join(get_result_dir(config), "analysis_finish") + run: + import datetime + + with open(str(output[0]), mode='w') as finish_file: + finish_file.write('%s\n' % datetime.datetime.now()) diff --git a/BALSAMIC/workflows/GenerateRef b/BALSAMIC/workflows/reference.smk similarity index 65% rename from BALSAMIC/workflows/GenerateRef rename to BALSAMIC/workflows/reference.smk index 42d2a6f63..5547400c6 100644 --- a/BALSAMIC/workflows/GenerateRef +++ b/BALSAMIC/workflows/reference.smk @@ -1,4 +1,3 @@ -#! 
python # syntax=python tabstop=4 expandtab # coding: utf-8 @@ -8,10 +7,9 @@ import logging from datetime import date -from BALSAMIC.utils.rule import get_conda_env from BALSAMIC.utils.rule import get_script_path from BALSAMIC.utils.rule import get_reference_output_files -from BALSAMIC.utils.models import ReferenceUrlsModel, ReferenceMeta +from BALSAMIC.utils.models import ReferenceMeta from BALSAMIC.utils.constants import REFERENCE_FILES LOG = logging.getLogger(__name__) @@ -29,7 +27,7 @@ else: genome_ver = 'hg19' # essential path reference files -basedir = os.path.join(config['output'], genome_ver) +basedir = os.path.join(config['output']) genome_dir = os.path.join(basedir, "genome") vcf_dir = os.path.join(basedir, "variants") vep_dir = os.path.join(basedir, "vep") @@ -38,10 +36,12 @@ cosmicdb_key = config['cosmic_key'] # Set temporary dir environment variable os.environ['TMPDIR'] = basedir -# VCF files list for wildcards -VCF = get_reference_output_files(REFERENCE_FILES[genome_ver], 'vcf') +# indexable VCF files +indexable_vcf_files = get_reference_output_files(REFERENCE_FILES[genome_ver], + file_type='vcf', + gzip = True) -# intialize reference files +# intialize reference files REFERENCE_FILES[genome_ver]['basedir'] = basedir reference_file_model = ReferenceMeta.parse_obj(REFERENCE_FILES[genome_ver]) @@ -51,17 +51,21 @@ hc_vcf_1kg_url = reference_file_model.hc_vcf_1kg mills_1kg_url = reference_file_model.mills_1kg known_indel_1kg_url = reference_file_model.known_indel_1kg vcf_1kg_url = reference_file_model.vcf_1kg +gnomad_url = reference_file_model.gnomad_variant +gnomad_tbi_url = reference_file_model.gnomad_variant_index cosmicdb_url = reference_file_model.cosmicdb wgs_calling_url = reference_file_model.wgs_calling genome_chrom_size_url = reference_file_model.genome_chrom_size refgene_txt_url = reference_file_model.refgene_txt refgene_sql_url = reference_file_model.refgene_sql +rankscore_url = reference_file_model.rankscore # add secrets from config to items 
that need them cosmicdb_url.secret=config['cosmic_key'] -check_md5 = os.path.join(basedir, "reference_" + str(current_day) + ".md5") +check_md5 = os.path.join(basedir, "reference.json.md5") +shell.executable("/bin/bash") shell.prefix("set -eo pipefail; ") def get_md5(filename): @@ -79,13 +83,11 @@ def create_md5(reference, check_md5): if os.path.isfile(value): fh.write( get_md5(value) + ' ' + value + '\n') - singularity_image = config['singularity']['image'] ########################################################## # Generating Reference files for BALSAMIC pipeline # Writing reference json file -# ########################################################## rule all: @@ -102,17 +104,22 @@ rule all: tg_high_vcf = hc_vcf_1kg_url.get_output_file+ ".gz", mills_1kg = mills_1kg_url.get_output_file + ".gz", known_indel_1kg = known_indel_1kg_url.get_output_file + ".gz", + gnomad_variant_vcf = gnomad_url.get_output_file, + gnomad_variant_index = gnomad_tbi_url.get_output_file, cosmic_vcf = cosmicdb_url.get_output_file + ".gz", - variants_idx = expand( os.path.join(vcf_dir,"{vcf}.gz.tbi"), vcf=VCF), + variants_idx = expand(os.path.join(vcf_dir,"{vcf}.gz.tbi"), vcf=indexable_vcf_files), vep = directory(vep_dir), wgs_calling = wgs_calling_url.get_output_file, - genome_chrom_size = genome_chrom_size_url.get_output_file + genome_chrom_size = genome_chrom_size_url.get_output_file, + rankscore = rankscore_url.get_output_file, output: finished = os.path.join(basedir,"reference.finished"), reference_json = os.path.join(basedir, "reference.json"), check_md5 = check_md5 log: os.path.join(basedir, "reference.json.log") + params: + genome_ver = genome_ver run: import json @@ -124,13 +131,16 @@ rule all: "1kg_snps_high": input.tg_high_vcf, "1kg_known_indel": input.known_indel_1kg, "mills_1kg": input.mills_1kg, + "gnomad_variant": input.gnomad_variant_vcf, "cosmic": input.cosmic_vcf, "exon_bed": input.refseq_bed, "refflat": input.refseq_flat, "refGene": input.refgene, 
"wgs_calling_interval": input.wgs_calling, "genome_chrom_size": input.genome_chrom_size, - "vep": input.vep + "vep": input.vep, + "genome": params.genome_ver, + "rankscore": input.rankscore, } with open(str(output.reference_json), "w") as fh: @@ -143,25 +153,27 @@ rule all: ########################################################## # Download the reference genome, variant db -# - .fasta, dbsnp.vcf, 1kg.vcf, refFlat ########################################################## -reference_data = [reference_genome_url, dbsnp_url, hc_vcf_1kg_url, mills_1kg_url, known_indel_1kg_url, vcf_1kg_url, -wgs_calling_url, genome_chrom_size_url, cosmicdb_url, refgene_txt_url, refgene_sql_url] +download_content = [reference_genome_url, dbsnp_url, hc_vcf_1kg_url, + mills_1kg_url, known_indel_1kg_url, vcf_1kg_url, + wgs_calling_url, genome_chrom_size_url, + gnomad_url, gnomad_tbi_url, + cosmicdb_url, refgene_txt_url, refgene_sql_url, rankscore_url] rule download_reference: output: - expand("{output}", output=[ref.get_output_file for ref in reference_data]) + expand("{output}", output=[ref.get_output_file for ref in download_content]) run: import requests - for ref in reference_data: + for ref in download_content: output_file = ref.get_output_file log_file = output_file + ".log" if ref.url.scheme == "gs": - cmd = "gsutil cp -L {}.log {} -".format(log_file, ref.url) + cmd = "export TMPDIR=/tmp; gsutil cp -L {} {} -".format(log_file, ref.url) else: - cmd = "wget -a {}.log -O - {}".format(log_file, ref.url) + cmd = "wget -a {} -O - {}".format(log_file, ref.url) if ref.secret: try: @@ -185,90 +197,92 @@ rule prepare_refgene: refgene_txt = refgene_txt_url.get_output_file, refgene_sql = refgene_sql_url.get_output_file params: - refgene_sql_awk = get_script_path('refseq_sql.awk'), - conda_env = get_conda_env(config["conda_env_yaml"], "bedtools") + refgene_sql_awk = get_script_path('refseq_sql.awk'), + conda_env = config["bioinfo_tools"].get("bedtools") output: refflat = 
refgene_txt_url.get_output_file.replace("txt", "flat"), bed = refgene_txt_url.get_output_file.replace("txt", "flat") + ".bed" log: refgene_sql = os.path.join(basedir, "genome", "refgene_sql.log"), refgene_txt = os.path.join(basedir, "genome", "refgene_txt.log") - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("bedtools") + ".sif").as_posix() shell: - "source activate {params.conda_env}; " - "header=$(awk -f {params.refgene_sql_awk} {input.refgene_sql}); " - "(echo \"$header\"; cat {input.refgene_txt};) " - "| csvcut -t -c chrom,exonStarts,exonEnds,name,score,strand,exonCount,txStart,txEnd,name2 " - "| csvformat -T " - "| bedtools expand -c 2,3 " - "| awk '$1~/chr[1-9]/ && $1!~/[_]/' | cut -c 4- | sort -k1,1 -k2,2n > {output.bed}; " - "awk -v OFS=\"\\t\" '$3!~/_/ {{ gsub(\"chr\",\"\",$3); $1=$13; print }}' {input.refgene_txt}" - "| cut -f 1-11 > {output.refflat}; " - "sed -i 's/chr//g' {input.refgene_txt}; " - "source deactivate; " + """ +source activate {params.conda_env}; +header=$(awk -f {params.refgene_sql_awk} {input.refgene_sql}); +(echo \"$header\"; cat {input.refgene_txt};) \ +| csvcut -t -c chrom,exonStarts,exonEnds,name,score,strand,exonCount,txStart,txEnd,name2 \ +| csvformat -T \ +| bedtools expand -c 2,3 \ +| awk '$1~/chr[1-9]/ && $1!~/[_]/' | cut -c 4- | sort -k1,1 -k2,2n > {output.bed}; + +awk -v OFS=\"\\t\" '$3!~/_/ {{ gsub(\"chr\",\"\",$3); $1=$13; print }}' {input.refgene_txt} \ +| cut -f 1-11 > {output.refflat}; +sed -i 's/chr//g' {input.refgene_txt}; + """ ########################################################## -# Bgzipping and tabix the vcf files -# +# bgzip and tabix the vcf files that are vcf ########################################################## rule bgzip_tabix: input: - os.path.join(vcf_dir, "{vcf}") + os.path.join(vcf_dir, "{vcf}.vcf") params: type = 'vcf', - conda_env = get_conda_env(config["conda_env_yaml"], "tabix") + conda_env = config["bioinfo_tools"].get("tabix") output: - 
os.path.join(vcf_dir, "{vcf}.gz"), - os.path.join(vcf_dir, "{vcf}.gz.tbi") + os.path.join(vcf_dir, "{vcf}.vcf.gz"), + os.path.join(vcf_dir, "{vcf}.vcf.gz.tbi") log: - os.path.join(vcf_dir, "{vcf}.gz_tbi.log") - singularity: singularity_image + os.path.join(vcf_dir, "{vcf}.vcf.gz_tbi.log") + singularity: Path(singularity_image, config["bioinfo_tools"].get("tabix") + ".sif").as_posix() shell: - "source activate {params.conda_env};" - "bgzip {input} && tabix -p {params.type} {input}.gz 2> {log};" - "source deactivate;" + """ +source activate {params.conda_env}; +bgzip {input} && tabix -p {params.type} {input}.gz 2> {log}; + """ ########################################################## # Create BWA Index for reference genome -# ########################################################## rule bwa_index: input: reference_genome_url.get_output_file params: - conda_env = get_conda_env(config["conda_env_yaml"], "bwa") + conda_env = config["bioinfo_tools"].get("bwa") output: expand(reference_genome_url.get_output_file + "{ext}", ext=['.amb','.ann','.bwt','.pac','.sa']) log: reference_genome_url.get_output_file + ".bwa_index.log" - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("bwa") + ".sif").as_posix() shell: - "source activate {params.conda_env};" - "bwa index -a bwtsw {input} 2> {log};" - "source deactivate;" + """ +source activate {params.conda_env}; +bwa index -a bwtsw {input} 2> {log}; + """ ########################################################## # Create index for fasta file - .fai -# ########################################################## rule samtools_index_fasta: input: reference_genome_url.get_output_file params: - conda_env = get_conda_env(config["conda_env_yaml"], "samtools") + conda_env = config["bioinfo_tools"].get("samtools") output: reference_genome_url.get_output_file + ".fai" log: reference_genome_url.get_output_file + ".faidx.log" - singularity: singularity_image + singularity: 
Path(singularity_image, config["bioinfo_tools"].get("samtools") + ".sif").as_posix() shell: - "source activate {params.conda_env};" - "samtools faidx {input} 2> {log};" - "source deactivate;" + """ +source activate {params.conda_env}; +samtools faidx {input} 2> {log}; + """ ########################################################## @@ -280,18 +294,17 @@ rule picard_ref_dict: input: reference_genome_url.get_output_file params: - conda_env = get_conda_env(config["conda_env_yaml"], "picard") + conda_env = config["bioinfo_tools"].get("picard") output: reference_genome_url.get_output_file.replace("fasta","dict") log: reference_genome_url.get_output_file + ".ref_dict.log" - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() shell: - "source activate {params.conda_env};" - "picard CreateSequenceDictionary " - " REFERENCE={input} " - " OUTPUT={output} 2> {log};" - "source deactivate;" + """ +source activate {params.conda_env}; +picard CreateSequenceDictionary REFERENCE={input} OUTPUT={output} 2> {log}; + """ ########################################################## @@ -302,21 +315,22 @@ rule picard_ref_dict: rule vep_install: params: species = "homo_sapiens_merged", - assembly = "GRCh37", + assembly = "GRCh37" if genome_ver == 'hg19' else "GRCh38", plugins = "all", - conda_env = get_conda_env(config["conda_env_yaml"], "ensembl-vep") + conda_env = config["bioinfo_tools"].get("ensembl-vep") output: directory(vep_dir) log: os.path.join(vep_dir, "vep_install_cache.log") - singularity: singularity_image + singularity: Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix() shell: - "source activate {params.conda_env};" - "vep_install --SPECIES {params.species} " - " --AUTO cfp " - " --ASSEMBLY {params.assembly} " - " --CACHEDIR {output} " - " --PLUGINS {params.plugins} " - " --NO_HTSLIB --CONVERT --NO_UPDATE 2> {log}; " - "source deactivate;" + """ +source activate 
{params.conda_env}; +vep_install --SPECIES {params.species} \ +--AUTO cfp \ +--ASSEMBLY {params.assembly} \ +--CACHEDIR {output} \ +--PLUGINS {params.plugins} \ +--NO_HTSLIB --CONVERT --NO_UPDATE 2> {log}; + """ diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 01f821ffc..52bd14b3e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,9 +1,51 @@ +[7.0.0] +------ + +Added +^^^^^ + +* ``balsamic init`` to download reference and related containers done in PRs #464 #538 +* ``balsamic config case`` now only take a cache path instead of container and reference #538 +* UMI workflow added to main workflow in series of PRs #469 #477 #483 #498 #503 #514 #517 +* DRAGEN for WGS applications in PR #488 +* A framework for QC check PR #401 +* ``--quiet``` option for ``run analysis`` PR #491 +* Benchmark SLURM jobs after the analysis is finished PR #534 +* One container per conda environment (i.e. decouple containers) PR #511 #525 #522 +* ``--disable-variant-caller`` command for ``report deliver`` PR #439 +* Added genmod and rankscore in series of two PRs #531 and #533 +* Variant filtering to Tumor-Normal in PR #534 +* Split SNV/InDels and SVs from TNScope variant caller PR #540 +* WGS Tumor only variant filters added in PR #548 + +Changed +^^^^^^^ + +* Update Manta to 1.6.0 PR #470 +* Update FastQC to 0.11.9 PR #532 +* Update BCFTools to 1.11 PR #537 +* Update Samtools to 1.11 PR #537 +* Increase resources and runtime for various workflows in PRs #482 +* Python package dependenicies versions fixed in PR #480 +* QoL changes to workflow in series of PR #471 +* Series of documentation updates in PRs #489 #553 +* QoL changes to scheduler script PR #491 +* QoL changes to how temporary directories are handlded PR #516 +* TNScope model apply rule merged with TNScope variant calling for tumor-normal in WGS #540 +* Decoupled ``fastp`` rule into two rules to make it possible to use it for UMI runs #570 + + +Fixed +^^^^^ + +* A bug in Manta variant calling rules that didn't name samples 
properly to TUMOR/NORMAL in the VCF file #572 + + [6.1.2] ------- Changed ^^^^^^^ - * Changed hk delivery tag for coverage-qc-report @@ -13,6 +55,20 @@ Changed Fixed ^^^^^ +* No UMI trimming for WGS applications #486 +* Fixed a bug where BALSAMIC was checking for sacct/jobid file in local mode PR #497 +* ``readlink`` command in ``vep_germline``, ``vep_somatic``, ``split_bed``, and ``GATK_popVCF`` #533 +* Fix various bugs for memory handling of Picardtools and its executable in PR #534 +* Fixed various issues with ``gsutils`` in PR #550 + +Removed +^^^^^^^ + +* ``gatk-register`` command removed from installing GATK PR #496 + +[6.1.1] +------- + * Fixed a bug with missing QC templates after ``pip install`` @@ -24,6 +80,7 @@ Added * CLI option to expand report generation for TGA and WES runs. Please see ``balsamic report deliver --help`` * BALSAMIC now generates a custom HTML report for TGA and WES cases. + [6.0.4] ------- diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..5e21ffa0a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,22 @@ +FROM continuumio/miniconda3:4.8.2-alpine + +LABEL base_image="continuumio/miniconda3:4.8.2-alpine" +LABEL about.home="https://github.com/Clinical-Genomics/BALSAMIC" +LABEL about.documentation="https://balsamic.readthedocs.io/" +LABEL about.license="MIT License (MIT)" +LABEL about.maintainer="Hassan Foroughi hassan dot foroughi at scilifelab dot se" +LABEL about.description="Bioinformatic analysis pipeline for somatic mutations in cancer" +LABEL about.version="6.0.2" + +ENV PATH="/opt/conda/bin/:${PATH}" + +ARG CONTAINER_NAME + +# Copy all project files +COPY BALSAMIC/containers/${CONTAINER_NAME}/${CONTAINER_NAME}.yaml ./${CONTAINER_NAME}.yaml +COPY BALSAMIC/containers/${CONTAINER_NAME}/${CONTAINER_NAME}.sh ./${CONTAINER_NAME}.sh + +USER root + +RUN apk add --no-cache bash +RUN /bin/sh ${CONTAINER_NAME}.sh ${CONTAINER_NAME} && conda clean --all --yes diff --git a/MANIFEST.in b/MANIFEST.in index 421d6836f..a5d543699 
100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -13,6 +13,7 @@ recursive-include BALSAMIC/assets/scripts * recursive-include BALSAMIC/assets/sentieon_models * recursive-include BALSAMIC/assets/report_template * include BALSAMIC/assets/scout_config_template.yaml +recursive-include BALSAMIC/assets/vcfanno * # SnakeMake assets recursive-include BALSAMIC/snakemake_rules * diff --git a/docs/cli_package.rst b/docs/cli_package.rst index 311eb972f..e446f96a7 100644 --- a/docs/cli_package.rst +++ b/docs/cli_package.rst @@ -1,6 +1,6 @@ -===================== -CLI code reference -===================== +============= +CLI reference +============= .. click:: BALSAMIC.commands.base:cli :prog: BALSAMIC diff --git a/docs/cli_reference.rst b/docs/cli_reference.rst deleted file mode 100644 index b8906bafd..000000000 --- a/docs/cli_reference.rst +++ /dev/null @@ -1,206 +0,0 @@ -============= -CLI reference -============= - -BALSAMIC (**version** = 6.1.2) is using Click. This section covers the CLI reference for BALSAMIC. - -.. contents:: - -Short summary -------------- -Base command, ``balsamic`` has three subcommands: 1) ``install_env`` which is used for installting -conda environemnts 2) ``create_config`` is to create a config file -necessary for running the analysis. 3) ``run_analysis`` is for running -the actual workflow. - -Base command -~~~~~~~~~~~~ - -:: - - Usage: balsamic [OPTIONS] COMMAND [ARGS]... - - BALSAMIC 6.1.2: Bioinformatic Analysis pipeLine for SomAtic MutatIons in - Cancer - - Options: - --loglevel [DEBUG|INFO|WARNING|ERROR|CRITICAL] - Set the level of log output. [default: - DEBUG] - --version Show the version and exit. - --help Show this message and exit. - - Commands: - config create config files required for running the pipeline. 
- plugins Additional and helper utilities for third party applications - run Run BALSAMIC on a provided config file - - -create config for case analysis -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:: - - Usage: balsamic config [OPTIONS] COMMAND [ARGS]... - - create config files required for running the pipeline. - - Options: - --help Show this message and exit. - - Commands: - case Create a sample config file from input sample data - reference config workflow for generate reference - -:: - - Usage: balsamic config case [OPTIONS] - - Prepares a config file for balsamic run_analysis. For now it is just - treating json as dictionary and merging them as it is. So this is just a - placeholder for future. - - Options: - --umi / --no-umi UMI processing steps for samples with umi - tags [default: True] - --umi-trim-length INTEGER Trim N bases from reads in fastq [default: - 5] - --quality-trim / --no-quality-trim - Trim low quality reads in fastq [default: - True] - --adapter-trim / --no-adapter-trim - Trim adapters from reads in fastq [default: - False] - -r, --reference-config PATH Reference config file. [required] - -p, --panel-bed PATH Panel bed file for variant calling. - -o, --output-config TEXT Output a json config filename ready to be - imported for run-analysis - -t, --tumor TEXT Fastq files for tumor sample. - Example: if files are - tumor_fqreads_1.fastq.gz - tumor_fqreads_2.fastq.gz, the - input should be --tumor tumor_fqreads - [required] - -n, --normal TEXT Fastq files for normal sample. - Example: if files are - normal_fqreads_1.fastq.gz - normal_fqreads_2.fastq.gz, the - input should be --normal normal_fqreads - --case-id TEXT Sample id that is used for reporting, - naming the analysis jobs, and analysis path - [required] - --fastq-prefix TEXT Prefix to fastq file. The - string that comes after readprefix - --analysis-dir PATH Root analysis path to store - analysis logs and results. 
The final path - will be analysis-dir/sample-id - --overwrite-config / --no-overwrite-config - Overwrite output config file - --create-dir / --no-create-dir Create analysis directiry. - --singularity PATH Download singularity image for BALSAMIC - [required] - --help Show this message and exit. - -:: - - Usage: balsamic config reference [OPTIONS] - - Configure workflow for reference generation - - Options: - -o, --outdir TEXT output directory for ref files eg: reference - [required] - -c, --cosmic-key TEXT cosmic db authentication key [required] - -s, --snakefile PATH snakefile for reference generation [default: /home/h - assan.foroughi/repos/BALSAMIC/BALSAMIC/workflows/Gene - rateRef] - -d, --dagfile TEXT DAG file for overview [default: - generate_ref_worflow_graph] - --singularity PATH Download singularity image for BALSAMIC [required] - --help Show this message and exit. - -run case analysis and reference creation -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:: - - Usage: balsamic run [OPTIONS] COMMAND [ARGS]... - - Run BALSAMIC on a provided config file - - Options: - --help Show this message and exit. - - Commands: - analysis Run the analysis on a provided sample config-file - reference Run the GenerateRef workflow - -:: - - Usage: balsamic run analysis [OPTIONS] - - Runs BALSAMIC workflow on the provided sample's config file - - Options: - -a, --analysis-type [qc|paired|single] - Type of analysis to run from input config - file. By default it will read - from config file, but it will override - config file if it is set here. - -S, --snake-file PATH Input for a custom snakefile. WARNING: This - is for internal testing, and - should not be used. Providing a snakefile - supersedes analysis_type option. - -s, --sample-config PATH Sample json config file. [required] - --run-mode [local|slurm] Run mode to use. By default SLURM will be - used to run the analysis. 
But - local runner also available for local - computing [default: slurm] - -c, --cluster-config PATH SLURM config json file. [default: /home/has - san.foroughi/repos/BALSAMIC/BALSAMIC/config/ - cluster.json] - -l, --log-file PATH Log file output for BALSAMIC. - This is raw log output from snakemake. - -r, --run-analysis By default balsamic run_analysis will run in - dry run mode. Raise thise flag - to make the actual analysis [default: - False] - --qos [low|normal|high] QOS for sbatch jobs. Passed to /home/hassan. - foroughi/repos/BALSAMIC/BALSAMIC/commands/ru - n/sbatch.py [default: low] - -f, --force-all Force run all analysis. This is same as - snakemake --forceall [default: False] - --snakemake-opt TEXT Pass these options directly to snakemake - --slurm-account TEXT SLURM account to run jobs - --slurm-mail-user TEXT SLURM mail user to send out email. - --slurm-mail-type [NONE|BEGIN|END|FAIL|REQUEUE|ALL|TIME_LIMIT] - SLURM mail type to send out email. - This will be applied to all jobs and - override snakemake settings. - --help Show this message and exit. - -:: - - Usage: balsamic run reference [OPTIONS] - - Run generate reference workflow - - Options: - -s, --snakefile TEXT snakefile for reference generation - -c, --configfile TEXT Config file to run the workflow [required] - --run-mode [slurm|local] Run mode to use.(LOCAL, SLURM for HPC) - --cluster-config PATH SLURM config json file. [default: /home/hassan.fo - roughi/repos/BALSAMIC/BALSAMIC/config/cluster.json - ] - -l, --log-file PATH Log file output for BALSAMIC. This is raw log - output from snakemake. - -r, --run-analysis By default balsamic run_analysis will run in dry - run mode. Raise thise flag to make - the actual analysis [default: False] - --qos [low|normal|high] QOS for sbatch jobs. Passed to /home/hassan.foroug - hi/repos/BALSAMIC/BALSAMIC/commands/run/sbatch.py - [default: low] - -f, --force-all Force run all analysis. 
This is same as snakemake - --forceall [default: False] - --snakemake-opt TEXT Pass these options directly to snakemake - --help Show this message and exit. diff --git a/docs/conf.py b/docs/conf.py index 0f04154f3..08d0d05df 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -47,7 +47,9 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +# html_theme = 'sphinx_rtd_theme' +html_logo = "../BALSAMIC/assets/balsamic_logo.png" +html_theme = 'furo' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, diff --git a/docs/index.rst b/docs/index.rst index d4ae512a4..2a69ce25c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -20,14 +20,12 @@ .. toctree:: - :caption: API and CLI reference + :caption: CLI reference :name: api_cli_reference :hidden: :maxdepth: 1 - cli_reference cli_package - source/modules .. toctree:: :caption: Other Info diff --git a/docs/install.rst b/docs/install.rst index 77c790630..3b2e931fd 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -4,78 +4,57 @@ Installation This section describes steps to install BALSAMIC (**version** = 4.1.0) -.. contents:: + Software Requirments ~~~~~~~~~~~~~~~~~~~~ - Conda >=version 4.5.0: For detailed software and python requirments please see ``requirments.txt`` and ``BALSAMIC/conda/balsamic.yaml`` -- Singularity >=version 3.0.0: BALSAMIC uses singularity to run vairous parts of the workflow. Either a container has to - be built matching the BALSAMIC version from ``BALSAMIC/containers/Dockerfile.latest`` or one can pull Singularity -container from Docker using: ``singularity pull path_container_file docker://hassanf/balsamic:tag`` - +- Singularity >=version 3.0.0: BALSAMIC uses singularity to run vairous parts of the workflow. - Python 3.6 -Manual Installation -~~~~~~~~~~~~~~~~~~~ +Step 1. 
Installing BALSAMIC +~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1. Create a conda environment using ``BALSAMIC/conda/balsamic.yaml``: :: - conda env create --file BALSAMIC/conda/balsamic.yaml -n name_of_environment - - -You can also set conda environment prefix via ``--prefix`` option. Consult conda documentation for further instructions. - -2. Install BALSAMIC using ``pip`` within the newly created environment: ``pip install -r requirements.txt -e .`` - -3. Pull container using Singularity: ``singularity pull path_container_file docker://hassanf/balsamic`` + conda env create --file BALSAMIC/conda/balsamic.yaml --name balsamic -If you'd like to install release 5.0.0 the instruction will look like below: +2. Activate environment: :: - # Checkout a specific tag or branch. If you'd like to install latest changes, use master branch - git checkout v5.0.0 + conda activate balsamic - # Create a conda env: balsamic_base - conda env create --file BALSAMIC/conda/balsamic.yaml --name balsamic_base - # Activate conda environment - source activate balsamic_base - # If you don't want to install in editable mode, remove `-e` - pip install -r requirements.txt -e . - - # Pull container for release_v5.0.0 - singularity pull balsamic_release_v5.0.0 docker://hassanf/balsamic:release_v5.0.0 +3. Install BALSAMIC using ``pip`` within the newly created environment: +:: -Automatic Installation -~~~~~~~~~~~~~~~~~~~~~~3 + pip install -r requirements.txt -e . -NOTE: The following instructions are for internal use only. -Use ``install.sh`` script, assuming `${CONDA_ENVS_PATH}` is set to the path for conda environment: +Step 2. 
generate BALSAMIC cache and pull containers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -:: - - ./install.sh -s S -p path_to_conda_env_location -P path_to_container_store_location -c +First generate your own COSMIC database key via: https://cancer.sanger.ac.uk/cosmic/help/file_download +The following commands will create and download reference directory at ``~/balsamic_cache`` (change this path if you +want it to be created in another location): -In above example, the final conda environment will be named: `S_BALSAMIC` +NOTE: This process can take couple of hours :: - USAGE: ../install.sh [-s _condaprefix -v _balsamic_ver -p _condapath -c] - 1. Conda naming convention: [P,D,S]_[ENVNAME]_%DATE. P: Production, D: Development, S: Stage - 2. Conda environment prefix: Path to conda env. e.g. /home/user/conda_env/ - - -s _condaprefix Conda env name prefix. This will be P or D in the help above - -v _balsamic_ver Balsamic version tag to install (4.0.0+), or it could be the branch name - -e _envsuffix Balsamic conda env suffix. This will be added to the conda env name - -p _condapath Conda env path prefix. See point 2 in help above - -P _containerpath Container path to store container files. Default set to current directory - -c If set it will use Singularity container for conda instead + # Note: + # 1. COSMIC key is in variable $COSMIC_KEY + # 2. For genome version hg38, set --genome-version to hg38 + + balsamic init --outdir ~/balsamic_cache \ + --cosmic-key "${COSMIC_KEY}" \ + --genome-version hg19 \ + --run-analysis diff --git a/docs/requirements.txt b/docs/requirements.txt index 2ab6c6cfe..16f0d3bae 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -7,3 +7,4 @@ sphinx-argparse sphinx-click sphinx_rtd_theme sphinxcontrib-napoleon +furo diff --git a/docs/resources.rst b/docs/resources.rst index db867c07d..b9dbe305e 100644 --- a/docs/resources.rst +++ b/docs/resources.rst @@ -1,7 +1,7 @@ =============== Other resources =============== -.. 
contents:: + Resources --------- diff --git a/docs/snakemake_etiquette.rst b/docs/snakemake_etiquette.rst index 643abcb46..462a93ce9 100644 --- a/docs/snakemake_etiquette.rst +++ b/docs/snakemake_etiquette.rst @@ -1,12 +1,16 @@ -======================== +=================== Snakemake Etiquette -======================== +=================== -The bioinformatics core analysis in BALSAMIC is defined by set of rules written as a Snakemake rules (``*.rule``) and Snakemake workflow as (``*.smk``). Main ``balsamic.smk`` workflow uses these rules to create sets of output files from sets of input files. Using ``{wildcards}`` Snakemake can automatically determine the dependencies between the rules by matching file names. The following guidelines describe the general conventions for naming and order of the rules, while writing a Snakemake file in BALSAMIC. For further description of how Snakemake works, please refer to Snakemake official documentation: https://snakemake.readthedocs.io/ +The bioinformatics core analysis in BALSAMIC is defined by set of rules written as a Snakemake rules (``*.rule``) and Snakemake +workflow as (``*.smk``). Main ``balsamic.smk`` workflow uses these rules to create sets of output files from sets of input files. +Using ``{wildcards}`` Snakemake can automatically determine the dependencies between the rules by matching file names. The +following guidelines describe the general conventions for naming and order of the rules, while writing a Snakemake file in +BALSAMIC. 
For further description of how Snakemake works, please refer to Snakemake official documentation: https://snakemake.readthedocs.io/ **Structure of Snakemake rules** -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :: @@ -40,7 +44,7 @@ The bioinformatics core analysis in BALSAMIC is defined by set of rules written """ **Descriptions** -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~ **rulename**: Rule name briefly should outline the program and functions utilized inside the rule. Each word is seperated by a underscore ``_``. First word is the bioinformatic tool or script's name. The following words describe subcommand within that bioinformatic tool and then followed by workflow specific description. The word length shouldn't exceed more than 3 or 4 words. Make sure rule names are updated within ``config/cluster.json`` and it is all lowercase. Examples: ``picard_collecthsmetrics_umi``, ``bcftools_query_calculateaftable_umi`` @@ -64,38 +68,38 @@ Example: :: -java -jar \ --Djava.io.tmpdir=${{tmpdir}} \ --Xms8G -Xmx16G \ -$CONDA_PREFIX/share/picard.jar \ -MarkDuplicates \ -{input.named_input_1} \ -{output.named_output_1}; + java -jar \ + -Djava.io.tmpdir=${{tmpdir}} \ + -Xms8G -Xmx16G \ + $CONDA_PREFIX/share/picard.jar \ + MarkDuplicates \ + {input.named_input_1} \ + {output.named_output_1}; Example for external python scripts that can be saved as modules in ``utils/*.py`` and can use them as definitions in rules as: :: - from BALSAMIC.utils.workflowscripts import get_densityplot - get_densityplot(input.named_input1, params.named_params_1, output.named_output1 ) + from BALSAMIC.utils.workflowscripts import get_densityplot + get_densityplot(input.named_input1, params.named_params_1, output.named_output1 ) Similarly ``awk`` or ``R`` external scripts can be saved in ``assets/scripts/*awk`` and can be invoked using `get_script_path` as: :: - params: - consensusfilter_script = get_script_path("FilterDuplexUMIconsensus.awk") - shell: - """ - samtools view 
-h {input} | \ - awk -v MinR={params.minreads} \ - -v OFS=\'\\t\' -f {params.consensusfilter_script} | \ - samtools view -bh - > {output} - """ + params: + consensusfilter_script = get_script_path("FilterDuplexUMIconsensus.awk") + shell: + """ + samtools view -h {input} | \ + awk -v MinR={params.minreads} \ + -v OFS=\'\\t\' -f {params.consensusfilter_script} | \ + samtools view -bh - > {output} + """ **References** -~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~ 1. https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html 2. https://snakemake.readthedocs.io/en/stable/snakefiles/writing_snakefiles.html diff --git a/docs/source/BALSAMIC.assets.rst b/docs/source/BALSAMIC.assets.rst deleted file mode 100644 index c3c0acaee..000000000 --- a/docs/source/BALSAMIC.assets.rst +++ /dev/null @@ -1,10 +0,0 @@ -BALSAMIC.assets package -======================= - -Module contents ---------------- - -.. automodule:: BALSAMIC.assets - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/BALSAMIC.commands.config.rst b/docs/source/BALSAMIC.commands.config.rst deleted file mode 100644 index 9668ad099..000000000 --- a/docs/source/BALSAMIC.commands.config.rst +++ /dev/null @@ -1,38 +0,0 @@ -BALSAMIC.commands.config package -================================ - -Submodules ----------- - -BALSAMIC.commands.config.base module ------------------------------------- - -.. automodule:: BALSAMIC.commands.config.base - :members: - :undoc-members: - :show-inheritance: - -BALSAMIC.commands.config.case module ------------------------------------- - -.. automodule:: BALSAMIC.commands.config.case - :members: - :undoc-members: - :show-inheritance: - -BALSAMIC.commands.config.reference module ------------------------------------------ - -.. automodule:: BALSAMIC.commands.config.reference - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: BALSAMIC.commands.config - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/BALSAMIC.commands.plugins.rst b/docs/source/BALSAMIC.commands.plugins.rst deleted file mode 100644 index bd6792c6b..000000000 --- a/docs/source/BALSAMIC.commands.plugins.rst +++ /dev/null @@ -1,38 +0,0 @@ -BALSAMIC.commands.plugins package -================================= - -Submodules ----------- - -BALSAMIC.commands.plugins.base module -------------------------------------- - -.. automodule:: BALSAMIC.commands.plugins.base - :members: - :undoc-members: - :show-inheritance: - -BALSAMIC.commands.plugins.cov\_plot module ------------------------------------------- - -.. automodule:: BALSAMIC.commands.plugins.cov_plot - :members: - :undoc-members: - :show-inheritance: - -BALSAMIC.commands.plugins.scout module --------------------------------------- - -.. automodule:: BALSAMIC.commands.plugins.scout - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: BALSAMIC.commands.plugins - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/BALSAMIC.commands.report.rst b/docs/source/BALSAMIC.commands.report.rst deleted file mode 100644 index 518036958..000000000 --- a/docs/source/BALSAMIC.commands.report.rst +++ /dev/null @@ -1,38 +0,0 @@ -BALSAMIC.commands.report package -================================ - -Submodules ----------- - -BALSAMIC.commands.report.base module ------------------------------------- - -.. automodule:: BALSAMIC.commands.report.base - :members: - :undoc-members: - :show-inheritance: - -BALSAMIC.commands.report.deliver module ---------------------------------------- - -.. automodule:: BALSAMIC.commands.report.deliver - :members: - :undoc-members: - :show-inheritance: - -BALSAMIC.commands.report.status module --------------------------------------- - -.. 
automodule:: BALSAMIC.commands.report.status - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: BALSAMIC.commands.report - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/BALSAMIC.commands.rst b/docs/source/BALSAMIC.commands.rst deleted file mode 100644 index cbc291cc4..000000000 --- a/docs/source/BALSAMIC.commands.rst +++ /dev/null @@ -1,33 +0,0 @@ -BALSAMIC.commands package -========================= - -Subpackages ------------ - -.. toctree:: - :maxdepth: 4 - - BALSAMIC.commands.config - BALSAMIC.commands.plugins - BALSAMIC.commands.report - BALSAMIC.commands.run - -Submodules ----------- - -BALSAMIC.commands.base module ------------------------------ - -.. automodule:: BALSAMIC.commands.base - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: BALSAMIC.commands - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/BALSAMIC.commands.run.rst b/docs/source/BALSAMIC.commands.run.rst deleted file mode 100644 index 9886c2fb3..000000000 --- a/docs/source/BALSAMIC.commands.run.rst +++ /dev/null @@ -1,46 +0,0 @@ -BALSAMIC.commands.run package -============================= - -Submodules ----------- - -BALSAMIC.commands.run.analysis module -------------------------------------- - -.. automodule:: BALSAMIC.commands.run.analysis - :members: - :undoc-members: - :show-inheritance: - -BALSAMIC.commands.run.base module ---------------------------------- - -.. automodule:: BALSAMIC.commands.run.base - :members: - :undoc-members: - :show-inheritance: - -BALSAMIC.commands.run.reference module --------------------------------------- - -.. automodule:: BALSAMIC.commands.run.reference - :members: - :undoc-members: - :show-inheritance: - -BALSAMIC.commands.run.scheduler module --------------------------------------- - -.. 
automodule:: BALSAMIC.commands.run.scheduler - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: BALSAMIC.commands.run - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/BALSAMIC.rst b/docs/source/BALSAMIC.rst deleted file mode 100644 index 43381b438..000000000 --- a/docs/source/BALSAMIC.rst +++ /dev/null @@ -1,20 +0,0 @@ -BALSAMIC package -================ - -Subpackages ------------ - -.. toctree:: - :maxdepth: 4 - - BALSAMIC.assets - BALSAMIC.commands - BALSAMIC.utils - -Module contents ---------------- - -.. automodule:: BALSAMIC - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/BALSAMIC.utils.rst b/docs/source/BALSAMIC.utils.rst deleted file mode 100644 index 49586ea66..000000000 --- a/docs/source/BALSAMIC.utils.rst +++ /dev/null @@ -1,46 +0,0 @@ -BALSAMIC.utils package -====================== - -Submodules ----------- - -BALSAMIC.utils.cli module -------------------------- - -.. automodule:: BALSAMIC.utils.cli - :members: - :undoc-members: - :show-inheritance: - -BALSAMIC.utils.exc module -------------------------- - -.. automodule:: BALSAMIC.utils.exc - :members: - :undoc-members: - :show-inheritance: - -BALSAMIC.utils.models module ----------------------------- - -.. automodule:: BALSAMIC.utils.models - :members: - :undoc-members: - :show-inheritance: - -BALSAMIC.utils.rule module --------------------------- - -.. automodule:: BALSAMIC.utils.rule - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: BALSAMIC.utils - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/modules.rst b/docs/source/modules.rst deleted file mode 100644 index 38ad2403a..000000000 --- a/docs/source/modules.rst +++ /dev/null @@ -1,7 +0,0 @@ -BALSAMIC -======== - -.. 
toctree:: - :maxdepth: 4 - - BALSAMIC diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 4e04e6190..02c163e16 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -4,36 +4,10 @@ Short tutorial Here a short toturial is provided for BALSAMIC (**version** = 6.1.2). -.. contents:: +Running a test sample +--------------------- -Step 1. generate a reference ----------------------------- - - -First reference files must be downloaded. Let's assume BALSAMIC is installed and available at `D_BALSAMIC-base_5.0.0`, -and a COSMIC key is generated via: https://cancer.sanger.ac.uk/cosmic/help/file_download - -The following commands will create and download reference directory at `./BALSAMIC_reference` (change this path if you -want it to be created in another location): - -:: - - cd BALSAMIC - - balsamic config reference \ - --cosmic-key ${COSMIC_KEY} \ - --outdir ./BALSAMIC_reference \ - --singularity BALSAMIC/containers/BALSAMIC_latest.sif - - # This might couple of hours - balsamic run reference --configfile reference/config.json --run-analysis --snakemake-opt "--cores 1" - - -A `json` file with reference specificaions is created at: `BALSAMIC_reference/hg19/reference.json` - -Step 2. Running a test sample ------------------------------ -Now a config file for a test run must be created. Let's use the test data in `tests` directory: +Given the :: @@ -43,14 +17,14 @@ Now a config file for a test run must be created. Let's use the test data in `te --case-id demo_run_balsamic \ --analysis-dir demo/ \ --panel-bed tests/test_data/references/panel/panel.bed \ - --reference-config BALSAMIC_reference/GRCh37/reference.json \ - --singularity BALSAMIC/containers/BALSAMIC_latest.sif \ - --output-config demo_run_balsamic.json + --balsamic-cache ~/balsamic_cache \ + --quiet + Notes: -- If you want to test tumor_only mode, remove the `--normal tests/test_data/fastq/S2_R_1.fastq.gz` line. 
-- `--output-config demo_run_balsamic.json` is also optional +- If you want to test tumor_only mode, remove the ``--normal tests/test_data/fastq/S2_R_1.fastq.gz`` line. +- ``--output-config demo_run_balsamic.json`` is also optional Let's try a dry run and see everything is in place: @@ -97,7 +71,7 @@ Command above should exit a similar output as below: 72 This was a dry-run (flag -n). The order of jobs does not reflect the order of execution. -And now run balsamic through SLURM. Make sure you set your SLURM project account using `--account` if your local +And now run balsamic through SLURM. Make sure you set your SLURM project account using ``--account`` if your local settings require it: :: @@ -105,7 +79,7 @@ settings require it: balsamic run analysis --sample-config demo/demo_run_balsamic/demo_run_balsamic.json \ --profile slurm --qos low --account development --run-analysis -And now run balsamic through QSUB. Make sure you set your QSUB project account using `--account` if your local +And now run balsamic through QSUB. 
Make sure you set your QSUB project account using ``--account`` if your local settings require it: :: diff --git a/requirements-dev.txt b/requirements-dev.txt index a5e9ea447..8a204e0ae 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ pytest>=3.7.1 pytest-cov==2.8.1 -coveralls==2.0.0 +coveralls pylint diff --git a/requirements.txt b/requirements.txt index ac2ecca33..38b48c2df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,18 +1,22 @@ -click -pyyaml -yapf -graphviz -pygments -coloredlogs -psutil -snakemake==5.13.0 -gsutil -six>=1.12.0 -jinja2 -networkx -colorclass -pydantic -numpy +click>=7.1.2 +colorclass>=2.2.0 +coloredlogs>=14.0 cyvcf2<0.10.0 +graphviz>=0.14 +gsutil==4.50 +jinja2>=2.11.2 +matplotlib>=3.3.0 +networkx>=2.4 +numpy>=1.19.2 +pandas>1.1.0 +psutil>=5.7.0 +pydantic>=1.5.1 +pygments>=2.6.1 +pyyaml>=5.3.1 +six>=1.12.0 +snakemake==5.13.0 +yapf>=0.30.0 +h5py>=3.1.0 +PyPDF2>=1.26.0 markdown==3.3.3 cryptography<3.4 diff --git a/run_validate.sh b/run_validate.sh index 81638d834..6bbdc1eb2 100755 --- a/run_validate.sh +++ b/run_validate.sh @@ -74,8 +74,7 @@ mkdir -p ${_analysis_dir} _genome_ver=hg19 _cluster_config=BALSAMIC/config/cluster.json -_singularity=BALSAMIC/containers/BALSAMIC_latest.sif -_reference=reference/${_genome_ver}/reference.json +_balsamic_cache=balsamic_cache _tumor_fastq=tests/test_data/fastq/S1_R_1.fastq.gz _normal_fastq=tests/test_data/fastq/S2_R_1.fastq.gz _analysis_config=${_analysis_dir}'/'${_analysis}_${_ngstype}'/'${_analysis}_${_ngstype}'.json' @@ -91,20 +90,22 @@ else fi function balsamic_config() { +set -x balsamic config case \ -t ${_tumor_fastq} \ ${_normal_option} \ --case-id ${_analysis}_${_ngstype} \ --analysis-dir ${_analysis_dir} \ - -r ${_reference} \ ${_panel_option} \ - --singularity ${_singularity} + --balsamic-cache ${_balsamic_cache} } balsamic_run() { balsamic run analysis \ -s ${_analysis_config} \ -c ${_cluster_config} \ + -a qc \ + --benchmark \ --account development 
${_run_analysis} } diff --git a/setup.py b/setup.py index 688996eed..d0af029d5 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ install_requires=install_requires, packages=find_packages(), package_data={ - "assets": ["scripts/*R", "sentieon_models/*model"], + "assets": ["scripts/*R", "sentieon_models/*model", "vcfanno/*.toml"], "config": ["*.json"] }, include_package_data=True, diff --git a/tests/commands/config/test_config_reference.py b/tests/commands/config/test_config_reference.py deleted file mode 100644 index aa8898f7a..000000000 --- a/tests/commands/config/test_config_reference.py +++ /dev/null @@ -1,58 +0,0 @@ -import graphviz -from pathlib import Path -from unittest import mock - - -def test_config_reference_write_json(invoke_cli, tmp_path, - singularity_container): - # Given test_reference.json - test_new_dir = tmp_path / "test_reference_dir" - test_new_dir.mkdir() - - # WHEN creating config.json in reference dir - test_output_reference_config = test_new_dir / "config.json" - test_output_reference_pdf = test_new_dir / "generate_ref_worflow_graph.pdf" - - result = invoke_cli([ - 'config', 'reference', '-c', 'secret_key', '--singularity', - singularity_container, '-o', - str(test_new_dir) - ]) - - # THEN output config and pdf file generate and command exit code 0 - assert result.exit_code == 0 - assert Path(test_output_reference_pdf).exists() - assert Path(test_output_reference_config).exists() - - -def test_config_reference_no_write_perm( - tmp_path, invoke_cli, singularity_container, no_write_perm_path): - # Given a path with no write permission - test_new_dir = str(no_write_perm_path) - - # WHEN invoking config sample - result = invoke_cli([ - 'config', 'reference', '-c', 'secret_key', '--singularity', - singularity_container, '-o', - str(test_new_dir) - ]) - - # THEN it should create test_reference.json and exist with no error - assert result.exit_code == 1 - - -def test_config_reference_exception(invoke_cli, tmp_path, - singularity_container): - 
# Given test_reference.json - test_new_dir = tmp_path / "test_reference_dir" - test_new_dir.mkdir() - - with mock.patch.object(graphviz, 'Source') as mocked: - mocked.return_value = None - result = invoke_cli([ - 'config', 'reference', '-c', 'secret_key', '--singularity', - singularity_container, '-o', - str(test_new_dir) - ]) - - assert result.exit_code == 1 diff --git a/tests/commands/config/test_config_sample.py b/tests/commands/config/test_config_sample.py index 8d6f25781..80eb1ac55 100644 --- a/tests/commands/config/test_config_sample.py +++ b/tests/commands/config/test_config_sample.py @@ -1,22 +1,101 @@ import os -import re import json -import pytest -import click -from pathlib import Path from unittest import mock + +import graphviz + +from pathlib import Path from click.testing import CliRunner from BALSAMIC.commands.base import cli -def test_dag_graph_success( - tumor_normal_wgs_config, - tumor_only_config, - tumor_normal_config, - tumor_only_wgs_config, -): - # WHEN creating config using standard CLI input +def test_tumor_normal_config(invoke_cli, sample_fastq, tmp_path, + balsamic_cache, panel_bed_file, + sentieon_license, sentieon_install_dir): + # GIVEN a case ID, fastq files, and an analysis dir + test_analysis_dir = tmp_path / "test_analysis_dir" + test_analysis_dir.mkdir() + case_id = "sample_tumor_normal" + tumor = sample_fastq["tumor"] + normal = sample_fastq["normal"] + + # WHEN creating a case analysis + with mock.patch.dict( + "os.environ", { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + result = invoke_cli( + [ + "config", + "case", + "-p", + panel_bed_file, + "-t", + tumor, + "-n", + normal, + "--case-id", + case_id, + "--analysis-dir", + test_analysis_dir, + "--balsamic-cache", + balsamic_cache, + "--tumor-sample-name", + "ACC1", + "--normal-sample-name", + "ACC2", + ], + ) + + # THEN a config should be created and exist + assert result.exit_code == 0 + assert Path(test_analysis_dir, case_id, 
case_id + ".json").exists() + + +def test_tumor_only_config(invoke_cli, sample_fastq, tmp_path, + balsamic_cache, panel_bed_file, + sentieon_license, sentieon_install_dir): + # GIVEN a case ID, fastq files, and an analysis dir + test_analysis_dir = tmp_path / "test_analysis_dir" + test_analysis_dir.mkdir() + case_id = "sample_tumor_only" + tumor = sample_fastq["tumor"] + + # WHEN creating a case analysis + with mock.patch.dict( + "os.environ", { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + result = invoke_cli( + [ + "config", + "case", + "-p", + panel_bed_file, + "-t", + tumor, + "--case-id", + case_id, + "--analysis-dir", + test_analysis_dir, + "--balsamic-cache", + balsamic_cache, + "--tumor-sample-name", + "ACC1", + ], + ) + + # THEN a config should be created and exist + assert result.exit_code == 0 + assert Path(test_analysis_dir, case_id, case_id + ".json").exists() + +def test_dag_graph_success(tumor_normal_wgs_config, tumor_only_config, + tumor_normal_config, tumor_only_wgs_config, + tumor_only_umi_config): + # WHEN creating config using standard CLI input and setting Sentieon env vars # THEN DAG graph should be created successfully assert Path(json.load( open(tumor_normal_config))["analysis"]["dag"]).exists() @@ -25,14 +104,16 @@ def test_dag_graph_success( open(tumor_only_wgs_config))["analysis"]["dag"]).exists() assert Path(json.load( open(tumor_normal_wgs_config))["analysis"]["dag"]).exists() + assert Path(json.load( + open(tumor_only_umi_config))["analysis"]["dag"]).exists() def test_tumor_only_config_bad_filename( - tmp_path_factory, - analysis_dir, - singularity_container, - panel_bed_file, - reference_json, + invoke_cli, + tmp_path_factory, + analysis_dir, + panel_bed_file, + balsamic_cache, ): # GIVEN existing fastq file with wrong naming convention @@ -43,9 +124,7 @@ def test_tumor_only_config_bad_filename( tumor = Path(faulty_fastq_dir / "error.fastq.gz").as_posix() # Invoke CLI command using file 
as argument - runner = CliRunner() - result = runner.invoke( - cli, + result = invoke_cli( [ "config", "case", @@ -57,10 +136,8 @@ def test_tumor_only_config_bad_filename( case_id, "--analysis-dir", analysis_dir, - "--singularity", - singularity_container, - "--reference-config", - reference_json, + "--balsamic-cache", + balsamic_cache, ], ) @@ -68,18 +145,18 @@ def test_tumor_only_config_bad_filename( assert result.exit_code == 1 -def test_tumor_only_config_bad_reference(sample_fastq, singularity_container, - analysis_dir, panel_bed_file): - # GIVEN CLI arguments including a bad reference config - faulty_reference_json = "tests/test_data/references/error_reference.json" - Path(faulty_reference_json).touch() +def test_run_without_permissions( + invoke_cli, + no_write_perm_path, + sample_fastq, + panel_bed_file, + balsamic_cache, +): + # GIVEN CLI arguments including an analysis_dir without write permissions case_id = "sample_tumor_only" tumor = sample_fastq["tumor"] - reference_json = faulty_reference_json - runner = CliRunner() - result = runner.invoke( - cli, + result = invoke_cli( [ "config", "case", @@ -90,47 +167,62 @@ def test_tumor_only_config_bad_reference(sample_fastq, singularity_container, "--case-id", case_id, "--analysis-dir", - analysis_dir, - "--singularity", - singularity_container, - "--reference-config", - reference_json, + no_write_perm_path, + "--balsamic-cache", + balsamic_cache, ], ) # THEN program exits before completion assert result.exit_code == 1 -def test_run_without_permissions( - no_write_perm_path, - sample_fastq, - singularity_container, - panel_bed_file, - reference_json, -): - # GIVEN CLI arguments including an analysis_dir without write permissions - case_id = "sample_tumor_only" +def test_tumor_only_umi_config_background_file( + invoke_cli, + sample_fastq, analysis_dir, balsamic_cache, + panel_bed_file): + + # GIVEN CLI arguments including a background variant file + case_id = "sample_umi_tumor_only" tumor = sample_fastq["tumor"] 
+ background_file = "tests/test_data/references/panel/background_variants.txt" + background_variant_file = background_file - runner = CliRunner() - result = runner.invoke( - cli, + result = invoke_cli( [ - "config", - "case", - "-p", - panel_bed_file, - "-t", - tumor, - "--case-id", - case_id, - "--analysis-dir", - no_write_perm_path, - "--singularity", - singularity_container, - "--reference-config", - reference_json, + "config", "case", "-p", panel_bed_file, "-t", tumor, "--case-id", + case_id, "--analysis-dir", analysis_dir, + "--background-variants", background_variant_file, "--balsamic-cache", + balsamic_cache, ], ) - # THEN program exits before completion + # THEN program exits and checks for filepath + assert result.exit_code == 0 + assert Path(background_variant_file).exists() + + +def test_config_case_graph_failed(invoke_cli, sample_fastq, analysis_dir, balsamic_cache, panel_bed_file): + # GIVEN an analysis config + case_id = "sample_tumor_only" + tumor = sample_fastq["tumor"] + + + with mock.patch.object(graphviz, 'Source') as mocked: + mocked.return_value = None + result = invoke_cli( + [ + "config", + "case", + "-p", + panel_bed_file, + "-t", + tumor, + "--case-id", + case_id, + "--analysis-dir", + analysis_dir, + "--balsamic-cache", + balsamic_cache + ], + ) + assert result.exit_code == 1 diff --git a/tests/commands/init/test_init.py b/tests/commands/init/test_init.py new file mode 100644 index 000000000..f7b2a23a3 --- /dev/null +++ b/tests/commands/init/test_init.py @@ -0,0 +1,165 @@ +import subprocess +import logging +import graphviz + +from pathlib import Path +from unittest import mock +from BALSAMIC import __version__ as balsamic_version + + +def test_init_reference_write_json( + invoke_cli, + tmp_path, +): + # Given test_reference.json + test_genome_version = "hg19" + test_container_version = "develop" + test_new_dir = tmp_path / "test_reference_dir" + test_new_dir.mkdir() + + # WHEN creating config.json in reference dir + 
test_output_reference_config = test_new_dir / balsamic_version / test_genome_version / "config.json" + test_output_reference_pdf = test_new_dir / balsamic_version / test_genome_version / "generate_ref_worflow_graph.pdf" + + result = invoke_cli([ + 'init', + '-o', + str(test_new_dir), + '-c', + 'secret_key', + '-v', + test_container_version, + ]) + + # THEN output config and pdf file generate and command exit code 0 + assert result.exit_code == 0 + assert Path(test_output_reference_pdf).exists() + assert Path(test_output_reference_config).exists() + + +def test_init_reference_no_write_perm(tmp_path, invoke_cli, + no_write_perm_path): + # Given a path with no write permission + test_genome_version = "hg19" + test_container_version = "develop" + test_new_dir = str(no_write_perm_path) + + # WHEN invoking config sample + result = invoke_cli([ + 'init', + '-o', + str(test_new_dir), + '-c', + 'secret_key', + '-v', + test_container_version, + '-g', + test_genome_version, + ]) + + # THEN it should create test_reference.json and exist with no error + assert result.exit_code == 1 + + +def test_init_reference_graph_exception(invoke_cli, tmp_path): + # Given test_reference.json + test_new_dir = tmp_path / "test_reference_nonfunctional_graph" + test_new_dir.mkdir() + + with mock.patch.object(graphviz, 'Source') as mocked: + mocked.return_value = None + result = invoke_cli([ + 'init', + '-o', + str(test_new_dir), + '-c', + 'secret_key', + ]) + + assert result.exit_code == 1 + + +def test_init_container_force_dry(invoke_cli, tmp_path): + # Given a dummy path + test_new_dir = tmp_path / "test_container_dry_force" + test_new_dir.mkdir() + test_container_version = "develop" + + # WHEN force pull dry-run container + result = invoke_cli([ + 'init', + '--outdir', + str(test_new_dir), + '-c', + 'secret_key', + '--force', + '-v', + test_container_version, + ]) + + # THEN command exit code 0 + assert result.exit_code == 0 + + +def test_init_container_specific_tag(invoke_cli, tmp_path): + # 
Given a dummy path + test_new_dir = tmp_path / "test_container_dir" + test_new_dir.mkdir() + dummy_tag = "develop" + + # WHEN pulling a specific tag other than standard version + result = invoke_cli([ + 'init', + '--outdir', + str(test_new_dir), + '-c', + 'secret_key', + '--container-version', + dummy_tag, + ]) + + # THEN command exit code 0 + assert result.exit_code == 0 + + +def test_init_container_without_dry_run(invoke_cli, tmp_path): + # Given a dummy path + test_new_dir = tmp_path / "test_container_dir" + test_new_dir.mkdir() + + with mock.patch.object(subprocess, 'run') as mocked: + mocked.return_value = 0 + + # WHEN pulling a container in a non dry-run mode + result = invoke_cli([ + 'init', + '--outdir', + str(test_new_dir), + '-c', + 'secret_key', + '--run-analysis', + ]) + + # THEN output config and pdf file generate and command exit code 0 + assert result.exit_code == 0 + + +def test_init_container_wrong_tag(invoke_cli, tmp_path): + # Given a dummy path + test_new_dir = tmp_path / "test_container_dir" + test_new_dir.mkdir() + dummy_tag = "some_tag_that_does_not_exist_ngrtf123jsds3wqe2" + + # WHEN pulling a wrong container tag + result = invoke_cli([ + 'init', + '--outdir', + str(test_new_dir), + '-c', + 'secret_key', + '--container-version', + dummy_tag, + ]) + + # THEN capture error log and error code + assert result.exit_code > 0 diff --git a/tests/commands/plugins/test_scout.py b/tests/commands/plugins/test_scout.py index ce275c2c6..c327547e4 100644 --- a/tests/commands/plugins/test_scout.py +++ b/tests/commands/plugins/test_scout.py @@ -1,9 +1,3 @@ -import pytest - -import BALSAMIC -from BALSAMIC.commands.base import cli - - def test_scout_tumor_normal(invoke_cli, tumor_normal_config): # GIVEN a tumor-normal config file # WHEN running analysis diff --git a/tests/commands/plugins/test_vcfutils_createvcf.py b/tests/commands/plugins/test_vcfutils_createvcf.py index 3e3150762..c4750ff2e 100644 --- a/tests/commands/plugins/test_vcfutils_createvcf.py +++ 
b/tests/commands/plugins/test_vcfutils_createvcf.py @@ -9,7 +9,6 @@ from pathlib import Path import re -import pytest def test_readinput_return_dict(input_file): diff --git a/tests/commands/report/test_deliver.py b/tests/commands/report/test_deliver.py index f0aba0309..0c299ada5 100644 --- a/tests/commands/report/test_deliver.py +++ b/tests/commands/report/test_deliver.py @@ -1,30 +1,33 @@ -import pytest - from pathlib import Path - -import BALSAMIC -from BALSAMIC.commands.base import cli +from unittest import mock -def test_deliver_tumor_only_panel(invoke_cli, tumor_only_config, helpers): +def test_deliver_tumor_only_panel(invoke_cli, tumor_only_config, helpers, + sentieon_install_dir, sentieon_license): # GIVEN a tumor-normal config file helpers.read_config(tumor_only_config) actual_delivery_report = Path(helpers.delivery_dir, helpers.case_id + ".hk") - # WHEN running analysis - result = invoke_cli([ - 'report', 'deliver', '--sample-config', tumor_only_config, - '--sample-id-map', 'tumor:tumor:KS454', '--case-id-map', - 'gmck-solid:KSK899:apptag' - ]) + with mock.patch.dict( + 'os.environ', { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + # WHEN running analysis + result = invoke_cli([ + 'report', 'deliver', '--sample-config', tumor_only_config, + '--sample-id-map', 'tumor:tumor:KS454', '--case-id-map', + 'gmck-solid:KSK899:apptag' + ]) - # THEN it should run without any error - assert result.exit_code == 0 - assert actual_delivery_report.is_file() + # THEN it should run without any error + assert result.exit_code == 0 + assert actual_delivery_report.is_file() -def test_deliver_tumor_normal_panel(invoke_cli, tumor_normal_config, helpers): +def test_deliver_tumor_normal_panel(invoke_cli, tumor_normal_config, helpers, + sentieon_install_dir, sentieon_license): # GIVEN a tumor-normal config file helpers.read_config(tumor_normal_config) @@ -55,13 +58,18 @@ def test_deliver_tumor_normal_panel(invoke_cli, 
tumor_normal_config, helpers): vcf_result_dir, "CNV.somatic." + helpers.case_id + ".cnvkit.vcf.gz") touch_temp_no_delivery_file.touch() - # WHEN running analysis - result = invoke_cli([ - 'report', 'deliver', '--sample-config', tumor_normal_config, - '--sample-id-map', 'tumor:tumor:KS454,normal:normal:KS999', - '--case-id-map', 'gmck-solid:KSK899:apptag' - ]) + with mock.patch.dict( + 'os.environ', { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + # WHEN running analysis + result = invoke_cli([ + 'report', 'deliver', '--sample-config', tumor_normal_config, + '--sample-id-map', 'tumor:tumor:KS454,normal:normal:KS999', + '--case-id-map', 'gmck-solid:KSK899:apptag' + ]) - # THEN it should run without any error - assert result.exit_code == 0 - assert actual_delivery_report.is_file() + # THEN it should run without any error + assert result.exit_code == 0 + assert actual_delivery_report.is_file() diff --git a/tests/commands/report/test_status.py b/tests/commands/report/test_status.py index 895606c83..e78198743 100644 --- a/tests/commands/report/test_status.py +++ b/tests/commands/report/test_status.py @@ -1,24 +1,93 @@ -import pytest +from pathlib import Path +from unittest import mock -import BALSAMIC -from BALSAMIC.commands.base import cli + +def test_status_tumor_only_panel(invoke_cli, tumor_only_config, + sentieon_install_dir, sentieon_license): + # GIVEN a tumor-only config file + # WHEN running analysis + with mock.patch.dict( + 'os.environ', { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + result = invoke_cli([ + 'report', 'status', '--show-only-missing', '--sample-config', + tumor_only_config + ]) + + # THEN it should run without any error + assert result.exit_code == 0 -def test_status_tumor_only_panel(invoke_cli, tumor_only_config): +def test_status_tumor_normal_panel(invoke_cli, tumor_normal_config, helpers, + sentieon_install_dir, sentieon_license): # GIVEN a 
tumor-normal config file - # WHEN running analysis - result = invoke_cli( - ['report', 'status', '--sample-config', tumor_only_config]) + # WHEN running analysis with three actual delivery files + # Actual delivery files dummies with and without index + helpers.read_config(tumor_normal_config) + normal_bam_result_dir = Path(helpers.result_dir, "bam") + normal_bam_result_dir.mkdir(parents=True, exist_ok=True) + normal_bam_delivery_file = Path(normal_bam_result_dir, "normal.merged.bam") + normal_bam_delivery_file.touch() + + tumor_bam_result_dir = Path(helpers.result_dir, "bam") + tumor_bam_result_dir.mkdir(parents=True, exist_ok=True) + tumor_bam_delivery_file = Path(tumor_bam_result_dir, "tumor.merged.bam") + tumor_bam_delivery_file.touch() + + with mock.patch.dict( + 'os.environ', { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + result = invoke_cli([ + 'report', 'status', '--print-files', '--sample-config', + tumor_normal_config + ]) - # THEN it should run without any error - assert result.exit_code == 0 + # THEN it should run without any error + assert result.exit_code == 0 -def test_status_tumor_normal_panel(invoke_cli, tumor_normal_config): +def test_status_analysis_finish(invoke_cli, tumor_normal_config, helpers, + sentieon_install_dir, sentieon_license): # GIVEN a tumor-normal config file - # WHEN running analysis - result = invoke_cli( - ['report', 'status', '--sample-config', tumor_normal_config]) + helpers.read_config(tumor_normal_config) + + # Actual delivery files dummies with and without index + cnv_result_dir = Path(helpers.result_dir, "cnv") + cnv_result_dir.mkdir(parents=True, exist_ok=True) + actual_delivery_file = Path(cnv_result_dir, "tumor.merged.cnr") + actual_delivery_file.touch() + + vep_result_dir = Path(helpers.result_dir, "vep") + vep_result_dir.mkdir(parents=True, exist_ok=True) + touch_vcf_delivery_file = Path( + vep_result_dir, + "SNV.somatic." 
+ helpers.case_id + ".vardict.all.vcf.gz") + touch_vcf_delivery_file.touch() + touch_vcf_delivery_file_index = Path( + vep_result_dir, + "SNV.somatic." + helpers.case_id + ".vardict.all.vcf.gz.tbi") + touch_vcf_delivery_file_index.touch() + + # An analysis_finish file to mock a finished analysis + result_dir = Path(helpers.result_dir) + result_dir.mkdir(parents=True, exist_ok=True) + actual_analysis_finish_file = Path(result_dir, "analysis_finish") + actual_analysis_finish_file.touch() + + with mock.patch.dict( + 'os.environ', { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + # WHEN running analysis + result = invoke_cli([ + 'report', 'status', '--print-files', '--sample-config', + tumor_normal_config + ]) - # THEN it should run without any error - assert result.exit_code == 0 + # THEN it should run without any error + assert result.exit_code == 0 diff --git a/tests/commands/run/test_run_analysis.py b/tests/commands/run/test_run_analysis.py index b86e4f29b..440ef2c03 100644 --- a/tests/commands/run/test_run_analysis.py +++ b/tests/commands/run/test_run_analysis.py @@ -5,6 +5,16 @@ from pathlib import Path +def test_run_analysis_dragen(invoke_cli, tumor_only_wgs_config): + # GIVEN a WGS config file + # WHEN running analysis + result = invoke_cli( + ['run', 'analysis', '-s', tumor_only_wgs_config, '--dragen']) + + # THEN it should run without any error + assert result.exit_code == 0 + + def test_run_analysis_disable_variant_caller(invoke_cli, tumor_only_config): # GIVEN a tumor-only config file and variant caller to disable disabled_varcaller = "mutect" @@ -63,8 +73,19 @@ def test_run_analysis_create_dir(invoke_cli, tumor_only_config): with mock.patch.object(subprocess, 'run') as mocked: mocked.return_value.stdout = 1 invoke_cli([ - 'run', 'analysis', '-s', tumor_only_config, '-r', '--account', - 'development' + 'run', 'analysis', '-s', tumor_only_config, '-r', '--benchmark', + '--account', 'development' ]) # THEN it 
should abort with error assert Path(re.sub('/$', '.1/', log_dir)).exists() + + +def test_run_analysis_umi(invoke_cli, tumor_only_umi_config): + # GIVEN a tumor-only config file + # WHEN running analysis + + result = invoke_cli( + ['run', 'analysis', '-s', tumor_only_umi_config, '-a', 'umi']) + + # THEN it should run without any error + assert result.exit_code == 0 diff --git a/tests/commands/run/test_run_reference.py b/tests/commands/run/test_run_reference.py deleted file mode 100644 index 3a388badc..000000000 --- a/tests/commands/run/test_run_reference.py +++ /dev/null @@ -1,30 +0,0 @@ -import pytest -import glob -from pathlib import Path -from click.testing import CliRunner - -import BALSAMIC -from BALSAMIC.commands.base import cli - - -def test_run_reference(invoke_cli, tmp_path, singularity_container): - # Given test_reference.json - test_new_dir = tmp_path / "test_reference_dir_with_run" - test_new_dir.mkdir() - - test_output_reference_config = test_new_dir / "config.json" - test_output_reference_pdf = test_new_dir / "generate_ref_worflow_graph.pdf" - - result_config = invoke_cli([ - 'config', 'reference', '-c', 'secret_key', '--singularity', - singularity_container, '-o', - str(test_new_dir) - ]) - - # WHEN creating config.json in reference dir - result_run = invoke_cli( - ['run', 'reference', '-c', - str(test_output_reference_config)]) - - # THEN output config, pdf file generation, and reference dry run command exit code 0 - assert result_run.exit_code == 0 diff --git a/tests/commands/run/test_scheduler.py b/tests/commands/run/test_scheduler.py index f7afcfb39..3aa881be9 100644 --- a/tests/commands/run/test_scheduler.py +++ b/tests/commands/run/test_scheduler.py @@ -4,19 +4,16 @@ from unittest import mock -from BALSAMIC.commands.run.scheduler import SbatchScheduler -from BALSAMIC.commands.run.scheduler import QsubScheduler -from BALSAMIC.commands.run.scheduler import submit_job -from BALSAMIC.commands.run.scheduler import read_sample_config -from 
BALSAMIC.commands.run.scheduler import write_sacct_file -from BALSAMIC.commands.run.scheduler import submit_job -from BALSAMIC.commands.run.scheduler import main as scheduler_main -from BALSAMIC.utils.cli import get_schedulerpy +from BALSAMIC.utils.scheduler import SbatchScheduler +from BALSAMIC.utils.scheduler import QsubScheduler +from BALSAMIC.utils.scheduler import read_sample_config +from BALSAMIC.utils.scheduler import write_sacct_file +from BALSAMIC.utils.scheduler import submit_job +from BALSAMIC.utils.scheduler import main as scheduler_main from BALSAMIC.utils.cli import createDir -def test_scheduler_slurm_py(snakemake_job_script, tumor_normal_config, tmpdir, - capsys): +def test_scheduler_slurm_py(snakemake_job_script, tumor_normal_config, capsys): # GIVEN a jobscript, dependencies, joutput job id, and sample comamnd test_jobid = '999999999999' test_return_value = 'Submitted batch job ' + test_jobid @@ -36,7 +33,8 @@ def test_scheduler_slurm_py(snakemake_job_script, tumor_normal_config, tmpdir, "--sample-config", tumor_normal_config, "--profile", scheduler_profile_slurm, "--qos", "low", "--account", "development", "--log-dir", log_dir, "--script-dir", script_dir, "--result-dir", - sample_config['analysis']['result'] + sample_config['analysis']['result'], "--slurm-profiler", "task", + "--slurm-profiler-interval", "10" ] scheduler_cmd.extend(scheduler_args) @@ -54,8 +52,7 @@ def test_scheduler_slurm_py(snakemake_job_script, tumor_normal_config, tmpdir, assert captured.out == test_jobid + "\n" -def test_scheduler_qsub_py(snakemake_job_script, tumor_normal_config, tmpdir, - capsys): +def test_scheduler_qsub_py(snakemake_job_script, tumor_normal_config, capsys): # GIVEN a jobscript, dependencies, joutput job id, and sample comamnd test_jobname = 'script.sh' test_return_value = f'Your job 31415 ("{test_jobname}") has been submitted' @@ -134,6 +131,7 @@ def test_SbatchScheduler(): sbatch_cmd.qos = "low" sbatch_cmd.time = "01:00:00" sbatch_cmd.script = 
"example_script.sh" + sbatch_cmd.partition = "dummy_partition" # WHEN sbatch command is built sbatch_cmd = sbatch_cmd.build_cmd() @@ -143,7 +141,8 @@ def test_SbatchScheduler(): assert sbatch_cmd == ( 'sbatch --account "development" --dependency "afterok:12345" --error "test_job.err" ' '--output "test_job.out" --mail-type "FAIL" --mail-user "john.doe@example.com" ' - '--ntasks "2" --qos "low" --time "01:00:00" example_script.sh') + '--ntasks "2" --qos "low" --time "01:00:00" --partition "dummy_partition" example_script.sh' + ) def test_qsub_scheduler(): diff --git a/tests/commands/test_cli.py b/tests/commands/test_cli.py index 2ca204084..0c2af7a69 100644 --- a/tests/commands/test_cli.py +++ b/tests/commands/test_cli.py @@ -1,12 +1,4 @@ -import pytest -import glob import BALSAMIC -import json -import logging - -from pathlib import Path -from click.testing import CliRunner -from BALSAMIC.commands.base import cli def test_cli(invoke_cli): @@ -25,7 +17,6 @@ def test_config(invoke_cli): # THEN It should show config options in result assert 'case' in result.output - assert 'reference' in result.output def test_config_case(invoke_cli): @@ -117,26 +108,4 @@ def test_run_analysis_invalid(invoke_cli): # THEN It should throw invalid value error assert result.exit_code == 2 - assert 'Error: Invalid value' in result.output - - -def test_run_reference(invoke_cli): - # WHEN invoking run reference command - result = invoke_cli(['run', 'reference', '--help']) - - # THEN It should show the help message with all params - assert "--snakefile" in result.output - assert "--configfile" in result.output - assert "--run-mode" in result.output - assert "--cluster-config" in result.output - assert "--run-analysis" in result.output - assert result.exit_code == 0 - - -def test_run_ref_invalid(invoke_cli): - # WHEN invoking run reference command with invalid param - result = invoke_cli(['run', 'reference', '--run-mode', 'foo']) - - # THEN It should throw invalid value error - assert 
result.exit_code == 2 - assert 'Error: Invalid value' in result.output + assert 'Error: Invalid value' in result.output \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 1622116b5..551e58d42 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,16 +1,17 @@ -#! /usr/bin/python - import pytest -import yaml import json import os +from unittest import mock from distutils.dir_util import copy_tree from pathlib import Path from functools import partial from click.testing import CliRunner from .helpers import ConfigHelper from BALSAMIC.commands.base import cli +from BALSAMIC import __version__ as balsamic_version + +MOCKED_OS_ENVIRON = 'os.environ' @pytest.fixture @@ -30,32 +31,47 @@ def invoke_cli(cli_runner): def config_files(): """ dict: path of the config files """ return { - "sample": "BALSAMIC/config/sample.json", - "reference": "tests/test_data/references/reference.json", - "analysis_paired": "BALSAMIC/config/analysis_paired.json", - "cluster_json": "BALSAMIC/config/cluster.json", - "analysis_paired_umi": "BALSAMIC/config/analysis_paired_umi.json", - "analysis_single": "BALSAMIC/config/analysis_single.json", - "analysis_single_umi": "BALSAMIC/config/analysis_single_umi.json", - "panel_bed_file": "tests/test_data/references/panel/panel.bed", - "test_reference": "tests/test_data/references/reference.json", + "sample": + "BALSAMIC/config/sample.json", + "analysis_paired": + "BALSAMIC/config/analysis_paired.json", + "cluster_json": + "BALSAMIC/config/cluster.json", + "analysis_paired_umi": + "BALSAMIC/config/analysis_paired_umi.json", + "analysis_single": + "BALSAMIC/config/analysis_single.json", + "analysis_single_umi": + "BALSAMIC/config/analysis_single_umi.json", + "panel_bed_file": + "tests/test_data/references/panel/panel.bed", + "background_variant_file": + "tests/test_data/references/panel/background_variants.txt" } @pytest.fixture(scope="session") -def conda(): - """ - conda env config file paths - """ +def reference(): 
+ """ reference json model """ return { - "balsamic": "BALSAMIC/conda/balsamic.yaml", - "varcall_py27": "BALSAMIC/conda/varcall_py27.yaml", - "varcall_py36": "BALSAMIC/conda/varcall_py36.yaml", - "align_qc": "BALSAMIC/conda/align.yaml", - "annotate": "BALSAMIC/conda/annotate.yaml", - "coverage": "BALSAMIC/conda/coverage.yaml", + "reference": { + "reference_genome": "tests/test_data/references/genome/human_g1k_v37_decoy.fasta", + "dbsnp": "tests/test_data/references/variants/dbsnp_grch37_b138.vcf.gz", + "1kg_snps_all": "tests/test_data/references/variants/1k_genome_wgs_p1_v3_all_sites.vcf.gz", + "1kg_snps_high": "tests/test_data/references/variants/1kg_phase1_snps_high_confidence_b37.vcf.gz", + "1kg_known_indel": "tests/test_data/references/variants/1kg_known_indels_b37.vcf.gz", + "mills_1kg": "tests/test_data/references/variants/mills_1kg_index.vcf.gz", + "gnomad_variant": "tests/test_data/reference/variants/gnomad.genomes.r2.1.1.sites.vcf.bgz", + "cosmic": "tests/test_data/references/variants/cosmic_coding_muts_v89.vcf.gz", + "vep": "tests/test_data/references/vep/", + "refflat": "tests/test_data/references/genome/refseq.flat", + "refGene": "tests/test_data/references/genome/refGene.txt", + "wgs_calling_interval": "tests/test_data/references/genome/wgs_calling_regions.v1", + "genome_chrom_size": "tests/test_data/references/genome/hg19.chrom.sizes", + "exon_bed": "tests/test_data/references/genome/refseq.flat.bed", + "rankscore": "tests/test_data/references/genome/cancer_rank_model_-v0.1-.ini", } - +} @pytest.fixture(scope="session") def panel_bed_file(): @@ -63,8 +79,33 @@ def panel_bed_file(): @pytest.fixture(scope="session") -def reference_json(): - return "tests/test_data/references/reference.json" +def background_variant_file(): + return "tests/test_data/references/panel/background_variants.txt" + + +@pytest.fixture(scope="session") +def sentieon_license(tmp_path_factory): + """ + Sentieon's license path fixture + """ + sentieon_license_dir = 
tmp_path_factory.mktemp("sentieon_licence") + sentieon_license_path = sentieon_license_dir / "license_file.lic" + sentieon_license_path.touch() + + return sentieon_license_path.as_posix() + + +@pytest.fixture(scope="session") +def sentieon_install_dir(tmp_path_factory): + """ + Sentieon's license path fixture + """ + sentieon_install_dir = tmp_path_factory.mktemp("sentieon_install_dir") + Path(sentieon_install_dir / "bin").mkdir(exist_ok=True) + sentieon_executable = sentieon_install_dir / "bin" / "sentieon" + sentieon_executable.touch() + + return sentieon_install_dir.as_posix() @pytest.fixture(scope="session") @@ -116,16 +157,27 @@ def sample_fastq(tmp_path_factory): @pytest.fixture(scope="session") -def singularity_container(tmp_path_factory): +def balsamic_cache(tmp_path_factory, reference): """ Create singularity container """ - container_dir = tmp_path_factory.mktemp("test_container") - container_file = container_dir / "singularity_container.simg" - container_file.touch() + cache_dir = tmp_path_factory.mktemp("balsmic_coche") + + cache_container = cache_dir / balsamic_version / "containers" / "align_qc" + cache_container.mkdir(parents=True, exist_ok=True) + cache_container_example = cache_container / "example.sif" + cache_container_example.touch() - return container_file.as_posix() + cache_reference = cache_dir / balsamic_version / "hg19" + cache_reference.mkdir(parents=True, exist_ok=True) + + cache_reference_json = cache_reference / "reference.json" + cache_reference_json.touch() + with open(cache_reference_json, 'w') as fp: + json.dump(reference, fp) + + return cache_dir.as_posix() @pytest.fixture(scope="session") @@ -158,14 +210,9 @@ def snakemake_job_script(tmp_path_factory, tumor_normal_config): @pytest.fixture(scope="session") -def tumor_normal_config( - tmp_path_factory, - sample_fastq, - analysis_dir, - singularity_container, - reference_json, - panel_bed_file, -): +def tumor_normal_config(tmp_path_factory, sample_fastq, analysis_dir, + 
balsamic_cache, panel_bed_file, + sentieon_license, sentieon_install_dir): """ invokes balsamic config sample -t xxx -n xxx to create sample config for tumor-normal @@ -174,32 +221,35 @@ def tumor_normal_config( tumor = sample_fastq["tumor"] normal = sample_fastq["normal"] - runner = CliRunner() - runner.invoke( - cli, - [ - "config", - "case", - "-p", - panel_bed_file, - "-t", - tumor, - "-n", - normal, - "--case-id", - case_id, - "--singularity", - singularity_container, - "--analysis-dir", - analysis_dir, - "--reference-config", - reference_json, - "--tumor-sample-name", - "ACC1", - "--normal-sample-name", - "ACC2", - ], - ) + with mock.patch.dict( + MOCKED_OS_ENVIRON, { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + runner = CliRunner() + runner.invoke( + cli, + [ + "config", + "case", + "-p", + panel_bed_file, + "-t", + tumor, + "-n", + normal, + "--case-id", + case_id, + "--analysis-dir", + analysis_dir, + "--balsamic-cache", + balsamic_cache, + "--tumor-sample-name", + "ACC1", + "--normal-sample-name", + "ACC2", + ], + ) qc_dir = Path(analysis_dir, case_id, "analysis", "qc", "multiqc_data") qc_dir.mkdir(parents=True, exist_ok=False) @@ -216,8 +266,8 @@ def fixture_config_helpers(): @pytest.fixture(scope="session") -def tumor_normal_wgs_config(tmp_path_factory, sample_fastq, analysis_dir, - singularity_container, reference_json): +def tumor_normal_wgs_config(tmp_path_factory, sample_fastq, analysis_dir, balsamic_cache, + sentieon_license, sentieon_install_dir): """ invokes balsamic config sample -t xxx -n xxx to create sample config for tumor-normal @@ -226,39 +276,37 @@ def tumor_normal_wgs_config(tmp_path_factory, sample_fastq, analysis_dir, tumor = sample_fastq["tumor"] normal = sample_fastq["normal"] - runner = CliRunner() - runner.invoke( - cli, - [ - "config", - "case", - "-t", - tumor, - "-n", - normal, - "--case-id", - case_id, - "--singularity", - singularity_container, - "--analysis-dir", - 
analysis_dir, - "--reference-config", - reference_json, - ], - ) + with mock.patch.dict( + MOCKED_OS_ENVIRON, { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + runner = CliRunner() + runner.invoke( + cli, + [ + "config", + "case", + "-t", + tumor, + "-n", + normal, + "--case-id", + case_id, + "--balsamic-cache", + balsamic_cache, + "--analysis-dir", + analysis_dir, + ], + ) return Path(analysis_dir, case_id, case_id + ".json").as_posix() @pytest.fixture(scope="session") -def tumor_only_config( - tmpdir_factory, - sample_fastq, - singularity_container, - analysis_dir, - reference_json, - panel_bed_file, -): +def tumor_only_config(tmpdir_factory, sample_fastq, balsamic_cache, + analysis_dir, panel_bed_file, + sentieon_license, sentieon_install_dir): """ invokes balsamic config sample -t xxx to create sample config for tumor only @@ -266,26 +314,29 @@ def tumor_only_config( case_id = "sample_tumor_only" tumor = sample_fastq["tumor"] - runner = CliRunner() - runner.invoke( - cli, - [ - "config", - "case", - "-p", - panel_bed_file, - "-t", - tumor, - "--case-id", - case_id, - "--analysis-dir", - analysis_dir, - "--singularity", - singularity_container, - "--reference-config", - reference_json, - ], - ) + with mock.patch.dict( + MOCKED_OS_ENVIRON, { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + runner = CliRunner() + runner.invoke( + cli, + [ + "config", + "case", + "-p", + panel_bed_file, + "-t", + tumor, + "--case-id", + case_id, + "--analysis-dir", + analysis_dir, + "--balsamic-cache", + balsamic_cache, + ], + ) qc_dir = Path(analysis_dir, case_id, "analysis", "qc", "multiqc_data") qc_dir.mkdir(parents=True, exist_ok=False) @@ -296,8 +347,8 @@ def tumor_only_config( @pytest.fixture(scope="session") -def tumor_only_wgs_config(tmp_path_factory, sample_fastq, analysis_dir, - singularity_container, reference_json): +def tumor_only_wgs_config(tmp_path_factory, sample_fastq, 
analysis_dir, balsamic_cache, + sentieon_license, sentieon_install_dir): """ invokes balsamic config sample -t xxx to create sample config for tumor only @@ -305,24 +356,59 @@ def tumor_only_wgs_config(tmp_path_factory, sample_fastq, analysis_dir, case_id = "sample_tumor_only_wgs" tumor = sample_fastq["tumor"] - runner = CliRunner() - runner.invoke( - cli, - [ - "config", - "case", - "-t", - tumor, - "--case-id", - case_id, - "--analysis-dir", - analysis_dir, - "--singularity", - singularity_container, - "--reference-config", - reference_json, - ], - ) + with mock.patch.dict( + MOCKED_OS_ENVIRON, { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + runner = CliRunner() + runner.invoke( + cli, + [ + "config", + "case", + "-t", + tumor, + "--case-id", + case_id, + "--analysis-dir", + analysis_dir, + "--balsamic-cache", + balsamic_cache, + ], + ) + + return Path(analysis_dir, case_id, case_id + ".json").as_posix() + + +@pytest.fixture(scope="session") +def tumor_only_umi_config(tmpdir_factory, sample_fastq, balsamic_cache, + analysis_dir, panel_bed_file, + background_variant_file, sentieon_license, + sentieon_install_dir): + """ + invokes balsamic config sample -t xxx to create sample config + for tumor only with background variant file for umi workflow + """ + case_id = "sample_tumor_only_umi" + tumor = sample_fastq["tumor"] + + with mock.patch.dict( + MOCKED_OS_ENVIRON, { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + runner = CliRunner() + runner.invoke( + cli, + [ + "config", "case", "-p", panel_bed_file, + "--background-variants", background_variant_file, "-t", tumor, + "--case-id", case_id, "--analysis-dir", analysis_dir, + "--balsamic-cache", + balsamic_cache, + ], + ) return Path(analysis_dir, case_id, case_id + ".json").as_posix() @@ -344,26 +430,18 @@ def sample_config(): "umi_trim_length": "5", }, "analysis": { - "case_id": - "id1", - "analysis_type": - "paired", - 
"analysis_dir": - "tests/test_data/", - "fastq_path": - "tests/test_data/id1/fastq/", - "script": - "tests/test_data/id1/scripts/", - "log": - "tests/test_data/id1/logs/", - "result": - "tests/test_data/id1/analysis/", - "config_creation_date": - "yyyy-mm-dd xx", - "BALSAMIC_version": - "2.9.8", + "case_id": "id1", + "analysis_type": "paired", + "analysis_dir": "tests/test_data/", + "fastq_path": "tests/test_data/id1/fastq/", + "script": "tests/test_data/id1/scripts/", + "log": "tests/test_data/id1/logs/", + "result": "tests/test_data/id1/analysis/", + "config_creation_date": "yyyy-mm-dd xx", + "BALSAMIC_version": "2.9.8", "dag": "tests/test_data/id1/id1_analysis.json_BALSAMIC_2.9.8_graph.pdf", + "umiworkflow": "true" }, "vcf": { "manta": { diff --git a/tests/test_data/dummy_run_logs/BALSAMIC.T_panel.bwa_mem.123.h5 b/tests/test_data/dummy_run_logs/BALSAMIC.T_panel.bwa_mem.123.h5 new file mode 100644 index 000000000..b1e0766fa Binary files /dev/null and b/tests/test_data/dummy_run_logs/BALSAMIC.T_panel.bwa_mem.123.h5 differ diff --git a/tests/test_data/dummy_run_logs/BALSAMIC.T_panel.bwa_mem.123.sh_31415926535.err b/tests/test_data/dummy_run_logs/BALSAMIC.T_panel.bwa_mem.123.sh_31415926535.err new file mode 100644 index 000000000..098eeaf10 --- /dev/null +++ b/tests/test_data/dummy_run_logs/BALSAMIC.T_panel.bwa_mem.123.sh_31415926535.err @@ -0,0 +1,4282 @@ +Building DAG of jobs... +Using shell: /bin/bash +Provided cores: 16 +Rules claiming more threads will be scaled down. 
+Job counts: + count jobs + 1 bwa_mem + 1 + +[Wed Dec 30 14:00:11 2020] +rule bwa_mem: + input: /home/hassan.foroughi/repos/BALSAMIC/init/reference/6.0.3/hg19/genome/human_g1k_v37.fasta, /home/hassan.foroughi/repos/BALSAMIC/run_tests/T_panel/analysis/fastq/concatenated_ACC5962A16_XXXXXX_R_1.fp.fastq.gz, /home/hassan.foroughi/repos/BALSAMIC/run_tests/T_panel/analysis/fastq/concatenated_ACC5962A16_XXXXXX_R_2.fp.fastq.gz, /home/hassan.foroughi/repos/BALSAMIC/init/reference/6.0.3/hg19/genome/human_g1k_v37.fasta.amb, /home/hassan.foroughi/repos/BALSAMIC/init/reference/6.0.3/hg19/genome/human_g1k_v37.fasta.ann, /home/hassan.foroughi/repos/BALSAMIC/init/reference/6.0.3/hg19/genome/human_g1k_v37.fasta.bwt, /home/hassan.foroughi/repos/BALSAMIC/init/reference/6.0.3/hg19/genome/human_g1k_v37.fasta.pac, /home/hassan.foroughi/repos/BALSAMIC/init/reference/6.0.3/hg19/genome/human_g1k_v37.fasta.sa + output: /home/hassan.foroughi/repos/BALSAMIC/run_tests/T_panel/analysis/bam/concatenated_ACC5962A16_XXXXXX_R.sorted.bam + jobid: 0 + benchmark: /home/hassan.foroughi/repos/BALSAMIC/run_tests/T_panel/benchmarks/bwa_mem_concatenated_ACC5962A16_XXXXXX_R.bwa_mem.tsv + wildcards: sample=concatenated_ACC5962A16_XXXXXX_R + threads: 16 + + +source activate align_qc; +mkdir -p /home/hassan.foroughi/repos/BALSAMIC/run_tests/T_panel/analysis/tmp/d8za6ph2; +export TMPDIR=/home/hassan.foroughi/repos/BALSAMIC/run_tests/T_panel/analysis/tmp/d8za6ph2; +bwa mem -t 16 -R '@RG\tID:concatenated_ACC5962A16_XXXXXX_R\tSM:concatenated_ACC5962A16_XXXXXX_R\tPL:ILLUMINAi' -M -v 1 /home/hassan.foroughi/repos/BALSAMIC/init/reference/6.0.3/hg19/genome/human_g1k_v37.fasta /home/hassan.foroughi/repos/BALSAMIC/run_tests/T_panel/analysis/fastq/concatenated_ACC5962A16_XXXXXX_R_1.fp.fastq.gz /home/hassan.foroughi/repos/BALSAMIC/run_tests/T_panel/analysis/fastq/concatenated_ACC5962A16_XXXXXX_R_2.fp.fastq.gz | samtools sort -T /home/hassan.foroughi/repos/BALSAMIC/run_tests/T_panel/analysis/tmp/d8za6ph2 --threads 16 
--output-fmt BAM -o /home/hassan.foroughi/repos/BALSAMIC/run_tests/T_panel/analysis/bam/concatenated_ACC5962A16_XXXXXX_R.sorted.bam - ; +samtools index -@ 16 /home/hassan.foroughi/repos/BALSAMIC/run_tests/T_panel/analysis/bam/concatenated_ACC5962A16_XXXXXX_R.sorted.bam; +rm -rf /home/hassan.foroughi/repos/BALSAMIC/run_tests/T_panel/analysis/tmp/d8za6ph2; + +Activating singularity image /home/hassan.foroughi/repos/BALSAMIC/init/containers/align_qc.sif +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (119, 158, 243) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 491) +[M::mem_pestat] mean and std.dev: (179.85, 85.51) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 615) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 178, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 487) +[M::mem_pestat] mean and std.dev: (189.92, 86.58) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 608) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (107, 165, 219) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 443) +[M::mem_pestat] mean and std.dev: (171.26, 75.74) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 555) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (119, 172, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 527) +[M::mem_pestat] mean and std.dev: (190.16, 91.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 663) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 178, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 487) +[M::mem_pestat] mean and std.dev: (190.49, 86.61) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 608) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (134, 187, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (191.31, 80.29) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (115, 176, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 559) +[M::mem_pestat] mean and std.dev: (198.58, 101.50) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 707) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 490) +[M::mem_pestat] mean and std.dev: (191.09, 87.03) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 612) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (129, 183, 260) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 522) +[M::mem_pestat] mean and std.dev: (202.00, 96.04) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 653) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (132, 181, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 504) +[M::mem_pestat] mean and std.dev: (202.26, 95.49) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 628) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.60, 88.01) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (128, 176, 243) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 473) +[M::mem_pestat] mean and std.dev: (190.44, 86.55) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 588) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (119, 180, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 551) +[M::mem_pestat] mean and std.dev: (199.25, 98.86) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 695) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (191.65, 88.20) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 616) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 195, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (196.89, 85.54) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 616) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (117, 180, 238) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 480) +[M::mem_pestat] mean and std.dev: (188.21, 87.45) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 601) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 178, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 490) +[M::mem_pestat] mean and std.dev: (190.96, 87.84) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 612) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (132, 197, 261) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 519) +[M::mem_pestat] mean and std.dev: (206.08, 98.76) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 648) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (131, 189, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 491) +[M::mem_pestat] mean and std.dev: (197.05, 89.28) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 611) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (191.86, 88.17) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 616) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (118, 176, 281) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 607) +[M::mem_pestat] mean and std.dev: (206.31, 113.14) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 770) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (117, 169, 269) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 573) +[M::mem_pestat] mean and std.dev: (190.62, 97.31) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 725) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.32, 88.53) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (138, 182, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 492) +[M::mem_pestat] mean and std.dev: (202.47, 94.45) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 610) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (128, 186, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 491) +[M::mem_pestat] mean and std.dev: (198.17, 95.24) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 612) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.31, 89.12) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (134, 187, 275) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 557) +[M::mem_pestat] mean and std.dev: (211.83, 101.10) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 698) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (129, 180, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 513) +[M::mem_pestat] mean and std.dev: (196.23, 97.12) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 641) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 499) +[M::mem_pestat] mean and std.dev: (192.68, 89.21) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 624) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (134, 182, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 503) +[M::mem_pestat] mean and std.dev: (202.14, 98.22) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (126, 180, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 510) +[M::mem_pestat] mean and std.dev: (193.94, 87.84) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 638) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (191.57, 88.48) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 616) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (116, 176, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 530) +[M::mem_pestat] mean and std.dev: (192.54, 92.88) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 668) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (128, 179, 265) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 539) +[M::mem_pestat] mean and std.dev: (201.44, 94.08) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 676) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.08, 88.67) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (141, 191, 270) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 528) +[M::mem_pestat] mean and std.dev: (207.97, 98.77) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 657) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (113, 167, 234) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 476) +[M::mem_pestat] mean and std.dev: (178.36, 83.87) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 597) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.35, 88.73) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (119, 172, 278) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 596) +[M::mem_pestat] mean and std.dev: (198.33, 102.98) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 755) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (135, 192, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 528) +[M::mem_pestat] mean and std.dev: (206.29, 97.31) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 659) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.28, 89.14) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (130, 186, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 511) +[M::mem_pestat] mean and std.dev: (203.21, 95.24) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 638) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (129, 185, 270) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 552) +[M::mem_pestat] mean and std.dev: (207.49, 101.48) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 693) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.10, 89.25) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 179, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (194.05, 91.71) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (118, 178, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 553) +[M::mem_pestat] mean and std.dev: (198.63, 103.13) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 698) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (123, 178, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (191.24, 88.61) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 619) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 184, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 541) +[M::mem_pestat] mean and std.dev: (198.15, 97.67) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 680) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (121, 181, 242) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 484) +[M::mem_pestat] mean and std.dev: (190.98, 89.60) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 605) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.43, 88.94) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (128, 183, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 542) +[M::mem_pestat] mean and std.dev: (199.96, 95.36) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 680) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (134, 193, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (202.50, 91.98) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.29, 88.91) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (127, 189, 261) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 529) +[M::mem_pestat] mean and std.dev: (202.27, 96.15) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 663) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (137, 196, 294) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 608) +[M::mem_pestat] mean and std.dev: (218.47, 108.57) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 765) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (192.93, 88.91) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 170, 242) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 476) +[M::mem_pestat] mean and std.dev: (187.98, 84.21) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 593) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (129, 170, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 492) +[M::mem_pestat] mean and std.dev: (192.14, 91.39) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 613) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.68, 89.57) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (128, 181, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 509) +[M::mem_pestat] mean and std.dev: (194.90, 91.95) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 636) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (124, 180, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 487) +[M::mem_pestat] mean and std.dev: (193.40, 93.59) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 608) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (123, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (191.62, 88.99) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 619) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (130, 184, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 538) +[M::mem_pestat] mean and std.dev: (204.38, 102.06) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 674) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (132, 178, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 534) +[M::mem_pestat] mean and std.dev: (201.71, 94.17) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 668) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.44, 88.98) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (136, 198, 271) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 541) +[M::mem_pestat] mean and std.dev: (206.64, 91.76) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 676) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (130, 204, 286) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 598) +[M::mem_pestat] mean and std.dev: (216.83, 101.45) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 754) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.46, 88.91) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (130, 181, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 502) +[M::mem_pestat] mean and std.dev: (195.06, 89.43) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (130, 203, 259) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 517) +[M::mem_pestat] mean and std.dev: (203.61, 89.57) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 646) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 499) +[M::mem_pestat] mean and std.dev: (192.86, 89.07) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 624) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (128, 179, 243) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 473) +[M::mem_pestat] mean and std.dev: (193.13, 84.23) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 588) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (128, 184, 271) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 557) +[M::mem_pestat] mean and std.dev: (210.79, 107.80) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 700) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.69, 89.46) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 175, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 519) +[M::mem_pestat] mean and std.dev: (194.76, 91.16) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 650) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (128, 198, 277) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 575) +[M::mem_pestat] mean and std.dev: (214.16, 110.80) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 724) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.06, 89.04) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (130, 185, 271) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 553) +[M::mem_pestat] mean and std.dev: (202.97, 98.69) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 694) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (119, 182, 244) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (190.94, 85.23) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 619) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (123, 178, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 492) +[M::mem_pestat] mean and std.dev: (190.64, 87.89) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 615) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (109, 167, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 520) +[M::mem_pestat] mean and std.dev: (180.41, 84.97) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 657) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (114, 157, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 516) +[M::mem_pestat] mean and std.dev: (185.88, 93.13) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 650) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 490) +[M::mem_pestat] mean and std.dev: (191.11, 87.56) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 612) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (140, 184, 272) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 536) +[M::mem_pestat] mean and std.dev: (202.80, 85.98) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 668) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (112, 170, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 517) +[M::mem_pestat] mean and std.dev: (188.34, 93.67) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 652) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (191.97, 88.21) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 616) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (132, 182, 268) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 540) +[M::mem_pestat] mean and std.dev: (204.40, 94.16) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 676) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (133, 182, 252) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 490) +[M::mem_pestat] mean and std.dev: (198.81, 90.36) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 609) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.65, 88.30) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (128, 189, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 518) +[M::mem_pestat] mean and std.dev: (198.34, 90.95) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 648) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (125, 175, 244) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 482) +[M::mem_pestat] mean and std.dev: (191.74, 91.80) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 601) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.16, 88.74) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (122, 179, 275) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 581) +[M::mem_pestat] mean and std.dev: (204.18, 100.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 734) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (129, 185, 244) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 474) +[M::mem_pestat] mean and std.dev: (189.79, 80.07) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 589) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 178, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 487) +[M::mem_pestat] mean and std.dev: (190.66, 86.84) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 608) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (129, 186, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 510) +[M::mem_pestat] mean and std.dev: (198.10, 93.85) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 637) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (113, 173, 240) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (185.49, 86.84) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 179, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 485) +[M::mem_pestat] mean and std.dev: (191.07, 86.45) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 605) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (117, 167, 241) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 489) +[M::mem_pestat] mean and std.dev: (185.74, 90.57) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 613) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (128, 185, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 533) +[M::mem_pestat] mean and std.dev: (198.43, 91.20) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 668) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 491) +[M::mem_pestat] mean and std.dev: (191.96, 87.12) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 613) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 173, 260) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 532) +[M::mem_pestat] mean and std.dev: (197.73, 100.22) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 668) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (117, 175, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 537) +[M::mem_pestat] mean and std.dev: (191.12, 98.77) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 677) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (194.00, 88.54) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (129, 180, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 477) +[M::mem_pestat] mean and std.dev: (194.40, 89.51) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 593) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (129, 192, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 516) +[M::mem_pestat] mean and std.dev: (201.05, 92.65) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 645) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (193.78, 89.00) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (132, 187, 280) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 576) +[M::mem_pestat] mean and std.dev: (207.41, 95.38) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 724) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (122, 181, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 554) +[M::mem_pestat] mean and std.dev: (199.53, 97.76) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 698) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.67, 88.66) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (142, 180, 240) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 436) +[M::mem_pestat] mean and std.dev: (191.61, 74.76) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 534) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (130, 175, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 508) +[M::mem_pestat] mean and std.dev: (198.62, 98.33) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 634) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.53, 88.34) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (119, 169, 230) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 452) +[M::mem_pestat] mean and std.dev: (180.52, 82.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 563) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (122, 183, 242) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 482) +[M::mem_pestat] mean and std.dev: (191.30, 82.91) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 602) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (193.86, 88.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (120, 190, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 525) +[M::mem_pestat] mean and std.dev: (196.30, 93.21) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 660) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (136, 185, 262) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 514) +[M::mem_pestat] mean and std.dev: (202.71, 88.11) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 640) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 252) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 504) +[M::mem_pestat] mean and std.dev: (195.15, 90.11) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 630) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (131, 183, 265) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 533) +[M::mem_pestat] mean and std.dev: (206.54, 99.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 667) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (126, 187, 262) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 534) +[M::mem_pestat] mean and std.dev: (204.61, 99.82) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 670) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.04, 89.24) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (123, 188, 268) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 558) +[M::mem_pestat] mean and std.dev: (209.25, 105.79) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 703) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (135, 205, 282) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 576) +[M::mem_pestat] mean and std.dev: (216.11, 99.09) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 723) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.63, 88.86) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (121, 181, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 523) +[M::mem_pestat] mean and std.dev: (186.85, 82.51) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 657) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (133, 196, 262) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 520) +[M::mem_pestat] mean and std.dev: (198.90, 92.53) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 649) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (193.62, 88.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 177, 259) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 529) +[M::mem_pestat] mean and std.dev: (193.72, 92.03) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 664) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (123, 190, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 528) +[M::mem_pestat] mean and std.dev: (198.09, 95.11) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 663) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.39, 89.64) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 181, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 490) +[M::mem_pestat] mean and std.dev: (190.93, 83.42) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 611) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (120, 174, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 525) +[M::mem_pestat] mean and std.dev: (190.07, 94.42) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 660) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.35, 89.87) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (129, 180, 278) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 576) +[M::mem_pestat] mean and std.dev: (208.60, 110.87) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 725) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (132, 177, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (197.51, 86.53) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.08, 89.05) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (121, 176, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 499) +[M::mem_pestat] mean and std.dev: (192.29, 89.40) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 187, 273) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 567) +[M::mem_pestat] mean and std.dev: (203.04, 101.74) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 714) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.33, 88.98) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (130, 176, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 508) +[M::mem_pestat] mean and std.dev: (196.82, 92.09) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 634) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (122, 178, 278) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 590) +[M::mem_pestat] mean and std.dev: (203.70, 105.28) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 746) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (194.02, 89.25) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 191, 282) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 596) +[M::mem_pestat] mean and std.dev: (212.26, 115.67) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 753) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (124, 181, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 517) +[M::mem_pestat] mean and std.dev: (199.03, 95.86) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 648) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.77, 89.97) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (131, 178, 264) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 530) +[M::mem_pestat] mean and std.dev: (199.07, 94.62) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 663) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (120, 176, 268) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 564) +[M::mem_pestat] mean and std.dev: (201.91, 106.12) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 712) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.55, 90.08) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (132, 210, 272) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 552) +[M::mem_pestat] mean and std.dev: (207.36, 96.70) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 692) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (123, 182, 235) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 459) +[M::mem_pestat] mean and std.dev: (181.24, 77.55) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 571) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.63, 89.49) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (131, 184, 274) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 560) +[M::mem_pestat] mean and std.dev: (212.54, 106.45) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 703) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (112, 171, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 526) +[M::mem_pestat] mean and std.dev: (191.78, 103.69) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 664) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.45, 89.18) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 173, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 481) +[M::mem_pestat] mean and std.dev: (189.62, 86.27) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 599) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (108, 175, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 552) +[M::mem_pestat] mean and std.dev: (189.55, 92.59) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 700) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (194.20, 89.38) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (118, 172, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 529) +[M::mem_pestat] mean and std.dev: (196.79, 101.70) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 666) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (131, 191, 269) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 545) +[M::mem_pestat] mean and std.dev: (203.60, 96.57) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 683) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.65, 89.84) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (134, 193, 267) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 533) +[M::mem_pestat] mean and std.dev: (201.79, 90.62) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 666) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (137, 194, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 524) +[M::mem_pestat] mean and std.dev: (209.33, 97.80) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 653) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.78, 90.03) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (123, 180, 265) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 549) +[M::mem_pestat] mean and std.dev: (194.81, 92.86) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 691) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 270) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 560) +[M::mem_pestat] mean and std.dev: (205.76, 99.59) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 705) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.52, 89.00) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (133, 191, 269) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 541) +[M::mem_pestat] mean and std.dev: (207.14, 94.97) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 677) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 178, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 505) +[M::mem_pestat] mean and std.dev: (190.28, 87.56) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 632) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 491) +[M::mem_pestat] mean and std.dev: (192.02, 87.85) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 613) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (135, 182, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 471) +[M::mem_pestat] mean and std.dev: (198.89, 90.35) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 583) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (129, 199, 259) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 519) +[M::mem_pestat] mean and std.dev: (201.08, 96.16) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 649) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (193.38, 88.40) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (129, 184, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 510) +[M::mem_pestat] mean and std.dev: (195.76, 86.59) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 637) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (136, 203, 264) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 520) +[M::mem_pestat] mean and std.dev: (206.36, 95.66) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 648) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (194.05, 88.71) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (121, 170, 241) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 481) +[M::mem_pestat] mean and std.dev: (188.29, 89.54) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 601) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (122, 194, 279) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 593) +[M::mem_pestat] mean and std.dev: (211.16, 107.65) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 750) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.52, 89.50) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (134, 197, 282) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 578) +[M::mem_pestat] mean and std.dev: (215.72, 100.20) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 726) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (128, 175, 242) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 470) +[M::mem_pestat] mean and std.dev: (188.78, 84.91) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 584) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (123, 178, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 492) +[M::mem_pestat] mean and std.dev: (191.13, 88.17) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 615) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (116, 179, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 518) +[M::mem_pestat] mean and std.dev: (193.15, 97.01) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 652) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (121, 179, 239) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 475) +[M::mem_pestat] mean and std.dev: (190.21, 86.74) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 593) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 178, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 490) +[M::mem_pestat] mean and std.dev: (191.01, 87.75) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 612) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (117, 169, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 531) +[M::mem_pestat] mean and std.dev: (192.17, 93.81) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 669) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (115, 180, 267) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 571) +[M::mem_pestat] mean and std.dev: (197.34, 100.07) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 723) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (191.44, 87.93) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 616) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 173, 253) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 511) +[M::mem_pestat] mean and std.dev: (191.95, 85.24) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 640) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (127, 191, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 520) +[M::mem_pestat] mean and std.dev: (197.99, 91.93) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 651) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.02, 88.32) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 169, 228) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 432) +[M::mem_pestat] mean and std.dev: (177.78, 77.58) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 534) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (124, 192, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 550) +[M::mem_pestat] mean and std.dev: (202.72, 100.12) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 692) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (191.62, 88.23) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 616) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (129, 202, 269) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 549) +[M::mem_pestat] mean and std.dev: (206.22, 97.45) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 689) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (134, 184, 268) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 536) +[M::mem_pestat] mean and std.dev: (201.75, 92.15) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 670) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (123, 178, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (191.26, 88.62) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 619) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (106, 177, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 550) +[M::mem_pestat] mean and std.dev: (191.34, 93.98) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 698) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (127, 186, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 508) +[M::mem_pestat] mean and std.dev: (195.71, 90.21) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 635) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (191.52, 88.66) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 616) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (139, 184, 264) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 514) +[M::mem_pestat] mean and std.dev: (202.74, 92.49) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 639) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (123, 182, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 525) +[M::mem_pestat] mean and std.dev: (196.00, 90.96) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 659) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 499) +[M::mem_pestat] mean and std.dev: (192.78, 89.23) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 624) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (142, 207, 268) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 520) +[M::mem_pestat] mean and std.dev: (208.16, 91.09) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 646) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 510) +[M::mem_pestat] mean and std.dev: (192.04, 85.01) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 638) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.10, 89.10) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (121, 169, 252) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 514) +[M::mem_pestat] mean and std.dev: (189.53, 92.36) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 645) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (131, 199, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 512) +[M::mem_pestat] mean and std.dev: (204.22, 94.28) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 639) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.24, 89.45) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (131, 193, 262) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 524) +[M::mem_pestat] mean and std.dev: (202.76, 92.46) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 655) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (136, 209, 270) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 538) +[M::mem_pestat] mean and std.dev: (210.32, 89.93) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 672) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (123, 178, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (191.43, 88.97) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 619) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (131, 195, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 536) +[M::mem_pestat] mean and std.dev: (209.71, 100.39) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 671) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (134, 187, 270) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 542) +[M::mem_pestat] mean and std.dev: (206.03, 94.23) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 678) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (191.96, 88.41) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (122, 175, 243) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 485) +[M::mem_pestat] mean and std.dev: (188.34, 83.62) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 606) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (128, 173, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (191.95, 90.51) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (192.92, 88.75) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (129, 175, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 504) +[M::mem_pestat] mean and std.dev: (192.75, 90.07) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 629) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (124, 180, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 526) +[M::mem_pestat] mean and std.dev: (193.30, 88.81) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 660) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.47, 89.01) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 177, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 526) +[M::mem_pestat] mean and std.dev: (198.13, 101.48) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 660) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (116, 176, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 521) +[M::mem_pestat] mean and std.dev: (189.92, 86.93) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 656) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.38, 89.22) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 182, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 488) +[M::mem_pestat] mean and std.dev: (197.18, 82.69) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 609) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (132, 188, 279) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 573) +[M::mem_pestat] mean and std.dev: (206.95, 101.11) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 720) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.03, 89.08) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (123, 175, 260) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 534) +[M::mem_pestat] mean and std.dev: (198.05, 96.29) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 671) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 175, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (190.94, 90.81) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 490) +[M::mem_pestat] mean and std.dev: (191.26, 87.66) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 612) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (118, 166, 235) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 469) +[M::mem_pestat] mean and std.dev: (183.94, 85.93) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 586) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (132, 197, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 534) +[M::mem_pestat] mean and std.dev: (202.49, 92.45) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 668) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.39, 88.58) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 192, 237) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 459) +[M::mem_pestat] mean and std.dev: (190.91, 78.51) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 570) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (116, 192, 276) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 596) +[M::mem_pestat] mean and std.dev: (200.88, 98.72) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 756) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (192.99, 88.59) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (117, 174, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 531) +[M::mem_pestat] mean and std.dev: (193.15, 94.97) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 669) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (132, 192, 278) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 570) +[M::mem_pestat] mean and std.dev: (207.57, 97.63) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 716) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (194.04, 88.96) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (121, 177, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 523) +[M::mem_pestat] mean and std.dev: (194.03, 91.40) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 657) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (127, 174, 228) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 430) +[M::mem_pestat] mean and std.dev: (181.80, 73.45) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 531) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.25, 88.85) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (133, 199, 283) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 583) +[M::mem_pestat] mean and std.dev: (214.35, 102.69) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 733) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 172, 267) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 553) +[M::mem_pestat] mean and std.dev: (199.34, 98.63) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 696) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (191.51, 88.03) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 616) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (136, 178, 260) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 508) +[M::mem_pestat] mean and std.dev: (199.29, 86.79) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 632) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (111, 170, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 528) +[M::mem_pestat] mean and std.dev: (183.21, 84.99) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 667) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.29, 88.27) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (121, 175, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 529) +[M::mem_pestat] mean and std.dev: (195.94, 94.36) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 665) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (116, 175, 229) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 455) +[M::mem_pestat] mean and std.dev: (181.30, 79.25) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 568) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.49, 88.34) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 272) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 566) +[M::mem_pestat] mean and std.dev: (200.66, 98.25) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 713) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (123, 184, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 489) +[M::mem_pestat] mean and std.dev: (189.23, 85.54) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 611) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (194.15, 89.02) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (137, 186, 259) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 503) +[M::mem_pestat] mean and std.dev: (202.38, 90.69) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (131, 173, 243) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 467) +[M::mem_pestat] mean and std.dev: (188.93, 85.48) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 579) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.30, 89.09) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (138, 200, 279) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 561) +[M::mem_pestat] mean and std.dev: (211.22, 103.76) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 702) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (132, 179, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 471) +[M::mem_pestat] mean and std.dev: (186.53, 78.98) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 584) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (123, 177, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 492) +[M::mem_pestat] mean and std.dev: (190.33, 88.06) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 615) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (121, 188, 260) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 538) +[M::mem_pestat] mean and std.dev: (202.89, 96.38) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 677) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (121, 177, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 502) +[M::mem_pestat] mean and std.dev: (192.85, 92.20) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 629) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (191.42, 88.11) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 616) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 195, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 548) +[M::mem_pestat] mean and std.dev: (202.49, 95.06) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 689) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (124, 170, 240) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 472) +[M::mem_pestat] mean and std.dev: (186.79, 88.57) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 588) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (191.93, 88.43) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 185, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (195.60, 90.17) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (124, 176, 236) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 460) +[M::mem_pestat] mean and std.dev: (187.84, 86.25) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 572) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.22, 88.92) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (121, 168, 241) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 481) +[M::mem_pestat] mean and std.dev: (185.06, 82.36) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 601) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (128, 185, 265) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 539) +[M::mem_pestat] mean and std.dev: (201.18, 94.67) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 676) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.23, 88.91) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 179, 268) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 554) +[M::mem_pestat] mean and std.dev: (205.41, 106.04) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 697) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (122, 175, 269) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 563) +[M::mem_pestat] mean and std.dev: (199.31, 96.17) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 710) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (191.73, 87.97) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 616) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (121, 175, 233) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 457) +[M::mem_pestat] mean and std.dev: (185.68, 83.77) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 569) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (128, 180, 268) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 548) +[M::mem_pestat] mean and std.dev: (199.67, 105.76) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 688) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.67, 88.11) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (119, 182, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 509) +[M::mem_pestat] mean and std.dev: (191.70, 94.26) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 639) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (121, 177, 264) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 550) +[M::mem_pestat] mean and std.dev: (193.77, 94.55) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 693) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.39, 88.85) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (118, 180, 261) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 547) +[M::mem_pestat] mean and std.dev: (198.22, 98.17) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 690) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (128, 191, 259) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 521) +[M::mem_pestat] mean and std.dev: (202.24, 95.88) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 652) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.53, 89.31) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (133, 193, 276) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 562) +[M::mem_pestat] mean and std.dev: (208.46, 96.69) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 705) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (118, 167, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 508) +[M::mem_pestat] mean and std.dev: (189.62, 96.49) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 638) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.99, 89.65) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (128, 181, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 482) +[M::mem_pestat] mean and std.dev: (193.92, 86.70) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 600) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (130, 178, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 538) +[M::mem_pestat] mean and std.dev: (202.91, 96.31) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 674) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.00, 89.14) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (128, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 485) +[M::mem_pestat] mean and std.dev: (191.09, 83.10) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 604) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (127, 199, 280) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 586) +[M::mem_pestat] mean and std.dev: (214.70, 108.78) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 739) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.67, 89.39) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (129, 173, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 531) +[M::mem_pestat] mean and std.dev: (198.01, 92.64) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 665) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (123, 176, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 543) +[M::mem_pestat] mean and std.dev: (194.80, 90.96) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 683) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.78, 89.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (130, 185, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 505) +[M::mem_pestat] mean and std.dev: (201.29, 96.07) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 630) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (127, 179, 242) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 472) +[M::mem_pestat] mean and std.dev: (189.08, 82.41) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 587) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 252) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 504) +[M::mem_pestat] mean and std.dev: (195.30, 90.29) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 630) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 186, 270) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 558) +[M::mem_pestat] mean and std.dev: (204.25, 96.85) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 702) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (142, 195, 273) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 535) +[M::mem_pestat] mean and std.dev: (215.51, 103.01) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 666) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.91, 89.60) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (133, 171, 243) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 463) +[M::mem_pestat] mean and std.dev: (192.22, 86.62) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 573) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (137, 201, 281) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 569) +[M::mem_pestat] mean and std.dev: (218.09, 107.12) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 713) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.38, 88.39) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 184, 267) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 551) +[M::mem_pestat] mean and std.dev: (201.85, 98.13) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 693) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (109, 176, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 550) +[M::mem_pestat] mean and std.dev: (192.60, 101.00) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 697) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.30, 88.69) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (130, 177, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 484) +[M::mem_pestat] mean and std.dev: (195.15, 91.02) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 602) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (125, 175, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 524) +[M::mem_pestat] mean and std.dev: (200.53, 99.11) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 657) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.69, 89.56) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 185, 279) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 583) +[M::mem_pestat] mean and std.dev: (205.65, 100.49) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 735) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (139, 196, 273) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 541) +[M::mem_pestat] mean and std.dev: (203.96, 92.16) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 675) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (127, 183, 252) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 502) +[M::mem_pestat] mean and std.dev: (195.81, 89.95) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 627) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (121, 180, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 529) +[M::mem_pestat] mean and std.dev: (192.11, 90.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 665) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (111, 193, 267) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 579) +[M::mem_pestat] mean and std.dev: (196.44, 102.81) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 735) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.31, 88.92) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (119, 174, 224) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 434) +[M::mem_pestat] mean and std.dev: (182.82, 82.70) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 539) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (107, 162, 227) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 467) +[M::mem_pestat] mean and std.dev: (173.89, 83.78) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 587) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.18, 88.12) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 174, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 518) +[M::mem_pestat] mean and std.dev: (197.61, 99.63) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 649) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (113, 171, 241) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (186.89, 92.44) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (193.65, 88.77) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (130, 173, 252) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (194.22, 85.73) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (133, 196, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 502) +[M::mem_pestat] mean and std.dev: (199.39, 80.60) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.80, 89.27) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (112, 175, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 544) +[M::mem_pestat] mean and std.dev: (193.39, 98.22) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 688) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (121, 172, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 508) +[M::mem_pestat] mean and std.dev: (188.13, 88.36) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 637) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.52, 89.76) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (134, 185, 242) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 458) +[M::mem_pestat] mean and std.dev: (191.60, 84.67) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 566) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (123, 198, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 525) +[M::mem_pestat] mean and std.dev: (200.12, 87.86) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 659) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.16, 88.32) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (111, 173, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 516) +[M::mem_pestat] mean and std.dev: (186.84, 92.83) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 651) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (127, 185, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 514) +[M::mem_pestat] mean and std.dev: (194.56, 87.25) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 643) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.41, 88.21) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (117, 188, 264) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 558) +[M::mem_pestat] mean and std.dev: (199.57, 98.85) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 705) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (124, 175, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (190.80, 93.13) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.35, 88.94) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (130, 173, 237) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 451) +[M::mem_pestat] mean and std.dev: (189.51, 84.09) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 558) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (121, 170, 268) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 562) +[M::mem_pestat] mean and std.dev: (198.09, 102.39) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 709) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.63, 89.84) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (133, 187, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 499) +[M::mem_pestat] mean and std.dev: (200.56, 92.34) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (128, 172, 234) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 446) +[M::mem_pestat] mean and std.dev: (183.69, 81.11) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 552) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (191.94, 88.94) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 259) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 529) +[M::mem_pestat] mean and std.dev: (201.91, 100.03) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 664) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (127, 183, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 499) +[M::mem_pestat] mean and std.dev: (194.22, 89.79) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 623) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (123, 178, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 492) +[M::mem_pestat] mean and std.dev: (190.84, 88.04) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 615) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (128, 168, 222) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 410) +[M::mem_pestat] mean and std.dev: (177.58, 73.21) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 504) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 181, 271) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 565) +[M::mem_pestat] mean and std.dev: (199.02, 94.08) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 712) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (191.67, 88.11) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 616) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (115, 164, 271) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 583) +[M::mem_pestat] mean and std.dev: (196.49, 102.51) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 739) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (131, 199, 270) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 548) +[M::mem_pestat] mean and std.dev: (212.47, 102.65) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 687) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.34, 88.70) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (128, 182, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 509) +[M::mem_pestat] mean and std.dev: (195.95, 88.78) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 636) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (133, 183, 269) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 541) +[M::mem_pestat] mean and std.dev: (209.00, 107.13) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 677) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.12, 89.14) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (123, 171, 261) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 537) +[M::mem_pestat] mean and std.dev: (193.50, 98.26) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 675) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (123, 179, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 489) +[M::mem_pestat] mean and std.dev: (193.85, 91.42) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 611) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (123, 178, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 492) +[M::mem_pestat] mean and std.dev: (190.45, 88.02) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 615) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (129, 185, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 492) +[M::mem_pestat] mean and std.dev: (197.24, 89.44) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 613) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (130, 180, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 505) +[M::mem_pestat] mean and std.dev: (196.65, 92.79) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 630) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.32, 88.28) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 173, 242) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 476) +[M::mem_pestat] mean and std.dev: (190.24, 91.48) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 593) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (119, 171, 224) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 434) +[M::mem_pestat] mean and std.dev: (179.00, 75.09) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 539) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (191.65, 87.98) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 616) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (138, 183, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 486) +[M::mem_pestat] mean and std.dev: (196.84, 84.21) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 602) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (131, 176, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 479) +[M::mem_pestat] mean and std.dev: (195.73, 89.96) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 595) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.03, 89.06) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (118, 168, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 505) +[M::mem_pestat] mean and std.dev: (191.46, 92.08) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 634) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (124, 194, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 541) +[M::mem_pestat] mean and std.dev: (206.17, 96.86) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 680) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.26, 89.14) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (132, 193, 280) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 576) +[M::mem_pestat] mean and std.dev: (215.86, 108.53) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 724) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (125, 182, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 488) +[M::mem_pestat] mean and std.dev: (191.88, 89.99) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 609) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (123, 178, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 492) +[M::mem_pestat] mean and std.dev: (190.42, 87.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 615) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (118, 166, 238) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 478) +[M::mem_pestat] mean and std.dev: (183.58, 88.73) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 598) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (119, 181, 270) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 572) +[M::mem_pestat] mean and std.dev: (202.05, 100.07) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 723) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.19, 88.44) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (120, 182, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (193.03, 91.76) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 628) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 178, 261) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 533) +[M::mem_pestat] mean and std.dev: (194.77, 92.96) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 669) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 491) +[M::mem_pestat] mean and std.dev: (191.87, 87.74) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 613) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 173, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 487) +[M::mem_pestat] mean and std.dev: (189.91, 85.41) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 607) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (119, 175, 253) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 521) +[M::mem_pestat] mean and std.dev: (189.20, 89.75) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 655) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (192.92, 88.79) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (116, 175, 269) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 575) +[M::mem_pestat] mean and std.dev: (201.42, 109.79) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 728) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (130, 187, 274) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 562) +[M::mem_pestat] mean and std.dev: (211.46, 109.23) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 706) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.57, 89.57) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (121, 181, 268) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 562) +[M::mem_pestat] mean and std.dev: (198.54, 97.70) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 709) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (125, 196, 280) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 590) +[M::mem_pestat] mean and std.dev: (212.21, 103.76) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 745) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (191.38, 88.07) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 616) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (123, 173, 267) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 555) +[M::mem_pestat] mean and std.dev: (191.81, 93.86) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 699) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (113, 170, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 527) +[M::mem_pestat] mean and std.dev: (188.26, 96.34) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 665) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.28, 87.76) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (120, 180, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 522) +[M::mem_pestat] mean and std.dev: (189.23, 88.05) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 656) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (104, 159, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 554) +[M::mem_pestat] mean and std.dev: (183.67, 97.14) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 704) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 491) +[M::mem_pestat] mean and std.dev: (191.62, 87.22) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 613) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (123, 172, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 504) +[M::mem_pestat] mean and std.dev: (190.17, 83.53) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 631) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (124, 173, 259) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 529) +[M::mem_pestat] mean and std.dev: (193.00, 88.17) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 664) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.91, 88.22) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 168, 241) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 475) +[M::mem_pestat] mean and std.dev: (184.03, 86.62) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 592) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (127, 184, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 511) +[M::mem_pestat] mean and std.dev: (197.26, 92.28) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 639) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.93, 89.75) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 186, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 524) +[M::mem_pestat] mean and std.dev: (199.73, 94.43) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 657) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (116, 163, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 512) +[M::mem_pestat] mean and std.dev: (186.72, 91.13) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 644) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.01, 88.79) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (120, 173, 269) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 567) +[M::mem_pestat] mean and std.dev: (193.72, 92.37) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 716) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (118, 171, 243) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (182.84, 81.92) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.02, 89.02) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (117, 175, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 531) +[M::mem_pestat] mean and std.dev: (191.74, 94.78) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 669) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (116, 162, 242) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (181.83, 88.11) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.40, 88.29) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (131, 172, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 506) +[M::mem_pestat] mean and std.dev: (192.89, 89.14) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 631) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (120, 188, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 528) +[M::mem_pestat] mean and std.dev: (197.89, 95.67) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 664) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.77, 88.36) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (130, 187, 260) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 520) +[M::mem_pestat] mean and std.dev: (201.91, 89.34) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 650) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (124, 176, 265) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 547) +[M::mem_pestat] mean and std.dev: (193.60, 96.11) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 688) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.69, 89.27) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (116, 175, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 506) +[M::mem_pestat] mean and std.dev: (188.95, 91.38) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 636) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (133, 180, 267) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 535) +[M::mem_pestat] mean and std.dev: (201.86, 93.62) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 669) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.30, 89.18) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (121, 185, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 505) +[M::mem_pestat] mean and std.dev: (193.73, 86.45) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 633) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (125, 178, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (191.23, 89.64) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 499) +[M::mem_pestat] mean and std.dev: (192.83, 89.51) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 624) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (121, 172, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (193.46, 95.83) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (133, 196, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 505) +[M::mem_pestat] mean and std.dev: (200.33, 84.43) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 629) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.53, 88.93) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 179, 253) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 507) +[M::mem_pestat] mean and std.dev: (192.39, 90.44) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 634) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (123, 182, 264) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 546) +[M::mem_pestat] mean and std.dev: (203.23, 106.54) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 687) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (192.83, 88.42) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (121, 177, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 520) +[M::mem_pestat] mean and std.dev: (196.88, 96.66) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 653) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (136, 194, 276) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 556) +[M::mem_pestat] mean and std.dev: (208.66, 96.29) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 696) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.67, 89.47) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 175, 262) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 538) +[M::mem_pestat] mean and std.dev: (199.33, 96.84) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 676) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (121, 185, 279) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 595) +[M::mem_pestat] mean and std.dev: (212.81, 111.71) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 753) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (191.92, 88.87) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 174, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 508) +[M::mem_pestat] mean and std.dev: (194.10, 93.09) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 635) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (116, 171, 237) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 479) +[M::mem_pestat] mean and std.dev: (184.94, 89.34) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 600) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.17, 88.74) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (128, 181, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 482) +[M::mem_pestat] mean and std.dev: (192.21, 84.31) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 600) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (119, 168, 236) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 470) +[M::mem_pestat] mean and std.dev: (183.63, 89.51) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 587) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (192.81, 89.00) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (118, 162, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 499) +[M::mem_pestat] mean and std.dev: (181.34, 88.03) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (132, 190, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 486) +[M::mem_pestat] mean and std.dev: (196.75, 88.78) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 604) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.47, 88.33) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (128, 173, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 485) +[M::mem_pestat] mean and std.dev: (193.06, 85.66) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 604) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (136, 184, 265) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 523) +[M::mem_pestat] mean and std.dev: (204.85, 95.16) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 652) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.69, 89.29) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 183, 268) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 550) +[M::mem_pestat] mean and std.dev: (203.32, 105.57) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 691) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (132, 185, 262) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 522) +[M::mem_pestat] mean and std.dev: (198.64, 93.55) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 652) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 499) +[M::mem_pestat] mean and std.dev: (192.75, 89.52) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 624) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (116, 180, 234) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 470) +[M::mem_pestat] mean and std.dev: (186.55, 90.72) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 588) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (118, 162, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 532) +[M::mem_pestat] mean and std.dev: (188.43, 99.29) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 670) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.64, 88.54) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (136, 182, 241) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 451) +[M::mem_pestat] mean and std.dev: (193.38, 84.23) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 556) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (127, 180, 268) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 550) +[M::mem_pestat] mean and std.dev: (198.69, 96.20) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 691) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (193.62, 88.52) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (113, 169, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 539) +[M::mem_pestat] mean and std.dev: (185.60, 90.80) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 681) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (127, 186, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 487) +[M::mem_pestat] mean and std.dev: (195.77, 87.09) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 607) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (193.71, 88.67) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (138, 183, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (196.08, 87.96) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (129, 195, 269) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 549) +[M::mem_pestat] mean and std.dev: (203.26, 96.48) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 689) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 182, 252) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 502) +[M::mem_pestat] mean and std.dev: (195.34, 89.81) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 627) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (145, 198, 271) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 523) +[M::mem_pestat] mean and std.dev: (212.48, 94.88) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 649) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (143, 196, 285) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 569) +[M::mem_pestat] mean and std.dev: (216.87, 104.56) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 711) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.67, 89.88) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (117, 174, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 534) +[M::mem_pestat] mean and std.dev: (195.69, 100.43) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 673) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (126, 180, 261) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 531) +[M::mem_pestat] mean and std.dev: (202.45, 101.58) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 666) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.47, 88.26) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (131, 185, 244) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 470) +[M::mem_pestat] mean and std.dev: (195.07, 85.97) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 583) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (121, 174, 242) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 484) +[M::mem_pestat] mean and std.dev: (191.61, 92.31) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 605) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (193.29, 88.30) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (121, 179, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (190.63, 89.32) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (130, 184, 261) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 523) +[M::mem_pestat] mean and std.dev: (201.10, 96.32) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 654) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (193.78, 88.66) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (133, 186, 260) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 514) +[M::mem_pestat] mean and std.dev: (200.54, 92.97) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 641) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (123, 184, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 525) +[M::mem_pestat] mean and std.dev: (197.58, 95.94) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 659) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (195.05, 90.04) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 179, 274) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 568) +[M::mem_pestat] mean and std.dev: (201.44, 101.74) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 715) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (130, 188, 268) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 544) +[M::mem_pestat] mean and std.dev: (204.93, 96.84) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 682) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.23, 89.62) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 175, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 514) +[M::mem_pestat] mean and std.dev: (194.24, 91.28) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 644) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (121, 181, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 499) +[M::mem_pestat] mean and std.dev: (193.48, 91.32) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 492) +[M::mem_pestat] mean and std.dev: (192.71, 87.64) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 614) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (131, 181, 283) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 587) +[M::mem_pestat] mean and std.dev: (201.94, 98.40) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 739) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (119, 171, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 530) +[M::mem_pestat] mean and std.dev: (187.46, 88.19) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 667) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (193.20, 87.82) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (118, 173, 237) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 475) +[M::mem_pestat] mean and std.dev: (185.53, 88.94) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 594) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (120, 180, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 549) +[M::mem_pestat] mean and std.dev: (200.82, 100.26) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 692) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 499) +[M::mem_pestat] mean and std.dev: (194.87, 89.10) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 623) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (132, 196, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 510) +[M::mem_pestat] mean and std.dev: (211.50, 102.52) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 636) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (119, 179, 261) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 545) +[M::mem_pestat] mean and std.dev: (200.67, 100.56) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 687) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 183, 252) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 502) +[M::mem_pestat] mean and std.dev: (195.59, 90.23) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 627) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (134, 188, 276) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 560) +[M::mem_pestat] mean and std.dev: (210.50, 97.84) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 702) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (121, 183, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 547) +[M::mem_pestat] mean and std.dev: (199.22, 93.13) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 689) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.90, 89.57) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (135, 200, 270) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 540) +[M::mem_pestat] mean and std.dev: (207.46, 98.99) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 675) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (131, 182, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 512) +[M::mem_pestat] mean and std.dev: (197.75, 94.07) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 639) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (194.27, 89.26) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (140, 186, 270) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 530) +[M::mem_pestat] mean and std.dev: (206.31, 99.39) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 660) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (129, 184, 264) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 534) +[M::mem_pestat] mean and std.dev: (204.61, 99.69) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 669) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (194.05, 88.81) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (138, 192, 261) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 507) +[M::mem_pestat] mean and std.dev: (204.52, 93.64) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 630) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (130, 190, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 508) +[M::mem_pestat] mean and std.dev: (197.34, 96.02) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 634) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 183, 252) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 502) +[M::mem_pestat] mean and std.dev: (195.90, 89.59) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 627) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (143, 186, 264) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 506) +[M::mem_pestat] mean and std.dev: (204.70, 85.72) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 627) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (136, 192, 269) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 535) +[M::mem_pestat] mean and std.dev: (207.93, 98.79) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 668) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 183, 252) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 504) +[M::mem_pestat] mean and std.dev: (195.64, 90.46) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 630) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (135, 190, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 519) +[M::mem_pestat] mean and std.dev: (206.66, 94.31) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 647) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (129, 184, 281) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 585) +[M::mem_pestat] mean and std.dev: (210.84, 106.64) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 737) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 503) +[M::mem_pestat] mean and std.dev: (194.37, 90.03) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 629) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (120, 180, 261) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 543) +[M::mem_pestat] mean and std.dev: (195.11, 94.67) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 684) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (121, 179, 262) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 544) +[M::mem_pestat] mean and std.dev: (196.85, 92.36) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 685) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (192.98, 89.45) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 196, 272) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 562) +[M::mem_pestat] mean and std.dev: (209.69, 107.16) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 707) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (134, 187, 265) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 527) +[M::mem_pestat] mean and std.dev: (205.26, 95.56) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 658) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (194.09, 89.30) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (131, 183, 269) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 545) +[M::mem_pestat] mean and std.dev: (205.41, 101.94) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 683) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (131, 186, 269) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 545) +[M::mem_pestat] mean and std.dev: (207.55, 97.98) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 683) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 182, 252) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 502) +[M::mem_pestat] mean and std.dev: (195.30, 89.86) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 627) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (129, 198, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 531) +[M::mem_pestat] mean and std.dev: (204.44, 96.36) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 665) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (128, 188, 273) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 563) +[M::mem_pestat] mean and std.dev: (211.12, 108.77) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 708) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.62, 90.03) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (117, 178, 243) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (190.27, 91.98) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (129, 188, 259) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 519) +[M::mem_pestat] mean and std.dev: (203.04, 98.95) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 649) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.57, 89.20) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 179, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 484) +[M::mem_pestat] mean and std.dev: (192.35, 87.60) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 603) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (126, 188, 273) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 567) +[M::mem_pestat] mean and std.dev: (201.90, 95.94) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 714) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (193.73, 89.13) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (132, 189, 269) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 543) +[M::mem_pestat] mean and std.dev: (204.10, 95.40) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 680) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (130, 194, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 502) +[M::mem_pestat] mean and std.dev: (202.50, 91.89) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (194.29, 88.95) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (116, 165, 242) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (187.14, 92.80) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (127, 200, 295) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 631) +[M::mem_pestat] mean and std.dev: (217.69, 116.61) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 799) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 183, 252) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 502) +[M::mem_pestat] mean and std.dev: (195.64, 89.84) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 627) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (114, 175, 259) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 549) +[M::mem_pestat] mean and std.dev: (197.55, 100.97) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 694) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (134, 186, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 482) +[M::mem_pestat] mean and std.dev: (198.60, 88.49) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 598) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.48, 89.38) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (116, 170, 276) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 596) +[M::mem_pestat] mean and std.dev: (200.90, 110.41) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 756) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (125, 188, 262) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 536) +[M::mem_pestat] mean and std.dev: (204.23, 97.65) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 673) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (123, 177, 244) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 486) +[M::mem_pestat] mean and std.dev: (189.71, 87.24) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 607) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (121, 176, 241) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 481) +[M::mem_pestat] mean and std.dev: (185.80, 83.67) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 601) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (113, 166, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 512) +[M::mem_pestat] mean and std.dev: (182.47, 90.13) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 645) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 493) +[M::mem_pestat] mean and std.dev: (191.86, 88.04) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 616) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (118, 163, 236) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 472) +[M::mem_pestat] mean and std.dev: (181.81, 86.71) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 590) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (117, 174, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 519) +[M::mem_pestat] mean and std.dev: (192.72, 98.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 653) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.45, 88.06) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (132, 190, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 486) +[M::mem_pestat] mean and std.dev: (197.18, 83.06) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 604) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (119, 169, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 509) +[M::mem_pestat] mean and std.dev: (185.41, 86.77) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 639) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.21, 88.10) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (130, 183, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 502) +[M::mem_pestat] mean and std.dev: (194.29, 89.10) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (124, 186, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 517) +[M::mem_pestat] mean and std.dev: (197.54, 92.81) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 648) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (191.99, 88.81) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 192, 275) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 571) +[M::mem_pestat] mean and std.dev: (206.35, 98.33) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 719) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (122, 178, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (193.43, 92.91) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (123, 177, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 489) +[M::mem_pestat] mean and std.dev: (190.28, 87.74) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 611) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 179, 241) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 469) +[M::mem_pestat] mean and std.dev: (193.51, 84.40) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 583) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (135, 196, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 528) +[M::mem_pestat] mean and std.dev: (204.17, 90.67) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 659) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.32, 88.85) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 194, 243) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 481) +[M::mem_pestat] mean and std.dev: (193.61, 82.15) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 600) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (134, 198, 254) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (201.86, 90.09) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 614) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.50, 89.06) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 194, 283) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 599) +[M::mem_pestat] mean and std.dev: (209.56, 103.80) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 757) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (117, 166, 264) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 558) +[M::mem_pestat] mean and std.dev: (193.67, 101.15) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 705) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.67, 88.53) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (129, 195, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 540) +[M::mem_pestat] mean and std.dev: (199.22, 88.95) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 677) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (128, 173, 238) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 458) +[M::mem_pestat] mean and std.dev: (189.52, 87.23) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 568) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.53, 88.56) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 191, 243) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 479) +[M::mem_pestat] mean and std.dev: (189.47, 82.73) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 597) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (128, 174, 243) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 473) +[M::mem_pestat] mean and std.dev: (194.53, 91.40) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 588) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (123, 177, 244) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 486) +[M::mem_pestat] mean and std.dev: (189.47, 86.81) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 607) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (118, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 517) +[M::mem_pestat] mean and std.dev: (195.24, 94.86) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 650) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (134, 185, 261) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 515) +[M::mem_pestat] mean and std.dev: (198.86, 94.69) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 642) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 178, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 490) +[M::mem_pestat] mean and std.dev: (190.69, 87.48) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 612) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (117, 169, 244) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (191.04, 93.07) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (116, 180, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 509) +[M::mem_pestat] mean and std.dev: (188.19, 91.91) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 640) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 491) +[M::mem_pestat] mean and std.dev: (192.30, 87.68) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 613) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (113, 169, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 518) +[M::mem_pestat] mean and std.dev: (186.79, 97.31) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 653) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (118, 176, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 535) +[M::mem_pestat] mean and std.dev: (192.31, 93.93) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 674) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 491) +[M::mem_pestat] mean and std.dev: (191.88, 87.68) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 613) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (121, 175, 219) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 415) +[M::mem_pestat] mean and std.dev: (181.37, 74.70) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 513) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (103, 157, 236) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 502) +[M::mem_pestat] mean and std.dev: (177.61, 94.52) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 635) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.72, 88.60) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (131, 177, 244) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 470) +[M::mem_pestat] mean and std.dev: (191.16, 83.35) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 583) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (123, 169, 227) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 435) +[M::mem_pestat] mean and std.dev: (180.34, 83.08) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 539) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (123, 177, 242) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 480) +[M::mem_pestat] mean and std.dev: (188.37, 85.22) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 599) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (120, 159, 230) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 450) +[M::mem_pestat] mean and std.dev: (179.49, 84.47) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 560) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (131, 193, 265) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 533) +[M::mem_pestat] mean and std.dev: (204.89, 92.58) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 667) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 178, 244) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 484) +[M::mem_pestat] mean and std.dev: (189.86, 85.96) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 604) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (110, 169, 233) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 479) +[M::mem_pestat] mean and std.dev: (178.52, 84.56) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 602) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (115, 180, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 538) +[M::mem_pestat] mean and std.dev: (198.26, 96.59) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 679) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 179, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 485) +[M::mem_pestat] mean and std.dev: (190.79, 86.25) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 605) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (134, 181, 243) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 461) +[M::mem_pestat] mean and std.dev: (191.34, 81.25) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 570) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (113, 168, 252) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 530) +[M::mem_pestat] mean and std.dev: (182.98, 88.79) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 669) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 179, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 485) +[M::mem_pestat] mean and std.dev: (190.51, 86.17) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 605) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (119, 172, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 509) +[M::mem_pestat] mean and std.dev: (187.22, 91.75) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 639) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (121, 160, 239) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 475) +[M::mem_pestat] mean and std.dev: (185.53, 84.77) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 593) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 491) +[M::mem_pestat] mean and std.dev: (191.85, 87.78) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 613) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (132, 180, 265) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 531) +[M::mem_pestat] mean and std.dev: (197.75, 87.34) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 664) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (120, 168, 223) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 429) +[M::mem_pestat] mean and std.dev: (176.71, 74.61) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 532) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (123, 177, 243) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 483) +[M::mem_pestat] mean and std.dev: (189.05, 86.30) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 603) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 168, 226) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 428) +[M::mem_pestat] mean and std.dev: (184.58, 89.62) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 543) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (115, 161, 224) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 442) +[M::mem_pestat] mean and std.dev: (177.21, 83.86) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 551) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 178, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 487) +[M::mem_pestat] mean and std.dev: (190.60, 87.00) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 608) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 259) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 527) +[M::mem_pestat] mean and std.dev: (202.28, 101.25) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 661) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (115, 164, 242) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (181.73, 85.89) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 623) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 491) +[M::mem_pestat] mean and std.dev: (191.63, 87.43) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 613) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (118, 169, 236) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 472) +[M::mem_pestat] mean and std.dev: (184.96, 86.78) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 590) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (124, 177, 239) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 469) +[M::mem_pestat] mean and std.dev: (184.50, 80.71) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 584) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 490) +[M::mem_pestat] mean and std.dev: (191.30, 87.21) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 612) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (117, 164, 230) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 456) +[M::mem_pestat] mean and std.dev: (182.02, 84.73) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 569) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (123, 180, 269) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 561) +[M::mem_pestat] mean and std.dev: (202.12, 100.12) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 707) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 178, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 490) +[M::mem_pestat] mean and std.dev: (191.03, 88.06) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 612) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 183, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 492) +[M::mem_pestat] mean and std.dev: (190.74, 82.77) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 614) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (121, 168, 228) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 442) +[M::mem_pestat] mean and std.dev: (181.82, 80.19) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 549) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (123, 176, 243) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 483) +[M::mem_pestat] mean and std.dev: (188.74, 86.51) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 603) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (117, 175, 244) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (193.38, 93.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (116, 164, 236) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 476) +[M::mem_pestat] mean and std.dev: (182.73, 88.38) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 596) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 178, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 490) +[M::mem_pestat] mean and std.dev: (190.80, 87.21) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 612) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (116, 173, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 521) +[M::mem_pestat] mean and std.dev: (190.02, 96.36) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 656) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (113, 175, 240) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (188.81, 95.49) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 179, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 488) +[M::mem_pestat] mean and std.dev: (191.35, 87.21) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 609) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (118, 176, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 535) +[M::mem_pestat] mean and std.dev: (192.56, 93.66) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 674) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (119, 163, 240) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 482) +[M::mem_pestat] mean and std.dev: (185.14, 84.69) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 603) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 179, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 487) +[M::mem_pestat] mean and std.dev: (190.72, 86.78) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 608) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (116, 158, 236) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 476) +[M::mem_pestat] mean and std.dev: (176.42, 83.12) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 596) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (130, 180, 260) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 520) +[M::mem_pestat] mean and std.dev: (198.93, 94.43) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 650) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.58, 88.82) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (132, 179, 262) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 522) +[M::mem_pestat] mean and std.dev: (195.57, 87.56) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 652) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (119, 174, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 551) +[M::mem_pestat] mean and std.dev: (194.57, 96.18) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 695) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (124, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 496) +[M::mem_pestat] mean and std.dev: (192.22, 88.37) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 620) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (135, 189, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (204.28, 95.31) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 615) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (139, 180, 252) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 478) +[M::mem_pestat] mean and std.dev: (193.17, 81.58) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 591) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.17, 88.56) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (138, 191, 274) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 546) +[M::mem_pestat] mean and std.dev: (208.37, 90.55) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 682) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (128, 180, 242) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 470) +[M::mem_pestat] mean and std.dev: (193.12, 84.15) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 584) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.58, 89.20) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (132, 179, 280) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 576) +[M::mem_pestat] mean and std.dev: (208.16, 99.76) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 724) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (139, 204, 286) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 580) +[M::mem_pestat] mean and std.dev: (219.84, 107.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 727) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 183, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 499) +[M::mem_pestat] mean and std.dev: (195.09, 89.08) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 623) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (132, 192, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 534) +[M::mem_pestat] mean and std.dev: (203.60, 97.06) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 668) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (128, 186, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 533) +[M::mem_pestat] mean and std.dev: (200.15, 92.60) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 668) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (193.19, 89.04) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (117, 189, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 531) +[M::mem_pestat] mean and std.dev: (197.84, 98.14) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 669) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (133, 184, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 505) +[M::mem_pestat] mean and std.dev: (197.11, 91.81) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 629) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 497) +[M::mem_pestat] mean and std.dev: (192.82, 88.99) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 621) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (133, 181, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 487) +[M::mem_pestat] mean and std.dev: (199.99, 91.75) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 605) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (131, 183, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 527) +[M::mem_pestat] mean and std.dev: (200.90, 94.08) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 659) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.52, 89.67) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 191, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 499) +[M::mem_pestat] mean and std.dev: (198.87, 89.13) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 624) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (129, 190, 270) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 552) +[M::mem_pestat] mean and std.dev: (204.94, 93.17) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 693) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 501) +[M::mem_pestat] mean and std.dev: (194.89, 89.74) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 626) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (133, 186, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 532) +[M::mem_pestat] mean and std.dev: (202.29, 91.55) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 665) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (123, 179, 273) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 573) +[M::mem_pestat] mean and std.dev: (202.24, 102.36) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 723) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (127, 183, 252) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 502) +[M::mem_pestat] mean and std.dev: (195.45, 89.70) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 627) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (131, 185, 273) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 557) +[M::mem_pestat] mean and std.dev: (204.52, 100.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 699) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (119, 188, 265) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 557) +[M::mem_pestat] mean and std.dev: (196.37, 88.79) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 703) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 179, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 491) +[M::mem_pestat] mean and std.dev: (191.83, 87.95) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 613) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (121, 178, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 532) +[M::mem_pestat] mean and std.dev: (193.96, 91.42) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 669) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (133, 182, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 478) +[M::mem_pestat] mean and std.dev: (195.60, 87.68) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 593) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 491) +[M::mem_pestat] mean and std.dev: (192.06, 87.76) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 613) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 177, 251) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 505) +[M::mem_pestat] mean and std.dev: (192.84, 90.28) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 632) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (128, 188, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 518) +[M::mem_pestat] mean and std.dev: (199.09, 87.06) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 648) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (193.76, 88.25) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 180, 256) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 516) +[M::mem_pestat] mean and std.dev: (194.04, 91.30) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 646) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (118, 177, 235) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 469) +[M::mem_pestat] mean and std.dev: (186.79, 87.22) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 586) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (193.55, 87.88) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 177, 236) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 460) +[M::mem_pestat] mean and std.dev: (188.28, 87.19) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 572) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (120, 176, 266) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 558) +[M::mem_pestat] mean and std.dev: (193.52, 102.28) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 704) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.56, 87.81) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (131, 175, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 479) +[M::mem_pestat] mean and std.dev: (191.43, 84.83) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 595) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (127, 180, 253) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 505) +[M::mem_pestat] mean and std.dev: (191.92, 82.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 631) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 178, 244) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 482) +[M::mem_pestat] mean and std.dev: (190.06, 85.50) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 601) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (129, 178, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 483) +[M::mem_pestat] mean and std.dev: (190.68, 84.34) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 601) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (124, 176, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 517) +[M::mem_pestat] mean and std.dev: (194.89, 94.02) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 648) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 180, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 489) +[M::mem_pestat] mean and std.dev: (191.90, 86.78) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 610) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (113, 181, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 515) +[M::mem_pestat] mean and std.dev: (190.94, 93.34) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 649) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (117, 169, 229) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 453) +[M::mem_pestat] mean and std.dev: (179.06, 81.58) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 565) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 180, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 489) +[M::mem_pestat] mean and std.dev: (192.14, 86.77) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 610) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (130, 175, 229) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 427) +[M::mem_pestat] mean and std.dev: (182.78, 77.59) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 526) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (123, 191, 263) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 543) +[M::mem_pestat] mean and std.dev: (203.57, 96.66) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 683) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (193.88, 88.67) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (116, 169, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 539) +[M::mem_pestat] mean and std.dev: (192.79, 94.62) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 680) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (118, 177, 239) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 481) +[M::mem_pestat] mean and std.dev: (190.77, 89.51) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 602) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 179, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 488) +[M::mem_pestat] mean and std.dev: (191.01, 86.83) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 609) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (112, 174, 234) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 478) +[M::mem_pestat] mean and std.dev: (181.62, 86.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 600) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (135, 192, 255) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (202.50, 91.94) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 615) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.33, 87.83) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (128, 174, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 518) +[M::mem_pestat] mean and std.dev: (197.33, 98.33) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 648) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (113, 160, 238) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 488) +[M::mem_pestat] mean and std.dev: (178.98, 80.90) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 613) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (126, 181, 249) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 495) +[M::mem_pestat] mean and std.dev: (193.59, 88.40) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 618) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (116, 175, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 506) +[M::mem_pestat] mean and std.dev: (188.42, 88.79) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 636) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (129, 178, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 516) +[M::mem_pestat] mean and std.dev: (195.03, 90.46) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 645) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 182, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 498) +[M::mem_pestat] mean and std.dev: (194.15, 88.80) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 622) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (120, 178, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 534) +[M::mem_pestat] mean and std.dev: (193.44, 97.49) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 672) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... 
+[M::mem_pestat] (25, 50, 75) percentile: (117, 181, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 504) +[M::mem_pestat] mean and std.dev: (189.82, 91.61) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 633) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 178, 246) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 490) +[M::mem_pestat] mean and std.dev: (190.93, 87.55) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 612) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (112, 155, 230) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 466) +[M::mem_pestat] mean and std.dev: (176.28, 82.91) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 584) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (121, 188, 257) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 529) +[M::mem_pestat] mean and std.dev: (202.45, 100.18) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 665) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (124, 178, 245) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 487) +[M::mem_pestat] mean and std.dev: (190.60, 87.13) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 608) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (122, 173, 253) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 515) +[M::mem_pestat] mean and std.dev: (190.43, 92.70) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 646) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (125, 183, 258) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 524) +[M::mem_pestat] mean and std.dev: (196.46, 87.43) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 657) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 248) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 494) +[M::mem_pestat] mean and std.dev: (192.36, 87.84) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 617) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (126, 183, 259) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 525) +[M::mem_pestat] mean and std.dev: (198.95, 94.51) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 658) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (119, 184, 265) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 557) +[M::mem_pestat] mean and std.dev: (198.91, 98.28) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 703) +[M::mem_pestat] analyzing insert size distribution for orientation FR... 
+[M::mem_pestat] (25, 50, 75) percentile: (125, 180, 247) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 491) +[M::mem_pestat] mean and std.dev: (192.27, 87.64) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 613) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... +[M::mem_pestat] (25, 50, 75) percentile: (132, 192, 262) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 522) +[M::mem_pestat] mean and std.dev: (201.16, 91.78) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 652) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[M::mem_pestat] analyzing insert size distribution for orientation FF... +[M::mem_pestat] (25, 50, 75) percentile: (129, 176, 259) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 519) +[M::mem_pestat] mean and std.dev: (196.53, 94.84) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 649) +[M::mem_pestat] analyzing insert size distribution for orientation FR... +[M::mem_pestat] (25, 50, 75) percentile: (125, 181, 250) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 500) +[M::mem_pestat] mean and std.dev: (193.92, 89.46) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 625) +[M::mem_pestat] skip orientation RF as there are not enough pairs +[M::mem_pestat] analyzing insert size distribution for orientation RR... 
+[M::mem_pestat] (25, 50, 75) percentile: (129, 188, 259) +[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 519) +[M::mem_pestat] mean and std.dev: (202.99, 94.98) +[M::mem_pestat] low and high boundaries for proper pairs: (1, 649) +[M::mem_pestat] skip orientation FF +[M::mem_pestat] skip orientation RR +[main] Version: 0.7.15-r1140 +[main] CMD: bwa mem -t 16 -R @RG\tID:concatenated_ACC5962A16_XXXXXX_R\tSM:concatenated_ACC5962A16_XXXXXX_R\tPL:ILLUMINAi -M -v 1 /home/hassan.foroughi/repos/BALSAMIC/init/reference/6.0.3/hg19/genome/human_g1k_v37.fasta /home/hassan.foroughi/repos/BALSAMIC/run_tests/T_panel/analysis/fastq/concatenated_ACC5962A16_XXXXXX_R_1.fp.fastq.gz /home/hassan.foroughi/repos/BALSAMIC/run_tests/T_panel/analysis/fastq/concatenated_ACC5962A16_XXXXXX_R_2.fp.fastq.gz +[main] Real time: 4113.729 sec; CPU: 61783.647 sec +[bam_sort_core] merging from 128 files and 16 in-memory blocks... +[Wed Dec 30 15:49:02 2020] +Finished job 0. +1 of 1 steps (100%) done diff --git a/tests/test_data/dummy_run_logs/bad_format.h5 b/tests/test_data/dummy_run_logs/bad_format.h5 new file mode 100644 index 000000000..9461bc9fc Binary files /dev/null and b/tests/test_data/dummy_run_logs/bad_format.h5 differ diff --git a/tests/test_data/qc_files/multiqc_picard_HsMetrics.json b/tests/test_data/qc_files/multiqc_picard_HsMetrics.json new file mode 100755 index 000000000..a7db13630 --- /dev/null +++ b/tests/test_data/qc_files/multiqc_picard_HsMetrics.json @@ -0,0 +1,182 @@ +{ + "concatenated_neatlyfastraven_XXXXXX_R": { + "BAIT_SET": "gmslymphoid_7.1_hg19_design.bed", + "GENOME_SIZE": 3101804739.0, + "BAIT_TERRITORY": 1199545.0, + "TARGET_TERRITORY": 1199545.0, + "BAIT_DESIGN_EFFICIENCY": 1.0, + "TOTAL_READS": 44663346.0, + "PF_READS": 44663346.0, + "PF_UNIQUE_READS": 28332639.0, + "PCT_PF_READS": 1.0, + "PCT_PF_UQ_READS": 0.63436, + "PF_UQ_READS_ALIGNED": 28223378.0, + "PCT_PF_UQ_READS_ALIGNED": 0.996144, + "PF_BASES_ALIGNED": 5807803024.0, + 
"PF_UQ_BASES_ALIGNED": 3656570693.0, + "ON_BAIT_BASES": 3764435751.0, + "NEAR_BAIT_BASES": 671940507.0, + "OFF_BAIT_BASES": 1371426766.0, + "ON_TARGET_BASES": 1387305423.0, + "PCT_SELECTED_BASES": 0.763865, + "PCT_OFF_BAIT": 0.336135, + "ON_BAIT_VS_SELECTED": 0.848538, + "MEAN_BAIT_COVERAGE": 3138.219701, + "MEAN_TARGET_COVERAGE": 901, + "MEDIAN_TARGET_COVERAGE": 1135.0, + "MAX_TARGET_COVERAGE": 6034.0, + "PCT_USABLE_BASES_ON_BAIT": 0.582816, + "PCT_USABLE_BASES_ON_TARGET": 0.214785, + "FOLD_ENRICHMENT": 1676.045951, + "ZERO_CVG_TARGETS_PCT": 0.000639, + "PCT_EXC_DUPE": 0.370404, + "PCT_EXC_MAPQ": 0.028759, + "PCT_EXC_BASEQ": 0.001923, + "PCT_EXC_OVERLAP": 0.190604, + "PCT_EXC_OFF_TARGET": 0.40741, + "FOLD_80_BASE_PENALTY": 1.406809, + "PCT_TARGET_BASES_1X": 0.998522, + "PCT_TARGET_BASES_2X": 0.998522, + "PCT_TARGET_BASES_10X": 0.998506, + "PCT_TARGET_BASES_20X": 0.998502, + "PCT_TARGET_BASES_30X": 0.998489, + "PCT_TARGET_BASES_40X": 0.99848, + "PCT_TARGET_BASES_50X": 0.998473, + "PCT_TARGET_BASES_100X": 0.998329, + "PCT_TARGET_BASES_150X": 0.997938, + "PCT_TARGET_BASES_200X": 0.997481, + "PCT_TARGET_BASES_250X": 0.996963, + "PCT_TARGET_BASES_300X": 0.996278, + "PCT_TARGET_BASES_350X": 0.995179, + "PCT_TARGET_BASES_400X": 0.993483, + "PCT_TARGET_BASES_450X": 0.990721, + "PCT_TARGET_BASES_500X": 0.985945, + "PCT_TARGET_BASES_550X": 0.978217, + "PCT_TARGET_BASES_600X": 0.966796, + "PCT_TARGET_BASES_650X": 0.950037, + "PCT_TARGET_BASES_700X": 0.927646, + "PCT_TARGET_BASES_750X": 0.899417, + "PCT_TARGET_BASES_800X": 0.865841, + "PCT_TARGET_BASES_850X": 0.828958, + "PCT_TARGET_BASES_900X": 0.786791, + "PCT_TARGET_BASES_950X": 0.737412, + "PCT_TARGET_BASES_1000X": 0.678351, + "PCT_TARGET_BASES_1500X": 0.13443, + "PCT_TARGET_BASES_2000X": 0.016783, + "PCT_TARGET_BASES_2500X": 0.001049, + "PCT_TARGET_BASES_3000X": 0.000628, + "PCT_TARGET_BASES_3500X": 0.000469, + "PCT_TARGET_BASES_4000X": 0.000312, + "PCT_TARGET_BASES_4500X": 0.000193, + "PCT_TARGET_BASES_5000X": 0.000128, 
+ "PCT_TARGET_BASES_6000X": 8e-06, + "PCT_TARGET_BASES_7000X": 0.0, + "PCT_TARGET_BASES_8000X": 0.0, + "PCT_TARGET_BASES_9000X": 0.0, + "PCT_TARGET_BASES_10000X": 0.0, + "HS_LIBRARY_SIZE": 14232551.0, + "HS_PENALTY_10X": 3.46089, + "HS_PENALTY_20X": 3.468855, + "HS_PENALTY_30X": 3.48744, + "HS_PENALTY_40X": 3.500714, + "HS_PENALTY_50X": 3.515051, + "HS_PENALTY_100X": 3.585142, + "AT_DROPOUT": 2.137211, + "GC_DROPOUT": 0.454636, + "HET_SNP_SENSITIVITY": 0.311063, + "HET_SNP_Q": 2.0, + "SAMPLE": "", + "LIBRARY": "", + "READ_GROUP": "" + }, + "concatenated_easilyusefulorca_XXXXXX_R": { + "BAIT_SET": "gmslymphoid_7.1_hg19_design.bed", + "GENOME_SIZE": 3101804739.0, + "BAIT_TERRITORY": 1199545.0, + "TARGET_TERRITORY": 1199545.0, + "BAIT_DESIGN_EFFICIENCY": 1.0, + "TOTAL_READS": 29774196.0, + "PF_READS": 29774196.0, + "PF_UNIQUE_READS": 22845102.0, + "PCT_PF_READS": 1.0, + "PCT_PF_UQ_READS": 0.767279, + "PF_UQ_READS_ALIGNED": 22808133.0, + "PCT_PF_UQ_READS_ALIGNED": 0.998382, + "PF_BASES_ALIGNED": 3952917096.0, + "PF_UQ_BASES_ALIGNED": 3022512398.0, + "ON_BAIT_BASES": 2514334741.0, + "NEAR_BAIT_BASES": 503967058.0, + "OFF_BAIT_BASES": 934615297.0, + "ON_TARGET_BASES": 1226151208.0, + "PCT_SELECTED_BASES": 0.763563, + "PCT_OFF_BAIT": 0.336437, + "ON_BAIT_VS_SELECTED": 0.83303, + "MEAN_BAIT_COVERAGE": 2096.073712, + "MEAN_TARGET_COVERAGE": 1022.18025, + "MEDIAN_TARGET_COVERAGE": 1044.0, + "MAX_TARGET_COVERAGE": 5948.0, + "PCT_USABLE_BASES_ON_BAIT": 0.583587, + "PCT_USABLE_BASES_ON_TARGET": 0.284595, + "FOLD_ENRICHMENT": 1644.762897, + "ZERO_CVG_TARGETS_PCT": 0.000639, + "PCT_EXC_DUPE": 0.235372, + "PCT_EXC_MAPQ": 0.032372, + "PCT_EXC_BASEQ": 0.003173, + "PCT_EXC_OVERLAP": 0.210951, + "PCT_EXC_OFF_TARGET": 0.51776, + "FOLD_80_BASE_PENALTY": 1.415434, + "PCT_TARGET_BASES_1X": 0.99848, + "PCT_TARGET_BASES_2X": 0.998479, + "PCT_TARGET_BASES_10X": 0.998469, + "PCT_TARGET_BASES_20X": 0.998464, + "PCT_TARGET_BASES_30X": 0.998457, + "PCT_TARGET_BASES_40X": 0.998449, + 
"PCT_TARGET_BASES_50X": 0.998438, + "PCT_TARGET_BASES_100X": 0.998372, + "PCT_TARGET_BASES_150X": 0.998023, + "PCT_TARGET_BASES_200X": 0.997447, + "PCT_TARGET_BASES_250X": 0.996915, + "PCT_TARGET_BASES_300X": 0.996274, + "PCT_TARGET_BASES_350X": 0.995137, + "PCT_TARGET_BASES_400X": 0.993448, + "PCT_TARGET_BASES_450X": 0.990445, + "PCT_TARGET_BASES_500X": 0.98442, + "PCT_TARGET_BASES_550X": 0.974237, + "PCT_TARGET_BASES_600X": 0.958569, + "PCT_TARGET_BASES_650X": 0.937245, + "PCT_TARGET_BASES_700X": 0.910187, + "PCT_TARGET_BASES_750X": 0.87714, + "PCT_TARGET_BASES_800X": 0.837864, + "PCT_TARGET_BASES_850X": 0.791489, + "PCT_TARGET_BASES_900X": 0.735958, + "PCT_TARGET_BASES_950X": 0.667805, + "PCT_TARGET_BASES_1000X": 0.585219, + "PCT_TARGET_BASES_1500X": 0.013922, + "PCT_TARGET_BASES_2000X": 0.001017, + "PCT_TARGET_BASES_2500X": 0.000645, + "PCT_TARGET_BASES_3000X": 0.000434, + "PCT_TARGET_BASES_3500X": 0.000305, + "PCT_TARGET_BASES_4000X": 0.000214, + "PCT_TARGET_BASES_4500X": 0.00016, + "PCT_TARGET_BASES_5000X": 8.3e-05, + "PCT_TARGET_BASES_6000X": 0.0, + "PCT_TARGET_BASES_7000X": 0.0, + "PCT_TARGET_BASES_8000X": 0.0, + "PCT_TARGET_BASES_9000X": 0.0, + "PCT_TARGET_BASES_10000X": 0.0, + "HS_LIBRARY_SIZE": 17511296.0, + "HS_PENALTY_10X": 3.00468, + "HS_PENALTY_20X": 3.015127, + "HS_PENALTY_30X": 3.022091, + "HS_PENALTY_40X": 3.030796, + "HS_PENALTY_50X": 3.042287, + "HS_PENALTY_100X": 3.084072, + "AT_DROPOUT": 3.936864, + "GC_DROPOUT": 0.127717, + "HET_SNP_SENSITIVITY": 0.396359, + "HET_SNP_Q": 2.0, + "SAMPLE": "", + "LIBRARY": "", + "READ_GROUP": "" + } +} diff --git a/tests/test_data/references/genome/cancer_rank_model_-v0.1-.ini b/tests/test_data/references/genome/cancer_rank_model_-v0.1-.ini new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_data/references/panel/background_variants.txt b/tests/test_data/references/panel/background_variants.txt new file mode 100644 index 000000000..a02036b27 --- /dev/null +++ 
b/tests/test_data/references/panel/background_variants.txt @@ -0,0 +1,16 @@ +3 178952085 A G +7 55242464 AGGAATTAAGAGAAGC A +7 55242469 TTAAGAGAAGCAACATCTC T +7 55242469 TTAAGAGAAGCAACATCTC TT +7 55242483 ATCTCCGAAAGCCAACAAGGAAATC A +7 55249071 C T +7 55259515 T G +7 140453136 A T +12 25380275 T G +12 25380275 T G +12 25398284 C T +12 25398285 C A +12 25398284 C T +12 25398285 C A +1 115256529 T C +14 105246551 C T diff --git a/tests/test_data/references/reference.json b/tests/test_data/references/reference.json index 75de6014c..abb131953 100644 --- a/tests/test_data/references/reference.json +++ b/tests/test_data/references/reference.json @@ -6,6 +6,7 @@ "1kg_snps_high": "tests/test_data/references/variants/1kg_phase1_snps_high_confidence_b37.vcf.gz", "1kg_known_indel": "tests/test_data/references/variants/1kg_known_indels_b37.vcf.gz", "mills_1kg": "tests/test_data/references/variants/mills_1kg_index.vcf.gz", + "gnomad_variant": "tests/test_data/reference/variants/gnomad.genomes.r2.1.1.sites.vcf.bgz", "cosmic": "tests/test_data/references/variants/cosmic_coding_muts_v89.vcf.gz", "vep": "tests/test_data/references/vep/", "refflat": "tests/test_data/references/genome/refseq.flat", diff --git a/tests/test_data/references/tumorlod.json b/tests/test_data/references/tumorlod.json new file mode 100644 index 000000000..590291a9d --- /dev/null +++ b/tests/test_data/references/tumorlod.json @@ -0,0 +1,3 @@ +{ + "tumorlod": "0.5", +} diff --git a/tests/test_data/references/variants/gnomad.genomes.r2.1.1.sites.vcf.bgz b/tests/test_data/references/variants/gnomad.genomes.r2.1.1.sites.vcf.bgz new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_data/vcf_tables/test_createVCF_output.vcf.gz b/tests/test_data/vcf_tables/test_createVCF_output.vcf.gz index 16a079dbf..7d33d0614 100644 --- a/tests/test_data/vcf_tables/test_createVCF_output.vcf.gz +++ b/tests/test_data/vcf_tables/test_createVCF_output.vcf.gz @@ -1,5 +1,5 @@ ##fileformat=VCFv4.2 -##fileDate=20201210 
+##fileDate=20201217 ##source=NA ##reference=NA ##contig= diff --git a/tests/test_workflow.py b/tests/test_workflow.py index 46320eb03..d75073756 100644 --- a/tests/test_workflow.py +++ b/tests/test_workflow.py @@ -1,12 +1,13 @@ -#! /usr/bin/env python3 -# syntax=python tabstop=4 expandtab - +from unittest import mock import snakemake from BALSAMIC.utils.cli import get_snakefile +MOCKED_OS_ENVIRON = 'os.environ' + -def test_workflow_tumor_normal(tumor_normal_config): +def test_workflow_tumor_normal(tumor_normal_config, sentieon_install_dir, + sentieon_license): # GIVEN a sample config dict and snakefile workflow = 'paired' snakefile = get_snakefile(workflow) @@ -14,12 +15,18 @@ def test_workflow_tumor_normal(tumor_normal_config): # WHEN invoking snakemake module with dryrun option # THEN it should return true - assert snakemake.snakemake(snakefile, - configfiles=[config_json], - dryrun=True) + with mock.patch.dict( + MOCKED_OS_ENVIRON, { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + assert snakemake.snakemake(snakefile, + configfiles=[config_json], + dryrun=True) -def test_workflow_tumor_only(tumor_only_config): +def test_workflow_tumor_only(tumor_only_config, sentieon_install_dir, + sentieon_license): # GIVEN a sample config dict and snakefile workflow = 'single' snakefile = get_snakefile(workflow) @@ -27,25 +34,37 @@ def test_workflow_tumor_only(tumor_only_config): # WHEN invoking snakemake module with dryrun option # THEN it should return true - assert snakemake.snakemake(snakefile, - configfiles=[config_json], - dryrun=True) + with mock.patch.dict( + MOCKED_OS_ENVIRON, { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + assert snakemake.snakemake(snakefile, + configfiles=[config_json], + dryrun=True) -def test_workflow_qc(tumor_normal_config, tumor_only_config): +def test_workflow_qc(tumor_normal_config, tumor_only_config, + sentieon_install_dir, sentieon_license): # 
GIVEN a sample config dict and snakefile workflow = 'qc' snakefile = get_snakefile(workflow) # WHEN invoking snakemake module with dryrun option # THEN it should return true - for config_json in (tumor_normal_config, tumor_only_config): - assert snakemake.snakemake(snakefile, - configfiles=[config_json], - dryrun=True) + with mock.patch.dict( + MOCKED_OS_ENVIRON, { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + for config_json in (tumor_normal_config, tumor_only_config): + assert snakemake.snakemake(snakefile, + configfiles=[config_json], + dryrun=True) -def test_workflow_sentieon(tumor_normal_wgs_config, tumor_only_wgs_config): +def test_workflow_sentieon(tumor_normal_wgs_config, tumor_only_wgs_config, + sentieon_install_dir, sentieon_license): # GIVEN a sample config dict and snakefile workflows = [('single', tumor_only_wgs_config), ('paired', tumor_normal_wgs_config)] @@ -53,10 +72,35 @@ def test_workflow_sentieon(tumor_normal_wgs_config, tumor_only_wgs_config): # WHEN invoking snakemake module with dryrun option # THEN it should return true - for workflow in workflows: - analysis_type = workflow[0] - config = workflow[1] - snakefile = get_snakefile(analysis_type, sequencing_type) + with mock.patch.dict( + MOCKED_OS_ENVIRON, { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): + for workflow in workflows: + analysis_type = workflow[0] + config = workflow[1] + snakefile = get_snakefile(analysis_type, sequencing_type) + assert snakemake.snakemake(snakefile, + configfiles=[config], + dryrun=True) + + +def test_umiworkflow_tumor_only(tumor_only_umi_config, sentieon_install_dir, + sentieon_license): + # GIVEN a sample config dict and snakefile + workflow = 'umi' + snakefile = get_snakefile(workflow) + config_json = tumor_only_umi_config + config_tumorlod = "tests/test_data/references/tumorlod.json" + + # WHEN invoking snakemake module with dryrun option + # THEN it should return 
true + with mock.patch.dict( + MOCKED_OS_ENVIRON, { + 'SENTIEON_LICENSE': sentieon_license, + 'SENTIEON_INSTALL_DIR': sentieon_install_dir + }): assert snakemake.snakemake(snakefile, - configfiles=[config], + configfiles=[config_json, config_tumorlod], dryrun=True) diff --git a/tests/utils/test_models.py b/tests/utils/test_models.py index 479ce2c7e..19d894371 100644 --- a/tests/utils/test_models.py +++ b/tests/utils/test_models.py @@ -5,17 +5,10 @@ from pydantic import ValidationError from BALSAMIC.utils.models import ( - VCFAttributes, - VarCallerFilter, - QCModel, - VarcallerAttribute, - VCFModel, - AnalysisModel, - SampleInstanceModel, - BioinfoToolsModel, - ReferenceUrlsModel, - ReferenceMeta, -) + VCFAttributes, VarCallerFilter, QCModel, VarcallerAttribute, AnalysisModel, + SampleInstanceModel, ReferenceUrlsModel, ReferenceMeta, UMIworkflowConfig, + UMIParamsCommon, UMIParamsUMIextract, UMIParamsConsensuscall, + UMIParamsTNscope, UMIParamsVardict, UMIParamsVEP) def test_referencemeta(): @@ -207,6 +200,11 @@ def test_varcallerfilter(): "filter_name": "dummy_depth", "field": "INFO" }, + "pop_freq": { + "tag_value": 0.005, + "filter_name": "dummy_pop_freq", + "field": "INFO" + }, "varcaller_name": "dummy_varcaller", "filter_type": "dummy_ffpe_filter", "analysis_type": "dummy_tumor_only", @@ -281,3 +279,100 @@ def test_sample_instance_model(): with pytest.raises(ValueError) as excinfo: SampleInstanceModel.parse_obj(invalid_args) assert "not supported" in excinfo.value + + +def test_umiparams_common(): + """ test UMIParamsCommon model for correct validation """ + + # GIVEN a UMI workflow common params + test_commonparams = { + "align_header": "test_header_name", + "align_intbases": 100, + "filter_tumor_af": 0.01 + } + # WHEN building the model + test_commonparams_built = UMIParamsCommon(**test_commonparams) + # THEN assert values + assert test_commonparams_built.align_header == "test_header_name" + assert test_commonparams_built.filter_tumor_af == 0.01 + assert 
test_commonparams_built.align_intbases == 100 + + +def test_umiparams_umiextract(): + """ test UMIParamsUMIextract model for correct validation """ + # GIVEN umiextract params + test_umiextractparams = {"read_structure": "['mode', 'r1,r2']"} + + # WHEN building the model + test_umiextractparams_built = UMIParamsUMIextract(**test_umiextractparams) + + # THEN assert values + assert test_umiextractparams_built.read_structure == "['mode', 'r1,r2']" + + +def test_umiparams_consensuscall(): + """ test UMIParamsConsensuscall model for correct validation """ + + #GIVEN consensuscall params + test_consensuscall = { + "align_format": "BAM", + "filter_minreads": "6,3,3", + "tag": "XZ" + } + + #WHEN building the model + test_consensuscall_built = UMIParamsConsensuscall(**test_consensuscall) + + #THEN assert values + assert test_consensuscall_built.align_format == "BAM" + assert test_consensuscall_built.filter_minreads == "6,3,3" + assert test_consensuscall_built.tag == "XZ" + + +def test_umiparams_tnscope(): + """ test UMIParamsTNscope model for correct validation """ + + #GIVEN tnscope params + test_tnscope_params = { + "algo": "algoname", + "min_tumorLOD": 6, + "error_rate": 5, + "prunefactor": 3, + "disable_detect": "abc" + } + + #WHEN building the model + test_tnscope_params_built = UMIParamsTNscope(**test_tnscope_params) + + #THEN assert values + assert test_tnscope_params_built.algo == "algoname" + assert test_tnscope_params_built.min_tumorLOD == 6 + assert test_tnscope_params_built.error_rate == 5 + assert test_tnscope_params_built.prunefactor == 3 + assert test_tnscope_params_built.disable_detect == "abc" + + +def test_umiparams_vardict(): + """ test UMIParamsVardict model for correct validation""" + + #GIVEN vardict params + test_umivardict = {"vardict_filters": "-a 1 -b 2 -c 5"} + + #WHEN building the model + test_umivardict_built = UMIParamsVardict(**test_umivardict) + + #THEN assert values + assert test_umivardict_built.vardict_filters == "-a 1 -b 2 -c 5" + + +def 
test_umiparams_vep(): + """ test UMIParamsVEP model for correct validation""" + + #GIVEN vardict params + test_umivep = {"vep_filters": "all defaults params"} + + #WHEN building the model + test_umivep_built = UMIParamsVEP(**test_umivep) + + #THEN assert values + assert test_umivep_built.vep_filters == "all defaults params" diff --git a/tests/utils/test_qc_check.py b/tests/utils/test_qc_check.py new file mode 100644 index 000000000..ce54dbb35 --- /dev/null +++ b/tests/utils/test_qc_check.py @@ -0,0 +1,92 @@ +import json + +from pathlib import Path + +from BALSAMIC.utils.cli import write_json +from BALSAMIC.utils.qc_check import read_hs_metrics, read_qc_table, check_qc_criteria, write_output +from BALSAMIC.utils.qc_check import get_bait_name, get_sample_name, get_qc_criteria, failed_qc +from BALSAMIC.utils.constants import HSMETRICS_QC_CHECK + + +def test_read_hs_metrics(): + # GIVEN the file exist + hs_metrics_path = "tests/test_data/qc_files/multiqc_picard_HsMetrics.json" + + # WHEN reading the file + df = read_hs_metrics(hs_metrics_path) + + # THEN check if the file contains any values + bol_list = df.any().tolist() + for n in range(len(bol_list)): + assert bol_list[n], "No values exists" + + +def test_read_qc_table(): + # GIVEN the file exist + from BALSAMIC.utils.constants import HSMETRICS_QC_CHECK + + # WHEN reading the file + df = read_qc_table(HSMETRICS_QC_CHECK) + + # THEN check if the file contains any values + bol_list = df.any().tolist() + for n in range(len(bol_list)): + assert bol_list[n], "No values exists" + + +def test_get_bait_and_sample_name(tumor_normal_config): + # GIVEN the file exists + # WHEN reading the file + bed = get_bait_name(tumor_normal_config) + sample_name = get_sample_name(tumor_normal_config) + + # THEN check if bed is string format and if sample name exists + assert isinstance(bed, str), "bed is not in string format" + assert sample_name[0], "sample name doesn't exist" + + +def test_get_qc_criteria(): + # GIVEN following df and 
bed + df_qc = read_qc_table(HSMETRICS_QC_CHECK) + bed = "gmcksolid_4.1_hg19_design.bed" + + # WHEN reading the function + criteria_df = get_qc_criteria(df_qc, bed) + + # THEN check if df has two columns + nr_of_columns = list(criteria_df.columns) + assert len(nr_of_columns) == 2, "number of columns != 2" + + +def test_check_qc_criteria_output_csv_and_qc(tmp_path, tumor_normal_config): + # GIVEN following an output_path, an hs_metrics file, and a config_json with a matching bed name + test_new_dir = tmp_path / "check_qc_results" + test_new_dir.mkdir() + output_path = test_new_dir / "output.csv" + new_config_json_file = Path( + test_new_dir / "new_config_tumor_normal.json").as_posix() + + with open(tumor_normal_config, 'r') as f: + new_config_json = json.load(f) + new_config_json["panel"][ + "capture_kit"] = "dummy_path/to/capture_kit/gmcksolid_4.1_hg19_design.bed" + write_json(new_config_json, new_config_json_file) + + hs_metrics = "tests/test_data/qc_files/multiqc_picard_HsMetrics.json" + + qc_criteria_df = get_qc_criteria(read_qc_table(HSMETRICS_QC_CHECK), + get_bait_name(new_config_json_file)) + hs_metrics_df = read_hs_metrics(hs_metrics) + sample_names = get_sample_name(new_config_json_file) + + # WHEN calling the functions + extract_criteria = check_qc_criteria(qc_criteria_df, hs_metrics_df, + sample_names[0], sample_names[1]) + write_output(extract_criteria, output_path) + qc_check = failed_qc(extract_criteria, sample_names[0], sample_names[1]) + + # THEN check if the output df has 3 indexes, if csv-file exists and if qc check is string + nr_of_indexes = list(extract_criteria.index) + assert len(nr_of_indexes) == 3 + assert output_path.exists(), "File doesn't exists" + assert isinstance(qc_check, str), "qc_check not string" diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 0d3b34ab9..bec70a7fa 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -1,5 +1,5 @@ import json -import os +import subprocess import pytest 
import sys import copy @@ -11,48 +11,26 @@ from pathlib import Path +from BALSAMIC import __version__ as balsamic_version from BALSAMIC.utils.exc import BalsamicError, WorkflowRunError -from BALSAMIC.utils.constants import CONDA_ENV_PATH +from BALSAMIC.utils.constants import CONTAINERS_CONDA_ENV_PATH +from BALSAMIC.utils.constants import BIOINFO_TOOL_ENV from BALSAMIC.utils.constants import REFERENCE_FILES from BALSAMIC.utils.cli import ( - SnakeMake, - CaptureStdout, - iterdict, - get_snakefile, - createDir, - write_json, - get_config, - recursive_default_dict, - convert_defaultdict_to_regular_dict, - get_file_status_string, - get_from_two_key, - find_file_index, - merge_json, - validate_fastq_pattern, - get_panel_chrom, - get_bioinfo_tools_list, - create_fastq_symlink, - get_fastq_bind_path, - singularity, - get_file_extension, - convert_deliverables_tags, -) - -from BALSAMIC.utils.rule import ( - get_chrom, - get_vcf, - get_sample_type, - get_conda_env, - get_picard_mrkdup, - get_variant_callers, - get_script_path, - get_result_dir, - get_threads, - get_delivery_id, - get_reference_output_files, -) + SnakeMake, CaptureStdout, iterdict, get_snakefile, createDir, write_json, + get_config, recursive_default_dict, convert_defaultdict_to_regular_dict, + get_file_status_string, get_from_two_key, find_file_index, merge_json, + validate_fastq_pattern, get_panel_chrom, create_fastq_symlink, + get_fastq_bind_path, singularity, get_file_extension, + get_bioinfo_tools_version, convert_deliverables_tags, check_executable, + job_id_dump_to_yaml, generate_h5) + +from BALSAMIC.utils.rule import (get_chrom, get_vcf, get_sample_type, + get_picard_mrkdup, get_variant_callers, + get_script_path, get_result_dir, get_threads, + get_delivery_id, get_reference_output_files) def test_get_variant_callers_wrong_analysis_type(tumor_normal_config): @@ -143,15 +121,14 @@ def test_get_reference_output_files(): def test_get_bioinfo_tools_list(): - # GIVEN a path for conda env files - 
conda_env_path = CONDA_ENV_PATH - + # GIVEN a path for container path and bioinfo tool dictionary # WHEN getting dictionary of bioinformatic tools and their version - bioinfo_tools_dict = get_bioinfo_tools_list(conda_env_path) + bioinfo_tools_dict = get_bioinfo_tools_version(BIOINFO_TOOL_ENV, + CONTAINERS_CONDA_ENV_PATH) # THEN assert it is a dictionary and versions are correct assert isinstance(bioinfo_tools_dict, dict) - assert bioinfo_tools_dict["cnvkit"] == "0.9.4" + assert set(bioinfo_tools_dict["samtools"]) == set(["1.11", "1.9"]) def test_get_delivery_id(): @@ -223,12 +200,10 @@ def test_convert_defaultdict_to_regular_dict(): assert "key_2" in test_dict["key_1"] -def test_iterdict(config_files): +def test_iterdict(reference): """ GIVEN a dict for iteration """ - test_dict = json.load(open(config_files["test_reference"], "r")) - # WHEN passing dict to this function - dict_gen = iterdict(test_dict) + dict_gen = iterdict(reference) # THEN it will create dict generator, we can iterate it, get the key, values as string for key, value in dict_gen: @@ -278,6 +253,7 @@ def test_snakemake_slurm(): snakemake_slurm.mail_type = "FAIL" snakemake_slurm.mail_user = "john.doe@example.com" snakemake_slurm.sm_opt = ("containers", ) + snakemake_slurm.quiet = True snakemake_slurm.use_singularity = True snakemake_slurm.singularity_bind = ["path_1", "path_2"] snakemake_slurm.run_analysis = True @@ -285,7 +261,6 @@ def test_snakemake_slurm(): # WHEN calling the build command shell_command = snakemake_slurm.build_cmd() - # print(shell_command) # THEN constructing snakecommand for slurm runner assert isinstance(shell_command, str) assert "worflow/variantCalling_paired" in shell_command @@ -295,6 +270,7 @@ def test_snakemake_slurm(): assert "sbatch.py" in shell_command assert "test_case" in shell_command assert "containers" in shell_command + assert "--quiet" in shell_command def test_get_script_path(): @@ -327,15 +303,11 @@ def test_get_snakefile(): snakefile = 
get_snakefile(analysis_type, sequencing_type) pipeline = "" - if sequencing_type == "targeted": - pipeline = "BALSAMIC/workflows/VariantCalling.smk" - elif sequencing_type == "wgs": - pipeline = "BALSAMIC/workflows/VariantCalling_sentieon.smk" - elif analysis_type == "qc": - pipeline = "BALSAMIC/workflows/Alignment.smk" - elif analysis_type == "generate_ref": - pipeline = "BALSAMIC/workflows/GenerateRef" - elif analysis_type == "umi": + if sequencing_type in ['targeted', 'wgs', 'qc']: + pipeline = "BALSAMIC/workflows/balsamic.smk" + elif analysis_type == 'generate_ref': + pipeline = "BALSAMIC/workflows/reference.smk" + elif analysis_type == 'umi': pipeline = "BALSAMIC/workflows/UMIworkflow.smk" # THEN it should return the snakefile path @@ -443,29 +415,6 @@ def test_get_result_dir(sample_config): assert get_result_dir(sample_config) == sample_config["analysis"]["result"] -def test_get_conda_env_found(tmp_path): - # GIVEN a balsamic_env yaml - balsamic_env = "BALSAMIC/config/balsamic_env.yaml" - - # WHEN passing pkg name with this yaml file - conda_env = get_conda_env(balsamic_env, "cnvkit") - - # THEN It should return the conda env which has that pkg - assert conda_env == "varcall_cnvkit" - - -def test_get_conda_env_not_found(tmp_path): - # GIVEN a balsamic_env yaml - balsamic_env = "BALSAMIC/config/balsamic_env.yaml" - bioinfo_tool = "unknown_package" - error_msg = f"Installed package {bioinfo_tool} was not found in {balsamic_env}" - - # WHEN passing pkg name with this yaml file - # THEN It should return the conda env which has that pkg - with pytest.raises(KeyError, match=error_msg): - get_conda_env(balsamic_env, "unknown_package") - - def test_capturestdout(): # GIVEN a catpurestdout context test_stdout_message = "Message to stdout" @@ -494,27 +443,25 @@ def test_get_config_wrong_config(): assert get_config(config_file) -def test_write_json(tmp_path, config_files): - # GIVEN a dict from sample json file (reference.json) - ref_json = 
json.load(open(config_files["reference"], "r")) - +def test_write_json(tmp_path, reference): + # GIVEN a dict from sample json file tmp = tmp_path / "tmp" tmp.mkdir() output_json = tmp / "output.json" # WHEN passing dict and file name - write_json(ref_json, output_json) + write_json(reference, output_json) output = output_json.read_text() # THEN It will create a json file with given dict - for key, value in iterdict(ref_json): + for key, value in iterdict(reference): assert key in output assert value in output assert len(list(tmp.iterdir())) == 1 -def test_write_json_error(tmp_path, config_files): +def test_write_json_error(tmp_path): with pytest.raises(Exception, match=r"Is a directory"): # GIVEN a invalid dict ref_json = {"path": "this_path", "reference": ""} @@ -600,7 +547,7 @@ def test_find_file_index(tmpdir): assert str(bai_file_2) in result -def test_singularity_shellcmd(singularity_container): +def test_singularity_shellcmd(balsamic_cache): """test singularity shell cmd """ @@ -610,16 +557,16 @@ def test_singularity_shellcmd(singularity_container): dummy_path_2 = "this_path/path2" correct_shellcmd = "exec --bind {} --bind {} ls this_path".format( dummy_path_1, dummy_path_2) + singularity_container_sif = Path(balsamic_cache, balsamic_version, "containers", "align_qc", + "example.sif").as_posix() with mock.patch.object(shutil, "which") as mocked: mocked.return_value = "/my_home/binary_path/singularity" # WHEN building singularity command - shellcmd = singularity( - sif_path=singularity_container, - cmd=dummy_command, - bind_paths=[dummy_path_1, dummy_path_2], - ) + shellcmd = singularity(sif_path=singularity_container_sif, + cmd=dummy_command, + bind_paths=[dummy_path_1, dummy_path_2]) # THEN successfully return a correct singularity cmd assert correct_shellcmd in shellcmd @@ -648,7 +595,7 @@ def test_singularity_shellcmd_sif_not_exist(): bind_paths=[dummy_path_1, dummy_path_2]) -def test_singularity_shellcmd_cmd_not_exist(singularity_container): +def 
test_singularity_shellcmd_cmd_not_exist(): """test singularity shell cmd with nonexisting singularity command """ @@ -657,6 +604,7 @@ def test_singularity_shellcmd_cmd_not_exist(singularity_container): error_msg = "singularity command does not exist" dummy_path_1 = "this_path/path1" dummy_path_2 = "this_path/path2" + singularity_container_sif = "some_path/container.sif" # WHEN building singularity command # THEN successfully get error if singualrity command doesn't exist @@ -665,21 +613,17 @@ def test_singularity_shellcmd_cmd_not_exist(singularity_container): match=error_msg): mocked.return_value = None - singularity( - sif_path=singularity_container, - cmd=dummy_command, - bind_paths=[dummy_path_1, dummy_path_2], - ) + singularity(sif_path=singularity_container_sif, + cmd=dummy_command, + bind_paths=[dummy_path_1, dummy_path_2]) -def test_merge_json(config_files): +def test_merge_json(reference, config_files): # GIVEN a dict and json file - ref_dict = json.load(open(config_files["reference"], "r")) - json_file = config_files["sample"] # WHEN passing dict and json file to merge - merge_dict = merge_json(ref_dict, json_file) + merge_dict = merge_json(reference, json_file) # THEN It will merge both the data and return dict assert isinstance(merge_dict, dict) @@ -687,15 +631,14 @@ def test_merge_json(config_files): assert "reference" in merge_dict -def test_merge_json_error(config_files): +def test_merge_json_error(reference): with pytest.raises(Exception, match=r"No such file or directory"): # GIVEN a dict and invalid json file path - ref_dict = json.load(open(config_files["reference"], "r")) json_file = "reference.json" # WHEN passing python dict and invalid json path # THEN it should throw OSError as FileNotFoundError - assert merge_json(ref_dict, json_file) + assert merge_json(reference, json_file) def test_validate_fastq_pattern(): @@ -821,3 +764,76 @@ def test_convert_deliverables_tags(): for file in delivery_json["files"]: assert file["id"] == "ACC1" assert 
"ACC1" in file["tag"] + + +def test_check_executable_exists(): + + # GIVEN an existing executable command + test_command = "ls" + + # WHEN calling check_executable + # THEN it should return True + assert check_executable(test_command) + + +def test_check_executable_not_existing(): + + # GIVEN an existing executable command + test_command = "twenty_twenty_was_bad" + + # WHEN calling check_executable + # THEN it should return True + assert not check_executable(test_command) + + +def test_job_id_dump_to_yaml(tmp_path): + + # GIVEN a file with one job id per line, a key (case name), and an output file name + dummy_dir = tmp_path / "job_id_dump_dir" + dummy_dir.mkdir() + dummy_job_id_dump = dummy_dir / "jod_id.dump" + dummy_job_id_dump.write_text("01234\n56789") + + dummy_name = "angrybird" + + dummy_yaml_out = dummy_dir / "jod_id.yaml" + + # WHEN creating yaml from job id dump + job_id_dump_to_yaml(dummy_job_id_dump, dummy_yaml_out, dummy_name) + + # THEN file should exist + assert dummy_yaml_out.exists() + + +def test_generate_h5(tmp_path): + + # GIVEN a job name, a path, and a job id + dummy_path = tmp_path / "h5dir" + dummy_path.mkdir() + dummy_job_name = "awesome_name" + dummy_job_id = "31415.123123" + correct_output = Path(dummy_path, dummy_job_name + ".h5") + + # WHEN generating a h5 output + with mock.patch.object(subprocess, 'check_output') as mocked: + actual_output = generate_h5(dummy_job_name, dummy_job_id, dummy_path) + + assert actual_output == correct_output + + +def test_generate_h5_capture_no_output(tmp_path): + + # GIVEN a job name, a path, and a job id + dummy_path = tmp_path / "h5dir" + dummy_path.mkdir() + dummy_job_name = "awesome_name" + dummy_job_id = "31415.123123" + mocked_output = "sh5util: No node-step files found for jobid" + correct_output = Path(dummy_path, dummy_job_name + ".h5") + + # WHEN generating a h5 output + with mock.patch.object(subprocess, 'check_output') as mocked: + mocked.return_value = mocked_output.encode("utf-8") + 
actual_output = generate_h5(dummy_job_name, dummy_job_id, dummy_path) + + assert actual_output == None diff --git a/tests/utils/test_workflowscripts.py b/tests/utils/test_workflowscripts.py new file mode 100644 index 000000000..39727ab8e --- /dev/null +++ b/tests/utils/test_workflowscripts.py @@ -0,0 +1,37 @@ +from unittest import mock +from pathlib import Path +import pytest + +from BALSAMIC.utils.cli import generate_h5 +from BALSAMIC.utils.workflowscripts import plot_analysis + +def test_plot_analysis(tmp_path_factory): + # GIVEN a dummy log file + dummy_log_file = Path( + "tests/test_data/dummy_run_logs/BALSAMIC.T_panel.bwa_mem.123.sh_31415926535.err" + ) + dummy_h5 = "tests/test_data/dummy_run_logs/BALSAMIC.T_panel.bwa_mem.123.h5" + dummy_path = tmp_path_factory.mktemp("dummy_pdf_path") + dummy_pdf_name = dummy_path / "BALSAMIC.T_panel.bwa_mem.123.pdf" + dummy_pdf_name.touch() + + # WHEN calling plot_analysis + actual_pdf_file = plot_analysis(dummy_log_file, dummy_h5, dummy_pdf_name) + + assert Path(actual_pdf_file).exists() + + +def test_plot_analysis_bad_h5(tmp_path_factory): + # GIVEN a dummy log file + dummy_log_file = Path( + "tests/test_data/dummy_run_logs/BALSAMIC.T_panel.bwa_mem.123.sh_31415926535.err" + ) + dummy_h5 = "tests/test_data/dummy_run_logs/bad_format.h5" + dummy_path = tmp_path_factory.mktemp("dummy_pdf_path") + dummy_pdf_name = dummy_path / "plot_file.pdf" + dummy_pdf_name.touch() + + # WHEN calling plot_analysis + actual_pdf_file = plot_analysis(dummy_log_file, dummy_h5, dummy_pdf_name) + + assert actual_pdf_file is None