From b98edf28fa71eaea68d8ab739ef5132320f12d64 Mon Sep 17 00:00:00 2001
From: Morrison <jacob.morrison@submit.cm.cluster>
Date: Mon, 7 Oct 2019 16:05:01 -0400
Subject: [PATCH] Add some plotting functionality

None of this has been tested (at least by Jacob), so no idea if it works.
I have a sneaking suspicion that it won't work in the biscuiteer framework.

Two scripts have been added to inst/scripts for plotting examples (neither of
which uses the functions created in the R code directory). The rest of the
changes can be found in the R directory (and corresponding man page updates).
A few minor changes were made to fix some BiocCheck complaints as well.
---
 DESCRIPTION                                 |   8 +-
 NAMESPACE                                   |  14 ++
 R/plotCNAs.R                                |  88 +++++++++
 R/plottingUtils.R                           | 188 ++++++++++++++++++++
 R/read.segs.R                               | 140 +++++++++++++++
 inst/extdata/PML_RARA_TCGA_AB_2998.hg19.tsv |   7 +
 inst/scripts/plotPmlRara.R                  |  34 ++++
 inst/scripts/signatures.R                   |  83 +++++++++
 man/plotCNA.Rd                              |  20 +++
 man/plotVafAndCN.Rd                         |  26 +++
 man/plotVafMat.Rd                           |  22 +++
 man/read.segs.Rd                            |  35 ++++
 12 files changed, 664 insertions(+), 1 deletion(-)
 create mode 100644 R/plotCNAs.R
 create mode 100644 R/plottingUtils.R
 create mode 100644 R/read.segs.R
 create mode 100644 inst/extdata/PML_RARA_TCGA_AB_2998.hg19.tsv
 create mode 100644 inst/scripts/plotPmlRara.R
 create mode 100644 inst/scripts/signatures.R
 create mode 100644 man/plotCNA.Rd
 create mode 100644 man/plotVafAndCN.Rd
 create mode 100644 man/plotVafMat.Rd
 create mode 100644 man/read.segs.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index d19136c..df23594 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -4,7 +4,7 @@ Title: Convenience Functions for Biscuit
 Description: A test harness for bsseq loading of Biscuit output, summarization
              of WGBS data over defined regions and in mappable samples, with or
              without imputation, dropping of mostly-NA rows, age estimates, etc.
-Version: 0.99.1
+Version: 0.99.2
 Date: 2019-08-22
 Author: Tim Triche, Jr. [aut, cre], Wanding Zhou [aut], Ben Johnson [aut], Jacob Morrison [aut], Lyong Heo [aut]
 Maintainer: "Tim Triche, Jr." <tim.triche@gmail.com>
@@ -28,15 +28,21 @@ Imports: readr,
          VariantAnnotation,
          DelayedMatrixStats,
          SummarizedExperiment,
+         RaggedExperiment,
          GenomeInfoDb,
          Mus.musculus,
          Homo.sapiens,
          matrixStats,
+         ComplexHeatmap,
+         circlize,
          QDNAseq,
          dmrseq,
          methods,
+         stats,
+         grid,
          utils,
          R.utils,
+         readxl,
          gtools
 Suggests: DSS, 
           covr,
diff --git a/NAMESPACE b/NAMESPACE
index 171b5be..07dc0b3 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -24,7 +24,11 @@ export(getMvals)
 export(grToSeg)
 export(load.biscuit)
 export(makeBSseq)
+export(plotCNA)
+export(plotVafAndCN)
+export(plotVafMat)
 export(read.biscuit)
+export(read.segs)
 export(segToGr)
 export(simplifySampleNames)
 export(summarizeBsSeqOver)
@@ -36,11 +40,13 @@ exportMethods(info)
 exportMethods(meta)
 exportMethods(samples)
 import(BiocGenerics)
+import(ComplexHeatmap)
 import(GenomeInfoDb)
 import(GenomicRanges)
 import(Homo.sapiens)
 import(Mus.musculus)
 import(QDNAseq)
+import(RaggedExperiment)
 import(Rsamtools)
 import(S4Vectors)
 import(SummarizedExperiment)
@@ -51,6 +57,7 @@ import(dmrseq)
 import(gtools)
 import(impute)
 import(readr)
+import(readxl)
 importFrom(Biobase,featureNames)
 importFrom(DelayedMatrixStats,rowCounts)
 importFrom(HDF5Array,loadHDF5SummarizedExperiment)
@@ -59,9 +66,13 @@ importFrom(Matrix,Matrix)
 importFrom(R.utils,gunzip)
 importFrom(VariantAnnotation,meta)
 importFrom(VariantAnnotation,scanVcfHeader)
+importFrom(circlize,colorRamp2)
 importFrom(data.table,fread)
+importFrom(grid,gpar)
+importFrom(grid,unit)
 importFrom(gtools,inv.logit)
 importFrom(gtools,logit)
+importFrom(matrixStats,rowMaxs)
 importFrom(matrixStats,rowSums2)
 importFrom(methods,as)
 importFrom(methods,callNextMethod)
@@ -69,6 +80,9 @@ importFrom(methods,is)
 importFrom(methods,new)
 importFrom(methods,slot)
 importFrom(qualV,LCS)
+importFrom(stats,median)
+importFrom(stats,quantile)
+importFrom(stats,relevel)
 importFrom(utils,data)
 importFrom(utils,packageVersion)
 importFrom(utils,read.table)
diff --git a/R/plotCNAs.R b/R/plotCNAs.R
new file mode 100644
index 0000000..1d1c619
--- /dev/null
+++ b/R/plotCNAs.R
@@ -0,0 +1,88 @@
+#' Analyze CNAs
+#'
+#' @param filenames  List of files with tumor and normal Excel sheets
+#' @param minWidth   Scaling factor (DEFAULT: 1000)
+#'
+#' @return           Heatmap of CNAs
+#'
+#' @importFrom grid unit gpar
+#' @import readxl
+#' @import GenomeInfoDb
+#' @import ComplexHeatmap
+#' @import RaggedExperiment
+#'
+#' @export
+#'
+plotCNA <- function(filenames,
+                    minWidth = 1000) { 
+  
+  excelCNAs <- .readExcelCNAs(filenames)
+
+  rse <- .makeRse(excelCNAs)
+  seqlevelsStyle(rse) <- "NCBI" # more compact
+  rowData(rse)$units <- floor(width(rse) / minWidth)
+  rse <- subset(rse, rowData(rse)$units > 0)
+
+  toPlot <- assays(rse)$score
+  toPlot[is.na(toPlot)] <- 0 # yuck
+  toKeep <- which(rowSums(abs(toPlot)) != 0)
+  rse <- sortSeqlevels(rse)
+  chrom <- seqnames(rse) # keep as factor!
+  toPlot <- toPlot[toKeep, ]
+  chrom <- chrom[toKeep]
+
+  Heatmap(t(toPlot), name="log2(CN)", 
+          cluster_columns=FALSE, column_split=chrom, row_gap=unit(3, "mm"),
+          clustering_method_rows="ward.D2", row_split=rse$subject,
+          column_title_gp=gpar(fontsize=8), show_column_names=FALSE, 
+          row_title_rot=0, na_col="#FFFFFF", column_gap=unit(3, "mm"))
+}
+
+
+# Helper function
+.readExcelCNAs <- function(filenames) { 
+  res <- GRangesList(lapply(filenames, .readExcelCNA))
+  names(res) <- vapply(strsplit(filenames, "_"), `[`, 1,
+                       FUN.VALUE=character(1))
+  for (nm in names(res)) {
+    mcols(res[[nm]])$name <- nm
+  }
+  sort(sortSeqlevels(unlist(res)))
+}
+
+
+# Helper function
+.readExcelCNA <- function(filename) { 
+  message("Processing ", filename, "...")
+  res1 <- read_excel(filename, sheet=1) 
+  res2 <- read_excel(filename, sheet=2)
+  res <- rbind(res1, res2) 
+  gr <- .parseCoords(res)
+  gr$score <- res$log2Ratio
+  return(gr)  
+}
+
+
+# Helper function
+.parseCoords <- function(res) {
+  gr <- as(sapply(strsplit(res$Altered_Region, "\\("), `[`, 1), "GRanges")
+  seqlevelsStyle(gr) <- "UCSC"
+  return(gr)
+}
+
+
+# Helper function
+.makeRse <- function(gr) { 
+  disjoinSummarizedExperiment(.makeRagExp(gr), mean, 1)
+}
+
+
+# Helper function
+.makeRagExp <- function(gr, div=10) { 
+  gr$score <- pmax(-2, pmin(2, round(gr$score*div)/div))
+  grl <- split(gr, gr$name)
+  re <- RaggedExperiment(grl)
+  colData(re)$subject <- substr(colnames(re), 1, 9) 
+  colData(re)$portion <- substr(colnames(re), 11, 11) 
+  return(re)
+}
diff --git a/R/plottingUtils.R b/R/plottingUtils.R
new file mode 100644
index 0000000..0ed16f6
--- /dev/null
+++ b/R/plottingUtils.R
@@ -0,0 +1,188 @@
+#' Plot variant annotation matrix
+#'
+#' @param VRL      VRangesList of variants
+#' @param cutoff   Minimum value to plot (DEFAULT: 0.25)
+#' @param ANNonly  Show annotations only (DEFAULT: FALSE)
+#'
+#' @return         A Heatmap of the variants
+#'
+#' @importFrom circlize colorRamp2
+#' @import ComplexHeatmap
+#' @import VariantAnnotation
+#'
+#' @export
+#'
+plotVafMat <- function(VRL,
+                       cutoff = 0.25,
+                       ANNonly = FALSE) { 
+
+  vafMat <- .getVafMat(VRL, cutoff=cutoff, ANNonly=ANNonly)
+
+  if (ANNonly) {
+    rowSym <- vapply(strsplit(rownames(vafMat), "\\|"), `[`, 1,
+                     FUN.VALUE=character(1))
+    vafMat <- rowsum(vafMat, rowSym)
+  }
+
+  cols <- colorRamp2(c(      0,   0.5,      0.75,       1),
+                     c('white', 'red', 'darkred', 'black'))
+  subj <- substr(colnames(vafMat), 1, 9)
+  Heatmap(vafMat, name="VAF", show_row_names=ANNonly, 
+          clustering_distance_rows="manhattan",
+          clustering_method_rows="ward.D2",
+          col=cols, column_split=subj)
+}
+
+#' Plot variant annotation matrix and CN
+#'
+#' @param VRL       VRangesList of variants
+#' @param rse       RaggedExperiment of CNs
+#' @param cutoff    Minimum value to plot for variants (DEFAULT: 0.25)
+#' @param minCN     Minimum CN to plot (DEFAULT: 0.25)
+#' @param minWidth  Scaling factor for CNs (DEFAULT: 1000)
+#'
+#' @return          A Heatmap of the variants and CNs
+#'
+#' @importFrom grid unit
+#' @importFrom stats relevel
+#' @importFrom circlize colorRamp2
+#' @import ComplexHeatmap
+#' @import VariantAnnotation
+#'
+#' @export
+#'
+plotVafAndCN <- function(VRL, rse, cutoff=0.25, minCN=0.25, minWidth=1e3) { 
+
+  bothMat <- .getVafAndCN(VRL, rse, cutoff=cutoff,
+                         minCN=minCN, minWidth=minWidth)
+
+  chr <- as.factor(.getChr(bothMat)) 
+  for (ch in rev(c(paste0("chr", c(seq_len(22), "X", "Y"))))) {
+    if (ch %in% levels(chr)) {
+      chr <- relevel(chr, ch)
+    }
+  }
+
+  cols <- colorRamp2(c(        -1,  -0.75,        0,  0.75,         1), 
+                     c('darkblue', 'blue', 'gray99', 'red', 'darkred'))
+
+  subj <- substr(rownames(bothMat), 1, 9)
+  annoSyms <- .annotateSymbols(bothMat)
+
+  Heatmap(bothMat, name="VAF/CN", 
+          border="gray50", col=cols,
+          column_title_rot=90, 
+          show_column_names=FALSE,
+          show_column_dend=FALSE,
+          cluster_column_slices=FALSE,
+          column_gap=unit(2, "mm"),
+          row_gap=unit(2, "mm"),
+          show_parent_dend_line=FALSE,
+          clustering_distance_columns="manhattan",
+          clustering_method_columns="ward.D2", 
+          clustering_distance_rows="manhattan",
+          clustering_method_rows="ward.D2",
+          row_split=subj, column_split=chr,
+          bottom_annotation=annoSyms)
+}
+
+# Helper function
+.getChr <- function(bothMat) {
+  toSplit <- grep("\\|", colnames(bothMat))
+  coords <- colnames(bothMat) 
+  coords[toSplit] <- vapply(strsplit(coords[toSplit], "\\|"), `[`, 2,
+                            FUN.VALUE=character(1)) 
+  vapply(strsplit(coords, ":"), `[`, 1,
+         FUN.VALUE=character(1)) 
+}
+
+# Helper function
+#' @importFrom grid unit gpar
+.annotateSymbols <- function(bothMat, annoFontSize=9) {
+  toAnnotate <- grep("\\|", colnames(bothMat))
+  geneLabels <- vapply(strsplit(colnames(bothMat)[toAnnotate], "\\|"), `[`, 1,
+                       FUN.VALUE=character(1))
+  columnAnnotation(link=anno_mark(at=toAnnotate, 
+                                  side="bottom",
+                                  labels=geneLabels, 
+                                  extend=unit(2, "cm"),
+                                  link_height=unit(1.5, "cm"),
+                                  labels_gp=gpar(fontsize=annoFontSize)),
+                   height=unit(3, "cm"))
+}
+
+# Helper function
+.getVafAndCN <- function(VRL, rse, cutoff=0.25, minCN=0.25, minWidth=1e3) {
+  vafMat <- .getVafMat(VRL, cutoff=cutoff, ANNonly=TRUE)
+  cnaMat <- .getCnaMat(rse, cutoff=minCN, minWidth=minWidth)/2 
+  cnaMat[ which(is.na(cnaMat)) ] <- 0
+  stopifnot(identical(colnames(vafMat), colnames(cnaMat)))
+  cnaMat <- cnaMat/2 # scale to make sense alongside VAF
+  bothMat <- t(rbind(cnaMat, vafMat))
+  return(bothMat)
+}
+
+# Helper function 
+#' @importFrom matrixStats rowMaxs
+.getVafMat <- function(VRL, cutoff=0.25, ANNonly=FALSE) { 
+  if (ANNonly) VRL <- VRangesList(lapply(VRL, .annOnly))
+  VRL <- VRangesList(lapply(VRL, .addLocation))
+  vafMat <- .asMatrix(VRL, ANNonly=ANNonly)
+  vafMat <- subset(vafMat, rowMaxs(vafMat) >= cutoff)
+  return(vafMat)
+}
+
+# Helper function 
+#' @importFrom matrixStats rowMaxs
+.getCnaMat <- function(rse, cutoff=0.25, minWidth=1e3) {
+  wideEnough <- width(rse) >= minWidth
+  deepEnough <- rowMaxs(abs(assays(rse)$score), na.rm=TRUE) >= cutoff
+  assays(subset(rse, deepEnough & wideEnough))[[1]]
+}
+
+# Helper function
+.asMatrix <- function(VRL, ANNonly=FALSE) {
+  VRL <- VRangesList(.addVaf(VRL))
+  if (ANNonly) VRL <- VRangesList(lapply(VRL, .addLocationToName))
+  variants <- sort(Reduce(union, lapply(VRL, names)))
+  bigMat <- matrix(0, nrow=length(variants), ncol=length(VRL))
+  rownames(bigMat) <- variants
+  colnames(bigMat) <- names(VRL)
+  for (i in names(VRL)) bigMat[names(VRL[[i]]), i] <- VRL[[i]]$VAF
+  return(bigMat)
+}
+
+# Helper function
+.addVaf <- function(x) { 
+  if (is(x, "VRangesList")) {
+    lapply(x, .addVaf)
+  } else {
+    x$VAF <- altDepth(x) / totalDepth(x)
+    return(x)
+  }
+}
+
+# Helper function
+.addLocationToName <- function(vr) { 
+  names(vr) <- paste(names(vr), vr$location, sep="|")
+  return(vr)
+}
+
+# Helper function
+.annOnly <- function(vr) {
+  vr <- subset(vr, sapply(grepl("(HIGH|MODERATE)", vr$ANN), any))
+  names(vr) <- .getSymbol(vr)
+  return(vr)
+}
+
+# Helper function
+.getSymbol <- function(vr) { 
+  sapply(vr$ANN, 
+         function(x) as.character(sapply(strsplit(x, "\\|"), `[`, 5)[1]))
+} 
+
+# Helper function
+.addLocation <- function(vr) {
+  vr$location <- as.character(vr) # for later placement 
+  return(vr)
+}
diff --git a/R/read.segs.R b/R/read.segs.R
new file mode 100644
index 0000000..706a483
--- /dev/null
+++ b/R/read.segs.R
@@ -0,0 +1,140 @@
+#' Function to turn segmentations into GRanges with correction(s)
+#'
+#' If your segmentation file is not vaild, this probably won't work. A smarter
+#' way to squash baselines would be with a mixture model. One bonus is that the
+#' baselining is self-correcting - applying it to an already-corrected
+#' segmentation will not do any harm and may even help. 
+#'
+#' @param file     The segmentation file to process
+#' @param weight   Add weights for per-subject baselining? (DEFAULT: TRUE) 
+#' @param by       If weighting, do so by marks or by width (DEFAULT: "marks")
+#' @param from     Quantiles around the median to "squash" to baseline
+#'                   (DEFAULT: c(.4, .6))
+#' @param minNorm  Min log2ratio to consider as possible "normal" baseline
+#'                   (DEFAULT: -0.5)
+#' @param maxNorm  Max log2ratio to consider as possible "normal" baseline
+#'                   (DEFAULT: +0.5)
+#'
+#' @return         A GRanges with corrections applied
+#' 
+#' @import GenomicRanges
+#'
+#' @export
+#'
+read.segs <- function(file,
+                      weight = TRUE,
+                      by = c("marks","width"), 
+                      from = c(.4,.6),
+                      minNorm = -0.5,
+                      maxNorm = +0.5) {
+
+  segs <- read.table(file, stringsAsFactors=FALSE, sep="\t")
+  names(segs)[seq_len(4)] <- c("name","chrom","chromStart","chromEnd")
+  names(segs)[ncol(segs) - 2] <- "width" # autocorrect below
+  names(segs)[ncol(segs) - 1] <- "marks" # autocorrect below
+  names(segs)[ncol(segs)] <- "score"
+  if (weight) {
+    by <- match.arg(by) 
+    segs <- .addWeights(segs, by=by)
+    segs <- .fixBaselines(segs, from=from, minNorm=minNorm, maxNorm=maxNorm)
+    message("Baselines corrected.")
+  }
+  gr <- makeGRangesFromDataFrame(segs, keep.extra.columns=TRUE) 
+  names(gr) <- gr$name
+  return(gr)
+}
+
+
+# Helper function
+.addWeights <- function(segs, by=c("marks","width")) {
+  by <- match.arg(by)
+  if (!by %in% names(segs)) {
+    # in case the default does not / cannot work 
+    segs$width <- segs$chromEnd - segs$chromStart
+    by <- "width"
+  }
+  message("Weighting segments by ", by, "...", appendLF=FALSE)
+  segs <- do.call(rbind, lapply(split(segs, segs$name), .addWeight, by=by))
+  rownames(segs) <- segs$names
+  message(" done.")
+  return(segs)
+}
+
+# Helper function 
+.addWeight <- function(seg, by=c("width","marks")) {
+  by <- match.arg(by)  
+  stopifnot(length(unique(seg$name)) == 1)
+  if (by == "width") {
+    seg$width <- seg$chromStart - seg$chromEnd  
+    seg$weight <- seg$width / sum(seg$width)
+  } else {
+    seg$weight <- seg$marks / sum(seg$marks)
+  }
+  return(seg) 
+}
+
+
+# Helper function
+.fixBaselines <- function(segs, from=c(.4, .6), minNorm=-0.5, maxNorm=+0.5) { 
+  message("Finding baselines...", appendLF=FALSE)
+  segs <- do.call(rbind, 
+                  lapply(split(segs,segs$name), .fixBaseline, from=from,
+                         minNorm=minNorm, maxNorm=maxNorm))
+  rownames(segs) <- segs$names
+  message(" done.")
+  return(segs)
+}
+
+
+# Helper function 
+#' @importFrom stats quantile
+.fixBaseline <- function(seg, from=c(.4, .6), minNorm=-0.5, maxNorm=+0.5) { 
+  baseline <- .findBaseline(seg, minNorm=minNorm, maxNorm=maxNorm)
+  higher <- quantile(seg$score, prob=max(from), weight=seg$weight, na.rm=TRUE)
+  lower <- quantile(seg$score, prob=min(from), weight=seg$weight, na.rm=TRUE)
+  probablyUnchanged <- which(seg$score >= lower & seg$score <= higher)
+  seg[probablyUnchanged, "score"] <- baseline
+  seg$score <- seg$score - baseline
+  return(seg)
+}
+
+
+# Helper function
+#' @importFrom stats median
+.findBaseline <- function(seg, minNorm=-0.5, maxNorm=0.5) { 
+  nearNormal <- subset(seg, score >= minNorm & score <= maxNorm)
+  median(nearNormal$score, weight=nearNormal$weight, na.rm=TRUE)
+}
+
+
+# Helper function
+# TODO: Work in progress
+.assignUncertainty <- function(seg, plot=FALSE) { 
+  # correct for baseline (median correction; only works for mostly-NK tumors)
+  seg$score <- seg$score - .findBaseline(seg)
+
+  # generate Rle representations of score X marks, post-shift  
+
+  # unlist into an "unweighted" vector of weighted scores
+
+  # run mclust on that ?
+
+  # assign segments in the "baseline" cluster a log-ratio of 0. 
+  
+  # plot uncertainty, if requested 
+
+  # return the corrected segments. 
+}
+
+
+# Helper function
+.write.segs <- function(gr, file=NULL) { 
+  names(gr) <- NULL 
+  segs <- as.data.frame(gr)
+  segs <- segs[, c("name","seqnames","start","end","score")] 
+  if (!is.null(file)) {
+    write.table(segs, sep="\t", quote=FALSE, row.names=FALSE,
+                col.names=FALSE, file=file) 
+  }
+  return(segs) 
+}
diff --git a/inst/extdata/PML_RARA_TCGA_AB_2998.hg19.tsv b/inst/extdata/PML_RARA_TCGA_AB_2998.hg19.tsv
new file mode 100644
index 0000000..7cfd1d5
--- /dev/null
+++ b/inst/extdata/PML_RARA_TCGA_AB_2998.hg19.tsv
@@ -0,0 +1,7 @@
+seqnames    start   end name    partner QUAL
+chr15   72104255    72104255    break1o break1h 79
+chr17   35741663    35741663    break1h break1o 79
+chr17   35741661    35741661    break2o break2h 86
+chr15   72104247    72104247    break2h break2o 86
+chr15   72104257    72104257    break3o break3h 83
+chr17   35741661    35741661    break3h break3o 83
diff --git a/inst/scripts/plotPmlRara.R b/inst/scripts/plotPmlRara.R
new file mode 100644
index 0000000..540ee29
--- /dev/null
+++ b/inst/scripts/plotPmlRara.R
@@ -0,0 +1,34 @@
+### Example scritp for plotting PML RARA circle plot
+### Written by: Tim Triche, Jr.
+### Last edit: October 7, 2019
+library(Homo.sapiens)
+library(GenomicRanges)
+library(StructuralVariantAnnotation)
+
+file <- system.file("extdata", "PML_RARA_TCGA_AB_2998.hg19.tsv",
+                    package="biscuiteer")
+gr <- as(read.table(file, header=TRUE), "GRanges")
+seqinfo(gr) <- seqinfo(Homo.sapiens)[seqlevels(gr)]
+
+paired <- .makePairs(split(gr, substr(gr$name, nchar(gr$name), nchar(gr$name))))
+mcols(paired)$name <- sapply(paired,
+                             function(x) substr(mcols(first(x))$name, 1, 6))
+mcols(paired)$score <- sapply(paired, function(x) mcols(first(x))$QUAL)
+
+library(circlize)
+circos.initializeWithIdeogram()
+circos.genomicLink(as.data.frame(S4Vectors::first(paired)),
+                   as.data.frame(S4Vectors::second(paired)),
+                   col="red", lwd=3, border="darkred")
+
+bp <- subset(genes(Homo.sapiens, columns="SYMBOL"), SYMBOL %in% c("PML",
+                                                                  "RARA"))
+names(bp) <- sapply(bp$SYMBOL, `[[`, 1)
+circos.genomicLabels(bp, labels=names(bp), side="inside")
+dev.copy2pdf(file="PML_RARA.pdf")
+
+
+# Helper function
+.makePairs <- function(...) {
+  Pairs(as.list(...)[[1]], as.list(...)[[2]])
+}
diff --git a/inst/scripts/signatures.R b/inst/scripts/signatures.R
new file mode 100644
index 0000000..06cc635
--- /dev/null
+++ b/inst/scripts/signatures.R
@@ -0,0 +1,83 @@
+### Example script for plotting mutation signatures
+### Written by: Tim Triche, Jr.
+### October 7, 2019
+library(YAPSA) 
+library(circlize)
+library(matrixStats)
+library(ComplexHeatmap) 
+library(VariantAnnotation)
+
+VCF_files <- list.files(pattern=".vcf$") 
+names(VCF_files) <- sapply(strsplit(VCF_files, "_"), `[`, 1)
+VCFs <- lapply(VCF_files, readVcf, genome="hg19")
+VRL <- VRangesList(lapply(VCFs, as, "VRanges"))
+for (nm in names(VRL)) VRL[[nm]]$pid <- nm
+
+# helper fn
+toYapsaDf <- function(VR) { 
+  columns <- c("seqnames","start","ref","alt","pid")
+  res <- as(unname(VR), "data.frame")[, columns]
+  names(res) <- c("CHROM","POS","REF","ALT","PID")
+  res$POS <- as.integer(res$POS)
+  return(res)
+}
+
+data("exchange_colour_vector")
+DF <- do.call(rbind, lapply(VRL, toYapsaDf))
+DF <- DF[order(DF$PID, DF$CHROM, DF$POS),]
+DF$change <- attribute_nucleotide_exchanges(DF) 
+DF$col <- exchange_colour_vector[DF$change]
+DF$SUBJECT <- substr(DF$PID, 1, 9)
+DF$PORTION <- substr(DF$PID, 11, 11)
+
+library(BSgenome.Hsapiens.UCSC.hg19)
+mutCats <- 
+  create_mutation_catalogue_from_df(DF,
+                                    this_seqnames.field="CHROM", 
+                                    this_start.field="POS",
+                                    this_end.field="POS",
+                                    this_PID.field="PID",
+                                    this_subgroup.field="SUBJECT",
+                                    this_refGenome=BSgenome.Hsapiens.UCSC.hg19,
+                                    this_wordLength=3)
+
+# use AlexCosmicValid_sig_df and AlexCosmicValid_sigInd_df
+data(sigs)
+cutoff <- 0.033 # 3.3% representation
+mutcat_df <- as.data.frame(mutCats$matrix)
+general_cutoff_vector <- rep(cutoff, dim(AlexCosmicValid_sig_df)[2])
+cutoff_list <- LCD_complex_cutoff(in_mutation_catalogue_df=mutcat_df,
+                                  in_signatures_df=AlexCosmicValid_sig_df,
+                                  in_cutoff_vector=general_cutoff_vector,
+                                  in_sig_ind_df=AlexCosmicValid_sigInd_df)
+subgroups <- make_subgroups_df(DF, cutoff_list$exposures, 
+                               in_subgroup.field="SUBJECT")
+exposures_barplot(in_title="Mutational signatures", 
+                  in_exposures_df=cutoff_list$norm_exposures,
+                  in_signatures_ind_df=cutoff_list$out_sig_ind_df,
+                  in_subgroups_df=subgroups)
+dev.copy2pdf(file="mutationSignatures.NOSH_0003_and_0005.normalized.pdf") 
+
+exposures_barplot(in_title="Mutational signatures", 
+                  in_exposures_df=cutoff_list$exposures,
+                  in_signatures_ind_df=cutoff_list$out_sig_ind_df,
+                  in_subgroups_df=subgroups)
+dev.copy2pdf(file="mutationSignatures.NOSH_0003_and_0005.absolute.pdf") 
+
+# AC1 (green) is C->T, clocklike
+# AC3 (goldenrod) is DNA repair defects (HRR)
+# AC9 (brown) is POLE/SHM related
+# AC12 (darkblue) is MMR related
+# AC16 (violet) is endogenous.unknown
+# AC19 (deep pink) is exogenous.unknown
+# AC28 (navy blue) is (completely) unknown 
+complex_heatmap_exposures(in_exposures_df=cutoff_list$norm_exposures, 
+                          in_method="manhattan",
+                          in_subgroups_df=subgroups,
+                          in_subgroup_column="subgroup",
+                          in_subgroup_colour_column="col",
+                          in_signatures_ind_df=cutoff_list$out_sig_ind_df)
+dev.copy2pdf(file="mutationSignatures.NOSH_0003_and_0005.clustered.pdf") 
+
+plotExchangeSpectra(mutcat_df, in_show_triplets=TRUE) 
+ggsave("mutationSignatures.NOSH_0003_and_0005.spectra.png")
diff --git a/man/plotCNA.Rd b/man/plotCNA.Rd
new file mode 100644
index 0000000..3faef7b
--- /dev/null
+++ b/man/plotCNA.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plotCNAs.R
+\name{plotCNA}
+\alias{plotCNA}
+\title{Analyze CNAs}
+\usage{
+plotCNA(filenames, minWidth = 1000)
+}
+\arguments{
+\item{filenames}{List of files with tumor and normal Excel sheets}
+
+\item{minWidth}{Scaling factor (DEFAULT: 1000)}
+}
+\value{
+\preformatted{      Heatmap of CNAs
+}
+}
+\description{
+Analyze CNAs
+}
diff --git a/man/plotVafAndCN.Rd b/man/plotVafAndCN.Rd
new file mode 100644
index 0000000..3e1a265
--- /dev/null
+++ b/man/plotVafAndCN.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plottingUtils.R
+\name{plotVafAndCN}
+\alias{plotVafAndCN}
+\title{Plot variant annotation matrix and CN}
+\usage{
+plotVafAndCN(VRL, rse, cutoff = 0.25, minCN = 0.25, minWidth = 1000)
+}
+\arguments{
+\item{VRL}{VRangesList of variants}
+
+\item{rse}{RaggedExperiment of CNs}
+
+\item{cutoff}{Minimum value to plot for variants (DEFAULT: 0.25)}
+
+\item{minCN}{Minimum CN to plot (DEFAULT: 0.25)}
+
+\item{minWidth}{Scaling factor for CNs (DEFAULT: 1000)}
+}
+\value{
+\preformatted{     A Heatmap of the variants and CNs
+}
+}
+\description{
+Plot variant annotation matrix and CN
+}
diff --git a/man/plotVafMat.Rd b/man/plotVafMat.Rd
new file mode 100644
index 0000000..ce05e9f
--- /dev/null
+++ b/man/plotVafMat.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plottingUtils.R
+\name{plotVafMat}
+\alias{plotVafMat}
+\title{Plot variant annotation matrix}
+\usage{
+plotVafMat(VRL, cutoff = 0.25, ANNonly = FALSE)
+}
+\arguments{
+\item{VRL}{VRangesList of variants}
+
+\item{cutoff}{Minimum value to plot (DEFAULT: 0.25)}
+
+\item{ANNonly}{Show annotations only (DEFAULT: FALSE)}
+}
+\value{
+\preformatted{    A Heatmap of the variants
+}
+}
+\description{
+Plot variant annotation matrix
+}
diff --git a/man/read.segs.Rd b/man/read.segs.Rd
new file mode 100644
index 0000000..b05b815
--- /dev/null
+++ b/man/read.segs.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/read.segs.R
+\name{read.segs}
+\alias{read.segs}
+\title{Function to turn segmentations into GRanges with correction(s)}
+\usage{
+read.segs(file, weight = TRUE, by = c("marks", "width"),
+  from = c(0.4, 0.6), minNorm = -0.5, maxNorm = +0.5)
+}
+\arguments{
+\item{file}{The segmentation file to process}
+
+\item{weight}{Add weights for per-subject baselining? (DEFAULT: TRUE)}
+
+\item{by}{If weighting, do so by marks or by width (DEFAULT: "marks")}
+
+\item{from}{Quantiles around the median to "squash" to baseline
+(DEFAULT: c(.4, .6))}
+
+\item{minNorm}{Min log2ratio to consider as possible "normal" baseline
+(DEFAULT: -0.5)}
+
+\item{maxNorm}{Max log2ratio to consider as possible "normal" baseline
+(DEFAULT: +0.5)}
+}
+\value{
+\preformatted{    A GRanges with corrections applied
+}
+}
+\description{
+If your segmentation file is not vaild, this probably won't work. A smarter
+way to squash baselines would be with a mixture model. One bonus is that the
+baselining is self-correcting - applying it to an already-corrected
+segmentation will not do any harm and may even help.
+}