Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add some plotting functionality #22

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Title: Convenience Functions for Biscuit
Description: A test harness for bsseq loading of Biscuit output, summarization
of WGBS data over defined regions and in mappable samples, with or
without imputation, dropping of mostly-NA rows, age estimates, etc.
Version: 0.99.1
Version: 0.99.2
Date: 2019-08-22
Author: Tim Triche, Jr. [aut, cre], Wanding Zhou [aut], Ben Johnson [aut], Jacob Morrison [aut], Lyong Heo [aut]
Maintainer: "Tim Triche, Jr." <[email protected]>
Expand All @@ -28,15 +28,21 @@ Imports: readr,
VariantAnnotation,
DelayedMatrixStats,
SummarizedExperiment,
RaggedExperiment,
GenomeInfoDb,
Mus.musculus,
Homo.sapiens,
matrixStats,
ComplexHeatmap,
circlize,
QDNAseq,
dmrseq,
methods,
stats,
grid,
utils,
R.utils,
readxl,
gtools
Suggests: DSS,
covr,
Expand Down
14 changes: 14 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@ export(getMvals)
export(grToSeg)
export(load.biscuit)
export(makeBSseq)
export(plotCNA)
export(plotVafAndCN)
export(plotVafMat)
export(read.biscuit)
export(read.segs)
export(segToGr)
export(simplifySampleNames)
export(summarizeBsSeqOver)
Expand All @@ -36,11 +40,13 @@ exportMethods(info)
exportMethods(meta)
exportMethods(samples)
import(BiocGenerics)
import(ComplexHeatmap)
import(GenomeInfoDb)
import(GenomicRanges)
import(Homo.sapiens)
import(Mus.musculus)
import(QDNAseq)
import(RaggedExperiment)
import(Rsamtools)
import(S4Vectors)
import(SummarizedExperiment)
Expand All @@ -51,6 +57,7 @@ import(dmrseq)
import(gtools)
import(impute)
import(readr)
import(readxl)
importFrom(Biobase,featureNames)
importFrom(DelayedMatrixStats,rowCounts)
importFrom(HDF5Array,loadHDF5SummarizedExperiment)
Expand All @@ -59,16 +66,23 @@ importFrom(Matrix,Matrix)
importFrom(R.utils,gunzip)
importFrom(VariantAnnotation,meta)
importFrom(VariantAnnotation,scanVcfHeader)
importFrom(circlize,colorRamp2)
importFrom(data.table,fread)
importFrom(grid,gpar)
importFrom(grid,unit)
importFrom(gtools,inv.logit)
importFrom(gtools,logit)
importFrom(matrixStats,rowMaxs)
importFrom(matrixStats,rowSums2)
importFrom(methods,as)
importFrom(methods,callNextMethod)
importFrom(methods,is)
importFrom(methods,new)
importFrom(methods,slot)
importFrom(qualV,LCS)
importFrom(stats,median)
importFrom(stats,quantile)
importFrom(stats,relevel)
importFrom(utils,data)
importFrom(utils,packageVersion)
importFrom(utils,read.table)
Expand Down
88 changes: 88 additions & 0 deletions R/plotCNAs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#' Analyze CNAs
#'
#' @param filenames List of files with tumor and normal Excel sheets
#' @param minWidth Scaling factor (DEFAULT: 1000)
#'
#' @return Heatmap of CNAs
#'
#' @importFrom grid unit gpar
#' @import readxl
#' @import GenomeInfoDb
#' @import ComplexHeatmap
#' @import RaggedExperiment
#'
#' @export
#'
plotCNA <- function(filenames,
minWidth = 1000) {

excelCNAs <- .readExcelCNAs(filenames)

rse <- .makeRse(excelCNAs)
seqlevelsStyle(rse) <- "NCBI" # more compact
rowData(rse)$units <- floor(width(rse) / minWidth)
rse <- subset(rse, rowData(rse)$units > 0)

toPlot <- assays(rse)$score
toPlot[is.na(toPlot)] <- 0 # yuck
toKeep <- which(rowSums(abs(toPlot)) != 0)
rse <- sortSeqlevels(rse)
chrom <- seqnames(rse) # keep as factor!
toPlot <- toPlot[toKeep, ]
chrom <- chrom[toKeep]

Heatmap(t(toPlot), name="log2(CN)",
cluster_columns=FALSE, column_split=chrom, row_gap=unit(3, "mm"),
clustering_method_rows="ward.D2", row_split=rse$subject,
column_title_gp=gpar(fontsize=8), show_column_names=FALSE,
row_title_rot=0, na_col="#FFFFFF", column_gap=unit(3, "mm"))
}


# Helper function
.readExcelCNAs <- function(filenames) {
res <- GRangesList(lapply(filenames, .readExcelCNA))
names(res) <- vapply(strsplit(filenames, "_"), `[`, 1,
FUN.VALUE=character(1))
for (nm in names(res)) {
mcols(res[[nm]])$name <- nm
}
sort(sortSeqlevels(unlist(res)))
}


# Helper function
.readExcelCNA <- function(filename) {
message("Processing ", filename, "...")
res1 <- read_excel(filename, sheet=1)
res2 <- read_excel(filename, sheet=2)
res <- rbind(res1, res2)
gr <- .parseCoords(res)
gr$score <- res$log2Ratio
return(gr)
}


# Helper function
.parseCoords <- function(res) {
gr <- as(sapply(strsplit(res$Altered_Region, "\\("), `[`, 1), "GRanges")
seqlevelsStyle(gr) <- "UCSC"
return(gr)
}


# Helper function
.makeRse <- function(gr) {
disjoinSummarizedExperiment(.makeRagExp(gr), mean, 1)
}


# Helper function
.makeRagExp <- function(gr, div=10) {
gr$score <- pmax(-2, pmin(2, round(gr$score*div)/div))
grl <- split(gr, gr$name)
re <- RaggedExperiment(grl)
colData(re)$subject <- substr(colnames(re), 1, 9)
colData(re)$portion <- substr(colnames(re), 11, 11)
return(re)
}
188 changes: 188 additions & 0 deletions R/plottingUtils.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
#' Plot variant annotation matrix
#'
#' @param VRL VRangesList of variants
#' @param cutoff Minimum value to plot (DEFAULT: 0.25)
#' @param ANNonly Show annotations only (DEFAULT: FALSE)
#'
#' @return A Heatmap of the variants
#'
#' @importFrom circlize colorRamp2
#' @import ComplexHeatmap
#' @import VariantAnnotation
#'
#' @export
#'
plotVafMat <- function(VRL,
cutoff = 0.25,
ANNonly = FALSE) {

vafMat <- .getVafMat(VRL, cutoff=cutoff, ANNonly=ANNonly)

if (ANNonly) {
rowSym <- vapply(strsplit(rownames(vafMat), "\\|"), `[`, 1,
FUN.VALUE=character(1))
vafMat <- rowsum(vafMat, rowSym)
}

cols <- colorRamp2(c( 0, 0.5, 0.75, 1),
c('white', 'red', 'darkred', 'black'))
subj <- substr(colnames(vafMat), 1, 9)
Heatmap(vafMat, name="VAF", show_row_names=ANNonly,
clustering_distance_rows="manhattan",
clustering_method_rows="ward.D2",
col=cols, column_split=subj)
}

#' Plot variant annotation matrix and CN
#'
#' @param VRL VRangesList of variants
#' @param rse RaggedExperiment of CNs
#' @param cutoff Minimum value to plot for variants (DEFAULT: 0.25)
#' @param minCN Minimum CN to plot (DEFAULT: 0.25)
#' @param minWidth Scaling factor for CNs (DEFAULT: 1000)
#'
#' @return A Heatmap of the variants and CNs
#'
#' @importFrom grid unit
#' @importFrom stats relevel
#' @importFrom circlize colorRamp2
#' @import ComplexHeatmap
#' @import VariantAnnotation
#'
#' @export
#'
plotVafAndCN <- function(VRL, rse, cutoff=0.25, minCN=0.25, minWidth=1e3) {

bothMat <- .getVafAndCN(VRL, rse, cutoff=cutoff,
minCN=minCN, minWidth=minWidth)

chr <- as.factor(.getChr(bothMat))
for (ch in rev(c(paste0("chr", c(seq_len(22), "X", "Y"))))) {
if (ch %in% levels(chr)) {
chr <- relevel(chr, ch)
}
}

cols <- colorRamp2(c( -1, -0.75, 0, 0.75, 1),
c('darkblue', 'blue', 'gray99', 'red', 'darkred'))

subj <- substr(rownames(bothMat), 1, 9)
annoSyms <- .annotateSymbols(bothMat)

Heatmap(bothMat, name="VAF/CN",
border="gray50", col=cols,
column_title_rot=90,
show_column_names=FALSE,
show_column_dend=FALSE,
cluster_column_slices=FALSE,
column_gap=unit(2, "mm"),
row_gap=unit(2, "mm"),
show_parent_dend_line=FALSE,
clustering_distance_columns="manhattan",
clustering_method_columns="ward.D2",
clustering_distance_rows="manhattan",
clustering_method_rows="ward.D2",
row_split=subj, column_split=chr,
bottom_annotation=annoSyms)
}

# Helper function
.getChr <- function(bothMat) {
toSplit <- grep("\\|", colnames(bothMat))
coords <- colnames(bothMat)
coords[toSplit] <- vapply(strsplit(coords[toSplit], "\\|"), `[`, 2,
FUN.VALUE=character(1))
vapply(strsplit(coords, ":"), `[`, 1,
FUN.VALUE=character(1))
}

# Helper function
#' @importFrom grid unit gpar
.annotateSymbols <- function(bothMat, annoFontSize=9) {
toAnnotate <- grep("\\|", colnames(bothMat))
geneLabels <- vapply(strsplit(colnames(bothMat)[toAnnotate], "\\|"), `[`, 1,
FUN.VALUE=character(1))
columnAnnotation(link=anno_mark(at=toAnnotate,
side="bottom",
labels=geneLabels,
extend=unit(2, "cm"),
link_height=unit(1.5, "cm"),
labels_gp=gpar(fontsize=annoFontSize)),
height=unit(3, "cm"))
}

# Helper function
.getVafAndCN <- function(VRL, rse, cutoff=0.25, minCN=0.25, minWidth=1e3) {
vafMat <- .getVafMat(VRL, cutoff=cutoff, ANNonly=TRUE)
cnaMat <- .getCnaMat(rse, cutoff=minCN, minWidth=minWidth)/2
cnaMat[ which(is.na(cnaMat)) ] <- 0
stopifnot(identical(colnames(vafMat), colnames(cnaMat)))
cnaMat <- cnaMat/2 # scale to make sense alongside VAF
bothMat <- t(rbind(cnaMat, vafMat))
return(bothMat)
}

# Helper function
#' @importFrom matrixStats rowMaxs
.getVafMat <- function(VRL, cutoff=0.25, ANNonly=FALSE) {
if (ANNonly) VRL <- VRangesList(lapply(VRL, .annOnly))
VRL <- VRangesList(lapply(VRL, .addLocation))
vafMat <- .asMatrix(VRL, ANNonly=ANNonly)
vafMat <- subset(vafMat, rowMaxs(vafMat) >= cutoff)
return(vafMat)
}

# Helper function
#' @importFrom matrixStats rowMaxs
.getCnaMat <- function(rse, cutoff=0.25, minWidth=1e3) {
wideEnough <- width(rse) >= minWidth
deepEnough <- rowMaxs(abs(assays(rse)$score), na.rm=TRUE) >= cutoff
assays(subset(rse, deepEnough & wideEnough))[[1]]
}

# Helper function
.asMatrix <- function(VRL, ANNonly=FALSE) {
VRL <- VRangesList(.addVaf(VRL))
if (ANNonly) VRL <- VRangesList(lapply(VRL, .addLocationToName))
variants <- sort(Reduce(union, lapply(VRL, names)))
bigMat <- matrix(0, nrow=length(variants), ncol=length(VRL))
rownames(bigMat) <- variants
colnames(bigMat) <- names(VRL)
for (i in names(VRL)) bigMat[names(VRL[[i]]), i] <- VRL[[i]]$VAF
return(bigMat)
}

# Helper function
.addVaf <- function(x) {
if (is(x, "VRangesList")) {
lapply(x, .addVaf)
} else {
x$VAF <- altDepth(x) / totalDepth(x)
return(x)
}
}

# Helper function
.addLocationToName <- function(vr) {
names(vr) <- paste(names(vr), vr$location, sep="|")
return(vr)
}

# Helper function
.annOnly <- function(vr) {
vr <- subset(vr, sapply(grepl("(HIGH|MODERATE)", vr$ANN), any))
names(vr) <- .getSymbol(vr)
return(vr)
}

# Helper function
.getSymbol <- function(vr) {
sapply(vr$ANN,
function(x) as.character(sapply(strsplit(x, "\\|"), `[`, 5)[1]))
}

# Helper function
.addLocation <- function(vr) {
vr$location <- as.character(vr) # for later placement
return(vr)
}
Loading