From b98edf28fa71eaea68d8ab739ef5132320f12d64 Mon Sep 17 00:00:00 2001 From: Morrison Date: Mon, 7 Oct 2019 16:05:01 -0400 Subject: [PATCH] Add some plotting functionality None of this has been tested (at least by Jacob), so no idea if it works. I have a sneaking suspicion that it won't work in the biscuiteer framework. Two scripts have been added to inst/scripts for plotting examples (neither of which uses the functions created in the R code directory). The rest of the changes can be found in the R directory (and corresponding man page updates). A few minor changes were made to fix some BiocCheck complaints as well. --- DESCRIPTION | 8 +- NAMESPACE | 14 ++ R/plotCNAs.R | 88 +++++++++ R/plottingUtils.R | 188 ++++++++++++++++++++ R/read.segs.R | 140 +++++++++++++++ inst/extdata/PML_RARA_TCGA_AB_2998.hg19.tsv | 7 + inst/scripts/plotPmlRara.R | 34 ++++ inst/scripts/signatures.R | 83 +++++++++ man/plotCNA.Rd | 20 +++ man/plotVafAndCN.Rd | 26 +++ man/plotVafMat.Rd | 22 +++ man/read.segs.Rd | 35 ++++ 12 files changed, 664 insertions(+), 1 deletion(-) create mode 100644 R/plotCNAs.R create mode 100644 R/plottingUtils.R create mode 100644 R/read.segs.R create mode 100644 inst/extdata/PML_RARA_TCGA_AB_2998.hg19.tsv create mode 100644 inst/scripts/plotPmlRara.R create mode 100644 inst/scripts/signatures.R create mode 100644 man/plotCNA.Rd create mode 100644 man/plotVafAndCN.Rd create mode 100644 man/plotVafMat.Rd create mode 100644 man/read.segs.Rd diff --git a/DESCRIPTION b/DESCRIPTION index d19136c..df23594 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,7 +4,7 @@ Title: Convenience Functions for Biscuit Description: A test harness for bsseq loading of Biscuit output, summarization of WGBS data over defined regions and in mappable samples, with or without imputation, dropping of mostly-NA rows, age estimates, etc. -Version: 0.99.1 +Version: 0.99.2 Date: 2019-08-22 Author: Tim Triche, Jr. [aut, cre], Wanding Zhou [aut], Ben Johnson [aut], Jacob Morrison [aut], Lyong Heo [aut] Maintainer: "Tim Triche, Jr." @@ -28,15 +28,21 @@ Imports: readr, VariantAnnotation, DelayedMatrixStats, SummarizedExperiment, + RaggedExperiment, GenomeInfoDb, Mus.musculus, Homo.sapiens, matrixStats, + ComplexHeatmap, + circlize, QDNAseq, dmrseq, methods, + stats, + grid, utils, R.utils, + readxl, gtools Suggests: DSS, covr, diff --git a/NAMESPACE b/NAMESPACE index 171b5be..07dc0b3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,7 +24,11 @@ export(getMvals) export(grToSeg) export(load.biscuit) export(makeBSseq) +export(plotCNA) +export(plotVafAndCN) +export(plotVafMat) export(read.biscuit) +export(read.segs) export(segToGr) export(simplifySampleNames) export(summarizeBsSeqOver) @@ -36,11 +40,13 @@ exportMethods(info) exportMethods(meta) exportMethods(samples) import(BiocGenerics) +import(ComplexHeatmap) import(GenomeInfoDb) import(GenomicRanges) import(Homo.sapiens) import(Mus.musculus) import(QDNAseq) +import(RaggedExperiment) import(Rsamtools) import(S4Vectors) import(SummarizedExperiment) @@ -51,6 +57,7 @@ import(dmrseq) import(gtools) import(impute) import(readr) +import(readxl) importFrom(Biobase,featureNames) importFrom(DelayedMatrixStats,rowCounts) importFrom(HDF5Array,loadHDF5SummarizedExperiment) @@ -59,9 +66,13 @@ importFrom(Matrix,Matrix) importFrom(R.utils,gunzip) importFrom(VariantAnnotation,meta) importFrom(VariantAnnotation,scanVcfHeader) +importFrom(circlize,colorRamp2) importFrom(data.table,fread) +importFrom(grid,gpar) +importFrom(grid,unit) importFrom(gtools,inv.logit) importFrom(gtools,logit) +importFrom(matrixStats,rowMaxs) importFrom(matrixStats,rowSums2) importFrom(methods,as) importFrom(methods,callNextMethod) @@ -69,6 +80,9 @@ importFrom(methods,is) importFrom(methods,new) importFrom(methods,slot) importFrom(qualV,LCS) +importFrom(stats,median) +importFrom(stats,quantile) +importFrom(stats,relevel) importFrom(utils,data) importFrom(utils,packageVersion) importFrom(utils,read.table) diff --git a/R/plotCNAs.R b/R/plotCNAs.R new file mode 100644 index 0000000..1d1c619 --- /dev/null +++ b/R/plotCNAs.R @@ -0,0 +1,88 @@ +#' Analyze CNAs +#' +#' @param filenames List of files with tumor and normal Excel sheets +#' @param minWidth Scaling factor (DEFAULT: 1000) +#' +#' @return Heatmap of CNAs +#' +#' @importFrom grid unit gpar +#' @import readxl +#' @import GenomeInfoDb +#' @import ComplexHeatmap +#' @import RaggedExperiment +#' +#' @export +#' +plotCNA <- function(filenames, + minWidth = 1000) { + + excelCNAs <- .readExcelCNAs(filenames) + + rse <- .makeRse(excelCNAs) + seqlevelsStyle(rse) <- "NCBI" # more compact + rowData(rse)$units <- floor(width(rse) / minWidth) + rse <- subset(rse, rowData(rse)$units > 0) + + toPlot <- assays(rse)$score + toPlot[is.na(toPlot)] <- 0 # yuck + toKeep <- which(rowSums(abs(toPlot)) != 0) + rse <- sortSeqlevels(rse) + chrom <- seqnames(rse) # keep as factor! + toPlot <- toPlot[toKeep, ] + chrom <- chrom[toKeep] + + Heatmap(t(toPlot), name="log2(CN)", + cluster_columns=FALSE, column_split=chrom, row_gap=unit(3, "mm"), + clustering_method_rows="ward.D2", row_split=rse$subject, + column_title_gp=gpar(fontsize=8), show_column_names=FALSE, + row_title_rot=0, na_col="#FFFFFF", column_gap=unit(3, "mm")) +} + + +# Helper function +.readExcelCNAs <- function(filenames) { + res <- GRangesList(lapply(filenames, .readExcelCNA)) + names(res) <- vapply(strsplit(filenames, "_"), `[`, 1, + FUN.VALUE=character(1)) + for (nm in names(res)) { + mcols(res[[nm]])$name <- nm + } + sort(sortSeqlevels(unlist(res))) +} + + +# Helper function +.readExcelCNA <- function(filename) { + message("Processing ", filename, "...") + res1 <- read_excel(filename, sheet=1) + res2 <- read_excel(filename, sheet=2) + res <- rbind(res1, res2) + gr <- .parseCoords(res) + gr$score <- res$log2Ratio + return(gr) +} + + +# Helper function +.parseCoords <- function(res) { + gr <- as(sapply(strsplit(res$Altered_Region, "\\("), `[`, 1), "GRanges") + seqlevelsStyle(gr) <- "UCSC" + return(gr) +} + + +# Helper function +.makeRse <- function(gr) { + disjoinSummarizedExperiment(.makeRagExp(gr), mean, 1) +} + + +# Helper function +.makeRagExp <- function(gr, div=10) { + gr$score <- pmax(-2, pmin(2, round(gr$score*div)/div)) + grl <- split(gr, gr$name) + re <- RaggedExperiment(grl) + colData(re)$subject <- substr(colnames(re), 1, 9) + colData(re)$portion <- substr(colnames(re), 11, 11) + return(re) +} diff --git a/R/plottingUtils.R b/R/plottingUtils.R new file mode 100644 index 0000000..0ed16f6 --- /dev/null +++ b/R/plottingUtils.R @@ -0,0 +1,188 @@ +#' Plot variant annotation matrix +#' +#' @param VRL VRangesList of variants +#' @param cutoff Minimum value to plot (DEFAULT: 0.25) +#' @param ANNonly Show annotations only (DEFAULT: FALSE) +#' +#' @return A Heatmap of the variants +#' +#' @importFrom circlize colorRamp2 +#' @import ComplexHeatmap +#' @import VariantAnnotation +#' +#' @export +#' +plotVafMat <- function(VRL, + cutoff = 0.25, + ANNonly = FALSE) { + + vafMat <- .getVafMat(VRL, cutoff=cutoff, ANNonly=ANNonly) + + if (ANNonly) { + rowSym <- vapply(strsplit(rownames(vafMat), "\\|"), `[`, 1, + FUN.VALUE=character(1)) + vafMat <- rowsum(vafMat, rowSym) + } + + cols <- colorRamp2(c( 0, 0.5, 0.75, 1), + c('white', 'red', 'darkred', 'black')) + subj <- substr(colnames(vafMat), 1, 9) + Heatmap(vafMat, name="VAF", show_row_names=ANNonly, + clustering_distance_rows="manhattan", + clustering_method_rows="ward.D2", + col=cols, column_split=subj) +} + +#' Plot variant annotation matrix and CN +#' +#' @param VRL VRangesList of variants +#' @param rse RaggedExperiment of CNs +#' @param cutoff Minimum value to plot for variants (DEFAULT: 0.25) +#' @param minCN Minimum CN to plot (DEFAULT: 0.25) +#' @param minWidth Scaling factor for CNs (DEFAULT: 1000) +#' +#' @return A Heatmap of the variants and CNs +#' +#' @importFrom grid unit +#' @importFrom stats relevel +#' @importFrom circlize colorRamp2 +#' @import ComplexHeatmap +#' @import VariantAnnotation +#' +#' @export +#' +plotVafAndCN <- function(VRL, rse, cutoff=0.25, minCN=0.25, minWidth=1e3) { + + bothMat <- .getVafAndCN(VRL, rse, cutoff=cutoff, + minCN=minCN, minWidth=minWidth) + + chr <- as.factor(.getChr(bothMat)) + for (ch in rev(c(paste0("chr", c(seq_len(22), "X", "Y"))))) { + if (ch %in% levels(chr)) { + chr <- relevel(chr, ch) + } + } + + cols <- colorRamp2(c( -1, -0.75, 0, 0.75, 1), + c('darkblue', 'blue', 'gray99', 'red', 'darkred')) + + subj <- substr(rownames(bothMat), 1, 9) + annoSyms <- .annotateSymbols(bothMat) + + Heatmap(bothMat, name="VAF/CN", + border="gray50", col=cols, + column_title_rot=90, + show_column_names=FALSE, + show_column_dend=FALSE, + cluster_column_slices=FALSE, + column_gap=unit(2, "mm"), + row_gap=unit(2, "mm"), + show_parent_dend_line=FALSE, + clustering_distance_columns="manhattan", + clustering_method_columns="ward.D2", + clustering_distance_rows="manhattan", + clustering_method_rows="ward.D2", + row_split=subj, column_split=chr, + bottom_annotation=annoSyms) +} + +# Helper function +.getChr <- function(bothMat) { + toSplit <- grep("\\|", colnames(bothMat)) + coords <- colnames(bothMat) + coords[toSplit] <- vapply(strsplit(coords[toSplit], "\\|"), `[`, 2, + FUN.VALUE=character(1)) + vapply(strsplit(coords, ":"), `[`, 1, + FUN.VALUE=character(1)) +} + +# Helper function +#' @importFrom grid unit gpar +.annotateSymbols <- function(bothMat, annoFontSize=9) { + toAnnotate <- grep("\\|", colnames(bothMat)) + geneLabels <- vapply(strsplit(colnames(bothMat)[toAnnotate], "\\|"), `[`, 1, + FUN.VALUE=character(1)) + columnAnnotation(link=anno_mark(at=toAnnotate, + side="bottom", + labels=geneLabels, + extend=unit(2, "cm"), + link_height=unit(1.5, "cm"), + labels_gp=gpar(fontsize=annoFontSize)), + height=unit(3, "cm")) +} + +# Helper function +.getVafAndCN <- function(VRL, rse, cutoff=0.25, minCN=0.25, minWidth=1e3) { + vafMat <- .getVafMat(VRL, cutoff=cutoff, ANNonly=TRUE) + cnaMat <- .getCnaMat(rse, cutoff=minCN, minWidth=minWidth)/2 + cnaMat[ which(is.na(cnaMat)) ] <- 0 + stopifnot(identical(colnames(vafMat), colnames(cnaMat))) + cnaMat <- cnaMat/2 # scale to make sense alongside VAF + bothMat <- t(rbind(cnaMat, vafMat)) + return(bothMat) +} + +# Helper function +#' @importFrom matrixStats rowMaxs +.getVafMat <- function(VRL, cutoff=0.25, ANNonly=FALSE) { + if (ANNonly) VRL <- VRangesList(lapply(VRL, .annOnly)) + VRL <- VRangesList(lapply(VRL, .addLocation)) + vafMat <- .asMatrix(VRL, ANNonly=ANNonly) + vafMat <- subset(vafMat, rowMaxs(vafMat) >= cutoff) + return(vafMat) +} + +# Helper function +#' @importFrom matrixStats rowMaxs +.getCnaMat <- function(rse, cutoff=0.25, minWidth=1e3) { + wideEnough <- width(rse) >= minWidth + deepEnough <- rowMaxs(abs(assays(rse)$score), na.rm=TRUE) >= cutoff + assays(subset(rse, deepEnough & wideEnough))[[1]] +} + +# Helper function +.asMatrix <- function(VRL, ANNonly=FALSE) { + VRL <- VRangesList(.addVaf(VRL)) + if (ANNonly) VRL <- VRangesList(lapply(VRL, .addLocationToName)) + variants <- sort(Reduce(union, lapply(VRL, names))) + bigMat <- matrix(0, nrow=length(variants), ncol=length(VRL)) + rownames(bigMat) <- variants + colnames(bigMat) <- names(VRL) + for (i in names(VRL)) bigMat[names(VRL[[i]]), i] <- VRL[[i]]$VAF + return(bigMat) +} + +# Helper function +.addVaf <- function(x) { + if (is(x, "VRangesList")) { + lapply(x, .addVaf) + } else { + x$VAF <- altDepth(x) / totalDepth(x) + return(x) + } +} + +# Helper function +.addLocationToName <- function(vr) { + names(vr) <- paste(names(vr), vr$location, sep="|") + return(vr) +} + +# Helper function +.annOnly <- function(vr) { + vr <- subset(vr, sapply(grepl("(HIGH|MODERATE)", vr$ANN), any)) + names(vr) <- .getSymbol(vr) + return(vr) +} + +# Helper function +.getSymbol <- function(vr) { + sapply(vr$ANN, + function(x) as.character(sapply(strsplit(x, "\\|"), `[`, 5)[1])) +} + +# Helper function +.addLocation <- function(vr) { + vr$location <- as.character(vr) # for later placement + return(vr) +} diff --git a/R/read.segs.R b/R/read.segs.R new file mode 100644 index 0000000..706a483 --- /dev/null +++ b/R/read.segs.R @@ -0,0 +1,140 @@ +#' Function to turn segmentations into GRanges with correction(s) +#' +#' If your segmentation file is not vaild, this probably won't work. A smarter +#' way to squash baselines would be with a mixture model. One bonus is that the +#' baselining is self-correcting - applying it to an already-corrected +#' segmentation will not do any harm and may even help. +#' +#' @param file The segmentation file to process +#' @param weight Add weights for per-subject baselining? (DEFAULT: TRUE) +#' @param by If weighting, do so by marks or by width (DEFAULT: "marks") +#' @param from Quantiles around the median to "squash" to baseline +#' (DEFAULT: c(.4, .6)) +#' @param minNorm Min log2ratio to consider as possible "normal" baseline +#' (DEFAULT: -0.5) +#' @param maxNorm Max log2ratio to consider as possible "normal" baseline +#' (DEFAULT: +0.5) +#' +#' @return A GRanges with corrections applied +#' +#' @import GenomicRanges +#' +#' @export +#' +read.segs <- function(file, + weight = TRUE, + by = c("marks","width"), + from = c(.4,.6), + minNorm = -0.5, + maxNorm = +0.5) { + + segs <- read.table(file, stringsAsFactors=FALSE, sep="\t") + names(segs)[seq_len(4)] <- c("name","chrom","chromStart","chromEnd") + names(segs)[ncol(segs) - 2] <- "width" # autocorrect below + names(segs)[ncol(segs) - 1] <- "marks" # autocorrect below + names(segs)[ncol(segs)] <- "score" + if (weight) { + by <- match.arg(by) + segs <- .addWeights(segs, by=by) + segs <- .fixBaselines(segs, from=from, minNorm=minNorm, maxNorm=maxNorm) + message("Baselines corrected.") + } + gr <- makeGRangesFromDataFrame(segs, keep.extra.columns=TRUE) + names(gr) <- gr$name + return(gr) +} + + +# Helper function +.addWeights <- function(segs, by=c("marks","width")) { + by <- match.arg(by) + if (!by %in% names(segs)) { + # in case the default does not / cannot work + segs$width <- segs$chromEnd - segs$chromStart + by <- "width" + } + message("Weighting segments by ", by, "...", appendLF=FALSE) + segs <- do.call(rbind, lapply(split(segs, segs$name), .addWeight, by=by)) + rownames(segs) <- segs$names + message(" done.") + return(segs) +} + +# Helper function +.addWeight <- function(seg, by=c("width","marks")) { + by <- match.arg(by) + stopifnot(length(unique(seg$name)) == 1) + if (by == "width") { + seg$width <- seg$chromStart - seg$chromEnd + seg$weight <- seg$width / sum(seg$width) + } else { + seg$weight <- seg$marks / sum(seg$marks) + } + return(seg) +} + + +# Helper function +.fixBaselines <- function(segs, from=c(.4, .6), minNorm=-0.5, maxNorm=+0.5) { + message("Finding baselines...", appendLF=FALSE) + segs <- do.call(rbind, + lapply(split(segs,segs$name), .fixBaseline, from=from, + minNorm=minNorm, maxNorm=maxNorm)) + rownames(segs) <- segs$names + message(" done.") + return(segs) +} + + +# Helper function +#' @importFrom stats quantile +.fixBaseline <- function(seg, from=c(.4, .6), minNorm=-0.5, maxNorm=+0.5) { + baseline <- .findBaseline(seg, minNorm=minNorm, maxNorm=maxNorm) + higher <- quantile(seg$score, prob=max(from), weight=seg$weight, na.rm=TRUE) + lower <- quantile(seg$score, prob=min(from), weight=seg$weight, na.rm=TRUE) + probablyUnchanged <- which(seg$score >= lower & seg$score <= higher) + seg[probablyUnchanged, "score"] <- baseline + seg$score <- seg$score - baseline + return(seg) +} + + +# Helper function +#' @importFrom stats median +.findBaseline <- function(seg, minNorm=-0.5, maxNorm=0.5) { + nearNormal <- subset(seg, score >= minNorm & score <= maxNorm) + median(nearNormal$score, weight=nearNormal$weight, na.rm=TRUE) +} + + +# Helper function +# TODO: Work in progress +.assignUncertainty <- function(seg, plot=FALSE) { + # correct for baseline (median correction; only works for mostly-NK tumors) + seg$score <- seg$score - .findBaseline(seg) + + # generate Rle representations of score X marks, post-shift + + # unlist into an "unweighted" vector of weighted scores + + # run mclust on that ? + + # assign segments in the "baseline" cluster a log-ratio of 0. + + # plot uncertainty, if requested + + # return the corrected segments. +} + + +# Helper function +.write.segs <- function(gr, file=NULL) { + names(gr) <- NULL + segs <- as.data.frame(gr) + segs <- segs[, c("name","seqnames","start","end","score")] + if (!is.null(file)) { + write.table(segs, sep="\t", quote=FALSE, row.names=FALSE, + col.names=FALSE, file=file) + } + return(segs) +} diff --git a/inst/extdata/PML_RARA_TCGA_AB_2998.hg19.tsv b/inst/extdata/PML_RARA_TCGA_AB_2998.hg19.tsv new file mode 100644 index 0000000..7cfd1d5 --- /dev/null +++ b/inst/extdata/PML_RARA_TCGA_AB_2998.hg19.tsv @@ -0,0 +1,7 @@ +seqnames start end name partner QUAL +chr15 72104255 72104255 break1o break1h 79 +chr17 35741663 35741663 break1h break1o 79 +chr17 35741661 35741661 break2o break2h 86 +chr15 72104247 72104247 break2h break2o 86 +chr15 72104257 72104257 break3o break3h 83 +chr17 35741661 35741661 break3h break3o 83 diff --git a/inst/scripts/plotPmlRara.R b/inst/scripts/plotPmlRara.R new file mode 100644 index 0000000..540ee29 --- /dev/null +++ b/inst/scripts/plotPmlRara.R @@ -0,0 +1,34 @@ +### Example scritp for plotting PML RARA circle plot +### Written by: Tim Triche, Jr. +### Last edit: October 7, 2019 +library(Homo.sapiens) +library(GenomicRanges) +library(StructuralVariantAnnotation) + +file <- system.file("extdata", "PML_RARA_TCGA_AB_2998.hg19.tsv", + package="biscuiteer") +gr <- as(read.table(file, header=TRUE), "GRanges") +seqinfo(gr) <- seqinfo(Homo.sapiens)[seqlevels(gr)] + +paired <- .makePairs(split(gr, substr(gr$name, nchar(gr$name), nchar(gr$name)))) +mcols(paired)$name <- sapply(paired, + function(x) substr(mcols(first(x))$name, 1, 6)) +mcols(paired)$score <- sapply(paired, function(x) mcols(first(x))$QUAL) + +library(circlize) +circos.initializeWithIdeogram() +circos.genomicLink(as.data.frame(S4Vectors::first(paired)), + as.data.frame(S4Vectors::second(paired)), + col="red", lwd=3, border="darkred") + +bp <- subset(genes(Homo.sapiens, columns="SYMBOL"), SYMBOL %in% c("PML", + "RARA")) +names(bp) <- sapply(bp$SYMBOL, `[[`, 1) +circos.genomicLabels(bp, labels=names(bp), side="inside") +dev.copy2pdf(file="PML_RARA.pdf") + + +# Helper function +.makePairs <- function(...) { + Pairs(as.list(...)[[1]], as.list(...)[[2]]) +} diff --git a/inst/scripts/signatures.R b/inst/scripts/signatures.R new file mode 100644 index 0000000..06cc635 --- /dev/null +++ b/inst/scripts/signatures.R @@ -0,0 +1,83 @@ +### Example script for plotting mutation signatures +### Written by: Tim Triche, Jr. +### October 7, 2019 +library(YAPSA) +library(circlize) +library(matrixStats) +library(ComplexHeatmap) +library(VariantAnnotation) + +VCF_files <- list.files(pattern=".vcf$") +names(VCF_files) <- sapply(strsplit(VCF_files, "_"), `[`, 1) +VCFs <- lapply(VCF_files, readVcf, genome="hg19") +VRL <- VRangesList(lapply(VCFs, as, "VRanges")) +for (nm in names(VRL)) VRL[[nm]]$pid <- nm + +# helper fn +toYapsaDf <- function(VR) { + columns <- c("seqnames","start","ref","alt","pid") + res <- as(unname(VR), "data.frame")[, columns] + names(res) <- c("CHROM","POS","REF","ALT","PID") + res$POS <- as.integer(res$POS) + return(res) +} + +data("exchange_colour_vector") +DF <- do.call(rbind, lapply(VRL, toYapsaDf)) +DF <- DF[order(DF$PID, DF$CHROM, DF$POS),] +DF$change <- attribute_nucleotide_exchanges(DF) +DF$col <- exchange_colour_vector[DF$change] +DF$SUBJECT <- substr(DF$PID, 1, 9) +DF$PORTION <- substr(DF$PID, 11, 11) + +library(BSgenome.Hsapiens.UCSC.hg19) +mutCats <- + create_mutation_catalogue_from_df(DF, + this_seqnames.field="CHROM", + this_start.field="POS", + this_end.field="POS", + this_PID.field="PID", + this_subgroup.field="SUBJECT", + this_refGenome=BSgenome.Hsapiens.UCSC.hg19, + this_wordLength=3) + +# use AlexCosmicValid_sig_df and AlexCosmicValid_sigInd_df +data(sigs) +cutoff <- 0.033 # 3.3% representation +mutcat_df <- as.data.frame(mutCats$matrix) +general_cutoff_vector <- rep(cutoff, dim(AlexCosmicValid_sig_df)[2]) +cutoff_list <- LCD_complex_cutoff(in_mutation_catalogue_df=mutcat_df, + in_signatures_df=AlexCosmicValid_sig_df, + in_cutoff_vector=general_cutoff_vector, + in_sig_ind_df=AlexCosmicValid_sigInd_df) +subgroups <- make_subgroups_df(DF, cutoff_list$exposures, + in_subgroup.field="SUBJECT") +exposures_barplot(in_title="Mutational signatures", + in_exposures_df=cutoff_list$norm_exposures, + in_signatures_ind_df=cutoff_list$out_sig_ind_df, + in_subgroups_df=subgroups) +dev.copy2pdf(file="mutationSignatures.NOSH_0003_and_0005.normalized.pdf") + +exposures_barplot(in_title="Mutational signatures", + in_exposures_df=cutoff_list$exposures, + in_signatures_ind_df=cutoff_list$out_sig_ind_df, + in_subgroups_df=subgroups) +dev.copy2pdf(file="mutationSignatures.NOSH_0003_and_0005.absolute.pdf") + +# AC1 (green) is C->T, clocklike +# AC3 (goldenrod) is DNA repair defects (HRR) +# AC9 (brown) is POLE/SHM related +# AC12 (darkblue) is MMR related +# AC16 (violet) is endogenous.unknown +# AC19 (deep pink) is exogenous.unknown +# AC28 (navy blue) is (completely) unknown +complex_heatmap_exposures(in_exposures_df=cutoff_list$norm_exposures, + in_method="manhattan", + in_subgroups_df=subgroups, + in_subgroup_column="subgroup", + in_subgroup_colour_column="col", + in_signatures_ind_df=cutoff_list$out_sig_ind_df) +dev.copy2pdf(file="mutationSignatures.NOSH_0003_and_0005.clustered.pdf") + +plotExchangeSpectra(mutcat_df, in_show_triplets=TRUE) +ggsave("mutationSignatures.NOSH_0003_and_0005.spectra.png") diff --git a/man/plotCNA.Rd b/man/plotCNA.Rd new file mode 100644 index 0000000..3faef7b --- /dev/null +++ b/man/plotCNA.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotCNAs.R +\name{plotCNA} +\alias{plotCNA} +\title{Analyze CNAs} +\usage{ +plotCNA(filenames, minWidth = 1000) +} +\arguments{ +\item{filenames}{List of files with tumor and normal Excel sheets} + +\item{minWidth}{Scaling factor (DEFAULT: 1000)} +} +\value{ +\preformatted{ Heatmap of CNAs +} +} +\description{ +Analyze CNAs +} diff --git a/man/plotVafAndCN.Rd b/man/plotVafAndCN.Rd new file mode 100644 index 0000000..3e1a265 --- /dev/null +++ b/man/plotVafAndCN.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plottingUtils.R +\name{plotVafAndCN} +\alias{plotVafAndCN} +\title{Plot variant annotation matrix and CN} +\usage{ +plotVafAndCN(VRL, rse, cutoff = 0.25, minCN = 0.25, minWidth = 1000) +} +\arguments{ +\item{VRL}{VRangesList of variants} + +\item{rse}{RaggedExperiment of CNs} + +\item{cutoff}{Minimum value to plot for variants (DEFAULT: 0.25)} + +\item{minCN}{Minimum CN to plot (DEFAULT: 0.25)} + +\item{minWidth}{Scaling factor for CNs (DEFAULT: 1000)} +} +\value{ +\preformatted{ A Heatmap of the variants and CNs +} +} +\description{ +Plot variant annotation matrix and CN +} diff --git a/man/plotVafMat.Rd b/man/plotVafMat.Rd new file mode 100644 index 0000000..ce05e9f --- /dev/null +++ b/man/plotVafMat.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plottingUtils.R +\name{plotVafMat} +\alias{plotVafMat} +\title{Plot variant annotation matrix} +\usage{ +plotVafMat(VRL, cutoff = 0.25, ANNonly = FALSE) +} +\arguments{ +\item{VRL}{VRangesList of variants} + +\item{cutoff}{Minimum value to plot (DEFAULT: 0.25)} + +\item{ANNonly}{Show annotations only (DEFAULT: FALSE)} +} +\value{ +\preformatted{ A Heatmap of the variants +} +} +\description{ +Plot variant annotation matrix +} diff --git a/man/read.segs.Rd b/man/read.segs.Rd new file mode 100644 index 0000000..b05b815 --- /dev/null +++ b/man/read.segs.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read.segs.R +\name{read.segs} +\alias{read.segs} +\title{Function to turn segmentations into GRanges with correction(s)} +\usage{ +read.segs(file, weight = TRUE, by = c("marks", "width"), + from = c(0.4, 0.6), minNorm = -0.5, maxNorm = +0.5) +} +\arguments{ +\item{file}{The segmentation file to process} + +\item{weight}{Add weights for per-subject baselining? (DEFAULT: TRUE)} + +\item{by}{If weighting, do so by marks or by width (DEFAULT: "marks")} + +\item{from}{Quantiles around the median to "squash" to baseline +(DEFAULT: c(.4, .6))} + +\item{minNorm}{Min log2ratio to consider as possible "normal" baseline +(DEFAULT: -0.5)} + +\item{maxNorm}{Max log2ratio to consider as possible "normal" baseline +(DEFAULT: +0.5)} +} +\value{ +\preformatted{ A GRanges with corrections applied +} +} +\description{ +If your segmentation file is not vaild, this probably won't work. A smarter +way to squash baselines would be with a mixture model. One bonus is that the +baselining is self-correcting - applying it to an already-corrected +segmentation will not do any harm and may even help. +}