Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

0.27.3 #133

Merged
merged 10 commits into from
Apr 3, 2024
2 changes: 1 addition & 1 deletion biopipen/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.27.2"
__version__ = "0.27.3"
8 changes: 8 additions & 0 deletions biopipen/ns/scrna.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,13 @@ class SeuratPreparing(Proc):
- scvi: Same as `scVIIntegration`.
- <more>: See <https://satijalab.org/seurat/reference/integratelayers>

DoubletFinder (ns): Arguments to run [`DoubletFinder`](https://github.com/chris-mcginnis-ucsf/DoubletFinder).
See also <https://demultiplexing-doublet-detecting-docs.readthedocs.io/en/latest/DoubletFinder.html>.
To disable `DoubletFinder`, set `envs.DoubletFinder` to `None` or `False`; or set `pcs` to `0`.
- PCs (type=int): Number of PCs to use for 'doubletFinder' function.
- doublets (type=float): Number of expected doublets as a proportion of the pool size.
- pN (type=float): Number of doublets to simulate as a proportion of the pool size.

Requires:
r-seurat:
- check: {{proc.lang}} <(echo "library(Seurat)")
Expand All @@ -227,6 +234,7 @@ class SeuratPreparing(Proc):
"min_cells": 5,
},
"IntegrateLayers": {"method": "harmony"},
"DoubletFinder": {"PCs": 0, "pN": 0.25, "doublets": 0.075},
}
script = "file://../scripts/scrna/SeuratPreparing.R"
plugin_opts = {
Expand Down
35 changes: 30 additions & 5 deletions biopipen/scripts/scrna/MarkersFinder.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ expand_each <- function(name, case) {
pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
}
for (each in eachs) {
by <- make.names(paste0(".", name, "_", case$each,"_", each))
by <- make.names(paste0("..", name, "_", case$each,"_", each))
[email protected] <<- [email protected] %>% mutate(
!!sym(by) := if_else(
!!sym(case$each) == each,
Expand Down Expand Up @@ -364,6 +364,16 @@ add_case_report <- function(info, sigmarkers, siggenes) {
}
}

ensure_sobj <- function(expr, allow_empty) {
tryCatch({ expr }, error = function(e) {
if (allow_empty) {
log_warn(" Ignoring this case: {e$message}")
return(NULL)
} else {
stop(e)
}
})
}

do_case_findall <- function(casename) {
# casename
Expand All @@ -382,10 +392,17 @@ do_case_findall <- function(casename) {
# args$min.cells.group <- args$min.cells.group %||% 1
# args$min.cells.feature <- args$min.cells.feature %||% 1
# args$min.pct <- args$min.pct %||% 0
allow_empty = startsWith(case$group.by, "..")
if (!is.null(case$subset)) {
args$object <- srtobj %>% filter(!!parse_expr(case$subset) & !is.na(!!sym(case$group.by)))
args$object <- ensure_sobj({
srtobj %>% filter(!!parse_expr(case$subset) & !is.na(!!sym(case$group.by)))
}, allow_empty)
if (is.null(args$object)) { return() }
} else {
args$object <- srtobj %>% filter(!is.na(!!sym(case$group.by)))
args$object <- ensure_sobj({
srtobj %>% filter(!is.na(!!sym(case$group.by)))
}, allow_empty)
if (is.null(args$object)) { return() }
}
Idents(args$object) <- case$group.by

Expand Down Expand Up @@ -486,11 +503,19 @@ do_case <- function(casename) {
# sigmarkers
# rest
args <- case$rest
allow_empty = startsWith(case$group.by, "..")
if (!is.null(case$subset)) {
args$object <- srtobj %>% filter(!!parse_expr(case$subset) & !is.na(!!sym(case$group.by)))
args$object <- ensure_sobj({
srtobj %>% filter(!!parse_expr(case$subset) & !is.na(!!sym(case$group.by)))
}, allow_empty)
if (is.null(args$object)) { return() }
} else {
args$object <- srtobj %>% filter(!is.na(!!sym(case$group.by)))
args$object <- ensure_sobj({
srtobj %>% filter(!is.na(!!sym(case$group.by)))
}, allow_empty)
if (is.null(args$object)) { return() }
}

args$assay <- case$assay
args$group.by <- case$group.by
args$ident.1 <- case$ident.1
Expand Down
20 changes: 17 additions & 3 deletions biopipen/scripts/scrna/MetaMarkers.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ expand_each <- function(name, case) {
pull(case$each) %>% unique() %>% na.omit()
}
for (each in eachs) {
by = make.names(paste0(".", name, "_", case$each, "_", each))
by = make.names(paste0("..", name, "_", case$each, "_", each))
idents <- case$idents
if (is.null(idents) || length(idents) == 0) {
[email protected] = [email protected] %>%
Expand Down Expand Up @@ -169,17 +169,31 @@ do_enrich <- function(info, markers, sig) {
}
}

ensure_sobj <- function(expr, allow_empty) {
tryCatch({ expr }, error = function(e) {
if (allow_empty) {
log_warn(" Ignoring this case: {e$message}")
return(NULL)
} else {
stop(e)
}
})
}

do_case <- function(casename) {
log_info("- Dealing with case: {casename} ...")
info <- casename_info(casename, cases, outdir, create = TRUE)
case <- cases[[casename]]
allow_empty = startsWith(case$group_by, "..")

if (sum(!is.na([email protected][[case$group_by]])) == 0) {
msg = "Not enough cells to run tests."
} else {
sobj <- srtobj %>% filter(!is.na(!!sym(case$group_by)))
sobj <- ensure_sobj({ srtobj %>% filter(!is.na(!!sym(case$group_by))) }, allow_empty)
if (is.null(sobj)) { return() }
if (!is.null(case$subset)) {
sobj <- srtobj %>% filter(!is.na(!!sym(case$group_by)), !!parse_expr(case$subset))
sobj <- ensure_sobj({ sobj %>% filter(!!parse_expr(case$subset)) }, allow_empty)
if (is.null(sobj)) { return() }
}
df <- tryCatch({
GetAssayData(sobj, layer = "data")
Expand Down
25 changes: 21 additions & 4 deletions biopipen/scripts/scrna/ScFGSEA.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ expand_each <- function(name, case) {
pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
}
for (each in eachs) {
by <- make.names(paste0(".", name, "_", case$each,"_", each))
by <- make.names(paste0("..", name, "_", case$each,"_", each))
[email protected] <<- [email protected] %>%
mutate(!!sym(by) := if_else(
!!sym(case$each) == each,
Expand All @@ -97,18 +97,35 @@ log_info("- Expanding cases...")
cases <- expand_cases(cases, defaults, expand_each)


ensure_sobj <- function(expr, allow_empty) {
tryCatch({ expr }, error = function(e) {
if (allow_empty) {
log_warn(" Ignoring this case: {e$message}")
return(NULL)
} else {
stop(e)
}
})
}


do_case <- function(name, case) {
log_info("- Handling case: {name} ...")
info <- casename_info(name, cases, outdir, create = TRUE)

allow_empty = startsWith(case$group.by, "..")
# prepare expression matrix
log_info(" Preparing expression matrix...")
sobj <- srtobj %>% filter(!is.na(!!sym(case$group.by)))
sobj <- ensure_sobj({ srtobj %>% filter(!is.na(!!sym(case$group.by))) }, allow_empty)
if (is.null(sobj)) { return() }

if (!is.null(case$subset)) {
sobj <- sobj %>% filter(!!!parse_exprs(case$subset))
sobj <- ensure_sobj({ sobj %>% filter(!!!parse_exprs(case$subset)) }, allow_empty)
if (is.null(sobj)) { return() }
}
if (!is.null(case$ident.2)) {
sobj <- sobj %>% filter(!!sym(case$group.by) %in% c(case$ident.1, case$ident.2))
sobj <- ensure_sobj({ sobj %>% filter(!!sym(case$group.by) %in% c(case$ident.1, case$ident.2)) }, allow_empty)
if (is.null(sobj)) { return() }
}

allclasses <- [email protected][, case$group.by, drop = TRUE]
Expand Down
114 changes: 113 additions & 1 deletion biopipen/scripts/scrna/SeuratPreparing.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ envs = {{envs | r: todot = "-", skip = 1}}

set.seed(8525)
options(future.globals.maxSize = 80000 * 1024^2)
options(future.rng.onMisuse="ignore")
options(Seurat.object.assay.version = "v5")
plan(strategy = "multicore", workers = envs$ncores)

Expand Down Expand Up @@ -342,7 +343,7 @@ RunPCAArgs$object <- sobj
sobj <- do_call(RunPCA, RunPCAArgs)

if (!envs$no_integration) {
log_info("- Running IntegrateLayers ...")
log_info("- Running IntegrateLayers (method = {envs$IntegrateLayers$method}) ...")
IntegrateLayersArgs <- envs$IntegrateLayers
method <- IntegrateLayersArgs$method
if (!is.null(IntegrateLayersArgs$reference) && is.character(IntegrateLayersArgs$reference)) {
Expand Down Expand Up @@ -383,6 +384,117 @@ if (!envs$use_sct) {
sobj <- JoinLayers(sobj)
}

if (!is.null(envs$DoubletFinder) && is.list(envs$DoubletFinder) && envs$DoubletFinder$PCs > 0) {
library(DoubletFinder)

log_info("Running DoubletFinder ...")
log_info("- Preparing Seurat object ...")
# More controls from envs?
sobj <- FindNeighbors(sobj, dims = 1:envs$DoubletFinder$PCs)
sobj <- FindClusters(sobj)

log_info("- pK Indentification ...")
sweep.res.list <- paramSweep(
sobj,
PCs = 1:envs$DoubletFinder$PCs,
sct = envs$use_sct,
num.cores = envs$ncores
)
sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
bcmvn <- find.pK(sweep.stats)

bcmvn$Selected <- bcmvn$pK == bcmvn$pK[which.max(bcmvn$BCmetric)[1]]
plot <- ggplot(bcmvn, aes(x = pK, y = BCmetric, color = Selected)) +
geom_point() +
# rotate x axis labels
theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave(plot, filename = file.path(plotsdir, "pK_BCmetric.png"))

pK <- bcmvn$pK[which.max(bcmvn$BCmetric)[1]]
pK <- as.numeric(as.character(pK))
pN <- envs$DoubletFinder$pN
log_info("- Homotypic Doublet Proportion Estimate ...")
homotypic.prop <- modelHomotypic(Idents(sobj))
nExp_poi <- round(nrow([email protected]) * envs$DoubletFinder$doublets)
nExp_poi.adj <- round(nExp_poi * (1 - homotypic.prop))

log_info("- Running DoubletFinder ...")
sobj <- doubletFinder(
sobj,
PCs = 1:envs$DoubletFinder$PCs,
pN = pN,
pK = pK,
nExp = nExp_poi.adj,
reuse.pANN = FALSE,
sct = envs$use_sct
)
pANN_col <- paste0("pANN_", pN, "_", pK)
pANN_col <- colnames([email protected])[grepl(pANN_col, colnames([email protected]))]
DF_col <- paste0("DF.classifications_", pN, "_", pK)
DF_col <- colnames([email protected])[grepl(DF_col, colnames([email protected]))]
doublets <- as.data.frame(
cbind(
colnames(sobj),
[email protected][, pANN_col],
[email protected][, DF_col]
)
)
colnames(doublets) <- c("Barcode","DoubletFinder_score","DoubletFinder_DropletType")
write.table(
doublets,
file.path(joboutdir, "DoubletFinder_doublets_singlets.txt"),
row.names = FALSE,
quote = FALSE,
sep = "\t"
)

summary <- as.data.frame(table(doublets$DoubletFinder_DropletType))
colnames(summary) <- c("Classification", "Droplet_N")
write.table(
summary,
file.path(joboutdir, "DoubletFinder_summary.txt"),
row.names = FALSE,
quote = FALSE,
sep = "\t"
)

# Do a dimplot
log_info("- Plotting dimension reduction ...")
dimp <- DimPlot(
sobj, group.by = DF_col, order = "Doublet",
cols = c("#333333", "#FF3333"), pt.size = 0.8, alpha = 0.5)
ggsave(dimp, filename = file.path(plotsdir, "DoubletFinder_dimplot.png"))

log_info("- Filtering doublets ...")
sobj <- subset(sobj, cells = doublets$Barcode[doublets$DoubletFinder_DropletType == "Singlet"])

add_report(
list(
kind = "descr",
content = "The table contains the number of cells classified as singlets and doublets."
),
list(
kind = "table",
data = list(path = file.path(joboutdir, "DoubletFinder_summary.txt"))
),
h1 = "DoubletFinder Results",
h2 = "The DoubletFinder Summary"
)
add_report(
list(
name = "pK vs BCmetric",
src = file.path(plotsdir, "pK_BCmetric.png")
),
list(
name = "Dimension Reduction Plot",
src = file.path(plotsdir, "DoubletFinder_dimplot.png")
),
ui = "table_of_images",
h1 = "DoubletFinder Results",
h2 = "Plots"
)
}

log_info("Saving filtered seurat object ...")
saveRDS(sobj, rdsfile)

Expand Down
9 changes: 9 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Change Log

## 0.27.3

- deps: temporary fix copier breaks with pyyaml-include v2 (copier-org/copier#1568)
- deps: bump pipen-poplog to 0.1.2 (quick fix for populating logs when job fails)
- choir(scrna.ScFGSEA): Skip cases when no cells found (pwwang/immunopipe#50)
- choir(scrna.MarkersFinder): Skip cases when no cells found (pwwang/immunopipe#50)
- choir(scrna.MetaMarkers): Skip cases when no cells found (pwwang/immunopipe#50)
- feat(scrna.SeuratPreparing): support `DoubletFinder`

## 0.27.2

- fix(utils.misc.py): inherit envs when `env` passed for `run_command()`
Expand Down
Loading
Loading