Skip to content

Commit

Permalink
Merge pull request #3 from DendrouLab/devika_pr
Browse files Browse the repository at this point in the history
renamed /python/run_scanpyQC_REP.py & commented out barcode_mtd args …
  • Loading branch information
crichgriffin authored Feb 28, 2023
2 parents 215d492 + 220c774 commit b4aa875
Show file tree
Hide file tree
Showing 6 changed files with 196 additions and 19 deletions.
25 changes: 22 additions & 3 deletions panpipes/pipeline_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,13 @@ def run_harmony(outfile):
--modality rna
"""
# cannot use the normal method for importing params from yaml, because it only works up to depth 2
harmony_params = PARAMS['rna']['harmony']
if harmony_params['npcs'] is not None:
cmd += " --harmony_npcs %s" % harmony_params['npcs']
if harmony_params['sigma'] is not None:
cmd += " --sigma_val %s" % harmony_params['sigma']
if harmony_params['theta'] is not None:
cmd += " --theta_val %s" % harmony_params['theta']
neighbor_params = PARAMS['rna']['neighbors']
if neighbor_params['method'] is not None:
cmd += " --neighbors_method %s" % neighbor_params['method']
Expand Down Expand Up @@ -321,6 +328,13 @@ def run_harmony_prot( outfile):
--n_threads %(resources_threads_high)s
"""
cmd += " --modality prot"
harmony_params = PARAMS['prot']['harmony']
if harmony_params['npcs'] is not None:
cmd += " --harmony_npcs %s" % harmony_params['npcs']
if harmony_params['sigma'] is not None:
cmd += " --sigma_val %s" % harmony_params['sigma']
if harmony_params['theta'] is not None:
cmd += " --theta_val %s" % harmony_params['theta']
neighbor_params = PARAMS['prot']['neighbors']
if neighbor_params['method'] is not None:
cmd += " --neighbors_method %s" % neighbor_params['method']
Expand Down Expand Up @@ -416,9 +430,14 @@ def run_harmony_atac( outfile):
--n_threads %(resources_threads_high)s
--modality atac
"""

if PARAMS['atac_sigma'] is not None:
cmd += " --sigma_val %(atac_sigma)s"

harmony_params = PARAMS['atac']['harmony']
if harmony_params['npcs'] is not None:
cmd += " --harmony_npcs %s" % harmony_params['npcs']
if harmony_params['sigma'] is not None:
cmd += " --sigma_val %s" % harmony_params['sigma']
if harmony_params['theta'] is not None:
cmd += " --theta_val %s" % harmony_params['theta']
neighbor_params = PARAMS['atac']['neighbors']
if neighbor_params['method'] is not None:
cmd += " --neighbors_method %s" % neighbor_params['method']
Expand Down
29 changes: 21 additions & 8 deletions panpipes/pipeline_integration/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,22 @@ rna:
#-----------------------------
# Harmony args
#-----------------------------
harmony:
# sigma value, used by Harmony
sigma: 0.1
harmony_npcs: 30
sigma: 0.1
# theta value used by Harmony, default is 1
theta: 1.0
# number of pcs, used by Harmony
npcs: 30
#-----------------------------
# SCVI args
#-----------------------------
scvi:
exclude_mt_genes: True
mt_column: mt
model_args:
nlayers:
nlatent:
n_layers:
n_latent:
gene_likelihood: zinb
training_args:
max_epochs: 400
Expand All @@ -87,6 +91,7 @@ rna:
# number of Principal Components to calculate for neighbours and umap:
# -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on
# -if Harmony is the method of choice, it will use these components to create a corrected dim red.)
# the maximum number of dims for neighbors calculation can only only be lower or equal to the total number of dims for PCA or Harmony
# note: scvelo default is 30
npcs: 30
# number of neighbours
Expand All @@ -111,9 +116,13 @@ prot:
#----------------------------
# Harmony args
#-----------------------------
harmony:
# sigma value, used by Harmony
sigma: 0.1 #prot_sigma
harmony_npcs: 30
sigma: 0.1
# theta value used by Harmony, default is 1
theta: 1.0
# number of pcs, used by Harmony
npcs: 30
#----------------------------›
# find neighbour parameters
#-----------------------------
Expand Down Expand Up @@ -144,9 +153,13 @@ atac:
#----------------------------
# Harmony args
#-----------------------------
harmony:
# sigma value, used by Harmony
sigma: 0.1 #prot_sigma
harmony_npcs: 30
sigma: 0.1
# theta value used by Harmony, default is 1
theta: 1.0
# number of pcs, used by Harmony
npcs: 30
#----------------------------
# find neighbour parameters
#-----------------------------
Expand Down
6 changes: 3 additions & 3 deletions panpipes/pipeline_qc_mm.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,9 +285,9 @@ def concat_bg_mudatas(infiles, outfile):
"""
if PARAMS['metadatacols'] is not None and PARAMS['metadatacols'] != "":
cmd += " --metadatacols %(metadatacols)s"
if PARAMS["barcode_mtd_include"] is True:
cmd += " --barcode_mtd_df %(barcode_mtd_path)s"
cmd += " --barcode_mtd_metadatacols %(barcode_mtd_metadatacols)s"
# if PARAMS["barcode_mtd_include"] is True:
# cmd += " --barcode_mtd_df %(barcode_mtd_path)s"
# cmd += " --barcode_mtd_metadatacols %(barcode_mtd_metadatacols)s"
cmd += " > logs/concat_bg_mudatas.log"
job_kwargs["job_threads"] = PARAMS['resources_threads_high']
P.run(cmd, **job_kwargs)
Expand Down
13 changes: 9 additions & 4 deletions python/batch_correct_harmony.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,13 @@
help='')
parser.add_argument('--n_threads', default=1,
help="num threads to use for neighbor computations")
parser.add_argument('--harmony_npcs', default=30,
help="npcs for running harmony")
parser.add_argument('--sigma_val', default=0.1,
help="sigma")
parser.add_argument('--neighbors_n_pcs',
parser.add_argument('--theta_val', default=1.0,
help="theta")
parser.add_argument('--neighbors_n_pcs', default=30,
help="n_pcs")
parser.add_argument('--neighbors_method',
help="neighbours method, scanpy or hnsw")
Expand Down Expand Up @@ -73,18 +77,19 @@
adata.obs["comb_columns"] = adata.obs["comb_columns"].astype("category")
# run harmony

ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.neighbors_n_pcs)], adata.obs, ["comb_columns"],
sigma = float(args.sigma_val), verbose=True,max_iter_kmeans=30,
ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.harmony_npcs)], adata.obs, ["comb_columns"],
sigma = float(args.sigma_val),theta = float(args.theta_val),verbose=True,max_iter_kmeans=30,
max_iter_harmony=40)

else:
# make sure that batch is a categorical
adata.obs[args.integration_col] = adata.obs[args.integration_col].astype("category")
# run harmony
ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.neighbors_n_pcs)],
ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.harmony_npcs)],
adata.obs,
[args.integration_col],
sigma=float(args.sigma_val),
theta = float(args.theta_val),
verbose=True,max_iter_kmeans=30,
max_iter_harmony=40)

Expand Down
2 changes: 1 addition & 1 deletion python/make_adata_from_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@
# create new modality for hashing
mdata.mod["hashing_ab"]=mdata["prot"][:, mdata["prot"].var["hashing_ab"]]
# subset old modality to remove hashing
mdata["prot"][:, ~mdata["prot"].var["hashing_ab"]]
mdata.mod['prot'] = mdata["prot"][:, ~mdata["prot"].var["hashing_ab"]]
except FileNotFoundError:
warnings.warn("protein metadata table not found")
mdata.update()
Expand Down
140 changes: 140 additions & 0 deletions python/run_scanpyQC_rep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
'''
scanpy QC script GEX
order of QC:
- GEX
- ADT
- Repertoire
- ATAC
'''
import sys
import logging
L = logging.getLogger()
L.setLevel(logging.INFO)
log_handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s: %(levelname)s - %(message)s')
log_handler.setFormatter(formatter)
L.addHandler(log_handler)

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import argparse
import pandas as pd
import muon as mu
import scirpy as ir
import matplotlib.pyplot as plt
plt.ioff()
from panpipes.funcs.io import write_obs
import yaml

parser = argparse.ArgumentParser()
# required option
parser.add_argument("--sampleprefix",
default="",
help="prefix to prepend when saving the metadata file")
parser.add_argument("--input_mudata",
default="mdata_unfilt.h5mu",
help="")
parser.add_argument("--output_mudata",
default="mdata_unfilt.h5mu",
help="")
parser.add_argument("--figdir",
default="./figures/",
help="path to save the figures to")

parser.add_argument("--distance_metrics",
default=None, type=yaml.safe_load,
help="what metrics to calculate sequence distance metric? \
any arguments from scirpy.pp.ir_dist as a dict in format'{arg: value}' ")
parser.add_argument("--clonotype_metrics",
default=None,type=yaml.safe_load,
help="what metrics to calculate sequence distance metric? \
any arguments from scirpy.pp.define_clonotypes as a dict in format'{arg: value}' ")
args, opt = parser.parse_known_args()
L.info(args)
mdata = mu.read(args.input_mudata)
rep = mdata['rep']
# chain qc
ir.tl.chain_qc(rep)

# remove nones, so defaults are used
if args.distance_metrics is not None:
# remove nones, so defaults are used
dist_args = {k: v for k, v in args.distance_metrics.items() if v != "None"}
dist_args = {k: v for k, v in dist_args.items() if v}
else:
dist_args = {}

L.info("distance args:")
L.info(dist_args)

ir.pp.ir_dist(rep, **dist_args)

# pull function arguments from args.
if args.clonotype_metrics is not None:
# remove nones, so defaults are used
clonotype_args = {k: v for k, v in args.clonotype_metrics.items() if v != "None"}
clonotype_args = {k: v for k, v in clonotype_args.items() if v}
else:
clonotype_args={}
# define clonotypes
L.info("clonotypes args:")
L.info(clonotype_args)

ir.tl.define_clonotypes(rep, **clonotype_args)
ir.tl.clonal_expansion(rep)


category_cols = ['has_ir']
for cc in category_cols:
if pd.api.types.infer_dtype(rep.obs[cc]) != "categorical":
rep.obs[cc] = rep.obs[cc].astype('string').astype('category')


if "BCR" in rep.obs['receptor_type'].values:
bcr = rep[rep.obs.receptor_type =="BCR", :].copy()
else:
bcr=None

if "TCR" in rep.obs.receptor_type.values:
tcr = rep[rep.obs.receptor_type =="TCR", :].copy()
else:
tcr=None

# plot group abundances
fig, ax = plt.subplots()
ax = ir.pl.group_abundance(rep, groupby="receptor_type", ax=ax)
fig.savefig(args.figdir +"/barplot_group_abundance_receptor_type.png", bbox_inches="tight")

if bcr is not None:
fig, ax = plt.subplots()
ax = ir.pl.group_abundance(bcr, groupby="chain_pairing", ax=ax)
fig.tight_layout()
fig.savefig(args.figdir +"/barplot_group_abundance_bcr_receptor_subtype.png", bbox_inches="tight")

if tcr is not None:
fig, ax = plt.subplots()
ax = ir.pl.group_abundance(tcr, groupby="chain_pairing", ax=ax)
fig.tight_layout()
fig.savefig(args.figdir +"/barplot_group_abundance_tcr_receptor_subtype.png", bbox_inches="tight")

# clonal expansion

clone_counts = rep.obs.groupby("receptor_type").clone_id.value_counts().to_frame("cell_counts").reset_index()

if bcr is not None:
ax = ir.pl.clonal_expansion(bcr, groupby="sample_id")
ax.set_title("bcr clonal expansion")
plt.savefig(args.figdir +"/barplot_clonal_expansion_bcr.png", bbox_inches="tight" )

if tcr is not None:
ax = ir.pl.clonal_expansion(tcr, groupby="sample_id")
ax.set_title("tcr clonal expansion")
plt.savefig(args.figdir +"/barplot_clonal_expansion_tcr.png", bbox_inches="tight")


L.info("saving anndata and obs in a metadata tsv file")
mdata.update()
write_obs(mdata, output_prefix=args.sampleprefix,
output_suffix="_cell_metadata.tsv")
mdata.write(args.output_mudata)

0 comments on commit b4aa875

Please sign in to comment.