Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

renamed /python/run_scanpyQC_REP.py & commented out barcode_mtd args … #3

Merged
merged 11 commits into from
Feb 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions panpipes/pipeline_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,13 @@ def run_harmony(outfile):
--modality rna
"""
# cannot use the normal method for importing params from yaml, because it only works up to depth 2
harmony_params = PARAMS['rna']['harmony']
if harmony_params['npcs'] is not None:
cmd += " --harmony_npcs %s" % harmony_params['npcs']
if harmony_params['sigma'] is not None:
cmd += " --sigma_val %s" % harmony_params['sigma']
if harmony_params['theta'] is not None:
cmd += " --theta_val %s" % harmony_params['theta']
neighbor_params = PARAMS['rna']['neighbors']
if neighbor_params['method'] is not None:
cmd += " --neighbors_method %s" % neighbor_params['method']
Expand Down Expand Up @@ -321,6 +328,13 @@ def run_harmony_prot( outfile):
--n_threads %(resources_threads_high)s
"""
cmd += " --modality prot"
harmony_params = PARAMS['prot']['harmony']
if harmony_params['npcs'] is not None:
cmd += " --harmony_npcs %s" % harmony_params['npcs']
if harmony_params['sigma'] is not None:
cmd += " --sigma_val %s" % harmony_params['sigma']
if harmony_params['theta'] is not None:
cmd += " --theta_val %s" % harmony_params['theta']
neighbor_params = PARAMS['prot']['neighbors']
if neighbor_params['method'] is not None:
cmd += " --neighbors_method %s" % neighbor_params['method']
Expand Down Expand Up @@ -416,9 +430,14 @@ def run_harmony_atac( outfile):
--n_threads %(resources_threads_high)s
--modality atac
"""

if PARAMS['atac_sigma'] is not None:
cmd += " --sigma_val %(atac_sigma)s"

harmony_params = PARAMS['atac']['harmony']
if harmony_params['npcs'] is not None:
cmd += " --harmony_npcs %s" % harmony_params['npcs']
if harmony_params['sigma'] is not None:
cmd += " --sigma_val %s" % harmony_params['sigma']
if harmony_params['theta'] is not None:
cmd += " --theta_val %s" % harmony_params['theta']
neighbor_params = PARAMS['atac']['neighbors']
if neighbor_params['method'] is not None:
cmd += " --neighbors_method %s" % neighbor_params['method']
Expand Down
29 changes: 21 additions & 8 deletions panpipes/pipeline_integration/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,22 @@ rna:
#-----------------------------
# Harmony args
#-----------------------------
harmony:
# sigma value, used by Harmony
sigma: 0.1
harmony_npcs: 30
sigma: 0.1
# theta value used by Harmony, default is 1
theta: 1.0
# number of pcs, used by Harmony
npcs: 30
#-----------------------------
# SCVI args
#-----------------------------
scvi:
exclude_mt_genes: True
mt_column: mt
model_args:
nlayers:
nlatent:
n_layers:
n_latent:
gene_likelihood: zinb
training_args:
max_epochs: 400
Expand All @@ -87,6 +91,7 @@ rna:
# number of Principal Components to calculate for neighbours and umap:
# -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on
# -if Harmony is the method of choice, it will use these components to create a corrected dim red.)
# the maximum number of dims for neighbors calculation can only only be lower or equal to the total number of dims for PCA or Harmony
# note: scvelo default is 30
npcs: 30
# number of neighbours
Expand All @@ -111,9 +116,13 @@ prot:
#----------------------------
# Harmony args
#-----------------------------
harmony:
# sigma value, used by Harmony
sigma: 0.1 #prot_sigma
harmony_npcs: 30
sigma: 0.1
# theta value used by Harmony, default is 1
theta: 1.0
# number of pcs, used by Harmony
npcs: 30
#----------------------------›
# find neighbour parameters
#-----------------------------
Expand Down Expand Up @@ -144,9 +153,13 @@ atac:
#----------------------------
# Harmony args
#-----------------------------
harmony:
# sigma value, used by Harmony
sigma: 0.1 #prot_sigma
harmony_npcs: 30
sigma: 0.1
# theta value used by Harmony, default is 1
theta: 1.0
# number of pcs, used by Harmony
npcs: 30
#----------------------------
# find neighbour parameters
#-----------------------------
Expand Down
6 changes: 3 additions & 3 deletions panpipes/pipeline_qc_mm.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,9 +285,9 @@ def concat_bg_mudatas(infiles, outfile):
"""
if PARAMS['metadatacols'] is not None and PARAMS['metadatacols'] != "":
cmd += " --metadatacols %(metadatacols)s"
if PARAMS["barcode_mtd_include"] is True:
cmd += " --barcode_mtd_df %(barcode_mtd_path)s"
cmd += " --barcode_mtd_metadatacols %(barcode_mtd_metadatacols)s"
# if PARAMS["barcode_mtd_include"] is True:
# cmd += " --barcode_mtd_df %(barcode_mtd_path)s"
# cmd += " --barcode_mtd_metadatacols %(barcode_mtd_metadatacols)s"
cmd += " > logs/concat_bg_mudatas.log"
job_kwargs["job_threads"] = PARAMS['resources_threads_high']
P.run(cmd, **job_kwargs)
Expand Down
13 changes: 9 additions & 4 deletions python/batch_correct_harmony.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,13 @@
help='')
parser.add_argument('--n_threads', default=1,
help="num threads to use for neighbor computations")
parser.add_argument('--harmony_npcs', default=30,
help="npcs for running harmony")
parser.add_argument('--sigma_val', default=0.1,
help="sigma")
parser.add_argument('--neighbors_n_pcs',
parser.add_argument('--theta_val', default=1.0,
help="theta")
parser.add_argument('--neighbors_n_pcs', default=30,
help="n_pcs")
parser.add_argument('--neighbors_method',
help="neighbours method, scanpy or hnsw")
Expand Down Expand Up @@ -73,18 +77,19 @@
adata.obs["comb_columns"] = adata.obs["comb_columns"].astype("category")
# run harmony

ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.neighbors_n_pcs)], adata.obs, ["comb_columns"],
sigma = float(args.sigma_val), verbose=True,max_iter_kmeans=30,
ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.harmony_npcs)], adata.obs, ["comb_columns"],
sigma = float(args.sigma_val),theta = float(args.theta_val),verbose=True,max_iter_kmeans=30,
max_iter_harmony=40)

else:
# make sure that batch is a categorical
adata.obs[args.integration_col] = adata.obs[args.integration_col].astype("category")
# run harmony
ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.neighbors_n_pcs)],
ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.harmony_npcs)],
adata.obs,
[args.integration_col],
sigma=float(args.sigma_val),
theta = float(args.theta_val),
verbose=True,max_iter_kmeans=30,
max_iter_harmony=40)

Expand Down
2 changes: 1 addition & 1 deletion python/make_adata_from_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@
# create new modality for hashing
mdata.mod["hashing_ab"]=mdata["prot"][:, mdata["prot"].var["hashing_ab"]]
# subset old modality to remove hashing
mdata["prot"][:, ~mdata["prot"].var["hashing_ab"]]
mdata.mod['prot'] = mdata["prot"][:, ~mdata["prot"].var["hashing_ab"]]
except FileNotFoundError:
warnings.warn("protein metadata table not found")
mdata.update()
Expand Down
140 changes: 140 additions & 0 deletions python/run_scanpyQC_rep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
'''
scanpy QC script GEX
order of QC:
- GEX
- ADT
- Repertoire
- ATAC
'''
import sys
import logging
L = logging.getLogger()
L.setLevel(logging.INFO)
log_handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s: %(levelname)s - %(message)s')
log_handler.setFormatter(formatter)
L.addHandler(log_handler)

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import argparse
import pandas as pd
import muon as mu
import scirpy as ir
import matplotlib.pyplot as plt
plt.ioff()
from panpipes.funcs.io import write_obs
import yaml

parser = argparse.ArgumentParser()
# required option
parser.add_argument("--sampleprefix",
default="",
help="prefix to prepend when saving the metadata file")
parser.add_argument("--input_mudata",
default="mdata_unfilt.h5mu",
help="")
parser.add_argument("--output_mudata",
default="mdata_unfilt.h5mu",
help="")
parser.add_argument("--figdir",
default="./figures/",
help="path to save the figures to")

parser.add_argument("--distance_metrics",
default=None, type=yaml.safe_load,
help="what metrics to calculate sequence distance metric? \
any arguments from scirpy.pp.ir_dist as a dict in format'{arg: value}' ")
parser.add_argument("--clonotype_metrics",
default=None,type=yaml.safe_load,
help="what metrics to calculate sequence distance metric? \
any arguments from scirpy.pp.define_clonotypes as a dict in format'{arg: value}' ")
args, opt = parser.parse_known_args()
L.info(args)
mdata = mu.read(args.input_mudata)
rep = mdata['rep']
# chain qc
ir.tl.chain_qc(rep)

# remove nones, so defaults are used
if args.distance_metrics is not None:
# remove nones, so defaults are used
dist_args = {k: v for k, v in args.distance_metrics.items() if v != "None"}
dist_args = {k: v for k, v in dist_args.items() if v}
else:
dist_args = {}

L.info("distance args:")
L.info(dist_args)

ir.pp.ir_dist(rep, **dist_args)

# pull function arguments from args.
if args.clonotype_metrics is not None:
# remove nones, so defaults are used
clonotype_args = {k: v for k, v in args.clonotype_metrics.items() if v != "None"}
clonotype_args = {k: v for k, v in clonotype_args.items() if v}
else:
clonotype_args={}
# define clonotypes
L.info("clonotypes args:")
L.info(clonotype_args)

ir.tl.define_clonotypes(rep, **clonotype_args)
ir.tl.clonal_expansion(rep)


category_cols = ['has_ir']
for cc in category_cols:
if pd.api.types.infer_dtype(rep.obs[cc]) != "categorical":
rep.obs[cc] = rep.obs[cc].astype('string').astype('category')


if "BCR" in rep.obs['receptor_type'].values:
bcr = rep[rep.obs.receptor_type =="BCR", :].copy()
else:
bcr=None

if "TCR" in rep.obs.receptor_type.values:
tcr = rep[rep.obs.receptor_type =="TCR", :].copy()
else:
tcr=None

# plot group abundances
fig, ax = plt.subplots()
ax = ir.pl.group_abundance(rep, groupby="receptor_type", ax=ax)
fig.savefig(args.figdir +"/barplot_group_abundance_receptor_type.png", bbox_inches="tight")

if bcr is not None:
fig, ax = plt.subplots()
ax = ir.pl.group_abundance(bcr, groupby="chain_pairing", ax=ax)
fig.tight_layout()
fig.savefig(args.figdir +"/barplot_group_abundance_bcr_receptor_subtype.png", bbox_inches="tight")

if tcr is not None:
fig, ax = plt.subplots()
ax = ir.pl.group_abundance(tcr, groupby="chain_pairing", ax=ax)
fig.tight_layout()
fig.savefig(args.figdir +"/barplot_group_abundance_tcr_receptor_subtype.png", bbox_inches="tight")

# clonal expansion

clone_counts = rep.obs.groupby("receptor_type").clone_id.value_counts().to_frame("cell_counts").reset_index()

if bcr is not None:
ax = ir.pl.clonal_expansion(bcr, groupby="sample_id")
ax.set_title("bcr clonal expansion")
plt.savefig(args.figdir +"/barplot_clonal_expansion_bcr.png", bbox_inches="tight" )

if tcr is not None:
ax = ir.pl.clonal_expansion(tcr, groupby="sample_id")
ax.set_title("tcr clonal expansion")
plt.savefig(args.figdir +"/barplot_clonal_expansion_tcr.png", bbox_inches="tight")


L.info("saving anndata and obs in a metadata tsv file")
mdata.update()
write_obs(mdata, output_prefix=args.sampleprefix,
output_suffix="_cell_metadata.tsv")
mdata.write(args.output_mudata)