From 682303d5f9c824df72ab2493037e8063027250fe Mon Sep 17 00:00:00 2001 From: deevdevil88 Date: Mon, 20 Feb 2023 12:52:19 +0000 Subject: [PATCH 01/11] renamed /python/run_scanpyQC_REP.py & commented out barcode_mtd args for bg_concat pipeline_qc_mm.py --- panpipes/pipeline_qc_mm.py | 6 +- python/run_scanpyQC_rep.py | 140 +++++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+), 3 deletions(-) create mode 100644 python/run_scanpyQC_rep.py diff --git a/panpipes/pipeline_qc_mm.py b/panpipes/pipeline_qc_mm.py index ae463972..83aa7c9d 100644 --- a/panpipes/pipeline_qc_mm.py +++ b/panpipes/pipeline_qc_mm.py @@ -285,9 +285,9 @@ def concat_bg_mudatas(infiles, outfile): """ if PARAMS['metadatacols'] is not None and PARAMS['metadatacols'] != "": cmd += " --metadatacols %(metadatacols)s" - if PARAMS["barcode_mtd_include"] is True: - cmd += " --barcode_mtd_df %(barcode_mtd_path)s" - cmd += " --barcode_mtd_metadatacols %(barcode_mtd_metadatacols)s" + # if PARAMS["barcode_mtd_include"] is True: + # cmd += " --barcode_mtd_df %(barcode_mtd_path)s" + # cmd += " --barcode_mtd_metadatacols %(barcode_mtd_metadatacols)s" cmd += " > logs/concat_bg_mudatas.log" job_kwargs["job_threads"] = PARAMS['resources_threads_high'] P.run(cmd, **job_kwargs) diff --git a/python/run_scanpyQC_rep.py b/python/run_scanpyQC_rep.py new file mode 100644 index 00000000..873bc62d --- /dev/null +++ b/python/run_scanpyQC_rep.py @@ -0,0 +1,140 @@ +''' +scanpy QC script GEX +order of QC: +- GEX +- ADT +- Repertoire +- ATAC +''' +import sys +import logging +L = logging.getLogger() +L.setLevel(logging.INFO) +log_handler = logging.StreamHandler(sys.stdout) +formatter = logging.Formatter('%(asctime)s: %(levelname)s - %(message)s') +log_handler.setFormatter(formatter) +L.addHandler(log_handler) + +import warnings +warnings.simplefilter(action='ignore', category=FutureWarning) + +import argparse +import pandas as pd +import muon as mu +import scirpy as ir +import matplotlib.pyplot as plt +plt.ioff() +from panpipes.funcs.io import write_obs +import yaml + +parser = argparse.ArgumentParser() +# required option +parser.add_argument("--sampleprefix", + default="", + help="prefix to prepend when saving the metadata file") +parser.add_argument("--input_mudata", + default="mdata_unfilt.h5mu", + help="") +parser.add_argument("--output_mudata", + default="mdata_unfilt.h5mu", + help="") +parser.add_argument("--figdir", + default="./figures/", + help="path to save the figures to") + +parser.add_argument("--distance_metrics", + default=None, type=yaml.safe_load, + help="what metrics to calculate sequence distance metric? \ + any arguments from scirpy.pp.ir_dist as a dict in format'{arg: value}' ") +parser.add_argument("--clonotype_metrics", + default=None,type=yaml.safe_load, + help="what metrics to calculate sequence distance metric? \ + any arguments from scirpy.pp.define_clonotypes as a dict in format'{arg: value}' ") +args, opt = parser.parse_known_args() +L.info(args) +mdata = mu.read(args.input_mudata) +rep = mdata['rep'] +# chain qc +ir.tl.chain_qc(rep) + +# remove nones, so defaults are used +if args.distance_metrics is not None: + # remove nones, so defaults are used + dist_args = {k: v for k, v in args.distance_metrics.items() if v != "None"} + dist_args = {k: v for k, v in dist_args.items() if v} +else: + dist_args = {} + +L.info("distance args:") +L.info(dist_args) + +ir.pp.ir_dist(rep, **dist_args) + +# pull function arguments from args. +if args.clonotype_metrics is not None: + # remove nones, so defaults are used + clonotype_args = {k: v for k, v in args.clonotype_metrics.items() if v != "None"} + clonotype_args = {k: v for k, v in clonotype_args.items() if v} +else: + clonotype_args={} +# define clonotypes +L.info("clonotypes args:") +L.info(clonotype_args) + +ir.tl.define_clonotypes(rep, **clonotype_args) +ir.tl.clonal_expansion(rep) + + +category_cols = ['has_ir'] +for cc in category_cols: + if pd.api.types.infer_dtype(rep.obs[cc]) != "categorical": + rep.obs[cc] = rep.obs[cc].astype('string').astype('category') + + +if "BCR" in rep.obs['receptor_type'].values: + bcr = rep[rep.obs.receptor_type =="BCR", :].copy() +else: + bcr=None + +if "TCR" in rep.obs.receptor_type.values: + tcr = rep[rep.obs.receptor_type =="TCR", :].copy() +else: + tcr=None + +# plot group abundances +fig, ax = plt.subplots() +ax = ir.pl.group_abundance(rep, groupby="receptor_type", ax=ax) +fig.savefig(args.figdir +"/barplot_group_abundance_receptor_type.png", bbox_inches="tight") + +if bcr is not None: + fig, ax = plt.subplots() + ax = ir.pl.group_abundance(bcr, groupby="chain_pairing", ax=ax) + fig.tight_layout() + fig.savefig(args.figdir +"/barplot_group_abundance_bcr_receptor_subtype.png", bbox_inches="tight") + +if tcr is not None: + fig, ax = plt.subplots() + ax = ir.pl.group_abundance(tcr, groupby="chain_pairing", ax=ax) + fig.tight_layout() + fig.savefig(args.figdir +"/barplot_group_abundance_tcr_receptor_subtype.png", bbox_inches="tight") + +# clonal expansion + +clone_counts = rep.obs.groupby("receptor_type").clone_id.value_counts().to_frame("cell_counts").reset_index() + +if bcr is not None: + ax = ir.pl.clonal_expansion(bcr, groupby="sample_id") + ax.set_title("bcr clonal expansion") + plt.savefig(args.figdir +"/barplot_clonal_expansion_bcr.png", bbox_inches="tight" ) + +if tcr is not None: + ax = ir.pl.clonal_expansion(tcr, groupby="sample_id") + ax.set_title("tcr clonal expansion") + plt.savefig(args.figdir +"/barplot_clonal_expansion_tcr.png", bbox_inches="tight") + + +L.info("saving anndata and obs in a metadata tsv file") +mdata.update() +write_obs(mdata, output_prefix=args.sampleprefix, + output_suffix="_cell_metadata.tsv") +mdata.write(args.output_mudata) \ No newline at end of file From 0f9dea414faade39d2a4b63367e0a98d82865a02 Mon Sep 17 00:00:00 2001 From: deevdevil88 Date: Mon, 20 Feb 2023 15:30:01 +0000 Subject: [PATCH 02/11] made minor changes to subset out hashing_ab as a seperated mod, and also update prot mod to not have HTOs --- python/make_adata_from_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/make_adata_from_csv.py b/python/make_adata_from_csv.py index d75c8d13..3d963547 100644 --- a/python/make_adata_from_csv.py +++ b/python/make_adata_from_csv.py @@ -144,7 +144,7 @@ # create new modality for hashing mdata.mod["hashing_ab"]=mdata["prot"][:, mdata["prot"].var["hashing_ab"]] # subset old modality to remove hashing - mdata["prot"][:, ~mdata["prot"].var["hashing_ab"]] + mdata.mod['prot'] = mdata["prot"][:, ~mdata["prot"].var["hashing_ab"]] except FileNotFoundError: warnings.warn("protein metadata table not found") mdata.update() From 040dda7c37a4df280f09d09babdf81ae34e971b9 Mon Sep 17 00:00:00 2001 From: deevdevil88 Date: Tue, 21 Feb 2023 17:54:09 +0000 Subject: [PATCH 03/11] modifieid scvi model_args names in pipleine_yml to match those used by the scvi.model.SCVI function --- panpipes/pipeline_integration/pipeline.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/panpipes/pipeline_integration/pipeline.yml b/panpipes/pipeline_integration/pipeline.yml index 95f4b338..c49f0702 100644 --- a/panpipes/pipeline_integration/pipeline.yml +++ b/panpipes/pipeline_integration/pipeline.yml @@ -64,8 +64,8 @@ rna: exclude_mt_genes: True mt_column: mt model_args: - nlayers: - nlatent: + n_layers: + n_latent: gene_likelihood: zinb training_args: max_epochs: 400 From e6a67288d577cc1cf1df5e18cac94f9b2c281983 Mon Sep 17 00:00:00 2001 From: deevdevil88 Date: Thu, 23 Feb 2023 16:39:15 +0000 Subject: [PATCH 04/11] added params harmony sigma and npcs to ruffus and renamed param neighbours_n_pcs to neighbous_ndims --- panpipes/pipeline_integration.py | 31 +++++++++++++++------- panpipes/pipeline_integration/pipeline.yml | 29 ++++++++++++-------- python/batch_correct_harmony.py | 16 ++++++----- 3 files changed, 49 insertions(+), 27 deletions(-) diff --git a/panpipes/pipeline_integration.py b/panpipes/pipeline_integration.py index 571c7ca7..ff0db70c 100644 --- a/panpipes/pipeline_integration.py +++ b/panpipes/pipeline_integration.py @@ -160,13 +160,18 @@ def run_harmony(outfile): --modality rna """ # cannot use the normal method for importing params from yaml, because it only works up to depth 2 + harmony_params = PARAMS['rna']['harmony'] + if harmony_params['npcs'] is not None: + cmd += " --harmony_npcs %s" % harmony_params['npcs'] + if harmony_params['sigma'] is not None: + cmd += " --sigma_val %s" % harmony_params['sigma'] neighbor_params = PARAMS['rna']['neighbors'] if neighbor_params['method'] is not None: cmd += " --neighbors_method %s" % neighbor_params['method'] if neighbor_params['metric'] is not None: cmd += " --neighbors_metric %s" % neighbor_params['metric'] - if neighbor_params['npcs'] is not None: - cmd += " --neighbors_n_pcs %s" % neighbor_params['npcs'] + if neighbor_params['ndims'] is not None: + cmd += " --neighbors_ndims %s" % neighbor_params['ndims'] if neighbor_params['k'] is not None: cmd += " --neighbors_k %s" % neighbor_params['k'] cmd += " > logs/rna_harmony.log " @@ -321,13 +326,18 @@ def run_harmony_prot( outfile): --n_threads %(resources_threads_high)s """ cmd += " --modality prot" + harmony_params = PARAMS['prot']['harmony'] + if harmony_params['npcs'] is not None: + cmd += " --harmony_npcs %s" % harmony_params['npcs'] + if harmony_params['sigma'] is not None: + cmd += " --sigma_val %s" % harmony_params['sigma'] neighbor_params = PARAMS['prot']['neighbors'] if neighbor_params['method'] is not None: cmd += " --neighbors_method %s" % neighbor_params['method'] if neighbor_params['metric'] is not None: cmd += " --neighbors_metric %s" % neighbor_params['metric'] - if neighbor_params['npcs'] is not None: - cmd += " --neighbors_n_pcs %s" % neighbor_params['npcs'] + if neighbor_params['ndims'] is not None: + cmd += " --neighbors_ndims %s" % neighbor_params['ndims'] if neighbor_params['k'] is not None: cmd += " --neighbors_k %s" % neighbor_params['k'] cmd += " > logs/prot_harmony.log " @@ -416,16 +426,19 @@ def run_harmony_atac( outfile): --n_threads %(resources_threads_high)s --modality atac """ - - if PARAMS['atac_sigma'] is not None: - cmd += " --sigma_val %(atac_sigma)s" + + harmony_params = PARAMS['atac']['harmony'] + if harmony_params['npcs'] is not None: + cmd += " --harmony_npcs %s" % harmony_params['npcs'] + if harmony_params['sigma'] is not None: + cmd += " --sigma_val %s" % harmony_params['sigma'] neighbor_params = PARAMS['atac']['neighbors'] if neighbor_params['method'] is not None: cmd += " --neighbors_method %s" % neighbor_params['method'] if neighbor_params['metric'] is not None: cmd += " --neighbors_metric %s" % neighbor_params['metric'] - if neighbor_params['npcs'] is not None: - cmd += " --neighbors_n_pcs %s" % neighbor_params['npcs'] + if neighbor_params['ndims'] is not None: + cmd += " --neighbors_ndims %s" % neighbor_params['ndims'] if neighbor_params['k'] is not None: cmd += " --neighbors_k %s" % neighbor_params['k'] cmd += " > logs/atac_harmony.log " diff --git a/panpipes/pipeline_integration/pipeline.yml b/panpipes/pipeline_integration/pipeline.yml index c49f0702..85d16799 100644 --- a/panpipes/pipeline_integration/pipeline.yml +++ b/panpipes/pipeline_integration/pipeline.yml @@ -54,9 +54,11 @@ rna: #----------------------------- # Harmony args #----------------------------- + harmony: # sigma value, used by Harmony - sigma: 0.1 - harmony_npcs: 30 + sigma: 0.1 + # number of pcs, used by Harmony + npcs: 30 #----------------------------- # SCVI args #----------------------------- @@ -64,8 +66,8 @@ rna: exclude_mt_genes: True mt_column: mt model_args: - n_layers: - n_latent: + nlayers: + nlatent: gene_likelihood: zinb training_args: max_epochs: 400 @@ -87,8 +89,9 @@ rna: # number of Principal Components to calculate for neighbours and umap: # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) + # the maximum number of dims for neighbors calculation can only only be lower or equal to the total number of dims for PCA or Harmony # note: scvelo default is 30 - npcs: 30 + ndims: 30 # number of neighbours k: 30 # metric: euclidean | cosine @@ -111,9 +114,11 @@ prot: #---------------------------- # Harmony args #----------------------------- + harmony: # sigma value, used by Harmony - sigma: 0.1 #prot_sigma - harmony_npcs: 30 + sigma: 0.1 + # number of pcs, used by Harmony + npcs: 30 #----------------------------› # find neighbour parameters #----------------------------- @@ -122,7 +127,7 @@ prot: # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) # note: scvelo default is 30 - npcs: 30 + ndims: 30 # number of neighbours k: 30 # metric: euclidean | cosine @@ -144,9 +149,11 @@ atac: #---------------------------- # Harmony args #----------------------------- + harmony: # sigma value, used by Harmony - sigma: 0.1 #prot_sigma - harmony_npcs: 30 + sigma: 0.1 + # number of pcs, used by Harmony + npcs: 30 #---------------------------- # find neighbour parameters #----------------------------- @@ -155,7 +162,7 @@ atac: # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) # note: scvelo default is 30 - npcs: 30 + ndims: 30 # number of neighbours k: 30 # metric: euclidean | cosine diff --git a/python/batch_correct_harmony.py b/python/batch_correct_harmony.py index 12e79fb1..d1900ca5 100644 --- a/python/batch_correct_harmony.py +++ b/python/batch_correct_harmony.py @@ -35,10 +35,12 @@ help='') parser.add_argument('--n_threads', default=1, help="num threads to use for neighbor computations") +parser.add_argument('--harmony_npcs', default=30, + help="npcs for running harmony") parser.add_argument('--sigma_val', default=0.1, help="sigma") -parser.add_argument('--neighbors_n_pcs', - help="n_pcs") +parser.add_argument('--neighbors_ndims', default=30, + help="number of dims for neighbors calculation") parser.add_argument('--neighbors_method', help="neighbours method, scanpy or hnsw") parser.add_argument('--neighbors_k', @@ -73,7 +75,7 @@ adata.obs["comb_columns"] = adata.obs["comb_columns"].astype("category") # run harmony - ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.neighbors_n_pcs)], adata.obs, ["comb_columns"], + ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.harmony_npcs)], adata.obs, ["comb_columns"], sigma = float(args.sigma_val), verbose=True,max_iter_kmeans=30, max_iter_harmony=40) @@ -81,7 +83,7 @@ # make sure that batch is a categorical adata.obs[args.integration_col] = adata.obs[args.integration_col].astype("category") # run harmony - ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.neighbors_n_pcs)], + ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.harmony_npcs)], adata.obs, [args.integration_col], sigma=float(args.sigma_val), @@ -95,10 +97,10 @@ adata.obsm['X_harmony']=adjusted_pcs.values L.info("harmony co-ords derived") -if int(args.neighbors_n_pcs) >adata.obsm['X_harmony'].shape[1]: - L.warn(f"N PCs is larger than X_harmony dimensions, reducing n PCs to {adata.obsm['X_harmony'].shape[1] -1}") +if int(args.neighbors_ndims) >adata.obsm['X_harmony'].shape[1]: + L.warn(f"N Dims is larger than X_harmony dimensions, reducing n Dims to {adata.obsm['X_harmony'].shape[1] -1}") -n_pcs= min(int(args.neighbors_n_pcs), adata.obsm['X_harmony'].shape[1]-1) +n_pcs= min(int(args.neighbors_ndims), adata.obsm['X_harmony'].shape[1]-1) # run neighbours and umap run_neighbors_method_choice(adata, From a5afe43e2a320d6333b055abb554e5499ec59b78 Mon Sep 17 00:00:00 2001 From: deevdevil88 Date: Thu, 23 Feb 2023 16:47:00 +0000 Subject: [PATCH 05/11] just adding back the n_latent and n_layerr change in pipeline.yml for integration --- panpipes/pipeline_integration/pipeline.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/panpipes/pipeline_integration/pipeline.yml b/panpipes/pipeline_integration/pipeline.yml index 85d16799..7bb12ff0 100644 --- a/panpipes/pipeline_integration/pipeline.yml +++ b/panpipes/pipeline_integration/pipeline.yml @@ -66,8 +66,8 @@ rna: exclude_mt_genes: True mt_column: mt model_args: - nlayers: - nlatent: + n_layers: + n_latent: gene_likelihood: zinb training_args: max_epochs: 400 From 2766cd11a51efb5ebccf2771fc2351fe15c65cb9 Mon Sep 17 00:00:00 2001 From: deevdevil88 Date: Thu, 23 Feb 2023 18:25:38 +0000 Subject: [PATCH 06/11] changed npcs to ndims in the batch_correct_wnn.py script to match changes in batch_correct_harmony.py script --- python/batch_correct_wnn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/batch_correct_wnn.py b/python/batch_correct_wnn.py index ae2f373e..6ea235ff 100644 --- a/python/batch_correct_wnn.py +++ b/python/batch_correct_wnn.py @@ -121,7 +121,7 @@ run_neighbors_method_choice(tmp.mod[kmod], method=pkmod['method'], n_neighbors=int(pkmod['k']), - n_pcs=min(int(pkmod['npcs']), mdata.var.shape[0]-1), #this should be the # rows of var, not obs + n_pcs=min(int(pkmod['ndims']), mdata.var.shape[0]-1), #this should be the # rows of var, not obs metric=pkmod['metric'], #does this throw an error if no PCA for any single mod is stored? use_rep=repuse, From a7883a2acd8e39890ac12b8342058d06d0d1717b Mon Sep 17 00:00:00 2001 From: deevdevil88 Date: Thu, 23 Feb 2023 19:22:00 +0000 Subject: [PATCH 07/11] have reverted back the changes to neighbours_ndims to neighbours_n_pcs --- panpipes/pipeline_integration.py | 12 ++++++------ panpipes/pipeline_integration/pipeline.yml | 2 +- python/batch_correct_harmony.py | 8 ++++---- python/batch_correct_wnn.py | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/panpipes/pipeline_integration.py b/panpipes/pipeline_integration.py index ff0db70c..2ac64f6d 100644 --- a/panpipes/pipeline_integration.py +++ b/panpipes/pipeline_integration.py @@ -170,8 +170,8 @@ def run_harmony(outfile): cmd += " --neighbors_method %s" % neighbor_params['method'] if neighbor_params['metric'] is not None: cmd += " --neighbors_metric %s" % neighbor_params['metric'] - if neighbor_params['ndims'] is not None: - cmd += " --neighbors_ndims %s" % neighbor_params['ndims'] + if neighbor_params['npcs'] is not None: + cmd += " --neighbors_n_pcs %s" % neighbor_params['npcs'] if neighbor_params['k'] is not None: cmd += " --neighbors_k %s" % neighbor_params['k'] cmd += " > logs/rna_harmony.log " @@ -336,8 +336,8 @@ def run_harmony_prot( outfile): cmd += " --neighbors_method %s" % neighbor_params['method'] if neighbor_params['metric'] is not None: cmd += " --neighbors_metric %s" % neighbor_params['metric'] - if neighbor_params['ndims'] is not None: - cmd += " --neighbors_ndims %s" % neighbor_params['ndims'] + if neighbor_params['npcs'] is not None: + cmd += " --neighbors_n_pcs %s" % neighbor_params['npcs'] if neighbor_params['k'] is not None: cmd += " --neighbors_k %s" % neighbor_params['k'] cmd += " > logs/prot_harmony.log " @@ -437,8 +437,8 @@ def run_harmony_atac( outfile): cmd += " --neighbors_method %s" % neighbor_params['method'] if neighbor_params['metric'] is not None: cmd += " --neighbors_metric %s" % neighbor_params['metric'] - if neighbor_params['ndims'] is not None: - cmd += " --neighbors_ndims %s" % neighbor_params['ndims'] + if neighbor_params['npcs'] is not None: + cmd += " --neighbors_n_pcs %s" % neighbor_params['npcs'] if neighbor_params['k'] is not None: cmd += " --neighbors_k %s" % neighbor_params['k'] cmd += " > logs/atac_harmony.log " diff --git a/panpipes/pipeline_integration/pipeline.yml b/panpipes/pipeline_integration/pipeline.yml index 7bb12ff0..6f94da90 100644 --- a/panpipes/pipeline_integration/pipeline.yml +++ b/panpipes/pipeline_integration/pipeline.yml @@ -91,7 +91,7 @@ rna: # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) # the maximum number of dims for neighbors calculation can only only be lower or equal to the total number of dims for PCA or Harmony # note: scvelo default is 30 - ndims: 30 + npcs: 30 # number of neighbours k: 30 # metric: euclidean | cosine diff --git a/python/batch_correct_harmony.py b/python/batch_correct_harmony.py index d1900ca5..d5890ea7 100644 --- a/python/batch_correct_harmony.py +++ b/python/batch_correct_harmony.py @@ -39,8 +39,8 @@ help="npcs for running harmony") parser.add_argument('--sigma_val', default=0.1, help="sigma") -parser.add_argument('--neighbors_ndims', default=30, - help="number of dims for neighbors calculation") +parser.add_argument('--neighbors_n_pcs', default=30, + help="n_pcs") parser.add_argument('--neighbors_method', help="neighbours method, scanpy or hnsw") parser.add_argument('--neighbors_k', @@ -97,10 +97,10 @@ adata.obsm['X_harmony']=adjusted_pcs.values L.info("harmony co-ords derived") -if int(args.neighbors_ndims) >adata.obsm['X_harmony'].shape[1]: +if int(args.neighbors_n_pcs) >adata.obsm['X_harmony'].shape[1]: L.warn(f"N Dims is larger than X_harmony dimensions, reducing n Dims to {adata.obsm['X_harmony'].shape[1] -1}") -n_pcs= min(int(args.neighbors_ndims), adata.obsm['X_harmony'].shape[1]-1) +n_pcs= min(int(args.neighbors_n_pcs), adata.obsm['X_harmony'].shape[1]-1) # run neighbours and umap run_neighbors_method_choice(adata, diff --git a/python/batch_correct_wnn.py b/python/batch_correct_wnn.py index 6ea235ff..ae2f373e 100644 --- a/python/batch_correct_wnn.py +++ b/python/batch_correct_wnn.py @@ -121,7 +121,7 @@ run_neighbors_method_choice(tmp.mod[kmod], method=pkmod['method'], n_neighbors=int(pkmod['k']), - n_pcs=min(int(pkmod['ndims']), mdata.var.shape[0]-1), #this should be the # rows of var, not obs + n_pcs=min(int(pkmod['npcs']), mdata.var.shape[0]-1), #this should be the # rows of var, not obs metric=pkmod['metric'], #does this throw an error if no PCA for any single mod is stored? use_rep=repuse, From c7596fb85dc6ec39733ff401c87053d21efd663a Mon Sep 17 00:00:00 2001 From: deevdevil88 Date: Thu, 23 Feb 2023 19:27:26 +0000 Subject: [PATCH 08/11] reverted back to neighbors_npcs in pipeline.yml as before --- panpipes/pipeline_integration/pipeline.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/panpipes/pipeline_integration/pipeline.yml b/panpipes/pipeline_integration/pipeline.yml index 6f94da90..4e008214 100644 --- a/panpipes/pipeline_integration/pipeline.yml +++ b/panpipes/pipeline_integration/pipeline.yml @@ -127,7 +127,7 @@ prot: # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) # note: scvelo default is 30 - ndims: 30 + npcs: 30 # number of neighbours k: 30 # metric: euclidean | cosine @@ -162,7 +162,7 @@ atac: # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) # note: scvelo default is 30 - ndims: 30 + npcs: 30 # number of neighbours k: 30 # metric: euclidean | cosine From c59b5eda730881858bec65a6ecdb034fb315ae15 Mon Sep 17 00:00:00 2001 From: deevdevil88 Date: Thu, 23 Feb 2023 19:32:32 +0000 Subject: [PATCH 09/11] reverting back all changes to neighbors_n_pcs --- python/batch_correct_harmony.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/batch_correct_harmony.py b/python/batch_correct_harmony.py index d5890ea7..710cdb0d 100644 --- a/python/batch_correct_harmony.py +++ b/python/batch_correct_harmony.py @@ -98,7 +98,7 @@ L.info("harmony co-ords derived") if int(args.neighbors_n_pcs) >adata.obsm['X_harmony'].shape[1]: - L.warn(f"N Dims is larger than X_harmony dimensions, reducing n Dims to {adata.obsm['X_harmony'].shape[1] -1}") + L.warn(f"N PCs is larger than X_harmony dimensions, reducing n PCs to {adata.obsm['X_harmony'].shape[1] -1}") n_pcs= min(int(args.neighbors_n_pcs), adata.obsm['X_harmony'].shape[1]-1) From 84ea033aa0b7a8c3570b0228075761d58d992891 Mon Sep 17 00:00:00 2001 From: deevdevil88 Date: Fri, 24 Feb 2023 11:00:03 +0000 Subject: [PATCH 10/11] parameterised harmony sigma for panpipes --- panpipes/pipeline_integration.py | 8 +++++++- panpipes/pipeline_integration/pipeline.yml | 6 ++++++ python/batch_correct_harmony.py | 5 ++++- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/panpipes/pipeline_integration.py b/panpipes/pipeline_integration.py index 2ac64f6d..a989f821 100644 --- a/panpipes/pipeline_integration.py +++ b/panpipes/pipeline_integration.py @@ -164,7 +164,9 @@ def run_harmony(outfile): if harmony_params['npcs'] is not None: cmd += " --harmony_npcs %s" % harmony_params['npcs'] if harmony_params['sigma'] is not None: - cmd += " --sigma_val %s" % harmony_params['sigma'] + cmd += " --sigma_val %s" % harmony_params['sigma'] + if harmony_params['theta'] is not None: + cmd += " --theta_val %s" % harmony_params['theta'] neighbor_params = PARAMS['rna']['neighbors'] if neighbor_params['method'] is not None: cmd += " --neighbors_method %s" % neighbor_params['method'] @@ -331,6 +333,8 @@ def run_harmony_prot( outfile): cmd += " --harmony_npcs %s" % harmony_params['npcs'] if harmony_params['sigma'] is not None: cmd += " --sigma_val %s" % harmony_params['sigma'] + if harmony_params['theta'] is not None: + cmd += " --theta_val %s" % harmony_params['theta'] neighbor_params = PARAMS['prot']['neighbors'] if neighbor_params['method'] is not None: cmd += " --neighbors_method %s" % neighbor_params['method'] @@ -432,6 +436,8 @@ def run_harmony_atac( outfile): cmd += " --harmony_npcs %s" % harmony_params['npcs'] if harmony_params['sigma'] is not None: cmd += " --sigma_val %s" % harmony_params['sigma'] + if harmony_params['theta'] is not None: + cmd += " --theta_val %s" % harmony_params['theta'] neighbor_params = PARAMS['atac']['neighbors'] if neighbor_params['method'] is not None: cmd += " --neighbors_method %s" % neighbor_params['method'] diff --git a/panpipes/pipeline_integration/pipeline.yml b/panpipes/pipeline_integration/pipeline.yml index 4e008214..5f9fb2cb 100644 --- a/panpipes/pipeline_integration/pipeline.yml +++ b/panpipes/pipeline_integration/pipeline.yml @@ -57,6 +57,8 @@ rna: harmony: # sigma value, used by Harmony sigma: 0.1 + # theta value used by Harmony, default is 1 + theta: 1.0 # number of pcs, used by Harmony npcs: 30 #----------------------------- @@ -117,6 +119,8 @@ prot: harmony: # sigma value, used by Harmony sigma: 0.1 + # theta value used by Harmony, default is 1 + theta: 1.0 # number of pcs, used by Harmony npcs: 30 #----------------------------› @@ -152,6 +156,8 @@ atac: harmony: # sigma value, used by Harmony sigma: 0.1 + # theta value used by Harmony, default is 1 + theta: 1.0 # number of pcs, used by Harmony npcs: 30 #---------------------------- diff --git a/python/batch_correct_harmony.py b/python/batch_correct_harmony.py index 710cdb0d..07dc23b4 100644 --- a/python/batch_correct_harmony.py +++ b/python/batch_correct_harmony.py @@ -39,6 +39,8 @@ help="npcs for running harmony") parser.add_argument('--sigma_val', default=0.1, help="sigma") + parser.add_argument('--theta_val', default=1.0, + help="theta") parser.add_argument('--neighbors_n_pcs', default=30, help="n_pcs") parser.add_argument('--neighbors_method', @@ -76,7 +78,7 @@ # run harmony ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.harmony_npcs)], adata.obs, ["comb_columns"], - sigma = float(args.sigma_val), verbose=True,max_iter_kmeans=30, + sigma = float(args.sigma_val),theta = float(args.theta_val),verbose=True,max_iter_kmeans=30, max_iter_harmony=40) else: @@ -87,6 +89,7 @@ adata.obs, [args.integration_col], sigma=float(args.sigma_val), + theta = float(args.theta_val), verbose=True,max_iter_kmeans=30, max_iter_harmony=40) From 220c774065df04288317b047bc8d28ff3da4f3bd Mon Sep 17 00:00:00 2001 From: deevdevil88 Date: Fri, 24 Feb 2023 11:25:50 +0000 Subject: [PATCH 11/11] indentation error in batch_correct_harmony.py --- python/batch_correct_harmony.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/batch_correct_harmony.py b/python/batch_correct_harmony.py index 07dc23b4..019b9ee8 100644 --- a/python/batch_correct_harmony.py +++ b/python/batch_correct_harmony.py @@ -39,7 +39,7 @@ help="npcs for running harmony") parser.add_argument('--sigma_val', default=0.1, help="sigma") - parser.add_argument('--theta_val', default=1.0, +parser.add_argument('--theta_val', default=1.0, help="theta") parser.add_argument('--neighbors_n_pcs', default=30, help="n_pcs")