From 682303d5f9c824df72ab2493037e8063027250fe Mon Sep 17 00:00:00 2001
From: deevdevil88 <devikathedevil@gmail.com>
Date: Mon, 20 Feb 2023 12:52:19 +0000
Subject: [PATCH 01/11] renamed /python/run_scanpyQC_REP.py & commented out
 barcode_mtd args for bg_concat pipeline_qc_mm.py

---
 panpipes/pipeline_qc_mm.py |   6 +-
 python/run_scanpyQC_rep.py | 140 +++++++++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+), 3 deletions(-)
 create mode 100644 python/run_scanpyQC_rep.py

diff --git a/panpipes/pipeline_qc_mm.py b/panpipes/pipeline_qc_mm.py
index ae463972..83aa7c9d 100644
--- a/panpipes/pipeline_qc_mm.py
+++ b/panpipes/pipeline_qc_mm.py
@@ -285,9 +285,9 @@ def concat_bg_mudatas(infiles, outfile):
         """
     if PARAMS['metadatacols'] is not None and PARAMS['metadatacols'] != "":
         cmd += " --metadatacols  %(metadatacols)s"
-    if PARAMS["barcode_mtd_include"] is True:
-        cmd += " --barcode_mtd_df %(barcode_mtd_path)s"
-        cmd += " --barcode_mtd_metadatacols %(barcode_mtd_metadatacols)s"
+   # if PARAMS["barcode_mtd_include"] is True:
+   #     cmd += " --barcode_mtd_df %(barcode_mtd_path)s"
+    #    cmd += " --barcode_mtd_metadatacols %(barcode_mtd_metadatacols)s"
     cmd += " > logs/concat_bg_mudatas.log"
     job_kwargs["job_threads"] = PARAMS['resources_threads_high']
     P.run(cmd, **job_kwargs)
diff --git a/python/run_scanpyQC_rep.py b/python/run_scanpyQC_rep.py
new file mode 100644
index 00000000..873bc62d
--- /dev/null
+++ b/python/run_scanpyQC_rep.py
@@ -0,0 +1,140 @@
+'''
+scanpy QC script GEX
+order of QC:
+- GEX
+- ADT
+- Repertoire
+- ATAC
+'''
+import sys
+import logging
+L = logging.getLogger()
+L.setLevel(logging.INFO)
+log_handler = logging.StreamHandler(sys.stdout)
+formatter = logging.Formatter('%(asctime)s: %(levelname)s - %(message)s')
+log_handler.setFormatter(formatter)
+L.addHandler(log_handler)
+
+import warnings
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+import argparse
+import pandas as pd
+import muon as mu
+import scirpy as ir
+import matplotlib.pyplot as plt
+plt.ioff()
+from panpipes.funcs.io import write_obs
+import yaml
+
+parser = argparse.ArgumentParser()
+# required option
+parser.add_argument("--sampleprefix",
+                    default="",
+                    help="prefix to prepend when saving the metadata file")
+parser.add_argument("--input_mudata",
+                    default="mdata_unfilt.h5mu",
+                    help="")
+parser.add_argument("--output_mudata",
+                    default="mdata_unfilt.h5mu",
+                    help="")
+parser.add_argument("--figdir",
+                    default="./figures/",
+                    help="path to save the figures to")
+
+parser.add_argument("--distance_metrics",
+                    default=None, type=yaml.safe_load,
+                    help="what metrics to calculate sequence distance metric? \
+                    any arguments from scirpy.pp.ir_dist as a dict in format'{arg: value}' ")
+parser.add_argument("--clonotype_metrics",
+                    default=None,type=yaml.safe_load,
+                    help="what metrics to calculate sequence distance metric? \
+                    any arguments from scirpy.pp.define_clonotypes as a dict in format'{arg: value}' ")
+args, opt = parser.parse_known_args()
+L.info(args)
+mdata = mu.read(args.input_mudata)
+rep = mdata['rep']
+# chain qc
+ir.tl.chain_qc(rep)
+
+# remove nones, so defaults are used
+if args.distance_metrics is not None:
+    # remove nones, so defaults are used
+    dist_args = {k: v for k, v in args.distance_metrics.items() if v != "None"}
+    dist_args = {k: v for k, v in dist_args.items() if v}
+else:
+    dist_args = {}
+
+L.info("distance args:")
+L.info(dist_args)
+
+ir.pp.ir_dist(rep, **dist_args)
+
+# pull function arguments from args. 
+if args.clonotype_metrics is not None:
+    # remove nones, so defaults are used
+    clonotype_args = {k: v for k, v in args.clonotype_metrics.items() if v != "None"}
+    clonotype_args = {k: v for k, v in clonotype_args.items() if v}
+else:
+    clonotype_args={}
+# define clonotypes
+L.info("clonotypes args:")
+L.info(clonotype_args)
+
+ir.tl.define_clonotypes(rep, **clonotype_args)
+ir.tl.clonal_expansion(rep)
+
+    
+category_cols = ['has_ir']
+for cc in category_cols:
+    if pd.api.types.infer_dtype(rep.obs[cc]) != "categorical":
+        rep.obs[cc] = rep.obs[cc].astype('string').astype('category')
+
+
+if "BCR" in rep.obs['receptor_type'].values:
+    bcr = rep[rep.obs.receptor_type =="BCR", :].copy()
+else:
+    bcr=None
+    
+if "TCR" in rep.obs.receptor_type.values:
+    tcr = rep[rep.obs.receptor_type =="TCR", :].copy()
+else:
+    tcr=None
+
+# plot group abundances
+fig, ax = plt.subplots()
+ax = ir.pl.group_abundance(rep, groupby="receptor_type", ax=ax)
+fig.savefig(args.figdir +"/barplot_group_abundance_receptor_type.png", bbox_inches="tight")
+
+if bcr is not None:
+    fig, ax = plt.subplots()
+    ax = ir.pl.group_abundance(bcr, groupby="chain_pairing", ax=ax)
+    fig.tight_layout()
+    fig.savefig(args.figdir +"/barplot_group_abundance_bcr_receptor_subtype.png", bbox_inches="tight")
+
+if tcr is not None:
+    fig, ax = plt.subplots()
+    ax = ir.pl.group_abundance(tcr, groupby="chain_pairing",  ax=ax)
+    fig.tight_layout()
+    fig.savefig(args.figdir +"/barplot_group_abundance_tcr_receptor_subtype.png", bbox_inches="tight")
+
+# clonal expansion
+
+clone_counts = rep.obs.groupby("receptor_type").clone_id.value_counts().to_frame("cell_counts").reset_index()
+
+if bcr is not None:
+    ax = ir.pl.clonal_expansion(bcr, groupby="sample_id")
+    ax.set_title("bcr clonal expansion")
+    plt.savefig(args.figdir +"/barplot_clonal_expansion_bcr.png", bbox_inches="tight" )
+
+if tcr is not None:
+    ax = ir.pl.clonal_expansion(tcr, groupby="sample_id")
+    ax.set_title("tcr clonal expansion")
+    plt.savefig(args.figdir +"/barplot_clonal_expansion_tcr.png", bbox_inches="tight")
+
+
+L.info("saving anndata and obs in a metadata tsv file")
+mdata.update()
+write_obs(mdata, output_prefix=args.sampleprefix, 
+    output_suffix="_cell_metadata.tsv")
+mdata.write(args.output_mudata)
\ No newline at end of file

From 0f9dea414faade39d2a4b63367e0a98d82865a02 Mon Sep 17 00:00:00 2001
From: deevdevil88 <devikathedevil@gmail.com>
Date: Mon, 20 Feb 2023 15:30:01 +0000
Subject: [PATCH 02/11] made minor changes to subset out hashing_ab as a
 seperated mod, and also update prot mod to not have HTOs

---
 python/make_adata_from_csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/make_adata_from_csv.py b/python/make_adata_from_csv.py
index d75c8d13..3d963547 100644
--- a/python/make_adata_from_csv.py
+++ b/python/make_adata_from_csv.py
@@ -144,7 +144,7 @@
                 # create new modality for hashing
                 mdata.mod["hashing_ab"]=mdata["prot"][:, mdata["prot"].var["hashing_ab"]]
                 # subset old modality to remove hashing
-                mdata["prot"][:, ~mdata["prot"].var["hashing_ab"]]
+                mdata.mod['prot'] = mdata["prot"][:, ~mdata["prot"].var["hashing_ab"]]
         except FileNotFoundError:
             warnings.warn("protein metadata table not found")
     mdata.update()

From 040dda7c37a4df280f09d09babdf81ae34e971b9 Mon Sep 17 00:00:00 2001
From: deevdevil88 <devikathedevil@gmail.com>
Date: Tue, 21 Feb 2023 17:54:09 +0000
Subject: [PATCH 03/11] modifieid scvi model_args names in pipleine_yml to
 match those used by the scvi.model.SCVI function

---
 panpipes/pipeline_integration/pipeline.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/panpipes/pipeline_integration/pipeline.yml b/panpipes/pipeline_integration/pipeline.yml
index 95f4b338..c49f0702 100644
--- a/panpipes/pipeline_integration/pipeline.yml
+++ b/panpipes/pipeline_integration/pipeline.yml
@@ -64,8 +64,8 @@ rna:
     exclude_mt_genes: True
     mt_column: mt
     model_args:
-        nlayers: 
-        nlatent:
+        n_layers: 
+        n_latent:
         gene_likelihood: zinb
     training_args:
         max_epochs: 400

From e6a67288d577cc1cf1df5e18cac94f9b2c281983 Mon Sep 17 00:00:00 2001
From: deevdevil88 <devikathedevil@gmail.com>
Date: Thu, 23 Feb 2023 16:39:15 +0000
Subject: [PATCH 04/11] added params harmony sigma and npcs to ruffus and
 renamed param neighbours_n_pcs to neighbous_ndims

---
 panpipes/pipeline_integration.py           | 31 +++++++++++++++-------
 panpipes/pipeline_integration/pipeline.yml | 29 ++++++++++++--------
 python/batch_correct_harmony.py            | 16 ++++++-----
 3 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/panpipes/pipeline_integration.py b/panpipes/pipeline_integration.py
index 571c7ca7..ff0db70c 100644
--- a/panpipes/pipeline_integration.py
+++ b/panpipes/pipeline_integration.py
@@ -160,13 +160,18 @@ def run_harmony(outfile):
      --modality rna
      """
     # cannot use the normal method for importing params from yaml, because it only works up to depth 2
+    harmony_params = PARAMS['rna']['harmony']
+    if harmony_params['npcs'] is not None:
+        cmd += " --harmony_npcs %s" % harmony_params['npcs']
+    if harmony_params['sigma'] is not None:
+        cmd += " --sigma_val %s" % harmony_params['sigma']    
     neighbor_params = PARAMS['rna']['neighbors']
     if neighbor_params['method'] is not None:
         cmd += " --neighbors_method %s" % neighbor_params['method']
     if neighbor_params['metric'] is not None:
         cmd += " --neighbors_metric %s" % neighbor_params['metric']
-    if neighbor_params['npcs'] is not None:
-        cmd += " --neighbors_n_pcs %s"  % neighbor_params['npcs']
+    if neighbor_params['ndims'] is not None:
+        cmd += " --neighbors_ndims %s"  % neighbor_params['ndims']
     if neighbor_params['k'] is not None:
         cmd += " --neighbors_k %s" % neighbor_params['k']
     cmd += " > logs/rna_harmony.log " 
@@ -321,13 +326,18 @@ def run_harmony_prot( outfile):
      --n_threads %(resources_threads_high)s
      """
     cmd += " --modality prot"
+    harmony_params = PARAMS['prot']['harmony']
+    if harmony_params['npcs'] is not None:
+        cmd += " --harmony_npcs %s" % harmony_params['npcs']
+    if harmony_params['sigma'] is not None:
+        cmd += " --sigma_val %s" % harmony_params['sigma'] 
     neighbor_params = PARAMS['prot']['neighbors']
     if neighbor_params['method'] is not None:
         cmd += " --neighbors_method %s" % neighbor_params['method']
     if neighbor_params['metric'] is not None:
         cmd += " --neighbors_metric %s" % neighbor_params['metric']
-    if neighbor_params['npcs'] is not None:
-        cmd += " --neighbors_n_pcs %s"  % neighbor_params['npcs']
+    if neighbor_params['ndims'] is not None:
+        cmd += " --neighbors_ndims %s"  % neighbor_params['ndims']
     if neighbor_params['k'] is not None:
         cmd += " --neighbors_k %s" % neighbor_params['k']
     cmd += " > logs/prot_harmony.log " 
@@ -416,16 +426,19 @@ def run_harmony_atac( outfile):
      --n_threads %(resources_threads_high)s
      --modality atac
      """
-    
-    if PARAMS['atac_sigma'] is not None:
-        cmd += " --sigma_val %(atac_sigma)s"
+
+    harmony_params = PARAMS['atac']['harmony']
+    if harmony_params['npcs'] is not None:
+        cmd += " --harmony_npcs %s" % harmony_params['npcs']
+    if harmony_params['sigma'] is not None:
+        cmd += " --sigma_val %s" % harmony_params['sigma'] 
     neighbor_params = PARAMS['atac']['neighbors']
     if neighbor_params['method'] is not None:
         cmd += " --neighbors_method %s" % neighbor_params['method']
     if neighbor_params['metric'] is not None:
         cmd += " --neighbors_metric %s" % neighbor_params['metric']
-    if neighbor_params['npcs'] is not None:
-        cmd += " --neighbors_n_pcs %s"  % neighbor_params['npcs']
+    if neighbor_params['ndims'] is not None:
+        cmd += " --neighbors_ndims %s"  % neighbor_params['ndims']
     if neighbor_params['k'] is not None:
         cmd += " --neighbors_k %s" % neighbor_params['k']
     cmd += " > logs/atac_harmony.log " 
diff --git a/panpipes/pipeline_integration/pipeline.yml b/panpipes/pipeline_integration/pipeline.yml
index c49f0702..85d16799 100644
--- a/panpipes/pipeline_integration/pipeline.yml
+++ b/panpipes/pipeline_integration/pipeline.yml
@@ -54,9 +54,11 @@ rna:
   #-----------------------------
   # Harmony args
   #-----------------------------
+  harmony:
   # sigma value, used by Harmony
-  sigma: 0.1 
-  harmony_npcs: 30
+    sigma: 0.1 
+  # number of pcs, used by Harmony
+    npcs: 30
   #-----------------------------
   # SCVI args
   #-----------------------------
@@ -64,8 +66,8 @@ rna:
     exclude_mt_genes: True
     mt_column: mt
     model_args:
-        n_layers: 
-        n_latent:
+        nlayers: 
+        nlatent:
         gene_likelihood: zinb
     training_args:
         max_epochs: 400
@@ -87,8 +89,9 @@ rna:
   # number of Principal Components to calculate for neighbours and umap:
   #   -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on
   #   -if Harmony is the method of choice, it will use these components to create a corrected dim red.)
+  # the maximum number of dims for neighbors calculation can only only be lower or equal to the total number of dims for PCA or Harmony
   # note: scvelo default is 30
-    npcs: 30
+    ndims: 30
     # number of neighbours
     k: 30
     # metric: euclidean | cosine
@@ -111,9 +114,11 @@ prot:
   #----------------------------
   # Harmony args
   #-----------------------------
+  harmony:
   # sigma value, used by Harmony
-  sigma: 0.1 #prot_sigma
-  harmony_npcs: 30
+    sigma: 0.1 
+  # number of pcs, used by Harmony
+    npcs: 30
   #----------------------------›
   # find neighbour parameters
   #-----------------------------
@@ -122,7 +127,7 @@ prot:
     #   -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on
     #   -if Harmony is the method of choice, it will use these components to create a corrected dim red.)
     # note: scvelo default is 30
-    npcs: 30
+    ndims: 30
     # number of neighbours
     k: 30
     # metric: euclidean | cosine
@@ -144,9 +149,11 @@ atac:
   #----------------------------
   # Harmony args
   #-----------------------------
+  harmony:
   # sigma value, used by Harmony
-  sigma: 0.1 #prot_sigma
-  harmony_npcs: 30
+    sigma: 0.1 
+  # number of pcs, used by Harmony
+    npcs: 30
   #----------------------------
   # find neighbour parameters
   #-----------------------------
@@ -155,7 +162,7 @@ atac:
     #   -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on
     #   -if Harmony is the method of choice, it will use these components to create a corrected dim red.)
     # note: scvelo default is 30
-    npcs: 30
+    ndims: 30
     # number of neighbours
     k: 30
     # metric: euclidean | cosine
diff --git a/python/batch_correct_harmony.py b/python/batch_correct_harmony.py
index 12e79fb1..d1900ca5 100644
--- a/python/batch_correct_harmony.py
+++ b/python/batch_correct_harmony.py
@@ -35,10 +35,12 @@
                     help='')
 parser.add_argument('--n_threads', default=1,
                     help="num threads to use for neighbor computations")
+parser.add_argument('--harmony_npcs', default=30,
+                    help="npcs for running harmony")        
 parser.add_argument('--sigma_val', default=0.1,
                     help="sigma")
-parser.add_argument('--neighbors_n_pcs',
-                    help="n_pcs")
+parser.add_argument('--neighbors_ndims', default=30,
+                    help="number of dims for neighbors calculation")
 parser.add_argument('--neighbors_method',
                     help="neighbours method, scanpy or hnsw")
 parser.add_argument('--neighbors_k',
@@ -73,7 +75,7 @@
     adata.obs["comb_columns"] = adata.obs["comb_columns"].astype("category")
     # run harmony
 
-    ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.neighbors_n_pcs)], adata.obs, ["comb_columns"], 
+    ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.harmony_npcs)], adata.obs, ["comb_columns"], 
                                        sigma = float(args.sigma_val), verbose=True,max_iter_kmeans=30, 
                                        max_iter_harmony=40)
 
@@ -81,7 +83,7 @@
     # make sure that batch is a categorical
     adata.obs[args.integration_col] = adata.obs[args.integration_col].astype("category")
     # run harmony
-    ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.neighbors_n_pcs)],
+    ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.harmony_npcs)],
                         adata.obs,
                         [args.integration_col],
                         sigma=float(args.sigma_val),
@@ -95,10 +97,10 @@
 adata.obsm['X_harmony']=adjusted_pcs.values
 L.info("harmony co-ords derived")
 
-if int(args.neighbors_n_pcs) >adata.obsm['X_harmony'].shape[1]:
-    L.warn(f"N PCs is larger than X_harmony dimensions, reducing n PCs to  {adata.obsm['X_harmony'].shape[1] -1}")
+if int(args.neighbors_ndims) >adata.obsm['X_harmony'].shape[1]:
+    L.warn(f"N Dims is larger than X_harmony dimensions, reducing n Dims to  {adata.obsm['X_harmony'].shape[1] -1}")
 
-n_pcs= min(int(args.neighbors_n_pcs), adata.obsm['X_harmony'].shape[1]-1)
+n_pcs= min(int(args.neighbors_ndims), adata.obsm['X_harmony'].shape[1]-1)
 
 # run neighbours and umap 
 run_neighbors_method_choice(adata, 

From a5afe43e2a320d6333b055abb554e5499ec59b78 Mon Sep 17 00:00:00 2001
From: deevdevil88 <devikathedevil@gmail.com>
Date: Thu, 23 Feb 2023 16:47:00 +0000
Subject: [PATCH 05/11] just adding back the n_latent and n_layerr change in
 pipeline.yml for integration

---
 panpipes/pipeline_integration/pipeline.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/panpipes/pipeline_integration/pipeline.yml b/panpipes/pipeline_integration/pipeline.yml
index 85d16799..7bb12ff0 100644
--- a/panpipes/pipeline_integration/pipeline.yml
+++ b/panpipes/pipeline_integration/pipeline.yml
@@ -66,8 +66,8 @@ rna:
     exclude_mt_genes: True
     mt_column: mt
     model_args:
-        nlayers: 
-        nlatent:
+        n_layers: 
+        n_latent:
         gene_likelihood: zinb
     training_args:
         max_epochs: 400

From 2766cd11a51efb5ebccf2771fc2351fe15c65cb9 Mon Sep 17 00:00:00 2001
From: deevdevil88 <devikathedevil@gmail.com>
Date: Thu, 23 Feb 2023 18:25:38 +0000
Subject: [PATCH 06/11] changed npcs to ndims in the batch_correct_wnn.py
 script to match changes in batch_correct_harmony.py script

---
 python/batch_correct_wnn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/batch_correct_wnn.py b/python/batch_correct_wnn.py
index ae2f373e..6ea235ff 100644
--- a/python/batch_correct_wnn.py
+++ b/python/batch_correct_wnn.py
@@ -121,7 +121,7 @@
         run_neighbors_method_choice(tmp.mod[kmod], 
                 method=pkmod['method'], 
                 n_neighbors=int(pkmod['k']), 
-                n_pcs=min(int(pkmod['npcs']), mdata.var.shape[0]-1), #this should be the # rows of var, not obs
+                n_pcs=min(int(pkmod['ndims']), mdata.var.shape[0]-1), #this should be the # rows of var, not obs
                 metric=pkmod['metric'], 
                 #does this throw an error if no PCA for any single mod is stored?
                 use_rep=repuse,

From a7883a2acd8e39890ac12b8342058d06d0d1717b Mon Sep 17 00:00:00 2001
From: deevdevil88 <devikathedevil@gmail.com>
Date: Thu, 23 Feb 2023 19:22:00 +0000
Subject: [PATCH 07/11] have reverted back the changes to neighbours_ndims to
 neighbours_n_pcs

---
 panpipes/pipeline_integration.py           | 12 ++++++------
 panpipes/pipeline_integration/pipeline.yml |  2 +-
 python/batch_correct_harmony.py            |  8 ++++----
 python/batch_correct_wnn.py                |  2 +-
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/panpipes/pipeline_integration.py b/panpipes/pipeline_integration.py
index ff0db70c..2ac64f6d 100644
--- a/panpipes/pipeline_integration.py
+++ b/panpipes/pipeline_integration.py
@@ -170,8 +170,8 @@ def run_harmony(outfile):
         cmd += " --neighbors_method %s" % neighbor_params['method']
     if neighbor_params['metric'] is not None:
         cmd += " --neighbors_metric %s" % neighbor_params['metric']
-    if neighbor_params['ndims'] is not None:
-        cmd += " --neighbors_ndims %s"  % neighbor_params['ndims']
+    if neighbor_params['npcs'] is not None:
+        cmd += " --neighbors_n_pcs %s"  % neighbor_params['npcs']
     if neighbor_params['k'] is not None:
         cmd += " --neighbors_k %s" % neighbor_params['k']
     cmd += " > logs/rna_harmony.log " 
@@ -336,8 +336,8 @@ def run_harmony_prot( outfile):
         cmd += " --neighbors_method %s" % neighbor_params['method']
     if neighbor_params['metric'] is not None:
         cmd += " --neighbors_metric %s" % neighbor_params['metric']
-    if neighbor_params['ndims'] is not None:
-        cmd += " --neighbors_ndims %s"  % neighbor_params['ndims']
+    if neighbor_params['npcs'] is not None:
+        cmd += " --neighbors_n_pcs %s"  % neighbor_params['npcs']
     if neighbor_params['k'] is not None:
         cmd += " --neighbors_k %s" % neighbor_params['k']
     cmd += " > logs/prot_harmony.log " 
@@ -437,8 +437,8 @@ def run_harmony_atac( outfile):
         cmd += " --neighbors_method %s" % neighbor_params['method']
     if neighbor_params['metric'] is not None:
         cmd += " --neighbors_metric %s" % neighbor_params['metric']
-    if neighbor_params['ndims'] is not None:
-        cmd += " --neighbors_ndims %s"  % neighbor_params['ndims']
+    if neighbor_params['npcs'] is not None:
+        cmd += " --neighbors_n_pcs %s"  % neighbor_params['npcs']
     if neighbor_params['k'] is not None:
         cmd += " --neighbors_k %s" % neighbor_params['k']
     cmd += " > logs/atac_harmony.log " 
diff --git a/panpipes/pipeline_integration/pipeline.yml b/panpipes/pipeline_integration/pipeline.yml
index 7bb12ff0..6f94da90 100644
--- a/panpipes/pipeline_integration/pipeline.yml
+++ b/panpipes/pipeline_integration/pipeline.yml
@@ -91,7 +91,7 @@ rna:
   #   -if Harmony is the method of choice, it will use these components to create a corrected dim red.)
   # the maximum number of dims for neighbors calculation can only only be lower or equal to the total number of dims for PCA or Harmony
   # note: scvelo default is 30
-    ndims: 30
+    npcs: 30
     # number of neighbours
     k: 30
     # metric: euclidean | cosine
diff --git a/python/batch_correct_harmony.py b/python/batch_correct_harmony.py
index d1900ca5..d5890ea7 100644
--- a/python/batch_correct_harmony.py
+++ b/python/batch_correct_harmony.py
@@ -39,8 +39,8 @@
                     help="npcs for running harmony")        
 parser.add_argument('--sigma_val', default=0.1,
                     help="sigma")
-parser.add_argument('--neighbors_ndims', default=30,
-                    help="number of dims for neighbors calculation")
+parser.add_argument('--neighbors_n_pcs', default=30,
+                    help="n_pcs")
 parser.add_argument('--neighbors_method',
                     help="neighbours method, scanpy or hnsw")
 parser.add_argument('--neighbors_k',
@@ -97,10 +97,10 @@
 adata.obsm['X_harmony']=adjusted_pcs.values
 L.info("harmony co-ords derived")
 
-if int(args.neighbors_ndims) >adata.obsm['X_harmony'].shape[1]:
+if int(args.neighbors_n_pcs) >adata.obsm['X_harmony'].shape[1]:
     L.warn(f"N Dims is larger than X_harmony dimensions, reducing n Dims to  {adata.obsm['X_harmony'].shape[1] -1}")
 
-n_pcs= min(int(args.neighbors_ndims), adata.obsm['X_harmony'].shape[1]-1)
+n_pcs= min(int(args.neighbors_n_pcs), adata.obsm['X_harmony'].shape[1]-1)
 
 # run neighbours and umap 
 run_neighbors_method_choice(adata, 
diff --git a/python/batch_correct_wnn.py b/python/batch_correct_wnn.py
index 6ea235ff..ae2f373e 100644
--- a/python/batch_correct_wnn.py
+++ b/python/batch_correct_wnn.py
@@ -121,7 +121,7 @@
         run_neighbors_method_choice(tmp.mod[kmod], 
                 method=pkmod['method'], 
                 n_neighbors=int(pkmod['k']), 
-                n_pcs=min(int(pkmod['ndims']), mdata.var.shape[0]-1), #this should be the # rows of var, not obs
+                n_pcs=min(int(pkmod['npcs']), mdata.var.shape[0]-1), #this should be the # rows of var, not obs
                 metric=pkmod['metric'], 
                 #does this throw an error if no PCA for any single mod is stored?
                 use_rep=repuse,

From c7596fb85dc6ec39733ff401c87053d21efd663a Mon Sep 17 00:00:00 2001
From: deevdevil88 <devikathedevil@gmail.com>
Date: Thu, 23 Feb 2023 19:27:26 +0000
Subject: [PATCH 08/11] reverted back to neighbors_npcs in pipeline.yml as
 before

---
 panpipes/pipeline_integration/pipeline.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/panpipes/pipeline_integration/pipeline.yml b/panpipes/pipeline_integration/pipeline.yml
index 6f94da90..4e008214 100644
--- a/panpipes/pipeline_integration/pipeline.yml
+++ b/panpipes/pipeline_integration/pipeline.yml
@@ -127,7 +127,7 @@ prot:
     #   -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on
     #   -if Harmony is the method of choice, it will use these components to create a corrected dim red.)
     # note: scvelo default is 30
-    ndims: 30
+    npcs: 30
     # number of neighbours
     k: 30
     # metric: euclidean | cosine
@@ -162,7 +162,7 @@ atac:
     #   -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on
     #   -if Harmony is the method of choice, it will use these components to create a corrected dim red.)
     # note: scvelo default is 30
-    ndims: 30
+    npcs: 30
     # number of neighbours
     k: 30
     # metric: euclidean | cosine

From c59b5eda730881858bec65a6ecdb034fb315ae15 Mon Sep 17 00:00:00 2001
From: deevdevil88 <devikathedevil@gmail.com>
Date: Thu, 23 Feb 2023 19:32:32 +0000
Subject: [PATCH 09/11]  reverting back all changes to neighbors_n_pcs

---
 python/batch_correct_harmony.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/batch_correct_harmony.py b/python/batch_correct_harmony.py
index d5890ea7..710cdb0d 100644
--- a/python/batch_correct_harmony.py
+++ b/python/batch_correct_harmony.py
@@ -98,7 +98,7 @@
 L.info("harmony co-ords derived")
 
 if int(args.neighbors_n_pcs) >adata.obsm['X_harmony'].shape[1]:
-    L.warn(f"N Dims is larger than X_harmony dimensions, reducing n Dims to  {adata.obsm['X_harmony'].shape[1] -1}")
+    L.warn(f"N PCs is larger than X_harmony dimensions, reducing n PCs to  {adata.obsm['X_harmony'].shape[1] -1}")
 
 n_pcs= min(int(args.neighbors_n_pcs), adata.obsm['X_harmony'].shape[1]-1)
 

From 84ea033aa0b7a8c3570b0228075761d58d992891 Mon Sep 17 00:00:00 2001
From: deevdevil88 <devikathedevil@gmail.com>
Date: Fri, 24 Feb 2023 11:00:03 +0000
Subject: [PATCH 10/11] parameterised harmony sigma for panpipes

---
 panpipes/pipeline_integration.py           | 8 +++++++-
 panpipes/pipeline_integration/pipeline.yml | 6 ++++++
 python/batch_correct_harmony.py            | 5 ++++-
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/panpipes/pipeline_integration.py b/panpipes/pipeline_integration.py
index 2ac64f6d..a989f821 100644
--- a/panpipes/pipeline_integration.py
+++ b/panpipes/pipeline_integration.py
@@ -164,7 +164,9 @@ def run_harmony(outfile):
     if harmony_params['npcs'] is not None:
         cmd += " --harmony_npcs %s" % harmony_params['npcs']
     if harmony_params['sigma'] is not None:
-        cmd += " --sigma_val %s" % harmony_params['sigma']    
+        cmd += " --sigma_val %s" % harmony_params['sigma'] 
+    if harmony_params['theta'] is not None:
+        cmd += " --theta_val %s" % harmony_params['theta']       
     neighbor_params = PARAMS['rna']['neighbors']
     if neighbor_params['method'] is not None:
         cmd += " --neighbors_method %s" % neighbor_params['method']
@@ -331,6 +333,8 @@ def run_harmony_prot( outfile):
         cmd += " --harmony_npcs %s" % harmony_params['npcs']
     if harmony_params['sigma'] is not None:
         cmd += " --sigma_val %s" % harmony_params['sigma'] 
+    if harmony_params['theta'] is not None:
+        cmd += " --theta_val %s" % harmony_params['theta']   
     neighbor_params = PARAMS['prot']['neighbors']
     if neighbor_params['method'] is not None:
         cmd += " --neighbors_method %s" % neighbor_params['method']
@@ -432,6 +436,8 @@ def run_harmony_atac( outfile):
         cmd += " --harmony_npcs %s" % harmony_params['npcs']
     if harmony_params['sigma'] is not None:
         cmd += " --sigma_val %s" % harmony_params['sigma'] 
+    if harmony_params['theta'] is not None:
+        cmd += " --theta_val %s" % harmony_params['theta']   
     neighbor_params = PARAMS['atac']['neighbors']
     if neighbor_params['method'] is not None:
         cmd += " --neighbors_method %s" % neighbor_params['method']
diff --git a/panpipes/pipeline_integration/pipeline.yml b/panpipes/pipeline_integration/pipeline.yml
index 4e008214..5f9fb2cb 100644
--- a/panpipes/pipeline_integration/pipeline.yml
+++ b/panpipes/pipeline_integration/pipeline.yml
@@ -57,6 +57,8 @@ rna:
   harmony:
   # sigma value, used by Harmony
     sigma: 0.1 
+  # theta value used by Harmony, default is 1
+    theta: 1.0
   # number of pcs, used by Harmony
     npcs: 30
   #-----------------------------
@@ -117,6 +119,8 @@ prot:
   harmony:
   # sigma value, used by Harmony
     sigma: 0.1 
+  # theta value used by Harmony, default is 1
+    theta: 1.0
   # number of pcs, used by Harmony
     npcs: 30
   #----------------------------›
@@ -152,6 +156,8 @@ atac:
   harmony:
   # sigma value, used by Harmony
     sigma: 0.1 
+  # theta value used by Harmony, default is 1
+    theta: 1.0
   # number of pcs, used by Harmony
     npcs: 30
   #----------------------------
diff --git a/python/batch_correct_harmony.py b/python/batch_correct_harmony.py
index 710cdb0d..07dc23b4 100644
--- a/python/batch_correct_harmony.py
+++ b/python/batch_correct_harmony.py
@@ -39,6 +39,8 @@
                     help="npcs for running harmony")        
 parser.add_argument('--sigma_val', default=0.1,
                     help="sigma")
+ parser.add_argument('--theta_val', default=1.0,
+                    help="theta")                   
 parser.add_argument('--neighbors_n_pcs', default=30,
                     help="n_pcs")
 parser.add_argument('--neighbors_method',
@@ -76,7 +78,7 @@
     # run harmony
 
     ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.harmony_npcs)], adata.obs, ["comb_columns"], 
-                                       sigma = float(args.sigma_val), verbose=True,max_iter_kmeans=30, 
+                                       sigma = float(args.sigma_val),theta = float(args.theta_val),verbose=True,max_iter_kmeans=30, 
                                        max_iter_harmony=40)
 
 else:
@@ -87,6 +89,7 @@
                         adata.obs,
                         [args.integration_col],
                         sigma=float(args.sigma_val),
+                        theta = float(args.theta_val),
                         verbose=True,max_iter_kmeans=30,
                         max_iter_harmony=40)
 

From 220c774065df04288317b047bc8d28ff3da4f3bd Mon Sep 17 00:00:00 2001
From: deevdevil88 <devikathedevil@gmail.com>
Date: Fri, 24 Feb 2023 11:25:50 +0000
Subject: [PATCH 11/11] indentation error in batch_correct_harmony.py

---
 python/batch_correct_harmony.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/batch_correct_harmony.py b/python/batch_correct_harmony.py
index 07dc23b4..019b9ee8 100644
--- a/python/batch_correct_harmony.py
+++ b/python/batch_correct_harmony.py
@@ -39,7 +39,7 @@
                     help="npcs for running harmony")        
 parser.add_argument('--sigma_val', default=0.1,
                     help="sigma")
- parser.add_argument('--theta_val', default=1.0,
+parser.add_argument('--theta_val', default=1.0,
                     help="theta")                   
 parser.add_argument('--neighbors_n_pcs', default=30,
                     help="n_pcs")