diff --git a/panpipes/pipeline_integration.py b/panpipes/pipeline_integration.py
index 571c7ca7..a989f821 100644
--- a/panpipes/pipeline_integration.py
+++ b/panpipes/pipeline_integration.py
@@ -160,6 +160,13 @@ def run_harmony(outfile):
      --modality rna
      """
     # cannot use the normal method for importing params from yaml, because it only works up to depth 2
+    harmony_params = PARAMS['rna']['harmony']
+    if harmony_params['npcs'] is not None:
+        cmd += " --harmony_npcs %s" % harmony_params['npcs']
+    if harmony_params['sigma'] is not None:
+        cmd += " --sigma_val %s" % harmony_params['sigma'] 
+    if harmony_params['theta'] is not None:
+        cmd += " --theta_val %s" % harmony_params['theta']       
     neighbor_params = PARAMS['rna']['neighbors']
     if neighbor_params['method'] is not None:
         cmd += " --neighbors_method %s" % neighbor_params['method']
@@ -321,6 +328,13 @@ def run_harmony_prot( outfile):
      --n_threads %(resources_threads_high)s
      """
     cmd += " --modality prot"
+    harmony_params = PARAMS['prot']['harmony']
+    if harmony_params['npcs'] is not None:
+        cmd += " --harmony_npcs %s" % harmony_params['npcs']
+    if harmony_params['sigma'] is not None:
+        cmd += " --sigma_val %s" % harmony_params['sigma'] 
+    if harmony_params['theta'] is not None:
+        cmd += " --theta_val %s" % harmony_params['theta']   
     neighbor_params = PARAMS['prot']['neighbors']
     if neighbor_params['method'] is not None:
         cmd += " --neighbors_method %s" % neighbor_params['method']
@@ -416,9 +430,14 @@ def run_harmony_atac( outfile):
      --n_threads %(resources_threads_high)s
      --modality atac
      """
-    
-    if PARAMS['atac_sigma'] is not None:
-        cmd += " --sigma_val %(atac_sigma)s"
+
+    harmony_params = PARAMS['atac']['harmony']
+    if harmony_params['npcs'] is not None:
+        cmd += " --harmony_npcs %s" % harmony_params['npcs']
+    if harmony_params['sigma'] is not None:
+        cmd += " --sigma_val %s" % harmony_params['sigma'] 
+    if harmony_params['theta'] is not None:
+        cmd += " --theta_val %s" % harmony_params['theta']   
     neighbor_params = PARAMS['atac']['neighbors']
     if neighbor_params['method'] is not None:
         cmd += " --neighbors_method %s" % neighbor_params['method']
diff --git a/panpipes/pipeline_integration/pipeline.yml b/panpipes/pipeline_integration/pipeline.yml
index 95f4b338..5f9fb2cb 100644
--- a/panpipes/pipeline_integration/pipeline.yml
+++ b/panpipes/pipeline_integration/pipeline.yml
@@ -54,9 +54,13 @@ rna:
   #-----------------------------
   # Harmony args
   #-----------------------------
+  harmony:
   # sigma value, used by Harmony
-  sigma: 0.1 
-  harmony_npcs: 30
+    sigma: 0.1 
+  # theta value used by Harmony, default is 1
+    theta: 1.0
+  # number of pcs, used by Harmony
+    npcs: 30
   #-----------------------------
   # SCVI args
   #-----------------------------
@@ -64,8 +68,8 @@ rna:
     exclude_mt_genes: True
     mt_column: mt
     model_args:
-        nlayers: 
-        nlatent:
+        n_layers: 
+        n_latent:
         gene_likelihood: zinb
     training_args:
         max_epochs: 400
@@ -87,6 +91,7 @@ rna:
   # number of Principal Components to calculate for neighbours and umap:
   #   -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on
   #   -if Harmony is the method of choice, it will use these components to create a corrected dim red.)
+  # the maximum number of dims for neighbors calculation can only only be lower or equal to the total number of dims for PCA or Harmony
   # note: scvelo default is 30
     npcs: 30
     # number of neighbours
@@ -111,9 +116,13 @@ prot:
   #----------------------------
   # Harmony args
   #-----------------------------
+  harmony:
   # sigma value, used by Harmony
-  sigma: 0.1 #prot_sigma
-  harmony_npcs: 30
+    sigma: 0.1 
+  # theta value used by Harmony, default is 1
+    theta: 1.0
+  # number of pcs, used by Harmony
+    npcs: 30
   #----------------------------›
   # find neighbour parameters
   #-----------------------------
@@ -144,9 +153,13 @@ atac:
   #----------------------------
   # Harmony args
   #-----------------------------
+  harmony:
   # sigma value, used by Harmony
-  sigma: 0.1 #prot_sigma
-  harmony_npcs: 30
+    sigma: 0.1 
+  # theta value used by Harmony, default is 1
+    theta: 1.0
+  # number of pcs, used by Harmony
+    npcs: 30
   #----------------------------
   # find neighbour parameters
   #-----------------------------
diff --git a/panpipes/pipeline_qc_mm.py b/panpipes/pipeline_qc_mm.py
index ae463972..83aa7c9d 100644
--- a/panpipes/pipeline_qc_mm.py
+++ b/panpipes/pipeline_qc_mm.py
@@ -285,9 +285,9 @@ def concat_bg_mudatas(infiles, outfile):
         """
     if PARAMS['metadatacols'] is not None and PARAMS['metadatacols'] != "":
         cmd += " --metadatacols  %(metadatacols)s"
-    if PARAMS["barcode_mtd_include"] is True:
-        cmd += " --barcode_mtd_df %(barcode_mtd_path)s"
-        cmd += " --barcode_mtd_metadatacols %(barcode_mtd_metadatacols)s"
+   # if PARAMS["barcode_mtd_include"] is True:
+   #     cmd += " --barcode_mtd_df %(barcode_mtd_path)s"
+    #    cmd += " --barcode_mtd_metadatacols %(barcode_mtd_metadatacols)s"
     cmd += " > logs/concat_bg_mudatas.log"
     job_kwargs["job_threads"] = PARAMS['resources_threads_high']
     P.run(cmd, **job_kwargs)
diff --git a/python/batch_correct_harmony.py b/python/batch_correct_harmony.py
index 12e79fb1..019b9ee8 100644
--- a/python/batch_correct_harmony.py
+++ b/python/batch_correct_harmony.py
@@ -35,9 +35,13 @@
                     help='')
 parser.add_argument('--n_threads', default=1,
                     help="num threads to use for neighbor computations")
+parser.add_argument('--harmony_npcs', default=30,
+                    help="npcs for running harmony")        
 parser.add_argument('--sigma_val', default=0.1,
                     help="sigma")
-parser.add_argument('--neighbors_n_pcs',
+parser.add_argument('--theta_val', default=1.0,
+                    help="theta")                   
+parser.add_argument('--neighbors_n_pcs', default=30,
                     help="n_pcs")
 parser.add_argument('--neighbors_method',
                     help="neighbours method, scanpy or hnsw")
@@ -73,18 +77,19 @@
     adata.obs["comb_columns"] = adata.obs["comb_columns"].astype("category")
     # run harmony
 
-    ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.neighbors_n_pcs)], adata.obs, ["comb_columns"], 
-                                       sigma = float(args.sigma_val), verbose=True,max_iter_kmeans=30, 
+    ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.harmony_npcs)], adata.obs, ["comb_columns"], 
+                                       sigma = float(args.sigma_val),theta = float(args.theta_val),verbose=True,max_iter_kmeans=30, 
                                        max_iter_harmony=40)
 
 else:
     # make sure that batch is a categorical
     adata.obs[args.integration_col] = adata.obs[args.integration_col].astype("category")
     # run harmony
-    ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.neighbors_n_pcs)],
+    ho = hm.run_harmony(adata.obsm['X_pca'][:,0:int(args.harmony_npcs)],
                         adata.obs,
                         [args.integration_col],
                         sigma=float(args.sigma_val),
+                        theta = float(args.theta_val),
                         verbose=True,max_iter_kmeans=30,
                         max_iter_harmony=40)
 
diff --git a/python/make_adata_from_csv.py b/python/make_adata_from_csv.py
index d75c8d13..3d963547 100644
--- a/python/make_adata_from_csv.py
+++ b/python/make_adata_from_csv.py
@@ -144,7 +144,7 @@
                 # create new modality for hashing
                 mdata.mod["hashing_ab"]=mdata["prot"][:, mdata["prot"].var["hashing_ab"]]
                 # subset old modality to remove hashing
-                mdata["prot"][:, ~mdata["prot"].var["hashing_ab"]]
+                mdata.mod['prot'] = mdata["prot"][:, ~mdata["prot"].var["hashing_ab"]]
         except FileNotFoundError:
             warnings.warn("protein metadata table not found")
     mdata.update()
diff --git a/python/run_scanpyQC_rep.py b/python/run_scanpyQC_rep.py
new file mode 100644
index 00000000..873bc62d
--- /dev/null
+++ b/python/run_scanpyQC_rep.py
@@ -0,0 +1,140 @@
+'''
+scanpy QC script GEX
+order of QC:
+- GEX
+- ADT
+- Repertoire
+- ATAC
+'''
+import sys
+import logging
+L = logging.getLogger()
+L.setLevel(logging.INFO)
+log_handler = logging.StreamHandler(sys.stdout)
+formatter = logging.Formatter('%(asctime)s: %(levelname)s - %(message)s')
+log_handler.setFormatter(formatter)
+L.addHandler(log_handler)
+
+import warnings
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+import argparse
+import pandas as pd
+import muon as mu
+import scirpy as ir
+import matplotlib.pyplot as plt
+plt.ioff()
+from panpipes.funcs.io import write_obs
+import yaml
+
+parser = argparse.ArgumentParser()
+# required option
+parser.add_argument("--sampleprefix",
+                    default="",
+                    help="prefix to prepend when saving the metadata file")
+parser.add_argument("--input_mudata",
+                    default="mdata_unfilt.h5mu",
+                    help="")
+parser.add_argument("--output_mudata",
+                    default="mdata_unfilt.h5mu",
+                    help="")
+parser.add_argument("--figdir",
+                    default="./figures/",
+                    help="path to save the figures to")
+
+parser.add_argument("--distance_metrics",
+                    default=None, type=yaml.safe_load,
+                    help="what metrics to calculate sequence distance metric? \
+                    any arguments from scirpy.pp.ir_dist as a dict in format'{arg: value}' ")
+parser.add_argument("--clonotype_metrics",
+                    default=None,type=yaml.safe_load,
+                    help="what metrics to calculate sequence distance metric? \
+                    any arguments from scirpy.pp.define_clonotypes as a dict in format'{arg: value}' ")
+args, opt = parser.parse_known_args()
+L.info(args)
+mdata = mu.read(args.input_mudata)
+rep = mdata['rep']
+# chain qc
+ir.tl.chain_qc(rep)
+
+# remove nones, so defaults are used
+if args.distance_metrics is not None:
+    # remove nones, so defaults are used
+    dist_args = {k: v for k, v in args.distance_metrics.items() if v != "None"}
+    dist_args = {k: v for k, v in dist_args.items() if v}
+else:
+    dist_args = {}
+
+L.info("distance args:")
+L.info(dist_args)
+
+ir.pp.ir_dist(rep, **dist_args)
+
+# pull function arguments from args. 
+if args.clonotype_metrics is not None:
+    # remove nones, so defaults are used
+    clonotype_args = {k: v for k, v in args.clonotype_metrics.items() if v != "None"}
+    clonotype_args = {k: v for k, v in clonotype_args.items() if v}
+else:
+    clonotype_args={}
+# define clonotypes
+L.info("clonotypes args:")
+L.info(clonotype_args)
+
+ir.tl.define_clonotypes(rep, **clonotype_args)
+ir.tl.clonal_expansion(rep)
+
+    
+category_cols = ['has_ir']
+for cc in category_cols:
+    if pd.api.types.infer_dtype(rep.obs[cc]) != "categorical":
+        rep.obs[cc] = rep.obs[cc].astype('string').astype('category')
+
+
+if "BCR" in rep.obs['receptor_type'].values:
+    bcr = rep[rep.obs.receptor_type =="BCR", :].copy()
+else:
+    bcr=None
+    
+if "TCR" in rep.obs.receptor_type.values:
+    tcr = rep[rep.obs.receptor_type =="TCR", :].copy()
+else:
+    tcr=None
+
+# plot group abundances
+fig, ax = plt.subplots()
+ax = ir.pl.group_abundance(rep, groupby="receptor_type", ax=ax)
+fig.savefig(args.figdir +"/barplot_group_abundance_receptor_type.png", bbox_inches="tight")
+
+if bcr is not None:
+    fig, ax = plt.subplots()
+    ax = ir.pl.group_abundance(bcr, groupby="chain_pairing", ax=ax)
+    fig.tight_layout()
+    fig.savefig(args.figdir +"/barplot_group_abundance_bcr_receptor_subtype.png", bbox_inches="tight")
+
+if tcr is not None:
+    fig, ax = plt.subplots()
+    ax = ir.pl.group_abundance(tcr, groupby="chain_pairing",  ax=ax)
+    fig.tight_layout()
+    fig.savefig(args.figdir +"/barplot_group_abundance_tcr_receptor_subtype.png", bbox_inches="tight")
+
+# clonal expansion
+
+clone_counts = rep.obs.groupby("receptor_type").clone_id.value_counts().to_frame("cell_counts").reset_index()
+
+if bcr is not None:
+    ax = ir.pl.clonal_expansion(bcr, groupby="sample_id")
+    ax.set_title("bcr clonal expansion")
+    plt.savefig(args.figdir +"/barplot_clonal_expansion_bcr.png", bbox_inches="tight" )
+
+if tcr is not None:
+    ax = ir.pl.clonal_expansion(tcr, groupby="sample_id")
+    ax.set_title("tcr clonal expansion")
+    plt.savefig(args.figdir +"/barplot_clonal_expansion_tcr.png", bbox_inches="tight")
+
+
+L.info("saving anndata and obs in a metadata tsv file")
+mdata.update()
+write_obs(mdata, output_prefix=args.sampleprefix, 
+    output_suffix="_cell_metadata.tsv")
+mdata.write(args.output_mudata)
\ No newline at end of file