ArcInstitute · abearab · Feb 15, 2024 · Feb 15, 2024 · Feb 15, 2024 · Feb 15, 2024
diff --git a/screenpro/phenoScore.py b/screenpro/phenoScore.py
@@ -104,7 +104,8 @@ def matrixTest(x, y, x_ctrl, y_ctrl, math, ave_reps, test = 'ttest', growth_rate
 
 
 def runPhenoScore(adata, cond1, cond2, math, test, score_level,
-                  growth_rate=1, n_reps=2, ctrl_label='negCtrl'):
+                  growth_rate=1, n_reps=2, 
+                  get_z_score=False,ctrl_label='negCtrl'):
     """
     Calculate phenotype score and p-values when comparing `cond2` vs `cond1`.
     Args:
@@ -116,6 +117,7 @@ def runPhenoScore(adata, cond1, cond2, math, test, score_level,
         score_level (str): score level
         growth_rate (int): growth rate
         n_reps (int): number of replicates
+        get_z_score (bool): boolean to calculate z-score normalized phenotype score and add as a new column (default is False)
         ctrl_label (str): control label
     Returns:
         str: result name
@@ -154,17 +156,24 @@ def runPhenoScore(adata, cond1, cond2, math, test, score_level,
         adj_p_values = getFDR(p_values)
         # get targets
         targets = adata.var.index.str.split('_[-,+]_').str[0].to_list()
+
         # combine results into a dataframe
         result = pd.concat([
             pd.Series(targets, index=adata.var.index, name='target'),
             pd.Series(scores, index=adata.var.index, name='score'),
-            pd.Series(p_values, index=adata.var.index, name=f'{test} pvalue'),
-            pd.Series(adj_p_values, index=adata.var.index, name='BH adj_pvalue'),
         ], axis=1)
+        if get_z_score:
+            # z-score normalization
+            result['z_score'] = calculateZScorePhenotypeScore(result, ctrl_label=ctrl_label)
+        # add p-values
+        result[f'{test} pvalue'] = p_values
+        result['BH adj_pvalue'] = adj_p_values
 
         return result_name, result
+
     elif score_level in ['compare_oligos', 'compare_oligos_and_reps']:
         return None
+
     else:
         raise ValueError(f'score_level "{score_level}" not recognized')
 
@@ -203,20 +212,23 @@ def addPseudoCount():
     #         'Pseudocount behavior not recognized or not implemented')
 
 
-def calculateZScorePhenotypeScore(x, y, x_ctrl, y_ctrl, growth_rate, math, ave):
-    pass
-    # # calculate control median and std
-    # ctrl_std = np.std(calculateDelta(x=x_ctrl, y=y_ctrl, math=math, ave=ave))
-    # ctrl_median = np.median(calculateDelta(x=x_ctrl, y=y_ctrl, math=math, ave=ave))
-    #
-    # # calculate delta
-    # delta = calculateDelta(x=x, y=y, math=math, ave=ave)
-    #
-    # # calculate score
-    # return ((delta - ctrl_median) / growth_rate) / ctrl_std
+def calculateZScorePhenotypeScore(score_df,ctrl_label='negCtrl'):
+    """Calculate z-score normalized phenotype score.
+    Args:
+        score_df (pd.DataFrame): dataframe of scores that includes `score` and `targetType` columns
+        ctrl_label (str): control label, default is 'negCtrl'
+    Returns:
+        pd.Series: z-score normalized phenotype score
+    """
+    # calculate control median and std
+    ctrl_std = score_df[score_df.targetType.eq(ctrl_label)].score.std()
+    # z-score normalization
+    out = score_df.score / ctrl_std
 
+    return out
 
-def runPhenoScoreForReplicate(screen, x_label, y_label, score, growth_factor_table, ctrl_label='negCtrl'):
+
+def runPhenoScoreForReplicate(screen, x_label, y_label, score, growth_factor_table, get_z_score=False, ctrl_label='negCtrl'):
     """
     Calculate phenotype score for each pair of replicates.
     Args:
@@ -225,6 +237,7 @@ def runPhenoScoreForReplicate(screen, x_label, y_label, score, growth_factor_tab
         y_label: name of the second condition in column `condition` of `screen.adata.obs`
         score: score to use for calculating phenotype score, i.e. 'gamma', 'tau', or 'rho'
         growth_factor_table: dataframe of growth factors, i.e. output from `getGrowthFactors` function
+        get_z_score: boolean to calculate z-score normalized phenotype score instead of regular score (default is False)
         ctrl_label: string to identify labels of negative control oligos
 
     Returns:
@@ -244,12 +257,17 @@ def runPhenoScoreForReplicate(screen, x_label, y_label, score, growth_factor_tab
             x_ctrl=adat_ctrl[adat_ctrl.obs.query(f'condition == "{x_label}" & replicate == {str(replicate)}').index].X,
             y_ctrl=adat_ctrl[adat_ctrl.obs.query(f'condition == "{y_label}" & replicate == {str(replicate)}').index].X,
 
-            growth_rate=growth_factor_table.query(f'score=="{score}" & replicate=={replicate}')['growth_factor'].values[
-                0],
+            growth_rate=growth_factor_table.query(f'score=="{score}" & replicate=={replicate}')['growth_factor'].values[0],
             math=screen.math,
             ave='row'  # there is only one column so `row` option here is equivalent to the value before averaging.
         )
 
+        if get_z_score:
+            res = calculateZScorePhenotypeScore(
+                pd.DataFrame({'score':res,'targetType':adat.var.targetType},index=adat.var.index),
+                ctrl_label=ctrl_label
+            )
+
         results.update({f'replicate_{replicate}': res})
 
     out = pd.DataFrame(