Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add z-score normalization option #33

Merged
merged 3 commits into from
Feb 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 35 additions & 17 deletions screenpro/phenoScore.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ def matrixTest(x, y, x_ctrl, y_ctrl, math, ave_reps, test = 'ttest', growth_rate


def runPhenoScore(adata, cond1, cond2, math, test, score_level,
growth_rate=1, n_reps=2, ctrl_label='negCtrl'):
growth_rate=1, n_reps=2,
get_z_score=False,ctrl_label='negCtrl'):
"""
Calculate phenotype score and p-values when comparing `cond2` vs `cond1`.
Args:
Expand All @@ -116,6 +117,7 @@ def runPhenoScore(adata, cond1, cond2, math, test, score_level,
score_level (str): score level
growth_rate (int): growth rate
n_reps (int): number of replicates
get_z_score (bool): boolean to calculate z-score normalized phenotype score and add as a new column (default is False)
ctrl_label (str): control label
Returns:
str: result name
Expand Down Expand Up @@ -154,17 +156,24 @@ def runPhenoScore(adata, cond1, cond2, math, test, score_level,
adj_p_values = getFDR(p_values)
# get targets
targets = adata.var.index.str.split('_[-,+]_').str[0].to_list()

# combine results into a dataframe
result = pd.concat([
pd.Series(targets, index=adata.var.index, name='target'),
pd.Series(scores, index=adata.var.index, name='score'),
pd.Series(p_values, index=adata.var.index, name=f'{test} pvalue'),
pd.Series(adj_p_values, index=adata.var.index, name='BH adj_pvalue'),
], axis=1)
if get_z_score:
# z-score normalization
result['z_score'] = calculateZScorePhenotypeScore(result, ctrl_label=ctrl_label)
# add p-values
result[f'{test} pvalue'] = p_values
result['BH adj_pvalue'] = adj_p_values

return result_name, result

elif score_level in ['compare_oligos', 'compare_oligos_and_reps']:
return None

else:
raise ValueError(f'score_level "{score_level}" not recognized')

Expand Down Expand Up @@ -203,20 +212,23 @@ def addPseudoCount():
# 'Pseudocount behavior not recognized or not implemented')


def calculateZScorePhenotypeScore(x, y, x_ctrl, y_ctrl, growth_rate, math, ave):
pass
# # calculate control median and std
# ctrl_std = np.std(calculateDelta(x=x_ctrl, y=y_ctrl, math=math, ave=ave))
# ctrl_median = np.median(calculateDelta(x=x_ctrl, y=y_ctrl, math=math, ave=ave))
#
# # calculate delta
# delta = calculateDelta(x=x, y=y, math=math, ave=ave)
#
# # calculate score
# return ((delta - ctrl_median) / growth_rate) / ctrl_std
def calculateZScorePhenotypeScore(score_df,ctrl_label='negCtrl'):
"""Calculate z-score normalized phenotype score.
Args:
score_df (pd.DataFrame): dataframe of scores that includes `score` and `targetType` columns
ctrl_label (str): control label, default is 'negCtrl'
Returns:
pd.Series: z-score normalized phenotype score
"""
# calculate control median and std
ctrl_std = score_df[score_df.targetType.eq(ctrl_label)].score.std()
# z-score normalization
out = score_df.score / ctrl_std

return out

def runPhenoScoreForReplicate(screen, x_label, y_label, score, growth_factor_table, ctrl_label='negCtrl'):

def runPhenoScoreForReplicate(screen, x_label, y_label, score, growth_factor_table, get_z_score=False, ctrl_label='negCtrl'):
"""
Calculate phenotype score for each pair of replicates.
Args:
Expand All @@ -225,6 +237,7 @@ def runPhenoScoreForReplicate(screen, x_label, y_label, score, growth_factor_tab
y_label: name of the second condition in column `condition` of `screen.adata.obs`
score: score to use for calculating phenotype score, i.e. 'gamma', 'tau', or 'rho'
growth_factor_table: dataframe of growth factors, i.e. output from `getGrowthFactors` function
get_z_score: boolean to calculate z-score normalized phenotype score instead of regular score (default is False)
ctrl_label: string to identify labels of negative control oligos

Returns:
Expand All @@ -244,12 +257,17 @@ def runPhenoScoreForReplicate(screen, x_label, y_label, score, growth_factor_tab
x_ctrl=adat_ctrl[adat_ctrl.obs.query(f'condition == "{x_label}" & replicate == {str(replicate)}').index].X,
y_ctrl=adat_ctrl[adat_ctrl.obs.query(f'condition == "{y_label}" & replicate == {str(replicate)}').index].X,

growth_rate=growth_factor_table.query(f'score=="{score}" & replicate=={replicate}')['growth_factor'].values[
0],
growth_rate=growth_factor_table.query(f'score=="{score}" & replicate=={replicate}')['growth_factor'].values[0],
math=screen.math,
ave='row' # there is only one column so `row` option here is equivalent to the value before averaging.
)

if get_z_score:
res = calculateZScorePhenotypeScore(
pd.DataFrame({'score':res,'targetType':adat.var.targetType},index=adat.var.index),
ctrl_label=ctrl_label
)

results.update({f'replicate_{replicate}': res})

out = pd.DataFrame(
Expand Down
Loading