From 5c478f92329ec1044f3e42406353773b00e80cf6 Mon Sep 17 00:00:00 2001 From: Fnyasimi <41294948+Fnyasimi@users.noreply.github.com> Date: Tue, 8 Oct 2024 11:26:26 -0500 Subject: [PATCH 1/2] initial inflation adjustment feature --- software/M04_zscores.py | 47 ++++++++++++++++++++++++++++++++++++++++- software/SPrediXcan.py | 6 +++++- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/software/M04_zscores.py b/software/M04_zscores.py index 29224c6..9c05dbd 100644 --- a/software/M04_zscores.py +++ b/software/M04_zscores.py @@ -5,7 +5,10 @@ import logging import os - +import sqlite3 +import pandas as pd +import numpy as np +from scipy.stats import chi2 from timeit import default_timer as timer from metax import Logging @@ -14,6 +17,42 @@ from metax.metaxcan import AssociationCalculation from metax.metaxcan import Utilities as MetaxcanUtilities +#calibration functions +def get_phi(db): + # get phi from SQLite database + try: + conn = sqlite3.connect(db) + extras = pd.read_sql_query("SELECT gene, phi FROM extra", conn) + extras = extras.dropna(subset=['phi']) + return extras + except (sqlite3.DatabaseError, sqlite3.OperationalError) as e: + # Log any database-related errors + logging.error(f"An error occurred while accessing the database: {e}") + finally: + # Ensure the connection is closed + if conn: + conn.close() + +def correct_inf_phi(xcan_df, predict_db, N, h2): + + extras = get_phi(predict_db) + xcan_df = xcan_df.merge(extras, on='gene', how='inner') + + xcan_df['uncalibrated_pvalue'] = xcan_df['pvalue'] + xcan_df['uncalibrated_zscore'] = xcan_df['zscore'] + + #QC: Replace negative phi values with 0 + xcan_df['phi'] = np.where(xcan_df['phi'] < 0, 0, xcan_df['phi']) + + # Calibration value + denominator = 1 + (xcan_df['phi'] * N * h2) + + # calibrated p-value + xcan_df['pvalue'] = chi2.sf(xcan_df['zscore']**2 / denominator, df=1) + # calibrated z-score + xcan_df['zscore'] = np.sqrt(chi2.ppf(xcan_df['pvalue'],df = 1)) * np.sign(xcan_df['zscore']) + logging.info("The pvalue and zscore have been calibrated successfully") + return xcan_df def run_metaxcan(args, context): logging.info("Started metaxcan association") @@ -48,6 +87,12 @@ def run_metaxcan(args, context): additional = AssociationCalculation.dataframe_from_aditional_stats(additional) results = MetaxcanUtilities.merge_additional_output(results, additional, context, args.remove_ens_version) + if args.gwas_h2 is not None and args.gwas_N is not None: + logging.info("Calibrating pvalue and zscore using phi in model, N and h2") + results = correct_inf_phi(results, args.model_db_path, args.gwas_N, args.gwas_h2) + else: + logging.warning("IMPORTANT: The pvalue and zscore are uncalibrated for inflation") + if args.output_file: Utilities.ensure_requisite_folders(args.output_file) results.to_csv(args.output_file, index=False, na_rep="NA") diff --git a/software/SPrediXcan.py b/software/SPrediXcan.py index 8011c3d..d3b5ee3 100755 --- a/software/SPrediXcan.py +++ b/software/SPrediXcan.py @@ -39,7 +39,8 @@ def run(args): parser.add_argument("--model_db_snp_key", help="Specify a key to use as snp_id") #GWAS betas parser.add_argument("--gwas_file", help="Load a single GWAS file. (Alternative to providing a gwas_folder and gwas_file_pattern)") - + parser.add_argument("--gwas_h2", help="GWAS heritability (h2)", type=float, default=None, required=False) + parser.add_argument("--gwas_N", help="GWAS sample size (N)", type=int, default=None, required=False) parser.add_argument("--gwas_folder", help="name of folder containing GWAS data. All files in the folder are assumed to belong to a single study.") parser.add_argument("--gwas_file_pattern", help="Pattern to recognice GWAS files in folders (in case there are extra files and you don't want them selected).") @@ -61,6 +62,9 @@ def run(args): Logging.configureLogging(int(args.verbosity)) + if args.gwas_h2 is None or args.gwas_N is None: + logging.warning("Missing --gwas_h2 and --gwas_N are required to calibrate the pvalue and zscore.") + if args.throw: run(args) else: From 1a7f5f284913f1cb50189c6c78abbc0ed96e623c Mon Sep 17 00:00:00 2001 From: Fnyasimi <41294948+Fnyasimi@users.noreply.github.com> Date: Tue, 8 Oct 2024 12:27:21 -0500 Subject: [PATCH 2/2] highlight the warning in red for easier visibility of missing params --- software/metax/Logging.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/software/metax/Logging.py b/software/metax/Logging.py index 3bb81dd..8155225 100644 --- a/software/metax/Logging.py +++ b/software/metax/Logging.py @@ -12,10 +12,21 @@ def configureLogging(level=5, target=sys.stderr): kludge=True logger.handlers.clear() + # create console handler and set level to info + RED = "\033[91m" + RESET = "\033[0m" + + # A custom logging formatter for warning + class CustomFormatter(logging.Formatter): + def format(self, record): + if record.levelno == logging.WARNING: + record.msg = f"{RED}{record.msg}{RESET}" + return super().format(record) + # create console handler and set level to info handler = logging.StreamHandler(target) handler.setLevel(level) - formatter = logging.Formatter("%(levelname)s - %(message)s") + formatter = CustomFormatter("%(levelname)s - %(message)s") handler.setFormatter(formatter) logger.addHandler(handler)