记录如何本地clump

使用的函数可以看这个https://mrcieu.github.io/ieugwasr/reference/ld_clump_local.html

加载需要的包

library(TwoSampleMR)
library(data.table)
library(ieugwasr)
library(plinkbinr)  #这个包是帮你装个plink的，自己装plink也行，就是要注意运行时是否有权限

读取数据

gene_blood_exp_snp<-readRDS("gene_blood_exp_snp.rds") #eQTL数据
snp_epilepsy_out<-readRDS("snp_epilepsy_out.rds")   #你的gwas数据
exp_dat1<-gene_blood_exp_snp
genelist <- unique(exp_dat1$gene)

head(gene_blood_exp_snp)
 gene hgnc_symbol        SNP chr.exposure pos.exposure effect_allele.exposure other_allele.exposure eaf.exposure z.exposure
1 ENSG00000000938         FGR rs34806307            1     27958339                      T                     C   0.05812083    11.1724
2 ENSG00000000938         FGR  rs4908343            1     27931698                      G                     A   0.18086010     8.3407
3 ENSG00000000938         FGR rs12726763            1     27958245                      G                     A   0.94275336     7.9185
4 ENSG00000000938         FGR rs12749647            1     27895517                      G                     A   0.11428375     6.9687
5 ENSG00000000938         FGR rs72886321            1     27909007                      A                     C   0.01914212     6.5440
6 ENSG00000000938         FGR rs59900503            1     27907201                      C                     T   0.01919834     6.5196
  beta.exposure se.exposure pval.exposure id.exposure exposure         F
1    0.18878560  0.01689750    5.5656e-29       blood exposure 124.82252
2    0.08574590  0.01028042    7.3915e-17       blood exposure  69.56728
3    0.13488910  0.01703468    2.4059e-15       blood exposure  62.70264
4    0.08669946  0.01244127    3.2011e-12       blood exposure  48.56278
5    0.18905513  0.02888984    5.9975e-11       blood exposure  42.82394
6    0.18808058  0.02884848    7.0495e-11       blood exposure  42.50518

head(snp_epilepsy_out)
chr.outcome pos.outcome        SNP effect_allele.outcome other_allele.outcome eaf.outcome z.outcome beta.outcome  se.outcome outcome
1           6   130840091  rs2326918                     A                    G      0.8470    -0.279 -0.002586613 0.009271014 outcome
2           7   145771806  rs6977693                     T                    C      0.8587    -0.874 -0.008373962 0.009581192 outcome
3          11   100009976 rs12364336                     A                    G      0.8772    -1.506 -0.015313714 0.010168469 outcome
4           1   166367755 rs12562373                     A                    G      0.7678     0.760  0.006007180 0.007904185 outcome
5          14    86737556  rs2135099                     A                    G      0.1652    -0.779 -0.007000891 0.008987023 outcome
6           2   201527977 rs57502521                     A                    G      0.9649    -0.088 -0.001595890 0.018135114 outcome
  mr_keep.outcome pval.outcome pval_origin.outcome id.outcome
1            TRUE    0.7802448            inferred     AIkFiR
2            TRUE    0.3821183            inferred     AIkFiR
3            TRUE    0.1320672            inferred     AIkFiR
4            TRUE    0.4472546            inferred     AIkFiR
5            TRUE    0.4359797            inferred     AIkFiR
6            TRUE    0.9298767            inferred     AIkFiR

为了在后面不报错，我魔改了一下这个函数

修改部分主要是在fun2这个部分，报错主要也是集中在这个部分

Important

注意这个是服务器版本的，windows版的在下面

ld_clump_local <- function (dat, clump_kb, clump_r2, clump_p, bfile, plink_bin) {
  shell <- ifelse(Sys.info()["sysname"] == "Windows", "cmd", 
                  "sh")
  fn <- tempfile()
  write.table(data.frame(SNP = dat[["rsid"]], P = dat[["pval"]]), 
              file = fn, row.names = F, col.names = T, quote = F)
  fun2 <- paste0(shQuote(plink_bin, type = shell), " --bfile ", 
                 shQuote(bfile, type = shell), " --clump ", shQuote(fn, 
                                                                    type = shell), " --clump-p1 ", clump_p, " --clump-r2 ", 
                 clump_r2, " --clump-kb ", clump_kb, " --out ", shQuote(fn, 
                                                                        type = shell))
  system_output <- try(system(paste(fun2, "2>&1"), intern = TRUE))
  # Check for the specific warning message
  warning_message <- "Warning: No significant --clump results.  Skipping."
  if (warning_message %in% system_output) {
    message("Encountered the specific warning message: ", warning_message)
    return(data.frame()) # return an empty data frame
  } 
  res <- read.table(paste(fn, ".clumped", sep = ""), header = T)
  unlink(paste(fn, "*", sep = ""))
  y <- subset(dat, !dat[["rsid"]] %in% res[["SNP"]])
  if (nrow(y) > 0) {
    message("Removing ", length(y[["rsid"]]), " of ", nrow(dat), 
            " variants due to LD with other variants or absence from LD reference panel")
  }
  return(subset(dat, dat[["rsid"]] %in% res[["SNP"]]))
}

需要下载plink这个软件

https://www.cog-genomics.org/plink/ 根据自己的操作系统下载

需要下载LD 参考数据集

http://fileserve.mrcieu.ac.uk/ld/1kg.v3.tgz 要解压，里面有五个人群的，一般选欧洲

Windows本地版的

ld_clump_local<- function (dat, clump_kb, clump_r2, clump_p, bfile, plink_bin) {
  shell <- ifelse(Sys.info()["sysname"] == "Windows", "cmd", 
                  "sh")
  fn <- tempfile()
  write.table(data.frame(SNP = dat[["rsid"]], P = dat[["pval"]]), 
              file = fn, row.names = F, col.names = T, quote = F)
  fun2 <- paste0(shQuote(plink_bin, type = shell), " --bfile ", 
                 shQuote(bfile, type = shell), " --clump ", gsub('\\\\',"/",fn), " --clump-p1 ", clump_p, " --clump-r2 ", 
                 clump_r2, " --clump-kb ", clump_kb, " --out ", gsub('\\\\',"/",fn))
  system(fun2)
  res <- read.table(paste(fn, ".clumped", sep = ""), header = T)
  unlink(paste(fn, "*", sep = ""))
  y <- subset(dat, !dat[["rsid"]] %in% res[["SNP"]])
  if (nrow(y) > 0) {
    message("Removing ", length(y[["rsid"]]), " of ", nrow(dat), 
            " variants due to LD with other variants or absence from LD reference panel")
  }
  return(subset(dat, dat[["rsid"]] %in% res[["SNP"]]))
}

Important

还不行的话，最简单的方法是fun2修改成fun2 <- paste0(plink_bin, " --bfile ",bfile, " --clump ",fn," --clump-p1 ", clump_p, " --clump-r2 ", clump_r2, " --clump-kb ", clump_kb, " --out ",fn)

循环执行函数

reslist <- list() # 获取基因列表
for (i in genelist) {
  exp_dat <- exp_dat1[exp_dat1$gene == i,]     
  epilepsy <- snp_epilepsy_out[snp_epilepsy_out$SNP %in% exp_dat$SNP, ]
  exp_dat <- exp_dat[exp_dat$SNP %in% epilepsy$SNP, ]

  ld <- ld_clump_local(dplyr::tibble(rsid=exp_dat$SNP, pval=exp_dat$pval.exposure),
                       clump_r2=0.001,clump_kb=10000,clump_p=1, 
                       bfile = "/GPUFS/gyfyy_jxhe_1/User/zyt/mr_gy/0127/1000genomes/EUR", 
                       plink_bin = plinkbinr::get_plink_exe())  #这个是天河服务器版本的，你要改自己的路径哦
  exp_dat <- exp_dat[exp_dat$SNP %in% ld$rsid, ]

if (nrow(exp_dat) == 0) {
  next
}       #这个是为了当没有数据时执行下一个循环

dat <- harmonise_data(exposure_dat=exp_dat, outcome_dat=epilepsy)
if (nrow(dat) == 0) {
  next
}
  res <- mr(dat)     #执行mr
  reslist[[i]] <- res   #把结果写入列表
  
}

result <- data.frame()   
for (i in names(reslist)) {
  if (nrow(reslist[[i]]) == 0) {
    next
  }    #当结果为空时跳到下一个循环
  r1 <- reslist[[i]]
  r1$gene  <-  i
  result <- rbind(result,r1)
}

我有一计可以多线程运行

Important

函数需要根据自己项目的不同进行修改哦数据名和上面的稍微有点区别，其实也就加了个format_data，应该很好理解

process_gene <- function(i) {
  cough1 <- cough[cough$Gene == i,]    #这个是eqtl文件
  cough1 <- cough1[cough1$p<5E-08]
  if (nrow(cough1) == 0) {
    return(NULL)
  }
  exp_dat <- format_data(
    dat=cough1,
    type = 'exposure',
    snp_col = "SNP",
    beta_col = "b",
    se_col = "SE",
    eaf_col = "Freq",
    effect_allele_col = "A1",
    other_allele_col = "A2",
    # samplesize_col = "all_meta_sample_N",
    pval_col = "p",
    chr_col = "Chr",
    pos_col = "BP"
  )
  
  asthma2 <- disease[disease$rsid %in% exp_dat$SNP, ]     #这个是你的gwas文件
  if (nrow(asthma2) == 0) {
    return(NULL)
  }
  asthma <- format_data(
    dat=asthma2,
    type = "outcome",
    header = TRUE,
    snps = exp_dat$SNP,
    snp_col = "rsid",
    beta_col = "inv_var_meta_beta",
    se_col = "inv_var_meta_sebeta",
    eaf_col = "all_meta_AF",
    effect_allele_col = "ALT",
    other_allele_col = "REF",
    # samplesize_col = "all_meta_sample_N",
    pval_col = "inv_var_meta_p",
    chr_col = "CHR",
    pos_col = "POS"
  )
  
  exp_dat <- exp_dat[exp_dat$SNP %in% asthma$SNP, ]
  
  
  ld <- ld_clump_local(dplyr::tibble(rsid=exp_dat$SNP, pval=exp_dat$pval.exposure),
                       clump_r2=0.001,clump_kb=10000,clump_p=1, bfile = "./GWAS_data/1000genomic/EUR", plink_bin = "./lin/projects/bin/plink")   #执行本地clump
  exp_dat <- exp_dat[exp_dat$SNP %in% ld$rsid, ]
  
  if (nrow(exp_dat) == 0) {
    return(NULL)
  }
  
  dat <- harmonise_data(exposure_dat=exp_dat, outcome_dat=asthma)
  if (nrow(dat) == 0) {
    return(NULL)
  }
  res <- mr(dat)
  
  return(res)
}

运行函数

library(parallel)
# 您可以根据您的机器的核心数调整mc.cores的值
reslist <- mclapply(genelist, process_gene, mc.cores = 8)

# 清理结果，移除NULL元素
reslist <- reslist[!sapply(reslist, is.null)]

到这里这个list就是结果啦，自己转成dataframe哦,给你们举个例子差不多是这样写，自己去改哈哈哈

result <- data.frame()
for (i in 1:length(reslist)) {
  result <- rbind(result,c(reslist[[i]]$summary,reslist[[i]]$gene))
}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

local_clump.md

local_clump.md

记录如何本地clump

使用的函数可以看这个https://mrcieu.github.io/ieugwasr/reference/ld_clump_local.html

加载需要的包

读取数据

为了在后面不报错，我魔改了一下这个函数

需要下载plink这个软件

需要下载LD 参考数据集

Windows本地版的

循环执行函数

我有一计可以多线程运行

Files

local_clump.md

Latest commit

History

local_clump.md

File metadata and controls

记录如何本地clump

使用的函数可以看这个https://mrcieu.github.io/ieugwasr/reference/ld_clump_local.html

加载需要的包

读取数据

为了在后面不报错，我魔改了一下这个函数

需要下载plink这个软件

需要下载LD 参考数据集

Windows本地版的

循环执行函数

我有一计可以多线程运行