library(TwoSampleMR)
library(data.table)
library(ieugwasr)
library(plinkbinr) #这个包是帮你装个plink的,自己装plink也行,就是要注意运行时是否有权限
gene_blood_exp_snp<-readRDS("gene_blood_exp_snp.rds") #eQTL数据
snp_epilepsy_out<-readRDS("snp_epilepsy_out.rds") #你的gwas数据
exp_dat1<-gene_blood_exp_snp
genelist <- unique(exp_dat1$gene)
head(gene_blood_exp_snp)
gene hgnc_symbol SNP chr.exposure pos.exposure effect_allele.exposure other_allele.exposure eaf.exposure z.exposure
1 ENSG00000000938 FGR rs34806307 1 27958339 T C 0.05812083 11.1724
2 ENSG00000000938 FGR rs4908343 1 27931698 G A 0.18086010 8.3407
3 ENSG00000000938 FGR rs12726763 1 27958245 G A 0.94275336 7.9185
4 ENSG00000000938 FGR rs12749647 1 27895517 G A 0.11428375 6.9687
5 ENSG00000000938 FGR rs72886321 1 27909007 A C 0.01914212 6.5440
6 ENSG00000000938 FGR rs59900503 1 27907201 C T 0.01919834 6.5196
beta.exposure se.exposure pval.exposure id.exposure exposure F
1 0.18878560 0.01689750 5.5656e-29 blood exposure 124.82252
2 0.08574590 0.01028042 7.3915e-17 blood exposure 69.56728
3 0.13488910 0.01703468 2.4059e-15 blood exposure 62.70264
4 0.08669946 0.01244127 3.2011e-12 blood exposure 48.56278
5 0.18905513 0.02888984 5.9975e-11 blood exposure 42.82394
6 0.18808058 0.02884848 7.0495e-11 blood exposure 42.50518
head(snp_epilepsy_out)
chr.outcome pos.outcome SNP effect_allele.outcome other_allele.outcome eaf.outcome z.outcome beta.outcome se.outcome outcome
1 6 130840091 rs2326918 A G 0.8470 -0.279 -0.002586613 0.009271014 outcome
2 7 145771806 rs6977693 T C 0.8587 -0.874 -0.008373962 0.009581192 outcome
3 11 100009976 rs12364336 A G 0.8772 -1.506 -0.015313714 0.010168469 outcome
4 1 166367755 rs12562373 A G 0.7678 0.760 0.006007180 0.007904185 outcome
5 14 86737556 rs2135099 A G 0.1652 -0.779 -0.007000891 0.008987023 outcome
6 2 201527977 rs57502521 A G 0.9649 -0.088 -0.001595890 0.018135114 outcome
mr_keep.outcome pval.outcome pval_origin.outcome id.outcome
1 TRUE 0.7802448 inferred AIkFiR
2 TRUE 0.3821183 inferred AIkFiR
3 TRUE 0.1320672 inferred AIkFiR
4 TRUE 0.4472546 inferred AIkFiR
5 TRUE 0.4359797 inferred AIkFiR
6 TRUE 0.9298767 inferred AIkFiR
修改部分主要是在fun2这个部分,报错主要也是集中在这个部分
Important
注意这个是服务器版本的,windows版的在下面
ld_clump_local <- function (dat, clump_kb, clump_r2, clump_p, bfile, plink_bin) {
shell <- ifelse(Sys.info()["sysname"] == "Windows", "cmd",
"sh")
fn <- tempfile()
write.table(data.frame(SNP = dat[["rsid"]], P = dat[["pval"]]),
file = fn, row.names = F, col.names = T, quote = F)
fun2 <- paste0(shQuote(plink_bin, type = shell), " --bfile ",
shQuote(bfile, type = shell), " --clump ", shQuote(fn,
type = shell), " --clump-p1 ", clump_p, " --clump-r2 ",
clump_r2, " --clump-kb ", clump_kb, " --out ", shQuote(fn,
type = shell))
system_output <- try(system(paste(fun2, "2>&1"), intern = TRUE))
# Check for the specific warning message
warning_message <- "Warning: No significant --clump results. Skipping."
if (warning_message %in% system_output) {
message("Encountered the specific warning message: ", warning_message)
return(data.frame()) # return an empty data frame
}
res <- read.table(paste(fn, ".clumped", sep = ""), header = T)
unlink(paste(fn, "*", sep = ""))
y <- subset(dat, !dat[["rsid"]] %in% res[["SNP"]])
if (nrow(y) > 0) {
message("Removing ", length(y[["rsid"]]), " of ", nrow(dat),
" variants due to LD with other variants or absence from LD reference panel")
}
return(subset(dat, dat[["rsid"]] %in% res[["SNP"]]))
}
https://www.cog-genomics.org/plink/ 根据自己的操作系统下载
http://fileserve.mrcieu.ac.uk/ld/1kg.v3.tgz 要解压,里面有五个人群的,一般选欧洲
ld_clump_local<- function (dat, clump_kb, clump_r2, clump_p, bfile, plink_bin) {
shell <- ifelse(Sys.info()["sysname"] == "Windows", "cmd",
"sh")
fn <- tempfile()
write.table(data.frame(SNP = dat[["rsid"]], P = dat[["pval"]]),
file = fn, row.names = F, col.names = T, quote = F)
fun2 <- paste0(shQuote(plink_bin, type = shell), " --bfile ",
shQuote(bfile, type = shell), " --clump ", gsub('\\\\',"/",fn), " --clump-p1 ", clump_p, " --clump-r2 ",
clump_r2, " --clump-kb ", clump_kb, " --out ", gsub('\\\\',"/",fn))
system(fun2)
res <- read.table(paste(fn, ".clumped", sep = ""), header = T)
unlink(paste(fn, "*", sep = ""))
y <- subset(dat, !dat[["rsid"]] %in% res[["SNP"]])
if (nrow(y) > 0) {
message("Removing ", length(y[["rsid"]]), " of ", nrow(dat),
" variants due to LD with other variants or absence from LD reference panel")
}
return(subset(dat, dat[["rsid"]] %in% res[["SNP"]]))
}
Important
还不行的话,最简单的方法是fun2修改成fun2 <- paste0(plink_bin, " --bfile ",bfile, " --clump ",fn," --clump-p1 ", clump_p, " --clump-r2 ", clump_r2, " --clump-kb ", clump_kb, " --out ",fn)
reslist <- list() # 获取基因列表
for (i in genelist) {
exp_dat <- exp_dat1[exp_dat1$gene == i,]
epilepsy <- snp_epilepsy_out[snp_epilepsy_out$SNP %in% exp_dat$SNP, ]
exp_dat <- exp_dat[exp_dat$SNP %in% epilepsy$SNP, ]
ld <- ld_clump_local(dplyr::tibble(rsid=exp_dat$SNP, pval=exp_dat$pval.exposure),
clump_r2=0.001,clump_kb=10000,clump_p=1,
bfile = "/GPUFS/gyfyy_jxhe_1/User/zyt/mr_gy/0127/1000genomes/EUR",
plink_bin = plinkbinr::get_plink_exe()) #这个是天河服务器版本的,你要改自己的路径哦
exp_dat <- exp_dat[exp_dat$SNP %in% ld$rsid, ]
if (nrow(exp_dat) == 0) {
next
} #这个是为了当没有数据时执行下一个循环
dat <- harmonise_data(exposure_dat=exp_dat, outcome_dat=epilepsy)
if (nrow(dat) == 0) {
next
}
res <- mr(dat) #执行mr
reslist[[i]] <- res #把结果写入列表
}
result <- data.frame()
for (i in names(reslist)) {
if (nrow(reslist[[i]]) == 0) {
next
} #当结果为空时跳到下一个循环
r1 <- reslist[[i]]
r1$gene <- i
result <- rbind(result,r1)
}
Important
函数需要根据自己项目的不同进行修改哦 数据名和上面的稍微有点区别,其实也就加了个format_data,应该很好理解
process_gene <- function(i) {
cough1 <- cough[cough$Gene == i,] #这个是eqtl文件
cough1 <- cough1[cough1$p<5E-08]
if (nrow(cough1) == 0) {
return(NULL)
}
exp_dat <- format_data(
dat=cough1,
type = 'exposure',
snp_col = "SNP",
beta_col = "b",
se_col = "SE",
eaf_col = "Freq",
effect_allele_col = "A1",
other_allele_col = "A2",
# samplesize_col = "all_meta_sample_N",
pval_col = "p",
chr_col = "Chr",
pos_col = "BP"
)
asthma2 <- disease[disease$rsid %in% exp_dat$SNP, ] #这个是你的gwas文件
if (nrow(asthma2) == 0) {
return(NULL)
}
asthma <- format_data(
dat=asthma2,
type = "outcome",
header = TRUE,
snps = exp_dat$SNP,
snp_col = "rsid",
beta_col = "inv_var_meta_beta",
se_col = "inv_var_meta_sebeta",
eaf_col = "all_meta_AF",
effect_allele_col = "ALT",
other_allele_col = "REF",
# samplesize_col = "all_meta_sample_N",
pval_col = "inv_var_meta_p",
chr_col = "CHR",
pos_col = "POS"
)
exp_dat <- exp_dat[exp_dat$SNP %in% asthma$SNP, ]
ld <- ld_clump_local(dplyr::tibble(rsid=exp_dat$SNP, pval=exp_dat$pval.exposure),
clump_r2=0.001,clump_kb=10000,clump_p=1, bfile = "./GWAS_data/1000genomic/EUR", plink_bin = "./lin/projects/bin/plink") #执行本地clump
exp_dat <- exp_dat[exp_dat$SNP %in% ld$rsid, ]
if (nrow(exp_dat) == 0) {
return(NULL)
}
dat <- harmonise_data(exposure_dat=exp_dat, outcome_dat=asthma)
if (nrow(dat) == 0) {
return(NULL)
}
res <- mr(dat)
return(res)
}
运行函数
library(parallel)
# 您可以根据您的机器的核心数调整mc.cores的值
reslist <- mclapply(genelist, process_gene, mc.cores = 8)
# 清理结果,移除NULL元素
reslist <- reslist[!sapply(reslist, is.null)]
到这里这个list就是结果啦,自己转成dataframe哦,给你们举个例子差不多是这样写,自己去改哈哈哈
result <- data.frame()
for (i in 1:length(reslist)) {
result <- rbind(result,c(reslist[[i]]$summary,reslist[[i]]$gene))
}