-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2_Eye_Methylation_Age_Calculation.R
102 lines (86 loc) · 5.85 KB
/
2_Eye_Methylation_Age_Calculation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
######################################################
### This code is adapted from Steve Horvath R script
### for estimating DNA methylation age.
### the following script and files must be downloaded to your working directory:
### 1) probeAnnotation21kdatMethUsed.csv
### 2) datMiniAnnotation27k.csv
### 3) AdditionalFile3.csv
### 4) StepwiseAnalysis.txt
### 5) https://labs.genetics.ucla.edu/horvath/dnamage/NORMALIZATION.R
#####################################################
library(WGCNA)
library(sqldf)
library(impute)
library(parallel)
source("~/Desktop/Research/AMD/Methylation/EyeBank/Data/analysis/NORMALIZATION.R")
setwd("~/Desktop/Research/AMD/Methylation/EyeBank/Data/analysis/")
######################Age transformation and probe annotation functions#####################
trafo= function(x,adult.age=20) { x=(x+1)/(1+adult.age); y=ifelse(x<=1, log( x),x-1);y }
anti.trafo= function(x,adult.age=20) { ifelse(x<0, (1+adult.age)*exp(x)-1, (1+adult.age)*x+adult.age) }
probeAnnotation21kdatMethUsed= read.csv("probeAnnotation21kdatMethUsed.csv")
probeAnnotattion27k=read.csv("datMiniAnnotation27k.csv")
datClock= read.csv("AdditionalFile3.csv")
##########################Step2.1#########################
#Read in the DNA methylation data (beta values)
dat0=read.csv.sql("eyebetas.csv");
closeAllConnections()
nSamples=dim(dat0)[[2]]-1
nProbes= dim(dat0)[[1]]
#the following command may not be needed. But it is sometimes useful when you use read.csv.sql
dat0[,1]= gsub(x=dat0 [,1],pattern="\"",replacement="")
file.remove("LogFile.txt")
file.create("LogFile.txt")
DoNotProceed=FALSE
cat(paste( "The methylation data set contains", nSamples, "samples (e.g. arrays) and ", nProbes, " probes."),file="LogFile.txt")
if (nSamples==0)
{DoNotProceed=TRUE; cat(paste( "\n ERROR: There must be a data input error since there seem to be no samples.\n
Make sure that you input a comma delimited file (.csv file)\n that can be read using the R command read.csv.sql .
Samples correspond to columns in that file ."), file="LogFile.txt",append=TRUE) }
if (nProbes==0)
{DoNotProceed=TRUE; cat(paste( "\n ERROR: There must be a data input error since there seem to be zero probes.\n
Make sure that you input a comma delimited file (.csv file)\n
that can be read using the R command read.csv.sql CpGs correspond to rows.") ,
file="LogFile.txt",append=TRUE) }
if ( nSamples > nProbes )
{ cat(paste( "\n MAJOR WARNING: It worries me a lot that there are more samples than CpG probes.\n
Make sure that probes correspond to rows and samples to columns.\n
I wonder whether you want to first transpose the data and then resubmit them?
In any event, I will proceed with the analysis."),file="LogFile.txt",append=TRUE) }
if ( is.numeric(dat0[,1]) )
{ DoNotProceed=TRUE; cat(paste( "\n Error: The first column does not seem to contain probe identifiers (cg numbers from Illumina)
since these entries are numeric values. Make sure that the first column of the file contains probe
identifiers such as cg00000292. Instead it contains ", dat0[1:3,1] ),file="LogFile.txt",append=TRUE) }
if ( !is.character(dat0[,1]) )
{ cat(paste( "\n Major Warning: The first column does not seem to contain probe identifiers (cg numbers from Illumina)
since these entries are numeric values. Make sure that the first column of the file contains CpG probe identifiers such as cg00000292.
Instead it contains ", dat0[1:3,1] ),file="LogFile.txt",append=TRUE) }
datout=data.frame(Error=c("Input error. Please check the log file for details","Please read the instructions carefully."),
comment=c(""," "))
if ( ! DoNotProceed ){
nonNumericColumn=rep(FALSE, dim(dat0)[[2]]-1)
for (i in 2:dim(dat0)[[2]] ){ nonNumericColumn[i-1]=! is.numeric(dat0[,i]) }
if ( sum(nonNumericColumn) >0 ){ cat(paste( "\n MAJOR WARNING: Possible input error. The following samples contain non-numeric beta values: ",
colnames(dat0)[-1][ nonNumericColumn], "\n Hint: Maybe you use the wrong symbols for missing data.
Make sure to code missing values as NA in the Excel file. To proceed, I will force the entries into numeric values but make sure this makes sense.\n" ),
file="LogFile.txt",append=TRUE) }
}
match1=match(probeAnnotation21kdatMethUsed$Name, dat0[,1])
if ( sum( is.na(match1))>0 )
{ missingProbes= probeAnnotation21kdatMethUsed$Name[!is.element(probeAnnotation21kdatMethUsed$Name,dat0[,1])]
DoNotProceed=TRUE; cat(paste( "\n \n Input error: You forgot to include the following ", length(missingProbes),
" CpG probes (or probe names):\n ", paste( missingProbes, sep="",collapse=", ")),file="LogFile.txt",append=TRUE) }
##################Step 2.2 restrict the data######################################
match1= match(probeAnnotation21kdatMethUsed$Name, dat0[,1])
if ( sum( is.na(match1))>0 ) stop(paste(sum( is.na(match1)), "CpG probes cannot be matched"))
dat1= dat0[match1,]
asnumeric1=function(x) {as.numeric(as.character(x))}
dat1[,-1]=apply(as.matrix(dat1[,-1]),2,asnumeric1)
####################Step 2.3 Create the outputfile#########################
set.seed(1)
normalizeData=TRUE
source("~/Desktop/Research/AMD/Methylation/EyeBank/Data/analysis/StepwiseAnalysis.txt")
#####################Step 2.4 output the results#############################
if ( sum( datout$Comment != "" ) ==0 ) { cat(paste( "\n The individual samples appear to be fine. "),file="LogFile.txt",append=TRUE) }
if ( sum( datout$Comment != "" ) >0 ) { cat(paste( "\n Warnings were generated for the following samples.\n", datout[,1][datout$Comment != ""], "\n Hint: Check the output file for more details."),file="LogFile.txt",append=TRUE) }
# output the results into the directory
write.table(datout,"EyeAge_Output.csv", row.names=F, sep="," )