-
Notifications
You must be signed in to change notification settings - Fork 228
/
Copy path11_ProcessCRSP.R
153 lines (131 loc) · 3.92 KB
/
11_ProcessCRSP.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# processes CRSP download
# separate from the download file in case you need to patch the download and
# don't want to wait an hour to test the code
# 2021 04
# ==== MONTHLY CRSP SETUP ====
crspm <- read_fst(
  paste0(pathProject, "Portfolios/Data/Intermediate/m_crsp_raw.fst")
)

# Fold the delisting return (dlret) into the monthly return (ret).
# GHZ cite Johnson and Zhao (2007), Shumway and Warther (1999),
# but the way HXZ does this might be a bit better.
crspm <- crspm %>%
  # Step 1: a missing dlret with delisting code 500 or 520-584 is filled
  # with -0.35 when exchcd is 1 or 2.
  mutate(
    dlret = ifelse(
      is.na(dlret) &
        (dlstcd == 500 | (dlstcd >= 520 & dlstcd <= 584)) &
        (exchcd == 1 | exchcd == 2),
      -0.35,
      dlret
    )
  ) %>%
  # Step 2: same delisting codes, but exchcd 3 gets -0.55. Rows already
  # filled in step 1 are no longer missing, so they are left alone.
  mutate(
    dlret = ifelse(
      is.na(dlret) &
        (dlstcd == 500 | (dlstcd >= 520 & dlstcd <= 584)) &
        (exchcd == 3),
      -0.55,
      dlret
    )
  ) %>%
  # Step 3: floor dlret at -1, then set whatever is still missing to 0 so
  # that it is neutral in the compounding below.
  mutate(
    dlret = ifelse(is.na(dlret) | dlret >= -1, dlret, -1),
    dlret = ifelse(!is.na(dlret), dlret, 0)
  ) %>%
  # Step 4: compound ret with dlret (2022 02 patched ret + dlret < -1
  # problem). When ret itself is missing but dlret is non-zero, the
  # delisting return alone is used as the month's return.
  mutate(
    ret = (1 + ret) * (1 + dlret) - 1,
    ret = ifelse(is.na(ret) & (dlret != 0), dlret, ret)
  )
# Formatting pass: date is coerced with as.Date() first, then ret is scaled
# to percent, me is built from |prc| times shrout, and yyyymm is the numeric
# year-month key (year * 100 + month) derived from the converted date.
crspm <- crspm %>%
  mutate(date = as.Date(date)) %>%
  mutate(
    ret = ret * 100,
    me = shrout * abs(prc),
    yyyymm = 100 * year(date) + month(date)
  )
# keep around me and melag for sanity
# Each row's me is re-keyed to the following month so that joining on
# (permno, yyyymm) attaches the previous month's me as melag. A December key
# (..12) rolls over to January of the next year; every other month is +1.
templag <- crspm %>%
  transmute(
    permno,
    yyyymm = if_else(yyyymm %% 100 == 12, yyyymm + 100 - 11, yyyymm + 1),
    melag = me
  )
## Split crspm into two narrower tables for cleanliness.
gc()

# Returns table: rows with a non-missing ret, plus melag joined from templag
# on (permno, yyyymm), sorted by permno and yyyymm.
crspmret <- crspm %>%
  select(permno, date, yyyymm, ret) %>%
  filter(!is.na(ret)) %>%
  left_join(
    templag,
    by = c("permno", "yyyymm")
  ) %>%
  arrange(permno, yyyymm)

gc()

# Info table: price, exchange code, market equity and share code for every
# row (no ret filter applied here), sorted the same way.
crspminfo <- crspm %>%
  select(
    permno, yyyymm,
    prc, exchcd, me, shrcd
  ) %>%
  arrange(permno, yyyymm)
# add info for easy me quantile screens
# For each yyyymm, compute the 10th and 20th percentile of me among rows with
# exchcd == 1 (the NYSE breakpoints, going by the column names), ignoring
# missing me. The cutoffs are then attached to every crspminfo row of the
# same month.
tempcut <- crspminfo %>%
  filter(exchcd == 1) %>%
  group_by(yyyymm) %>%
  summarize(
    # TRUE rather than T: T is an ordinary variable that can be reassigned,
    # TRUE is a reserved constant.
    me_nyse10 = quantile(me, probs = 0.1, na.rm = TRUE),
    me_nyse20 = quantile(me, probs = 0.2, na.rm = TRUE)
  )
crspminfo <- crspminfo %>%
  left_join(tempcut, by = "yyyymm")
# Persist both monthly tables to the intermediate data folder.
write_fst(
  x = crspmret,
  path = paste0(pathProject, "Portfolios/Data/Intermediate/crspmret.fst")
)
write_fst(
  x = crspminfo,
  path = paste0(pathProject, "Portfolios/Data/Intermediate/crspminfo.fst")
)

## clean up
# Drops every object in the current environment whose name contains "crsp"
# or "temp" (both patterns handled in a single pass).
rm(list = ls(pattern = "crsp|temp"))
# ==== DAILY CRSP SETUP ====
# The whole daily section is skipped when skipdaily is TRUE.
if (!skipdaily) {
  # unlike monthly crsp, we try to do this in place for memory mgmt
  # Only permno, date and ret are read from the raw daily file.
  crspdret <- read_fst(
    paste0(pathProject, "Portfolios/Data/Intermediate/d_crsp_raw.fst"),
    columns = c("permno", "date", "ret")
  ) %>%
    setDT()

  # Drop rows with missing ret, then (by reference) scale ret to percent,
  # coerce date with as.Date() and add the yyyymm key; finally sort by
  # permno and date.
  crspdret <- crspdret[!is.na(ret)]
  crspdret[, `:=`(
    ret = 100 * ret,
    date = as.Date(date),
    yyyymm = year(date) * 100 + month(date)
  )]
  crspdret <- crspdret[order(permno, date)]

  gc()

  ## Calculate passive within-month gains (calc in place)
  # Keying on (permno, yyyymm) is meant to speed up the grouped calculation.
  setkeyv(crspdret, c("permno", "yyyymm"))
  # passgain = running product of (1 + previous row's ret / 100) within each
  # permno-month; the first row of a group uses a lagged return of 0, so its
  # passgain is 1.
  crspdret[
    ,
    passgain := cumprod(
      1 + data.table::shift(ret, fill = 0, type = "lag") / 100
    ),
    by = c("permno", "yyyymm")
  ]

  # merge on last month's lagged me for fast (monthly-rebalanced)
  # value-weighting; other monthly info (e.g. exchcd) is used only in port
  # assignments.
  # Each monthly me is re-keyed to the following month (December rolls over
  # to January of the next year) and renamed melag.
  templag <- read_fst(
    paste0(pathProject, "Portfolios/Data/Intermediate/crspminfo.fst"),
    columns = c("permno", "yyyymm", "me")
  ) %>%
    setDT() %>%
    mutate(
      yyyymm = if_else(yyyymm %% 100 == 12, yyyymm + 100 - 11, yyyymm + 1)
    ) %>%
    transmute(permno, yyyymm, melag = me)
  setkeyv(templag, c("permno", "yyyymm"))

  # left join update by reference: rows of crspdret without a match in
  # templag keep a missing melag.
  crspdret[templag, on = c("permno", "yyyymm"), melag := i.melag]

  # write to disk
  write_fst(
    x = crspdret,
    path = paste0(pathProject, "Portfolios/Data/Intermediate/crspdret.fst")
  )
}

# Remove every object whose name contains "crsp".
# NOTE(review): templag created in the daily branch does not match this
# pattern and therefore stays in the environment - confirm that is intended.
rm(list = ls(pattern = "crsp"))