-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprint_comments.Rmd
120 lines (88 loc) · 2.41 KB
/
preprint_comments.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
---
title: "R Notebook"
---
```{r}
library(tidyverse)
```
```{r}
# Disqus API key, read from the DISQUS_API_KEY environment variable.
# Obtaining a key requires registration with Disqus. When the variable is
# not set, `key` is the empty string.
key <- Sys.getenv("DISQUS_API_KEY", unset = "")
#' Send a GET request to `url` and return the parsed response body.
#'
#' @param url Complete request URL, including the query string.
#' @return The response body as parsed by `httr::content()`.
#' @details Signals an error when the server answers with an HTTP error
#'   status (4xx/5xx), so a failed request is never handed to the caller as
#'   if it were data.
getThreadsFromUrl <- function(url) {
  response <- httr::GET(url)
  # Fail here, with the HTTP status in the message, rather than letting the
  # caller trip over a payload that lacks the fields it expects.
  httr::stop_for_status(response, task = "query the Disqus API")
  httr::content(response)
}
# Extraction of relevant information
#' Turn one thread entry into a one-row tibble of comment information.
#'
#' @param item A list describing a single thread; the fields read are
#'   `link`, `id` and `posts`.
#' @return A tibble with columns `doi`, `disqus_thread_id` and
#'   `comments_count`, or `NULL` when `item` has at most one element.
extractCommentCounts <- function(item) {
  # Entries with at most one element yield no row.
  if (length(item) <= 1) {
    return(invisible(NULL))
  }
  # Keep the link from "10.1101" onward, then drop everything from the first
  # "v" in that remainder (presumably the version suffix, e.g. "v2").
  doi_with_suffix <- str_extract(item$link, "10.1101.*")
  doi_clean <- str_replace(doi_with_suffix, "v.*", "")
  tibble(
    doi = doi_clean,
    disqus_thread_id = item$id,
    comments_count = item$posts
  )
}
```
```{r}
# Download every thread of the "medrxiv" Disqus forum, 100 per request.
# Each response carries a cursor: `cursor$hasNext` tells whether another page
# exists and `cursor$next` is the token that requests it.
medrxiv_base_url <- paste0(
  "https://disqus.com/api/3.0/forums/listThreads.json",
  "?forum=medrxiv&limit=100&api_key=", key
)
medrxiv_pages <- list()
cursor <- NULL
has_next <- TRUE
while (has_next) {
  if (length(cursor) > 0) {
    url <- paste0(medrxiv_base_url, "&cursor=", cursor)
  } else {
    url <- medrxiv_base_url
  }
  d <- getThreadsFromUrl(url)
  # A payload without cursor information cannot be paged (for example an
  # error object). Stop with a clear message instead of failing on a
  # zero-length `if` condition.
  if (!is.list(d) || is.null(d$cursor$hasNext)) {
    stop(
      "Unexpected response from the Disqus API for forum 'medrxiv': ",
      "no cursor information returned.",
      call. = FALSE
    )
  }
  # Keep each page as one element and flatten once after the loop, so the
  # accumulated thread list is not re-copied on every request.
  medrxiv_pages[[length(medrxiv_pages) + 1L]] <- d$response
  if (isTRUE(d$cursor$hasNext)) {
    cursor <- d$cursor$`next`
  } else {
    has_next <- FALSE
  }
}
medrxiv_data <- do.call(c, medrxiv_pages)

# One row per DOI. Exact duplicate thread rows are dropped first, then the
# counts of all remaining threads that resolve to the same DOI are summed.
medrxiv_comment_counts <- map_dfr(medrxiv_data, extractCommentCounts) %>%
  distinct() %>%
  group_by(doi) %>%
  summarize(comments_count = sum(comments_count)) %>%
  ungroup()
```
```{r}
#Sys.sleep(2000)
# Download every thread of the "biorxivstage" Disqus forum, 100 per request.
# Each response carries a cursor: `cursor$hasNext` tells whether another page
# exists and `cursor$next` is the token that requests it.
biorxiv_base_url <- paste0(
  "https://disqus.com/api/3.0/forums/listThreads.json",
  "?forum=biorxivstage&limit=100&api_key=", key
)
biorxiv_pages <- list()
cursor <- NULL
has_next <- TRUE
while (has_next) {
  if (length(cursor) > 0) {
    url <- paste0(biorxiv_base_url, "&cursor=", cursor)
  } else {
    url <- biorxiv_base_url
  }
  d <- getThreadsFromUrl(url)
  # A payload without cursor information cannot be paged (for example an
  # error object). Stop with a clear message instead of failing on a
  # zero-length `if` condition.
  if (!is.list(d) || is.null(d$cursor$hasNext)) {
    stop(
      "Unexpected response from the Disqus API for forum 'biorxivstage': ",
      "no cursor information returned.",
      call. = FALSE
    )
  }
  # Keep each page as one element and flatten once after the loop, so the
  # accumulated thread list is not re-copied on every request.
  biorxiv_pages[[length(biorxiv_pages) + 1L]] <- d$response
  if (isTRUE(d$cursor$hasNext)) {
    cursor <- d$cursor$`next`
  } else {
    has_next <- FALSE
  }
}
biorxiv_data <- do.call(c, biorxiv_pages)

# One row per DOI. Exact duplicate thread rows are dropped first, then the
# counts of all remaining threads that resolve to the same DOI are summed.
biorxiv_comment_counts <- map_dfr(biorxiv_data, extractCommentCounts) %>%
  distinct() %>%
  group_by(doi) %>%
  summarize(comments_count = sum(comments_count)) %>%
  ungroup()
```
# Create final dataset
```{r}
# Stack the medRxiv and bioRxiv counts and total them per DOI. The right join
# then keeps every DOI listed in `preprints` (not defined in this part of the
# notebook); DOIs without a match get NA in `comments_count`.
preprint_comment_counts <-
  bind_rows(medrxiv_comment_counts, biorxiv_comment_counts) %>%
  group_by(doi) %>%
  summarise(comments_count = sum(comments_count)) %>%
  ungroup() %>%
  right_join(select(preprints, doi), by = "doi") %>%
  distinct()

# Write the final table to disk.
write_csv(
  preprint_comment_counts,
  "data/preprint_comments_20190101_20201031.csv"
)
```