-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathindex.js
135 lines (119 loc) · 3.76 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
const Axios = require('axios').default
const Cheerio = require('cheerio')
/**
 * Builds the Rotten Tomatoes audience-review URL for a title
 *
 * @param {String} slug Identifier for the movie or TV show
 * @param {Number} page Number of the review page to link to
 * @param {Boolean} isTV State whether given slug is a TV show or not
 * @returns {String} Absolute URL of the requested user-review page
 */
const movieUrl = (slug, page, isTV = false) => {
  // TV shows live under /tv/, movies under /m/
  const section = isTV ? 'tv' : 'm'
  const reviewsPath = `${section}/${slug}/reviews/`
  const query = `?page=${page}&type=user`
  return `https://www.rottentomatoes.com/${reviewsPath}${query}`
}
/**
 * Requests one audience-review page over HTTP
 *
 * @param {String} slug Identifier for the movie or TV show
 * @param {Number} page Number of the review page to be retrieved
 * @param {Boolean} isTV State whether given slug is a TV show or not
 * @returns {Promise} The promise returned by Axios.get for the page URL
 */
const getPage = (slug, page, isTV) => {
  const url = movieUrl(slug, page, isTV)
  return Axios.get(url)
}
/**
 * Extracts every audience review found in a reviews page
 *
 * Each `.review_table_row` element yields one entry holding the reviewer name,
 * the review date, the star rating and the review text.
 *
 * @param {String} data HTML markup of a reviews page, as fetched by getPage
 * @returns {Array<Object>} One `{ reviewer, date, stars, review }` object per
 * review row, in the order the rows are matched
 */
const scrapePage = data => {
  const $ = Cheerio.load(data)
  const collected = []

  $('.review_table_row').each((index, rowElement) => {
    const row = $(rowElement)
    // Trimmed text content of whatever matches the selector inside this row
    const textOf = selector =>
      row
        .find(selector)
        .text()
        .trim()

    // Every star glyph counts as one; a "½" span contributes the extra half
    const fullStars = row.find('.glyphicon.glyphicon-star').length
    const halfStar = row.find('span:contains("½")').length > 0 ? 0.5 : 0

    collected.push({
      reviewer: textOf('.bold.unstyled.articleLink'),
      date: textOf('.fr.small.subtle'),
      stars: fullStars + halfStar,
      review: textOf('.user_review'),
    })
  })

  return collected
}
/**
 * Fetches audience reviews for the given parameters
 *
 * Page 1 is requested first so the paginator can be read; the remaining pages
 * needed to satisfy `reviewCount` are then requested in parallel and scraped.
 *
 * @param {String} slug Identifier for the movie or TV show
 * @param {Number} reviewCount Maximum number of reviews to return; when
 * omitted, every review found on the fetched pages is returned
 * @param {Boolean} isTV State whether given slug is a TV show or not
 * @returns {Promise<Array<Object>>} Resolves with the scraped reviews
 * (`{ reviewer, date, stars, review }`), truncated to `reviewCount`. Rejects
 * with `{ status: 404, message }` when a page responds with 404, or with
 * `{ message }` when a request fails for any other reason.
 */
const getAudienceReviews = async (slug, reviewCount, isTV = false) => {
  // Number of reviews a full page is assumed to hold
  const REVIEWS_PER_PAGE = 20

  // Maps a failed request onto the plain rejection objects callers receive
  const toRejection = error => {
    // `error.response` is absent when no HTTP response was received at all
    const status = error && error.response ? error.response.status : undefined
    if (status === 404) {
      return {
        status: 404,
        message:
          `⚠️ Page not found for '${slug}'. You can check the page manually by opening this link:\n` +
          movieUrl(slug, 1, isTV),
      }
    }
    return {
      message: `⚠️ An error occured, please try again.`,
    }
  }

  // Requests the given page numbers in parallel; every request failure is
  // reported through the same rejection shape, including the very first page
  const fetchPages = async pageNumbers => {
    try {
      return await Promise.all(
        pageNumbers.map(pageNumber => getPage(slug, pageNumber, isTV))
      )
    } catch (error) {
      // A plain object is thrown on purpose to keep the rejection contract
      throw toRejection(error)
    }
  }

  const [firstPage] = await fetchPages([1])

  // The paginator is expected to read like "Page 1 of 12"; fall back to a
  // single page when it is missing or cannot be parsed
  const $ = Cheerio.load(firstPage.data)
  const paginatorText = $('.pageInfo').html() || ''
  const paginatorMatch = /of\s+(\d+)/.exec(paginatorText)
  const maxPage = paginatorMatch
    ? Math.max(Number.parseInt(paginatorMatch[1], 10), 1)
    : 1

  // Only fetch as many pages as the requested count needs. An omitted,
  // negative or non-numeric count keeps the fetch-everything behaviour so the
  // final slice yields what it always did.
  const pagesNeeded = Math.ceil(reviewCount / REVIEWS_PER_PAGE)
  const canLimit = reviewCount >= 0 && pagesNeeded < maxPage
  const lastPage = canLimit ? Math.max(pagesNeeded, 1) : maxPage

  // Page 1 is already in hand, so only pages 2..lastPage are requested
  const remainingPageNumbers = []
  for (let pageNumber = 2; pageNumber <= lastPage; pageNumber++) {
    remainingPageNumbers.push(pageNumber)
  }
  const remainingPages = await fetchPages(remainingPageNumbers)

  const reviews = []
  for (const response of [firstPage, ...remainingPages]) {
    reviews.push(...scrapePage(response.data))
  }

  // slice caps the result at the number of reviews actually available
  return reviews.slice(0, reviewCount)
}
// Exposes getAudienceReviews as the module's export
module.exports = { getAudienceReviews }