-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmodel.rb
81 lines (67 loc) · 3.13 KB
/
model.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# Usage example:
# ./query.rb https://www.reddit.com/r/personalfinance/comments/owhy0r/leverage_through_leaps_for_the_diy_investor/
require 'pry'
require 'scylla' # https://github.com/hashwin/scylla
require 'nlp_pure/segmenting/default_word' # https://github.com/parhamr/nlp-pure
require 'nlp_pure/segmenting/default_sentence'
require 'json'
# Estimates how much score a Reddit post will have gained by the time it is
# archived. Post features are extracted from the post JSON and sent to a
# prediction endpoint on aiplatform.googleapis.com.
module Model
  extend self

  # Raised when the prediction endpoint's response carries no usable prediction.
  class PredictionError < StandardError; end

  ENDPOINT_ID = "8391419966628298752".freeze
  PROJECT_ID = "apt-memento-329311".freeze

  # Age, in days, at which this module treats a post as archived.
  ARCHIVE_AFTER_DAYS = 180
  SECONDS_PER_DAY = 60 * 60 * 24

  # Builds a rating estimate for a post.
  #
  # @param post_data [String] JSON text whose first element holds the post
  #   under data -> children -> first -> data
  # @return [Hash] for archived posts (or posts at least ARCHIVE_AFTER_DAYS
  #   old): `{days: 0, expected_rating:, current_rating:}`; otherwise
  #   `{will_be_archived_in_days:, expected_rating:, current_rating:}`
  # @raise [ArgumentError] when no post can be found in post_data
  # @raise [PredictionError] when the endpoint returns no prediction value
  def get_estimate(post_data)
    post = JSON.parse(post_data).dig(0, 'data', 'children', 0, 'data')
    raise ArgumentError, 'post_data does not contain a post listing' unless post

    current_rating = post['score'].to_i
    passed_days = get_passed_days(post)

    # Nothing left to predict once the post is archived.
    if post['archived'] || passed_days >= ARCHIVE_AFTER_DAYS
      return { days: 0, expected_rating: current_rating, current_rating: current_rating }
    end

    target_days = ARCHIVE_AFTER_DAYS - passed_days
    response = query_google(prepare_data(post, passed_days, target_days))

    {
      will_be_archived_in_days: target_days,
      expected_rating: current_rating + predicted_gain(response),
      current_rating: current_rating
    }
  end

  private

  # POSTs the JSON payload to the prediction endpoint and returns the raw
  # response body (curl's stdout).
  #
  # Both external commands are started with an argv array, so no shell ever
  # parses the payload or the token: values originating from the post JSON
  # cannot be interpreted as shell syntax, and the JSON arrives unmodified.
  def query_google(data)
    token = IO.popen(%w[gcloud auth print-access-token], &:read).strip
    url = "https://europe-west4-aiplatform.googleapis.com/v1/projects/#{PROJECT_ID}" \
          "/locations/europe-west4/endpoints/#{ENDPOINT_ID}:predict"
    command = [
      'curl', '-X', 'POST',
      '-H', "Authorization: Bearer #{token}",
      '-H', 'Content-Type: application/json',
      url,
      '-d', data
    ]
    IO.popen(command, &:read)
  end

  # Extracts the predicted score gain (truncated to an Integer) from the
  # endpoint's JSON response.
  def predicted_gain(response)
    value = JSON.parse(response).dig('predictions', 0, 'value')
    raise PredictionError, "unexpected prediction response: #{response}" if value.nil?

    value.to_i
  end

  # Whole days elapsed since the post's "created_utc" timestamp.
  # The division is done in floating point and truncated so that a float
  # timestamp still yields an Integer.
  def get_passed_days(post)
    ((Time.now.to_i - post["created_utc"]).to_f / SECONDS_PER_DAY).to_i
  end

  # True when the post is archived or the requested horizon reaches past
  # ARCHIVE_AFTER_DAYS.
  # NOTE(review): the name suggests the opposite polarity, and nothing in this
  # file calls the method - confirm the intent before relying on it.
  def post_valid?(post, target_days)
    post['archived'] || get_passed_days(post) + target_days > ARCHIVE_AFTER_DAYS
  end

  # True when the post carries any media or a non-empty media embed.
  # Missing / null embed fields count as "no media" instead of raising.
  def any_media?(post)
    return true if post['media'] || post['secure_media']

    [post['media_embed'], post['secure_media_embed']].any? { |embed| embed.respond_to?(:any?) && embed.any? }
  end

  # Serializes the model features for one post as the JSON request body:
  # `{"instances": [{feature => value, ...}]}` with every value stringified.
  #
  # @param post [Hash] the post's data hash
  # @param passed_days [Integer] days since the post was created
  # @param target_days [Integer] prediction horizon in days
  # @return [String] JSON text
  def prepare_data(post, passed_days, target_days)
    # A missing title/selftext is treated as empty text.
    title = post['title'].to_s
    body = post['selftext'].to_s

    features = {
      'target_days' => target_days,
      'passed_days' => passed_days,
      'current_score' => post['score'],
      'upvote_ratio' => post['upvote_ratio'],
      'nsfw' => post['thumbnail'] == 'nsfw',
      'spoiler' => post['spoiler'],
      'over_18' => post['over_18'],
      'distinguished' => !!post['distinguished'],
      'gilded' => post['gilded'],
      'num_comments' => post['num_comments'],
      'media_only' => post['media_only'],
      'any_media' => any_media?(post),
      'locked' => post['locked'],
      'hide_score' => post['hide_score'],
      'stickied' => post['stickied'],
      'contest_mode' => post['contest_mode'],
      'subscribers' => post['subreddit_subscribers'],
      # String#language is provided by the scylla gem required by this file.
      'is_english' => "#{title} #{body}".language == 'english',
      'title_symbols' => title.gsub(/\s+/, '').size,
      'title_words' => NlpPure::Segmenting::DefaultWord.parse(title).size,
      'body_symbols' => body.gsub(/\s+/, '').size,
      'body_words' => NlpPure::Segmenting::DefaultWord.parse(body).size,
      'body_sentences' => NlpPure::Segmenting::DefaultSentence.parse(body).reject(&:empty?).size
    }

    # Values go out as plain strings; to_json takes care of all quoting.
    { 'instances' => [features.transform_values(&:to_s)] }.to_json
  end
end