forked from edmundyan/yelp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHITS.py
71 lines (61 loc) · 1.59 KB
/
HITS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import networkx as nx
from networkx.algorithms import bipartite
from networkx.exception import NetworkXError
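# Local modules.  NOTE: `import bipartite` below rebinds the name bound by
# `from networkx.algorithms import bipartite` above, so `bipartite` refers to the local module.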
import parse
import social
import bipartite
def hits_score(R, B, tol=0.001, stable_fraction=0.99, max_iter=None):
    """Jointly estimate user credibility and business star scores.

    HITS-style mutual reinforcement on a bipartite review graph.  Each pass
    first sets a user's credibility to ``1 / (1 + sum(|stars - score|))`` over
    the businesses that user is connected to, then sets each business score
    to the credibility-weighted mean of the ``stars`` on its edges.  Passes
    repeat until enough business scores have stopped moving.

    Args:
        R: Review graph providing ``nodes(data=True)``, ``neighbors(node)``
            and ``get_edge_data(a, b)`` (presumably a networkx graph).  Nodes
            whose ``bipartite`` attribute equals 0 are treated as users, all
            other nodes as businesses.  Every edge must carry a numeric
            ``stars`` attribute.
        B: Mapping from business node to its initial star score.
        tol: Largest relative change of a business score within one pass for
            that business to count as stable.
        stable_fraction: Fraction of businesses that must be stable for the
            iteration to stop.
        max_iter: Optional cap on the number of non-converged passes.  The
            default ``None`` iterates until the stopping criterion is met.

    Returns:
        A tuple ``(u, b)``: ``u`` maps each user to a credibility rescaled so
        that the most credible user has 5; ``b`` maps each business to its
        final score.

    Raises:
        KeyError: If a business node is missing from ``B``, a node has no
            ``bipartite`` attribute, or an edge has no ``stars`` attribute.
    """
    users = []
    businesses = []
    for node, attrs in R.nodes(data=True):
        if attrs['bipartite'] == 0:
            users.append(node)
        else:
            businesses.append(node)

    u = dict.fromkeys(users, 0)
    b = dict((node, B[node]) for node in businesses)

    n = 0
    while True:
        # Business -> user: credibility falls as a user's ratings stray from
        # the current business scores.
        for user in u:
            total_diff = 0
            for biz in R.neighbors(user):
                total_diff += abs(R.get_edge_data(user, biz)['stars'] - b[biz])
            u[user] = 1.0 / (1.0 + total_diff)
        norm = sum(u.values())
        if norm:  # zero when there are no users; nothing to normalise then
            scale = 1.0 / norm
            for user in u:
                u[user] *= scale

        # User -> business: credibility-weighted mean of the review stars.
        stable = 0
        for biz in b:
            weight = 0.
            weighted_stars = 0.
            for user in R.neighbors(biz):
                credibility = u[user]
                weighted_stars += float(
                    credibility * R.get_edge_data(user, biz)['stars'])
                weight += float(credibility)
            if weight == 0:
                # No weighted reviews: keep the current score instead of
                # overwriting it with 0, which made the relative-change test
                # divide by zero on the following pass.
                stable += 1
                continue
            new_score = weighted_stars / weight
            old_score = b[biz]
            if old_score == 0:
                # Relative change is undefined at 0; stable only if unchanged.
                unchanged = new_score == 0
            else:
                unchanged = abs(old_score - new_score) / abs(float(old_score)) <= tol
            if unchanged:
                stable += 1
            b[biz] = new_score

        # Scale user credibility to a max of 5.
        top = max(u.values()) if u else 0
        if top:
            for user in u:
                u[user] = u[user] / float(top) * 5

        # Stopping criterion: at least `stable_fraction` of the businesses
        # moved by no more than `tol` (relative) during this pass.
        if not b or stable / float(len(b)) >= stable_fraction:
            # Single-argument print(...) gives the same output on Python 2
            # and Python 3.
            print(stable)
            print(len(b))
            break
        n += 1
        print(n)  # progress: number of non-converged passes so far
        if max_iter is not None and n >= max_iter:
            break
    return u, b
def main():
    """Load the review and business data, then run ``hits_score`` on them.

    Both inputs come from the ``bipartite`` module in scope at file level.
    The scores returned by ``hits_score`` are not used any further here.
    """
    # Load the inputs (single-argument print(...) emits the same text on
    # Python 2 and Python 3).
    print("Loading Review file")
    review_graph = bipartite.loadBipartite()
    print("Loading Business file")
    initial_scores = bipartite.loadBusinesses()

    # Analysis.
    user_scores, business_scores = hits_score(review_graph, initial_scores)
# Run the analysis only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()