-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprobability.py
executable file
·124 lines (89 loc) · 2.94 KB
/
probability.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
from __future__ import division
from collections import Counter
#import models
import operator # for sorted function
import math
import retrievalmain as rm
from collections import OrderedDict
# Module-level scratch state written by the probability_* functions below.
# Ordered record of the top-ranked (word, score) pairs that
# probability_third_word collects while it adds fallback candidates.
FINAL_WORDS_order_dic = OrderedDict()
# word -> score mapping filled in by the probability_* functions.
FINAL_WORDS={}
def probability_first_word(word, probable_words, top_n=10):
    """Rank vocabulary words that start with the prefix ``word``.

    Each matching word is scored as ``int(count) / total`` where ``total``
    is the sum of *all* counts in ``probable_words`` (not only the matches).

    Args:
        word: Prefix typed so far; candidates are kept when
            ``candidate.startswith(word)`` is true.
        probable_words: Mapping of candidate word -> numeric count.
        top_n: Maximum number of results to return.

    Returns:
        A list of ``(word, score)`` tuples sorted by descending score, at
        most ``top_n`` long.  An empty list is returned when nothing
        matches, when the counts sum to zero, or when an entry is malformed
        (non-string key, non-numeric count).
    """
    global FINAL_WORDS
    # Start from a clean slate, as the sibling probability_* functions do;
    # otherwise matches from a previous prefix leak into this result.
    FINAL_WORDS = {}

    total = sum(probable_words.values())
    if not total:
        # No probability mass to distribute; avoids dividing by zero.
        return []

    try:
        for candidate, count in probable_words.items():
            if candidate.startswith(word):
                FINAL_WORDS[candidate] = int(count) / total
    except (AttributeError, TypeError, ValueError):
        # Best effort: a malformed entry yields "no suggestions" instead of
        # an exception, but only for the error types such entries produce.
        FINAL_WORDS = {}
        return []

    ranked = sorted(FINAL_WORDS.items(), key=operator.itemgetter(1), reverse=True)
    return ranked[:top_n]
def probability_second_word(first_word, second_word, pw, fwcount, top_n=10):
    """Rank candidate second words by ``count / fwcount``.

    Args:
        first_word: Accepted for interface compatibility; not used here.
        second_word: Accepted for interface compatibility; not used here.
        pw: Mapping of candidate word -> sequence whose first element is
            the count used as the numerator.
        fwcount: Denominator applied to every candidate's count.
        top_n: Maximum number of results to return.

    Returns:
        A list of ``(word, score)`` tuples, highest score first, at most
        ``top_n`` long.  Empty when ``pw`` is empty or ``fwcount`` is zero.
    """
    global FINAL_WORDS
    FINAL_WORDS = {}

    if not fwcount:
        # Nothing to normalise against; avoids ZeroDivisionError.
        return []

    FINAL_WORDS = {p_word: count[0] / fwcount for p_word, count in pw.items()}
    return Counter(FINAL_WORDS).most_common(top_n)
def _relation_score(entry, relation_total):
    """Return ``entry[0] + entry[1] / relation_total`` for one relation entry."""
    if not relation_total:
        # Nothing to normalise against: keep only the first component
        # instead of raising ZeroDivisionError.
        return entry[0]
    return entry[0] + entry[1] / relation_total


def probability_third_word(fc, sc, rft, rst, pw, top_n=5):
    """Rank candidate third words from direct counts plus two relation maps.

    Every word in ``pw`` starts with ``count / sum(pw.values())``.  If the
    word also appears in ``rft`` and/or ``rst`` its relation score
    ``entry[0] + entry[1] / relation_total`` is added, where
    ``relation_total`` is the sum of the first elements of that map.

    When fewer than six words were scored, fallback candidates are added:
    first every ``rst`` word that is not already among the recorded top
    words, then (if fewer than six words are recorded) every such ``rft``
    word.  Fallback words get their relation score only.

    NOTE(review): by operator precedence only ``entry[1]`` is divided by the
    relation total.  The original inline comment could also be read as
    ``(entry[0] + entry[1]) / total``; the arithmetic is kept as written --
    confirm the intended formula.

    Args:
        fc: Accepted for interface compatibility; not used here.
        sc: Accepted for interface compatibility; not used here.
        rft: Mapping word -> indexable entry; elements 0 and 1 are read.
        rst: Mapping word -> indexable entry; elements 0 and 1 are read.
        pw: Mapping word -> numeric count.
        top_n: Maximum number of results to return.

    Returns:
        A list of ``(word, score)`` tuples, highest score first, at most
        ``top_n`` long.
    """
    global FINAL_WORDS, FINAL_WORDS_order_dic
    FINAL_WORDS = {}
    # Fresh record for every call: reusing the previous call's entries would
    # wrongly exclude fallback words and distort the length check below.
    FINAL_WORDS_order_dic = OrderedDict()

    total = sum(pw.values())
    total_rst = sum(entry[0] for entry in rst.values())
    total_rft = sum(entry[0] for entry in rft.values())

    for p_word, count in pw.items():
        # A zero total means no direct evidence; contribute 0.0, don't raise.
        score = count / total if total else 0.0
        if p_word in rft:
            score = score + _relation_score(rft[p_word], total_rft)
        if p_word in rst:
            score = score + _relation_score(rst[p_word], total_rst)
        FINAL_WORDS[p_word] = score

    # Boundary condition: too few candidates were found directly.
    if len(FINAL_WORDS) < 6:
        FINAL_WORDS_order_dic.update(Counter(FINAL_WORDS).most_common(top_n))
        for p_word, entry in rst.items():
            if p_word not in FINAL_WORDS_order_dic:
                FINAL_WORDS[p_word] = _relation_score(entry, total_rst)
        FINAL_WORDS_order_dic.update(Counter(FINAL_WORDS).most_common(top_n))

        if len(FINAL_WORDS_order_dic) < 6:
            for p_word, entry in rft.items():
                if p_word not in FINAL_WORDS_order_dic:
                    FINAL_WORDS[p_word] = _relation_score(entry, total_rft)
            FINAL_WORDS_order_dic.update(Counter(FINAL_WORDS).most_common(top_n))

    return Counter(FINAL_WORDS).most_common(top_n)
def probability_fourth_word(pw, top_n=10):
    """Rank candidate words by their share of the total count.

    Args:
        pw: Mapping of candidate word -> numeric count.
        top_n: Maximum number of results to return.

    Returns:
        A list of ``(word, count / total)`` tuples, highest share first, at
        most ``top_n`` long.  Empty when ``pw`` is empty or its counts sum
        to zero.
    """
    global FINAL_WORDS
    FINAL_WORDS = {}

    total = sum(pw.values())
    if not total:
        # No probability mass to distribute; avoids ZeroDivisionError.
        return []

    FINAL_WORDS = {p_word: count / total for p_word, count in pw.items()}
    return Counter(FINAL_WORDS).most_common(top_n)