bot.py
import os
from numpy import array
from pickle import load as pickle_load

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'  # silence TensorFlow's INFO-level startup messages

import tensorflow as tf

tf.get_logger().setLevel('ERROR')  # standard logging levels: 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL'

from json import load as json_load
from tensorflow.keras.models import load_model
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from cppyy import gbl, include

include("/static/include/cpplib.hpp")
random_choice = gbl.random_choice
def ensure_nltk_data_downloaded():
    from nltk import download, data
    try:
        data.find('tokenizers/punkt')
    except LookupError:
        download('punkt')
    try:
        data.find('corpora/wordnet')
    except LookupError:
        download('wordnet')


ensure_nltk_data_downloaded()
lemmatizer = WordNetLemmatizer()

with open("intents.json") as file:
    data = json_load(file)

with open('words.pkl', 'rb') as f:
    words = pickle_load(f)
with open('classes.pkl', 'rb') as f:
    classes = pickle_load(f)

model = load_model('model_v12.h5')

responses_if_no_intent = [
    "Sorry, I don't understand.",
    "I'm not sure what you mean.",
    "I don't understand.",
    "I'm not sure I understand.",
]
# tokenize the sentence, then lemmatize and lowercase each word
def clean_up_sentence(sentence):
    sentence_words = word_tokenize(sentence)  # split the pattern into a list of tokens
    # lemmatize each token to its base form so related word forms map to the same entry
    sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]
    return sentence_words
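
# Illustrative example (not from the original file) of what the cleanup produces:
#   clean_up_sentence("How are you?") -> ['how', 'are', 'you', '?']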
# return a bag-of-words array: 1 for each vocabulary word present in the sentence, else 0
def bow(sentence, words):
    sentence_words = clean_up_sentence(sentence)  # tokenize and lemmatize the pattern
    bag = [0] * len(words)  # one slot per vocabulary word
    for s in sentence_words:
        for i, w in enumerate(words):
            if w == s:  # mark the vocabulary position of this word
                bag[i] = 1
    return array(bag)
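
# Illustrative example (hypothetical vocabulary, not the trained words.pkl):
#   with words = ['hello', 'how', 'you'],
#   bow("How are you", words) -> array([0, 1, 1])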
# predict the class of the sentence using the model and return the probability of each class
def predict_class(sentence, model):
    p = bow(sentence, words)  # encode the sentence as a bag-of-words vector
    res = model.predict(array([p]))[0]  # probability for each class
    ERROR_THRESHOLD = 0.25
    # filter out predictions below the threshold (e.g. 0.25)
    results = [[i, r] for i, r in enumerate(res) if r > ERROR_THRESHOLD]
    results.sort(key=lambda x: x[1], reverse=True)  # sort by strength of probability (highest first)
    # build a list of intent/probability pairs (e.g. ("greeting", 0.999))
    return_list = []
    for r in results:
        return_list.append({
            "intent": classes[r[0]],
            "probability": str(r[1])
        })
    return return_list
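
# Illustrative shape of the return value (tag and probability are made up):
#   predict_class("hi there", model) -> [{"intent": "greeting", "probability": "0.97"}]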
# get a random response from the intents file for the tag that was predicted
def get_response(ints, intents_json):
    if not ints:  # no intent cleared the threshold
        return random_choice(responses_if_no_intent)
    tag = ints[0]["intent"]  # tag of the strongest prediction
    for i in intents_json["intents"]:  # find the matching intent tag
        if i["tag"] == tag:
            return random_choice(i["responses"])
    # fall back if the predicted tag is missing from intents.json
    return random_choice(responses_if_no_intent)
def chatbot_response(msg):
    ints = predict_class(msg, model)
    res = get_response(ints, data)
    return res
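

# A minimal interactive loop for manual testing; a hypothetical addition, assuming
# intents.json, words.pkl, classes.pkl, and model_v12.h5 loaded successfully above.
if __name__ == "__main__":
    while True:
        msg = input("You: ")
        if msg.lower() == "quit":  # type "quit" to exit
            break
        print("Bot:", chatbot_response(msg))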