-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
119 lines (93 loc) · 4.47 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import streamlit as st
import pickle
import string
import nltk
import pandas as pd
# Text Transformer
def transform(string_input):
# Lowercasing
string_input = string_input.lower()
# Tokenizing
string_input = nltk.word_tokenize(string_input)
# Removing special characters
import re
string_input = [re.sub('[^a-zA-Z0-9]+', '', _) for _ in string_input]
# Removing stopwords and stemming
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()
stop_words = set(stopwords.words('english'))
# Applying stemming and removing stopwords
string_input = [ps.stem(i) for i in string_input if i not in stop_words]
# Joining the processed words
string_input = ' '.join(string_input)
return string_input
tfidf = pickle.load(open('vectorizer.pkl', 'rb'))
model = pickle.load(open('model_bnb.pkl', 'rb'))
def main():
# Initialize a session state variable that tracks the sidebar state (either 'expanded' or 'collapsed').
if 'sidebar_state' not in st.session_state:
st.session_state.sidebar_state = 'collapsed'
# Streamlit set_page_config method has a 'initial_sidebar_state' argument that controls sidebar state.
st.set_page_config(initial_sidebar_state=st.session_state.sidebar_state)
st.title("Spam Detector by Streaks V.2")
input_text = st.text_area("Enter the Message").strip()
if st.button('Predict'):
if not input_text:
st.error("Please Enter a Text")
else:
# 1. preprocess
transformed_input = transform(input_text)
# 2. vectorize
vector_input = tfidf.transform([transformed_input])
# 3. predict
result = model.predict(vector_input)[0]
# 4. Display
if result == 1:
st.header("Spam")
else:
st.header("Not Spam")
st.subheader('Not Correct?')
st.markdown('Help the model by contributing your data anonymously')
# Toggle sidebar state between 'expanded' and 'expanded'.
if st.button('Contribute!'):
st.session_state.sidebar_state = 'expanded' if st.session_state.sidebar_state == 'collapsed' else 'collapsed'
# Force an app rerun after switching the sidebar state.
st.experimental_rerun()
st.sidebar.subheader('Make Contribution Here')
contribute_text = st.sidebar.text_area("Enter the Message you want to contribute").strip()
contribute_label = st.sidebar.radio("Select the Label", ["0", "1"], index=None, captions=['Not Spam', 'Spam'])
if st.sidebar.button('Finish Contribution'):
if not contribute_text:
st.sidebar.error("Please Enter a Text")
else:
# Save the contribution to a CSV file
contribution_data = {'text': [contribute_text], 'target': [contribute_label]}
try:
contribution_df = pd.read_csv('contributions.csv')
contribution_df = pd.concat([contribution_df, pd.DataFrame(contribution_data)], ignore_index=True)
contribution_df.to_csv('contributions.csv', index=False)
st.success("Thanks for your contribution!")
except FileNotFoundError:
contribution_df = pd.DataFrame(contribution_data)
contribution_df.to_csv('contributions.csv', index=False)
st.success("Thanks for your contribution!")
# Read the contributions
contribution_df = pd.read_csv('contributions.csv')
if len(contribution_df) >= 2:
try:
# Transform and vectorize the contributions
contribution_df['text'] = contribution_df['text'].apply(transform)
X_contribution = tfidf.transform(contribution_df['text'])
y_contribution = contribution_df['target']
# Update the model with new data
model.partial_fit(X_contribution, y_contribution, classes=[0, 1])
# Clear the contributions DataFrame
contribution_df = pd.DataFrame(columns=['text', 'target'])
contribution_df.to_csv('contributions.csv', index=False)
st.success("Model Updated")
except FileNotFoundError:
st.error("No contributions found for retraining!")
st.warning('The App still fails to identify transactional messages correctly. Use the app with caution. More contributions would be really helpful in training the model on those types of messages.', icon="⚠️")
if __name__ == "__main__":
main()