Commit 3e49fc7 (1 parent: 02605fb). Showing 17 changed files with 5,931 additions and 28 deletions.
@@ -0,0 +1 @@
web: gunicorn app:app
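This single Procfile line tells Heroku to serve the site with Gunicorn, importing the WSGI callable named `app` from `app.py`. As a stdlib-only sketch, a minimal callable of the kind `gunicorn app:app` loads looks like this (the repository's real `app` is a Flask application object, which implements the same interface):

```python
# Minimal WSGI callable of the kind `gunicorn app:app` expects to import.
# Illustration only: the real `app` in this repository is a Flask instance.
def app(environ, start_response):
    body = b"Hello from a WSGI app"
    # A WSGI app calls start_response with a status and header list,
    # then returns an iterable of bytes for the response body.
    start_response("200 OK", [("Content-Type", "text/plain")])
    return [body]
```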
@@ -1,28 +1,25 @@
# Spam SMS Classification - Deployment
![Kaggle](https://img.shields.io/badge/Dataset-Kaggle-blue.svg) ![Python 3.6](https://img.shields.io/badge/Python-3.6-brightgreen.svg) ![NLTK](https://img.shields.io/badge/Library-NLTK-orange.svg)

• This repository contains the files required to deploy a ___Machine Learning Web App___ built with ___Flask___ on the ___Heroku___ platform.

• To view the deployed model, click on the following link:<br />
Deployed at: _https://spam-cheker.herokuapp.com/_

• Here's a glimpse of the web page:

**Home Page**:

![homepage](readme_resources/1.jpg)

**Result Page**:

![resultpage](readme_resources/2.jpg)

• Please ⭐ the repository if it helped you in any way.

_**----- Important Note -----**_<br />
• If the web app shows the error pictured below, it is because the **free dynos provided by Heroku for this month have been completely used up.** _You can access the web page again on the 1st of the next month._<br />
• Sorry for the inconvenience.
• Alternatively, you may not have allowed a recent enough version of **Gunicorn**. To avoid this error, use **>=** before the version number in `requirements.txt` (I ran into this error myself).

![Heroku-Error](readme_resources/application-error-heroku.png)
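The version pin described in the note above would look like the following in `requirements.txt`; the `20.0.4` value is only an illustration, so pin whatever minimum release works for your app:

```
Flask
gunicorn>=20.0.4
```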
@@ -0,0 +1,66 @@
# Importing essential libraries
import pandas as pd
import pickle

# Loading the dataset
df = pd.read_csv('Spam SMS Collection', sep='\t', names=['label', 'message'])

# Importing essential libraries for performing Natural Language Processing on the 'SMS Spam Collection' dataset
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Cleaning the messages
corpus = []
ps = PorterStemmer()

for i in range(0, df.shape[0]):
    # Cleaning special characters from the message
    message = re.sub(pattern='[^a-zA-Z]', repl=' ', string=df.message[i])

    # Converting the entire message into lower case
    message = message.lower()

    # Tokenizing the message by words
    words = message.split()

    # Removing the stop words
    words = [word for word in words if word not in set(stopwords.words('english'))]

    # Stemming the words
    words = [ps.stem(word) for word in words]

    # Joining the stemmed words
    message = ' '.join(words)

    # Building a corpus of messages
    corpus.append(message)

# Creating the Bag of Words model
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=2500)
X = cv.fit_transform(corpus).toarray()

# Extracting the dependent variable from the dataset
y = pd.get_dummies(df['label'])
y = y.iloc[:, 1].values

# Creating a pickle file for the CountVectorizer
pickle.dump(cv, open('cv-transform.pkl', 'wb'))

# Model Building
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

# Fitting Multinomial Naive Bayes to the training set
from sklearn.naive_bayes import MultinomialNB
classifier = MultinomialNB(alpha=0.3)
classifier.fit(X_train, y_train)

# Creating a pickle file for the Multinomial Naive Bayes model
filename = 'spam-sms-mnb-model.pkl'
pickle.dump(classifier, open(filename, 'wb'))
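The cleaning loop in the script above can be illustrated in isolation. This stdlib-only sketch mirrors the regex and stop-word steps on one example message; the tiny `STOPWORDS` set is an illustrative stand-in for NLTK's full English list, and the `PorterStemmer` step is omitted for brevity:

```python
import re

# Illustrative stand-in for NLTK's English stop-word list (assumption:
# the real script uses stopwords.words('english'), which is much larger).
STOPWORDS = {"a", "the", "is", "to", "you", "your", "of", "in", "for", "and"}

def clean_message(message: str) -> str:
    # Keep letters only, mirroring re.sub(pattern='[^a-zA-Z]', repl=' ', ...)
    message = re.sub(r"[^a-zA-Z]", " ", message)
    # Lower-case and tokenize on whitespace
    words = message.lower().split()
    # Drop stop words (the real script then stems each word; omitted here)
    words = [w for w in words if w not in STOPWORDS]
    return " ".join(words)

print(clean_message("WINNER!! You have won a $900 prize. Call 09061701461."))
# → winner have won prize call
```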
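The Bag of Words step can also be sketched without scikit-learn. This stdlib-only illustration (toy corpus and `MAX_FEATURES` value chosen purely for the example) shows what `CountVectorizer(max_features=2500)` computes: keep the most frequent terms across the corpus, then count per-document occurrences of each:

```python
from collections import Counter

# Toy corpus standing in for the cleaned SMS messages (illustration only)
corpus = ["free prize call now", "see you at home", "free call now"]

# CountVectorizer(max_features=N) keeps the N most frequent terms overall
MAX_FEATURES = 3
totals = Counter(word for doc in corpus for word in doc.split())
vocab = sorted(word for word, _ in totals.most_common(MAX_FEATURES))

# Each document becomes a vector of term counts over the shared vocabulary
X = [[doc.split().count(word) for word in vocab] for doc in corpus]

print(vocab)  # → ['call', 'free', 'now']
print(X)      # → [[1, 1, 1], [0, 0, 0], [1, 1, 1]]
```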