-
Notifications
You must be signed in to change notification settings - Fork 193
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
cb58c75
commit 09880af
Showing
9 changed files
with
157 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
sudo curl -L --fail https://raw.githubusercontent.com/linuxserver/docker-docker-compose/master/run.sh -o /usr/local/bin/docker-compose | ||
sudo chmod +x /usr/local/bin/docker-compose |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from flask import Flask, jsonify, request | ||
from utilities import predict_pipeline | ||
|
||
app = Flask(__name__) | ||
|
||
|
||
@app.post('/predict')
def predict():
    """Classify the sentiment of the text sent in a JSON request body.

    The body must be a JSON object whose ``text`` key holds the string to
    classify. The string is wrapped in a one-element list, passed to
    ``predict_pipeline``, and the first prediction is returned as JSON.

    Returns:
        The JSON-encoded first prediction on success. ``{'error': ...}`` with
        HTTP 400 when the body is not a JSON object carrying a string
        ``text``, or with HTTP 500 when the prediction cannot be serialised.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every bad request takes the same error path below.
    data = request.get_json(silent=True)
    sample = data.get('text') if isinstance(data, dict) else None
    if not isinstance(sample, str):
        return jsonify({'error': 'No text sent'}), 400

    # predict_pipeline works on a list of texts, so wrap the single sample.
    predictions = predict_pipeline([sample])
    try:
        return jsonify(predictions[0])
    except TypeError as e:
        return jsonify({'error': str(e)}), 500
|
||
if __name__ == '__main__':
    import os

    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary code, so it must not be switched on unconditionally while
    # binding to all interfaces. Opt in with FLASK_DEBUG=1 for development.
    app.run(host='0.0.0.0', debug=os.environ.get('FLASK_DEBUG') == '1')
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
flask>=2.0.0 | ||
scikit-learn==1.0.1 | ||
nltk==3.6.6 | ||
# After installing, download the NLTK data with: python -c "import nltk; nltk.download('omw-1.4'); nltk.download('wordnet')" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
import re | ||
import pickle | ||
|
||
# nltk | ||
from nltk.stem import WordNetLemmatizer | ||
|
||
|
||
lemmatizer = WordNetLemmatizer() | ||
# grouping together the inflected forms ("better" -> "good") | ||
|
||
|
||
# Load the pickled object that predict_pipeline passes to predict() as the
# model. This runs once, at import time, and the path is relative to the
# current working directory.
# NOTE: unpickling can execute arbitrary code; only load trusted model files.
with open('models/pipeline.pickle', mode='rb') as model_file:
    loaded_pipe = pickle.load(model_file)
|
||
|
||
def predict_pipeline(text):
    """Run ``predict`` on *text* using the module-level ``loaded_pipe``."""
    model = loaded_pipe
    return predict(model, text)
|
||
|
||
# Emoticon -> meaning. preprocess() replaces each key found in a tweet with
# "EMOJI" + meaning, trying the keys in the order they are listed here.
emojis = {
    ':)': 'smile',
    ':-)': 'smile',
    ';d': 'wink',
    ':-E': 'vampire',
    ':(': 'sad',
    ':-(': 'sad',
    ':-<': 'sad',
    ':P': 'raspberry',
    ':O': 'surprised',
    ':-@': 'shocked',
    ':@': 'shocked',
    ':-$': 'confused',
    ':\\': 'annoyed',
    ':#': 'mute',
    ':X': 'mute',
    ':^)': 'smile',
    ':-&': 'confused',
    '$_$': 'greedy',
    '@@': 'eyeroll',
    ':-!': 'confused',
    ':-D': 'smile',
    ':-0': 'yell',
    'O.o': 'confused',
    '<(-_-)>': 'robot',
    'd[-_-]b': 'dj',
    ":'-)": 'sadsmile',
    ';)': 'wink',
    ';-)': 'wink',
    'O:-)': 'angel',
    'O*-)': 'angel',
    '(:-D': 'gossip',
    '=^.^=': 'cat',
}
|
||
# English stopwords that preprocess() drops from each tweet. Kept as a list,
# in this order; the literal is split on whitespace to build it.
stopwords = """
a about above after again ain all am an
and any are as at be because been before
being below between both by can d did do
does doing down during each few for from
further had has have having he her here
hers herself him himself his how i if in
into is it its itself just ll m ma
me more most my myself now o of on once
only or other our ours ourselves out own re
s same she shes should shouldve so some such
t than that thatll the their theirs them
themselves then there these they this those
through to too under until up ve very was
we were what when where which while who whom
why will with won y you youd youll youre
youve your yours yourself yourselves
""".split()
|
||
|
||
def preprocess(textdata):
    """Normalise raw tweets into space-joined, lemmatized token strings.

    Each text is lowercased; URLs, emoticons and @mentions are replaced by
    placeholder tokens; every non-alphanumeric character becomes a space;
    runs of three or more identical characters are cut down to two; finally
    stopwords and one-character tokens are dropped and the remaining words
    are lemmatized.

    Args:
        textdata: Iterable of strings to clean.

    Returns:
        A list with one cleaned string per input text, in input order.
    """
    # Compile the patterns once per call instead of resolving them per tweet.
    url_re = re.compile(r"((http://)[^ ]*|(https://)[^ ]*|( www\.)[^ ]*)")
    # Raw string: '\s' inside a plain literal is an invalid escape sequence.
    user_re = re.compile(r'@[^\s]+')
    non_alnum_re = re.compile("[^a-zA-Z0-9]")
    repeat_re = re.compile(r"(.)\1\1+")
    # Every token is checked against the stopwords, so use hashed lookup.
    stopword_set = frozenset(stopwords)

    processed_texts = []
    for tweet in textdata:
        tweet = tweet.lower()

        # Replace all URLs with 'URL'.
        tweet = url_re.sub(' URL', tweet)
        # Replace all emojis.
        # NOTE(review): the tweet is already lowercased, so keys in `emojis`
        # that contain uppercase letters (e.g. ':P') cannot match here. Left
        # unchanged because altering the emitted tokens could diverge from the
        # preprocessing the pickled model was fitted with - TODO confirm.
        for emoji, meaning in emojis.items():
            tweet = tweet.replace(emoji, "EMOJI" + meaning)
        # Replace @USERNAME with 'USER'.
        tweet = user_re.sub(' USER', tweet)
        # Replace every non-alphanumeric character with a space.
        tweet = non_alnum_re.sub(" ", tweet)
        # Collapse 3 or more repeats of the same character down to 2.
        tweet = repeat_re.sub(r"\1\1", tweet)

        # Keep only multi-character, non-stopword tokens, lemmatized.
        kept_words = [
            lemmatizer.lemmatize(word)
            for word in tweet.split()
            if len(word) > 1 and word not in stopword_set
        ]
        processed_texts.append(' '.join(kept_words))

    return processed_texts
|
||
|
||
def predict(model, text):
    """Predict a sentiment label for every entry of *text*.

    Args:
        model: Object whose ``predict`` method is called with the
            preprocessed texts.
        text: Sequence of raw strings; it is iterated twice (once by
            ``preprocess`` and once to build the result).

    Returns:
        A list of dicts, one per input, with the original ``text``, the
        prediction as an int under ``pred``, and its ``label``
        ('Negative' for 0, 'Positive' for 1).
    """
    label_names = {0: 'Negative', 1: 'Positive'}
    raw_predictions = model.predict(preprocess(text))

    # Pair each original text with its prediction.
    return [
        {'text': original, 'pred': int(raw), 'label': label_names[raw]}
        for original, raw in zip(text, raw_predictions)
    ]
|
||
|
||
if __name__ == "__main__":
    # predict_pipeline takes a list of texts, even for a single sample.
    samples = [
        "I hate twitter",
        "May the Force be with you.",
        "Mr. Stark, I don't feel so good",
    ]
    print(predict_pipeline(samples))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
https://kubesphere.io/blogs/install-kubernetes-containerd-multus/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
https://microk8s.io/docs/getting-started |