# welcome.py -- forked from erichensleyibm/NLC_product_classifier-demo
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import requests
import _config
from flask import Flask, render_template, request
from watson_developer_cloud import NaturalLanguageClassifierV1
from flask_table import Table, Col
from lxml import html
app = Flask(__name__)

# The data set we want to use
DATA_SET = 'data/product_description_training.csv'

# Resolve the Natural Language Classifier credentials.  When the
# VCAP_SERVICES environment variable is present its JSON payload is used;
# otherwise the values are read from the local _config module.
VCAP_SERVICES = os.getenv("VCAP_SERVICES")
if VCAP_SERVICES is not None:
    # These will be automatically set if deployed to IBM Cloud
    SERVICES = json.loads(VCAP_SERVICES)
    _NLC_CREDENTIALS = SERVICES['natural_language_classifier'][0]['credentials']
    NLC_USERNAME = _NLC_CREDENTIALS['username']
    # The password has its own key; reading 'username' twice handed the
    # service the user name as the password.
    NLC_PASSWORD = _NLC_CREDENTIALS['password']
else:
    try:
        # Set these here for local development
        NLC_USERNAME = _config.NLC_USERNAME
        NLC_PASSWORD = _config.NLC_PASSWORD
    except AttributeError:
        # _config does not define both values: fall back to empty
        # credentials (replace them here to hardcode credentials).
        NLC_USERNAME = ""
        NLC_PASSWORD = ""

# Classifier description shared by the routes; populated by Welcome().
CLASSIFIER = None
@app.route('/')
def Welcome():
    """Render the landing page and initialise the classifier globals.

    Builds the NaturalLanguageClassifierV1 client from NLC_USERNAME and
    NLC_PASSWORD and stores it in the module-level NLC_SERVICE.  If the
    client cannot be constructed, NLC_SERVICE is set to False and the page
    is rendered with a hint about the missing credentials.  Otherwise the
    classifier returned by _create_classifier() is stored in CLASSIFIER and
    shown, JSON-formatted, in the classifier info box.

    Returns:
        The rendered 'index.html' template.
    """
    global CLASSIFIER, NLC_SERVICE
    try:
        NLC_SERVICE = NaturalLanguageClassifierV1(
            username=NLC_USERNAME,
            password=NLC_PASSWORD
        )
    except Exception:
        # Any construction failure disables the service, but SystemExit and
        # KeyboardInterrupt are no longer swallowed as a bare except did.
        NLC_SERVICE = False

    if not NLC_SERVICE:
        return render_template('index.html', classifier_info="Please add a _config.py file with your NLC credentials if running locally. " , icd_code="", icd_output="", classifier_output="")

    # create classifier if it doesn't exist, format the json
    CLASSIFIER = _create_classifier()
    classifier_info = json.dumps(CLASSIFIER, indent=4)
    # update the UI, but only the classifier info box
    return render_template('index.html', classifier_info=classifier_info, icd_code="", icd_output="", classifier_output="")
@app.route('/classify_text', methods=['GET', 'POST'])
def classify_text():
    """Classify a product description submitted through the text form.

    Reads the 'classifierinput_text' form field, sends it to the classifier
    identified by CLASSIFIER['classifier_id'] and renders the returned
    classes through ResultsTable.  An empty or missing field renders the
    page with a 'No description provided.' notice instead.

    Returns:
        The rendered 'index.html' template.
    """
    # get the text from the UI; .get() keeps a request without the field
    # (e.g. a plain GET, which this route accepts) on the "no description"
    # path instead of aborting with 400 Bad Request
    input_text = request.form.get('classifierinput_text', '')
    # get info about the classifier
    classifier_info = json.dumps(CLASSIFIER, indent=4)

    # check if text is valid
    if not input_text:
        return render_template('index.html', classifier_info=classifier_info, classifier_input='No description provided.', all_results='')

    # send the text to the classifier, get back a product classification
    classifier_output = NLC_SERVICE.classify(CLASSIFIER['classifier_id'], input_text)
    # send results to table formatter
    all_results = ResultsTable(classifier_output['classes'])
    return render_template('index.html', classifier_info=classifier_info, classifier_input=input_text, all_results=all_results)
@app.route('/classify_url', methods=['GET', 'POST'])
def classify_url():
    """Classify the product description found at a submitted product URL.

    Reads the 'classifierinput_url' form field, hands it to
    _get_Kohls_url_info() to extract a description, sends that description
    to the classifier identified by CLASSIFIER['classifier_id'] and renders
    the returned classes through ResultsTable.  When no description can be
    extracted the page is rendered with an 'Invalid Url' notice instead.

    Returns:
        The rendered 'index.html' template.
    """
    # get info about the classifier
    classifier_info = json.dumps(CLASSIFIER, indent=4)
    # get the url from the UI; .get() keeps a request without the field
    # (e.g. a plain GET, which this route accepts) on the "invalid url"
    # path instead of aborting with 400 Bad Request
    input_url = request.form.get('classifierinput_url', '')
    # send url to parser
    input_text = _get_Kohls_url_info(input_url)

    # check for valid product description
    if not input_text:
        return render_template('index.html', classifier_info=classifier_info, classifier_input='Invalid Url. Please provide a product page from Kohls.com, or manually add the product description above.', all_results='')

    # send the text to the classifier, get back a product classification
    classifier_output = NLC_SERVICE.classify(CLASSIFIER['classifier_id'], input_text)
    # send results to table formatter
    all_results = ResultsTable(classifier_output['classes'])
    # fill in the text boxes
    return render_template('index.html', classifier_info=classifier_info, classifier_input=input_text, all_results=all_results)
class ResultsTable(Table):
    """flask_table layout used to display the classes returned by the classifier."""

    # One column per field: 'class_name' under the heading "Class" and
    # 'confidence' under the heading "Confidence".
    class_name = Col('Class')
    confidence = Col('Confidence')

    # table id handed to flask_table
    table_id = 'classes'
def _create_classifier():
    """Return the classifier to use, training a new one if none exists.

    Lists the classifiers known to NLC_SERVICE and returns the first entry.
    When the list is empty, a new classifier is created from the training
    data in DATA_SET and the service's response is returned.
    """
    # for the purposes of this demo, we handle only one classifier:
    # the first one the service reports
    existing = NLC_SERVICE.list_classifiers()['classifiers']
    if existing:
        return existing[0]

    # none found, so create a new classifier; change the name here if needed
    metadata = '{"name": "Product_description_classifier", "language": "en"}'
    with open(DATA_SET, 'rb') as training_file:
        return NLC_SERVICE.create_classifier(
            metadata=metadata,
            training_data=training_file
        )
def _get_Kohls_url_info(url, max_attempts=3, timeout=10):
    """Fetch a Kohls product page and return its product description text.

    Args:
        url: Address submitted by the user.  It must be an http(s) URL whose
            remainder starts with 'www.kohls.com/product/prd-<id>'.
        max_attempts: How many times the page is requested before giving up.
        timeout: Seconds passed to requests.get() as the request timeout.

    Returns:
        The text nodes found under the '<id>_productDetails' element joined
        with single spaces (the 'PRODUCT FEATURES' heading and bare '\\r'
        nodes are dropped), or False when the URL is not a product page or
        no description could be retrieved.
    """
    product_prefix = 'www.kohls.com/product/prd-'

    # check if valid product url; the scheme is split off instead of
    # slicing at a fixed offset so 'http://' works as well as 'https://'
    if not url:
        return False
    scheme, separator, remainder = url.partition('://')
    if (not separator or scheme.lower() not in ('http', 'https')
            or not remainder.startswith(product_prefix)):
        return False

    # extract product_id: everything up to the next path, query or
    # fragment delimiter
    prd_id = remainder[len(product_prefix):]
    for delimiter in '/?#':
        prd_id = prd_id.split(delimiter, 1)[0]
    if not prd_id:
        return False

    raw_desc = []
    # bounded retry to handle missed connections; an unbounded loop never
    # returns when the page simply has no matching element
    for _ in range(max_attempts):
        try:
            # retrieve page info
            page_content = requests.get(url, timeout=timeout)
        except requests.RequestException:
            continue
        if not page_content.content:
            continue
        # convert to html
        tree = html.fromstring(page_content.content)
        # parse html using xpath; the id is passed as an XPath variable so
        # quotes in user-supplied text cannot break the expression
        raw_desc = tree.xpath(
            '//*[@id=$details_id]/div/descendant::*/text()',
            details_id='%s_productDetails' % prd_id
        )
        if raw_desc:
            break

    if not raw_desc:
        return False

    # extract product description
    desc = ' '.join([i for i in raw_desc if i not in ['PRODUCT FEATURES', '\r']])
    return desc
# Port to listen on: taken from the PORT environment variable, '5000' if unset.
port = os.environ.get('PORT', '5000')

if __name__ == "__main__":
    # Run the Flask server on all interfaces when executed as a script.
    listen_port = int(port)
    app.run(host='0.0.0.0', port=listen_port)