-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
58 lines (45 loc) · 1.75 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import spacy
from flask import Flask, request, jsonify
from spacy.lang.char_classes import ALPHA, ALPHA_LOWER, ALPHA_UPPER
from spacy.lang.char_classes import CONCAT_QUOTES, LIST_ELLIPSES, LIST_ICONS
from spacy.util import compile_infix_regex
# Flask application object; the route below is registered against it.
app = Flask(__name__)

# Load the coreference pipeline once at import time so every request reuses
# the same model instance.
nlp = spacy.load("en_coreference_web_trf")

# Custom tokenizer infix patterns.
#
# The rule that splits on hyphens between letters is deliberately left out so
# that hyphenated words are kept as single tokens.
#
# NOTE: these are raw strings, so each regex escape takes a single backslash.
# A doubled backslash (r"\\-", r"\\.") would match a literal backslash instead
# of the hyphen / period the rule is meant to match.
infixes = (
    LIST_ELLIPSES
    + LIST_ICONS
    + [
        # Arithmetic operator between digits, e.g. "3+4", "10-2", "2*3".
        r"(?<=[0-9])[+\-\*^](?=[0-9-])",
        # Period between a lowercase letter/quote and an uppercase letter/quote.
        r"(?<=[{al}{q}])\.(?=[{au}{q}])".format(
            al=ALPHA_LOWER, au=ALPHA_UPPER, q=CONCAT_QUOTES
        ),
        # Comma between two letters.
        r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),
        # One of : < > = / between a letter or digit and a letter.
        r"(?<=[{a}0-9])[:<>=/](?=[{a}])".format(a=ALPHA),
    ]
)

# Install the customised infix matcher on the pipeline's tokenizer.
infix_re = compile_infix_regex(infixes)
nlp.tokenizer.infix_finditer = infix_re.finditer
@app.route('/coreference_resolution', methods=['POST'])
def coreference_resolution():
    """Resolve coreference clusters for the document in the request body.

    Expects a JSON object with an optional string field ``document``
    (defaults to the empty string when absent).

    Returns:
        A JSON response ``{"clusters": [[[start, end], ...], ...]}`` with one
        inner list per span group whose key starts with ``"coref_cluster"``;
        each ``[start, end]`` pair is taken from the span's ``start`` and
        ``end`` attributes.  If the body is not a JSON object, or
        ``document`` is not a string, a JSON error message is returned with
        HTTP status 400.
    """
    payload = request.get_json()
    # A JSON array, scalar or null would otherwise fail on ``.get`` and
    # surface as an opaque HTTP 500.
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    input_text = payload.get('document', '')
    if not isinstance(input_text, str):
        return jsonify({"error": "'document' must be a string."}), 400

    # Run the pipeline over the submitted text.
    doc = nlp(input_text)

    # Keep only the coreference span groups and reduce every span to its
    # [start, end] pair so the result is JSON-serialisable.
    clusters = [
        [[span.start, span.end] for span in spans]
        for key, spans in doc.spans.items()
        if key.startswith("coref_cluster")
    ]
    return jsonify({"clusters": clusters})
if __name__ == '__main__':
    import os

    # The server listens on every interface (0.0.0.0).  Flask's debug mode
    # enables the interactive debugger, which lets anyone who can reach the
    # port execute arbitrary code, so it is opt-in via FLASK_DEBUG rather
    # than always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)