-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
157 lines (127 loc) · 4.75 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
from __future__ import annotations
import hashlib
import io
import os
import zipfile
import dotenv
from flask import flash
from flask import Flask
from flask import redirect
from flask import render_template
from flask import request
from flask import url_for
from jinja2 import Environment
from jinja2 import FileSystemLoader
from jinja2 import select_autoescape
from src.data import db
from src.data import etl
from src.data import s3
from src.utils import files
# Load variables from the nearest .env file (if one is found) into the
# process environment.
_ = dotenv.load_dotenv(dotenv.find_dotenv())

# Module-level database connection; the analyze view passes it to the ETL
# pipeline and runs its queries on it.
db_conn = db.db_init()

app = Flask(__name__)
# Flask needs a non-empty secret key to sign the session cookie that flash()
# stores its messages in. Using `or` instead of a .get() default also covers
# APP_SECRET_KEY being defined but empty (e.g. a blank line in .env), which
# would otherwise leave the app without a usable key.
# NOTE(review): the random fallback changes on every start and differs between
# worker processes, so sessions do not survive restarts — set APP_SECRET_KEY
# in any real deployment.
app.secret_key = os.environ.get("APP_SECRET_KEY") or os.urandom(16).hex()
def md5_filter(s, _=None):
    """Return the hexadecimal MD5 digest of ``s``.

    ``str`` input is encoded as UTF-8 and bytes-like input is hashed as-is.
    Any other value is converted with ``str()`` first, so template values
    such as integers no longer raise ``AttributeError``.

    Args:
        s: Value to hash.
        _: Ignored; accepted so callers may pass one extra argument.

    Returns:
        The 32-character lowercase hexadecimal digest.
    """
    if isinstance(s, (bytes, bytearray, memoryview)):
        payload = bytes(s)
    else:
        payload = str(s).encode("utf-8")
    return hashlib.md5(payload).hexdigest()
# Stand-alone Jinja2 environment that loads from the "templates" directory and
# autoescapes templates with an html or xml extension.
env = Environment(
    autoescape=select_autoescape(["html", "xml"]),
    loader=FileSystemLoader(searchpath="templates"),
)

# Make md5_filter available as the "hash" filter, both in the stand-alone
# environment above and in the environment Flask itself renders with.
env.filters.update(hash=md5_filter)
app.jinja_env.filters.update(hash=md5_filter)
def _first_txt_in_zip(archive_stream):
    """Read the first ``.txt`` member of a ZIP archive.

    Args:
        archive_stream: Seekable binary file-like object holding the archive.

    Returns:
        ``(member_name, content_bytes)`` for the first member whose name ends
        in ``.txt`` (case-insensitive), or ``None`` if there is no such member.

    Raises:
        zipfile.BadZipFile: If ``archive_stream`` is not a valid ZIP archive.
    """
    with zipfile.ZipFile(archive_stream) as archive:
        txt_members = [
            name for name in archive.namelist() if name.lower().endswith(".txt")
        ]
        if not txt_members:
            return None
        member_name = txt_members[0]
        # NOTE(review): the whole member is read into memory with no size cap;
        # consider enforcing an upload/extraction limit.
        with archive.open(member_name) as member_stream:
            return member_name, member_stream.read()


@app.route("/", methods=["GET", "POST"])
def index():
    """Render the upload form (GET) or handle an uploaded chat file (POST).

    On POST the ``chatFile`` upload is validated with ``files.allowed_file``.
    A ``.txt`` upload is used directly; for a ``.zip`` upload the first
    ``.txt`` member is extracted. The content hash and a storage name are
    obtained from ``files.compute_hash`` / ``files.unique_filename_generator``.
    If ``s3.file_exists_in_s3`` reports the name as missing, the file is
    uploaded with ``s3.upload_to_s3``. In both cases the user is redirected to
    the ``analyze`` view for that name.

    Returns:
        The rendered ``index.html`` on GET, otherwise a redirect: back to this
        page (with a flashed message) on a validation or upload error, or to
        ``analyze`` on success.
    """
    if request.method != "POST":
        return render_template("index.html")

    if "chatFile" not in request.files:
        flash("No file part")
        return redirect(request.url)

    file = request.files["chatFile"]
    # `not` also covers a filename of None, not just the empty string.
    if not file.filename:
        flash("No selected file")
        return redirect(request.url)

    if not files.allowed_file(file.filename):
        flash("Invalid file type")
        return redirect(request.url)

    # Compare suffixes case-insensitively so "CHAT.TXT" / "export.ZIP" take the
    # same path as their lowercase forms.
    lowered_name = file.filename.lower()
    if lowered_name.endswith(".txt"):
        file_hash = files.compute_hash(file)
        filename = files.unique_filename_generator(file.filename, file, file_hash)
    elif lowered_name.endswith(".zip"):
        try:
            extracted = _first_txt_in_zip(file)
        except zipfile.BadZipFile:
            # A corrupt or mislabelled archive is a user error, not a 500.
            flash("Invalid ZIP file.")
            return redirect(request.url)
        if extracted is None:
            flash("No .txt file found in the ZIP.")
            return redirect(request.url)
        member_name, file_content = extracted
        # From here on, work with the extracted text instead of the archive.
        file = io.BytesIO(file_content)
        file_hash = files.compute_hash(file)
        filename = files.unique_filename_generator(member_name, file, file_hash)
    else:
        # Without this guard an allowed-but-unhandled extension would reach S3
        # with an empty filename.
        flash("Invalid file type")
        return redirect(request.url)

    # NOTE(review): compute_hash() has already read from `file`; confirm that
    # it (or upload_to_s3) rewinds the stream before the upload reads it.
    if not s3.file_exists_in_s3(filename):
        s3_filename = s3.upload_to_s3(file, filename)
        if not s3_filename:
            flash("Error occurred while uploading to S3.")
            return redirect(request.url)
        flash("File uploaded successfully.")
        return redirect(
            url_for("analyze", upload_filename=s3_filename, _external=True),
        )

    flash("File with same content already exists!")
    return redirect(url_for("analyze", upload_filename=filename, _external=True))
@app.route("/analyze/<upload_filename>", methods=["GET", "POST"])
def analyze(upload_filename: str):
    """Show messages from an uploaded chat, optionally filtered by a keyword.

    GET renders up to 10 rows of the view returned by ``etl.etl_pipeline``
    together with the three most frequent senders. POST reads ``keyword`` and
    ``strict_search`` from the form and renders one 20-row page of matching
    messages — exact match when ``strict_search`` is ``"true"``, substring
    ``LIKE`` match otherwise — plus the match count and the top three senders
    among the matches. The page number comes from the ``page`` query argument.

    Args:
        upload_filename: File name taken from the URL and handed to
            ``etl.etl_pipeline``.

    Returns:
        The rendered ``analyze.html`` template.
    """
    _, view_name = etl.etl_pipeline(upload_filename, db_conn=db_conn)
    page_size = 20
    query = ""
    keyword = ""
    # A page below 1 would yield a negative OFFSET, which is not valid SQL.
    page = max(request.args.get("page", 1, type=int), 1)

    if request.method == "POST":
        keyword = request.form.get("keyword", "").strip()
        strict_search = request.form.get("strict_search") == "true"

        # The keyword is untrusted input embedded in a SQL string literal:
        # double every single quote so it cannot terminate the literal and
        # inject SQL. This relies on standard SQL literal rules (no backslash
        # escapes).
        # NOTE(review): prefer bound parameters if db_conn supports them; the
        # literal form is kept because the query text is passed to the template.
        # `view_name` is trusted as returned by the ETL pipeline — verify.
        quoted_keyword = keyword.replace("'", "''")
        if strict_search:
            query = f"SELECT * FROM {view_name} WHERE message = '{quoted_keyword}'"
        else:
            query = (
                f"SELECT * FROM {view_name} WHERE message LIKE '%{quoted_keyword}%'"
            )

        total_results = db_conn.sql(
            f"""
            SELECT COUNT(*)
            FROM ({query}) AS foo""",
        ).fetchone()[0]
        results = db_conn.sql(
            f"""
            SELECT *
            FROM ({query}) AS foo
            LIMIT {page_size} OFFSET {(page - 1) * page_size}""",
        ).fetchall()
        top_senders = db_conn.sql(
            f"""
            SELECT sender, COUNT(*) as count
            FROM ({query}) AS foo
            GROUP BY sender
            ORDER BY count DESC
            LIMIT 3""",
        ).fetchall()
    else:
        results = db_conn.sql(f"SELECT * FROM {view_name} LIMIT 10").fetchall()
        top_senders = db_conn.sql(
            f"SELECT sender, COUNT(*) as count FROM {view_name} "
            "GROUP BY sender ORDER BY count DESC LIMIT 3",
        ).fetchall()
        # Report the rows actually shown (at most 10) rather than a fixed 10,
        # which was wrong for chats with fewer messages.
        total_results = len(results)

    return render_template(
        "analyze.html",
        uploaded_filename=upload_filename,
        results=results,
        total_results=total_results,
        query=query,
        keyword=keyword,
        page=page,
        top_senders=top_senders,
    )
if __name__ == "__main__":
    # When run as a script, serve on every network interface; the port is read
    # from the PORT environment variable and defaults to 5000.
    app.run(host="0.0.0.0", port=int(os.getenv("PORT", "5000")))