-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlogs_analysis.py
executable file
·99 lines (87 loc) · 2.57 KB
/
logs_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python3
"""Logs Analysis for extracting key facts out of a news site's database."""
import psycopg2
DB = 'news'
def most_read_articles(cur):
"""Report the most popular articles of all times."""
print('\n', 'Most read articles:')
cur.execute("""
SELECT articles.title, COUNT(log.path) as num
FROM log, articles
WHERE log.path = '/article/' || articles.slug
GROUP BY articles.title
ORDER BY num DESC
LIMIT 3
""")
output = cur.fetchall()
for title, views in output:
yield '"{}" \u2014 {} views'.format(title, views)
def most_read_authors(cur):
"""Report the most popular author of all times."""
print('\n', 'Most read authors:')
cur.execute("""
SELECT authors.name, COUNT(log.path) as num
FROM log, articles, authors
WHERE log.path = '/article/' || articles.slug
AND articles.author = authors.id
GROUP BY authors.name
ORDER BY num DESC
LIMIT 4
""")
output = cur.fetchall()
for author, views in output:
yield '{} \u2014 {} views'.format(author, views)
def days_with_most_errors(cur):
"""Report days with more than 1% of requests resulting in errors."""
print('\n', 'Errors:')
cur.execute("""
WITH result AS (
WITH date_and_status AS (
SELECT
log.time::date AS day,
log.status as status
FROM
log
)
SELECT
day,
((COUNT(status) FILTER (WHERE status != '200 OK'))::float / (COUNT(status))::float) * 100 AS fail_rate
FROM
date_and_status
GROUP BY
day
)
SELECT
TO_CHAR(day, 'FMMonth dd, yyyy'),
fail_rate
FROM
result
WHERE
fail_rate >= 1
ORDER BY
fail_rate DESC
LIMIT
10
""")
output = cur.fetchall()
for date, fail_rate in output:
yield '{} \u2014 {}% errors'.format(date, round(float(fail_rate), 2))
def main():
"""Query a database DB and print the result."""
conn = psycopg2.connect(database=DB)
cur = conn.cursor()
# Get generator for most read articles and print them out
art_gen = most_read_articles(cur)
for article in art_gen:
print(article)
# And for most read author
auth_gen = most_read_authors(cur)
for author in auth_gen:
print(author)
# And for days with more than 1% errors
err_gen = days_with_most_errors(cur)
for error in err_gen:
print(error)
conn.close()
if __name__ == "__main__":
main()