forked from dholendar-27/aw-client
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsuggest_categories.py
89 lines (70 loc) · 2.54 KB
/
suggest_categories.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
"""
Lists the most common words among uncategorized events, by duration, to help in creating categories.
This might make more sense as a notebook.
"""
from collections import Counter
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional, Tuple

from tabulate import tabulate

import aw_client
from aw_client import queries
from aw_core import Event
# Set up the module-level ActivityWatch client; get_events() uses it to run its query.
# "test" is passed as the constructor's first argument (presumably the client
# name -- confirm against aw_client).
awc = aw_client.ActivityWatchClient("test")
def example_categories():
    """
    Returns a hard-coded list of example category rules.

    Each entry is a ``(category_name_parts, rule)`` pair, where the rule is a
    dict describing a case-insensitive regex match.
    """
    # TODO: Use tools in aw-research to load categories from toml file
    activitywatch_name = ("Work", "ActivityWatch")
    activitywatch_rule = {
        "type": "regex",
        "regex": "aw-|activitywatch",
        "ignore_case": True,
    }
    return [(activitywatch_name, activitywatch_rule)]
def get_events(
    categories: Optional[List[Tuple[Tuple[str, ...], Dict[str, Any]]]] = None,
) -> "List[Event]":
    """
    Retrieves AFK-filtered events, only returns events which are Uncategorized.

    Args:
        categories: Category rules as ``(name_parts, rule_dict)`` pairs, passed
            to the query as ``classes``. When omitted, an empty rule list is
            used (NOTE(review): this should leave every event "Uncategorized";
            confirm against aw_client.queries).

    Returns:
        The uncategorized events from 2022-01-01 (UTC) until now, wrapped in
        ``Event`` objects.
    """
    # The original signature wrote `categories=List[...]`, which made the typing
    # expression the *default value* instead of an annotation. Keep the argument
    # optional, but default to "no rules" rather than a typing object.
    if categories is None:
        categories = []

    start = datetime(2022, 1, 1, tzinfo=timezone.utc)
    now = datetime.now(tz=timezone.utc)
    timeperiods = [(start, now)]

    canonicalQuery = queries.canonicalEvents(
        queries.DesktopQueryParams(
            bid_window="aw-watcher-window_",
            bid_afk="aw-watcher-afk_",
            classes=categories,
        )
    )

    # Keep only the events classified as "Uncategorized" and sum their duration.
    # The query text is kept flush-left so the string sent to the server is unchanged.
    res = awc.query(
        f"""
{canonicalQuery}
events = filter_keyvals(events, "$category", [["Uncategorized"]]);
duration = sum_durations(events);
RETURN = {{"events": events, "duration": duration}};
""",
        timeperiods,
    )

    # One time period was queried, so the first result holds the data.
    events = res[0]["events"]
    print(f"Fetched {len(events)} events")
    return [Event(**e) for e in events]
def events2words(events):
    """
    Yields ``(word, duration)`` pairs for every word found in the events' data.

    Only string values in ``event.data`` are considered. They are split on
    whitespace, words shorter than 3 characters are skipped, and the remaining
    words are lowercased. Each word is paired with the duration of the event
    it came from.
    """
    for event in events:
        texts = (value for value in event.data.values() if isinstance(value, str))
        for text in texts:
            for token in text.split():
                if len(token) < 3:
                    continue
                # normalize
                yield (token.lower(), event.duration)
def main():
    """
    Prints a table of the 50 words with the most accumulated time among the
    uncategorized events.
    """
    uncategorized = get_events(example_categories())

    # find most common words, by duration
    totals: Dict[str, timedelta] = Counter()  # type: ignore
    for token, spent in events2words(uncategorized):
        # Start from a zero timedelta: a Counter's implicit 0 cannot be added
        # to a timedelta.
        totals[token] = totals.get(token, timedelta(0)) + spent

    # The top words are rarely useful for categorization, as they are usually browsers and other categories
    # of activity which are too broad for it to make sense as a rule (except as a fallback).
    top_words = totals.most_common(50)  # type: ignore
    print(tabulate(top_words, headers=["word", "duration"]))
# Run the report only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()