Skip to content

Commit

Permalink
Merge pull request #241 from 0dm/scrubbing-changes
Browse files Browse the repository at this point in the history
Scrubbing changes
  • Loading branch information
abrichr authored Jun 13, 2023
2 parents 7ce0479 + 76b1396 commit 180a774
Show file tree
Hide file tree
Showing 6 changed files with 215 additions and 207 deletions.
118 changes: 67 additions & 51 deletions openadapt/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,58 @@
"RECORD_READ_ACTIVE_ELEMENT_STATE": False,
# TODO: remove?
"REPLAY_STRIP_ELEMENT_STATE": True,
# IGNORES WARNINGS (PICKLING, ETC.)
"IGNORE_WARNINGS": False,
# ACTION EVENT CONFIGURATIONS
"ACTION_TEXT_SEP": "-",
"ACTION_TEXT_NAME_PREFIX": "<",
"ACTION_TEXT_NAME_SUFFIX": ">"
"ACTION_TEXT_NAME_SUFFIX": ">",
# SCRUBBING CONFIGURATIONS
"SCRUB_ENABLED": True,
"SCRUB_CHAR": "*",
"SCRUB_LANGUAGE": "en",
# TODO support lists in getenv_fallback
"SCRUB_FILL_COLOR": (255, 0, 0),
"SCRUB_CONFIG_TRF": {
"nlp_engine_name": "spacy",
"models": [{"lang_code": "en", "model_name": "en_core_web_trf"}],
},
"SCRUB_IGNORE_ENTITIES": [
# 'US_PASSPORT',
# 'US_DRIVER_LICENSE',
# 'CRYPTO',
# 'UK_NHS',
# 'PERSON',
# 'CREDIT_CARD',
# 'US_BANK_NUMBER',
# 'PHONE_NUMBER',
# 'US_ITIN',
# 'AU_ABN',
"DATE_TIME",
# 'NRP',
# 'SG_NRIC_FIN',
# 'AU_ACN',
# 'IP_ADDRESS',
# 'EMAIL_ADDRESS',
"URL",
# 'IBAN_CODE',
# 'AU_TFN',
# 'LOCATION',
# 'AU_MEDICARE',
# 'US_SSN',
# 'MEDICAL_LICENSE'
],
"SCRUB_KEYS_HTML": [
"text",
"canonical_text",
"title",
"state",
"task_description",
"key_char",
"canonical_key_char",
"key_vk",
"children",
],
}


Expand Down Expand Up @@ -61,53 +109,21 @@ def getenv_fallback(var_name):
logger.info(f"{key}={val}")


# SCRUBBING CONFIGURATIONS
# NOTE(review): the consumers of these constants are not visible here; the
# per-constant notes below are inferred from names and values -- confirm
# against the scrubbing module.

# Flag for turning scrubbing on or off.
SCRUB_ENABLED = True

# Single character, presumably used to mask scrubbed text.
SCRUB_CHAR = "*"

# Language code; matches the "lang_code" of the model configured below.
SCRUB_LANGUAGE = "en"

# NLP engine configuration: spaCy with the English transformer model.
SCRUB_CONFIG_TRF = {
    "nlp_engine_name": "spacy",
    "models": [{"lang_code": "en", "model_name": "en_core_web_trf"}],
}

# Three-integer tuple, presumably an RGB fill color (pure red) -- confirm.
DEFAULT_SCRUB_FILL_COLOR = (255, 0, 0)

# Entity names to ignore. Other known entity names that are currently NOT in
# the list (add one below to ignore it as well):
#   US_PASSPORT, US_DRIVER_LICENSE, CRYPTO, UK_NHS, PERSON, CREDIT_CARD,
#   US_BANK_NUMBER, PHONE_NUMBER, US_ITIN, AU_ABN, NRP, SG_NRIC_FIN, AU_ACN,
#   IP_ADDRESS, EMAIL_ADDRESS, IBAN_CODE, AU_TFN, LOCATION, AU_MEDICARE,
#   US_SSN, MEDICAL_LICENSE
SCRUB_IGNORE_ENTITIES = ["DATE_TIME", "URL"]

# Key names, presumably the dict keys whose values get scrubbed -- confirm.
SCRUB_KEYS_HTML = [
    "text",
    "canonical_text",
    "title",
    "state",
    "task_description",
    "key_char",
    "canonical_key_char",
    "key_vk",
    "children",
]
def filter_log_messages(data):
    """Decide whether a log record should be kept, based on its message text.

    Args:
        data: Record data from a loguru logger; only ``data["message"]`` is
            read.

    Returns:
        ``False`` if the message contains any of the known substrings to
        ignore (the record should be dropped), ``True`` otherwise (the record
        should be kept).
    """
    # TODO: ultimately, we want to fix the underlying issues, but for now,
    # we can ignore these messages
    ignored_fragments = ("Cannot pickle Objective-C objects",)

    text = data["message"]
    for fragment in ignored_fragments:
        if fragment in text:
            # One match is enough to reject the record.
            return False
    return True
62 changes: 27 additions & 35 deletions openadapt/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import fire
import mss.tools

from openadapt import config, crud, scrub, utils, window
from openadapt import config, crud, utils, window


EVENT_TYPES = ("screen", "action", "window")
Expand Down Expand Up @@ -242,7 +242,6 @@ def on_move(
y: int,
injected: bool,
) -> None:

logger.debug(f"{x=} {y=} {injected=}")
if not injected:
trigger_action_event(
Expand All @@ -251,7 +250,7 @@ def on_move(
"name": "move",
"mouse_x": x,
"mouse_y": y,
}
},
)


Expand All @@ -273,7 +272,7 @@ def on_click(
"mouse_y": y,
"mouse_button_name": button.name,
"mouse_pressed": pressed,
}
},
)


Expand All @@ -295,7 +294,7 @@ def on_scroll(
"mouse_y": y,
"mouse_dx": dx,
"mouse_dy": dy,
}
},
)


Expand All @@ -311,23 +310,15 @@ def handle_key(
"vk",
]
attrs = {
f"key_{attr_name}": getattr(key, attr_name, None)
for attr_name in attr_names
f"key_{attr_name}": getattr(key, attr_name, None) for attr_name in attr_names
}
logger.debug(f"{attrs=}")
canonical_attrs = {
f"canonical_key_{attr_name}": getattr(canonical_key, attr_name, None)
for attr_name in attr_names
}
logger.debug(f"{canonical_attrs=}")
trigger_action_event(
event_q,
{
"name": event_name,
**attrs,
**canonical_attrs
}
)
trigger_action_event(event_q, {"name": event_name, **attrs, **canonical_attrs})


def read_screen_events(
Expand Down Expand Up @@ -358,8 +349,8 @@ def read_screen_events(

def read_window_events(
event_q: queue.Queue,
terminate_event: multiprocessing.Event,
recording_timestamp: float,
terminate_event: multiprocessing.Event,
recording_timestamp: float,
) -> None:
"""
Read window events and add them to the event queue.
Expand All @@ -378,10 +369,9 @@ def read_window_events(
window_data = window.get_active_window_data()
if not window_data:
continue
if (
window_data["title"] != prev_window_data.get("title") or
window_data["window_id"] != prev_window_data.get("window_id")
):
if window_data["title"] != prev_window_data.get("title") or window_data[
"window_id"
] != prev_window_data.get("window_id"):
# TODO: fix exception sometimes triggered by the next line on win32:
# File "\Python39\lib\threading.py" line 917, in run
# File "...\openadapt\record.py", line 277, in read window events
Expand All @@ -394,15 +384,17 @@ def read_window_events(
logger.info(f"{_window_data=}")
if window_data != prev_window_data:
logger.debug("queuing window event for writing")
event_q.put(Event(
utils.get_timestamp(),
"window",
window_data,
))
event_q.put(
Event(
utils.get_timestamp(),
"window",
window_data,
)
)
prev_window_data = window_data


def performance_stats_writer (
def performance_stats_writer(
perf_q: multiprocessing.Queue,
recording_timestamp: float,
terminate_event: multiprocessing.Event,
Expand All @@ -428,7 +420,10 @@ def performance_stats_writer (
continue

crud.insert_perf_stat(
recording_timestamp, event_type, start_time, end_time,
recording_timestamp,
event_type,
start_time,
end_time,
)
logger.info("performance stats writer done")

Expand Down Expand Up @@ -468,25 +463,21 @@ def create_recording(

def read_keyboard_events(
event_q: queue.Queue,
terminate_event: multiprocessing.Event,
recording_timestamp: float,
terminate_event: multiprocessing.Event,
recording_timestamp: float,
) -> None:


def on_press(event_q, key, injected):
canonical_key = keyboard_listener.canonical(key)
logger.debug(f"{key=} {injected=} {canonical_key=}")
if not injected:
handle_key(event_q, "press", key, canonical_key)


def on_release(event_q, key, injected):
canonical_key = keyboard_listener.canonical(key)
logger.debug(f"{key=} {injected=} {canonical_key=}")
if not injected:
handle_key(event_q, "release", key, canonical_key)


utils.set_start_time(recording_timestamp)
keyboard_listener = keyboard.Listener(
on_press=partial(on_press, event_q),
Expand Down Expand Up @@ -524,7 +515,7 @@ def record(
"""

utils.configure_logging(logger, LOG_LEVEL)
logger.info(f"{scrub.scrub_text(task_description)=}")
logger.info(f"{task_description=}")

recording = create_recording(task_description)
recording_timestamp = recording.timestamp
Expand Down Expand Up @@ -650,5 +641,6 @@ def record(

logger.info(f"saved {recording_timestamp=}")


if __name__ == "__main__":
fire.Fire(record)
Loading

0 comments on commit 180a774

Please sign in to comment.