Skip to content

Commit

Permalink
trying to fix the scrubbing configurations
Browse files Browse the repository at this point in the history
  • Loading branch information
Krish Patel committed May 30, 2023
1 parent ce67fd1 commit b4a3b60
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 24 deletions.
32 changes: 19 additions & 13 deletions openadapt/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
from dotenv import load_dotenv
from loguru import logger
from presidio_analyzer import AnalyzerEngine
from presidio_analyzer.nlp_engine import NlpEngineProvider
from presidio_anonymizer import AnonymizerEngine
from presidio_image_redactor import ImageRedactorEngine, ImageAnalyzerEngine
from presidio_image_redactor import ImageRedactorEngine


_DEFAULTS = {
Expand Down Expand Up @@ -51,18 +50,25 @@ def getenv_fallback(var_name):

# SCRUBBING CONFIGURATIONS

SCRUB_CONFIG = {
"nlp_engine_name": "spacy",
"models": [{"lang_code": "en", "model_name": "en_core_web_trf"}],
}
SCRUB_PROVIDER = NlpEngineProvider(nlp_configuration=SCRUB_CONFIG)
NLP_ENGINE = SCRUB_PROVIDER.create_engine()
ANALYZER = AnalyzerEngine(
nlp_engine=NLP_ENGINE,
supported_languages=["en"]
)
# SCRUB_CONFIG_TRF = {
# "nlp_engine_name": "spacy",
# "models": [{"lang_code": "en", "model_name": "en_core_web_trf"}],
# }
# SCRUB_PROVIDER_TRF = NlpEngineProvider(nlp_configuration=SCRUB_CONFIG_TRF)
# NLP_ENGINE_TRF = SCRUB_PROVIDER_TRF.create_engine()
# SCRUB_CONFIG_LG = {
# "nlp_engine_name": "spacy",
# "models": [{"lang_code": "en", "model_name": "en_core_web_lg"}],
# }
# SCRUB_PROVIDER_LG = NlpEngineProvider(nlp_configuration=SCRUB_CONFIG_LG)
# NLP_ENGINE_LG = SCRUB_PROVIDER_LG.create_engine()
# ANALYZER = AnalyzerEngine(
# nlp_engine=NLP_ENGINE_TRF,
# supported_languages=["en"]
# )
ANALYZER = AnalyzerEngine()
ANONYMIZER = AnonymizerEngine()
IMAGE_REDACTOR = ImageRedactorEngine(ImageAnalyzerEngine(ANALYZER))
IMAGE_REDACTOR = ImageRedactorEngine()
SCRUB_IGNORE_ENTITIES = [
# 'US_PASSPORT',
# 'US_DRIVER_LICENSE',
Expand Down
9 changes: 1 addition & 8 deletions openadapt/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,13 +382,6 @@ def read_window_events(
window_data["title"] != prev_window_data.get("title") or
window_data["window_id"] != prev_window_data.get("window_id")
):
# TODO: fix exception sometimes triggered by the next line on win32:
# File "\Python39\lib\threading.py", line 917, in run
# File "...\openadapt\record.py", line 277, in read_window_events
# File "...\env\lib\site-packages\loguru\_logger.py", line 1977, in info
# File "...\env\lib\site-packages\loguru\_logger.py", line 1964, in _log
# for handler in core.handlers.values():
# RuntimeError: dictionary changed size during iteration
_window_data = dict(window_data)
_window_data.pop("state")
logger.info(f"{_window_data=}")
Expand All @@ -399,7 +392,7 @@ def read_window_events(
"window",
window_data,
))
prev_window_data = window_data
prev_window_data = dict(window_data)


def performance_stats_writer (
Expand Down
1 change: 1 addition & 0 deletions openadapt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@


def configure_logging(logger, log_level):
return
log_level_override = os.getenv("LOG_LEVEL")
log_level = log_level_override or log_level
logger.remove()
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
alembic==1.8.1
ascii_magic==2.3.0
git+https://github.com/abrichr/atomacos.git; sys_platform == 'darwin'
atomacos @ git+https://github.com/abrichr/atomacos.git#egg=atomacos ; sys_platform == 'darwin'
bokeh==2.4.3
clipboard==0.0.4
deepdiff[optimize]==6.3.0
Expand Down Expand Up @@ -31,4 +31,4 @@ transformers==4.28.1
pytesseract==0.3.7
presidio_analyzer
presidio_anonymizer
presidio_image_redactor
presidio_image_redactor
2 changes: 1 addition & 1 deletion tests/openadapt/test_scrub.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def test_scrub_address() -> None:

assert (
scrub.scrub_text("My address is 123 Main St, Toronto, On, CAN.")
== "My address is 123 Main St, Toro***, On, ***."
== "My address is 123 Main St, *******, On, CAN."
)


Expand Down

0 comments on commit b4a3b60

Please sign in to comment.