Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ChainLogger, PyTorchLogger and LookupLogger #19

Merged
merged 19 commits into from
Jan 3, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ logger2 = {"@loggers": "spacy.LookupLogger.v1", "substring": "pytorch"}
This logger can be used to daisy-chain multiple loggers and execute them in order. Loggers that are executed earlier in the chain
can pass information to those that come later by adding it to the dictionary that is passed to them.

Currently, upto 10 loggers can be chained together.
Currently, up to 10 loggers can be chained together.

### Example config

Expand Down Expand Up @@ -289,7 +289,7 @@ logger2 = {"@loggers": "spacy.ConsoleLogger.v1", "progress_bar": "true"}

### Usage

This logger can be used lookup statistics in the info dictionary and print them to `stdout`. It is primarily
This logger can be used to look up statistics in the info dictionary and print them to `stdout`. It is primarily
intended to be used as a tool when developing new loggers.

### Example config
Expand All @@ -298,9 +298,9 @@ intended to be used as a tool when developing new loggers.
[training.logger]
@loggers = "spacy.ChainLogger.v1"
logger1 = {"@loggers": "spacy.PyTorchLogger.v1", "prefix": "pytorch"}
logger2 = {"@loggers": "spacy.LookupLogger.v1", "substring": "pytorch"}
logger2 = {"@loggers": "spacy.LookupLogger.v1", "patterns": ["^(p|P)ytorch"]}
shadeMe marked this conversation as resolved.
Show resolved Hide resolved
```

| Name | Type | Description |
| ----------- | ----- | ------------------------------------------------------------------------- |
| `substring` | `str` | If a statistic's name contains this string, it's printed out to `stdout`. |
| Name | Type | Description |
| ---------- | ----------- | ---------------------------------------------------------------------------------------------------- |
| `patterns` | `List[str]` | A list of regular expressions. If a statistic's name matches one of these, it's printed to `stdout`. |
12 changes: 8 additions & 4 deletions spacy_loggers/lookup.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,25 @@
"""
A utility logger that looks up specific statistics and prints them to stdout.
"""
from typing import Dict, Any, Optional, IO
from typing import Dict, Any, Optional, IO, List
import sys

from spacy import Language
from .util import dict_to_dot, LoggerT
from .util import dict_to_dot, LoggerT, setup_custom_stats_matcher


def lookup_logger_v1(substring: str) -> LoggerT:
def lookup_logger_v1(patterns: List[str]) -> LoggerT:
def setup_logger(nlp: Language, stdout: IO = sys.stdout, stderr: IO = sys.stderr):
if len(patterns) == 0:
raise ValueError("Lookup logger has no patterns")
shadeMe marked this conversation as resolved.
Show resolved Hide resolved
match_stat = setup_custom_stats_matcher(patterns)

def log_step(info: Optional[Dict[str, Any]]):
if info is None:
return
config_dot = dict_to_dot(info)
for k, v in config_dot.items():
if substring in k:
if match_stat(k):
stdout.writelines([k, " -> ", str(v), "\n"])

def finalize():
Expand Down
47 changes: 47 additions & 0 deletions spacy_loggers/tests/test_chain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import pytest

from .util import load_logger_from_config


# Config whose training logger is a `spacy.ChainLogger.v1` wrapping two
# sub-loggers: a console logger in slot `logger1` and a lookup logger in slot
# `logger9`.
# NOTE(review): the non-contiguous slot numbers presumably check that gaps in
# the chain are tolerated - confirm against the ChainLogger implementation.
valid_config_string = """
[nlp]
lang = "en"
pipeline = ["tok2vec"]

[components]

[components.tok2vec]
factory = "tok2vec"

[training]

[training.logger]
@loggers = "spacy.ChainLogger.v1"
logger1 = {"@loggers": "spacy.ConsoleLogger.v1", "progress_bar": "true"}
logger9 = {"@loggers": "spacy.LookupLogger.v1", "patterns": ["test"]}
"""

# Same pipeline, but the `spacy.ChainLogger.v1` section declares no
# sub-loggers at all.
invalid_config_string = """
[nlp]
lang = "en"
pipeline = ["tok2vec"]

[components]

[components.tok2vec]
factory = "tok2vec"

[training]

[training.logger]
@loggers = "spacy.ChainLogger.v1"
"""


def test_load_from_config():
    """Check that a populated chain logger sets up and an empty one is rejected."""
    # A chain with sub-loggers must resolve and yield a pair of callbacks.
    logger_factory, pipeline = load_logger_from_config(valid_config_string)
    _, _ = logger_factory(pipeline)

    # A chain without sub-loggers must fail, either while resolving the
    # config or while setting the logger up.
    with pytest.raises(ValueError, match="No loggers"):
        logger_factory, pipeline = load_logger_from_config(invalid_config_string)
        _, _ = logger_factory(pipeline)
shadeMe marked this conversation as resolved.
Show resolved Hide resolved
91 changes: 91 additions & 0 deletions spacy_loggers/tests/test_lookup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import pytest
import re

from spacy_loggers.util import setup_custom_stats_matcher
from .util import load_logger_from_config


# Config whose training logger is a `spacy.LookupLogger.v1` with two
# well-formed regular-expression patterns. The string must contain config
# syntax only: any stray non-config line would make the "valid" fixture
# unparseable.
valid_config_string = """
[nlp]
lang = "en"
pipeline = ["tok2vec"]

[components]

[components.tok2vec]
factory = "tok2vec"

[training]

[training.logger]
@loggers = "spacy.LookupLogger.v1"
patterns = ["^(p|P)ytorch", "zeppelin" ]
"""

# LookupLogger config with an empty `patterns` list.
invalid_config_string_empty = """
[nlp]
lang = "en"
pipeline = ["tok2vec"]

[components]

[components.tok2vec]
factory = "tok2vec"

[training]

[training.logger]
@loggers = "spacy.LookupLogger.v1"
patterns = []
"""

# LookupLogger config whose only pattern, ")", is not a valid regular
# expression (unbalanced parenthesis).
invalid_config_string_incorrect_pattern = """
[nlp]
lang = "en"
pipeline = ["tok2vec"]

[components]

[components.tok2vec]
factory = "tok2vec"

[training]

[training.logger]
@loggers = "spacy.LookupLogger.v1"
patterns = [")"]
"""


def test_load_from_config():
    """Check that a valid lookup logger sets up and invalid ones raise."""
    # The well-formed config must resolve and yield a pair of callbacks.
    logger_factory, pipeline = load_logger_from_config(valid_config_string)
    _, _ = logger_factory(pipeline)

    # Each broken config paired with the fragment its ValueError must contain.
    failing_cases = [
        (invalid_config_string_empty, "no patterns"),
        (invalid_config_string_incorrect_pattern, "couldn't be compiled"),
    ]
    for config_str, message in failing_cases:
        with pytest.raises(ValueError, match=message):
            logger_factory, pipeline = load_logger_from_config(config_str)
            _, _ = logger_factory(pipeline)


def test_custom_stats_matcher():
    """Check the stats matcher against known names and an invalid pattern."""
    # Each candidate statistic name paired with whether it should match.
    expectations = [
        ("no match", False),
        ("torch", False),
        ("pYtorch", False),
        ("pytorch", True),
        ("Pytorch 1.13", True),
        ("led zeppelin", True),
    ]

    matcher = setup_custom_stats_matcher(["^(p|P)ytorch", "zeppelin$"])
    actual = [matcher(name) for name, _ in expectations]
    assert actual == [expected for _, expected in expectations]

    # An uncompilable pattern must surface as a ValueError.
    with pytest.raises(ValueError, match="couldn't be compiled"):
        _ = setup_custom_stats_matcher([")"])
20 changes: 20 additions & 0 deletions spacy_loggers/tests/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from typing import Tuple

from spacy import Language
from spacy.util import (
load_model_from_config,
registry,
load_config_from_str,
)
from spacy.schemas import ConfigSchemaTraining

from spacy_loggers.util import LoggerT


def load_logger_from_config(config_str: str) -> Tuple[LoggerT, Language]:
    """Build a pipeline from a config string and resolve its training logger.

    config_str (str): The full config, including a [training.logger] block.
    RETURNS (Tuple[LoggerT, Language]): The resolved logger and the pipeline.
    """
    nlp = load_model_from_config(load_config_from_str(config_str), auto_fill=True)
    # Resolve only the [training] section, validated against the training schema.
    training_section = nlp.config.interpolate()["training"]
    resolved = registry.resolve(training_section, schema=ConfigSchemaTraining)
    return resolved["logger"], nlp
34 changes: 34 additions & 0 deletions spacy_loggers/util.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
"""
Configuration utilities copied from spacy.util.
"""
import sys
from typing import Dict, Any, Tuple, Callable, Iterator, List, Optional, IO
import re

from spacy import Language
from spacy.util import registry


LoggerT = Callable[
Expand Down Expand Up @@ -49,3 +52,34 @@ def dict_to_dot(obj: Dict[str, dict]) -> Dict[str, Any]:
RETURNS (Dict[str, Any]): The key/value pairs.
"""
return {".".join(key): value for key, value in walk_dict(obj)}


def setup_custom_stats_matcher(
    regexps: Optional[List[str]] = None,
) -> Callable[[str], bool]:
    """Compile a list of regular expressions into a single matcher function.

    regexps (Optional[List[str]]): Patterns to compile with `re.MULTILINE`.
        `None` (the default) or an empty list yields a matcher that never
        matches.
    RETURNS (Callable[[str], bool]): A function that returns True if any of
        the patterns is found anywhere in the given string (`search`
        semantics, not a full match).
    RAISES (ValueError): If one of the patterns is not a valid regular
        expression.
    """
    compiled = []
    # Guard on the argument itself so the `None` default does not blow up
    # when iterated.
    for regex in regexps or []:
        # Keep the try body to the one call that can raise, so `regex` is
        # always bound when the error message is built.
        try:
            compiled.append(re.compile(regex, flags=re.MULTILINE))
        except re.error as err:
            raise ValueError(
                f"Regular expression `{regex}` couldn't be compiled for logger stats matcher"
            ) from err

    def is_match(string: str) -> bool:
        return any(pattern.search(string) for pattern in compiled)

    return is_match


def setup_default_console_logger(
    nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
) -> Tuple[Callable, Callable]:
    """Set up the registered console logger with its progress bar disabled.

    nlp (Language): The pipeline handed to the console logger's setup.
    stdout (IO): Stream forwarded to the console logger as its stdout.
    stderr (IO): Stream forwarded to the console logger as its stderr.
    RETURNS (Tuple[Callable, Callable]): The console logger's step and
        finalize callbacks.
    """
    make_console = registry.get("loggers", "spacy.ConsoleLogger.v1")
    log_step, finalize = make_console(progress_bar=False)(nlp, stdout, stderr)
    return log_step, finalize