Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Better caching options #197

Merged
merged 1 commit into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions src/ua_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,10 @@
__all__ = [
"BasicResolver",
"CachingResolver",
"Clearing",
"Cache",
"DefaultedParseResult",
"Device",
"Domain",
"LRU",
"Locking",
"Matchers",
"OS",
"ParseResult",
Expand All @@ -46,7 +44,7 @@
from typing import Callable, Optional

from .basic import Resolver as BasicResolver
from .caching import CachingResolver, Clearing, Locking, LRU
from .caching import CachingResolver, S3Fifo as Cache
from .core import (
DefaultedParseResult,
Device,
Expand Down Expand Up @@ -77,7 +75,7 @@ def from_matchers(cls, m: Matchers, /) -> Parser:
return cls(
CachingResolver(
BasicResolver(m),
Locking(LRU(200)),
Cache(200),
)
)

Expand Down
86 changes: 55 additions & 31 deletions src/ua_parser/__main__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import argparse
import csv
import gc
import io
import itertools
import math
Expand All @@ -8,19 +9,29 @@
import sys
import threading
import time
from typing import Any, Callable, Iterable, List, Optional, Sequence, Tuple, Union
import tracemalloc
from typing import (
Any,
Callable,
Dict,
Iterable,
List,
Optional,
Sequence,
Tuple,
Union,
cast,
)

from . import (
BasicResolver,
CachingResolver,
Clearing,
Domain,
Locking,
LRU,
Matchers,
Parser,
PartialParseResult,
Resolver,
caching,
)
from .caching import Cache, Local
from .loaders import load_builtins, load_yaml
Expand All @@ -34,6 +45,17 @@
}


CACHES: Dict[str, Optional[Callable[[int], Cache]]] = {"none": None}
CACHES.update(
(cache.__name__.lower(), cache)
for cache in [
cast(Callable[[int], Cache], caching.Lru),
caching.S3Fifo,
caching.Sieve,
]
)


def get_rules(parsers: List[str], regexes: Optional[io.IOBase]) -> Matchers:
if regexes:
if not load_yaml:
Expand Down Expand Up @@ -156,18 +178,13 @@ def get_parser(
else:
sys.exit(f"unknown parser {parser!r}")

c: Callable[[int], Cache]
if cache == "none":
return Parser(r).parse
elif cache == "clearing":
c = Clearing
elif cache == "lru":
c = LRU
elif cache == "lru-threadsafe":
c = lambda size: Locking(LRU(size)) # noqa: E731
else:
if cache not in CACHES:
sys.exit(f"unknown cache algorithm {cache!r}")

c = CACHES.get(cache)
if c is None:
return Parser(r).parse

return Parser(CachingResolver(r, c(cachesize))).parse


Expand All @@ -182,14 +199,16 @@ def run(


def run_hitrates(args: argparse.Namespace) -> None:
def noop(ua: str, domains: Domain, /) -> PartialParseResult:
return PartialParseResult(
domains=domains,
string=ua,
user_agent=None,
os=None,
device=None,
)
r = PartialParseResult(
domains=Domain.ALL,
string="",
user_agent=None,
os=None,
device=None,
)

def noop(_ua: str, _domains: Domain, /) -> PartialParseResult:
return r

class Counter:
def __init__(self, parser: Resolver) -> None:
Expand All @@ -206,19 +225,25 @@ def __call__(self, ua: str, domains: Domain, /) -> PartialParseResult:
print(total, "lines", uniques, "uniques")
print(f"ideal hit rate: {(total - uniques)/total:.0%}")
print()
caches: List[Callable[[int], Cache]] = [Clearing, LRU]
w = int(math.log10(max(args.cachesizes)) + 1)
tracemalloc.start()
for cache, cache_size in itertools.product(
caches,
filter(None, CACHES.values()),
args.cachesizes,
):
misses = Counter(noop)
gc.collect()
before = tracemalloc.take_snapshot()
parser = Parser(CachingResolver(misses, cache(cache_size)))
for line in lines:
parser.parse(line)

gc.collect()
after = tracemalloc.take_snapshot()
diff = sum(s.size_diff for s in after.compare_to(before, "filename"))
print(
f"{cache.__name__.lower()}({cache_size}): {(total - misses.count)/total:.0%} hit rate"
f"{cache.__name__.lower():8}({cache_size:{w}}): {(total - misses.count)/total*100:2.0f}% hit rate, {diff:9} bytes"
)
del misses, parser


CACHESIZE = 1000
Expand All @@ -242,9 +267,8 @@ def run_threaded(args: argparse.Namespace) -> None:
lines = list(args.file)
basic = BasicResolver(load_builtins())
resolvers: List[Tuple[str, Resolver]] = [
("clearing", CachingResolver(basic, Clearing(CACHESIZE))),
("locking-lru", CachingResolver(basic, Locking(LRU(CACHESIZE)))),
("local-lru", CachingResolver(basic, Local(lambda: LRU(CACHESIZE)))),
("locking-lru", CachingResolver(basic, caching.Lru(CACHESIZE))),
("local-lru", CachingResolver(basic, Local(lambda: caching.Lru(CACHESIZE)))),
("re2", Re2Resolver(load_builtins())),
]
for name, resolver in resolvers:
Expand Down Expand Up @@ -367,8 +391,8 @@ def __call__(
bench.add_argument(
"--caches",
nargs="+",
choices=["none", "clearing", "lru", "lru-threadsafe"],
default=["none", "clearing", "lru", "lru-threadsafe"],
choices=list(CACHES),
default=list(CACHES),
help="""Cache implementations to test. `clearing` completely
clears the cache when full, `lru` uses a least-recently-used eviction
policy. `lru` is not thread-safe, so `lru-threadsafe` adds a mutex
Expand Down
Loading