diff --git a/README.rst b/README.rst index 1425b64..f294910 100644 --- a/README.rst +++ b/README.rst @@ -79,13 +79,14 @@ Extract browser data from user-agent string .. - ⚠️The convenience parsers (``ParseUserAgent``, ``ParseOs``, and - ``ParseDevice``) currently have no caching, which can result in - degraded performances when parsing large amounts of identical - user-agents (which might occur for real-world datasets). - - In that case, prefer using ``Parse`` and extracting the - sub-component you need from the resulting dictionary. + ⚠️Before 0.15, the convenience parsers (``ParseUserAgent``, + ``ParseOS``, and ``ParseDevice``) were not cached, which could + result in degraded performance when parsing large amounts of + identical user-agents (which might occur for real-world datasets). + + For these versions (up to and including 0.10), prefer using ``Parse`` + and extracting the sub-component you need from the resulting + dictionary. Extract OS information from user-agent string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/ua_parser/user_agent_parser.py b/ua_parser/user_agent_parser.py index cc1cfc1..8f55b82 100644 --- a/ua_parser/user_agent_parser.py +++ b/ua_parser/user_agent_parser.py @@ -215,8 +215,29 @@ def Parse(self, user_agent_string): return device, brand, model -MAX_CACHE_SIZE = 20 -_parse_cache = {} +MAX_CACHE_SIZE = 200 +_PARSE_CACHE = {} + + +def _lookup(ua, args): + key = (ua, tuple(sorted(args.items()))) + entry = _PARSE_CACHE.get(key) + if entry is not None: + return entry + + if len(_PARSE_CACHE) >= MAX_CACHE_SIZE: + _PARSE_CACHE.pop(next(iter(_PARSE_CACHE))) + + v = _PARSE_CACHE[key] = {"string": ua} + return v + + +def _cached(ua, args, key, fn): + entry = _lookup(ua, args) + r = entry.get(key) + if not r: + r = entry[key] = fn(ua, args) + return r def Parse(user_agent_string, **jsParseBits): @@ -227,21 +248,20 @@ def Parse(user_agent_string, **jsParseBits): Returns: A dictionary containing all parsed bits """ - jsParseBits = 
jsParseBits or {} - key = (user_agent_string, repr(jsParseBits)) - cached = _parse_cache.get(key) - if cached is not None: - return cached - if len(_parse_cache) > MAX_CACHE_SIZE: - _parse_cache.clear() - v = { - "user_agent": ParseUserAgent(user_agent_string, **jsParseBits), - "os": ParseOS(user_agent_string, **jsParseBits), - "device": ParseDevice(user_agent_string, **jsParseBits), - "string": user_agent_string, - } - _parse_cache[key] = v - return v + entry = _lookup(user_agent_string, jsParseBits) + # entry is complete, return directly + if len(entry) == 4: + return entry + + # entry is partially or entirely empty + if "user_agent" not in entry: + entry["user_agent"] = _ParseUserAgent(user_agent_string, jsParseBits) + if "os" not in entry: + entry["os"] = _ParseOS(user_agent_string, jsParseBits) + if "device" not in entry: + entry["device"] = _ParseDevice(user_agent_string, jsParseBits) + + return entry def ParseUserAgent(user_agent_string, **jsParseBits): @@ -252,6 +272,10 @@ def ParseUserAgent(user_agent_string, **jsParseBits): Returns: A dictionary containing parsed bits. """ + return _cached(user_agent_string, jsParseBits, "user_agent", _ParseUserAgent) + + +def _ParseUserAgent(user_agent_string, jsParseBits): if ( "js_user_agent_family" in jsParseBits and jsParseBits["js_user_agent_family"] != "" @@ -298,6 +322,10 @@ def ParseOS(user_agent_string, **jsParseBits): Returns: A dictionary containing parsed bits. """ + return _cached(user_agent_string, jsParseBits, "os", _ParseOS) + + +def _ParseOS(user_agent_string, jsParseBits): for osParser in OS_PARSERS: os, os_v1, os_v2, os_v3, os_v4 = osParser.Parse(user_agent_string) if os: @@ -312,7 +340,7 @@ def ParseOS(user_agent_string, **jsParseBits): } -def ParseDevice(user_agent_string): +def ParseDevice(user_agent_string, **jsParseBits): """Parses the user-agent string for device info. Args: user_agent_string: The full user-agent string. 
@@ -320,6 +348,10 @@ def ParseDevice(user_agent_string): Returns: A dictionary containing parsed bits. """ + return _cached(user_agent_string, jsParseBits, "device", _ParseDevice) + + +def _ParseDevice(user_agent_string, jsParseBits): for deviceParser in DEVICE_PARSERS: device, brand, model = deviceParser.Parse(user_agent_string) if device: diff --git a/ua_parser/user_agent_parser_test.py b/ua_parser/user_agent_parser_test.py index b1d2c81..f8b7b6a 100644 --- a/ua_parser/user_agent_parser_test.py +++ b/ua_parser/user_agent_parser_test.py @@ -182,6 +182,11 @@ def runUserAgentTestsFromYAML(self, file_name): result["patch"], ), ) + self.assertLessEqual( + len(user_agent_parser._PARSE_CACHE), + user_agent_parser.MAX_CACHE_SIZE, + "verify that the cache size never exceeds the configured setting", + ) def runOSTestsFromYAML(self, file_name): yamlFile = open(os.path.join(TEST_RESOURCES_DIR, file_name))