From 54c928571cca2f855e24e52bf112826f9034b72e Mon Sep 17 00:00:00 2001 From: Scott K Logan Date: Fri, 6 Sep 2024 14:07:39 -0500 Subject: [PATCH 1/2] Re-work EmPy token caching The previous approach was to re-implement Interpreter.parse() to iterate over cached tokens where possible. This proved to be a problem when the implementation changed in EmPy 4.x. The approach implemented here is to create a shim between the Interpreter and Scanner API and record/inject the tokens there, which improves the compatibility by working only at the API boundary and not duplicating chunks of the upstream implementation. --- colcon_core/shell/template/__init__.py | 51 +++++++++++++++----------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/colcon_core/shell/template/__init__.py b/colcon_core/shell/template/__init__.py index 842220c89..12bacb2c1 100644 --- a/colcon_core/shell/template/__init__.py +++ b/colcon_core/shell/template/__init__.py @@ -4,6 +4,7 @@ from io import StringIO import os +from colcon_core.generic_decorator import GenericDecorator from colcon_core.logging import colcon_logger try: from em import Interpreter @@ -69,25 +70,31 @@ def installProxy(self): # noqa: D102 N802 class CachingInterpreter(BypassStdoutInterpreter): - """Interpreter for EmPy which which caches parsed tokens.""" - - def parse(self, scanner, locals=None): # noqa: A002 D102 - global cached_tokens - data = scanner.buffer - # try to use cached tokens - tokens = cached_tokens.get(data) - if tokens is None: - # collect tokens and cache them - tokens = [] - while True: - token = scanner.one() - if token is None: - break - tokens.append(token) - cached_tokens[data] = tokens - - # reimplement the parse method using the (cached) tokens - self.invoke('atParse', scanner=scanner, locals=locals) - for token in tokens: - self.invoke('atToken', token=token) - token.run(self, locals) + """Interpreter for EmPy which caches parsed tokens.""" + + class _CachingScannerDecorator(GenericDecorator): + + def __init__(self, decoree, cache): + super().__init__(decoree, _cache=cache, _idx=0) + + def one(self, *args, **kwargs): + try: + token, count = self._cache[self._idx] + except IndexError: + count = len(self._decoree) + token = self._decoree.one(*args, **kwargs) + count -= len(self._decoree) + self._cache.append((token, count)) + else: + self.advance(count) + self.sync() + + self._idx += 1 + return token + + def parse(self, scanner, *args, **kwargs): # noqa: A002 D102 + cache = cached_tokens.setdefault(scanner.buffer, []) + return super().parse( + CachingInterpreter._CachingScannerDecorator(scanner, cache), + *args, + **kwargs) From 1e6c9b9471c2d6796dcde6ef9ecc3de39c2118f5 Mon Sep 17 00:00:00 2001 From: Scott K Logan Date: Fri, 6 Sep 2024 15:55:08 -0500 Subject: [PATCH 2/2] Update colcon_core/shell/template/__init__.py Co-authored-by: Chris Lalancette --- colcon_core/shell/template/__init__.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/colcon_core/shell/template/__init__.py b/colcon_core/shell/template/__init__.py index 12bacb2c1..57087da32 100644 --- a/colcon_core/shell/template/__init__.py +++ b/colcon_core/shell/template/__init__.py @@ -78,16 +78,15 @@ def __init__(self, decoree, cache): super().__init__(decoree, _cache=cache, _idx=0) def one(self, *args, **kwargs): - try: + if self._idx < len(self._cache): token, count = self._cache[self._idx] - except IndexError: + self.advance(count) + self.sync() + else: count = len(self._decoree) token = self._decoree.one(*args, **kwargs) count -= len(self._decoree) self._cache.append((token, count)) - else: - self.advance(count) - self.sync() self._idx += 1 return token