Guard against unnecessarily calling dump_graph in logging #4619

Merged
merged 39 commits, Jan 31, 2022
Changes from 4 commits
Commits
39 commits
bf81ace
add lazy type and apply to cache events
Jan 24, 2022
1f0b63c
Counter returns parametric dummy_val
Jan 24, 2022
66bb04b
add second test
Jan 24, 2022
893b4f5
lint fixes
Jan 24, 2022
b8c949a
debug line
Jan 24, 2022
ddd2425
try catch
Jan 24, 2022
655d700
boop
Jan 24, 2022
13ce6ca
exclude test event from schema checks
Jan 24, 2022
270c985
fix syntax
Jan 24, 2022
aa09715
remove import
Jan 24, 2022
5a4b674
update dummy values
Jan 24, 2022
005bb04
override serialization of lazy value
Jan 24, 2022
7d2086a
lint fixes
Jan 24, 2022
551ae7b
fix broken tests
Jan 24, 2022
40fab11
fix counter usage
Jan 24, 2022
d982baf
fix lazy to expliciltly check for none
Jan 24, 2022
37a8fe9
fix typo
Jan 24, 2022
8eaebb6
fix lazy type
Jan 25, 2022
7cdc870
comment update
Jan 25, 2022
7e5ebeb
whitespace
Jan 25, 2022
0aeabf4
comment wording
Jan 25, 2022
99b55e0
lint fixes
Jan 25, 2022
940be3a
maybe this fixes adapter integration?
Jan 25, 2022
1700937
forgot import
Jan 25, 2022
258417d
add unit tests for both json and text
Jan 25, 2022
02b1b50
this looks like the bug
Jan 26, 2022
ba5b1e0
update test to assert that the test wasn't bypassed
Jan 26, 2022
4d5a5a8
fixed usage docstring
Jan 26, 2022
8984bf5
simplify force definition
Jan 26, 2022
dbad987
remove old comment
Jan 27, 2022
4fd67bb
update readme with logging section that explains usage of laziness
Jan 27, 2022
0f38888
tighten test constraint
Jan 27, 2022
00d8453
resolve merge conflicts
Jan 27, 2022
65086de
bad first pass at using mashumaro
Jan 27, 2022
5cf4488
update
Jan 27, 2022
c6bac98
uses mashumaro
Jan 28, 2022
307af32
minor cleanup
Jan 28, 2022
1156247
move Lazy type to helper_types
Jan 31, 2022
71cd49b
update module readme
Jan 31, 2022
9 changes: 5 additions & 4 deletions core/dbt/adapters/cache.py
@@ -21,6 +21,7 @@
    UpdateReference
)
from dbt.utils import lowercase
from dbt.lazy import Lazy


def dot_separated(key: _ReferenceKey) -> str:
@@ -323,11 +324,11 @@ def add(self, relation):
        """
        cached = _CachedRelation(relation)
        fire_event(AddRelation(relation=_make_key(cached)))
        fire_event(DumpBeforeAddGraph(dump=self.dump_graph()))
        fire_event(DumpBeforeAddGraph(dump=Lazy.defer(lambda: self.dump_graph())))

        with self.lock:
            self._setdefault(cached)
        fire_event(DumpAfterAddGraph(dump=self.dump_graph()))
        fire_event(DumpAfterAddGraph(dump=Lazy.defer(lambda: self.dump_graph())))

    def _remove_refs(self, keys):
        """Removes all references to all entries in keys. This does not
@@ -441,15 +442,15 @@ def rename(self, old, new):
        new_key = _make_key(new)
        fire_event(RenameSchema(old_key=old_key, new_key=new_key))

        fire_event(DumpBeforeRenameSchema(dump=self.dump_graph()))
        fire_event(DumpBeforeRenameSchema(dump=Lazy.defer(lambda: self.dump_graph())))

        with self.lock:
            if self._check_rename_constraints(old_key, new_key):
                self._rename_relation(old_key, _CachedRelation(new))
            else:
                self._setdefault(_CachedRelation(new))

        fire_event(DumpAfterRenameSchema(dump=self.dump_graph()))
        fire_event(DumpAfterRenameSchema(dump=Lazy.defer(lambda: self.dump_graph())))

    def get_relations(
        self, database: Optional[str], schema: Optional[str]
28 changes: 13 additions & 15 deletions core/dbt/events/types.py
@@ -9,6 +9,7 @@
    RunResult
)
from dbt import ui
from dbt.lazy import Lazy
from dbt.events.base_types import (
    Event, NoFile, DebugLevel, InfoLevel, WarnLevel, ErrorLevel, ShowException,
    NodeInfo, Cache
@@ -700,41 +701,38 @@ def message(self) -> str:
@dataclass
class DumpBeforeAddGraph(DebugLevel, Cache):
    # large value. delay not necessary since every debug level message is logged anyway.
    dump: Dict[str, List[str]]
    dump: Lazy[Dict[str, List[str]]]
    code: str = "E031"

    def message(self) -> str:
        return f"before adding : {self.dump}"
        return f"before adding : {self.dump.value()}"


@dataclass
class DumpAfterAddGraph(DebugLevel, Cache):
    # large value. delay not necessary since every debug level message is logged anyway.
    dump: Dict[str, List[str]]
    dump: Lazy[Dict[str, List[str]]]
    code: str = "E032"

    def message(self) -> str:
        return f"after adding: {self.dump}"
        return f"after adding: {self.dump.value()}"


@dataclass
class DumpBeforeRenameSchema(DebugLevel, Cache):
    # large value. delay not necessary since every debug level message is logged anyway.
    dump: Dict[str, List[str]]
    dump: Lazy[Dict[str, List[str]]]
    code: str = "E033"

    def message(self) -> str:
        return f"before rename: {self.dump}"
        return f"before rename: {self.dump.value()}"


@dataclass
class DumpAfterRenameSchema(DebugLevel, Cache):
    # large value. delay not necessary since every debug level message is logged anyway.
    dump: Dict[str, List[str]]
    dump: Lazy[Dict[str, List[str]]]
    code: str = "E034"

    def message(self) -> str:
        return f"after rename: {self.dump}"
        return f"after rename: {self.dump.value()}"


@dataclass
@@ -2517,10 +2515,10 @@ def message(self) -> str:
        old_key=_ReferenceKey(database="", schema="", identifier=""),
        new_key=_ReferenceKey(database="", schema="", identifier="")
    )
    DumpBeforeAddGraph(dict())
    DumpAfterAddGraph(dict())
    DumpBeforeRenameSchema(dict())
    DumpAfterRenameSchema(dict())
    DumpBeforeAddGraph(Lazy.defer(lambda: dict()))
    DumpAfterAddGraph(Lazy.defer(lambda: dict()))
    DumpBeforeRenameSchema(Lazy.defer(lambda: dict()))
    DumpAfterRenameSchema(Lazy.defer(lambda: dict()))
    AdapterImportError(ModuleNotFoundError())
    PluginLoadError()
    SystemReportReturnCode(returncode=0)
45 changes: 45 additions & 0 deletions core/dbt/lazy.py
@@ -0,0 +1,45 @@
# necessary for annotating constructors
from __future__ import annotations

from dataclasses import dataclass
from typing import Callable, cast, Generic, Optional, TypeVar


T = TypeVar('T')


# A data type for representing lazily evaluated values. Evaluation is explicitly
# called with either `value` for access to memoization, or `force` to skip
# memoization.
#
# inspired by the purescript data type with
# additional considerations for impurity
# https://pursuit.purescript.org/packages/purescript-lazy/5.0.0/docs/Data.Lazy
@dataclass
class Lazy(Generic[T]):
    _f: Callable[[], T]
    memo: Optional[T] = None

    # constructor for lazy values
    @classmethod
    def defer(cls, f: Callable[[], T]) -> Lazy[T]:
        return Lazy(f)

    # workaround for open mypy issue:
    # https://github.com/python/mypy/issues/6910
    def _typed_eval_f(self) -> T:
        return cast(Callable[[], T], getattr(self, "_f"))()

    # gets the value from memoization or by evaluating the function.
    # good when the deferred function is pure.
    def value(self) -> T:
        if self.memo is not None:
            return self.memo
        else:
            self.memo = self._typed_eval_f()
            return self.memo

    # forces evaluation, skipping the memoization.
    # necessary when the deferred function is stateful or impure.
    def force(self) -> T:
        return self._typed_eval_f()
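
For illustration, a minimal usage sketch of the Lazy type defined above (expensive_dump is a hypothetical stand-in for something like RelationsCache.dump_graph, not part of this diff):

from dbt.lazy import Lazy

def expensive_dump() -> dict:
    # pretend this walks the whole relations cache
    print("computing dump")
    return {"schema.table": ["ref_one", "ref_two"]}

dump = Lazy.defer(expensive_dump)  # nothing is evaluated yet
dump.value()   # evaluates and memoizes: prints "computing dump" once
dump.value()   # returns the memoized dict without re-evaluating
dump.force()   # re-evaluates, bypassing the memo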
87 changes: 85 additions & 2 deletions test/unit/test_events.py
@@ -5,18 +5,20 @@
from dbt.events.functions import event_to_serializable_dict
from dbt.events.types import *
from dbt.events.test_types import *
from dbt.events.base_types import Event
from dbt.events.base_types import Event, InfoLevel
from dbt.events.stubs import _CachedRelation, BaseRelation, _ReferenceKey, ParsedModelNode
from importlib import reload
import dbt.events.functions as event_funcs
import dbt.flags as flags
from dbt.lazy import Lazy
import inspect
import json
from unittest import TestCase
from dbt.contracts.graph.parsed import (
ParsedModelNode, NodeConfig, DependsOn
)
from dbt.contracts.files import FileHash
from typing import Generic, TypeVar

# takes in a class and finds any subclasses for it
def get_all_subclasses(cls):
@@ -429,4 +431,85 @@ def test_all_serializable(self):
                json.dumps(d)
            except TypeError as e:
                raise Exception(f"{event} is not serializable to json. Originating exception: {e}")



T = TypeVar('T')


@dataclass
class Counter(Generic[T]):
    dummy_val: T
    count: int = 0

    def next(self) -> T:
        self.count = self.count + 1
        return self.dummy_val


@dataclass
class DummyCacheEvent(InfoLevel, Cache):
    code = 'X999'
    counter: Counter

    def message(self) -> str:
        return f"state: {self.counter.next()}"


class SkipsRenderingCacheEvents(TestCase):

    def setUp(self):
        pass

    # tests that if a cache event uses lazy evaluation for its message
    # creation, the evaluation will not be forced for cache events when
    # running without `--log-cache-events`.
    def test_skip_cache_event_message_rendering(self):
        # a dummy event that extends `Cache`
        e = DummyCacheEvent(Counter("some_state"))

        # a counter of zero means this potentially expensive function
        # (emulating dump_graph) has never been called
        self.assertTrue(e.counter.count == 0)

        # call fire_event
        event_funcs.fire_event(e)

        # assert that the expensive function has STILL not been called
        self.assertTrue(e.counter.count == 0)

    # this test checks that every subclass of `Cache` uses the same lazy evaluation
    # strategy. This ensures that potentially expensive cache event values are not
    # built unless they are needed for logging purposes. It also checks that these
    # potentially expensive values are cached, and not evaluated more than once.
    def test_all_cache_events_are_lazy(self):
        cache_events = get_all_subclasses(Cache)
        for clazz in cache_events:
            # this body assumes every subclass of `Cache` takes exactly one dictionary value.
            # if you just added a cache event that is different, just branch
            # inside this for loop for your event vs. the others.

            # initialize the counter to return a dictionary (emulating dump_graph)
            counter = Counter(dict())

            # assert that the counter starts at 0
            self.assertTrue(counter.count == 0)

            # create the cache event to use this counter
            e = clazz(Lazy.defer(lambda: counter.next()))

            # assert that initializing the event with the counter
            # did not evaluate the lazy value
            self.assertTrue(counter.count == 0)

            # log an event which should trigger evaluation and up
            # the counter
            event_funcs.fire_event(e)

            # assert that the counter did, in fact, increase
            self.assertTrue(counter.count == 1)

            # fire another event which should reuse the previous value,
            # not evaluate the function again
            event_funcs.fire_event(e)

            # assert that the counter did not, in fact, increase
            self.assertTrue(counter.count == 1)
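
For context, the first test above relies on fire_event declining to render Cache event messages when cache logging is disabled. A minimal sketch of that kind of guard, assuming a flags.LOG_CACHE_EVENTS boolean (illustrative only; the real fire_event lives in core/dbt/events/functions.py and is not part of this diff):

import dbt.flags as flags
from dbt.events.base_types import Cache, Event

def fire_event_sketch(e: Event) -> None:
    # cache events can be expensive to render, so skip building their
    # message entirely unless --log-cache-events was passed
    if isinstance(e, Cache) and not flags.LOG_CACHE_EVENTS:
        return
    # only here is the message built, which forces any Lazy dump value
    print(e.message())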