Guard against unnecessarily calling dump_graph in logging #4619

Merged
merged 39 commits, Jan 31, 2022
Changes from 4 commits
Commits
39 commits
bf81ace
add lazy type and apply to cache events
Jan 24, 2022
1f0b63c
Counter returns parametric dummy_val
Jan 24, 2022
66bb04b
add second test
Jan 24, 2022
893b4f5
lint fixes
Jan 24, 2022
b8c949a
debug line
Jan 24, 2022
ddd2425
try catch
Jan 24, 2022
655d700
boop
Jan 24, 2022
13ce6ca
exclude test event from schema checks
Jan 24, 2022
270c985
fix syntax
Jan 24, 2022
aa09715
remove import
Jan 24, 2022
5a4b674
update dummy values
Jan 24, 2022
005bb04
override serialization of lazy value
Jan 24, 2022
7d2086a
lint fixes
Jan 24, 2022
551ae7b
fix broken tests
Jan 24, 2022
40fab11
fix counter usage
Jan 24, 2022
d982baf
fix lazy to expliciltly check for none
Jan 24, 2022
37a8fe9
fix typo
Jan 24, 2022
8eaebb6
fix lazy type
Jan 25, 2022
7cdc870
comment update
Jan 25, 2022
7e5ebeb
whitespace
Jan 25, 2022
0aeabf4
comment wording
Jan 25, 2022
99b55e0
lint fixes
Jan 25, 2022
940be3a
maybe this fixes adapter integration?
Jan 25, 2022
1700937
forgot import
Jan 25, 2022
258417d
add unit tests for both json and text
Jan 25, 2022
02b1b50
this looks like the bug
Jan 26, 2022
ba5b1e0
update test to assert that the test wasn't bypassed
Jan 26, 2022
4d5a5a8
fixed usage docstring
Jan 26, 2022
8984bf5
simplify force definition
Jan 26, 2022
dbad987
remove old comment
Jan 27, 2022
4fd67bb
update readme with logging section that explains usage of laziness
Jan 27, 2022
0f38888
tighten test constraint
Jan 27, 2022
00d8453
resolve merge conflicts
Jan 27, 2022
65086de
bad first pass at using mashumaro
Jan 27, 2022
5cf4488
update
Jan 27, 2022
c6bac98
uses mashumaro
Jan 28, 2022
307af32
minor cleanup
Jan 28, 2022
1156247
move Lazy type to helper_types
Jan 31, 2022
71cd49b
update module readme
Jan 31, 2022
9 changes: 5 additions & 4 deletions core/dbt/adapters/cache.py
@@ -21,6 +21,7 @@
    UpdateReference
)
from dbt.utils import lowercase
from dbt.lazy import Lazy


def dot_separated(key: _ReferenceKey) -> str:
@@ -323,11 +324,11 @@ def add(self, relation):
        """
        cached = _CachedRelation(relation)
        fire_event(AddRelation(relation=_make_key(cached)))
        fire_event(DumpBeforeAddGraph(dump=self.dump_graph()))
        fire_event(DumpBeforeAddGraph(dump=Lazy.defer(lambda: self.dump_graph())))

        with self.lock:
            self._setdefault(cached)
        fire_event(DumpAfterAddGraph(dump=self.dump_graph()))
        fire_event(DumpAfterAddGraph(dump=Lazy.defer(lambda: self.dump_graph())))

    def _remove_refs(self, keys):
        """Removes all references to all entries in keys. This does not
@@ -441,15 +442,15 @@ def rename(self, old, new):
        new_key = _make_key(new)
        fire_event(RenameSchema(old_key=old_key, new_key=new_key))

        fire_event(DumpBeforeRenameSchema(dump=self.dump_graph()))
        fire_event(DumpBeforeRenameSchema(dump=Lazy.defer(lambda: self.dump_graph())))

        with self.lock:
            if self._check_rename_constraints(old_key, new_key):
                self._rename_relation(old_key, _CachedRelation(new))
            else:
                self._setdefault(_CachedRelation(new))

        fire_event(DumpAfterRenameSchema(dump=self.dump_graph()))
        fire_event(DumpAfterRenameSchema(dump=Lazy.defer(lambda: self.dump_graph())))

    def get_relations(
        self, database: Optional[str], schema: Optional[str]
28 changes: 13 additions & 15 deletions core/dbt/events/types.py
@@ -9,6 +9,7 @@
    RunResult
)
from dbt import ui
from dbt.lazy import Lazy
from dbt.events.base_types import (
    Event, NoFile, DebugLevel, InfoLevel, WarnLevel, ErrorLevel, ShowException,
    NodeInfo, Cache
@@ -700,41 +701,38 @@ def message(self) -> str:
@dataclass
class DumpBeforeAddGraph(DebugLevel, Cache):
    # large value. delay not necessary since every debug level message is logged anyway.
    dump: Dict[str, List[str]]
    dump: Lazy[Dict[str, List[str]]]
    code: str = "E031"

    def message(self) -> str:
        return f"before adding : {self.dump}"
        return f"before adding : {self.dump.value()}"


@dataclass
class DumpAfterAddGraph(DebugLevel, Cache):
    # large value. delay not necessary since every debug level message is logged anyway.
    dump: Dict[str, List[str]]
    dump: Lazy[Dict[str, List[str]]]
    code: str = "E032"

    def message(self) -> str:
        return f"after adding: {self.dump}"
        return f"after adding: {self.dump.value()}"


@dataclass
class DumpBeforeRenameSchema(DebugLevel, Cache):
    # large value. delay not necessary since every debug level message is logged anyway.
    dump: Dict[str, List[str]]
    dump: Lazy[Dict[str, List[str]]]
    code: str = "E033"

    def message(self) -> str:
        return f"before rename: {self.dump}"
        return f"before rename: {self.dump.value()}"


@dataclass
class DumpAfterRenameSchema(DebugLevel, Cache):
    # large value. delay not necessary since every debug level message is logged anyway.
    dump: Dict[str, List[str]]
    dump: Lazy[Dict[str, List[str]]]
    code: str = "E034"

    def message(self) -> str:
        return f"after rename: {self.dump}"
        return f"after rename: {self.dump.value()}"


@dataclass
@@ -2517,10 +2515,10 @@ def message(self) -> str:
        old_key=_ReferenceKey(database="", schema="", identifier=""),
        new_key=_ReferenceKey(database="", schema="", identifier="")
    )
    DumpBeforeAddGraph(dict())
    DumpAfterAddGraph(dict())
    DumpBeforeRenameSchema(dict())
    DumpAfterRenameSchema(dict())
    DumpBeforeAddGraph(Lazy.defer(lambda: dict()))
    DumpAfterAddGraph(Lazy.defer(lambda: dict()))
    DumpBeforeRenameSchema(Lazy.defer(lambda: dict()))
    DumpAfterRenameSchema(Lazy.defer(lambda: dict()))
    AdapterImportError(ModuleNotFoundError())
    PluginLoadError()
    SystemReportReturnCode(returncode=0)
45 changes: 45 additions & 0 deletions core/dbt/lazy.py
@@ -0,0 +1,45 @@
# necessary for annotating constructors
from __future__ import annotations

from dataclasses import dataclass
from typing import Callable, cast, Generic, Optional, TypeVar


T = TypeVar('T')


# A data type for representing lazily evaluated values. Evaluation is explicitly
# called with either `value` for access to memoization, or `force` to skip
# memoization.
#
# inspired by the purescript data type with
# additional considerations for impurity
# https://pursuit.purescript.org/packages/purescript-lazy/5.0.0/docs/Data.Lazy
@dataclass
class Lazy(Generic[T]):
    _f: Callable[[], T]
    memo: Optional[T] = None

    # constructor for lazy values
    @classmethod
    def defer(cls, f: Callable[[], T]) -> Lazy[T]:
        return Lazy(f)

    # workaround for open mypy issue:
    # https://github.com/python/mypy/issues/6910
    def _typed_eval_f(self) -> T:
        return cast(Callable[[], T], getattr(self, "_f"))()

    # gets the value from memoization or by evaluating the function.
    # good when the deferred function is pure.
    def value(self) -> T:
        if self.memo is not None:
            return self.memo
        else:
            self.memo = self._typed_eval_f()
            return self.memo

    # forces evaluation, skipping the memoization.
    # necessary when the deferred function is stateful or impure.
    def force(self) -> T:
        return self._typed_eval_f()
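
For illustration, a minimal usage sketch of the Lazy type defined above (expensive_dump is a hypothetical stand-in for something like RelationsCache.dump_graph, not part of this diff):

from dbt.lazy import Lazy

def expensive_dump() -> dict:
    # pretend this walks the whole relations cache
    print("computing dump")
    return {"schema.table": ["ref_one", "ref_two"]}

dump = Lazy.defer(expensive_dump)  # nothing is evaluated yet
dump.value()   # evaluates and memoizes: prints "computing dump" once
dump.value()   # returns the memoized dict without re-evaluating
dump.force()   # re-evaluates, bypassing the memo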
87 changes: 85 additions & 2 deletions test/unit/test_events.py
@@ -5,18 +5,20 @@
from dbt.events.functions import event_to_serializable_dict
from dbt.events.types import *
from dbt.events.test_types import *
from dbt.events.base_types import Event
from dbt.events.base_types import Event, InfoLevel
from dbt.events.stubs import _CachedRelation, BaseRelation, _ReferenceKey, ParsedModelNode
from importlib import reload
import dbt.events.functions as event_funcs
import dbt.flags as flags
from dbt.lazy import Lazy
import inspect
import json
from unittest import TestCase
from dbt.contracts.graph.parsed import (
ParsedModelNode, NodeConfig, DependsOn
)
from dbt.contracts.files import FileHash
from typing import Generic, TypeVar

# takes in a class and finds any subclasses for it
def get_all_subclasses(cls):
@@ -429,4 +431,85 @@ def test_all_serializable(self):
                json.dumps(d)
            except TypeError as e:
                raise Exception(f"{event} is not serializable to json. Originating exception: {e}")



T = TypeVar('T')


@dataclass
class Counter(Generic[T]):
    dummy_val: T
    count: int = 0

    def next(self) -> T:
        self.count = self.count + 1
        return self.dummy_val


@dataclass
class DummyCacheEvent(InfoLevel, Cache):
    code = 'X999'
    counter: Counter

    def message(self) -> str:
        return f"state: {self.counter.next()}"


class SkipsRenderingCacheEvents(TestCase):

    def setUp(self):
        pass

    # tests that if a cache event uses lazy evaluation for its message
    # creation, the evaluation will not be forced for cache events when
    # running without `--log-cache-events`.
    def test_skip_cache_event_message_rendering(self):
        # a dummy event that extends `Cache`
        e = DummyCacheEvent(Counter("some_state"))

        # a counter of zero means this potentially expensive function
        # (emulating dump_graph) has never been called
        self.assertTrue(e.counter.count == 0)

        # call fire_event
        event_funcs.fire_event(e)

        # assert that the expensive function has STILL not been called
        self.assertTrue(e.counter.count == 0)

    # this test checks that every subclass of `Cache` uses the same lazy evaluation
    # strategy. This ensures that potentially expensive cache event values are not
    # built unless they are needed for logging purposes. It also checks that these
    # potentially expensive values are cached, and not evaluated more than once.
    def test_all_cache_events_are_lazy(self):
        cache_events = get_all_subclasses(Cache)
        for clazz in cache_events:
            # this body assumes every subclass of `Cache` takes exactly one dictionary value.
            # if you just added a cache event that is different, just branch
            # inside this for loop for your event vs. the others.

            # initialize the counter to return a dictionary (emulating dump_graph)
            counter = Counter(dict())

            # assert that the counter starts at 0
            self.assertTrue(counter.count == 0)

            # create the cache event to use this counter
            e = clazz(Lazy.defer(lambda: counter.next()))

            # assert that initializing the event with the counter
            # did not evaluate the lazy value
            self.assertTrue(counter.count == 0)

            # log an event which should trigger evaluation and up
            # the counter
            event_funcs.fire_event(e)

            # assert that the counter did, in fact, increase
            self.assertTrue(counter.count == 1)

            # fire another event which should reuse the previous value,
            # not evaluate the function again
            event_funcs.fire_event(e)

            # assert that the counter did not, in fact, increase
            self.assertTrue(counter.count == 1)
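
For context, the first test above relies on fire_event declining to render Cache event messages when cache logging is disabled. A minimal sketch of that kind of guard, assuming a flags.LOG_CACHE_EVENTS boolean (illustrative only; the real fire_event lives in core/dbt/events/functions.py and is not part of this diff):

import dbt.flags as flags
from dbt.events.base_types import Cache, Event

def fire_event_sketch(e: Event) -> None:
    # cache events can be expensive to render, so skip building their
    # message entirely unless --log-cache-events was passed
    if isinstance(e, Cache) and not flags.LOG_CACHE_EVENTS:
        return
    # only here is the message built, which forces any Lazy dump value
    print(e.message())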