Skip to content

Commit

Permalink
change json override strategy (#4396)
Browse files Browse the repository at this point in the history
  • Loading branch information
Nathaniel May authored and leahwicz committed Dec 2, 2021
1 parent e56256d commit af1a6d4
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 166 deletions.
2 changes: 1 addition & 1 deletion core/dbt/adapters/base/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ def _schema_is_cached(self, database: Optional[str], schema: str) -> bool:
if (database, schema) not in self.cache:
fire_event(
CacheMiss(
conn_name=self.nice_connection_name,
conn_name=self.nice_connection_name(),
database=database,
schema=schema
)
Expand Down
36 changes: 15 additions & 21 deletions core/dbt/events/base_types.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from abc import ABCMeta, abstractmethod, abstractproperty
from dataclasses import dataclass
from datetime import datetime
import json
import os
import threading
from typing import Any, Optional
Expand Down Expand Up @@ -97,26 +96,6 @@ def level_tag(self) -> str:
def message(self) -> str:
raise Exception("msg not implemented for Event")

# override this method to convert non-json serializable fields to json.
# for override examples, see existing concrete types.
#
# there is no type-level mechanism to have mypy enforce json serializability, so we just try
# to serialize and hand back an Exception object at runtime when that fails. This safety
# mechanism only works if we have attempted to serialize every concrete event type in our tests.
def fields_to_json(self, field_value: Any) -> Any:
    """Return ``field_value`` unchanged if ``json.dumps`` accepts it.

    Args:
        field_value: a single field value taken from an event.

    Returns:
        ``field_value`` itself when it serializes, otherwise an ``Exception``
        instance describing the failure. The exception is returned, not
        raised, so the caller has to check the result with ``isinstance``.
    """
    try:
        # the encoded string is discarded; this is only a serializability probe.
        json.dumps(field_value, sort_keys=True)
    except (TypeError, ValueError):
        # TypeError: unsupported value type (or unsortable keys).
        # ValueError: json.dumps detected a circular reference.
        val_type = type(field_value).__name__
        event_type = type(self).__name__
        return Exception(
            f"type {val_type} is not serializable to json."
            f" First make sure that the call sites for {event_type} match the type hints"
            f" and if they do, you can override Event::fields_to_json in {event_type} in"
            " types.py to define your own serialization function to any valid json type"
        )
    return field_value

# exactly one time stamp per concrete event
def get_ts(self) -> datetime:
if not self.ts:
Expand Down Expand Up @@ -146,6 +125,21 @@ def get_invocation_id(cls) -> str:
from dbt.events.functions import get_invocation_id
return get_invocation_id()

# default dict factory for all events. can override on concrete classes.
@classmethod
def asdict(cls, data: list) -> dict:
    """Build a dict from ``(field_name, value)`` pairs.

    This is handed to ``dataclasses.asdict`` as its ``dict_factory``.

    Args:
        data: iterable of ``(key, value)`` pairs.

    Returns:
        A dict of the pairs in which every exception value is replaced by
        ``str(value)`` and every ``bytes`` value is left out entirely.
    """
    d = {}
    for k, v in data:
        # stringify all exceptions. Exception derives from BaseException,
        # so one isinstance check covers both.
        if isinstance(v, BaseException):
            d[k] = str(v)
        # skip all binary data
        elif isinstance(v, bytes):
            continue
        else:
            d[k] = v
    return d


@dataclass # type: ignore
class NodeInfo(Event, metaclass=ABCMeta):
Expand Down
31 changes: 19 additions & 12 deletions core/dbt/events/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,18 +132,25 @@ def event_to_serializable_dict(
) -> Dict[str, Any]:
data = dict()
node_info = dict()
if hasattr(e, '__dataclass_fields__'):
if isinstance(e, NodeInfo):
node_info = dataclasses.asdict(e.get_node_info())

for field, value in dataclasses.asdict(e).items(): # type: ignore[attr-defined]
if field not in ["code", "report_node_data"]:
_json_value = e.fields_to_json(value)

if not isinstance(_json_value, Exception):
data[field] = _json_value
else:
data[field] = f"JSON_SERIALIZE_FAILED: {type(value).__name__, 'NA'}"
log_line = dict()
try:
log_line = dataclasses.asdict(e, dict_factory=type(e).asdict)
except AttributeError:
event_type = type(e).__name__
raise Exception( # TODO this may hang async threads
f"type {event_type} is not serializable to json."
f" First make sure that the call sites for {event_type} match the type hints"
f" and if they do, you can override the dataclass method `asdict` in {event_type} in"
" types.py to define your own serialization function to a dictionary of valid json"
" types"
)

if isinstance(e, NodeInfo):
node_info = dataclasses.asdict(e.get_node_info())

for field, value in log_line.items(): # type: ignore[attr-defined]
if field not in ["code", "report_node_data"]:
data[field] = value

event_dict = {
'type': 'log_line',
Expand Down
Loading

0 comments on commit af1a6d4

Please sign in to comment.