Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change json override strategy #4396

Merged
merged 6 commits into from
Dec 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion core/dbt/adapters/base/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ def _schema_is_cached(self, database: Optional[str], schema: str) -> bool:
if (database, schema) not in self.cache:
fire_event(
CacheMiss(
conn_name=self.nice_connection_name,
conn_name=self.nice_connection_name(),
database=database,
schema=schema
)
Expand Down
36 changes: 15 additions & 21 deletions core/dbt/events/base_types.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from abc import ABCMeta, abstractmethod, abstractproperty
from dataclasses import dataclass
from datetime import datetime
import json
import os
import threading
from typing import Any, Optional
Expand Down Expand Up @@ -97,26 +96,6 @@ def level_tag(self) -> str:
def message(self) -> str:
raise Exception("msg not implemented for Event")

# override this method to convert non-json serializable fields to json.
# for override examples, see existing concrete types.
#
# there is no type-level mechanism to have mypy enforce json serializability, so we just try
# to serialize and raise an exception at runtime when that fails. This safety mechanism
# only works if we have attempted to serialize every concrete event type in our tests.
def fields_to_json(self, field_value: Any) -> Any:
try:
json.dumps(field_value, sort_keys=True)
return field_value
except TypeError:
val_type = type(field_value).__name__
event_type = type(self).__name__
return Exception(
f"type {val_type} is not serializable to json."
f" First make sure that the call sites for {event_type} match the type hints"
f" and if they do, you can override Event::fields_to_json in {event_type} in"
" types.py to define your own serialization function to any valid json type"
)

# exactly one time stamp per concrete event
def get_ts(self) -> datetime:
if not self.ts:
Expand Down Expand Up @@ -146,6 +125,21 @@ def get_invocation_id(cls) -> str:
from dbt.events.functions import get_invocation_id
return get_invocation_id()

# default dict factory for all events. can override on concrete classes.
@classmethod
def asdict(cls, data: list) -> dict:
d = dict()
for k, v in data:
# stringify all exceptions
if isinstance(v, Exception) or isinstance(v, BaseException):
d[k] = str(v)
# skip all binary data
elif isinstance(v, bytes):
continue
else:
d[k] = v
return d


@dataclass # type: ignore
class NodeInfo(Event, metaclass=ABCMeta):
Expand Down
31 changes: 19 additions & 12 deletions core/dbt/events/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,18 +132,25 @@ def event_to_serializable_dict(
) -> Dict[str, Any]:
data = dict()
node_info = dict()
if hasattr(e, '__dataclass_fields__'):
if isinstance(e, NodeInfo):
node_info = dataclasses.asdict(e.get_node_info())

for field, value in dataclasses.asdict(e).items(): # type: ignore[attr-defined]
if field not in ["code", "report_node_data"]:
_json_value = e.fields_to_json(value)

if not isinstance(_json_value, Exception):
data[field] = _json_value
else:
data[field] = f"JSON_SERIALIZE_FAILED: {type(value).__name__, 'NA'}"
log_line = dict()
try:
log_line = dataclasses.asdict(e, dict_factory=type(e).asdict)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the heart of the change.

except AttributeError:
event_type = type(e).__name__
raise Exception( # TODO this may hang async threads
f"type {event_type} is not serializable to json."
f" First make sure that the call sites for {event_type} match the type hints"
f" and if they do, you can override the dataclass method `asdict` in {event_type} in"
" types.py to define your own serialization function to a dictionary of valid json"
" types"
)

if isinstance(e, NodeInfo):
node_info = dataclasses.asdict(e.get_node_info())

for field, value in log_line.items(): # type: ignore[attr-defined]
if field not in ["code", "report_node_data"]:
data[field] = value

event_dict = {
'type': 'log_line',
Expand Down
Loading