Skip to content

Commit

Permalink
adding a quickfix for saving expectation suites that use dateti…
Browse files Browse the repository at this point in the history
…me objects in the evaluation parameters (#1957)

Co-authored-by: Eugene Mandel <[email protected]>
  • Loading branch information
mbakunze and eugmandel authored Oct 6, 2020
1 parent 7f95536 commit cc3b432
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 0 deletions.
6 changes: 6 additions & 0 deletions great_expectations/marshmallow__shade/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,12 @@ def dumps(self, obj: typing.Any, *args, many: bool = None, **kwargs):
if ``obj`` is invalid.
"""
serialized = self.dump(obj, many=many)

def datetime_serializer(o):
    """Fallback ``default`` hook for the JSON encoder: render datetimes as text.

    Args:
        o: An object the JSON encoder could not serialize on its own.

    Returns:
        ``str(o)`` when ``o`` is a ``datetime.datetime`` (or a subclass).

    Raises:
        TypeError: for any other type, so unsupported values fail loudly
            instead of being silently written out as ``null``.
    """
    if isinstance(o, dt.datetime):
        return str(o)
    # A ``default`` hook that falls through returns None, which the encoder
    # happily emits as ``null``; raising keeps the encoder's normal contract.
    raise TypeError(
        f"Object of type {type(o).__name__} is not JSON serializable"
    )
if "default" not in kwargs:
kwargs.update({"default": datetime_serializer})
return self.opts.render_module.dumps(serialized, *args, **kwargs)

def _deserialize(
Expand Down
25 changes: 25 additions & 0 deletions tests/data_asset/test_datetime_evaluation_parameter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""
Test the ability to use datetime objects as run time evaluation parameters.
"""

import pandas as pd

from great_expectations.dataset import PandasDataset


def test_pandas_datetime_evaluation_parameter():
    """Validate an expectation whose bound is a pandas Timestamp parameter.

    Two Timestamp evaluation parameters are registered on a PandasDataset,
    an expectation on the column max references one of them via
    ``$PARAMETER``, and validation is expected to succeed.
    """
    # Parameters are created before the column data, in the same order as
    # the timestamps were originally taken.
    current_time = pd.Timestamp.now()
    two_days_ago = pd.Timestamp.now() - pd.to_timedelta(2, unit="d")
    evaluation_params = {"now": current_time, "now_minus_48h": two_days_ago}

    refresh_times = [
        pd.Timestamp.now(),
        pd.Timestamp.now() - pd.to_timedelta(1, unit="d"),
    ]
    df = PandasDataset(pd.DataFrame({"data_refresh": refresh_times}))

    for name, value in evaluation_params.items():
        df.set_evaluation_parameter(name, value)

    df.expect_column_max_to_be_between(
        column="data_refresh",
        min_value={"$PARAMETER": "now_minus_48h"},
    )

    outcome = df.validate()
    assert outcome.success
69 changes: 69 additions & 0 deletions tests/data_context/test_pandas_datetime_suites.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import pandas as pd
from tempfile import TemporaryDirectory
import great_expectations as ge
from great_expectations.dataset import PandasDataset
from great_expectations.data_context.data_context import ExpectationSuite
import os
import datetime


def test_save_expectation_suite_with_datetime_objects(data_context_parameterized_expectation_suite):
    """Round-trip an expectation suite that uses datetime evaluation parameters.

    Inside a temporary project directory this creates a DataContext, validates
    a batch against a ``$PARAMETER``-based expectation, saves and reloads the
    suite, runs the ``action_list_operator`` on both the original and the
    reloaded batch, and builds Data Docs.
    """
    # Datetime evaluation parameters, created before the column data.
    evaluation_params = {
        "now": datetime.datetime.now(),
        "now_minus_48h": datetime.datetime.now() - datetime.timedelta(days=2),
    }
    refresh_times = [
        datetime.datetime.now(),
        datetime.datetime.now() - datetime.timedelta(days=1),
    ]
    test_df = pd.DataFrame({"data_refresh": refresh_times})
    dataset_name = "test_pandas_source"

    with TemporaryDirectory() as tempdir:
        ge_path = os.path.join(tempdir, "great_expectations")
        ge.DataContext.create(tempdir, usage_statistics_enabled=False)
        context = ge.DataContext(ge_path)
        context.add_datasource(dataset_name, class_name="PandasDatasource")

        batch_kwargs = {
            "dataset": test_df,
            "datasource": dataset_name,
            "PandasInMemoryDF": True,
            "ge_batch_id": "test_id",
        }
        empty_suite = context.create_expectation_suite("test_suite")
        batch = context.get_batch(
            batch_kwargs=batch_kwargs,
            expectation_suite_name=empty_suite,
        )
        for name, value in evaluation_params.items():
            batch.set_evaluation_parameter(name, value)

        # The expectation resolves its lower bound from a datetime $PARAMETER.
        batch.expect_column_max_to_be_between(
            column="data_refresh",
            min_value={"$PARAMETER": "now_minus_48h"},
        )
        first_validation = batch.validate()
        assert first_validation.success
        batch.save_expectation_suite()
        assert isinstance(batch, PandasDataset)

        # The saved suite must be loadable again.
        reloaded_expectation_suite = context.get_expectation_suite("test_suite")
        assert isinstance(reloaded_expectation_suite, ExpectationSuite)

        # Validate through the action_list_operator with explicit parameters.
        first_run_id = {
            "run_name": f"{dataset_name}_{datetime.datetime.now()}",
            "run_time": datetime.datetime.now(),
        }
        operator_results = context.run_validation_operator(
            "action_list_operator",
            assets_to_validate=[batch],
            run_id=first_run_id,
            evaluation_parameters=evaluation_params,
        )
        assert operator_results.success

        # Data Docs must build and point at the local site index page.
        index_page_locator_infos = context.build_data_docs()
        expected_index_url = f"file://{ge_path}/uncommitted/data_docs/local_site/index.html"
        assert index_page_locator_infos["local_site"] == expected_index_url

        # A batch built from the reloaded suite must validate as well.
        reloaded_batch = context.get_batch(
            batch_kwargs=batch_kwargs,
            expectation_suite_name=reloaded_expectation_suite,
        )
        reloaded_run_id = {
            "run_name": f"reloaded_{dataset_name}_{datetime.datetime.now()}",
            "run_time": datetime.datetime.now(),
        }
        reloaded_results = context.run_validation_operator(
            "action_list_operator",
            assets_to_validate=[reloaded_batch],
            run_id=reloaded_run_id,
        )

        assert reloaded_results.success

0 comments on commit cc3b432

Please sign in to comment.