Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop' into feature/split-requ…
Browse files Browse the repository at this point in the history
…irements-dev-files
  • Loading branch information
kenwade4 committed Mar 15, 2022
2 parents a83f1e3 + 70ef378 commit 3b141eb
Show file tree
Hide file tree
Showing 19 changed files with 115 additions and 62 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ class TestBackend:
backend: str
dialects: Optional[List[str]]

__test__ = False # Tell pytest not to try to collect this class as a test

def __post_init__(self):
allowed_backend_names = ("pandas", "spark", "sqlalchemy")
allowed_sql_dialects = ("sqlite", "postgresql", "mysql", "mssql", "bigquery")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ def __init__(
batch_list: Optional[List[Batch]] = None,
batch_request: Optional[Union[BatchRequest, RuntimeBatchRequest, dict]] = None,
data_context: Optional["DataContext"] = None, # noqa: F821
column_names: Optional[Union[str, Optional[List[str]]]] = None,
exclude_columns: Optional[Union[str, Optional[List[str]]]] = None,
include_column_names: Optional[Union[str, Optional[List[str]]]] = None,
exclude_column_names: Optional[Union[str, Optional[List[str]]]] = None,
limit_mode: Optional[Union[CardinalityLimitMode, str]] = None,
max_unique_values: Optional[Union[str, int]] = None,
max_proportion_unique: Optional[Union[str, float]] = None,
Expand All @@ -58,8 +58,8 @@ def __init__(
batch_list: explicitly specified Batch objects for use in DomainBuilder
batch_request: BatchRequest to be optionally used to define batches to consider for this domain builder.
data_context: DataContext associated with this profiler.
column_names: Explicitly specified column_names list desired (if None, it is computed based on active Batch)
exclude_columns: If provided, these columns are pre-filtered and excluded from consideration, cardinality is not computed.
include_column_names: Explicitly specified desired columns (if None, it is computed based on active Batch).
exclude_column_names: If provided, these columns are pre-filtered and excluded from consideration.
limit_mode: CardinalityLimitMode or string name of the mode
defining the maximum allowable cardinality to use when
filtering columns.
Expand All @@ -72,11 +72,10 @@ def __init__(
batch_list=batch_list,
batch_request=batch_request,
data_context=data_context,
column_names=column_names,
include_column_names=include_column_names,
exclude_column_names=exclude_column_names,
)

self._exclude_columns = exclude_columns

self._limit_mode = limit_mode
self._max_unique_values = max_unique_values
self._max_proportion_unique = max_proportion_unique
Expand All @@ -99,10 +98,6 @@ def max_unique_values(self) -> Optional[Union[str, int]]:
def max_proportion_unique(self) -> Optional[Union[str, float]]:
return self._max_proportion_unique

@property
def exclude_columns(self) -> Optional[Union[str, Optional[List[str]]]]:
return self._exclude_columns

@property
def cardinality_checker(self) -> Optional[CardinalityChecker]:
return self._cardinality_checker
Expand All @@ -120,8 +115,6 @@ def _get_domains(
List of domains that match the desired cardinality.
"""
table_column_names: List[str] = self.get_effective_column_names(
include_columns=self.column_names,
exclude_columns=self.exclude_columns,
variables=variables,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,59 +18,70 @@ def __init__(
batch_list: Optional[List[Batch]] = None,
batch_request: Optional[Union[BatchRequest, RuntimeBatchRequest, dict]] = None,
data_context: Optional["DataContext"] = None, # noqa: F821
column_names: Optional[Union[str, Optional[List[str]]]] = None,
include_column_names: Optional[Union[str, Optional[List[str]]]] = None,
exclude_column_names: Optional[Union[str, Optional[List[str]]]] = None,
):
"""
Args:
batch_list: explicitly specified Batch objects for use in DomainBuilder
batch_request: specified in DomainBuilder configuration to get Batch objects for domain computation.
data_context: DataContext
column_names: Explicitly specified column_names list desired (if None, it is computed based on active Batch)
include_column_names: Explicitly specified desired columns (if None, it is computed based on active Batch).
exclude_column_names: If provided, these columns are pre-filtered and excluded from consideration.
"""
super().__init__(
batch_list=batch_list,
batch_request=batch_request,
data_context=data_context,
)

self._column_names = column_names
self._include_column_names = include_column_names
self._exclude_column_names = exclude_column_names

@property
def domain_type(self) -> Union[str, MetricDomainTypes]:
    """Report the kind of domain this builder emits: always the COLUMN domain type."""
    column_domain_type = MetricDomainTypes.COLUMN
    return column_domain_type

"""
All DomainBuilder classes, whose "domain_type" property equals "MetricDomainTypes.COLUMN", must extend present class
(ColumnDomainBuilder) in order to provide full getter/setter accessor for "column_names" property (as override).
(ColumnDomainBuilder) in order to provide full getter/setter accessor for "include_column_names" property (as override).
"""

@property
def column_names(self) -> Optional[Union[str, Optional[List[str]]]]:
return self._column_names
def include_column_names(self) -> Optional[Union[str, Optional[List[str]]]]:
return self._include_column_names

@column_names.setter
def column_names(self, value: Optional[Union[str, Optional[List[str]]]]) -> None:
self._column_names = value
@property
def exclude_column_names(self) -> Optional[Union[str, Optional[List[str]]]]:
    """Return the exclusion specification stored on this builder.

    The value is handed back exactly as it was stored (a string, a list of
    column names, or None); nothing is resolved or validated here.
    """
    excluded = self._exclude_column_names
    return excluded

@include_column_names.setter
def include_column_names(
    self,
    value: Optional[Union[str, Optional[List[str]]]],
) -> None:
    """Overwrite the stored inclusion specification with "value".

    Args:
        value: new inclusion specification (a string, a list of column names, or None); stored as given.
    """
    self._include_column_names = value

def get_effective_column_names(
self,
include_columns: Optional[Union[str, Optional[List[str]]]] = None,
exclude_columns: Optional[Union[str, Optional[List[str]]]] = None,
variables: Optional[ParameterContainer] = None,
) -> List[str]:
# Obtain include_columns from "rule state" (i.e., variables and parameters); from instance variable otherwise.
include_columns = get_parameter_value_and_validate_return_type(
# Obtain include_column_names from "rule state" (i.e., variables and parameters); from instance variable otherwise.
include_column_names: Optional[
List[str]
] = get_parameter_value_and_validate_return_type(
domain=None,
parameter_reference=include_columns,
parameter_reference=self.include_column_names,
expected_return_type=None,
variables=variables,
parameters=None,
)

# Obtain exclude_columns from "rule state" (i.e., variables and parameters); from instance variable otherwise.
exclude_columns = get_parameter_value_and_validate_return_type(
# Obtain exclude_column_names from "rule state" (i.e., variables and parameters); from instance variable otherwise.
exclude_column_names: Optional[
List[str]
] = get_parameter_value_and_validate_return_type(
domain=None,
parameter_reference=exclude_columns,
parameter_reference=self.exclude_column_names,
expected_return_type=None,
variables=variables,
parameters=None,
Expand All @@ -89,16 +100,16 @@ def get_effective_column_names(
)
)

effective_column_names: List[str] = include_columns or table_columns
effective_column_names: List[str] = include_column_names or table_columns

if exclude_columns is None:
exclude_columns = []
if exclude_column_names is None:
exclude_column_names = []

column_name: str
effective_column_names = [
column_name
for column_name in effective_column_names
if column_name not in exclude_columns
if column_name not in exclude_column_names
]

if set(effective_column_names) == set(table_columns):
Expand All @@ -122,8 +133,6 @@ def _get_domains(
"""
return build_simple_domains_from_column_names(
column_names=self.get_effective_column_names(
include_columns=self.column_names,
exclude_columns=None,
variables=variables,
),
domain_type=self.domain_type,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ def __init__(
batch_list: Optional[List[Batch]] = None,
batch_request: Optional[Union[BatchRequest, RuntimeBatchRequest, dict]] = None,
data_context: Optional["DataContext"] = None, # noqa: F821
column_names: Optional[Union[str, Optional[List[str]]]] = None,
include_column_names: Optional[Union[str, Optional[List[str]]]] = None,
exclude_column_names: Optional[Union[str, Optional[List[str]]]] = None,
max_unexpected_values: Union[str, int] = 0,
max_unexpected_ratio: Optional[Union[str, float]] = None,
min_max_unexpected_values_proportion: Union[str, float] = 9.75e-1,
Expand All @@ -37,7 +38,8 @@ def __init__(
batch_list: explicitly specified Batch objects for use in DomainBuilder
batch_request: BatchRequest to be optionally used to define batches to consider for this domain builder.
data_context: DataContext associated with this profiler.
column_names: Explicitly specified column_names list desired (if None, it is computed based on active Batch)
include_column_names: Explicitly specified desired columns (if None, it is computed based on active Batch).
exclude_column_names: If provided, these columns are pre-filtered and excluded from consideration.
max_unexpected_values: maximum "unexpected_count" value of "map_metric_name" (intra-Batch)
max_unexpected_ratio: maximum "unexpected_count" value of "map_metric_name" divided by number of records
(intra-Batch); if both "max_unexpected_values" and "max_unexpected_ratio" are specified, then
Expand Down Expand Up @@ -72,7 +74,8 @@ def __init__(
batch_list=batch_list,
batch_request=batch_request,
data_context=data_context,
column_names=column_names,
include_column_names=include_column_names,
exclude_column_names=exclude_column_names,
)

self._map_metric_name = map_metric_name
Expand Down Expand Up @@ -151,8 +154,6 @@ def _get_domains(
)

table_column_names: List[str] = self.get_effective_column_names(
include_columns=self.column_names,
exclude_columns=None,
variables=variables,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,24 @@ def __init__(
batch_list: Optional[List[Batch]] = None,
batch_request: Optional[Union[BatchRequest, RuntimeBatchRequest, dict]] = None,
data_context: Optional["DataContext"] = None, # noqa: F821
column_names: Optional[Union[str, Optional[List[str]]]] = None,
include_column_names: Optional[Union[str, Optional[List[str]]]] = None,
exclude_column_names: Optional[Union[str, Optional[List[str]]]] = None,
column_name_suffixes: Optional[Union[str, Iterable, List[str]]] = None,
):
"""
Args:
batch_list: explicitly specified Batch objects for use in DomainBuilder
batch_request: specified in DomainBuilder configuration to get Batch objects for domain computation.
data_context: DataContext
column_names: Explicitly specified column_names list desired (if None, it is computed based on active Batch)
include_column_names: Explicitly specified desired columns (if None, it is computed based on active Batch).
exclude_column_names: If provided, these columns are pre-filtered and excluded from consideration.
"""
super().__init__(
batch_list=batch_list,
batch_request=batch_request,
data_context=data_context,
column_names=column_names,
include_column_names=include_column_names,
exclude_column_names=exclude_column_names,
)

if column_name_suffixes is None:
Expand All @@ -60,8 +63,6 @@ def _get_domains(
Find the column suffix for each column and return all domains matching the specified suffix.
"""
table_column_names: List[str] = self.get_effective_column_names(
include_columns=self.column_names,
exclude_columns=None,
variables=variables,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def __init__(
batch_list: Optional[List[Batch]] = None,
batch_request: Optional[Union[BatchRequest, RuntimeBatchRequest, dict]] = None,
data_context: Optional["DataContext"] = None, # noqa: F821
column_names: Optional[Union[str, Optional[List[str]]]] = None,
include_column_names: Optional[Union[str, Optional[List[str]]]] = None,
exclude_column_names: Optional[Union[str, Optional[List[str]]]] = None,
semantic_types: Optional[
Union[str, SemanticDomainTypes, List[Union[str, SemanticDomainTypes]]]
] = None,
Expand All @@ -37,14 +38,16 @@ def __init__(
batch_list: explicitly specified Batch objects for use in DomainBuilder
batch_request: specified in DomainBuilder configuration to get Batch objects for domain computation.
data_context: DataContext
column_names: Explicitly specified column_names list desired (if None, it is computed based on active Batch)
include_column_names: Explicitly specified desired columns (if None, it is computed based on active Batch).
exclude_column_names: If provided, these columns are pre-filtered and excluded from consideration.
semantic_types: single or multiple type specifications using SemanticDomainTypes (or string equivalents)
"""
super().__init__(
batch_list=batch_list,
batch_request=batch_request,
data_context=data_context,
column_names=column_names,
include_column_names=include_column_names,
exclude_column_names=exclude_column_names,
)

if semantic_types is None:
Expand Down Expand Up @@ -72,8 +75,6 @@ def _get_domains(
Find the semantic column type for each column and return all domains matching the specified type or types.
"""
table_column_names: List[str] = self.get_effective_column_names(
include_columns=self.column_names,
exclude_columns=None,
variables=variables,
)

Expand Down
18 changes: 17 additions & 1 deletion great_expectations/rule_based_profiler/helpers/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
Domain,
ParameterContainer,
get_parameter_value_by_fully_qualified_parameter_name,
is_fully_qualified_parameter_name_literal_string_format,
)
from great_expectations.validator.metric_configuration import MetricConfiguration

Expand Down Expand Up @@ -224,7 +225,11 @@ def get_parameter_value(
variables=variables,
parameters=parameters,
)
elif isinstance(parameter_reference, str) and parameter_reference.startswith("$"):
elif isinstance(
parameter_reference, str
) and is_fully_qualified_parameter_name_literal_string_format(
fully_qualified_parameter_name=parameter_reference
):
parameter_reference = get_parameter_value_by_fully_qualified_parameter_name(
fully_qualified_parameter_name=parameter_reference,
domain=domain,
Expand All @@ -239,6 +244,17 @@ def get_parameter_value(
variables=variables,
parameters=parameters,
)
elif isinstance(
parameter_reference, str
) and is_fully_qualified_parameter_name_literal_string_format(
fully_qualified_parameter_name=parameter_reference
):
parameter_reference = get_parameter_value_by_fully_qualified_parameter_name(
fully_qualified_parameter_name=parameter_reference,
domain=domain,
variables=variables,
parameters=parameters,
)

return parameter_reference

Expand Down
1 change: 1 addition & 0 deletions great_expectations/rule_based_profiler/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
ParameterContainer,
build_parameter_container,
build_parameter_container_for_variables,
is_fully_qualified_parameter_name_literal_string_format,
get_parameter_value_by_fully_qualified_parameter_name,
DOMAIN_KWARGS_PARAMETER_NAME,
)
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,16 @@ def _parse_attribute_name(name: str) -> ParseResults:
)


def is_fully_qualified_parameter_name_literal_string_format(
    fully_qualified_parameter_name: str,
) -> bool:
    """Tell whether a value is written in fully-qualified parameter name format.

    A fully-qualified parameter name is recognized by its leading "$" character.

    Args:
        fully_qualified_parameter_name: candidate value to inspect.

    Returns:
        True if the candidate is a string that starts with "$"; False otherwise
        (including for non-string values such as None, lists, or dictionaries).
    """
    # The type guard lives here so that callers can use this predicate directly
    # on arbitrary values without first checking isinstance() themselves.
    return isinstance(
        fully_qualified_parameter_name, str
    ) and fully_qualified_parameter_name.startswith("$")


def validate_fully_qualified_parameter_name(fully_qualified_parameter_name: str):
if not fully_qualified_parameter_name.startswith("$"):
if not is_fully_qualified_parameter_name_literal_string_format(
fully_qualified_parameter_name=fully_qualified_parameter_name
):
raise ge_exceptions.ProfilerExecutionError(
message=f"""Unable to get value for parameter name "{fully_qualified_parameter_name}" -- parameter \
names must start with $ (e.g., "${fully_qualified_parameter_name}").
Expand Down
2 changes: 1 addition & 1 deletion great_expectations/validator/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,7 @@ def _validate_profiler_and_update_rules_properties(
# TODO: <Alex>Handle future domain_type cases as they are defined.</Alex>
if domain_type == MetricDomainTypes.COLUMN:
column_name = expectation_kwargs["column"]
rule.domain_builder.column_names = [column_name]
rule.domain_builder.include_column_names = [column_name]

for parameter_builder in rule.parameter_builders:
if hasattr(parameter_builder, "metric_name") and hasattr(
Expand Down
8 changes: 4 additions & 4 deletions tests/data_asset/test_data_asset_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,9 +440,9 @@ def test_format_map_output():

success = True
element_count = 20
nonnull_values = pd.Series([])
nonnull_values = pd.Series([], dtype=np.float64)
nonnull_count = 0
boolean_mapped_success_values = pd.Series([])
boolean_mapped_success_values = pd.Series([], dtype=np.float64)
success_count = 0
unexpected_list = []
unexpected_index_list = []
Expand Down Expand Up @@ -533,9 +533,9 @@ def test_format_map_output():

success = False
element_count = 0
nonnull_values = pd.Series([])
nonnull_values = pd.Series([], dtype=np.float64)
nonnull_count = 0
boolean_mapped_success_values = pd.Series([])
boolean_mapped_success_values = pd.Series([], dtype=np.float64)
success_count = 0
unexpected_list = []
unexpected_index_list = []
Expand Down
Loading

0 comments on commit 3b141eb

Please sign in to comment.