-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Adding support for native Python transformations on a single dictionary #4724
Changes from all commits
5617321
c514a4d
237e32e
71927f5
7bf426c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -74,6 +74,7 @@ class OnDemandFeatureView(BaseFeatureView): | |
tags: dict[str, str] | ||
owner: str | ||
write_to_online_store: bool | ||
singleton: bool | ||
|
||
def __init__( # noqa: C901 | ||
self, | ||
|
@@ -98,6 +99,7 @@ def __init__( # noqa: C901 | |
tags: Optional[dict[str, str]] = None, | ||
owner: str = "", | ||
write_to_online_store: bool = False, | ||
singleton: bool = False, | ||
): | ||
""" | ||
Creates an OnDemandFeatureView object. | ||
|
@@ -121,6 +123,8 @@ def __init__( # noqa: C901 | |
of the primary maintainer. | ||
write_to_online_store (optional): A boolean that indicates whether to write the on demand feature view to | ||
the online store for faster retrieval. | ||
singleton (optional): A boolean that indicates whether the transformation is executed on a singleton | ||
(only applicable when mode="python"). | ||
""" | ||
super().__init__( | ||
name=name, | ||
|
@@ -204,6 +208,9 @@ def __init__( # noqa: C901 | |
self.features = features | ||
self.feature_transformation = feature_transformation | ||
self.write_to_online_store = write_to_online_store | ||
self.singleton = singleton | ||
if self.singleton and self.mode != "python": | ||
raise ValueError("Singleton is only supported for Python mode.") | ||
|
||
@property | ||
def proto_class(self) -> type[OnDemandFeatureViewProto]: | ||
|
@@ -221,6 +228,7 @@ def __copy__(self): | |
tags=self.tags, | ||
owner=self.owner, | ||
write_to_online_store=self.write_to_online_store, | ||
singleton=self.singleton, | ||
) | ||
fv.entities = self.entities | ||
fv.features = self.features | ||
|
@@ -247,6 +255,7 @@ def __eq__(self, other): | |
or self.feature_transformation != other.feature_transformation | ||
or self.write_to_online_store != other.write_to_online_store | ||
or sorted(self.entity_columns) != sorted(other.entity_columns) | ||
or self.singleton != other.singleton | ||
): | ||
return False | ||
|
||
|
@@ -328,6 +337,7 @@ def to_proto(self) -> OnDemandFeatureViewProto: | |
tags=self.tags, | ||
owner=self.owner, | ||
write_to_online_store=self.write_to_online_store, | ||
singleton=self.singleton if self.singleton else False, | ||
) | ||
|
||
return OnDemandFeatureViewProto(spec=spec, meta=meta) | ||
|
@@ -434,6 +444,9 @@ def from_proto( | |
] | ||
else: | ||
entity_columns = [] | ||
singleton = False | ||
if hasattr(on_demand_feature_view_proto.spec, "singleton"): | ||
singleton = on_demand_feature_view_proto.spec.singleton | ||
|
||
on_demand_feature_view_obj = cls( | ||
name=on_demand_feature_view_proto.spec.name, | ||
|
@@ -451,6 +464,7 @@ def from_proto( | |
tags=dict(on_demand_feature_view_proto.spec.tags), | ||
owner=on_demand_feature_view_proto.spec.owner, | ||
write_to_online_store=write_to_online_store, | ||
singleton=singleton, | ||
) | ||
|
||
on_demand_feature_view_obj.entities = entities | ||
|
@@ -614,17 +628,19 @@ def transform_dict( | |
feature_dict[full_feature_ref] = feature_dict[feature.name] | ||
columns_to_cleanup.append(str(full_feature_ref)) | ||
|
||
output_dict: dict[str, Any] = self.feature_transformation.transform( | ||
feature_dict | ||
) | ||
if self.singleton and self.mode == "python": | ||
output_dict: dict[str, Any] = ( | ||
self.feature_transformation.transform_singleton(feature_dict) | ||
) | ||
else: | ||
output_dict = self.feature_transformation.transform(feature_dict) | ||
for feature_name in columns_to_cleanup: | ||
del output_dict[feature_name] | ||
return output_dict | ||
|
||
def infer_features(self) -> None: | ||
inferred_features = self.feature_transformation.infer_features( | ||
self._construct_random_input() | ||
) | ||
random_input = self._construct_random_input(singleton=self.singleton) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @robhowley and here |
||
inferred_features = self.feature_transformation.infer_features(random_input) | ||
|
||
if self.features: | ||
missing_features = [] | ||
|
@@ -644,8 +660,10 @@ def infer_features(self) -> None: | |
f"Could not infer Features for the feature view '{self.name}'.", | ||
) | ||
|
||
def _construct_random_input(self) -> dict[str, list[Any]]: | ||
rand_dict_value: dict[ValueType, list[Any]] = { | ||
def _construct_random_input( | ||
self, singleton: bool = False | ||
) -> dict[str, Union[list[Any], Any]]: | ||
rand_dict_value: dict[ValueType, Union[list[Any], Any]] = { | ||
ValueType.BYTES: [str.encode("hello world")], | ||
ValueType.STRING: ["hello world"], | ||
ValueType.INT32: [1], | ||
|
@@ -663,20 +681,25 @@ def _construct_random_input(self) -> dict[str, list[Any]]: | |
ValueType.BOOL_LIST: [[True]], | ||
ValueType.UNIX_TIMESTAMP_LIST: [[_utc_now()]], | ||
} | ||
if singleton: | ||
rand_dict_value = {k: rand_dict_value[k][0] for k in rand_dict_value} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @robhowley and here |
||
|
||
rand_missing_value = [None] if singleton else None | ||
feature_dict = {} | ||
for feature_view_projection in self.source_feature_view_projections.values(): | ||
for feature in feature_view_projection.features: | ||
feature_dict[f"{feature_view_projection.name}__{feature.name}"] = ( | ||
rand_dict_value.get(feature.dtype.to_value_type(), [None]) | ||
rand_dict_value.get( | ||
feature.dtype.to_value_type(), rand_missing_value | ||
) | ||
) | ||
feature_dict[f"{feature.name}"] = rand_dict_value.get( | ||
feature.dtype.to_value_type(), [None] | ||
feature.dtype.to_value_type(), rand_missing_value | ||
) | ||
for request_data in self.source_request_sources.values(): | ||
for field in request_data.schema: | ||
feature_dict[f"{field.name}"] = rand_dict_value.get( | ||
field.dtype.to_value_type(), [None] | ||
field.dtype.to_value_type(), rand_missing_value | ||
) | ||
|
||
return feature_dict | ||
|
@@ -713,6 +736,7 @@ def on_demand_feature_view( | |
tags: Optional[dict[str, str]] = None, | ||
owner: str = "", | ||
write_to_online_store: bool = False, | ||
singleton: bool = False, | ||
): | ||
""" | ||
Creates an OnDemandFeatureView object with the given user function as udf. | ||
|
@@ -731,6 +755,8 @@ def on_demand_feature_view( | |
of the primary maintainer. | ||
write_to_online_store (optional): A boolean that indicates whether to write the on demand feature view to | ||
the online store for faster retrieval. | ||
singleton (optional): A boolean that indicates whether the transformation is executed on a singleton | ||
(only applicable when mode="python"). | ||
""" | ||
|
||
def mainify(obj) -> None: | ||
|
@@ -775,6 +801,7 @@ def decorator(user_function): | |
owner=owner, | ||
write_to_online_store=write_to_online_store, | ||
entities=entities, | ||
singleton=singleton, | ||
) | ||
functools.update_wrapper( | ||
wrapper=on_demand_feature_view_obj, wrapped=user_function | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,24 +37,39 @@ def transform(self, input_dict: dict) -> dict: | |
output_dict = self.udf.__call__(input_dict) | ||
return {**input_dict, **output_dict} | ||
|
||
def infer_features(self, random_input: dict[str, list[Any]]) -> list[Field]: | ||
output_dict: dict[str, list[Any]] = self.transform(random_input) | ||
def transform_singleton(self, input_dict: dict) -> dict: | ||
# This flattens the list of elements to extract the first one | ||
# in the case of a singleton element, it takes the value directly | ||
# in the case of a list of lists, it takes the first list | ||
input_dict = {k: v[0] for k, v in input_dict.items()} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. v[0] might cause an error? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It shouldn't, but I'll add a test for it. |
||
output_dict = self.udf.__call__(input_dict) | ||
return {**input_dict, **output_dict} | ||
|
||
def infer_features(self, random_input: dict[str, Any]) -> list[Field]: | ||
output_dict: dict[str, Any] = self.transform(random_input) | ||
|
||
fields = [] | ||
for feature_name, feature_value in output_dict.items(): | ||
if len(feature_value) <= 0: | ||
raise TypeError( | ||
f"Failed to infer type for feature '{feature_name}' with value " | ||
+ f"'{feature_value}' since no items were returned by the UDF." | ||
) | ||
if isinstance(feature_value, list): | ||
if len(feature_value) <= 0: | ||
raise TypeError( | ||
f"Failed to infer type for feature '{feature_name}' with value " | ||
+ f"'{feature_value}' since no items were returned by the UDF." | ||
) | ||
inferred_type = type(feature_value[0]) | ||
inferred_value = feature_value[0] | ||
else: | ||
inferred_type = type(feature_value) | ||
inferred_value = feature_value | ||
|
||
fields.append( | ||
Field( | ||
name=feature_name, | ||
dtype=from_value_type( | ||
python_type_to_feast_value_type( | ||
feature_name, | ||
value=feature_value[0], | ||
type_name=type(feature_value[0]).__name__, | ||
value=inferred_value, | ||
type_name=inferred_type.__name__, | ||
) | ||
), | ||
) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@robhowley see here