Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Apply cache to load proto registry for performance #3702

Merged
merged 3 commits into from
Sep 12, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion sdk/python/feast/infra/registry/proto_registry_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import uuid
from functools import wraps
from typing import List, Optional

from feast import usage
Expand All @@ -23,6 +24,26 @@
from feast.stream_feature_view import StreamFeatureView


def registry_proto_cache(func):
cache_key = None
cache_value = None
Comment on lines +27 to +29
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I understand this.. If you use the same annotation for multiple methods then the cache value needs have different shapes, such as List[FeatureService] or List[FeatureView] right? Is that going to work as expected here? Can we add tests?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The context of the wrapper decorator is created whenever different argument functions (e.g list_feature_views, list_feature_services..) are called.
Therefore, when calling list_feature_views and when calling list_feature_services, different results are returned.
The operation of this cache is similar to Python's functools.lru_cache, but since the registryProto object is not hashable, it is implemented to hash using the object's id.


@wraps(func)
def wrapper(registry_proto: RegistryProto, project: str):
nonlocal cache_key, cache_value

key = tuple([id(registry_proto), registry_proto.version_id, project])

if key == cache_key:
return cache_value
else:
cache_value = func(registry_proto, project)
Comment on lines +39 to +40
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't you need to update cache_key here as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@achals I'm sorry, you're right. I added commit b8a373f

cache_key = key
return cache_value

return wrapper


def init_project_metadata(cached_registry_proto: RegistryProto, project: str):
new_project_uuid = f"{uuid.uuid4()}"
usage.set_current_project_uuid(new_project_uuid)
Expand Down Expand Up @@ -137,8 +158,9 @@ def get_validation_reference(
raise ValidationReferenceNotFound(name, project=project)


@registry_proto_cache
def list_feature_services(
registry_proto: RegistryProto, project: str, allow_cache: bool = False
registry_proto: RegistryProto, project: str
) -> List[FeatureService]:
feature_services = []
for feature_service_proto in registry_proto.feature_services:
Expand All @@ -147,6 +169,7 @@ def list_feature_services(
return feature_services


@registry_proto_cache
def list_feature_views(
registry_proto: RegistryProto, project: str
) -> List[FeatureView]:
Expand All @@ -157,6 +180,7 @@ def list_feature_views(
return feature_views


@registry_proto_cache
def list_request_feature_views(
registry_proto: RegistryProto, project: str
) -> List[RequestFeatureView]:
Expand All @@ -169,6 +193,7 @@ def list_request_feature_views(
return feature_views


@registry_proto_cache
def list_stream_feature_views(
registry_proto: RegistryProto, project: str
) -> List[StreamFeatureView]:
Expand All @@ -181,6 +206,7 @@ def list_stream_feature_views(
return stream_feature_views


@registry_proto_cache
def list_on_demand_feature_views(
registry_proto: RegistryProto, project: str
) -> List[OnDemandFeatureView]:
Expand All @@ -193,6 +219,7 @@ def list_on_demand_feature_views(
return on_demand_feature_views


@registry_proto_cache
def list_entities(registry_proto: RegistryProto, project: str) -> List[Entity]:
entities = []
for entity_proto in registry_proto.entities:
Expand All @@ -201,6 +228,7 @@ def list_entities(registry_proto: RegistryProto, project: str) -> List[Entity]:
return entities


@registry_proto_cache
def list_data_sources(registry_proto: RegistryProto, project: str) -> List[DataSource]:
data_sources = []
for data_source_proto in registry_proto.data_sources:
Expand All @@ -209,6 +237,7 @@ def list_data_sources(registry_proto: RegistryProto, project: str) -> List[DataS
return data_sources


@registry_proto_cache
def list_saved_datasets(
registry_proto: RegistryProto, project: str
) -> List[SavedDataset]:
Expand All @@ -219,6 +248,7 @@ def list_saved_datasets(
return saved_datasets


@registry_proto_cache
def list_validation_references(
registry_proto: RegistryProto, project: str
) -> List[ValidationReference]:
Expand All @@ -231,6 +261,7 @@ def list_validation_references(
return validation_references


@registry_proto_cache
def list_project_metadata(
registry_proto: RegistryProto, project: str
) -> List[ProjectMetadata]:
Expand Down