From 1795427d58f2f5eb84d2cf69e513330920d91fcb Mon Sep 17 00:00:00 2001 From: brimoor Date: Tue, 12 Nov 2024 10:39:02 -0500 Subject: [PATCH 1/2] summary field/index improvements --- fiftyone/core/dataset.py | 44 ++++++++++++++++-- fiftyone/operators/builtin.py | 88 ++++++++++++++++++++++++++--------- 2 files changed, 108 insertions(+), 24 deletions(-) diff --git a/fiftyone/core/dataset.py b/fiftyone/core/dataset.py index e4911670b7..9db20ee4c4 100644 --- a/fiftyone/core/dataset.py +++ b/fiftyone/core/dataset.py @@ -1673,6 +1673,18 @@ def list_summary_fields(self): self.get_field_schema(flat=True, info_keys=_SUMMARY_FIELD_KEY) ) + def _get_summarized_fields_map(self): + schema = self.get_field_schema(flat=True, info_keys=_SUMMARY_FIELD_KEY) + + summarized_fields = {} + for path, field in schema.items(): + summary_info = field.info[_SUMMARY_FIELD_KEY] + source_path = summary_info.get("path", None) + if source_path is not None: + summarized_fields[source_path] = path + + return summarized_fields + def create_summary_field( self, path, @@ -1750,13 +1762,25 @@ def create_summary_field( """ _field = self.get_field(path) - if isinstance(_field, (fof.StringField, fof.BooleanField)): + is_list_field = isinstance(_field, fof.ListField) + if is_list_field: + _field = _field.field + + if isinstance( + _field, + (fof.StringField, fof.BooleanField, fof.ObjectIdField), + ): field_type = "categorical" elif isinstance( _field, (fof.FloatField, fof.IntField, fof.DateField, fof.DateTimeField), ): field_type = "numeric" + elif is_list_field: + raise ValueError( + f"Cannot generate a summary for list field '{path}' with " + f"element type {type(_field)}" + ) elif _field is not None: raise ValueError( f"Cannot generate a summary for field '{path}' of " @@ -1889,8 +1913,17 @@ def create_summary_field( return field_name def _get_default_summary_field_name(self, path): - _path, is_frame_field, list_fields, _, _ = self._parse_field_name(path) + ( + _path, + is_frame_field, + list_fields, + _, + id_to_str, + ) = self._parse_field_name(path) + _chunks = _path.split(".") + if id_to_str: + _chunks = [c[1:] if c.startswith("_") else c for c in _chunks] chunks = [] if is_frame_field: @@ -1907,7 +1940,12 @@ def _get_default_summary_field_name(self, path): if found_list: chunks.append(_chunks[-1]) - return "_".join(chunks) + field_name = "_".join(chunks) + + if field_name == path: + field_name += "_summary" + + return field_name def _populate_summary_field(self, field_name, summary_info): path = summary_info["path"] diff --git a/fiftyone/operators/builtin.py b/fiftyone/operators/builtin.py index 45783e94d8..8dca68b969 100644 --- a/fiftyone/operators/builtin.py +++ b/fiftyone/operators/builtin.py @@ -15,6 +15,8 @@ import fiftyone.operators as foo import fiftyone.operators.types as types from fiftyone.core.odm.workspace import default_workspace_factory + +# pylint: disable=no-name-in-module from fiftyone.operators.builtins.panels.model_evaluation import EvaluationPanel @@ -66,8 +68,9 @@ def _edit_field_info_inputs(ctx, inputs): } ) + path_keys = list(schema.keys()) path_selector = types.AutocompleteView() - for key in sorted(schema.keys()): + for key in path_keys: path_selector.add_choice(key, label=key) inputs.enum( @@ -239,7 +242,7 @@ def _clone_sample_field_inputs(ctx, inputs): schema = target_view.get_field_schema(flat=True) full_schema = ctx.dataset.get_field_schema(flat=True) - field_keys = sorted(schema.keys()) + field_keys = list(schema.keys()) field_selector = types.AutocompleteView() for key in field_keys: field_selector.add_choice(key, label=key) @@ -367,7 +370,7 @@ def _clone_frame_field_inputs(ctx, inputs): schema = target_view.get_frame_field_schema(flat=True) full_schema = ctx.dataset.get_frame_field_schema(flat=True) - field_keys = sorted(schema.keys()) + field_keys = list(schema.keys()) field_selector = types.AutocompleteView() for key in field_keys: field_selector.add_choice(key, label=key) @@ -454,8 +457,9 @@ def _rename_sample_field_inputs(ctx, inputs): prop.invalid = True return + field_keys = list(schema.keys()) field_selector = types.AutocompleteView() - for key in sorted(schema.keys()): + for key in field_keys: field_selector.add_choice(key, label=key) field_prop = inputs.enum( @@ -549,8 +553,9 @@ def _rename_frame_field_inputs(ctx, inputs): prop.invalid = True return + field_keys = list(schema.keys()) field_selector = types.AutocompleteView() - for key in sorted(schema.keys()): + for key in field_keys: field_selector.add_choice(key, label=key) field_prop = inputs.enum( @@ -664,7 +669,7 @@ def _clear_sample_field_inputs(ctx, inputs): schema.pop("id", None) schema.pop("filepath", None) - field_keys = sorted(schema.keys()) + field_keys = list(schema.keys()) field_selector = types.AutocompleteView() for key in field_keys: field_selector.add_choice(key, label=key) @@ -764,7 +769,7 @@ def _clear_frame_field_inputs(ctx, inputs): schema.pop("id", None) schema.pop("frame_number", None) - field_keys = sorted(schema.keys()) + field_keys = list(schema.keys()) field_selector = types.AutocompleteView() for key in field_keys: field_selector.add_choice(key, label=key) @@ -907,8 +912,9 @@ def _delete_sample_field_inputs(ctx, inputs): prop.invalid = True return + field_keys = list(schema.keys()) field_selector = types.AutocompleteView() - for key in sorted(schema.keys()): + for key in field_keys: field_selector.add_choice(key, label=key) field_prop = inputs.enum( @@ -976,8 +982,9 @@ def _delete_frame_field_inputs(ctx, inputs): prop.invalid = True return + field_keys = list(schema.keys()) field_selector = types.AutocompleteView() - for key in sorted(schema.keys()): + for key in field_keys: field_selector.add_choice(key, label=key) field_prop = inputs.enum( @@ -1021,9 +1028,34 @@ def resolve_input(self, ctx): } ) + categorical_field_types = ( + fo.StringField, + fo.BooleanField, + fo.ObjectIdField, + ) + numeric_field_types = ( + fo.FloatField, + fo.IntField, + fo.DateField, + fo.DateTimeField, + ) + valid_field_types = categorical_field_types + numeric_field_types + + path_keys = [ + p + for p, f in schema.items() + if ( + isinstance(f, valid_field_types) + or ( + isinstance(f, fo.ListField) + and isinstance(f.field, valid_field_types) + ) + ) + ] + indexes = set(ctx.dataset.list_indexes()) - field_keys = sorted(p for p in schema if p not in indexes) + field_keys = [p for p in path_keys if p not in indexes] field_selector = types.AutocompleteView() for key in field_keys: field_selector.add_choice(key, label=key) @@ -1051,7 +1083,7 @@ def execute(self, ctx): field_name = ctx.params["field_name"] unique = ctx.params.get("unique", False) - ctx.dataset.create_index(field_name, unique=unique) + ctx.dataset.create_index(field_name, unique=unique, wait=False) class DropIndex(foo.Operator): @@ -1071,7 +1103,8 @@ def resolve_input(self, ctx): default_indexes = set(ctx.dataset._get_default_indexes()) if ctx.dataset._has_frame_fields(): default_indexes.update( - ctx.dataset._get_default_indexes(frames=True) + ctx.dataset._FRAMES_PREFIX + path + for path in ctx.dataset._get_default_indexes(frames=True) ) indexes = [i for i in indexes if i not in default_indexes] @@ -1132,6 +1165,9 @@ def execute(self, ctx): read_only = ctx.params.get("read_only", True) create_index = ctx.params.get("create_index", True) + if not field_name: + field_name = None + if not sidebar_group: sidebar_group = False @@ -1159,24 +1195,34 @@ def _create_summary_field_inputs(ctx, inputs): } ) - categorical_field_types = (fo.StringField, fo.BooleanField) + categorical_field_types = ( + fo.StringField, + fo.BooleanField, + fo.ObjectIdField, + ) numeric_field_types = ( fo.FloatField, fo.IntField, fo.DateField, fo.DateTimeField, ) + valid_field_types = categorical_field_types + numeric_field_types - schema = { - p: f + field_keys = [ + p for p, f in schema.items() if ( - isinstance(f, categorical_field_types) - or isinstance(f, numeric_field_types) + isinstance(f, valid_field_types) + or ( + isinstance(f, fo.ListField) + and isinstance(f.field, valid_field_types) + ) ) - } + ] - path_keys = list(schema.keys()) + summarized_fields = set(ctx.dataset._get_summarized_fields_map()) + + path_keys = [p for p in field_keys if p not in summarized_fields] path_selector = types.AutocompleteView() for key in path_keys: path_selector.add_choice(key, label=key) @@ -1208,7 +1254,7 @@ def _create_summary_field_inputs(ctx, inputs): default=default_field_name, ) - if field_name and field_name in path_keys: + if field_name and field_name in schema: field_name_prop.invalid = True field_name_prop.error_message = f"Field '{field_name}' already exists" inputs.str( @@ -1254,7 +1300,7 @@ def _create_summary_field_inputs(ctx, inputs): ) elif isinstance(field, numeric_field_types): group_prefix = path.rsplit(".", 1)[0] + "." - group_by_keys = sorted(p for p in schema if p.startswith(group_prefix)) + group_by_keys = [p for p in field_keys if p.startswith(group_prefix)] group_by_selector = types.AutocompleteView() for group in group_by_keys: group_by_selector.add_choice(group, label=group) From 1e12882c90708a677932f30885331a822b85f3ff Mon Sep 17 00:00:00 2001 From: brimoor Date: Tue, 12 Nov 2024 13:40:09 -0500 Subject: [PATCH 2/2] dynamic update! --- fiftyone/operators/builtin.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fiftyone/operators/builtin.py b/fiftyone/operators/builtin.py index 8dca68b969..70effa2247 100644 --- a/fiftyone/operators/builtin.py +++ b/fiftyone/operators/builtin.py @@ -1158,7 +1158,7 @@ def resolve_input(self, ctx): def execute(self, ctx): path = ctx.params["path"] - field_name = ctx.params.get("field_name", None) + _, field_name = _get_dynamic(ctx.params, "field_name", path, None) sidebar_group = ctx.params.get("sidebar_group", None) include_counts = ctx.params.get("include_counts", False) group_by = ctx.params.get("group_by", None) @@ -1184,6 +1184,12 @@ def execute(self, ctx): ctx.trigger("reload_dataset") +def _get_dynamic(params, key, ref_path, default=None): + dynamic_key = key + "|" + ref_path.replace(".", "_") + value = params.get(dynamic_key, default) + return dynamic_key, value + + def _create_summary_field_inputs(ctx, inputs): schema = ctx.dataset.get_field_schema(flat=True) if ctx.dataset._has_frame_fields(): @@ -1240,14 +1246,14 @@ def _create_summary_field_inputs(ctx, inputs): if path is None or path not in path_keys: return - field_name = ctx.params.get("field_name", None) + prop_name, field_name = _get_dynamic(ctx.params, "field_name", path, None) if field_name is None: default_field_name = ctx.dataset._get_default_summary_field_name(path) else: default_field_name = field_name - field_name_prop = inputs.str( - "field_name", + prop = inputs.str( + prop_name, required=False, label="Summary field", description="The sample field in which to store the summary data", @@ -1255,8 +1261,8 @@ def _create_summary_field_inputs(ctx, inputs): ) if field_name and field_name in schema: - field_name_prop.invalid = True - field_name_prop.error_message = f"Field '{field_name}' already exists" + prop.invalid = True + prop.error_message = f"Field '{field_name}' already exists" inputs.str( "error", label="Error",