Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Summary field and index improvements #5091

Merged
merged 2 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 41 additions & 3 deletions fiftyone/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1673,6 +1673,18 @@ def list_summary_fields(self):
self.get_field_schema(flat=True, info_keys=_SUMMARY_FIELD_KEY)
)

def _get_summarized_fields_map(self):
schema = self.get_field_schema(flat=True, info_keys=_SUMMARY_FIELD_KEY)

summarized_fields = {}
for path, field in schema.items():
summary_info = field.info[_SUMMARY_FIELD_KEY]
source_path = summary_info.get("path", None)
if source_path is not None:
summarized_fields[source_path] = path

return summarized_fields

def create_summary_field(
self,
path,
Expand Down Expand Up @@ -1750,13 +1762,25 @@ def create_summary_field(
"""
_field = self.get_field(path)

if isinstance(_field, (fof.StringField, fof.BooleanField)):
is_list_field = isinstance(_field, fof.ListField)
if is_list_field:
_field = _field.field

if isinstance(
_field,
(fof.StringField, fof.BooleanField, fof.ObjectIdField),
):
field_type = "categorical"
elif isinstance(
_field,
(fof.FloatField, fof.IntField, fof.DateField, fof.DateTimeField),
):
field_type = "numeric"
elif is_list_field:
raise ValueError(
f"Cannot generate a summary for list field '{path}' with "
f"element type {type(_field)}"
)
elif _field is not None:
raise ValueError(
f"Cannot generate a summary for field '{path}' of "
Expand Down Expand Up @@ -1889,8 +1913,17 @@ def create_summary_field(
return field_name

def _get_default_summary_field_name(self, path):
_path, is_frame_field, list_fields, _, _ = self._parse_field_name(path)
(
_path,
is_frame_field,
list_fields,
_,
id_to_str,
) = self._parse_field_name(path)

_chunks = _path.split(".")
if id_to_str:
_chunks = [c[1:] if c.startswith("_") else c for c in _chunks]

chunks = []
if is_frame_field:
Expand All @@ -1907,7 +1940,12 @@ def _get_default_summary_field_name(self, path):
if found_list:
chunks.append(_chunks[-1])

return "_".join(chunks)
field_name = "_".join(chunks)

if field_name == path:
field_name += "_summary"

return field_name

def _populate_summary_field(self, field_name, summary_info):
path = summary_info["path"]
Expand Down
88 changes: 67 additions & 21 deletions fiftyone/operators/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import fiftyone.operators as foo
import fiftyone.operators.types as types
from fiftyone.core.odm.workspace import default_workspace_factory

# pylint: disable=no-name-in-module
from fiftyone.operators.builtins.panels.model_evaluation import EvaluationPanel


Expand Down Expand Up @@ -66,8 +68,9 @@ def _edit_field_info_inputs(ctx, inputs):
}
)

path_keys = list(schema.keys())
path_selector = types.AutocompleteView()
for key in sorted(schema.keys()):
for key in path_keys:
Comment on lines +71 to +73
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Consider refactoring repeated field selection logic

The same pattern for field selection appears in multiple places:

  1. Converting keys to a list
  2. Creating an AutocompleteView
  3. Adding choices in a loop

Consider extracting this into a helper function to reduce code duplication.

Example implementation:

def create_field_selector(keys):
    """Create an AutocompleteView for the given field keys.
    
    Args:
        keys: Iterable of field keys
        
    Returns:
        types.AutocompleteView: The configured selector
    """
    field_selector = types.AutocompleteView()
    for key in sorted(keys):  # Sort for consistent ordering
        field_selector.add_choice(key, label=key)
    return field_selector

Then use it like:

-    field_keys = list(schema.keys())
-    field_selector = types.AutocompleteView()
-    for key in field_keys:
-        field_selector.add_choice(key, label=key)
+    field_selector = create_field_selector(schema.keys())

Also applies to: 245-246, 373-374, 460-462, 556-558, 672-674, 772-774, 915-917, 985-987

path_selector.add_choice(key, label=key)

inputs.enum(
Expand Down Expand Up @@ -239,7 +242,7 @@ def _clone_sample_field_inputs(ctx, inputs):
schema = target_view.get_field_schema(flat=True)
full_schema = ctx.dataset.get_field_schema(flat=True)

field_keys = sorted(schema.keys())
field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in field_keys:
field_selector.add_choice(key, label=key)
Expand Down Expand Up @@ -367,7 +370,7 @@ def _clone_frame_field_inputs(ctx, inputs):
schema = target_view.get_frame_field_schema(flat=True)
full_schema = ctx.dataset.get_frame_field_schema(flat=True)

field_keys = sorted(schema.keys())
field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in field_keys:
field_selector.add_choice(key, label=key)
Expand Down Expand Up @@ -454,8 +457,9 @@ def _rename_sample_field_inputs(ctx, inputs):
prop.invalid = True
return

field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in sorted(schema.keys()):
for key in field_keys:
field_selector.add_choice(key, label=key)

field_prop = inputs.enum(
Expand Down Expand Up @@ -549,8 +553,9 @@ def _rename_frame_field_inputs(ctx, inputs):
prop.invalid = True
return

field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in sorted(schema.keys()):
for key in field_keys:
field_selector.add_choice(key, label=key)

field_prop = inputs.enum(
Expand Down Expand Up @@ -664,7 +669,7 @@ def _clear_sample_field_inputs(ctx, inputs):
schema.pop("id", None)
schema.pop("filepath", None)

field_keys = sorted(schema.keys())
field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in field_keys:
field_selector.add_choice(key, label=key)
Expand Down Expand Up @@ -764,7 +769,7 @@ def _clear_frame_field_inputs(ctx, inputs):
schema.pop("id", None)
schema.pop("frame_number", None)

field_keys = sorted(schema.keys())
field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in field_keys:
field_selector.add_choice(key, label=key)
Expand Down Expand Up @@ -907,8 +912,9 @@ def _delete_sample_field_inputs(ctx, inputs):
prop.invalid = True
return

field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in sorted(schema.keys()):
for key in field_keys:
field_selector.add_choice(key, label=key)

field_prop = inputs.enum(
Expand Down Expand Up @@ -976,8 +982,9 @@ def _delete_frame_field_inputs(ctx, inputs):
prop.invalid = True
return

field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in sorted(schema.keys()):
for key in field_keys:
field_selector.add_choice(key, label=key)

field_prop = inputs.enum(
Expand Down Expand Up @@ -1021,9 +1028,34 @@ def resolve_input(self, ctx):
}
)

categorical_field_types = (
fo.StringField,
fo.BooleanField,
fo.ObjectIdField,
)
numeric_field_types = (
fo.FloatField,
fo.IntField,
fo.DateField,
fo.DateTimeField,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FrameNumberField could be apply here. I only note it because it came up in the lightning backend. Reported by @minhtuev

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the backend we have the luxury of using isinstance, which understands that FrameNumberField is a subclass of IntField. So we're all set 😎

)
valid_field_types = categorical_field_types + numeric_field_types

path_keys = [
p
for p, f in schema.items()
if (
isinstance(f, valid_field_types)
or (
isinstance(f, fo.ListField)
and isinstance(f.field, valid_field_types)
)
)
]

indexes = set(ctx.dataset.list_indexes())

field_keys = sorted(p for p in schema if p not in indexes)
field_keys = [p for p in path_keys if p not in indexes]
field_selector = types.AutocompleteView()
for key in field_keys:
field_selector.add_choice(key, label=key)
Expand Down Expand Up @@ -1051,7 +1083,7 @@ def execute(self, ctx):
field_name = ctx.params["field_name"]
unique = ctx.params.get("unique", False)

ctx.dataset.create_index(field_name, unique=unique)
ctx.dataset.create_index(field_name, unique=unique, wait=False)


class DropIndex(foo.Operator):
Expand All @@ -1071,7 +1103,8 @@ def resolve_input(self, ctx):
default_indexes = set(ctx.dataset._get_default_indexes())
if ctx.dataset._has_frame_fields():
default_indexes.update(
ctx.dataset._get_default_indexes(frames=True)
ctx.dataset._FRAMES_PREFIX + path
for path in ctx.dataset._get_default_indexes(frames=True)
)

indexes = [i for i in indexes if i not in default_indexes]
Expand Down Expand Up @@ -1132,6 +1165,9 @@ def execute(self, ctx):
read_only = ctx.params.get("read_only", True)
create_index = ctx.params.get("create_index", True)

if not field_name:
field_name = None

if not sidebar_group:
sidebar_group = False

Expand Down Expand Up @@ -1159,24 +1195,34 @@ def _create_summary_field_inputs(ctx, inputs):
}
)

categorical_field_types = (fo.StringField, fo.BooleanField)
categorical_field_types = (
fo.StringField,
fo.BooleanField,
fo.ObjectIdField,
)
numeric_field_types = (
fo.FloatField,
fo.IntField,
fo.DateField,
fo.DateTimeField,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above

)
valid_field_types = categorical_field_types + numeric_field_types

schema = {
p: f
field_keys = [
p
for p, f in schema.items()
if (
isinstance(f, categorical_field_types)
or isinstance(f, numeric_field_types)
isinstance(f, valid_field_types)
or (
isinstance(f, fo.ListField)
and isinstance(f.field, valid_field_types)
)
)
}
]

path_keys = list(schema.keys())
summarized_fields = set(ctx.dataset._get_summarized_fields_map())

path_keys = [p for p in field_keys if p not in summarized_fields]
path_selector = types.AutocompleteView()
for key in path_keys:
path_selector.add_choice(key, label=key)
Expand Down Expand Up @@ -1208,7 +1254,7 @@ def _create_summary_field_inputs(ctx, inputs):
default=default_field_name,
)

if field_name and field_name in path_keys:
if field_name and field_name in schema:
field_name_prop.invalid = True
field_name_prop.error_message = f"Field '{field_name}' already exists"
inputs.str(
Expand Down Expand Up @@ -1254,7 +1300,7 @@ def _create_summary_field_inputs(ctx, inputs):
)
elif isinstance(field, numeric_field_types):
group_prefix = path.rsplit(".", 1)[0] + "."
group_by_keys = sorted(p for p in schema if p.startswith(group_prefix))
group_by_keys = [p for p in field_keys if p.startswith(group_prefix)]
group_by_selector = types.AutocompleteView()
for group in group_by_keys:
group_by_selector.add_choice(group, label=group)
Expand Down
Loading