diff --git a/plugins/evaluation/__init__.py b/plugins/evaluation/__init__.py
index dd17271e..845fabcd 100644
--- a/plugins/evaluation/__init__.py
+++ b/plugins/evaluation/__init__.py
@@ -58,7 +58,7 @@ def execute(self, ctx):
         # Parse custom metrics
         if metrics:
             custom_metrics = {}
-            for metric in _to_string_list(metrics):
+            for metric in metrics:
                 operator = foo.get_operator(metric)
                 kwargs.pop(f"header|{metric}", None)
                 params = kwargs.pop(f"parameters|{metric}", None)
@@ -231,29 +231,25 @@ def _get_evaluation_type(view, pred_field):
 
 def _add_custom_metrics(ctx, inputs, eval_type, method):
     supported_metrics = []
-    for operator in foo.list_operators(type="operator"):
-        if "metric_tags" in operator.config.kwargs:
-            metric_tags = operator.config.kwargs["metric_tags"]
-            if not metric_tags or eval_type in metric_tags:
-                supported_metrics.append(operator)
+    for operator in foo.list_operators(type=foo.EvaluationMetric):
+        eval_types = getattr(operator.config, "eval_types", None)
+        if eval_types is None or eval_type in eval_types:
+            supported_metrics.append(operator)
 
     if not supported_metrics:
         return
 
-    metrics = _to_string_list(ctx.params.get("metrics", []))
-
-    metric_choices = types.AutocompleteView(multiple=True)
+    metric_choices = types.DropdownView(multiple=True)
     for operator in supported_metrics:
-        if operator.uri not in metrics:
-            metric_choices.add_choice(
-                operator.uri,
-                label=operator.config.label,
-                description=operator.config.description,
-            )
+        metric_choices.add_choice(
+            operator.uri,
+            label=operator.config.label,
+            description=operator.config.description,
+        )
 
-    prop = inputs.list(
+    inputs.list(
         "metrics",
-        types.OneOf([types.Object(), types.String()]),
+        types.String(),
         required=False,
         default=None,
         label="Custom metrics",
@@ -261,14 +257,7 @@ def _add_custom_metrics(ctx, inputs, eval_type, method):
         view=metric_choices,
     )
 
-    for metric in metrics:
-        if not any(metric == operator.uri for operator in supported_metrics):
-            prop.invalid = True
-            prop.error_message = f"Invalid metric '{metric}'"
-            return
-
-    if not metrics:
-        return
+    metrics = ctx.params.get("metrics", None) or []
 
     for metric in metrics:
         operator = foo.get_operator(metric)
@@ -1279,13 +1268,6 @@ def get_new_eval_key(
     return eval_key
 
 
-def _to_string_list(values):
-    if not values:
-        return []
-
-    return [d["value"] if isinstance(d, dict) else d for d in values]
-
-
 def register(p):
     p.register(EvaluateModel)
     p.register(GetEvaluationInfo)
diff --git a/plugins/metric-examples/__init__.py b/plugins/metric-examples/__init__.py
index a9c29ffd..9583fed0 100644
--- a/plugins/metric-examples/__init__.py
+++ b/plugins/metric-examples/__init__.py
@@ -1,167 +1,204 @@
 """
 Example metrics.
 
-| Copyright 2017-2024, Voxel51, Inc.
+| Copyright 2017-2025, Voxel51, Inc.
 | `voxel51.com <https://voxel51.com/>`_
 |
 """
-
-import fiftyone as fo
-import fiftyone.operators as foo
-import fiftyone.operators.types as types
-import numpy as np
 import itertools
+
+import numpy as np
 
-
-class EvaluationMetric(foo.Operator):
-    def get_parameters(self, ctx, inputs):
-        pass
-
-    def parse_parameters(self, ctx, params):
-        pass
-
-    def compute_by_sample(self, sample, eval_key, **kwargs):
-        pass
-
-    def compute(self, samples, eval_key, results, **kwargs):
-        raise NotImplementedError("Subclass must implement compute()")
-
-    def get_fields(self, samples, eval_key):
-        return []
-
-    def rename(self, samples, eval_key, new_eval_key):
-        dataset = samples._dataset
-        for metric_field in self.get_fields(samples, eval_key):
-            new_metric_field = metric_field.replace(eval_key, new_eval_key, 1)
-            dataset.rename_sample_field(metric_field, new_metric_field)
-
-    def cleanup(self, samples, eval_key):
-        dataset = samples._dataset
-        for metric_field in self.get_fields(samples, eval_key):
-            dataset.delete_sample_field(metric_field, error_level=1)
+import fiftyone as fo
+import fiftyone.operators as foo
+from fiftyone import ViewField as F
 
 
-class ExampleMetric(EvaluationMetric):
+class ExampleMetric(foo.EvaluationMetric):
     @property
     def config(self):
-        return foo.OperatorConfig(
+        return foo.EvaluationMetricConfig(
             name="example_metric",
             label="Example metric",
-            description="This is an example metric",
-            metric_tags=None,
+            description="An example evaluation metric",
         )
 
     def get_parameters(self, ctx, inputs):
         inputs.str(
             "value",
-            label="Example parameter",
-            description="This is an example metric parameter",
+            label="Example value",
+            description="The example value to store/return",
             default="foo",
             required=True,
        )
 
-    def compute(self, samples, eval_key, results, value="foo"):
+    def compute(self, samples, results, value="foo"):
         dataset = samples._dataset
+        eval_key = results.key
         metric_field = f"{eval_key}_{self.config.name}"
         dataset.add_sample_field(metric_field, fo.StringField)
         samples.set_field(metric_field, value).save()
 
         return value
 
-    def get_fields(self, samples, eval_key):
-        expected_fields = [f"{eval_key}_{self.config.name}"]
-        return list(filter(samples.has_field, expected_fields))
+    def get_fields(self, samples, config, eval_key):
+        return [f"{eval_key}_{self.config.name}"]
 
 
-def _safe_mean(values):
-    values = [v for v in values if v is not None]
-    return np.mean(values) if values else None
-
-
-def _abs_error(ypred, ytrue):
-    return abs(ypred - ytrue)
-
-
-class AbsoluteErrorMetric(EvaluationMetric):
+class MeanAbsoluteErrorMetric(foo.EvaluationMetric):
     @property
     def config(self):
-        return foo.OperatorConfig(
-            name="absolute_error",
-            label="Absolute Error Metric",
-            description="A metric for absolute error.",
-            metric_tags=["regression"],
+        return foo.EvaluationMetricConfig(
+            name="mean_absolute_error",
+            label="Mean Absolute Error",
+            description="Computes the mean absolute error of the regression data",
+            eval_types=["regression"],
             lower_is_better=True,
         )
 
-    def compute_by_sample(self, sample, eval_key, ytrue, ypred):
-        metric_field = f"{eval_key}_{self.config.name}"
-        if sample.media_type == "video":
-            frame_errors = list(map(_abs_error, ypred, ytrue))
-            for idx, frame in enumerate(sample.frames.values()):
-                frame[metric_field] = frame_errors[idx]
-            sample[metric_field] = _safe_mean(frame_errors)
+    def compute(self, samples, results):
+        dataset = samples._dataset
+        eval_key = results.key
+        is_frame_field = samples._is_frame_field(results.config.gt_field)
+
+        ytrue = results.ytrue
+        ypred = results.ypred
+        missing = results.missing
+
+        metric_field = f"{eval_key}_absolute_error"
+        compute_error = _make_compute_error_fcn(_absolute_error, missing)
+
+        if is_frame_field:
+            # Split values back into frames
+            frame_counts = samples.values(F("frames").length())
+            _ytrue = _unflatten(ytrue, frame_counts)
+            _ypred = _unflatten(ypred, frame_counts)
+
+            frame_errors = [
+                list(map(compute_error, _yp, _yt))
+                for _yp, _yt in zip(_ypred, _ytrue)
+            ]
+            sample_errors = [_safe_mean(e) for e in frame_errors]
+
+            errors = list(itertools.chain.from_iterable(frame_errors))
+
+            # Per-frame errors
+            _metric_field = samples._FRAMES_PREFIX + metric_field
+            samples.set_values(_metric_field, frame_errors)
+
+            # Per-sample errors
+            samples.set_values(metric_field, sample_errors)
         else:
-            sample[metric_field] = _abs_error(ypred, ytrue)
-
-    def compute(self, samples, eval_key, results):
-        ypred, ytrue = results.ypred, results.ytrue
-        start_idx = 0
-        for sample in samples.iter_samples(autosave=True):
-            num_frames = (
-                len(sample._frames) if sample.media_type == "video" else 1
-            )
-            self.compute_by_sample(
-                sample,
-                eval_key,
-                ytrue=ytrue[start_idx : start_idx + num_frames],
-                ypred=ypred[start_idx : start_idx + num_frames],
-            )
-            start_idx += num_frames
-
-    def get_fields(self, samples, eval_key):
-        metric_field = f"{eval_key}_{self.config.name}"
-        expected_fields = [metric_field, samples._FRAMES_PREFIX + metric_field]
-        return list(filter(samples.has_field, expected_fields))
+            # Per-sample errors
+            errors = list(map(compute_error, ypred, ytrue))
+            samples.set_values(metric_field, errors)
+
+        return _safe_mean(errors)
+
+    def get_fields(self, samples, config, eval_key):
+        metric_field = f"{eval_key}_absolute_error"
+        fields = [metric_field]
+        if samples._is_frame_field(config.gt_field):
+            fields.append(samples._FRAMES_PREFIX + metric_field)
 
-class MeanAbsoluteErrorMetric(EvaluationMetric):
+        return fields
+
+
+class MeanSquaredErrorMetric(foo.EvaluationMetric):
     @property
     def config(self):
-        return foo.OperatorConfig(
-            name="mean_absolute_error",
-            label="Mean Absolute Error Metric",
-            description="A metric for computing mean absolute error across all frames or samples.",
-            metric_tags=["regression"],
+        return foo.EvaluationMetricConfig(
+            name="mean_squared_error",
+            label="Mean Squared Error",
+            description="Computes the mean squared error of the regression data",
+            eval_types=["regression"],
             lower_is_better=True,
         )
 
-    def get_parameters(self, ctx, inputs):
-        eval_key = ctx.params.get("eval_key", None)
-        inputs.str(
-            "error_eval_key",
-            label="Sample/Frame error eval key parameter",
-            description="Sample/Frame error eval key to use for computing Mean Absolute Error",
-            default=f"{eval_key}_absolute_error",
-            required=True,
-        )
-
-    def compute(self, samples, eval_key, results, error_eval_key):
+    def compute(self, samples, results):
         dataset = samples._dataset
-
-        if dataset.has_field(dataset._FRAMES_PREFIX + error_eval_key):
-            # Compute MAE across all frames.
-            values = dataset.values(dataset._FRAMES_PREFIX + error_eval_key)
-            values = list(itertools.chain.from_iterable(values))
-        elif dataset.has_field(error_eval_key):
-            # Compute MAE across all samples.
-            values = dataset.values(error_eval_key)
+        eval_key = results.key
+        is_frame_field = samples._is_frame_field(results.config.gt_field)
+
+        ytrue = results.ytrue
+        ypred = results.ypred
+        missing = results.missing
+
+        metric_field = f"{eval_key}_squared_error"
+        compute_error = _make_compute_error_fcn(_squared_error, missing)
+
+        if is_frame_field:
+            # Split values back into frames
+            frame_counts = samples.values(F("frames").length())
+            _ytrue = _unflatten(ytrue, frame_counts)
+            _ypred = _unflatten(ypred, frame_counts)
+
+            # Per-frame errors
+            frame_errors = [
+                list(map(compute_error, _yp, _yt))
+                for _yp, _yt in zip(_ypred, _ytrue)
+            ]
+            errors = list(itertools.chain.from_iterable(frame_errors))
+            _metric_field = samples._FRAMES_PREFIX + metric_field
+            samples.set_values(_metric_field, frame_errors)
+
+            # Per-sample mean errors
+            sample_errors = [_safe_mean(e) for e in frame_errors]
+            samples.set_values(metric_field, sample_errors)
         else:
+            # Per-sample errors
+            errors = list(map(compute_error, ypred, ytrue))
+            samples.set_values(metric_field, errors)
+
+        return _safe_mean(errors)
+
+    def get_fields(self, samples, config, eval_key):
+        metric_field = f"{eval_key}_squared_error"
+
+        fields = [metric_field]
+        if samples._is_frame_field(config.gt_field):
+            fields.append(samples._FRAMES_PREFIX + metric_field)
+
+        return fields
+
+
+def _unflatten(values, counts):
+    _values = iter(values)
+    return [list(itertools.islice(_values, n)) for n in counts]
+
+
+def _make_compute_error_fcn(error_fcn, missing):
+    def compute_error(yp, yt):
+        if missing is not None:
+            if yp is None:
+                yp = missing
+
+            if yt is None:
+                yt = missing
+
+        try:
+            return error_fcn(yp, yt)
+        except:
             return None
 
-        return np.average(values).tolist()
+    return compute_error
+
+
+def _absolute_error(ypred, ytrue):
+    return np.abs(ypred - ytrue)
+
+
+def _squared_error(ypred, ytrue):
+    return np.square(ypred - ytrue)
+
+
+def _safe_mean(values):
+    values = [v for v in values if v is not None]
+    return np.mean(values) if values else None
 
 
 def register(p):
     p.register(ExampleMetric)
-    p.register(AbsoluteErrorMetric)
     p.register(MeanAbsoluteErrorMetric)
+    p.register(MeanSquaredErrorMetric)
diff --git a/plugins/metric-examples/fiftyone.yml b/plugins/metric-examples/fiftyone.yml
index d11e64d8..d33e3046 100644
--- a/plugins/metric-examples/fiftyone.yml
+++ b/plugins/metric-examples/fiftyone.yml
@@ -1,5 +1,5 @@
 name: "@voxel51/metric-examples"
-description: Example metrics
+description: Example evaluation metrics
 version: 1.0.0
 fiftyone:
   version: ">=1.3.0"
@@ -7,5 +7,5 @@ url: https://github.com/voxel51/fiftyone-plugins/tree/main/plugins/metric-exampl
 license: Apache 2.0
 panels:
   - example_metric
-  - absolute_error
   - mean_absolute_error
+  - mean_squared_error
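
Usage note (illustration, not part of the diff): a minimal sketch of how the updated example metrics might be invoked after this change, assuming FiftyOne's custom evaluation metric support in which evaluation calls accept a `custom_metrics` argument mapping metric operator URIs to optional kwargs. The dataset name and field names below are hypothetical.

import fiftyone as fo

# Hypothetical dataset with regression predictions in "predictions"
# and ground truth in "ground_truth"
dataset = fo.load_dataset("my-regression-dataset")

results = dataset.evaluate_regressions(
    "predictions",
    gt_field="ground_truth",
    eval_key="eval",
    custom_metrics={
        # kwargs are forwarded to each metric's compute()
        "@voxel51/metric-examples/example_metric": {"value": "spam"},
        "@voxel51/metric-examples/mean_absolute_error": {},
        "@voxel51/metric-examples/mean_squared_error": {},
    },
)

# Per-sample errors land in the fields reported by each metric's
# get_fields(), e.g. "{eval_key}_absolute_error" and "{eval_key}_squared_error"
print(dataset.bounds("eval_absolute_error"))
print(dataset.bounds("eval_squared_error"))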