Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding min() and max() aggregations #5029

Merged
merged 2 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions docs/source/user_guide/using_aggregations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,46 @@ compute the sum of the (non-``None``) values of a field in a collection:
)
# 0.34994137249820706

.. _aggregations-min:

Min values
__________

You can use the
:meth:`min() <fiftyone.core.collections.SampleCollection.min>` aggregation to
compute the minimum of the (non-``None``) values of a field in a collection:

.. code-block:: python
    :linenos:

    import fiftyone.zoo as foz

    dataset = foz.load_zoo_dataset("quickstart")

    # Compute minimum confidence of detections in the `predictions` field
    print(dataset.min("predictions.detections.confidence"))
    # 0.05003104358911514

.. _aggregations-max:

Max values
__________

You can use the
:meth:`max() <fiftyone.core.collections.SampleCollection.max>` aggregation to
compute the maximum of the (non-``None``) values of a field in a collection:

.. code-block:: python
    :linenos:

    import fiftyone.zoo as foz

    dataset = foz.load_zoo_dataset("quickstart")

    # Compute maximum confidence of detections in the `predictions` field
    print(dataset.max("predictions.detections.confidence"))
    # 0.9999035596847534

.. _aggregations-mean:

Mean values
Expand Down
2 changes: 2 additions & 0 deletions fiftyone/__public__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
Distinct,
FacetAggregations,
HistogramValues,
Min,
Max,
Mean,
Quantiles,
Schema,
Expand Down
246 changes: 245 additions & 1 deletion fiftyone/core/aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ class Bounds(Aggregation):
print(bounds) # (min, max)

#
# Compute the a bounds of a numeric list field
# Compute the bounds of a numeric list field
#

aggregation = fo.Bounds("numeric_list_field")
Expand Down Expand Up @@ -1482,6 +1482,250 @@ def _parse_edges(self, edges):
return edges


class Min(Aggregation):
    """Computes the minimum of a numeric field of a collection.

    ``None``-valued fields are ignored.

    This aggregation is typically applied to *numeric* or *date* field types
    (or lists of such types):

    -   :class:`fiftyone.core.fields.IntField`
    -   :class:`fiftyone.core.fields.FloatField`
    -   :class:`fiftyone.core.fields.DateField`
    -   :class:`fiftyone.core.fields.DateTimeField`

    Examples::

        import fiftyone as fo
        from fiftyone import ViewField as F

        dataset = fo.Dataset()
        dataset.add_samples(
            [
                fo.Sample(
                    filepath="/path/to/image1.png",
                    numeric_field=1.0,
                    numeric_list_field=[1, 2, 3],
                ),
                fo.Sample(
                    filepath="/path/to/image2.png",
                    numeric_field=4.0,
                    numeric_list_field=[1, 2],
                ),
                fo.Sample(
                    filepath="/path/to/image3.png",
                    numeric_field=None,
                    numeric_list_field=None,
                ),
            ]
        )

        #
        # Compute the minimum of a numeric field
        #

        aggregation = fo.Min("numeric_field")
        min_value = dataset.aggregate(aggregation)
        print(min_value)  # the min

        #
        # Compute the minimum of a numeric list field
        #

        aggregation = fo.Min("numeric_list_field")
        min_value = dataset.aggregate(aggregation)
        print(min_value)  # the min

        #
        # Compute the minimum of a transformation of a numeric field
        #

        aggregation = fo.Min(2 * (F("numeric_field") + 1))
        min_value = dataset.aggregate(aggregation)
        print(min_value)  # the min

    Args:
        field_or_expr: a field name, ``embedded.field.name``,
            :class:`fiftyone.core.expressions.ViewExpression`, or
            `MongoDB expression <https://docs.mongodb.com/manual/meta/aggregation-quick-reference/#aggregation-expressions>`_
            defining the field or expression to aggregate
        expr (None): a :class:`fiftyone.core.expressions.ViewExpression` or
            `MongoDB expression <https://docs.mongodb.com/manual/meta/aggregation-quick-reference/#aggregation-expressions>`_
            to apply to ``field_or_expr`` (which must be a field) before
            aggregating
        safe (False): whether to ignore nan/inf values when dealing with
            floating point values
    """

    def default_result(self):
        """Returns the default result for this aggregation.

        Returns:
            ``None``
        """
        return None

    def parse_result(self, d):
        """Parses the output of :meth:`to_mongo`.

        If :meth:`to_mongo` recorded a field type for the aggregated path, the
        raw value is converted via that type's ``to_python()``; otherwise the
        raw value is returned unchanged.

        Args:
            d: the result dict

        Returns:
            the minimum value
        """
        value = d["min"]

        # ``_field_type`` is only assigned by ``to_mongo()``. Fall back to
        # ``None`` so that parsing a result on an instance whose pipeline has
        # not been built returns the raw value rather than raising an
        # ``AttributeError``
        field_type = getattr(self, "_field_type", None)
        if field_type is not None:
            value = field_type.to_python(value)

        return value

    def to_mongo(self, sample_collection, context=None):
        """Returns the MongoDB aggregation pipeline for this aggregation.

        As a side effect, stores the field type reported by
        ``_parse_field_and_expr()`` on this instance so that
        :meth:`parse_result` can convert the raw value.

        Args:
            sample_collection: the sample collection to which the aggregation
                is being applied
            context (None): an optional context that is forwarded to
                ``_parse_field_and_expr()``

        Returns:
            the pipeline produced by ``_parse_field_and_expr()`` with a final
            ``$group`` stage appended that stores the result under ``"min"``
        """
        path, pipeline, _, id_to_str, field_type = _parse_field_and_expr(
            sample_collection,
            self._field_name,
            expr=self._expr,
            safe=self._safe,
            context=context,
        )

        self._field_type = field_type

        if id_to_str:
            value = {"$toString": "$" + path}
        else:
            value = "$" + path

        # Grouping on a constant ``_id`` reduces all documents to a single one
        pipeline.append({"$group": {"_id": None, "min": {"$min": value}}})

        return pipeline


class Max(Aggregation):
    """Computes the maximum of a numeric field of a collection.

    ``None``-valued fields are ignored.

    This aggregation is typically applied to *numeric* or *date* field types
    (or lists of such types):

    -   :class:`fiftyone.core.fields.IntField`
    -   :class:`fiftyone.core.fields.FloatField`
    -   :class:`fiftyone.core.fields.DateField`
    -   :class:`fiftyone.core.fields.DateTimeField`

    Examples::

        import fiftyone as fo
        from fiftyone import ViewField as F

        dataset = fo.Dataset()
        dataset.add_samples(
            [
                fo.Sample(
                    filepath="/path/to/image1.png",
                    numeric_field=1.0,
                    numeric_list_field=[1, 2, 3],
                ),
                fo.Sample(
                    filepath="/path/to/image2.png",
                    numeric_field=4.0,
                    numeric_list_field=[1, 2],
                ),
                fo.Sample(
                    filepath="/path/to/image3.png",
                    numeric_field=None,
                    numeric_list_field=None,
                ),
            ]
        )

        #
        # Compute the maximum of a numeric field
        #

        aggregation = fo.Max("numeric_field")
        max_value = dataset.aggregate(aggregation)
        print(max_value)  # the max

        #
        # Compute the maximum of a numeric list field
        #

        aggregation = fo.Max("numeric_list_field")
        max_value = dataset.aggregate(aggregation)
        print(max_value)  # the max

        #
        # Compute the maximum of a transformation of a numeric field
        #

        aggregation = fo.Max(2 * (F("numeric_field") + 1))
        max_value = dataset.aggregate(aggregation)
        print(max_value)  # the max

    Args:
        field_or_expr: a field name, ``embedded.field.name``,
            :class:`fiftyone.core.expressions.ViewExpression`, or
            `MongoDB expression <https://docs.mongodb.com/manual/meta/aggregation-quick-reference/#aggregation-expressions>`_
            defining the field or expression to aggregate
        expr (None): a :class:`fiftyone.core.expressions.ViewExpression` or
            `MongoDB expression <https://docs.mongodb.com/manual/meta/aggregation-quick-reference/#aggregation-expressions>`_
            to apply to ``field_or_expr`` (which must be a field) before
            aggregating
        safe (False): whether to ignore nan/inf values when dealing with
            floating point values
    """

    def default_result(self):
        """Returns the default result for this aggregation.

        Returns:
            ``None``
        """
        return None

    def parse_result(self, d):
        """Parses the output of :meth:`to_mongo`.

        If :meth:`to_mongo` recorded a field type for the aggregated path, the
        raw value is converted via that type's ``to_python()``; otherwise the
        raw value is returned unchanged.

        Args:
            d: the result dict

        Returns:
            the maximum value
        """
        value = d["max"]

        # ``_field_type`` is only assigned by ``to_mongo()``. Fall back to
        # ``None`` so that parsing a result on an instance whose pipeline has
        # not been built returns the raw value rather than raising an
        # ``AttributeError``
        field_type = getattr(self, "_field_type", None)
        if field_type is not None:
            value = field_type.to_python(value)

        return value

    def to_mongo(self, sample_collection, context=None):
        """Returns the MongoDB aggregation pipeline for this aggregation.

        As a side effect, stores the field type reported by
        ``_parse_field_and_expr()`` on this instance so that
        :meth:`parse_result` can convert the raw value.

        Args:
            sample_collection: the sample collection to which the aggregation
                is being applied
            context (None): an optional context that is forwarded to
                ``_parse_field_and_expr()``

        Returns:
            the pipeline produced by ``_parse_field_and_expr()`` with a final
            ``$group`` stage appended that stores the result under ``"max"``
        """
        path, pipeline, _, id_to_str, field_type = _parse_field_and_expr(
            sample_collection,
            self._field_name,
            expr=self._expr,
            safe=self._safe,
            context=context,
        )

        self._field_type = field_type

        if id_to_str:
            value = {"$toString": "$" + path}
        else:
            value = "$" + path

        # Grouping on a constant ``_id`` reduces all documents to a single one
        pipeline.append({"$group": {"_id": None, "max": {"$max": value}}})

        return pipeline


class Mean(Aggregation):
"""Computes the arithmetic mean of the field values of a collection.

Expand Down
Loading
Loading