From 8f1e6c48456b860893830c3e1ccdef99c1348a61 Mon Sep 17 00:00:00 2001 From: Benjamin Kane Date: Thu, 21 Nov 2024 16:20:52 -0500 Subject: [PATCH 1/3] add slice filter to qp sidebar --- .../__generated__/lightningQuery.graphql.ts | 3 +- .../state/src/recoil/queryPerformance.ts | 3 ++ app/schema.graphql | 1 + fiftyone/server/lightning.py | 30 ++++++++++++++----- 4 files changed, 28 insertions(+), 9 deletions(-) diff --git a/app/packages/relay/src/queries/__generated__/lightningQuery.graphql.ts b/app/packages/relay/src/queries/__generated__/lightningQuery.graphql.ts index 4749ee6299..e4510efa61 100644 --- a/app/packages/relay/src/queries/__generated__/lightningQuery.graphql.ts +++ b/app/packages/relay/src/queries/__generated__/lightningQuery.graphql.ts @@ -1,5 +1,5 @@ /** - * @generated SignedSource<> + * @generated SignedSource<> * @lightSyntaxTransform * @nogrep */ @@ -12,6 +12,7 @@ import { ConcreteRequest, Query } from 'relay-runtime'; export type LightningInput = { dataset: string; paths: ReadonlyArray; + slice?: string | null; }; export type LightningPathInput = { exclude?: ReadonlyArray | null; diff --git a/app/packages/state/src/recoil/queryPerformance.ts b/app/packages/state/src/recoil/queryPerformance.ts index 8392074e29..ed1f736f98 100644 --- a/app/packages/state/src/recoil/queryPerformance.ts +++ b/app/packages/state/src/recoil/queryPerformance.ts @@ -11,6 +11,7 @@ import { graphQLSelectorFamily } from "recoil-relay"; import type { ResponseFrom } from "../utils"; import { config } from "./config"; import { getBrowserStorageEffectForKey } from "./customEffects"; +import { groupSlice, groupStatistics } from "./groups"; import { isLabelPath } from "./labels"; import { RelayEnvironmentKey } from "./relay"; import * as schemaAtoms from "./schema"; @@ -34,6 +35,8 @@ export const lightningQuery = graphQLSelectorFamily< input: { dataset: get(datasetName), paths, + slice: + get(groupStatistics(false)) === "group" ? null : get(groupSlice), }, }; }, diff --git a/app/schema.graphql b/app/schema.graphql index 2b843f3a7a..f959dd13d2 100644 --- a/app/schema.graphql +++ b/app/schema.graphql @@ -444,6 +444,7 @@ input LabelTagColorInput { input LightningInput { dataset: String! paths: [LightningPathInput!]! + slice: String = null } input LightningPathInput { diff --git a/fiftyone/server/lightning.py b/fiftyone/server/lightning.py index 2b1d22df3d..5185a281ea 100644 --- a/fiftyone/server/lightning.py +++ b/fiftyone/server/lightning.py @@ -9,7 +9,6 @@ from bson import ObjectId from dataclasses import asdict, dataclass from datetime import date, datetime -import math import typing as t import asyncio @@ -46,6 +45,7 @@ class LightningPathInput: class LightningInput: dataset: str paths: t.List[LightningPathInput] + slice: t.Optional[str] = None @gql.interface @@ -138,7 +138,13 @@ async def lightning_resolver( for collection, sublist in zip(collections, queries) for item in sublist ] - result = await _do_async_pooled_queries(dataset, flattened) + + filter = ( + {f"{dataset.group_field}.name": input.slice} + if dataset.group_field and input.slice + else None + ) + result = await _do_async_pooled_queries(dataset, flattened, filter) results = [] offset = 0 @@ -293,10 +299,11 @@ async def _do_async_pooled_queries( queries: t.List[ t.Tuple[AsyncIOMotorCollection, t.Union[DistinctQuery, t.List[t.Dict]]] ], + filter: t.Optional[t.Mapping[str, str]], ): return await asyncio.gather( *[ - _do_async_query(dataset, collection, query) + _do_async_query(dataset, collection, query, filter) for collection, query in queries ] ) @@ -306,25 +313,28 @@ async def _do_async_query( dataset: fo.Dataset, collection: AsyncIOMotorCollection, query: t.Union[DistinctQuery, t.List[t.Dict]], + filter: t.Optional[t.Mapping[str, str]], ): if isinstance(query, DistinctQuery): if query.has_list and not query.filters: - return await _do_distinct_query(collection, query) + return await _do_distinct_query(collection, query, filter) - return await _do_distinct_pipeline(dataset, collection, query) + return await _do_distinct_pipeline(dataset, collection, filter) return [i async for i in collection.aggregate(query)] async def _do_distinct_query( - collection: AsyncIOMotorCollection, query: DistinctQuery + collection: AsyncIOMotorCollection, + query: DistinctQuery, + filter: t.Optional[t.Mapping[str, str]], ): match = None if query.search: match = query.search try: - result = await collection.distinct(query.path) + result = await collection.distinct(query.path, filter) except: # too many results return None @@ -350,12 +360,16 @@ async def _do_distinct_pipeline( dataset: fo.Dataset, collection: AsyncIOMotorCollection, query: DistinctQuery, + filter: t.Optional[t.Mapping[str, str]], ): pipeline = [] if query.filters: pipeline += get_view(dataset, filters=query.filters)._pipeline() - pipeline += [{"$sort": {query.path: 1}}] + if filter: + pipeline.append({"$match": filter}) + + pipeline.append({"$sort": {query.path: 1}}) if query.search: if query.is_object_id_field: From 4fa08db9336576b04dc9f5e616d0660e21299c87 Mon Sep 17 00:00:00 2001 From: Benjamin Kane Date: Thu, 21 Nov 2024 20:49:36 -0500 Subject: [PATCH 2/3] cleanup, add slice tests to lightning --- .../state/src/recoil/queryPerformance.ts | 7 +- docs/source/user_guide/app.rst | 6 - fiftyone/server/lightning.py | 11 +- tests/unittests/lightning_tests.py | 119 ++++++++++++++++-- 4 files changed, 119 insertions(+), 24 deletions(-) diff --git a/app/packages/state/src/recoil/queryPerformance.ts b/app/packages/state/src/recoil/queryPerformance.ts index ed1f736f98..bc9e2227ec 100644 --- a/app/packages/state/src/recoil/queryPerformance.ts +++ b/app/packages/state/src/recoil/queryPerformance.ts @@ -11,7 +11,7 @@ import { graphQLSelectorFamily } from "recoil-relay"; import type { ResponseFrom } from "../utils"; import { config } from "./config"; import { getBrowserStorageEffectForKey } from "./customEffects"; -import { groupSlice, groupStatistics } from "./groups"; +import { groupSlice } from "./groups"; import { isLabelPath } from "./labels"; import { RelayEnvironmentKey } from "./relay"; import * as schemaAtoms from "./schema"; @@ -35,8 +35,7 @@ export const lightningQuery = graphQLSelectorFamily< input: { dataset: get(datasetName), paths, - slice: - get(groupStatistics(false)) === "group" ? null : get(groupSlice), + slice: get(groupSlice), }, }; }, @@ -86,6 +85,8 @@ const indexesByPath = selector({ const { sampleIndexes: samples, frameIndexes: frames } = get(indexes); + console.log(samples); + const schema = gatherPaths(State.SPACE.SAMPLE); const frameSchema = gatherPaths(State.SPACE.FRAME).map((p) => p.slice("frames.".length) diff --git a/docs/source/user_guide/app.rst b/docs/source/user_guide/app.rst index 769f3fecb6..40b9bf0f59 100644 --- a/docs/source/user_guide/app.rst +++ b/docs/source/user_guide/app.rst @@ -489,8 +489,6 @@ perform initial filters on: # Note: it is faster to declare indexes before adding samples dataset.add_samples(...) - fo.app_config.default_query_performance = True - session = fo.launch_app(dataset) .. note:: @@ -521,8 +519,6 @@ compound index that includes the group slice name: dataset.create_index("ground_truth.detections.label") dataset.create_index([("group.name", 1), ("ground_truth.detections.label", 1)]) - fo.app_config.default_query_performance = True - session = fo.launch_app(dataset) For datasets with a small number of fields, you can index all fields by adding @@ -538,8 +534,6 @@ a single dataset = foz.load_zoo_dataset("quickstart") dataset.create_index("$**") - fo.app_config.default_query_performance = True - session = fo.launch_app(dataset) .. warning:: diff --git a/fiftyone/server/lightning.py b/fiftyone/server/lightning.py index 5185a281ea..701588864d 100644 --- a/fiftyone/server/lightning.py +++ b/fiftyone/server/lightning.py @@ -319,7 +319,10 @@ async def _do_async_query( if query.has_list and not query.filters: return await _do_distinct_query(collection, query, filter) - return await _do_distinct_pipeline(dataset, collection, filter) + return await _do_distinct_pipeline(dataset, collection, query, filter) + + if filter: + query.insert(0, {"$match": filter}) return [i async for i in collection.aggregate(query)] @@ -363,12 +366,12 @@ async def _do_distinct_pipeline( filter: t.Optional[t.Mapping[str, str]], ): pipeline = [] - if query.filters: - pipeline += get_view(dataset, filters=query.filters)._pipeline() - if filter: pipeline.append({"$match": filter}) + if query.filters: + pipeline += get_view(dataset, filters=query.filters)._pipeline() + pipeline.append({"$sort": {query.path: 1}}) if query.search: diff --git a/tests/unittests/lightning_tests.py b/tests/unittests/lightning_tests.py index 319315f89b..b631e8cf08 100644 --- a/tests/unittests/lightning_tests.py +++ b/tests/unittests/lightning_tests.py @@ -1053,6 +1053,91 @@ async def test_strings(self, dataset: fo.Dataset): ) +class TestGroupDatasetLightningQueries(unittest.IsolatedAsyncioTestCase): + @drop_async_dataset + async def test_group_dataset(self, dataset: fo.Dataset): + group = fo.Group() + one = fo.Sample( + classifications=fo.Classifications( + classifications=[fo.Classification(label="one")] + ), + filepath="one.png", + group=group.element("one"), + numeric=1, + string="one", + ) + two = fo.Sample( + classifications=fo.Classifications( + classifications=[fo.Classification(label="two")] + ), + filepath="two.png", + group=group.element("two"), + numeric=2, + string="two", + ) + dataset.add_samples([one, two]) + + query = """ + query Query($input: LightningInput!) { + lightning(input: $input) { + ... on IntLightningResult { + path + min + max + } + ... on StringLightningResult { + path + values + } + } + } + """ + + # only query "one" slice samples + result = await _execute( + query, + dataset, + (fo.IntField, fo.StringField), + ["classifications.classifications.label", "numeric", "string"], + frames=False, + slice="one", + ) + + self.assertListEqual( + result.data["lightning"], + [ + { + "path": "classifications.classifications.label", + "values": ["one"], + }, + {"path": "numeric", "min": 1.0, "max": 1.0}, + {"path": "string", "values": ["one"]}, + ], + ) + + # only query "two" slice samples + result = await _execute( + query, + dataset, + (fo.IntField, fo.StringField), + ["classifications.classifications.label", "numeric", "string"], + frames=False, + slice="two", + ) + + self.assertListEqual( + result.data["lightning"], + [ + { + "path": "classifications.classifications.label", + "values": ["two"], + }, + {"path": "numeric", "min": 2.0, "max": 2.0}, + {"path": "string", "values": ["two"]}, + ], + ) + + def _add_samples(dataset: fo.Dataset, *sample_data: t.List[t.Dict]): samples = [] keys = set() @@ -1067,7 +1152,12 @@ def _add_samples(dataset: fo.Dataset, *sample_data: t.List[t.Dict]): async def _execute( - query: str, dataset: fo.Dataset, field: fo.Field, keys: t.Set[str] + query: str, + dataset: fo.Dataset, + field: fo.Field, + keys: t.Set[str], + frames=True, + slice: t.Optional[str] = None, ): return await execute( schema, @@ -1076,7 +1166,8 @@ async def _execute( "input": asdict( LightningInput( dataset=dataset.name, - paths=_get_paths(dataset, field, keys), + paths=_get_paths(dataset, field, keys, frames=frames), + slice=slice, ) ) }, @@ -1084,17 +1175,23 @@ async def _execute( def _get_paths( - dataset: fo.Dataset, field_type: t.Type[fo.Field], keys: t.Set[str] + dataset: fo.Dataset, + field_type: t.Type[fo.Field], + keys: t.Set[str], + frames=True, ): field_dict = dataset.get_field_schema(flat=True) - field_dict.update( - **{ - f"frames.{path}": field - for path, field in dataset.get_frame_field_schema( - flat=True - ).items() - } - ) + + if frames: + field_dict.update( + **{ + f"frames.{path}": field + for path, field in dataset.get_frame_field_schema( + flat=True + ).items() + } + ) + paths: t.List[LightningPathInput] = [] for path in sorted(field_dict): field = field_dict[path] From 6a393af171f4c308c5bd0b817f2e0e4b47e4d17b Mon Sep 17 00:00:00 2001 From: Benjamin Kane Date: Fri, 22 Nov 2024 10:38:43 -0500 Subject: [PATCH 3/3] rm log --- app/packages/state/src/recoil/queryPerformance.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/app/packages/state/src/recoil/queryPerformance.ts b/app/packages/state/src/recoil/queryPerformance.ts index bc9e2227ec..439f167889 100644 --- a/app/packages/state/src/recoil/queryPerformance.ts +++ b/app/packages/state/src/recoil/queryPerformance.ts @@ -85,8 +85,6 @@ const indexesByPath = selector({ const { sampleIndexes: samples, frameIndexes: frames } = get(indexes); - console.log(samples); - const schema = gatherPaths(State.SPACE.SAMPLE); const frameSchema = gatherPaths(State.SPACE.FRAME).map((p) => p.slice("frames.".length)