diff --git a/packages/gatsby/src/redux/nodes.ts b/packages/gatsby/src/redux/nodes.ts
index 03c518d824304..5d952efe242ea 100644
--- a/packages/gatsby/src/redux/nodes.ts
+++ b/packages/gatsby/src/redux/nodes.ts
@@ -3,9 +3,27 @@ import { IGatsbyNode } from "./types"
 import { createPageDependency } from "./actions/add-page-dependency"
 import { IDbQueryElemMatch } from "../db/common/query"

+// Only list supported ops here. "CacheableFilterOp"
+type FilterOp = "$eq" | "$lte"
+// Note: `undefined` is an encoding for a property that does not exist
+type FilterValueNullable = string | number | boolean | null | undefined
+// This is the filter value in most cases
+type FilterValue = string | number | boolean
 export type FilterCacheKey = string
-export type FilterCache = Map<string | number | boolean, Set<IGatsbyNode>>
-export type FiltersCache = Map<FilterCacheKey, FilterCache>
+export interface IFilterCache {
+  op: FilterOp
+  // In this map, `undefined` values represent nodes that did not have the path
+  byValue: Map<FilterValueNullable, Set<IGatsbyNode>>
+  meta: {
+    // Ordered set of all values found by this filter. No null / undefs.
+    valuesAsc?: Array<FilterValue>
+    // Flat set of nodes, ordered by valuesAsc, but not ordered per value group
+    nodesByValueAsc?: Array<IGatsbyNode>
+    // Ranges of nodes per value, maps to the nodesByValueAsc array
+    valueRanges?: Map<FilterValue, [number, number]>
+  }
+}
+export type FiltersCache = Map<FilterCacheKey, IFilterCache>

 /**
  * Get all nodes from redux store.
@@ -152,24 +170,68 @@ export const addResolvedNodes = (
   return resolvedNodes
 }

+export const postIndexingMetaSetup = (filterCache: IFilterCache): void => {
+  // Create an ordered array of individual nodes, ordered (grouped) by the
+  // value to which the filter resolves. Nodes are not ordered per value.
+  // This way non-eq ops can simply slice the array to get a range.
+
+  const entriesNullable: Array<[FilterValueNullable, Set<IGatsbyNode>]> = [
+    ...filterCache.byValue.entries(),
+  ]
+
+  // These range checks never return `null` or `undefined` so filter those out
+  // By filtering them out early, the sort should be faster. Could be ...
+  const entries: Array<[
+    FilterValue,
+    Set<IGatsbyNode>
+  ]> = entriesNullable.filter(([v]) => v != null) as Array<
+    [FilterValue, Set<IGatsbyNode>]
+  >
+
+  // Sort all sets by their value, asc. Ignore/allow potential type casting.
+  entries.sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
+
+  const orderedNodes: Array<IGatsbyNode> = []
+  const orderedValues: Array<FilterValue> = []
+  const offsets: Map<FilterValue, [number, number]> = new Map()
+  entries.forEach(([v, bucket]: [FilterValue, Set<IGatsbyNode>]) => {
+    // Record the range containing all nodes with filter value v
+    // The last value of the range should be the offset of the next value
+    // (So you should be able to do `nodes.slice(start, stop)` to get them)
+    offsets.set(v, [orderedNodes.length, orderedNodes.length + bucket.size])
+    // We could do `arr.push(...bucket)` here but that's not safe with very
+    // large sets, so we use a regular loop
+    bucket.forEach(node => orderedNodes.push(node))
+    orderedValues.push(v)
+  })
+
+  filterCache.meta.valuesAsc = orderedValues
+  filterCache.meta.nodesByValueAsc = orderedNodes
+  // The nodesByValueAsc is ordered by value, but multiple nodes per value are
+  // not ordered. To make lt as fast as lte, we must know the start and stop
+  // index for each value. Similarly useful for `ne`.
+  filterCache.meta.valueRanges = offsets
+}
+
 /**
- * Given a ("flat") filter path leading up to "eq", a set of node types, and a
+ * Given a single non-elemMatch filter path, a set of node types, and a
  * cache, create a cache that for each resulting value of the filter contains
- * all the Nodes in a Set (or, if the property is `id`, just the Nodes).
+ * all the Nodes in a Set.
  * This cache is used for applying the filter and is a massive improvement over
- * looping over all the nodes, when the number of pages (/nodes) scale up.
+ * looping over all the nodes, when the number of pages (/nodes) scales up.
  */
-export const ensureIndexByTypedChain = (
-  cacheKey: FilterCacheKey,
-  chain: string[],
+export const ensureIndexByQuery = (
+  op: FilterOp,
+  filterCacheKey: FilterCacheKey,
+  filterPath: string[],
   nodeTypeNames: string[],
   filtersCache: FiltersCache
 ): void => {
   const state = store.getState()
   const resolvedNodesCache = state.resolvedNodesCache

-  const filterCache: FilterCache = new Map()
-  filtersCache.set(cacheKey, filterCache)
+  const filterCache: IFilterCache = { op, byValue: new Map(), meta: {} }
+  filtersCache.set(filterCacheKey, filterCache)

   // We cache the subsets of nodes by type, but only one type. So if searching
   // through one node type we can prevent a search through all nodes, otherwise
@@ -177,7 +239,7 @@ if (nodeTypeNames.length === 1) {
     getNodesByType(nodeTypeNames[0]).forEach(node => {
-      addNodeToFilterCache(node, chain, filterCache, resolvedNodesCache)
+      addNodeToFilterCache(node, filterPath, filterCache, resolvedNodesCache)
     })
   } else {
     // Here we must first filter for the node type
@@ -187,15 +249,19 @@
       return
     }

-      addNodeToFilterCache(node, chain, filterCache, resolvedNodesCache)
+      addNodeToFilterCache(node, filterPath, filterCache, resolvedNodesCache)
     })
   }
+
+  if (op === `$lte`) {
+    postIndexingMetaSetup(filterCache)
+  }
 }

 function addNodeToFilterCache(
   node: IGatsbyNode,
   chain: Array<string>,
-  filterCache: FilterCache,
+  filterCache: IFilterCache,
   resolvedNodesCache,
   valueOffset: any = node
 ): void {
@@ -209,35 +275,44 @@
   // - for plain query, valueOffset === node
   // - for elemMatch, valueOffset is sub-tree of the node to continue matching
   let v = valueOffset as any
+  let prev = v
   let i = 0
   while (i < chain.length && v) {
     const nextProp = chain[i++]
+    prev = v
     v = v[nextProp]
   }

   if (
     (typeof v !== `string` &&
       typeof v !== `number` &&
-      typeof v !== `boolean`) ||
+      typeof v !== `boolean` &&
+      v !== null) ||
     i !== chain.length
   ) {
-    // Not sure whether this is supposed to happen, but this means that either
-    // - The node chain ended with `undefined`, or
-    // - The node chain ended in something other than a primitive, or
-    // - A part in the chain in the object was not an object
-    return
+    if (chain[i - 1] in prev) {
+      // This means that either
+      // - The filter resolved to `undefined`, or
+      // - The filter resolved to something other than a primitive
+      return
+    }
+    // The filter path did not fully exist in node. Encode this as `undefined`.
+    // The edge case is that `eq` will return these for `null` checks while
+    // range checks like `lte` do not return these, so we make a distinction.
+    v = undefined
   }

-  let set = filterCache.get(v)
+  let set = filterCache.byValue.get(v)
   if (!set) {
     set = new Set()
-    filterCache.set(v, set)
+    filterCache.byValue.set(v, set)
   }
   set.add(node)
 }

 export const ensureIndexByElemMatch = (
-  cacheKey: FilterCacheKey,
+  op: FilterOp,
+  filterCacheKey: FilterCacheKey,
   filter: IDbQueryElemMatch,
   nodeTypeNames: Array<string>,
   filtersCache: FiltersCache
@@ -248,8 +323,8 @@
   const state = store.getState()
   const { resolvedNodesCache } = state

-  const filterCache: FilterCache = new Map()
-  filtersCache.set(cacheKey, filterCache)
+  const filterCache: IFilterCache = { op, byValue: new Map(), meta: {} }
+  filtersCache.set(filterCacheKey, filterCache)

   if (nodeTypeNames.length === 1) {
     getNodesByType(nodeTypeNames[0]).forEach(node => {
@@ -277,13 +352,17 @@
       )
     })
   }
+
+  if (op === `$lte`) {
+    postIndexingMetaSetup(filterCache)
+  }
 }

 function addNodeToBucketWithElemMatch(
   node: IGatsbyNode,
   valueAtCurrentStep: any, // Arbitrary step on the path inside the node
   filter: IDbQueryElemMatch,
-  filterCache: FilterCache,
+  filterCache: IFilterCache,
   resolvedNodesCache
 ): void {
   // There can be a filter that targets `__gatsby_resolved` so fix that first
@@ -337,24 +416,127 @@
   }
 }

+const binarySearch = (
+  values: Array<FilterValue>,
+  needle: FilterValue
+): [number, number] | undefined => {
+  let min = 0
+  let max = values.length - 1
+  let pivot = Math.floor(values.length / 2)
+  while (min <= max) {
+    const value = values[pivot]
+    if (needle < value) {
+      // Move pivot to middle of nodes left of current pivot
+      // assert pivot < max
+      max = pivot
+    } else if (needle > value) {
+      // Move pivot to middle of nodes right of current pivot
+      // assert pivot > min
+      min = pivot
+    } else {
+      // This means needle === value
+      // TODO: except for NaN ... and potentially certain type casting cases
+      return [pivot, pivot]
+    }

+    if (max - min <= 1) {
+      // End of search. Needle not found (as expected). Use pivot as index.
+      // If the needle was not found, max-min==1 and max is returned.
+      return [min, max]
+    }

+    // Note: the pivot must be offset by `min`, otherwise the search would
+    // index relative to the start of the array and could loop forever
+    pivot = min + Math.floor((max - min) / 2)
+  }

+  // Shouldn't be reachable, but just in case, fall back to Sift if so.
+  return undefined
+}
+
 /**
- * Given a ("flat") filter path leading up to "eq", a target value to filter
- * for, a set of node types, and a pre-generated lookup cache, return the set
- * of Nodes (or, if the property is `id` just the Node) which pass the filter.
- * This returns `undefined` if there is Node that passes the filter.
+ * Given the cache key for a filter and a target value, return the set of nodes
+ * that resolve to this value.
+ * This returns `undefined` if there is no such node.
  *
  * Basically if the filter was {a: {b: {slug: {eq: "foo/bar"}}}} then it will
  * return all the nodes that have `node.slug === "foo/bar"`. That usually (but
  * not always) at most one node for slug, but this filter can apply to anything.
- *
- * The only exception is `id`, since internally there can be at most one node
- * per `id` so there's a minor optimization for that (no need for Sets).
  */
-export const getFilterCacheByTypedChain = (
-  cacheKey: FilterCacheKey,
-  value: boolean | number | string,
+export const getNodesFromCacheByValue = (
+  filterCacheKey: FilterCacheKey,
+  filterValue: FilterValueNullable,
   filtersCache: FiltersCache
 ): Set<IGatsbyNode> | undefined => {
-  const byTypedKey = filtersCache?.get(cacheKey)
-  return byTypedKey?.get(value)
+  const filterCache = filtersCache?.get(filterCacheKey)
+  if (!filterCache) {
+    return undefined
+  }
+
+  const op = filterCache.op
+
+  if (op === `$eq`) {
+    if (filterValue == null) {
+      // Edge case; fetch all nodes for `null` and `undefined` because `$eq`
+      // also returns nodes without the path when searching for `null`. Other
+      // ops do not, so we map non-existing paths to `undefined`.
+      return new Set([
+        ...(filterCache.byValue.get(null) ?? []),
+        ...(filterCache.byValue.get(undefined) ?? []),
+      ])
+    }
+    return filterCache.byValue.get(filterValue)
+  }
+
+  if (op === `$lte`) {
+    // First try a direct approach. If a value is queried that also exists then
+    // we can prevent a binary search through the whole set, O(1) vs O(log n)
+
+    if (filterValue == null) {
+      // This is an edge case and this value should be directly indexed
+      // For `lte` this should only return nodes for `null`, not a "range"
+      return filterCache.byValue.get(filterValue)
+    }
+
+    const ranges = filterCache.meta.valueRanges
+    const nodes = filterCache.meta.nodesByValueAsc
+
+    const range = ranges!.get(filterValue)
+    if (range) {
+      return new Set(nodes!.slice(0, range[1]))
+    }
+
+    // Query may ask for a value that doesn't appear in the set, like if the
+    // set is [1, 2, 5, 6] and the query is <= 3. In that case we have to
+    // apply a search (we'll do binary) to determine the offset to slice from.
+
+    // Note: for lte, the valuesAsc array must be set at this point
+    const values = filterCache.meta.valuesAsc as Array<FilterValue>
+    // It shouldn't find the targetValue (but it might) and return the index of
+    // the two values between which targetValue sits, or first/last element.
+    const point = binarySearch(values, filterValue)
+    if (!point) {
+      return undefined
+    }
+    const [pivotMin, pivotMax] = point
+    // Each pivot index must have a value and a range
+    // The returned min/max index may include the lower/upper bound, so we still
+    // have to do lte checks for both values.
+    let pivotValue = values[pivotMax]
+    if (pivotValue > filterValue) {
+      pivotValue = values[pivotMin]
+    }
+
+    // Note: the pivot value _shouldn't_ match the filter value because that
+    // means the value was actually found, but those should have been indexed
+    // so should have yielded a result in the .get() above.
+
+    const [exclPivot, inclPivot] = ranges!.get(pivotValue) as [number, number]
+
+    // Note: technically, `5 <= "5" === true` but `5` would not be cached.
+    // So we have to consider weak comparison and may have to include the pivot
+    const until = pivotValue <= filterValue ? inclPivot : exclPivot
+    return new Set(nodes!.slice(0, until))
+  }
+
+  // Unreachable because we checked all values of FilterOp (which op is)
+  return undefined
 }
diff --git a/packages/gatsby/src/redux/run-sift.js b/packages/gatsby/src/redux/run-sift.js
index bd511eaf7b2cd..0f3f1140ce963 100644
--- a/packages/gatsby/src/redux/run-sift.js
+++ b/packages/gatsby/src/redux/run-sift.js
@@ -14,13 +14,21 @@ const {
   dbQueryToSiftQuery,
 } = require(`../db/common/query`)
 const {
-  ensureIndexByTypedChain,
+  ensureIndexByQuery,
   ensureIndexByElemMatch,
-  getFilterCacheByTypedChain,
+  getNodesFromCacheByValue,
   addResolvedNodes,
   getNode: siftGetNode,
 } = require(`./nodes`)

+const FAST_OPS = [
+  `$eq`,
+  // "$lt",
+  `$lte`,
+  // "$gt",
+  // "$gte"
+]
+
 /**
  * Creates a key for one filterCache inside FiltersCache
  *
@@ -28,7 +36,7 @@ const {
  * @param {DbQuery} filter
  * @returns {FilterCacheKey} (a string: `types.join()/path.join()/operator` )
  */
-const createTypedFilterCacheKey = (typeNames, filter) => {
+const createFilterCacheKey = (typeNames, filter) => {
   // Note: while `elemMatch` is a special case, in the key it's just `elemMatch`
   // (This function is future proof for elemMatch support, won't receive it yet)
   let f = filter
@@ -129,13 +137,13 @@ function handleMany(siftArgs, nodes) {
 }

 /**
- * Given the chain of a simple filter, return the set of nodes that pass the
- * filter. The chain should be a property chain leading to the property to
- * check, followed by the value to check against. Common example:
- * `allThings(filter: { fields: { slug: { eq: $slug } } })`
+ * Given a set of filters, return the sets of nodes that pass
+ * each filter.
  * Only nodes of given node types will be considered
  * A fast index is created if one doesn't exist yet so cold call is slower.
- * The empty result value is null if firstOnly is false, or else an empty array.
+ * Returns undefined if an op was not supported for fast indexes or when no
+ * nodes were found for the given (query) value. In the zero nodes case, we have
+ * to go through Sift to make sure we're not missing an edge case, for now.
  *
@@ -143,23 +151,33 @@
  * @param {Array<DbQuery>} filters Resolved. (Should be checked by caller to exist)
  * @param {Array<string>} nodeTypeNames
  * @param {FiltersCache} filtersCache
  * @returns {Array<IGatsbyNode> | undefined}
  */
 const runFiltersWithoutSift = (filters, nodeTypeNames, filtersCache) => {
-  const caches = getBucketsForFilters(filters, nodeTypeNames, filtersCache)
+  const nodesPerValueSets /*: Array<Set<IGatsbyNode>> */ = getBucketsForFilters(
+    filters,
+    nodeTypeNames,
+    filtersCache
+  )

-  if (!caches) {
+  if (!nodesPerValueSets) {
     // Let Sift take over as fallback
     return undefined
   }

   // Put smallest last (we'll pop it)
-  caches.sort((a, b) => b.length - a.length)
+  nodesPerValueSets.sort(
+    (a /*: Set<IGatsbyNode> */, b /*: Set<IGatsbyNode> */) => b.size - a.size
+  )
   // Iterate on the set with the fewest elements and create the intersection
-  const needles = caches.pop()
+  const needles /*: Set<IGatsbyNode> */ = nodesPerValueSets.pop()

   // Take the intersection of the retrieved caches-by-value
-  const result = []
+  const result /*: Array<IGatsbyNode> */ = []
   // This _can_ still be expensive but the set of nodes should be limited ...
-  needles.forEach(node => {
-    if (caches.every(cache => cache.has(node))) {
+  needles.forEach((node /*: IGatsbyNode */) => {
+    if (
+      nodesPerValueSets.every((cache /*: Set<IGatsbyNode> */) =>
+        cache.has(node)
+      )
+    ) {
       // Every cache set contained this node so keep it
       result.push(node)
     }
   })
@@ -169,6 +187,9 @@
   // Consider the case of {a: {eq: 5}, b: {eq: 10}}, do we cache the [5,10]
   // case for all value pairs? How likely is that to ever be reused?

+  if (result.length === 0) {
+    return undefined
+  }
   return result
 }

@@ -180,36 +201,36 @@
  * cache was not found. Must fallback to sift.
  */
 const getBucketsForFilters = (filters, nodeTypeNames, filtersCache) => {
-  const filterCaches /*: Array<FilterCache>*/ = []
+  const nodesPerValueSets /*: Array<Set<IGatsbyNode>>*/ = []

   // Fail fast while trying to create and get the value-cache for each path
   let every = filters.every((filter /*: DbQuery*/) => {
-    let cacheKey = createTypedFilterCacheKey(nodeTypeNames, filter)
+    let filterCacheKey = createFilterCacheKey(nodeTypeNames, filter)
     if (filter.type === `query`) {
       // (Let TS warn us if a new query type gets added)
       const q /*: IDbQueryQuery */ = filter
       return getBucketsForQueryFilter(
-        cacheKey,
+        filterCacheKey,
         q,
         nodeTypeNames,
         filtersCache,
-        filterCaches
+        nodesPerValueSets
       )
     } else {
       // (Let TS warn us if a new query type gets added)
       const q /*: IDbQueryElemMatch*/ = filter
       return collectBucketForElemMatch(
-        cacheKey,
+        filterCacheKey,
         q,
         nodeTypeNames,
         filtersCache,
-        filterCaches
+        nodesPerValueSets
       )
     }
   })

   if (every) {
-    return filterCaches
+    return nodesPerValueSets
   }

   // "failed at least one"
@@ -219,62 +240,72 @@

 /**
  * Fetch all buckets for given query filter. That means it's not elemMatch.
  *
- * @param {FilterCacheKey} cacheKey
+ * @param {FilterCacheKey} filterCacheKey
  * @param {IDbQueryQuery} filter
  * @param {Array<string>} nodeTypeNames
  * @param {FiltersCache} filtersCache
- * @param {Array<FilterCache>} filterCaches
+ * @param {Array<Set<IGatsbyNode>>} nodesPerValueSets
  * @returns {boolean} false means soft fail, filter must go through Sift
  */
 const getBucketsForQueryFilter = (
-  cacheKey,
+  filterCacheKey,
   filter,
   nodeTypeNames,
   filtersCache,
-  filterCaches
+  nodesPerValueSets
 ) => {
   let {
-    path: chain,
-    query: { value: targetValue },
+    path: filterPath,
+    query: {
+      // Note: comparator is verified to be a FilterOp in filterWithoutSift
+      comparator /*: as FilterOp*/,
+      value: filterValue,
+    },
   } = filter

-  if (!filtersCache.has(cacheKey)) {
-    ensureIndexByTypedChain(cacheKey, chain, nodeTypeNames, filtersCache)
+  if (!filtersCache.has(filterCacheKey)) {
+    ensureIndexByQuery(
+      comparator,
+      filterCacheKey,
+      filterPath,
+      nodeTypeNames,
+      filtersCache
+    )
   }

-  const filterCache = getFilterCacheByTypedChain(
-    cacheKey,
-    targetValue,
+  const nodesPerValue /*: Set<IGatsbyNode> | undefined */ = getNodesFromCacheByValue(
+    filterCacheKey,
+    filterValue,
     filtersCache
   )

   // If we couldn't find the needle then maybe sift can, for example if the
   // schema contained a proxy; `slug: String @proxy(from: "slugInternal")`
   // There are also cases (and tests) where id exists with a different type
-  if (!filterCache) {
+  if (!nodesPerValue) {
     return false
   }

   // In all other cases this must be a non-empty Set because the indexing
   // mechanism does not create a Set unless there's a IGatsbyNode for it
-  filterCaches.push(filterCache)
+  nodesPerValueSets.push(nodesPerValue)

   return true
 }

 /**
- * @param {string} typedKey
+ * @param {FilterCacheKey} filterCacheKey
  * @param {IDbQueryElemMatch} filter
  * @param {Array<string>} nodeTypeNames
  * @param {FiltersCache} filtersCache
- * @param {Array<FilterCache>} filterCaches Matching node sets are put in this array
+ * @param {Array<Set<IGatsbyNode>>} nodesPerValueSets Matching node sets are put in this array
  */
 const collectBucketForElemMatch = (
-  typedKey,
+  filterCacheKey,
   filter,
   nodeTypeNames,
   filtersCache,
-  filterCaches
+  nodesPerValueSets
 ) => {
   // Get comparator and target value for this elemMatch
   let comparator = ``
@@ -292,22 +323,22 @@
     }
   }

-  if (
-    ![
-      `$eq`,
-      // "$lte",
-      // "$gte",
-    ].includes(comparator)
-  ) {
+  if (!FAST_OPS.includes(comparator)) {
     return false
   }

-  if (!filtersCache.has(typedKey)) {
-    ensureIndexByElemMatch(typedKey, filter, nodeTypeNames, filtersCache)
+  if (!filtersCache.has(filterCacheKey)) {
+    ensureIndexByElemMatch(
+      comparator,
+      filterCacheKey,
+      filter,
+      nodeTypeNames,
+      filtersCache
+    )
   }

-  const nodesByKeyValue /*: Set | undefined*/ = getFilterCacheByTypedChain(
-    typedKey,
+  const nodesByValue /*: Set<IGatsbyNode> | undefined*/ = getNodesFromCacheByValue(
+    filterCacheKey,
     targetValue,
     filtersCache
   )
@@ -315,13 +346,13 @@
   // If we couldn't find the needle then maybe sift can, for example if the
   // schema contained a proxy; `slug: String @proxy(from: "slugInternal")`
   // There are also cases (and tests) where id exists with a different type
-  if (!nodesByKeyValue) {
+  if (!nodesByValue) {
     return false
   }

   // In all other cases this must be a non-empty Set because the indexing
   // mechanism does not create a Set unless there's a IGatsbyNode for it
-  filterCaches.push(nodesByKeyValue)
+  nodesPerValueSets.push(nodesByValue)

   return true
 }

@@ -330,7 +361,6 @@
  * Filters and sorts a list of nodes using mongodb-like syntax.
  *
  * @param args raw graphql query filter/sort as an object
- * @property {boolean | number | string} args.type gqlType. See build-node-types
  * @property {boolean} args.firstOnly true if you want to return only the first
  *   result found. This will return a collection of size 1. Not a single element
  * @property {{filter?: Object, sort?: Object} | undefined} args.queryArgs
@@ -444,9 +474,11 @@ const filterToStats = (
 }

 /**
- * Check if the filter is "flat" (single leaf) and an "$eq". If so, uses custom
- * indexes based on filter and types and returns any result it finds.
- * If conditions are not met or no nodes are found, returns undefined.
+ * Check if filter op is supported (not all are). If so, uses custom
+ * fast indexes based on filter and types and returns any result it finds.
+ * If conditions are not met or no nodes are found, returns undefined and
+ * a slow run through Sift is executed instead.
+ * This function is a noop if no filter cache is given to it.
  *
  * @param {Array<DbQuery>} filters Resolved. (Should be checked by caller to exist)
  * @param {Array<string>} nodeTypeNames
  * @param {FiltersCache} filtersCache
  * @returns {Array<IGatsbyNode> | undefined} Collection of results
  */
 const filterWithoutSift = (filters, nodeTypeNames, filtersCache) => {
-  // This can also be `$ne`, `$in` or any other grapqhl comparison op
-
   if (!filtersCache) {
     // If no filter cache is passed on, explicitly don't use one
     return undefined
   }

   if (filters.length === 0) {
     // If no filters are given, go through Sift. This does not appear to be
-    // slower than s
-    // hortcutting it here.
+    // slower than shortcutting it here.
     return undefined
   }

   if (
     filters.some(
       filter =>
-        filter.type === `query` && // enabled
-        // filter.type === `elemMatch` || // disabled
-        ![`$eq`].includes(filter.query.comparator)
+        filter.type === `query` && !FAST_OPS.includes(filter.query.comparator)
     )
   ) {
     // If there's a filter with non-supported op, stop now.
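
To make the mechanism in the two files above concrete, here is a minimal standalone sketch of the range-index idea, assuming hypothetical names (`RangeIndex`, `buildRangeIndex`, `lteSet`) and numeric values only; the real cache also admits strings and booleans, and uses a binary search where this sketch scans linearly. It is an illustration under those assumptions, not code from this PR:

```ts
// Standalone sketch of the ordered-bucket index that postIndexingMetaSetup
// builds. All names here are made up for the example; values are numbers
// only to keep TypeScript's comparison rules simple.
type Value = number

interface RangeIndex<T> {
  valuesAsc: Value[] // every distinct value, ascending
  nodesByValueAsc: T[] // nodes flattened, grouped by value
  valueRanges: Map<Value, [number, number]> // value -> [start, end) slice
}

function buildRangeIndex<T>(byValue: Map<Value, Set<T>>): RangeIndex<T> {
  const entries = [...byValue.entries()].sort(([a], [b]) => a - b)
  const valuesAsc: Value[] = []
  const nodesByValueAsc: T[] = []
  const valueRanges = new Map<Value, [number, number]>()
  for (const [v, bucket] of entries) {
    // The end offset of this value is the start offset of the next one
    valueRanges.set(v, [
      nodesByValueAsc.length,
      nodesByValueAsc.length + bucket.size,
    ])
    bucket.forEach(node => nodesByValueAsc.push(node))
    valuesAsc.push(v)
  }
  return { valuesAsc, nodesByValueAsc, valueRanges }
}

// `lte` resolves to a single slice: find the last value <= filterValue and
// cut at the inclusive end of its range. A linear scan stands in for the
// binary search the real implementation uses.
function lteSet<T>(index: RangeIndex<T>, filterValue: Value): Set<T> {
  let until = 0
  for (const v of index.valuesAsc) {
    if (v > filterValue) break
    until = index.valueRanges.get(v)![1]
  }
  return new Set(index.nodesByValueAsc.slice(0, until))
}

// Example: values [1, 2, 5, 6]; `lte: 3` slices through the end of value 2.
const index = buildRangeIndex(
  new Map([
    [5, new Set(["e"])],
    [1, new Set(["a", "b"])],
    [6, new Set(["f"])],
    [2, new Set(["c", "d"])],
  ])
)
console.log([...lteSet(index, 3)]) // => ["a", "b", "c", "d"]
```
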
diff --git a/packages/gatsby/src/schema/__tests__/node-model.js b/packages/gatsby/src/schema/__tests__/node-model.js
index cb736bab5680f..3c4b10b4b86d9 100644
--- a/packages/gatsby/src/schema/__tests__/node-model.js
+++ b/packages/gatsby/src/schema/__tests__/node-model.js
@@ -296,7 +296,7 @@ describe(`NodeModel`, () => {
         filter: { frontmatter: { published: { eq: false } } },
       }
       const firstOnly = true
-      nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+      nodeModel.replaceFiltersCache(createFiltersCache())
       const result = await nodeModel.runQuery({
         query,
         firstOnly,
@@ -311,7 +311,7 @@ describe(`NodeModel`, () => {
         filter: { frontmatter: { published: { eq: false } } },
       }
       const firstOnly = false
-      nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+      nodeModel.replaceFiltersCache(createFiltersCache())
       const result = await nodeModel.runQuery({
         query,
         firstOnly,
@@ -328,7 +328,7 @@ describe(`NodeModel`, () => {
         filter: { frontmatter: { published: { eq: false } } },
       }
       const firstOnly = false
-      nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+      nodeModel.replaceFiltersCache(createFiltersCache())
       await nodeModel.runQuery(
         {
           query,
@@ -354,7 +354,7 @@ describe(`NodeModel`, () => {
         filter: { frontmatter: { published: { eq: false } } },
       }
       const firstOnly = false
-      nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+      nodeModel.replaceFiltersCache(createFiltersCache())
       await nodeModel.withContext({ path: `/` }).runQuery({
         query,
         firstOnly,
@@ -377,7 +377,7 @@ describe(`NodeModel`, () => {
         filter: { frontmatter: { published: { eq: false } } },
       }
       const firstOnly = false
-      nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+      nodeModel.replaceFiltersCache(createFiltersCache())
       await nodeModel.runQuery(
         {
           query,
@@ -397,7 +397,7 @@ describe(`NodeModel`, () => {
       const type = `AllFiles`
       const query = {}
       const firstOnly = true
-      nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+      nodeModel.replaceFiltersCache(createFiltersCache())
       const result = nodeModel.runQuery({
         query,
         firstOnly,
@@ -412,7 +412,7 @@ describe(`NodeModel`, () => {
       const type = `TeamMember`
       const query = { name: { ne: null } }
       const firstOnly = true
-      nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+      nodeModel.replaceFiltersCache(createFiltersCache())
       const result = await nodeModel.runQuery({
         query,
         firstOnly,
@@ -429,7 +429,7 @@ describe(`NodeModel`, () => {
         },
       }
       const firstOnly = false
-      nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+      nodeModel.replaceFiltersCache(createFiltersCache())
       const result = await nodeModel.runQuery({
         query,
         firstOnly,
@@ -448,7 +448,7 @@ describe(`NodeModel`, () => {
         },
       }
       const firstOnly = true
-      nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+      nodeModel.replaceFiltersCache(createFiltersCache())
       const result = await nodeModel.runQuery({
         query,
         firstOnly,
@@ -555,7 +555,7 @@ describe(`NodeModel`, () => {
       { desc: `no cache`, cb: () => null }, // Always goes through sift
     ].forEach(({ desc, cb: createFiltersCache }) => {
       it(`[${desc}] should not resolve prepared nodes more than once`, async () => {
-        nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+        nodeModel.replaceFiltersCache(createFiltersCache())
         await nodeModel.runQuery(
           {
             query: { filter: { betterTitle: { eq: `foo` } } },
@@ -566,7 +566,7 @@ describe(`NodeModel`, () => {
         )
         expect(resolveBetterTitleMock.mock.calls.length).toBe(2)
         expect(resolveOtherTitleMock.mock.calls.length).toBe(0)
-        nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+        nodeModel.replaceFiltersCache(createFiltersCache())
        await nodeModel.runQuery(
           {
             query: { filter: { betterTitle: { eq: `foo` } } },
@@ -577,7 +577,7 @@ describe(`NodeModel`, () => {
         )
         expect(resolveBetterTitleMock.mock.calls.length).toBe(2)
         expect(resolveOtherTitleMock.mock.calls.length).toBe(0)
-        nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+        nodeModel.replaceFiltersCache(createFiltersCache())
         await nodeModel.runQuery(
           {
             query: {
@@ -590,7 +590,7 @@ describe(`NodeModel`, () => {
         )
         expect(resolveBetterTitleMock.mock.calls.length).toBe(2)
         expect(resolveOtherTitleMock.mock.calls.length).toBe(2)
-        nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+        nodeModel.replaceFiltersCache(createFiltersCache())
         await nodeModel.runQuery(
           {
             query: {
@@ -603,7 +603,7 @@ describe(`NodeModel`, () => {
         )
         expect(resolveBetterTitleMock.mock.calls.length).toBe(2)
         expect(resolveOtherTitleMock.mock.calls.length).toBe(2)
-        nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+        nodeModel.replaceFiltersCache(createFiltersCache())
         await nodeModel.runQuery(
           {
             query: {
@@ -619,7 +619,7 @@ describe(`NodeModel`, () => {
       })

       it(`[${desc}] can filter by resolved fields`, async () => {
-        nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+        nodeModel.replaceFiltersCache(createFiltersCache())
         const result = await nodeModel.runQuery(
           {
             query: {
@@ -769,7 +769,7 @@ describe(`NodeModel`, () => {
     ].forEach(({ desc, cb: createFiltersCache }) => {
       describe(`[${desc}] Tracks nodes returned by queries`, () => {
         it(`Tracks objects when running query without filter`, async () => {
-          nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+          nodeModel.replaceFiltersCache(createFiltersCache())
           const result = await nodeModel.runQuery({
             query: {},
             type: schema.getType(`Test`),
@@ -786,7 +786,7 @@ describe(`NodeModel`, () => {
         })

         it(`Tracks objects when running query with filter`, async () => {
-          nodeModel.replaceTypeKeyValueCache(createFiltersCache())
+          nodeModel.replaceFiltersCache(createFiltersCache())
           const result = await nodeModel.runQuery({
             query: {
               filter: {
diff --git a/packages/gatsby/src/schema/__tests__/run-query.js b/packages/gatsby/src/schema/__tests__/run-query.js
index cad1d109f06e7..9c2d373a94790 100644
--- a/packages/gatsby/src/schema/__tests__/run-query.js
+++ b/packages/gatsby/src/schema/__tests__/run-query.js
@@ -216,9 +216,11 @@ function resetDb(nodes) {
   )
 }

+let nodesAfterLastRunQuery
 async function runQuery(queryArgs, filtersCache) {
   const nodes = makeNodes()
   resetDb(nodes)
+  nodesAfterLastRunQuery = nodes
   const { sc, type: gqlType } = makeGqlType(nodes)
   const args = {
     gqlType,
@@ -248,12 +250,17 @@ it(`should use the cache argument`, async () => {

   // Confirm cache is not ignored
   expect(filtersCache.size === 1).toBe(true)
-  filtersCache.forEach((filterCache, cacheKey) => {
+  filtersCache.forEach((
+    filterCache /*: IFilterCache */,
+    cacheKey /*: FilterCacheKey */
+  ) => {
     // This test will change when the composition of the FilterCache changes
     // For now it should be a Map of values to Set of nodes
-    expect(filterCache instanceof Map).toBe(true)
+    expect(filterCache instanceof Object).toBe(true)
+    expect(filterCache.byValue instanceof Map).toBe(true)
+    expect(filterCache.meta instanceof Object).toBe(true)
     // There ought to be at least one value mapped (probably more, shrug)
-    expect(filterCache.size >= 1).toBe(true)
+    expect(filterCache.byValue.size >= 1).toBe(true)
   })
 })

@@ -367,8 +374,7 @@ it(`should use the cache argument`, async () => {
   let result = await runFilter({ hair: { lt: 2 } })
   expect(result.length).toEqual(2)
-  expect(result[0].hair).toEqual(1)
-  expect(result[1].hair).toEqual(0)
+  result.forEach(r => expect(r.hair < 2).toBe(true))
 })

 it(`handles lt operator with null`, async () => {
@@ -383,9 +389,68 @@
 it(`handles lte operator with number`, async () => {
   let result = await runFilter({ hair: { lte: 1 } })

-  expect(result.length).toEqual(2)
-  expect(result[0].hair).toEqual(1)
-  expect(result[1].hair).toEqual(0)
+  let actual = nodesAfterLastRunQuery.reduce(
+    (acc, node) => (node.hair <= 1 ? acc + 1 : acc),
+    0
+  )
+
+  expect(actual).not.toBe(0) // Test should keep this invariant!
+  expect(result.length).toEqual(actual)
+  result.forEach(r => expect(r.hair <= 1).toBe(true))
+})
+
+it(`should lte when value is lower than all found values`, async () => {
+  if (IS_LOKI) return
+
+  let result = await runFilter({ float: { lte: 1 } })
+
+  let actual = nodesAfterLastRunQuery.reduce(
+    (acc, node) => (node.float <= 1 ? acc + 1 : acc),
+    0
+  )
+
+  expect(actual).toEqual(0) // Make sure test nodes keep this invariant!
+  expect(result).toEqual(null) // Zero results yields null
+})
+
+it(`should lte when value is in the middle of all found values`, async () => {
+  let result = await runFilter({ float: { lte: 2 } })
+
+  let actual = nodesAfterLastRunQuery.reduce(
+    (acc, node) => (node.float <= 2 ? acc + 1 : acc),
+    0
+  )
+
+  expect(result.length).toEqual(actual)
+  result.forEach(r => expect(r.float <= 2).toBe(true))
+})
+
+it(`should lte when value is higher than all found values`, async () => {
+  let result = await runFilter({ float: { lte: 5 } })
+
+  let actual = nodesAfterLastRunQuery.reduce(
+    (acc, node) => (node.float <= 5 ? acc + 1 : acc),
+    0
+  )
+
+  expect(result.length).toEqual(actual)
+})
+
+it.skip(`should lte when type coercion fails direct value lookup`, async () => {
+  // Here 1.5 exists but only as number. However, `1.5 <= '1.5' === true`
+  // This test checks whether we don't incorrectly assume that if the
+  // value wasn't mapped, that it can't be found.
+  let result = await runFilter({ float: { lte: `1.5` } })
+
+  let actual = nodesAfterLastRunQuery.reduce(
+    (acc, node) => (node.float <= 1.5 ? acc + 1 : acc),
+    0
+  )
+
+  expect(result).not.toBe(undefined)
+  expect(result).not.toBe(null)
+  expect(result.length).toEqual(actual)
+  result.forEach(r => expect(r.float <= 1.5).toBe(true))
 })

 it(`handles lte operator with null`, async () => {
@@ -393,8 +458,14 @@ it(`should use the cache argument`, async () => {

   let result = await runFilter({ nil: { lte: null } })

+  let actual = nodesAfterLastRunQuery.reduce(
+    (acc, node) => (node.nil <= null ? acc + 1 : acc),
+    0
+  )
+
   // lte null matches null but no nodes without the property (NULL)
-  expect(result.length).toEqual(1)
+  expect(actual).not.toBe(0) // Test should keep this invariant!
+  expect(result.length).toEqual(actual)
   expect(result[0].name).toEqual(`The Mad Wax`)
   expect(result[0].nil).toEqual(null)
 })
@@ -419,9 +490,14 @@ it(`should use the cache argument`, async () => {
 it(`handles gte operator with number`, async () => {
   let result = await runFilter({ hair: { gte: 1 } })

-  expect(result.length).toEqual(2)
-  expect(result[0].hair).toEqual(1)
-  expect(result[1].hair).toEqual(2)
+  let actual = nodesAfterLastRunQuery.reduce(
+    (acc, node) => (node.hair >= 1 ? acc + 1 : acc),
+    0
+  )
+
+  expect(actual).not.toBe(0) // Test invariant should hold
+  expect(result.length).toEqual(actual)
+  result.forEach(r => expect(r.hair >= 1).toBe(true))
 })

 it(`handles gte operator with null`, async () => {
@@ -429,10 +505,18 @@ it(`should use the cache argument`, async () => {

   let result = await runFilter({ nil: { gte: null } })

-  // lte null matches null but no nodes without the property (NULL)
-  expect(result.length).toEqual(1)
-  expect(result[0].name).toEqual(`The Mad Wax`)
-  expect(result[0].nil).toEqual(null)
+  let actual = nodesAfterLastRunQuery.reduce(
+    (acc, node) => (node.nil >= null ? acc + 1 : acc),
+    0
+  )
+
+  // gte null matches null but no nodes without the property (NULL)
+  expect(actual).not.toBe(0) // Test invariant should hold
+  expect(result.length).toEqual(actual)
+  result.forEach(
+    // Note: confirm no `null` is returned for >= null
+    r => expect(r.nil === null).toBe(true)
+  )
 })

 it(`handles the regex operator without flags`, async () => {
@@ -590,6 +674,9 @@ it(`should use the cache argument`, async () => {
   })

   expect(result.length).toEqual(1)
+  expect(
+    result[0]?.singleElem?.things.some(e => e?.one?.two?.three === 123)
+  ).toEqual(true)
 })

 it(`handles the elemMatch operator on the second element`, async () => {
@@ -803,12 +890,14 @@ it(`should use the cache argument`, async () => {
   // nodes that do not have the field at all (NULL).
   expect(result.length).toEqual(2)

-  result.forEach(edge => {
-    // Either does not exist or does not contain
-    expect(edge.anArray === undefined || !edge.anArray.includes(5)).toBe(
-      true
-    )
-  })
+  // Either does not exist or does not contain
+  result
+    .filter(edge => edge.anArray !== undefined)
+    .forEach(edge => {
+      // In this test, if the property exists it should be an array
+      expect(Array.isArray(edge.anArray)).toBe(true)
+      expect(edge.anArray.includes(5)).toBe(false)
+    })
 })

 it(`handles the nin operator for array [null]`, async () => {
diff --git a/packages/gatsby/src/schema/node-model.js b/packages/gatsby/src/schema/node-model.js
index a7c7fd8853afd..638378b0719b2 100644
--- a/packages/gatsby/src/schema/node-model.js
+++ b/packages/gatsby/src/schema/node-model.js
@@ -71,7 +71,7 @@ class LocalNodeModel {
     this._prepareNodesQueues = {}
     this._prepareNodesPromises = {}
     this._preparedNodesCache = new Map()
-    this.replaceTypeKeyValueCache()
+    this.replaceFiltersCache()
   }

   /**
@@ -84,7 +84,7 @@ class LocalNodeModel {
    * actually queried. If the filter targets `id` directly, only one Node is
    * cached instead of a Set of Nodes. If null, don't create or use a cache.
    */
-  replaceTypeKeyValueCache(map = new Map()) {
+  replaceFiltersCache(map = new Map()) {
     this._filtersCache = map // See redux/nodes.js for usage
   }
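
As a companion sketch, the null/undefined bookkeeping that the `$eq` and `$lte` branches of `getNodesFromCacheByValue` rely on can be shown in isolation. The helpers (`eqSet`, `lteNullSet`) and the bucket contents below are hypothetical, chosen only to demonstrate why `eq: null` matches missing paths while range ops do not:

```ts
// A node whose filter path is missing entirely is bucketed under `undefined`;
// a node with an explicit null value is bucketed under `null`.
type Bucket = Set<string>
type NullableValue = number | null | undefined

const byValue = new Map<NullableValue, Bucket>([
  [null, new Set(["explicitNull"])], // node.nil === null
  [undefined, new Set(["missingPath"])], // node has no `nil` at all
  [1, new Set(["one"])],
])

function eqSet(filterValue: NullableValue): Bucket {
  if (filterValue == null) {
    // `eq: null` must also match nodes that do not have the path at all
    return new Set([
      ...(byValue.get(null) ?? []),
      ...(byValue.get(undefined) ?? []),
    ])
  }
  return byValue.get(filterValue) ?? new Set()
}

function lteNullSet(): Bucket {
  // Range ops only match explicit nulls; missing paths stay excluded
  return byValue.get(null) ?? new Set()
}

console.log([...eqSet(null)]) // => ["explicitNull", "missingPath"]
console.log([...lteNullSet()]) // => ["explicitNull"]
```
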