From 49a0cd9bdd5779db53327d4f2e3b9c178a07ab73 Mon Sep 17 00:00:00 2001 From: Christian Westgaard Date: Thu, 7 Mar 2024 14:58:26 +0100 Subject: [PATCH] Fix #84 Max Included Items limits --- src/main/resources/guillotine/resolvers/urlset.ts | 6 +----- .../resources/lib/app-sitemapxml/constants.ts | 4 +++- .../lib/app-sitemapxml/queryForSitemapContent.ts | 15 +++++++++++++-- .../resources/site/controllers/sitemap/sitemap.ts | 8 +------- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/main/resources/guillotine/resolvers/urlset.ts b/src/main/resources/guillotine/resolvers/urlset.ts index 1f725c3..cd26fce 100644 --- a/src/main/resources/guillotine/resolvers/urlset.ts +++ b/src/main/resources/guillotine/resolvers/urlset.ts @@ -27,7 +27,6 @@ import { import { DEFAULT_PRIORITY, DEFAULT_UPDATE_PERIOD, - MAX_ITEMS_LIMIT, } from '/lib/app-sitemapxml/constants'; import {queryForSitemapContent} from '/lib/app-sitemapxml/queryForSitemapContent'; import {URLSET_FIELD_NAME} from '/guillotine/constants'; @@ -78,9 +77,6 @@ export const urlset = (graphQL: GraphQL): Resolver< const { count = maxItemsInt } = args; - const limitedCount = count < 0 - ? MAX_ITEMS_LIMIT - : Math.min(count, MAX_ITEMS_LIMIT); const { branch, @@ -104,7 +100,7 @@ export const urlset = (graphQL: GraphQL): Resolver< priority, result } = queryForSitemapContent({ - count: limitedCount, + count, site, siteConfig }); diff --git a/src/main/resources/lib/app-sitemapxml/constants.ts b/src/main/resources/lib/app-sitemapxml/constants.ts index 20b0ed5..b4bce1c 100644 --- a/src/main/resources/lib/app-sitemapxml/constants.ts +++ b/src/main/resources/lib/app-sitemapxml/constants.ts @@ -11,4 +11,6 @@ export const DEFAULT_UPDATE_PERIOD: tChangeFreq = 'monthly'; // https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd // Container for a set of up to 50,000 document elements. This is the root element of the XML file. -export const MAX_ITEMS_LIMIT: number = 50000; +export const MAX_ITEMS_LIMIT = 50000; + +export const ES_MAX_ITEMS_LIMIT = 10000; diff --git a/src/main/resources/lib/app-sitemapxml/queryForSitemapContent.ts b/src/main/resources/lib/app-sitemapxml/queryForSitemapContent.ts index 0457781..863cc01 100644 --- a/src/main/resources/lib/app-sitemapxml/queryForSitemapContent.ts +++ b/src/main/resources/lib/app-sitemapxml/queryForSitemapContent.ts @@ -18,6 +18,7 @@ import { import { DEFAULT_PRIORITY, DEFAULT_UPDATE_PERIOD, + ES_MAX_ITEMS_LIMIT, MAX_ITEMS_LIMIT } from '/lib/app-sitemapxml/constants'; @@ -134,8 +135,18 @@ export function queryForSitemapContent({ const mustNot: QueryDsl[] = blockRobotsDslArray.concat(ignoreDslArray); + const countBetweenZeroAndMaxItemsLimit = (count >= 0 && count <= MAX_ITEMS_LIMIT) + ? count + : MAX_ITEMS_LIMIT; + DEBUG && log.debug('countBetweenZeroAndMaxItemsLimit: %s', toStr(countBetweenZeroAndMaxItemsLimit)); + + const countWithinElasticSearchMaxItemsLimitOrMinusOne = (countBetweenZeroAndMaxItemsLimit <= ES_MAX_ITEMS_LIMIT) + ? countBetweenZeroAndMaxItemsLimit + : -1; + DEBUG && log.debug('countWithinElasticSearchMaxItemsLimitOrMinusOne: %s', toStr(countWithinElasticSearchMaxItemsLimitOrMinusOne)); + const nodeQueryParams = { - count: -1, + count: countWithinElasticSearchMaxItemsLimitOrMinusOne, query: { boolean: { must, @@ -154,7 +165,7 @@ export function queryForSitemapContent({ } DEBUG && log.debug('nodeQueryRes.total: %s', nodeQueryRes.total); - const stopBefore = Math.min(nodeQueryRes.hits.length, count, MAX_ITEMS_LIMIT); + const stopBefore = Math.min(nodeQueryRes.hits.length, countBetweenZeroAndMaxItemsLimit); DEBUG && log.debug('stopBefore: %s', stopBefore); const contents: { diff --git a/src/main/resources/site/controllers/sitemap/sitemap.ts b/src/main/resources/site/controllers/sitemap/sitemap.ts index 3e0c68e..826234f 100644 --- a/src/main/resources/site/controllers/sitemap/sitemap.ts +++ b/src/main/resources/site/controllers/sitemap/sitemap.ts @@ -15,7 +15,6 @@ import { } from '/lib/xp/portal'; // @ts-expect-error No types yet. import {render} from '/lib/xslt'; -import {MAX_ITEMS_LIMIT} from '/lib/app-sitemapxml/constants'; import {queryForSitemapContent} from '/lib/app-sitemapxml/queryForSitemapContent'; @@ -57,17 +56,12 @@ export function get(request: Request<{ overrideDomain = '', } = siteConfig || {}; // Handle null (aka no config) - const maxItemsInt = Math.min( - MAX_ITEMS_LIMIT, - parseInt(maxItems as string, 10) - ); - const { changefreq, priority, result } = queryForSitemapContent({ - count: maxItemsInt, + count: parseInt(maxItems as string, 10), site, siteConfig, });