From 103ea046613c20c37f09ae3290a1d66bcd2e97ba Mon Sep 17 00:00:00 2001 From: ApsarasX Date: Wed, 6 Apr 2022 21:44:07 +0800 Subject: [PATCH] feat(sitemap): add ignorePatterns option (#6979) Co-authored-by: Joshua Chen --- .../src/__tests__/createSitemap.test.ts | 27 +++++++++++++++++++ .../src/__tests__/options.test.ts | 14 ++++++++++ .../src/createSitemap.ts | 11 +++++--- .../docusaurus-plugin-sitemap/src/index.ts | 4 +-- .../docusaurus-plugin-sitemap/src/options.ts | 10 ++++--- .../src/plugin-sitemap.d.ts | 14 +++++++--- packages/docusaurus-utils/src/globUtils.ts | 7 ++++- website/docs/api/plugins/plugin-sitemap.md | 2 ++ website/docusaurus.config.js | 3 +++ 9 files changed, 78 insertions(+), 14 deletions(-) diff --git a/packages/docusaurus-plugin-sitemap/src/__tests__/createSitemap.test.ts b/packages/docusaurus-plugin-sitemap/src/__tests__/createSitemap.test.ts index c041bff79b6b..193002e4bef1 100644 --- a/packages/docusaurus-plugin-sitemap/src/__tests__/createSitemap.test.ts +++ b/packages/docusaurus-plugin-sitemap/src/__tests__/createSitemap.test.ts @@ -19,6 +19,7 @@ describe('createSitemap', () => { { changefreq: EnumChangefreq.DAILY, priority: 0.7, + ignorePatterns: [], }, ); expect(sitemap).toContain( @@ -42,11 +43,34 @@ describe('createSitemap', () => { { changefreq: EnumChangefreq.DAILY, priority: 0.7, + ignorePatterns: [], }, ); expect(sitemap).not.toContain('404'); }); + it('excludes patterns configured to be ignored', async () => { + const sitemap = await createSitemap( + { + url: 'https://example.com', + } as DocusaurusConfig, + ['/', '/search/', '/tags/', '/search/foo', '/tags/foo/bar'], + { + changefreq: EnumChangefreq.DAILY, + priority: 0.7, + ignorePatterns: [ + // Shallow ignore + '/search/', + // Deep ignore + '/tags/**', + ], + }, + ); + expect(sitemap).not.toContain('/search/'); + expect(sitemap).toContain('/search/foo'); + expect(sitemap).not.toContain('/tags'); + }); + it('keep trailing slash unchanged', async () => { const sitemap = await createSitemap( { @@ -57,6 +81,7 @@ describe('createSitemap', () => { { changefreq: EnumChangefreq.DAILY, priority: 0.7, + ignorePatterns: [], }, ); @@ -76,6 +101,7 @@ describe('createSitemap', () => { { changefreq: EnumChangefreq.DAILY, priority: 0.7, + ignorePatterns: [], }, ); @@ -95,6 +121,7 @@ describe('createSitemap', () => { { changefreq: EnumChangefreq.DAILY, priority: 0.7, + ignorePatterns: [], }, ); diff --git a/packages/docusaurus-plugin-sitemap/src/__tests__/options.test.ts b/packages/docusaurus-plugin-sitemap/src/__tests__/options.test.ts index e53a2adc6ba9..5e38548b699d 100644 --- a/packages/docusaurus-plugin-sitemap/src/__tests__/options.test.ts +++ b/packages/docusaurus-plugin-sitemap/src/__tests__/options.test.ts @@ -27,6 +27,7 @@ describe('validateOptions', () => { const userOptions = { changefreq: 'yearly', priority: 0.9, + ignorePatterns: ['/search/**'], }; expect(testValidate(userOptions)).toEqual({ ...defaultOptions, @@ -49,4 +50,17 @@ describe('validateOptions', () => { `"\\"changefreq\\" must be one of [daily, monthly, always, hourly, weekly, yearly, never]"`, ); }); + + it('rejects bad ignorePatterns inputs', () => { + expect(() => + testValidate({ignorePatterns: '/search'}), + ).toThrowErrorMatchingInlineSnapshot( + `"\\"ignorePatterns\\" must be an array"`, + ); + expect(() => + testValidate({ignorePatterns: [/^\/search/]}), + ).toThrowErrorMatchingInlineSnapshot( + `"\\"ignorePatterns[0]\\" must be a string"`, + ); + }); }); diff --git a/packages/docusaurus-plugin-sitemap/src/createSitemap.ts b/packages/docusaurus-plugin-sitemap/src/createSitemap.ts index 2d6afbcb68c6..8df3581badd1 100644 --- a/packages/docusaurus-plugin-sitemap/src/createSitemap.ts +++ b/packages/docusaurus-plugin-sitemap/src/createSitemap.ts @@ -6,25 +6,28 @@ */ import {SitemapStream, streamToPromise} from 'sitemap'; -import type {Options} from '@docusaurus/plugin-sitemap'; +import type {PluginOptions} from '@docusaurus/plugin-sitemap'; import type {DocusaurusConfig} from '@docusaurus/types'; import {applyTrailingSlash} from '@docusaurus/utils-common'; +import {createMatcher} from '@docusaurus/utils'; export default async function createSitemap( siteConfig: DocusaurusConfig, routesPaths: string[], - options: Options, + options: PluginOptions, ): Promise { const {url: hostname} = siteConfig; if (!hostname) { throw new Error('URL in docusaurus.config.js cannot be empty/undefined.'); } - const {changefreq, priority} = options; + const {changefreq, priority, ignorePatterns} = options; + + const ignoreMatcher = createMatcher(ignorePatterns); const sitemapStream = new SitemapStream({hostname}); routesPaths - .filter((route) => !route.endsWith('404.html')) + .filter((route) => !route.endsWith('404.html') && !ignoreMatcher(route)) .forEach((routePath) => sitemapStream.write({ url: applyTrailingSlash(routePath, { diff --git a/packages/docusaurus-plugin-sitemap/src/index.ts b/packages/docusaurus-plugin-sitemap/src/index.ts index 0a6651029917..2b77da6e0b24 100644 --- a/packages/docusaurus-plugin-sitemap/src/index.ts +++ b/packages/docusaurus-plugin-sitemap/src/index.ts @@ -7,13 +7,13 @@ import fs from 'fs-extra'; import path from 'path'; -import type {Options} from '@docusaurus/plugin-sitemap'; +import type {PluginOptions} from '@docusaurus/plugin-sitemap'; import createSitemap from './createSitemap'; import type {LoadContext, Plugin} from '@docusaurus/types'; export default function pluginSitemap( context: LoadContext, - options: Options, + options: PluginOptions, ): Plugin { return { name: 'docusaurus-plugin-sitemap', diff --git a/packages/docusaurus-plugin-sitemap/src/options.ts b/packages/docusaurus-plugin-sitemap/src/options.ts index e7d7972153a5..e5f2053931c0 100644 --- a/packages/docusaurus-plugin-sitemap/src/options.ts +++ b/packages/docusaurus-plugin-sitemap/src/options.ts @@ -7,12 +7,13 @@ import {Joi} from '@docusaurus/utils-validation'; import {EnumChangefreq} from 'sitemap'; -import type {Options} from '@docusaurus/plugin-sitemap'; +import type {Options, PluginOptions} from '@docusaurus/plugin-sitemap'; import type {OptionValidationContext} from '@docusaurus/types'; -export const DEFAULT_OPTIONS: Options = { +export const DEFAULT_OPTIONS: PluginOptions = { changefreq: EnumChangefreq.WEEKLY, priority: 0.5, + ignorePatterns: [], }; const PluginOptionSchema = Joi.object({ @@ -24,6 +25,9 @@ const PluginOptionSchema = Joi.object({ .valid(...Object.values(EnumChangefreq)) .default(DEFAULT_OPTIONS.changefreq), priority: Joi.number().min(0).max(1).default(DEFAULT_OPTIONS.priority), + ignorePatterns: Joi.array() + .items(Joi.string()) + .default(DEFAULT_OPTIONS.ignorePatterns), trailingSlash: Joi.forbidden().messages({ 'any.unknown': 'Please use the new Docusaurus global trailingSlash config instead, and the sitemaps plugin will use it.', @@ -33,7 +37,7 @@ const PluginOptionSchema = Joi.object({ export function validateOptions({ validate, options, -}: OptionValidationContext): Options { +}: OptionValidationContext): PluginOptions { const validatedOptions = validate(PluginOptionSchema, options); return validatedOptions; } diff --git a/packages/docusaurus-plugin-sitemap/src/plugin-sitemap.d.ts b/packages/docusaurus-plugin-sitemap/src/plugin-sitemap.d.ts index 7f16b96a4c94..6f175bd1f7a1 100644 --- a/packages/docusaurus-plugin-sitemap/src/plugin-sitemap.d.ts +++ b/packages/docusaurus-plugin-sitemap/src/plugin-sitemap.d.ts @@ -7,10 +7,16 @@ import type {EnumChangefreq} from 'sitemap'; -export type Options = { - id?: string; +export type PluginOptions = { /** @see https://www.sitemaps.org/protocol.html#xmlTagDefinitions */ - changefreq?: EnumChangefreq; + changefreq: EnumChangefreq; /** @see https://www.sitemaps.org/protocol.html#xmlTagDefinitions */ - priority?: number; + priority: number; + /** + * A list of glob patterns; matching route paths will be filtered from the + * sitemap. Note that you may need to include the base URL in here. + */ + ignorePatterns: string[]; }; + +export type Options = Partial; diff --git a/packages/docusaurus-utils/src/globUtils.ts b/packages/docusaurus-utils/src/globUtils.ts index b70ed4cd53d3..67af9051eeae 100644 --- a/packages/docusaurus-utils/src/globUtils.ts +++ b/packages/docusaurus-utils/src/globUtils.ts @@ -31,11 +31,16 @@ type Matcher = (str: string) => boolean; * A very thin wrapper around `Micromatch.makeRe`. * * @see {@link createAbsoluteFilePathMatcher} - * @param patterns A list of glob patterns. + * @param patterns A list of glob patterns. If the list is empty, it defaults to + * matching none. * @returns A matcher handle that tells if a file path is matched by any of the * patterns. */ export function createMatcher(patterns: string[]): Matcher { + if (patterns.length === 0) { + // `/(?:)/.test("foo")` is `true` + return () => false; + } const regexp = new RegExp( patterns.map((pattern) => Micromatch.makeRe(pattern).source).join('|'), ); diff --git a/website/docs/api/plugins/plugin-sitemap.md b/website/docs/api/plugins/plugin-sitemap.md index 6c8bd87d32c1..c8aec5da3079 100644 --- a/website/docs/api/plugins/plugin-sitemap.md +++ b/website/docs/api/plugins/plugin-sitemap.md @@ -39,6 +39,7 @@ Accepted fields: | --- | --- | --- | --- | | `changefreq` | `string` | `'weekly'` | See [sitemap docs](https://www.sitemaps.org/protocol.html#xmlTagDefinitions) | | `priority` | `number` | `0.5` | See [sitemap docs](https://www.sitemaps.org/protocol.html#xmlTagDefinitions) | +| `ignorePatterns` | `string[]` | `[]` | A list of glob patterns; matching route paths will be filtered from the sitemap. Note that you may need to include the base URL in here. | @@ -68,6 +69,7 @@ Most Docusaurus users configure this plugin through the preset options. const config = { changefreq: 'weekly', priority: 0.5, + ignorePatterns: ['/tags/**'], }; ``` diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index 775ada764646..5c7e1c0dec81 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -338,6 +338,9 @@ const config = { trackingID: 'UA-141789564-1', } : undefined, + sitemap: { + ignorePatterns: ['/tests/**'], + }, }), ], ],