From 17cbab6692abdddc1e50287baf3832d080381291 Mon Sep 17 00:00:00 2001 From: Sean Thomas Burke Date: Thu, 7 Nov 2024 23:10:55 -0800 Subject: [PATCH] Updating Docs --- CONTRIBUTING.md | 7 +- README.md | 90 +++--------- example.es6.js | 47 ------ example.js | 85 +++++------ src/tests/test.es5.js | 184 ----------------------- src/tests/test.js | 331 ------------------------------------------ src/tests/test.ts.ts | 141 ++++++++++++++++++ 7 files changed, 204 insertions(+), 681 deletions(-) delete mode 100644 example.es6.js delete mode 100644 src/tests/test.es5.js delete mode 100644 src/tests/test.js diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cc36d0a..4dc6a28 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ ## Contributing -The files to modify are under the `src` folder. `src/assets` are JavaScript files written in es6 that get compiled +The files to modify are under the `src` folder. `src/assets` are JavaScript files that get compiled through babel into `lib/sitemapper.js`. ### Build @@ -11,7 +11,7 @@ To build the `lib` directory with the compiled assets use this command npm run build ``` -This uses [Babel](http://babeljs.io/) to compile the files. Make sure to run `npm run build` before submitting a pull request. +This uses [Babel](http://babeljs.io/) to compile the files. The prepack step will run `npm run build` when submitting a pull request. ```bash # Run examples/index.js @@ -58,7 +58,10 @@ src/ assets/ sitemapper.js examples/ + google.js index.js tests/ test.js + test.ts.ts + tsconfig.json ``` diff --git a/README.md b/README.md index 9dca147..8f679b2 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ sitemap.fetch('https://wp.seantburke.com/sitemap.xml').then(function (sites) { }); ``` -### Examples in ES6 +### Examples ```javascript import Sitemapper from 'sitemapper'; @@ -81,21 +81,23 @@ You can add options on the initial Sitemapper object when instantiating it. 
- `retries`: (Number) - Sets the maximum number of retries to attempt in case of an error response (e.g. 404 or Timeout). Default: 0 - `rejectUnauthorized`: (Boolean) - If true, it will throw on invalid certificates, such as expired or self-signed ones. Default: True - `lastmod`: (Number) - Timestamp of the minimum lastmod value allowed for returned urls -- `field` : (Object) - An object of fields to be returned from the sitemap. For Example: `{ loc: true, lastmod: true, changefreq: true, priority: true }`. Leaving a field out has the same effect as `field: false`. If not specified sitemapper defaults to returning the 'classic' array of urls. - `proxyAgent`: (HttpProxyAgent|HttpsProxyAgent) - instance of npm "hpagent" HttpProxyAgent or HttpsProxyAgent to be passed to npm "got" +- `field` : (Object) - An object of fields to be returned from the sitemap. -```javascript -const sitemapper = new Sitemapper({ - url: 'https://art-works.community/sitemap.xml', - rejectUnauthorized: true, - timeout: 15000, - requestHeaders: { - 'User-Agent': - 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:81.0) Gecko/20100101 Firefox/81.0', - }, -}); + +For Example: + +``` +{ + loc: true, + lastmod: true, + changefreq: true, + priority: true, +} ``` +Leaving a field out has the same effect as `field: false`. If not specified sitemapper defaults to returning the 'classic' array of urls. 
+ An example using all available options: ```javascript @@ -109,61 +111,13 @@ const sitemapper = new Sitemapper({ debug: true, concurrency: 2, retries: 1, + rejectUnauthorized: false, + field: { + loc: true, + lastmod: true, + changefreq: true, + priority: true, + }, + proxyAgent: new HttpProxyAgent('http://localhost:8080'), }); ``` - -### Examples in ES5 - -```javascript -var Sitemapper = require('sitemapper'); - -var Google = new Sitemapper({ - url: 'https://www.google.com/work/sitemap.xml', - timeout: 15000, // 15 seconds -}); - -Google.fetch() - .then(function (data) { - console.log(data); - }) - .catch(function (error) { - console.log(error); - }); - -// or - -var sitemapper = new Sitemapper(); - -sitemapper.timeout = 5000; -sitemapper - .fetch('https://wp.seantburke.com/sitemap.xml') - .then(function (data) { - console.log(data); - }) - .catch(function (error) { - console.log(error); - }); -``` - -## Version 1 - -```bash -npm install sitemapper@1.1.1 --save -``` - -### Simple Example - -```javascript -var Sitemapper = require('sitemapper'); - -var sitemapper = new Sitemapper(); - -sitemapper.getSites( - 'https://wp.seantburke.com/sitemap.xml', - function (err, sites) { - if (!err) { - console.log(sites); - } - } -); -``` diff --git a/example.es6.js b/example.es6.js deleted file mode 100644 index 628ab60..0000000 --- a/example.es6.js +++ /dev/null @@ -1,47 +0,0 @@ -import Sitemapper from 'sitemapper'; - -(async () => { - const sitemapper = new Sitemapper(); - - const Google = new Sitemapper({ - url: 'https://www.google.com/work/sitemap.xml', - debug: false, - timeout: 15000, // 15 seconds - }); - - try { - const data = await Google.fetch(); - console.log(data.sites); - } catch (error) { - console.log(error); - } - - sitemapper.timeout = 5000; - - try { - const { url, sites } = await sitemapper.fetch( - 'https://wp.seantburke.com/sitemap.xml' - ); - console.log(`url:${url}`, 'sites:', sites); - } catch (error) { - console.log(error); - } - - try { - const { 
url, sites } = await sitemapper.fetch( - 'http://www.cnn.com/sitemaps/sitemap-index.xml' - ); - console.log(`url:${url}`, 'sites:', sites); - } catch (error) { - console.log(error); - } - - try { - const { url, sites } = await sitemapper.fetch( - 'http://www.stubhub.com/new-sitemap/us/sitemap-US-en-index.xml' - ); - console.log(`url:${url}`, 'sites:', sites); - } catch (error) { - console.log(error); - } -})(); diff --git a/example.js b/example.js index 97e1ee3..628ab60 100644 --- a/example.js +++ b/example.js @@ -1,60 +1,47 @@ -var Sitemapper = require('sitemapper'); +import Sitemapper from 'sitemapper'; -// Instantiate an instance with options -var Google = new Sitemapper({ - url: 'https://www.google.com/work/sitemap.xml', - debug: false, - timeout: 15000, // 15 seconds -}); +(async () => { + const sitemapper = new Sitemapper(); -// Then fetch -Google.fetch() - .then(function (data) { - console.log(data); - }) - .catch(function (error) { - console.log(error); + const Google = new Sitemapper({ + url: 'https://www.google.com/work/sitemap.xml', + debug: false, + timeout: 15000, // 15 seconds }); -// Instantiate an instance with no options -var sitemapper = new Sitemapper(); -sitemapper.timeout = 5000; - -sitemapper - .fetch('https://wp.seantburke.com/sitemap.xml') - .then(function (data) { - console.log(data); - }) - .catch(function (error) { + try { + const data = await Google.fetch(); + console.log(data.sites); + } catch (error) { console.log(error); - }); + } + + sitemapper.timeout = 5000; -sitemapper - .fetch('http://www.cnn.com/sitemaps/sitemap-index.xml') - .then(function (data) { - console.log('sites:', data.sites, 'url', data.url); - }) - .catch(function (error) { + try { + const { url, sites } = await sitemapper.fetch( + 'https://wp.seantburke.com/sitemap.xml' + ); + console.log(`url:${url}`, 'sites:', sites); + } catch (error) { console.log(error); - }); + } -sitemapper - .fetch('http://www.stubhub.com/new-sitemap/us/sitemap-US-en-index.xml') - 
.then(function (data) { - console.log('sites:', data.sites, 'url', data.url); - }) - .catch(function (error) { + try { + const { url, sites } = await sitemapper.fetch( + 'http://www.cnn.com/sitemaps/sitemap-index.xml' + ); + console.log(`url:${url}`, 'sites:', sites); + } catch (error) { console.log(error); - }); + } -// Version 1.0.0 example which has been deprecated. -sitemapper.getSites( - 'https://wp.seantburke.com/sitemap.xml', - function (err, sites) { - if (!err) { - console.log(sites); - } else { - console.log(err); - } + try { + const { url, sites } = await sitemapper.fetch( + 'http://www.stubhub.com/new-sitemap/us/sitemap-US-en-index.xml' + ); + console.log(`url:${url}`, 'sites:', sites); + } catch (error) { + console.log(error); } -); +})(); diff --git a/src/tests/test.es5.js b/src/tests/test.es5.js deleted file mode 100644 index 2d105f7..0000000 --- a/src/tests/test.es5.js +++ /dev/null @@ -1,184 +0,0 @@ -require('async'); -require('assert'); -require('should'); -const isUrl = require('is-url'); - -const Sitemapper = require('../../lib/assets/sitemapper.js'); -var sitemapper; - -describe('Sitemapper', function () { - beforeEach(function () { - sitemapper = new Sitemapper(); - }); - - describe('Sitemapper Class', function () { - it('should have initializeTimeout method', function () { - sitemapper.initializeTimeout.should.be.Function; - }); - - it('should have crawl method', function () { - sitemapper.crawl.should.be.Function; - }); - - it('should have parse method', function () { - sitemapper.parse.should.be.Function; - }); - - it('should have fetch method', function () { - sitemapper.fetch.should.be.Function; - }); - - it('should construct with a url', function () { - sitemapper = new Sitemapper({ - url: 'google.com', - }); - sitemapper.url.should.equal('google.com'); - }); - - it('should construct with a timeout', function () { - sitemapper = new Sitemapper({ - timeout: 1000, - }); - sitemapper.timeout.should.equal(1000); - }); - - it('should set 
timeout', function () { - sitemapper.timeout = 1000; - sitemapper.timeout.should.equal(1000); - }); - - it('should set url', function () { - sitemapper.url = 1000; - sitemapper.url.should.equal(1000); - }); - }); - - describe('fetch Method resolves sites to array', function () { - it('https://wp.seantburke.com/sitemap.xml sitemaps should be an array', function (done) { - this.timeout(30000); - const url = 'https://wp.seantburke.com/sitemap.xml'; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - data.url.should.equal(url); - data.sites.length.should.be.above(2); - isUrl(data.sites[0]).should.be.true; - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - - it('gibberish.gibberish should fail silently with an empty array', function (done) { - this.timeout(30000); - const url = 'http://gibberish.gibberish'; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - data.errors.should.be.Array; - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - - it('https://webflow.com/sitemap.xml sitemaps should be an array', function (done) { - this.timeout(30000); - const url = 'https://webflow.com/sitemap.xml'; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - data.url.should.equal(url); - data.sites.length.should.be.above(2); - isUrl(data.sites[0]).should.be.true; - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - - it('https://www.golinks.io/sitemap.xml sitemaps should be an array', function (done) { - this.timeout(30000); - const url = 'https://www.golinks.io/sitemap.xml'; - sitemapper.timeout = 5000; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - data.url.should.equal(url); - data.sites.length.should.be.above(2); - isUrl(data.sites[0]).should.be.true; - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); 
- }); - }); - - it('https://www.golinks.io/sitemap.xml sitemaps should return an empty array when timing out', function (done) { - this.timeout(30000); - const url = 'https://www.golinks.io/sitemap.xml'; - sitemapper.timeout = 1; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - }); - - describe('gzipped sitemaps', function () { - beforeEach(function () { - sitemapper = new Sitemapper({ - requestHeaders: { - 'Accept-Encoding': 'gzip,deflate,sdch', - }, - }); - }); - - it('https://www.banggood.com/sitemap/category.xml.gz gzip should be a non-empty array', function (done) { - this.timeout(30000); - const url = 'https://www.banggood.com/sitemap/category.xml.gz'; - sitemapper.timeout = 10000; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - data.errors.should.be.Array; - data.sites.length.should.be.greaterThan(0); - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - }); - - describe('getSites method', function () { - it('getSites should be backwards compatible', function (done) { - this.timeout(30000); - const url = 'https://wp.seantburke.com/sitemap.xml'; - sitemapper.getSites(url, (err, sites) => { - sites.should.be.Array; - isUrl(sites[0]).should.be.true; - done(); - }); - }); - }); -}); diff --git a/src/tests/test.js b/src/tests/test.js deleted file mode 100644 index f7c6fb8..0000000 --- a/src/tests/test.js +++ /dev/null @@ -1,331 +0,0 @@ -import 'async'; -import 'assert'; -import 'should'; -import isUrl from 'is-url'; - -import Sitemapper from '../../lib/assets/sitemapper.js'; - -let sitemapper; - -describe('Sitemapper', function () { - beforeEach(function () { - sitemapper = new Sitemapper(); - }); - - describe('Sitemapper Class', function () { - it('should have initializeTimeout method', function () { - sitemapper.initializeTimeout.should.be.Function; - }); - - 
it('should have crawl method', function () { - sitemapper.crawl.should.be.Function; - }); - - it('should have parse method', function () { - sitemapper.parse.should.be.Function; - }); - - it('should have fetch method', function () { - sitemapper.fetch.should.be.Function; - }); - - it('should construct with a url', function () { - sitemapper = new Sitemapper({ - url: 'google.com', - }); - sitemapper.url.should.equal('google.com'); - }); - - it('should construct with a timeout', function () { - sitemapper = new Sitemapper({ - timeout: 1000, - }); - sitemapper.timeout.should.equal(1000); - }); - - it('should set timeout', function () { - sitemapper.timeout = 1000; - sitemapper.timeout.should.equal(1000); - }); - - it('should set url', function () { - sitemapper.url = 1000; - sitemapper.url.should.equal(1000); - }); - }); - - describe('fetch Method resolves sites to array', function () { - it('https://wp.seantburke.com/sitemap.xml sitemaps should be an array', function (done) { - this.timeout(30000); - const url = 'https://wp.seantburke.com/sitemap.xml'; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - data.url.should.equal(url); - data.sites.length.should.be.above(2); - isUrl(data.sites[0]).should.be.true; - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - - it('gibberish.gibberish should fail silently with an empty array', function (done) { - this.timeout(30000); - const url = 'http://gibberish.gibberish'; - sitemapper.debug = true; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - data.errors.should.be.Array; - data.errors.length.should.be.greaterThan(0); - data.errors.length.should.be.greaterThan(0); - console.log(data); - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - - it('https://webflow.com/sitemap.xml sitemaps should be an array', function (done) { - this.timeout(30000); - const url = 
'https://webflow.com/sitemap.xml'; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - data.url.should.equal(url); - data.sites.length.should.be.above(2); - isUrl(data.sites[0]).should.be.true; - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - - it('https://www.golinks.io/sitemap.xml sitemaps should be an array', function (done) { - this.timeout(30000); - const url = 'https://www.golinks.io/sitemap.xml'; - sitemapper.timeout = 5000; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - data.url.should.equal(url); - data.sites.length.should.be.above(2); - isUrl(data.sites[0]).should.be.true; - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - - it('https://www.golinks.io/sitemap.xml sitemaps should return an empty array when timing out', function (done) { - this.timeout(30000); - const url = 'https://www.golinks.io/sitemap.xml'; - sitemapper.timeout = 1; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - data.errors.should.be.Array; - console.log(data); - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - - it('https://www.golinks.com/blog/sitemap.xml sitemaps should return an empty array when timing out', function (done) { - this.timeout(30000); - const url = 'https://www.golinks.com/blog/sitemap.xml'; - sitemapper.timeout = 10000; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - data.errors.should.be.Array; - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - - it('https://www.banggood.com/sitemap/category.xml.gz gzip should be a non-empty array', function (done) { - this.timeout(30000); - const url = 'https://www.banggood.com/sitemap/category.xml.gz'; - sitemapper.timeout = 10000; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - 
data.sites.length.should.be.greaterThan(0); - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - }); - - describe('gzipped sitemaps', function () { - beforeEach(function () { - sitemapper = new Sitemapper({ - requestHeaders: { - 'Accept-Encoding': 'gzip,deflate,sdch', - }, - }); - }); - - it('https://www.banggood.com/sitemap/category.xml.gz gzip should be a non-empty array', function (done) { - this.timeout(30000); - const url = 'https://www.banggood.com/sitemap/category.xml.gz'; - sitemapper.timeout = 10000; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - data.errors.should.be.Array; - data.sites.length.should.be.greaterThan(0); - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - - it('https://foo.com/sitemap.xml should not allow insecure request', function (done) { - this.timeout(30000); - const url = 'https://foo.com/sitemap.xml'; - sitemapper.timeout = 10000; - sitemapper.rejectUnauthorized = false; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - data.errors.should.be.Array; - data.errors.should.containEql({ - type: 'HTTPError', - message: 'HTTP Error occurred: Response code 404 (Not Found)', - url: 'https://foo.com/sitemap.xml', - retries: 0, - }); - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - }); - - describe('getSites method', function () { - it('getSites should be backwards compatible', function (done) { - this.timeout(30000); - const url = 'https://wp.seantburke.com/sitemap.xml'; - sitemapper.getSites(url, (err, sites) => { - sites.should.be.Array; - isUrl(sites[0]).should.be.true; - done(); - }); - }); - }); - - describe('exclusions option', function () { - // check for the url that should be excluded in a later test - it('should prevent false positive', function (done) { - this.timeout(30000); - const url = 'https://wp.seantburke.com/sitemap.xml'; - // 
exclude video and image sitemap index urls - sitemapper.exclusions = [/video/, /image/]; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - data.sites.includes('https://wp.seantburke.com/?page_id=2').should.be - .true; - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - - it('should filter out page_id urls', function (done) { - this.timeout(30000); - const url = 'https://wp.seantburke.com/sitemap.xml'; - // exclude page_id=2 - sitemapper.exclusions = [/page_id/]; - sitemapper - .fetch(url) - .then((data) => { - data.sites.should.be.Array; - data.sites.includes('https://wp.seantburke.com/?page_id=2').should.be - .false; - done(); - }) - .catch((error) => { - console.error('Test failed'); - done(error); - }); - }); - }); - - describe('isExcluded method', function () { - it('should return false when no exclusions are set', function () { - const result = sitemapper.isExcluded('https://foo.com/page1'); - result.should.be.false(); - }); - - it('should return false when url does not match any exclusion patterns', function () { - sitemapper.exclusions = [/\.pdf$/, /private/]; - const result = sitemapper.isExcluded('https://foo.com/page1'); - result.should.be.false(); - }); - - it('should return false when url matches an exclusion pattern', function () { - sitemapper.exclusions = [/\.pdf$/, /private/]; - const result = sitemapper.isExcluded('https://foo.com/document.pdf'); - result.should.be.true(); - }); - - it('should return true when url matches any of multiple exclusion patterns', function () { - sitemapper.exclusions = [/\.pdf$/, /private/, /temp/]; - const result = sitemapper.isExcluded('https://foo.com/private/temp.html'); - result.should.be.true(); - }); - - it('should handle complex regex patterns correctly', function () { - sitemapper.exclusions = [/^https:\/\/foo\.com\/([a-z]{2})\/private/]; - const result1 = sitemapper.isExcluded('https://foo.com/en/private/page'); - const result2 = 
sitemapper.isExcluded('https://foo.com/en/public/page'); - result1.should.be.true(); - result2.should.be.false(); - }); - - it('should handle case sensitivity correctly', function () { - sitemapper.exclusions = [/private/i]; - const result1 = sitemapper.isExcluded('https://foo.com/PRIVATE/page'); - const result2 = sitemapper.isExcluded('https://foo.com/Private/page'); - result1.should.be.true(); - result2.should.be.true(); - }); - }); -}); diff --git a/src/tests/test.ts.ts b/src/tests/test.ts.ts index 98cb630..0d631e3 100644 --- a/src/tests/test.ts.ts +++ b/src/tests/test.ts.ts @@ -90,11 +90,14 @@ describe('Sitemapper', function () { it('gibberish.gibberish should fail silently with an empty array', function (done) { this.timeout(30000); const url = 'http://gibberish.gibberish'; + sitemapper.debug = true; sitemapper .fetch(url) .then((data) => { data.sites.should.be.Array; data.errors.should.be.Array; + data.errors.length.should.be.greaterThan(0); + console.log(data); done(); }) .catch((error) => { @@ -168,11 +171,46 @@ describe('Sitemapper', function () { this.timeout(30000); const url = 'https://www.golinks.io/sitemap.xml'; sitemapper.timeout = 1; + sitemapper + .fetch(url) + .then((data) => { + data.sites.should.be.Array; + data.errors.should.be.Array; + console.log(data); + done(); + }) + .catch((error) => { + console.error('Test failed'); + done(error); + }); + }); + it('https://www.golinks.com/blog/sitemap.xml sitemaps should return an empty array when timing out', function (done) { + this.timeout(30000); + const url = 'https://www.golinks.com/blog/sitemap.xml'; + sitemapper.timeout = 10000; sitemapper .fetch(url) .then((data) => { data.sites.should.be.Array; + data.errors.should.be.Array; + done(); + }) + .catch((error) => { + console.error('Test failed'); + done(error); + }); + }); + + it('https://www.banggood.com/sitemap/category.xml.gz gzip should be a non-empty array', function (done) { + this.timeout(30000); + const url = 
'https://www.banggood.com/sitemap/category.xml.gz'; + sitemapper.timeout = 10000; + sitemapper + .fetch(url) + .then((data) => { + data.sites.should.be.Array; + data.sites.length.should.be.greaterThan(0); done(); }) .catch((error) => { @@ -208,6 +246,30 @@ describe('Sitemapper', function () { done(error); }); }); + + it('https://foo.com/sitemap.xml should not allow insecure request', function (done) { + this.timeout(30000); + const url = 'https://foo.com/sitemap.xml'; + sitemapper.timeout = 10000; + sitemapper.rejectUnauthorized = false; + sitemapper + .fetch(url) + .then((data) => { + data.sites.should.be.Array; + data.errors.should.be.Array; + data.errors.should.containEql({ + type: 'HTTPError', + message: 'HTTP Error occurred: Response code 404 (Not Found)', + url: 'https://foo.com/sitemap.xml', + retries: 0, + }); + done(); + }) + .catch((error) => { + console.error('Test failed'); + done(error); + }); + }); }); describe('getSites method', function () { @@ -221,4 +283,83 @@ describe('Sitemapper', function () { }); }); }); + + describe('exclusions option', function () { + it('should prevent false positive', function (done) { + this.timeout(30000); + const url = 'https://wp.seantburke.com/sitemap.xml'; + sitemapper.exclusions = [/video/, /image/]; + sitemapper + .fetch(url) + .then((data) => { + data.sites.should.be.Array; + data.sites.includes('https://wp.seantburke.com/?page_id=2').should.be + .true; + done(); + }) + .catch((error) => { + console.error('Test failed'); + done(error); + }); + }); + + it('should filter out page_id urls', function (done) { + this.timeout(30000); + const url = 'https://wp.seantburke.com/sitemap.xml'; + sitemapper.exclusions = [/page_id/]; + sitemapper + .fetch(url) + .then((data) => { + data.sites.should.be.Array; + data.sites.includes('https://wp.seantburke.com/?page_id=2').should.be + .false; + done(); + }) + .catch((error) => { + console.error('Test failed'); + done(error); + }); + }); + }); + + describe('isExcluded method', 
function () { + it('should return false when no exclusions are set', function () { + const result = sitemapper.isExcluded('https://foo.com/page1'); + result.should.be.false(); + }); + + it('should return false when url does not match any exclusion patterns', function () { + sitemapper.exclusions = [/\.pdf$/, /private/]; + const result = sitemapper.isExcluded('https://foo.com/page1'); + result.should.be.false(); + }); + + it('should return false when url matches an exclusion pattern', function () { + sitemapper.exclusions = [/\.pdf$/, /private/]; + const result = sitemapper.isExcluded('https://foo.com/document.pdf'); + result.should.be.true(); + }); + + it('should return true when url matches any of multiple exclusion patterns', function () { + sitemapper.exclusions = [/\.pdf$/, /private/, /temp/]; + const result = sitemapper.isExcluded('https://foo.com/private/temp.html'); + result.should.be.true(); + }); + + it('should handle complex regex patterns correctly', function () { + sitemapper.exclusions = [/^https:\/\/foo\.com\/([a-z]{2})\/private/]; + const result1 = sitemapper.isExcluded('https://foo.com/en/private/page'); + const result2 = sitemapper.isExcluded('https://foo.com/en/public/page'); + result1.should.be.true(); + result2.should.be.false(); + }); + + it('should handle case sensitivity correctly', function () { + sitemapper.exclusions = [/private/i]; + const result1 = sitemapper.isExcluded('https://foo.com/PRIVATE/page'); + const result2 = sitemapper.isExcluded('https://foo.com/Private/page'); + result1.should.be.true(); + result2.should.be.true(); + }); + }); });