From b23a29eacaea6d57534edca9e8ec7260ac13293b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Fichot?= Date: Tue, 3 Apr 2018 15:05:55 +0200 Subject: [PATCH 01/11] Add extraMediasRegex parameter config --- packages/gatsby-source-wordpress/src/gatsby-node.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/gatsby-source-wordpress/src/gatsby-node.js b/packages/gatsby-source-wordpress/src/gatsby-node.js index b9fc54f522f8f..bd1084e698f42 100644 --- a/packages/gatsby-source-wordpress/src/gatsby-node.js +++ b/packages/gatsby-source-wordpress/src/gatsby-node.js @@ -32,6 +32,7 @@ exports.sourceNodes = async ( searchAndReplaceContentUrls = {}, concurrentRequests = 10, excludedRoutes = [], + extraMediasRegex, } ) => { const { createNode } = boundActionCreators @@ -91,7 +92,7 @@ exports.sourceNodes = async ( entities = normalize.mapTagsCategoriesToTaxonomies(entities) // Creates links from entities to media nodes - entities = normalize.mapEntitiesToMedia(entities) + entities = normalize.mapEntitiesToMedia(entities, extraMediasRegex) // Downloads media files and removes "sizes" data as useless in Gatsby context. entities = await normalize.downloadMediaFiles({ From de949a6580126d98bed835706e8b298165eabbc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Fichot?= Date: Tue, 3 Apr 2018 15:10:51 +0200 Subject: [PATCH 02/11] Add parameter to mapEntitiesToMedia Changes are on `mapEntitiesToMedia `. Other diffs related to running `yarn format`. --- .../gatsby-source-wordpress/src/normalize.js | 30 +++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/packages/gatsby-source-wordpress/src/normalize.js b/packages/gatsby-source-wordpress/src/normalize.js index 35f42a453c60c..7ce2f6db59179 100644 --- a/packages/gatsby-source-wordpress/src/normalize.js +++ b/packages/gatsby-source-wordpress/src/normalize.js @@ -223,11 +223,13 @@ exports.mapTagsCategoriesToTaxonomies = entities => return e }) -exports.mapElementsToParent = entities => entities.map(e => { +exports.mapElementsToParent = entities => + entities.map(e => { if (e.wordpress_parent) { // Create parent_element with a link to the parent node of type. - e.parent_element___NODE = entities - .find(t => t.wordpress_id === e.wordpress_parent && t.__type === e.__type).id + e.parent_element___NODE = entities.find( + t => t.wordpress_id === e.wordpress_parent && t.__type === e.__type + ).id } return e }) @@ -278,7 +280,7 @@ exports.searchReplaceContentUrls = function({ }) } -exports.mapEntitiesToMedia = entities => { +exports.mapEntitiesToMedia = (entities, extraMediasRegex) => { const media = entities.filter(e => e.__type === `wordpress__wp_media`) return entities.map(e => { @@ -298,7 +300,11 @@ exports.mapEntitiesToMedia = entities => { const photoRegex = /\.(gif|jpg|jpeg|tiff|png)$/i const isPhotoUrl = filename => _.isString(filename) && photoRegex.test(filename) - const isPhotoUrlAlreadyProcessed = key => key == `source_url` + const isOtherMediaUrl = filename => + _.isString(filename) && + extraMediasRegex && + extraMediasRegex.test(filename) + const isMediaUrlAlreadyProcessed = key => key == `source_url` const isFeaturedMedia = (value, key) => (_.isNumber(value) || _.isBoolean(value)) && key === `featured_media` // ACF Gallery and similarly shaped arrays @@ -319,7 +325,19 @@ exports.mapEntitiesToMedia = entities => { : null, deleteField: true, } - } else if (isPhotoUrl(value) && !isPhotoUrlAlreadyProcessed(key)) { + } else if (isPhotoUrl(value) && !isMediaUrlAlreadyProcessed(key)) { + const mediaNodeID = getMediaItemID( + media.find(m => m.source_url === value) + ) + return { + mediaNodeID, + deleteField: !!mediaNodeID, + } + } else if ( + extraMediasRegex && + isOtherMediaUrl(value) && + !isMediaUrlAlreadyProcessed(key) + ) { const mediaNodeID = getMediaItemID( media.find(m => m.source_url === value) ) From c76489e7bf832753f3c6c8a53338931b260a91f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Fichot?= Date: Tue, 3 Apr 2018 15:11:32 +0200 Subject: [PATCH 03/11] Format rules applied --- packages/gatsby-source-wordpress/src/fetch.js | 42 +++++++++++++++---- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/packages/gatsby-source-wordpress/src/fetch.js b/packages/gatsby-source-wordpress/src/fetch.js index 6305afd5b59c0..daef071fdda51 100644 --- a/packages/gatsby-source-wordpress/src/fetch.js +++ b/packages/gatsby-source-wordpress/src/fetch.js @@ -207,7 +207,15 @@ async function fetchData({ if (_verbose) console.time(`Fetching the ${type} took`) let routeResponse = await getPages( - { url, _perPage, _hostingWPCOM, _auth, _accessToken, _verbose, _concurrentRequests }, + { + url, + _perPage, + _hostingWPCOM, + _auth, + _accessToken, + _verbose, + _concurrentRequests, + }, 1 ) @@ -270,7 +278,15 @@ async function fetchData({ * @returns */ async function getPages( - { url, _perPage, _hostingWPCOM, _auth, _accessToken, _concurrentRequests, _verbose }, + { + url, + _perPage, + _hostingWPCOM, + _auth, + _accessToken, + _concurrentRequests, + _verbose, + }, page = 1 ) { try { @@ -320,9 +336,13 @@ async function getPages( } // We got page 1, now we want pages 2 through totalPages - const pageOptions = _.range(2, totalPages + 1).map(getPage => getOptions(getPage)) + const pageOptions = _.range(2, totalPages + 1).map(getPage => + getOptions(getPage) + ) - const pages = await requestInQueue(pageOptions, { concurrent: _concurrentRequests }) + const pages = await requestInQueue(pageOptions, { + concurrent: _concurrentRequests, + }) const pageData = pages.map(page => page.data) pageData.forEach(list => { @@ -383,10 +403,17 @@ function getValidRoutes({ console.log( colorized.out(`Invalid route.`, colorized.color.Font.FgRed) ) - } else if (_excludedRoutes.some(excludedRoute => minimatch(routePath, excludedRoute))) { + } else if ( + _excludedRoutes.some(excludedRoute => + minimatch(routePath, excludedRoute) + ) + ) { if (_verbose) console.log( - colorized.out(`Excluded route from excludedRoutes pattern.`, colorized.color.Font.FgYellow) + colorized.out( + `Excluded route from excludedRoutes pattern.`, + colorized.color.Font.FgYellow + ) ) } else { if (_verbose) @@ -475,8 +502,7 @@ const getRawEntityType = route => * @param {any} baseUrl The base site URL that should be removed * @param {any} fullUrl The full URL to retrieve the route path from */ -const getRoutePath = (baseUrl, fullUrl) => - fullUrl.replace(baseUrl, ``) +const getRoutePath = (baseUrl, fullUrl) => fullUrl.replace(baseUrl, ``) /** * Extract the route manufacturer From 0af172b2eff1b927188c4758d94dbbd05d0f1381 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Fichot?= Date: Tue, 3 Apr 2018 15:17:44 +0200 Subject: [PATCH 04/11] Update README.md --- packages/gatsby-source-wordpress/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/gatsby-source-wordpress/README.md b/packages/gatsby-source-wordpress/README.md index ed9d893c2f083..a69bf1cc18627 100644 --- a/packages/gatsby-source-wordpress/README.md +++ b/packages/gatsby-source-wordpress/README.md @@ -97,6 +97,10 @@ plugins: [ // Example: `["/*/*/comments", "/yoast/**"]` will exclude routes ending in `comments` and // all routes that begin with `yoast` from fetch. excludedRoutes: ["/*/*/comments", "/yoast/**"], + // Include other media files. + // You can provide any regex that will be matched against the file URL. + // Example : /\.(svg)$/i will include SVG files. + extraMediasRegex: /\.(svg)$/i, }, }, ]; From b8de24ec73f9ccaf52ab56dacfa0243712b22041 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Fichot?= Date: Tue, 3 Apr 2018 15:41:10 +0200 Subject: [PATCH 05/11] Refactor for DRY --- packages/gatsby-source-wordpress/src/normalize.js | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/packages/gatsby-source-wordpress/src/normalize.js b/packages/gatsby-source-wordpress/src/normalize.js index 7ce2f6db59179..aecfe9c5b1b2a 100644 --- a/packages/gatsby-source-wordpress/src/normalize.js +++ b/packages/gatsby-source-wordpress/src/normalize.js @@ -325,17 +325,8 @@ exports.mapEntitiesToMedia = (entities, extraMediasRegex) => { : null, deleteField: true, } - } else if (isPhotoUrl(value) && !isMediaUrlAlreadyProcessed(key)) { - const mediaNodeID = getMediaItemID( - media.find(m => m.source_url === value) - ) - return { - mediaNodeID, - deleteField: !!mediaNodeID, - } } else if ( - extraMediasRegex && - isOtherMediaUrl(value) && + (isPhotoUrl(value) || (extraMediasRegex && isOtherMediaUrl(value))) && !isMediaUrlAlreadyProcessed(key) ) { const mediaNodeID = getMediaItemID( From 34fb0dd09c2b4cb134b39138084a8fcebef5a2d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Fichot?= Date: Wed, 4 Apr 2018 15:29:49 +0200 Subject: [PATCH 06/11] Removed parameter & example from README.md --- packages/gatsby-source-wordpress/README.md | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/packages/gatsby-source-wordpress/README.md b/packages/gatsby-source-wordpress/README.md index a69bf1cc18627..0708216da6040 100644 --- a/packages/gatsby-source-wordpress/README.md +++ b/packages/gatsby-source-wordpress/README.md @@ -42,16 +42,16 @@ We welcome PRs adding support for data from other plugins. // In your gatsby-config.js plugins: [ /* - * Gatsby's data processing layer begins with “source” - * plugins. Here the site sources its data from Wordpress. - */ + * Gatsby's data processing layer begins with “source” + * plugins. Here the site sources its data from Wordpress. + */ { resolve: "gatsby-source-wordpress", options: { /* - * The base URL of the Wordpress site without the trailingslash and the protocol. This is required. - * Example : 'gatsbyjsexamplewordpress.wordpress.com' or 'www.example-site.com' - */ + * The base URL of the Wordpress site without the trailingslash and the protocol. This is required. + * Example : 'gatsbyjsexamplewordpress.wordpress.com' or 'www.example-site.com' + */ baseUrl: "gatsbyjsexamplewordpress.wordpress.com", // The protocol. This can be http or https. protocol: "http", @@ -97,10 +97,6 @@ plugins: [ // Example: `["/*/*/comments", "/yoast/**"]` will exclude routes ending in `comments` and // all routes that begin with `yoast` from fetch. excludedRoutes: ["/*/*/comments", "/yoast/**"], - // Include other media files. - // You can provide any regex that will be matched against the file URL. - // Example : /\.(svg)$/i will include SVG files. - extraMediasRegex: /\.(svg)$/i, }, }, ]; From d302bb97087fa45c887a97376fa8d958b17cbf02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Fichot?= Date: Wed, 4 Apr 2018 15:30:44 +0200 Subject: [PATCH 07/11] Removed parameter from gatsby-node.js --- packages/gatsby-source-wordpress/src/gatsby-node.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/gatsby-source-wordpress/src/gatsby-node.js b/packages/gatsby-source-wordpress/src/gatsby-node.js index bd1084e698f42..b9fc54f522f8f 100644 --- a/packages/gatsby-source-wordpress/src/gatsby-node.js +++ b/packages/gatsby-source-wordpress/src/gatsby-node.js @@ -32,7 +32,6 @@ exports.sourceNodes = async ( searchAndReplaceContentUrls = {}, concurrentRequests = 10, excludedRoutes = [], - extraMediasRegex, } ) => { const { createNode } = boundActionCreators @@ -92,7 +91,7 @@ exports.sourceNodes = async ( entities = normalize.mapTagsCategoriesToTaxonomies(entities) // Creates links from entities to media nodes - entities = normalize.mapEntitiesToMedia(entities, extraMediasRegex) + entities = normalize.mapEntitiesToMedia(entities) // Downloads media files and removes "sizes" data as useless in Gatsby context. entities = await normalize.downloadMediaFiles({ From ced85b2d0c33099ca3e47c14922e8ff0de51c729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Fichot?= Date: Wed, 4 Apr 2018 15:33:35 +0200 Subject: [PATCH 08/11] Abstract media checking to URL in normalize.js --- .../gatsby-source-wordpress/src/normalize.js | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/packages/gatsby-source-wordpress/src/normalize.js b/packages/gatsby-source-wordpress/src/normalize.js index aecfe9c5b1b2a..24e2d9045686d 100644 --- a/packages/gatsby-source-wordpress/src/normalize.js +++ b/packages/gatsby-source-wordpress/src/normalize.js @@ -280,7 +280,7 @@ exports.searchReplaceContentUrls = function({ }) } -exports.mapEntitiesToMedia = (entities, extraMediasRegex) => { +exports.mapEntitiesToMedia = entities => { const media = entities.filter(e => e.__type === `wordpress__wp_media`) return entities.map(e => { @@ -297,13 +297,7 @@ exports.mapEntitiesToMedia = (entities, extraMediasRegex) => { ? true : false - const photoRegex = /\.(gif|jpg|jpeg|tiff|png)$/i - const isPhotoUrl = filename => - _.isString(filename) && photoRegex.test(filename) - const isOtherMediaUrl = filename => - _.isString(filename) && - extraMediasRegex && - extraMediasRegex.test(filename) + const isURL = value => _.isString(value) && value.startsWith(`http`) const isMediaUrlAlreadyProcessed = key => key == `source_url` const isFeaturedMedia = (value, key) => (_.isNumber(value) || _.isBoolean(value)) && key === `featured_media` @@ -314,7 +308,7 @@ exports.mapEntitiesToMedia = (entities, extraMediasRegex) => { // Try to get media node from value: // - special case - check if key is featured_media and value is photo ID - // - check if value is photo url + // - check if value is media url // - check if value is ACF Image Object // - check if value is ACF Gallery const getMediaFromValue = (value, key) => { @@ -325,10 +319,7 @@ exports.mapEntitiesToMedia = (entities, extraMediasRegex) => { : null, deleteField: true, } - } else if ( - (isPhotoUrl(value) || (extraMediasRegex && isOtherMediaUrl(value))) && - !isMediaUrlAlreadyProcessed(key) - ) { + } else if (isURL(value) && !isMediaUrlAlreadyProcessed(key)) { const mediaNodeID = getMediaItemID( media.find(m => m.source_url === value) ) From 8409a3120bb17e995cba06540c5a3968fbbaebc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Fichot?= Date: Wed, 4 Apr 2018 15:34:40 +0200 Subject: [PATCH 09/11] Format request-in-queue.js --- .../gatsby-source-wordpress/src/request-in-queue.js | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/gatsby-source-wordpress/src/request-in-queue.js b/packages/gatsby-source-wordpress/src/request-in-queue.js index b462e5022a98f..357bb215acb62 100644 --- a/packages/gatsby-source-wordpress/src/request-in-queue.js +++ b/packages/gatsby-source-wordpress/src/request-in-queue.js @@ -35,14 +35,16 @@ async function handleQueue(task, cb) { * @param {Options} opts Options that will be given to better-queue * @return {Promise} Resolves with the accumulated values from the tasks */ -module.exports = function requestInQueue (tasks, opts = {}) { +module.exports = function requestInQueue(tasks, opts = {}) { return new Promise((res, rej) => { const q = new Queue(handleQueue, { ..._defaults, ...opts }) - const taskMap = new Map(tasks.map((t) => { - q.push(t) - return [t.url, null] - })) + const taskMap = new Map( + tasks.map(t => { + q.push(t) + return [t.url, null] + }) + ) q.on(`task_failed`, (id, err) => { rej(`${id} failed with err: ${err}`) From 81beb74f11b5eaedb9157dc97e164d490ee1be85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Fichot?= Date: Wed, 4 Apr 2018 15:35:21 +0200 Subject: [PATCH 10/11] Format __tests__/request-in-queue.js --- .../src/__tests__/request-in-queue.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/gatsby-source-wordpress/src/__tests__/request-in-queue.js b/packages/gatsby-source-wordpress/src/__tests__/request-in-queue.js index 4c1ffd9a8f9ce..f5c7bb7fa5f91 100644 --- a/packages/gatsby-source-wordpress/src/__tests__/request-in-queue.js +++ b/packages/gatsby-source-wordpress/src/__tests__/request-in-queue.js @@ -4,7 +4,9 @@ const requestInQueue = require(`../request-in-queue`) const axios = require(`axios`) axios.mockImplementation(opts => { - if (opts.throw) { throw new Error(opts.throw) } + if (opts.throw) { + throw new Error(opts.throw) + } return opts.url.slice(opts.url.lastIndexOf(`/`) + 1) }) @@ -28,7 +30,7 @@ describe(`requestInQueue`, () => { it(`runs all requests in queue`, async () => { await requestInQueue(requests) - requests.forEach((req) => { + requests.forEach(req => { expect(axios).toHaveBeenCalledWith(req) }) }) From 2049c7480525e727dec4f576a4f57db61dedb510 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Fichot?= Date: Wed, 4 Apr 2018 15:46:51 +0200 Subject: [PATCH 11/11] Removed extra spaces in README.md --- packages/gatsby-source-wordpress/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/gatsby-source-wordpress/README.md b/packages/gatsby-source-wordpress/README.md index 0708216da6040..b1f756936c936 100644 --- a/packages/gatsby-source-wordpress/README.md +++ b/packages/gatsby-source-wordpress/README.md @@ -42,16 +42,16 @@ We welcome PRs adding support for data from other plugins. // In your gatsby-config.js plugins: [ /* - * Gatsby's data processing layer begins with “source” - * plugins. Here the site sources its data from Wordpress. - */ + * Gatsby's data processing layer begins with “source” + * plugins. Here the site sources its data from Wordpress. + */ { resolve: "gatsby-source-wordpress", options: { /* - * The base URL of the Wordpress site without the trailingslash and the protocol. This is required. - * Example : 'gatsbyjsexamplewordpress.wordpress.com' or 'www.example-site.com' - */ + * The base URL of the Wordpress site without the trailingslash and the protocol. This is required. + * Example : 'gatsbyjsexamplewordpress.wordpress.com' or 'www.example-site.com' + */ baseUrl: "gatsbyjsexamplewordpress.wordpress.com", // The protocol. This can be http or https. protocol: "http",