From 88f4b475e9117d2b1bbd2f9cd41237f62050fc84 Mon Sep 17 00:00:00 2001 From: Anthony DeDominic Date: Sun, 24 Nov 2019 21:47:03 -0500 Subject: [PATCH 1/5] Use mdn search api v1 vs parsing html from mdn.io redirects. --- src/plugins/mdn/mdnPlugin.js | 85 +++++------------------------------- 1 file changed, 11 insertions(+), 74 deletions(-) diff --git a/src/plugins/mdn/mdnPlugin.js b/src/plugins/mdn/mdnPlugin.js index e44ddaa..5180820 100644 --- a/src/plugins/mdn/mdnPlugin.js +++ b/src/plugins/mdn/mdnPlugin.js @@ -1,75 +1,11 @@ const url = require('url'); const superagent = require('superagent'); -const cheerio = require('cheerio'); -function slugify(words) { - return words - .map((x) => x.trim().toLowerCase()) - .join('-') - .replace(/[^a-zA-Z0-9]+/g, '-') - .replace(/[^a-zA-Z0-9]+/g, '-'); -} +const mdnUrl = 'https://developer.mozilla.org' +const mdnSearchApiUrl = `${mdnUrl}/api/v1/search/en-US` class HtmlParseError extends Error {} -function getMdnTitle(title) { - return title.replace(/\s*-\s*(\w+\s*\w*)\s*\|\s*MDN/gi, (m, _type) => { - let type = _type; - if (type === 'JavaScript') type = null; - if (type === 'Web APIs') type = 'DOM'; - return type ? `, ${type}` : ''; - }); -} - -function extractFromHtml(html) { - const $ = cheerio.load(html); - const title = getMdnTitle($('head title').text()); - const text = $('#wikiArticle') - .first() - .find('p') - .first() - .text(); - - if (!text) { - const bodyText = $('body') - .text() - .replace(/\s+/g, ' '); - - if ( - /did not match any documents|No results containing all your search terms were found/.test( - bodyText, - ) - ) { - throw new HtmlParseError(`No MDN page found with this search.`); - } - throw new HtmlParseError(`Failed to extract mdn text`); - } - return { text, title }; -} - -async function fixLanguage(origRes, lastRedirect) { - let res = origRes; - - // attempt to rewrite the language part of the URL - const urlParts = url.parse(lastRedirect); - urlParts.pathname = urlParts.pathname.replace( - /^\/(\w+)(\/docs\/)/, - (m, lang, rest) => { - return `/en-US${rest}`; - }, - ); - - // If we changed the URL, we need to do another request for it - const fixedUrl = url.format(urlParts); - - if (fixedUrl !== lastRedirect) { - console.error(`Translated MDN URL from "${lastRedirect}" to "${fixedUrl}"`); - res = await superagent.get(fixedUrl).redirects(1); - } - - return res; -} - const mdnPlugin = async (msg) => { if (!msg.command) return; @@ -79,8 +15,8 @@ const mdnPlugin = async (msg) => { } msg.handling(); - const suffix = slugify(words.slice(1)); - const initialUrl = `https://mdn.io/${suffix}`; + const query = new URLSearchParams({ q: words.slice(1).join(' '), topic: 'js' }); + const initialUrl = `${mdnSearchApiUrl}?${query}`; let lastRedirect = initialUrl; let res = null; @@ -89,6 +25,7 @@ const mdnPlugin = async (msg) => { res = await superagent .get(initialUrl) .set('accept-language', 'en-US,en;q=0.5') + .set('Accept', 'application/json') .redirects(5) .on('redirect', (redirect) => { lastRedirect = redirect.headers.location; @@ -100,10 +37,6 @@ const mdnPlugin = async (msg) => { } } - if (res) { - res = await fixLanguage(res, lastRedirect).catch(() => null); - } - if (!res || !res.ok) { msg.respondWithMention(`Try ${initialUrl} (couldn't fetch metadata)`); return; @@ -111,7 +44,11 @@ const mdnPlugin = async (msg) => { let pageData; try { - pageData = extractFromHtml(res.text); + pageData = { + title: res.body.documents[0].title, + text: res.body.documents[0].excerpt.replace(/<\/?mark>/g, ''), + url: `${mdnUrl}/${res.body.documents[0].slug}`, + }; } catch (e) { if (!(e instanceof HtmlParseError)) throw e; @@ -123,7 +60,7 @@ const mdnPlugin = async (msg) => { if (response.length > 400) { response = `${response.slice(0, 350).trim()}…`; } - response += ` ${initialUrl}`; + response += ` ${pageData.url || initialUrl}`; msg.respondWithMention(response); }; From 243ae02c74613fcb5cd8c65aba18758a93399fe2 Mon Sep 17 00:00:00 2001 From: Anthony DeDominic Date: Sun, 24 Nov 2019 22:02:03 -0500 Subject: [PATCH 2/5] only throw on type error. basically when the json output does not match expectations. --- src/plugins/mdn/mdnPlugin.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/plugins/mdn/mdnPlugin.js b/src/plugins/mdn/mdnPlugin.js index 5180820..9596a67 100644 --- a/src/plugins/mdn/mdnPlugin.js +++ b/src/plugins/mdn/mdnPlugin.js @@ -4,8 +4,6 @@ const superagent = require('superagent'); const mdnUrl = 'https://developer.mozilla.org' const mdnSearchApiUrl = `${mdnUrl}/api/v1/search/en-US` -class HtmlParseError extends Error {} - const mdnPlugin = async (msg) => { if (!msg.command) return; @@ -50,7 +48,7 @@ const mdnPlugin = async (msg) => { url: `${mdnUrl}/${res.body.documents[0].slug}`, }; } catch (e) { - if (!(e instanceof HtmlParseError)) throw e; + if (!(e instanceof TypeError)) throw e; msg.respond(`${initialUrl} - ${e.message}`); return; From f0a1466773127015445e7da06596b52e00243e5e Mon Sep 17 00:00:00 2001 From: Anthony DeDominic Date: Sun, 24 Nov 2019 22:12:45 -0500 Subject: [PATCH 3/5] * remove unused code * pass the appropriate query param to remove tags. --- src/plugins/mdn/mdnPlugin.js | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/plugins/mdn/mdnPlugin.js b/src/plugins/mdn/mdnPlugin.js index 9596a67..72c3ab0 100644 --- a/src/plugins/mdn/mdnPlugin.js +++ b/src/plugins/mdn/mdnPlugin.js @@ -1,4 +1,3 @@ -const url = require('url'); const superagent = require('superagent'); const mdnUrl = 'https://developer.mozilla.org' @@ -13,10 +12,13 @@ const mdnPlugin = async (msg) => { } msg.handling(); - const query = new URLSearchParams({ q: words.slice(1).join(' '), topic: 'js' }); + const query = new URLSearchParams({ + q: words.slice(1).join(' '), + topic: 'js', + highlight: false, + }); const initialUrl = `${mdnSearchApiUrl}?${query}`; - let lastRedirect = initialUrl; let res = null; try { @@ -24,10 +26,7 @@ const mdnPlugin = async (msg) => { .get(initialUrl) .set('accept-language', 'en-US,en;q=0.5') .set('Accept', 'application/json') - .redirects(5) - .on('redirect', (redirect) => { - lastRedirect = redirect.headers.location; - }); + .redirects(5); } catch (e) { // Rethrow if it's not an HTTP error if (!e || !e.response) { @@ -44,7 +43,7 @@ const mdnPlugin = async (msg) => { try { pageData = { title: res.body.documents[0].title, - text: res.body.documents[0].excerpt.replace(/<\/?mark>/g, ''), + text: res.body.documents[0].excerpt, url: `${mdnUrl}/${res.body.documents[0].slug}`, }; } catch (e) { From fd744a20ad6d093172a134f6d939b77f89bdd5a1 Mon Sep 17 00:00:00 2001 From: Anthony DeDominic Date: Sun, 24 Nov 2019 22:22:21 -0500 Subject: [PATCH 4/5] Handle case where search returns no results. --- src/plugins/mdn/mdnPlugin.js | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/plugins/mdn/mdnPlugin.js b/src/plugins/mdn/mdnPlugin.js index 72c3ab0..7bcbdd1 100644 --- a/src/plugins/mdn/mdnPlugin.js +++ b/src/plugins/mdn/mdnPlugin.js @@ -41,11 +41,20 @@ const mdnPlugin = async (msg) => { let pageData; try { - pageData = { - title: res.body.documents[0].title, - text: res.body.documents[0].excerpt, - url: `${mdnUrl}/${res.body.documents[0].slug}`, - }; + if (res.body.documents.length > 0) { + pageData = { + title: res.body.documents[0].title, + text: res.body.documents[0].excerpt, + url: `${mdnUrl}/${res.body.documents[0].slug}`, + }; + } + else { + pageData = { + title: 'Not Found', + text: `Could not find anything on: ${words.slice(1).join(' ')}`, + url: '', + } + } } catch (e) { if (!(e instanceof TypeError)) throw e; @@ -57,7 +66,7 @@ const mdnPlugin = async (msg) => { if (response.length > 400) { response = `${response.slice(0, 350).trim()}…`; } - response += ` ${pageData.url || initialUrl}`; + response += ` ${pageData.url}`; msg.respondWithMention(response); }; From bd7a72f57c0eb525218d7497d2dfedbf3c4dbdf2 Mon Sep 17 00:00:00 2001 From: Anthony DeDominic Date: Sun, 24 Nov 2019 22:34:47 -0500 Subject: [PATCH 5/5] fix topic name from "js" to "javascript". --- src/plugins/mdn/mdnPlugin.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/mdn/mdnPlugin.js b/src/plugins/mdn/mdnPlugin.js index 7bcbdd1..959a04b 100644 --- a/src/plugins/mdn/mdnPlugin.js +++ b/src/plugins/mdn/mdnPlugin.js @@ -14,7 +14,7 @@ const mdnPlugin = async (msg) => { const query = new URLSearchParams({ q: words.slice(1).join(' '), - topic: 'js', + topic: 'javascript', highlight: false, }); const initialUrl = `${mdnSearchApiUrl}?${query}`; @@ -28,7 +28,7 @@ const mdnPlugin = async (msg) => { .set('Accept', 'application/json') .redirects(5); } catch (e) { - // Rethrow if it's not an HTTP error + // Rethrow if it's not a document error if (!e || !e.response) { throw e; }