Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use mdn search api v1 vs parsing html from mdn.io redirects. #40

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 27 additions & 84 deletions src/plugins/mdn/mdnPlugin.js
Original file line number Diff line number Diff line change
@@ -1,74 +1,7 @@
const url = require('url');
const superagent = require('superagent');
const cheerio = require('cheerio');

/**
 * Builds a dash-separated slug from a list of words.
 *
 * Each word is trimmed and lower-cased, the words are joined with '-', and
 * every run of characters outside [a-zA-Z0-9] is collapsed into a single '-'.
 * Leading or trailing dashes produced by punctuation are kept as-is.
 *
 * @param {string[]} words - Words to combine into a slug.
 * @returns {string} The slug; an empty string for an empty list.
 */
function slugify(words) {
  const joined = words.map((word) => word.trim().toLowerCase()).join('-');

  // One pass is enough: after it, the only non-alphanumeric characters left
  // are isolated dashes, so repeating the same replacement changes nothing.
  return joined.replace(/[^a-zA-Z0-9]+/g, '-');
}

/**
 * Error type used when an MDN HTML page cannot be turned into usable text.
 * Callers tell it apart from unexpected failures with `instanceof`.
 */
class HtmlParseError extends Error {
  constructor(...args) {
    super(...args);
    // Without this, instances inherit the name "Error", which makes logs and
    // stack traces indistinguishable from generic errors.
    this.name = 'HtmlParseError';
  }
}

/**
 * Shortens an MDN page title by rewriting its " - <type> | MDN" suffix.
 *
 * - "JavaScript" suffixes are dropped entirely.
 * - "Web APIs" suffixes become ", DOM".
 * - Any other type is kept as ", <type>".
 *
 * Titles without a matching suffix are returned unchanged.
 *
 * @param {string} title - The raw page title.
 * @returns {string} The title with the suffix rewritten.
 */
function getMdnTitle(title) {
  return title.replace(/\s*-\s*(\w+\s*\w*)\s*\|\s*MDN/gi, (match, rawType) => {
    // For one-word types the greedy `\s*` inside the capture group swallows
    // the space before "|", so the raw capture is e.g. "JavaScript ". Trim it
    // so the comparisons below can match and no stray space is emitted.
    const type = rawType.trim();

    if (type === 'JavaScript') return '';
    if (type === 'Web APIs') return ', DOM';
    return `, ${type}`;
  });
}

/**
 * Extracts the title and the leading paragraph from an MDN page's HTML.
 *
 * The title comes from `<head><title>` (passed through getMdnTitle); the text
 * is the first `<p>` inside the first `#wikiArticle` element.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {{ text: string, title: string }} The extracted summary and title.
 * @throws {HtmlParseError} When no paragraph text is found; the message tells
 *   whether the page looks like an empty search result or is just unparseable.
 */
function extractFromHtml(html) {
  const $ = cheerio.load(html);
  const title = getMdnTitle($('head title').text());

  const firstParagraph = $('#wikiArticle').first().find('p').first();
  const text = firstParagraph.text();

  // Happy path: the article has a non-empty leading paragraph.
  if (text) {
    return { text, title };
  }

  // Nothing usable: inspect the whole body (whitespace collapsed) to decide
  // which error message describes the situation best.
  const bodyText = $('body').text().replace(/\s+/g, ' ');
  const noResultsPattern = /did not match any documents|No results containing all your search terms were found/;

  const message = noResultsPattern.test(bodyText)
    ? `No MDN page found with this search.`
    : `Failed to extract mdn text`;

  throw new HtmlParseError(message);
}

/**
 * Makes sure the response comes from the en-US version of an MDN docs page.
 *
 * If the path of `lastRedirect` starts with "/<lang>/docs/", the language
 * segment is replaced with "en-US". When that produces a different URL, the
 * rewrite is logged and the rewritten URL is fetched (following at most one
 * redirect); otherwise the original response is handed back untouched.
 *
 * @param {object} origRes - Response already obtained for `lastRedirect`.
 * @param {string} lastRedirect - URL the original request ended up at.
 * @returns {Promise<object>} The original response, or the response for the
 *   en-US URL when a rewrite was needed.
 */
async function fixLanguage(origRes, lastRedirect) {
  const parsed = url.parse(lastRedirect);

  // Swap the leading language segment for en-US, keeping the "/docs/" part.
  parsed.pathname = parsed.pathname.replace(/^\/(\w+)(\/docs\/)/, '/en-US$2');

  const englishUrl = url.format(parsed);

  // Same URL after rewriting: no extra request needed.
  if (englishUrl === lastRedirect) {
    return origRes;
  }

  console.error(`Translated MDN URL from "${lastRedirect}" to "${englishUrl}"`);
  const englishRes = await superagent.get(englishUrl).redirects(1);
  return englishRes;
}
// Base URL of the MDN site.
const mdnUrl = 'https://developer.mozilla.org';

// MDN search API (v1) endpoint for the en-US locale, built on the base URL.
const mdnSearchApiUrl = `${mdnUrl}/api/v1/search/en-US`;

const mdnPlugin = async (msg) => {
if (!msg.command) return;
Expand All @@ -79,41 +12,51 @@ const mdnPlugin = async (msg) => {
}
msg.handling();

const suffix = slugify(words.slice(1));
const initialUrl = `https://mdn.io/${suffix}`;
const query = new URLSearchParams({
q: words.slice(1).join(' '),
topic: 'javascript',
highlight: false,
});
const initialUrl = `${mdnSearchApiUrl}?${query}`;

let lastRedirect = initialUrl;
let res = null;

try {
res = await superagent
.get(initialUrl)
.set('accept-language', 'en-US,en;q=0.5')
.redirects(5)
.on('redirect', (redirect) => {
lastRedirect = redirect.headers.location;
});
.set('Accept', 'application/json')
.redirects(5);
} catch (e) {
// Rethrow if it's not an HTTP error
// Rethrow if it's not a document error
if (!e || !e.response) {
throw e;
}
}

if (res) {
res = await fixLanguage(res, lastRedirect).catch(() => null);
}

if (!res || !res.ok) {
msg.respondWithMention(`Try ${initialUrl} (couldn't fetch metadata)`);
return;
}

let pageData;
try {
pageData = extractFromHtml(res.text);
if (res.body.documents.length > 0) {
pageData = {
title: res.body.documents[0].title,
text: res.body.documents[0].excerpt,
url: `${mdnUrl}/${res.body.documents[0].slug}`,
};
}
else {
pageData = {
title: 'Not Found',
text: `Could not find anything on: ${words.slice(1).join(' ')}`,
url: '',
}
}
} catch (e) {
if (!(e instanceof HtmlParseError)) throw e;
if (!(e instanceof TypeError)) throw e;

msg.respond(`${initialUrl} - ${e.message}`);
return;
Expand All @@ -123,7 +66,7 @@ const mdnPlugin = async (msg) => {
if (response.length > 400) {
response = `${response.slice(0, 350).trim()}…`;
}
response += ` ${initialUrl}`;
response += ` ${pageData.url}`;

msg.respondWithMention(response);
};
Expand Down