From 1c0e184d9a728d3d4941deb32840d3e31f3b0b7c Mon Sep 17 00:00:00 2001 From: Arne Hassel Date: Tue, 17 Sep 2019 16:29:24 +0200 Subject: [PATCH] Replacing regex for XHTML that failed on big files with dumber test There are false positives that the change does not support well, but this should work for all well-formed XHTML documents. This should solve a problem that is reported in the Solid data browser - https://github.com/solid/solid-ui/issues/118 --- src/fetcher.js | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/fetcher.js b/src/fetcher.js index 7c8c0faf0..5200efe44 100644 --- a/src/fetcher.js +++ b/src/fetcher.js @@ -282,8 +282,7 @@ class HTMLHandler extends Handler { // We only handle XHTML so we have to figure out if this is XML // log.info("Sniffing HTML " + xhr.resource + " for XHTML.") - - if (responseText.match(/\s*<\?xml\s+version\s*=[^<>]+\?>/)) { + if (isXML(responseText)) { fetcher.addStatus(options.req, "Has an XML declaration. We'll assume " + "it's XHTML as the content-type was text/html.\n") @@ -291,9 +290,8 @@ class HTMLHandler extends Handler { return xhtmlHandler.parse(fetcher, responseText, options, response) } - // DOCTYPE - // There is probably a smarter way to do this - if (responseText.match(/.*/)) { + // DOCTYPE html + if (isXHTML(responseText)) { fetcher.addStatus(options.req, 'Has XHTML DOCTYPE. Switching to XHTMLHandler.\n') @@ -302,7 +300,7 @@ class HTMLHandler extends Handler { } // xmlns - if (responseText.match(/[^(/)) { + if (isXMLNS(responseText)) { fetcher.addStatus(options.req, 'Has default namespace for XHTML, so switching to XHTMLHandler.\n') @@ -340,7 +338,7 @@ class TextHandler extends Handler { // We only speak dialects of XML right now. Is this XML? // Look for an XML declaration - if (responseText.match(/\s*<\?xml\s+version\s*=[^<>]+\?>/)) { + if (isXML(responseText)) { fetcher.addStatus(options.req, 'Warning: ' + options.resource + " has an XML declaration. We'll assume " + "it's XML but its content-type wasn't XML.\n") @@ -411,6 +409,23 @@ const HANDLERS = { RDFXMLHandler, XHTMLHandler, XMLHandler, HTMLHandler, TextHandler, N3Handler } +function isXHTML (responseText) { + const docTypeStart = responseText.indexOf('') + if (docTypeStart === -1 || docTypeEnd === -1 || docTypeStart > docTypeEnd) { + return false + } + return responseText.substr(docTypeStart, docTypeEnd - docTypeStart).indexOf('XHTML') !== -1 +} + +function isXML (responseText) { + return responseText.match(/\s*<\?xml\s+version\s*=[^<>]+\?>/) +} + +function isXMLNS (responseText) { + return responseText.match(/[^(/) +} + /** Fetcher * * The Fetcher object is a helper object for a quadstore