Skip to content

Commit

Permalink
Replacing regex for XHTML that failed on big files with dumber test
Browse files Browse the repository at this point in the history
There are false positives that the change does not support well, but this should work for all well-formed XHTML documents.

This should solve a problem that is reported in the Solid data browser - https://github.com/solid/solid-ui/issues/118
  • Loading branch information
megoth authored and Arne Hassel committed Sep 17, 2019
1 parent 5c018ac commit 1c0e184
Showing 1 changed file with 22 additions and 7 deletions.
29 changes: 22 additions & 7 deletions src/fetcher.js
Original file line number Diff line number Diff line change
Expand Up @@ -282,18 +282,16 @@ class HTMLHandler extends Handler {

// We only handle XHTML so we have to figure out if this is XML
// log.info("Sniffing HTML " + xhr.resource + " for XHTML.")

if (responseText.match(/\s*<\?xml\s+version\s*=[^<>]+\?>/)) {
if (isXML(responseText)) {
fetcher.addStatus(options.req, "Has an XML declaration. We'll assume " +
"it's XHTML as the content-type was text/html.\n")

let xhtmlHandler = new XHTMLHandler(this.response)
return xhtmlHandler.parse(fetcher, responseText, options, response)
}

// DOCTYPE
// There is probably a smarter way to do this
if (responseText.match(/.*<!DOCTYPE\s+html[^<]+-\/\/W3C\/\/DTD XHTML[^<]+http:\/\/www.w3.org\/TR\/xhtml[^<]+>/)) {
// DOCTYPE html
if (isXHTML(responseText)) {
fetcher.addStatus(options.req,
'Has XHTML DOCTYPE. Switching to XHTMLHandler.\n')

Expand All @@ -302,7 +300,7 @@ class HTMLHandler extends Handler {
}

// xmlns
if (responseText.match(/[^(<html)]*<html\s+[^<]*xmlns=['"]http:\/\/www.w3.org\/1999\/xhtml["'][^<]*>/)) {
if (isXMLNS(responseText)) {
fetcher.addStatus(options.req,
'Has default namespace for XHTML, so switching to XHTMLHandler.\n')

Expand Down Expand Up @@ -340,7 +338,7 @@ class TextHandler extends Handler {
// We only speak dialects of XML right now. Is this XML?

// Look for an XML declaration
if (responseText.match(/\s*<\?xml\s+version\s*=[^<>]+\?>/)) {
if (isXML(responseText)) {
fetcher.addStatus(options.req, 'Warning: ' + options.resource +
" has an XML declaration. We'll assume " +
"it's XML but its content-type wasn't XML.\n")
Expand Down Expand Up @@ -411,6 +409,23 @@ const HANDLERS = {
RDFXMLHandler, XHTMLHandler, XMLHandler, HTMLHandler, TextHandler, N3Handler
}

/**
 * Cheap, regex-free test for an XHTML DOCTYPE declaration.
 *
 * Finds the first `<!DOCTYPE html` in the text, then the `>` that closes that
 * declaration, and reports whether the text in between mentions `XHTML`.
 *
 * @param {string} responseText - The document body to sniff.
 * @returns {boolean} True when a `<!DOCTYPE html ...>` declaration containing
 *   the token `XHTML` is present, false otherwise.
 */
function isXHTML (responseText) {
  const docTypeStart = responseText.indexOf('<!DOCTYPE html')
  if (docTypeStart === -1) {
    return false
  }
  // Look for the closing '>' starting at the DOCTYPE itself. Searching from
  // the beginning of the document would pick up the '>' of any earlier markup
  // (a leading comment or processing instruction) and reject the document.
  const docTypeEnd = responseText.indexOf('>', docTypeStart)
  if (docTypeEnd === -1) {
    return false
  }
  return responseText.slice(docTypeStart, docTypeEnd).includes('XHTML')
}

/**
 * Looks for an XML declaration (`<?xml version=... ?>`) in the text.
 *
 * The pattern is not anchored, so a declaration anywhere in the text counts.
 *
 * @param {string} responseText - The document body to sniff.
 * @returns {Array|null} The `String.prototype.match` result: a match array
 *   (truthy) when a declaration is found, otherwise null.
 */
function isXML (responseText) {
  const xmlDeclaration = /\s*<\?xml\s+version\s*=[^<>]+\?>/
  const found = responseText.match(xmlDeclaration)
  return found
}

/**
 * Looks for an `<html ...>` start tag that declares the XHTML default
 * namespace (`xmlns="http://www.w3.org/1999/xhtml"`, single or double quoted).
 *
 * Note: the leading `[^(<html)]*` in the pattern is a negated character
 * class, not a group; it is kept exactly as-is to preserve matching behaviour.
 *
 * @param {string} responseText - The document body to sniff.
 * @returns {Array|null} The `String.prototype.match` result: a match array
 *   (truthy) when such a tag is found, otherwise null.
 */
function isXMLNS (responseText) {
  const xhtmlNamespaceTag = /[^(<html)]*<html\s+[^<]*xmlns=['"]http:\/\/www.w3.org\/1999\/xhtml["'][^<]*>/
  const found = responseText.match(xhtmlNamespaceTag)
  return found
}

/** Fetcher
*
* The Fetcher object is a helper object for a quadstore
Expand Down

0 comments on commit 1c0e184

Please sign in to comment.