Skip to content

Commit

Permalink
Merge pull request #353 from linkeddata/xhtml-test
Browse files Browse the repository at this point in the history
Replacing regex for XHTML that failed on big files with dumber test
  • Loading branch information
megoth authored Oct 7, 2019
2 parents 5c018ac + 1c0e184 commit 0014d0c
Showing 1 changed file with 22 additions and 7 deletions.
29 changes: 22 additions & 7 deletions src/fetcher.js
Original file line number Diff line number Diff line change
Expand Up @@ -282,18 +282,16 @@ class HTMLHandler extends Handler {

// We only handle XHTML so we have to figure out if this is XML
// log.info("Sniffing HTML " + xhr.resource + " for XHTML.")

if (responseText.match(/\s*<\?xml\s+version\s*=[^<>]+\?>/)) {
if (isXML(responseText)) {
fetcher.addStatus(options.req, "Has an XML declaration. We'll assume " +
"it's XHTML as the content-type was text/html.\n")

let xhtmlHandler = new XHTMLHandler(this.response)
return xhtmlHandler.parse(fetcher, responseText, options, response)
}

// DOCTYPE
// There is probably a smarter way to do this
if (responseText.match(/.*<!DOCTYPE\s+html[^<]+-\/\/W3C\/\/DTD XHTML[^<]+http:\/\/www.w3.org\/TR\/xhtml[^<]+>/)) {
// DOCTYPE html
if (isXHTML(responseText)) {
fetcher.addStatus(options.req,
'Has XHTML DOCTYPE. Switching to XHTMLHandler.\n')

Expand All @@ -302,7 +300,7 @@ class HTMLHandler extends Handler {
}

// xmlns
if (responseText.match(/[^(<html)]*<html\s+[^<]*xmlns=['"]http:\/\/www.w3.org\/1999\/xhtml["'][^<]*>/)) {
if (isXMLNS(responseText)) {
fetcher.addStatus(options.req,
'Has default namespace for XHTML, so switching to XHTMLHandler.\n')

Expand Down Expand Up @@ -340,7 +338,7 @@ class TextHandler extends Handler {
// We only speak dialects of XML right now. Is this XML?

// Look for an XML declaration
if (responseText.match(/\s*<\?xml\s+version\s*=[^<>]+\?>/)) {
if (isXML(responseText)) {
fetcher.addStatus(options.req, 'Warning: ' + options.resource +
" has an XML declaration. We'll assume " +
"it's XML but its content-type wasn't XML.\n")
Expand Down Expand Up @@ -411,6 +409,23 @@ const HANDLERS = {
RDFXMLHandler, XHTMLHandler, XMLHandler, HTMLHandler, TextHandler, N3Handler
}

/**
 * Tests whether the document's `<!DOCTYPE html ...>` declaration mentions XHTML.
 *
 * Only the text between the first `<!DOCTYPE html` and the first `>` that
 * follows it is inspected, so an occurrence of "XHTML" elsewhere in the
 * document does not count. Both searches are case-sensitive.
 *
 * @param {string} responseText - The raw response body to sniff.
 * @returns {boolean} True when a DOCTYPE declaration is found and it contains "XHTML".
 */
function isXHTML (responseText) {
  const docTypeStart = responseText.indexOf('<!DOCTYPE html')
  if (docTypeStart === -1) {
    return false
  }
  // Look for the closing '>' from the DOCTYPE onwards: an earlier '>' (for
  // example from a leading comment or processing instruction) must not be
  // mistaken for the end of the declaration.
  const docTypeEnd = responseText.indexOf('>', docTypeStart)
  if (docTypeEnd === -1) {
    return false
  }
  return responseText.slice(docTypeStart, docTypeEnd).indexOf('XHTML') !== -1
}

/**
 * Looks for an XML declaration (`<?xml version=... ?>`) in the response text.
 *
 * The pattern is not anchored, so a declaration anywhere in the text matches.
 *
 * @param {string} responseText - The raw response body to sniff.
 * @returns {?Array} The `String.prototype.match` result: a match array when a
 *   declaration is found, otherwise `null`. Callers use it as a truthy flag.
 */
function isXML (responseText) {
  const xmlDeclaration = /\s*<\?xml\s+version\s*=[^<>]+\?>/
  const found = responseText.match(xmlDeclaration)
  return found
}

/**
 * Looks for an `<html ...>` start tag that declares the XHTML namespace
 * (`xmlns="http://www.w3.org/1999/xhtml"`, single or double quoted).
 *
 * The pattern starts directly at `<html`. The previous leading
 * `[^(<html)]*` was a negated character class rather than "anything but
 * <html"; it never affected whether a match existed but made the engine
 * rescan long runs of text from every start position, which is slow on big
 * files. The dots in the namespace URI are escaped so that only the exact
 * URI is accepted.
 *
 * @param {string} responseText - The raw response body to sniff.
 * @returns {?Array} The `String.prototype.match` result: a match array when
 *   such a tag is found, otherwise `null`. Callers use it as a truthy flag.
 */
function isXMLNS (responseText) {
  return responseText.match(/<html\s+[^<]*xmlns=['"]http:\/\/www\.w3\.org\/1999\/xhtml["'][^<]*>/)
}

/** Fetcher
*
* The Fetcher object is a helper object for a quadstore
Expand Down

0 comments on commit 0014d0c

Please sign in to comment.