Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Documents with an Internal Subset DOCTYPE shouldn't stop trying to bind #519

Merged
merged 1 commit into from
Jul 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*******************************************************************************
* Copyright (c) 2019 Red Hat Inc. and others.
* All rights reserved. This program and the accompanying materials
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v20.html
*
* Contributors:
* Red Hat Inc. - initial API and implementation
*******************************************************************************/
package org.eclipse.lsp4xml.extensions.contentmodel.participants.diagnostics;

import org.apache.xerces.impl.dtd.XMLDTDValidator;
import org.apache.xerces.parsers.XIncludeAwareParserConfiguration;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLComponentManager;
import org.apache.xerces.xni.parser.XMLConfigurationException;

/**
* Custom Xerces XML parser configuration to :
*
* <ul>
* <li>disable only DTD validation if required</li>
* </ul>
*
*/
class LSPXMLParserConfiguration extends XIncludeAwareParserConfiguration {

private final boolean disableDTDValidation;

public LSPXMLParserConfiguration(boolean disableDTDValidation) {
this.disableDTDValidation = disableDTDValidation;
}

@Override
protected void reset() throws XNIException {
super.reset();
if (disableDTDValidation) {
// reset again DTD validator by setting "http://xml.org/sax/features/validation"
// to false.
disableDTDValidation();
}
}

private void disableDTDValidation() {
XMLDTDValidator validator = (XMLDTDValidator) super.getProperty(DTD_VALIDATOR);
if (validator != null) {
// Calling XMLDTDValidator#setFeature("http://xml.org/sax/features/validation",
// false) does nothing.
// The only way to set "http://xml.org/sax/features/validation" to false is to
// call XMLDTDValidator#reset with the proper component.
// We need to create a new component and not use the current configuration
// otherwise set
// "http://xml.org/sax/features/validation" to the configuration
// will update the other component and will disable validation too for XML
// Schema
XMLComponentManager disableDTDComponent = new XMLComponentManager() {

@Override
public Object getProperty(String propertyId) throws XMLConfigurationException {
return LSPXMLParserConfiguration.this.getProperty(propertyId);
}

@Override
public boolean getFeature(String featureId) throws XMLConfigurationException {
if ("http://xml.org/sax/features/validation".equals(featureId)) {
return false;
}
return LSPXMLParserConfiguration.this.getFeature(featureId);
}
};
validator.reset(disableDTDComponent);
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

import org.apache.xerces.impl.XMLEntityManager;
import org.apache.xerces.parsers.SAXParser;
import org.apache.xerces.parsers.XIncludeAwareParserConfiguration;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLEntityResolver;
import org.apache.xerces.xni.parser.XMLInputSource;
Expand Down Expand Up @@ -61,21 +60,20 @@ public class XMLValidator {
public static void doDiagnostics(DOMDocument document, XMLEntityResolver entityResolver,
List<Diagnostic> diagnostics, ContentModelSettings contentModelSettings, CancelChecker monitor) {
try {
// it should be better to cache XML Schema with XMLGrammarCachingConfiguration,
// It should be better to cache XML Schema with XMLGrammarCachingConfiguration,
// but we cannot use
// XMLGrammarCachingConfiguration because cache is done with target namespaces.
// There are conflicts when
// 2 XML Schemas don't define target namespaces.
XMLParserConfiguration configuration = new XIncludeAwareParserConfiguration(); // new
// XMLGrammarCachingConfiguration();
LSPXMLParserConfiguration configuration = new LSPXMLParserConfiguration(
isDisableOnlyDTDValidation(document));

if (entityResolver != null) {
configuration.setProperty("http://apache.org/xml/properties/internal/entity-resolver", entityResolver); //$NON-NLS-1$
}

final LSPErrorReporterForXML reporter = new LSPErrorReporterForXML(document, diagnostics);
boolean externalDTDValid = checkExternalDTD(document, reporter, configuration);

SAXParser parser = new SAXParser(configuration);
// Add LSP error reporter to fill LSP diagnostics from Xerces errors
parser.setProperty("http://apache.org/xml/properties/internal/error-reporter", reporter);
Expand Down Expand Up @@ -124,6 +122,31 @@ public static void doDiagnostics(DOMDocument document, XMLEntityResolver entityR
}
}

/**
* Returns true is DTD validation must be disabled and false otherwise.
*
* @param document the DOM document
* @return true is DTD validation must be disabled and false otherwise.
*/
private static boolean isDisableOnlyDTDValidation(DOMDocument document) {
// When XML declares a DOCTYPE only to define entities like
// <!DOCTYPE root [
// <!ENTITY foo "Bar">
// ]>
// Xerces try to validate the XML and report an error on each XML elements
// because they are not declared in the DOCTYPE.
// In this case, DTD validation must be disabled.
if (!document.hasDTD()) {
return false;
}
DOMDocumentType docType = document.getDoctype();
if (docType.getKindNode() != null) {
return false;
}
// Disable the DTD validation only if there are not <!ELEMENT or an <!ATTRLIST
return !docType.getChildren().stream().anyMatch(node -> node.isDTDElementDecl() || node.isDTDAttListDecl());
}

/**
* Returns true if the given document has a valid DTD (or doesn't define a DTD)
* and false otherwise.
Expand All @@ -143,7 +166,7 @@ private static boolean checkExternalDTD(DOMDocument document, LSPErrorReporterFo
if (docType.getKindNode() == null) {
return true;
}

// When XML is bound with a DTD path which doesn't exist, Xerces throws an
// IOException which breaks the validation of XML syntax instead of reporting it
// (like XML Schema). Here we parse only the
Expand All @@ -159,7 +182,7 @@ private static boolean checkExternalDTD(DOMDocument document, LSPErrorReporterFo
String xml = document.getText().substring(0, end);
xml += "<root/>";
try {

// Customize the entity manager to collect the error when DTD doesn't exist.
XMLEntityManager entityManager = new XMLEntityManager() {
@Override
Expand All @@ -185,7 +208,7 @@ public String setupCurrentEntity(String name, XMLInputSource xmlInputSource, boo
}
};
entityManager.reset(configuration);

SAXParser parser = new SAXParser(configuration);
parser.setProperty("http://apache.org/xml/properties/internal/entity-manager", entityManager);
InputSource inputSource = new InputSource();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,40 @@ public void MSG_OPEN_PAREN_OR_ELEMENT_TYPE_REQUIRED_IN_CHILDREN() throws Excepti
d(2, 19, 20, DTDErrorCode.MSG_OPEN_PAREN_OR_ELEMENT_TYPE_REQUIRED_IN_CHILDREN));
}

@Test
public void disableDTDValidationWhenNoElementDecl() throws Exception {
String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" + //
"<!DOCTYPE article [\r\n" + //
" <!ENTITY nbsp \"entity-value\">\r\n" + //
"]> \r\n" + //
"<article>\r\n" + //
" &nbsp;\r\n" + //
"</article>";
// No error, here DOCTYPE declares only ENTITY -> DTD validation is disabled
XMLAssert.testDiagnosticsFor(xml);

xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" + //
"<!DOCTYPE article [\r\n" + //
" <!ENTITY nbsp \"entity-value\">\r\n" + //
" <!ELEMENT element-name (#PCDATA)>\r\n" + //
"]> \r\n" + //
"<article>\r\n" + //
" &nbsp;\r\n" + //
"</article>";
// Error -> DTD validation defines a !ELEMENT entity-value which doesn't match
// the article root element
XMLAssert.testDiagnosticsFor(xml, d(5, 1, 8, DTDErrorCode.MSG_ELEMENT_NOT_DECLARED));

xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" + //
"<!DOCTYPE article [\r\n" + //
" <!ENTITY nbsp \"entity-value\">\r\n" + //
" <!ELEMENT article (#PCDATA)>\r\n" + //
"]> \r\n" + //
"<article>\r\n" + //
" &nbsp;\r\n" + //
"</article>";
// No error, DTD validation is done and XML matches the article element
// declaration
XMLAssert.testDiagnosticsFor(xml);
}
}