Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

XML entities declared in a DTD are marked undeclared after XML file #36

Merged
merged 1 commit into from
Apr 22, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,11 @@ protected Range toLSPRange(XMLLocator location, String key, Object[] arguments,
}
return null;
}

@Override
protected boolean isIgnoreFatalError(String key) {
    // The validation must keep going when the fatal error is an
    // EntityNotDeclared error; any other key is not ignored here.
    final String entityNotDeclaredKey = DTDErrorCode.EntityNotDeclared.name();
    return entityNotDeclaredKey.equals(key);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
/*******************************************************************************
* Copyright (c) 2020 Red Hat Inc. and others.
* All rights reserved. This program and the accompanying materials
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v20.html
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Red Hat Inc. - initial API and implementation
*******************************************************************************/
package org.eclipse.lemminx.extensions.contentmodel.participants.diagnostics;

import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.text.MessageFormat;

import org.apache.xerces.impl.Constants;
import org.apache.xerces.impl.XMLEntityManager;
import org.apache.xerces.impl.dtd.DTDGrammar;
import org.apache.xerces.impl.dtd.XMLDTDDescription;
import org.apache.xerces.impl.dtd.XMLEntityDecl;
import org.apache.xerces.impl.validation.ValidationManager;
import org.apache.xerces.parsers.SAXParser;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.NamespaceContext;
import org.apache.xerces.xni.XMLLocator;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.grammars.XMLGrammarPool;
import org.apache.xerces.xni.parser.XMLParserConfiguration;
import org.eclipse.lemminx.commons.BadLocationException;
import org.eclipse.lemminx.dom.DOMDocument;
import org.eclipse.lemminx.dom.DOMDocumentType;
import org.eclipse.lemminx.extensions.contentmodel.participants.DTDErrorCode;
import org.eclipse.lsp4j.DiagnosticSeverity;
import org.eclipse.lsp4j.Range;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;

/**
 * Extension of the Xerces SAX parser which works around some Xerces bugs:
 *
 * <ul>
 * <li>[BUG 1]: when the DTD file path declared in the DOCTYPE is wrong, Xerces
 * breaks all validation, including syntax validation.</li>
 * <li>[BUG 2]: when a Xerces XML grammar pool is used, the second validation
 * ignores the existence of the entities declared in the DTD. See
 * https://github.com/redhat-developer/vscode-xml/issues/234</li>
 * </ul>
 *
 * @author Angelo ZERR
 *
 */
public class LSPSAXParser extends SAXParser {

    private static final String DTD_NOT_FOUND = "Cannot find DTD ''{0}''.\nCreate the DTD file or configure an XML catalog for this DTD.";

    protected static final String VALIDATION_MANAGER = Constants.XERCES_PROPERTY_PREFIX
            + Constants.VALIDATION_MANAGER_PROPERTY;

    protected static final String ENTITY_MANAGER = Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY;

    private final DOMDocument document;

    private final LSPErrorReporterForXML reporter;

    private final XMLGrammarPool grammarPool;

    /** Locator received in {@link #startDocument}; used to resolve the DTD system id. */
    private XMLLocator locator;

    /**
     * Creates a SAX parser which reports its errors as LSP diagnostics.
     *
     * @param document    the DOM document which is validated
     * @param reporter    the reporter which collects the LSP diagnostics
     * @param config      the Xerces parser configuration
     * @param grammarPool the grammar pool used as DTD cache, or null when no
     *                    cache is used
     */
    public LSPSAXParser(DOMDocument document, LSPErrorReporterForXML reporter, XMLParserConfiguration config,
            XMLGrammarPool grammarPool) {
        super(config);
        this.document = document;
        this.reporter = reporter;
        this.grammarPool = grammarPool;
        init(reporter);
    }

    /**
     * Registers the error reporter and sets the parser features.
     *
     * @param reporter the reporter to register as Xerces error reporter
     */
    private void init(LSPErrorReporterForXML reporter) {
        try {
            // Add LSP error reporter to fill LSP diagnostics from Xerces errors
            super.setProperty("http://apache.org/xml/properties/internal/error-reporter", reporter);
            super.setFeature("http://apache.org/xml/features/continue-after-fatal-error", false); //$NON-NLS-1$
            super.setFeature("http://xml.org/sax/features/namespace-prefixes", true); //$NON-NLS-1$
            super.setFeature("http://xml.org/sax/features/namespaces", true); //$NON-NLS-1$
            super.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", true);
        } catch (SAXNotRecognizedException | SAXNotSupportedException e) {
            // Should never occur.
        }
    }

    @Override
    public void startDocument(XMLLocator locator, String encoding, NamespaceContext namespaceContext,
            Augmentations augs) throws XNIException {
        // Keep the locator: it gives the base system id needed in doctypeDecl
        this.locator = locator;
        super.startDocument(locator, encoding, namespaceContext, augs);
    }

    @Override
    public void doctypeDecl(String rootElement, String publicId, String systemId, Augmentations augs)
            throws XNIException {
        if (systemId != null) {
            // There is a declared DTD in the DOCTYPE
            // <!DOCTYPE root-element SYSTEM "./extended.dtd" []>
            String baseSystemId = locator != null ? locator.getExpandedSystemId() : null;
            String eid = null;
            try {
                eid = XMLEntityManager.expandSystemId(systemId, baseSystemId, false);
            } catch (java.io.IOException e) {
                // Best effort: the system id cannot be expanded, eid stays null and
                // isDTDExists(null) answers true, so Xerces handles the DTD itself.
            }
            if (!isDTDExists(eid)) {
                // The declared DTD doesn't exist
                // <!DOCTYPE root-element SYSTEM "./dtd-doesnt-exist.dtd" []>
                reportDTDNotFound(eid);

                // FIX [BUG 1]
                // To avoid breaking the validation (ex : syntax validation) we mark
                // the cache DTD as true to avoid having an IOException error which breaks the
                // validation.
                // boolean readExternalSubset must be false in
                // Xerces
                // https://github.com/apache/xerces2-j/blob/e5a239b96fd2cff6566a29e7a4a3a4a2bbf9b0d4/src/org/apache/xerces/impl/XMLDocumentScannerImpl.java#L950
                ValidationManager fValidationManager = (ValidationManager) fConfiguration
                        .getProperty(VALIDATION_MANAGER);
                if (fValidationManager != null) {
                    fValidationManager.setCachedDTD(true);
                }
            } else if (grammarPool != null) {
                // FIX [BUG 2]
                // DTD exists, get the DTD grammar from the cache
                XMLEntityManager entityManager = (XMLEntityManager) fConfiguration.getProperty(ENTITY_MANAGER);
                XMLDTDDescription grammarDesc = new XMLDTDDescription(publicId, systemId, baseSystemId, eid,
                        rootElement);
                DTDGrammar grammar = (DTDGrammar) grammarPool.retrieveGrammar(grammarDesc);
                if (grammar != null && entityManager != null) {
                    // The DTD grammar is in cache, we need to fill XML entity manager with the
                    // entities declared in the cached DTD grammar
                    fillEntities(grammar, entityManager);
                }
            }
        }
        super.doctypeDecl(rootElement, publicId, systemId, augs);
    }

    /**
     * Reports the "DTD not found" diagnostic on the system id of the DOCTYPE.
     * When the DOM document gives no DOCTYPE or no system id node, the
     * diagnostic is reported at the start of the document instead of failing.
     *
     * @param expandedSystemId the expanded system id of the missing DTD
     */
    private void reportDTDNotFound(String expandedSystemId) {
        try {
            int start = 0;
            int end = 0;
            DOMDocumentType docType = document.getDoctype();
            if (docType != null && docType.getSystemIdNode() != null) {
                start = docType.getSystemIdNode().getStart();
                end = docType.getSystemIdNode().getEnd();
            }
            Range range = new Range(document.positionAt(start), document.positionAt(end));
            reporter.addDiagnostic(range, MessageFormat.format(DTD_NOT_FOUND, expandedSystemId),
                    DiagnosticSeverity.Error, DTDErrorCode.dtd_not_found.getCode());
        } catch (BadLocationException e) {
            // Do nothing
        }
    }

    /**
     * Returns true if the DTD at the given location can be opened and false
     * otherwise. A null or empty location and an HTTP location are considered as
     * existing without being opened.
     *
     * @param expandedSystemId the expanded system id of the DTD
     * @return true if the DTD is considered as existing and false otherwise.
     */
    private static boolean isDTDExists(String expandedSystemId) {
        if (expandedSystemId == null || expandedSystemId.isEmpty()) {
            return true;
        }
        try {
            URLConnection connect = new URL(expandedSystemId).openConnection();
            if (!(connect instanceof HttpURLConnection)) {
                // Opening the stream is enough to know that the resource exists
                try (InputStream stream = connect.getInputStream()) {
                    // nothing to read
                }
            }
        } catch (Exception e) {
            // Any failure (malformed URL, missing file, ...) means "not found"
            return false;
        }
        return true;
    }

    /**
     * Fill entities from the given DTD grammar to the given entity manager.
     *
     * <p>
     * Each entity is registered according to its kind: an entity with a
     * notation as unparsed entity, an entity with a value as internal entity
     * and the other ones (no value) as external entity. Registering an entity
     * which has no value as internal entity would give it a null text.
     * </p>
     *
     * @param grammar       the DTD grammar
     * @param entityManager the entitymanager to update with entities of the DTD
     *                      grammar.
     */
    private static void fillEntities(DTDGrammar grammar, XMLEntityManager entityManager) {
        XMLEntityDecl entityDecl = new XMLEntityDecl() {

            @Override
            public void setValues(String name, String publicId, String systemId, String baseSystemId, String notation,
                    String value, boolean isPE, boolean inExternal) {
                if (!inExternal) {
                    // Only entities declared in the cached DTD grammar must be added in the XML
                    // entity manager.
                    return;
                }
                if (notation != null) {
                    entityManager.addUnparsedEntity(name, publicId, systemId, baseSystemId, notation);
                } else if (value != null) {
                    entityManager.addInternalEntity(name, value);
                } else {
                    try {
                        entityManager.addExternalEntity(name, publicId, systemId, baseSystemId);
                    } catch (java.io.IOException e) {
                        // Best effort: the entity cannot be registered, go on with the other ones
                    }
                }
            }
        };
        // getEntityDecl answers false as soon as the index has no entity
        int index = 0;
        while (grammar.getEntityDecl(index, entityDecl)) {
            index++;
        }
    }
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2018 Angelo ZERR
* Copyright (c) 2018-2020 Angelo ZERR
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v2.0
* which accompanies this distribution, and is available at
Expand All @@ -14,25 +14,18 @@

import java.io.IOException;
import java.io.StringReader;
import java.text.MessageFormat;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CancellationException;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.xerces.impl.XMLEntityManager;
import org.apache.xerces.parsers.SAXParser;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.grammars.XMLGrammarPool;
import org.apache.xerces.xni.parser.XMLEntityResolver;
import org.apache.xerces.xni.parser.XMLInputSource;
import org.apache.xerces.xni.parser.XMLParserConfiguration;
import org.eclipse.lemminx.commons.BadLocationException;
import org.eclipse.lemminx.dom.DOMDocument;
import org.eclipse.lemminx.dom.DOMDocumentType;
import org.eclipse.lemminx.dom.DOMElement;
import org.eclipse.lemminx.extensions.contentmodel.participants.DTDErrorCode;
import org.eclipse.lemminx.extensions.contentmodel.settings.ContentModelSettings;
import org.eclipse.lemminx.extensions.contentmodel.settings.XMLValidationSettings;
import org.eclipse.lemminx.services.extensions.diagnostics.LSPContentHandler;
Expand All @@ -57,8 +50,6 @@ public class XMLValidator {

private static final Logger LOGGER = Logger.getLogger(XMLValidator.class.getName());

private static final String DTD_NOT_FOUND = "Cannot find DTD ''{0}''.\nCreate the DTD file or configure an XML catalog for this DTD.";

public static void doDiagnostics(DOMDocument document, XMLEntityResolver entityResolver,
List<Diagnostic> diagnostics, ContentModelSettings contentModelSettings, XMLGrammarPool grammarPool,
CancelChecker monitor) {
Expand All @@ -74,14 +65,9 @@ public static void doDiagnostics(DOMDocument document, XMLEntityResolver entityR
}

final LSPErrorReporterForXML reporter = new LSPErrorReporterForXML(document, diagnostics);
boolean externalDTDValid = checkExternalDTD(document, reporter, configuration);
SAXParser parser = new SAXParser(configuration);
// Add LSP error reporter to fill LSP diagnostics from Xerces errors
parser.setProperty("http://apache.org/xml/properties/internal/error-reporter", reporter);
parser.setFeature("http://apache.org/xml/features/continue-after-fatal-error", false); //$NON-NLS-1$
parser.setFeature("http://xml.org/sax/features/namespace-prefixes", true /* document.hasNamespaces() */); //$NON-NLS-1$
parser.setFeature("http://xml.org/sax/features/namespaces", true /* document.hasNamespaces() */); //$NON-NLS-1$

SAXParser parser = new LSPSAXParser(document, reporter, configuration, grammarPool);

// Add LSP content handler to stop XML parsing if monitor is canceled.
parser.setContentHandler(new LSPContentHandler(monitor));

Expand All @@ -99,9 +85,7 @@ public static void doDiagnostics(DOMDocument document, XMLEntityResolver entityR
} else {
hasGrammar = false; // validation for Schema was disabled
}

parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", externalDTDValid);
parser.setFeature("http://xml.org/sax/features/validation", hasGrammar && externalDTDValid); //$NON-NLS-1$
parser.setFeature("http://xml.org/sax/features/validation", hasGrammar); //$NON-NLS-1$

// Parse XML
String content = document.getText();
Expand Down Expand Up @@ -148,81 +132,6 @@ private static boolean isDisableOnlyDTDValidation(DOMDocument document) {
return !docType.getChildren().stream().anyMatch(node -> node.isDTDElementDecl() || node.isDTDAttListDecl());
}

/**
* Returns true if the given document has a valid DTD (or doesn't define a DTD)
* and false otherwise.
*
* <p>
* Only the beginning of the document (up to the end of the DOCTYPE) is parsed,
* with an entity manager which reports a "DTD not found" diagnostic when the
* DTD cannot be loaded.
* </p>
*
* @param document the DOM document
* @param reporter the reporter which receives the "DTD not found" diagnostic
* @param configuration the configuration used to create the parser and to
* reset the entity manager
* @return true if the given document has a valid DTD (or doesn't define a DTD)
* and false otherwise.
*/
private static boolean checkExternalDTD(DOMDocument document, LSPErrorReporterForXML reporter,
XMLParserConfiguration configuration) {
if (!document.hasDTD()) {
return true;
}
DOMDocumentType docType = document.getDoctype();
if (docType.getKindNode() == null) {
return true;
}

// When XML is bound with a DTD path which doesn't exist, Xerces throws an
// IOException which breaks the validation of XML syntax instead of reporting it
// (like XML Schema). Here we parse only the
// DOCTYPE to catch this error. If there is an error
// the next validation will be disabled by using
// http://xml.org/sax/features/validation &
// http://apache.org/xml/features/nonvalidating/load-external-dtd (disable uses
// of DTD for validation)

// Parse only the DOCTYPE of the DOM document, followed by a <root/> element

int end = document.getDoctype().getEnd();
String xml = document.getText().substring(0, end);
xml += "<root/>";
try {

// Customize the entity manager to collect the error when DTD doesn't exist.
XMLEntityManager entityManager = new XMLEntityManager() {
@Override
public String setupCurrentEntity(String name, XMLInputSource xmlInputSource, boolean literal,
boolean isExternal) throws IOException, XNIException {
// Catch the setupCurrentEntity method which throws an IOException when DTD is
// not found
try {
return super.setupCurrentEntity(name, xmlInputSource, literal, isExternal);
} catch (IOException e) {
// Report the DTD invalid error on the range of the DOCTYPE system id
try {
Range range = new Range(document.positionAt(docType.getSystemIdNode().getStart()),
document.positionAt(docType.getSystemIdNode().getEnd()));
reporter.addDiagnostic(range,
MessageFormat.format(DTD_NOT_FOUND, xmlInputSource.getSystemId()),
DiagnosticSeverity.Error, DTDErrorCode.dtd_not_found.getCode());
} catch (BadLocationException e1) {
// Do nothing
}
// Rethrow the original IOException once the diagnostic has been reported
throw e;
}
}
};
entityManager.reset(configuration);

// Dedicated parser which uses the customized entity manager
SAXParser parser = new SAXParser(configuration);
parser.setProperty("http://apache.org/xml/properties/internal/entity-manager", entityManager);
parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", true);

parseXML(xml, document.getDocumentURI(), parser);
} catch (SAXException | CancellationException exception) {
// ignore error
} catch (IOException e) {
// An IOException means that the DTD is not valid
return false;
}
return true;
}

/**
* Warn if XML document is not bound to a grammar according the settings
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,18 @@ public String reportError(XMLLocator location, String domain, String key, Object
return null;
}

if (severity == SEVERITY_FATAL_ERROR && !fContinueAfterFatalError) {
if (severity == SEVERITY_FATAL_ERROR && !fContinueAfterFatalError && !isIgnoreFatalError(key)) {
XMLParseException parseException = (exception != null) ? new XMLParseException(location, message, exception)
: new XMLParseException(location, message);
throw parseException;
}
return message;
}

/**
 * Returns true if the fatal error with the given key must not stop the
 * parsing and false otherwise.
 *
 * <p>
 * This implementation ignores no fatal error; a subclass can override it to
 * keep parsing for some error keys.
 * </p>
 *
 * @param key the key of the reported error
 * @return true if the fatal error must be ignored and false otherwise.
 */
protected boolean isIgnoreFatalError(String key) {
    return false;
}

public boolean addDiagnostic(Range adjustedRange, String message, DiagnosticSeverity severity, String key) {
Diagnostic d = new Diagnostic(adjustedRange, message, severity, source, key);
if (diagnostics.contains(d)) {
Expand Down
Loading