Skip to content

Commit

Permalink
When DTD is not found, XML is not checked for well-formedness
Browse files Browse the repository at this point in the history
  • Loading branch information
angelozerr authored and fbricon committed Jul 17, 2019
1 parent 962510d commit 0c41043
Show file tree
Hide file tree
Showing 7 changed files with 198 additions and 61 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,14 @@ public class DOMDocumentType extends DTDDeclNode implements org.w3c.dom.Document
/**
 * Kinds of external identifier that a DOCTYPE declaration can carry.
 *
 * NOTE(review): INVALID presumably marks a keyword that is neither PUBLIC nor
 * SYSTEM -- confirm against the code that assigns it.
 */
public enum DocumentTypeKind {
PUBLIC, SYSTEM, INVALID
}

DTDDeclParameter name;
DTDDeclParameter kind; // SYSTEM || PUBLIC
DTDDeclParameter publicId;
DTDDeclParameter systemId;
DTDDeclParameter internalSubset;

private String content; // |<!DOCTYPE ... >|
//private String unrecognizedParameters;

public DOMDocumentType(int start, int end, DOMDocument ownerDocument) {
super(start, end, ownerDocument);
Expand All @@ -55,12 +54,23 @@ void setName(int start, int end) {
}

/**
* @return the DocumentTypeKind
* Returns the document type kind (PUBLIC or SYSTEM)
*
* @return the document type kind (PUBLIC or SYSTEM)
*/
public String getKind() {
    // No kind parameter was recorded for this DOCTYPE.
    if (kind == null) {
        return null;
    }
    return kind.getParameter();
}

/**
 * Returns the node where the document type kind (PUBLIC or SYSTEM) is declared.
 *
 * Unlike {@link #getKind()}, which returns the parameter text, this returns the
 * parameter node itself.
 *
 * @return the node where the document type kind (PUBLIC or SYSTEM) is declared;
 *         may be null, since {@link #getKind()} guards against a null kind
 */
public DTDDeclParameter getKindNode() {
return kind;
}

/**
* @param kind the DocumentTypeKind to set
*/
Expand Down Expand Up @@ -106,7 +116,7 @@ public NamedNodeMap getEntities() {
@Override
public String getInternalSubset() {
String subset;
if(internalSubset != null) {
if (internalSubset != null) {
subset = internalSubset.getParameter();
subset = subset.substring(1, subset.length() - 1);
internalSubset.parameter = subset; // Set parameter to a value without '[' and ']'
Expand All @@ -116,7 +126,6 @@ public String getInternalSubset() {
return null;
}


/**
 * Starts the internal subset parameter at the given offset.
 *
 * @param start the offset where the internal subset begins
 */
public void setStartInternalSubset(int start) {
    // The parameter initially spans a single character at 'start'
    // (presumably the opening '[' -- confirm against the scanner).
    int initialEnd = start + 1;
    internalSubset = addNewParameter(start, initialEnd);
}
Expand All @@ -126,9 +135,9 @@ public void setEndInternalSubset(int end) {
}

public boolean isInternalSubset(DTDDeclParameter parameter) {
if(this.internalSubset != null) {
if (this.internalSubset != null) {
return this.internalSubset.equals(parameter);
}
}
return false;
}

Expand Down Expand Up @@ -173,6 +182,10 @@ public String getSystemId() {
return systemId != null ? systemId.getParameter() : null;
}

/**
 * Returns the node where the system id is declared.
 *
 * @return the system id parameter node; may be null, since
 *         {@link #getSystemId()} guards against a null system id
 */
public DTDDeclParameter getSystemIdNode() {
return systemId;
}

/**
 * Returns the system id as produced by
 * {@code getParameterWithoutFirstAndLastChar()} on the system id parameter.
 *
 * @return the system id without its first and last character, or null when no
 *         system id parameter is set
 */
public String getSystemIdWithoutQuotes() {
    if (systemId == null) {
        return null;
    }
    return systemId.getParameterWithoutFirstAndLastChar();
}
Expand All @@ -184,28 +197,13 @@ void setSystemId(int start, int end) {
systemId = addNewParameter(start, end);
}

/**
 * Strips one leading and one trailing double quote (") from the given URL,
 * when present.
 *
 * @param url the URL to clean, may be null
 * @return null when {@code url} is null, otherwise the URL without its
 *         surrounding double quotes (the empty string when the URL consists
 *         only of quotes)
 */
private static String cleanURL(String url) {
    if (url == null || url.isEmpty()) {
        return url;
    }
    int start = url.charAt(0) == '\"' ? 1 : 0;
    int end = url.charAt(url.length() - 1) == '\"' ? url.length() - 1 : url.length();
    // A lone quote is both the first and the last character, which yields
    // start > end; substring(start, end) would throw in that case.
    return start < end ? url.substring(start, end) : "";
}

/**
* Returns a substring of the whole document.
*
*
* Since offset values are relative to 'this.start' we need to
* subtract getStart() to make them relative to 'content'
*/
* Since offset values are relative to 'this.start' we need to subtract
* getStart() to make them relative to 'content'
*/
public String getSubstring(int start, int end) {
    String text = getContent();
    // Shift the incoming offsets so that they index into 'text'.
    int shift = getStart();
    return text.substring(start - shift, end - shift);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,19 @@ public abstract class DOMNode implements Node {

DOMNode parent;

/**
 * Shared NodeList with no items; getChildNodes() returns it when 'children' is
 * null.
 */
private static final NodeList EMPTY_CHILDREN = new NodeList() {

// There is never an item, whatever the index.
@Override
public Node item(int index) {
return null;
}

// The list is always empty.
@Override
public int getLength() {
return 0;
}
};

static class XMLNodeList<T extends DOMNode> extends ArrayList<T> implements NodeList {

private static final long serialVersionUID = 1L;
Expand Down Expand Up @@ -585,7 +598,7 @@ public NamedNodeMap getAttributes() {
*/
@Override
public NodeList getChildNodes() {
return children;
return children != null ? children : EMPTY_CHILDREN;
}

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public class Constants {

/**
 * Matches an attribute value at the start of the input: a double-quoted value
 * (group 1) or a single-quoted value (group 2); the closing quote is optional.
 *
 * The alternation is wrapped in a non-capturing group so that '^' anchors both
 * alternatives. Without it the single-quoted alternative is unanchored and
 * Matcher.find() can match a value further along in the input. Group numbering
 * is unchanged.
 */
public static final Pattern ATTRIBUTE_VALUE_REGEX = Pattern.compile("^(?:(\"[^\"]*\"?)|(\'[^\']*\'?))");

public static final Pattern URL_VALUE_REGEX = Pattern.compile("^(\"|\')[^<>\"]*(\"|\')");
/**
 * Matches a quoted URL at the start of the input: a double-quoted value
 * (group 1) or a single-quoted value (group 2), with matching quotes.
 *
 * The alternation is wrapped in a non-capturing group so that '^' anchors both
 * alternatives. Without it the single-quoted alternative is unanchored and
 * Matcher.find() can match a value further along in the input. Group numbering
 * is unchanged.
 */
public static final Pattern URL_VALUE_REGEX = Pattern.compile("^(?:(\"[^<>\"]*\")|(\'[^<>\']*\'))");

public static final Pattern PROLOG_NAME_OPTIONS = Pattern.compile("^(xml)[\\s<>?]?");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@
*/
public enum DTDErrorCode implements IXMLErrorCode {



AttNameRequiredInAttDef,
AttTypeRequiredInAttDef,
ElementDeclUnterminated,
Expand Down Expand Up @@ -65,8 +63,9 @@ public enum DTDErrorCode implements IXMLErrorCode {
PEReferenceWithinMarkup,
QuoteRequiredInPublicID,
QuoteRequiredInSystemID,
SpaceRequiredAfterSYSTEM;

SpaceRequiredAfterSYSTEM,
dtd_not_found("dtd-not-found");

private final String code;

private DTDErrorCode() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,30 @@
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.MessageFormat;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CancellationException;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.xerces.impl.XMLEntityManager;
import org.apache.xerces.parsers.SAXParser;
import org.apache.xerces.parsers.XIncludeAwareParserConfiguration;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLEntityResolver;
import org.apache.xerces.xni.parser.XMLInputSource;
import org.apache.xerces.xni.parser.XMLParserConfiguration;
import org.eclipse.lsp4j.Diagnostic;
import org.eclipse.lsp4j.DiagnosticSeverity;
import org.eclipse.lsp4j.Position;
import org.eclipse.lsp4j.Range;
import org.eclipse.lsp4j.jsonrpc.CancelChecker;
import org.eclipse.lsp4xml.commons.BadLocationException;
import org.eclipse.lsp4xml.dom.DOMDocument;
import org.eclipse.lsp4xml.dom.DOMDocumentType;
import org.eclipse.lsp4xml.dom.DOMElement;
import org.eclipse.lsp4xml.extensions.contentmodel.participants.DTDErrorCode;
import org.eclipse.lsp4xml.extensions.contentmodel.settings.ContentModelSettings;
import org.eclipse.lsp4xml.extensions.contentmodel.settings.XMLValidationSettings;
import org.eclipse.lsp4xml.services.extensions.diagnostics.LSPContentHandler;
Expand All @@ -49,61 +56,67 @@ public class XMLValidator {

private static final Logger LOGGER = Logger.getLogger(XMLValidator.class.getName());

// MessageFormat pattern for the "DTD not found" diagnostic: {0} receives the
// system id of the DTD that could not be loaded, and the doubled single quotes
// ('') render as one literal quote around it.
private static final String DTD_NOT_FOUND = "Cannot find DTD ''{0}''.\nCreate the DTD file or configure an XML catalog for this DTD.";

public static void doDiagnostics(DOMDocument document, XMLEntityResolver entityResolver,
List<Diagnostic> diagnostics, ContentModelSettings contentModelSettings, CancelChecker monitor) {

try {
XMLParserConfiguration configuration = new XIncludeAwareParserConfiguration(); // new
// XMLGrammarCachingConfiguration();
// it should be better to cache XML Schema with XMLGrammarCachingConfiguration,
// but we cannot use
// XMLGrammarCachingConfiguration because cache is done with target namespaces.
// There are conflicts when
// 2 XML Schemas don't define target namespaces.
SAXParser reader = new SAXParser(configuration);
// Add LSP error reporter to fill LSP diagnostics from Xerces errors
reader.setProperty("http://apache.org/xml/properties/internal/error-reporter",
new LSPErrorReporterForXML(document, diagnostics));
reader.setFeature("http://apache.org/xml/features/continue-after-fatal-error", false); //$NON-NLS-1$
reader.setFeature("http://xml.org/sax/features/namespace-prefixes", true /* document.hasNamespaces() */); //$NON-NLS-1$
reader.setFeature("http://xml.org/sax/features/namespaces", true /* document.hasNamespaces() */); //$NON-NLS-1$

// Add LSP content handler to stop XML parsing if monitor is canceled.
reader.setContentHandler(new LSPContentHandler(monitor));
XMLParserConfiguration configuration = new XIncludeAwareParserConfiguration(); // new
// XMLGrammarCachingConfiguration();

if (entityResolver != null) {
reader.setProperty("http://apache.org/xml/properties/internal/entity-resolver", entityResolver); //$NON-NLS-1$
configuration.setProperty("http://apache.org/xml/properties/internal/entity-resolver", entityResolver); //$NON-NLS-1$
}

final LSPErrorReporterForXML reporter = new LSPErrorReporterForXML(document, diagnostics);
boolean externalDTDValid = checkExternalDTD(document, reporter, configuration);

SAXParser parser = new SAXParser(configuration);
// Add LSP error reporter to fill LSP diagnostics from Xerces errors
parser.setProperty("http://apache.org/xml/properties/internal/error-reporter", reporter);
parser.setFeature("http://apache.org/xml/features/continue-after-fatal-error", false); //$NON-NLS-1$
parser.setFeature("http://xml.org/sax/features/namespace-prefixes", true /* document.hasNamespaces() */); //$NON-NLS-1$
parser.setFeature("http://xml.org/sax/features/namespaces", true /* document.hasNamespaces() */); //$NON-NLS-1$

// Add LSP content handler to stop XML parsing if monitor is canceled.
parser.setContentHandler(new LSPContentHandler(monitor));

boolean hasGrammar = document.hasGrammar();

// If diagnostics for Schema preference is enabled
XMLValidationSettings validationSettings = contentModelSettings != null ? contentModelSettings.getValidation() : null;
if((validationSettings == null) || validationSettings.isSchema()) {


// If diagnostics for Schema preference is enabled
XMLValidationSettings validationSettings = contentModelSettings != null
? contentModelSettings.getValidation()
: null;
if ((validationSettings == null) || validationSettings.isSchema()) {

checkExternalSchema(document.getExternalSchemaLocation(), reader);
checkExternalSchema(document.getExternalSchemaLocation(), parser);

reader.setFeature("http://apache.org/xml/features/validation/schema", hasGrammar); //$NON-NLS-1$
parser.setFeature("http://apache.org/xml/features/validation/schema", hasGrammar); //$NON-NLS-1$

// warn if XML document is not bound to a grammar according the settings
warnNoGrammar(document, diagnostics, contentModelSettings);
} else {
hasGrammar = false; //validation for Schema was disabled
hasGrammar = false; // validation for Schema was disabled
}

reader.setFeature("http://xml.org/sax/features/validation", hasGrammar); //$NON-NLS-1$
parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", externalDTDValid);
parser.setFeature("http://xml.org/sax/features/validation", hasGrammar && externalDTDValid); //$NON-NLS-1$

// Parse XML
String content = document.getText();
String uri = document.getDocumentURI();
InputSource inputSource = new InputSource();
inputSource.setByteStream(new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8)));
inputSource.setSystemId(uri);
reader.parse(inputSource);
parser.parse(inputSource);

} catch (IOException | SAXException | CancellationException exception) {
exception.printStackTrace();
// ignore error
} catch (CacheResourceDownloadingException e) {
throw e;
Expand All @@ -112,6 +125,82 @@ public static void doDiagnostics(DOMDocument document, XMLEntityResolver entityR
}
}

/**
 * Returns true if the given document has a valid DTD (or doesn't define a DTD)
 * and false otherwise.
 *
 * Only the text up to the end of the DOCTYPE, followed by a synthetic
 * {@code <root/>} element, is parsed here. When loading an entity fails with an
 * IOException, a "dtd-not-found" diagnostic is added through the reporter and
 * false is returned, so that the caller can disable DTD loading and validation
 * for the full parse.
 *
 * @param document      the DOM document
 * @param reporter      the reporter which receives the DTD not found diagnostic
 * @param configuration the parser configuration used for the DOCTYPE-only parse
 * @return true if the given document has a valid DTD (or doesn't define a DTD)
 *         and false otherwise.
 */
private static boolean checkExternalDTD(DOMDocument document, LSPErrorReporterForXML reporter,
        XMLParserConfiguration configuration) {
    if (!document.hasDTD()) {
        return true;
    }
    DOMDocumentType docType = document.getDoctype();
    if (docType.getKindNode() == null) {
        return true;
    }

    // When XML is bound with a DTD path which doesn't exist, Xerces throws an
    // IOException which breaks the validation of XML syntax instead of reporting it
    // (like XML Schema). Here we parse only the DOCTYPE to catch this error. If
    // there is an error, the next validation will be disabled by using
    // http://xml.org/sax/features/validation &
    // http://apache.org/xml/features/nonvalidating/load-external-dtd (disable uses
    // of DTD for validation)

    // Parse only the DOCTYPE of the DOM document
    int end = docType.getEnd();
    String xml = document.getText().substring(0, end) + "<root/>";
    try {

        // Customize the entity manager to collect the error when DTD doesn't exist.
        XMLEntityManager entityManager = new XMLEntityManager() {
            @Override
            public String setupCurrentEntity(String name, XMLInputSource xmlInputSource, boolean literal,
                    boolean isExternal) throws IOException, XNIException {
                // Catch the setupCurrentEntity method which throws an IOException when DTD is
                // not found
                try {
                    return super.setupCurrentEntity(name, xmlInputSource, literal, isExternal);
                } catch (IOException e) {
                    // Report the DTD invalid error
                    try {
                        // The system id node may be missing; fall back on the kind node
                        // (checked non-null above) rather than failing with a
                        // NullPointerException that would escape this method.
                        boolean hasSystemId = docType.getSystemIdNode() != null;
                        Range range = new Range(
                                document.positionAt(hasSystemId ? docType.getSystemIdNode().getStart()
                                        : docType.getKindNode().getStart()),
                                document.positionAt(hasSystemId ? docType.getSystemIdNode().getEnd()
                                        : docType.getKindNode().getEnd()));
                        reporter.addDiagnostic(range,
                                MessageFormat.format(DTD_NOT_FOUND, xmlInputSource.getSystemId()),
                                DiagnosticSeverity.Error, DTDErrorCode.dtd_not_found.getCode());
                    } catch (BadLocationException e1) {
                        // Do nothing: the diagnostic cannot be located, but the
                        // IOException is still rethrown below.
                    }
                    throw e;
                }
            }
        };
        entityManager.reset(configuration);

        SAXParser parser = new SAXParser(configuration);
        parser.setProperty("http://apache.org/xml/properties/internal/entity-manager", entityManager);
        InputSource inputSource = new InputSource();
        inputSource.setByteStream(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)));
        inputSource.setSystemId(document.getDocumentURI());
        parser.parse(inputSource);
    } catch (SAXException | CancellationException exception) {
        // ignore error: syntax problems are reported by the full parse
    } catch (IOException e) {
        // The DTD could not be loaded
        return false;
    }
    return true;
}

/**
* Warn if XML document is not bound to a grammar according the settings
*
Expand Down
Loading

0 comments on commit 0c41043

Please sign in to comment.