Skip to content

Commit

Permalink
.ent and .mod files should be parsed as DTDs
Browse files Browse the repository at this point in the history
See #380

Signed-off-by: azerr <[email protected]>
  • Loading branch information
angelozerr committed Jul 17, 2019
1 parent 2c6d46a commit 04eed67
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,13 @@ public class DOMUtils {

// XML Schema file extension
private static final String XSD_EXTENSION = ".xsd";

// DTD file extensions: isDTD(String) accepts a URI ending with any of these three
private static final String DTD_EXTENSION = ".dtd";

private static final String ENT_EXTENSION = ".ent";

private static final String MOD_EXTENSION = ".mod";

// Namespace URI of the W3C XML Schema vocabulary
private static final String HTTP_WWW_W3_ORG_2001_XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema";

// Namespace URI of the OASIS XML Catalog vocabulary
private static final String URN_OASIS_NAMES_TC_ENTITY_XMLNS_XML_CATALOG_NS = "urn:oasis:names:tc:entity:xmlns:xml:catalog";
Expand All @@ -27,7 +32,7 @@ private DOMUtils() {
* @return true if the XML document is an XML Schema and false otherwise.
*/
public static boolean isXSD(DOMDocument document) {
if(document == null) {
if (document == null) {
return false;
}
String uri = document.getDocumentURI();
Expand Down Expand Up @@ -81,6 +86,7 @@ private static boolean checkRootNamespace(DOMDocument document, String namespace
* @return true if the given URI is a DTD and false otherwise.
*/
public static boolean isDTD(String uri) {
// NOTE(review): two return statements appear back to back here, presumably the
// pre- and post-change lines of a diff; the second, three-extension form looks
// like the intended one -- confirm before compiling.
return uri != null && uri.endsWith(DTD_EXTENSION);
// A null URI is never a DTD; otherwise a URI ending with .dtd, .ent or .mod
// is reported as a DTD. The suffix comparison is case-sensitive (String.endsWith).
return uri != null
&& (uri.endsWith(DTD_EXTENSION) || uri.endsWith(ENT_EXTENSION) || uri.endsWith(MOD_EXTENSION));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ public void testLargeFileWithScanner() {
while (token != TokenType.EOS) {
token = scanner.scan();
}
System.err.println("Parsed 'largeFile.xml' with XMLScanner in " + (System.currentTimeMillis() - start) + " ms.");
System.err
.println("Parsed 'largeFile.xml' with XMLScanner in " + (System.currentTimeMillis() - start) + " ms.");
}

@Test
Expand All @@ -41,7 +42,7 @@ public void testLargeFileWithDocument() {
DOMDocument xmlDocument = DOMParser.getInstance().parse(document, null);
System.err.println("Parsed 'largeFile.xml' with XMLParser in " + (System.currentTimeMillis() - start) + " ms.");
}

@Test
public void testBigLargeFileWithScanner() {
InputStream in = DOMDocumentTest.class.getResourceAsStream("/xml/nasa.xml");
Expand Down Expand Up @@ -155,10 +156,8 @@ public void findElementListWithXPath() throws XPathExpressionException {

@Test
public void testUsesSchemaTrue1WithNamespace() {
String text =
"<root\n" +
" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" +
" xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 testXSD.xsd\"> </root> ";
String text = "<root\n" + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
+ " xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 testXSD.xsd\"> </root> ";
TextDocument textDocument = new TextDocument(text, "/home/nikolas/testXML.xml");
DOMDocument d = DOMParser.getInstance().parse(text, textDocument.getUri(), null);
Assert.assertTrue(d.hasSchemaInstancePrefix());
Expand All @@ -167,10 +166,8 @@ public void testUsesSchemaTrue1WithNamespace() {

@Test
public void testUsesSchemaTrue2WithNamespace() {
String text =
"<root\n" +
" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" +
" xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 nested/testXSD.xsd\"> </root> ";
String text = "<root\n" + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
+ " xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 nested/testXSD.xsd\"> </root> ";
TextDocument textDocument = new TextDocument(text, "/home/nikolas/testXML.xml");
DOMDocument d = DOMParser.getInstance().parse(text, textDocument.getUri(), null);
Assert.assertTrue(d.hasSchemaInstancePrefix());
Expand All @@ -179,10 +176,8 @@ public void testUsesSchemaTrue2WithNamespace() {

@Test
public void testUsesSchemaTrue3WithNamespace() {
String text =
"<root\n" +
" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" +
" xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 file:///home/nikolas/nested/testXSD.xsd\"> </root> ";
String text = "<root\n" + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
+ " xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 file:///home/nikolas/nested/testXSD.xsd\"> </root> ";
TextDocument textDocument = new TextDocument(text, "/home/nikolas/testXML.xml");
DOMDocument d = DOMParser.getInstance().parse(text, textDocument.getUri(), null);
Assert.assertTrue(d.hasSchemaInstancePrefix());
Expand All @@ -191,10 +186,8 @@ public void testUsesSchemaTrue3WithNamespace() {

@Test
public void testUsesSchemaTrue1NoNamespace() {
String text =
"<root\n" +
" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" +
" xsi:noNamespaceSchemaLocation=\"testXSD.xsd\"> </root> ";
String text = "<root\n" + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
+ " xsi:noNamespaceSchemaLocation=\"testXSD.xsd\"> </root> ";
TextDocument textDocument = new TextDocument(text, "/home/nikolas/testXML.xml");
DOMDocument d = DOMParser.getInstance().parse(text, textDocument.getUri(), null);
Assert.assertTrue(d.hasSchemaInstancePrefix());
Expand All @@ -203,10 +196,8 @@ public void testUsesSchemaTrue1NoNamespace() {

@Test
public void testUsesSchemaTrue2NoNamespace() {
String text =
"<root\n" +
" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" +
" xsi:noNamespaceSchemaLocation=\"nested/testXSD.xsd\"> </root> ";
String text = "<root\n" + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
+ " xsi:noNamespaceSchemaLocation=\"nested/testXSD.xsd\"> </root> ";
TextDocument textDocument = new TextDocument(text, "/home/nikolas/testXML.xml");
DOMDocument d = DOMParser.getInstance().parse(text, textDocument.getUri(), null);
Assert.assertTrue(d.hasSchemaInstancePrefix());
Expand All @@ -215,10 +206,8 @@ public void testUsesSchemaTrue2NoNamespace() {

@Test
public void testUsesSchemaTrue3NoNamespace() {
String text =
"<root\n" +
" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" +
" xsi:noNamespaceSchemaLocation=\"file:///home/nikolas/nested/testXSD.xsd\"> </root> ";
String text = "<root\n" + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
+ " xsi:noNamespaceSchemaLocation=\"file:///home/nikolas/nested/testXSD.xsd\"> </root> ";
TextDocument textDocument = new TextDocument(text, "/home/nikolas/testXML.xml");
DOMDocument d = DOMParser.getInstance().parse(text, textDocument.getUri(), null);
Assert.assertTrue(d.hasSchemaInstancePrefix());
Expand All @@ -227,51 +216,86 @@ public void testUsesSchemaTrue3NoNamespace() {

/**
 * Parses a root element that declares xsi:schemaLocation with the relative
 * location "testXSD.xsd" for a document at /home/nikolas/testXML.xml, then
 * asserts that the xsi prefix is detected and that usesSchema returns false
 * for a same-named schema under /home/NOT_NIKOLAS.
 */
@Test
public void testUsesSchemaFalseWithNamespace() {
// NOTE(review): "text" and the final assertion each appear twice with the same
// content in two layouts, presumably the before/after lines of a formatting
// change -- confirm that only one of each is meant to remain.
String text =
"<root\n" +
" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" +
" xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 testXSD.xsd\"> </root> ";
String text = "<root\n" + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
+ " xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 testXSD.xsd\"> </root> ";
TextDocument textDocument = new TextDocument(text, "/home/nikolas/testXML.xml");
DOMDocument d = DOMParser.getInstance().parse(text, textDocument.getUri(), null);
Assert.assertTrue(d.hasSchemaInstancePrefix());
Assert.assertFalse(d.usesSchema("/home/NOT_NIKOLAS/testXSD.xsd")); //bad path
Assert.assertFalse(d.usesSchema("/home/NOT_NIKOLAS/testXSD.xsd")); // bad path
}

/**
 * Parses a root element that declares xsi:noNamespaceSchemaLocation with the
 * relative location "nested/testXSD.xsd" for a document at
 * /home/nikolas/testXML.xml, then asserts that the xsi prefix is detected and
 * that usesSchema returns false for the same relative path under
 * /home/NOT_NIKOLAS.
 */
@Test
public void testUsesSchemaFalseNoNamespace() {
// NOTE(review): "text" and the final assertion each appear twice with the same
// content in two layouts, presumably the before/after lines of a formatting
// change -- confirm that only one of each is meant to remain.
String text =
"<root\n" +
" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" +
" xsi:noNamespaceSchemaLocation=\"nested/testXSD.xsd\"> </root> ";
String text = "<root\n" + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
+ " xsi:noNamespaceSchemaLocation=\"nested/testXSD.xsd\"> </root> ";
TextDocument textDocument = new TextDocument(text, "/home/nikolas/testXML.xml");
DOMDocument d = DOMParser.getInstance().parse(text, textDocument.getUri(), null);
Assert.assertTrue(d.hasSchemaInstancePrefix());
Assert.assertFalse(d.usesSchema("/home/NOT_NIKOLAS/nested/testXSD.xsd")); //bad path
Assert.assertFalse(d.usesSchema("/home/NOT_NIKOLAS/nested/testXSD.xsd")); // bad path
}

/**
 * Parses a root element whose xsi:noNamespaceSchemaLocation is the absolute
 * path /home/nikolas/nested/testXSD.xsd, then asserts that the xsi prefix is
 * detected and that usesSchema returns true for that same absolute path.
 */
@Test
public void testUsesSchemaTrueAbsolutePath() {
// NOTE(review): "text" and the final assertion each appear twice with the same
// content in two layouts, presumably the before/after lines of a formatting
// change -- confirm that only one of each is meant to remain.
String text =
"<root\n" +
" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" +
" xsi:noNamespaceSchemaLocation=\"/home/nikolas/nested/testXSD.xsd\"> </root> ";
String text = "<root\n" + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
+ " xsi:noNamespaceSchemaLocation=\"/home/nikolas/nested/testXSD.xsd\"> </root> ";
TextDocument textDocument = new TextDocument(text, "/home/nikolas/testXML.xml");
DOMDocument d = DOMParser.getInstance().parse(text, textDocument.getUri(), null);
Assert.assertTrue(d.hasSchemaInstancePrefix());
// NOTE(review): the trailing "bad path" remark below looks copied from the
// negative tests; the path asserted here is identical to the declared location.
Assert.assertTrue(d.usesSchema("/home/nikolas/nested/testXSD.xsd")); //bad path
Assert.assertTrue(d.usesSchema("/home/nikolas/nested/testXSD.xsd")); // bad path
}

/**
 * Parses a root element that carries both xsi:noNamespaceSchemaLocation and
 * xsi:schemaLocation, then asserts that neither getNoNamespaceSchemaLocation()
 * nor getSchemaLocation() returns null.
 */
@Test
public void testNoNamespaceSchemaLocationAndShemaLocationBoth() {
// NOTE(review): the last four fragments of the concatenation appear twice with
// the same content, presumably the before/after lines of a re-indentation --
// confirm that only one set is meant to remain.
// The empty trailing "//" markers carry no text; they look like formatter
// line-break guards.
String text = "<root\n" + //
" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" + //
" xsi:noNamespaceSchemaLocation=\"/home/nikolas/nested/testXSD.xsd\"" + //
" xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 testXSD.xsd\"" + //
">" + //
" </root> ";
" xsi:noNamespaceSchemaLocation=\"/home/nikolas/nested/testXSD.xsd\"" + //
" xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 testXSD.xsd\"" + //
">" + //
" </root> ";
TextDocument textDocument = new TextDocument(text, "/home/test.xml");
DOMDocument d = DOMParser.getInstance().parse(text, textDocument.getUri(), null);
Assert.assertNotNull(d.getNoNamespaceSchemaLocation());
Assert.assertNotNull(d.getSchemaLocation());
}

/**
 * Verifies that the extension of the document URI decides how the same content
 * ("&lt;!ELEMENT") is parsed: with a .xml or unknown extension the document is
 * not a DTD and its first child is an element, while with a .dtd, .ent or .mod
 * extension the document is a DTD whose first child is a doctype node holding a
 * DTD element declaration.
 */
@Test
public void testDOMAsDTD() {
    String content = "<!ELEMENT";

    // Non-DTD extensions: content is parsed as regular XML
    assertParsedAsXML(content, "test.xml");
    assertParsedAsXML(content, "test.unknown");

    // DTD extensions: content is parsed as a DTD
    assertParsedAsDTD(content, "test.dtd");
    assertParsedAsDTD(content, "test.ent");
    assertParsedAsDTD(content, "test.mod");
}

/**
 * Asserts that content parsed under the given URI is not flagged as a DTD and
 * that the first child of the document is an element.
 *
 * @param content the text to parse
 * @param uri     the document URI whose extension drives the parsing mode
 */
private static void assertParsedAsXML(String content, String uri) {
    DOMDocument document = DOMParser.getInstance().parse(content, uri, null);
    Assert.assertFalse(document.isDTD());
    DOMNode firstChild = document.getChild(0);
    Assert.assertTrue(firstChild.isElement());
}

/**
 * Asserts that content parsed under the given URI is flagged as a DTD, that
 * the first child of the document is a doctype node, and that the first child
 * of that doctype is a DTD element declaration.
 *
 * @param content the text to parse
 * @param uri     the document URI whose extension drives the parsing mode
 */
private static void assertParsedAsDTD(String content, String uri) {
    DOMDocument document = DOMParser.getInstance().parse(content, uri, null);
    Assert.assertTrue(document.isDTD());
    DOMNode docType = document.getChild(0);
    Assert.assertTrue(docType.isDoctype());
    DOMNode elementDecl = docType.getChild(0);
    Assert.assertTrue(elementDecl.isDTDElementDecl());
}
}

0 comments on commit 04eed67

Please sign in to comment.