Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parser when not replacing entities and treating char references as entities #176

Merged
merged 3 commits into from
Nov 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/main/java/com/ctc/wstx/sr/BasicStreamReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -2834,7 +2834,7 @@ && readCDataSecondary(mCfgLazyParsing
/* Need to call different methods based on whether we can do
* automatic entity expansion or not:
*/
int ch = mCfgReplaceEntities ?
int ch = mCfgReplaceEntities || mCfgTreatCharRefsAsEntities ?
fullyResolveEntity(true) : resolveCharOnlyEntity(true);

if (ch != 0) {
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/ctc/wstx/sr/StreamScanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -1639,7 +1639,7 @@ protected EntityDecl expandEntity(String id, boolean allowExt,
return null;
}

if (!mCfgTreatCharRefsAsEntities || this instanceof MinimalDTDReader) {
if (mCfgReplaceEntities || !mCfgTreatCharRefsAsEntities || this instanceof MinimalDTDReader) {
expandEntity(ed, allowExt);
}

Expand Down
99 changes: 99 additions & 0 deletions src/test/java/stax2/stream/TestExternalEntityRef.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package stax2.stream;

import com.ctc.wstx.api.WstxInputProperties;
import org.codehaus.stax2.XMLInputFactory2;
import org.codehaus.stax2.XMLStreamReader2;
import stax2.BaseStax2Test;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLResolver;
import javax.xml.stream.XMLStreamException;

/**
 * Checks which events a reader produces for entity references when the
 * input factory has been set up via {@code configureForRoundTripping()},
 * both with that configuration left as-is and with entity replacement and
 * external entity support explicitly switched on afterwards.
 *
 * @author Guillaume Nodet
 */
public class TestExternalEntityRef
    extends BaseStax2Test
{
    /** Document without a DTD that references the entity {@code oelig}. */
    private static final String TEST_BASIC = "<project>" +
            "<?foo?>" +
            "<name>&oelig;</name>" +
            "</project>";

    /** Document whose internal subset declares {@code desc} as an external (SYSTEM) entity. */
    private static final String TEST_DTD = "<!DOCTYPE foo [\n" +
            " <!ENTITY desc SYSTEM \"file:desc.xml\">\n" +
            " ]>\n" +
            "<project>" +
            "&desc;" +
            "</project>";

    /**
     * The {@code &oelig;} reference inside {@code <name>} is expected to be
     * reported as an ENTITY_REFERENCE event carrying the entity name.
     */
    public void testEntityRef()
        throws XMLStreamException
    {
        XMLStreamReader2 reader = constructStreamReader(roundTripFactory(), TEST_BASIC);

        verifyNextStartElement(reader, "project");
        assertEquals(PROCESSING_INSTRUCTION, reader.next());
        assertEquals("foo", reader.getPITarget());
        verifyNextStartElement(reader, "name");
        verifyNextEntityRef(reader, "oelig");
    }

    /**
     * The declared external entity {@code desc} is expected to surface as a
     * single ENTITY_REFERENCE event, directly followed by the end of
     * {@code <project>}.
     */
    public void testWithDtd()
        throws XMLStreamException
    {
        XMLStreamReader2 reader = constructStreamReader(roundTripFactory(), TEST_DTD);

        assertEquals(DTD, reader.next());
        assertEquals(SPACE, reader.next());
        verifyNextStartElement(reader, "project");
        verifyNextEntityRef(reader, "desc");
        verifyNextEndElement(reader, "project");
    }

    /**
     * Same document as {@link #testWithDtd()}, but with entity replacement and
     * external entities enabled and a resolver supplying the entity content:
     * the ENTITY_REFERENCE event is expected to be followed by the events of
     * the resolved {@code <description>} element.
     */
    public void testWithDtdExpand()
        throws XMLStreamException
    {
        // Serves the content for the one system id used by TEST_DTD; anything else is unresolved.
        XMLResolver descResolver = new XMLResolver() {
            @Override
            public Object resolveEntity(String publicID, String systemID, String baseURI, String namespace) throws XMLStreamException {
                return "file:desc.xml".equals(systemID)
                        ? "<?xml version='1.0' encoding='UTF-8'?><description>foo</description>"
                        : null;
            }
        };

        XMLInputFactory2 factory = roundTripFactory();
        factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
        factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.TRUE);
        factory.setProperty(WstxInputProperties.P_ENTITY_RESOLVER, descResolver);
        XMLStreamReader2 reader = constructStreamReader(factory, TEST_DTD);

        assertEquals(DTD, reader.next());
        assertEquals(SPACE, reader.next());
        verifyNextStartElement(reader, "project");
        verifyNextEntityRef(reader, "desc");
        verifyNextStartElement(reader, "description");
        assertEquals(CHARACTERS, reader.next());
        assertEquals("foo", reader.getText());
        verifyNextEndElement(reader, "description");
        verifyNextEndElement(reader, "project");
    }

    /** Obtains the test input factory and applies its round-tripping configuration. */
    private XMLInputFactory2 roundTripFactory()
    {
        XMLInputFactory2 factory = getInputFactory();
        factory.configureForRoundTripping();
        return factory;
    }

    /** Advances the reader and requires a START_ELEMENT with the given local name. */
    private void verifyNextStartElement(XMLStreamReader2 reader, String expectedName)
        throws XMLStreamException
    {
        assertEquals(START_ELEMENT, reader.next());
        assertEquals(expectedName, reader.getLocalName());
    }

    /** Advances the reader and requires an END_ELEMENT with the given local name. */
    private void verifyNextEndElement(XMLStreamReader2 reader, String expectedName)
        throws XMLStreamException
    {
        assertEquals(END_ELEMENT, reader.next());
        assertEquals(expectedName, reader.getLocalName());
    }

    /** Advances the reader and requires an ENTITY_REFERENCE with the given entity name. */
    private void verifyNextEntityRef(XMLStreamReader2 reader, String expectedName)
        throws XMLStreamException
    {
        assertEquals(ENTITY_REFERENCE, reader.next());
        assertEquals(expectedName, reader.getLocalName());
    }
}
94 changes: 93 additions & 1 deletion src/test/java/wstxtest/stream/TestTreatCharRefAsEnts.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public void testReturnCharsReference() throws Exception
assertEquals("& more", sr.getText());
}

public void testReturnCharsReferenceWithHighMinTextSegment() throws Exception
public void testReturnEntityForCharReferenceWithHighMinTextSegment() throws Exception
{
String XML = "<root>text &amp; more</root>";

Expand All @@ -95,6 +95,98 @@ public void testReturnCharsReferenceWithHighMinTextSegment() throws Exception
assertEquals("text & more", sr.getText());
}

/**
 * With entity replacement on, char references not treated as entities and
 * a minimum text segment of 64, the whole element content is expected to
 * arrive as one CHARACTERS event with {@code &amp;} already resolved.
 */
public void testReturnCharsReferenceWithHighMinTextSegment() throws Exception
{
    // Flags after the document: replEntities=true, treatCharRefsAsEnts=false;
    // 64 is the default minimum text segment
    final BasicStreamReader reader = getReader("<root>text &amp; more</root>", true, false, 64);

    assertTokenType(START_ELEMENT, reader.next());
    assertEquals("root", reader.getLocalName());

    assertTokenType(CHARACTERS, reader.next());
    assertEquals("text & more", reader.getText());
}

/**
 * With entity replacement off, char references treated as entities and a
 * minimum text segment of 1, the text before {@code &amp;} is expected as
 * its own CHARACTERS event, followed by an ENTITY_REFERENCE event for
 * {@code amp} whose declaration, text and location are then checked.
 */
public void testNoReplEntitiesAndReturnEntityForCharReference() throws Exception
{
    // Flags after the document: replEntities=false, treatCharRefsAsEnts=true, minTextSegment=1
    final BasicStreamReader reader = getReader("<root>text &amp; more</root>", false, true, 1);

    assertTokenType(START_ELEMENT, reader.next());
    assertEquals("root", reader.getLocalName());

    // Leading text, up to (not including) the reference
    assertTokenType(CHARACTERS, reader.next());
    assertEquals("text ", reader.getText());

    // The reference itself, exposed as an entity
    assertTokenType(ENTITY_REFERENCE, reader.next());
    assertEquals("amp", reader.getLocalName());

    final EntityDecl decl = reader.getCurrentEntityDecl();
    assertNotNull(decl);
    assertEquals("amp", decl.getName());
    assertEquals("&", decl.getReplacementText());

    // Plain StAX access to the replacement text
    assertEquals("&", reader.getText());

    // Location is expected at character offset 16 once the reference has been read
    final Location where = reader.getCurrentLocation();
    assertNotNull(where);
    assertEquals(16, where.getCharacterOffset());
}

/**
 * With entity replacement off, char references not treated as entities and
 * a minimum text segment of 1, the content is expected to be split into
 * two CHARACTERS events, the second starting with the resolved {@code &}.
 */
public void testNoReplEntitiesAndReturnCharsReference() throws Exception
{
    // Flags after the document: replEntities=false, treatCharRefsAsEnts=false, minTextSegment=1
    final BasicStreamReader reader = getReader("<root>text &amp; more</root>", false, false, 1);

    assertTokenType(START_ELEMENT, reader.next());
    assertEquals("root", reader.getLocalName());

    // First segment: text preceding the reference
    assertTokenType(CHARACTERS, reader.next());
    assertEquals("text ", reader.getText());

    // Second segment: resolved reference plus trailing text
    assertTokenType(CHARACTERS, reader.next());
    assertEquals("& more", reader.getText());
}

/**
 * With entity replacement off, char references treated as entities and a
 * minimum text segment of 64, the whole element content is expected as a
 * single CHARACTERS event reading {@code "text & more"}.
 */
public void testNoReplEntitiesAndReturnEntityForCharReferenceWithHighMinTextSegment() throws Exception
{
    // Flags after the document: replEntities=false, treatCharRefsAsEnts=true;
    // 64 is the default minimum text segment
    final BasicStreamReader reader = getReader("<root>text &amp; more</root>", false, true, 64);

    assertTokenType(START_ELEMENT, reader.next());
    assertEquals("root", reader.getLocalName());

    assertTokenType(CHARACTERS, reader.next());
    assertEquals("text & more", reader.getText());
}

/**
 * With entity replacement off, char references not treated as entities and
 * a minimum text segment of 64, the whole element content is expected as a
 * single CHARACTERS event reading {@code "text & more"}.
 */
public void testNoReplEntitiesAndReturnCharsReferenceWithHighMinTextSegment() throws Exception
{
    // Flags after the document: replEntities=false, treatCharRefsAsEnts=false;
    // 64 is the default minimum text segment
    final BasicStreamReader reader = getReader("<root>text &amp; more</root>", false, false, 64);

    assertTokenType(START_ELEMENT, reader.next());
    assertEquals("root", reader.getLocalName());

    assertTokenType(CHARACTERS, reader.next());
    assertEquals("text & more", reader.getText());
}

private BasicStreamReader getReader(String contents, boolean replEntities,
boolean treatCharRefsAsEnts, int minTextSegment)
throws XMLStreamException
Expand Down