Skip to content

Commit

Permalink
bpo-17239: XML entity expansion limitation
Browse files Browse the repository at this point in the history
  • Loading branch information
tiran committed Sep 23, 2018
1 parent 24b447e commit 504e092
Show file tree
Hide file tree
Showing 14 changed files with 633 additions and 29 deletions.
13 changes: 12 additions & 1 deletion Include/pyexpat.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,17 @@

/* note: this header includes expat.h itself (see the #include below) */

#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.1"
#include "expat.h"

#define PyExpat_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)

#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.2"
#define PyExpat_CAPSULE_NAME "pyexpat.expat_CAPI"

#if PyExpat_COMBINED_VERSION < 20300
/* Fallback declaration for expat headers older than 2.3.0, which do not
 * provide enum XML_Option.  It exists so that the SetOption/GetOption
 * prototypes in struct PyExpat_CAPI still compile.
 *
 * C requires an enum to have at least one enumerator ("enum X {};" is a
 * constraint violation), so a single member is declared.  The numeric value
 * is only a placeholder for these older headers.
 */
enum XML_Option {
    XML_OPTION_HUGE_XML = 0
};
#endif

struct PyExpat_CAPI
{
char* magic; /* set to PyExpat_CAPI_MAGIC */
Expand Down Expand Up @@ -50,6 +58,9 @@ struct PyExpat_CAPI
void *encodingHandlerData, const XML_Char *name, XML_Encoding *info);
/* might be none for expat < 2.1.0 */
int (*SetHashSalt)(XML_Parser parser, unsigned long hash_salt);
/* expat >= 2.3.0 */
enum XML_Status (*SetOption)(XML_Parser parser, enum XML_Option option, void *value);
enum XML_Status (*GetOption)(XML_Parser parser, enum XML_Option option, void *rvalue);
/* always add new stuff to the end! */
};

68 changes: 67 additions & 1 deletion Lib/test/test_sax.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
XMLFilterBase, prepare_input_source
from xml.sax.expatreader import create_parser
from xml.sax.handler import feature_namespaces, feature_external_ges
from xml.sax.handler import feature_huge_xml
from xml.sax.handler import ErrorHandler
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
from io import BytesIO, StringIO
import codecs
Expand All @@ -32,6 +34,10 @@
except UnicodeEncodeError:
raise unittest.SkipTest("filename is not encodable to utf8")

TEST_ENTITYTOOLARGE = findfile("entitytoolarge.xml", subdir="xmltestdata")
TEST_EXPANSIONLIMIT = findfile("expansionlimit.xml", subdir="xmltestdata")
TEST_RECURSIONLIMIT = findfile("nestinglimit.xml", subdir="xmltestdata")

supports_nonascii_filenames = True
if not os.path.supports_unicode_filenames:
try:
Expand Down Expand Up @@ -1311,6 +1317,65 @@ def test_nsattrs_wattr(self):
self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr")


class NullSink(StringIO):
    """A StringIO whose write() discards its arguments instead of storing them."""

    def write(self, *args):
        """/dev/null write"""
        # Nothing is buffered; the implicit result is None, as before.
        return None


class XmlEntityExpansion(unittest.TestCase):
    """Tests for the parser's entity limits and the huge-xml SAX feature.

    Each test first checks that a document is rejected with the expected
    SAXParseException message when the feature is left at its default, and
    then that the same document parses once feature_huge_xml is set to True.
    """

    def get_parser(self, huge_xml=None):
        """Return a SAX parser whose generated output is thrown away.

        huge_xml: None leaves the feature untouched; any other value is
        passed to setFeature(feature_huge_xml, ...).
        """
        result = NullSink()
        handler = XMLGenerator(result, 'utf-8')
        parser = create_parser()
        parser.setContentHandler(handler)
        parser.setErrorHandler(ErrorHandler())
        if huge_xml is not None:
            parser.setFeature(feature_huge_xml, huge_xml)
        return parser

    def check_parse(self, source, huge_xml=None):
        """Parse `source` with a fresh parser configured via get_parser()."""
        parser = self.get_parser(huge_xml)
        parser.parse(source)

    def _feed_oversized_entity(self, parser, header):
        """Feed `header` followed by 1,000,000 bytes + 1 byte of entity text."""
        entity = "0123456789" * 100
        parser.feed(header)
        for i in range(1000):
            parser.feed(entity)
        parser.feed('-')

    def test_entitytoolarge(self):
        header = "<!DOCTYPE he [<!ELEMENT he (#PCDATA)*><!ENTITY e '"
        footer = "'>]><he>&e;</he>"

        # Default settings: the oversized entity must be rejected.
        parser = self.get_parser()
        self._feed_oversized_entity(parser, header)
        with self.assertRaisesRegex(SAXParseException,
                                    "entity text is too large"):
            parser.feed(footer, True)

        # With huge_xml enabled the same input must be accepted.
        parser = self.get_parser(True)
        self._feed_oversized_entity(parser, header)
        parser.feed(footer, True)

    def test_expansionlimit(self):
        with self.assertRaisesRegex(SAXParseException,
                                    "entity expansion limit reached"):
            self.check_parse(TEST_EXPANSIONLIMIT)
        self.check_parse(TEST_EXPANSIONLIMIT, True)

    def test_recursionlimit(self):
        with self.assertRaisesRegex(SAXParseException,
                                    "entity nesting limit reached"):
            self.check_parse(TEST_RECURSIONLIMIT)
        # Re-parse the *nesting* document (not the expansion one) so the
        # huge_xml=True half covers the same input as the failing half.
        self.check_parse(TEST_RECURSIONLIMIT, True)


def test_main():
run_unittest(MakeParserTest,
ParseTest,
Expand All @@ -1323,7 +1388,8 @@ def test_main():
StreamReaderWriterXmlgenTest,
ExpatReaderTest,
ErrorReportingTest,
XmlReaderTest)
XmlReaderTest,
XmlEntityExpansion)

if __name__ == "__main__":
test_main()
58 changes: 58 additions & 0 deletions Lib/test/xmltestdata/expansionlimit.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
<!DOCTYPE he [
<!ELEMENT he (#PCDATA)*>
<!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
<!ENTITY b "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP">]>
<he>
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
</he>
7 changes: 7 additions & 0 deletions Lib/test/xmltestdata/nestinglimit.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!DOCTYPE he [
<!ELEMENT he (#PCDATA)*>
<!ENTITY e1 '&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;'>
<!ENTITY e2 '&e3;&e3;&e3;&e3;&e3;'>
<!ENTITY e3 'entity'>
]>
<he>&e1;</he>
2 changes: 2 additions & 0 deletions Lib/xml/dom/expatbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ def getParser(self):
self._parser.buffer_text = True
self._parser.ordered_attributes = True
self._parser.specified_attributes = True
if self._options.huge_xml is not None:
self._parser.huge_entites = self._options.huge_xml
self.install(self._parser)
return self._parser

Expand Down
3 changes: 3 additions & 0 deletions Lib/xml/dom/xmlbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ class Options:
errorHandler = None
filter = None

# None: keep default, True: disable entity expansion protection
huge_xml = None


class DOMBuilder:
entityResolver = None
Expand Down
8 changes: 8 additions & 0 deletions Lib/xml/etree/ElementTree.py
Original file line number Diff line number Diff line change
Expand Up @@ -1626,6 +1626,14 @@ def close(self):
del self.parser, self._parser
del self.target, self._target

# "huge_xml" property: a thin pass-through to the attribute of the same name
# on the wrapped parser object (self._parser).
@property
def huge_xml(self):
# Read the current setting straight from the underlying parser.
return self._parser.huge_xml

# Assigning to huge_xml forwards the value unchanged to the underlying parser.
@huge_xml.setter
def huge_xml(self, value):
self._parser.huge_xml = value


# Import the C accelerators
try:
Expand Down
11 changes: 9 additions & 2 deletions Lib/xml/sax/expatreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from xml.sax.handler import feature_validation, feature_namespaces
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
from xml.sax.handler import feature_string_interning
from xml.sax.handler import feature_string_interning, feature_huge_xml

from xml.sax.handler import property_xml_string, property_interning_dict

# xml.parsers.expat does not raise ImportError in Jython
Expand Down Expand Up @@ -97,6 +98,7 @@ def __init__(self, namespaceHandling=0, bufsize=2**16-20):
self._entity_stack = []
self._external_ges = 0
self._interning = None
self._huge_xml = None

# XMLReader methods

Expand Down Expand Up @@ -137,6 +139,8 @@ def getFeature(self, name):
return 0
elif name == feature_external_ges:
return self._external_ges
elif name == feature_huge_xml:
return self._parser.huge_xml
raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

def setFeature(self, name, state):
Expand All @@ -153,6 +157,8 @@ def setFeature(self, name, state):
self._interning = {}
else:
self._interning = None
elif name == feature_huge_xml:
self._huge_xml = bool(state)
elif name == feature_validation:
if state:
raise SAXNotSupportedException(
Expand Down Expand Up @@ -285,7 +291,8 @@ def reset(self):
intern = self._interning)
self._parser.StartElementHandler = self.start_element
self._parser.EndElementHandler = self.end_element

if self._huge_xml is not None:
self._parser.huge_xml = self._huge_xml
self._reset_cont_handler()
self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
self._parser.NotationDeclHandler = self.notation_decl
Expand Down
9 changes: 8 additions & 1 deletion Lib/xml/sax/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,12 +277,19 @@ def resolveEntity(self, publicId, systemId):
# DTD subset.
# access: (parsing) read-only; (not parsing) read/write

feature_huge_xml = "http://python.org/sax/features/huge-xml"
# true: Allow XML files with huge entities and DTD
# false: Protect against DoS attacks like entity expansion (billion laughs)
# access: (parsing) read-only; (not parsing) read/write


all_features = [feature_namespaces,
feature_namespace_prefixes,
feature_string_interning,
feature_validation,
feature_external_ges,
feature_external_pes]
feature_external_pes,
feature_huge_xml]


#============================================================================
Expand Down
41 changes: 40 additions & 1 deletion Modules/_elementtree.c
Original file line number Diff line number Diff line change
Expand Up @@ -3709,6 +3709,37 @@ xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
return PyObject_GenericGetAttr((PyObject*) self, nameobj);
}

/*
 * Getter for the XMLParser "huge_xml" attribute.
 *
 * Returns None when the pyexpat C API has no GetOption entry, otherwise a
 * bool holding the value reported for XML_OPTION_HUGE_XML.  If GetOption
 * does not return XML_STATUS_OK, RuntimeError is set and NULL is returned.
 */
static PyObject*
xmlparser_huge_xml_getter(XMLParserObject *self, void *closure)
{
    XML_Bool enabled = XML_FALSE;

    /* Option API unavailable: report "unknown" as None. */
    if (EXPAT(GetOption) == NULL) {
        Py_RETURN_NONE;
    }

    if (EXPAT(GetOption)(self->parser, XML_OPTION_HUGE_XML, &enabled) == XML_STATUS_OK) {
        return PyBool_FromLong((long)enabled);
    }

    PyErr_SetString(PyExc_RuntimeError, "Failed to get option value");
    return NULL;
}

/*
 * Setter for the XMLParser "huge_xml" attribute.
 *
 * Converts `value` to a boolean and hands it to the SetOption entry of the
 * pyexpat C API as XML_OPTION_HUGE_XML.
 *
 * Returns 0 on success, or -1 with an exception set:
 *   - TypeError    the attribute is being deleted (value == NULL),
 *   - ValueError   the pyexpat C API has no SetOption entry,
 *   - (propagated) the truth test of `value` raised,
 *   - RuntimeError SetOption did not return XML_STATUS_OK.
 */
static int
xmlparser_huge_xml_setter(XMLParserObject *self, PyObject *value, void *closure)
{
    XML_Bool hx;
    int truth;

    /* A getset setter is also called for "del obj.attr", with value == NULL.
       That must be rejected before value is used. */
    if (value == NULL) {
        PyErr_SetString(PyExc_TypeError, "cannot delete attribute 'huge_xml'");
        return -1;
    }

    if (EXPAT(SetOption) == NULL) {
        PyErr_SetString(PyExc_ValueError, "expat version doesn't support huge XML limit");
        return -1;
    }

    /* PyObject_IsTrue() returns -1 on error; treating that as "true" would
       silently enable the option while an exception is pending. */
    truth = PyObject_IsTrue(value);
    if (truth < 0) {
        return -1;
    }
    hx = truth ? XML_TRUE : XML_FALSE;

    if (EXPAT(SetOption)(self->parser, XML_OPTION_HUGE_XML, &hx) != XML_STATUS_OK) {
        PyErr_SetString(PyExc_RuntimeError, "Failed to set option");
        return -1;
    }
    return 0;
}

#include "clinic/_elementtree.c.h"

static PyMethodDef element_methods[] = {
Expand Down Expand Up @@ -3874,6 +3905,14 @@ static PyMethodDef xmlparser_methods[] = {
{NULL, NULL}
};

/* Attribute descriptors for XMLParser objects (installed via tp_getset). */
static PyGetSetDef xmlparser_getsetlist[] = {
    {
        "huge_xml",
        (getter)xmlparser_huge_xml_getter,
        (setter)xmlparser_huge_xml_setter,
        "Allow huge entities and disable entity expansion protection",
        NULL                            /* closure: unused */
    },
    {NULL, NULL, NULL, NULL, NULL}      /* sentinel */
};

static PyTypeObject XMLParser_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Expand Down Expand Up @@ -3904,7 +3943,7 @@ static PyTypeObject XMLParser_Type = {
0, /* tp_iternext */
xmlparser_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
xmlparser_getsetlist, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
Expand Down
Loading

0 comments on commit 504e092

Please sign in to comment.