Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-61441: XML entity expansion limitation #9265

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion Include/pyexpat.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,17 @@

/* note: this header includes expat.h itself (see below), so including it beforehand is no longer required */

#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.1"
#include "expat.h"

#define PyExpat_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)

#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.2"
#define PyExpat_CAPSULE_NAME "pyexpat.expat_CAPI"

#if PyExpat_COMBINED_VERSION < 20300
/* Fallback for expat releases older than 2.3.0, which are assumed not to
   declare enum XML_Option.  ISO C does not allow an empty enumerator list,
   so a placeholder enumerator is provided; it also lets code that names
   XML_OPTION_HUGE_XML compile against such an expat.
   NOTE(review): the placeholder value is presumably never handed to expat,
   because the SetOption/GetOption slots are only usable with expat >= 2.3.0
   -- confirm against the code that fills in the CAPI struct. */
enum XML_Option { XML_OPTION_HUGE_XML };
#endif

struct PyExpat_CAPI
{
char* magic; /* set to PyExpat_CAPI_MAGIC */
Expand Down Expand Up @@ -50,6 +58,9 @@ struct PyExpat_CAPI
void *encodingHandlerData, const XML_Char *name, XML_Encoding *info);
/* might be none for expat < 2.1.0 */
int (*SetHashSalt)(XML_Parser parser, unsigned long hash_salt);
/* expat >= 2.3.0 */
enum XML_Status (*SetOption)(XML_Parser parser, enum XML_Option option, void *value);
enum XML_Status (*GetOption)(XML_Parser parser, enum XML_Option option, void *rvalue);
/* always add new stuff to the end! */
};

68 changes: 67 additions & 1 deletion Lib/test/test_sax.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
XMLFilterBase, prepare_input_source
from xml.sax.expatreader import create_parser
from xml.sax.handler import feature_namespaces, feature_external_ges
from xml.sax.handler import feature_huge_xml
from xml.sax.handler import ErrorHandler
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
from io import BytesIO, StringIO
import codecs
Expand All @@ -32,6 +34,10 @@
except UnicodeEncodeError:
raise unittest.SkipTest("filename is not encodable to utf8")

TEST_ENTITYTOOLARGE = findfile("entitytoolarge.xml", subdir="xmltestdata")
TEST_EXPANSIONLIMIT = findfile("expansionlimit.xml", subdir="xmltestdata")
TEST_RECURSIONLIMIT = findfile("nestinglimit.xml", subdir="xmltestdata")

supports_nonascii_filenames = True
if not os.path.supports_unicode_filenames:
try:
Expand Down Expand Up @@ -1311,6 +1317,65 @@ def test_nsattrs_wattr(self):
self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr")


class NullSink(StringIO):
    """A StringIO whose write() discards whatever it is given."""

    def write(self, *args):
        """Ignore the arguments; nothing is stored in the buffer."""
        return None


class XmlEntityExpansion(unittest.TestCase):
    """Tests for the entity-expansion limits and the ``feature_huge_xml`` switch.

    Each test first checks that the default parser rejects the document with
    the expected SAXParseException message, then that the same document is
    accepted once ``feature_huge_xml`` is set to True.
    """

    def get_parser(self, huge_xml=None):
        """Return a SAX parser whose generated output is discarded.

        If *huge_xml* is not None it is applied via ``feature_huge_xml``;
        otherwise the parser keeps its default setting.
        """
        result = NullSink()
        handler = XMLGenerator(result, 'utf-8')
        parser = create_parser()
        parser.setContentHandler(handler)
        parser.setErrorHandler(ErrorHandler())
        if huge_xml is not None:
            parser.setFeature(feature_huge_xml, huge_xml)
        return parser

    def check_parse(self, source, huge_xml=None):
        """Parse *source* with a fresh parser configured by *huge_xml*."""
        parser = self.get_parser(huge_xml)
        parser.parse(source)

    def test_entitytoolarge(self):
        header = "<!DOCTYPE he [<!ELEMENT he (#PCDATA)*><!ENTITY e '"
        entity = "0123456789" * 100
        footer = "'>]><he>&e;</he>"

        parser = self.get_parser()
        parser.feed(header)
        # feed 1MB + 1 byte as entity text
        for _ in range(1000):
            parser.feed(entity)
        parser.feed('-')

        with self.assertRaisesRegex(SAXParseException,
                                    "entity text is too large"):
            parser.feed(footer, True)

        parser = self.get_parser(True)
        parser.feed(header)
        # feed 1MB + 1 byte as entity text
        for _ in range(1000):
            parser.feed(entity)
        parser.feed('-')
        parser.feed(footer, True)

    def test_expansionlimit(self):
        with self.assertRaisesRegex(SAXParseException,
                                    "entity expansion limit reached"):
            self.check_parse(TEST_EXPANSIONLIMIT)
        self.check_parse(TEST_EXPANSIONLIMIT, True)

    def test_recursionlimit(self):
        with self.assertRaisesRegex(SAXParseException,
                                    "entity nesting limit reached"):
            self.check_parse(TEST_RECURSIONLIMIT)
        # Re-parse the *same* nesting fixture with the protection disabled;
        # the expansion fixture is already covered by test_expansionlimit.
        self.check_parse(TEST_RECURSIONLIMIT, True)


def test_main():
run_unittest(MakeParserTest,
ParseTest,
Expand All @@ -1323,7 +1388,8 @@ def test_main():
StreamReaderWriterXmlgenTest,
ExpatReaderTest,
ErrorReportingTest,
XmlReaderTest)
XmlReaderTest,
XmlEntityExpansion)

if __name__ == "__main__":
test_main()
58 changes: 58 additions & 0 deletions Lib/test/xmltestdata/expansionlimit.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
<!DOCTYPE he [
<!ELEMENT he (#PCDATA)*>
<!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
<!ENTITY b "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP">]>
<he>
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
</he>
7 changes: 7 additions & 0 deletions Lib/test/xmltestdata/nestinglimit.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!DOCTYPE he [
<!ELEMENT he (#PCDATA)*>
<!ENTITY e1 '&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;'>
<!ENTITY e2 '&e3;&e3;&e3;&e3;&e3;'>
<!ENTITY e3 'entity'>
]>
<he>&e1;</he>
2 changes: 2 additions & 0 deletions Lib/xml/dom/expatbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ def getParser(self):
self._parser.buffer_text = True
self._parser.ordered_attributes = True
self._parser.specified_attributes = True
if self._options.huge_xml is not None:
    # Use the attribute name ``huge_xml`` (not the misspelled
    # ``huge_entites``), matching what the SAX expat reader assigns on
    # its parser object.
    self._parser.huge_xml = self._options.huge_xml
self.install(self._parser)
return self._parser

Expand Down
3 changes: 3 additions & 0 deletions Lib/xml/dom/xmlbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ class Options:
errorHandler = None
filter = None

# None: keep default, True: disable entity expansion protection
huge_xml = None


class DOMBuilder:
entityResolver = None
Expand Down
8 changes: 8 additions & 0 deletions Lib/xml/etree/ElementTree.py
Original file line number Diff line number Diff line change
Expand Up @@ -1626,6 +1626,14 @@ def close(self):
del self.parser, self._parser
del self.target, self._target

@property
def huge_xml(self):
    """Return the ``huge_xml`` attribute of the wrapped ``self._parser``."""
    return self._parser.huge_xml

@huge_xml.setter
def huge_xml(self, value):
    """Assign *value* to the ``huge_xml`` attribute of the wrapped ``self._parser``."""
    self._parser.huge_xml = value


# Import the C accelerators
try:
Expand Down
11 changes: 9 additions & 2 deletions Lib/xml/sax/expatreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from xml.sax.handler import feature_validation, feature_namespaces
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
from xml.sax.handler import feature_string_interning
from xml.sax.handler import feature_string_interning, feature_huge_xml

from xml.sax.handler import property_xml_string, property_interning_dict

# xml.parsers.expat does not raise ImportError in Jython
Expand Down Expand Up @@ -97,6 +98,7 @@ def __init__(self, namespaceHandling=0, bufsize=2**16-20):
self._entity_stack = []
self._external_ges = 0
self._interning = None
self._huge_xml = None

# XMLReader methods

Expand Down Expand Up @@ -137,6 +139,8 @@ def getFeature(self, name):
return 0
elif name == feature_external_ges:
return self._external_ges
elif name == feature_huge_xml:
return self._parser.huge_xml
raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

def setFeature(self, name, state):
Expand All @@ -153,6 +157,8 @@ def setFeature(self, name, state):
self._interning = {}
else:
self._interning = None
elif name == feature_huge_xml:
self._huge_xml = bool(state)
elif name == feature_validation:
if state:
raise SAXNotSupportedException(
Expand Down Expand Up @@ -285,7 +291,8 @@ def reset(self):
intern = self._interning)
self._parser.StartElementHandler = self.start_element
self._parser.EndElementHandler = self.end_element

if self._huge_xml is not None:
self._parser.huge_xml = self._huge_xml
self._reset_cont_handler()
self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
self._parser.NotationDeclHandler = self.notation_decl
Expand Down
9 changes: 8 additions & 1 deletion Lib/xml/sax/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,12 +277,19 @@ def resolveEntity(self, publicId, systemId):
# DTD subset.
# access: (parsing) read-only; (not parsing) read/write

feature_huge_xml = "http://python.org/sax/features/huge-xml"
# true: Allow XML files with huge entities and DTD
# false: Protect against DoS attacks like entity expansion (billion laughs)
# access: (parsing) read-only; (not parsing) read/write


all_features = [feature_namespaces,
feature_namespace_prefixes,
feature_string_interning,
feature_validation,
feature_external_ges,
feature_external_pes]
feature_external_pes,
feature_huge_xml]


#============================================================================
Expand Down
41 changes: 40 additions & 1 deletion Modules/_elementtree.c
Original file line number Diff line number Diff line change
Expand Up @@ -3709,6 +3709,37 @@ xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
return PyObject_GenericGetAttr((PyObject*) self, nameobj);
}

/* Getter for the "huge_xml" attribute.
 *
 * Returns None when the expat C API table has no GetOption entry, a bool
 * holding the value reported for XML_OPTION_HUGE_XML otherwise, or NULL with
 * RuntimeError set when GetOption does not report XML_STATUS_OK.
 */
static PyObject*
xmlparser_huge_xml_getter(XMLParserObject *self, void *closure)
{
    XML_Bool enabled = XML_FALSE;

    if (EXPAT(GetOption) == NULL) {
        Py_RETURN_NONE;
    }
    if (EXPAT(GetOption)(self->parser, XML_OPTION_HUGE_XML, &enabled) == XML_STATUS_OK) {
        return PyBool_FromLong((long)enabled);
    }
    PyErr_SetString(PyExc_RuntimeError, "Failed to get option value");
    return NULL;
}

/* Setter for the "huge_xml" attribute.
 *
 * value is interpreted by truthiness and forwarded to expat as
 * XML_OPTION_HUGE_XML.  Returns 0 on success and -1 with an exception set on
 * failure:
 *   - ValueError      when the expat C API table has no SetOption entry,
 *   - AttributeError  when the attribute is being deleted (value == NULL),
 *   - whatever PyObject_IsTrue() raised when truth testing fails,
 *   - RuntimeError    when SetOption does not report XML_STATUS_OK.
 */
static int
xmlparser_huge_xml_setter(XMLParserObject *self, PyObject *value, void *closure)
{
    XML_Bool hx;
    int truth;

    if (EXPAT(SetOption) == NULL) {
        PyErr_SetString(PyExc_ValueError, "expat version doesn't support huge XML limit");
        return -1;
    }
    /* A setter is called with value == NULL for "del obj.huge_xml";
       PyObject_IsTrue() must not be handed a NULL pointer. */
    if (value == NULL) {
        PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
        return -1;
    }
    /* PyObject_IsTrue() returns -1 on error; do not treat that as "true". */
    truth = PyObject_IsTrue(value);
    if (truth < 0) {
        return -1;
    }
    hx = truth ? XML_TRUE : XML_FALSE;
    if (EXPAT(SetOption)(self->parser, XML_OPTION_HUGE_XML, &hx) != XML_STATUS_OK) {
        PyErr_SetString(PyExc_RuntimeError, "Failed to set option");
        return -1;
    }
    return 0;
}

#include "clinic/_elementtree.c.h"

static PyMethodDef element_methods[] = {
Expand Down Expand Up @@ -3874,6 +3905,14 @@ static PyMethodDef xmlparser_methods[] = {
{NULL, NULL}
};

/* Computed attributes of the XMLParser type; terminated by a NULL entry. */
static PyGetSetDef xmlparser_getsetlist[] = {
    {"huge_xml", (getter)xmlparser_huge_xml_getter, (setter)xmlparser_huge_xml_setter,
     "Allow huge entities and disable entity expansion protection", NULL},
    {NULL}  /* sentinel */
};

static PyTypeObject XMLParser_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Expand Down Expand Up @@ -3904,7 +3943,7 @@ static PyTypeObject XMLParser_Type = {
0, /* tp_iternext */
xmlparser_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
xmlparser_getsetlist, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
Expand Down
Loading