diff --git a/Include/pyexpat.h b/Include/pyexpat.h
index 07020b5dc964cb..a3c0d6b2a642ee 100644
--- a/Include/pyexpat.h
+++ b/Include/pyexpat.h
@@ -3,9 +3,17 @@
/* note: you must import expat.h before importing this module! */
-#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.1"
+#include "expat.h"
+
+#define PyExpat_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
+
+#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.2"
#define PyExpat_CAPSULE_NAME "pyexpat.expat_CAPI"
+#if PyExpat_COMBINED_VERSION < 20300
+/* expat < 2.3.0 does not declare enum XML_Option, but the SetOption and
+ * GetOption members of struct PyExpat_CAPI name it in their signatures.
+ * ISO C does not permit an empty enumerator list, so the stand-in carries
+ * a single placeholder enumerator. */
+enum XML_Option { PyExpat_XML_OPTION_UNAVAILABLE };
+#endif
+
struct PyExpat_CAPI
{
char* magic; /* set to PyExpat_CAPI_MAGIC */
@@ -50,6 +58,9 @@ struct PyExpat_CAPI
void *encodingHandlerData, const XML_Char *name, XML_Encoding *info);
/* might be none for expat < 2.1.0 */
int (*SetHashSalt)(XML_Parser parser, unsigned long hash_salt);
+ /* expat >= 2.3.0 */
+ enum XML_Status (*SetOption)(XML_Parser parser, enum XML_Option option, void *value);
+ enum XML_Status (*GetOption)(XML_Parser parser, enum XML_Option option, void *rvalue);
/* always add new stuff to the end! */
};
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
index 3044960a0ed165..a2b006fb7e60d2 100644
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -14,6 +14,8 @@
XMLFilterBase, prepare_input_source
from xml.sax.expatreader import create_parser
from xml.sax.handler import feature_namespaces, feature_external_ges
+from xml.sax.handler import feature_huge_xml
+from xml.sax.handler import ErrorHandler
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
from io import BytesIO, StringIO
import codecs
@@ -32,6 +34,10 @@
except UnicodeEncodeError:
raise unittest.SkipTest("filename is not encodable to utf8")
+TEST_ENTITYTOOLARGE = findfile("entitytoolarge.xml", subdir="xmltestdata")
+TEST_EXPANSIONLIMIT = findfile("expansionlimit.xml", subdir="xmltestdata")
+TEST_RECURSIONLIMIT = findfile("nestinglimit.xml", subdir="xmltestdata")
+
supports_nonascii_filenames = True
if not os.path.supports_unicode_filenames:
try:
@@ -1311,6 +1317,65 @@ def test_nsattrs_wattr(self):
self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr")
+class NullSink(StringIO):
+ def write(self, *args):
+ """/dev/null write"""
+ pass
+
+
+class XmlEntityExpansion(unittest.TestCase):
+
+ def get_parser(self, huge_xml=None):
+ result = NullSink()
+ handler = XMLGenerator(result, 'utf-8')
+ parser = create_parser()
+ parser.setContentHandler(handler)
+ parser.setErrorHandler(ErrorHandler())
+ if huge_xml is not None:
+ parser.setFeature(feature_huge_xml, huge_xml)
+ return parser
+
+ def check_parse(self, source, huge_xml=None):
+ parser = self.get_parser(huge_xml)
+ parser.parse(source)
+
+ def test_entitytoolarge(self):
+ header = "]>&e;"
+
+ parser = self.get_parser()
+ parser.feed(header)
+ # feed 1MB + 1 byte as entity text
+ for i in range(1000):
+ parser.feed(entity)
+ parser.feed('-')
+
+ with self.assertRaisesRegex(SAXParseException,
+ "entity text is too large"):
+ parser.feed(footer, True)
+
+ parser = self.get_parser(True)
+ parser.feed(header)
+ # feed 1MB + 1 byte as entity text
+ for i in range(1000):
+ parser.feed(entity)
+ parser.feed('-')
+ parser.feed(footer, True)
+
+ def test_expansionlimit(self):
+ with self.assertRaisesRegex(SAXParseException,
+ "entity expansion limit reached"):
+ self.check_parse(TEST_EXPANSIONLIMIT)
+ self.check_parse(TEST_EXPANSIONLIMIT, True)
+
+ def test_recursionlimit(self):
+ with self.assertRaisesRegex(SAXParseException,
+ "entity nesting limit reached"):
+ self.check_parse(TEST_RECURSIONLIMIT)
+ self.check_parse(TEST_EXPANSIONLIMIT, True)
+
+
def test_main():
run_unittest(MakeParserTest,
ParseTest,
@@ -1323,7 +1388,8 @@ def test_main():
StreamReaderWriterXmlgenTest,
ExpatReaderTest,
ErrorReportingTest,
- XmlReaderTest)
+ XmlReaderTest,
+ XmlEntityExpansion)
if __name__ == "__main__":
test_main()
diff --git a/Lib/test/xmltestdata/expansionlimit.xml b/Lib/test/xmltestdata/expansionlimit.xml
new file mode 100644
index 00000000000000..7a626d418e321f
--- /dev/null
+++ b/Lib/test/xmltestdata/expansionlimit.xml
@@ -0,0 +1,58 @@
+
+
+]>
+
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
+
diff --git a/Lib/test/xmltestdata/nestinglimit.xml b/Lib/test/xmltestdata/nestinglimit.xml
new file mode 100644
index 00000000000000..2dfb3f41fa55c3
--- /dev/null
+++ b/Lib/test/xmltestdata/nestinglimit.xml
@@ -0,0 +1,7 @@
+
+
+
+
+]>
+&e1;
diff --git a/Lib/xml/dom/expatbuilder.py b/Lib/xml/dom/expatbuilder.py
index 2bd835b035948d..93968d8fbccd2d 100644
--- a/Lib/xml/dom/expatbuilder.py
+++ b/Lib/xml/dom/expatbuilder.py
@@ -160,6 +160,8 @@ def getParser(self):
self._parser.buffer_text = True
self._parser.ordered_attributes = True
self._parser.specified_attributes = True
+ if self._options.huge_xml is not None:
+ self._parser.huge_entites = self._options.huge_xml
self.install(self._parser)
return self._parser
diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py
index 213ab14551c67e..eff2ffda146b7b 100644
--- a/Lib/xml/dom/xmlbuilder.py
+++ b/Lib/xml/dom/xmlbuilder.py
@@ -41,6 +41,9 @@ class Options:
errorHandler = None
filter = None
+ # None: keep default, True: disable entity expansion protection
+ huge_xml = None
+
class DOMBuilder:
entityResolver = None
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 371b37147e327a..2520b0b28ba127 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -1626,6 +1626,14 @@ def close(self):
del self.parser, self._parser
del self.target, self._target
+ @property
+ def huge_xml(self):
+ # Report the huge_xml attribute of the wrapped parser object.
+ return self._parser.huge_xml
+
+ @huge_xml.setter
+ def huge_xml(self, value):
+ # Forward the assignment unchanged to the wrapped parser object.
+ self._parser.huge_xml = value
+
# Import the C accelerators
try:
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
index 5066ffc2fa51f0..b080888a0d35e4 100644
--- a/Lib/xml/sax/expatreader.py
+++ b/Lib/xml/sax/expatreader.py
@@ -9,7 +9,8 @@
from xml.sax.handler import feature_validation, feature_namespaces
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
-from xml.sax.handler import feature_string_interning
+from xml.sax.handler import feature_string_interning, feature_huge_xml
+
from xml.sax.handler import property_xml_string, property_interning_dict
# xml.parsers.expat does not raise ImportError in Jython
@@ -97,6 +98,7 @@ def __init__(self, namespaceHandling=0, bufsize=2**16-20):
self._entity_stack = []
self._external_ges = 0
self._interning = None
+ self._huge_xml = None
# XMLReader methods
@@ -137,6 +139,8 @@ def getFeature(self, name):
return 0
elif name == feature_external_ges:
return self._external_ges
+ elif name == feature_huge_xml:
+ return self._parser.huge_xml
raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
def setFeature(self, name, state):
@@ -153,6 +157,8 @@ def setFeature(self, name, state):
self._interning = {}
else:
self._interning = None
+ elif name == feature_huge_xml:
+ self._huge_xml = bool(state)
elif name == feature_validation:
if state:
raise SAXNotSupportedException(
@@ -285,7 +291,8 @@ def reset(self):
intern = self._interning)
self._parser.StartElementHandler = self.start_element
self._parser.EndElementHandler = self.end_element
-
+ if self._huge_xml is not None:
+ self._parser.huge_xml = self._huge_xml
self._reset_cont_handler()
self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
self._parser.NotationDeclHandler = self.notation_decl
diff --git a/Lib/xml/sax/handler.py b/Lib/xml/sax/handler.py
index 481733d2cbe6e5..4829c5383716c9 100644
--- a/Lib/xml/sax/handler.py
+++ b/Lib/xml/sax/handler.py
@@ -277,12 +277,19 @@ def resolveEntity(self, publicId, systemId):
# DTD subset.
# access: (parsing) read-only; (not parsing) read/write
+feature_huge_xml = "http://python.org/sax/features/huge-xml"
+# true: Allow XML files with huge entities and DTD
+# false: Protect against DoS attacks like entity expansion (billion laughs)
+# access: (parsing) read-only; (not parsing) read/write
+
+
all_features = [feature_namespaces,
feature_namespace_prefixes,
feature_string_interning,
feature_validation,
feature_external_ges,
- feature_external_pes]
+ feature_external_pes,
+ feature_huge_xml]
#============================================================================
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index bba687388797c7..ddbf1a54914f33 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -3709,6 +3709,37 @@ xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
return PyObject_GenericGetAttr((PyObject*) self, nameobj);
}
+/* Getter for the XMLParser "huge_xml" attribute.
+ *
+ * Reads XML_OPTION_HUGE_XML through EXPAT(GetOption) and returns it as a
+ * Python bool.  Returns None when EXPAT(GetOption) is NULL, and raises
+ * RuntimeError when the call does not report XML_STATUS_OK.
+ */
+static PyObject*
+xmlparser_huge_xml_getter(XMLParserObject *self, void *closure)
+{
+ if (EXPAT(GetOption) != NULL) {
+ XML_Bool hx = XML_FALSE;
+ if (EXPAT(GetOption)(self->parser, XML_OPTION_HUGE_XML, &hx) != XML_STATUS_OK) {
+ PyErr_SetString(PyExc_RuntimeError, "Failed to get option value");
+ return NULL;
+ }
+ return PyBool_FromLong((long)hx);
+ } else {
+ /* no GetOption entry available: the state cannot be queried */
+ Py_RETURN_NONE;
+ }
+}
+
+/* Setter for the XMLParser "huge_xml" attribute.
+ *
+ * Converts `value` to a boolean and stores it as XML_OPTION_HUGE_XML through
+ * EXPAT(SetOption).  Returns 0 on success and -1 with an exception set on
+ * failure: TypeError when the attribute is deleted, ValueError when
+ * EXPAT(SetOption) is NULL, RuntimeError when the option call fails, or
+ * whatever exception the truth test of `value` raised.
+ */
+static int
+xmlparser_huge_xml_setter(XMLParserObject *self, PyObject *value, void *closure)
+{
+    XML_Bool hx;
+    int truth;
+
+    /* The setter is invoked with value == NULL for "del parser.huge_xml". */
+    if (value == NULL) {
+        PyErr_SetString(PyExc_TypeError, "cannot delete huge_xml attribute");
+        return -1;
+    }
+    if (EXPAT(SetOption) == NULL) {
+        PyErr_SetString(PyExc_ValueError, "expat version doesn't support huge XML limit");
+        return -1;
+    }
+
+    /* PyObject_IsTrue() returns -1 with an exception set when the truth
+     * test itself fails; that must not be mistaken for "true". */
+    truth = PyObject_IsTrue(value);
+    if (truth < 0) {
+        return -1;
+    }
+    hx = truth ? XML_TRUE : XML_FALSE;
+
+    if (EXPAT(SetOption)(self->parser, XML_OPTION_HUGE_XML, &hx) != XML_STATUS_OK) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to set option");
+        return -1;
+    }
+    return 0;
+}
+
#include "clinic/_elementtree.c.h"
static PyMethodDef element_methods[] = {
@@ -3874,6 +3905,14 @@ static PyMethodDef xmlparser_methods[] = {
{NULL, NULL}
};
+/* Attribute table for XMLParser: exposes "huge_xml" through the getter and
+ * setter functions named below.  The {NULL} entry terminates the table. */
+static PyGetSetDef xmlparser_getsetlist[] = {
+ {"huge_xml",
+ (getter)xmlparser_huge_xml_getter,
+ (setter)xmlparser_huge_xml_setter,
+ "Allow huge entities and disable entity expansion protection"},
+ {NULL},
+};
+
static PyTypeObject XMLParser_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
@@ -3904,7 +3943,7 @@ static PyTypeObject XMLParser_Type = {
0, /* tp_iternext */
xmlparser_methods, /* tp_methods */
0, /* tp_members */
- 0, /* tp_getset */
+ xmlparser_getsetlist, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h
index 174c3fafda3f27..0e4ffe2c7517c8 100644
--- a/Modules/expat/expat.h
+++ b/Modules/expat/expat.h
@@ -124,9 +124,81 @@ enum XML_Error {
XML_ERROR_RESERVED_PREFIX_XMLNS,
XML_ERROR_RESERVED_NAMESPACE_URI,
/* Added in 2.2.1. */
- XML_ERROR_INVALID_ARGUMENT
+ XML_ERROR_INVALID_ARGUMENT,
+ /* Added in 2.3.0 */
+ XML_ERROR_ENTITY_SIZE_VIOLATION,
+ XML_ERROR_ENTITY_NESTED_SIZE_VIOLATION,
+ XML_ERROR_ENTITY_EXPANSION_RATIO_VIOLATION,
+ XML_ERROR_ENTITY_NESTING_VIOLATION,
+ XML_ERROR_HASH_TABLE_SIZE_VIOLATION
};
+/*
+ * HUGE_XML: enable huge XML processing and disable all parsing limits
+ * ENTITIES_MAX_NESTED_REFS: restrict nested entity expansions of an entity
+ * reference.
+ * ENTITIES_MAX_RATIO: restrict ratio between expanded entities and processed
+ * XML bytes.
+ * ENTITIES_MAX_RATIO_THRESHOLD: start threshold for ratio
+ * ENTITIES_MAX_SIZE: limit max entity size in bytes for single and nested entities
+ * HASH_TABLE_DTD_MAX_ENTRY_COUNT: max entries in DTD hash tables
+ */
+
+enum XML_Option {
+ /* Added in 2.3.0 */
+ XML_OPTION_HUGE_XML, /* XML_Bool */
+ XML_OPTION_ENTITIES_MAX_NESTED_REFS, /* unsigned int */
+ XML_OPTION_ENTITIES_MAX_RATIO, /* unsigned int */
+ XML_OPTION_ENTITIES_MAX_RATIO_THRESHOLD, /* unsigned int */
+ XML_OPTION_ENTITIES_MAX_SIZE, /* size_t */
+ XML_OPTION_HASH_TABLE_DTD_MAX_ENTRY_COUNT /* size_t */
+};
+
+/* Entity expansion protection
+ *
+ * Mitigation against billion laugh and quadratic blowup attacks.
+ *
+ * XML_ENTITY_NESTED_REFERENCE_LIMIT confines nesting of entities within entities.
+ * XML_ENTITY_EXPANSION_SIZE restricts the maximum length of entities,
+ * both text of a single entity and resulting text of nested entities.
+ * XML_ENTITY_EXPANSION_RATIO constrains the ratio between position in XML
+ * document and total amount of all expanded entities once more than
+ * XML_ENTITY_EXPANSION_RATIO_THRESHOLD bytes have been expanded.
+ * XML_MAX_HASH_TABLE_ENTRIES limits total amount of entries across all
+ * DTD hash tables.
+ *
+ * The limits are modelled after libxml2's limits
+ * https://github.com/GNOME/libxml2/blob/v2.9.8/parser.c#L99
+ * https://github.com/GNOME/libxml2/blob/v2.9.8/include/libxml/parserInternals.h#L33
+ */
+#ifndef XML_HUGE_XML_DEFAULT
+#define XML_HUGE_XML_DEFAULT 0
+#endif
+
+#ifndef XML_ENTITY_NESTED_REFERENCE_LIMIT
+#define XML_ENTITY_NESTED_REFERENCE_LIMIT 40
+#endif
+
+#ifndef XML_ENTITY_EXPANSION_SIZE
+/* 1MB text */
+#define XML_ENTITY_EXPANSION_SIZE 1000000
+#endif
+
+#ifndef XML_ENTITY_EXPANSION_RATIO
+#define XML_ENTITY_EXPANSION_RATIO 10
+#endif
+
+#ifndef XML_ENTITY_EXPANSION_RATIO_THRESHOLD
+/* 1MB text */
+#define XML_ENTITY_EXPANSION_RATIO_THRESHOLD 1000000
+#endif
+
+#ifndef XML_MAX_HASH_TABLE_ENTRIES
+/* 1M entries across all hash tables */
+#define XML_MAX_HASH_TABLE_ENTRIES 1000000
+#endif
+
+
enum XML_Content_Type {
XML_CTYPE_EMPTY = 1,
XML_CTYPE_ANY,
@@ -948,6 +1020,16 @@ XMLPARSEAPI(int)
XML_SetHashSalt(XML_Parser parser,
unsigned long hash_salt);
+/* Set / get XML parser options, see enum XML_Option
+ *
+ * Added in 2.3.0
+ */
+XMLPARSEAPI(enum XML_Status)
+XML_SetOption(XML_Parser parser, enum XML_Option option, void *value);
+
+XMLPARSEAPI(enum XML_Status)
+XML_GetOption(XML_Parser parser, enum XML_Option option, void *rvalue);
+
/* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then
XML_GetErrorCode returns information about the error.
*/
@@ -1057,7 +1139,9 @@ enum XML_FeatureEnum {
XML_FEATURE_SIZEOF_XML_LCHAR,
XML_FEATURE_NS,
XML_FEATURE_LARGE_SIZE,
- XML_FEATURE_ATTR_INFO
+ XML_FEATURE_ATTR_INFO,
+ /* Added in 2.3.0 */
+ XML_FEATURE_HUGE_XML
/* Additional features must be added to the end of this enum. */
};
@@ -1075,8 +1159,8 @@ XML_GetFeatureList(void);
See http://semver.org.
*/
#define XML_MAJOR_VERSION 2
-#define XML_MINOR_VERSION 2
-#define XML_MICRO_VERSION 6
+#define XML_MINOR_VERSION 3
+#define XML_MICRO_VERSION 0
#ifdef __cplusplus
}
diff --git a/Modules/expat/pyexpatns.h b/Modules/expat/pyexpatns.h
index cfb742ee000b09..51d0c17e69005e 100644
--- a/Modules/expat/pyexpatns.h
+++ b/Modules/expat/pyexpatns.h
@@ -119,7 +119,8 @@
#define XML_UseParserAsHandlerArg PyExpat_XML_UseParserAsHandlerArg
#define XmlUtf16Encode PyExpat_XmlUtf16Encode
#define XmlUtf8Encode PyExpat_XmlUtf8Encode
-
+#define XML_GetOption PyExpat_XML_GetOption
+#define XML_SetOption PyExpat_XML_SetOption
#endif /* !PYEXPATNS_H */
diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c
index c4f3ffc215c9ef..cf5ae279ba6447 100644
--- a/Modules/expat/xmlparse.c
+++ b/Modules/expat/xmlparse.c
@@ -164,15 +164,6 @@ typedef char ICHAR;
/* Do safe (NULL-aware) pointer arithmetic */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
-/* Handle the case where memmove() doesn't exist. */
-#ifndef HAVE_MEMMOVE
-#ifdef HAVE_BCOPY
-#define memmove(d,s,l) bcopy((s),(d),(l))
-#else
-#error memmove does not exist on this platform, nor is a substitute available
-#endif /* HAVE_BCOPY */
-#endif /* HAVE_MEMMOVE */
-
#include "internal.h"
#include "xmltok.h"
#include "xmlrole.h"
@@ -380,6 +371,20 @@ typedef struct open_internal_entity {
XML_Bool betweenDecl; /* WFC: PE Between Declarations */
} OPEN_INTERNAL_ENTITY;
+/* Bookkeeping and configuration for the entity expansion / hash table
+ * limits.  The first four members are running state, the remaining members
+ * are settings that XML_SetOption() / XML_GetOption() write and read. */
+typedef struct {
+ ENTITY *first_entity; /* set once, to the first entity seen while it is still NULL */
+ unsigned int entitiesNestingLevel;
+ size_t nestedEntitiesExpansionSize; /* in bytes, not XML_Char */
+ size_t totalEntitiesExpansionSize; /* in bytes, not XML_Char */
+ /* settings */
+ XML_Bool hugeXML; /* XML_OPTION_HUGE_XML */
+ unsigned int entitiesMaxNesting; /* XML_OPTION_ENTITIES_MAX_NESTED_REFS */
+ unsigned int entitiesMaxRatio; /* XML_OPTION_ENTITIES_MAX_RATIO */
+ unsigned int entitiesMaxRatioThreshold; /* XML_OPTION_ENTITIES_MAX_RATIO_THRESHOLD */
+ size_t entitiesMaxSize; /* in bytes, not XML_Char */
+ size_t hashTableMaxCount; /* XML_OPTION_HASH_TABLE_DTD_MAX_ENTRY_COUNT */
+} LIMIT;
+
typedef enum XML_Error PTRCALL Processor(XML_Parser parser,
const char *start,
const char *end,
@@ -472,6 +477,12 @@ setContext(XML_Parser parser, const XML_Char *context);
static void FASTCALL normalizePublicId(XML_Char *s);
+static LIMIT * limitCreate(const XML_Memory_Handling_Suite *ms);
+static void limitReset(LIMIT *limit);
+static enum XML_Error limitEntityPreContent(XML_Parser parser, ENTITY *entity);
+static enum XML_Error limitEntityPostContent(XML_Parser parser, ENTITY *entity);
+static enum XML_Status limitHashTables(XML_Parser parser);
+
static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms);
/* do not call if m_parentParser != NULL */
static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
@@ -527,7 +538,8 @@ static XML_Parser
parserCreate(const XML_Char *encodingName,
const XML_Memory_Handling_Suite *memsuite,
const XML_Char *nameSep,
- DTD *dtd);
+ DTD *dtd,
+ LIMIT *limit);
static void
parserInit(XML_Parser parser, const XML_Char *encodingName);
@@ -645,6 +657,7 @@ struct XML_ParserStruct {
enum XML_ParamEntityParsing m_paramEntityParsing;
#endif
unsigned long m_hash_secret_salt;
+ LIMIT *m_limit;
};
#define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s)))
@@ -913,14 +926,15 @@ XML_ParserCreate_MM(const XML_Char *encodingName,
const XML_Memory_Handling_Suite *memsuite,
const XML_Char *nameSep)
{
- return parserCreate(encodingName, memsuite, nameSep, NULL);
+ return parserCreate(encodingName, memsuite, nameSep, NULL, NULL);
}
static XML_Parser
parserCreate(const XML_Char *encodingName,
const XML_Memory_Handling_Suite *memsuite,
const XML_Char *nameSep,
- DTD *dtd)
+ DTD *dtd,
+ LIMIT *limit)
{
XML_Parser parser;
@@ -977,6 +991,21 @@ parserCreate(const XML_Char *encodingName,
}
parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
+ if (limit) {
+ parser->m_limit = limit;
+ } else {
+ parser->m_limit = limitCreate(&parser->m_mem);
+ if (parser->m_limit == NULL) {
+ FREE(parser, parser->m_dataBuf);
+ FREE(parser, parser->m_atts);
+#ifdef XML_ATTR_INFO
+ FREE(parser, parser->m_attInfo);
+#endif
+ FREE(parser, parser);
+ return NULL;
+ }
+ }
+
if (dtd)
parser->m_dtd = dtd;
else {
@@ -987,6 +1016,8 @@ parserCreate(const XML_Char *encodingName,
#ifdef XML_ATTR_INFO
FREE(parser, parser->m_attInfo);
#endif
+ if (limit == NULL)
+ FREE(parser, parser->m_limit);
FREE(parser, parser);
return NULL;
}
@@ -1157,6 +1188,7 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
parser->m_protocolEncodingName = NULL;
parserInit(parser, encodingName);
dtdReset(parser->m_dtd, &parser->m_mem);
+ limitReset(parser->m_limit);
return XML_TRUE;
}
@@ -1195,6 +1227,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser,
XML_Parser parser = oldParser;
DTD *newDtd = NULL;
DTD *oldDtd;
+ LIMIT *oldLimit;
XML_StartElementHandler oldStartElementHandler;
XML_EndElementHandler oldEndElementHandler;
XML_CharacterDataHandler oldCharacterDataHandler;
@@ -1239,6 +1272,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser,
/* Stash the original parser contents on the stack */
oldDtd = parser->m_dtd;
+ oldLimit = parser->m_limit;
oldStartElementHandler = parser->m_startElementHandler;
oldEndElementHandler = parser->m_endElementHandler;
oldCharacterDataHandler = parser->m_characterDataHandler;
@@ -1290,10 +1324,10 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser,
if (parser->m_ns) {
XML_Char tmp[2];
*tmp = parser->m_namespaceSeparator;
- parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
+ parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd, oldLimit);
}
else {
- parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
+ parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd, oldLimit);
}
if (!parser)
@@ -1425,6 +1459,9 @@ XML_ParserFree(XML_Parser parser)
if (parser->m_dtd)
#endif /* XML_DTD */
dtdDestroy(parser->m_dtd, (XML_Bool)!parser->m_parentParser, &parser->m_mem);
+ /* LIMIT structure parser->m_limit is shared with all external parsers */
+ if (!parser->m_parentParser)
+ FREE(parser, parser->m_limit);
FREE(parser, (void *)parser->m_atts);
#ifdef XML_ATTR_INFO
FREE(parser, (void *)parser->m_attInfo);
@@ -1803,6 +1840,71 @@ XML_SetHashSalt(XML_Parser parser,
return 1;
}
+/* Store the value that `value` points at as parser option `option`.
+ *
+ * `value` must point at an object of the type documented for the option in
+ * enum XML_Option.  Returns XML_STATUS_ERROR for a NULL parser or value, for
+ * an unknown option, or while the parser is parsing or suspended; otherwise
+ * XML_STATUS_OK.
+ */
+enum XML_Status XMLCALL
+XML_SetOption(XML_Parser parser,
+              enum XML_Option option,
+              void *value)
+{
+  LIMIT *limit;
+  enum XML_Status status = XML_STATUS_OK;
+
+  if (parser == NULL || value == NULL)
+    return XML_STATUS_ERROR;
+
+  /* options are frozen while a parse is running or suspended */
+  switch (parser->m_parsingStatus.parsing) {
+  case XML_PARSING:
+  case XML_SUSPENDED:
+    return XML_STATUS_ERROR;
+  default:
+    break;
+  }
+
+  limit = parser->m_limit;
+  switch (option) {
+  case XML_OPTION_HUGE_XML:
+    limit->hugeXML = *(XML_Bool *)value ? XML_TRUE : XML_FALSE;
+    break;
+  case XML_OPTION_ENTITIES_MAX_NESTED_REFS:
+    limit->entitiesMaxNesting = *(unsigned int *)value;
+    break;
+  case XML_OPTION_ENTITIES_MAX_RATIO:
+    limit->entitiesMaxRatio = *(unsigned int *)value;
+    break;
+  case XML_OPTION_ENTITIES_MAX_RATIO_THRESHOLD:
+    limit->entitiesMaxRatioThreshold = *(unsigned int *)value;
+    break;
+  case XML_OPTION_ENTITIES_MAX_SIZE:
+    limit->entitiesMaxSize = *(size_t *)value;
+    break;
+  case XML_OPTION_HASH_TABLE_DTD_MAX_ENTRY_COUNT:
+    limit->hashTableMaxCount = *(size_t *)value;
+    break;
+  default:
+    status = XML_STATUS_ERROR;
+    break;
+  }
+  return status;
+}
+
+/* Copy the current setting of parser option `option` into the object that
+ * `value` points at.
+ *
+ * `value` must point at an object of the type documented for the option in
+ * enum XML_Option.  Returns XML_STATUS_ERROR for a NULL parser or value or
+ * an unknown option; otherwise XML_STATUS_OK.
+ */
+enum XML_Status XMLCALL
+XML_GetOption(XML_Parser parser,
+              enum XML_Option option,
+              void *value)
+{
+  const LIMIT *limit;
+  enum XML_Status status = XML_STATUS_OK;
+
+  if (parser == NULL || value == NULL)
+    return XML_STATUS_ERROR;
+
+  limit = parser->m_limit;
+  switch (option) {
+  case XML_OPTION_HUGE_XML:
+    *(XML_Bool *)value = limit->hugeXML;
+    break;
+  case XML_OPTION_ENTITIES_MAX_NESTED_REFS:
+    *(unsigned int *)value = limit->entitiesMaxNesting;
+    break;
+  case XML_OPTION_ENTITIES_MAX_RATIO:
+    *(unsigned int *)value = limit->entitiesMaxRatio;
+    break;
+  case XML_OPTION_ENTITIES_MAX_RATIO_THRESHOLD:
+    *(unsigned int *)value = limit->entitiesMaxRatioThreshold;
+    break;
+  case XML_OPTION_ENTITIES_MAX_SIZE:
+    *(size_t *)value = limit->entitiesMaxSize;
+    break;
+  case XML_OPTION_HASH_TABLE_DTD_MAX_ENTRY_COUNT:
+    *(size_t *)value = limit->hashTableMaxCount;
+    break;
+  default:
+    status = XML_STATUS_ERROR;
+    break;
+  }
+  return status;
+}
+
enum XML_Status XMLCALL
XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
{
@@ -2418,6 +2520,17 @@ XML_ErrorString(enum XML_Error code)
/* Added in 2.2.5. */
case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
return XML_L("invalid argument");
+ /* Added in 2.3.0 */
+ case XML_ERROR_ENTITY_SIZE_VIOLATION:
+ return XML_L("entity text is too large");
+ case XML_ERROR_ENTITY_NESTED_SIZE_VIOLATION:
+ return XML_L("nested entity text is too large");
+ case XML_ERROR_ENTITY_EXPANSION_RATIO_VIOLATION:
+ return XML_L("entity expansion ratio exceeded");
+ case XML_ERROR_ENTITY_NESTING_VIOLATION:
+ return XML_L("entity nesting limit exceeded");
+ case XML_ERROR_HASH_TABLE_SIZE_VIOLATION:
+ return XML_L("hash table entry limit exceeded");
}
return NULL;
}
@@ -2487,6 +2600,8 @@ XML_GetFeatureList(void)
#ifdef XML_ATTR_INFO
{XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
#endif
+ {XML_FEATURE_HUGE_XML, XML_L("XML_OPTION_HUGE_XML"),
+ XML_HUGE_XML_DEFAULT},
{XML_FEATURE_END, NULL, 0}
};
@@ -2755,6 +2870,7 @@ doContent(XML_Parser parser,
{
const XML_Char *name;
ENTITY *entity;
+ enum XML_Error result;
XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
s + enc->minBytesPerChar,
next - enc->minBytesPerChar);
@@ -2794,7 +2910,6 @@ doContent(XML_Parser parser,
if (entity->notation)
return XML_ERROR_BINARY_ENTITY_REF;
if (entity->textPtr) {
- enum XML_Error result;
if (!parser->m_defaultExpandInternalEntities) {
if (parser->m_skippedEntityHandler)
parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, 0);
@@ -2802,9 +2917,15 @@ doContent(XML_Parser parser,
reportDefault(parser, enc, s, next);
break;
}
+ result = limitEntityPreContent(parser, entity);
+ if (result != XML_ERROR_NONE)
+ return result;
result = processInternalEntity(parser, entity, XML_FALSE);
if (result != XML_ERROR_NONE)
return result;
+ result = limitEntityPostContent(parser, entity);
+ if (result != XML_ERROR_NONE)
+ return result;
}
else if (parser->m_externalEntityRefHandler) {
const XML_Char *context;
@@ -2812,14 +2933,22 @@ doContent(XML_Parser parser,
context = getContext(parser);
entity->open = XML_FALSE;
if (!context)
- return XML_ERROR_NO_MEMORY;
+ return XML_ERROR_NO_MEMORY;
+ result = limitEntityPreContent(parser, entity);
+ if (result != XML_ERROR_NONE)
+ return result;
if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
context,
entity->base,
entity->systemId,
- entity->publicId))
+ entity->publicId)) {
+ limitEntityPostContent(parser, entity);
return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
+ }
poolDiscard(&parser->m_tempPool);
+ result = limitEntityPostContent(parser, entity);
+ if (result != XML_ERROR_NONE)
+ return result;
}
else if (parser->m_defaultHandler)
reportDefault(parser, enc, s, next);
@@ -3198,6 +3327,8 @@ storeAtts(XML_Parser parser, const ENCODING *enc,
sizeof(ELEMENT_TYPE));
if (!elementType)
return XML_ERROR_NO_MEMORY;
+ if (limitHashTables(parser) != XML_STATUS_OK)
+ return XML_ERROR_HASH_TABLE_SIZE_VIOLATION;
if (parser->m_ns && !setElementTypePrefix(parser, elementType))
return XML_ERROR_NO_MEMORY;
}
@@ -4441,6 +4572,8 @@ doProlog(XML_Parser parser,
sizeof(ENTITY));
if (!parser->m_declEntity)
return XML_ERROR_NO_MEMORY;
+ if (limitHashTables(parser) != XML_STATUS_OK)
+ return XML_ERROR_HASH_TABLE_SIZE_VIOLATION;
#endif /* XML_DTD */
dtd->hasParamEntityRefs = XML_TRUE;
if (parser->m_startDoctypeDeclHandler) {
@@ -4508,6 +4641,8 @@ doProlog(XML_Parser parser,
*/
return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
}
+ if (limitHashTables(parser) != XML_STATUS_OK)
+ return XML_ERROR_HASH_TABLE_SIZE_VIOLATION;
if (parser->m_useForeignDTD)
entity->base = parser->m_curBase;
dtd->paramEntityRead = XML_FALSE;
@@ -4552,6 +4687,8 @@ doProlog(XML_Parser parser,
sizeof(ENTITY));
if (!entity)
return XML_ERROR_NO_MEMORY;
+ if (limitHashTables(parser) != XML_STATUS_OK)
+ return XML_ERROR_HASH_TABLE_SIZE_VIOLATION;
entity->base = parser->m_curBase;
dtd->paramEntityRead = XML_FALSE;
if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
@@ -4767,6 +4904,8 @@ doProlog(XML_Parser parser,
sizeof(ENTITY));
if (!parser->m_declEntity)
return XML_ERROR_NO_MEMORY;
+ if (limitHashTables(parser) != XML_STATUS_OK)
+ return XML_ERROR_HASH_TABLE_SIZE_VIOLATION;
parser->m_declEntity->publicId = NULL;
}
#endif /* XML_DTD */
@@ -4844,6 +4983,8 @@ doProlog(XML_Parser parser,
sizeof(ENTITY));
if (!parser->m_declEntity)
return XML_ERROR_NO_MEMORY;
+ if (limitHashTables(parser) != XML_STATUS_OK)
+ return XML_ERROR_HASH_TABLE_SIZE_VIOLATION;
if (parser->m_declEntity->name != name) {
poolDiscard(&dtd->pool);
parser->m_declEntity = NULL;
@@ -4876,6 +5017,8 @@ doProlog(XML_Parser parser,
name, sizeof(ENTITY));
if (!parser->m_declEntity)
return XML_ERROR_NO_MEMORY;
+ if (limitHashTables(parser) != XML_STATUS_OK)
+ return XML_ERROR_HASH_TABLE_SIZE_VIOLATION;
if (parser->m_declEntity->name != name) {
poolDiscard(&dtd->pool);
parser->m_declEntity = NULL;
@@ -5400,6 +5543,10 @@ processInternalEntity(XML_Parser parser, ENTITY *entity,
if (!openEntity)
return XML_ERROR_NO_MEMORY;
}
+ if (parser->m_limit->first_entity == NULL) {
+ parser->m_limit->first_entity = entity;
+ }
+
entity->open = XML_TRUE;
entity->processed = 0;
openEntity->next = parser->m_openInternalEntities;
@@ -5438,6 +5585,7 @@ processInternalEntity(XML_Parser parser, ENTITY *entity,
parser->m_freeInternalEntities = openEntity;
}
}
+
return result;
}
@@ -6075,6 +6223,8 @@ setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
sizeof(PREFIX));
if (!prefix)
return 0;
+ if (limitHashTables(parser) != XML_STATUS_OK)
+ return 0;
if (prefix->name == poolStart(&dtd->pool))
poolFinish(&dtd->pool);
else
@@ -6103,6 +6253,8 @@ getAttributeId(XML_Parser parser, const ENCODING *enc,
id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID));
if (!id)
return NULL;
+ if (limitHashTables(parser) != XML_STATUS_OK)
+ return NULL;
if (id->name != name)
poolDiscard(&dtd->pool);
else {
@@ -6137,6 +6289,8 @@ getAttributeId(XML_Parser parser, const ENCODING *enc,
sizeof(PREFIX));
if (!id->prefix)
return NULL;
+ if (limitHashTables(parser) != XML_STATUS_OK)
+ return NULL;
if (id->prefix->name == poolStart(&dtd->pool))
poolFinish(&dtd->pool);
else
@@ -6278,6 +6432,8 @@ setContext(XML_Parser parser, const XML_Char *context)
sizeof(PREFIX));
if (!prefix)
return XML_FALSE;
+ if (limitHashTables(parser) != XML_STATUS_OK)
+ return XML_FALSE;
if (prefix->name == poolStart(&parser->m_tempPool)) {
prefix->name = poolCopyString(&dtd->pool, prefix->name);
if (!prefix->name)
@@ -6331,6 +6487,120 @@ normalizePublicId(XML_Char *publicId)
*p = XML_T('\0');
}
+/* Allocate a LIMIT through the malloc_fcn of the given memory handling
+   suite and pass it to limitReset() for initialisation.
+   Returns the new object, or NULL when the allocation fails. */
+static LIMIT *
+limitCreate(const XML_Memory_Handling_Suite *ms)
+{
+  LIMIT *created = (LIMIT *)ms->malloc_fcn(sizeof(LIMIT));
+
+  /* only touch the object when the allocator actually delivered one */
+  if (created != NULL)
+    limitReset(created);
+
+  return created;
+}
+
+
+/* Bring every field of *limit back to its starting value: the configurable
+   ceilings are reloaded from the XML_* constants and the running counters
+   are cleared. */
+static void
+limitReset(LIMIT *limit)
+{
+  /* configuration */
+  limit->hashTableMaxCount = XML_MAX_HASH_TABLE_ENTRIES;
+  limit->entitiesMaxRatioThreshold = XML_ENTITY_EXPANSION_RATIO_THRESHOLD;
+  limit->entitiesMaxRatio = XML_ENTITY_EXPANSION_RATIO;
+  limit->entitiesMaxSize = XML_ENTITY_EXPANSION_SIZE;
+  limit->entitiesMaxNesting = XML_ENTITY_NESTED_REFERENCE_LIMIT;
+  limit->hugeXML = XML_HUGE_XML_DEFAULT ? XML_TRUE : XML_FALSE;
+
+  /* state of the entity expansion being tracked */
+  limit->totalEntitiesExpansionSize = 0;
+  limit->nestedEntitiesExpansionSize = 0;
+  limit->entitiesNestingLevel = 0;
+  limit->first_entity = NULL;
+}
+
+
+/* Account for the expansion of `entity` and enforce the entity limits.
+ *
+ * Increments the nesting level and adds textLen * sizeof(XML_Char) to both
+ * the nested and the total expansion counters.  Unless hugeXML is set, it
+ * then checks, in this order: nesting depth, size of this entity, accumulated
+ * nested size, and the expansion ratio.  A limit whose configured value is 0
+ * is not checked.
+ *
+ * Returns XML_ERROR_NONE, or the XML_ERROR_*_VIOLATION code of the first
+ * limit found to be exceeded.
+ */
+static enum XML_Error
+limitEntityPreContent(XML_Parser parser, ENTITY *entity)
+{
+ XML_Index index;
+
+ /* Record the first entity seen while nothing is being tracked;
+ limitEntityPostContent() compares against it to reset the counters. */
+ if (parser->m_limit->first_entity == NULL) {
+ parser->m_limit->first_entity = entity;
+#ifdef DEBUG_LIMIT
+ fprintf(stderr, "pre: first entry %s\n", entity->name);
+#endif
+ }
+ /* The counters are updated before the hugeXML test, so they keep running
+ even when the checks below are skipped. */
+ parser->m_limit->entitiesNestingLevel++;
+ parser->m_limit->nestedEntitiesExpansionSize += (entity->textLen * sizeof(XML_Char));
+ parser->m_limit->totalEntitiesExpansionSize += (entity->textLen * sizeof(XML_Char));
+
+ /* hugeXML disables every check in this function */
+ if (parser->m_limit->hugeXML)
+ return XML_ERROR_NONE;
+
+ /* nesting depth */
+ if (parser->m_limit->entitiesMaxNesting &&
+ (parser->m_limit->entitiesNestingLevel > parser->m_limit->entitiesMaxNesting))
+ return XML_ERROR_ENTITY_NESTING_VIOLATION;
+
+ /* entitiesMaxSize bounds both a single entity and the nested sum */
+ if (parser->m_limit->entitiesMaxSize) {
+ if ((XML_Size)(entity->textLen * sizeof(XML_Char)) > parser->m_limit->entitiesMaxSize)
+ /* current entity text is too large */
+ return XML_ERROR_ENTITY_SIZE_VIOLATION;
+
+ if (parser->m_limit->nestedEntitiesExpansionSize > parser->m_limit->entitiesMaxSize)
+ /* sum of text is too large */
+ return XML_ERROR_ENTITY_NESTED_SIZE_VIOLATION;
+ }
+
+ /* The ratio check only runs once the current byte index is above
+ entitiesMaxRatioThreshold. */
+ index = XML_GetCurrentByteIndex(parser);
+ if ((parser->m_limit->entitiesMaxRatio) &&
+ (index > parser->m_limit->entitiesMaxRatioThreshold)) {
+ /* overflow safe comparison */
+ /* limit is totalEntitiesExpansionSize / entitiesMaxRatio, rounded up.
+ NOTE(review): the rounding-up addition could itself wrap for totals
+ close to the maximum of the counter's type -- confirm field widths. */
+ size_t limit = (parser->m_limit->totalEntitiesExpansionSize +
+ (parser->m_limit->entitiesMaxRatio - 1)) / parser->m_limit->entitiesMaxRatio;
+ if (limit > (XML_Size)index)
+ /* Ratio between processed bytes and all expanded entities is off */
+ return XML_ERROR_ENTITY_EXPANSION_RATIO_VIOLATION;
+ }
+
+ return XML_ERROR_NONE;
+}
+
+/* Called for `entity` after its content; if `entity` is the one stored in
+   first_entity, forget it and zero the nesting level and the nested
+   expansion size.  For any other entity the state is left untouched.
+   Always returns XML_ERROR_NONE. */
+static enum XML_Error
+limitEntityPostContent(XML_Parser parser, ENTITY *entity)
+{
+  /* not the tracked entity: nothing to unwind */
+  if (parser->m_limit->first_entity != entity)
+    return XML_ERROR_NONE;
+
+  parser->m_limit->nestedEntitiesExpansionSize = 0;
+  parser->m_limit->entitiesNestingLevel = 0;
+  parser->m_limit->first_entity = NULL;
+
+  return XML_ERROR_NONE;
+}
+
+
+/* Compare the combined `used` count of the DTD hash tables with
+   hashTableMaxCount.
+   Returns XML_STATUS_OK when hugeXML is set, when hashTableMaxCount is 0, or
+   when the running sum never exceeds the maximum; returns XML_STATUS_ERROR as
+   soon as the sum wraps around or exceeds hashTableMaxCount. */
+static enum XML_Status
+limitHashTables(XML_Parser parser)
+{
+  DTD *dtd = parser->m_dtd;
+  size_t total = 0;
+
+  if (parser->m_limit->hugeXML)
+    return XML_STATUS_OK;
+  if (!parser->m_limit->hashTableMaxCount)
+    return XML_STATUS_OK;
+
+/* Add one table's `used` count to `total`; leave the function on wrap-around
+   or once the configured maximum is exceeded. */
+#define limit_add_table_used(table) \
+  do { \
+    if (((table).used + total) < total) \
+      return XML_STATUS_ERROR; /* overflow */ \
+    total += (table).used; \
+    if (total > parser->m_limit->hashTableMaxCount) \
+      return XML_STATUS_ERROR; \
+  } while (0)
+
+  limit_add_table_used(dtd->generalEntities);
+  limit_add_table_used(dtd->elementTypes);
+  limit_add_table_used(dtd->attributeIds);
+  limit_add_table_used(dtd->prefixes);
+#ifdef XML_DTD
+  limit_add_table_used(dtd->paramEntities);
+#endif
+
+#undef limit_add_table_used
+
+  return XML_STATUS_OK;
+}
+
static DTD *
dtdCreate(const XML_Memory_Handling_Suite *ms)
{
@@ -6455,6 +6725,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_H
return 0;
if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
return 0;
+ /* no limitHashTables() for copy operation */
}
hashTableIterInit(&iter, &(oldDtd->attributeIds));
@@ -6479,6 +6750,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_H
sizeof(ATTRIBUTE_ID));
if (!newA)
return 0;
+ /* no limitHashTables() for copy operation */
newA->maybeTokenized = oldA->maybeTokenized;
if (oldA->prefix) {
newA->xmlns = oldA->xmlns;
@@ -6508,6 +6780,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_H
sizeof(ELEMENT_TYPE));
if (!newE)
return 0;
+ /* no limitHashTables() for copy operation */
if (oldE->nDefaultAtts) {
newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
@@ -6592,6 +6865,7 @@ copyEntityTable(XML_Parser oldParser,
newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
if (!newE)
return 0;
+ /* no limitHashTables() for copy operation */
if (oldE->systemId) {
const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
if (!tem)
@@ -7183,6 +7457,8 @@ getElementType(XML_Parser parser,
ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE));
if (!ret)
return NULL;
+ if (limitHashTables(parser) != XML_STATUS_OK)
+ return NULL;
if (ret->name != name)
poolDiscard(&dtd->pool);
else {
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index c52079e518f2d7..4f15567dd1ef90 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -13,7 +13,7 @@ module pyexpat
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
-#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
+#define XML_COMBINED_VERSION PyExpat_COMBINED_VERSION
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
PyObject_Malloc, PyObject_Realloc, PyObject_Free};
@@ -1068,6 +1068,7 @@ pyexpat_xmlparser___dir___impl(xmlparseobject *self)
APPEND(rc, "buffer_size");
APPEND(rc, "buffer_text");
APPEND(rc, "buffer_used");
+ APPEND(rc, "huge_xml");
APPEND(rc, "namespace_prefixes");
APPEND(rc, "ordered_attributes");
APPEND(rc, "specified_attributes");
@@ -1324,6 +1325,18 @@ xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
return self->intern;
}
}
+ if (_PyUnicode_EqualToASCIIString(nameobj, "huge_xml")) {
+#if XML_COMBINED_VERSION >= 20300
+ XML_Bool hx;
+ if (XML_GetOption(self->itself, XML_OPTION_HUGE_XML, &hx) != XML_STATUS_OK) {
+ PyErr_SetString(PyExc_RuntimeError, "Failed to get option value");
+ return NULL;
+ }
+ return PyBool_FromLong((long)hx);
+#else
+ Py_RETURN_NONE;
+#endif
+ }
generic:
return PyObject_GenericGetAttr((PyObject*)self, nameobj);
}
@@ -1476,6 +1489,21 @@ xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v)
if (flush_character_buffer(self) < 0)
return -1;
}
+
+    /* "huge_xml": forward the truth value of v to expat's
+       XML_OPTION_HUGE_XML.  Returns 0 on success and -1 with an exception
+       set on failure. */
+    if (_PyUnicode_EqualToASCIIString(name, "huge_xml")) {
+#if XML_COMBINED_VERSION >= 20300
+        XML_Bool hx;
+        /* PyObject_IsTrue() returns -1 with an exception set when the truth
+           test itself fails (e.g. a raising __bool__).  Propagate that
+           instead of treating it as "true" and reporting success with an
+           exception pending. */
+        int truth = PyObject_IsTrue(v);
+        if (truth < 0)
+            return -1;
+        hx = truth ? XML_TRUE : XML_FALSE;
+        if (XML_SetOption(self->itself, XML_OPTION_HUGE_XML, &hx) != XML_STATUS_OK) {
+            PyErr_SetString(PyExc_RuntimeError, "Failed to set option");
+            return -1;
+        }
+        return 0;
+#else
+        PyErr_SetString(PyExc_ValueError, "expat version doesn't support huge XML limit");
+        return -1;
+#endif
+    }
+
if (sethandler(self, name, v)) {
return 0;
}
@@ -1882,6 +1910,13 @@ MODULE_INITFUNC(void)
#else
capi.SetHashSalt = NULL;
#endif
+#if XML_COMBINED_VERSION >= 20300
+ capi.GetOption = XML_GetOption;
+ capi.SetOption = XML_SetOption;
+#else
+ capi.GetOption = NULL;
+ capi.SetOption = NULL;
+#endif
/* export using capsule */
capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);