diff --git a/Include/pyexpat.h b/Include/pyexpat.h index 07020b5dc964cb..a3c0d6b2a642ee 100644 --- a/Include/pyexpat.h +++ b/Include/pyexpat.h @@ -3,9 +3,17 @@ /* note: you must import expat.h before importing this module! */ -#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.1" +#include "expat.h" + +#define PyExpat_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION) + +#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.2" #define PyExpat_CAPSULE_NAME "pyexpat.expat_CAPI" +#if PyExpat_COMBINED_VERSION < 20300 +enum XML_Option { XML_OPTION_HUGE_XML }; +#endif + struct PyExpat_CAPI { char* magic; /* set to PyExpat_CAPI_MAGIC */ @@ -50,6 +58,9 @@ struct PyExpat_CAPI void *encodingHandlerData, const XML_Char *name, XML_Encoding *info); /* might be none for expat < 2.1.0 */ int (*SetHashSalt)(XML_Parser parser, unsigned long hash_salt); + /* expat >= 2.3.0 */ + enum XML_Status (*SetOption)(XML_Parser parser, enum XML_Option option, void *value); + enum XML_Status (*GetOption)(XML_Parser parser, enum XML_Option option, void *rvalue); /* always add new stuff to the end! 
*/ }; diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 3044960a0ed165..a2b006fb7e60d2 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -14,6 +14,8 @@ XMLFilterBase, prepare_input_source from xml.sax.expatreader import create_parser from xml.sax.handler import feature_namespaces, feature_external_ges +from xml.sax.handler import feature_huge_xml +from xml.sax.handler import ErrorHandler from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl from io import BytesIO, StringIO import codecs @@ -32,6 +34,10 @@ except UnicodeEncodeError: raise unittest.SkipTest("filename is not encodable to utf8") +TEST_ENTITYTOOLARGE = findfile("entitytoolarge.xml", subdir="xmltestdata") +TEST_EXPANSIONLIMIT = findfile("expansionlimit.xml", subdir="xmltestdata") +TEST_RECURSIONLIMIT = findfile("nestinglimit.xml", subdir="xmltestdata") + supports_nonascii_filenames = True if not os.path.supports_unicode_filenames: try: @@ -1311,6 +1317,65 @@ def test_nsattrs_wattr(self): self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr") +class NullSink(StringIO): + def write(self, *args): + """/dev/null write""" + pass + + +class XmlEntityExpansion(unittest.TestCase): + + def get_parser(self, huge_xml=None): + result = NullSink() + handler = XMLGenerator(result, 'utf-8') + parser = create_parser() + parser.setContentHandler(handler) + parser.setErrorHandler(ErrorHandler()) + if huge_xml is not None: + parser.setFeature(feature_huge_xml, huge_xml) + return parser + + def check_parse(self, source, huge_xml=None): + parser = self.get_parser(huge_xml) + parser.parse(source) + + def test_entitytoolarge(self): + header = "<!DOCTYPE doc [<!ENTITY e '" + entity = "-" * 1000 + footer = "'>]><doc>&e;</doc>" + + parser = self.get_parser() + parser.feed(header) + # feed 1MB + 1 byte as entity text + for i in range(1000): + parser.feed(entity) + parser.feed('-') + + with self.assertRaisesRegex(SAXParseException, + "entity text is too large"): + parser.feed(footer, True) + + parser = self.get_parser(True) + parser.feed(header) 
+ # feed 1MB + 1 byte as entity text + for i in range(1000): + parser.feed(entity) + parser.feed('-') + parser.feed(footer, True) + + def test_expansionlimit(self): + with self.assertRaisesRegex(SAXParseException, + "entity expansion ratio exceeded"): + self.check_parse(TEST_EXPANSIONLIMIT) + self.check_parse(TEST_EXPANSIONLIMIT, True) + + def test_recursionlimit(self): + with self.assertRaisesRegex(SAXParseException, + "entity nesting limit exceeded"): + self.check_parse(TEST_RECURSIONLIMIT) + self.check_parse(TEST_RECURSIONLIMIT, True) + + def test_main(): run_unittest(MakeParserTest, ParseTest, @@ -1323,7 +1388,8 @@ def test_main(): StreamReaderWriterXmlgenTest, ExpatReaderTest, ErrorReportingTest, - XmlReaderTest) + XmlReaderTest, + XmlEntityExpansion) if __name__ == "__main__": test_main() diff --git a/Lib/test/xmltestdata/expansionlimit.xml b/Lib/test/xmltestdata/expansionlimit.xml new file mode 100644 index 00000000000000..7a626d418e321f --- /dev/null +++ b/Lib/test/xmltestdata/expansionlimit.xml @@ -0,0 +1,58 @@ + + +]> + +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; 
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; 
+&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; +&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a; + diff --git a/Lib/test/xmltestdata/nestinglimit.xml b/Lib/test/xmltestdata/nestinglimit.xml new file mode 100644 index 00000000000000..2dfb3f41fa55c3 --- /dev/null +++ b/Lib/test/xmltestdata/nestinglimit.xml @@ -0,0 +1,7 @@ + + + + +]> +&e1; diff --git a/Lib/xml/dom/expatbuilder.py b/Lib/xml/dom/expatbuilder.py index 2bd835b035948d..93968d8fbccd2d 100644 --- a/Lib/xml/dom/expatbuilder.py +++ b/Lib/xml/dom/expatbuilder.py @@ -160,6 +160,8 @@ def getParser(self): self._parser.buffer_text = True self._parser.ordered_attributes = True self._parser.specified_attributes = True + if self._options.huge_xml is not None: + self._parser.huge_xml = self._options.huge_xml self.install(self._parser) return self._parser diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py index 213ab14551c67e..eff2ffda146b7b 100644 --- a/Lib/xml/dom/xmlbuilder.py +++ b/Lib/xml/dom/xmlbuilder.py @@ -41,6 +41,9 @@ class Options: errorHandler = None filter = None + # None: keep default, True: disable entity expansion protection + huge_xml = None + class DOMBuilder: entityResolver = None diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 371b37147e327a..2520b0b28ba127 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1626,6 +1626,14 @@ def close(self): del self.parser, self._parser del self.target, self._target + @property + def huge_xml(self): + return self._parser.huge_xml + + @huge_xml.setter + def huge_xml(self, value): + self._parser.huge_xml = value + # Import the C accelerators try: diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py index 5066ffc2fa51f0..b080888a0d35e4 100644 --- 
a/Lib/xml/sax/expatreader.py +++ b/Lib/xml/sax/expatreader.py @@ -9,7 +9,8 @@ from xml.sax.handler import feature_validation, feature_namespaces from xml.sax.handler import feature_namespace_prefixes from xml.sax.handler import feature_external_ges, feature_external_pes -from xml.sax.handler import feature_string_interning +from xml.sax.handler import feature_string_interning, feature_huge_xml + from xml.sax.handler import property_xml_string, property_interning_dict # xml.parsers.expat does not raise ImportError in Jython @@ -97,6 +98,7 @@ def __init__(self, namespaceHandling=0, bufsize=2**16-20): self._entity_stack = [] self._external_ges = 0 self._interning = None + self._huge_xml = None # XMLReader methods @@ -137,6 +139,8 @@ def getFeature(self, name): return 0 elif name == feature_external_ges: return self._external_ges + elif name == feature_huge_xml: + return self._huge_xml if getattr(self, "_parser", None) is None else self._parser.huge_xml raise SAXNotRecognizedException("Feature '%s' not recognized" % name) def setFeature(self, name, state): @@ -153,6 +157,8 @@ def setFeature(self, name, state): self._interning = {} else: self._interning = None + elif name == feature_huge_xml: + self._huge_xml = bool(state) elif name == feature_validation: if state: raise SAXNotSupportedException( @@ -285,7 +291,8 @@ def reset(self): intern = self._interning) self._parser.StartElementHandler = self.start_element self._parser.EndElementHandler = self.end_element - + if self._huge_xml is not None: + self._parser.huge_xml = self._huge_xml self._reset_cont_handler() self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl self._parser.NotationDeclHandler = self.notation_decl diff --git a/Lib/xml/sax/handler.py b/Lib/xml/sax/handler.py index 481733d2cbe6e5..4829c5383716c9 100644 --- a/Lib/xml/sax/handler.py +++ b/Lib/xml/sax/handler.py @@ -277,12 +277,19 @@ def resolveEntity(self, publicId, systemId): # DTD subset. 
# access: (parsing) read-only; (not parsing) read/write +feature_huge_xml = "http://python.org/sax/features/huge-xml" +# true: Allow XML files with huge entities and DTD +# false: Protect against DoS attacks like entity expansion (billion laughs) +# access: (parsing) read-only; (not parsing) read/write + + all_features = [feature_namespaces, feature_namespace_prefixes, feature_string_interning, feature_validation, feature_external_ges, - feature_external_pes] + feature_external_pes, + feature_huge_xml] #============================================================================ diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index bba687388797c7..ddbf1a54914f33 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -3709,6 +3709,47 @@ xmlparser_getattro(XMLParserObject* self, PyObject* nameobj) return PyObject_GenericGetAttr((PyObject*) self, nameobj); } +static PyObject* +xmlparser_huge_xml_getter(XMLParserObject *self, void *closure) +{ + if (EXPAT(GetOption) != NULL) { + XML_Bool hx = XML_FALSE; + if (EXPAT(GetOption)(self->parser, XML_OPTION_HUGE_XML, &hx) != XML_STATUS_OK) { + PyErr_SetString(PyExc_RuntimeError, "Failed to get option value"); + return NULL; + } + return PyBool_FromLong((long)hx); + } else { + Py_RETURN_NONE; + } +} + +static int +xmlparser_huge_xml_setter(XMLParserObject *self, PyObject *value, void *closure) +{ + int enable; + /* value is NULL when the attribute is being deleted */ + if (value == NULL) { + PyErr_SetString(PyExc_TypeError, "cannot delete huge_xml attribute"); + return -1; + } + /* PyObject_IsTrue() returns -1 with an exception set on failure */ + enable = PyObject_IsTrue(value); + if (enable < 0) + return -1; + if (EXPAT(SetOption) != NULL) { + XML_Bool hx = enable ? 
XML_TRUE : XML_FALSE; + if (EXPAT(SetOption)(self->parser, XML_OPTION_HUGE_XML, &hx) != XML_STATUS_OK) { + PyErr_SetString(PyExc_RuntimeError, "Failed to set option"); + return -1; + } + return 0; + } else { + PyErr_SetString(PyExc_ValueError, "expat version doesn't support huge XML limit"); + return -1; + } +} + #include "clinic/_elementtree.c.h" static PyMethodDef element_methods[] = { @@ -3874,6 +3915,14 @@ static PyMethodDef xmlparser_methods[] = { {NULL, NULL} }; +static PyGetSetDef xmlparser_getsetlist[] = { + {"huge_xml", + (getter)xmlparser_huge_xml_getter, + (setter)xmlparser_huge_xml_setter, + "Allow huge entities and disable entity expansion protection"}, + {NULL}, +}; + static PyTypeObject XMLParser_Type = { PyVarObject_HEAD_INIT(NULL, 0) "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0, @@ -3904,7 +3953,7 @@ static PyTypeObject XMLParser_Type = { 0, /* tp_iternext */ xmlparser_methods, /* tp_methods */ 0, /* tp_members */ - 0, /* tp_getset */ + xmlparser_getsetlist, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h index 174c3fafda3f27..0e4ffe2c7517c8 100644 --- a/Modules/expat/expat.h +++ b/Modules/expat/expat.h @@ -124,9 +124,81 @@ enum XML_Error { XML_ERROR_RESERVED_PREFIX_XMLNS, XML_ERROR_RESERVED_NAMESPACE_URI, /* Added in 2.2.1. */ - XML_ERROR_INVALID_ARGUMENT + XML_ERROR_INVALID_ARGUMENT, + /* Added in 2.3.0 */ + XML_ERROR_ENTITY_SIZE_VIOLATION, + XML_ERROR_ENTITY_NESTED_SIZE_VIOLATION, + XML_ERROR_ENTITY_EXPANSION_RATIO_VIOLATION, + XML_ERROR_ENTITY_NESTING_VIOLATION, + XML_ERROR_HASH_TABLE_SIZE_VIOLATION }; +/* + * HUGE_XML: enable huge XML processing and disable all parsing limits + * ENTITIES_MAX_NESTED_REFS: restrict nested entity expansions of an entity + * reference. + * ENTITIES_MAX_RATIO: restrict ratio between expanded entities and processed + * XML bytes. 
+ * ENTITIES_MAX_RATIO_THRESHOLD: start threshold for ratio + * ENTITIES_MAX_SIZE: limit max entity size in bytes for single and nested entities + * HASH_TABLE_DTD_MAX_ENTRY_COUNT: max entries in DTD hash tables + */ + +enum XML_Option { + /* Added in 2.3.0 */ + XML_OPTION_HUGE_XML, /* XML_Bool */ + XML_OPTION_ENTITIES_MAX_NESTED_REFS, /* unsigned int */ + XML_OPTION_ENTITIES_MAX_RATIO, /* unsigned int */ + XML_OPTION_ENTITIES_MAX_RATIO_THRESHOLD, /* unsigned int */ + XML_OPTION_ENTITIES_MAX_SIZE, /* size_t */ + XML_OPTION_HASH_TABLE_DTD_MAX_ENTRY_COUNT /* size_t */ +}; + +/* Entity expansion protection + * + * Mitigation against billion laughs and quadratic blowup attacks. + * + * XML_ENTITY_NESTED_REFERENCE_LIMIT confines nesting of entities within entities. + * XML_ENTITY_EXPANSION_SIZE restricts the maximum length of entities, + * both text of a single entity and resulting text of nested entities. + * XML_ENTITY_EXPANSION_RATIO constrains the ratio between position in XML + * document and total amount of all expanded entities once more than + * XML_ENTITY_EXPANSION_RATIO_THRESHOLD bytes have been expanded. + * XML_MAX_HASH_TABLE_ENTRIES limits total amount of entries across all + * DTD hash tables. 
+ * + * The limits are modelled after libxml2's limits + * https://github.com/GNOME/libxml2/blob/v2.9.8/parser.c#L99 + * https://github.com/GNOME/libxml2/blob/v2.9.8/include/libxml/parserInternals.h#L33 + */ +#ifndef XML_HUGE_XML_DEFAULT +#define XML_HUGE_XML_DEFAULT 0 +#endif + +#ifndef XML_ENTITY_NESTED_REFERENCE_LIMIT +#define XML_ENTITY_NESTED_REFERENCE_LIMIT 40 +#endif + +#ifndef XML_ENTITY_EXPANSION_SIZE +/* 1MB text */ +#define XML_ENTITY_EXPANSION_SIZE 1000000 +#endif + +#ifndef XML_ENTITY_EXPANSION_RATIO +#define XML_ENTITY_EXPANSION_RATIO 10 +#endif + +#ifndef XML_ENTITY_EXPANSION_RATIO_THRESHOLD +/* 1MB text */ +#define XML_ENTITY_EXPANSION_RATIO_THRESHOLD 1000000 +#endif + +#ifndef XML_MAX_HASH_TABLE_ENTRIES +/* 1M entries across all hash tables */ +#define XML_MAX_HASH_TABLE_ENTRIES 1000000 +#endif + + enum XML_Content_Type { XML_CTYPE_EMPTY = 1, XML_CTYPE_ANY, @@ -948,6 +1020,16 @@ XMLPARSEAPI(int) XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt); +/* Set / get XML parser options, see enum XML_Options + * + * Added in 2.3.0 + */ +XMLPARSEAPI(enum XML_Status) +XML_SetOption(XML_Parser parser, enum XML_Option option, void *value); + +XMLPARSEAPI(enum XML_Status) +XML_GetOption(XML_Parser parser, enum XML_Option option, void *rvalue); + /* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then XML_GetErrorCode returns information about the error. */ @@ -1057,7 +1139,9 @@ enum XML_FeatureEnum { XML_FEATURE_SIZEOF_XML_LCHAR, XML_FEATURE_NS, XML_FEATURE_LARGE_SIZE, - XML_FEATURE_ATTR_INFO + XML_FEATURE_ATTR_INFO, + /* Added in 2.3.0 */ + XML_FEATURE_HUGE_XML /* Additional features must be added to the end of this enum. */ }; @@ -1075,8 +1159,8 @@ XML_GetFeatureList(void); See http://semver.org. 
*/ #define XML_MAJOR_VERSION 2 -#define XML_MINOR_VERSION 2 -#define XML_MICRO_VERSION 6 +#define XML_MINOR_VERSION 3 +#define XML_MICRO_VERSION 0 #ifdef __cplusplus } diff --git a/Modules/expat/pyexpatns.h b/Modules/expat/pyexpatns.h index cfb742ee000b09..51d0c17e69005e 100644 --- a/Modules/expat/pyexpatns.h +++ b/Modules/expat/pyexpatns.h @@ -119,7 +119,8 @@ #define XML_UseParserAsHandlerArg PyExpat_XML_UseParserAsHandlerArg #define XmlUtf16Encode PyExpat_XmlUtf16Encode #define XmlUtf8Encode PyExpat_XmlUtf8Encode - +#define XML_GetOption PyExpat_XML_GetOption +#define XML_SetOption PyExpat_XML_SetOption #endif /* !PYEXPATNS_H */ diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c index c4f3ffc215c9ef..cf5ae279ba6447 100644 --- a/Modules/expat/xmlparse.c +++ b/Modules/expat/xmlparse.c @@ -164,15 +164,6 @@ typedef char ICHAR; /* Do safe (NULL-aware) pointer arithmetic */ #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) -/* Handle the case where memmove() doesn't exist. 
*/ -#ifndef HAVE_MEMMOVE -#ifdef HAVE_BCOPY -#define memmove(d,s,l) bcopy((s),(d),(l)) -#else -#error memmove does not exist on this platform, nor is a substitute available -#endif /* HAVE_BCOPY */ -#endif /* HAVE_MEMMOVE */ - #include "internal.h" #include "xmltok.h" #include "xmlrole.h" @@ -380,6 +371,20 @@ typedef struct open_internal_entity { XML_Bool betweenDecl; /* WFC: PE Between Declarations */ } OPEN_INTERNAL_ENTITY; +typedef struct { + ENTITY *first_entity; + unsigned int entitiesNestingLevel; + size_t nestedEntitiesExpansionSize; /* in bytes, not XML_Char */ + size_t totalEntitiesExpansionSize; /* in bytes, not XML_Char */ + /* settings */ + XML_Bool hugeXML; + unsigned int entitiesMaxNesting; + unsigned int entitiesMaxRatio; + unsigned int entitiesMaxRatioThreshold; + size_t entitiesMaxSize; /* in bytes, not XML_Char */ + size_t hashTableMaxCount; +} LIMIT; + typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, const char *end, @@ -472,6 +477,12 @@ setContext(XML_Parser parser, const XML_Char *context); static void FASTCALL normalizePublicId(XML_Char *s); +static LIMIT * limitCreate(const XML_Memory_Handling_Suite *ms); +static void limitReset(LIMIT *limit); +static enum XML_Error limitEntityPreContent(XML_Parser parser, ENTITY *entity); +static enum XML_Error limitEntityPostContent(XML_Parser parser, ENTITY *entity); +static enum XML_Status limitHashTables(XML_Parser parser); + static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms); /* do not call if m_parentParser != NULL */ static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms); @@ -527,7 +538,8 @@ static XML_Parser parserCreate(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, - DTD *dtd); + DTD *dtd, + LIMIT *limit); static void parserInit(XML_Parser parser, const XML_Char *encodingName); @@ -645,6 +657,7 @@ struct XML_ParserStruct { enum XML_ParamEntityParsing m_paramEntityParsing; #endif unsigned long 
m_hash_secret_salt; + LIMIT *m_limit; }; #define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) @@ -913,14 +926,15 @@ XML_ParserCreate_MM(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep) { - return parserCreate(encodingName, memsuite, nameSep, NULL); + return parserCreate(encodingName, memsuite, nameSep, NULL, NULL); } static XML_Parser parserCreate(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, - DTD *dtd) + DTD *dtd, + LIMIT *limit) { XML_Parser parser; @@ -977,6 +991,21 @@ parserCreate(const XML_Char *encodingName, } parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE; + if (limit) { + parser->m_limit = limit; + } else { + parser->m_limit = limitCreate(&parser->m_mem); + if (parser->m_limit == NULL) { + FREE(parser, parser->m_dataBuf); + FREE(parser, parser->m_atts); +#ifdef XML_ATTR_INFO + FREE(parser, parser->m_attInfo); +#endif + FREE(parser, parser); + return NULL; + } + } + if (dtd) parser->m_dtd = dtd; else { @@ -987,6 +1016,8 @@ parserCreate(const XML_Char *encodingName, #ifdef XML_ATTR_INFO FREE(parser, parser->m_attInfo); #endif + if (limit == NULL) + FREE(parser, parser->m_limit); FREE(parser, parser); return NULL; } @@ -1157,6 +1188,7 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) parser->m_protocolEncodingName = NULL; parserInit(parser, encodingName); dtdReset(parser->m_dtd, &parser->m_mem); + limitReset(parser->m_limit); return XML_TRUE; } @@ -1195,6 +1227,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, XML_Parser parser = oldParser; DTD *newDtd = NULL; DTD *oldDtd; + LIMIT *oldLimit; XML_StartElementHandler oldStartElementHandler; XML_EndElementHandler oldEndElementHandler; XML_CharacterDataHandler oldCharacterDataHandler; @@ -1239,6 +1272,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, /* Stash the original parser contents on the stack */ oldDtd = parser->m_dtd; + oldLimit = parser->m_limit; 
oldStartElementHandler = parser->m_startElementHandler; oldEndElementHandler = parser->m_endElementHandler; oldCharacterDataHandler = parser->m_characterDataHandler; @@ -1290,10 +1324,10 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, if (parser->m_ns) { XML_Char tmp[2]; *tmp = parser->m_namespaceSeparator; - parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); + parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd, oldLimit); } else { - parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); + parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd, oldLimit); } if (!parser) @@ -1425,6 +1459,9 @@ XML_ParserFree(XML_Parser parser) if (parser->m_dtd) #endif /* XML_DTD */ dtdDestroy(parser->m_dtd, (XML_Bool)!parser->m_parentParser, &parser->m_mem); + /* LIMIT structure parser->m_limit is shared with all external parsers */ + if (!parser->m_parentParser) + FREE(parser, parser->m_limit); FREE(parser, (void *)parser->m_atts); #ifdef XML_ATTR_INFO FREE(parser, (void *)parser->m_attInfo); @@ -1803,6 +1840,71 @@ XML_SetHashSalt(XML_Parser parser, return 1; } +enum XML_Status XMLCALL +XML_SetOption(XML_Parser parser, + enum XML_Option option, + void *value) +{ + if ((parser == NULL) || (value == NULL)) + return XML_STATUS_ERROR; + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED) + return XML_STATUS_ERROR; + switch(option) { + case XML_OPTION_HUGE_XML: + parser->m_limit->hugeXML = *(XML_Bool*)(value) ? 
XML_TRUE : XML_FALSE; + return XML_STATUS_OK; + case XML_OPTION_ENTITIES_MAX_NESTED_REFS: + parser->m_limit->entitiesMaxNesting = *(unsigned int *)(value); + return XML_STATUS_OK; + case XML_OPTION_ENTITIES_MAX_RATIO: + parser->m_limit->entitiesMaxRatio = *(unsigned int *)(value); + return XML_STATUS_OK; + case XML_OPTION_ENTITIES_MAX_RATIO_THRESHOLD: + parser->m_limit->entitiesMaxRatioThreshold = *(unsigned int *)(value); + return XML_STATUS_OK; + case XML_OPTION_ENTITIES_MAX_SIZE: + parser->m_limit->entitiesMaxSize = *(size_t *)(value); + return XML_STATUS_OK; + case XML_OPTION_HASH_TABLE_DTD_MAX_ENTRY_COUNT: + parser->m_limit->hashTableMaxCount = *(size_t *)(value); + return XML_STATUS_OK; + default: + return XML_STATUS_ERROR; + } +} + +enum XML_Status XMLCALL +XML_GetOption(XML_Parser parser, + enum XML_Option option, + void *value) +{ + if ((parser == NULL) || (value == NULL)) + return XML_STATUS_ERROR; + switch(option) { + case XML_OPTION_HUGE_XML: + *(XML_Bool*)(value) = parser->m_limit->hugeXML; + return XML_STATUS_OK; + case XML_OPTION_ENTITIES_MAX_NESTED_REFS: + *(unsigned int *)(value) = parser->m_limit->entitiesMaxNesting; + return XML_STATUS_OK; + case XML_OPTION_ENTITIES_MAX_RATIO: + *(unsigned int *)(value) = parser->m_limit->entitiesMaxRatio; + return XML_STATUS_OK; + case XML_OPTION_ENTITIES_MAX_RATIO_THRESHOLD: + *(unsigned int *)(value) = parser->m_limit->entitiesMaxRatioThreshold; + return XML_STATUS_OK; + case XML_OPTION_ENTITIES_MAX_SIZE: + *(size_t *)(value) = parser->m_limit->entitiesMaxSize; + return XML_STATUS_OK; + case XML_OPTION_HASH_TABLE_DTD_MAX_ENTRY_COUNT: + *(size_t *)(value) = parser->m_limit->hashTableMaxCount; + return XML_STATUS_OK; + default: + return XML_STATUS_ERROR; + } +} + enum XML_Status XMLCALL XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { @@ -2418,6 +2520,17 @@ XML_ErrorString(enum XML_Error code) /* Added in 2.2.5. 
*/ case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */ return XML_L("invalid argument"); + /* Added in 2.3.0 */ + case XML_ERROR_ENTITY_SIZE_VIOLATION: + return XML_L("entity text is too large"); + case XML_ERROR_ENTITY_NESTED_SIZE_VIOLATION: + return XML_L("nested entity text is too large"); + case XML_ERROR_ENTITY_EXPANSION_RATIO_VIOLATION: + return XML_L("entity expansion ratio exceeded"); + case XML_ERROR_ENTITY_NESTING_VIOLATION: + return XML_L("entity nesting limit exceeded"); + case XML_ERROR_HASH_TABLE_SIZE_VIOLATION: + return XML_L("hash table entry limit exceeded"); } return NULL; } @@ -2487,6 +2600,8 @@ XML_GetFeatureList(void) #ifdef XML_ATTR_INFO {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, #endif + {XML_FEATURE_HUGE_XML, XML_L("XML_OPTION_HUGE_XML"), + XML_HUGE_XML_DEFAULT}, {XML_FEATURE_END, NULL, 0} }; @@ -2755,6 +2870,7 @@ doContent(XML_Parser parser, { const XML_Char *name; ENTITY *entity; + enum XML_Error result; XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); @@ -2794,7 +2910,6 @@ doContent(XML_Parser parser, if (entity->notation) return XML_ERROR_BINARY_ENTITY_REF; if (entity->textPtr) { - enum XML_Error result; if (!parser->m_defaultExpandInternalEntities) { if (parser->m_skippedEntityHandler) parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, 0); @@ -2802,9 +2917,15 @@ doContent(XML_Parser parser, reportDefault(parser, enc, s, next); break; } + result = limitEntityPreContent(parser, entity); + if (result != XML_ERROR_NONE) + return result; result = processInternalEntity(parser, entity, XML_FALSE); if (result != XML_ERROR_NONE) return result; + result = limitEntityPostContent(parser, entity); + if (result != XML_ERROR_NONE) + return result; } else if (parser->m_externalEntityRefHandler) { const XML_Char *context; @@ -2812,14 +2933,22 @@ doContent(XML_Parser parser, context = getContext(parser); entity->open = XML_FALSE; if (!context) - 
return XML_ERROR_NO_MEMORY; + return XML_ERROR_NO_MEMORY; + result = limitEntityPreContent(parser, entity); + if (result != XML_ERROR_NONE) + return result; if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg, context, entity->base, entity->systemId, - entity->publicId)) + entity->publicId)) { + limitEntityPostContent(parser, entity); return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + } poolDiscard(&parser->m_tempPool); + result = limitEntityPostContent(parser, entity); + if (result != XML_ERROR_NONE) + return result; } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); @@ -3198,6 +3327,8 @@ storeAtts(XML_Parser parser, const ENCODING *enc, sizeof(ELEMENT_TYPE)); if (!elementType) return XML_ERROR_NO_MEMORY; + if (limitHashTables(parser) != XML_STATUS_OK) + return XML_ERROR_HASH_TABLE_SIZE_VIOLATION; if (parser->m_ns && !setElementTypePrefix(parser, elementType)) return XML_ERROR_NO_MEMORY; } @@ -4441,6 +4572,8 @@ doProlog(XML_Parser parser, sizeof(ENTITY)); if (!parser->m_declEntity) return XML_ERROR_NO_MEMORY; + if (limitHashTables(parser) != XML_STATUS_OK) + return XML_ERROR_HASH_TABLE_SIZE_VIOLATION; #endif /* XML_DTD */ dtd->hasParamEntityRefs = XML_TRUE; if (parser->m_startDoctypeDeclHandler) { @@ -4508,6 +4641,8 @@ doProlog(XML_Parser parser, */ return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ } + if (limitHashTables(parser) != XML_STATUS_OK) + return XML_ERROR_HASH_TABLE_SIZE_VIOLATION; if (parser->m_useForeignDTD) entity->base = parser->m_curBase; dtd->paramEntityRead = XML_FALSE; @@ -4552,6 +4687,8 @@ doProlog(XML_Parser parser, sizeof(ENTITY)); if (!entity) return XML_ERROR_NO_MEMORY; + if (limitHashTables(parser) != XML_STATUS_OK) + return XML_ERROR_HASH_TABLE_SIZE_VIOLATION; entity->base = parser->m_curBase; dtd->paramEntityRead = XML_FALSE; if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg, @@ -4767,6 +4904,8 @@ doProlog(XML_Parser parser, sizeof(ENTITY)); if (!parser->m_declEntity) 
return XML_ERROR_NO_MEMORY; + if (limitHashTables(parser) != XML_STATUS_OK) + return XML_ERROR_HASH_TABLE_SIZE_VIOLATION; parser->m_declEntity->publicId = NULL; } #endif /* XML_DTD */ @@ -4844,6 +4983,8 @@ doProlog(XML_Parser parser, sizeof(ENTITY)); if (!parser->m_declEntity) return XML_ERROR_NO_MEMORY; + if (limitHashTables(parser) != XML_STATUS_OK) + return XML_ERROR_HASH_TABLE_SIZE_VIOLATION; if (parser->m_declEntity->name != name) { poolDiscard(&dtd->pool); parser->m_declEntity = NULL; @@ -4876,6 +5017,8 @@ doProlog(XML_Parser parser, name, sizeof(ENTITY)); if (!parser->m_declEntity) return XML_ERROR_NO_MEMORY; + if (limitHashTables(parser) != XML_STATUS_OK) + return XML_ERROR_HASH_TABLE_SIZE_VIOLATION; if (parser->m_declEntity->name != name) { poolDiscard(&dtd->pool); parser->m_declEntity = NULL; @@ -5400,6 +5543,10 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, if (!openEntity) return XML_ERROR_NO_MEMORY; } + if (parser->m_limit->first_entity == NULL) { + parser->m_limit->first_entity = entity; + } + entity->open = XML_TRUE; entity->processed = 0; openEntity->next = parser->m_openInternalEntities; @@ -5438,6 +5585,7 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, parser->m_freeInternalEntities = openEntity; } } + return result; } @@ -6075,6 +6223,8 @@ setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) sizeof(PREFIX)); if (!prefix) return 0; + if (limitHashTables(parser) != XML_STATUS_OK) + return 0; if (prefix->name == poolStart(&dtd->pool)) poolFinish(&dtd->pool); else @@ -6103,6 +6253,8 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID)); if (!id) return NULL; + if (limitHashTables(parser) != XML_STATUS_OK) + return NULL; if (id->name != name) poolDiscard(&dtd->pool); else { @@ -6137,6 +6289,8 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, sizeof(PREFIX)); if (!id->prefix) return NULL; + if (limitHashTables(parser) 
!= XML_STATUS_OK)
+            return NULL; /* same value as the failed-lookup path just above */
           if (id->prefix->name == poolStart(&dtd->pool))
             poolFinish(&dtd->pool);
           else
@@ -6278,6 +6432,8 @@ setContext(XML_Parser parser, const XML_Char *context)
                                 sizeof(PREFIX));
       if (!prefix)
         return XML_FALSE;
+      if (limitHashTables(parser) != XML_STATUS_OK) /* DTD hash-table cap, see limitHashTables() */
+        return XML_FALSE;
       if (prefix->name == poolStart(&parser->m_tempPool)) {
         prefix->name = poolCopyString(&dtd->pool, prefix->name);
         if (!prefix->name)
@@ -6331,6 +6487,120 @@ normalizePublicId(XML_Char *publicId)
   *p = XML_T('\0');
 }
 
+static LIMIT * /* returns a LIMIT filled in by limitReset(), or NULL when ms->malloc_fcn fails */
+limitCreate(const XML_Memory_Handling_Suite *ms)
+{
+  LIMIT *limit = (LIMIT *)ms->malloc_fcn(sizeof(LIMIT));
+  if (!limit)
+    return NULL;
+  /* start from cleared counters and the XML_* default limits */
+  limitReset(limit);
+
+  return limit;
+}
+/* Clear the running counters and set every limit field back to its XML_* default. */
+static void
+limitReset(LIMIT *limit)
+{
+  limit->first_entity = NULL;
+  limit->entitiesNestingLevel = 0;
+  limit->nestedEntitiesExpansionSize = 0;
+  limit->totalEntitiesExpansionSize = 0;
+  /* limits: a 0 in entitiesMaxNesting/entitiesMaxSize/entitiesMaxRatio/hashTableMaxCount disables that check */
+  limit->hugeXML = XML_HUGE_XML_DEFAULT ? XML_TRUE : XML_FALSE;
+  limit->entitiesMaxNesting = XML_ENTITY_NESTED_REFERENCE_LIMIT;
+  limit->entitiesMaxSize = XML_ENTITY_EXPANSION_SIZE;
+  limit->entitiesMaxRatio = XML_ENTITY_EXPANSION_RATIO;
+  limit->entitiesMaxRatioThreshold = XML_ENTITY_EXPANSION_RATIO_THRESHOLD;
+  limit->hashTableMaxCount = XML_MAX_HASH_TABLE_ENTRIES;
+}
+/* Add `entity` to the expansion counters, then test them; returns XML_ERROR_NONE or the violation found. */
+static enum XML_Error
+limitEntityPreContent(XML_Parser parser, ENTITY *entity)
+{
+  XML_Index index;
+  /* first_entity is the entity whose limitEntityPostContent() call resets the nesting counters */
+  if (parser->m_limit->first_entity == NULL) {
+    parser->m_limit->first_entity = entity;
+#ifdef DEBUG_LIMIT
+    fprintf(stderr, "pre: first entry %s\n", entity->name);
+#endif
+  }
+  parser->m_limit->entitiesNestingLevel++;
+  parser->m_limit->nestedEntitiesExpansionSize += (entity->textLen * sizeof(XML_Char));
+  parser->m_limit->totalEntitiesExpansionSize += (entity->textLen * sizeof(XML_Char));
+  /* the counters above are maintained even with hugeXML set; only the checks are skipped */
+  if (parser->m_limit->hugeXML)
+    return XML_ERROR_NONE;
+  /* depth check */
+  if (parser->m_limit->entitiesMaxNesting &&
+      (parser->m_limit->entitiesNestingLevel > parser->m_limit->entitiesMaxNesting))
+    return XML_ERROR_ENTITY_NESTING_VIOLATION;
+  /* size checks, both in bytes (textLen * sizeof(XML_Char)) */
+  if (parser->m_limit->entitiesMaxSize) {
+    if ((XML_Size)(entity->textLen * sizeof(XML_Char)) > parser->m_limit->entitiesMaxSize)
+      /* current entity text is too large */
+      return XML_ERROR_ENTITY_SIZE_VIOLATION;
+
+    if (parser->m_limit->nestedEntitiesExpansionSize > parser->m_limit->entitiesMaxSize)
+      /* sum of text is too large */
+      return XML_ERROR_ENTITY_NESTED_SIZE_VIOLATION;
+  }
+  /* ratio check: applied only once the current byte index is above entitiesMaxRatioThreshold */
+  index = XML_GetCurrentByteIndex(parser);
+  if ((parser->m_limit->entitiesMaxRatio) &&
+      (index > parser->m_limit->entitiesMaxRatioThreshold)) {
+    /* overflow safe comparison: ceil(total / ratio) against index rather than total against index * ratio */
+    size_t limit = (parser->m_limit->totalEntitiesExpansionSize +
+                    (parser->m_limit->entitiesMaxRatio - 1)) / parser->m_limit->entitiesMaxRatio;
+    if (limit > (XML_Size)index)
+      /* Ratio between processed bytes and all expanded entities is off */
+      return XML_ERROR_ENTITY_EXPANSION_RATIO_VIOLATION;
+  }
+
+  return XML_ERROR_NONE;
+}
+/* If `entity` is the recorded first_entity, forget it and zero the nesting counters (the total is kept); always XML_ERROR_NONE. */
+static enum XML_Error
+limitEntityPostContent(XML_Parser parser, ENTITY *entity)
+{
+  if (parser->m_limit->first_entity == entity) {
+    parser->m_limit->first_entity = NULL;
+    parser->m_limit->entitiesNestingLevel = 0;
+    parser->m_limit->nestedEntitiesExpansionSize = 0;
+  }
+
+  return XML_ERROR_NONE;
+}
+/* XML_STATUS_ERROR when the summed .used counts of the DTD hash tables wrap or exceed hashTableMaxCount, else XML_STATUS_OK. */
+static enum XML_Status
+limitHashTables(XML_Parser parser)
+{
+  DTD *dtd = parser->m_dtd;
+  size_t used = 0;
+  if (parser->m_limit->hugeXML || !parser->m_limit->hashTableMaxCount) /* check switched off */
+    return XML_STATUS_OK;
+  /* add one table's .used to the running total; return early on wrap-around or once the cap is passed */
+#define add_check_used(table) \
+  if (((table).used + used) < used) \
+    return XML_STATUS_ERROR; /* overflow */ \
+  used += (table).used; \
+  if (used > parser->m_limit->hashTableMaxCount) \
+    return XML_STATUS_ERROR
+  /* the macro expands to several statements, so it is only usable as a statement of its own */
+  add_check_used(dtd->generalEntities);
+  add_check_used(dtd->elementTypes);
+  add_check_used(dtd->attributeIds);
+  add_check_used(dtd->prefixes);
+#ifdef XML_DTD
+  add_check_used(dtd->paramEntities);
+#endif
+
+#undef add_check_used
+
+  return XML_STATUS_OK;
+}
+
 static DTD *
 dtdCreate(const XML_Memory_Handling_Suite
*ms)
 {
@@ -6455,6 +6725,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_H
       return 0;
     if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
       return 0;
+    /* no limitHashTables() for copy operation */
   }
 
   hashTableIterInit(&iter, &(oldDtd->attributeIds));
@@ -6479,6 +6750,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_H
                                   sizeof(ATTRIBUTE_ID));
     if (!newA)
       return 0;
+    /* no limitHashTables() for copy operation */
     newA->maybeTokenized = oldA->maybeTokenized;
     if (oldA->prefix) {
       newA->xmlns = oldA->xmlns;
@@ -6508,6 +6780,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_H
                                   sizeof(ELEMENT_TYPE));
     if (!newE)
       return 0;
+    /* no limitHashTables() for copy operation */
     if (oldE->nDefaultAtts) {
       newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
           ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
@@ -6592,6 +6865,7 @@ copyEntityTable(XML_Parser oldParser,
     newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
     if (!newE)
       return 0;
+    /* no limitHashTables() for copy operation */
     if (oldE->systemId) {
       const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
       if (!tem)
@@ -7183,6 +7457,8 @@ getElementType(XML_Parser parser,
   ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE));
   if (!ret)
     return NULL;
+  if (limitHashTables(parser) != XML_STATUS_OK) /* DTD hash-table cap, see limitHashTables() */
+    return NULL; /* same value as the failed-lookup path just above */
   if (ret->name != name)
     poolDiscard(&dtd->pool);
   else {
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index c52079e518f2d7..4f15567dd1ef90 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -13,7 +13,7 @@ module pyexpat
 [clinic start generated code]*/
 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
 
-#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
+#define XML_COMBINED_VERSION PyExpat_COMBINED_VERSION /* the formula now lives in Include/pyexpat.h */
 
 static XML_Memory_Handling_Suite ExpatMemoryHandler = {
     PyObject_Malloc,
PyObject_Realloc, PyObject_Free}; @@ -1068,6 +1068,7 @@ pyexpat_xmlparser___dir___impl(xmlparseobject *self) APPEND(rc, "buffer_size"); APPEND(rc, "buffer_text"); APPEND(rc, "buffer_used"); + APPEND(rc, "huge_xml"); APPEND(rc, "namespace_prefixes"); APPEND(rc, "ordered_attributes"); APPEND(rc, "specified_attributes"); @@ -1324,6 +1325,18 @@ xmlparse_getattro(xmlparseobject *self, PyObject *nameobj) return self->intern; } } + if (_PyUnicode_EqualToASCIIString(nameobj, "huge_xml")) { +#if XML_COMBINED_VERSION >= 20300 + XML_Bool hx; + if (XML_GetOption(self->itself, XML_OPTION_HUGE_XML, &hx) != XML_STATUS_OK) { + PyErr_SetString(PyExc_RuntimeError, "Failed to get option value"); + return NULL; + } + return PyBool_FromLong((long)hx); +#else + Py_RETURN_NONE; +#endif + } generic: return PyObject_GenericGetAttr((PyObject*)self, nameobj); } @@ -1476,6 +1489,21 @@ xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v) if (flush_character_buffer(self) < 0) return -1; } + + if (_PyUnicode_EqualToASCIIString(name, "huge_xml")) { +#if XML_COMBINED_VERSION >= 20300 + XML_Bool hx = PyObject_IsTrue(v) ? XML_TRUE : XML_FALSE; + if (XML_SetOption(self->itself, XML_OPTION_HUGE_XML, &hx) != XML_STATUS_OK) { + PyErr_SetString(PyExc_RuntimeError, "Failed to set option"); + return -1; + } + return 0; +#else + PyErr_SetString(PyExc_ValueError, "expat version doesn't support huge XML limit"); + return -1; +#endif + } + if (sethandler(self, name, v)) { return 0; } @@ -1882,6 +1910,13 @@ MODULE_INITFUNC(void) #else capi.SetHashSalt = NULL; #endif +#if XML_COMBINED_VERSION >= 20300 + capi.GetOption = XML_GetOption; + capi.SetOption = XML_SetOption; +#else + capi.GetOption = NULL; + capi.SetOption = NULL; +#endif /* export using capsule */ capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);