From 1892770f3e32d75368ffad99b8e86d539786c213 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Mon, 12 Aug 2024 09:58:23 +0900 Subject: [PATCH] Fix calculation of Security.entity_expansion_text_limit in SAX/pull parsers (#195) GitHub: fix #193 ## [Why?] In the SAX and pull parsers, rv.bytesize was added to a running total after every entity substitution, and that total was compared against Security.entity_expansion_text_limit. Because rv already holds the whole expanded text, summing its size on every pass counts the same bytes repeatedly; only the current rv.bytesize needs to be checked. - Add debug logging ```patch diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 28810bf..5cfc089 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -556,6 +556,7 @@ module REXML re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/ rv.gsub!( re, entity_value ) sum += rv.bytesize +puts " rv.bytesize: #{rv.bytesize} sum: #{sum} > Security.entity_expansion_text_limit: #{Security.entity_expansion_text_limit} : #{rv}" if sum > Security.entity_expansion_text_limit raise "entity expansion has grown too large" end diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb index 7e0befe..cc68dbf 100644 --- a/lib/rexml/text.rb +++ b/lib/rexml/text.rb @@ -415,6 +415,7 @@ module REXML sum = 0 string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) { s = Text.expand($&, doctype, filter) +puts " s.bytesize: #{s.bytesize} sum + s.bytesize : #{sum + s.bytesize } > Security.entity_expansion_text_limit: #{Security.entity_expansion_text_limit} : #{s}" if sum + s.bytesize > Security.entity_expansion_text_limit raise "entity expansion has grown too large" else ``` - entity_expansion_text_limit.rb ```ruby $LOAD_PATH.unshift(File.expand_path("lib")) require 'rexml' require 'rexml/parsers/sax2parser' require 'rexml/parsers/pullparser' def dom_entity_expansion_count_check(xml) doc = REXML::Document.new(xml) doc.root.children.first.value puts "DOM: entity_expansion_count: #{doc.entity_expansion_count}" end def sax_entity_expansion_count_check(xml) sax = REXML::Parsers::SAX2Parser.new(xml) sax.parse puts "SAX: entity_expansion_count: #{sax.entity_expansion_count}" 
end def pull_entity_expansion_count_check(xml) parser = REXML::Parsers::PullParser.new(xml) while parser.has_next? parser.pull end puts "Pull: entity_expansion_count: #{parser.entity_expansion_count}" end xml = <<XML <!DOCTYPE member [ <!ENTITY a "&b;&b;&b;"> <!ENTITY b "&c;&d;&e;"> <!ENTITY c "xxxxxxxxxx"> <!ENTITY d "yyyyyyyyyy"> <!ENTITY e "zzzzzzzzzz"> ]> <member>&a;</member> XML dom_entity_expansion_count_check(xml) sax_entity_expansion_count_check(xml) pull_entity_expansion_count_check(xml) ``` ``` $ ruby entity_expansion_text_limit.rb s.bytesize: 10 sum + s.bytesize : 10 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx s.bytesize: 10 sum + s.bytesize : 20 > Security.entity_expansion_text_limit: 10240 : yyyyyyyyyy s.bytesize: 10 sum + s.bytesize : 30 > Security.entity_expansion_text_limit: 10240 : zzzzzzzzzz s.bytesize: 30 sum + s.bytesize : 30 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz s.bytesize: 10 sum + s.bytesize : 10 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx s.bytesize: 10 sum + s.bytesize : 20 > Security.entity_expansion_text_limit: 10240 : yyyyyyyyyy s.bytesize: 10 sum + s.bytesize : 30 > Security.entity_expansion_text_limit: 10240 : zzzzzzzzzz s.bytesize: 30 sum + s.bytesize : 60 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz s.bytesize: 10 sum + s.bytesize : 10 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx s.bytesize: 10 sum + s.bytesize : 20 > Security.entity_expansion_text_limit: 10240 : yyyyyyyyyy s.bytesize: 10 sum + s.bytesize : 30 > Security.entity_expansion_text_limit: 10240 : zzzzzzzzzz s.bytesize: 30 sum + s.bytesize : 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz s.bytesize: 90 sum + s.bytesize : 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz DOM: entity_expansion_count: 13 rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e; rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e; 
rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e; rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e; rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 180 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e; rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e; rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 270 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz SAX: entity_expansion_count: 13 rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e; rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e; rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e; rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : 
xxxxxxxxxxyyyyyyyyyy&e; rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 180 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e; rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e; rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 270 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz rv.bytesize: 90 sum: 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz Pull: entity_expansion_count: 13 ``` The fully expanded text is 90 bytes, and that is the value that should be compared against the limit (the DOM parser never exceeds 90). The SAX and pull parsers instead reach a sum of up to 270, because they keep adding rv.bytesize to a running total after every substitution. --- lib/rexml/parsers/baseparser.rb | 4 +--- test/test_document.rb | 20 ++++++++++++++++++++ test/test_pullparser.rb | 30 ++++++++++++++++++++++++++++++ test/test_sax.rb | 24 ++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 3 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 28810bfa..342f9482 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -548,15 +548,13 @@ def unnormalize( string, entities=nil, filter=nil ) } matches.collect!{|x|x[0]}.compact! 
if matches.size > 0 - sum = 0 matches.each do |entity_reference| unless filter and filter.include?(entity_reference) entity_value = entity( entity_reference, entities ) if entity_value re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/ rv.gsub!( re, entity_value ) - sum += rv.bytesize - if sum > Security.entity_expansion_text_limit + if rv.bytesize > Security.entity_expansion_text_limit raise "entity expansion has grown too large" end else diff --git a/test/test_document.rb b/test/test_document.rb index 0764631d..72ec3579 100644 --- a/test/test_document.rb +++ b/test/test_document.rb @@ -33,10 +33,12 @@ def test_new class EntityExpansionLimitTest < Test::Unit::TestCase def setup @default_entity_expansion_limit = REXML::Security.entity_expansion_limit + @default_entity_expansion_text_limit = REXML::Security.entity_expansion_text_limit end def teardown REXML::Security.entity_expansion_limit = @default_entity_expansion_limit + REXML::Security.entity_expansion_text_limit = @default_entity_expansion_text_limit end class GeneralEntityTest < self @@ -126,6 +128,24 @@ def test_with_default_entity doc.root.children.first.value end end + + def test_entity_expansion_text_limit + xml = <<-XML + + + + + + +]> +&a; + XML + + REXML::Security.entity_expansion_text_limit = 90 + doc = REXML::Document.new(xml) + assert_equal(90, doc.root.children.first.value.bytesize) + end end class ParameterEntityTest < self diff --git a/test/test_pullparser.rb b/test/test_pullparser.rb index 55205af8..827fad1d 100644 --- a/test/test_pullparser.rb +++ b/test/test_pullparser.rb @@ -159,10 +159,12 @@ def test_peek class EntityExpansionLimitTest < Test::Unit::TestCase def setup @default_entity_expansion_limit = REXML::Security.entity_expansion_limit + @default_entity_expansion_text_limit = REXML::Security.entity_expansion_text_limit end def teardown REXML::Security.entity_expansion_limit = @default_entity_expansion_limit + REXML::Security.entity_expansion_text_limit = 
@default_entity_expansion_text_limit end class GeneralEntityTest < self @@ -249,6 +251,34 @@ def test_with_default_entity end end end + + def test_entity_expansion_text_limit + source = <<-XML + + + + + +]> +&a; + XML + + REXML::Security.entity_expansion_text_limit = 90 + parser = REXML::Parsers::PullParser.new(source) + events = {} + element_name = '' + while parser.has_next? + event = parser.pull + case event.event_type + when :start_element + element_name = event[0] + when :text + events[element_name] = event[1] + end + end + assert_equal(90, events['member'].size) + end end end end diff --git a/test/test_sax.rb b/test/test_sax.rb index 5e3ad75b..f452de50 100644 --- a/test/test_sax.rb +++ b/test/test_sax.rb @@ -102,10 +102,12 @@ def test_sax2 class EntityExpansionLimitTest < Test::Unit::TestCase def setup @default_entity_expansion_limit = REXML::Security.entity_expansion_limit + @default_entity_expansion_text_limit = REXML::Security.entity_expansion_text_limit end def teardown REXML::Security.entity_expansion_limit = @default_entity_expansion_limit + REXML::Security.entity_expansion_text_limit = @default_entity_expansion_text_limit end class GeneralEntityTest < self @@ -182,6 +184,28 @@ def test_with_default_entity sax.parse end end + + def test_entity_expansion_text_limit + source = <<-XML + + + + + +]> +&a; + XML + + REXML::Security.entity_expansion_text_limit = 90 + sax = REXML::Parsers::SAX2Parser.new(source) + text_size = nil + sax.listen(:characters, ["member"]) do |text| + text_size = text.size + end + sax.parse + assert_equal(90, text_size) + end end end