diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 2f068e0c..275372ee 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -132,6 +132,13 @@ module Private
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
+ CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
+ CHARACTER_REFERENCES = /*((?:\d+)|(?:x[a-fA-F0-9]+));/
+ DEFAULT_ENTITIES_PATTERNS = {}
+ default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
+ default_entities.each do |term|
+ DEFAULT_ENTITIES_PATTERNS[term] = /{term};/
+ end
end
private_constant :Private
@@ -504,10 +511,10 @@ def normalize( input, entities=nil, entity_filter=nil )
# Unescapes all possible entities
def unnormalize( string, entities=nil, filter=nil )
- rv = string.gsub( /\r\n?/, "\n" )
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
matches = rv.scan( REFERENCE_RE )
return rv if matches.size == 0
- rv.gsub!( /*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
+ rv.gsub!( Private::CHARACTER_REFERENCES ) {
m=$1
m = "0#{m}" if m[0] == ?x
[Integer(m)].pack('U*')
@@ -518,7 +525,7 @@ def unnormalize( string, entities=nil, filter=nil )
unless filter and filter.include?(entity_reference)
entity_value = entity( entity_reference, entities )
if entity_value
- re = /{entity_reference};/
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /{entity_reference};/
rv.gsub!( re, entity_value )
else
er = DEFAULT_ENTITIES[entity_reference]
@@ -526,7 +533,7 @@ def unnormalize( string, entities=nil, filter=nil )
end
end
end
- rv.gsub!( /&/, '&' )
+ rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
end
rv
end
diff --git a/test/test_pullparser.rb b/test/test_pullparser.rb
index 53a985ba..b6a48c93 100644
--- a/test/test_pullparser.rb
+++ b/test/test_pullparser.rb
@@ -62,6 +62,26 @@ def test_entity_replacement
end
end
+ def test_character_references
+ source = 'AB'
+ parser = REXML::Parsers::PullParser.new( source )
+ element_name = ''
+ while parser.has_next?
+ event = parser.pull
+ case event.event_type
+ when :start_element
+ element_name = event[0]
+ when :text
+ case element_name
+ when 'a'
+ assert_equal('A', event[1])
+ when 'b'
+ assert_equal('B', event[1])
+ end
+ end
+ end
+ end
+
def test_peek_unshift
source = ""
REXML::Parsers::PullParser.new(source)