diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 275372ee..02759e70 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -511,7 +511,11 @@ def normalize( input, entities=nil, entity_filter=nil ) # Unescapes all possible entities def unnormalize( string, entities=nil, filter=nil ) - rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" ) + if string.include?("\r") + rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" ) + else + rv = string.dup + end matches = rv.scan( REFERENCE_RE ) return rv if matches.size == 0 rv.gsub!( Private::CHARACTER_REFERENCES ) { diff --git a/test/test_pullparser.rb b/test/test_pullparser.rb index b6a48c93..073d896d 100644 --- a/test/test_pullparser.rb +++ b/test/test_pullparser.rb @@ -82,6 +82,27 @@ def test_character_references end end + def test_text_content_with_line_breaks + source = "AB\nC\r\n" + parser = REXML::Parsers::PullParser.new( source ) + + events = {} + element_name = '' + while parser.has_next? + event = parser.pull + case event.event_type + when :start_element + element_name = event[0] + when :text + events[element_name] = event[1] + end + end + + assert_equal('A', events['a']) + assert_equal("B\n", events['b']) + assert_equal("C\n", events['c']) + end + def test_peek_unshift source = "" REXML::Parsers::PullParser.new(source)