From 682a293d7daccb274e2e52e0bbc530c57c2d2946 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sat, 28 Dec 2024 22:21:04 -0500 Subject: [PATCH 1/2] fix: SAX::ParserContext keeps a reference to the input to prevent it from being GCed before we parse it. (cherry picked from commit f2a9275e442178084db43505d6425354e016d259) --- ext/nokogiri/xml_sax_parser_context.c | 10 ++++++++-- test/test_memory_usage.rb | 24 ++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/ext/nokogiri/xml_sax_parser_context.c b/ext/nokogiri/xml_sax_parser_context.c index 75fe2e4f017..0d2b65b599f 100644 --- a/ext/nokogiri/xml_sax_parser_context.c +++ b/ext/nokogiri/xml_sax_parser_context.c @@ -102,7 +102,10 @@ noko_xml_sax_parser_context_s_native_io(VALUE rb_class, VALUE rb_io, VALUE rb_en c_context->sax = NULL; } - return noko_xml_sax_parser_context_wrap(rb_class, c_context); + VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context); + rb_iv_set(rb_context, "@input", rb_io); + + return rb_context; } /* :nodoc: */ @@ -154,7 +157,10 @@ noko_xml_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALU c_context->sax = NULL; } - return noko_xml_sax_parser_context_wrap(rb_class, c_context); + VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context); + rb_iv_set(rb_context, "@input", rb_input); + + return rb_context; } /* diff --git a/test/test_memory_usage.rb b/test/test_memory_usage.rb index a23bb675e07..1b2044b448f 100644 --- a/test/test_memory_usage.rb +++ b/test/test_memory_usage.rb @@ -313,5 +313,29 @@ def start_element(name, attrs = []) # Expected error. This comment makes rubocop happy. 
end end + + it "XML::SAX::ParserContext.io holds a reference to IO input" do + content = File.read(XML_ATOM_FILE) + + memwatch(__method__) do + pc = Nokogiri::XML::SAX::ParserContext.io(StringIO.new(content), "ISO-8859-1") + parser = Nokogiri::XML::SAX::Parser.new(Nokogiri::SAX::TestCase::Doc.new) + GC.start + pc.parse_with(parser) + + assert_equal(472, parser.document.data.length) + end + end + + it "XML::SAX::ParserContext.memory holds a reference to string input" do + memwatch(__method__) do + pc = Nokogiri::XML::SAX::ParserContext.memory(File.read(XML_ATOM_FILE), "ISO-8859-1") + parser = Nokogiri::XML::SAX::Parser.new(Nokogiri::SAX::TestCase::Doc.new) + GC.start + pc.parse_with(parser) + + assert_equal(472, parser.document.data.length) + end + end end if ENV["NOKOGIRI_MEMORY_SUITE"] && Nokogiri.uses_libxml? end From 1c9b8f1273841f56aec0395bf0517c93fd6e1f7f Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sun, 29 Dec 2024 13:17:16 -0500 Subject: [PATCH 2/2] doc: update CHANGELOG.md --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9aa6fbd252..7e4d4f8ef3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA --- +## v1.18.1 / unreleased + +### Fixed + +* [CRuby] XML::SAX::ParserContext keeps a reference to the input to avoid a potential use-after-free issue that's existed since v1.4.0 (2009). (#3395) @flavorjones + + ## v1.18.0 / 2024-12-25 ### Notable Changes