Skip to content

Commit

Permalink
fix: {HTML4,XML}::SAX::{Parser,ParserContext} check arg types
Browse files Browse the repository at this point in the history
Previously, arguments of the wrong type might cause segfault on CRuby.
  • Loading branch information
flavorjones committed May 7, 2022
1 parent 7d0b92c commit db05ba9
Show file tree
Hide file tree
Showing 11 changed files with 68 additions and 13 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ This version of Nokogiri uses [`jar-dependencies`](https://github.com/mkristian/

* [CRuby] UTF-16-encoded documents longer than ~4000 code points now serialize properly. Previously the serialized document was corrupted when it exceeded the length of libxml2's internal string buffer. [[#752](https://github.com/sparklemotion/nokogiri/issues/752)]
* [HTML5] The Gumbo parser now correctly handles text at the end of `form` elements.
* `{HTML4,XML}::SAX::{Parser,ParserContext}` constructor methods now raise `TypeError` instead of segfaulting when an incorrect type is passed. (Thanks to [@agustingianni](https://github.com/agustingianni) from the Github Security Lab for reporting!)


### Improved
Expand All @@ -36,7 +37,7 @@ This version of Nokogiri uses [`jar-dependencies`](https://github.com/mkristian/
* Avoid compile-time conflict with system-installed `gumbo.h` on OpenBSD. [[#2464](https://github.com/sparklemotion/nokogiri/issues/2464)]
* Remove calls to `vasprintf` in favor of platform-independent `rb_vsprintf`
* Prefer `ruby_xmalloc` to `malloc` within the C extension. [[#2480](https://github.com/sparklemotion/nokogiri/issues/2480)] (Thanks, [@Garfield96](https://github.com/Garfield96)!)
* Installation from source on systems missing libiconv will once again generate a helpful error message (broken since v1.11.0). [#2505]
* Installation from source on systems missing libiconv will once again generate a helpful error message (broken since v1.11.0). [[#2505](https://github.com/sparklemotion/nokogiri/issues/2505)]


## 1.13.5 / 2022-05-04
Expand Down
11 changes: 11 additions & 0 deletions ext/java/nokogiri/Html4SaxParserContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,13 @@ static EncodingType get(final int ordinal)
IRubyObject data,
IRubyObject encoding)
{
if (!(data instanceof RubyString)) {
throw context.getRuntime().newTypeError("data must be kind_of String");
}
if (!(encoding instanceof RubyString)) {
throw context.getRuntime().newTypeError("data must be kind_of String");
}

Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass);
ctx.setInputSourceFile(context, data);
String javaEncoding = findEncodingName(context, encoding);
Expand All @@ -247,6 +254,10 @@ static EncodingType get(final int ordinal)
IRubyObject data,
IRubyObject encoding)
{
if (!(encoding instanceof RubyFixnum)) {
throw context.getRuntime().newTypeError("encoding must be kind_of String");
}

Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass);
ctx.setIOInputSource(context, data, context.nil);
String javaEncoding = findEncodingName(context, encoding);
Expand Down
7 changes: 5 additions & 2 deletions ext/java/nokogiri/XmlSaxParserContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,12 @@ public class XmlSaxParserContext extends ParserContext
parse_io(ThreadContext context,
IRubyObject klazz,
IRubyObject data,
IRubyObject enc)
IRubyObject encoding)
{
//int encoding = (int)enc.convertToInteger().getLongValue();
// check the type of the unused encoding to match behavior of CRuby
if (!(encoding instanceof RubyFixnum)) {
throw context.getRuntime().newTypeError("encoding must be kind_of String");
}
final Ruby runtime = context.runtime;
XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
ctx.initialize(runtime);
Expand Down
8 changes: 7 additions & 1 deletion ext/java/nokogiri/internals/ParserContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ public abstract class ParserContext extends RubyObject
source = new InputSource();
ParserContext.setUrl(context, source, url);

Ruby ruby = context.getRuntime();

if (!(data.respondsTo("read"))) {
throw ruby.newTypeError("must respond to :read");
}

source.setByteStream(new IOInputStream(data));
if (java_encoding != null) {
source.setEncoding(java_encoding);
Expand All @@ -75,7 +81,7 @@ public abstract class ParserContext extends RubyObject
Ruby ruby = context.getRuntime();

if (!(data instanceof RubyString)) {
throw ruby.newArgumentError("must be kind_of String");
throw ruby.newTypeError("must be kind_of String");
}

RubyString stringData = (RubyString) data;
Expand Down
5 changes: 2 additions & 3 deletions ext/nokogiri/html4_sax_parser_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ parse_memory(VALUE klass, VALUE data, VALUE encoding)
{
htmlParserCtxtPtr ctxt;

if (NIL_P(data)) {
rb_raise(rb_eArgError, "data cannot be nil");
}
Check_Type(data, T_STRING);

if (!(int)RSTRING_LEN(data)) {
rb_raise(rb_eRuntimeError, "data cannot be empty");
}
Expand Down
13 changes: 10 additions & 3 deletions ext/nokogiri/xml_sax_parser_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

VALUE cNokogiriXmlSaxParserContext ;

static ID id_read;

static void
deallocate(xmlParserCtxtPtr ctxt)
{
Expand All @@ -26,6 +28,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
xmlParserCtxtPtr ctxt;
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);

if (!rb_respond_to(io, id_read)) {
rb_raise(rb_eTypeError, "argument expected to respond to :read");
}

ctxt = xmlCreateIOParserCtxt(NULL, NULL,
(xmlInputReadCallback)noko_io_read,
(xmlInputCloseCallback)noko_io_close,
Expand Down Expand Up @@ -62,9 +68,8 @@ parse_memory(VALUE klass, VALUE data)
{
xmlParserCtxtPtr ctxt;

if (NIL_P(data)) {
rb_raise(rb_eArgError, "data cannot be nil");
}
Check_Type(data, T_STRING);

if (!(int)RSTRING_LEN(data)) {
rb_raise(rb_eRuntimeError, "data cannot be empty");
}
Expand Down Expand Up @@ -278,4 +283,6 @@ noko_init_xml_sax_parser_context()
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);

id_read = rb_intern("read");
}
2 changes: 1 addition & 1 deletion lib/nokogiri/html4/sax/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class Parser < Nokogiri::XML::SAX::Parser
###
# Parse html stored in +data+ using +encoding+
def parse_memory(data, encoding = "UTF-8")
raise ArgumentError unless data
raise TypeError unless String === data
return if data.empty?

ctx = ParserContext.memory(data, encoding)
Expand Down
8 changes: 7 additions & 1 deletion test/html4/sax/test_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_parse_file_with_dir
end

def test_parse_memory_nil
assert_raises(ArgumentError) do
assert_raises(TypeError) do
@parser.parse_memory(nil)
end
end
Expand Down Expand Up @@ -161,6 +161,12 @@ def test_parsing_dom_error_from_io
def test_empty_processing_instruction
@parser.parse_memory("<strong>this will segfault<?strong>")
end

it "handles invalid types gracefully" do
assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse(0xcafecafe) }
assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse_memory(0xcafecafe) }
assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse_io(0xcafecafe) }
end
end
end
end
Expand Down
9 changes: 9 additions & 0 deletions test/html4/sax/test_parser_context.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,15 @@ def test_from_file
ctx.parse_with(parser)
# end
end

def test_graceful_handling_of_invalid_types
assert_raises(TypeError) { ParserContext.new(0xcafecafe) }
assert_raises(TypeError) { ParserContext.memory(0xcafecafe, "UTF-8") }
assert_raises(TypeError) { ParserContext.io(0xcafecafe, 1) }
assert_raises(TypeError) { ParserContext.io(StringIO.new("asdf"), "should be an index into ENCODINGS") }
assert_raises(TypeError) { ParserContext.file(0xcafecafe, "UTF-8") }
assert_raises(TypeError) { ParserContext.file("path/to/file", 0xcafecafe) }
end
end
end
end
Expand Down
8 changes: 7 additions & 1 deletion test/xml/sax/test_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ class TestCase
end
end

it "handles invalid types gracefully" do
assert_raises(TypeError) { Nokogiri::XML::SAX::Parser.new.parse(0xcafecafe) }
assert_raises(TypeError) { Nokogiri::XML::SAX::Parser.new.parse_memory(0xcafecafe) }
assert_raises(TypeError) { Nokogiri::XML::SAX::Parser.new.parse_io(0xcafecafe) }
end

it :test_namespace_declaration_order_is_saved do
parser.parse(<<~EOF)
<root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
Expand Down Expand Up @@ -263,7 +269,7 @@ def call_parse_io_with_encoding(encoding)
end

it :test_render_parse_nil_param do
assert_raises(ArgumentError) { parser.parse_memory(nil) }
assert_raises(TypeError) { parser.parse_memory(nil) }
end

it :test_bad_encoding_args do
Expand Down
7 changes: 7 additions & 0 deletions test/xml/sax/test_parser_context.rb
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,13 @@ def test_recovery
assert(pc.recovery)
end

def test_graceful_handling_of_invalid_types
assert_raises(TypeError) { ParserContext.new(0xcafecafe) }
assert_raises(TypeError) { ParserContext.memory(0xcafecafe) }
assert_raises(TypeError) { ParserContext.io(0xcafecafe, 1) }
assert_raises(TypeError) { ParserContext.io(StringIO.new("asdf"), "should be an index into ENCODINGS") }
end

def test_from_io
ctx = ParserContext.new(StringIO.new("fo"), "UTF-8")
assert(ctx)
Expand Down

0 comments on commit db05ba9

Please sign in to comment.