diff --git a/README.md b/README.md
index 5cc3c5a..5e4e863 100644
--- a/README.md
+++ b/README.md
@@ -417,6 +417,17 @@ elements not in this array will be removed.
 ]
 ```
 
+#### :parser_options (Hash)
+
+[Parsing options](https://github.com/rubys/nokogumbo/tree/v2.0.1#parsing-options) supplied to `nokogumbo`.
+
+```ruby
+:parser_options => {
+  max_errors: -1,
+  max_tree_depth: -1
+}
+```
+
 #### :protocols (Hash)
 
 URL protocols to allow in specific attributes. If an attribute is listed here
diff --git a/lib/sanitize.rb b/lib/sanitize.rb
index ed4cfa6..1900978 100644
--- a/lib/sanitize.rb
+++ b/lib/sanitize.rb
@@ -121,7 +121,10 @@ def fragment(html)
     return '' unless html
 
     html = preprocess(html)
-    frag = Nokogiri::HTML5.fragment(html)
+    # Splat the options so they are passed as keyword arguments: on Ruby 3 a
+    # positional Hash is no longer converted to keywords implicitly. Falling
+    # back to {} keeps an explicit `parser_options: nil` config working.
+    frag = Nokogiri::HTML5.fragment(html, **(@config[:parser_options] || {}))
     node!(frag)
     to_html(frag)
   end
diff --git a/lib/sanitize/config/default.rb b/lib/sanitize/config/default.rb
index 890a0b0..201e7ac 100644
--- a/lib/sanitize/config/default.rb
+++ b/lib/sanitize/config/default.rb
@@ -56,6 +56,10 @@ module Config
       # that all HTML will be stripped).
       :elements => [],
 
+      # Parsing options supplied to nokogumbo.
+      # https://github.com/rubys/nokogumbo/tree/v2.0.1#parsing-options
+      :parser_options => {},
+
       # URL handling protocols to allow in specific attributes. By default, no
       # protocols are allowed. Use :relative in place of a protocol if you want
       # to allow relative URLs sans protocol.
diff --git a/test/test_sanitize.rb b/test/test_sanitize.rb
index 80ee94b..1015b7f 100644
--- a/test/test_sanitize.rb
+++ b/test/test_sanitize.rb
@@ -61,6 +61,29 @@
       it 'should not choke on frozen fragments' do
         @s.fragment('foo'.freeze).must_equal 'foo'
       end
+
+      describe 'when html body exceeds Nokogumbo::DEFAULT_MAX_TREE_DEPTH' do
+        # 'foo' wrapped in DEFAULT_MAX_TREE_DEPTH levels of nested elements.
+        let(:content) do
+          nest_html_content('foo', Nokogumbo::DEFAULT_MAX_TREE_DEPTH)
+        end
+
+        it 'raises an ArgumentError exception' do
+          assert_raises ArgumentError do
+            @s.fragment(content)
+          end
+        end
+
+        describe 'and :max_tree_depth of -1 is supplied in :parser_options' do
+          before do
+            @s = Sanitize.new(parser_options: { max_tree_depth: -1 })
+          end
+
+          it 'does not raise an ArgumentError exception' do
+            @s.fragment(content).must_equal 'foo'
+          end
+        end
+      end
     end
 
     describe '#node!' do
@@ -109,4 +132,13 @@
       end
     end
   end
+
+  private
+
+  # Wraps html_content in `depth` levels of nested <span> elements. An inline
+  # element is used so that stripping the tags leaves html_content without
+  # any surrounding whitespace.
+  def nest_html_content(html_content, depth)
+    "#{'<span>' * depth}#{html_content}#{'</span>' * depth}"
+  end
 end