Skip to content

Commit

Permalink
{XML,HTML}::Document.parse: handle Pathname arguments
Browse files Browse the repository at this point in the history
  • Loading branch information
doriantaylor authored and flavorjones committed Nov 12, 2020
1 parent 2d4ae63 commit 86f02c0
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 6 deletions.
4 changes: 4 additions & 0 deletions lib/nokogiri.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Modify the PATH on windows so that the external DLLs will get loaded.

require 'rbconfig'
require 'pathname'

if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
require 'nokogiri/jruby/dependencies'
Expand Down Expand Up @@ -54,6 +55,9 @@ class << self
###
# Parse an HTML or XML document. +string+ contains the document.
def parse string, url = nil, encoding = nil, options = nil
# whackamole; see respective (XML|HTML)::Document
string = string.expand_path.open('rb') if string.is_a? Pathname

if string.respond_to?(:read) ||
/^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
# Expect an HTML indicator to appear within the first 512
Expand Down
8 changes: 8 additions & 0 deletions lib/nokogiri/html/document.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# frozen_string_literal: true

# for the constant; pathname is standard lib
require 'pathname'

module Nokogiri
module HTML
class Document < Nokogiri::XML::Document
Expand Down Expand Up @@ -166,6 +170,10 @@ def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::
# Give the options to the user
yield options if block_given?

# coerce to binary filehandle if this is a Pathname object
string_or_io = string_or_io.expand_path.open('rb') if
string_or_io.is_a? Pathname

if string_or_io.respond_to?(:encoding)
unless string_or_io.encoding.name == "ASCII-8BIT"
encoding ||= string_or_io.encoding.name
Expand Down
24 changes: 18 additions & 6 deletions lib/nokogiri/xml/document.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# frozen_string_literal: true

# for the constant; pathname is standard lib
require 'pathname'

module Nokogiri
module XML
##
Expand Down Expand Up @@ -56,12 +60,20 @@ def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::
end

doc = if string_or_io.respond_to?(:read)
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
read_io(string_or_io, url, encoding, options.to_i)
else
# read_memory pukes on empty docs
read_memory(string_or_io, url, encoding, options.to_i)
end
# check url first because this might get blown away
url ||= string_or_io.respond_to?(:path) ?
string_or_io.path : nil

# coerce to binary filehandle if this is a Pathname object
string_or_io = string_or_io.expand_path.open('rb') if
string_or_io.is_a? Pathname

# aaaand go
read_io(string_or_io, url, encoding, options.to_i)
else
# read_memory pukes on empty docs
read_memory(string_or_io, url, encoding, options.to_i)
end

# do xinclude processing
doc.do_xinclude(options) if options.xinclude?
Expand Down
18 changes: 18 additions & 0 deletions test/test_nokogiri.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,24 @@ def test_atom_is_xml?
assert !doc.html?
end

def test_atom_from_pathname
  # Parsing straight from a Pathname: the Atom fixture is large enough to
  # trip the input callback more than once.
  atom_path = Pathname(XML_ATOM_FILE) # pathname should already be required

  # XXX: Nokogiri.parse currently reports this document as HTML when it is
  # handed a Pathname; this behaviour should probably change.
  sniffed_doc = Nokogiri.parse(atom_path)
  assert sniffed_doc.html?

  # Ask for XML explicitly because of the Nokogiri.parse behaviour above.
  xml_doc = Nokogiri.XML(atom_path)
  assert xml_doc.xml?
  assert !xml_doc.html?

  # We already know that parsing the file contents as a String works, so use
  # that result as the reference serialization.
  from_pathname = xml_doc.to_xml
  from_string = Nokogiri.parse(File.read(XML_ATOM_FILE)).to_xml
  assert_equal from_pathname, from_string
end

def test_html?
doc = Nokogiri.parse(File.read(HTML_FILE))
assert !doc.xml?
Expand Down

0 comments on commit 86f02c0

Please sign in to comment.