Skip to content

Commit

Permalink
Refactor libXML error handling to remove global state (#12663)
Browse files Browse the repository at this point in the history
libXML error handlers are now set right before calling a lib function collecting errors directly in that context. Afterwards the error handler is reset. There is no longer a global array collecting errors at `XML::Error.errors`.

Co-authored-by: Sijawusz Pur Rahnama <[email protected]>
Co-authored-by: Beta Ziliani <[email protected]>
  • Loading branch information
3 people authored Nov 17, 2022
1 parent d5e7b85 commit 7a04fdd
Show file tree
Hide file tree
Showing 7 changed files with 110 additions and 56 deletions.
20 changes: 20 additions & 0 deletions spec/std/xml/reader_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -553,4 +553,24 @@ module XML
end
end
end

describe "#errors" do
it "makes errors accessible" do
reader = XML::Reader.new(%(<people></foo>))
reader.read
reader.expand?

reader.errors.map(&.to_s).should eq ["Opening and ending tag mismatch: people line 1 and foo"]
end

it "adds errors to `XML::Error.errors` (deprecated)" do
XML::Error.errors # clear class error list

reader = XML::Reader.new(%(<people></foo>))
reader.read
reader.expand?

XML::Error.errors.try(&.map(&.to_s)).should eq ["Opening and ending tag mismatch: people line 1 and foo"]
end
end
end
9 changes: 2 additions & 7 deletions spec/std/xml/xml_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -158,15 +158,10 @@ describe XML do
person2.previous_element.should eq(person)
end

it "handles errors" do
it "#errors" do
xml = XML.parse(%(<people></foo>))
xml.root.not_nil!.name.should eq("people")
errors = xml.errors.not_nil!
errors.size.should eq(1)

errors[0].message.should eq("Opening and ending tag mismatch: people line 1 and foo")
errors[0].line_number.should eq(1)
errors[0].to_s.should eq("Opening and ending tag mismatch: people line 1 and foo")
xml.errors.try(&.map(&.to_s)).should eq ["Opening and ending tag mismatch: people line 1 and foo"]
end

describe "#namespace" do
Expand Down
21 changes: 11 additions & 10 deletions src/xml.cr
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ module XML
# See `ParserOptions.default` for default options.
def self.parse(string : String, options : ParserOptions = ParserOptions.default) : Node
raise XML::Error.new("Document is empty", 0) if string.empty?
from_ptr LibXML.xmlReadMemory(string, string.bytesize, nil, nil, options)
from_ptr { LibXML.xmlReadMemory(string, string.bytesize, nil, nil, options) }
end

# Parses an XML document from *io* with *options* into an `XML::Node`.
#
# See `ParserOptions.default` for default options.
def self.parse(io : IO, options : ParserOptions = ParserOptions.default) : Node
from_ptr LibXML.xmlReadIO(
from_ptr { LibXML.xmlReadIO(
->(ctx, buffer, len) {
LibC::Int.new(Box(IO).unbox(ctx).read Slice.new(buffer, len))
},
Expand All @@ -68,22 +68,22 @@ module XML
nil,
nil,
options,
)
) }
end

# Parses an HTML document from *string* with *options* into an `XML::Node`.
#
# See `HTMLParserOptions.default` for default options.
def self.parse_html(string : String, options : HTMLParserOptions = HTMLParserOptions.default) : Node
raise XML::Error.new("Document is empty", 0) if string.empty?
from_ptr LibXML.htmlReadMemory(string, string.bytesize, nil, nil, options)
from_ptr { LibXML.htmlReadMemory(string, string.bytesize, nil, nil, options) }
end

# Parses an HTML document from *io* with *options* into an `XML::Node`.
#
# See `HTMLParserOptions.default` for default options.
def self.parse_html(io : IO, options : HTMLParserOptions = HTMLParserOptions.default) : Node
from_ptr LibXML.htmlReadIO(
from_ptr { LibXML.htmlReadIO(
->(ctx, buffer, len) {
LibC::Int.new(Box(IO).unbox(ctx).read Slice.new(buffer, len))
},
Expand All @@ -92,15 +92,16 @@ module XML
nil,
nil,
options,
)
) }
end

protected def self.from_ptr(doc : LibXML::Doc*)
protected def self.from_ptr(& : -> LibXML::Doc*)
errors = [] of XML::Error
doc = XML::Error.collect(errors) { yield }

raise Error.new(LibXML.xmlGetLastError) unless doc

node = Node.new(doc)
XML::Error.set_errors(node)
node
Node.new(doc, errors)
end
end

Expand Down
62 changes: 38 additions & 24 deletions src/xml/error.cr
Original file line number Diff line number Diff line change
Expand Up @@ -11,35 +11,14 @@ class XML::Error < Exception
super(message)
end

# TODO: this logic isn't thread/fiber safe, but error checking is less needed than
# the ability to parse HTML5 and malformed documents. In any case, fix this.
@@errors = [] of self

LibXML.xmlSetStructuredErrorFunc nil, ->(ctx, error) {
@@errors << XML::Error.new(error)
}

LibXML.xmlSetGenericErrorFunc nil, ->(ctx, fmt) {
# TODO: use va_start and va_end to
message = String.new(fmt).chomp
error = XML::Error.new(message, 0)

{% if flag?(:arm) || flag?(:aarch64) %}
# libxml2 is likely missing ARM unwind tables (.ARM.extab and .ARM.exidx
# sections) which prevent raising from a libxml2 context.
@@errors << error
{% else %}
raise error
{% end %}
}

# :nodoc:
def self.set_errors(node)
if errors = self.errors
node.errors = errors
end
protected def self.add_errors(errors)
@@errors.concat(errors)
end

@[Deprecated("This class accessor is deprecated. XML errors are accessible directly in the respective context via `XML::Reader#errors` and `XML::Node#errors`.")]
def self.errors : Array(XML::Error)?
if @@errors.empty?
nil
Expand All @@ -49,4 +28,39 @@ class XML::Error < Exception
errors
end
end

def self.collect(errors, &)
LibXML.xmlSetStructuredErrorFunc Box.box(errors), ->(ctx, error) {
Box(Array(XML::Error)).unbox(ctx) << XML::Error.new(error)
}
begin
yield
ensure
LibXML.xmlSetStructuredErrorFunc nil, nil
end
end

def self.collect_generic(errors, &)
LibXML.xmlSetGenericErrorFunc Box.box(errors), ->(ctx, fmt) {
# TODO: use va_start and va_end to
message = String.new(fmt).chomp
error = XML::Error.new(message, 0)

{% if flag?(:arm) || flag?(:aarch64) %}
# libxml2 is likely missing ARM unwind tables (.ARM.extab and .ARM.exidx
# sections) which prevent raising from a libxml2 context.
Box(Array(XML::Error)).unbox(ctx) << error
{% else %}
raise error
{% end %}
}

begin
collect(errors) do
yield
end
ensure
LibXML.xmlSetGenericErrorFunc nil, nil
end
end
end
12 changes: 2 additions & 10 deletions src/xml/node.cr
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class XML::Node
end

# :ditto:
def initialize(node : LibXML::Doc*)
def initialize(node : LibXML::Doc*, @errors = nil)
initialize(node.as(LibXML::Node*))
end

Expand Down Expand Up @@ -576,17 +576,9 @@ class XML::Node
xpath(path, namespaces, variables).as(String)
end

# :nodoc:
def errors=(errors)
@node.value._private = errors.as(Void*)
end

# Returns the list of `XML::Error` found when parsing this document.
# Returns `nil` if no errors were found.
def errors : Array(XML::Error)?
ptr = @node.value._private
ptr ? (ptr.as(Array(XML::Error))) : nil
end
getter errors : Array(XML::Error)?

private def check_no_null_byte(string)
if string.includes? Char::ZERO
Expand Down
38 changes: 34 additions & 4 deletions src/xml/reader.cr
Original file line number Diff line number Diff line change
@@ -1,7 +1,31 @@
require "./libxml2"
require "./parser_options"

# `XML::Reader` is a parser for XML that iterates a XML document.
#
# ```
# require "xml"
#
# reader = XML::Reader.new(<<-XML)
# <message>Hello XML!</message>
# XML
# reader.read
# reader.name # => "message"
# reader.read
# reader.value # => "Hello XML!"
# ```
#
# This is an alternative approach to `XML.parse` which parses an entire document
# into an XML data structure.
# `XML::Reader` offers more control and does not need to store the XML document
# in memory entirely. The latter is especially useful for large documents with
# the `IO`-based constructor.
#
# WARNING: This type is not concurrency-safe.
class XML::Reader
# Returns the errors reported while parsing.
getter errors = [] of XML::Error

# Creates a new reader from a string.
#
# See `XML::ParserOptions.default` for default options.
Expand Down Expand Up @@ -30,7 +54,7 @@ class XML::Reader

# Moves the reader to the next node.
def read : Bool
LibXML.xmlTextReaderRead(@reader) == 1
collect_errors { LibXML.xmlTextReaderRead(@reader) == 1 }
end

# Moves the reader to the next node while skipping subtrees.
Expand All @@ -46,7 +70,7 @@ class XML::Reader
if result == -1
node = LibXML.xmlTextReaderCurrentNode(@reader)
if node.null?
LibXML.xmlTextReaderRead(@reader) == 1
collect_errors { LibXML.xmlTextReaderRead(@reader) == 1 }
elsif !node.value.next.null?
LibXML.xmlTextReaderNext(@reader) == 1
else
Expand Down Expand Up @@ -123,7 +147,7 @@ class XML::Reader

# Returns the node's XML content including subtrees.
def read_inner_xml : String
xml = LibXML.xmlTextReaderReadInnerXml(@reader)
xml = collect_errors { LibXML.xmlTextReaderReadInnerXml(@reader) }
xml ? String.new(xml) : ""
end

Expand All @@ -139,7 +163,7 @@ class XML::Reader
# to avoid doing an extra C call each time.
return "" if node_type.none?

xml = LibXML.xmlTextReaderReadOuterXml(@reader)
xml = collect_errors { LibXML.xmlTextReaderReadOuterXml(@reader) }
xml ? String.new(xml) : ""
end

Expand Down Expand Up @@ -170,4 +194,10 @@ class XML::Reader
def to_unsafe
@reader
end

private def collect_errors
Error.collect(@errors) { yield }.tap do
Error.add_errors(@errors)
end
end
end
4 changes: 3 additions & 1 deletion src/xml/xpath_context.cr
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
class XML::XPathContext
getter errors = [] of XML::Error

def initialize(node : Node)
@ctx = LibXML.xmlXPathNewContext(node.to_unsafe.value.doc)
@ctx.value.node = node.to_unsafe
end

def evaluate(search_path : String)
xpath = LibXML.xmlXPathEvalExpression(search_path, self)
xpath = XML::Error.collect_generic(@errors) { LibXML.xmlXPathEvalExpression(search_path, self) }
unless xpath
{% if flag?(:arm) || flag?(:aarch64) %}
if errors = XML::Error.errors
Expand Down

0 comments on commit 7a04fdd

Please sign in to comment.