Skip to content

Commit

Permalink
Add optional ability to parse XML namespaces
Browse files Browse the repository at this point in the history
This better supports XML structures with namespaces/prefixed elements, but is opt-in because it changes the structure of the resulting JSON. It will become the default in oq 2.x.
  • Loading branch information
Blacksmoke16 committed Aug 1, 2021
1 parent 2bc47df commit da74b2d
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 19 deletions.
38 changes: 24 additions & 14 deletions src/converters/xml.cr
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ module OQ::Converters::XML
builder = ::JSON::Builder.new output
xml = ::XML::Reader.new input

xmlns = self.parse_deserialize_args args

# Set reader to first element
xml.read

Expand All @@ -32,26 +34,27 @@ module OQ::Converters::XML
xml.read
end

process_element_node xml.expand, builder
process_element_node xml.expand, builder, xmlns: xmlns
end
end
end

# Emits `node` into `builder` as a single JSON field.
#
# A node without nested elements or attributes is written as a scalar field keyed by
# `node.name`. When `xmlns` is true, the node must additionally declare no namespaces
# to take the scalar path; otherwise its namespace declarations, which are only
# written by `process_children`, would be lost. Every other node is written as an
# object keyed by its normalized (possibly prefixed) name.
private def self.process_element_node(node : ::XML::Node, builder : ::JSON::Builder, *, xmlns : Bool) : Nil
  # TODO: Make checking for namespaces the default behavior in oq 2.x
  # Namespace declarations only matter when namespace parsing was requested.
  declares_namespaces = xmlns && !node.namespace_definitions.empty?

  # If the node has no nested elements, attributes, or (relevant) namespace
  # declarations, just emit a scalar value.
  if !has_nested_elements(node) && node.attributes.empty? && !declares_namespaces
    return builder.field node.name, get_node_value node
  end

  # Otherwise process the node as a key/value pair
  builder.field self.normalize_node_name node do
    builder.object do
      process_children node, builder, xmlns: xmlns
    end
  end
end

private def self.process_array_node(name : String, children : Array(::XML::Node), builder : ::JSON::Builder) : Nil
private def self.process_array_node(name : String, children : Array(::XML::Node), builder : ::JSON::Builder, *, xmlns : Bool) : Nil
builder.field name do
builder.array do
children.each do |node|
Expand All @@ -61,24 +64,27 @@ module OQ::Converters::XML
else
# Otherwise process the node within an object
builder.object do
process_children node, builder
process_children node, builder, xmlns: xmlns
end
end
end
end
end
end

private def self.process_children(node : ::XML::Node, builder : ::JSON::Builder) : Nil
private def self.process_children(node : ::XML::Node, builder : ::JSON::Builder, *, xmlns : Bool) : Nil
# Process node attributes
node.attributes.each do |attr|
builder.field "@#{attr.name}", attr.content
end

# Include attributes for namespaces defined on this node
node.namespace_definitions.each do |ns|
key = ns.prefix ? "xmlns:#{ns.prefix}" : "xmlns"
builder.field "@#{key}", ns.href
# TODO: Make this the default behavior in oq 2.x
if xmlns
node.namespace_definitions.each do |ns|
key = ns.prefix ? "xmlns:#{ns.prefix}" : "xmlns"
builder.field "@#{key}", ns.href
end
end

# Determine how to process a node's children
Expand All @@ -95,14 +101,14 @@ module OQ::Converters::XML

# Array
if children.size > 1
process_array_node name, children, builder
process_array_node name, children, builder, xmlns: xmlns
else
if children.first.text?
# node content in attribute object
builder.field "#text", children.first.content
else
# Element
process_element_node children.first, builder
process_element_node children.first, builder, xmlns: xmlns
end
end
end
Expand All @@ -120,10 +126,14 @@ module OQ::Converters::XML
(namespace = node.namespace) && (prefix = namespace.prefix.presence) ? "#{prefix}:#{node.name}" : node.name
end

# Extracts the deserialization options from the named arguments given to `deserialize`.
#
# Returns the value stored under the `xmlns` key; `deserialize` passes it on to the
# node-processing methods to toggle namespace handling.
private def self.parse_deserialize_args(args : NamedTuple) : Bool
args["xmlns"]
end

def self.serialize(input : IO, output : IO, **args) : Nil
json = ::JSON::PullParser.new input
builder = ::XML::Builder.new output
indent, prolog, root, xml_item = self.parse_args(args)
indent, prolog, root, xml_item = self.parse_serialize_args args

builder.indent = indent

Expand All @@ -140,7 +150,7 @@ module OQ::Converters::XML
builder.flush unless prolog
end

private def self.parse_args(args : NamedTuple) : Tuple(String, Bool, String, String)
private def self.parse_serialize_args(args : NamedTuple) : Tuple(String, Bool, String, String)
{
args["indent"],
args["xml_prolog"],
Expand Down
9 changes: 6 additions & 3 deletions src/oq.cr
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ module OQ
# If a tab for each indentation level instead of spaces.
property tab : Bool

property? xmlns : Bool

# The args that'll be passed to `jq`.
@args : Array(String) = [] of String

Expand All @@ -94,7 +96,8 @@ module OQ
@xml_prolog : Bool = true,
@xml_item : String = "item",
@indent : Int32 = 2,
@tab : Bool = false
@tab : Bool = false,
@xmlns : Bool = false
)
end

Expand Down Expand Up @@ -141,7 +144,7 @@ module OQ
input_args.replace(input_args.map do |file_name|
File.tempfile ".#{File.basename file_name}" do |tmp_file|
File.open file_name do |file|
@input_format.converter.deserialize file, tmp_file
@input_format.converter.deserialize file, tmp_file, xmlns: @xmlns
end
end
.tap { |tf| @tmp_files << tf }
Expand All @@ -153,7 +156,7 @@ module OQ
end

spawn do
@input_format.converter.deserialize input, input_write
@input_format.converter.deserialize input, input_write, xmlns: @xmlns
input_write.close
channel.send true
rescue ex
Expand Down
5 changes: 3 additions & 2 deletions src/oq_cli.cr
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,11 @@ OptionParser.parse do |parser|
parser.on("-i FORMAT", "--input FORMAT", "Format of the input data. Supported formats: #{OQ::Format}") { |format| (f = OQ::Format.parse?(format)) ? processor.input_format = f : abort "Invalid input format: '#{format}'" }
parser.on("-o FORMAT", "--output FORMAT", "Format of the output data. Supported formats: #{OQ::Format}") { |format| (f = OQ::Format.parse?(format)) ? processor.output_format = f : abort "Invalid output format: '#{format}'" }
parser.on("--indent NUMBER", "Use the given number of spaces for indentation (JSON/XML only).") { |n| processor.indent = n.to_i; processor.add_arg "--indent"; processor.add_arg n }
parser.on("--xml-root ROOT", "Name of the root XML element if converting to XML.") { |r| processor.xml_root = r }
parser.on("--tab", "Use a tab for each indentation level instead of two spaces.") { processor.tab = true; processor.add_arg "--tab" }
parser.on("--no-prolog", "Whether the XML prolog should be emitted if converting to XML.") { processor.xml_prolog = false }
parser.on("--xml-item NAME", "The name for XML array elements without keys.") { |i| processor.xml_item = i }
parser.on("--tab", "Use a tab for each indentation level instead of two spaces.") { processor.tab = true; processor.add_arg "--tab" }
parser.on("--xmlns", "If XML namespaces should be parsed. NOTE: This will become the default in oq 2.x.") { processor.xmlns = true }
parser.on("--xml-root ROOT", "Name of the root XML element if converting to XML.") { |r| processor.xml_root = r }
parser.invalid_option { }
end

Expand Down

0 comments on commit da74b2d

Please sign in to comment.