Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

C14n fix and new parameters #563

Merged
merged 4 commits into from
Nov 17, 2011
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 42 additions & 10 deletions ext/nokogiri/xml_document.c
Original file line number Diff line number Diff line change
Expand Up @@ -428,9 +428,14 @@ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
VALUE parent;
VALUE ret;

block = (VALUE)ctx;
node = Nokogiri_wrap_xml_node(Qnil, _node);
if(_node->type == XML_NAMESPACE_DECL){
node = Nokogiri_wrap_xml_namespace(_parent->doc, (xmlNsPtr) _node);
}
else{
node = Nokogiri_wrap_xml_node(Qnil, _node);
}
parent = _parent ? Nokogiri_wrap_xml_node(Qnil, _parent) : Qnil;
block = (VALUE)ctx;

ret = rb_funcall(block, rb_intern("call"), 2, node, parent);

Expand All @@ -440,16 +445,25 @@ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
}

/* call-seq:
* doc.canonicalize
* doc.canonicalize { |node, parent| ... }
* doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
* doc.canonicalize { |obj, parent| ... }
*
* Canonicalize a document and return the results. Takes an optional block
* that takes two parameters the +node+ and that node's +parent+. The block
* must return a non-nil, non-false value if the +node+ passed in should be
* included in the canonicalized document.
* that takes two parameters: the +obj+ and that node's +parent+.
* The +obj+ will be either a Nokogiri::XML::Node or a Nokogiri::XML::Namespace.
* The block must return a non-nil, non-false value if the +obj+ passed in
* should be included in the canonicalized document.
*/
static VALUE canonicalize(VALUE self)
static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
{
VALUE mode;
VALUE incl_ns;
VALUE with_comments;
xmlChar **ns;
long ns_len, i;

rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);

xmlDocPtr doc;
xmlOutputBufferPtr buf;
xmlC14NIsVisibleCallback cb = NULL;
Expand All @@ -473,7 +487,25 @@ static VALUE canonicalize(VALUE self)
ctx = (void *)rb_block_proc();
}

xmlC14NExecute(doc, cb, ctx, 0, NULL, 0, buf);
if(NIL_P(incl_ns)){
ns = NULL;
}
else{
ns_len = RARRAY_LEN(incl_ns);
ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
for (i = 0 ; i < ns_len ; i++) {
VALUE entry = rb_ary_entry(incl_ns, i);
const char * ptr = StringValuePtr(entry);
ns[i] = (xmlChar*) ptr;
}
}


xmlC14NExecute(doc, cb, ctx,
(int) (NIL_P(mode) ? 0 : NUM2INT(mode)),
ns,
(int) (NIL_P(with_comments) ? 0 : 1),
buf);

xmlOutputBufferClose(buf);

Expand Down Expand Up @@ -503,7 +535,7 @@ void init_xml_document()
rb_define_method(klass, "encoding", encoding, 0);
rb_define_method(klass, "encoding=", set_encoding, 1);
rb_define_method(klass, "version", version, 0);
rb_define_method(klass, "canonicalize", canonicalize, 0);
rb_define_method(klass, "canonicalize", canonicalize, -1);
rb_define_method(klass, "dup", duplicate_node, -1);
rb_define_method(klass, "url", url, 0);
rb_define_method(klass, "create_entity", create_entity, -1);
Expand Down
6 changes: 6 additions & 0 deletions lib/nokogiri/xml.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ def XML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_X
end

module XML
# Canonicalization modes accepted as the +mode+ argument of +canonicalize+.
# The integer value is converted with NUM2INT and handed to libxml2's
# xmlC14NExecute unchanged, so these numbers presumably have to stay in sync
# with libxml2's xmlC14NMode enum -- verify against the libxml2 headers.
#
# Original C14N 1.0 spec canonicalization (default mode of Node#canonicalize)
XML_C14N_1_0 = 0
# Exclusive C14N 1.0 spec canonicalization
XML_C14N_EXCLUSIVE_1_0 = 1
# C14N 1.1 spec canonicalization
XML_C14N_1_1 = 2
class << self
###
# Parse an XML document using the Nokogiri::XML::Reader API. See
Expand Down
8 changes: 8 additions & 0 deletions lib/nokogiri/xml/node.rb
Original file line number Diff line number Diff line change
Expand Up @@ -879,6 +879,14 @@ def do_xinclude options = XML::ParseOptions::DEFAULT_XML, &block
process_xincludes(options.to_i)
end

###
# Canonicalize this node together with its descendants and return the
# resulting string.
#
# +mode+ is one of the XML::XML_C14N_* constants, +inclusive_namespaces+ is
# an optional Array of namespace prefixes, and +with_comments+ selects
# whether comment nodes are kept in the output.
#
# The work is delegated to Document#canonicalize with a visibility block
# that only accepts this node and objects located underneath it.
def canonicalize(mode=XML::XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
  c14n_root = self
  # Document#canonicalize only treats +nil+ as "no comments"; any other
  # value (including +false+) switches comments on. Normalise falsy values
  # to +nil+ so that the documented default really omits comments.
  comments_flag = with_comments ? true : nil
  document.canonicalize(mode, inclusive_namespaces, comments_flag) do |node, parent|
    # Namespaces are not nodes, so judge them by the element that owns them.
    tn = node.is_a?(XML::Node) ? node : parent
    tn == c14n_root || tn.ancestors.include?(c14n_root)
  end
end

private

def extract_params params # :nodoc:
Expand Down
57 changes: 57 additions & 0 deletions test/xml/test_c14n.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ def test_3_1
c14n = doc.canonicalize
assert_no_match(/version=/, c14n)
assert_match(/Hello, world/, c14n)
assert_no_match(/Comment/, c14n)
c14n = doc.canonicalize(nil, nil, true)
assert_match(/Comment/, c14n)
end

def test_exclude_block_params
Expand Down Expand Up @@ -81,6 +84,60 @@ def test_exclude_block_object
end
assert_equal xml, c14n
end

# Canonicalizing a single node must emit only that node's own subtree,
# not the surrounding document.
def test_c14n_node
  source = '<a><b><c></c></b></a>'
  target = Nokogiri.XML(source).at_xpath('//b')
  assert_equal '<b><c></c></b>', target.canonicalize
end

# Exercises Node#canonicalize on the same <n1:elem2> subtree embedded in two
# different parent documents, comparing the default mode with exclusive
# canonicalization (with and without an inclusive-namespace prefix list).
def test_c14_modes
# http://www.w3.org/TR/xml-exc-c14n/#sec-Enveloping

# doc1: the subtree sits inside <n0:local>, which declares n0 and n3.
doc1 = Nokogiri.XML <<-eoxml
<n0:local xmlns:n0="http://foobar.org" xmlns:n3="ftp://example.org">
<n1:elem2 xmlns:n1="http://example.net" xml:lang="en">
<n3:stuff xmlns:n3="ftp://example.org"/>
</n1:elem2>
</n0:local>
eoxml
# doc2: the subtree sits inside <n2:pdu>, which declares n1 (with a different
# URI), n2, xml:lang and xml:space.
doc2 = Nokogiri.XML <<-eoxml
<n2:pdu xmlns:n1="http://example.com"
xmlns:n2="http://foo.example"
xml:lang="fr"
xml:space="retain">
<n1:elem2 xmlns:n1="http://example.net" xml:lang="en">
<n3:stuff xmlns:n3="ftp://example.org"/>
</n1:elem2>
</n2:pdu>
eoxml

# Default mode: namespace declarations (and xml:space in doc2) from the
# enclosing element show up on the canonicalized subtree root, so the two
# results differ depending on the surrounding document.
c14n = doc1.at_xpath('//n1:elem2', {'n1' => 'http://example.net'}).canonicalize
assert_equal '<n1:elem2 xmlns:n0="http://foobar.org" xmlns:n1="http://example.net" xmlns:n3="ftp://example.org" xml:lang="en">
<n3:stuff></n3:stuff>
</n1:elem2>', c14n
c14n = doc2.at_xpath('//n1:elem2', {'n1' => 'http://example.net'}).canonicalize
assert_equal '<n1:elem2 xmlns:n1="http://example.net" xmlns:n2="http://foo.example" xml:lang="en" xml:space="retain">
<n3:stuff xmlns:n3="ftp://example.org"></n3:stuff>
</n1:elem2>', c14n

# Exclusive mode: the context of the parent document is not carried over,
# so both documents are expected to yield the identical output.
excl_c14n = '<n1:elem2 xmlns:n1="http://example.net" xml:lang="en">
<n3:stuff xmlns:n3="ftp://example.org"></n3:stuff>
</n1:elem2>'
c14n = doc1.at_xpath('//n1:elem2', {'n1' => 'http://example.net'}).canonicalize(XML::XML_C14N_EXCLUSIVE_1_0)
assert_equal excl_c14n, c14n
c14n = doc2.at_xpath('//n1:elem2', {'n1' => 'http://example.net'}).canonicalize(XML::XML_C14N_EXCLUSIVE_1_0)
assert_equal excl_c14n, c14n

# Exclusive mode with 'n2' listed as an inclusive namespace prefix: the
# xmlns:n2 declaration is expected back on the subtree root.
c14n = doc2.at_xpath('//n1:elem2', {'n1' => 'http://example.net'}).canonicalize(XML::XML_C14N_EXCLUSIVE_1_0, ['n2'])
assert_equal '<n1:elem2 xmlns:n1="http://example.net" xmlns:n2="http://foo.example" xml:lang="en">
<n3:stuff xmlns:n3="ftp://example.org"></n3:stuff>
</n1:elem2>', c14n

end


end
end
end