Skip to content

Commit

Permalink
Fix memory management of xmlNs xpath results
Browse files Browse the repository at this point in the history
this commit introduces a bifurcation in how Namespace objects are
managed; Namespace objects wrapped around xmlNs structs from an xpath
query now have their own Ruby object lifecycle and are GCed
independently from their original document.

See comments in xml_node_set.c and xml_namespace.c for more details.

Related to #1155
  • Loading branch information
flavorjones committed Dec 17, 2015
1 parent e723e97 commit 409b0b2
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 39 deletions.
64 changes: 51 additions & 13 deletions ext/nokogiri/xml_namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,34 @@

VALUE cNokogiriXmlNamespace ;

static void dealloc_namespace(xmlNsPtr ns)
{
/*
*
* this deallocator is only used for namespace nodes that are part of an xpath
* node set.
*
* see Nokogiri_wrap_xml_namespace() for more details.
*
*/
NOKOGIRI_DEBUG_START(ns) ;
if (ns->href) {
xmlFree((xmlChar *)ns->href);
}
if (ns->prefix) {
xmlFree((xmlChar *)ns->prefix);
}
xmlFree(ns);
NOKOGIRI_DEBUG_END(ns) ;
}


int Nokogiri_namespace_eh(xmlNodePtr node)
{
return (node->type == XML_NAMESPACE_DECL);
}


/*
* call-seq:
* prefix
Expand Down Expand Up @@ -34,38 +62,48 @@ static VALUE href(VALUE self)
return NOKOGIRI_STR_NEW2(ns->href);
}

static int part_of_an_xpath_node_set_eh(xmlNsPtr node)
{
return (node->next && ! Nokogiri_namespace_eh(node->next));
}

VALUE Nokogiri_wrap_xml_namespace(xmlDocPtr doc, xmlNsPtr node)
{
VALUE ns, document, node_cache;

assert(doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE);

if (node->_private) return (VALUE)node->_private;

if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;

ns = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, node);

if (doc->_private) {
if (DOC_RUBY_OBJECT_TEST(doc)) {
document = DOC_RUBY_OBJECT(doc);

node_cache = rb_iv_get(document, "@node_cache");
rb_ary_push(node_cache, ns);

rb_iv_set(ns, "@document", DOC_RUBY_OBJECT(doc));
if (part_of_an_xpath_node_set_eh(node)) {
/*
* this is a duplicate returned as part of an xpath query node set, and so
* we need to make sure we manage this memory.
*
* see comments in xml_node_set.c for more details.
*/
ns = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, dealloc_namespace, node);
} else {
ns = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, node);
node_cache = rb_iv_get(document, "@node_cache");
rb_ary_push(node_cache, ns);
}

rb_iv_set(ns, "@document", document);
}

node->_private = (void *)ns;

return ns;
}

VALUE Nokogiri_wrap_xml_namespace2(VALUE document, xmlNsPtr node)
{
xmlDocPtr doc;
Data_Get_Struct(document, xmlDoc, doc) ;
return Nokogiri_wrap_xml_namespace(doc, node);
}


void init_xml_namespace()
{
VALUE nokogiri = rb_define_module("Nokogiri");
Expand Down
96 changes: 70 additions & 26 deletions ext/nokogiri/xml_node_set.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <xml_node_set.h>
#include <xml_namespace.h>
#include <libxml/xpathInternals.h>

static ID decorate ;
Expand All @@ -17,20 +18,19 @@ static void Check_Node_Set_Node_Type(VALUE node)
static void deallocate(xmlNodeSetPtr node_set)
{
/*
* xmlXPathFreeNodeSet() contains an implicit assumption that it is being
* called before any of its pointed-to nodes have been free()d. this
* assumption lies in the operation where it dereferences nodeTab pointers
* while searching for namespace nodes to free.
*
* however, since Ruby's GC mechanism cannot guarantee the strict order in
* which ruby objects will be GC'd, nodes may be garbage collected before a
* nodeset containing pointers to those nodes. (this is true regardless of
* how we declare dependencies between objects with rb_gc_mark().)
* since xpath queries return copies of the xmlNs structs,
* xmlXPathFreeNodeSet() frees those xmlNs structs that are in the
* NodeSet.
*
* as a result, xmlXPathFreeNodeSet() will perform unsafe memory operations,
* and calling it would be evil.
* this is bad if someone is still trying to use the Namespace object wrapped
* around the xmlNs, so we need to avoid that.
*
* "In Valgrind We Trust." seriously.
* here we reproduce xmlXPathFreeNodeSet() without the xmlNs logic.
*
* this doesn't cause a leak because Namespace objects that are in an XPath
* query NodeSet are given their own lifecycle in
* Nokogiri_wrap_xml_namespace().
*/
NOKOGIRI_DEBUG_START(node_set) ;
if (node_set->nodeTab != NULL)
Expand Down Expand Up @@ -112,7 +112,6 @@ delete(VALUE self, VALUE rb_node)
{
xmlNodeSetPtr node_set;
xmlNodePtr node;
int j ;

Check_Node_Set_Node_Type(rb_node);

Expand Down Expand Up @@ -227,15 +226,13 @@ static VALUE index_at(VALUE self, long offset)

Data_Get_Struct(self, xmlNodeSet, node_set);

if (offset >= node_set->nodeNr || abs((int)offset) > node_set->nodeNr)
if (offset >= node_set->nodeNr || abs((int)offset) > node_set->nodeNr) {
return Qnil;
}

if (offset < 0)
offset += node_set->nodeNr;
if (offset < 0) { offset += node_set->nodeNr ; }

if (XML_NAMESPACE_DECL == node_set->nodeTab[offset]->type)
return Nokogiri_wrap_xml_namespace2(rb_iv_get(self, "@document"), (xmlNsPtr)(node_set->nodeTab[offset]));
return Nokogiri_wrap_xml_node(Qnil, node_set->nodeTab[offset]);
return Nokogiri_wrap_xml_node_set_node(node_set->nodeTab[offset], self);
}

static VALUE subseq(VALUE self, long beg, long len)
Expand Down Expand Up @@ -300,7 +297,7 @@ static VALUE slice(int argc, VALUE *argv, VALUE self)
if (FIXNUM_P(arg)) {
return index_at(self, FIX2LONG(arg));
}

/* if arg is Range */
switch (rb_range_beg_len(arg, &beg, &len, (long)node_set->nodeNr, 0)) {
case Qfalse:
Expand Down Expand Up @@ -330,17 +327,18 @@ static VALUE to_array(VALUE self, VALUE rb_node)

Data_Get_Struct(self, xmlNodeSet, node_set);

elts = calloc((size_t)node_set->nodeNr, sizeof(VALUE *));
elts = (VALUE *)calloc((size_t)(node_set->nodeNr), sizeof(VALUE));
for(i = 0; i < node_set->nodeNr; i++) {
if (XML_NAMESPACE_DECL == node_set->nodeTab[i]->type)
elts[i] = Nokogiri_wrap_xml_namespace2(rb_iv_get(self, "@document"), (xmlNsPtr)(node_set->nodeTab[i]));
else
elts[i] = Nokogiri_wrap_xml_node(Qnil, node_set->nodeTab[i]);
elts[i] = Nokogiri_wrap_xml_node_set_node(node_set->nodeTab[i], self);
rb_gc_register_address(&elts[i]);
}

list = rb_ary_new4((long)node_set->nodeNr, elts);

/*free(elts); */
for(i = 0; i < node_set->nodeNr; i++) {
rb_gc_unregister_address(&elts[i]);
}
free(elts);

return list;
}
Expand All @@ -360,7 +358,7 @@ static VALUE unlink_nodeset(VALUE self)

nodeNr = node_set->nodeNr ;
for (j = 0 ; j < nodeNr ; j++) {
if (XML_NAMESPACE_DECL != node_set->nodeTab[j]->type) {
if (! Nokogiri_namespace_eh(node_set->nodeTab[j])) {
VALUE node ;
xmlNodePtr node_ptr;
node = Nokogiri_wrap_xml_node(Qnil, node_set->nodeTab[j]);
Expand All @@ -372,6 +370,39 @@ static VALUE unlink_nodeset(VALUE self)
return self ;
}


static void reify_node_set_namespaces(VALUE self)
{
/*
* as mentioned in deallocate() above, xmlNs structs returned in an XPath
* NodeSet are duplicates, and we don't clean them up at deallocate() time.
*
* as a result, we need to make sure the Ruby manages this memory. we do this
* by forcing the creation of a Ruby object wrapped around the xmlNs.
*
* we also have to make sure that the NodeSet has a reference to the
* Namespace object, otherwise GC will kick in and the Namespace won't be
* marked.
*
* we *could* do this safely with *all* the nodes in the NodeSet, but we only
* *need* to do it for xmlNs structs, and so you get the code we have here.
*/
int j ;
xmlNodeSetPtr node_set ;
VALUE namespace_cache ;

Data_Get_Struct(self, xmlNodeSet, node_set);

namespace_cache = rb_iv_get(self, "@namespace_cache");

for (j = 0 ; j < node_set->nodeNr ; j++) {
if (Nokogiri_namespace_eh(node_set->nodeTab[j])) {
rb_ary_push(namespace_cache, Nokogiri_wrap_xml_node_set_node(node_set->nodeTab[j], self));
}
}
}


VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document)
{
VALUE new_set ;
Expand All @@ -393,6 +424,19 @@ VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document)
return new_set ;
}

VALUE Nokogiri_wrap_xml_node_set_node(xmlNodePtr node, VALUE node_set)
{
xmlDocPtr document ;

if (Nokogiri_namespace_eh(node)) {
Data_Get_Struct(rb_iv_get(node_set, "@document"), xmlDoc, document);
return Nokogiri_wrap_xml_namespace(document, (xmlNsPtr)node);
} else {
return Nokogiri_wrap_xml_node(Qnil, node);
}
}


static void xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
{
/*
Expand Down
3 changes: 3 additions & 0 deletions ext/nokogiri/xml_node_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,8 @@ void init_xml_node_set();

extern VALUE cNokogiriXmlNodeSet ;
VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document) ;
VALUE Nokogiri_wrap_xml_node_set_node(xmlNodePtr node, VALUE node_set) ;
VALUE Nokogiri_wrap_xml_node_set_namespace(xmlNsPtr node, VALUE node_set) ;
int Nokogiri_namespace_eh(xmlNodePtr node) ;

#endif

0 comments on commit 409b0b2

Please sign in to comment.