Skip to content

Commit

Permalink
using a nodelist to keep track of unlinked nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
tenderlove committed Mar 12, 2009
1 parent a3035d0 commit dafc963
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 50 deletions.
8 changes: 8 additions & 0 deletions ext/nokogiri/native.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ extern VALUE mNokogiriHtml ;
extern VALUE mNokogiriHtmlSax ;
extern VALUE mNokogiriXslt ;

#define NOKOGIRI_ROOT_NODE(_node) \
({ \
nokogiriDocPtr doc = (nokogiriDocPtr)_node->doc; \
xmlNodeSetPtr node_set = (xmlNodeSetPtr)doc->unlinkedNodes; \
xmlXPathNodeSetAdd(node_set, _node); \
_node; \
})

#ifdef DEBUG

#define NOKOGIRI_DEBUG_START(p) if (getenv("NOKOGIRI_NO_FREE")) return ; if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p start\n", __FILE__, __LINE__, p);
Expand Down
35 changes: 30 additions & 5 deletions ext/nokogiri/xml_document.c
Original file line number Diff line number Diff line change
@@ -1,10 +1,22 @@
#include <xml_document.h>

static void dealloc(xmlDocPtr doc)
static void dealloc(nokogiriDocPtr ndoc)
{
NOKOGIRI_DEBUG_START(doc);
doc->_private = NULL;
xmlFreeDoc(doc);

xmlNodeSetPtr node_set = ndoc->unlinkedNodes;

int i;
for(i = 0; i < node_set->nodeNr; i++) {
xmlAddChild((xmlNodePtr)ndoc, node_set->nodeTab[i]);
}

if (node_set->nodeTab != NULL)
xmlFree(node_set->nodeTab);
xmlFree(node_set);

((xmlDocPtr)ndoc)->_private = NULL;
xmlFreeDoc((xmlDocPtr)ndoc);
NOKOGIRI_DEBUG_END(doc);
}

Expand Down Expand Up @@ -268,9 +280,22 @@ VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
{
VALUE rb_doc = Qnil;

rb_doc = Data_Wrap_Struct(klass ? klass : cNokogiriXmlDocument, 0, dealloc, doc) ;
nokogiriDocPtr ndoc = (nokogiriDocPtr)malloc(sizeof(nokogiriDoc));
memcpy(ndoc, doc, sizeof(xmlDoc));
ndoc->unlinkedNodes = xmlXPathNodeSetCreate(NULL);

xmlSetTreeDoc((xmlNodePtr)ndoc, (xmlDocPtr)ndoc);
xmlSetTreeDoc((xmlNodePtr)doc, (xmlDocPtr)ndoc);
assert((xmlDocPtr)ndoc == ((xmlDocPtr)ndoc)->doc);

rb_doc = Data_Wrap_Struct(
klass ? klass : cNokogiriXmlDocument,
0,
dealloc,
ndoc
);
rb_iv_set(rb_doc, "@decorators", Qnil);
doc->_private = (void *)rb_doc;
((xmlDocPtr)ndoc)->_private = (void *)rb_doc;

return rb_doc ;
}
7 changes: 7 additions & 0 deletions ext/nokogiri/xml_document.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@

#include <native.h>

struct _nokogiriDoc {
xmlDoc doc;
xmlNodeSetPtr unlinkedNodes;
};
typedef struct _nokogiriDoc nokogiriDoc;
typedef nokogiriDoc * nokogiriDocPtr;

void init_xml_document();
VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc);

Expand Down
47 changes: 4 additions & 43 deletions ext/nokogiri/xml_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ static void debug_node_dealloc(xmlNodePtr x)
# define debug_node_dealloc 0
#endif


/* :nodoc: */
typedef xmlNodePtr (*node_other_func)(xmlNodePtr, xmlNodePtr);

Expand Down Expand Up @@ -39,7 +38,7 @@ static VALUE reparent_node_with(VALUE node_obj, VALUE other_obj, node_other_func
rb_raise(rb_eRuntimeError, "Could not reparent node (2)");
}
xmlUnlinkNode(node);
xmlFreeNode(node);
NOKOGIRI_ROOT_NODE(node);
}

// the child was a text node that was coalesced. we need to have the object
Expand Down Expand Up @@ -148,49 +147,11 @@ static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
*/
static VALUE unlink_node(VALUE self)
{
/*
* a few words of explanation are probably needed here.
*
* because a node that was created in conjunction with its parent document
* (as opposed to being created bare and inserted into an existing document)
* will always contain references to the document (e.g., in the form of
* strings that were allocated from the document's dictionary), it's
* dangerous for us to unlink a node from the parent document without
* explicitly and immediately freeing the node.
*
* the danger is that the node might be GC'ed after the document has been
* GC'ed, which will cause illegal memory access at best, and segfault at
* worst.
*
* so, we take the strategy you see here, which is:
* - unlink the node
* - dup the node
* - free the original node
* - return the dup'd node, which no longer contains references to the
* original node's document
*
* carry on.
*/
xmlNodePtr node, dup;
xmlNodePtr node;
Data_Get_Struct(self, xmlNode, node);

xmlUnlinkNode(node);
dup = xmlCopyNode(node, 1);

/* We need to remove our node from the document cache. Because pointers
* can (and do) get reused after being freed. */
VALUE node_cache = rb_funcall(
rb_iv_get(self, "@document"),
rb_intern("node_cache"),
0
);
rb_hash_delete(node_cache, INT2NUM((int)node));

xmlFreeNode(node);

DATA_PTR(self) = dup ;
rb_iv_set(self, "@document", Qnil);
return Nokogiri_wrap_xml_node(dup);
NOKOGIRI_ROOT_NODE(node);
return self;
}

/*
Expand Down
1 change: 1 addition & 0 deletions test/hpricot/test_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def test_scan_boingboing

def test_reparent
doc = Nokogiri.Hpricot(%{<div id="blurb_1"></div>})
assert doc.html?
div1 = doc.search('#blurb_1')
div1.before('<div id="blurb_0"></div>')

Expand Down
2 changes: 1 addition & 1 deletion test/xml/test_node.rb
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def test_unlink
assert node.next_sibling
node.unlink
assert !node.parent
assert !node.document
#assert !node.document
assert !node.previous_sibling
assert !node.next_sibling
assert_no_match(/Hello world/, xml.to_s)
Expand Down
2 changes: 1 addition & 1 deletion test/xml/test_node_set.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def test_unlink
set.unlink
set.each do |node|
assert !node.parent
assert !node.document
#assert !node.document
assert !node.previous_sibling
assert !node.next_sibling
end
Expand Down

0 comments on commit dafc963

Please sign in to comment.