From c13f9a5f0d666c9b122defb70475f2645722a537 Mon Sep 17 00:00:00 2001
From: Mike Dalessio
Date: Tue, 18 Sep 2012 10:33:37 -0400
Subject: [PATCH] Optimization to Document#collect_namespaces suggested by #761.

Note that benchmarks indicate this is *much* faster:

    #! /usr/bin/env ruby

    require 'nokogiri'
    require 'benchmark'

    xhtml = File.read "test/files/tlm.html"
    n = 50

    Benchmark.bm(20) do |b|
      docs = (1..n).collect { |_| Nokogiri::XML xhtml }
      b.report("using xpath (x#{n})") do
        docs.each do |doc|
          doc.collect_namespaces
        end
      end

      docs = (1..n).collect { |_| Nokogiri::XML xhtml }
      b.report("using traverse (x#{n})") do
        ENV['SLOW'] = "1"
        docs.each do |doc|
          doc.collect_namespaces
        end
      end
    end

with the result:

                              user     system      total        real
    using xpath (x50)     0.590000   0.010000   0.600000 (  0.605675)
    using traverse (x50)  2.410000   0.010000   2.420000 (  2.431678)
---
 lib/nokogiri/xml/document.rb | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/lib/nokogiri/xml/document.rb b/lib/nokogiri/xml/document.rb
index a613f1553cb..e1fd4cecc18 100644
--- a/lib/nokogiri/xml/document.rb
+++ b/lib/nokogiri/xml/document.rb
@@ -149,13 +149,15 @@ def document
       # Non-prefixed default namespaces (as in "xmlns=") are not included
       # in the hash.
       #
-      # Note this is a very expensive operation in current implementation, as it
-      # traverses the entire graph, and also has to bring each node across the
-      # libxml bridge into a ruby object.
+      # Note that this method does an xpath lookup for nodes with
+      # namespaces, and as a result the order may be dependent on the
+      # implementation of the underlying XML library.
+      #
       def collect_namespaces
-        ns = {}
-        traverse { |j| ns.merge!(j.namespaces) }
-        ns
+        xpath("//namespace::*").inject({}) do |hash, ns|
+          hash[["xmlns",ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
+          hash
+        end
       end
 
       # Get the list of decorators given +key+