From c13f9a5f0d666c9b122defb70475f2645722a537 Mon Sep 17 00:00:00 2001
From: Mike Dalessio <mike@csa.net>
Date: Tue, 18 Sep 2012 10:33:37 -0400
Subject: [PATCH] Optimization to Document#collect_namespaces suggested by
 #761.

Note that benchmarks indicate this is *much* faster:

    #! /usr/bin/env ruby
    #
    # Times Document#collect_namespaces over n parsed copies of
    # test/files/tlm.html, reported under two labels: "using xpath" and
    # "using traverse".

    require 'nokogiri'
    require 'benchmark'

    xhtml = File.read "test/files/tlm.html"

    n = 50
    Benchmark.bm(20) do |b|
      # Documents are parsed before the report block, so parsing is not timed.
      docs = (1..n).collect { |_| Nokogiri::XML xhtml }
      b.report("using xpath (x#{n})") do
        docs.each do |doc|
          doc.collect_namespaces
        end
      end

      # A fresh set of documents is parsed for the second measurement.
      docs = (1..n).collect { |_| Nokogiri::XML xhtml }
      b.report("using traverse (x#{n})") do
        # NOTE(review): presumably SLOW=1 made a local build fall back to the
        # old traverse-based implementation; the diff in this patch shows no
        # such check -- confirm before re-running this benchmark.
        ENV['SLOW'] = "1"
        docs.each do |doc|
          doc.collect_namespaces
        end
      end
    end

with the result:

                               user     system      total        real
    using xpath (x50)      0.590000   0.010000   0.600000 (  0.605675)
    using traverse (x50)   2.410000   0.010000   2.420000 (  2.431678)
---
 lib/nokogiri/xml/document.rb | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/lib/nokogiri/xml/document.rb b/lib/nokogiri/xml/document.rb
index a613f1553cb..e1fd4cecc18 100644
--- a/lib/nokogiri/xml/document.rb
+++ b/lib/nokogiri/xml/document.rb
@@ -149,13 +149,15 @@ def document
       # Non-prefixed default namespaces (as in "xmlns=") are not included
       # in the hash.
       #
-      # Note this is a very expensive operation in current implementation, as it
-      # traverses the entire graph, and also has to bring each node across the
-      # libxml bridge into a ruby object.
+      # Note that this method does an xpath lookup for namespace nodes, so
+      # the order of entries (and which href is kept when a prefix recurs)
+      # may depend on the implementation of the underlying XML library.
+      #
       def collect_namespaces
-        ns = {}
-        traverse { |j| ns.merge!(j.namespaces) }
-        ns
+        xpath("//namespace::*").inject({}) do |hash, ns|
+          hash[["xmlns",ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
+          hash
+        end
       end
 
       # Get the list of decorators given +key+