From f9237fdfd61c9751a94b35b68e95ae17bd1c019b Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Sun, 10 Sep 2017 19:12:03 -0300 Subject: [PATCH] Introduce Crystal::Hasher for computing a `hash` value --- spec/std/big/big_int_spec.cr | 9 +++-- spec/std/bool_spec.cr | 3 +- spec/std/enum_spec.cr | 2 +- spec/std/float_spec.cr | 4 +- spec/std/hash_spec.cr | 9 ++--- spec/std/named_tuple_spec.cr | 2 +- spec/std/proc_spec.cr | 4 +- spec/std/struct_spec.cr | 5 ++- spec/std/time/span_spec.cr | 4 +- src/big/big_float.cr | 5 +-- src/big/big_int.cr | 5 +-- src/big/big_rational.cr | 5 +-- src/bool.cr | 6 +-- src/char.cr | 6 +-- src/class.cr | 5 ++- src/compiler/crystal/syntax/ast.cr | 12 ++---- src/crystal/hasher.cr | 59 ++++++++++++++++++++++++++++++ src/enum.cr | 6 +-- src/float.cr | 13 +++---- src/hash.cr | 26 +++++++------ src/http/headers.cr | 8 ++-- src/indexable.cr | 12 +++--- src/int.cr | 5 ++- src/json/any.cr | 6 +-- src/named_tuple.cr | 13 +++---- src/nil.cr | 6 +-- src/object.cr | 41 ++++++++++++++------- src/proc.cr | 5 ++- src/reference.cr | 6 +-- src/set.cr | 5 +-- src/string.cr | 12 ++---- src/struct.cr | 11 ++---- src/symbol.cr | 8 ++-- src/time.cr | 5 +-- src/tuple.cr | 11 ++---- src/xml/namespace.cr | 5 +-- src/xml/node.cr | 6 +-- src/xml/node_set.cr | 5 +-- src/yaml/any.cr | 6 +-- 39 files changed, 204 insertions(+), 162 deletions(-) create mode 100644 src/crystal/hasher.cr diff --git a/spec/std/big/big_int_spec.cr b/spec/std/big/big_int_spec.cr index daab8b6ee6f8..1ca6af323a27 100644 --- a/spec/std/big/big_int_spec.cr +++ b/spec/std/big/big_int_spec.cr @@ -324,9 +324,12 @@ describe "BigInt" do end it "#hash" do - hash = 5.to_big_i.hash - hash.should eq(5) - typeof(hash).should eq(UInt64) + b1 = 5.to_big_i + b2 = 5.to_big_i + b3 = 6.to_big_i + + b1.hash.should eq(b2.hash) + b1.hash.should_not eq(b3.hash) end it "clones" do diff --git a/spec/std/bool_spec.cr b/spec/std/bool_spec.cr index 960ed8653f33..c4e03854cf6a 100644 --- a/spec/std/bool_spec.cr +++ b/spec/std/bool_spec.cr @@ -28,8 +28,7 @@ describe "Bool" do end describe "hash" do - it { true.hash.should eq(1) } - it { false.hash.should eq(0) } + it { true.hash.should_not eq(false.hash) } end describe "to_s" do diff --git a/spec/std/enum_spec.cr b/spec/std/enum_spec.cr index ff55cdd6e014..91a16f507488 100644 --- a/spec/std/enum_spec.cr +++ b/spec/std/enum_spec.cr @@ -142,7 +142,7 @@ describe Enum do end it "has hash" do - SpecEnum::Two.hash.should eq(1.hash) + SpecEnum::Two.hash.should_not eq(SpecEnum::Three.hash) end it "parses" do diff --git a/spec/std/float_spec.cr b/spec/std/float_spec.cr index 2039e96e55bf..418e0d7785f9 100644 --- a/spec/std/float_spec.cr +++ b/spec/std/float_spec.cr @@ -206,11 +206,11 @@ describe "Float" do describe "hash" do it "does for Float32" do - 1.2_f32.hash.should_not eq(0) + 1.2_f32.hash.should eq(1.2_f32.hash) end it "does for Float64" do - 1.2.hash.should_not eq(0) + 1.2.hash.should eq(1.2.hash) end end diff --git a/spec/std/hash_spec.cr b/spec/std/hash_spec.cr index 4097a4aefecb..79157dcf301f 100644 --- a/spec/std/hash_spec.cr +++ b/spec/std/hash_spec.cr @@ -146,7 +146,7 @@ describe "Hash" do end it "works with mixed types" do - {1 => :a, "a" => 1, 1.0 => "a", :a => 1.0}.values_at(1, "a", 1.0, :a).should eq({:a, 1, "a", 1.0}) + {1 => :a, "a" => 1, 2.0 => "a", :a => 1.0}.values_at(1, "a", 2.0, :a).should eq({:a, 1, "a", 1.0}) end end @@ -542,14 +542,13 @@ describe "Hash" do end it "computes hash" do - h = { {1 => 2} => {3 => 4} } - h.hash.should_not eq(h.object_id) - + h1 = { {1 => 2} => {3 => 4} } h2 = { {1 => 2} => {3 => 4} } - h.hash.should eq(h2.hash) + h1.hash.should eq(h2.hash) h3 = {1 => 2, 3 => 4} h4 = {3 => 4, 1 => 2} + h3.hash.should eq(h4.hash) end diff --git a/spec/std/named_tuple_spec.cr b/spec/std/named_tuple_spec.cr index 60ae83ac51d4..2a243c32001f 100644 --- a/spec/std/named_tuple_spec.cr +++ b/spec/std/named_tuple_spec.cr @@ -135,7 +135,7 @@ describe "NamedTuple" do it "computes a hash value" do tup1 = {a: 1, b: 'a'} - tup1.hash.should_not eq(0) + tup1.hash.should eq(tup1.dup.hash) tup2 = {b: 'a', a: 1} tup2.hash.should eq(tup1.hash) diff --git a/spec/std/proc_spec.cr b/spec/std/proc_spec.cr index 8d0fef08bfce..0fd2ea55c9d3 100644 --- a/spec/std/proc_spec.cr +++ b/spec/std/proc_spec.cr @@ -91,7 +91,5 @@ describe "Proc" do f2.call('r').should eq(2) end - it "#hash" do - ->{ 1 }.hash.should_not eq(0) - end + typeof(->{ 1 }.hash) end diff --git a/spec/std/struct_spec.cr b/spec/std/struct_spec.cr index f266068d92e6..6dd0187a79c4 100644 --- a/spec/std/struct_spec.cr +++ b/spec/std/struct_spec.cr @@ -42,11 +42,12 @@ describe "Struct" do it "does hash" do s = StructSpec::TestClass.new(1, "hello") - s.hash.should eq(31 + "hello".hash) + s.hash.should eq(s.dup.hash) end it "does hash for struct wrapper (#1940)" do - StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(0) + s = StructSpec::BigIntWrapper.new(BigInt.new(0)) + s.hash.should eq(s.dup.hash) end it "does dup" do diff --git a/spec/std/time/span_spec.cr b/spec/std/time/span_spec.cr index 472472fa664f..d004cd86e5f4 100644 --- a/spec/std/time/span_spec.cr +++ b/spec/std/time/span_spec.cr @@ -176,7 +176,9 @@ describe Time::Span do end it "test hash code" do - Time::Span.new(77).hash.should eq(77) + t1 = Time::Span.new(77) + t2 = Time::Span.new(77) + t1.hash.should eq(t2.hash) end it "test subtract" do diff --git a/src/big/big_float.cr b/src/big/big_float.cr index f5f5ed13d5c0..b7985559911b 100644 --- a/src/big/big_float.cr +++ b/src/big/big_float.cr @@ -76,9 +76,8 @@ struct BigFloat < Float new(mpf) end - def hash - to_f64.hash - end + # TODO: improve this + def_hash to_f64 def self.default_precision LibGMP.mpf_get_default_prec diff --git a/src/big/big_int.cr b/src/big/big_int.cr index 916e6f26b6a0..c03cb8c9c23c 100644 --- a/src/big/big_int.cr +++ b/src/big/big_int.cr @@ -343,9 +343,8 @@ struct BigInt < Int io << "_big_i" end - def hash - to_u64 - end + # TODO: improve this + def_hash to_u64 # Returns a string representation of self. # diff --git a/src/big/big_rational.cr b/src/big/big_rational.cr index 62923d881604..4577723d39d1 100644 --- a/src/big/big_rational.cr +++ b/src/big/big_rational.cr @@ -161,9 +161,8 @@ struct BigRational < Number BigRational.new { |mpq| LibGMP.mpq_abs(mpq, self) } end - def hash - to_f64.hash - end + # TODO: improve this + def_hash to_f64 # Returns the `Float64` representing this rational. def to_f diff --git a/src/bool.cr b/src/bool.cr index 5e7f5f81ae56..550ff41915a8 100644 --- a/src/bool.cr +++ b/src/bool.cr @@ -41,9 +41,9 @@ struct Bool self != other end - # Returns a hash value for this boolean: 0 for `false`, 1 for `true`. - def hash - self ? 1 : 0 + # See `Object#hash(hasher)` + def hash(hasher) + hasher.bool(self) end # Returns `"true"` for `true` and `"false"` for `false`. diff --git a/src/char.cr b/src/char.cr index 0fd31d1c97c2..fce0d5965dc9 100644 --- a/src/char.cr +++ b/src/char.cr @@ -414,9 +414,9 @@ struct Char Unicode.upcase(self, options) { |char| yield char } end - # Returns this char's codepoint. - def hash - ord + # See `Object#hash(hasher)` + def hash(hasher) + hasher.char(self) end # Returns a Char that is one codepoint bigger than this char's codepoint. diff --git a/src/class.cr b/src/class.cr index cfe17b1e900c..980be98abaa5 100644 --- a/src/class.cr +++ b/src/class.cr @@ -3,8 +3,9 @@ class Class to_s(io) end - def hash - crystal_type_id + # See `Object#hash(hasher)` + def hash(hasher) + hasher.class(self) end def ==(other : Class) diff --git a/src/compiler/crystal/syntax/ast.cr b/src/compiler/crystal/syntax/ast.cr index 76a5fce49342..0cbb737cc5a7 100644 --- a/src/compiler/crystal/syntax/ast.cr +++ b/src/compiler/crystal/syntax/ast.cr @@ -1175,9 +1175,7 @@ module Crystal self end - def hash - 0 - end + def_hash end # A qualified identifier. @@ -1545,9 +1543,7 @@ module Crystal Self.new end - def hash - 0 - end + def_hash end abstract class ControlExpression < ASTNode @@ -2025,9 +2021,7 @@ module Crystal Underscore.new end - def hash - 0 - end + def_hash end class Splat < UnaryExpression diff --git a/src/crystal/hasher.cr b/src/crystal/hasher.cr new file mode 100644 index 000000000000..46b93598c904 --- /dev/null +++ b/src/crystal/hasher.cr @@ -0,0 +1,59 @@ +require "crystal/system/random" + +# :nodoc: +struct Crystal::Hasher + # Implementation of a Hasher to compute a fast and safe hash + # value for primitive and basic Crystal objects. All other + # hashes are computed based on these. + # + # TODO: the implementation is naive and should probably use + # another algorithm like SipHash or FNV. + + @@seed = uninitialized UInt64 + Crystal::System::Random.random_bytes(Slice.new(pointerof(@@seed).as(UInt8*), 8)) + + property result : UInt64 = @@seed + + def nil + self + end + + def bool(value) + (value ? 1 : 0).hash(self) + end + + def int(value) + @result = @result * 31 + value.to_u64 + self + end + + def float(value) + @result *= @result * 31 + value.to_f64.unsafe_as(UInt64) + self + end + + def char(value) + value.ord.hash(self) + end + + def enum(value) + value.value.hash(self) + end + + def symbol(value) + value.to_i.hash(self) + end + + def reference(value) + @result = @result * 31 + value.object_id.to_u64 + self + end + + def string(value) + value.to_slice.hash(self) + end + + def class(value) + value.crystal_type_id.hash(self) + end +end diff --git a/src/enum.cr b/src/enum.cr index e9758ba05d13..937b9a8b69a5 100644 --- a/src/enum.cr +++ b/src/enum.cr @@ -274,9 +274,9 @@ struct Enum value == other.value end - # Returns a hash value. This is the hash of the underlying value. - def hash - value.hash + # See `Object#hash(hasher)` + def hash(hasher) + hasher.enum(self) end # Iterates each values in a Flags Enum. diff --git a/src/float.cr b/src/float.cr index 6bd4dbc1644f..1f520d5f2c59 100644 --- a/src/float.cr +++ b/src/float.cr @@ -86,6 +86,11 @@ struct Float end end + # See `Object#hash(hasher)` + def hash(hasher) + hasher.float(self) + end + # Writes this float to the given *io* in the given *format*. # See also: `IO#write_bytes`. def to_io(io : IO, format : IO::ByteFormat) @@ -153,10 +158,6 @@ struct Float32 io << "_f32" end - def hash - unsafe_as(Int32) - end - def clone self end @@ -211,10 +212,6 @@ struct Float64 Printer.print(self, io) end - def hash - unsafe_as(Int64) - end - def clone self end diff --git a/src/hash.cr b/src/hash.cr index 6d7fe7567b52..b606c60feafb 100644 --- a/src/hash.cr +++ b/src/hash.cr @@ -1,3 +1,5 @@ +require "crystal/hasher" + # A `Hash` represents a mapping of keys to values. # # See the [official docs](http://crystal-lang.org/docs/syntax_and_semantics/literals/hash.html) for the basics. @@ -706,18 +708,20 @@ class Hash(K, V) true end - # See also: `Object#hash`. - # - # ``` - # foo = {"foo" => "bar"} - # foo.hash # => 3247054 - # ``` - def hash - hash = size + # See `Object#hash(hasher)` + def hash(hasher) + # The hash value must be the same regardless of the + # order of the keys. + result = hasher.result + each do |key, value| - hash += key.hash ^ value.hash + copy = hasher + copy = key.hash(copy) + copy = value.hash(copy) + result += copy.result end - hash + + result.hash(hasher) end # Duplicates a `Hash`. @@ -864,7 +868,7 @@ class Hash(K, V) end private def bucket_index(key) - key.hash.to_u32.remainder(@buckets_size).to_i + key.hash.remainder(@buckets_size).to_i end private def calculate_new_size(size) diff --git a/src/http/headers.cr b/src/http/headers.cr index e47ff17765b0..6153d6db8e03 100644 --- a/src/http/headers.cr +++ b/src/http/headers.cr @@ -9,13 +9,11 @@ struct HTTP::Headers record Key, name : String do forward_missing_to @name - def hash - h = 0 + def hash(hasher) name.each_byte do |c| - c = normalize_byte(c) - h = 31 * h + c + hasher = normalize_byte(c).hash(hasher) end - h + hasher end def ==(key2) diff --git a/src/indexable.cr b/src/indexable.cr index fa78b16f7b21..3c29593dff6d 100644 --- a/src/indexable.cr +++ b/src/indexable.cr @@ -271,13 +271,13 @@ module Indexable(T) first { nil } end - # Returns a hash code based on `self`'s size and elements. - # - # See also: `Object#hash`. - def hash - reduce(31 * size) do |memo, elem| - 31 * memo + elem.hash + # See `Object#hash(hasher)` + def hash(hasher) + hasher = size.hash(hasher) + each do |elem| + hasher = elem.hash(hasher) end + hasher end # Returns the index of the first appearance of *value* in `self` diff --git a/src/int.cr b/src/int.cr index 0fbcf49d72b5..22ce0dadeb92 100644 --- a/src/int.cr +++ b/src/int.cr @@ -316,8 +316,9 @@ struct Int !even? end - def hash - self + # See `Object#hash(hasher)` + def hash(hasher) + hasher.int(self) end def succ diff --git a/src/json/any.cr b/src/json/any.cr index fecfa508bd6d..f326c72a52b8 100644 --- a/src/json/any.cr +++ b/src/json/any.cr @@ -261,10 +261,8 @@ struct JSON::Any raw == other end - # :nodoc: - def hash - raw.hash - end + # See `Object#hash(hasher)` + def_hash raw # :nodoc: def to_json(json : JSON::Builder) diff --git a/src/named_tuple.cr b/src/named_tuple.cr index cd7468b0a418..b1c9d0d436ce 100644 --- a/src/named_tuple.cr +++ b/src/named_tuple.cr @@ -159,16 +159,13 @@ struct NamedTuple yield end - # Returns a hash value based on this name tuple's size, keys and values. - # - # See also: `Object#hash`. - def hash - hash = 31 * size + # See `Object#hash(hasher)` + def hash(hasher) {% for key in T.keys.sort %} - hash = 31 * hash + {{key.symbolize}}.hash - hash = 31 * hash + self[{{key.symbolize}}].hash + hasher = {{key.symbolize}}.hash(hasher) + hasher = self[{{key.symbolize}}].hash(hasher) {% end %} - hash + hasher end # Same as `to_s`. diff --git a/src/nil.cr b/src/nil.cr index 644fcc98cb71..d4d279a9dac3 100644 --- a/src/nil.cr +++ b/src/nil.cr @@ -67,9 +67,9 @@ struct Nil false end - # Returns `0`. - def hash - 0 + # See `Object#hash(hasher)` + def hash(hasher) + hasher.nil end # Returns an empty string. diff --git a/src/object.cr b/src/object.cr index 1f66f1837494..5c302535dc48 100644 --- a/src/object.cr +++ b/src/object.cr @@ -58,13 +58,33 @@ class Object nil end - # Generates an `Int` hash value for this object. + # Appends this object's value to *hasher*, and returns the modified *hasher*. + # + # Usually the macro `def_hash` can be used to generate this method. + # Otherwise, invoke `hash(hasher)` on each object's instance variables to + # accumulate the result: + # + # ``` + # def hash(hasher) + # hasher = @some_ivar.hash(hasher) + # hasher = @some_other_ivar.hash(hasher) + # hasher + # end + # ``` + abstract def hash(hasher) + + # Generates an `UInt64` hash value for this object. # # This method must have the property that `a == b` implies `a.hash == b.hash`. # # The hash value is used along with `==` by the `Hash` class to determine if two objects # reference the same hash key. - abstract def hash + # + # Subclasses must not override this method. Instead, they must define `hash(hasher)`, + # though usually the macro `def_hash` can be used to generate this method. + def hash + hash(Crystal::Hasher.new).result + end # Returns a string representation of this object. # @@ -1078,28 +1098,23 @@ class Object {% end %} end - # Defines a `hash` method computed from the given fields. + # Defines a `hash(hasher)` that will append a hash value for the given fields. # # ``` # class Person # def initialize(@name, @age) # end # - # # Define a hash method based on @name and @age + # # Define a hash(hasher) method based on @name and @age # def_hash @name, @age # end # ``` macro def_hash(*fields) - def hash - {% if fields.size == 1 %} - {{fields[0]}}.hash - {% else %} - hash = 0 - {% for field in fields %} - hash = 31 * hash + {{field}}.hash - {% end %} - hash + def hash(hasher) + {% for field in fields %} + hasher = {{field}}.hash(hasher) {% end %} + hasher end end diff --git a/src/proc.cr b/src/proc.cr index df48e26f4429..e0365ceacd51 100644 --- a/src/proc.cr +++ b/src/proc.cr @@ -181,8 +181,9 @@ struct Proc call(other) end - def hash - internal_representation.hash + # See `Object#hash(hasher)` + def hash(hasher) + internal_representation.hash(hasher) end def clone diff --git a/src/reference.cr b/src/reference.cr index 94034e03c3bf..5961bc3f0ec4 100644 --- a/src/reference.cr +++ b/src/reference.cr @@ -50,9 +50,9 @@ class Reference {% end %} end - # Returns this reference's `object_id` as the hash value. - def hash - object_id + # See `Object#hash(hasher)` + def hash(hasher) + hasher.reference(self) end def inspect(io : IO) : Nil diff --git a/src/set.cr b/src/set.cr index 991196e0867e..46de45e053a3 100644 --- a/src/set.cr +++ b/src/set.cr @@ -308,9 +308,8 @@ struct Set(T) pp.list("Set{", self, "}") end - def hash - @hash.hash - end + # See `Object#hash(hasher)` + def_hash @hash # Returns `true` if the set and the given set have at least one element in # common. diff --git a/src/string.cr b/src/string.cr index ecf0a89348e4..a3aa1ba0ec46 100644 --- a/src/string.cr +++ b/src/string.cr @@ -3929,15 +3929,9 @@ class String sprintf self, other end - # Returns a hash based on this string’s size and content. - # - # See also: `Object#hash`. - def hash - h = 0 - each_byte do |c| - h = 31 * h + c - end - h + # See `Object#hash(hasher)` + def hash(hasher) + hasher.string(self) end # Returns the number of unicode codepoints in this string. diff --git a/src/struct.cr b/src/struct.cr index 57bda79ad9eb..50d249216e86 100644 --- a/src/struct.cr +++ b/src/struct.cr @@ -70,15 +70,12 @@ struct Struct true end - # Returns a hash value based on this struct's instance variables hash values. - # - # See also: `Object#hash` - def hash : Int32 - hash = 0 + # See `Object#hash(hasher)` + def hash(hasher) {% for ivar in @type.instance_vars %} - hash = 31 * hash + @{{ivar.id}}.hash.to_i32 + hasher = @{{ivar.id}}.hash(hasher) {% end %} - hash + hasher end # Appends this struct's name and instance variables names and values diff --git a/src/symbol.cr b/src/symbol.cr index 8e90a7207c68..81676599dfdf 100644 --- a/src/symbol.cr +++ b/src/symbol.cr @@ -15,11 +15,9 @@ struct Symbol include Comparable(Symbol) - # Generates an `Int32` hash value for this symbol. - # - # See also: `Object#hash`. - def hash : Int32 - to_i + # See `Object#hash(hasher)` + def hash(hasher) + hasher.symbol(self) end # Compares symbol with other based on `String#<=>` method. Returns `-1`, `0` diff --git a/src/time.cr b/src/time.cr index 9d2c270f1148..2f04a580f9dc 100644 --- a/src/time.cr +++ b/src/time.cr @@ -309,9 +309,8 @@ struct Time end end - def hash - @encoded - end + # See `Object#hash(hasher)` + def_hash @encoded def self.days_in_month(year, month) : Int32 unless 1 <= month <= 12 diff --git a/src/tuple.cr b/src/tuple.cr index 01933b6f4980..711aca2a013b 100644 --- a/src/tuple.cr +++ b/src/tuple.cr @@ -307,15 +307,12 @@ struct Tuple size <=> other.size end - # Returns a hash value based on this tuple's length and contents. - # - # See also: `Object#hash`. - def hash - hash = 31 * size + # See `Object#hash(hasher)` + def hash(hasher) {% for i in 0...T.size %} - hash = 31 * hash + self[{{i}}].hash + hasher = self[{{i}}].hash(hasher) {% end %} - hash + hasher end # Returns a tuple containing cloned elements of this tuple using the `clone` method. diff --git a/src/xml/namespace.cr b/src/xml/namespace.cr index 43fa3d2bad69..af5646c89872 100644 --- a/src/xml/namespace.cr +++ b/src/xml/namespace.cr @@ -4,9 +4,8 @@ struct XML::Namespace def initialize(@document : Node, @ns : LibXML::NS*) end - def hash - object_id - end + # See `Object#hash(hasher)` + def_hash object_id def href @ns.value.href ? String.new(@ns.value.href) : nil diff --git a/src/xml/node.cr b/src/xml/node.cr index 3c23a238036f..b856570567bb 100644 --- a/src/xml/node.cr +++ b/src/xml/node.cr @@ -159,10 +159,8 @@ struct XML::Node type == XML::Type::DOCUMENT_FRAG_NODE end - # Returns this node's `#object_id` as the hash value. - def hash - object_id - end + # See `Object#hash(hasher)` + def_hash object_id # Returns the content for this Node. def inner_text diff --git a/src/xml/node_set.cr b/src/xml/node_set.cr index 00c8c759f039..22c566ba9e21 100644 --- a/src/xml/node_set.cr +++ b/src/xml/node_set.cr @@ -28,9 +28,8 @@ struct XML::NodeSet size == 0 end - def hash - object_id - end + # See `Object#hash(hasher)` + def_hash object_id def inspect(io) io << "[" diff --git a/src/yaml/any.cr b/src/yaml/any.cr index 1d2956c41261..4d06de9aa6be 100644 --- a/src/yaml/any.cr +++ b/src/yaml/any.cr @@ -194,10 +194,8 @@ struct YAML::Any raw == other end - # :nodoc: - def hash - raw.hash - end + # See `Object#hash(hasher)` + def_hash raw # :nodoc: def to_yaml(io)