change computation of hash value.

Prepare the hash infrastructure for a future change of the hashing algorithm to protect against Hash DoS. A Class|Struct should define the method `def hash(hasher)` and call `hasher << @ivar` inside it. As an option, for speed and for backward compatibility, `def hash` can still be implemented; it will be used for a Hash of the matching type. `Thread#hash` and `Signal#hash` are implemented as unseeded because they are used before `StdHasher @@seed` is initialized. Hash::Hasher is the default hasher; it uses `hash(hasher)` and serves as the default seeded hasher. Also, number normalization for hashing is introduced, i.e. the rule 'equality forces hash equality' is enforced (`a == b` => `a.hash == b.hash`). The normalization idea is borrowed from the Python implementation. It fixes several issues with BigInt and BigFloat on 32-bit platforms, but not all of them. Fixes crystal-lang#4578 Fixes crystal-lang#3932 Prerequisite for crystal-lang#4557 Replaces crystal-lang#4581 Correlates with crystal-lang#4653
funny-falcon · Sep 3, 2017 · 7563847 · 7563847
1 parent d74c3f3
commit 7563847
Show file tree

Hide file tree

Showing 44 changed files with 658 additions and 151 deletions.
diff --git a/spec/std/big/big_float_spec.cr b/spec/std/big/big_float_spec.cr
@@ -165,8 +165,26 @@ describe "BigFloat" do
   end
 
   it "#hash" do
-    b = 123.to_big_f
-    b.hash.should eq(b.to_f64.hash)
+    big_float = 123.to_big_f
+    big_float.hash.should eq(123.hash)
+    big_float.hash.should eq(big_float.to_f64.hash)
+
+    big_integer = "123456789012345678901".to_big_i
+    big_float = big_integer.to_big_f
+    big_float.should eq(big_integer)
+    big_float.hash_normalize.should eq(big_integer.hash_normalize)
+    big_float.hash.should eq(big_integer.hash)
+
+    float = 123.06125
+    big_float = float.to_big_f
+    big_float.hash.should eq(float.hash)
+
+    big_float = 1.to_big_f
+    big_float = big_float * 0x80000000 * 0x80000000 * 0x80000000
+    float = 1.0_f64
+    float = float * 0x80000000 * 0x80000000 * 0x80000000
+    big_float.hash_normalize.should eq(float.hash_normalize)
+    big_float.hash.should eq(float.hash)
   end
 
   it "clones" do

diff --git a/spec/std/big/big_int_spec.cr b/spec/std/big/big_int_spec.cr
@@ -319,12 +319,35 @@ describe "BigInt" do
 
   it "#hash" do
     hash = 5.to_big_i.hash
-    hash.should eq(5)
-    typeof(hash).should eq(UInt64)
+    hash.should eq(5.hash)
+  end
+
+  it "#hash_normalize" do
+    hn = 5.to_big_i.hash_normalize
+    hn.should eq(5.hash_normalize)
+    hn = (-5).to_big_i.hash_normalize
+    hn.should eq((-5).hash_normalize)
+    hn = 500000000000000_u64.to_big_i.hash_normalize
+    hn.should eq(500000000000000_u64.hash_normalize)
+    hn = (-500000000000000_i64).to_big_i.hash_normalize
+    hn.should eq((-500000000000000_i64).hash_normalize)
+
+    bi = 1.to_big_i
+    bi = bi << 93
+    f = 1.0_f64
+    f = f * 0x80000000 * 0x80000000 * 0x80000000
+    bi.hash_normalize.should eq(f.hash_normalize)
+    (-bi).hash_normalize.should eq((-f).hash_normalize)
   end
 
   it "clones" do
     x = 1.to_big_i
     x.clone.should eq(x)
   end
+
+  it "#to_big_f" do
+    s = "123456789012345678901"
+    x = BigInt.new(s)
+    x.to_big_f.should eq BigFloat.new(s)
+  end
 end
diff --git a/spec/std/big/big_rational_spec.cr b/spec/std/big/big_rational_spec.cr
@@ -163,7 +163,25 @@ describe BigRational do
   it "#hash" do
     b = br(10, 3)
     hash = b.hash
-    hash.should eq(b.to_f64.hash)
+    hash.should eq(b.to_big_f.hash)
+  end
+
+  it "#hash_normalize" do
+    hn = 5.to_big_i.hash_normalize
+    hn.should eq(5.hash_normalize)
+    hn = (-5).to_big_i.hash_normalize
+    hn.should eq((-5).hash_normalize)
+    hn = 500000000000000_u64.to_big_i.hash_normalize
+    hn.should eq(500000000000000_u64.hash_normalize)
+    hn = (-500000000000000_i64).to_big_i.hash_normalize
+    hn.should eq((-500000000000000_i64).hash_normalize)
+
+    bi = 1.to_big_r
+    bi = bi << 93
+    f = 1.0_f64
+    f = f * 0x80000000 * 0x80000000 * 0x80000000
+    bi.hash_normalize.should eq(f.hash_normalize)
+    (-bi).hash_normalize.should eq((-f).hash_normalize)
   end
 
   it "is a number" do
@@ -174,4 +192,10 @@ describe BigRational do
     x = br(10, 3)
     x.clone.should eq(x)
   end
+
+  it "#to_big_f" do
+    x = br(10, 3)
+    f = BigFloat.new(10) / BigFloat.new(3)
+    x.to_big_f.should eq(f)
+  end
 end
diff --git a/spec/std/bool_spec.cr b/spec/std/bool_spec.cr
@@ -28,8 +28,9 @@ describe "Bool" do
   end
 
   describe "hash" do
-    it { true.hash.should eq(1) }
-    it { false.hash.should eq(0) }
+    it { true.hash.should eq(true.hash) }
+    it { false.hash.should eq(false.hash) }
+    it { true.hash.should_not eq(false.hash) }
   end
 
   describe "to_s" do

diff --git a/spec/std/enum_spec.cr b/spec/std/enum_spec.cr
@@ -142,7 +142,7 @@ describe Enum do
   end
 
   it "has hash" do
-    SpecEnum::Two.hash.should eq(1.hash)
+    SpecEnum::Two.hash.should_not eq(SpecEnum::One.hash)
   end
 
   it "parses" do

diff --git a/spec/std/hash_spec.cr b/spec/std/hash_spec.cr
@@ -145,8 +145,8 @@ describe "Hash" do
       end
     end
 
-    it "works with mixed types" do
-      {1 => :a, "a" => 1, 1.0 => "a", :a => 1.0}.values_at(1, "a", 1.0, :a).should eq({:a, 1, "a", 1.0})
+    it "works with mixed types and normalized numbers" do
+      {1 => :a, "a" => 1, 2.0 => "a", :a => 1.0}.values_at(1, 2, "a", 1.0, 2.0, :a).should eq({:a, "a", 1, :a, "a", 1.0})
     end
   end
 

diff --git a/spec/std/struct_spec.cr b/spec/std/struct_spec.cr
@@ -42,11 +42,14 @@ describe "Struct" do
 
   it "does hash" do
     s = StructSpec::TestClass.new(1, "hello")
-    s.hash.should eq(31 + "hello".hash)
+    hasher = Hash::Hasher.new
+    hasher << 1
+    hasher << "hello"
+    s.hash.should eq(hasher.digest)
   end
 
   it "does hash for struct wrapper (#1940)" do
-    StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(0)
+    StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(BigInt.new(0).hash)
   end
 
   it "does dup" do

diff --git a/spec/std/time/span_spec.cr b/spec/std/time/span_spec.cr
@@ -176,7 +176,7 @@ describe Time::Span do
   end
 
   it "test hash code" do
-    Time::Span.new(77).hash.should eq(77)
+    Time::Span.new(77).hash.should eq(77.hash)
   end
 
   it "test subtract" do

diff --git a/src/big/big_float.cr b/src/big/big_float.cr
@@ -76,8 +76,19 @@ struct BigFloat < Float
     new(mpf)
   end
 
-  def hash
-    to_f64.hash
+  def hash_normalize
+    # more exact version of `remainder(HASH_MODULUS).to_f.hash_normalize`
+    LibGMP.mpf_get_d_2exp(out exp, self)
+    frac = BigFloat.new { |mpf|
+      if exp >= 0
+        LibGMP.mpf_div_2exp(mpf, self, exp)
+      else
+        LibGMP.mpf_mul_2exp(mpf, self, -exp)
+      end
+    }
+    float_normalize_wrap do
+      float_normalize_reference(frac, exp)
+    end
   end
 
   def self.default_precision

diff --git a/src/big/big_int.cr b/src/big/big_int.cr
@@ -156,11 +156,7 @@ struct BigInt < Int
   def tdiv(other : Int) : BigInt
     check_division_by_zero other
 
-    if other < 0
-      -self.unsafe_truncated_div(other)
-    else
-      unsafe_truncated_div(other)
-    end
+    unsafe_truncated_div(other)
   end
 
   def unsafe_floored_div(other : BigInt) : BigInt
@@ -176,7 +172,13 @@ struct BigInt < Int
   end
 
   def unsafe_truncated_div(other : Int) : BigInt
-    BigInt.new { |mpz| LibGMP.tdiv_q_ui(mpz, self, other.abs) }
+    if LibGMP::ULong == UInt32 && (other < Int32::MIN || other > UInt32::MAX)
+      unsafe_truncated_div(other.to_big_i)
+    elsif other < 0
+      -BigInt.new { |mpz| LibGMP.tdiv_q_ui(mpz, self, other.abs) }
+    else
+      BigInt.new { |mpz| LibGMP.tdiv_q_ui(mpz, self, other) }
+    end
   end
 
   def %(other : Int) : BigInt
@@ -317,8 +319,19 @@ struct BigInt < Int
     io << "_big_i"
   end
 
-  def hash
-    to_u64
+  private HASH_MODULUS_INT_P = BigInt.new((1_u64 << HASH_BITS) - 1)
+  private HASH_MODULUS_INT_N = -BigInt.new((1_u64 << HASH_BITS) - 1)
+
+  def hash_normalize
+    # it should calculate `remainder(HASH_MODULUS)`
+    if LibGMP::ULong == UInt64
+      v = int_to_hashnorm(LibGMP.tdiv_ui(self, HASH_MODULUS))
+      self < 0 ? -v : v
+    elsif self >= HASH_MODULUS_INT_P || self <= HASH_MODULUS_INT_N
+      unsafe_truncated_mod(HASH_MODULUS_INT_P).to_i64
+    else
+      self.to_i64
+    end
   end
 
   # Returns a string representation of self.

diff --git a/src/big/big_rational.cr b/src/big/big_rational.cr
@@ -161,8 +161,20 @@ struct BigRational < Number
     BigRational.new { |mpq| LibGMP.mpq_abs(mpq, self) }
   end
 
-  def hash
-    to_f64.hash
+  private HASH_MODULUS_RAT_P = BigRational.new((1_u64 << HASH_BITS) - 1)
+  private HASH_MODULUS_RAT_N = -BigRational.new((1_u64 << HASH_BITS) - 1)
+
+  def hash_normalize
+    # more exact version of `remainder(HASH_MODULUS).to_f.hash_normalize`
+    rem = self
+    if self >= HASH_MODULUS_RAT_P || self <= HASH_MODULUS_RAT_N
+      num = numerator
+      denom = denominator
+      div = num.tdiv(denom)
+      floor = div.tdiv(HASH_MODULUS)
+      rem -= floor * HASH_MODULUS
+    end
+    rem.to_big_f.hash_normalize
   end
 
   # Returns the `Float64` representing this rational.

diff --git a/src/bool.cr b/src/bool.cr
@@ -41,9 +41,10 @@ struct Bool
     self != other
   end
 
-  # Returns a hash value for this boolean: 0 for `false`, 1 for `true`.
-  def hash
-    self ? 1 : 0
+  # Protocol method for generic hashing.
+  def hash(hasher)
+    hasher << (self ? 1 : 0)
+    hasher
   end
 
   # Returns `"true"` for `true` and `"false"` for `false`.

diff --git a/src/char.cr b/src/char.cr
@@ -419,6 +419,12 @@ struct Char
     ord
   end
 
+  # Protocol method for generic hashing.
+  def hash(hasher)
+    hasher.raw ord
+    hasher
+  end
+
   # Returns a Char that is one codepoint bigger than this char's codepoint.
   #
   # ```

diff --git a/src/class.cr b/src/class.cr
@@ -3,8 +3,9 @@ class Class
     to_s(io)
   end
 
-  def hash
-    crystal_type_id
+  def hash(hasher)
+    hasher.raw(crystal_type_id)
+    hasher
   end
 
   def ==(other : Class)

diff --git a/src/compiler/crystal/syntax/ast.cr b/src/compiler/crystal/syntax/ast.cr
@@ -1175,8 +1175,9 @@ module Crystal
       self
     end
 
-    def hash
-      0
+    def hash(hasher)
+      hasher << 0
+      hasher
     end
   end
 
@@ -1545,8 +1546,9 @@ module Crystal
       Self.new
     end
 
-    def hash
-      0
+    def hash(hasher)
+      hasher << 0
+      hasher
     end
   end
 
@@ -2025,8 +2027,9 @@ module Crystal
       Underscore.new
     end
 
-    def hash
-      0
+    def hash(hasher)
+      hasher << 0
+      hasher
     end
   end
 

diff --git a/src/enum.cr b/src/enum.cr
@@ -274,9 +274,10 @@ struct Enum
     value == other.value
   end
 
-  # Returns a hash value. This is the hash of the underlying value.
-  def hash
-    value.hash
+  # Protocol method for generic hashing.
+  def hash(hasher)
+    hasher.raw(value)
+    hasher
   end
 
   # Iterates each values in a Flags Enum.

diff --git a/src/event/signal_handler.cr b/src/event/signal_handler.cr
@@ -1,5 +1,6 @@
 require "c/signal"
 require "c/unistd"
+require "signal"
 
 # :nodoc:
 # Singleton that runs Signal events (libevent2) in it's own Fiber.