change computation of hash value.

Prepare the hash infrastructure for a future change of the hashing algorithm to protect against Hash DoS. A Class|Struct should define the method `def hash(hasher)` and call `hasher << @ivar` inside it. As an option, for speed and for backward compatibility, `def hash` can still be implemented; it will be used for a Hash of the matching type. `Thread#hash` and `Signal#hash` are implemented as unseeded because they are used before `StdHasher @@seed` is initialized. `Hash::Hasher` is the default hasher that uses `hash(hasher)`, and it is used as the default seeded hasher. Also, number normalization for hashing is introduced, i.e. the rule 'equality forces hash equality' is enforced (`a == b` => `a.hash == b.hash`). The normalization idea is borrowed from the Python implementation. It fixes several issues with BigInt and BigFloat on 32-bit platforms, but not all of them. Fixes crystal-lang#4578 Fixes crystal-lang#3932 Prerequisite for crystal-lang#4557 Replaces crystal-lang#4581 Correlates with crystal-lang#4653
funny-falcon · Aug 24, 2017 · f5bf1ba · f5bf1ba
1 parent d3537fc
commit f5bf1ba
Show file tree

Hide file tree

Showing 45 changed files with 804 additions and 159 deletions.
diff --git a/spec/std/big/big_float_spec.cr b/spec/std/big/big_float_spec.cr
@@ -2,6 +2,66 @@ require "spec"
 require "big_float"
 
 describe "BigFloat" do
+  describe "new" do
+    bsi = "123456789012345678901"
+    bfsi = BigFloat.new(bsi)
+    bsf = "1234567890.12345678901"
+    bfsf = BigFloat.new(bsf)
+    it "new(String)" do
+      bfsi.to_s.should eq(bsi)
+      bfsf.to_s.should eq(bsf)
+    end
+
+    it "new(BigInt)" do
+      bi = BigInt.new(bsi)
+      bfi = BigFloat.new(bi)
+      bfi.should eq(bfsi)
+      bfi.to_s.should eq(bsi)
+    end
+
+    it "new(BigRational)" do
+      br = BigRational.new(1, 3)
+      bfr = BigFloat.new(br)
+      bf = BigFloat.new(1) / BigFloat.new(3)
+      bfr.should eq(bf)
+    end
+
+    it "new(BigFloat)" do
+      bffi = BigFloat.new(bfsi)
+      bffi.should eq(bfsi)
+      bfff = BigFloat.new(bfsf)
+      bfff.should eq(bfsf)
+    end
+
+    it "new(Int)" do
+      BigFloat.new(1_u8).to_s.should eq("1")
+      BigFloat.new(1_u16).to_s.should eq("1")
+      BigFloat.new(1_u32).to_s.should eq("1")
+      BigFloat.new(1_u64).to_s.should eq("1")
+      BigFloat.new(1_i8).to_s.should eq("1")
+      BigFloat.new(1_i16).to_s.should eq("1")
+      BigFloat.new(1_i32).to_s.should eq("1")
+      BigFloat.new(1_i64).to_s.should eq("1")
+      BigFloat.new(-1_i8).to_s.should eq("-1")
+      BigFloat.new(-1_i16).to_s.should eq("-1")
+      BigFloat.new(-1_i32).to_s.should eq("-1")
+      BigFloat.new(-1_i64).to_s.should eq("-1")
+
+      BigFloat.new(255_u8).to_s.should eq("255")
+      BigFloat.new(65535_u16).to_s.should eq("65535")
+      BigFloat.new(4294967295_u32).to_s.should eq("4294967295")
+      BigFloat.new(18446744073709551615_u64).to_s.should eq("18446744073709551615")
+      BigFloat.new(127_i8).to_s.should eq("127")
+      BigFloat.new(32767_i16).to_s.should eq("32767")
+      BigFloat.new(2147483647_i32).to_s.should eq("2147483647")
+      BigFloat.new(9223372036854775807_i64).to_s.should eq("9223372036854775807")
+      BigFloat.new(-128_i8).to_s.should eq("-128")
+      BigFloat.new(-32768_i16).to_s.should eq("-32768")
+      BigFloat.new(-2147483648_i32).to_s.should eq("-2147483648")
+      BigFloat.new(-9223372036854775808_i64).to_s.should eq("-9223372036854775808")
+    end
+  end
+
   describe "-@" do
     bf = "0.12345".to_big_f
     it { (-bf).to_s.should eq("-0.12345") }
@@ -91,16 +151,34 @@ describe "BigFloat" do
     it { "48600000".to_big_f.to_s.should eq("48600000") }
     it { "12345678.87654321".to_big_f.to_s.should eq("12345678.87654321") }
     it { "9.000000000000987".to_big_f.to_s.should eq("9.000000000000987") }
-    it { "12345678901234567".to_big_f.to_s.should eq("12345678901234567") }
+    it { "1234567890123456789".to_big_f.to_s.should eq("1234567890123456789") }
   end
 
   describe "#inspect" do
     it { "2.3".to_big_f.inspect.should eq("2.3_big_f") }
   end
 
   it "#hash" do
-    b = 123.to_big_f
-    b.hash.should eq(b.to_f64.hash)
+    bf = 123.to_big_f
+    bf.hash.should eq(123.hash)
+    bf.hash.should eq(bf.to_f64.hash)
+
+    bi = "123456789012345678901".to_big_i
+    bf = bi.to_big_f
+    bf.should eq(bi)
+    bf.hash_normalize.should eq(bi.hash_normalize)
+    bf.hash.should eq(bi.hash)
+
+    f = 123.06125
+    bf = f.to_big_f
+    bf.hash.should eq(f.hash)
+
+    bf = 1.to_big_f
+    bf = bf * 0x80000000 * 0x80000000 * 0x80000000
+    f = 1.0_f64
+    f = f * 0x80000000 * 0x80000000 * 0x80000000
+    bf.hash_normalize.should eq(f.hash_normalize)
+    bf.hash.should eq(f.hash)
   end
 
   it "clones" do

diff --git a/spec/std/big/big_int_spec.cr b/spec/std/big/big_int_spec.cr
@@ -291,12 +291,30 @@ describe "BigInt" do
 
   it "#hash" do
     hash = 5.to_big_i.hash
-    hash.should eq(5)
-    typeof(hash).should eq(UInt64)
+    hash.should eq(5.hash)
+  end
+
+  it "#hash_normalize" do
+    hn = 5.to_big_i.hash_normalize
+    hn.should eq(5.hash_normalize)
+    hn = 500000000000000_u64.to_big_i.hash_normalize
+    hn.should eq(500000000000000_u64.hash_normalize)
+
+    bi = 1.to_big_i
+    bi = bi << 93
+    f = 1.0_f64
+    f = f * 0x80000000 * 0x80000000 * 0x80000000
+    bi.hash_normalize.should eq(f.hash_normalize)
   end
 
   it "clones" do
     x = 1.to_big_i
     x.clone.should eq(x)
   end
+
+  it "#to_big_f" do
+    s = "123456789012345678901"
+    x = BigInt.new(s)
+    x.to_big_f.should eq BigFloat.new(s)
+  end
 end
diff --git a/spec/std/big/big_rational_spec.cr b/spec/std/big/big_rational_spec.cr
@@ -149,7 +149,7 @@ describe BigRational do
   it "#hash" do
     b = br(10, 3)
     hash = b.hash
-    hash.should eq(b.to_f64.hash)
+    hash.should eq(b.to_big_f.hash)
   end
 
   it "is a number" do
@@ -160,4 +160,10 @@ describe BigRational do
     x = br(10, 3)
     x.clone.should eq(x)
   end
+
+  it "#to_big_f" do
+    x = br(10, 3)
+    f = BigFloat.new(10) / BigFloat.new(3)
+    x.to_big_f.should eq(f)
+  end
 end
diff --git a/spec/std/bool_spec.cr b/spec/std/bool_spec.cr
@@ -28,8 +28,9 @@ describe "Bool" do
   end
 
   describe "hash" do
-    it { true.hash.should eq(1) }
-    it { false.hash.should eq(0) }
+    it { true.hash.should eq(true.hash) }
+    it { false.hash.should eq(false.hash) }
+    it { true.hash.should_not eq(false.hash) }
   end
 
   describe "to_s" do

diff --git a/spec/std/enum_spec.cr b/spec/std/enum_spec.cr
@@ -142,7 +142,7 @@ describe Enum do
   end
 
   it "has hash" do
-    SpecEnum::Two.hash.should eq(1.hash)
+    SpecEnum::Two.hash.should_not eq(SpecEnum::One.hash)
   end
 
   it "parses" do

diff --git a/spec/std/hash_spec.cr b/spec/std/hash_spec.cr
@@ -145,8 +145,8 @@ describe "Hash" do
       end
     end
 
-    it "works with mixed types" do
-      {1 => :a, "a" => 1, 1.0 => "a", :a => 1.0}.values_at(1, "a", 1.0, :a).should eq({:a, 1, "a", 1.0})
+    it "works with mixed types and normalized numbers" do
+      {1 => :a, "a" => 1, 2.0 => "a", :a => 1.0}.values_at(1, 2, "a", 1.0, 2.0, :a).should eq({:a, "a", 1, :a, "a", 1.0})
     end
   end
 

diff --git a/spec/std/struct_spec.cr b/spec/std/struct_spec.cr
@@ -42,11 +42,14 @@ describe "Struct" do
 
   it "does hash" do
     s = StructSpec::TestClass.new(1, "hello")
-    s.hash.should eq(31 + "hello".hash)
+    hasher = Hash::Hasher.new
+    hasher << 1
+    hasher << "hello"
+    s.hash.should eq(hasher.digest)
   end
 
   it "does hash for struct wrapper (#1940)" do
-    StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(0)
+    StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(BigInt.new(0).hash)
   end
 
   it "does dup" do

diff --git a/spec/std/time/span_spec.cr b/spec/std/time/span_spec.cr
@@ -176,7 +176,7 @@ describe Time::Span do
   end
 
   it "test hash code" do
-    Time::Span.new(77).hash.should eq(77)
+    Time::Span.new(77).hash.should eq(77.hash)
   end
 
   it "test subtract" do

diff --git a/src/big/big_float.cr b/src/big/big_float.cr
@@ -18,7 +18,39 @@ struct BigFloat < Float
   end
 
   def initialize(num : Number)
-    LibGMP.mpf_init_set_d(out @mpf, num.to_f64)
+    # XXX: this case is a workaround for Crystal's unreliable method overloading;
+    # remove it once a separate BigFloat.new(BigInt) overload passes the spec
+    case num
+    when BigInt
+      LibGMP.mpf_init(out @mpf)
+      LibGMP.mpf_set_z(self, num)
+    when BigRational
+      LibGMP.mpf_init(out @mpf)
+      LibGMP.mpf_set_q(self, num)
+    when BigFloat
+      LibGMP.mpf_init(out @mpf)
+      LibGMP.mpf_set(self, num)
+    when Int8, Int16, Int32
+      LibGMP.mpf_init_set_si(out @mpf, num)
+    when UInt8, UInt16, UInt32
+      LibGMP.mpf_init_set_ui(out @mpf, num)
+    when Int64
+      if LibGMP::Long == Int64
+        LibGMP.mpf_init_set_si(out @mpf, num)
+      else
+        LibGMP.mpf_init(out @mpf)
+        LibGMP.mpf_set_z(self, num.to_big_i)
+      end
+    when UInt64
+      if LibGMP::ULong == UInt64
+        LibGMP.mpf_init_set_ui(out @mpf, num)
+      else
+        LibGMP.mpf_init(out @mpf)
+        LibGMP.mpf_set_z(self, num.to_big_i)
+      end
+    else
+      LibGMP.mpf_init_set_d(out @mpf, num.to_f64)
+    end
   end
 
   def initialize(num : Float, precision : Int)
@@ -35,8 +67,19 @@ struct BigFloat < Float
     new(mpf)
   end
 
-  def hash
-    to_f64.hash
+  def hash_normalize
+    # more exact version of `remainder(hash_modulus).to_f.hash_normalize`
+    LibGMP.mpf_get_d_2exp(out exp, self)
+    frac = BigFloat.new { |mpf|
+      if exp >= 0
+        LibGMP.mpf_div_2exp(mpf, self, exp)
+      else
+        LibGMP.mpf_mul_2exp(mpf, self, -exp)
+      end
+    }
+    float_normalize_wrap do
+      float_normalize_reference(frac, exp)
+    end
   end
 
   def self.default_precision
@@ -51,16 +94,36 @@ struct BigFloat < Float
     LibGMP.mpf_cmp(self, other)
   end
 
-  def <=>(other : Float)
+  def <=>(other : BigInt)
+    LibGMP.mpf_cmp_z(self, other)
+  end
+
+  def <=>(other : Float32 | Float64)
     LibGMP.mpf_cmp_d(self, other.to_f64)
   end
 
   def <=>(other : Int::Signed)
-    LibGMP.mpf_cmp_si(self, other.to_i64)
+    if LibGMP::Long == Int64
+      LibGMP.mpf_cmp_si(self, other.to_i64)
+    elsif other.is_a?(Int8 | Int16 | Int32)
+      LibGMP.mpf_cmp_si(self, LibGMP::Long.new(other))
+    else
+      LibGMP.mpf_cmp(self, other.to_big_f)
+    end
   end
 
   def <=>(other : Int::Unsigned)
-    LibGMP.mpf_cmp_ui(self, other.to_u64)
+    if LibGMP::ULong == UInt64
+      LibGMP.mpf_cmp_ui(self, other.to_u64)
+    elsif other.is_a?(UInt8 | UInt16 | UInt32)
+      LibGMP.mpf_cmp_ui(self, LibGMP::ULong.new(other))
+    else
+      LibGMP.mpf_cmp(self, other.to_big_f)
+    end
+  end
+
+  def <=>(other : Number)
+    LibGMP.mpf_cmp(self, other.to_big_f)
   end
 
   def -

diff --git a/src/big/big_int.cr b/src/big/big_int.cr
@@ -264,8 +264,19 @@ struct BigInt < Int
     io << "_big_i"
   end
 
-  def hash
-    to_u64
+  private HASH_MODULUS_INT_P = BigInt.new((1_u64 << HASH_BITS) - 1)
+  private HASH_MODULUS_INT_N = -BigInt.new((1_u64 << HASH_BITS) - 1)
+
+  def hash_normalize
+    # it should calculate `remainder(hash_modulus)`
+    if LibGMP::ULong == UInt64
+      v = int_to_hashnorm(LibGMP.tdiv_ui(self, hash_modulus))
+      self < 0 ? -v : v
+    elsif self >= HASH_MODULUS_INT_P || self <= HASH_MODULUS_INT_N
+      unsafe_truncated_mod(HASH_MODULUS_INT_P).to_i64
+    else
+      self.to_i64
+    end
   end
 
   # Returns a string representation of self.
@@ -311,39 +322,47 @@ struct BigInt < Int
   end
 
   def to_i8
-    to_i64.to_i8
+    to_i32.to_i8
   end
 
   def to_i16
-    to_i64.to_i16
+    to_i32.to_i16
   end
 
   def to_i32
-    to_i64.to_i32
+    LibGMP.get_si(self).to_i32
   end
 
   def to_i64
-    LibGMP.get_si(self)
+    if LibGMP::Long == Int64 || (self <= Int32::MAX && self >= Int32::MIN)
+      LibGMP.get_si(self).to_i64
+    else
+      to_s.to_i64
+    end
   end
 
   def to_u
     to_u32
   end
 
   def to_u8
-    to_u64.to_u8
+    to_u32.to_u8
   end
 
   def to_u16
-    to_u64.to_u16
+    to_u32.to_u16
   end
 
   def to_u32
-    to_u64.to_u32
+    LibGMP.get_ui(self).to_u32
   end
 
   def to_u64
-    LibGMP.get_ui(self).to_u64
+    if LibGMP::ULong == UInt64 || (self <= UInt32::MAX && self >= UInt32::MIN)
+      LibGMP.get_ui(self).to_u64
+    else
+      to_s.to_u64
+    end
   end
 
   def to_f