Add Crystal::Hasher.reduce_num and #number (#14304)

crystal-lang · Feb 20, 2024 · 72aea71 · 72aea71
1 parent ee2d34c
commit 72aea71
Show file tree

Hide file tree

Showing 8 changed files with 111 additions and 48 deletions.
diff --git a/spec/std/crystal/hasher_spec.cr b/spec/std/crystal/hasher_spec.cr
@@ -260,4 +260,72 @@ describe "Crystal::Hasher" do
       1_f32.hash.should eq(1.to_big_i.hash)
     end
   end
+
+  describe ".reduce_num" do
+    it "reduces primitive int" do
+      {% for int in Int::Primitive.union_types %}
+        Crystal::Hasher.reduce_num({{ int }}.new(0)).should eq(0_u64)
+        Crystal::Hasher.reduce_num({{ int }}.new(1)).should eq(1_u64)
+        Crystal::Hasher.reduce_num({{ int }}::MAX).should eq(UInt64.new!({{ int }}::MAX % 0x1FFF_FFFF_FFFF_FFFF_u64))
+      {% end %}
+
+      {% for int in Int::Signed.union_types %}
+        Crystal::Hasher.reduce_num({{ int }}.new(-1)).should eq(UInt64::MAX)
+        Crystal::Hasher.reduce_num({{ int }}::MIN).should eq(UInt64::MAX - UInt64.new!({{ int }}::MAX % 0x1FFF_FFFF_FFFF_FFFF_u64))
+      {% end %}
+    end
+
+    it "reduces primitive float" do
+      {% for float in Float::Primitive.union_types %}
+        Crystal::Hasher.reduce_num({{ float }}.new(0)).should eq(0_u64)
+        Crystal::Hasher.reduce_num({{ float }}.new(1)).should eq(1_u64)
+        Crystal::Hasher.reduce_num({{ float }}.new(-1)).should eq(UInt64::MAX)
+        Crystal::Hasher.reduce_num({{ float }}::INFINITY).should eq(Crystal::Hasher::HASH_INF_PLUS)
+        Crystal::Hasher.reduce_num(-{{ float }}::INFINITY).should eq(Crystal::Hasher::HASH_INF_MINUS)
+        Crystal::Hasher.reduce_num({{ float }}::NAN).should eq(Crystal::Hasher::HASH_NAN)
+
+        x = {{ float }}.new(2)
+        i = 1
+        until x.infinite?
+          Crystal::Hasher.reduce_num(x).should eq(1_u64 << (i % 61))
+          x *= 2
+          i += 1
+        end
+
+        x = {{ float }}.new(0.5)
+        i = 1
+        until x.zero?
+          Crystal::Hasher.reduce_num(x).should eq(1_u64 << ((-i) % 61))
+          x /= 2
+          i += 1
+        end
+      {% end %}
+
+      Crystal::Hasher.reduce_num(Float32::MAX).should eq(0x1FFF_F800_0000_003F_u64)
+      Crystal::Hasher.reduce_num(Float64::MAX).should eq(0x1F00_FFFF_FFFF_FFFF_u64)
+    end
+
+    pending "reduces BigInt" do
+      Crystal::Hasher.reduce_num(0.to_big_i).should eq(0_u64)
+      Crystal::Hasher.reduce_num(1.to_big_i).should eq(1_u64)
+      Crystal::Hasher.reduce_num((-1).to_big_i).should eq(UInt64::MAX)
+
+      (1..300).each do |i|
+        Crystal::Hasher.reduce_num(2.to_big_i ** i).should eq(1_u64 << (i % 61))
+      end
+    end
+
+    it "reduces BigFloat" do
+      Crystal::Hasher.reduce_num(0.to_big_f).should eq(0_u64)
+      Crystal::Hasher.reduce_num(1.to_big_f).should eq(1_u64)
+      Crystal::Hasher.reduce_num((-1).to_big_f).should eq(UInt64::MAX)
+      Crystal::Hasher.reduce_num(Float32::MAX.to_big_f).should eq(0x1FFF_F800_0000_003F_u64)
+      Crystal::Hasher.reduce_num(Float64::MAX.to_big_f).should eq(0x1F00_FFFF_FFFF_FFFF_u64)
+
+      (1..300).each do |i|
+        Crystal::Hasher.reduce_num(2.to_big_f ** i).should eq(1_u64 << (i % 61))
+        Crystal::Hasher.reduce_num(0.5.to_big_f ** i).should eq(1_u64 << ((-i) % 61))
+      end
+    end
+  end
 end
diff --git a/src/big/big_float.cr b/src/big/big_float.cr
@@ -500,8 +500,8 @@ end
 
 # :nodoc:
 struct Crystal::Hasher
-  def float(value : BigFloat)
-    normalized_hash = float_normalize_wrap(value) do |value|
+  def self.reduce_num(value : BigFloat)
+    float_normalize_wrap(value) do |value|
       # more exact version of `Math.frexp`
       LibGMP.mpf_get_d_2exp(out exp, value)
       frac = BigFloat.new do |mpf|
@@ -513,6 +513,5 @@ struct Crystal::Hasher
       end
       float_normalize_reference(value, frac, exp)
     end
-    permute(normalized_hash)
   end
 end
diff --git a/src/big/big_int.cr b/src/big/big_int.cr
@@ -988,7 +988,7 @@ struct Crystal::Hasher
   private HASH_MODULUS_INT_P = BigInt.new((1_u64 << HASH_BITS) - 1)
   private HASH_MODULUS_INT_N = -BigInt.new((1_u64 << HASH_BITS) - 1)
 
-  def int(value : BigInt)
+  def self.reduce_num(value : BigInt)
     # it should calculate `remainder(HASH_MODULUS)`
     if LibGMP::UI == UInt64
       v = LibGMP.tdiv_ui(value, HASH_MODULUS).to_i64

diff --git a/src/big/big_rational.cr b/src/big/big_rational.cr
@@ -416,7 +416,7 @@ struct Crystal::Hasher
   private HASH_MODULUS_RAT_P = BigRational.new((1_u64 << HASH_BITS) - 1)
   private HASH_MODULUS_RAT_N = -BigRational.new((1_u64 << HASH_BITS) - 1)
 
-  def float(value : BigRational)
+  def self.reduce_num(value : BigRational)
     rem = value
     if value >= HASH_MODULUS_RAT_P || value <= HASH_MODULUS_RAT_N
       num = value.numerator

diff --git a/src/crystal/hasher.cr b/src/crystal/hasher.cr
@@ -33,7 +33,7 @@ struct Crystal::Hasher
   # Do not output the calculated hash value to a user's console/form/
   # html/api response, etc. Use something from the digest package instead.
 
-  # Based on https://github.com/python/cpython/blob/f051e43/Python/pyhash.c#L34
+  # Based on https://github.com/python/cpython/blob/371c970/Python/pyhash.c#L31
   #
   # For numeric types, the hash of a number x is based on the reduction
   # of x modulo the Mersenne prime P = 2**HASH_BITS - 1.  It's designed
@@ -75,9 +75,9 @@ struct Crystal::Hasher
   private HASH_BITS    = 61
   private HASH_MODULUS = (1_i64 << HASH_BITS) - 1
 
-  private HASH_NAN       =      0_u64
-  private HASH_INF_PLUS  = 314159_u64
-  private HASH_INF_MINUS = (-314159_i64).unsafe_as(UInt64)
+  HASH_NAN       =      0_u64
+  HASH_INF_PLUS  = 314159_u64
+  HASH_INF_MINUS = (-314159_i64).unsafe_as(UInt64)
 
   @@seed = uninitialized UInt64[2]
   Crystal::System::Random.random_bytes(@@seed.to_slice.to_unsafe_bytes)
@@ -105,38 +105,28 @@ struct Crystal::Hasher
     a &+ b
   end
 
-  def nil
-    @a &+= @b
-    @b &+= 1
-    self
+  def self.reduce_num(value : Int8 | Int16 | Int32)
+    value.to_i64.unsafe_as(UInt64)
   end
 
-  def bool(value)
-    (value ? 1 : 0).hash(self)
+  def self.reduce_num(value : UInt8 | UInt16 | UInt32)
+    value.to_u64
   end
 
-  def int(value : Int8 | Int16 | Int32)
-    permute(value.to_i64.unsafe_as(UInt64))
+  def self.reduce_num(value : Int::Unsigned)
+    value.remainder(HASH_MODULUS).to_u64
   end
 
-  def int(value : UInt8 | UInt16 | UInt32)
-    permute(value.to_u64)
-  end
-
-  def int(value : Int::Unsigned)
-    permute(value.remainder(HASH_MODULUS).to_u64)
-  end
-
-  def int(value : Int)
-    permute(value.remainder(HASH_MODULUS).to_i64.unsafe_as(UInt64))
+  def self.reduce_num(value : Int)
+    value.remainder(HASH_MODULUS).to_i64.unsafe_as(UInt64)
   end
 
   # This function is the reference implementation, and it is used for `BigFloat`.
   # For `Float64` and `Float32`, all supported architectures allow a more efficient
   # bitwise calculation.
   # The arguments `frac` and `exp` are the result of an equivalent of `Math.frexp`,
   # though for `BigFloat` a custom calculation is used for more precision.
-  private def float_normalize_reference(value, frac, exp)
+  private def self.float_normalize_reference(value, frac, exp)
     if value < 0
       frac = -frac
     end
@@ -155,7 +145,7 @@ struct Crystal::Hasher
     {x, exp}
   end
 
-  private def float_normalize_wrap(value, &)
+  private def self.float_normalize_wrap(value, &)
     return HASH_NAN if value.nan?
     if value.infinite?
       return value > 0 ? HASH_INF_PLUS : HASH_INF_MINUS
@@ -170,8 +160,8 @@ struct Crystal::Hasher
     (x * (value < 0 ? -1 : 1)).to_i64.unsafe_as(UInt64)
   end
 
-  def float(value : Float32)
-    normalized_hash = float_normalize_wrap(value) do |value|
+  def self.reduce_num(value : Float32)
+    float_normalize_wrap(value) do |value|
       # This optimized version works on every architecture where endianness
       # of Float32 and Int32 matches and float is IEEE754. All supported
       # architectures fall into this category.
@@ -187,11 +177,10 @@ struct Crystal::Hasher
       end
       {mantissa.to_i64, exp}
     end
-    permute(normalized_hash)
   end
 
-  def float(value : Float64)
-    normalized_hash = float_normalize_wrap(value) do |value|
+  def self.reduce_num(value : Float64)
+    float_normalize_wrap(value) do |value|
       # This optimized version works on every architecture where endianness
       # of Float64 and Int64 matches and float is IEEE754. All supported
       # architectures fall into this category.
@@ -208,15 +197,27 @@ struct Crystal::Hasher
 
       {mantissa.to_i64, exp}
     end
-    permute(normalized_hash)
   end
 
-  def float(value : Float)
-    normalized_hash = float_normalize_wrap(value) do |value|
+  def self.reduce_num(value : Float)
+    float_normalize_wrap(value) do |value|
       frac, exp = Math.frexp value
       float_normalize_reference(value, frac, exp)
     end
-    permute(normalized_hash)
+  end
+
+  def nil
+    @a &+= @b
+    @b &+= 1
+    self
+  end
+
+  def bool(value)
+    (value ? 1 : 0).hash(self)
+  end
+
+  def number(value : Number)
+    permute(Hasher.reduce_num(value))
   end
 
   def char(value)

diff --git a/src/float.cr b/src/float.cr
@@ -88,11 +88,6 @@ struct Float
     end
   end
 
-  # See `Object#hash(hasher)`
-  def hash(hasher)
-    hasher.float(self)
-  end
-
   # Writes this float to the given *io* in the given *format*.
   # See also: `IO#write_bytes`.
   def to_io(io : IO, format : IO::ByteFormat) : Nil

diff --git a/src/int.cr b/src/int.cr
@@ -527,11 +527,6 @@ struct Int
     !even?
   end
 
-  # See `Object#hash(hasher)`
-  def hash(hasher)
-    hasher.int(self)
-  end
-
   def succ : self
     self + 1
   end

diff --git a/src/number.cr b/src/number.cr
@@ -39,6 +39,11 @@ struct Number
     new(1)
   end
 
+  # See `Object#hash(hasher)`
+  def hash(hasher)
+    hasher.number(self)
+  end
+
   # Returns `self`.
   def +
     self