Skip to content

Commit

Permalink
change computation of hash value.
Browse files Browse the repository at this point in the history
Prepare hash infrastructor to future change of hashing algrorithm
to protect against Hash DoS.
Class|Struct should define method `def hash(hasher)` and call
`hasher << @ivar` inside.

As an option, for speed, and for backward compatibility, `def hash`
still could be implemented. It will be used for Hash of matched type.
`Thread#hash` and `Signal#hash` is implemented as unseeded cause they are
 used before `StdHasher @@seed` is initialized.

Hash::Hasher is default hasher that uses `hash(hasher)` and it is used as
default seeded hasher.

Also, number normalization for hashing introduced, ie rule 'equality
forces hash equality' is forced (`a == b` => `a.hash == b.hash`).
Normalization idea is borrowed from Python implementation.
It fixes several issues with BigInt and BigFloat on 32bit platform,
but not all issues.

Fixes crystal-lang#4578
Fixes crystal-lang#3932
Prerequisite for crystal-lang#4557
Replaces crystal-lang#4581
Correlates with crystal-lang#4653
  • Loading branch information
funny-falcon committed Sep 3, 2017
1 parent d74c3f3 commit 7563847
Show file tree
Hide file tree
Showing 44 changed files with 658 additions and 151 deletions.
22 changes: 20 additions & 2 deletions spec/std/big/big_float_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,26 @@ describe "BigFloat" do
end

it "#hash" do
b = 123.to_big_f
b.hash.should eq(b.to_f64.hash)
big_float = 123.to_big_f
big_float.hash.should eq(123.hash)
big_float.hash.should eq(big_float.to_f64.hash)

big_integer = "123456789012345678901".to_big_i
big_float = big_integer.to_big_f
big_float.should eq(big_integer)
big_float.hash_normalize.should eq(big_integer.hash_normalize)
big_float.hash.should eq(big_integer.hash)

float = 123.06125
big_float = float.to_big_f
big_float.hash.should eq(float.hash)

big_float = 1.to_big_f
big_float = big_float * 0x80000000 * 0x80000000 * 0x80000000
float = 1.0_f64
float = float * 0x80000000 * 0x80000000 * 0x80000000
big_float.hash_normalize.should eq(float.hash_normalize)
big_float.hash.should eq(float.hash)
end

it "clones" do
Expand Down
27 changes: 25 additions & 2 deletions spec/std/big/big_int_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -319,12 +319,35 @@ describe "BigInt" do

it "#hash" do
hash = 5.to_big_i.hash
hash.should eq(5)
typeof(hash).should eq(UInt64)
hash.should eq(5.hash)
end

it "#hash_normalize" do
hn = 5.to_big_i.hash_normalize
hn.should eq(5.hash_normalize)
hn = (-5).to_big_i.hash_normalize
hn.should eq((-5).hash_normalize)
hn = 500000000000000_u64.to_big_i.hash_normalize
hn.should eq(500000000000000_u64.hash_normalize)
hn = (-500000000000000_i64).to_big_i.hash_normalize
hn.should eq((-500000000000000_i64).hash_normalize)

bi = 1.to_big_i
bi = bi << 93
f = 1.0_f64
f = f * 0x80000000 * 0x80000000 * 0x80000000
bi.hash_normalize.should eq(f.hash_normalize)
(-bi).hash_normalize.should eq((-f).hash_normalize)
end

it "clones" do
x = 1.to_big_i
x.clone.should eq(x)
end

it "#to_big_f" do
s = "123456789012345678901"
x = BigInt.new(s)
x.to_big_f.should eq BigFloat.new(s)
end
end
26 changes: 25 additions & 1 deletion spec/std/big/big_rational_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,25 @@ describe BigRational do
it "#hash" do
b = br(10, 3)
hash = b.hash
hash.should eq(b.to_f64.hash)
hash.should eq(b.to_big_f.hash)
end

it "#hash_normalize" do
hn = 5.to_big_i.hash_normalize
hn.should eq(5.hash_normalize)
hn = (-5).to_big_i.hash_normalize
hn.should eq((-5).hash_normalize)
hn = 500000000000000_u64.to_big_i.hash_normalize
hn.should eq(500000000000000_u64.hash_normalize)
hn = (-500000000000000_i64).to_big_i.hash_normalize
hn.should eq((-500000000000000_i64).hash_normalize)

bi = 1.to_big_r
bi = bi << 93
f = 1.0_f64
f = f * 0x80000000 * 0x80000000 * 0x80000000
bi.hash_normalize.should eq(f.hash_normalize)
(-bi).hash_normalize.should eq((-f).hash_normalize)
end

it "is a number" do
Expand All @@ -174,4 +192,10 @@ describe BigRational do
x = br(10, 3)
x.clone.should eq(x)
end

it "#to_big_f" do
x = br(10, 3)
f = BigFloat.new(10) / BigFloat.new(3)
x.to_big_f.should eq(f)
end
end
5 changes: 3 additions & 2 deletions spec/std/bool_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ describe "Bool" do
end

describe "hash" do
it { true.hash.should eq(1) }
it { false.hash.should eq(0) }
it { true.hash.should eq(true.hash) }
it { false.hash.should eq(false.hash) }
it { true.hash.should_not eq(false.hash) }
end

describe "to_s" do
Expand Down
2 changes: 1 addition & 1 deletion spec/std/enum_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ describe Enum do
end

it "has hash" do
SpecEnum::Two.hash.should eq(1.hash)
SpecEnum::Two.hash.should_not eq(SpecEnum::One.hash)
end

it "parses" do
Expand Down
4 changes: 2 additions & 2 deletions spec/std/hash_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ describe "Hash" do
end
end

it "works with mixed types" do
{1 => :a, "a" => 1, 1.0 => "a", :a => 1.0}.values_at(1, "a", 1.0, :a).should eq({:a, 1, "a", 1.0})
it "works with mixed types and normalized numbers" do
{1 => :a, "a" => 1, 2.0 => "a", :a => 1.0}.values_at(1, 2, "a", 1.0, 2.0, :a).should eq({:a, "a", 1, :a, "a", 1.0})
end
end

Expand Down
7 changes: 5 additions & 2 deletions spec/std/struct_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,14 @@ describe "Struct" do

it "does hash" do
s = StructSpec::TestClass.new(1, "hello")
s.hash.should eq(31 + "hello".hash)
hasher = Hash::Hasher.new
hasher << 1
hasher << "hello"
s.hash.should eq(hasher.digest)
end

it "does hash for struct wrapper (#1940)" do
StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(0)
StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(BigInt.new(0).hash)
end

it "does dup" do
Expand Down
2 changes: 1 addition & 1 deletion spec/std/time/span_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ describe Time::Span do
end

it "test hash code" do
Time::Span.new(77).hash.should eq(77)
Time::Span.new(77).hash.should eq(77.hash)
end

it "test subtract" do
Expand Down
15 changes: 13 additions & 2 deletions src/big/big_float.cr
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,19 @@ struct BigFloat < Float
new(mpf)
end

def hash
to_f64.hash
def hash_normalize
# more exact version of `remainder(HASH_MODULUS).to_f.hash_normalize`
LibGMP.mpf_get_d_2exp(out exp, self)
frac = BigFloat.new { |mpf|
if exp >= 0
LibGMP.mpf_div_2exp(mpf, self, exp)
else
LibGMP.mpf_mul_2exp(mpf, self, -exp)
end
}
float_normalize_wrap do
float_normalize_reference(frac, exp)
end
end

def self.default_precision
Expand Down
29 changes: 21 additions & 8 deletions src/big/big_int.cr
Original file line number Diff line number Diff line change
Expand Up @@ -156,11 +156,7 @@ struct BigInt < Int
def tdiv(other : Int) : BigInt
check_division_by_zero other

if other < 0
-self.unsafe_truncated_div(other)
else
unsafe_truncated_div(other)
end
unsafe_truncated_div(other)
end

def unsafe_floored_div(other : BigInt) : BigInt
Expand All @@ -176,7 +172,13 @@ struct BigInt < Int
end

def unsafe_truncated_div(other : Int) : BigInt
BigInt.new { |mpz| LibGMP.tdiv_q_ui(mpz, self, other.abs) }
if LibGMP::ULong == UInt32 && (other < Int32::MIN || other > UInt32::MAX)
unsafe_truncated_div(other.to_big_i)
elsif other < 0
-BigInt.new { |mpz| LibGMP.tdiv_q_ui(mpz, self, other.abs) }
else
BigInt.new { |mpz| LibGMP.tdiv_q_ui(mpz, self, other) }
end
end

def %(other : Int) : BigInt
Expand Down Expand Up @@ -317,8 +319,19 @@ struct BigInt < Int
io << "_big_i"
end

def hash
to_u64
private HASH_MODULUS_INT_P = BigInt.new((1_u64 << HASH_BITS) - 1)
private HASH_MODULUS_INT_N = -BigInt.new((1_u64 << HASH_BITS) - 1)

def hash_normalize
# it should calculate `remainder(HASH_MODULUS)`
if LibGMP::ULong == UInt64
v = int_to_hashnorm(LibGMP.tdiv_ui(self, HASH_MODULUS))
self < 0 ? -v : v
elsif self >= HASH_MODULUS_INT_P || self <= HASH_MODULUS_INT_N
unsafe_truncated_mod(HASH_MODULUS_INT_P).to_i64
else
self.to_i64
end
end

# Returns a string representation of self.
Expand Down
16 changes: 14 additions & 2 deletions src/big/big_rational.cr
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,20 @@ struct BigRational < Number
BigRational.new { |mpq| LibGMP.mpq_abs(mpq, self) }
end

def hash
to_f64.hash
private HASH_MODULUS_RAT_P = BigRational.new((1_u64 << HASH_BITS) - 1)
private HASH_MODULUS_RAT_N = -BigRational.new((1_u64 << HASH_BITS) - 1)

def hash_normalize
# more exact version of `remainder(HASH_MODULUS).to_f.hash_normalize`
rem = self
if self >= HASH_MODULUS_RAT_P || self <= HASH_MODULUS_RAT_N
num = numerator
denom = denominator
div = num.tdiv(denom)
floor = div.tdiv(HASH_MODULUS)
rem -= floor * HASH_MODULUS
end
rem.to_big_f.hash_normalize
end

# Returns the `Float64` representing this rational.
Expand Down
7 changes: 4 additions & 3 deletions src/bool.cr
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,10 @@ struct Bool
self != other
end

# Returns a hash value for this boolean: 0 for `false`, 1 for `true`.
def hash
self ? 1 : 0
# Protocol method for generic hashing.
def hash(hasher)
hasher << (self ? 1 : 0)
hasher
end

# Returns `"true"` for `true` and `"false"` for `false`.
Expand Down
6 changes: 6 additions & 0 deletions src/char.cr
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,12 @@ struct Char
ord
end

# Protocol method for generic hashing.
def hash(hasher)
hasher.raw ord
hasher
end

# Returns a Char that is one codepoint bigger than this char's codepoint.
#
# ```
Expand Down
5 changes: 3 additions & 2 deletions src/class.cr
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ class Class
to_s(io)
end

def hash
crystal_type_id
def hash(hasher)
hasher.raw(crystal_type_id)
hasher
end

def ==(other : Class)
Expand Down
15 changes: 9 additions & 6 deletions src/compiler/crystal/syntax/ast.cr
Original file line number Diff line number Diff line change
Expand Up @@ -1175,8 +1175,9 @@ module Crystal
self
end

def hash
0
def hash(hasher)
hasher << 0
hasher
end
end

Expand Down Expand Up @@ -1545,8 +1546,9 @@ module Crystal
Self.new
end

def hash
0
def hash(hasher)
hasher << 0
hasher
end
end

Expand Down Expand Up @@ -2025,8 +2027,9 @@ module Crystal
Underscore.new
end

def hash
0
def hash(hasher)
hasher << 0
hasher
end
end

Expand Down
7 changes: 4 additions & 3 deletions src/enum.cr
Original file line number Diff line number Diff line change
Expand Up @@ -274,9 +274,10 @@ struct Enum
value == other.value
end

# Returns a hash value. This is the hash of the underlying value.
def hash
value.hash
# Protocol method for generic hashing.
def hash(hasher)
hasher.raw(value)
hasher
end

# Iterates each values in a Flags Enum.
Expand Down
1 change: 1 addition & 0 deletions src/event/signal_handler.cr
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
require "c/signal"
require "c/unistd"
require "signal"

# :nodoc:
# Singleton that runs Signal events (libevent2) in it's own Fiber.
Expand Down
Loading

0 comments on commit 7563847

Please sign in to comment.