Skip to content

Commit

Permalink
change computation of hash value.
Browse files Browse the repository at this point in the history
Prepare the hash infrastructure for a future change of the hashing algorithm
to protect against Hash DoS.
A Class or Struct should define a `def hash(hasher)` method and call
`hasher << @ivar` inside it.

As an option, for speed and for backward compatibility, `def hash`
can still be implemented. It will be used for a Hash of the matching type.
`Thread#hash` and `Signal#hash` are implemented as unseeded because they are
used before `StdHasher @@seed` is initialized.

Hash::Hasher is the default hasher; it uses `hash(hasher)` and serves as the
default seeded hasher.

Also, number normalization for hashing is introduced, i.e. the rule 'equality
forces hash equality' is enforced (`a == b` => `a.hash == b.hash`).
The normalization idea is borrowed from the Python implementation.
It fixes several issues with BigInt and BigFloat on 32-bit platforms,
but not all of them.

Fixes crystal-lang#4578
Fixes crystal-lang#3932
Prerequisite for crystal-lang#4557
Replaces crystal-lang#4581
Correlates with crystal-lang#4653
  • Loading branch information
funny-falcon committed Aug 24, 2017
1 parent d3537fc commit f5bf1ba
Show file tree
Hide file tree
Showing 45 changed files with 804 additions and 159 deletions.
84 changes: 81 additions & 3 deletions spec/std/big/big_float_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,66 @@ require "spec"
require "big_float"

describe "BigFloat" do
# Specs for the `BigFloat.new` constructors: each example builds a BigFloat
# from a different argument type and checks the result via `to_s` or `eq`.
describe "new" do
# Shared fixtures: an integer-valued string and a string with a fractional
# part, plus the BigFloats parsed from them.
bsi = "123456789012345678901"
bfsi = BigFloat.new(bsi)
bsf = "1234567890.12345678901"
bfsf = BigFloat.new(bsf)
# Parsing a string and printing it back must give the same digits.
it "new(String)" do
bfsi.to_s.should eq(bsi)
bfsf.to_s.should eq(bsf)
end

# A BigFloat built from a BigInt must equal the one parsed from the same digits.
it "new(BigInt)" do
bi = BigInt.new(bsi)
bfi = BigFloat.new(bi)
bfi.should eq(bfsi)
bfi.to_s.should eq(bsi)
end

# Converting 1/3 from a BigRational must match dividing BigFloat 1 by BigFloat 3.
it "new(BigRational)" do
br = BigRational.new(1, 3)
bfr = BigFloat.new(br)
bf = BigFloat.new(1) / BigFloat.new(3)
bfr.should eq(bf)
end

# Building a BigFloat from another BigFloat must compare equal to the source.
it "new(BigFloat)" do
bffi = BigFloat.new(bfsi)
bffi.should eq(bfsi)
bfff = BigFloat.new(bfsf)
bfff.should eq(bfsf)
end

# Every primitive integer width, signed and unsigned, must convert exactly.
it "new(Int)" do
# Small values: 1 for every type, -1 for the signed ones.
BigFloat.new(1_u8).to_s.should eq("1")
BigFloat.new(1_u16).to_s.should eq("1")
BigFloat.new(1_u32).to_s.should eq("1")
BigFloat.new(1_u64).to_s.should eq("1")
BigFloat.new(1_i8).to_s.should eq("1")
BigFloat.new(1_i16).to_s.should eq("1")
BigFloat.new(1_i32).to_s.should eq("1")
BigFloat.new(1_i64).to_s.should eq("1")
BigFloat.new(-1_i8).to_s.should eq("-1")
BigFloat.new(-1_i16).to_s.should eq("-1")
BigFloat.new(-1_i32).to_s.should eq("-1")
BigFloat.new(-1_i64).to_s.should eq("-1")

# Boundary values: the maximum of each type and the minimum of each signed type.
BigFloat.new(255_u8).to_s.should eq("255")
BigFloat.new(65535_u16).to_s.should eq("65535")
BigFloat.new(4294967295_u32).to_s.should eq("4294967295")
BigFloat.new(18446744073709551615_u64).to_s.should eq("18446744073709551615")
BigFloat.new(127_i8).to_s.should eq("127")
BigFloat.new(32767_i16).to_s.should eq("32767")
BigFloat.new(2147483647_i32).to_s.should eq("2147483647")
BigFloat.new(9223372036854775807_i64).to_s.should eq("9223372036854775807")
BigFloat.new(-128_i8).to_s.should eq("-128")
BigFloat.new(-32768_i16).to_s.should eq("-32768")
BigFloat.new(-2147483648_i32).to_s.should eq("-2147483648")
BigFloat.new(-9223372036854775808_i64).to_s.should eq("-9223372036854775808")
end
end

describe "-@" do
bf = "0.12345".to_big_f
it { (-bf).to_s.should eq("-0.12345") }
Expand Down Expand Up @@ -91,16 +151,34 @@ describe "BigFloat" do
it { "48600000".to_big_f.to_s.should eq("48600000") }
it { "12345678.87654321".to_big_f.to_s.should eq("12345678.87654321") }
it { "9.000000000000987".to_big_f.to_s.should eq("9.000000000000987") }
it { "12345678901234567".to_big_f.to_s.should eq("12345678901234567") }
it { "1234567890123456789".to_big_f.to_s.should eq("1234567890123456789") }
end

describe "#inspect" do
it { "2.3".to_big_f.inspect.should eq("2.3_big_f") }
end

it "#hash" do
b = 123.to_big_f
b.hash.should eq(b.to_f64.hash)
bf = 123.to_big_f
bf.hash.should eq(123.hash)
bf.hash.should eq(bf.to_f64.hash)

bi = "123456789012345678901".to_big_i
bf = bi.to_big_f
bf.should eq(bi)
bf.hash_normalize.should eq(bi.hash_normalize)
bf.hash.should eq(bi.hash)

f = 123.06125
bf = f.to_big_f
bf.hash.should eq(f.hash)

bf = 1.to_big_f
bf = bf * 0x80000000 * 0x80000000 * 0x80000000
f = 1.0_f64
f = f * 0x80000000 * 0x80000000 * 0x80000000
bf.hash_normalize.should eq(f.hash_normalize)
bf.hash.should eq(f.hash)
end

it "clones" do
Expand Down
22 changes: 20 additions & 2 deletions spec/std/big/big_int_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -291,12 +291,30 @@ describe "BigInt" do

it "#hash" do
hash = 5.to_big_i.hash
hash.should eq(5)
typeof(hash).should eq(UInt64)
hash.should eq(5.hash)
end

# A BigInt must normalize to the same value as the equal primitive number.
it "#hash_normalize" do
# Small Int32 value.
hn = 5.to_big_i.hash_normalize
hn.should eq(5.hash_normalize)
# Value that needs a 64-bit integer.
hn = 500000000000000_u64.to_big_i.hash_normalize
hn.should eq(500000000000000_u64.hash_normalize)

# 0x80000000 is 2**31, so multiplying by it three times yields 2**93,
# the same value as `1 << 93`; the BigInt and the Float64 must agree.
bi = 1.to_big_i
bi = bi << 93
f = 1.0_f64
f = f * 0x80000000 * 0x80000000 * 0x80000000
bi.hash_normalize.should eq(f.hash_normalize)
end

it "clones" do
x = 1.to_big_i
x.clone.should eq(x)
end

# Converting a BigInt to BigFloat must equal the BigFloat parsed from the
# same decimal digits.
it "#to_big_f" do
s = "123456789012345678901"
x = BigInt.new(s)
x.to_big_f.should eq BigFloat.new(s)
end
end
8 changes: 7 additions & 1 deletion spec/std/big/big_rational_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ describe BigRational do
it "#hash" do
b = br(10, 3)
hash = b.hash
hash.should eq(b.to_f64.hash)
hash.should eq(b.to_big_f.hash)
end

it "is a number" do
Expand All @@ -160,4 +160,10 @@ describe BigRational do
x = br(10, 3)
x.clone.should eq(x)
end

# Converting the rational 10/3 to BigFloat must equal BigFloat 10 divided by
# BigFloat 3.
it "#to_big_f" do
x = br(10, 3)
f = BigFloat.new(10) / BigFloat.new(3)
x.to_big_f.should eq(f)
end
end
5 changes: 3 additions & 2 deletions spec/std/bool_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ describe "Bool" do
end

describe "hash" do
it { true.hash.should eq(1) }
it { false.hash.should eq(0) }
it { true.hash.should eq(true.hash) }
it { false.hash.should eq(false.hash) }
it { true.hash.should_not eq(false.hash) }
end

describe "to_s" do
Expand Down
2 changes: 1 addition & 1 deletion spec/std/enum_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ describe Enum do
end

it "has hash" do
SpecEnum::Two.hash.should eq(1.hash)
SpecEnum::Two.hash.should_not eq(SpecEnum::One.hash)
end

it "parses" do
Expand Down
4 changes: 2 additions & 2 deletions spec/std/hash_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ describe "Hash" do
end
end

it "works with mixed types" do
{1 => :a, "a" => 1, 1.0 => "a", :a => 1.0}.values_at(1, "a", 1.0, :a).should eq({:a, 1, "a", 1.0})
it "works with mixed types and normalized numbers" do
{1 => :a, "a" => 1, 2.0 => "a", :a => 1.0}.values_at(1, 2, "a", 1.0, 2.0, :a).should eq({:a, "a", 1, :a, "a", 1.0})
end
end

Expand Down
7 changes: 5 additions & 2 deletions spec/std/struct_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,14 @@ describe "Struct" do

it "does hash" do
s = StructSpec::TestClass.new(1, "hello")
s.hash.should eq(31 + "hello".hash)
hasher = Hash::Hasher.new
hasher << 1
hasher << "hello"
s.hash.should eq(hasher.digest)
end

it "does hash for struct wrapper (#1940)" do
StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(0)
StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(BigInt.new(0).hash)
end

it "does dup" do
Expand Down
2 changes: 1 addition & 1 deletion spec/std/time/span_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ describe Time::Span do
end

it "test hash code" do
Time::Span.new(77).hash.should eq(77)
Time::Span.new(77).hash.should eq(77.hash)
end

it "test subtract" do
Expand Down
75 changes: 69 additions & 6 deletions src/big/big_float.cr
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,39 @@ struct BigFloat < Float
end

def initialize(num : Number)
LibGMP.mpf_init_set_d(out @mpf, num.to_f64)
# XXX: this `case` is a workaround for Crystal's unreliable method overloading;
# remove it once a separate BigFloat.new(BigInt) overload passes the spec
case num
when BigInt
LibGMP.mpf_init(out @mpf)
LibGMP.mpf_set_z(self, num)
when BigRational
LibGMP.mpf_init(out @mpf)
LibGMP.mpf_set_q(self, num)
when BigFloat
LibGMP.mpf_init(out @mpf)
LibGMP.mpf_set(self, num)
when Int8, Int16, Int32
LibGMP.mpf_init_set_si(out @mpf, num)
when UInt8, UInt16, UInt32
LibGMP.mpf_init_set_ui(out @mpf, num)
when Int64
if LibGMP::Long == Int64
LibGMP.mpf_init_set_si(out @mpf, num)
else
LibGMP.mpf_init(out @mpf)
LibGMP.mpf_set_z(self, num.to_big_i)
end
when UInt64
if LibGMP::ULong == UInt64
LibGMP.mpf_init_set_ui(out @mpf, num)
else
LibGMP.mpf_init(out @mpf)
LibGMP.mpf_set_z(self, num.to_big_i)
end
else
LibGMP.mpf_init_set_d(out @mpf, num.to_f64)
end
end

def initialize(num : Float, precision : Int)
Expand All @@ -35,8 +67,19 @@ struct BigFloat < Float
new(mpf)
end

def hash
to_f64.hash
# Returns the normalized form of this number that is used for hashing.
#
# Splits `self` into a binary exponent and a scaled fraction, then passes both
# to `float_normalize_reference` inside `float_normalize_wrap` (both helpers
# are defined outside this view).
def hash_normalize
# more exact version of `remainder(hash_modulus).to_f.hash_normalize`
# Only the exponent written to `exp` is used; the returned double is discarded.
LibGMP.mpf_get_d_2exp(out exp, self)
# Scale `self` by 2**(-exp): divide when `exp` is non-negative, otherwise
# multiply by 2**(-exp), since the GMP calls take a non-negative shift count.
# NOTE(review): per the GMP manual this should leave `frac` with magnitude in
# [0.5, 1) — confirm against the binding.
frac = BigFloat.new { |mpf|
if exp >= 0
LibGMP.mpf_div_2exp(mpf, self, exp)
else
LibGMP.mpf_mul_2exp(mpf, self, -exp)
end
}
float_normalize_wrap do
float_normalize_reference(frac, exp)
end
end

def self.default_precision
Expand All @@ -51,16 +94,36 @@ struct BigFloat < Float
LibGMP.mpf_cmp(self, other)
end

def <=>(other : Float)
# Compares this BigFloat with a BigInt by delegating to `LibGMP.mpf_cmp_z`
# and returning its result.
def <=>(other : BigInt)
LibGMP.mpf_cmp_z(self, other)
end

# Compares this BigFloat with a primitive float; `other` is widened to
# Float64 before being handed to `LibGMP.mpf_cmp_d`.
def <=>(other : Float32 | Float64)
LibGMP.mpf_cmp_d(self, other.to_f64)
end

def <=>(other : Int::Signed)
LibGMP.mpf_cmp_si(self, other.to_i64)
if LibGMP::Long == Int64
LibGMP.mpf_cmp_si(self, other.to_i64)
elsif other.is_a?(Int8 | Int16 | Int32)
LibGMP.mpf_cmp_si(self, LibGMP::Long.new(other))
else
LibGMP.mpf_cmp(self, other.to_big_f)
end
end

def <=>(other : Int::Unsigned)
LibGMP.mpf_cmp_ui(self, other.to_u64)
if LibGMP::ULong == UInt64
LibGMP.mpf_cmp_ui(self, other.to_u64)
elsif other.is_a?(UInt8 | UInt16 | UInt32)
LibGMP.mpf_cmp_ui(self, LibGMP::ULong.new(other))
else
LibGMP.mpf_cmp(self, other.to_big_f)
end
end

# Fallback comparison for any other Number: converts `other` with `to_big_f`
# and compares the two BigFloats via `LibGMP.mpf_cmp`.
def <=>(other : Number)
LibGMP.mpf_cmp(self, other.to_big_f)
end

def -
Expand Down
39 changes: 29 additions & 10 deletions src/big/big_int.cr
Original file line number Diff line number Diff line change
Expand Up @@ -264,8 +264,19 @@ struct BigInt < Int
io << "_big_i"
end

def hash
to_u64
private HASH_MODULUS_INT_P = BigInt.new((1_u64 << HASH_BITS) - 1)
private HASH_MODULUS_INT_N = -BigInt.new((1_u64 << HASH_BITS) - 1)

# Returns the normalized form of this integer that is used for hashing.
#
# `hash_modulus` and `int_to_hashnorm` are defined outside this view.
def hash_normalize
# it should calculate `remainder(hash_modulus)`
if LibGMP::ULong == UInt64
# GMP's unsigned long is 64 bits wide: take the remainder with `tdiv_ui`
# and re-apply the sign of `self` afterwards.
# NOTE(review): presumably `tdiv_ui` returns the remainder's absolute value,
# which is why the sign is restored here — confirm against the binding.
v = int_to_hashnorm(LibGMP.tdiv_ui(self, hash_modulus))
self < 0 ? -v : v
elsif self >= HASH_MODULUS_INT_P || self <= HASH_MODULUS_INT_N
# Magnitude reaches the modulus: reduce with a BigInt modulo before
# converting to Int64.
unsafe_truncated_mod(HASH_MODULUS_INT_P).to_i64
else
# Already strictly between -modulus and +modulus: convert directly.
self.to_i64
end
end

# Returns a string representation of self.
Expand Down Expand Up @@ -311,39 +322,47 @@ struct BigInt < Int
end

def to_i8
to_i64.to_i8
to_i32.to_i8
end

def to_i16
to_i64.to_i16
to_i32.to_i16
end

def to_i32
to_i64.to_i32
LibGMP.get_si(self).to_i32
end

def to_i64
LibGMP.get_si(self)
if LibGMP::Long == Int64 || (self <= Int32::MAX && self >= Int32::MIN)
LibGMP.get_si(self).to_i64
else
to_s.to_i64
end
end

def to_u
to_u32
end

def to_u8
to_u64.to_u8
to_u32.to_u8
end

def to_u16
to_u64.to_u16
to_u32.to_u16
end

def to_u32
to_u64.to_u32
LibGMP.get_ui(self).to_u32
end

def to_u64
LibGMP.get_ui(self).to_u64
if LibGMP::ULong == UInt64 || (self <= UInt32::MAX && self >= UInt32::MIN)
LibGMP.get_ui(self).to_u64
else
to_s.to_u64
end
end

def to_f
Expand Down
Loading

0 comments on commit f5bf1ba

Please sign in to comment.