From f3e4cb3497d27d036e20591c12c6e550eb8f49d2 Mon Sep 17 00:00:00 2001 From: milankl Date: Thu, 13 Feb 2020 13:28:08 +0000 Subject: [PATCH] Round, show, tests --- src/Float8s.jl | 3 +- src/float8.jl | 74 +++++++++++++++++++---- test/runtests.jl | 152 ++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 214 insertions(+), 15 deletions(-) diff --git a/src/Float8s.jl b/src/Float8s.jl index df1a968..a6ad7bd 100644 --- a/src/Float8s.jl +++ b/src/Float8s.jl @@ -8,7 +8,8 @@ module Float8s (+), (-), (*), (/), (\), (^), sin,cos,tan,asin,acos,atan,sinh,cosh,tanh,asinh,acosh, atanh,exp,exp2,exp10,log,log2,log10,sqrt,log1p, - atan,hypot + atan,hypot, + round export Float8, Float8_4, NaN8, Inf8, NaN8_4, Inf8_4 diff --git a/src/float8.jl b/src/float8.jl index 2d045a0..c6c18ca 100644 --- a/src/float8.jl +++ b/src/float8.jl @@ -149,23 +149,48 @@ end const basetable8, shifttable8 = create_base_shifttable(Float8) const basetable8_4, shifttable8_4 = create_base_shifttable(Float8_4) -basetable(::Type{Float8},i::Int) = @inbounds basetable8[i] -basetable(::Type{Float8_4},i::Int) = @inbounds basetable8_4[i] +function Float8(val::Float32) -shifttable(::Type{Float8},i::Int) = @inbounds shifttable8[i] -shifttable(::Type{Float8_4},i::Int) = @inbounds shifttable8_4[i] + f = reinterpret(UInt32, val) + + if isnan(val) #TODO retain the significant bits for NaN? + return nan8(Float8) + end + + # exponent as Int64 + i = f >> n_significant_bits(Float32) + 1 + @inbounds sh = shifttable8[i] + f &= significand_mask(Float32) -function (::Type{T})(val::Float32) where {T<:AbstractFloat8} + # If `val` is subnormal, the tables are set up to force the + # result to 0, so the significand has an implicit `1` in the + # cases we care about. + + f |= significand_mask(Float32) + 0x1 + @inbounds h = (basetable8[i] + (f >> sh) & significand_mask(Float8)) % UInt8 + + # rounding + nextbit = (f >> (sh-1)) & 1 + if nextbit != 0 && (h & exponent_mask(Float8)) != exponent_mask(Float8) + # Round halfway to even or check lower bits + if h&1 == 1 || (f & ((1<<(sh-1))-1)) != 0 + h += one(UInt8) + end + end + return reinterpret(Float8, h) +end + +function Float8_4(val::Float32) f = reinterpret(UInt32, val) if isnan(val) #TODO retain the significant bits for NaN? - return nan8(T) + return nan8(Float8_4) end # exponent as Int64 i = f >> n_significant_bits(Float32) + 1 - sh = shifttable(T,i) + @inbounds sh = shifttable8_4[i] f &= significand_mask(Float32) # If `val` is subnormal, the tables are set up to force the @@ -173,17 +198,17 @@ function (::Type{T})(val::Float32) where {T<:AbstractFloat8} # cases we care about. f |= significand_mask(Float32) + 0x1 - h = (basetable(T,i) + (f >> sh) & significand_mask(T)) % UInt8 + @inbounds h = (basetable8_4[i] + (f >> sh) & significand_mask(Float8_4)) % UInt8 # rounding nextbit = (f >> (sh-1)) & 1 - if nextbit != 0 && (h & exponent_mask(T)) != exponent_mask(T) + if nextbit != 0 && (h & exponent_mask(Float8_4)) != exponent_mask(Float8_4) # Round halfway to even or check lower bits if h&1 == 1 || (f & ((1<<(sh-1))-1)) != 0 h += one(UInt8) end end - return reinterpret(T, h) + return reinterpret(Float8_4, h) end first_sig_bit_mask(::Type{Float8}) = 0x00000008 @@ -265,7 +290,7 @@ function ==(x::AbstractFloat8, y::AbstractFloat8) if iszero(x) && iszero(y) # For Float16: (ix|iy)&0x7fff == 0x0000 return true end - return x == y + return reinterpret(UInt8,x) == reinterpret(UInt8,y) end for op in (:<, :<=, :isless) @@ -274,6 +299,7 @@ end for op in (:+, :-, :*, :/, :\, :^) @eval ($op)(a::Float8, b::Float8) = Float8(($op)(Float32(a), Float32(b))) + @eval ($op)(a::Float8_4, b::Float8_4) = Float8_4(($op)(Float32(a), Float32(b))) end for func in (:sin,:cos,:tan,:asin,:acos,:atan,:sinh,:cosh,:tanh,:asinh,:acosh, @@ -290,3 +316,29 @@ for func in (:atan,:hypot) $func(a::Float8_4,b::Float8_4) = Float8_4($func(Float32(a),Float32(b))) end end + +function show(io::IO,x::Float8) + if isnan(x) + print(io,"NaN8") + elseif isinf(x) + print(io,"Inf8") + else + io2 = IOBuffer() + print(io2,Float32(x)) + f = String(take!(io2)) + print(io,"Float8("*f*")") + end +end + +function show(io::IO,x::Float8_4) + if isnan(x) + print(io,"NaN8_4") + elseif isinf(x) + print(io,"Inf8_4") + else + io2 = IOBuffer() + print(io2,Float32(x)) + f = String(take!(io2)) + print(io,"Float8_4("*f*")") + end +end diff --git a/test/runtests.jl b/test/runtests.jl index ba3776d..9861d87 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,9 +1,9 @@ -using Float8s +using .Float8s using Test @testset "Conversion Float8 <-> Float32" begin - for i in 0x00:0xff + @testset for i in 0x00:0xff if ~isnan(Float8(i)) @test i == reinterpret(UInt8,Float8(Float32(Float8(i)))) end @@ -12,9 +12,155 @@ end @testset "Conversion Float8_4 <-> Float32" begin - for i in 0x00:0xff + @testset for i in 0x00:0xff if ~isnan(Float8_4(i)) @test i == reinterpret(UInt8,Float8_4(Float32(Float8_4(i)))) end end end + +@testset "Negation" begin + + @testset for i in 0x00:0xff + f8 = Float8(i) + f8_4 = Float8_4(i) + + if ~isnan(f8) + @test f8 == -(-f8) + end + + if ~isnan(f8_4) + @test f8_4 == -(-f8_4) + end + end +end + +@testset "Rounding" begin + + @testset for i in 0x00:0xff + f8 = Float8(i) + f8_4 = Float8_4(i) + + if ~isnan(f8) + @test f8 >= floor(f8) + @test f8 <= ceil(f8) + end + + if ~isnan(f8_4) + @test f8_4 >= floor(f8_4) + @test f8_4 <= ceil(f8_4) + end + end +end + +@testset "Triangle inequality Float8" begin + + @testset for i in 0x00:0xff + for j in 0x00:0xff + + f1 = Float8(i) + f2 = Float8(j) + + if ~isnan(f1) && ~isnan(f2) && isfinite(f1) && isfinite(f2) + @test abs(f1) + abs(f2) >= abs(f1+f2) + @test abs(f1) - abs(f2) <= abs(f1-f2) + @test abs(f1) * abs(f2) >= f1*f2 + end + end + end +end + +@testset "Triangle inequality Float8_4" begin + + @testset for i in 0x00:0xff + for j in 0x00:0xff + + f1 = Float8_4(i) + f2 = Float8_4(j) + + if ~isnan(f1) && ~isnan(f2) && isfinite(f1) && isfinite(f2) + @test abs(f1) + abs(f2) >= abs(f1+f2) + @test abs(f1) - abs(f2) <= abs(f1-f2) + @test abs(f1) * abs(f2) >= f1*f2 + end + end + end +end + +f = Float8(2.) +g = Float8(1.) + +@testset "Comparison Float8" begin + @test f >= g + @test f > g + @test g < f + @test g <= g + @test all([g g] .< [f f]) + @test all([g g] .<= [f f]) + @test all([f f] .> [g g]) + @test all([f f] .>= [g g]) + @test isless(g, f) + @test !isless(f, g) + + @test Float8(2.5) == Float8(2.5) + @test Float8(2.5) != Float8(2.6) +end + +f = Float8_4(2.) +g = Float8_4(1.) + +@testset "Comparison Float8_4" begin + @test f >= g + @test f > g + @test g < f + @test g <= g + @test all([g g] .< [f f]) + @test all([g g] .<= [f f]) + @test all([f f] .> [g g]) + @test all([f f] .>= [g g]) + @test isless(g, f) + @test !isless(f, g) + + @test Float8_4(2.5) == Float8_4(2.5) + @test Float8_4(2.5) != Float8_4(2.7) +end + +@testset "NaN8 and Inf8" begin + @test isnan(NaN8) + @test isnan(-NaN8) + @test !isnan(Inf8) + @test !isnan(-Inf8) + @test !isnan(Float16(2.6)) + @test NaN8 != NaN8 + @test repr(NaN8) == "NaN8" + + @test isinf(Inf8) + @test isinf(-Inf8) + @test !isinf(NaN8) + @test !isinf(-NaN8) + @test !isinf(Float16(2.6)) + @test Inf8 == Inf8 + @test Inf8 != -Inf8 + @test -Inf8 < Inf8 + @test repr(Inf8) == "Inf8" +end + +@testset "NaN8_4 and Inf8_4" begin + @test isnan(NaN8_4) + @test isnan(-NaN8_4) + @test !isnan(Inf8_4) + @test !isnan(-Inf8_4) + @test !isnan(Float8(2.6)) + @test NaN8_4 != NaN8_4 + @test repr(NaN8_4) == "NaN8_4" + + @test isinf(Inf8_4) + @test isinf(-Inf8_4) + @test !isinf(NaN8_4) + @test !isinf(-NaN8_4) + @test !isinf(Float8(2.6)) + @test Inf8_4 == Inf8_4 + @test Inf8_4 != -Inf8_4 + @test -Inf8_4 < Inf8_4 + @test repr(Inf8_4) == "Inf8_4" +end