Skip to content

Commit

Permalink
Round, show, tests
Browse files Browse the repository at this point in the history
  • Loading branch information
milankl committed Feb 13, 2020
1 parent 482aae2 commit f3e4cb3
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 15 deletions.
3 changes: 2 additions & 1 deletion src/Float8s.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ module Float8s
(+), (-), (*), (/), (\), (^),
sin,cos,tan,asin,acos,atan,sinh,cosh,tanh,asinh,acosh,
atanh,exp,exp2,exp10,log,log2,log10,sqrt,log1p,
atan,hypot
atan,hypot,
round

export Float8, Float8_4, NaN8, Inf8, NaN8_4, Inf8_4

Expand Down
74 changes: 63 additions & 11 deletions src/float8.jl
Original file line number Diff line number Diff line change
Expand Up @@ -149,41 +149,66 @@ end
# Per-type lookup tables returned by `create_base_shifttable`. The Float32
# conversion code indexes the base table and the shift table with the same
# index `i`.
const basetable8, shifttable8 = create_base_shifttable(Float8)
const basetable8_4, shifttable8_4 = create_base_shifttable(Float8_4)

# Accessors for the per-type base tables; the lookup skips bounds checks (`@inbounds`).
basetable(::Type{Float8},i::Int) = @inbounds basetable8[i]
basetable(::Type{Float8_4},i::Int) = @inbounds basetable8_4[i]
# Convert a Float32 to Float8: look up a base bit pattern and a shift amount
# from the upper bits of `val`, add the shifted significand, then round to
# nearest with ties going to the even bit pattern.
# NaN inputs always return `nan8(Float8)`.
function Float8(val::Float32)

# Accessors for the per-type shift tables; the lookup skips bounds checks (`@inbounds`).
shifttable(::Type{Float8},i::Int) = @inbounds shifttable8[i]
shifttable(::Type{Float8_4},i::Int) = @inbounds shifttable8_4[i]
# Work on the raw bit pattern of `val`.
f = reinterpret(UInt32, val)

if isnan(val) #TODO retain the significant bits for NaN?
return nan8(Float8)
end

# exponent as Int64
# Shifts bind tighter than `+` in Julia, so this is (f >> n) + 1: the bits of
# `f` above the shifted-out field, plus one for 1-based table indexing.
i = f >> n_significant_bits(Float32) + 1
@inbounds sh = shifttable8[i]
# Keep only the significand field of `f` from here on.
f &= significand_mask(Float32)

function (::Type{T})(val::Float32) where {T<:AbstractFloat8}
# If `val` is subnormal, the tables are set up to force the
# result to 0, so the significand has an implicit `1` in the
# cases we care about.

# Make the implicit leading `1` explicit (mask + 1 is assumed to be the bit
# just above the significand field — TODO confirm against significand_mask).
f |= significand_mask(Float32) + 0x1
# `&` binds tighter than `+`: the shifted significand is masked first and then
# added to the base pattern; `% UInt8` truncates the sum to 8 bits.
@inbounds h = (basetable8[i] + (f >> sh) & significand_mask(Float8)) % UInt8

# rounding
# `nextbit` is the highest bit of `f` discarded by the shift above.
nextbit = (f >> (sh-1)) & 1
# No rounding when every bit of exponent_mask(Float8) is already set in `h`.
if nextbit != 0 && (h & exponent_mask(Float8)) != exponent_mask(Float8)
# Round halfway to even or check lower bits
# i.e. increment when `h` is odd (a tie goes to even) or when any of the
# remaining discarded bits is set (the value is above the halfway point).
if h&1 == 1 || (f & ((1<<(sh-1))-1)) != 0
h += one(UInt8)
end
end
return reinterpret(Float8, h)
end

# Convert a Float32 to Float8_4: look up a base bit pattern and a shift amount
# from the upper bits of `val`, add the shifted significand, then round to
# nearest with ties going to the even bit pattern.
# NaN inputs always return a `nan8` value.
function Float8_4(val::Float32)

# Work on the raw bit pattern of `val`.
f = reinterpret(UInt32, val)

if isnan(val) #TODO retain the significant bits for NaN?
return nan8(T)
return nan8(Float8_4)
end

# exponent as Int64
# Shifts bind tighter than `+` in Julia, so this is (f >> n) + 1: the bits of
# `f` above the shifted-out field, plus one for 1-based table indexing.
i = f >> n_significant_bits(Float32) + 1
sh = shifttable(T,i)
@inbounds sh = shifttable8_4[i]
# Keep only the significand field of `f` from here on.
f &= significand_mask(Float32)

# If `val` is subnormal, the tables are set up to force the
# result to 0, so the significand has an implicit `1` in the
# cases we care about.

# Make the implicit leading `1` explicit (mask + 1 is assumed to be the bit
# just above the significand field — TODO confirm against significand_mask).
f |= significand_mask(Float32) + 0x1
# `&` binds tighter than `+`: the shifted significand is masked first and then
# added to the base pattern; `% UInt8` truncates the sum to 8 bits.
h = (basetable(T,i) + (f >> sh) & significand_mask(T)) % UInt8
@inbounds h = (basetable8_4[i] + (f >> sh) & significand_mask(Float8_4)) % UInt8

# rounding
# `nextbit` is the highest bit of `f` discarded by the shift above.
nextbit = (f >> (sh-1)) & 1
# No rounding when every bit of the exponent mask is already set in `h`.
if nextbit != 0 && (h & exponent_mask(T)) != exponent_mask(T)
if nextbit != 0 && (h & exponent_mask(Float8_4)) != exponent_mask(Float8_4)
# Round halfway to even or check lower bits
# i.e. increment when `h` is odd (a tie goes to even) or when any of the
# remaining discarded bits is set (the value is above the halfway point).
if h&1 == 1 || (f & ((1<<(sh-1))-1)) != 0
h += one(UInt8)
end
end
return reinterpret(T, h)
return reinterpret(Float8_4, h)
end

# Mask with only bit 3 (value 0x08) set, as a 32-bit literal; presumably the
# leading significand bit of a Float8 — TODO confirm against the bit layout.
first_sig_bit_mask(::Type{Float8}) = 0x00000008
Expand Down Expand Up @@ -265,7 +290,7 @@ function ==(x::AbstractFloat8, y::AbstractFloat8)
if iszero(x) && iszero(y) # For Float16: (ix|iy)&0x7fff == 0x0000
return true
end
return x == y
return reinterpret(UInt8,x) == reinterpret(UInt8,y)
end

for op in (:<, :<=, :isless)
Expand All @@ -274,6 +299,7 @@ end

# Binary arithmetic for both 8-bit types: convert the operands to Float32,
# apply the operator there, and convert the result back to the operand type.
for T in (:Float8, :Float8_4), op in (:+, :-, :*, :/, :\, :^)
    @eval ($op)(a::$T, b::$T) = $T(($op)(Float32(a), Float32(b)))
end

for func in (:sin,:cos,:tan,:asin,:acos,:atan,:sinh,:cosh,:tanh,:asinh,:acosh,
Expand All @@ -290,3 +316,29 @@ for func in (:atan,:hypot)
$func(a::Float8_4,b::Float8_4) = Float8_4($func(Float32(a),Float32(b)))
end
end

# Print a Float8 to `io`.
#
# Output forms:
#   NaN            -> NaN8
#   +/- infinity   -> Inf8 / -Inf8 (the sign is kept, so negative infinity is
#                     no longer printed as if it were positive)
#   anything else  -> Float8(<v>), where <v> is the plain text form of the
#                     value converted to Float32
function show(io::IO,x::Float8)
    if isnan(x)
        print(io,"NaN8")
    elseif isinf(x)
        # isinf is true for both infinities; take the sign from the Float32 value
        print(io, signbit(Float32(x)) ? "-Inf8" : "Inf8")
    else
        # string(...) renders independently of any IOContext attached to `io`,
        # matching the text a fresh IOBuffer would receive
        print(io,"Float8(",string(Float32(x)),")")
    end
end

# Print a Float8_4 to `io`.
#
# Output forms:
#   NaN            -> NaN8_4
#   +/- infinity   -> Inf8_4 / -Inf8_4 (the sign is kept, so negative infinity
#                     is no longer printed as if it were positive)
#   anything else  -> Float8_4(<v>), where <v> is the plain text form of the
#                     value converted to Float32
function show(io::IO,x::Float8_4)
    if isnan(x)
        print(io,"NaN8_4")
    elseif isinf(x)
        # isinf is true for both infinities; take the sign from the Float32 value
        print(io, signbit(Float32(x)) ? "-Inf8_4" : "Inf8_4")
    else
        # string(...) renders independently of any IOContext attached to `io`,
        # matching the text a fresh IOBuffer would receive
        print(io,"Float8_4(",string(Float32(x)),")")
    end
end
152 changes: 149 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
using Float8s
using .Float8s
using Test

@testset "Conversion Float8 <-> Float32" begin

for i in 0x00:0xff
@testset for i in 0x00:0xff
if ~isnan(Float8(i))
@test i == reinterpret(UInt8,Float8(Float32(Float8(i))))
end
Expand All @@ -12,9 +12,155 @@ end

@testset "Conversion Float8_4 <-> Float32" begin

# Round trip: every bit pattern that is not a NaN must come back unchanged
# after Float8_4 -> Float32 -> Float8_4.
for i in 0x00:0xff
@testset for i in 0x00:0xff
if ~isnan(Float8_4(i))
@test i == reinterpret(UInt8,Float8_4(Float32(Float8_4(i))))
end
end
end

@testset "Negation" begin

    # Negating twice must give back the original value for every bit pattern
    # of both types; NaN patterns are left out of the comparison.
    @testset for i in 0x00:0xff
        f8 = Float8(i)
        f8_4 = Float8_4(i)

        isnan(f8) || @test f8 == -(-f8)
        isnan(f8_4) || @test f8_4 == -(-f8_4)
    end
end

@testset "Rounding" begin

    # For every non-NaN bit pattern of both types, the value must lie
    # between its floor and its ceil.
    @testset for i in 0x00:0xff
        f8 = Float8(i)
        f8_4 = Float8_4(i)

        for x in (f8, f8_4)
            isnan(x) && continue
            @test x >= floor(x)
            @test x <= ceil(x)
        end
    end
end

@testset "Triangle inequality Float8" begin

    # Exhaustive check over all pairs of Float8 bit patterns.
    @testset for i in 0x00:0xff
        f1 = Float8(i)

        for j in 0x00:0xff
            f2 = Float8(j)

            # only pairs where both values are neither NaN nor infinite
            (isnan(f1) || isnan(f2)) && continue
            (isfinite(f1) && isfinite(f2)) || continue

            @test abs(f1) + abs(f2) >= abs(f1+f2)
            @test abs(f1) - abs(f2) <= abs(f1-f2)
            @test abs(f1) * abs(f2) >= f1*f2
        end
    end
end

@testset "Triangle inequality Float8_4" begin

    # Exhaustive check over all pairs of Float8_4 bit patterns.
    @testset for i in 0x00:0xff
        f1 = Float8_4(i)

        for j in 0x00:0xff
            f2 = Float8_4(j)

            # only pairs where both values are neither NaN nor infinite
            (isnan(f1) || isnan(f2)) && continue
            (isfinite(f1) && isfinite(f2)) || continue

            @test abs(f1) + abs(f2) >= abs(f1+f2)
            @test abs(f1) - abs(f2) <= abs(f1-f2)
            @test abs(f1) * abs(f2) >= f1*f2
        end
    end
end

f = Float8(2.0)
g = Float8(1.0)

@testset "Comparison Float8" begin
    # scalar ordering with g = 1 and f = 2
    @test f >= g
    @test f > g
    @test g < f
    @test g <= g

    # the same relations element-wise on 1x2 matrices
    small, large = [g g], [f f]
    @test all(small .< large)
    @test all(small .<= large)
    @test all(large .> small)
    @test all(large .>= small)

    # total-order predicate
    @test isless(g, f)
    @test !isless(f, g)

    # equality after conversion from Float64
    @test Float8(2.5) == Float8(2.5)
    @test Float8(2.5) != Float8(2.6)
end

f = Float8_4(2.0)
g = Float8_4(1.0)

@testset "Comparison Float8_4" begin
    # scalar ordering with g = 1 and f = 2
    @test f >= g
    @test f > g
    @test g < f
    @test g <= g

    # the same relations element-wise on 1x2 matrices
    small, large = [g g], [f f]
    @test all(small .< large)
    @test all(small .<= large)
    @test all(large .> small)
    @test all(large .>= small)

    # total-order predicate
    @test isless(g, f)
    @test !isless(f, g)

    # equality after conversion from Float64
    @test Float8_4(2.5) == Float8_4(2.5)
    @test Float8_4(2.5) != Float8_4(2.7)
end

@testset "NaN8 and Inf8" begin
    # NaN classification, for both signs
    @test isnan(NaN8)
    @test isnan(-NaN8)
    @test !isnan(Inf8)
    @test !isnan(-Inf8)
    # an ordinary finite Float8 (a Float16 here would only test Base)
    @test !isnan(Float8(2.6))
    @test NaN8 != NaN8
    @test repr(NaN8) == "NaN8"

    # infinity classification, for both signs
    @test isinf(Inf8)
    @test isinf(-Inf8)
    @test !isinf(NaN8)
    @test !isinf(-NaN8)
    # an ordinary finite Float8 (a Float16 here would only test Base)
    @test !isinf(Float8(2.6))
    @test Inf8 == Inf8
    @test Inf8 != -Inf8
    @test -Inf8 < Inf8
    @test repr(Inf8) == "Inf8"
end

@testset "NaN8_4 and Inf8_4" begin
    # NaN classification, for both signs
    @test isnan(NaN8_4)
    @test isnan(-NaN8_4)
    @test !isnan(Inf8_4)
    @test !isnan(-Inf8_4)
    # an ordinary finite value of the type under test (Float8_4, not Float8)
    @test !isnan(Float8_4(2.6))
    @test NaN8_4 != NaN8_4
    @test repr(NaN8_4) == "NaN8_4"

    # infinity classification, for both signs
    @test isinf(Inf8_4)
    @test isinf(-Inf8_4)
    @test !isinf(NaN8_4)
    @test !isinf(-NaN8_4)
    # an ordinary finite value of the type under test (Float8_4, not Float8)
    @test !isinf(Float8_4(2.6))
    @test Inf8_4 == Inf8_4
    @test Inf8_4 != -Inf8_4
    @test -Inf8_4 < Inf8_4
    @test repr(Inf8_4) == "Inf8_4"
end

0 comments on commit f3e4cb3

Please sign in to comment.