Skip to content
This repository has been archived by the owner on May 27, 2021. It is now read-only.

Commit

Permalink
Fix the workaround for #4.
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt committed Mar 30, 2017
1 parent 35d9f75 commit 42bb5ac
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 14 deletions.
16 changes: 10 additions & 6 deletions examples/vadd.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
using CUDAdrv, CUDAnative
using Base.Test

function kernel_vadd(a, b, c)
i = (blockIdx().x-1) * blockDim().x + threadIdx().x
c[i] = a[i] + b[i]
const BLOCK_SIZE = 16

# Kernel: every thread copies one element of a statically sized shared-memory buffer
# (`BLOCK_SIZE` Float32 elements) into the output array `c`.
# `shadow` is never written inside this kernel, so the values stored to `c` are whatever
# the buffer happens to hold.
# NOTE(review): despite its name this kernel no longer adds vectors; it looks like a
# reproducer for the bounds-checking issue (#4) -- confirm before treating it as an example.
# NOTE(review): `shadow[tx]` presumably requires at most `BLOCK_SIZE` threads per block --
# verify against the launch configuration.
function kernel_vadd(c)
shadow = @cuStaticSharedMem(Float32, (BLOCK_SIZE,))
# x component of this thread's index, used for both the read and the write below
tx = threadIdx().x
c[tx] = shadow[tx]

return nothing
end
Expand All @@ -16,12 +19,13 @@ a = round.(rand(Float32, dims) * 100)
b = round.(rand(Float32, dims) * 100)

d_a = CuArray(a)
matrix = CuArray{Float32}((BLOCK_SIZE, BLOCK_SIZE))
d_b = CuArray(b)
d_c = similar(d_a)

len = prod(dims)
@cuda (1,len) kernel_vadd(d_a, d_b, d_c)
c = Array(d_c)
@test a+b ≈ c
CUDAnative.@code_llvm @cuda (1,len) kernel_vadd(d_c)
# c = Array(d_c)
# @test a+b ≈ c

destroy(ctx)
17 changes: 9 additions & 8 deletions src/device/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,12 @@ Base.size(g::CuDeviceArray) = g.shape
Base.length(g::CuDeviceArray) = prod(g.shape)

# Scalar indexing with a single `Int`: load element `index` of `A` through a `Ptr{T}`
# obtained from the array, and assert that the loaded value is a `T`.
# The `@boundscheck` below used to be commented out because of a PTX assembler issue
# (see #4); it is emitted again now.
# NOTE(review): this file also defines `Base.checkbounds(::CuDeviceArray, I...) = nothing`,
# so the check appears to do nothing for the time being -- confirm.
@inline function Base.getindex{T}(A::CuDeviceArray{T}, index::Int)
@boundscheck checkbounds(A, index)
# the trailing `8` is presumably the alignment passed to `pointerref` -- TODO confirm
Base.pointerref(Base.unsafe_convert(Ptr{T}, A), index, 8)::T
end

# Scalar assignment with a single `Int` index: convert `x` to the element type `T` and
# store it at position `index` of `A` through a `Ptr{T}` obtained from the array.
# The `@boundscheck` below used to be commented out because of a PTX assembler issue
# (see #4); it is emitted again now.
# NOTE(review): this file also defines `Base.checkbounds(::CuDeviceArray, I...) = nothing`,
# so the check appears to do nothing for the time being -- confirm.
@inline function Base.setindex!{T}(A::CuDeviceArray{T}, x, index::Int)
@boundscheck checkbounds(A, index)
# the trailing `8` is presumably the alignment passed to `pointerset` -- TODO confirm
Base.pointerset(Base.unsafe_convert(Ptr{T}, A), convert(T, x)::T, index, 8)
end

Expand All @@ -50,15 +48,18 @@ Base.show{T,N}(io::IO, a::CuDeviceArray{T,N}) =
print(io, "$(join(a.shape, '×')) device array at $(pointer(a))")


## compatibility fixes
## quirks

# TODO: remove this hack as soon as immutables with heap references (such as BoundsError)
# can be stack-allocated
# Bounds checking is currently broken due to a PTX assembler issue (see #4), so
# `checkbounds` on a `CuDeviceArray` accepts any indices and returns `nothing`.
# NOTE(review): with this override, `@boundscheck checkbounds(A, index)` call sites cannot
# report out-of-bounds accesses -- presumably to be removed once #4 is resolved.
Base.checkbounds(::CuDeviceArray, I...) = nothing

# Replace the bounds error that carries arguments with a non-allocating, argumentless
# version: `CuBoundsError` has no fields, and `throw_boundserror` for a `CuDeviceArray`
# ignores both the array and the indices and throws it.
# TODO: can this be fixed by stack-allocating immutables with heap references?
struct CuBoundsError <: Exception end
# NOTE(review): the method is marked `@inline` while its body uses
# `Base.@_noinline_meta`; which hint is intended is not clear from here -- confirm.
@inline Base.throw_boundserror{T,N}(A::CuDeviceArray{T,N}, I) =
(Base.@_noinline_meta; throw(CuBoundsError()))

# TODO: same for SubArray, although it might be too complex to ever be non-allocating
# idem
function Base.unsafe_view{T}(A::CuDeviceArray{T,1}, I::Vararg{Base.ViewIndex,1})
Base.@_inline_meta
ptr = Base.unsafe_convert(Ptr{T}, A) + (I[1].start-1)*sizeof(T)
Expand Down
22 changes: 22 additions & 0 deletions test/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,28 @@ end



############################################################################################

@testset "bounds checking" begin
    # NOTE: these tests verify that bounds checking is _disabled_ (see #4):
    #       the LLVM IR generated for a plain indexing operation must not contain a trap.

    # one indexing function per array rank under test
    @eval array_oob_1d(array) = array[1]
    @eval array_oob_2d(array) = array[1, 1]

    for (indexer, rank) in ((array_oob_1d, 1), (array_oob_2d, 2))
        ir = sprint(io->CUDAnative.code_llvm(io, indexer, (CuDeviceArray{Int,rank},)))
        @test !contains(ir, "trap")
    end
end



############################################################################################

@testset "views" begin
Expand Down

0 comments on commit 42bb5ac

Please sign in to comment.