Skip to content

Commit

Permalink
Merge pull request qutip#163 from ytdHuang/cuda-ext
Browse files Browse the repository at this point in the history
Change default `word_size=64` in the extension for `CUDA.jl`
  • Loading branch information
albertomercurio authored Jun 9, 2024
2 parents 52c8d6c + ef4cb06 commit 39b436d
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 31 deletions.
6 changes: 3 additions & 3 deletions ext/QuantumToolboxCUDAExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,15 @@ CuSparseMatrixCSR{T}(A::QuantumObject{<:SparseMatrixCSC}) where {T} =
QuantumObject(CuSparseMatrixCSR{T}(A.data), A.type, A.dims)

@doc raw"""
cu(A::QuantumObject; word_size::Int=32)
cu(A::QuantumObject; word_size::Int=64)
Return a new [`QuantumObject`](@ref) where `A.data` is in the type of `CUDA` arrays for gpu calculations.
# Arguments
- `A::QuantumObject`: The [`QuantumObject`](@ref)
- `word_size::Int`: The word size of the element type of `A`, can be either `32` or `64`. Default to `32`.
- `word_size::Int`: The word size of the element type of `A`, can be either `32` or `64`. Default to `64`.
"""
cu(A::QuantumObject; word_size::Int = 32) =
cu(A::QuantumObject; word_size::Int = 64) =
((word_size == 64) || (word_size == 32)) ? cu(A, Val(word_size)) :
throw(DomainError(word_size, "The word size should be 32 or 64."))
cu(A::QuantumObject{T}, word_size::TW) where {T<:Union{Vector,Matrix},TW<:Union{Val{32},Val{64}}} =
Expand Down
71 changes: 43 additions & 28 deletions test/cuda_ext.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,34 +22,18 @@ CUDA.versioninfo()
@test_throws DomainError cu(ψdi; word_size = 16)

# type conversion of CUDA dense arrays
@test typeof(cu(ψdi; word_size = 64).data) == typeof(CuArray(ψdi).data) == CuArray{Int64,1,CUDA.Mem.DeviceBuffer}
@test typeof(cu(ψdi; word_size = 32).data) ==
typeof(CuArray{Int32}(ψdi).data) ==
CuArray{Int32,1,CUDA.Mem.DeviceBuffer}
@test typeof(cu(ψdf; word_size = 64).data) == typeof(CuArray(ψdf).data) == CuArray{Float64,1,CUDA.Mem.DeviceBuffer}
@test typeof(cu(ψdf; word_size = 32).data) ==
typeof(CuArray{Float32}(ψdf).data) ==
CuArray{Float32,1,CUDA.Mem.DeviceBuffer}
@test typeof(cu(ψdc; word_size = 64).data) ==
typeof(CuArray(ψdc).data) ==
CuArray{ComplexF64,1,CUDA.Mem.DeviceBuffer}
@test typeof(cu(ψdc; word_size = 32).data) ==
typeof(CuArray{ComplexF32}(ψdc).data) ==
CuArray{ComplexF32,1,CUDA.Mem.DeviceBuffer}
@test typeof(cu(Xdi; word_size = 64).data) == typeof(CuArray(Xdi).data) == CuArray{Int64,2,CUDA.Mem.DeviceBuffer}
@test typeof(cu(Xdi; word_size = 32).data) ==
typeof(CuArray{Int32}(Xdi).data) ==
CuArray{Int32,2,CUDA.Mem.DeviceBuffer}
@test typeof(cu(Xdf; word_size = 64).data) == typeof(CuArray(Xdf).data) == CuArray{Float64,2,CUDA.Mem.DeviceBuffer}
@test typeof(cu(Xdf; word_size = 32).data) ==
typeof(CuArray{Float32}(Xdf).data) ==
CuArray{Float32,2,CUDA.Mem.DeviceBuffer}
@test typeof(cu(Xdc; word_size = 64).data) ==
typeof(CuArray(Xdc).data) ==
CuArray{ComplexF64,2,CUDA.Mem.DeviceBuffer}
@test typeof(cu(Xdc; word_size = 32).data) ==
typeof(CuArray{ComplexF32}(Xdc).data) ==
CuArray{ComplexF32,2,CUDA.Mem.DeviceBuffer}
@test typeof(cu(ψdi; word_size = 64).data) == typeof(CuArray(ψdi).data) <: CuArray{Int64,1}
@test typeof(cu(ψdi; word_size = 32).data) == typeof(CuArray{Int32}(ψdi).data) <: CuArray{Int32,1}
@test typeof(cu(ψdf; word_size = 64).data) == typeof(CuArray(ψdf).data) <: CuArray{Float64,1}
@test typeof(cu(ψdf; word_size = 32).data) == typeof(CuArray{Float32}(ψdf).data) <: CuArray{Float32,1}
@test typeof(cu(ψdc; word_size = 64).data) == typeof(CuArray(ψdc).data) <: CuArray{ComplexF64,1}
@test typeof(cu(ψdc; word_size = 32).data) == typeof(CuArray{ComplexF32}(ψdc).data) <: CuArray{ComplexF32,1}
@test typeof(cu(Xdi; word_size = 64).data) == typeof(CuArray(Xdi).data) <: CuArray{Int64,2}
@test typeof(cu(Xdi; word_size = 32).data) == typeof(CuArray{Int32}(Xdi).data) <: CuArray{Int32,2}
@test typeof(cu(Xdf; word_size = 64).data) == typeof(CuArray(Xdf).data) <: CuArray{Float64,2}
@test typeof(cu(Xdf; word_size = 32).data) == typeof(CuArray{Float32}(Xdf).data) <: CuArray{Float32,2}
@test typeof(cu(Xdc; word_size = 64).data) == typeof(CuArray(Xdc).data) <: CuArray{ComplexF64,2}
@test typeof(cu(Xdc; word_size = 32).data) == typeof(CuArray{ComplexF32}(Xdc).data) <: CuArray{ComplexF32,2}

# type conversion of CUDA sparse arrays
@test typeof(cu(ψsi; word_size = 64).data) == typeof(CuSparseVector(ψsi).data) == CuSparseVector{Int64,Int32}
Expand Down Expand Up @@ -84,4 +68,35 @@ CUDA.versioninfo()
@test typeof(CuSparseMatrixCSR{Float32}(Xsf).data) == CuSparseMatrixCSR{Float32,Int32}
@test typeof(CuSparseMatrixCSR(Xsc).data) == CuSparseMatrixCSR{ComplexF64,Int32}
@test typeof(CuSparseMatrixCSR{ComplexF32}(Xsc).data) == CuSparseMatrixCSR{ComplexF32,Int32}

# brief example in README and documentation
# Runs the same open-system time evolution on CPU and GPU (64-bit and 32-bit)
# and compares the expectation values at the end of the testset.
N = 20 # truncation dimension passed to destroy/fock
ω64 = 1.0 # Float64
ω32 = 1.0f0 # Float32
γ64 = 0.1 # Float64
γ32 = 0.1f0 # Float32
tlist = range(0, 10, 100) # 100 evolution time points in [0, 10]

## calculate by CPU
a_cpu = destroy(N) # NOTE(review): presumably the annihilation operator from QuantumToolbox — confirm
ψ0_cpu = fock(N, 3) # initial state
H_cpu = ω64 * a_cpu' * a_cpu # Hamiltonian ω a†a
# collapse operator √γ a; e_ops records ⟨a†a⟩ at each time in tlist
sol_cpu = mesolve(H_cpu, ψ0_cpu, tlist, [sqrt(γ64) * a_cpu], e_ops = [a_cpu' * a_cpu], progress_bar = false)

## calculate by GPU (with 64-bit)
# `cu` without `word_size` uses the 64-bit default being introduced by this commit
a_gpu64 = cu(destroy(N))
ψ0_gpu64 = cu(fock(N, 3))
H_gpu64 = ω64 * a_gpu64' * a_gpu64
sol_gpu64 =
    mesolve(H_gpu64, ψ0_gpu64, tlist, [sqrt(γ64) * a_gpu64], e_ops = [a_gpu64' * a_gpu64], progress_bar = false)

## calculate by GPU (with 32-bit)
# explicit word_size = 32 converts operators/states to single-precision element types
a_gpu32 = cu(destroy(N), word_size = 32)
ψ0_gpu32 = cu(fock(N, 3), word_size = 32)
H_gpu32 = ω32 * a_gpu32' * a_gpu32 # use the Float32 frequency to keep the result single precision
sol_gpu32 =
    mesolve(H_gpu32, ψ0_gpu32, tlist, [sqrt(γ32) * a_gpu32], e_ops = [a_gpu32' * a_gpu32], progress_bar = false)

# 64-bit GPU result must match CPU at default tolerance;
# 32-bit gets a looser atol = 1e-6 to allow for single-precision rounding
@test all([isapprox(sol_cpu.expect[i], sol_gpu64.expect[i]) for i in 1:length(tlist)])
@test all([isapprox(sol_cpu.expect[i], sol_gpu32.expect[i]; atol = 1e-6) for i in 1:length(tlist)])
end

0 comments on commit 39b436d

Please sign in to comment.