diff --git a/NDTensors/Project.toml b/NDTensors/Project.toml
index 77f81d1deb..8a27aead81 100644
--- a/NDTensors/Project.toml
+++ b/NDTensors/Project.toml
@@ -37,12 +37,14 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
 Octavian = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
 TBLIS = "48530278-0828-4a49-9772-0f3830dfa1e9"
+AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
 
 [extensions]
 NDTensorsCUDAExt = "CUDA"
 NDTensorsMetalExt = "Metal"
 NDTensorsOctavianExt = "Octavian"
 NDTensorsTBLISExt = "TBLIS"
+NDTensorsAMDGPUExt = "AMDGPU"
 
 [compat]
 Accessors = "0.1.33"
diff --git a/NDTensors/ext/NDTensorsAMDGPUExt/NDTensorsAMDGPUExt.jl b/NDTensors/ext/NDTensorsAMDGPUExt/NDTensorsAMDGPUExt.jl
new file mode 100644
index 0000000000..76fa1b065c
--- /dev/null
+++ b/NDTensors/ext/NDTensorsAMDGPUExt/NDTensorsAMDGPUExt.jl
@@ -0,0 +1,11 @@
+module NDTensorsAMDGPUExt
+
+include("copyto.jl")
+include("set_types.jl")
+include("adapt.jl")
+include("indexing.jl")
+include("linearalgebra.jl")
+include("mul.jl")
+include("permutedims.jl")
+
+end
diff --git a/NDTensors/ext/NDTensorsAMDGPUExt/adapt.jl b/NDTensors/ext/NDTensorsAMDGPUExt/adapt.jl
new file mode 100644
index 0000000000..9e4a25c7a7
--- /dev/null
+++ b/NDTensors/ext/NDTensorsAMDGPUExt/adapt.jl
@@ -0,0 +1,31 @@
+using NDTensors: NDTensors, EmptyStorage, adapt_storagetype, emptytype
+using NDTensors.AMDGPUExtensions: AMDGPUExtensions, ROCArrayAdaptor
+using NDTensors.GPUArraysCoreExtensions: storagemode
+using NDTensors.TypeParameterAccessors:
+  default_type_parameter,
+  set_type_parameter,
+  set_type_parameters,
+  type_parameter,
+  type_parameters
+using Adapt: Adapt, adapt
+using AMDGPU: AMDGPU, ROCArray, ROCVector
+using Functors: fmap
+
+function AMDGPUExtensions.roc(xs; storagemode=default_type_parameter(ROCArray, storagemode))
+  return fmap(x -> adapt(ROCArrayAdaptor{storagemode}(), x), xs)
+end
+
+function Adapt.adapt_storage(adaptor::ROCArrayAdaptor, xs::AbstractArray)
+  new_parameters = (type_parameters(xs, (eltype, ndims))..., storagemode(adaptor))
+  roctype = set_type_parameters(ROCArray, (eltype, ndims, storagemode), new_parameters)
+  return isbits(xs) ? xs : adapt(roctype, xs)
+end
+
+function NDTensors.adapt_storagetype(
+  adaptor::ROCArrayAdaptor, xs::Type{EmptyStorage{ElT,StoreT}}
+) where {ElT,StoreT}
+  roctype = set_type_parameters(
+    ROCVector, (eltype, storagemode), (ElT, storagemode(adaptor))
+  )
+  return emptytype(adapt_storagetype(roctype, StoreT))
+end
diff --git a/NDTensors/ext/NDTensorsAMDGPUExt/copyto.jl b/NDTensors/ext/NDTensorsAMDGPUExt/copyto.jl
new file mode 100644
index 0000000000..cba61603a2
--- /dev/null
+++ b/NDTensors/ext/NDTensorsAMDGPUExt/copyto.jl
@@ -0,0 +1,35 @@
+using NDTensors.Expose: Exposed, expose, parent, unexpose
+using LinearAlgebra: LinearAlgebra, Adjoint
+using AMDGPU: ROCArray
+
+# Same definition as `MtlArray`.
+function Base.copy(src::Exposed{<:ROCArray,<:Base.ReshapedArray})
+  return reshape(copy(parent(src)), size(unexpose(src)))
+end
+
+function Base.copy(
+  src::Exposed{
+    <:ROCArray,<:SubArray{<:Any,<:Any,<:Base.ReshapedArray{<:Any,<:Any,<:Adjoint}}
+  },
+)
+  return copy(@view copy(expose(parent(src)))[parentindices(unexpose(src))...])
+end
+
+function Base.copyto!(dest::Exposed{<:ROCArray}, src::Exposed{<:ROCArray,<:SubArray})
+  copyto!(dest, expose(copy(src)))
+  return unexpose(dest)
+end
+
+function Base.copyto!(
+  dest::Exposed{<:ROCArray}, src::Exposed{<:ROCArray,<:Base.ReshapedArray}
+)
+  copyto!(dest, expose(parent(src)))
+  return unexpose(dest)
+end
+
+function Base.copyto!(
+  dest::Exposed{<:ROCArray}, src::Exposed{<:ROCArray,<:LinearAlgebra.Transpose}
+)
+  copyto!(expose(transpose(dest)), expose(parent(src)))
+  return unexpose(dest)
+end
diff --git a/NDTensors/ext/NDTensorsAMDGPUExt/indexing.jl b/NDTensors/ext/NDTensorsAMDGPUExt/indexing.jl
new file mode 100644
index 0000000000..c0b9fc4afd
--- /dev/null
+++ b/NDTensors/ext/NDTensorsAMDGPUExt/indexing.jl
@@ -0,0 +1,23 @@
+using NDTensors.Expose: Exposed, expose, parent, unexpose
+using NDTensors.GPUArraysCoreExtensions: cpu
+using AMDGPU: AMDGPU, ROCArray
+using GPUArraysCore: @allowscalar
+
+function Base.getindex(E::Exposed{<:ROCArray})
+  return @allowscalar unexpose(E)[]
+end
+
+function Base.setindex!(E::Exposed{<:ROCArray}, x::Number)
+  @allowscalar unexpose(E)[] = x
+  return unexpose(E)
+end
+
+function Base.getindex(E::Exposed{<:ROCArray,<:Adjoint}, i, j)
+  return (expose(parent(E))[j, i])'
+end
+
+Base.any(f, E::Exposed{<:ROCArray,<:NDTensors.Tensor}) = any(f, data(unexpose(E)))
+
+function Base.print_array(io::IO, E::Exposed{<:ROCArray})
+  return Base.print_array(io, expose(cpu(E)))
+end
diff --git a/NDTensors/ext/NDTensorsAMDGPUExt/linearalgebra.jl b/NDTensors/ext/NDTensorsAMDGPUExt/linearalgebra.jl
new file mode 100644
index 0000000000..642d2e6da0
--- /dev/null
+++ b/NDTensors/ext/NDTensorsAMDGPUExt/linearalgebra.jl
@@ -0,0 +1,22 @@
+using NDTensors.AMDGPUExtensions: roc
+using NDTensors.Expose: Expose, Exposed, expose, ql, ql_positive
+using NDTensors.GPUArraysCoreExtensions: cpu
+using NDTensors.TypeParameterAccessors: unwrap_array_type
+using LinearAlgebra: svd
+using Adapt: adapt
+using AMDGPU: ROCMatrix
+
+function LinearAlgebra.svd(A::Exposed{<:ROCMatrix}; kwargs...)
+  U, S, V = svd(cpu(A))
+  return roc.((U, S, V))
+end
+
+## TODO: AMDGPU.jl doesn't currently provide `ql`, so compute it on the CPU for now.
+function Expose.ql(A::Exposed{<:ROCMatrix})
+  Q, L = ql(expose(cpu(A)))
+  return adapt(unwrap_array_type(A), Matrix(Q)), adapt(unwrap_array_type(A), L)
+end
+function Expose.ql_positive(A::Exposed{<:ROCMatrix})
+  Q, L = ql_positive(expose(cpu(A)))
+  return adapt(unwrap_array_type(A), Matrix(Q)), adapt(unwrap_array_type(A), L)
+end
diff --git a/NDTensors/ext/NDTensorsAMDGPUExt/mul.jl b/NDTensors/ext/NDTensorsAMDGPUExt/mul.jl
new file mode 100644
index 0000000000..8d332e8452
--- /dev/null
+++ b/NDTensors/ext/NDTensorsAMDGPUExt/mul.jl
@@ -0,0 +1,45 @@
+using NDTensors.Expose: Exposed, expose, parent, unexpose
+using LinearAlgebra: LinearAlgebra, Adjoint, Transpose, mul!
+using AMDGPU: ROCArray
+
+# This was calling generic matrix multiplication.
+function LinearAlgebra.mul!(
+  CM::Exposed{<:ROCArray,<:LinearAlgebra.Transpose},
+  AM::Exposed{<:ROCArray},
+  BM::Exposed{<:ROCArray},
+  α,
+  β,
+)
+  mul!(transpose(CM), transpose(BM), transpose(AM), α, β)
+  return unexpose(CM)
+end
+
+# This was calling generic matrix multiplication.
+function LinearAlgebra.mul!(
+  CM::Exposed{<:ROCArray,<:LinearAlgebra.Adjoint},
+  AM::Exposed{<:ROCArray},
+  BM::Exposed{<:ROCArray},
+  α,
+  β,
+)
+  mul!(CM', BM', AM', α, β)
+  return unexpose(CM)
+end
+
+# Fix an issue where AMDGPU.jl cannot recognize
+# Transpose{Reshape{Adjoint{ROCArray}}} as a ROCArray and falls back to generic matmul.
+function LinearAlgebra.mul!(
+  CM::Exposed{<:ROCArray},
+  AM::Exposed{<:ROCArray},
+  BM::Exposed{
+    <:ROCArray,
+    <:LinearAlgebra.Transpose{
+      <:Any,<:Base.ReshapedArray{<:Any,<:Any,<:LinearAlgebra.Adjoint}
+    },
+  },
+  α,
+  β,
+)
+  mul!(CM, AM, expose(transpose(copy(expose(parent(BM))))), α, β)
+  return unexpose(CM)
+end
diff --git a/NDTensors/ext/NDTensorsAMDGPUExt/permutedims.jl b/NDTensors/ext/NDTensorsAMDGPUExt/permutedims.jl
new file mode 100644
index 0000000000..cc284e6389
--- /dev/null
+++ b/NDTensors/ext/NDTensorsAMDGPUExt/permutedims.jl
@@ -0,0 +1,23 @@
+using NDTensors.Expose: Exposed, expose, parent, unexpose
+using AMDGPU: ROCArray
+
+function Base.permutedims!(
+  Edest::Exposed{<:ROCArray,<:Base.ReshapedArray}, Esrc::Exposed{<:ROCArray}, perm
+)
+  Aperm = permutedims(Esrc, perm)
+  copyto!(expose(parent(Edest)), expose(Aperm))
+  return unexpose(Edest)
+end
+
+# There is an issue in AMDGPU.jl where `.=` can fail when `Edest` is a reshaped `Adjoint`,
+# so instead reshape `Esrc` into the shape of `parent(Edest)`.
+function Base.permutedims!(
+  Edest::Exposed{<:ROCArray,<:Base.ReshapedArray{<:Any,<:Any,<:Adjoint}},
+  Esrc::Exposed{<:ROCArray},
+  perm,
+  f,
+)
+  Aperm = reshape(permutedims(Esrc, perm), size(parent(Edest)))
+  parent(Edest) .= f.(parent(Edest), Aperm)
+  return unexpose(Edest)
+end
diff --git a/NDTensors/ext/NDTensorsAMDGPUExt/set_types.jl b/NDTensors/ext/NDTensorsAMDGPUExt/set_types.jl
new file mode 100644
index 0000000000..59ed52b5d0
--- /dev/null
+++ b/NDTensors/ext/NDTensorsAMDGPUExt/set_types.jl
@@ -0,0 +1,11 @@
+# TypeParameterAccessors definitions
+using NDTensors.TypeParameterAccessors: TypeParameterAccessors, Position
+using NDTensors.GPUArraysCoreExtensions: storagemode
+using AMDGPU: AMDGPU, ROCArray
+
+function TypeParameterAccessors.default_type_parameters(::Type{<:ROCArray})
+  return (Float64, 1, AMDGPU.Mem.HIPBuffer)
+end
+TypeParameterAccessors.position(::Type{<:ROCArray}, ::typeof(eltype)) = Position(1)
+TypeParameterAccessors.position(::Type{<:ROCArray}, ::typeof(ndims)) = Position(2)
+TypeParameterAccessors.position(::Type{<:ROCArray}, ::typeof(storagemode)) = Position(3)
diff --git a/NDTensors/ext/NDTensorsCUDAExt/linearalgebra.jl b/NDTensors/ext/NDTensorsCUDAExt/linearalgebra.jl
index 4720df1e21..4781386ea2 100644
--- a/NDTensors/ext/NDTensorsCUDAExt/linearalgebra.jl
+++ b/NDTensors/ext/NDTensorsCUDAExt/linearalgebra.jl
@@ -41,3 +41,16 @@ function NDTensors.svd_catch_error(A::CuMatrix, ::CUDA.CUSOLVER.QRAlgorithm)
   end
   return USV
 end
+
+using NDTensors.GPUArraysCoreExtensions: cpu
+using NDTensors.Expose: Expose, expose, ql, ql_positive
+using NDTensors.TypeParameterAccessors: unwrap_array_type
+## TODO: CUDA.jl doesn't currently provide `ql`, so compute it on the CPU for now.
+function Expose.ql(A::Exposed{<:CuMatrix})
+  Q, L = ql(expose(cpu(A)))
+  return adapt(unwrap_array_type(A), Matrix(Q)), adapt(unwrap_array_type(A), L)
+end
+function Expose.ql_positive(A::Exposed{<:CuMatrix})
+  Q, L = ql_positive(expose(cpu(A)))
+  return adapt(unwrap_array_type(A), Matrix(Q)), adapt(unwrap_array_type(A), L)
+end
diff --git a/NDTensors/ext/NDTensorsCUDAExt/set_types.jl b/NDTensors/ext/NDTensorsCUDAExt/set_types.jl
index 9244e99d12..3b0d7a592a 100644
--- a/NDTensors/ext/NDTensorsCUDAExt/set_types.jl
+++ b/NDTensors/ext/NDTensorsCUDAExt/set_types.jl
@@ -1,7 +1,7 @@
 # TypeParameterAccessors definitions
 using NDTensors.TypeParameterAccessors: TypeParameterAccessors, Position
 using NDTensors.GPUArraysCoreExtensions: storagemode
-## TODO remove TypeParameterAccessors when SetParameters is removed
+
 function TypeParameterAccessors.position(::Type{<:CuArray}, ::typeof(eltype))
   return Position(1)
 end
diff --git a/NDTensors/src/adapt.jl b/NDTensors/src/adapt.jl
index cffebe862a..df5770224f 100644
--- a/NDTensors/src/adapt.jl
+++ b/NDTensors/src/adapt.jl
@@ -2,7 +2,6 @@ using .GPUArraysCoreExtensions: GPUArraysCoreExtensions
 
 adapt_structure(to, x::TensorStorage) = setdata(x, adapt(to, data(x)))
 adapt_structure(to, x::Tensor) = setstorage(x, adapt(to, storage(x)))
-## use unwrap cpu here because Expose is included before NDTensors
 function GPUArraysCoreExtensions.cpu(eltype::Type{<:Number}, x)
   return fmap(x -> adapt(Array{eltype}, x), x)
 end
diff --git a/NDTensors/src/imports.jl b/NDTensors/src/imports.jl
index 4d1c789e41..21d3bcd5d5 100644
--- a/NDTensors/src/imports.jl
+++ b/NDTensors/src/imports.jl
@@ -29,6 +29,7 @@ for lib in [
   :UnspecifiedTypes,
   :TypeParameterAccessors,
   :GPUArraysCoreExtensions,
+  :AMDGPUExtensions,
   :CUDAExtensions,
   :MetalExtensions,
   :Expose,
@@ -58,9 +59,10 @@ using Base.Cartesian: @nexprs
 
 using Base.Threads: @spawn
 
+using .AMDGPUExtensions: roc
 using .CUDAExtensions: cu
-using .MetalExtensions: mtl
 using .GPUArraysCoreExtensions: cpu
+using .MetalExtensions: mtl
 
 import Base:
   # Types
diff --git a/NDTensors/src/lib/AMDGPUExtensions/.JuliaFormatter.toml b/NDTensors/src/lib/AMDGPUExtensions/.JuliaFormatter.toml
new file mode 100644
index 0000000000..08f664cdb9
--- /dev/null
+++ b/NDTensors/src/lib/AMDGPUExtensions/.JuliaFormatter.toml
@@ -0,0 +1,2 @@
+style = "blue"
+indent = 2
diff --git a/NDTensors/src/lib/AMDGPUExtensions/src/AMDGPUExtensions.jl b/NDTensors/src/lib/AMDGPUExtensions/src/AMDGPUExtensions.jl
new file mode 100644
index 0000000000..e9e77e6cc5
--- /dev/null
+++ b/NDTensors/src/lib/AMDGPUExtensions/src/AMDGPUExtensions.jl
@@ -0,0 +1,4 @@
+module AMDGPUExtensions
+include("roc.jl")
+
+end
diff --git a/NDTensors/src/lib/AMDGPUExtensions/src/roc.jl b/NDTensors/src/lib/AMDGPUExtensions/src/roc.jl
new file mode 100644
index 0000000000..2cd0aca64f
--- /dev/null
+++ b/NDTensors/src/lib/AMDGPUExtensions/src/roc.jl
@@ -0,0 +1,14 @@
+using NDTensors.TypeParameterAccessors: TypeParameterAccessors, Position
+using NDTensors.GPUArraysCoreExtensions: storagemode
+# Implemented in NDTensorsAMDGPUExt
+function roc end
+
+## Here we need an ROCArrayAdaptor to prevent conversion of 64 bit numbers to 32 bit.
+## We cannot write `adapt(ROCVector, x)` because this
+## will not allow us to properly utilize the buffer preference without changing the value of
+## default_buffertype. Also `adapt(ROCVector{<:Any, <:Any, Buffertype})` fails to work properly.
+struct ROCArrayAdaptor{B} end
+
+function TypeParameterAccessors.position(::Type{<:ROCArrayAdaptor}, ::typeof(storagemode))
+  return Position(1)
+end
diff --git a/NDTensors/src/lib/AMDGPUExtensions/test/runtests.jl b/NDTensors/src/lib/AMDGPUExtensions/test/runtests.jl
new file mode 100644
index 0000000000..da274f21da
--- /dev/null
+++ b/NDTensors/src/lib/AMDGPUExtensions/test/runtests.jl
@@ -0,0 +1,9 @@
+@eval module $(gensym())
+using Test: @testset, @test
+using NDTensors.AMDGPUExtensions: roc, ROCArrayAdaptor
+using NDTensors.GPUArraysCoreExtensions: storagemode
+@testset "roc and ROCArrayAdaptor" begin
+  @test roc isa Function
+  @test storagemode(ROCArrayAdaptor{1}) == 1
+end
+end
diff --git a/NDTensors/src/lib/CUDAExtensions/src/cuda.jl b/NDTensors/src/lib/CUDAExtensions/src/cuda.jl
index 36b793f748..9fa41e2f5b 100644
--- a/NDTensors/src/lib/CUDAExtensions/src/cuda.jl
+++ b/NDTensors/src/lib/CUDAExtensions/src/cuda.jl
@@ -1,9 +1,9 @@
 using NDTensors.TypeParameterAccessors: TypeParameterAccessors
 using NDTensors.GPUArraysCoreExtensions: storagemode
-# Implemented in `ITensorGPU` and NDTensorCUDA
+# Implemented in `ITensorGPU` and NDTensorsCUDAExt
 function cu end
 
-## Here we need an NDTensorCuArrayAdaptor because the CuArrayAdaptor provided by CUDA
+## Here we need our own `CuArrayAdaptor` because the `CuArrayAdaptor` provided by CUDA.jl
 ## converts 64 bit numbers to 32 bit. We cannot write `adapt(CuVector, x)` because this
 ## Will not allow us to properly utilize the buffer preference without changing the value of
 ## default_buffertype. Also `adapt(CuVector{<:Any, <:Any, Buffertype})` fails to work properly
diff --git a/NDTensors/src/linearalgebra/linearalgebra.jl b/NDTensors/src/linearalgebra/linearalgebra.jl
index d880cb5cfa..bd1690d127 100644
--- a/NDTensors/src/linearalgebra/linearalgebra.jl
+++ b/NDTensors/src/linearalgebra/linearalgebra.jl
@@ -389,11 +389,6 @@ function ql_positive(M::AbstractMatrix)
   # TODO: Change to `isgpu`, or better yet rewrite
   # in terms of broadcasting and linear algebra
   # like `qr_positive`.
-  iscuda = iscu(M)
-  if iscuda
-    cutype = unwrap_array_type(M)
-    M = NDTensors.cpu(M)
-  end
   sparseQ, L = ql(M)
   Q = convert(typeof(L), sparseQ)
   nr, nc = size(L)
@@ -407,10 +402,6 @@ function ql_positive(M::AbstractMatrix)
       end
     end
   end
-  if iscuda
-    Q = adapt(cutype, Q)
-    L = adapt(cutype, L)
-  end
   return (Q, L)
 end
 
@@ -423,16 +414,7 @@ function ql(A::AbstractMatrix)
   T = eltype(A)
   AA = similar(A, LinearAlgebra._qreltype(T), size(A))
   copyto!(expose(AA), expose(A))
-  iscuda = iscu(AA)
-  if iscuda
-    cutype = unwrap_array_type(AA)
-    AA = NDTensors.cpu(AA)
-  end
   Q, L = ql!(AA)
-  if iscuda
-    Q = adapt(cutype, Q)
-    L = adapt(cutype, L)
-  end
   return (Q, L)
 end
 #
@@ -440,6 +422,8 @@ end
 # about unpacking Q and L from the A matrix.
 #
 function ql!(A::StridedMatrix{<:LAPACK.BlasFloat})
+  ## TODO: Is this check really necessary here? We could define an `Expose` method if
+  ## we need this function on CUDA/GPU.
   if iscu(A)
     throw("Error: ql is not implemented in CUDA.jl")
   end
diff --git a/NDTensors/test/ITensors/TestITensorDMRG/TestITensorDMRG.jl b/NDTensors/test/ITensors/TestITensorDMRG/TestITensorDMRG.jl
index b1b63ab483..d431edb189 100644
--- a/NDTensors/test/ITensors/TestITensorDMRG/TestITensorDMRG.jl
+++ b/NDTensors/test/ITensors/TestITensorDMRG/TestITensorDMRG.jl
@@ -5,6 +5,7 @@ module TestITensorDMRG
 using ITensors
 using NDTensors
 using NDTensors.CUDAExtensions: cu
+using NDTensors.AMDGPUExtensions: roc
 using Random
 
 reference_energies = Dict([
@@ -12,7 +13,10 @@ reference_energies = Dict([
 ])
 
 is_broken(dev, elt::Type, conserve_qns::Val) = false
+## Disable blocksparse GPU testing on the CUDA and ROCm backends while
+## we work on the blocksparse GPU backend. In the future these will work too.
 is_broken(dev::typeof(cu), elt::Type, conserve_qns::Val{true}) = true
+is_broken(dev::typeof(roc), elt::Type, conserve_qns::Val{true}) = true
 
 include("dmrg.jl")
 
diff --git a/NDTensors/test/NDTensorsTestUtils/device_list.jl b/NDTensors/test/NDTensorsTestUtils/device_list.jl
index b4b3f58d21..8e105f6fe5 100644
--- a/NDTensors/test/NDTensorsTestUtils/device_list.jl
+++ b/NDTensors/test/NDTensorsTestUtils/device_list.jl
@@ -2,6 +2,9 @@ using NDTensors: NDTensors
 if "cuda" in ARGS || "all" in ARGS
   using CUDA
 end
+if "rocm" in ARGS || "all" in ARGS
+  using AMDGPU
+end
 if "metal" in ARGS || "all" in ARGS
   using Metal
 end
@@ -22,6 +25,10 @@ function devices_list(test_args)
     end
   end
 
+  if "rocm" in test_args || "all" in test_args
+    push!(devs, NDTensors.AMDGPUExtensions.roc)
+  end
+
   if "metal" in test_args || "all" in test_args
     push!(devs, NDTensors.MetalExtensions.mtl)
   end
diff --git a/NDTensors/test/Project.toml b/NDTensors/test/Project.toml
index 7e4cbc9661..a460653466 100644
--- a/NDTensors/test/Project.toml
+++ b/NDTensors/test/Project.toml
@@ -23,4 +23,5 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [extras]
+AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
 Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
diff --git a/NDTensors/test/lib/runtests.jl b/NDTensors/test/lib/runtests.jl
index 65038f82a2..619bfa1c18 100644
--- a/NDTensors/test/lib/runtests.jl
+++ b/NDTensors/test/lib/runtests.jl
@@ -4,6 +4,7 @@ using Test: @testset
 @testset "Test NDTensors lib $lib" for lib in [
   "AlgorithmSelection",
   "AllocateData",
+  "AMDGPUExtensions",
   "BaseExtensions",
   "BlockSparseArrays",
   "BroadcastMapConversion",
diff --git a/NDTensors/test/runtests.jl b/NDTensors/test/runtests.jl
index 6d81096d07..66366562c1 100644
--- a/NDTensors/test/runtests.jl
+++ b/NDTensors/test/runtests.jl
@@ -2,6 +2,7 @@ using SafeTestsets: @safetestset
 
 @safetestset "NDTensors" begin
   using Test: @testset
+  using NDTensors: NDTensors
   @testset "$(@__DIR__)" begin
     filenames = filter(readdir(@__DIR__)) do f
       startswith("test_")(f) && endswith(".jl")(f)
@@ -15,11 +16,9 @@ using SafeTestsets: @safetestset
     end
   end
   if "cuda" in ARGS || "all" in ARGS
-    using NDTensors: NDTensors
     include(joinpath(pkgdir(NDTensors), "ext", "examples", "NDTensorCUDA.jl"))
   end
   if "metal" in ARGS || "all" in ARGS
-    using NDTensors: NDTensors
     include(joinpath(pkgdir(NDTensors), "ext", "examples", "NDTensorMetal.jl"))
   end
 end
diff --git a/NDTensors/test/test_linearalgebra.jl b/NDTensors/test/test_linearalgebra.jl
index 289b2c4625..96bde8efdf 100644
--- a/NDTensors/test/test_linearalgebra.jl
+++ b/NDTensors/test/test_linearalgebra.jl
@@ -39,6 +39,11 @@ end
   if !is_supported_eltype(dev, elt)
     continue
   end
+  ## It looks like AMDGPU.jl has an issue with QR when A is singular.
+  ## TODO: potentially add an `is_broken` function for this?
+  if dev == NDTensors.AMDGPUExtensions.roc && singular
+    continue
+  end
   eps = Base.eps(real(elt)) * 100 #this is set rather tight, so if you increase/change m,n you may have open up the tolerance on eps.
   n, m = 4, 8
   Id = Diagonal(fill(1.0, min(n, m)))
diff --git a/src/mps/dmrg.jl b/src/mps/dmrg.jl
index 6eb753eac9..80e8af50d8 100644
--- a/src/mps/dmrg.jl
+++ b/src/mps/dmrg.jl
@@ -251,6 +251,7 @@ function dmrg(
         ## Right now there is a conversion problem in CUDA.jl where `UnifiedMemory` Arrays are being converted
         ## into `DeviceMemory`. This conversion line is here temporarily to fix that problem when it arises
         ## Adapt is only called when using CUDA backend. CPU will work as implemented previously.
+        ## TODO: this might be the only place we really need `iscu`, if that conversion problem is not fixed.
         phi::ITensor = if NDTensors.iscu(phi) && NDTensors.iscu(vecs[1])
           adapt(set_eltype(unwrap_array_type(phi), eltype(vecs[1])), vecs[1])
         else