Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Atomix #1790

Draft
wants to merge 32 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
a82a844
Implement KernelAbstractions backend in CUDA.jl
vchuravy Feb 13, 2023
f510c50
Use Atomix
vchuravy Mar 10, 2023
871ea08
Use cas
vchuravy Mar 10, 2023
67c2bcd
fixup! Use cas
vchuravy Mar 10, 2023
c00d65b
Support load and store of Int8, Int16
vchuravy Mar 10, 2023
df39575
be less stupid
vchuravy Mar 10, 2023
6d58044
fixup! be less stupid
vchuravy Mar 10, 2023
ce4482d
fix modify! implementation
vchuravy Mar 10, 2023
b4fd958
fixup! fix modify! implementation
vchuravy Mar 10, 2023
e22f8f6
fix atomic usage in linalg.jl
vchuravy Mar 11, 2023
ecef692
fixup! fix atomic usage in linalg.jl
vchuravy Mar 11, 2023
7ae6cca
add error for SM_60
vchuravy Mar 11, 2023
80c6ab6
Fixup modify
vchuravy Mar 11, 2023
aa62586
skip shmem for now
vchuravy Mar 11, 2023
6325ee9
fix volatile load
vchuravy Mar 11, 2023
c7c6f33
add more low-level tests
vchuravy Mar 11, 2023
6192f44
cleanup and use egal
vchuravy Mar 12, 2023
7437924
add fallback for < sm_60
vchuravy Mar 12, 2023
5d63f5f
add __cas_volatile_16 and global/shared
vchuravy Mar 12, 2023
450808b
no I am not losing my mind
vchuravy Mar 12, 2023
353e036
fixup! no I am not losing my mind
vchuravy Mar 12, 2023
334fad2
cleanup cas
vchuravy Mar 12, 2023
68bf406
try to test sm_60, sm_35
vchuravy Mar 12, 2023
f248574
fix yet another silly mistake
vchuravy Mar 14, 2023
992630e
walkback sm_50 support
vchuravy Mar 15, 2023
fa0e3a2
fix CAS call
vchuravy Mar 15, 2023
c9396fe
fix threadfence on sm_60
vchuravy Mar 15, 2023
2f59488
fixup! fix CAS call
vchuravy Mar 15, 2023
f937f11
fix store_volatile! call
vchuravy Mar 15, 2023
f7e938a
test Float16 and BFloat16
vchuravy Mar 15, 2023
aed3933
fix sizeof checks in cas
vchuravy Mar 15, 2023
66fadf5
handle byte size cas
vchuravy Mar 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 54 additions & 8 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

[[AbstractFFTs]]
deps = ["ChainRulesCore", "LinearAlgebra"]
git-tree-sha1 = "69f7020bd72f069c219b5e8c236c1fa90d2cb409"
git-tree-sha1 = "16b6dbc4cf7caee4e1e75c49485ec67b667098a0"
uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
version = "1.2.1"
version = "1.3.1"

[[Adapt]]
deps = ["LinearAlgebra", "Requires"]
Expand All @@ -18,6 +18,12 @@ uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
[[Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"

[[Atomix]]
deps = ["UnsafeAtomics"]
git-tree-sha1 = "c06a868224ecba914baa6942988e2f2aade419be"
uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
version = "0.1.0"

[[BFloat16s]]
deps = ["LinearAlgebra", "Printf", "Random", "Test"]
git-tree-sha1 = "dbf84058d0a8cbbadee18d25cf606934b22d7c66"
Expand Down Expand Up @@ -64,9 +70,9 @@ version = "0.1.6"

[[Compat]]
deps = ["Dates", "LinearAlgebra", "UUIDs"]
git-tree-sha1 = "61fdd77467a5c3ad071ef8277ac6bd6af7dd4c04"
git-tree-sha1 = "7a60c856b9fa189eb34f5f8a6f6b5529b7942957"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "4.6.0"
version = "4.6.1"

[[CompilerSupportLibraries_jll]]
deps = ["Artifacts", "Libdl"]
Expand All @@ -87,9 +93,9 @@ deps = ["ArgTools", "LibCURL", "NetworkOptions"]
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"

[[ExprTools]]
git-tree-sha1 = "56559bbef6ca5ea0c0818fa5c90320398a6fbf8d"
git-tree-sha1 = "c1d06d129da9f55715c6c212866f5b1bddc5fa00"
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
version = "0.1.8"
version = "0.1.9"

[[GPUArrays]]
deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"]
Expand All @@ -105,9 +111,9 @@ version = "0.1.4"

[[GPUCompiler]]
deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"]
git-tree-sha1 = "95185985a5d2388c6d0fedb06181ad4ddd40e0cb"
git-tree-sha1 = "19d693666a304e8c371798f4900f7435558c7cde"
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
version = "0.17.2"
version = "0.17.3"

[[InteractiveUtils]]
deps = ["Markdown"]
Expand All @@ -130,6 +136,12 @@ git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1"
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
version = "1.4.1"

[[KernelAbstractions]]
deps = ["Adapt", "Atomix", "InteractiveUtils", "LinearAlgebra", "MacroTools", "SnoopPrecompile", "SparseArrays", "StaticArrays", "UUIDs", "UnsafeAtomics", "UnsafeAtomicsLLVM"]
git-tree-sha1 = "17d0bb94eef881b09c57967be12cca70fefb3304"
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
version = "0.9.0"

[[LLVM]]
deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"]
git-tree-sha1 = "df115c31f5c163697eede495918d8e85045c8f04"
Expand Down Expand Up @@ -178,6 +190,12 @@ version = "0.3.23"
[[Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"

[[MacroTools]]
deps = ["Markdown", "Random"]
git-tree-sha1 = "42324d08725e200c23d4dfb549e0d5d89dede2d2"
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
version = "0.5.10"

[[Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
Expand Down Expand Up @@ -253,6 +271,12 @@ uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
[[Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"

[[SnoopPrecompile]]
deps = ["Preferences"]
git-tree-sha1 = "e760a70afdcd461cf01a575947738d359234665c"
uuid = "66db9d55-30c0-4569-8b51-7e840670fc0c"
version = "1.0.3"

[[Sockets]]
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"

Expand All @@ -266,6 +290,17 @@ git-tree-sha1 = "ef28127915f4229c971eb43f3fc075dd3fe91880"
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
version = "2.2.0"

[[StaticArrays]]
deps = ["LinearAlgebra", "Random", "StaticArraysCore", "Statistics"]
git-tree-sha1 = "2d7d9e1ddadc8407ffd460e24218e37ef52dd9a3"
uuid = "90137ffa-7385-5640-81b9-e52037218182"
version = "1.5.16"

[[StaticArraysCore]]
git-tree-sha1 = "6b7ba252635a5eff6a0b0664a41ee140a1c9e72a"
uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
version = "1.4.0"

[[Statistics]]
deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Expand Down Expand Up @@ -295,6 +330,17 @@ uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[[Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

[[UnsafeAtomics]]
git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278"
uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f"
version = "0.2.1"

[[UnsafeAtomicsLLVM]]
deps = ["LLVM", "UnsafeAtomics"]
git-tree-sha1 = "33af9d2031d0dc09e2be9a0d4beefec4466def8e"
uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249"
version = "0.1.0"

[[Zlib_jll]]
deps = ["Libdl"]
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
Expand Down
6 changes: 6 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ version = "4.0.1"
[deps]
AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
CEnum = "fa961155-64e5-5f13-b03f-caf6b980ea82"
CUDA_Driver_jll = "4ee394cb-3365-5eb0-8335-949819d2adfc"
Expand All @@ -14,6 +15,7 @@ CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
Expand All @@ -28,10 +30,13 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
UnsafeAtomics = "013be700-e6cd-48c3-b4a1-df204f14c38f"
UnsafeAtomicsLLVM = "d80eeb9a-aca5-4d75-85e5-170c8b632249"

[compat]
AbstractFFTs = "0.4, 0.5, 1.0"
Adapt = "3.3"
Atomix = "0.1"
BFloat16s = "0.2, 0.3, 0.4"
CEnum = "0.2, 0.3, 0.4"
CUDA_Driver_jll = "0.2"
Expand All @@ -47,4 +52,5 @@ RandomNumbers = "1.5.3"
Reexport = "0.2, 1.0"
Requires = "0.5, 1.0"
SpecialFunctions = "1.3, 2"
UnsafeAtomicsLLVM = "0.1"
julia = "1.6"
2 changes: 1 addition & 1 deletion lib/cublas/linalg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ function LinearAlgebra.dot(x::AnyCuArray{T1}, y::AnyCuArray{T2}) where {T1,T2}
val = CUDA.reduce_block(+, local_val, zero(T), shuffle)
if threadIdx().x == 1i32
# NOTE: introduces nondeterminism
@inbounds CUDA.@atomic res[] += val
@inbounds CUDA.@atomic res[1i32] += val
end

return
Expand Down
4 changes: 4 additions & 0 deletions src/CUDA.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ include("../lib/nvml/NVML.jl")
const has_nvml = NVML.has_nvml
export NVML, has_nvml

# KernelAbstractions
include("CUDAKernels.jl")
export CUDABackend

include("precompile.jl")

end
Loading