From db77840168b49dd393dca7fc1e1051b4d488371a Mon Sep 17 00:00:00 2001 From: Anton Smirnov Date: Sat, 5 Aug 2023 14:52:23 +0300 Subject: [PATCH] Update docs (#459) --- docs/src/api.md | 15 +-- docs/src/index.md | 155 ++++++++++++++++-------------- docs/src/quickstart.md | 42 -------- src/device/gcn/synchronization.jl | 1 + src/highlevel.jl | 22 ++++- 5 files changed, 103 insertions(+), 132 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index c5a2c5bb..f4f46577 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -1,12 +1,11 @@ # AMDGPU API Reference -## Kernel launching +## Synchronization ```@docs -@roc -AMDGPU.AbstractKernel -AMDGPU.HostKernel -AMDGPU.rocfunction +AMDGPU.synchronize +AMDGPU.@sync +AMDGPU.device_synchronize ``` ## Device code API @@ -38,9 +37,3 @@ AMDGPU.Device.blockDim ```@docs AMDGPU.sync_workgroup ``` - -### Global Variables - -```@docs -AMDGPU.Device.get_global_pointer -``` diff --git a/docs/src/index.md b/docs/src/index.md index 9ea9ab54..4eaf8025 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,107 +1,114 @@ # Programming AMD GPUs with Julia -!!! tip - This documentation assumes that you are familiar with the main concepts of GPU programming and mostly describes the specifics of running Julia code on AMD GPUs. - For a much more gentle introduction to GPGPU with Julia consult the well-written [CUDA.jl documentation](https://cuda.juliagpu.org/stable/). +## The Julia AMDGPU stack -## The ROCm stack +Julia support for programming AMD GPUs is currently provided by the +[AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) package. +This package contains everything necessary to program for AMD GPUs in Julia, including: -ROCm (short for Radeon Open Compute platforM) is AMD's open-source GPU computing platform, supported by most modern AMD GPUs ([detailed hardware support](https://github.com/RadeonOpenCompute/ROCm#hardware-and-software-support)) and some AMD APUs. 
-ROCm works solely on Linux and no plans to support either Windows or macOS have been announced by AMD. +* An interface for compiling and running kernels written in Julia through LLVM's AMDGPU backend. +* An interface for working with the HIP runtime API, + necessary for launching compiled kernels and controlling the GPU. +* An array type implementing the [GPUArrays.jl](https://github.com/JuliaGPU/GPUArrays.jl) + interface, providing high-level array operations. -A necessary prerequisite to use this Julia package is to have a working ROCm stack installed. -A quick way to verify this is to check the output of `rocminfo`. -For more information, consult the official [installation guide](https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html). -Even though the only platforms officially supported by AMD are certain versions of Ubuntu, CentOS, RHEL, and SLES [^1], there are options to install ROCm on other Linux distributions, including: - * Arch Linux - See the [rocm-arch](https://github.com/rocm-arch/rocm-arch) repository or the slightly older PKGBUILDs in the AUR. - * Gentoo - Check Portage for the `rocr-runtime` package and [justxi's rocm repo](https://github.com/justxi/rocm) for unofficial ROCm package ebuilds. +## Installation -[^1]: +Simply add the AMDGPU.jl package to your Julia environment: -## The Julia AMDGPU stack +```julia +using Pkg +Pkg.add("AMDGPU") +``` -Julia support for programming AMD GPUs is currently provided by the [AMDGPU.jl package](https://github.com/jpsamaroo/AMDGPU.jl). This package contains everything necessary to program for AMD GPUs in Julia, including: +To ensure that everything works, you can run the test suite: -* An interface for working with the HSA runtime API, necessary for launching compiled kernels and controlling the GPU. -* An interface for compiling and running kernels written in Julia through LLVM's AMDGPU backend. 
-* An array type implementing the [GPUArrays.jl](https://github.com/JuliaGPU/GPUArrays.jl) interface, providing high-level array operations.
+```julia
+using AMDGPU
+using Pkg
+Pkg.test("AMDGPU")
+```

## Required Software

-* [ROCT](https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface) (JLL available)
-* [ROCR](https://github.com/RadeonOpenCompute/ROCR-Runtime) (JLL available)
-* [ROCm-Device-Libs](https://github.com/RadeonOpenCompute/ROCm-Device-Libs) (JLL available)
-* [HIP](https://github.com/ROCm-Developer-Tools/HIP) (JLL available)
-* Recent Linux kernel with AMDGPU and HSA enabled (Cannot be provided as a JLL)
-* `ld.lld` binary provided by system LLVM (No JLL yet)
+For optimal experience, you should have full ROCm stack installed.
+Refer to official ROCm stack installation instructions:

-### Optional Packages
+Currently, AMDGPU.jl utilizes following libraries:

-* [rocBLAS](https://github.com/ROCmSoftwarePlatform/rocBLAS) for BLAS support (JLL available)
-* [rocFFT](https://github.com/ROCmSoftwarePlatform/rocFFT) for FFT support (No JLL yet)
-* [rocRAND](https://github.com/ROCmSoftwarePlatform/rocRAND) for RNG support (JLL available)
-* [MIOpen](https://github.com/ROCmSoftwarePlatform/MIOpen) for DNN support (JLL available on Julia 1.9)
+* [ROCT](https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface)
+* [ROCR](https://github.com/RadeonOpenCompute/ROCR-Runtime)
+* [ROCm-Device-Libs](https://github.com/RadeonOpenCompute/ROCm-Device-Libs)
+* [HIP](https://github.com/ROCm-Developer-Tools/HIP)
+* [rocBLAS](https://github.com/ROCmSoftwarePlatform/rocBLAS) for BLAS support
+* [rocFFT](https://github.com/ROCmSoftwarePlatform/rocFFT) for FFT support
+* [rocSOLVER](https://github.com/ROCmSoftwarePlatform/rocSOLVER) for LAPACK support
+* [rocRAND](https://github.com/ROCmSoftwarePlatform/rocRAND) for RNG support
+* 
[MIOpen](https://github.com/ROCmSoftwarePlatform/MIOpen) for DNN support Other ROCm packages are currently unused by AMDGPU. -### JLL usage +### ROCm artifacts -By default, AMDGPU provides and uses JLL packages for core libraries, -so as long as `ld.lld` is available, you should be all set for most basic functionality. -For example, Julia 1.9 provides ROCm 5.2.3 libraries. +Currently AMDGPU.jl does not provide ROCm artifacts. +One needs to build a newer version of them. +See #440 issue: . -If this does not work for you, or if you have a full ROCm installation available -on your system (common for HPC/supercomputer users), -you can set the `JULIA_AMDGPU_DISABLE_ARTIFACTS` environment variable to "1" -to disable usage of JLL artifacts: +### LLVM compatibility -```bash -JULIA_AMDGPU_DISABLE_ARTIFACTS=1 julia --project=. -``` +As a rule of thumb, Julia's LLVM version should match ROCm LLVM's version. +For example, Julia 1.9 relies on LLVM 14, so the matching ROCm version is `5.2.x` +(although `5.4` is confirmed to work as well). -Note that currently ROCm-Device-Libs are always provided by AMDGPU to ensure -compatibility with Julia's version of LLVM; please file an issue if this is -problematic on your system. +### Extra Setup Details -!!! note "LLVM compatibility" - For proper support, Julia's LLVM version should match ROCm LLVM's version. - For example, Julia 1.9 relies on LLVM 14, - so the matching ROCm version is `5.2.x`. +List of additional steps that may be needed to take to ensure everything is working: -### Extra Setup Details +- Make sure your user is in the same group as `/dev/kfd`, other than `root`. -Currently, the requirements to get everything working properly is a bit poorly -documented in the upstream docs for any distro other than Ubuntu. 
-So here is a list of requirements I've found through the process of making this work: + For example, it might be the `render` group: -- Make sure `/dev/kfd` has a group other than root that you can add your user to. - I recommend adding your user to the `video` group, and setting the - ownership of `/dev/kfd` to `root:video` with `660` permissions. + ``` + crw-rw---- 1 root render 234, 0 Aug 5 11:43 kfd + ``` -- These libraries should be in the standard library locations, or in your `LD_LIBRARY_PATH`: - * libhsakmt.so - * libhsa-runtime64.so.1 - * libamdhip64.so + In this case, you can add yourself to it: -- And `ld.lld` should be in your `PATH`. + ``` + sudo usermod -aG render username + ``` -In terms of Linux kernel versions, just pick the newest one you can. If -building your own kernel, make sure all the regular AMDGPU and HSA options are -enabled. +- ROCm libraries should be in the standard library locations, or in your `LD_LIBRARY_PATH`. -Once all of this is setup properly, you should be able to do `using AMDGPU` -successfully. See the Quickstart documentation for an introduction to using -AMDGPU.jl. +- If you get an error message along the lines of `GLIB_CXX_... not found`, + it's possible that the C++ runtime used to build the ROCm stack + and the one used by Julia are different. + If you built the ROCm stack yourself this is very likely the case + since Julia normally ships with its own C++ runtime. -#### Navi 2 (GFX103x) support + For more information, check out this [GitHub issue](https://github.com/JuliaLang/julia/issues/34276). + A quick fix is to use the `LD_PRELOAD` environment variable to make Julia use the system C++ runtime library, for example: -ROCm stack officially supports only GFX1030 (6900XT). -However, the ISA between GFX103x devices is nearly identical (if not identical). 
+ ``` + LD_PRELOAD=/usr/lib/libstdc++.so julia + ``` -Therefore, if you have any other GFX103x device, -you can override your gfx version with `HSA_OVERRIDE_GFX_VERSION=10.3.0` env variable -before launching Julia and be able to use your device: + Alternatively, you can build Julia from source as described + [here](https://github.com/JuliaLang/julia/blob/master/doc/build/build.md). + To quickly debug this issue start Julia and try to load a ROCm library: -```bash -HSA_OVERRIDE_GFX_VERSION=10.3.0 julia --project=. -``` + ``` + using Libdl + Libdl.dlopen("/opt/rocm/hsa/lib/libhsa-runtime64.so.1") + ``` + +- `ld.lld` should be in your `PATH`. + +- For better experience use whatever Linux kernel + is officially supported by ROCm stack. + + +Once all of this is setup properly, you should be able to do `using AMDGPU` +successfully. + +See the [Quick Start](@ref) documentation for an introduction to using AMDGPU.jl. diff --git a/docs/src/quickstart.md b/docs/src/quickstart.md index d331727e..e2cc128c 100644 --- a/docs/src/quickstart.md +++ b/docs/src/quickstart.md @@ -1,47 +1,5 @@ # Quick Start -## Installation - -See [JLL usage](@ref) for info about ROCm stack installation. -Simply add the AMDGPU.jl package to your Julia environment: - -```julia -using Pkg -Pkg.add("AMDGPU") -``` - -You can then load the `AMDGPU` package and run the unit tests: - -```julia -using AMDGPU -using Pkg -Pkg.test("AMDGPU") -``` - -!!! warning - If you get an error message along the lines of `GLIB_CXX_... not found`, - it's possible that the C++ runtime used to build the ROCm stack and the one used by Julia are different. - If you built the ROCm stack yourself this is very likely the case since Julia normally ships with its own C++ runtime. - For more information, check out this [GitHub issue](https://github.com/JuliaLang/julia/issues/34276). 
-
-    A quick fix is to use the `LD_PRELOAD` environment variable to make Julia use the system C++ runtime library, for example:
-
-    ```sh
-    LD_PRELOAD=/usr/lib/libstdc++.so julia
-    ```
-
-    Alternatively, you can build Julia from source as described [here](https://github.com/JuliaLang/julia/blob/master/doc/build/build.md).
-
-    You can quickly debug this issue by starting Julia and trying to load a ROCm library:
-
-    ```julia
-    using Libdl
-    Libdl.dlopen("/opt/rocm/hsa/lib/libhsa-runtime64.so.1")
-    ```
-
-!!! warning
-    If during the build process you get an error message along the lines of `hipErrorNoBinaryForGpu: Coudn't find binary for current devices!` and you already have ROCm installed locally then you should set the environment variable `JULIA_AMDGPU_DISABLE_ARTIFACTS=1` and reload AMDGPU.jl.
-
 ## Running a simple kernel
 
 As a simple test, we will try to add two random vectors
diff --git a/src/device/gcn/synchronization.jl b/src/device/gcn/synchronization.jl
index eafaa629..15e43ab6 100644
--- a/src/device/gcn/synchronization.jl
+++ b/src/device/gcn/synchronization.jl
@@ -2,5 +2,6 @@
     sync_workgroup()
 
 Waits until all wavefronts in a workgroup have reached this call.
+This function is meant to be used inside kernels.
 """
 @inline sync_workgroup() = ccall("llvm.amdgcn.s.barrier", llvmcall, Cvoid, ())
diff --git a/src/highlevel.jl b/src/highlevel.jl
index 87e1488e..853a7c99 100644
--- a/src/highlevel.jl
+++ b/src/highlevel.jl
@@ -146,13 +146,15 @@ priority!(f::Base.Callable, priority::Symbol) = task_local_state!(f; priority)
 default_isa(device::HIPDevice) = Runtime.default_isa(Runtime.hsa_device(device))
 
 """
-    synchronize(stream::HIPStream = stream())
+    synchronize(stream::HIPStream = stream(); blocking::Bool = true)
 
 Blocks until all kernels currently executing on `stream` have completed.
+
+If there are running HostCalls, then non-blocking synchronization is required,
+which can be done with the `blocking=false` keyword.
+Additionally, it stops any running global hostcall afterwards.
+Note that non-blocking synchronization is slower than blocking.
 """
-# TODO
-# allow non blocking sync of several HIPStreams
-# and only then disable global hostcall
 function synchronize(stm::HIPStream = stream(); blocking::Bool = true)
     throw_if_exception(stm.device)
     HIP.synchronize(stm; blocking)
@@ -175,11 +177,15 @@ function synchronize(stm::HIPStream = stream(); blocking::Bool = true)
     end
     return
 end
+# TODO
+# allow non blocking sync of several HIPStreams
+# and only then disable global hostcall
 
 """
     @sync ex
 
-Run expression `ex` and synchronize the GPU afterwards.
+Run expression `ex` on the currently active stream
+and synchronize the GPU on that stream afterwards.
 
 See also: [`synchronize`](@ref).
 """
@@ -191,6 +197,12 @@ macro sync(ex)
     end
 end
 
+"""
+Blocks until all kernels on all streams have completed.
+Uses the currently active device.
+"""
+device_synchronize() = HIP.device_synchronize()
+
 """
     rocconvert(x)