openai-triton: update to v2.2.0, pass compiler and libcuda paths to runtime #292996

Closed
7 changes: 4 additions & 3 deletions pkgs/by-name/op/openai-triton-llvm/package.nix
@@ -45,7 +45,7 @@ let
isNative = stdenv.hostPlatform == stdenv.buildPlatform;
in stdenv.mkDerivation (finalAttrs: {
pname = "openai-triton-llvm";
version = "17.0.0-c5dede880d17";
version = "18.1.0-5e5a22caf88a";

outputs = [
"out"
@@ -60,8 +60,8 @@ in stdenv.mkDerivation (finalAttrs: {
src = fetchFromGitHub {
owner = "llvm";
repo = "llvm-project";
rev = "c5dede880d175f7229c9b2923f4753e12702305d";
hash = "sha256-v4r3+7XVFK+Dzxt/rErZNJ9REqFO3JmGN4X4vZ+77ew=";
rev = "5e5a22caf88ac1ccfa8dc5720295fdeba0ad9372";
hash = "sha256-hrpvBgW+Bn2ZPTdVAFCrCPcFH1r1yXsuxToFueJXvNU=";
};

nativeBuildInputs = [
@@ -74,6 +74,7 @@ in stdenv.mkDerivation (finalAttrs: {
doxygen
sphinx
python3Packages.recommonmark
python3Packages.myst-parser
];

buildInputs = [

This file was deleted.

This file was deleted.

52 changes: 26 additions & 26 deletions pkgs/development/python-modules/openai-triton/default.nix
@@ -19,6 +19,7 @@
, filelock
, torchWithRocm
, python
, writeScriptBin

, runCommand

@@ -27,33 +28,23 @@
}:

let
ptxas = "${cudaPackages.cuda_nvcc}/bin/ptxas"; # Make sure cudaPackages is the right version each update (See python/setup.py)
mkBinaryStub = name: "${writeScriptBin name ''
echo binary ${name} is not available: openai-triton was built without CUDA support
''}/bin/${name}";
in
buildPythonPackage rec {
pname = "triton";
version = "2.1.0";
version = "2.2.0";
pyproject = true;

src = fetchFromGitHub {
owner = "openai";
repo = pname;
rev = "v${version}";
hash = "sha256-8UTUwLH+SriiJnpejdrzz9qIquP2zBp1/uwLdHmv0XQ=";
# Release v2.2.0 is not tagged, but published on pypi: https://github.com/openai/triton/issues/3160
rev = "0e7b97bd47fc4beb21ae960a516cd9a7ae9bc060";
hash = "sha256-UdxoHkFnFFBfvGa/NvgvGebbtwGYbrAICQR9JZ4nvYo=";
};

patches = [
# fix overflow error
(fetchpatch {
url = "https://github.com/openai/triton/commit/52c146f66b79b6079bcd28c55312fc6ea1852519.patch";
hash = "sha256-098/TCQrzvrBAbQiaVGCMaF3o5Yc3yWDxzwSkzIuAtY=";
})
] ++ lib.optionals (!cudaSupport) [
./0000-dont-download-ptxas.patch
# openai-triton wants to get ptxas version even if ptxas is not
# used, resulting in ptxas not found error.
./0001-ptxas-disable-version-key-for-non-cuda-targets.patch
];

nativeBuildInputs = [
setuptools
pythonRelaxDepsHook
@@ -111,6 +102,11 @@ buildPythonPackage rec {
# Use our linker flags
substituteInPlace python/triton/common/build.py \
--replace '${oldStr}' '${newStr}'
  # triton/common/build.py is called both at build time and, sometimes, at runtime.
substituteInPlace python/triton/common/build.py \
--replace 'os.getenv("TRITON_LIBCUDA_PATH")' '"${cudaPackages.cuda_cudart}/lib"'
@SomeoneSerge (Contributor) commented on Mar 3, 2024:

The environment variable says "libcuda" (the userspace driver), not "libcudart"? Is this a confusion on upstream's side?

This is cursed.

Yes, and this is intentional: isn't triton literally a tool for compiling kernels on the fly from some subset of Python?

Contributor:

Also, wasn't there an attempt to make CUDA stuff optional in triton? In that case we don't want to refer to backendStdenv but to the conditional stdenv (otherwise the cpu-only version pulls two different GCCs into the closure)
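
As a sketch of that idea, something like the following would let a CPU-only build avoid backendStdenv entirely (hypothetical names, not this PR's code):

```nix
# Hypothetical sketch (not this PR's code): choose the toolchain once, so a
# CPU-only build does not pull cudaPackages.backendStdenv's GCC into the closure.
{ stdenv, cudaPackages, cudaSupport ? false }:

let
  # Use the CUDA-compatible stdenv only when CUDA support is requested.
  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
in
{
  # Everything that currently hard-codes cudaPackages.backendStdenv.cc would
  # refer to effectiveStdenv.cc instead.
  CC = "${effectiveStdenv.cc}/bin/cc";
  CXX = "${effectiveStdenv.cc}/bin/c++";
}
```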

CertainLach (Member, PR author):

> The environment variable says "libcuda" (the userspace driver), not "libcudart"? Is this a confusion on upstream's side?

It wants libcuda.so.1; I'm not sure where I need to look for it?

> Yes, and this is intentional: isn't triton literally a tool for compiling kernels on the fly from some subset of Python?

The cursed part is that the build and runtime steps are closely intermixed, but the build step doesn't have a way to provide some values for runtime.
It provides ptxas as a third_party binary, but for libcuda and the rest it expects to run with the same binaries it was built with, which is not necessarily true; I'm sure some things there are not ABI-compatible. I see that in the 3.0.0 version the build process makes much more sense.

> Also, wasn't there an attempt to make CUDA stuff optional in triton? In that case we don't want to refer to backendStdenv but to the conditional stdenv (otherwise the cpu-only version pulls two different GCCs into the closure)

The CC variable override is only enabled with cudaSupport; otherwise it doesn't try to call CC at runtime, and the build-time CC is enough (at least, I haven't experienced that with vLLM).

Contributor:

> It wants libcuda.so.1; I'm not sure where I need to look for it?

Libcuda depends on the (nvidia) kernel module that runs on the user's machine, so we don't link it through the nix store; we link it through /run/opengl-driver/lib, i.e. ${addDriverRunpath.driverLink}/lib.

More specifically, we use the fake driver ${getLib cudaPackages.cuda_cudart}/lib/stubs at build/link time, and ${addDriverRunpath.driverLink}/lib at runtime. It's also important that at runtime we first try dlopen("libcuda.so", ...), and only then dlopen("/run/opengl-driver/lib/libcuda.so", ...), because we want things to also work on FHS distributions and respect the optional LD_LIBRARY_PATH.

> The cursed part is that the build and runtime steps are closely intermixed, but the build step doesn't have a way to provide some values for runtime.

Oh right, we should probably try to explicitly track the references retained at runtime.

> The CC variable override is only enabled with cudaSupport; otherwise it doesn't try to call CC at runtime, and the build-time CC is enough (at least, I haven't experienced that with vLLM).

Do they not use triton.common.build at runtime for their jit/aot?
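
For illustration, a minimal hypothetical Nix sketch of a libcuda consumer following the pattern described above (not triton's actual expression): link against the cuda_cudart stubs at build time, and point the runpath at addDriverRunpath.driverLink so the host driver's libcuda.so.1 is found at runtime.

```nix
# Hypothetical sketch of the usual nixpkgs pattern for libcuda consumers:
# link against the stub library at build time, and resolve the real driver's
# libcuda.so.1 at runtime via /run/opengl-driver/lib (addDriverRunpath.driverLink).
{ stdenv, lib, writeText, addDriverRunpath, cudaPackages }:

stdenv.mkDerivation {
  pname = "libcuda-consumer-sketch";
  version = "0";

  # A trivial translation unit using the CUDA driver API.
  src = writeText "main.c" ''
    #include <cuda.h>
    int main(void) { return cuInit(0) == CUDA_SUCCESS ? 0 : 1; }
  '';
  dontUnpack = true;

  buildInputs = [ cudaPackages.cuda_cudart ];

  buildPhase = ''
    # The stub libcuda.so satisfies the linker without pinning any driver version.
    $CC "$src" -o example \
      -L${lib.getLib cudaPackages.cuda_cudart}/lib/stubs -lcuda
  '';

  installPhase = ''
    install -Dm755 example $out/bin/example
  '';

  # After the normal fixup, point the runpath at the impure driver directory so
  # the loader can find the host's libcuda.so.1 (or honor LD_LIBRARY_PATH on FHS distros).
  postFixup = ''
    patchelf --add-rpath ${addDriverRunpath.driverLink}/lib $out/bin/example
  '';
}
```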

@CertainLach (Member, PR author) commented on Mar 3, 2024:

> More specifically, we use the fake driver ${getLib cudaPackages.cuda_cudart}/lib/stubs at build/link time, and ${addDriverRunpath.driverLink}/lib at runtime. It's also important that at runtime we first try dlopen("libcuda.so", ...), and only then dlopen("/run/opengl-driver/lib/libcuda.so", ...), because we want things to also work on FHS distributions and respect the optional LD_LIBRARY_PATH.

I don't think we can achieve that with this code? It runs both at compile time and at runtime... except, perhaps, by patching it in preInstall?

> Do they not use triton.common.build at runtime for their jit/aot?

Not in vLLM on ROCm; I'm not sure about other projects using triton directly.

Contributor:

Well, we should open an issue asking for more fine-grained support. Note that they no longer use the variable on master, but use whereis, which is also platform-specific: https://github.com/feihugis/triton/blob/a9d1935e795cf28aa3c3be8ac5c14723e6805de5/python/triton/compiler.py#L1354-L1357

substituteInPlace python/triton/common/build.py \
--replace 'os.environ.get("CC")' '"${cudaPackages.backendStdenv.cc}/bin/cc"'
Contributor:

I would suggest making this (and the one above) something like os.environ.get("CC", default=${...}). I would also suggest that upstream use TRITON_CC instead of CC, to avoid unintentionally changing the compiler.
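
A hedged sketch of what that suggestion could look like in this package's postPatch (illustrative only; the PR itself substitutes a fixed compiler path):

```nix
# Hypothetical variant of the substitution above: keep CC overridable from the
# environment, but fall back to the nixpkgs compiler when it is unset.
postPatch = ''
  substituteInPlace python/triton/common/build.py \
    --replace 'os.environ.get("CC")' \
              'os.environ.get("CC", "${cudaPackages.backendStdenv.cc}/bin/cc")'
'';
```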

'';

# Avoid GLIBCXX mismatch with other cuda-enabled python packages
@@ -125,24 +121,28 @@ buildPythonPackage rec {

# The rest (including buildPhase) is relative to ./python/
cd python

mkdir -p $out/${python.sitePackages}/triton/third_party/cuda/bin
function install_binary {
export TRITON_''${1^^}_PATH=$2
ln -s $2 $out/${python.sitePackages}/triton/third_party/cuda/bin/
}
'' + lib.optionalString cudaSupport ''
export CC=${cudaPackages.backendStdenv.cc}/bin/cc;
export CXX=${cudaPackages.backendStdenv.cc}/bin/c++;

# Work around download_and_copy_ptxas()
mkdir -p $PWD/triton/third_party/cuda/bin
ln -s ${ptxas} $PWD/triton/third_party/cuda/bin
install_binary ptxas ${cudaPackages.cuda_nvcc}/bin/ptxas
install_binary cuobjdump ${cudaPackages.cuda_cuobjdump}/bin/cuobjdump
install_binary nvdisasm ${cudaPackages.cuda_nvdisasm}/bin/nvdisasm
'' + lib.optionalString (!cudaSupport) ''
install_binary ptxas ${mkBinaryStub "ptxas"}
install_binary cuobjdump ${mkBinaryStub "cuobjdump"}
install_binary nvdisasm ${mkBinaryStub "nvdisasm"}
'';

# CMake is run by setup.py instead
dontUseCmakeConfigure = true;

# Setuptools (?) strips runpath and +x flags. Let's just restore the symlink
postFixup = lib.optionalString cudaSupport ''
rm -f $out/${python.sitePackages}/triton/third_party/cuda/bin/ptxas
ln -s ${ptxas} $out/${python.sitePackages}/triton/third_party/cuda/bin/ptxas
'';

checkInputs = [ cmake ]; # ctest
dontUseSetuptoolsCheck = true;
