Update PyTorch to >=2.4.0 to get fix for CUDA array interface bug, and drop CUDA 11 PyTorch tests. (#17475)

This PR updates our PyTorch lower bound to 2.4.0 to get the bugfix from pytorch/pytorch#121458.

Also, this PR drops CUDA 11 tests because conda-forge no longer produces CUDA 11 builds of PyTorch. This was causing a failure on Hopper GPUs because the last available CUDA 11 builds from conda-forge do not include sm90 support.
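
For reference, the round trip that this bugfix re-enables is the zero-size case exercised in the test change below. The following is a minimal sketch, assuming a CUDA-enabled PyTorch build and a visible GPU (the same preconditions the test skips without):

import cudf
import torch

# An empty cudf Index exposes a zero-size __cuda_array_interface__. Before the
# fix in pytorch/pytorch#121458, PyTorch still checked that the data pointer
# was device-accessible even though the size is zero (pytorch/pytorch#98133)
# and raised an error.
index = cudf.Index([], dtype="float64")

# With pytorch>=2.4.0 this succeeds and yields an empty CUDA tensor.
tensor = torch.tensor(index)

# Round-trip back into cudf and confirm the empty index survives intact.
assert cudf.Index(tensor).equals(index)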

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - James Lamb (https://github.com/jameslamb)
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #17475
bdice authored Dec 2, 2024
1 parent d1bad33 commit 852338e
Showing 5 changed files with 9 additions and 20 deletions.
3 changes: 0 additions & 3 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -80,7 +80,6 @@ dependencies:
 - python-confluent-kafka>=2.5.0,<2.6.0a0
 - python-xxhash
 - python>=3.10,<3.13
-- pytorch>=2.1.0
 - rapids-build-backend>=0.3.0,<0.4.0.dev0
 - rapids-dask-dependency==25.2.*,>=0.0.0a0
 - rich
@@ -97,8 +96,6 @@ dependencies:
 - sphinxcontrib-websupport
 - streamz
 - sysroot_linux-64==2.17
-- tokenizers==0.15.2
-- transformers==4.39.3
 - typing_extensions>=4.0.0
 - zlib>=1.2.13
 name: all_cuda-118_arch-x86_64
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -78,7 +78,7 @@ dependencies:
 - python-confluent-kafka>=2.5.0,<2.6.0a0
 - python-xxhash
 - python>=3.10,<3.13
-- pytorch>=2.1.0
+- pytorch>=2.4.0
 - rapids-build-backend>=0.3.0,<0.4.0.dev0
 - rapids-dask-dependency==25.2.*,>=0.0.0a0
 - rich
7 changes: 2 additions & 5 deletions dependencies.yaml
@@ -885,12 +885,9 @@ dependencies:
       - output_types: conda
         matrices:
           - matrix:
-              arch: x86_64
+              cuda: "12.*"
             packages:
-              # Currently, CUDA + aarch64 builds of pytorch do not exist on conda-forge.
-              - pytorch>=2.1.0
-              # We only install these on x86_64 to avoid pulling pytorch as a
-              # dependency of transformers.
+              - pytorch>=2.4.0
               - *tokenizers
               - *transformers
           - matrix:
15 changes: 5 additions & 10 deletions python/cudf/cudf/tests/test_cuda_array_interface.py
@@ -187,7 +187,7 @@ def test_column_from_ephemeral_cupy_try_lose_reference():
     ),
 )
 def test_cuda_array_interface_pytorch():
-    torch = pytest.importorskip("torch", minversion="1.6.0")
+    torch = pytest.importorskip("torch", minversion="2.4.0")
     if not torch.cuda.is_available():
         pytest.skip("need gpu version of pytorch to be installed")
 
@@ -202,15 +202,10 @@ def test_cuda_array_interface_pytorch():
 
     assert_eq(got, cudf.Series(buffer, dtype=np.bool_))
 
-    # TODO: This test fails with PyTorch 2. It appears that PyTorch
-    # checks that the pointer is device-accessible even when the
-    # size is zero. See
-    # https://github.com/pytorch/pytorch/issues/98133
-    #
-    # index = cudf.Index([], dtype="float64")
-    # tensor = torch.tensor(index)
-    # got = cudf.Index(tensor)
-    # assert_eq(got, index)
+    index = cudf.Index([], dtype="float64")
+    tensor = torch.tensor(index)
+    got = cudf.Index(tensor)
+    assert_eq(got, index)
 
     index = cudf.core.index.RangeIndex(start=0, stop=100)
     tensor = torch.tensor(index)
2 changes: 1 addition & 1 deletion
@@ -213,7 +213,7 @@ dependencies:
       - output_types: conda
         packages:
           - numpy
-          - pytorch>=2.1.0
+          - pytorch>=2.4.0
   test_seaborn:
     common:
       - output_types: conda
