[fbgemm_gpu] Add support for Python 3.12
- Add support for Python 3.12
q10 committed Dec 7, 2023
1 parent dbc3157 commit c769d79
Showing 11 changed files with 50 additions and 33 deletions.
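
Note on the recurring pattern in this commit: at the time, torch.compile (TorchDynamo) did not yet support Python 3.12, so the test changes below gate compilation behind a version check and fall back to eager execution on 3.12. The gate relies on Python's tuple comparison against sys.version_info; a quick illustration:

    import sys

    # sys.version_info compares element-wise as a tuple:
    # on 3.11.x, (3, 11, ...) < (3, 12, 0) is True  -> torch.compile is used
    # on 3.12.x, (3, 12, ...) < (3, 12, 0) is False -> the model runs eagerly
    print(sys.version_info < (3, 12, 0))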
2 changes: 1 addition & 1 deletion .github/scripts/fbgemm_gpu_test.bash
@@ -138,7 +138,7 @@ test_setup_conda_environment () {
if [ "$pytorch_variant_type" == "" ]; then
echo "Usage: ${FUNCNAME[0]} ENV_NAME PYTHON_VERSION PYTORCH_INSTALLER PYTORCH_VERSION PYTORCH_VARIANT_TYPE [PYTORCH_VARIANT_VERSION]"
echo "Example(s):"
echo " ${FUNCNAME[0]} build_env 3.10 pip test cuda 12.1.0 # Setup environment with pytorch-test for Python 3.10 + CUDA 12.1.0"
echo " ${FUNCNAME[0]} build_env 3.12 pip test cuda 12.1.0 # Setup environment with pytorch-test for Python 3.12 + CUDA 12.1.0"
return 1
else
echo "################################################################################"
4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_gpu_cpu_nightly.yml
@@ -64,7 +64,7 @@ jobs:
{ arch: x86, instance: "linux.4xlarge" },
{ arch: arm, instance: "linux.arm64.2xlarge" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]

steps:
- name: Setup Build Container
@@ -128,7 +128,7 @@ jobs:
{ arch: x86, instance: "linux.4xlarge" },
{ arch: arm, instance: "linux.arm64.2xlarge" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
needs: build_artifact

steps:
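Each version added to the python-version axis above fans out against the host-machine axis, one CI job per combination. A quick sanity check of the resulting job count (instance names copied from the matrix above):

    from itertools import product

    host_machines = ["linux.4xlarge", "linux.arm64.2xlarge"]
    python_versions = ["3.8", "3.9", "3.10", "3.11", "3.12"]

    # One job per (host, python) pair, for each stage that uses this matrix.
    print(len(list(product(host_machines, python_versions))))  # 10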
4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_gpu_cpu_release.yml
@@ -61,7 +61,7 @@ jobs:
{ arch: x86, instance: "linux.4xlarge" },
{ arch: arm, instance: "linux.arm64.2xlarge" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]

steps:
- name: Setup Build Container
@@ -125,7 +125,7 @@ jobs:
{ arch: x86, instance: "linux.4xlarge" },
{ arch: arm, instance: "linux.arm64.2xlarge" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
needs: build_artifact

steps:
4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_gpu_cuda_nightly.yml
@@ -62,7 +62,7 @@ jobs:
host-machine: [
{ arch: x86, instance: "linux.24xlarge" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1" ]

steps:
@@ -133,7 +133,7 @@ jobs:
host-machine: [
{ arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1" ]
# Specify exactly ONE CUDA version for artifact publish
cuda-version-publish: [ "12.1.1" ]
4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_gpu_cuda_release.yml
@@ -66,7 +66,7 @@ jobs:
host-machine: [
{ arch: x86, instance: "linux.24xlarge" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1" ]

steps:
@@ -134,7 +134,7 @@ jobs:
host-machine: [
{ arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1" ]
needs: build_artifact

4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_gpu_pip.yml
@@ -59,7 +59,7 @@ jobs:
{ instance: "linux.4xlarge" },
{ instance: "linux.arm64.2xlarge" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]

steps:
- name: Setup Build Container
@@ -110,7 +110,7 @@ jobs:
host-machine: [
{ instance: "linux.g5.4xlarge.nvidia.gpu" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1" ]
# Specify exactly ONE CUDA version for artifact publish
cuda-version-publish: [ "11.8.0" ]
10 changes: 4 additions & 6 deletions fbgemm_gpu/setup.py
@@ -393,18 +393,16 @@ def main(argv: List[str]) -> None:
cmdclass={
"install": FbgemmGpuInstaller,
},
- # PyPI package information.
+ # PyPI package information
classifiers=[
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: BSD License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
+ ] + [
+     f"Programming Language :: Python :: {x}"
+     for x in ["3", "3.8", "3.9", "3.10", "3.11", "3.12"]
],
)

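The comprehension that replaces the hand-written classifier lines evaluates to the same strings plus the new 3.12 entry; checked stand-alone:

    classifiers = [
        f"Programming Language :: Python :: {x}"
        for x in ["3", "3.8", "3.9", "3.10", "3.11", "3.12"]
    ]
    # ['Programming Language :: Python :: 3',
    #  'Programming Language :: Python :: 3.8',
    #  ...
    #  'Programming Language :: Python :: 3.12']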
12 changes: 10 additions & 2 deletions fbgemm_gpu/test/batched_unary_embeddings_test.py
@@ -7,6 +7,7 @@


import random
+ import sys
import unittest
from math import sqrt
from typing import List, Tuple
@@ -45,6 +46,13 @@
}


+ def torch_compiled(model: torch.nn.Module, **kwargs) -> torch.nn.Module:
+     if sys.version_info < (3, 12, 0):
+         return torch.compile(model, **kwargs)
+     else:
+         return model


class TableBatchedEmbeddingsTest(unittest.TestCase):
class RefEmb(torch.nn.Module):
def __init__(self, num_tasks: int, hash_sizes: List[int]) -> None:
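
A hedged usage sketch of the torch_compiled helper above (the toy module is illustrative, not from the test): on Python < 3.12 the module comes back wrapped by TorchDynamo, on 3.12 it comes back unchanged, so the same test body runs either way:

    toy = torch.nn.Linear(8, 4)
    maybe_compiled = torch_compiled(toy, dynamic=True, fullgraph=True)
    # Python < 3.12: maybe_compiled is a compiled wrapper around toy.
    # Python >= 3.12: maybe_compiled is toy itself (eager fallback).
    out = maybe_compiled(torch.randn(2, 8))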
Expand Down Expand Up @@ -147,7 +155,7 @@ def _test_main(
param.detach().copy_(ref_emb.emb_modules[i].weight)
output_ref = ref_emb(offsets, indices)
if torch_compile:
- unary_emb = torch.compile(unary_emb, dynamic=True, fullgraph=True)
+ unary_emb = torch_compiled(unary_emb, dynamic=True, fullgraph=True)
output = unary_emb(offsets_tensor, indices_tensor)
torch.testing.assert_close(
output_ref,
@@ -169,7 +177,7 @@ def _test_main(
param.detach().copy_(ref_emb.emb_modules[i].weight)
output_ref = ref_emb(offsets, indices)
if torch_compile:
- unary_emb = torch.compile(unary_emb, dynamic=True, fullgraph=True)
+ unary_emb = torch_compiled(unary_emb, dynamic=True, fullgraph=True)
output = unary_emb(offsets_tensor.long(), indices_tensor.long())
torch.testing.assert_close(
output_ref,
24 changes: 13 additions & 11 deletions fbgemm_gpu/test/jagged_tensor_ops_test.py
@@ -9,6 +9,7 @@

import itertools
import random
+ import sys
import unittest
from typing import Callable, Dict, List, Tuple

@@ -127,6 +128,13 @@ def hash_size_cumsum_to_offsets(hash_size_cum_sum_list: List[int]) -> List[int]:
return hash_size_offsets_list


+ def torch_compiled(model: torch.nn.Module, **kwargs) -> torch.nn.Module:
+     if sys.version_info < (3, 12, 0):
+         return torch.compile(model, **kwargs)
+     else:
+         return model


# e.g. "test_faketensor__test_cumsum": [unittest.expectedFailure]
# Please avoid putting tests here, you should put operator-specific
# skips and failures in deeplearning/fbgemm/fbgemm_gpu/test/failures_dict.json
@@ -381,7 +389,7 @@ def test_jagged_2d_to_dense_dynamic_shape(
values = ref_values.clone().to(dtype).detach().requires_grad_(True)
offsets = offsets.to(device_type)
ref_output_values = ref_output_values.to(device_type)
- output_values = torch.compile(
+ output_values = torch_compiled(
torch.ops.fbgemm.jagged_2d_to_dense, dynamic=True, fullgraph=True
)(
values=values,
@@ -597,7 +605,7 @@ def lengths_to_segment_ids(lengths: torch.Tensor) -> torch.Tensor:
values = ref_values.clone().detach().requires_grad_(False)
offsets = offsets.to(device_type)
ref_output_values = ref_output_values.to(device_type)
- output_values = torch.compile(
+ output_values = torch_compiled(
torch.ops.fbgemm.jagged_1d_to_dense, dynamic=True, fullgraph=True
)(
values=values,
@@ -977,7 +985,6 @@ def test_dense_to_jagged_dynamic_shape(
)
values_2d = values_2d.clone().detach().requires_grad_(True)

- @torch.compile(fullgraph=True, dynamic=True)
def jagged_to_dense(
values: torch.Tensor, offsets: torch.Tensor, max_lengths: List[int]
) -> torch.Tensor:
@@ -993,13 +1000,11 @@ def jagged_to_dense(
torch._dynamo.mark_dynamic(dense, 0)
torch._dynamo.mark_dynamic(dense, -1)

- @torch.compile(fullgraph=True, dynamic=True)
def dense_to_jagged_withL(
dense: torch.Tensor, offsets: torch.Tensor, total_L: List[int]
) -> Tuple[torch.Tensor, torch.Tensor]:
return torch.ops.fbgemm.dense_to_jagged(dense, offsets, total_L)

- @torch.compile(fullgraph=False, dynamic=True)
def dense_to_jagged_noL(
dense: torch.Tensor, offsets: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
@@ -1325,21 +1330,18 @@ def test_jagged_elementwise_binary_dynamic_shape(

x_padded = self._to_padded_dense(x_values, x_offsets, max_lengths)

- @torch.compile(fullgraph=True, dynamic=True)
def jagged_dense_elementwise_add(
x_values: torch.Tensor, x_offsets: torch.Tensor, y: torch.Tensor
) -> torch.Tensor:
return torch.ops.fbgemm.jagged_dense_elementwise_add(x_values, x_offsets, y)

- @torch.compile(fullgraph=True, dynamic=True)
def jagged_dense_elementwise_add_jagged_output(
x_values: torch.Tensor, x_offsets: torch.Tensor, y: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
return torch.ops.fbgemm.jagged_dense_elementwise_add_jagged_output(
x_values, x_offsets, y
)

- @torch.compile(fullgraph=True, dynamic=True)
def jagged_dense_elementwise_mul(
x_values: torch.Tensor, x_offsets: torch.Tensor, y: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
@@ -1614,7 +1616,7 @@ def test_jagged_dense_dense_elementwise_add_jagged_output_dynamic_shape(
)
output_ref = x_padded + y_0 + y_1
x_values.to(device_type)
- (output, output_offsets) = torch.compile(
+ (output, output_offsets) = torch_compiled(
torch.ops.fbgemm.jagged_dense_dense_elementwise_add_jagged_output,
fullgraph=True,
dynamic=True,
@@ -1825,7 +1827,7 @@ def test_batched_dense_vec_jagged_2d_mul_dynamic_shape(
torch._dynamo.mark_dynamic(values, 1)
torch._dynamo.mark_dynamic(offsets, 0)

- output = torch.compile(
+ output = torch_compiled(
torch.ops.fbgemm.batched_dense_vec_jagged_2d_mul,
fullgraph=True,
dynamic=True,
@@ -2521,7 +2523,7 @@ def test_jagged_dense_bmm_dynamic_shape(
torch._dynamo.mark_dynamic(x_values, 1)
torch._dynamo.mark_dynamic(lengths, 0) # offsets = lengths + 1

- output, _ = torch.compile(
+ output, _ = torch_compiled(
torch.ops.fbgemm.jagged_dense_bmm, fullgraph=True, dynamic=True
)(
x_values,
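Note that the hunks above delete several @torch.compile decorators outright, leaving those local helpers eager on every Python version. For reference, the decorator form is just sugar for wrapping after definition, which is why the gated torch_compiled wrapper used elsewhere in this commit is a drop-in replacement; a minimal sketch of the equivalence:

    import torch

    @torch.compile(fullgraph=True, dynamic=True)
    def f(x: torch.Tensor) -> torch.Tensor:
        return x + 1

    # ...is equivalent to defining the function undecorated and wrapping it:
    def g(x: torch.Tensor) -> torch.Tensor:
        return x + 1

    g = torch.compile(g, fullgraph=True, dynamic=True)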
3 changes: 2 additions & 1 deletion fbgemm_gpu/test/quantize_ops_test.py
@@ -7,6 +7,7 @@
import logging
import os
import random
+ import sys
import unittest
from ctypes import c_float, c_int32, cast, POINTER, pointer
from typing import Callable, Dict, List, Tuple
@@ -1084,7 +1085,7 @@ def test_quantize_and_dequantize_op_fp8_rowwise(
dynamic=True,
fullgraph=True,
)
- if test_compile
+ if test_compile and sys.version_info < (3, 12, 0)
else torch.ops.fbgemm.FP8RowwiseQuantizedToFloat
)

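The hunk above truncates the start of the conditional expression. A hedged reconstruction of its likely full shape (the dequantize_op name, and the assumption that the compiled branch wraps the same fbgemm op, are guesses, not from the diff):

    dequantize_op = (
        torch.compile(
            torch.ops.fbgemm.FP8RowwiseQuantizedToFloat,
            dynamic=True,
            fullgraph=True,
        )
        if test_compile and sys.version_info < (3, 12, 0)
        else torch.ops.fbgemm.FP8RowwiseQuantizedToFloat
    )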
12 changes: 10 additions & 2 deletions fbgemm_gpu/test/sparse_ops_test.py
@@ -13,6 +13,7 @@
import logging
import os
import random
+ import sys
import unittest
from itertools import accumulate
from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Type, Union
@@ -103,6 +104,13 @@ def permute_scripted(
)


+ def torch_compiled(model: torch.nn.Module, **kwargs) -> torch.nn.Module:
+     if sys.version_info < (3, 12, 0):
+         return torch.compile(model, **kwargs)
+     else:
+         return model


class SparseOpsTest(unittest.TestCase):
@staticmethod
@settings(suppress_health_check=suppressed_list)
@@ -2018,7 +2026,7 @@ def test_pack_segments(
pack_segments_fun = torch.ops.fbgemm.pack_segments

if torch_compile:
- pack_segments_fun = torch.compile(pack_segments_fun, dynamic=True)
+ pack_segments_fun = torch_compiled(pack_segments_fun, dynamic=True)

packed_cuda = pack_segments_fun(
t_in=input_data.cuda(),
@@ -2114,7 +2122,7 @@ def test_pack_segments_smaller_max_len(
if gpu_available:
pack_segments_fun = torch.ops.fbgemm.pack_segments
if torch_compile:
- pack_segments_fun = torch.compile(pack_segments_fun)
+ pack_segments_fun = torch_compiled(pack_segments_fun)

packed_cuda = pack_segments_fun(
t_in=input_data.cuda(),
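One detail from the last two hunks: torch_compiled is annotated as taking a torch.nn.Module, yet here it wraps torch.ops.fbgemm.pack_segments, a plain callable. That works because torch.compile accepts arbitrary callables, not only modules; a minimal illustration (the square function is made up for the example):

    import torch

    def square(x: torch.Tensor) -> torch.Tensor:
        return x * x

    compiled_square = torch.compile(square, dynamic=True)
    print(compiled_square(torch.arange(4)))  # tensor([0, 1, 4, 9])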
