[fbgemm_gpu] Add support for Python 3.12
- Add support for Python 3.12
q10 committed Dec 7, 2023
1 parent dbc3157 commit c769d79
Showing 11 changed files with 50 additions and 33 deletions.
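
Note on the recurring pattern in this commit: at the time, torch.compile (TorchDynamo) did not yet support Python 3.12, so the test changes below gate compilation behind a version check and fall back to eager execution on 3.12. The gate relies on Python's tuple comparison against sys.version_info; a quick illustration:

    import sys

    # sys.version_info compares element-wise as a tuple:
    # on 3.11.x, (3, 11, ...) < (3, 12, 0) is True  -> torch.compile is used
    # on 3.12.x, (3, 12, ...) < (3, 12, 0) is False -> the model runs eagerly
    print(sys.version_info < (3, 12, 0))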
2 changes: 1 addition & 1 deletion .github/scripts/fbgemm_gpu_test.bash
@@ -138,7 +138,7 @@ test_setup_conda_environment () {
if [ "$pytorch_variant_type" == "" ]; then
echo "Usage: ${FUNCNAME[0]} ENV_NAME PYTHON_VERSION PYTORCH_INSTALLER PYTORCH_VERSION PYTORCH_VARIANT_TYPE [PYTORCH_VARIANT_VERSION]"
echo "Example(s):"
echo " ${FUNCNAME[0]} build_env 3.10 pip test cuda 12.1.0 # Setup environment with pytorch-test for Python 3.10 + CUDA 12.1.0"
echo " ${FUNCNAME[0]} build_env 3.12 pip test cuda 12.1.0 # Setup environment with pytorch-test for Python 3.12 + CUDA 12.1.0"
return 1
else
echo "################################################################################"
4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_gpu_cpu_nightly.yml
@@ -64,7 +64,7 @@ jobs:
{ arch: x86, instance: "linux.4xlarge" },
{ arch: arm, instance: "linux.arm64.2xlarge" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]

steps:
- name: Setup Build Container
@@ -128,7 +128,7 @@ jobs:
{ arch: x86, instance: "linux.4xlarge" },
{ arch: arm, instance: "linux.arm64.2xlarge" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
needs: build_artifact

steps:
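Each version added to the python-version axis above fans out against the host-machine axis, one CI job per combination. A quick sanity check of the resulting job count (instance names copied from the matrix above):

    from itertools import product

    host_machines = ["linux.4xlarge", "linux.arm64.2xlarge"]
    python_versions = ["3.8", "3.9", "3.10", "3.11", "3.12"]

    # One job per (host, python) pair, for each stage that uses this matrix.
    print(len(list(product(host_machines, python_versions))))  # 10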
4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_gpu_cpu_release.yml
@@ -61,7 +61,7 @@ jobs:
{ arch: x86, instance: "linux.4xlarge" },
{ arch: arm, instance: "linux.arm64.2xlarge" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]

steps:
- name: Setup Build Container
@@ -125,7 +125,7 @@ jobs:
{ arch: x86, instance: "linux.4xlarge" },
{ arch: arm, instance: "linux.arm64.2xlarge" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
needs: build_artifact

steps:
4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_gpu_cuda_nightly.yml
@@ -62,7 +62,7 @@ jobs:
host-machine: [
{ arch: x86, instance: "linux.24xlarge" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1" ]

steps:
@@ -133,7 +133,7 @@ jobs:
host-machine: [
{ arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1" ]
# Specify exactly ONE CUDA version for artifact publish
cuda-version-publish: [ "12.1.1" ]
4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_gpu_cuda_release.yml
@@ -66,7 +66,7 @@ jobs:
host-machine: [
{ arch: x86, instance: "linux.24xlarge" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1" ]

steps:
@@ -134,7 +134,7 @@ jobs:
host-machine: [
{ arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1" ]
needs: build_artifact

4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_gpu_pip.yml
@@ -59,7 +59,7 @@ jobs:
{ instance: "linux.4xlarge" },
{ instance: "linux.arm64.2xlarge" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]

steps:
- name: Setup Build Container
@@ -110,7 +110,7 @@ jobs:
host-machine: [
{ instance: "linux.g5.4xlarge.nvidia.gpu" },
]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1" ]
# Specify exactly ONE CUDA version for artifact publish
cuda-version-publish: [ "11.8.0" ]
10 changes: 4 additions & 6 deletions fbgemm_gpu/setup.py
@@ -393,18 +393,16 @@ def main(argv: List[str]) -> None:
cmdclass={
"install": FbgemmGpuInstaller,
},
- # PyPI package information.
+ # PyPI package information
classifiers=[
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: BSD License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
+ ] + [
+     f"Programming Language :: Python :: {x}"
+     for x in ["3", "3.8", "3.9", "3.10", "3.11", "3.12"]
],
)

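The comprehension that replaces the hand-written classifier lines evaluates to the same strings plus the new 3.12 entry; checked stand-alone:

    classifiers = [
        f"Programming Language :: Python :: {x}"
        for x in ["3", "3.8", "3.9", "3.10", "3.11", "3.12"]
    ]
    # ['Programming Language :: Python :: 3',
    #  'Programming Language :: Python :: 3.8',
    #  ...
    #  'Programming Language :: Python :: 3.12']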
12 changes: 10 additions & 2 deletions fbgemm_gpu/test/batched_unary_embeddings_test.py
@@ -7,6 +7,7 @@


import random
+ import sys
import unittest
from math import sqrt
from typing import List, Tuple
@@ -45,6 +46,13 @@
}


+ def torch_compiled(model: torch.nn.Module, **kwargs) -> torch.nn.Module:
+     if sys.version_info < (3, 12, 0):
+         return torch.compile(model, **kwargs)
+     else:
+         return model


class TableBatchedEmbeddingsTest(unittest.TestCase):
class RefEmb(torch.nn.Module):
def __init__(self, num_tasks: int, hash_sizes: List[int]) -> None:
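
A hedged usage sketch of the torch_compiled helper above (the toy module is illustrative, not from the test): on Python < 3.12 the module comes back wrapped by TorchDynamo, on 3.12 it comes back unchanged, so the same test body runs either way:

    toy = torch.nn.Linear(8, 4)
    maybe_compiled = torch_compiled(toy, dynamic=True, fullgraph=True)
    # Python < 3.12: maybe_compiled is a compiled wrapper around toy.
    # Python >= 3.12: maybe_compiled is toy itself (eager fallback).
    out = maybe_compiled(torch.randn(2, 8))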
Expand Down Expand Up @@ -147,7 +155,7 @@ def _test_main(
param.detach().copy_(ref_emb.emb_modules[i].weight)
output_ref = ref_emb(offsets, indices)
if torch_compile:
- unary_emb = torch.compile(unary_emb, dynamic=True, fullgraph=True)
+ unary_emb = torch_compiled(unary_emb, dynamic=True, fullgraph=True)
output = unary_emb(offsets_tensor, indices_tensor)
torch.testing.assert_close(
output_ref,
@@ -169,7 +177,7 @@ def _test_main(
param.detach().copy_(ref_emb.emb_modules[i].weight)
output_ref = ref_emb(offsets, indices)
if torch_compile:
- unary_emb = torch.compile(unary_emb, dynamic=True, fullgraph=True)
+ unary_emb = torch_compiled(unary_emb, dynamic=True, fullgraph=True)
output = unary_emb(offsets_tensor.long(), indices_tensor.long())
torch.testing.assert_close(
output_ref,
24 changes: 13 additions & 11 deletions fbgemm_gpu/test/jagged_tensor_ops_test.py
@@ -9,6 +9,7 @@

import itertools
import random
+ import sys
import unittest
from typing import Callable, Dict, List, Tuple

@@ -127,6 +128,13 @@ def hash_size_cumsum_to_offsets(hash_size_cum_sum_list: List[int]) -> List[int]:
return hash_size_offsets_list


+ def torch_compiled(model: torch.nn.Module, **kwargs) -> torch.nn.Module:
+     if sys.version_info < (3, 12, 0):
+         return torch.compile(model, **kwargs)
+     else:
+         return model


# e.g. "test_faketensor__test_cumsum": [unittest.expectedFailure]
# Please avoid putting tests here, you should put operator-specific
# skips and failures in deeplearning/fbgemm/fbgemm_gpu/test/failures_dict.json
@@ -381,7 +389,7 @@ def test_jagged_2d_to_dense_dynamic_shape(
values = ref_values.clone().to(dtype).detach().requires_grad_(True)
offsets = offsets.to(device_type)
ref_output_values = ref_output_values.to(device_type)
- output_values = torch.compile(
+ output_values = torch_compiled(
torch.ops.fbgemm.jagged_2d_to_dense, dynamic=True, fullgraph=True
)(
values=values,
@@ -597,7 +605,7 @@ def lengths_to_segment_ids(lengths: torch.Tensor) -> torch.Tensor:
values = ref_values.clone().detach().requires_grad_(False)
offsets = offsets.to(device_type)
ref_output_values = ref_output_values.to(device_type)
- output_values = torch.compile(
+ output_values = torch_compiled(
torch.ops.fbgemm.jagged_1d_to_dense, dynamic=True, fullgraph=True
)(
values=values,
@@ -977,7 +985,6 @@ def test_dense_to_jagged_dynamic_shape(
)
values_2d = values_2d.clone().detach().requires_grad_(True)

- @torch.compile(fullgraph=True, dynamic=True)
def jagged_to_dense(
values: torch.Tensor, offsets: torch.Tensor, max_lengths: List[int]
) -> torch.Tensor:
@@ -993,13 +1000,11 @@ def jagged_to_dense(
torch._dynamo.mark_dynamic(dense, 0)
torch._dynamo.mark_dynamic(dense, -1)

- @torch.compile(fullgraph=True, dynamic=True)
def dense_to_jagged_withL(
dense: torch.Tensor, offsets: torch.Tensor, total_L: List[int]
) -> Tuple[torch.Tensor, torch.Tensor]:
return torch.ops.fbgemm.dense_to_jagged(dense, offsets, total_L)

- @torch.compile(fullgraph=False, dynamic=True)
def dense_to_jagged_noL(
dense: torch.Tensor, offsets: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
@@ -1325,21 +1330,18 @@ def test_jagged_elementwise_binary_dynamic_shape(

x_padded = self._to_padded_dense(x_values, x_offsets, max_lengths)

- @torch.compile(fullgraph=True, dynamic=True)
def jagged_dense_elementwise_add(
x_values: torch.Tensor, x_offsets: torch.Tensor, y: torch.Tensor
) -> torch.Tensor:
return torch.ops.fbgemm.jagged_dense_elementwise_add(x_values, x_offsets, y)

- @torch.compile(fullgraph=True, dynamic=True)
def jagged_dense_elementwise_add_jagged_output(
x_values: torch.Tensor, x_offsets: torch.Tensor, y: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
return torch.ops.fbgemm.jagged_dense_elementwise_add_jagged_output(
x_values, x_offsets, y
)

- @torch.compile(fullgraph=True, dynamic=True)
def jagged_dense_elementwise_mul(
x_values: torch.Tensor, x_offsets: torch.Tensor, y: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
@@ -1614,7 +1616,7 @@ def test_jagged_dense_dense_elementwise_add_jagged_output_dynamic_shape(
)
output_ref = x_padded + y_0 + y_1
x_values.to(device_type)
- (output, output_offsets) = torch.compile(
+ (output, output_offsets) = torch_compiled(
torch.ops.fbgemm.jagged_dense_dense_elementwise_add_jagged_output,
fullgraph=True,
dynamic=True,
@@ -1825,7 +1827,7 @@ def test_batched_dense_vec_jagged_2d_mul_dynamic_shape(
torch._dynamo.mark_dynamic(values, 1)
torch._dynamo.mark_dynamic(offsets, 0)

- output = torch.compile(
+ output = torch_compiled(
torch.ops.fbgemm.batched_dense_vec_jagged_2d_mul,
fullgraph=True,
dynamic=True,
@@ -2521,7 +2523,7 @@ def test_jagged_dense_bmm_dynamic_shape(
torch._dynamo.mark_dynamic(x_values, 1)
torch._dynamo.mark_dynamic(lengths, 0) # offsets = lengths + 1

- output, _ = torch.compile(
+ output, _ = torch_compiled(
torch.ops.fbgemm.jagged_dense_bmm, fullgraph=True, dynamic=True
)(
x_values,
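Note that the hunks above delete several @torch.compile decorators outright, leaving those local helpers eager on every Python version. For reference, the decorator form is just sugar for wrapping after definition, which is why the gated torch_compiled wrapper used elsewhere in this commit is a drop-in replacement; a minimal sketch of the equivalence:

    import torch

    @torch.compile(fullgraph=True, dynamic=True)
    def f(x: torch.Tensor) -> torch.Tensor:
        return x + 1

    # ...is equivalent to defining the function undecorated and wrapping it:
    def g(x: torch.Tensor) -> torch.Tensor:
        return x + 1

    g = torch.compile(g, fullgraph=True, dynamic=True)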
3 changes: 2 additions & 1 deletion fbgemm_gpu/test/quantize_ops_test.py
@@ -7,6 +7,7 @@
import logging
import os
import random
+ import sys
import unittest
from ctypes import c_float, c_int32, cast, POINTER, pointer
from typing import Callable, Dict, List, Tuple
@@ -1084,7 +1085,7 @@ def test_quantize_and_dequantize_op_fp8_rowwise(
dynamic=True,
fullgraph=True,
)
- if test_compile
+ if test_compile and sys.version_info < (3, 12, 0)
else torch.ops.fbgemm.FP8RowwiseQuantizedToFloat
)

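The hunk above truncates the start of the conditional expression. A hedged reconstruction of its likely full shape (the dequantize_op name, and the assumption that the compiled branch wraps the same fbgemm op, are guesses, not from the diff):

    dequantize_op = (
        torch.compile(
            torch.ops.fbgemm.FP8RowwiseQuantizedToFloat,
            dynamic=True,
            fullgraph=True,
        )
        if test_compile and sys.version_info < (3, 12, 0)
        else torch.ops.fbgemm.FP8RowwiseQuantizedToFloat
    )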
12 changes: 10 additions & 2 deletions fbgemm_gpu/test/sparse_ops_test.py
@@ -13,6 +13,7 @@
import logging
import os
import random
+ import sys
import unittest
from itertools import accumulate
from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Type, Union
@@ -103,6 +104,13 @@ def permute_scripted(
)


+ def torch_compiled(model: torch.nn.Module, **kwargs) -> torch.nn.Module:
+     if sys.version_info < (3, 12, 0):
+         return torch.compile(model, **kwargs)
+     else:
+         return model


class SparseOpsTest(unittest.TestCase):
@staticmethod
@settings(suppress_health_check=suppressed_list)
@@ -2018,7 +2026,7 @@ def test_pack_segments(
pack_segments_fun = torch.ops.fbgemm.pack_segments

if torch_compile:
- pack_segments_fun = torch.compile(pack_segments_fun, dynamic=True)
+ pack_segments_fun = torch_compiled(pack_segments_fun, dynamic=True)

packed_cuda = pack_segments_fun(
t_in=input_data.cuda(),
@@ -2114,7 +2122,7 @@ def test_pack_segments_smaller_max_len(
if gpu_available:
pack_segments_fun = torch.ops.fbgemm.pack_segments
if torch_compile:
- pack_segments_fun = torch.compile(pack_segments_fun)
+ pack_segments_fun = torch_compiled(pack_segments_fun)

packed_cuda = pack_segments_fun(
t_in=input_data.cuda(),
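One detail from the last two hunks: torch_compiled is annotated as taking a torch.nn.Module, yet here it wraps torch.ops.fbgemm.pack_segments, a plain callable. That works because torch.compile accepts arbitrary callables, not only modules; a minimal illustration (the square function is made up for the example):

    import torch

    def square(x: torch.Tensor) -> torch.Tensor:
        return x * x

    compiled_square = torch.compile(square, dynamic=True)
    print(compiled_square(torch.arange(4)))  # tensor([0, 1, 4, 9])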
