diff --git a/.github/scripts/build_wheel.bash b/.github/scripts/build_wheel.bash
deleted file mode 100644
index 02439cfc35..0000000000
--- a/.github/scripts/build_wheel.bash
+++ /dev/null
@@ -1,129 +0,0 @@
-#!/bin/bash
-
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-# Exit on failure
-set -e
-
-# shellcheck source=/dev/null
-. "$(dirname "$(realpath -s "$0")")/setup_env.bash"
-
-verbose=0
-package_name=""
-python_version=""
-pytorch_channel_name=""
-cuda_version="x"
-miniconda_prefix="${HOME}/miniconda"
-
-usage () {
-  echo "Usage: bash build_wheel.bash -o PACKAGE_NAME -p PYTHON_VERSION -P PYTORCH_CHANNEL_NAME -c CUDA_VERSION [-m MINICONDA_PREFIX] [-v] [-h]"
-  echo "-v : verbose"
-  echo "-h : help"
-  echo "PACKAGE_NAME : output package name (e.g., fbgemm_gpu_nightly)"
-  echo "PYTHON_VERSION : Python version (e.g., 3.8, 3.9, 3.10)"
-  echo "PYTORCH_CHANNEL_NAME: PyTorch's channel name (e.g., pytorch-nightly, pytorch-test (=pre-release), pytorch (=stable release))"
-  echo "CUDA_VERSION : PyTorch's CUDA version (e.g., 11.6, 11.7)"
-  echo "MINICONDA_PREFIX : path to install Miniconda (default: \$HOME/miniconda)"
-  echo "Example 1: Python 3.10 + PyTorch nightly (CUDA 11.7), install miniconda at /home/user/tmp/miniconda"
-  echo " bash build_wheel.bash -v -P pytorch-nightly -p 3.10 -c 11.7 -m /home/user/tmp/miniconda"
-  echo "Example 2: Python 3.10 + PyTorch stable (CPU), install miniconda at \$HOME/miniconda"
-  echo " bash build_wheel.bash -v -P pytorch -p 3.10 -c \"\""
-}
-
-while getopts vfho:p:P:c:m: flag
-do
-  case "$flag" in
-    v) verbose="1";;
-    o) package_name="${OPTARG}";;
-    p) python_version="${OPTARG}";;
-    P) pytorch_channel_name="${OPTARG}";;
-    c) cuda_version="${OPTARG}";;
-    m) miniconda_prefix="${OPTARG}";;
-    h) usage
-       exit 0;;
-    *) usage
-       exit 1;;
-  esac
-done
-
-if [ "$python_version" == "" ] || [ "$cuda_version" == "x" ] || [ "$miniconda_prefix" == "" ] || [ "$pytorch_channel_name" == "" ] || [ "$package_name" == "" ]; then
-  usage
-  exit 1
-fi
-python_tag="${python_version//\./}"
-
-if [ "$verbose" == "1" ]; then
-  # Print each line verbosely
-  set -x -e
-fi
-
-################################################################################
-echo "## 0. Minimal check"
-################################################################################
-
-if [ ! -d "fbgemm_gpu" ]; then
-  echo "Error: this script must be executed in FBGEMM/"
-  exit 1
-elif [ "$(which gcc 2>/dev/null)" == "" ]; then
-  echo "Error: GCC is needed to compile FBGEMM"
-  exit 1
-fi
-
-################################################################################
-echo "## 1. Set up Miniconda"
-################################################################################
-
-setup_miniconda "$miniconda_prefix"
-
-################################################################################
-echo "## 2. Create build_binary environment"
-################################################################################
-
-create_conda_pytorch_environment build_binary "$python_version" "$pytorch_channel_name" "$cuda_version"
-
-cd fbgemm_gpu
-
-# cuDNN is needed to "build" FBGEMM
-install_cudnn build_binary "$miniconda_prefix/build_only/cudnn" "$cuda_version"
-
-conda run -n build_binary python -m pip install -r requirements.txt
-
-# TODO: Do we need these checks?
-ldd --version
-conda info
-conda run -n build_binary python --version
-gcc --version
-conda run -n build_binary python -c "import torch.distributed"
-conda run -n build_binary python -c "import skbuild"
-conda run -n build_binary python -c "import numpy"
-cd ../
-
-################################################################################
-echo "## 3. Build FBGEMM_GPU"
-################################################################################
-
-cd fbgemm_gpu
-rm -rf dist _skbuild
-if [ "$cuda_version" == "" ]; then
-  # CPU version
-  build_arg="--cpu_only"
-  package_name="${package_name}_cpu"
-else
-  # GPU version
-  # We build only CUDA 7.0 and 8.0 (i.e., for v100 and a100) because of 100 MB binary size limit from PYPI website.
-  build_arg="-DTORCH_CUDA_ARCH_LIST=7.0;8.0"
-fi
-
-# manylinux1_x86_64 is specified for pypi upload: distribute python extensions as wheels on Linux
-conda run -n build_binary python setup.py bdist_wheel --package_name="${package_name}" --python-tag="py${python_tag}" "${build_arg}" --plat-name=manylinux1_x86_64
-cd ../
-
-# Usage:
-# pip install $(ls fbgemm_gpu/dist/${package_name}-*.whl)
-# python -c "import fbgemm_gpu"
-
-wheel_name="$(ls fbgemm_gpu/dist/"${package_name}"-*.whl)"
-echo "Successfully built $wheel_name"
diff --git a/.github/scripts/test_wheel.bash b/.github/scripts/test_wheel.bash
deleted file mode 100644
index ff6d08b450..0000000000
--- a/.github/scripts/test_wheel.bash
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/bin/bash
-
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-# Exit on failure
-set -e
-
-# shellcheck source=/dev/null
-. "$(dirname "$(realpath -s "$0")")/setup_env.bash"
-
-
-verbose=0
-python_version=""
-cuda_version="x"
-fbgemm_wheel_path="x"
-miniconda_prefix="${HOME}/miniconda"
-
-usage () {
-  echo "Usage: bash test_wheel.bash -p PYTHON_VERSION -P PYTORCH_CHANNEL_NAME -c CUDA_VERSION -w FBGEMM_WHEEL_PATH [-m MINICONDA_PREFIX] [-v] [-h]"
-  echo "-v : verbose"
-  echo "-h : help"
-  echo "PYTHON_VERSION : Python version (e.g., 3.8, 3.9, 3.10)"
-  echo "PYTORCH_CHANNEL_NAME: PyTorch's channel name (e.g., pytorch-nightly, pytorch-test (=pre-release), pytorch (=stable release))"
-  echo "CUDA_VERSION : PyTorch's CUDA version (e.g., 11.6, 11.7)"
-  echo "FBGEMM_WHEEL_PATH : path to FBGEMM_GPU's wheel file"
-  echo "MINICONDA_PREFIX : path to install Miniconda (default: \$HOME/miniconda)"
-  echo "Example 1: Python 3.10 + PyTorch nightly (CUDA 11.7), install miniconda at /home/user/tmp/miniconda, using dist/fbgemm_gpu.whl"
-  echo " bash test_wheel.bash -v -p 3.10 -P pytorch-nightly -c 11.7 -m /home/user/tmp/miniconda -w dist/fbgemm_gpu.whl"
-  echo "Example 2: Python 3.10 + PyTorch stable (CPU), install miniconda at \$HOME/miniconda, using /tmp/fbgemm_gpu_cpu.whl"
-  echo " bash test_wheel.bash -v -p 3.10 -P pytorch -c \"\" -w /tmp/fbgemm_gpu_cpu.whl"
-}
-
-while getopts vhp:P:c:m:w: flag
-do
-  case "$flag" in
-    v) verbose="1";;
-    p) python_version="${OPTARG}";;
-    P) pytorch_channel_name="${OPTARG}";;
-    c) cuda_version="${OPTARG}";;
-    m) miniconda_prefix="${OPTARG}";;
-    w) fbgemm_wheel_path="${OPTARG}";;
-    h) usage
-       exit 0;;
-    *) usage
-       exit 1;;
-  esac
-done
-
-if [ "$python_version" == "" ] || [ "$cuda_version" == "x" ] || [ "$miniconda_prefix" == "" ] || [ "$pytorch_channel_name" == "" ] || [ "$fbgemm_wheel_path" == "" ]; then
-  usage
-  exit 1
-fi
-
-if [ "$verbose" == "1" ]; then
-  # Print each line verbosely
-  set -x -e
-fi
-
-################################################################################
-echo "## 0. Minimal check"
-################################################################################
-
-if [ ! -d "fbgemm_gpu" ]; then
-  echo "Error: this script must be executed in FBGEMM/"
-  exit 1
-fi
-
-################################################################################
-echo "## 1. Set up Miniconda"
-################################################################################
-
-setup_miniconda "$miniconda_prefix"
-
-################################################################################
-echo "## 2. Create test_binary environment"
-################################################################################
-
-create_conda_pytorch_environment test_binary "$python_version" "$pytorch_channel_name" "$cuda_version"
-conda install -n test_binary -y pytest
-
-cd fbgemm_gpu
-conda run -n test_binary python -m pip install -r requirements.txt
-cd ../
-
-################################################################################
-echo "## 3. Install and test FBGEMM_GPU"
-################################################################################
-
-conda run -n test_binary python -m pip install "$fbgemm_wheel_path"
-conda run -n test_binary python -c "import fbgemm_gpu"
-
-if [ "$cuda_version" == "" ]; then
-  # CPU version: unfortunately, not all tests are properly excluded,
-  # so we cherry-pick what we can run.
-  conda run -n test_binary python fbgemm_gpu/test/batched_unary_embeddings_test.py -v
-  conda run -n test_binary python fbgemm_gpu/test/input_combine_test.py -v
-  conda run -n test_binary python fbgemm_gpu/test/layout_transform_ops_test.py -v
-  conda run -n test_binary python fbgemm_gpu/test/merge_pooled_embeddings_test.py -v
-  conda run -n test_binary python fbgemm_gpu/test/permute_pooled_embedding_modules_test.py -v
-  conda run -n test_binary python fbgemm_gpu/test/quantize_ops_test.py -v
-  conda run -n test_binary python fbgemm_gpu/test/sparse_ops_test.py -v
-else
-  # GPU version
-  # Don't run it in the fbgemm_gpu directory; fbgemm_gpu has a fbgemm_gpu directory,
-  # which confuses "import" in Python.
-  # conda run -n test_binary python -m pytest fbgemm_gpu -v -s -W ignore::pytest.PytestCollectionWarning --continue-on-collection-errors
-  conda run -n test_binary python -m pytest fbgemm_gpu -v -s -W ignore::pytest.PytestCollectionWarning --continue-on-collection-errors --ignore-glob=**/ssd_split_table_batched_embeddings_test.py --ignore-glob=**/split_table_batched_embeddings_test.py
-fi
-
-echo "Test succeeded"
diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
deleted file mode 100644
index 8ab3f72eae..0000000000
--- a/.github/workflows/build_wheel.yml
+++ /dev/null
@@ -1,160 +0,0 @@
-name: Build Wheel
-
-on:
-  workflow_call:
-    inputs:
-      release_version:
-        required: true
-        type: string
-      upload_pypi:
-        required: true
-        type: boolean
-
-jobs:
-  setup_wheel_jobs:
-    runs-on: [ubuntu-latest]
-    outputs:
-      pytorch_channel: ${{ steps.output_variables.outputs.pytorch_channel }}
-      fbgemm_package_name: ${{ steps.output_variables.outputs.fbgemm_package_name }}
-      torchrec_package_name: ${{ steps.output_variables.outputs.torchrec_package_name }}
-    steps:
-      - id: output_variables
-        run: |
-          if [ x"${{ inputs.release_version }}" == x"nightly" ]; then
-            echo "pytorch_channel=pytorch-nightly" >> $GITHUB_OUTPUT
-            echo "fbgemm_package_name=fbgemm_gpu_nightly" >> $GITHUB_OUTPUT
-            echo "torchrec_package_name=torchrec_nightly" >> $GITHUB_OUTPUT
-          elif [ x"${{ inputs.release_version }}" == x"prerelease" ]; then
-            echo "pytorch_channel=pytorch-test" >> $GITHUB_OUTPUT
-            echo "fbgemm_package_name=fbgemm_gpu_test" >> $GITHUB_OUTPUT
-            echo "torchrec_package_name=torchrec_test" >> $GITHUB_OUTPUT
-          elif [ x"${{ inputs.release_version }}" == x"release" ]; then
-            echo "pytorch_channel=pytorch" >> $GITHUB_OUTPUT
-            echo "fbgemm_package_name=fbgemm_gpu" >> $GITHUB_OUTPUT
-            echo "torchrec_package_name=torchrec" >> $GITHUB_OUTPUT
-          else
-            echo "Error: unknown release_version ${{ inputs.release_version }}"
-            exit 1
-          fi
-
-  # Build on CPU hosts and upload *.whl as an GitHub Action artifact
-  build_wheel:
-    needs: [setup_wheel_jobs]
-    strategy:
-      matrix:
-        os: [linux.4xlarge]
-        python-version: ["3.8", "3.9", "3.10"]
-        cuda-tag: ["cpu", "cu11"]
-
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
-    with:
-      job-name: build_wheel (py${{ matrix.python-version }}-${{ matrix.cuda-tag }})
-      runner: ${{ matrix.os }}
-      repository: pytorch/fbgemm
-      gpu-arch-type: cpu
-      upload-artifact: ${{ needs.setup_wheel_jobs.outputs.fbgemm_package_name }}_${{ matrix.python-version }}_${{ matrix.cuda-tag }}
-      timeout: 120
-      script: |
-        set -x
-        # Checkout FBGEMM_GPU
-        git submodule update --init
-
-        # Build wheel
-        if [ x"${{ matrix.cuda-tag }}" == x"cpu" ]; then
-          # Empty string
-          CUDA_VERSION=""
-        else
-          CUDA_VERSION="11.7.1"
-        fi
-        bash .github/scripts/build_wheel.bash -v -p ${{ matrix.python-version }} -o ${{ needs.setup_wheel_jobs.outputs.fbgemm_package_name }} -P ${{ needs.setup_wheel_jobs.outputs.pytorch_channel }} -c "${CUDA_VERSION}" -m "/opt/conda"
-
-        # Put a wheel file in RUNNER_ARTIFACT_DIR
-        FBGEMM_ARTIFACT_PATH="${RUNNER_ARTIFACT_DIR}/${{ needs.setup_wheel_jobs.outputs.fbgemm_package_name }}_${{ matrix.python-version }}_${{ matrix.cuda-tag }}"
-        mkdir -p "$FBGEMM_ARTIFACT_PATH"
-        mv fbgemm_gpu/dist/*.whl "${FBGEMM_ARTIFACT_PATH}"
-
-  # Download the GitHub Action artifact and test the artifact on a GPU machine
-  test_wheel_gpu:
-    needs: [setup_wheel_jobs, build_wheel]
-    strategy:
-      matrix:
-        os: [linux.g5.4xlarge.nvidia.gpu]
-        python-version: ["3.8", "3.9", "3.10"]
-        cuda-tag: ["cu11"]
-
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
-    with:
-      job-name: test_wheel_gpu (py${{ matrix.python-version }}-${{ matrix.cuda-tag }})
-      runner: ${{ matrix.os }}
-      repository: pytorch/fbgemm
-      gpu-arch-type: cuda
-      gpu-arch-version: 11.7
-      download-artifact: ${{ needs.setup_wheel_jobs.outputs.fbgemm_package_name }}_${{ matrix.python-version }}_${{ matrix.cuda-tag }}
-      timeout: 120
-      script: |
-        set -x
-        # Checkout FBGEMM_GPU
-        git submodule update --init
-
-        # Test Wheel
-        CUDA_VERSION="11.7.1"
-        FBGEMM_ARTIFACT_PATH="${RUNNER_ARTIFACT_DIR}/${{ needs.setup_wheel_jobs.outputs.fbgemm_package_name }}_${{ matrix.python-version }}_${{ matrix.cuda-tag }}"
-        WHEEL_PATH="$(ls ${FBGEMM_ARTIFACT_PATH}/*.whl)"
-        bash .github/scripts/test_wheel.bash -v -p ${{ matrix.python-version }} -P ${{ needs.setup_wheel_jobs.outputs.pytorch_channel }} -c "${CUDA_VERSION}" -w "${WHEEL_PATH}" -m "/opt/conda"
-
-  # Download the GitHub Action artifact and test the artifact on a GPU machine
-  test_wheel_cpu:
-    needs: [setup_wheel_jobs, build_wheel]
-    strategy:
-      matrix:
-        os: [linux.4xlarge]
-        python-version: ["3.8", "3.9", "3.10"]
-        cuda-tag: ["cpu"]
-
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
-    with:
-      job-name: test_wheel_cpu (py${{ matrix.python-version }}-${{ matrix.cuda-tag }})
-      runner: ${{ matrix.os }}
-      repository: pytorch/fbgemm
-      download-artifact: ${{ needs.setup_wheel_jobs.outputs.fbgemm_package_name }}_${{ matrix.python-version }}_${{ matrix.cuda-tag }}
-      timeout: 120
-      script: |
-        set -x
-        # Checkout FBGEMM_GPU
-        git submodule update --init
-
-        # Test Wheel
-        CUDA_VERSION=""
-        FBGEMM_ARTIFACT_PATH="${RUNNER_ARTIFACT_DIR}/${{ needs.setup_wheel_jobs.outputs.fbgemm_package_name }}_${{ matrix.python-version }}_${{ matrix.cuda-tag }}"
-        WHEEL_PATH="$(ls ${FBGEMM_ARTIFACT_PATH}/*.whl)"
-        bash .github/scripts/test_wheel.bash -v -p ${{ matrix.python-version }} -P ${{ needs.setup_wheel_jobs.outputs.pytorch_channel }} -c "${CUDA_VERSION}" -w "${WHEEL_PATH}" -m "/opt/conda"
-
-  # Upload the created wheels to PyPI
-  upload_pypi:
-    needs: [setup_wheel_jobs, test_wheel_gpu, test_wheel_cpu]
-    strategy:
-      matrix:
-        os: [ubuntu-latest]
-        python-version: ["3.8", "3.9", "3.10"]
-        cuda-tag: ["cu11", "cpu"]
-    runs-on: ${{ matrix.os }}
-
-    steps:
-      - name: Download an artifact
-        uses: actions/download-artifact@v3
-        with:
-          name: ${{ needs.setup_wheel_jobs.outputs.fbgemm_package_name }}_${{ matrix.python-version }}_${{ matrix.cuda-tag }}
-          path: ${{ runner.temp }}/artifacts/
-      - name: Upload a wheel to PyPI
-        if: ${{ inputs.upload_pypi }}
-        env:
-          PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
-        run: |
-          # Install Twine
-          sudo apt install python3-pip
-          pip3 install twine
-
-          # Upload FBGEMM_GPU binary
-          FBGEMM_ARTIFACT_PATH="${{ runner.temp }}/artifacts/${{ needs.setup_wheel_jobs.outputs.fbgemm_package_name }}_${{ matrix.python-version }}_${{ matrix.cuda-tag }}"
-          WHEEL_PATH="$(ls ${FBGEMM_ARTIFACT_PATH}/*.whl)"
-          twine upload --username __token__ --password "$PYPI_TOKEN" --skip-existing --verbose "$WHEEL_PATH"
diff --git a/.github/workflows/push_wheel_trigger.yml b/.github/workflows/push_wheel_trigger.yml
deleted file mode 100644
index 9e4d6871e2..0000000000
--- a/.github/workflows/push_wheel_trigger.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-name: Push Wheel
-
-on:
-  # For debugging, please use test_wheel_*.yml
-  # run every day at 10:30 AM
-  # schedule:
-  #   - cron: '30 10 * * *'
-  # or manually trigger it
-  workflow_dispatch:
-    inputs:
-      release_version:
-        type: choice
-        required: true
-        default: 'nightly'
-        options:
-          - nightly
-          - prerelease
-          - release
-      upload_pypi:
-        type: choice
-        required: true
-        default: true
-        options:
-          - true
-          - false
-
-jobs:
-  push_wheel:
-    uses: ./.github/workflows/build_wheel.yml
-    with:
-      # if it's triggered by "schedule", nightly + true will be chosen
-      release_version: ${{ inputs.release_version || 'nightly' }}
-      upload_pypi: ${{ (inputs.upload_pypi || 'true') == 'true' }}
-    secrets: inherit
diff --git a/.github/workflows/test_wheel_trigger.yml b/.github/workflows/test_wheel_trigger.yml
deleted file mode 100644
index 66ab537bbe..0000000000
--- a/.github/workflows/test_wheel_trigger.yml
+++ /dev/null
@@ -1,33 +0,0 @@
-name: Test Wheel
-
-on:
-  pull_request:
-    branches:
-      - main
-    types:
-      - opened
-      - synchronize
-      - reopened
-      - labeled
-
-jobs:
-  test_wheel_nightly:
-    if: contains(github.event.pull_request.labels.*.name, 'test_wheel_nightly')
-    uses: ./.github/workflows/build_wheel.yml
-    with:
-      release_version: "nightly"
-      upload_pypi: false
-
-  test_wheel_prerelease:
-    if: contains(github.event.pull_request.labels.*.name, 'test_wheel_prerelease')
-    uses: ./.github/workflows/build_wheel.yml
-    with:
-      release_version: "prerelease"
-      upload_pypi: false
-
-  test_wheel_release:
-    if: contains(github.event.pull_request.labels.*.name, 'test_wheel_release')
-    uses: ./.github/workflows/build_wheel.yml
-    with:
-      release_version: "release"
-      upload_pypi: false
diff --git a/.jenkins/rocm/build_and_test.sh b/.jenkins/rocm/build_and_test.sh
deleted file mode 100755
index a1b44644cc..0000000000
--- a/.jenkins/rocm/build_and_test.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/bin/bash
-
-# exit immediately on failure, or if an undefined variable is used
-set -eux
-
-FBGEMM_REPO_DIR=${1:-/workspace/FBGEMM}
-
-git config --global --add safe.directory "$FBGEMM_REPO_DIR"
-git config --global --add safe.directory "$FBGEMM_REPO_DIR/third_party/asmjit"
-git config --global --add safe.directory "$FBGEMM_REPO_DIR/third_party/cpuinfo"
-git config --global --add safe.directory "$FBGEMM_REPO_DIR/third_party/googletest"
-git config --global --add safe.directory "$FBGEMM_REPO_DIR/third_party/hipify_torch"
-
-# Install dependencies
-apt-get update --allow-insecure-repositories && \
-    apt-get install -y --allow-unauthenticated \
-    git \
-    jq \
-    sshfs \
-    sshpass \
-    unzip
-
-apt-get install -y locales
-locale-gen en_US.UTF-8
-
-pip3 install click
-pip3 install jinja2
-pip3 install ninja
-# scikit-build >=0.16.5 needs a newer CMake
-pip3 install --upgrade cmake
-pip3 install scikit-build
-pip3 install --upgrade hypothesis
-pip3 install --pre torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/rocm5.3/
-
-pip3 list
-
-# Build fbgemm_gpu
-cd "$FBGEMM_REPO_DIR/fbgemm_gpu"
-MAX_JOBS="$(nproc)"
-export MAX_JOBS
-export PYTORCH_ROCM_ARCH="gfx908"
-python setup.py build develop
-
-export FBGEMM_TEST_WITH_ROCM=1
-
-# Test fbgemm_gpu
-cd test
-
-python batched_unary_embeddings_test.py --verbose
-python input_combine_test.py --verbose
-python jagged_tensor_ops_test.py --verbose
-python layout_transform_ops_test.py --verbose
-python merge_pooled_embeddings_test.py --verbose
-python metric_ops_test.py --verbose
-python permute_pooled_embedding_modules_test.py --verbose
-python quantize_ops_test.py --verbose
-python sparse_ops_test.py --verbose
-python split_embedding_inference_converter_test.py --verbose
-# test_nbit_forward_fused_pooled_emb_quant is failing. It's skipped in the test code
-python split_table_batched_embeddings_test.py --verbose
-python uvm_test.py --verbose
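
For reference, the two removed helper scripts were intended to be run back to back from the FBGEMM/ checkout root, which is how the deleted build_wheel.yml workflow chained them. A minimal local sketch based on their own usage text follows; the package name, Python/CUDA versions, Miniconda prefix, and wheel path are illustrative only.

    # Build the wheel, then install and test it (values below are examples only)
    cd FBGEMM
    bash .github/scripts/build_wheel.bash -v -o fbgemm_gpu_nightly -p 3.10 -P pytorch-nightly -c 11.7 -m "$HOME/miniconda"
    bash .github/scripts/test_wheel.bash -v -p 3.10 -P pytorch-nightly -c 11.7 -m "$HOME/miniconda" -w fbgemm_gpu/dist/fbgemm_gpu_nightly-*.whl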