
Commit

Merge branch 'main' of github.com:chengzeyi/stable-fast
chengzeyi committed Feb 20, 2024
2 parents 94c5548 + ce7de20 commit 7e36d62
Showing 5 changed files with 92 additions and 18 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/wheels.yml
@@ -31,8 +31,7 @@ jobs:
- "3.10"
- "3.11"
torch_version:
- "2.1.1"
- "2.1.2"
- "2.2.0"
cuda_short_version:
- "118"
- "121"
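For context, the strategy matrix above fans out into one wheel-build job per (python, torch_version, cuda_short_version) combination, so dropping 2.1.1/2.1.2 and adding 2.2.0 changes which wheels get built. A minimal Python sketch of that expansion, using only the matrix entries visible in this diff (the collapsed lines above may contain more):

# Illustrative only: mirrors how the GitHub Actions matrix above expands into
# individual build jobs. The lists contain just the entries visible in this
# diff and may be incomplete.
from itertools import product

python_versions = ["3.10", "3.11"]
torch_versions = ["2.2.0"]            # "2.1.1" and "2.1.2" were dropped here
cuda_short_versions = ["118", "121"]

for py, torch_version, cuda in product(python_versions, torch_versions, cuda_short_versions):
    print(f"build wheel: python {py}, torch {torch_version}, cu{cuda}")
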
45 changes: 39 additions & 6 deletions .github/workflows/wheels_build.yml
@@ -66,7 +66,7 @@ jobs:
      # windows does not have per version binary, it is just 'python3'
      PY: python${{ contains(inputs.os, 'ubuntu') && inputs.python || '3' }}

-   container: ${{ contains(inputs.os, 'ubuntu') && 'quay.io/pypa/manylinux2014_x86_64' || null }}
+   # container: ${{ contains(inputs.os, 'ubuntu') && 'quay.io/pypa/manylinux2014_x86_64' || null }}
    timeout-minutes: 360
    defaults:
      run:
@@ -117,14 +117,46 @@ jobs:
fp.write("TORCH_CUDA_ARCH_LIST=" + arch_list + "\n")
- run: echo "${TORCH_CUDA_ARCH_LIST}"

- if: contains(inputs.os, 'ubuntu')
name: Free Disk Space (Ubuntu)
uses: jlumbroso/[email protected]
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: false

# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true

- if: runner.os == 'Linux'
name: (Linux) install cuda
run: >
yum install wget git prename -y &&
yum clean all --verbose &&
wget -q "${{ steps.cuda_info.outputs.CUDA_INSTALL_SCRIPT }}" -O cuda.run &&
sh ./cuda.run --silent --toolkit &&
run: |
# yum install wget git prename -y
# yum clean all --verbose
sudo apt update
sudo apt install -y wget git rename
sudo apt clean -y
sudo apt autoremove -y
wget -q "${{ steps.cuda_info.outputs.CUDA_INSTALL_SCRIPT }}" -O cuda.run
sudo sh ./cuda.run --silent --toolkit --toolkitpath=/usr/local/cuda || cat /tmp/cuda-installer.log
rm ./cuda.run
echo "CUDA_HOME=/usr/local/cuda" >> ${GITHUB_ENV}
echo "PATH=/usr/local/cuda/bin:$PATH" >> ${GITHUB_ENV}
- if: runner.os == 'Linux'
name: (Linux) install python
run: |
sudo add-apt-repository ppa:deadsnakes/ppa -y
sudo apt update
sudo apt install -y python${{ inputs.python }} python${{ inputs.python }}-dev python${{ inputs.python }}-venv
sudo apt clean -y
sudo apt autoremove -y
- name: Recursive checkout
uses: actions/checkout@v3
@@ -182,6 +214,7 @@ jobs:
        run: |
          cudnn_next_version_major=$((${CUDNN_VERSION_MAJOR} + 1))
          cudnn_package_name="${CUDNN_PYPI_PACKAGE}>=${CUDNN_VERSION_MAJOR}.0.0.0,<${cudnn_next_version_major}.0.0.0"
+         $PY -m pip install --upgrade pip
          $PY -m pip install wheel setuptools ninja twine "torch==${{ inputs.torch_version }}" "${cudnn_package_name}" -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu${{ inputs.cuda_short_version }} --no-cache-dir
      - name: Build wheel
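The two shell lines above derive a version range that pins the cuDNN wheel to its current major release before torch is installed. A small Python sketch of the same construction; the package name and major version below are illustrative stand-ins for the workflow's CUDNN_PYPI_PACKAGE and CUDNN_VERSION_MAJOR environment variables, not its actual settings:

# Mirrors the cudnn_package_name shell logic above: pin cuDNN to its current
# major version by bounding it below the next major. The arguments here are
# example values, not the workflow's actual settings.
def cudnn_requirement(package: str, version_major: int) -> str:
    next_major = version_major + 1
    return f"{package}>={version_major}.0.0.0,<{next_major}.0.0.0"

print(cudnn_requirement("nvidia-cudnn-cu12", 8))
# -> nvidia-cudnn-cu12>=8.0.0.0,<9.0.0.0
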
37 changes: 37 additions & 0 deletions community/reproduce_vae_segfault.py
@@ -0,0 +1,37 @@
import torch
import torch.nn.functional as F

from diffusers import AutoencoderKL

from sfast.compilers.stable_diffusion_pipeline_compiler import (
    compile_vae,
    CompilationConfig,
)

device = torch.device("cuda:0")

SD_2_1_DIFFUSERS_MODEL = "stabilityai/stable-diffusion-2-1"
variant = {"variant": "fp16"}
vae_orig = AutoencoderKL.from_pretrained(
    SD_2_1_DIFFUSERS_MODEL,
    subfolder="vae",
    torch_dtype=torch.float16,
    **variant,
)

vae_orig.to(device)

sfast_config = CompilationConfig.Default()
sfast_config.enable_xformers = False
sfast_config.enable_triton = True
sfast_config.enable_cuda_graph = False
vae = compile_vae(vae_orig, sfast_config)

sample_imgs = torch.randn(4, 3, 128, 128, dtype=vae.dtype, device=device)
latents1 = torch.randn(4, 4, 16, 16, dtype=vae.dtype, device=device)

latents = vae.encode(sample_imgs).latent_dist.sample()

sample_imgs_dup = sample_imgs.clone().detach().requires_grad_(True)
latents2 = vae_orig.encode(sample_imgs_dup).latent_dist.sample()
print("Test done")
18 changes: 10 additions & 8 deletions src/sfast/csrc/jit/python_operator.cpp
@@ -28,16 +28,18 @@ void RegisterCustomPythonOperator(const std::string &schema,
  auto arguments = parsed_schema.arguments();
  auto returns = parsed_schema.returns();

- std::shared_ptr<py::function> func_ptr(
+ std::shared_ptr<const py::function> func_ptr(
      new py::function(py::reinterpret_borrow<const py::function>(
-         py::handle(const_cast<PyObject *>(py_callable.get())))),
+         py::handle(py_callable.get()))),
      [](py::function *ptr) {
-       // Check if the current thread is holding the GIL
-       if (PyGILState_Check()) {
-         delete ptr;
-       } else {
-         py::gil_scoped_acquire gil;
-         delete ptr;
+       if (Py_IsInitialized()) {
+         // Check if the current thread is holding the GIL
+         if (PyGILState_Check()) {
+           delete ptr;
+         } else {
+           py::gil_scoped_acquire gil;
+           delete ptr;
+         }
        }
      });

7 changes: 5 additions & 2 deletions src/sfast/jit/utils.py
@@ -1,9 +1,12 @@
+import logging
import inspect
import functools
import torch
import sfast
from .overrides import TracingMode

+logger = logging.getLogger()


class ScriptModuleClearHook:

@@ -13,8 +16,8 @@ def __init__(self, script_module_c):
    def __del__(self):
        try:
            sfast._C._jit_clear_class_type_registration(self.class_type)
-       except Exception:
-           pass
+       except Exception as e:
+           logger.warning(f'Failed to clear class type registration: {e}')


def attach_script_module_clear_hook(
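A note on the utils.py change above: the previously silent failure is now reported through the root logger, so downstream code can surface it with the standard logging module; for example (illustrative only, not part of the commit):

# Illustrative only: the warning added above goes through logging.getLogger(),
# i.e. the root logger, so a standard logging setup in the calling script
# makes previously silent clear-hook failures visible.
import logging

logging.basicConfig(level=logging.WARNING,
                    format="%(levelname)s %(name)s: %(message)s")
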
