lllyasviel · KamilMatejuk · Jul 22, 2023
diff --git a/annotator/hed/__init__.py b/annotator/hed/__init__.py
@@ -12,6 +12,7 @@
 
 from einops import rearrange
 from annotator.util import annotator_ckpts_path
+import config
 
 
 class DoubleConvBlock(torch.nn.Module):
@@ -60,14 +61,14 @@ def __init__(self):
         if not os.path.exists(modelpath):
             from basicsr.utils.download_util import load_file_from_url
             load_file_from_url(remote_model_path, model_dir=annotator_ckpts_path)
-        self.netNetwork = ControlNetHED_Apache2().float().cuda().eval()
+        self.netNetwork = ControlNetHED_Apache2().float().to(config.device).eval()
         self.netNetwork.load_state_dict(torch.load(modelpath))
 
     def __call__(self, input_image):
         assert input_image.ndim == 3
         H, W, C = input_image.shape
         with torch.no_grad():
-            image_hed = torch.from_numpy(input_image.copy()).float().cuda()
+            image_hed = torch.from_numpy(input_image.copy()).float().to(config.device)
             image_hed = rearrange(image_hed, 'h w c -> 1 c h w')
             edges = self.netNetwork(image_hed)
             edges = [e.detach().cpu().numpy().astype(np.float32)[0, 0] for e in edges]

diff --git a/annotator/midas/__init__.py b/annotator/midas/__init__.py
@@ -8,17 +8,18 @@
 
 from einops import rearrange
 from .api import MiDaSInference
+import config
 
 
 class MidasDetector:
     def __init__(self):
-        self.model = MiDaSInference(model_type="dpt_hybrid").cuda()
+        self.model = MiDaSInference(model_type="dpt_hybrid").to(config.device)
 
     def __call__(self, input_image, a=np.pi * 2.0, bg_th=0.1):
         assert input_image.ndim == 3
         image_depth = input_image
         with torch.no_grad():
-            image_depth = torch.from_numpy(image_depth).float().cuda()
+            image_depth = torch.from_numpy(image_depth).float().to(config.device)
             image_depth = image_depth / 127.5 - 1.0
             image_depth = rearrange(image_depth, 'h w c -> 1 c h w')
             depth = self.model(image_depth)[0]

diff --git a/annotator/mlsd/__init__.py b/annotator/mlsd/__init__.py
@@ -13,6 +13,7 @@
 from .utils import pred_lines
 
 from annotator.util import annotator_ckpts_path
+import config
 
 
 remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/mlsd_large_512_fp32.pth"
@@ -26,7 +27,7 @@ def __init__(self):
             load_file_from_url(remote_model_path, model_dir=annotator_ckpts_path)
         model = MobileV2_MLSD_Large()
         model.load_state_dict(torch.load(model_path), strict=True)
-        self.model = model.cuda().eval()
+        self.model = model.to(config.device).eval()
 
     def __call__(self, input_image, thr_v, thr_d):
         assert input_image.ndim == 3

diff --git a/annotator/mlsd/utils.py b/annotator/mlsd/utils.py
@@ -14,6 +14,7 @@
 import cv2
 import torch
 from  torch.nn import  functional as F
+import config
 
 
 def deccode_output_score_and_ptss(tpMap, topk_n = 200, ksize = 5):
@@ -58,7 +59,7 @@ def pred_lines(image, model,
     batch_image = np.expand_dims(resized_image, axis=0).astype('float32')
     batch_image = (batch_image / 127.5) - 1.0
 
-    batch_image = torch.from_numpy(batch_image).float().cuda()
+    batch_image = torch.from_numpy(batch_image).float().to(config.device)
     outputs = model(batch_image)
     pts, pts_score, vmap = deccode_output_score_and_ptss(outputs, 200, 3)
     start = vmap[:, :, :2]
@@ -109,7 +110,7 @@ def pred_squares(image,
     batch_image = np.expand_dims(resized_image, axis=0).astype('float32')
     batch_image = (batch_image / 127.5) - 1.0
 
-    batch_image = torch.from_numpy(batch_image).float().cuda()
+    batch_image = torch.from_numpy(batch_image).float().to(config.device)
     outputs = model(batch_image)
 
     pts, pts_score, vmap = deccode_output_score_and_ptss(outputs, 200, 3)

diff --git a/annotator/openpose/body.py b/annotator/openpose/body.py
@@ -10,13 +10,14 @@
 
 from . import util
 from .model import bodypose_model
+import config
 
 class Body(object):
     def __init__(self, model_path):
         self.model = bodypose_model()
         if torch.cuda.is_available():
-            self.model = self.model.cuda()
-            print('cuda')
+            self.model = self.model.to(config.device)
+
         model_dict = util.transfer(self.model, torch.load(model_path))
         self.model.load_state_dict(model_dict)
         self.model.eval()
@@ -42,7 +43,7 @@ def __call__(self, oriImg):
 
             data = torch.from_numpy(im).float()
             if torch.cuda.is_available():
-                data = data.cuda()
+                data = data.to(config.device)
             # data = data.permute([2, 0, 1]).unsqueeze(0).float()
             with torch.no_grad():
                 Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)

diff --git a/annotator/openpose/hand.py b/annotator/openpose/hand.py
@@ -11,13 +11,13 @@
 
 from .model import handpose_model
 from . import util
+import config
 
 class Hand(object):
     def __init__(self, model_path):
         self.model = handpose_model()
         if torch.cuda.is_available():
-            self.model = self.model.cuda()
-            print('cuda')
+            self.model = self.model.to(config.device)
         model_dict = util.transfer(self.model, torch.load(model_path))
         self.model.load_state_dict(model_dict)
         self.model.eval()
@@ -42,7 +42,7 @@ def __call__(self, oriImg):
 
             data = torch.from_numpy(im).float()
             if torch.cuda.is_available():
-                data = data.cuda()
+                data = data.to(config.device)
             # data = data.permute([2, 0, 1]).unsqueeze(0).float()
             with torch.no_grad():
                 output = self.model(data).cpu().numpy()

diff --git a/annotator/uniformer/__init__.py b/annotator/uniformer/__init__.py
@@ -7,6 +7,7 @@
 from annotator.uniformer.mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot
 from annotator.uniformer.mmseg.core.evaluation import get_palette
 from annotator.util import annotator_ckpts_path
+import config
 
 
 checkpoint_file = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/upernet_global_small.pth"
@@ -19,7 +20,7 @@ def __init__(self):
             from basicsr.utils.download_util import load_file_from_url
             load_file_from_url(checkpoint_file, model_dir=annotator_ckpts_path)
         config_file = os.path.join(os.path.dirname(annotator_ckpts_path), "uniformer", "exp", "upernet_global_small", "config.py")
-        self.model = init_segmentor(config_file, modelpath).cuda()
+        self.model = init_segmentor(config_file, modelpath).to(config.device)
 
     def __call__(self, img):
         result = inference_segmentor(self.model, img)

diff --git a/annotator/uniformer/mmcv/engine/test.py b/annotator/uniformer/mmcv/engine/test.py
@@ -10,6 +10,7 @@
 
 import annotator.uniformer.mmcv as mmcv
 from annotator.uniformer.mmcv.runner import get_dist_info
+import config
 
 
 def single_gpu_test(model, data_loader):
@@ -114,12 +115,12 @@ def collect_results_cpu(result_part, size, tmpdir=None):
         dir_tensor = torch.full((MAX_LEN, ),
                                 32,
                                 dtype=torch.uint8,
-                                device='cuda')
+                                device=config.device)
         if rank == 0:
             mmcv.mkdir_or_exist('.dist_test')
             tmpdir = tempfile.mkdtemp(dir='.dist_test')
             tmpdir = torch.tensor(
-                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
+                bytearray(tmpdir.encode()), dtype=torch.uint8, device=config.device)
             dir_tensor[:len(tmpdir)] = tmpdir
         dist.broadcast(dir_tensor, 0)
         tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
@@ -170,14 +171,14 @@ def collect_results_gpu(result_part, size):
     rank, world_size = get_dist_info()
     # dump result part to tensor with pickle
     part_tensor = torch.tensor(
-        bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
+        bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device=config.device)
     # gather all result part tensor shape
-    shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
+    shape_tensor = torch.tensor(part_tensor.shape, device=config.device)
     shape_list = [shape_tensor.clone() for _ in range(world_size)]
     dist.all_gather(shape_list, shape_tensor)
     # padding result part tensor to max length
     shape_max = torch.tensor(shape_list).max()
-    part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
+    part_send = torch.zeros(shape_max, dtype=torch.uint8, device=config.device)
     part_send[:shape_tensor[0]] = part_tensor
     part_recv_list = [
         part_tensor.new_zeros(shape_max) for _ in range(world_size)

diff --git a/annotator/uniformer/mmseg/apis/inference.py b/annotator/uniformer/mmseg/apis/inference.py
@@ -6,9 +6,10 @@
 
 from annotator.uniformer.mmseg.datasets.pipelines import Compose
 from annotator.uniformer.mmseg.models import build_segmentor
+import config
 
 
-def init_segmentor(config, checkpoint=None, device='cuda:0'):
+def init_segmentor(config, checkpoint=None, device=config.device):
     """Initialize a segmentor from config file.
 
     Args:

diff --git a/annotator/uniformer/mmseg/apis/test.py b/annotator/uniformer/mmseg/apis/test.py
@@ -9,6 +9,7 @@
 import torch.distributed as dist
 from annotator.uniformer.mmcv.image import tensor2imgs
 from annotator.uniformer.mmcv.runner import get_dist_info
+import config
 
 
 def np2tmp(array, temp_file_name=None):
@@ -171,11 +172,11 @@ def collect_results_cpu(result_part, size, tmpdir=None):
         dir_tensor = torch.full((MAX_LEN, ),
                                 32,
                                 dtype=torch.uint8,
-                                device='cuda')
+                                device=config.device)
         if rank == 0:
             tmpdir = tempfile.mkdtemp()
             tmpdir = torch.tensor(
-                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
+                bytearray(tmpdir.encode()), dtype=torch.uint8, device=config.device)
             dir_tensor[:len(tmpdir)] = tmpdir
         dist.broadcast(dir_tensor, 0)
         tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
@@ -209,14 +210,14 @@ def collect_results_gpu(result_part, size):
     rank, world_size = get_dist_info()
     # dump result part to tensor with pickle
     part_tensor = torch.tensor(
-        bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
+        bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device=config.device)
     # gather all result part tensor shape
-    shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
+    shape_tensor = torch.tensor(part_tensor.shape, device=config.device)
     shape_list = [shape_tensor.clone() for _ in range(world_size)]
     dist.all_gather(shape_list, shape_tensor)
     # padding result part tensor to max length
     shape_max = torch.tensor(shape_list).max()
-    part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
+    part_send = torch.zeros(shape_max, dtype=torch.uint8, device=config.device)
     part_send[:shape_tensor[0]] = part_tensor
     part_recv_list = [
         part_tensor.new_zeros(shape_max) for _ in range(world_size)

diff --git a/annotator/uniformer/mmseg/apis/train.py b/annotator/uniformer/mmseg/apis/train.py
@@ -9,6 +9,7 @@
 from annotator.uniformer.mmseg.core import DistEvalHook, EvalHook
 from annotator.uniformer.mmseg.datasets import build_dataloader, build_dataset
 from annotator.uniformer.mmseg.utils import get_root_logger
+import config
 
 
 def set_random_seed(seed, deterministic=False):
@@ -60,7 +61,7 @@ def train_segmentor(model,
         # Sets the `find_unused_parameters` parameter in
         # torch.nn.parallel.DistributedDataParallel
         model = MMDistributedDataParallel(
-            model.cuda(),
+            model.to(config.device),
             device_ids=[torch.cuda.current_device()],
             broadcast_buffers=False,
             find_unused_parameters=find_unused_parameters)

diff --git a/cldm/cldm.py b/cldm/cldm.py
@@ -17,6 +17,7 @@
 from ldm.models.diffusion.ddpm import LatentDiffusion
 from ldm.util import log_txt_as_img, exists, instantiate_from_config
 from ldm.models.diffusion.ddim import DDIMSampler
+import config
 
 
 class ControlledUnetModel(UNetModel):
@@ -424,12 +425,12 @@ def configure_optimizers(self):
 
     def low_vram_shift(self, is_diffusing):
         if is_diffusing:
-            self.model = self.model.cuda()
-            self.control_model = self.control_model.cuda()
+            self.model = self.model.to(config.device)
+            self.control_model = self.control_model.to(config.device)
             self.first_stage_model = self.first_stage_model.cpu()
             self.cond_stage_model = self.cond_stage_model.cpu()
         else:
             self.model = self.model.cpu()
             self.control_model = self.control_model.cpu()
-            self.first_stage_model = self.first_stage_model.cuda()
-            self.cond_stage_model = self.cond_stage_model.cuda()
+            self.first_stage_model = self.first_stage_model.to(config.device)
+            self.cond_stage_model = self.cond_stage_model.to(config.device)
diff --git a/config.py b/config.py
@@ -1 +1,2 @@
 save_memory = False
+device = 'cuda' # 'cpu'
diff --git a/gradio_canny2image.py b/gradio_canny2image.py
@@ -19,7 +19,7 @@
 
 model = create_model('./models/cldm_v15.yaml').cpu()
 model.load_state_dict(load_state_dict('./models/control_sd15_canny.pth', location='cuda'))
-model = model.cuda()
+model = model.to(config.device)
 ddim_sampler = DDIMSampler(model)
 
 
@@ -31,7 +31,7 @@ def process(input_image, prompt, a_prompt, n_prompt, num_samples, image_resoluti
         detected_map = apply_canny(img, low_threshold, high_threshold)
         detected_map = HWC3(detected_map)
 
-        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
+        control = torch.from_numpy(detected_map.copy()).float().to(config.device) / 255.0
         control = torch.stack([control for _ in range(num_samples)], dim=0)
         control = einops.rearrange(control, 'b h w c -> b c h w').clone()
 

diff --git a/gradio_depth2image.py b/gradio_depth2image.py
@@ -19,7 +19,7 @@
 
 model = create_model('./models/cldm_v15.yaml').cpu()
 model.load_state_dict(load_state_dict('./models/control_sd15_depth.pth', location='cuda'))
-model = model.cuda()
+model = model.to(config.device)
 ddim_sampler = DDIMSampler(model)
 
 
@@ -33,7 +33,7 @@ def process(input_image, prompt, a_prompt, n_prompt, num_samples, image_resoluti
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
 
-        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
+        control = torch.from_numpy(detected_map.copy()).float().to(config.device) / 255.0
         control = torch.stack([control for _ in range(num_samples)], dim=0)
         control = einops.rearrange(control, 'b h w c -> b c h w').clone()
 

diff --git a/gradio_fake_scribble2image.py b/gradio_fake_scribble2image.py
@@ -19,7 +19,7 @@
 
 model = create_model('./models/cldm_v15.yaml').cpu()
 model.load_state_dict(load_state_dict('./models/control_sd15_scribble.pth', location='cuda'))
-model = model.cuda()
+model = model.to(config.device)
 ddim_sampler = DDIMSampler(model)
 
 
@@ -37,7 +37,7 @@ def process(input_image, prompt, a_prompt, n_prompt, num_samples, image_resoluti
         detected_map[detected_map > 4] = 255
         detected_map[detected_map < 255] = 0
 
-        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
+        control = torch.from_numpy(detected_map.copy()).float().to(config.device) / 255.0
         control = torch.stack([control for _ in range(num_samples)], dim=0)
         control = einops.rearrange(control, 'b h w c -> b c h w').clone()
 

diff --git a/gradio_hed2image.py b/gradio_hed2image.py
@@ -19,7 +19,7 @@
 
 model = create_model('./models/cldm_v15.yaml').cpu()
 model.load_state_dict(load_state_dict('./models/control_sd15_hed.pth', location='cuda'))
-model = model.cuda()
+model = model.to(config.device)
 ddim_sampler = DDIMSampler(model)
 
 
@@ -33,7 +33,7 @@ def process(input_image, prompt, a_prompt, n_prompt, num_samples, image_resoluti
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
 
-        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
+        control = torch.from_numpy(detected_map.copy()).float().to(config.device) / 255.0
         control = torch.stack([control for _ in range(num_samples)], dim=0)
         control = einops.rearrange(control, 'b h w c -> b c h w').clone()
 

diff --git a/gradio_hough2image.py b/gradio_hough2image.py
@@ -19,7 +19,7 @@
 
 model = create_model('./models/cldm_v15.yaml').cpu()
 model.load_state_dict(load_state_dict('./models/control_sd15_mlsd.pth', location='cuda'))
-model = model.cuda()
+model = model.to(config.device)
 ddim_sampler = DDIMSampler(model)
 
 
@@ -33,7 +33,7 @@ def process(input_image, prompt, a_prompt, n_prompt, num_samples, image_resoluti
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_NEAREST)
 
-        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
+        control = torch.from_numpy(detected_map.copy()).float().to(config.device) / 255.0
         control = torch.stack([control for _ in range(num_samples)], dim=0)
         control = einops.rearrange(control, 'b h w c -> b c h w').clone()