forked from comfyanonymous/ComfyUI
Commit
220afe3 (0 parents)

Showing 77 changed files with 129,040 additions and 0 deletions.
@@ -0,0 +1,5 @@
__pycache__/
*.py[cod]
output/
models/checkpoints
models/vae
@@ -0,0 +1,72 @@

ComfyUI
=======
A powerful and modular stable diffusion GUI.
-----------
![ComfyUI Screenshot](comfyui_screenshot.png)

This UI lets you design and execute advanced stable diffusion pipelines using a graph/nodes/flowchart-based interface.

# Installing

Git clone this repo.

Put your SD checkpoints (the huge ckpt/safetensors files) in: models/checkpoints

Put your VAE in: models/vae

At the time of writing, pytorch has issues with Python versions higher than 3.10, so make sure your python/pip versions are 3.10.
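You can verify this with a quick check (this command is not from the original README, just a sanity check):

```python --version && pip --version```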
### AMD
AMD users can install ROCm and pytorch with pip if they aren't already installed:

```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.2```

### NVIDIA

Nvidia users should install Xformers.
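One common way to do that (not a command from the original README, and it assumes a CUDA-enabled pytorch is already installed) is:

```pip install xformers```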
### Dependencies

Install the dependencies:

```pip install -r requirements.txt```

# Running

```python main.py```

# Notes

Only parts of the graph that have an output with all the correct inputs will be executed.

Only parts of the graph that change between executions will be executed; if you submit the same graph twice, only the first submission will be executed. If you change the last part of the graph, only the part you changed and the parts that depend on it will be executed.

Dragging a generated png onto the webpage or loading one will give you the full workflow, including the seeds that were used to create it.

You can use () to change the emphasis of a word or phrase, like: (good code:1.2) or (bad code:0.8). The default emphasis for () is 1.1. To use literal () characters in your prompt, escape them like \\( or \\).
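For example, an illustrative prompt (not from the original README) that raises the weight of one phrase, lowers another, and keeps literal parentheses:

```(an astronaut riding a horse:1.2), (blurry:0.8), photo taken at dusk \\(golden hour\\)```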
### Fedora

To get Python 3.10 on Fedora:

```dnf install python3.10```

Then you can:

```python3.10 -m ensurepip```

This will let you use pip3.10 to install all the dependencies.
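For example (an illustrative command; it assumes you are inside the cloned repo):

```pip3.10 install -r requirements.txt```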
# QA

### Why did you make this?

I wanted to learn how Stable Diffusion worked in detail. I also wanted something clean and powerful that would let me experiment with SD without restrictions.

### Who is this for?

This is for anyone who wants to make complex workflows with SD or who wants to learn more about how SD works. The interface closely follows how SD works, and the code should be much simpler to understand than other SD UIs.
@@ -0,0 +1,105 @@
from functools import reduce
import math
import operator

import numpy as np
from skimage import transform
import torch
from torch import nn


def translate2d(tx, ty):
    mat = [[1, 0, tx],
           [0, 1, ty],
           [0, 0, 1]]
    return torch.tensor(mat, dtype=torch.float32)


def scale2d(sx, sy):
    mat = [[sx, 0, 0],
           [ 0, sy, 0],
           [ 0,  0, 1]]
    return torch.tensor(mat, dtype=torch.float32)


def rotate2d(theta):
    mat = [[torch.cos(theta), torch.sin(-theta), 0],
           [torch.sin(theta), torch.cos(theta),  0],
           [               0,                 0, 1]]
    return torch.tensor(mat, dtype=torch.float32)


class KarrasAugmentationPipeline:
    def __init__(self, a_prob=0.12, a_scale=2**0.2, a_aniso=2**0.2, a_trans=1/8):
        self.a_prob = a_prob
        self.a_scale = a_scale
        self.a_aniso = a_aniso
        self.a_trans = a_trans

    def __call__(self, image):
        h, w = image.size
        mats = [translate2d(h / 2 - 0.5, w / 2 - 0.5)]

        # x-flip
        a0 = torch.randint(2, []).float()
        mats.append(scale2d(1 - 2 * a0, 1))
        # y-flip
        do = (torch.rand([]) < self.a_prob).float()
        a1 = torch.randint(2, []).float() * do
        mats.append(scale2d(1, 1 - 2 * a1))
        # scaling
        do = (torch.rand([]) < self.a_prob).float()
        a2 = torch.randn([]) * do
        mats.append(scale2d(self.a_scale ** a2, self.a_scale ** a2))
        # rotation
        do = (torch.rand([]) < self.a_prob).float()
        a3 = (torch.rand([]) * 2 * math.pi - math.pi) * do
        mats.append(rotate2d(-a3))
        # anisotropy
        do = (torch.rand([]) < self.a_prob).float()
        a4 = (torch.rand([]) * 2 * math.pi - math.pi) * do
        a5 = torch.randn([]) * do
        mats.append(rotate2d(a4))
        mats.append(scale2d(self.a_aniso ** a5, self.a_aniso ** -a5))
        mats.append(rotate2d(-a4))
        # translation
        do = (torch.rand([]) < self.a_prob).float()
        a6 = torch.randn([]) * do
        a7 = torch.randn([]) * do
        mats.append(translate2d(self.a_trans * w * a6, self.a_trans * h * a7))

        # form the transformation matrix and conditioning vector
        mats.append(translate2d(-h / 2 + 0.5, -w / 2 + 0.5))
        mat = reduce(operator.matmul, mats)
        cond = torch.stack([a0, a1, a2, a3.cos() - 1, a3.sin(), a5 * a4.cos(), a5 * a4.sin(), a6, a7])

        # apply the transformation
        image_orig = np.array(image, dtype=np.float32) / 255
        if image_orig.ndim == 2:
            image_orig = image_orig[..., None]
        tf = transform.AffineTransform(mat.numpy())
        image = transform.warp(image_orig, tf.inverse, order=3, mode='reflect', cval=0.5, clip=False, preserve_range=True)
        image_orig = torch.as_tensor(image_orig).movedim(2, 0) * 2 - 1
        image = torch.as_tensor(image).movedim(2, 0) * 2 - 1
        return image, image_orig, cond


class KarrasAugmentWrapper(nn.Module):
    def __init__(self, model):
        super().__init__()
        self.inner_model = model

    def forward(self, input, sigma, aug_cond=None, mapping_cond=None, **kwargs):
        if aug_cond is None:
            aug_cond = input.new_zeros([input.shape[0], 9])
        if mapping_cond is None:
            mapping_cond = aug_cond
        else:
            mapping_cond = torch.cat([aug_cond, mapping_cond], dim=1)
        return self.inner_model(input, sigma, mapping_cond=mapping_cond, **kwargs)

    def set_skip_stages(self, skip_stages):
        return self.inner_model.set_skip_stages(skip_stages)

    def set_patch_size(self, patch_size):
        return self.inner_model.set_patch_size(patch_size)
@@ -0,0 +1,110 @@
from functools import partial
import json
import math
import warnings

from jsonmerge import merge

from . import augmentation, layers, models, utils


def load_config(file):
    defaults = {
        'model': {
            'sigma_data': 1.,
            'patch_size': 1,
            'dropout_rate': 0.,
            'augment_wrapper': True,
            'augment_prob': 0.,
            'mapping_cond_dim': 0,
            'unet_cond_dim': 0,
            'cross_cond_dim': 0,
            'cross_attn_depths': None,
            'skip_stages': 0,
            'has_variance': False,
        },
        'dataset': {
            'type': 'imagefolder',
        },
        'optimizer': {
            'type': 'adamw',
            'lr': 1e-4,
            'betas': [0.95, 0.999],
            'eps': 1e-6,
            'weight_decay': 1e-3,
        },
        'lr_sched': {
            'type': 'inverse',
            'inv_gamma': 20000.,
            'power': 1.,
            'warmup': 0.99,
        },
        'ema_sched': {
            'type': 'inverse',
            'power': 0.6667,
            'max_value': 0.9999
        },
    }
    config = json.load(file)
    return merge(defaults, config)


def make_model(config):
    config = config['model']
    assert config['type'] == 'image_v1'
    model = models.ImageDenoiserModelV1(
        config['input_channels'],
        config['mapping_out'],
        config['depths'],
        config['channels'],
        config['self_attn_depths'],
        config['cross_attn_depths'],
        patch_size=config['patch_size'],
        dropout_rate=config['dropout_rate'],
        mapping_cond_dim=config['mapping_cond_dim'] + (9 if config['augment_wrapper'] else 0),
        unet_cond_dim=config['unet_cond_dim'],
        cross_cond_dim=config['cross_cond_dim'],
        skip_stages=config['skip_stages'],
        has_variance=config['has_variance'],
    )
    if config['augment_wrapper']:
        model = augmentation.KarrasAugmentWrapper(model)
    return model


def make_denoiser_wrapper(config):
    config = config['model']
    sigma_data = config.get('sigma_data', 1.)
    has_variance = config.get('has_variance', False)
    if not has_variance:
        return partial(layers.Denoiser, sigma_data=sigma_data)
    return partial(layers.DenoiserWithVariance, sigma_data=sigma_data)


def make_sample_density(config):
    sd_config = config['sigma_sample_density']
    sigma_data = config['sigma_data']
    if sd_config['type'] == 'lognormal':
        loc = sd_config['mean'] if 'mean' in sd_config else sd_config['loc']
        scale = sd_config['std'] if 'std' in sd_config else sd_config['scale']
        return partial(utils.rand_log_normal, loc=loc, scale=scale)
    if sd_config['type'] == 'loglogistic':
        loc = sd_config['loc'] if 'loc' in sd_config else math.log(sigma_data)
        scale = sd_config['scale'] if 'scale' in sd_config else 0.5
        min_value = sd_config['min_value'] if 'min_value' in sd_config else 0.
        max_value = sd_config['max_value'] if 'max_value' in sd_config else float('inf')
        return partial(utils.rand_log_logistic, loc=loc, scale=scale, min_value=min_value, max_value=max_value)
    if sd_config['type'] == 'loguniform':
        min_value = sd_config['min_value'] if 'min_value' in sd_config else config['sigma_min']
        max_value = sd_config['max_value'] if 'max_value' in sd_config else config['sigma_max']
        return partial(utils.rand_log_uniform, min_value=min_value, max_value=max_value)
    if sd_config['type'] == 'v-diffusion':
        min_value = sd_config['min_value'] if 'min_value' in sd_config else 0.
        max_value = sd_config['max_value'] if 'max_value' in sd_config else float('inf')
        return partial(utils.rand_v_diffusion, sigma_data=sigma_data, min_value=min_value, max_value=max_value)
    if sd_config['type'] == 'split-lognormal':
        loc = sd_config['mean'] if 'mean' in sd_config else sd_config['loc']
        scale_1 = sd_config['std_1'] if 'std_1' in sd_config else sd_config['scale_1']
        scale_2 = sd_config['std_2'] if 'std_2' in sd_config else sd_config['scale_2']
        return partial(utils.rand_split_log_normal, loc=loc, scale_1=scale_1, scale_2=scale_2)
    raise ValueError('Unknown sample density type')
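A hypothetical usage sketch for the helpers above (not part of the commit; the JSON values are illustrative, and any real config must supply every key that make_model reads):

```
# Hypothetical sketch: build a model from an in-memory JSON config using the
# helpers above. All numeric values here are illustrative, not defaults.
import io
import json

cfg = {
    "model": {
        "type": "image_v1",
        "input_channels": 3,
        "mapping_out": 256,
        "depths": [2, 2, 4],
        "channels": [128, 256, 512],
        "self_attn_depths": [False, False, True],
        "sigma_data": 0.5,
        "sigma_sample_density": {"type": "lognormal", "mean": -1.2, "std": 1.2},
    }
}

config = load_config(io.StringIO(json.dumps(cfg)))      # merges in the defaults above
model = make_model(config)                              # ImageDenoiserModelV1, wrapped by KarrasAugmentWrapper
denoiser_cls = make_denoiser_wrapper(config)            # partial of layers.Denoiser (no variance head here)
sample_density = make_sample_density(config["model"])   # callable that draws training sigmas
```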