diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..d9b6166 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,32 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. Linux, Windows 10] + - Browser [e.g. Chrome, Safari, Brave] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bbcbbe7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.gitignore b/.gitignore index 95674ac..c69a763 100644 --- a/.gitignore +++ b/.gitignore @@ -5,9 +5,13 @@ # VQGAN weights assets/*.ckpt assets/*.yaml +assets/* # Outputs output* +# samples +samples/* + # Test data test-samples/ diff --git a/.streamlit/config.toml b/.streamlit/config.toml index 011d1f8..5456a34 100644 --- a/.streamlit/config.toml +++ b/.streamlit/config.toml @@ -1,3 +1,3 @@ [server] # Default is 200 MB -maxUploadSize = 10 +maxUploadSize = 5000 diff --git a/app.py b/app.py index 38558df..e164561 100644 --- a/app.py +++ b/app.py @@ -11,10 +11,12 @@ import datetime import shutil import torch -import json -import os +import json, time +import os, io, timeit import base64 import traceback +import clip +from stqdm import stqdm import argparse @@ -25,6 +27,7 @@ from omegaconf import OmegaConf import imageio import numpy as np +from retry import retry # Catch import issue, introduced in version 1.1 # Deprecate in a few minor versions @@ -43,10 +46,18 @@ except ModuleNotFoundError: pass +#os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:30" + +@retry((PermissionError, OSError), tries=5, delay=2) +def save_preview_image(im): + """Save the preview image and in case we get any error try again.""" + # Save prev_im.png + im.save(f"prev_image.png", format='PNG') def generate_image( text_input: str = "the first day of the waters", vqgan_ckpt: str = "vqgan_imagenet_f16_16384", + clip_model: str = "ViT-B/32", num_steps: int = 300, image_x: int = 300, image_y: int = 300, @@ -54,6 +65,10 @@ def generate_image( image_prompts: List[Image.Image] = [], continue_prev_run: bool = False, seed: Optional[int] = None, + cutn: int = 32, + cut_pow: float = 1.0, + step_size: float = 0.05, + opt_name: str = "Adam", mse_weight: float = 0, mse_weight_decay: 
float = 0, mse_weight_decay_steps: int = 0, @@ -72,10 +87,15 @@ def generate_image( run = VQGANCLIPRun( text_input=text_input, vqgan_ckpt=vqgan_ckpt, + clip_model=clip_model, num_steps=num_steps, image_x=image_x, image_y=image_y, seed=seed, + cutn=cutn, + cut_pow=cut_pow, + step_size=step_size, + opt_name=opt_name, init_image=init_image, image_prompts=image_prompts, continue_prev_run=continue_prev_run, @@ -125,7 +145,10 @@ def generate_image( ### Model init ------------------------------------------------------------- if continue_prev_run is True: - run.model_init(init_image=st.session_state["prev_im"]) + try: + run.model_init(init_image=st.session_state["prev_im"]) + except KeyError: + run.model_init(init_image=Image.open("prev_image.png")) elif init_image is not None: run.model_init(init_image=init_image) else: @@ -136,32 +159,56 @@ def generate_image( frames = [] try: - # Try block catches st.script_runner.StopExecution, no need of a dedicated stop button + # Try block catches st.StopException, no need of a dedicated stop button # Reason is st.form is meant to be self-contained either within sidebar, or in main body # The way the form is implemented in this app splits the form across both regions # This is intended to prevent the model settings from crowding the main body # However, touching any button resets the app state, making it impossible to # implement a stop button that can still dump output # Thankfully there's a built-in stop button :) + while True: # While loop to accommodate running predetermined steps or running indefinitely - status_text.text(f"Running step {step_counter}") + + # Time each loop iteration so we know how long each image generation step took. + start = timeit.default_timer() - _, im = run.iterate() + _, im = run.iterate() if num_steps > 0: # skip when num_steps = -1 step_progress_bar.progress((step_counter + 1) / num_steps) else: step_progress_bar.progress(100) + + duration = timeit.default_timer() - start + + if duration >= 1: + speed = "s/it" + else: + speed = "it/s" + duration = 1 / duration + + if num_steps > 0: + total_number_steps = f"/{num_steps}" + percent = f"{100 * float(step_counter)/float(num_steps):.2f}%" + else: + total_number_steps = "" + percent = "" + + status_text.text(f"Running step: {step_counter}{total_number_steps} {percent} | {duration:.2f}{speed}") # At every step, display and save image im_display_slot.image(im, caption="Output image", output_format="PNG") st.session_state["prev_im"] = im - + + # Save the preview image through save_preview_image() so the write is + # retried in case of an error opening or saving the file. + save_preview_image(im) + # ref: https://stackoverflow.com/a/33117447/13095028 - # im_byte_arr = io.BytesIO() - # im.save(im_byte_arr, format="JPEG") - # frames.append(im_byte_arr.getvalue()) # read() + #im_byte_arr = io.BytesIO() + #im.save(im_byte_arr, format="JPEG") + #frames.append(im_byte_arr.getvalue()) # read() frames.append(np.asarray(im)) step_counter += 1 @@ -182,16 +229,28 @@ def generate_image( runoutputdir.mkdir() # Save final image - im.save(runoutputdir / "output.PNG", format="PNG") + im.save(runoutputdir / "output.png", format="PNG")
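# ----------------------------------------------------------------------------
# Review sketch (not part of the diff): the step-timing readout added above
# reports seconds per iteration ("s/it") when a step takes a second or more,
# and the reciprocal iterations per second ("it/s") otherwise. The same logic
# as a standalone helper; the name `format_step_speed` is illustrative only.
import timeit

def format_step_speed(duration: float) -> str:
    """Return a tqdm-style speed string for an iteration lasting `duration` seconds."""
    if duration >= 1:
        return f"{duration:.2f}s/it"
    return f"{1 / duration:.2f}it/s"

start = timeit.default_timer()
# ... run.iterate() would be called here ...
print(format_step_speed(timeit.default_timer() - start))
# ----------------------------------------------------------------------------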
+ + # Save all the frames into the steps folder so the generation process can be reviewed later. + runoutputdir_step_folder = Path(f"{runoutputdir}/steps") + print(f"Saving frames to folder: {runoutputdir_step_folder}") + runoutputdir_step_folder.mkdir() + + frame_number = 0 + for frame in frames: + #print(frame) + frame_data = Image.fromarray(frame) + frame_data.save(f"{runoutputdir_step_folder}/{frame_number}.png", format='PNG') + frame_number += 1 # Save init image if init_image is not None: - init_image.save(runoutputdir / "init-image.JPEG", format="JPEG") + init_image.save(runoutputdir / "init-image.jpeg", format="JPEG") # Save image prompts for count, image_prompt in enumerate(image_prompts): image_prompt.save( - runoutputdir / f"image-prompt-{count}.JPEG", format="JPEG" + runoutputdir / f"image-prompt-{count}.jpeg", format="JPEG" ) # Save animation @@ -208,9 +267,14 @@ def generate_image( "continue_prev_run": continue_prev_run, "prev_run_id": prev_run_id, "seed": run.seed, + "cutn": cutn, + "cut_pow": cut_pow, + "step_size": step_size, + "opt_name": opt_name, "Xdim": image_x, "ydim": image_y, "vqgan_ckpt": vqgan_ckpt, + "clip_model": clip_model, "start_time": run_start_dt.strftime("%Y%m%dT%H%M%S"), "end_time": datetime.datetime.now().strftime("%Y%m%dT%H%M%S"), "mse_weight": mse_weight, @@ -245,12 +309,16 @@ def generate_image( json.dump(details, f, indent=4) status_text.text("Done!") # End of run + + vid_display_slot.video("temp.mp4") - except st.script_runner.StopException as e: + except st.StopException as e: # Dump output to dashboard print(f"Received Streamlit StopException") - status_text.text("Execution interruped, dumping outputs ...") + #status_text.text("Execution interrupted, dumping outputs ...") + print("Execution interrupted, dumping outputs ...") writer = imageio.get_writer("temp.mp4", fps=24) + for frame in frames: writer.append_data(frame) writer.close() @@ -260,19 +328,32 @@ def generate_image( runoutputdir = outputdir / ( run_start_dt.strftime("%Y%m%dT%H%M%S") + "-" + run_id ) + print("Saving to folder: ", runoutputdir) runoutputdir.mkdir() # Save final image - im.save(runoutputdir / "output.PNG", format="PNG") + im.save(runoutputdir / "output.png", format="PNG") + + # Save all the frames into the steps folder so the generation process can be reviewed later.
+ runoutputdir_step_folder = Path(f"{runoutputdir}/steps") + print (f"Saving frames to folder: {runoutputdir_step_folder}") + runoutputdir_step_folder.mkdir() + + frame_number = 0 + for frame in frames: + #print (frame) + frame_data = Image.fromarray(frame) + frame_data.save(f"{runoutputdir_step_folder}/{frame_number}.png", format='PNG') + frame_number += 1 # Save init image if init_image is not None: - init_image.save(runoutputdir / "init-image.JPEG", format="JPEG") + init_image.save(runoutputdir / "init-image.jpeg", format="JPEG") # Save image prompts for count, image_prompt in enumerate(image_prompts): image_prompt.save( - runoutputdir / f"image-prompt-{count}.JPEG", format="JPEG" + runoutputdir / f"image-prompt-{count}.jpeg", format="JPEG" ) # Save animation @@ -289,9 +370,14 @@ def generate_image( "continue_prev_run": continue_prev_run, "prev_run_id": prev_run_id, "seed": run.seed, + "cutn": cutn, + "cut_pow": cut_pow, + "step_size": step_size, + "opt_name": opt_name, "Xdim": image_x, "ydim": image_y, "vqgan_ckpt": vqgan_ckpt, + "clip_model": clip_model, "start_time": run_start_dt.strftime("%Y%m%dT%H%M%S"), "end_time": datetime.datetime.now().strftime("%Y%m%dT%H%M%S"), "mse_weight": mse_weight, @@ -325,7 +411,9 @@ def generate_image( with open(runoutputdir / "details.json", "w") as f: json.dump(details, f, indent=4) - status_text.text("Done!") # End of run + status_text.text("Done!") # End of run + + vid_display_slot.video("temp.mp4") if __name__ == "__main__": @@ -339,7 +427,7 @@ def generate_image( args = parser.parse_args() # Select specific GPU if chosen - if args.gpu is not None: + if args.gpu is not None and args.gpu != "cpu": for i in args.gpu.split(","): assert ( int(i) < torch.cuda.device_count() @@ -350,10 +438,13 @@ def generate_image( except RuntimeError: print(traceback.format_exc()) else: + os.environ["CUDA_VISIBLE_DEVICES"] = "" device = None defaults = OmegaConf.load("defaults.yaml") outputdir = Path("output") + #print (outputdir) + if not outputdir.exists(): outputdir.mkdir() @@ -394,6 +485,31 @@ def generate_image( index=default_weight_index, help="Choose which weights to load, trained on different datasets. 
Make sure the weights and configs are downloaded to `assets/` as per the README!", ) + + use_clip_model = st.sidebar.checkbox( + "Clip Model", + value=defaults["use_clip_model"], + help="Select a custom CLIP model to use.", + ) + clip_model = st.sidebar.empty() + if use_clip_model is True: + clip_model = clip_model.text_input( + "Clip Model:", + value=defaults["clip_model"], + help="""Model Versions: + Initially, we’ve released one CLIP model based on the Vision Transformer architecture equivalent to ViT-B/32, + along with the RN50 model, using the architecture equivalent to ResNet-50. + As part of the staged release process, we have also released the RN101 model, as well as RN50x4, + a RN50 scaled up 4x according to the EfficientNet scaling rule. In July 2021, we additionally released the RN50x16 and ViT-B/16 models, + and in January 2022, the RN50x64 and ViT-L/14 models were released. Lastly, the ViT-L/14@336px model was released in April 2022. + Default = ViT-B/32. + Available Models: """ + str(clip.available_models()), + ) + else: + clip_model = "ViT-B/32" + num_steps = st.sidebar.number_input( "Num steps", value=defaults["num_steps"], @@ -430,6 +546,63 @@ def generate_image( else: seed = None + continue_prev_run = st.sidebar.checkbox( + "Continue previous run", + value=defaults["continue_prev_run"], + help="Use existing image and existing weights for the next run. If yes, ignores 'Use starting image'", + )
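# ----------------------------------------------------------------------------
# Review sketch (not part of the diff): the "Clip Model:" field above accepts
# free text, so a typo would only surface later when the run tries to load the
# model. One way to validate the name up front against OpenAI CLIP's published
# list; `resolve_clip_model` is an illustrative helper, not code from this repo.
import clip

def resolve_clip_model(name: str, fallback: str = "ViT-B/32") -> str:
    """Return `name` if CLIP can load it, otherwise fall back to ViT-B/32."""
    return name if name in clip.available_models() else fallback

# Usage before starting the run: clip_model = resolve_clip_model(clip_model)
# ----------------------------------------------------------------------------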
+ + use_cutout_augmentations = st.sidebar.checkbox( + "Use cutout augmentations", + value=True, + help="Adds cutout augmentations in the image generation process. Uses up to an additional 4 GiB of GPU memory. Greatly improves image quality. Toggled on by default.", + ) + + use_cutn = st.sidebar.checkbox( + "Use Cutn", + value=defaults["use_cutn"], + help="Check to set the number of cuts to pass to CLIP; lower values use less VRAM, higher values increase image quality. The exact number of cuts can be set below.", + ) + cutn = st.sidebar.empty() + cut_pow = st.sidebar.empty() + if use_cutn is True: + cutn = cutn.number_input( + "Number of Cuts sent to CLIP", + value=defaults["cutn"], + min_value=1, + step=1, + help="Specify the number of cuts to pass to CLIP; lower values use less VRAM, higher values increase image quality.", + ) + cut_pow = cut_pow.number_input( + "Cut power.", + value=defaults["cut_pow"], + min_value=0.0001, + step=0.1, + help="Exponent applied when sizing each random cut; higher values favor smaller cuts.", + ) + else: + cutn = 32 + cut_pow = 1.0 + + + custom_step_size = st.sidebar.checkbox( + "Custom Step Size/Learning Rate", + value=defaults["custom_step_size"], + help="Customize the Step Size or Learning Rate value.", + ) + step_size = st.sidebar.empty() + if custom_step_size is True: + step_size = step_size.number_input( + "Custom Step Size or Learning Rate", + value=defaults["step_size"], + min_value=0.00001, + step=0.001, + help="Specify a custom Step Size or Learning Rate to use. Ref: https://en.wikipedia.org/wiki/Learning_rate", + format="%.5f", + ) + else: + step_size = 0.05 + use_custom_starting_image = st.sidebar.checkbox( "Use starting image", value=defaults["use_starting_image"], @@ -471,12 +644,39 @@ def generate_image( image_prompts = [Image.open(i).convert("RGB") for i in image_prompts] else: image_prompts = [] - - continue_prev_run = st.sidebar.checkbox( - "Continue previous run", - value=defaults["continue_prev_run"], - help="Use existing image and existing weights for the next run. If yes, ignores 'Use starting image'", + + + use_custom_opt = st.sidebar.checkbox( + "Custom Optimizer", + value=defaults["use_custom_opt"], + help="Use a custom optimizer.", ) + + opt_name = st.sidebar.empty() + if use_custom_opt is True: + optimizer_list = ["Adam","Adadelta","Adagrad","AdamW","Adamax","ASGD","NAdam","RAdam","RMSprop","Rprop","SGD"] + opt_name = st.sidebar.selectbox( + "Optimizer:", optimizer_list, index=optimizer_list.index(defaults["opt_name"]), + help=""" + Default = Adam + + List of optimizers: + + Adadelta: Implements Adadelta algorithm. \n + Adagrad: Implements Adagrad algorithm.\n + Adam: Implements Adam algorithm.\n + AdamW: Implements AdamW algorithm.\n + Adamax: Implements Adamax algorithm (a variant of Adam based on infinity norm).\n + ASGD: Implements Averaged Stochastic Gradient Descent.\n + NAdam: Implements NAdam algorithm.\n + RAdam: Implements RAdam algorithm.\n + RMSprop: Implements RMSprop algorithm.\n + Rprop: Implements the resilient backpropagation algorithm.\n + SGD: Implements stochastic gradient descent (optionally with momentum).\n + """, + ) + else: + opt_name = "Adam" use_mse_reg = st.sidebar.checkbox( "Use MSE regularization", value=defaults["use_mse_regularization"],
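# ----------------------------------------------------------------------------
# Review sketch (not part of the diff): for context on the new cutn / cut_pow
# controls, in the usual VQGAN+CLIP cutout recipe `cutn` is the number of
# random crops scored by CLIP each step and `cut_pow` is the exponent used
# when drawing each crop size, so larger values favor smaller crops. Assuming
# the implementation in logic.py follows that recipe, the pattern is roughly:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MakeCutouts(nn.Module):
    def __init__(self, cut_size: int, cutn: int, cut_pow: float = 1.0):
        super().__init__()
        self.cut_size = cut_size  # CLIP input resolution, e.g. 224
        self.cutn = cutn          # number of crops scored per step
        self.cut_pow = cut_pow    # exponent of the crop-size distribution

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        side_y, side_x = input.shape[2:4]
        max_size = min(side_x, side_y)
        min_size = min(side_x, side_y, self.cut_size)
        cutouts = []
        for _ in range(self.cutn):
            # rand() ** cut_pow skews sizes towards min_size as cut_pow grows
            size = int(torch.rand([]) ** self.cut_pow * (max_size - min_size) + min_size)
            offset_x = torch.randint(0, side_x - size + 1, ())
            offset_y = torch.randint(0, side_y - size + 1, ())
            crop = input[:, :, offset_y:offset_y + size, offset_x:offset_x + size]
            cutouts.append(F.adaptive_avg_pool2d(crop, self.cut_size))
        return torch.cat(cutouts)
# ----------------------------------------------------------------------------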
@@ -525,9 +725,9 @@ def generate_image( "TV loss weight", value=defaults["tv_loss_weight"], min_value=0.0, - step=1e-4, - help="Set weights for TV loss regularization, which encourages spatial smoothness. Ref: https://github.com/jcjohnson/neural-style/issues/302", - format="%.1e", + step=0.0001, + help="Set weights for TV loss regularization, which encourages spatial smoothness; the lower the value, the better the result. Some good values are 0.000085, 0.0001 or 0.0002. Ref: https://github.com/jcjohnson/neural-style/issues/302", + format="%.6f", ) else: tv_loss_weight = 0 @@ -560,10 +760,11 @@ def generate_image( zoom_factor = zoom_factor_widget.number_input( "Zoom factor", value=1.0, - min_value=0.1, - max_value=10.0, - step=0.02, - format="%.2f", + min_value=-100.0, + max_value=100.0, + step=0.0001, + help="Factor to zoom in each frame: 1 is no zoom, less than 1 zooms out, more than 1 zooms in.", + format="%.4f", ) transform_interval = transform_interval_widget.number_input( "Iterations per frame", @@ -579,18 +780,12 @@ def generate_image( zoom_factor = 1 transform_interval = 1 - use_cutout_augmentations = st.sidebar.checkbox( - "Use cutout augmentations", - value=True, - help="Adds cutout augmentatinos in the image generation process. Uses up to additional 4 GiB of GPU memory. Greatly improves image quality. Toggled on by default.", - ) - submitted = st.form_submit_button("Run!") # End of form status_text = st.empty() status_text.text("Pending input prompt") - step_progress_bar = st.progress(0) + step_progress_bar = st.progress(0) im_display_slot = st.empty() vid_display_slot = st.empty() @@ -641,10 +836,15 @@ def generate_image( # Inputs text_input=text_input, vqgan_ckpt=radio, + clip_model=clip_model, num_steps=num_steps, image_x=int(image_x), image_y=int(image_y), seed=int(seed) if set_seed is True else None, + cutn=int(cutn), + cut_pow=float(cut_pow), + step_size=float(step_size), + opt_name=opt_name, init_image=init_image, image_prompts=image_prompts, continue_prev_run=continue_prev_run, @@ -661,5 +860,5 @@ def generate_image( device=device, ) - vid_display_slot.video("temp.mp4") + # debug_slot.write(st.session_state) # DEBUG
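# ----------------------------------------------------------------------------
# Review sketch (not part of the diff): every new sidebar widget above takes
# its initial value from defaults.yaml, which gains the matching keys in the
# diff that follows. The file is loaded once at startup with OmegaConf and
# indexed like a dict; the values shown are the ones this change sets.
from omegaconf import OmegaConf

defaults = OmegaConf.load("defaults.yaml")
use_cutn = defaults["use_cutn"]      # false, so the checkbox starts unticked
cutn = defaults["cutn"]              # 32
cut_pow = defaults["cut_pow"]        # 1.0
step_size = defaults["step_size"]    # 0.05
opt_name = defaults["opt_name"]      # Adam
clip_model = defaults["clip_model"]  # ViT-B/32
# ----------------------------------------------------------------------------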
diff --git a/defaults.yaml b/defaults.yaml index 6633c53..24636f5 100644 --- a/defaults.yaml +++ b/defaults.yaml @@ -1,9 +1,18 @@ # Modify for different systems, e.g. larger default xdim/ydim for more powerful GPUs -num_steps: 500 -Xdim: 640 -ydim: 480 +use_clip_model: false +clip_model: ViT-B/32 +num_steps: -1 +Xdim: 662 +ydim: 360 set_seed: false seed: 0 +use_cutn: false +cutn: 32 +cut_pow: 1.0 +custom_step_size: false +step_size: 0.05 +use_custom_opt: false +opt_name: Adam use_starting_image: false use_image_prompts: false continue_prev_run: false @@ -11,5 +20,6 @@ mse_weight: 0.5 mse_weight_decay: 0.1 mse_weight_decay_steps: 50 use_mse_regularization: false -use_tv_loss_regularization: true -tv_loss_weight: 1e-3 \ No newline at end of file +use_tv_loss_regularization: false +# best values for tv_loss_weight are 0.000085, 0.0001 or 0.0002 +tv_loss_weight: 0.000085 \ No newline at end of file diff --git a/diffusion_app.py b/diffusion_app.py index 795f881..c3f1866 100644 --- a/diffusion_app.py +++ b/diffusion_app.py @@ -100,7 +100,7 @@ def generate_image( frames = [] try: - # Try block catches st.script_runner.StopExecution, no need of a dedicated stop button + # Try block catches st.StopException, no need of a dedicated stop button # Reason is st.form is meant to be self-contained either within sidebar, or in main body # The way the form is implemented in this app splits the form across both regions # This is intended to prevent the model settings from crowding the main body @@ -181,7 +181,7 @@ def generate_image( status_text.text("Done!") # End of run - except st.script_runner.StopException as e: + except st.StopException as e: # Dump output to dashboard print(f"Received Streamlit StopException") status_text.text("Execution interruped, dumping outputs ...") diff --git a/environment.yml b/environment.yml index 618063f..615b7eb 100644 --- a/environment.yml +++ b/environment.yml @@ -4,11 +4,11 @@ channels: - conda-forge - defaults dependencies: - - pytorch::pytorch=1.10.0 - - pytorch::torchvision=0.11.1 - - cudatoolkit=10.2 + - pytorch::pytorch=1.12.0 + - pytorch::torchvision=0.13.1 + - cudatoolkit=10.2 # cudatoolkit could also be bumped to 11.3, but that may cause trouble on older GPUs; for an RTX 3050 or newer, cudatoolkit=11.3 is recommended. - omegaconf - - pytorch-lightning + - pytorch-lightning=1.5.8 # For compatibility - tqdm - regex - kornia @@ -32,6 +32,8 @@ dependencies: # - imgtag - einops - transformers + - torch-optimizer + - retry - git+https://github.com/openai/CLIP # For guided diffusion - lpips diff --git a/logic.py b/logic.py index 1f774b8..3c112e5 100644 --- a/logic.py +++ b/logic.py @@ -21,8 +21,57 @@ import cv2 import numpy as np import kornia.augmentation as K +from torch_optimizer import DiffGrad, AdamP, RAdam +# Set the optimiser +def get_opt(opt_name, z, opt_lr): + """ + List of optimizers + Adadelta: Implements Adadelta algorithm. + Adagrad: Implements Adagrad algorithm. + Adam: Implements Adam algorithm. + AdamW: Implements AdamW algorithm. + Adamax: Implements Adamax algorithm (a variant of Adam based on infinity norm). + ASGD: Implements Averaged Stochastic Gradient Descent. + NAdam: Implements NAdam algorithm. + RAdam: Implements RAdam algorithm. + RMSprop: Implements RMSprop algorithm. + Rprop: Implements the resilient backpropagation algorithm.
+ SGD: Implements stochastic gradient descent (optionally with momentum).""" + + if opt_name == "Adam": + opt = optim.Adam([z], lr=opt_lr) + elif opt_name == "AdamW": + opt = optim.AdamW([z], lr=opt_lr) + elif opt_name == "Adagrad": + opt = optim.Adagrad([z], lr=opt_lr) + elif opt_name == "Adamax": + opt = optim.Adamax([z], lr=opt_lr) + elif opt_name == "AdamP": + opt = AdamP([z], lr=opt_lr) + elif opt_name == "Adadelta": + opt = optim.Adadelta([z], lr=opt_lr, eps=1e-9, weight_decay=1e-9) + elif opt_name == "ASGD": + opt = optim.ASGD([z], lr=opt_lr) + elif opt_name == "DiffGrad": + opt = DiffGrad([z], lr=opt_lr, eps=1e-9, weight_decay=1e-9) + elif opt_name == "NAdam": + opt = optim.NAdam([z], lr=opt_lr) + elif opt_name == "RAdam": + opt = RAdam([z], lr=opt_lr) + elif opt_name == "RMSprop": + opt = optim.RMSprop([z], lr=opt_lr) + elif opt_name == "Rprop": + opt = optim.Rprop([z], lr=opt_lr) + elif opt_name == "SGD": + opt = optim.SGD([z], lr=opt_lr) + + else: + print(f"Unknown optimiser: {opt_name} | Are choices broken?") + opt = optim.Adam([z], lr=opt_lr) + return opt + class Run: """ Subclass this to house your own implementation of CLIP-based image generation @@ -63,6 +112,7 @@ def __init__( self, text_input: str = "the first day of the waters", vqgan_ckpt: str = "vqgan_imagenet_f16_16384", + clip_model: str = "ViT-B/32", num_steps: int = 300, image_x: int = 300, image_y: int = 300, @@ -70,10 +120,14 @@ def __init__( image_prompts: List[Image.Image] = [], continue_prev_run: bool = False, seed: Optional[int] = None, + cutn: int = 32, + cut_pow: float = 1.0, + step_size: float = 0.05, + opt_name: str = "Adam", mse_weight=0.5, mse_weight_decay=0.1, mse_weight_decay_steps=50, - tv_loss_weight=1e-3, + tv_loss_weight=0.000085, use_cutout_augmentations: bool = True, # use_augs: bool = True, # noise_fac: float = 0.1, @@ -86,11 +140,12 @@ def __init__( rotation_angle: float = 0, zoom_factor: float = 1, transform_interval: int = 10, - device: Optional[torch.device] = None, + device: Optional[torch.device] = "cpu", ) -> None: super().__init__() self.text_input = text_input self.vqgan_ckpt = vqgan_ckpt + self.clip_model = clip_model self.num_steps = num_steps self.image_x = image_x self.image_y = image_y @@ -98,6 +153,11 @@ def __init__( self.image_prompts = image_prompts self.continue_prev_run = continue_prev_run self.seed = seed + self.cutn = cutn + self.cut_pow = cut_pow + self.step_size = step_size + self.opt_name = opt_name + self.device = device # Setup ------------------------------------------------------------------------------ # Split text by "|" symbol @@ -115,22 +175,26 @@ def __init__( init_image=init_image, init_weight=mse_weight, # clip.available_models() - # ['RN50', 'RN101', 'RN50x4', 'ViT-B/32'] + # ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14', 'ViT-L/14@336px'] # Visual Transformer seems to be the smallest - clip_model="ViT-B/32", + clip_model=clip_model, vqgan_config=f"assets/{vqgan_ckpt}.yaml", vqgan_checkpoint=f"assets/{vqgan_ckpt}.ckpt", - step_size=0.05, - cutn=64, - cut_pow=1.0, + cutn=cutn, + cut_pow=cut_pow, + step_size=step_size, + opt_name=opt_name, display_freq=50, seed=seed, + device=device, ) - if device is None: - self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + if device is None or device == "cpu": + #self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + device = torch.device("cpu") else: - self.device = device + #self.device = device + self.device = 
torch.device(f"{device}" if torch.cuda.is_available() else "cpu") print("Using device:", device) @@ -155,6 +219,8 @@ def __init__( self.rotation_angle = rotation_angle self.zoom_factor = zoom_factor self.transform_interval = transform_interval + + def load_model( self, prev_model: nn.Module = None, prev_perceptor: nn.Module = None @@ -214,6 +280,7 @@ def model_init(self, init_image: Image.Image = None) -> None: None, :, None, None ] + if self.seed is not None: torch.manual_seed(self.seed) else: @@ -239,7 +306,8 @@ def model_init(self, init_image: Image.Image = None) -> None: self.z = self.z.view([-1, toksY, toksX, e_dim]).permute(0, 3, 1, 2) self.z_orig = self.z.clone() self.z.requires_grad_(True) - self.opt = optim.Adam([self.z], lr=self.args.step_size) + #self.opt = optim.Adam([self.z], lr=self.args.step_size) + self.opt = get_opt(self.opt_name, self.z, self.args.step_size) self.normalize = transforms.Normalize( mean=[0.48145466, 0.4578275, 0.40821073], @@ -315,11 +383,14 @@ def _ascend_txt(self) -> List: result[f"prompt_loss_{count}"] = prompt(iii) return result - + def iterate(self) -> Tuple[List[float], Image.Image]: if not self.use_scrolling_zooming: # Forward prop self.opt.zero_grad() + #for param in self.model.parameters(): + #param.grad = None + losses = self._ascend_txt() # Grab an image @@ -377,11 +448,16 @@ def iterate(self) -> Tuple[List[float], Image.Image]: TF.to_tensor(transformed_im).to(self.device).unsqueeze(0) * 2 - 1 ) self.z.requires_grad_(True) - self.opt = optim.Adam([self.z], lr=self.args.step_size) + + #self.opt = optim.Adam([self.z], lr=self.args.step_size) + self.opt = get_opt(self.opt_name, self.z, self.args.step_size) for _ in range(self.transform_interval): # Forward prop self.opt.zero_grad() + #for param in self.model.parameters(): + #param.grad = None + losses = self._ascend_txt() # Grab an image @@ -393,9 +469,13 @@ def iterate(self) -> Tuple[List[float], Image.Image]: self.opt.step() with torch.no_grad(): self.z.copy_(self.z.maximum(self.z_min).minimum(self.z_max)) - + # Advance iteration counter self.iterate_counter += 1 + + for param_group in self.opt.param_groups: + #print (param_group) + print (f"Learning Rate: {param_group['lr']}") print( f"Step {self.iterate_counter} losses: {[(i, j.item()) for i, j in losses.items()]}"
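# ----------------------------------------------------------------------------
# Review sketch (not part of the diff): logic.py's new get_opt() if/elif chain
# could also be written as a lookup table, which keeps the optimizer names in
# one place next to the Streamlit selectbox options. This variant drops the
# custom eps / weight_decay the diff passes to Adadelta and DiffGrad; offered
# as a possible refactor, not as part of this change.
import torch
import torch.optim as optim
from torch_optimizer import AdamP, DiffGrad, RAdam

OPTIMIZERS = {
    "Adam": optim.Adam, "AdamW": optim.AdamW, "Adagrad": optim.Adagrad,
    "Adamax": optim.Adamax, "Adadelta": optim.Adadelta, "AdamP": AdamP,
    "ASGD": optim.ASGD, "DiffGrad": DiffGrad, "NAdam": optim.NAdam,
    "RAdam": RAdam, "RMSprop": optim.RMSprop, "Rprop": optim.Rprop,
    "SGD": optim.SGD,
}

def get_opt(opt_name: str, z: torch.Tensor, opt_lr: float):
    """Build the optimizer named `opt_name` over the latent `z`, defaulting to Adam."""
    opt_cls = OPTIMIZERS.get(opt_name)
    if opt_cls is None:
        print(f"Unknown optimiser: {opt_name}, falling back to Adam")
        opt_cls = optim.Adam
    return opt_cls([z], lr=opt_lr)

# Usage mirrors the diff: self.opt = get_opt(self.opt_name, self.z, self.args.step_size)
# ----------------------------------------------------------------------------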