Fix sdxl inference and add compatible changes for launcher support (#9995)

* Fix open_clip_torch version

Signed-off-by: Mingyuan Ma <[email protected]>

* Update sdxl inference scripts

Signed-off-by: Mingyuan Ma <[email protected]>

* Apply isort and black reformatting

Signed-off-by: Victor49152 <[email protected]>

* set ddp to false

Signed-off-by: Mingyuan Ma <[email protected]>

---------

Signed-off-by: Mingyuan Ma <[email protected]>
Signed-off-by: Victor49152 <[email protected]>
Co-authored-by: Victor49152 <[email protected]>
2 people authored and monica-sekoyan committed Oct 11, 2024
1 parent b2f77e4 commit b50b30e
Showing 4 changed files with 37 additions and 36 deletions.
@@ -153,6 +153,7 @@ model:
   resume_from_checkpoint: null # manually set the checkpoint file to load from
   apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
   gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
+  ddp_overlap: False # True for using PyTorch DDP overlap.
 
   optim:
     name: fused_adam
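
The new `ddp_overlap` flag is read when the launcher assembles the distributed training strategy. A hedged sketch of the usual pattern (the exact NeMo wiring may differ; `build_strategy` is an invented helper, while the two strategy classes are the real PyTorch Lightning and NeMo ones):

    # Hedged sketch, not verbatim NeMo code: branch on the config flag to
    # pick a distributed strategy. Plain DDPStrategy relies on PyTorch's
    # built-in overlap of gradient all-reduce with the backward pass;
    # NLPDDPStrategy is NeMo's Megatron-aware DDP wrapper.
    from pytorch_lightning.strategies import DDPStrategy
    from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy

    def build_strategy(model_cfg):
        if model_cfg.get("ddp_overlap", False):
            return DDPStrategy(gradient_as_bucket_view=True, find_unused_parameters=False)
        return NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=False)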
@@ -26,10 +26,10 @@ def model_cfg_modifier(model_cfg):
     model_cfg.precision = cfg.trainer.precision
     model_cfg.ckpt_path = None
     model_cfg.inductor = False
-    model_cfg.unet_config.from_pretrained = "/opt/nemo-aligner/checkpoints/sdxl/unet_nemo.ckpt"
-    model_cfg.unet_config.from_NeMo = True
-    model_cfg.first_stage_config.from_pretrained = "/opt/nemo-aligner/checkpoints/sdxl/vae_nemo.ckpt"
-    model_cfg.first_stage_config.from_NeMo = True
+    # model_cfg.unet_config.from_pretrained = "/opt/nemo-aligner/checkpoints/sdxl/unet_nemo.ckpt"
+    # model_cfg.unet_config.from_NeMo = True
+    # model_cfg.first_stage_config.from_pretrained = "/opt/nemo-aligner/checkpoints/sdxl/vae_nemo.ckpt"
+    # model_cfg.first_stage_config.from_NeMo = True
     model_cfg.first_stage_config._target_ = 'nemo.collections.multimodal.models.text_to_image.stable_diffusion.ldm.autoencoder.AutoencoderKLInferenceWrapper'
     # model_cfg.fsdp = True
 
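
With the hard-coded container paths commented out, the UNet and VAE weights now come from the restored .nemo checkpoint itself rather than from separately exported files. If external weights are still wanted, the same overrides could be driven by the inference config instead; a hypothetical variant (the keys `unet_ckpt` and `vae_ckpt` are invented, and `cfg` is the script-level Hydra config exactly as in the original modifier):

    # Hypothetical: take optional override paths from the inference config
    # instead of hard-coding /opt/nemo-aligner paths in the script.
    def model_cfg_modifier(model_cfg):
        model_cfg.precision = cfg.trainer.precision
        model_cfg.ckpt_path = None
        model_cfg.inductor = False
        unet_ckpt = cfg.model.get("unet_ckpt")  # invented config key
        if unet_ckpt is not None:
            model_cfg.unet_config.from_pretrained = unet_ckpt
            model_cfg.unet_config.from_NeMo = True
        vae_ckpt = cfg.model.get("vae_ckpt")  # invented config key
        if vae_ckpt is not None:
            model_cfg.first_stage_config.from_pretrained = vae_ckpt
            model_cfg.first_stage_config.from_NeMo = True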
62 changes: 31 additions & 31 deletions nemo/collections/multimodal/parts/utils.py
@@ -303,41 +303,41 @@ def setup_trainer_and_model_for_inference(
 
     # Create the NLPSaveRestoreConnector object for model saving and restoring.
     save_restore_connector = NLPSaveRestoreConnector()
-    if cfg.model.restore_from_path.endswith(".nemo") or os.path.isdir(cfg.model.restore_from_path):
-        # Set the model_extracted_dir attribute if the restore path is a directory.
-        if os.path.isdir(cfg.model.restore_from_path):
-            save_restore_connector.model_extracted_dir = cfg.model.restore_from_path
+    if cfg.model.restore_from_path is not None:
+        if cfg.model.restore_from_path.endswith(".nemo") or os.path.isdir(cfg.model.restore_from_path):
+            # Set the model_extracted_dir attribute if the restore path is a directory.
+            if os.path.isdir(cfg.model.restore_from_path):
+                save_restore_connector.model_extracted_dir = cfg.model.restore_from_path
 
-        # Restore the model configuration from the specified path and modify it for inference.
-        model_cfg = model_provider.restore_from(
-            restore_path=cfg.model.restore_from_path,
-            trainer=trainer,
-            save_restore_connector=save_restore_connector,
-            return_config=True,
-        )
-        with open_dict(model_cfg):
-            model_cfg_modifier(model_cfg)  # modify the configuration for inference
+            # Restore the model configuration from the specified path and modify it for inference.
+            model_cfg = model_provider.restore_from(
+                restore_path=cfg.model.restore_from_path,
+                trainer=trainer,
+                save_restore_connector=save_restore_connector,
+                return_config=True,
+            )
+            with open_dict(model_cfg):
+                model_cfg_modifier(model_cfg)  # modify the configuration for inference
 
-        # Restore the model from the specified path and configuration, and set it up for inference.
-        model = model_provider.restore_from(
-            restore_path=cfg.model.restore_from_path,
-            trainer=trainer,
-            override_config_path=model_cfg,
-            save_restore_connector=save_restore_connector,
-            strict=True,
-        )
+            # Restore the model from the specified path and configuration, and set it up for inference.
+            model = model_provider.restore_from(
+                restore_path=cfg.model.restore_from_path,
+                trainer=trainer,
+                override_config_path=model_cfg,
+                save_restore_connector=save_restore_connector,
+                strict=True,
+            )
 
-    elif cfg.model.restore_from_path.endswith(".ckpt"):
-        logging.warning(
-            "Loading from .ckpt checkpoint for inference is experimental! It doesn't support models with model parallelism!"
-        )
+        elif cfg.model.restore_from_path.endswith(".ckpt"):
+            logging.warning(
+                "Loading from .ckpt checkpoint for inference is experimental! It doesn't support models with model parallelism!"
+            )
 
-        model = model_provider.load_from_checkpoint(
-            cfg.model.restore_from_path,
-            hparams_file=cfg.model.get("hparams_file"),
-            trainer=trainer,
-        )
+            model = model_provider.load_from_checkpoint(
+                cfg.model.restore_from_path,
+                hparams_file=cfg.model.get("hparams_file"),
+                trainer=trainer,
+            )
 
     else:
         # load a model from scratch
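
The practical effect of the new outer guard: a launcher that leaves `model.restore_from_path` unset no longer crashes on `None.endswith(".nemo")`; a `None` path now falls through to the load-from-scratch branch. A sketch of a typical call site (assuming `MegatronDiffusionEngine` as the SDXL model class and a Hydra-provided `cfg`; not verbatim NeMo code):

    # Sketch of an inference script driving the patched helper.
    from nemo.collections.multimodal.models.text_to_image.stable_diffusion.diffusion_engine import (
        MegatronDiffusionEngine,  # assumed SDXL model class
    )
    from nemo.collections.multimodal.parts.utils import setup_trainer_and_model_for_inference

    trainer, model = setup_trainer_and_model_for_inference(
        model_provider=MegatronDiffusionEngine,
        cfg=cfg,  # cfg.model.restore_from_path may now legitimately be None
        model_cfg_modifier=model_cfg_modifier,  # e.g. the modifier shown earlier
    )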
2 changes: 1 addition & 1 deletion requirements/requirements_multimodal.txt
@@ -6,7 +6,7 @@ einops_exts
 imageio
 kornia
 nerfacc>=0.5.3
-open_clip_torch
+open_clip_torch==2.24.0
 PyMCubes
 taming-transformers
 torchdiffeq
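
Pinning `open_clip_torch` to an exact version (the "Fix open_clip_torch version" commit above) keeps a newer open_clip release with changed APIs from breaking the SDXL inference path. A quick post-install sanity check (the package installs under the import name `open_clip`):

    # Verify the pinned requirement is what actually got installed.
    import open_clip

    assert open_clip.__version__ == "2.24.0", open_clip.__version__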
