Data engine #37

Open
wants to merge 22 commits into base: main
Changes from 19 commits
Empty file added builder/__init__.py
Empty file.
91 changes: 91 additions & 0 deletions builder/builder.py
@@ -0,0 +1,91 @@
# Copyright 2023 Haotian Liu
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from transformers import BitsAndBytesConfig
import torch


class ModelBuilder:
@classmethod
def judge_able_to_build(cls, model_name: str) -> bool:
"""
        Judge whether this builder can build the given model.
Args:
model_name: The name of the model.

Returns:
bool: True if the model can be built by this builder.
"""
raise NotImplementedError

@classmethod
def build(cls, model_path, model_base, model_name, **kwargs):
"""
Build the model.
Returns:
tokenizer: The tokenizer of the model.
            model: The model. This must not be None; otherwise load_pretrained_model raises an error.
image_processor: The image processor.
"""
raise NotImplementedError


def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, load_4bit=False, device_map="auto",
device="cuda", use_flash_attn=False, **kwargs):
kwargs = {"device_map": device_map, **kwargs}

if device != "cuda":
kwargs['device_map'] = {"": device}

if load_8bit:
kwargs['load_in_8bit'] = True
elif load_4bit:
kwargs['load_in_4bit'] = True
kwargs['quantization_config'] = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type='nf4'
)
else:
kwargs['torch_dtype'] = torch.float16

if use_flash_attn:
kwargs['attn_implementation'] = 'flash_attention_2'

# import here to avoid circular import
from .llava_builder import LLaVABuilder
from .omnillm_builder import OmniLLMBuilder
from .language_model_builder import LanguageModelBuilder

    # Note: please keep LanguageModelBuilder at the end of the list if you add your own builder
model_builder_list = [LLaVABuilder, OmniLLMBuilder, LanguageModelBuilder]

tokenizer, model, image_processor = None, None, None
for builder in model_builder_list:
if builder.judge_able_to_build(model_name):
tokenizer, model, image_processor = builder.build(model_path, model_base, model_name, **kwargs)
break

if model is None:
raise ValueError(f"Cannot find a suitable builder for model {model_name}\n Please check whether the model name\
is correct. If the model you use is not supported by default, please implement a new builder and add to the \
model_builder_list in the file RLAIF-V/builder/builder.py")

if hasattr(model.config, "max_sequence_length"):
context_len = model.config.max_sequence_length
else:
context_len = 2048

return tokenizer, model, image_processor, context_len
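
The note above marks the extension point: a new builder only needs to implement judge_able_to_build and build, and be registered ahead of the fallback. Below is a minimal sketch of a hypothetical custom builder; the file name, class name, and the 'myvlm' substring are illustrative and not part of this PR:

from builder.builder import ModelBuilder

from transformers import AutoTokenizer, AutoModelForCausalLM


class MyVLMBuilder(ModelBuilder):
    @classmethod
    def judge_able_to_build(cls, model_name: str) -> bool:
        # Claim only model names that clearly belong to this family.
        return 'myvlm' in model_name.lower()

    @classmethod
    def build(cls, model_path, model_base, model_name, **kwargs):
        tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
        model = AutoModelForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)
        # Return None as the image processor for a text-only model.
        return tokenizer, model, None

It would then be inserted into model_builder_list before LanguageModelBuilder, which has to stay last because it accepts every model name:

model_builder_list = [LLaVABuilder, OmniLLMBuilder, MyVLMBuilder, LanguageModelBuilder]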
43 changes: 43 additions & 0 deletions builder/language_model_builder.py
@@ -0,0 +1,43 @@
from builder.builder import ModelBuilder
import warnings

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


class LanguageModelBuilder(ModelBuilder):
"""
**Note**: Please put this class at the end of the model builder list.
This model builder is a fallback builder for all language models.
It returns no image processor.
"""

@classmethod
def judge_able_to_build(cls, model_name: str) -> bool:
return True

@classmethod
def build(cls, model_path, model_base, model_name, **kwargs):
warnings.warn(
"Warning: LanguageModel is the fall back model. Please make sure you are loading the correct model.")
if model_base is not None:
# PEFT model
from peft import PeftModel
tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, **kwargs)
print(f"Loading LoRA weights from {model_path}")
model = PeftModel.from_pretrained(model, model_path)
print(f"Merging weights")
model = model.merge_and_unload()
print('Convert to FP16...')
model.to(torch.float16)
else:
if 'mpt' in model_name.lower():
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, trust_remote_code=True,
**kwargs)
else:
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)

return tokenizer, model, None
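
For reference, a minimal caller-side sketch of how this fallback is reached through the new entry point; the checkpoint path and model name are placeholders, and the name deliberately matches none of the vision builders:

from builder.builder import load_pretrained_model

# Hypothetical path and name, for illustration only.
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path="checkpoints/my-plain-llm",
    model_base=None,
    model_name="my-plain-llm",   # no 'llava' / 'omnilmm' / 'rlaif' substring
)
# image_processor is None here; context_len falls back to 2048 unless the
# model config defines max_sequence_length.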
114 changes: 35 additions & 79 deletions llava/model/builder.py → builder/llava_builder.py
@@ -1,54 +1,27 @@
# Copyright 2023 Haotian Liu
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import os
from builder.builder import ModelBuilder
import warnings
import os
import shutil

from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, BitsAndBytesConfig
from transformers import AutoTokenizer, AutoConfig
import torch
from llava.model import *
from llava.constants import DEFAULT_IMAGE_PATCH_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN


def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, load_4bit=False, device_map="auto", device="cuda", use_flash_attn=False, **kwargs):
kwargs = {"device_map": device_map, **kwargs}

if device != "cuda":
kwargs['device_map'] = {"": device}
class LLaVABuilder(ModelBuilder):
@classmethod
def judge_able_to_build(cls, model_name: str) -> bool:
lower_name = model_name.lower()
return 'llava' in lower_name or ('rlaif' in lower_name and '7b' in lower_name)

if load_8bit:
kwargs['load_in_8bit'] = True
elif load_4bit:
kwargs['load_in_4bit'] = True
kwargs['quantization_config'] = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type='nf4'
)
else:
kwargs['torch_dtype'] = torch.float16

if use_flash_attn:
kwargs['attn_implementation'] = 'flash_attention_2'

if 'llava' in model_name.lower():
# Load LLaVA model
@classmethod
def build(cls, model_path, model_base, model_name, **kwargs):
if 'lora' in model_name.lower() and model_base is None:
warnings.warn('There is `lora` in model name but no `model_base` is provided. If you are loading a LoRA model, please provide the `model_base` argument. Detailed instruction: https://github.com/haotian-liu/LLaVA#launch-a-model-worker-lora-weights-unmerged.')
warnings.warn(
'There is `lora` in model name but no `model_base` is provided. If you are loading a LoRA model, \
please provide the `model_base` argument. Detailed instruction: \
https://github.com/haotian-liu/LLaVA#launch-a-model-worker-lora-weights-unmerged.')
if 'lora' in model_name.lower() and model_base is not None:
from llava.model.language_model.llava_llama import LlavaConfig
lora_cfg_pretrained = LlavaConfig.from_pretrained(model_path)
@@ -57,12 +30,15 @@ def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, l
model = LlavaLlamaForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=lora_cfg_pretrained, **kwargs)
token_num, tokem_dim = model.lm_head.out_features, model.lm_head.in_features
if model.lm_head.weight.shape[0] != token_num:
model.lm_head.weight = torch.nn.Parameter(torch.empty(token_num, tokem_dim, device=model.device, dtype=model.dtype))
model.model.embed_tokens.weight = torch.nn.Parameter(torch.empty(token_num, tokem_dim, device=model.device, dtype=model.dtype))
model.lm_head.weight = torch.nn.Parameter(
torch.empty(token_num, tokem_dim, device=model.device, dtype=model.dtype))
model.model.embed_tokens.weight = torch.nn.Parameter(
torch.empty(token_num, tokem_dim, device=model.device, dtype=model.dtype))

print('Loading additional LLaVA weights...')
if os.path.exists(os.path.join(model_path, 'non_lora_trainables.bin')):
non_lora_trainables = torch.load(os.path.join(model_path, 'non_lora_trainables.bin'), map_location='cpu')
non_lora_trainables = (
torch.load(os.path.join(model_path, 'non_lora_trainables.bin'), map_location='cpu'))
else:
# this is probably from HF Hub
from huggingface_hub import hf_hub_download
@@ -72,10 +48,15 @@ def load_from_hf(repo_id, filename, subfolder=None):
filename=filename,
subfolder=subfolder)
return torch.load(cache_file, map_location='cpu')

non_lora_trainables = load_from_hf(model_path, 'non_lora_trainables.bin')
non_lora_trainables = {(k[11:] if k.startswith('base_model.') else k): v for k, v in non_lora_trainables.items()}
non_lora_trainables = {
(k[11:] if k.startswith('base_model.') else k): v for k, v in non_lora_trainables.items()
}
if any(k.startswith('model.model.') for k in non_lora_trainables):
non_lora_trainables = {(k[6:] if k.startswith('model.') else k): v for k, v in non_lora_trainables.items()}
non_lora_trainables = {
(k[6:] if k.startswith('model.') else k): v for k, v in non_lora_trainables.items()
}
model.load_state_dict(non_lora_trainables, strict=False)

from peft import PeftModel
@@ -89,14 +70,17 @@ def load_from_hf(repo_id, filename, subfolder=None):
print('Loading LLaVA from base model...')
if 'mpt' in model_name.lower():
if not os.path.isfile(os.path.join(model_path, 'configuration_mpt.py')):
shutil.copyfile(os.path.join(model_base, 'configuration_mpt.py'), os.path.join(model_path, 'configuration_mpt.py'))
shutil.copyfile(os.path.join(model_base, 'configuration_mpt.py'),
os.path.join(model_path, 'configuration_mpt.py'))
tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=True)
cfg_pretrained = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
model = LlavaMptForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=cfg_pretrained, **kwargs)
model = LlavaMptForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=cfg_pretrained,
**kwargs)
else:
tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
cfg_pretrained = AutoConfig.from_pretrained(model_path)
model = LlavaLlamaForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=cfg_pretrained, **kwargs)
model = LlavaLlamaForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=cfg_pretrained,
**kwargs)

mm_projector_weights = torch.load(os.path.join(model_path, 'mm_projector.bin'), map_location='cpu')
mm_projector_weights = {k: v.to(torch.float16) for k, v in mm_projector_weights.items()}
@@ -119,31 +103,7 @@ def load_from_hf(repo_id, filename, subfolder=None):
low_cpu_mem_usage=True,
**kwargs
)
else:
# Load language model
if model_base is not None:
# PEFT model
from peft import PeftModel
tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, **kwargs)
print(f"Loading LoRA weights from {model_path}")
model = PeftModel.from_pretrained(model, model_path)
print(f"Merging weights")
model = model.merge_and_unload()
print('Convert to FP16...')
model.to(torch.float16)
else:
use_fast = False
if 'mpt' in model_name.lower():
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, trust_remote_code=True, **kwargs)
else:
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)

image_processor = None

if 'llava' in model_name.lower():
mm_use_im_start_end = getattr(model.config, "mm_use_im_start_end", False)
mm_use_im_patch_token = getattr(model.config, "mm_use_im_patch_token", True)
if mm_use_im_patch_token:
@@ -153,15 +113,11 @@ def load_from_hf(repo_id, filename, subfolder=None):
model.resize_token_embeddings(len(tokenizer))

vision_tower = model.get_vision_tower()
device_map = kwargs.get('device_map', 'auto')
if not vision_tower.is_loaded:
vision_tower.load_model(device_map=device_map)
if device_map != 'auto':
vision_tower.to(device=device_map[''], dtype=torch.float16)
image_processor = vision_tower.image_processor

if hasattr(model.config, "max_sequence_length"):
context_len = model.config.max_sequence_length
else:
context_len = 2048

return tokenizer, model, image_processor, context_len
return tokenizer, model, image_processor
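
Assuming the LoRA branch of this builder, a hedged sketch of loading an unmerged LoRA checkpoint through the new entry point; the paths below are placeholders and are not taken from this PR:

from builder.builder import load_pretrained_model

# The model name must contain both 'llava' and 'lora', and model_base must
# point to the base checkpoint the LoRA adapter was trained on.
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path="checkpoints/llava-v1.5-7b-lora",
    model_base="checkpoints/vicuna-7b-v1.5",
    model_name="llava-v1.5-7b-lora",
)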
47 changes: 47 additions & 0 deletions builder/omnillm_builder.py
@@ -0,0 +1,47 @@
from builder.builder import ModelBuilder

from transformers import AutoTokenizer
import torch
from omnilmm.constants import DEFAULT_IMAGE_PATCH_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
from omnilmm.model.omnilmm import OmniLMMForCausalLM
from omnilmm.model.utils import build_transform


class OmniLLMBuilder(ModelBuilder):
@classmethod
def judge_able_to_build(cls, model_name: str) -> bool:
lower_name = model_name.lower()
return 'omnilmm' in lower_name or ('rlaif' in lower_name and '12b' in lower_name)

@classmethod
def build(cls, model_path, _, model_name, **kwargs):
tokenizer = AutoTokenizer.from_pretrained(model_path, model_max_length=2048)

if False:
            # shard the model across multiple devices for GPUs with limited memory (e.g., two Nvidia 3090 24 GB cards)
with init_empty_weights():
model = OmniLMMForCausalLM.from_pretrained(model_name, tune_clip=True, torch_dtype=torch.bfloat16)
model = load_checkpoint_and_dispatch(model, model_name, dtype=torch.bfloat16,
device_map="auto",
no_split_module_classes=['Eva', 'MistralDecoderLayer', 'ModuleList',
'Resampler']
)
else:
model = OmniLMMForCausalLM.from_pretrained(
model_path, tune_clip=True, torch_dtype=torch.bfloat16
).to(device='cuda', dtype=torch.bfloat16)

img_processor = build_transform(
is_train=False, input_size=model.model.config.image_size, std_mode='OPENAI_CLIP')
mm_use_im_start_end = getattr(model.config, "mm_use_im_start_end", False)

tokenizer.add_tokens([DEFAULT_IMAGE_PATCH_TOKEN, DEFAULT_IM_START_TOKEN,
DEFAULT_IM_END_TOKEN], special_tokens=True)
vision_config = model.model.vision_config
vision_config.im_patch_token = tokenizer.convert_tokens_to_ids(
[DEFAULT_IMAGE_PATCH_TOKEN])[0]
vision_config.use_im_start_end = mm_use_im_start_end
vision_config.im_start_token, vision_config.im_end_token = tokenizer.convert_tokens_to_ids(
[DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN])

return tokenizer, model, img_processor
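
The disabled `if False:` branch above sketches sharded loading across two ~24 GB GPUs but never imports its helpers. If it were enabled, it would need roughly the following, using accelerate; this is a hedged sketch under that assumption, not something the PR turns on:

# Sketch only: what the currently disabled multi-GPU branch would require.
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
import torch

from omnilmm.model.omnilmm import OmniLMMForCausalLM


def load_omnilmm_sharded(model_path):
    # Build the model skeleton without allocating weights, then stream the
    # checkpoint onto the available GPUs, keeping the listed modules unsplit.
    with init_empty_weights():
        model = OmniLMMForCausalLM.from_pretrained(
            model_path, tune_clip=True, torch_dtype=torch.bfloat16)
    model = load_checkpoint_and_dispatch(
        model, model_path, dtype=torch.bfloat16, device_map="auto",
        no_split_module_classes=['Eva', 'MistralDecoderLayer', 'ModuleList', 'Resampler'],
    )
    return model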