From bd0c110a13f247ca09fcd2e20d3c5925aa5c0ad0 Mon Sep 17 00:00:00 2001 From: n1ck-guo Date: Thu, 21 Nov 2024 20:00:50 -0500 Subject: [PATCH 1/6] refine Signed-off-by: n1ck-guo --- auto_round/script/mllm.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py index d3fbb399..953070fa 100644 --- a/auto_round/script/mllm.py +++ b/auto_round/script/mllm.py @@ -171,8 +171,8 @@ def __init__(self, *args, **kwargs): self.add_argument("--pack", action='store_true', help="a video may associate with multiple questions, if pack==True," " will ask all questions for a video in a single") - self.add_argument("--use-subtitle", action='store_true') - self.add_argument("--fps", type=float, default=-1) + self.add_argument("--fps", type=float, default=-1, + help="set the fps for a video.") # Work Dir # Infer + Eval or Infer Only self.add_argument("--mode", type=str, default='all', choices=['all', 'infer'], @@ -183,14 +183,16 @@ def __init__(self, *args, **kwargs): # API Kwargs, Apply to API VLMs and Judge API LLMs self.add_argument('--retry', type=int, default=None, help='retry numbers for API VLMs') # Explicitly Set the Judge Model - self.add_argument('--judge', type=str, default=None) + self.add_argument('--judge', type=str, default=None, + help="whether is a judge model.") # Logging Utils - self.add_argument('--verbose', action='store_true') + self.add_argument('--verbose', action='store_true', + help="whether to display verbose information.") # Configuration for Resume # Ignore: will not rerun failed VLM inference self.add_argument('--ignore', action='store_true', help='ignore failed indices. ') # Rerun: will remove all evaluation temp files - self.add_argument('--rerun', action='store_true') + self.add_argument('--rerun', action='store_true', help="If true, will remove all evvaluation temp files and rerun.") def setup_parser(): From c105dc0b84bf6b9fbc4d9c1cb4ea513e4f722388 Mon Sep 17 00:00:00 2001 From: n1ck-guo Date: Thu, 21 Nov 2024 20:37:08 -0500 Subject: [PATCH 2/6] split args Signed-off-by: n1ck-guo --- auto_round/__main__.py | 20 ++++++--- auto_round/mllm/autoround_mllm.py | 3 -- auto_round/script/mllm.py | 74 +++++++++++++++++-------------- test/test_basic_usage.py | 13 ++++++ 4 files changed, 67 insertions(+), 43 deletions(-) diff --git a/auto_round/__main__.py b/auto_round/__main__.py index 53490a01..17691ab4 100644 --- a/auto_round/__main__.py +++ b/auto_round/__main__.py @@ -33,11 +33,17 @@ def run_fast(): def run_mllm(): - from auto_round.script.mllm import setup_parser, tune, eval - args = setup_parser() - if args.eval: + if "--eval" in sys.argv: + from auto_round.script.mllm import setup_lmeval_parser, eval + sys.argv.remove("--eval") + args = setup_lmeval_parser() eval(args) + elif "--lmms" in sys.argv: + sys.argv.remove("--lmms") + run_lmms() else: + from auto_round.script.mllm import setup_parser, tune + args = setup_parser() tune(args) def run_lmms(): @@ -49,10 +55,10 @@ def run_lmms(): lmms_eval(args) def switch(): - if "--lmms" in sys.argv: - sys.argv.remove("--lmms") - run_lmms() - elif "--mllm" in sys.argv: + # if "--lmms" in sys.argv: + # sys.argv.remove("--lmms") + # run_lmms() + if "--mllm" in sys.argv: sys.argv.remove("--mllm") run_mllm() else: diff --git a/auto_round/mllm/autoround_mllm.py b/auto_round/mllm/autoround_mllm.py index 1f23cf4a..f2b57152 100644 --- a/auto_round/mllm/autoround_mllm.py +++ b/auto_round/mllm/autoround_mllm.py @@ -208,9 +208,6 @@ def __init__( enable_torch_compile=enable_torch_compile, **kwargs, ) - - - def calib(self, nsamples, bs): diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py index 953070fa..81139354 100644 --- a/auto_round/script/mllm.py +++ b/auto_round/script/mllm.py @@ -160,39 +160,7 @@ def __init__(self, *args, **kwargs): self.add_argument("--to_quant_block_names", default=None, type=str, help="Names of quantitative blocks, please use commas to separate them.") - ## ======================= VLM eval======================= - self.add_argument("--tasks", type=str, - default="MMBench_DEV_EN_V11,ScienceQA_VAL,TextVQA_VAL,POPE", - help="eval tasks for VLMEvalKit.") - # Args that only apply to Video Dataset - self.add_argument("--nframe", type=int, default=8, - help="the number of frames to sample from a video," - " only applicable to the evaluation of video benchmarks.") - self.add_argument("--pack", action='store_true', - help="a video may associate with multiple questions, if pack==True," - " will ask all questions for a video in a single") - self.add_argument("--fps", type=float, default=-1, - help="set the fps for a video.") - # Work Dir - # Infer + Eval or Infer Only - self.add_argument("--mode", type=str, default='all', choices=['all', 'infer'], - help="when mode set to 'all', will perform both inference and evaluation;" - " when set to 'infer' will only perform the inference.") - self.add_argument('--eval_data_dir', type=str, default=None, - help='path for VLMEvalKit to store the eval data. Default will store in ~/LMUData') - # API Kwargs, Apply to API VLMs and Judge API LLMs - self.add_argument('--retry', type=int, default=None, help='retry numbers for API VLMs') - # Explicitly Set the Judge Model - self.add_argument('--judge', type=str, default=None, - help="whether is a judge model.") - # Logging Utils - self.add_argument('--verbose', action='store_true', - help="whether to display verbose information.") - # Configuration for Resume - # Ignore: will not rerun failed VLM inference - self.add_argument('--ignore', action='store_true', help='ignore failed indices. ') - # Rerun: will remove all evaluation temp files - self.add_argument('--rerun', action='store_true', help="If true, will remove all evvaluation temp files and rerun.") + def setup_parser(): @@ -217,6 +185,46 @@ def setup_parser(): return args +def setup_lmeval_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", "--model_name", "--model_name_or_path", + help="model name or path") + parser.add_argument("--tasks", type=str, + default="MMBench_DEV_EN_V11,ScienceQA_VAL,TextVQA_VAL,POPE", + help="eval tasks for VLMEvalKit.") + # Args that only apply to Video Dataset + parser.add_argument("--nframe", type=int, default=8, + help="the number of frames to sample from a video," + " only applicable to the evaluation of video benchmarks.") + parser.add_argument("--pack", action='store_true', + help="a video may associate with multiple questions, if pack==True," + " will ask all questions for a video in a single") + parser.add_argument("--fps", type=float, default=-1, + help="set the fps for a video.") + # Work Dir + # Infer + Eval or Infer Only + parser.add_argument("--mode", type=str, default='all', choices=['all', 'infer'], + help="when mode set to 'all', will perform both inference and evaluation;" + " when set to 'infer' will only perform the inference.") + parser.add_argument('--eval_data_dir', type=str, default=None, + help='path for VLMEvalKit to store the eval data. Default will store in ~/LMUData') + # API Kwargs, Apply to API VLMs and Judge API LLMs + parser.add_argument('--retry', type=int, default=None, help='retry numbers for API VLMs') + # Explicitly Set the Judge Model + parser.add_argument('--judge', type=str, default=None, + help="whether is a judge model.") + # Logging Utils + parser.add_argument('--verbose', action='store_true', + help="whether to display verbose information.") + # Configuration for Resume + # Ignore: will not rerun failed VLM inference + parser.add_argument('--ignore', action='store_true', help='ignore failed indices. ') + # Rerun: will remove all evaluation temp files + parser.add_argument('--rerun', action='store_true', help="if true, will remove all evaluation temp files and rerun.") + args = parser.parse_args() + return args + + def tune(args): if args.format is None: args.format = "auto_round" diff --git a/test/test_basic_usage.py b/test/test_basic_usage.py index 6d28801d..78b0a4f4 100644 --- a/test/test_basic_usage.py +++ b/test/test_basic_usage.py @@ -32,11 +32,24 @@ def test_auto_round_cmd(self): # test mllm script + # test auto_round_mllm help res = os.system( f"cd .. && {python_path} -m auto_round --mllm -h") if res > 0 or res == -1: assert False, "cmd line test fail, please have a check" + # test auto_round_mllm --eval help + res = os.system( + f"cd .. && {python_path} -m auto_round --mllm --eval -h") + if res > 0 or res == -1: + assert False, "cmd line test fail, please have a check" + + # test auto_round_mllm --lmms help + res = os.system( + f"cd .. && {python_path} -m auto_round --mllm --lmms -h") + if res > 0 or res == -1: + assert False, "cmd line test fail, please have a check" + res = os.system( f"cd .. && {python_path} -m auto_round --mllm --iter 2 --nsamples 10 --format auto_round --output_dir ./saved") if res > 0 or res == -1: From 1c7ac268052c8e96addb3ee9e5d387945388c9b3 Mon Sep 17 00:00:00 2001 From: n1ck-guo Date: Thu, 21 Nov 2024 20:40:59 -0500 Subject: [PATCH 3/6] fix Signed-off-by: n1ck-guo --- auto_round/script/mllm.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py index 81139354..97557e64 100644 --- a/auto_round/script/mllm.py +++ b/auto_round/script/mllm.py @@ -221,6 +221,8 @@ def setup_lmeval_parser(): parser.add_argument('--ignore', action='store_true', help='ignore failed indices. ') # Rerun: will remove all evaluation temp files parser.add_argument('--rerun', action='store_true', help="if true, will remove all evaluation temp files and rerun.") + parser.add_argument("--output_dir", default="./eval_result", type=str, + help="the directory to save quantized model") args = parser.parse_args() return args @@ -410,7 +412,6 @@ def eval(args): data_store_dir=args.eval_data_dir, dataset=args.tasks, pack=args.pack, - use_subtitle=args.use_subtitle, fps=args.fps, nframe=args.nframe, rerun=args.rerun, @@ -429,7 +430,7 @@ def setup_lmms_parser(): default="pope,textvqa_val,scienceqa,mmbench_en", help="To get full list of tasks, use the command lmms-eval --tasks list", ) - parser.add_argument("--output_dir", default="./tmp_autoround", type=str, + parser.add_argument("--output_dir", default="./eval_result", type=str, help="the directory to save quantized model") parser.add_argument( "--num_fewshot", From b35e384f7cc91844b2087e0d1468d4b2456a6220 Mon Sep 17 00:00:00 2001 From: n1ck-guo Date: Thu, 21 Nov 2024 21:07:26 -0500 Subject: [PATCH 4/6] pylint Signed-off-by: n1ck-guo --- auto_round/script/mllm.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py index 97557e64..612fcaec 100644 --- a/auto_round/script/mllm.py +++ b/auto_round/script/mllm.py @@ -218,9 +218,11 @@ def setup_lmeval_parser(): help="whether to display verbose information.") # Configuration for Resume # Ignore: will not rerun failed VLM inference - parser.add_argument('--ignore', action='store_true', help='ignore failed indices. ') + parser.add_argument('--ignore', action='store_true', + help='ignore failed indices. ') # Rerun: will remove all evaluation temp files - parser.add_argument('--rerun', action='store_true', help="if true, will remove all evaluation temp files and rerun.") + parser.add_argument('--rerun', action='store_true', + help="if true, will remove all evaluation temp files and rerun.") parser.add_argument("--output_dir", default="./eval_result", type=str, help="the directory to save quantized model") args = parser.parse_args() @@ -277,8 +279,9 @@ def tune(args): processor, image_processor = None, None if "llava" in model_name: from llava.model.builder import load_pretrained_model # pylint: disable=E0401 - tokenizer, model, image_processor, _ = load_pretrained_model(model_name, model_base=None, model_name=model_name, - torch_dtype=torch_dtype) + tokenizer, model, image_processor, _ = load_pretrained_model( + model_name, model_base=None, model_name=model_name, + torch_dtype=torch_dtype) model_type = "llava" else: config = AutoConfig.from_pretrained(model_name, trust_remote_code=not args.disable_trust_remote_code) From 0661414123f4c2658813c12cb5ad15f95c173460 Mon Sep 17 00:00:00 2001 From: n1ck-guo Date: Fri, 22 Nov 2024 00:29:19 -0500 Subject: [PATCH 5/6] modify api of processor Signed-off-by: n1ck-guo --- auto_round/mllm/README.md | 3 +-- auto_round/mllm/autoround_mllm.py | 3 ++- auto_round/mllm/processor.py | 5 +++-- auto_round/mllm/template.py | 4 ++-- auto_round/script/mllm.py | 3 +-- test/test_mllm.py | 9 ++++----- 6 files changed, 13 insertions(+), 14 deletions(-) diff --git a/auto_round/mllm/README.md b/auto_round/mllm/README.md index cd342f70..622b3000 100644 --- a/auto_round/mllm/README.md +++ b/auto_round/mllm/README.md @@ -22,14 +22,13 @@ from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, AutoTok model_name = "Qwen/Qwen2-VL-2B-Instruct" tokenizer = AutoTokenizer.from_pretrained(model_name) processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=trust_remote_code) -tokenizer.processor = processor model = Qwen2VLForConditionalGeneration.from_pretrained( model_name, trust_remote_code=True) dataset = "/path/to/llava.json" extra_data_dir = "/path/to/images/dir" bits, group_size = 4, 128 -autoround = AutoRoundMLLM(model, tokenizer, bits=bits, group_size=group_size, dataset=dataset, extra_data_dir=extra_data_dir) +autoround = AutoRoundMLLM(model, tokenizer, processor=processor, bits=bits, group_size=group_size, dataset=dataset, extra_data_dir=extra_data_dir) autoround.quantize() output_dir = "./tmp_autoround" diff --git a/auto_round/mllm/autoround_mllm.py b/auto_round/mllm/autoround_mllm.py index f2b57152..bfcb35c0 100644 --- a/auto_round/mllm/autoround_mllm.py +++ b/auto_round/mllm/autoround_mllm.py @@ -97,6 +97,7 @@ def __init__( self, model, tokenizer, + processor = None, image_processor = None, bits: int = 4, group_size: int = 128, @@ -144,7 +145,7 @@ def __init__( self.image_processor = image_processor self.template = template if template is not None else model.config.model_type self.template = get_template( - self.template, model=model, tokenizer=tokenizer, image_processor=image_processor) + self.template, model=model, tokenizer=tokenizer, processor=processor, image_processor=image_processor) dataset = self.template.default_dataset if dataset is None else dataset from ..calib_dataset import CALIB_DATASETS diff --git a/auto_round/mllm/processor.py b/auto_round/mllm/processor.py index 7de74d7f..88abbc0b 100644 --- a/auto_round/mllm/processor.py +++ b/auto_round/mllm/processor.py @@ -30,9 +30,10 @@ class BasicProcessor: def __init__(self): pass - def post_init(self, model, tokenizer, image_processor=None, **kwargs): + def post_init(self, model, tokenizer, processor=None, image_processor=None, **kwargs): self.model = model self.tokenizer = tokenizer + self.processor = processor if image_processor is not None: self.image_processor = image_processor else: @@ -73,7 +74,7 @@ def get_input( if truncation is True and truncation_strategy == "text": text = self.tokenizer.decode(self.tokenizer(text).input_ids[:max_length]) - ret = self.tokenizer.processor( + ret = self.processor( text=text, images=images, return_tensors=return_tensors, diff --git a/auto_round/mllm/template.py b/auto_round/mllm/template.py index 16bb4e21..d6a24dcb 100644 --- a/auto_round/mllm/template.py +++ b/auto_round/mllm/template.py @@ -144,7 +144,7 @@ def _load_preset_template(): _load_preset_template() -def get_template(template_or_path: str, model=None, tokenizer=None, image_processor=None): +def get_template(template_or_path: str, model=None, tokenizer=None, processor=None, image_processor=None): """Get template by template name or from a json file. Args: @@ -163,6 +163,6 @@ def get_template(template_or_path: str, model=None, tokenizer=None, image_proces logger.warning(f"Unable to recognize {template_or_path}, using default template instead.") template = TEMPLATES["default"] - template.processor.post_init(model=model, tokenizer=tokenizer, image_processor=image_processor) + template.processor.post_init(model=model, tokenizer=tokenizer, processor=processor, image_processor=image_processor) return template \ No newline at end of file diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py index 612fcaec..75ea4722 100644 --- a/auto_round/script/mllm.py +++ b/auto_round/script/mllm.py @@ -287,7 +287,6 @@ def tune(args): config = AutoConfig.from_pretrained(model_name, trust_remote_code=not args.disable_trust_remote_code) tokenizer = AutoTokenizer.from_pretrained(model_name) processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=not args.disable_trust_remote_code) - tokenizer.processor = processor model_type = config.model_type if "qwen2_vl" in model_type: from transformers import Qwen2VLForConditionalGeneration @@ -370,7 +369,7 @@ def tune(args): if "--truncation" not in sys.argv: args.truncation = None - autoround = round(model, tokenizer, image_processor=image_processor, dataset=args.dataset, + autoround = round(model, tokenizer, processor=processor, image_processor=image_processor, dataset=args.dataset, extra_data_dir=args.extra_data_dir, bits=args.bits, group_size=args.group_size, sym=not args.asym, batch_size=args.batch_size, seqlen=seqlen, nblocks=args.nblocks, iters=args.iters, lr=args.lr, minmax_lr=args.minmax_lr, amp=not args.disable_amp, diff --git a/test/test_mllm.py b/test/test_mllm.py index 3441962d..1d09ca92 100644 --- a/test/test_mllm.py +++ b/test/test_mllm.py @@ -42,12 +42,12 @@ def tearDownClass(self): def test_tune(self): tokenizer = AutoTokenizer.from_pretrained(self.model_name) processor = AutoProcessor.from_pretrained(self.model_name, trust_remote_code=True) - tokenizer.processor = processor model = Qwen2VLForConditionalGeneration.from_pretrained( self.model_name, trust_remote_code=True, device_map="auto") bits, group_size = 4, 128 autoround = AutoRoundMLLM( - model, tokenizer, bits=bits, group_size=group_size, + model, tokenizer, processor=processor, + bits=bits, group_size=group_size, nsamples=1, batch_size=1, iters=2, dataset=self.dataset,seqlen=256) autoround.quantize() @@ -57,12 +57,12 @@ def test_tune(self): def test_quant_vision(self): ## bug need to fix tokenizer = AutoTokenizer.from_pretrained(self.model_name) processor = AutoProcessor.from_pretrained(self.model_name, trust_remote_code=True) - tokenizer.processor = processor model = Qwen2VLForConditionalGeneration.from_pretrained( self.model_name, trust_remote_code=True, device_map="auto") bits, group_size = 4, 128 autoround = AutoRoundMLLM( - model, tokenizer, bits=bits, group_size=group_size, + model, tokenizer, processor=processor, + bits=bits, group_size=group_size, nsamples=5, batch_size=3, iters=2, dataset=self.dataset, quant_nontext_module=False,seqlen=256) autoround.quantize() @@ -72,7 +72,6 @@ def test_quant_block_names(self): from auto_round.utils import get_multimodal_block_names,find_matching_blocks tokenizer = AutoTokenizer.from_pretrained(self.model_name) processor = AutoProcessor.from_pretrained(self.model_name, trust_remote_code=True) - tokenizer.processor = processor model = Qwen2VLForConditionalGeneration.from_pretrained( self.model_name, trust_remote_code=True, device_map="auto") to_quant_block_names = 'visual.*12,layers.0,model.layers.*9' From a652d647c8325a6b5c4f72940c108fb839244066 Mon Sep 17 00:00:00 2001 From: n1ck-guo Date: Fri, 22 Nov 2024 03:39:33 -0500 Subject: [PATCH 6/6] fix Signed-off-by: n1ck-guo --- auto_round/__main__.py | 2 -- auto_round/mllm/eval.py | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/auto_round/__main__.py b/auto_round/__main__.py index 17691ab4..7785b812 100644 --- a/auto_round/__main__.py +++ b/auto_round/__main__.py @@ -47,8 +47,6 @@ def run_mllm(): tune(args) def run_lmms(): - from transformers.utils.versions import require_version - require_version("lmms_eval", "lmms_eval need to be installed, `pip install lmms_eval`") # from auto_round.script.lmms_eval import setup_lmms_args, eval from auto_round.script.mllm import setup_lmms_parser, lmms_eval args = setup_lmms_parser() diff --git a/auto_round/mllm/eval.py b/auto_round/mllm/eval.py index 887286de..109ac834 100644 --- a/auto_round/mllm/eval.py +++ b/auto_round/mllm/eval.py @@ -349,7 +349,8 @@ def lmms_eval( apply_chat_template=False ): from auto_round import AutoRoundConfig - + from transformers.utils.versions import require_version + require_version("lmms_eval", "lmms_eval need to be installed, `pip install lmms_eval`") if isinstance(tasks, str): tasks = tasks.replace(' ', '').split(',')