From bd0c110a13f247ca09fcd2e20d3c5925aa5c0ad0 Mon Sep 17 00:00:00 2001
From: n1ck-guo <heng.guo@intel.com>
Date: Thu, 21 Nov 2024 20:00:50 -0500
Subject: [PATCH 1/6] refine mllm eval args: drop --use-subtitle, add help text

Signed-off-by: n1ck-guo <heng.guo@intel.com>
---
 auto_round/script/mllm.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py
index d3fbb399..953070fa 100644
--- a/auto_round/script/mllm.py
+++ b/auto_round/script/mllm.py
@@ -171,8 +171,8 @@ def __init__(self, *args, **kwargs):
         self.add_argument("--pack", action='store_true',
                           help="a video may associate with multiple questions, if pack==True,"
                                " will ask all questions for a video in a single")
-        self.add_argument("--use-subtitle", action='store_true')
-        self.add_argument("--fps", type=float, default=-1)
+        self.add_argument("--fps", type=float, default=-1,
+                          help="set the fps for a video.")
         # Work Dir
         # Infer + Eval or Infer Only
         self.add_argument("--mode", type=str, default='all', choices=['all', 'infer'],
@@ -183,14 +183,16 @@ def __init__(self, *args, **kwargs):
         # API Kwargs, Apply to API VLMs and Judge API LLMs
         self.add_argument('--retry', type=int, default=None, help='retry numbers for API VLMs')
         # Explicitly Set the Judge Model
-        self.add_argument('--judge', type=str, default=None)
+        self.add_argument('--judge', type=str, default=None,
+                          help="whether is a judge model.")
         # Logging Utils
-        self.add_argument('--verbose', action='store_true')
+        self.add_argument('--verbose', action='store_true',
+                          help="whether to display verbose information.")
         # Configuration for Resume
         # Ignore: will not rerun failed VLM inference
         self.add_argument('--ignore', action='store_true', help='ignore failed indices. ')
         # Rerun: will remove all evaluation temp files
-        self.add_argument('--rerun', action='store_true')
+        self.add_argument('--rerun', action='store_true', help="If true, will remove all evvaluation temp files and rerun.")
 
 
 def setup_parser():

From c105dc0b84bf6b9fbc4d9c1cb4ea513e4f722388 Mon Sep 17 00:00:00 2001
From: n1ck-guo <heng.guo@intel.com>
Date: Thu, 21 Nov 2024 20:37:08 -0500
Subject: [PATCH 2/6] split args

Signed-off-by: n1ck-guo <heng.guo@intel.com>
---
 auto_round/__main__.py            | 20 ++++++---
 auto_round/mllm/autoround_mllm.py |  3 --
 auto_round/script/mllm.py         | 74 +++++++++++++++++--------------
 test/test_basic_usage.py          | 13 ++++++
 4 files changed, 67 insertions(+), 43 deletions(-)

diff --git a/auto_round/__main__.py b/auto_round/__main__.py
index 53490a01..17691ab4 100644
--- a/auto_round/__main__.py
+++ b/auto_round/__main__.py
@@ -33,11 +33,17 @@ def run_fast():
 
 
 def run_mllm():
-    from auto_round.script.mllm import setup_parser, tune, eval
-    args = setup_parser()
-    if args.eval:
+    if "--eval" in sys.argv:
+        from auto_round.script.mllm import setup_lmeval_parser, eval
+        sys.argv.remove("--eval")
+        args = setup_lmeval_parser()
         eval(args)
+    elif "--lmms" in sys.argv:
+        sys.argv.remove("--lmms")
+        run_lmms()
     else:
+        from auto_round.script.mllm import setup_parser, tune
+        args = setup_parser()
         tune(args)
 
 def run_lmms():
@@ -49,10 +55,10 @@ def run_lmms():
     lmms_eval(args)
 
 def switch():
-    if "--lmms" in sys.argv:
-        sys.argv.remove("--lmms")
-        run_lmms()
-    elif "--mllm" in sys.argv:
+    # if "--lmms" in sys.argv:
+    #     sys.argv.remove("--lmms")
+    #     run_lmms()
+    if "--mllm" in sys.argv:
         sys.argv.remove("--mllm")
         run_mllm()
     else:
diff --git a/auto_round/mllm/autoround_mllm.py b/auto_round/mllm/autoround_mllm.py
index 1f23cf4a..f2b57152 100644
--- a/auto_round/mllm/autoround_mllm.py
+++ b/auto_round/mllm/autoround_mllm.py
@@ -208,9 +208,6 @@ def __init__(
             enable_torch_compile=enable_torch_compile,
             **kwargs,
         )
-
-        
-    
             
 
     def calib(self, nsamples, bs):
diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py
index 953070fa..81139354 100644
--- a/auto_round/script/mllm.py
+++ b/auto_round/script/mllm.py
@@ -160,39 +160,7 @@ def __init__(self, *args, **kwargs):
         self.add_argument("--to_quant_block_names", default=None, type=str,
                           help="Names of quantitative blocks, please use commas to separate them.")
 
-        ## ======================= VLM eval=======================
-        self.add_argument("--tasks", type=str,
-                          default="MMBench_DEV_EN_V11,ScienceQA_VAL,TextVQA_VAL,POPE",
-                          help="eval tasks for VLMEvalKit.")
-        # Args that only apply to Video Dataset
-        self.add_argument("--nframe", type=int, default=8,
-                          help="the number of frames to sample from a video,"
-                               " only applicable to the evaluation of video benchmarks.")
-        self.add_argument("--pack", action='store_true',
-                          help="a video may associate with multiple questions, if pack==True,"
-                               " will ask all questions for a video in a single")
-        self.add_argument("--fps", type=float, default=-1,
-                          help="set the fps for a video.")
-        # Work Dir
-        # Infer + Eval or Infer Only
-        self.add_argument("--mode", type=str, default='all', choices=['all', 'infer'],
-                          help="when mode set to 'all', will perform both inference and evaluation;"
-                               " when set to 'infer' will only perform the inference.")
-        self.add_argument('--eval_data_dir', type=str, default=None,
-                          help='path for VLMEvalKit to store the eval data. Default will store in ~/LMUData')
-        # API Kwargs, Apply to API VLMs and Judge API LLMs
-        self.add_argument('--retry', type=int, default=None, help='retry numbers for API VLMs')
-        # Explicitly Set the Judge Model
-        self.add_argument('--judge', type=str, default=None,
-                          help="whether is a judge model.")
-        # Logging Utils
-        self.add_argument('--verbose', action='store_true',
-                          help="whether to display verbose information.")
-        # Configuration for Resume
-        # Ignore: will not rerun failed VLM inference
-        self.add_argument('--ignore', action='store_true', help='ignore failed indices. ')
-        # Rerun: will remove all evaluation temp files
-        self.add_argument('--rerun', action='store_true', help="If true, will remove all evvaluation temp files and rerun.")
+        
 
 
 def setup_parser():
@@ -217,6 +185,46 @@ def setup_parser():
     return args
 
 
+def setup_lmeval_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", "--model_name", "--model_name_or_path",
+                          help="model name or path")
+    parser.add_argument("--tasks", type=str,
+                        default="MMBench_DEV_EN_V11,ScienceQA_VAL,TextVQA_VAL,POPE",
+                        help="eval tasks for VLMEvalKit.")
+    # Args that only apply to Video Dataset
+    parser.add_argument("--nframe", type=int, default=8,
+                        help="the number of frames to sample from a video,"
+                            " only applicable to the evaluation of video benchmarks.")
+    parser.add_argument("--pack", action='store_true',
+                        help="a video may associate with multiple questions, if pack==True,"
+                            " will ask all questions for a video in a single query.")
+    parser.add_argument("--fps", type=float, default=-1,
+                        help="set the fps for a video.")
+    # Work Dir
+    # Infer + Eval or Infer Only
+    parser.add_argument("--mode", type=str, default='all', choices=['all', 'infer'],
+                        help="when mode set to 'all', will perform both inference and evaluation;"
+                            " when set to 'infer' will only perform the inference.")
+    parser.add_argument('--eval_data_dir', type=str, default=None,
+                        help='path for VLMEvalKit to store the eval data. Default will store in ~/LMUData')
+    # API Kwargs, Apply to API VLMs and Judge API LLMs
+    parser.add_argument('--retry', type=int, default=None, help='retry numbers for API VLMs')
+    # Explicitly Set the Judge Model
+    parser.add_argument('--judge', type=str, default=None,
+                        help="explicitly set the judge model.")
+    # Logging Utils
+    parser.add_argument('--verbose', action='store_true',
+                        help="whether to display verbose information.")
+    # Configuration for Resume
+    # Ignore: will not rerun failed VLM inference
+    parser.add_argument('--ignore', action='store_true', help='ignore failed indices. ')
+    # Rerun: will remove all evaluation temp files
+    parser.add_argument('--rerun', action='store_true', help="if true, will remove all evaluation temp files and rerun.")
+    args = parser.parse_args()
+    return args
+
+
 def tune(args):
     if args.format is None:
         args.format = "auto_round"
diff --git a/test/test_basic_usage.py b/test/test_basic_usage.py
index 6d28801d..78b0a4f4 100644
--- a/test/test_basic_usage.py
+++ b/test/test_basic_usage.py
@@ -32,11 +32,24 @@ def test_auto_round_cmd(self):
 
 
         # test mllm script
+        # test auto_round_mllm help
         res = os.system(
             f"cd .. && {python_path} -m auto_round --mllm -h")
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
+        # test auto_round_mllm --eval help
+        res = os.system(
+            f"cd .. && {python_path} -m auto_round --mllm --eval -h")
+        if res > 0 or res == -1:
+            assert False, "cmd line test fail, please have a check"
+        
+        # test auto_round_mllm --lmms help
+        res = os.system(
+            f"cd .. && {python_path} -m auto_round --mllm --lmms -h")
+        if res > 0 or res == -1:
+            assert False, "cmd line test fail, please have a check"
+
         res = os.system(
             f"cd .. && {python_path} -m auto_round --mllm --iter 2 --nsamples 10 --format auto_round --output_dir ./saved")
         if res > 0 or res == -1:

From 1c7ac268052c8e96addb3ee9e5d387945388c9b3 Mon Sep 17 00:00:00 2001
From: n1ck-guo <heng.guo@intel.com>
Date: Thu, 21 Nov 2024 20:40:59 -0500
Subject: [PATCH 3/6] fix eval parser: add --output_dir, stop passing use_subtitle

Signed-off-by: n1ck-guo <heng.guo@intel.com>
---
 auto_round/script/mllm.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py
index 81139354..97557e64 100644
--- a/auto_round/script/mllm.py
+++ b/auto_round/script/mllm.py
@@ -221,6 +221,8 @@ def setup_lmeval_parser():
     parser.add_argument('--ignore', action='store_true', help='ignore failed indices. ')
     # Rerun: will remove all evaluation temp files
     parser.add_argument('--rerun', action='store_true', help="if true, will remove all evaluation temp files and rerun.")
+    parser.add_argument("--output_dir", default="./eval_result", type=str,
+                          help="the directory to save quantized model")
     args = parser.parse_args()
     return args
 
@@ -410,7 +412,6 @@ def eval(args):
         data_store_dir=args.eval_data_dir,
         dataset=args.tasks,
         pack=args.pack,
-        use_subtitle=args.use_subtitle,
         fps=args.fps,
         nframe=args.nframe,
         rerun=args.rerun,
@@ -429,7 +430,7 @@ def setup_lmms_parser():
         default="pope,textvqa_val,scienceqa,mmbench_en",
         help="To get full list of tasks, use the command lmms-eval --tasks list",
     )
-    parser.add_argument("--output_dir", default="./tmp_autoround", type=str,
+    parser.add_argument("--output_dir", default="./eval_result", type=str,
                           help="the directory to save quantized model")
     parser.add_argument(
         "--num_fewshot",

From b35e384f7cc91844b2087e0d1468d4b2456a6220 Mon Sep 17 00:00:00 2001
From: n1ck-guo <heng.guo@intel.com>
Date: Thu, 21 Nov 2024 21:07:26 -0500
Subject: [PATCH 4/6] wrap long lines in script/mllm.py for pylint

Signed-off-by: n1ck-guo <heng.guo@intel.com>
---
 auto_round/script/mllm.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py
index 97557e64..612fcaec 100644
--- a/auto_round/script/mllm.py
+++ b/auto_round/script/mllm.py
@@ -218,9 +218,11 @@ def setup_lmeval_parser():
                         help="whether to display verbose information.")
     # Configuration for Resume
     # Ignore: will not rerun failed VLM inference
-    parser.add_argument('--ignore', action='store_true', help='ignore failed indices. ')
+    parser.add_argument('--ignore', action='store_true',
+                        help='ignore failed indices. ')
     # Rerun: will remove all evaluation temp files
-    parser.add_argument('--rerun', action='store_true', help="if true, will remove all evaluation temp files and rerun.")
+    parser.add_argument('--rerun', action='store_true',
+                        help="if true, will remove all evaluation temp files and rerun.")
     parser.add_argument("--output_dir", default="./eval_result", type=str,
                           help="the directory to save quantized model")
     args = parser.parse_args()
@@ -277,8 +279,9 @@ def tune(args):
     processor, image_processor = None, None
     if "llava" in model_name:
         from llava.model.builder import load_pretrained_model  # pylint: disable=E0401
-        tokenizer, model, image_processor, _ = load_pretrained_model(model_name, model_base=None, model_name=model_name,
-                                                                     torch_dtype=torch_dtype)
+        tokenizer, model, image_processor, _ = load_pretrained_model(
+            model_name, model_base=None, model_name=model_name,
+            torch_dtype=torch_dtype)
         model_type = "llava"
     else:
         config = AutoConfig.from_pretrained(model_name, trust_remote_code=not args.disable_trust_remote_code)

From 0661414123f4c2658813c12cb5ad15f95c173460 Mon Sep 17 00:00:00 2001
From: n1ck-guo <heng.guo@intel.com>
Date: Fri, 22 Nov 2024 00:29:19 -0500
Subject: [PATCH 5/6] modify api of processor

Signed-off-by: n1ck-guo <heng.guo@intel.com>
---
 auto_round/mllm/README.md         | 3 +--
 auto_round/mllm/autoround_mllm.py | 3 ++-
 auto_round/mllm/processor.py      | 5 +++--
 auto_round/mllm/template.py       | 4 ++--
 auto_round/script/mllm.py         | 3 +--
 test/test_mllm.py                 | 9 ++++-----
 6 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/auto_round/mllm/README.md b/auto_round/mllm/README.md
index cd342f70..622b3000 100644
--- a/auto_round/mllm/README.md
+++ b/auto_round/mllm/README.md
@@ -22,14 +22,13 @@ from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, AutoTok
 model_name = "Qwen/Qwen2-VL-2B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=trust_remote_code)
-tokenizer.processor = processor
 model = Qwen2VLForConditionalGeneration.from_pretrained(
     model_name, trust_remote_code=True) 
 dataset = "/path/to/llava.json"
 extra_data_dir = "/path/to/images/dir" 
 
 bits, group_size = 4, 128
-autoround = AutoRoundMLLM(model, tokenizer, bits=bits, group_size=group_size, dataset=dataset, extra_data_dir=extra_data_dir)
+autoround = AutoRoundMLLM(model, tokenizer, processor=processor, bits=bits, group_size=group_size, dataset=dataset, extra_data_dir=extra_data_dir)
 
 autoround.quantize()
 output_dir = "./tmp_autoround"
diff --git a/auto_round/mllm/autoround_mllm.py b/auto_round/mllm/autoround_mllm.py
index f2b57152..bfcb35c0 100644
--- a/auto_round/mllm/autoround_mllm.py
+++ b/auto_round/mllm/autoround_mllm.py
@@ -97,6 +97,7 @@ def __init__(
             self,
             model,
             tokenizer,
+            processor = None,
             image_processor = None,
             bits: int = 4,
             group_size: int = 128,
@@ -144,7 +145,7 @@ def __init__(
         self.image_processor = image_processor
         self.template = template if template is not None else model.config.model_type
         self.template = get_template(
-            self.template, model=model, tokenizer=tokenizer, image_processor=image_processor)
+            self.template, model=model, tokenizer=tokenizer, processor=processor, image_processor=image_processor)
         
         dataset = self.template.default_dataset if dataset is None else dataset
         from ..calib_dataset import CALIB_DATASETS
diff --git a/auto_round/mllm/processor.py b/auto_round/mllm/processor.py
index 7de74d7f..88abbc0b 100644
--- a/auto_round/mllm/processor.py
+++ b/auto_round/mllm/processor.py
@@ -30,9 +30,10 @@ class BasicProcessor:
     def __init__(self):
         pass
     
-    def post_init(self, model, tokenizer, image_processor=None, **kwargs):
+    def post_init(self, model, tokenizer, processor=None, image_processor=None, **kwargs):
         self.model = model
         self.tokenizer = tokenizer
+        self.processor = processor
         if image_processor is not None:
             self.image_processor = image_processor
         else:
@@ -73,7 +74,7 @@ def get_input(
         if truncation is True and truncation_strategy == "text":
             text = self.tokenizer.decode(self.tokenizer(text).input_ids[:max_length])
 
-        ret = self.tokenizer.processor(
+        ret = self.processor(
             text=text,
             images=images,
             return_tensors=return_tensors,
diff --git a/auto_round/mllm/template.py b/auto_round/mllm/template.py
index 16bb4e21..d6a24dcb 100644
--- a/auto_round/mllm/template.py
+++ b/auto_round/mllm/template.py
@@ -144,7 +144,7 @@ def _load_preset_template():
 _load_preset_template()
 
 
-def get_template(template_or_path: str, model=None, tokenizer=None, image_processor=None):
+def get_template(template_or_path: str, model=None, tokenizer=None, processor=None, image_processor=None):
     """Get template by template name or from a json file.
 
     Args:
@@ -163,6 +163,6 @@ def get_template(template_or_path: str, model=None, tokenizer=None, image_proces
             logger.warning(f"Unable to recognize {template_or_path}, using default template instead.")
             template = TEMPLATES["default"]
 
-    template.processor.post_init(model=model, tokenizer=tokenizer, image_processor=image_processor)
+    template.processor.post_init(model=model, tokenizer=tokenizer, processor=processor, image_processor=image_processor)
 
     return template
\ No newline at end of file
diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py
index 612fcaec..75ea4722 100644
--- a/auto_round/script/mllm.py
+++ b/auto_round/script/mllm.py
@@ -287,7 +287,6 @@ def tune(args):
         config = AutoConfig.from_pretrained(model_name, trust_remote_code=not args.disable_trust_remote_code)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=not args.disable_trust_remote_code)
-        tokenizer.processor = processor
         model_type = config.model_type
         if "qwen2_vl" in model_type:
             from transformers import Qwen2VLForConditionalGeneration
@@ -370,7 +369,7 @@ def tune(args):
     if "--truncation" not in sys.argv:
         args.truncation = None
 
-    autoround = round(model, tokenizer, image_processor=image_processor, dataset=args.dataset,
+    autoround = round(model, tokenizer, processor=processor, image_processor=image_processor, dataset=args.dataset,
                       extra_data_dir=args.extra_data_dir, bits=args.bits, group_size=args.group_size,
                       sym=not args.asym, batch_size=args.batch_size, seqlen=seqlen, nblocks=args.nblocks,
                       iters=args.iters, lr=args.lr, minmax_lr=args.minmax_lr, amp=not args.disable_amp,
diff --git a/test/test_mllm.py b/test/test_mllm.py
index 3441962d..1d09ca92 100644
--- a/test/test_mllm.py
+++ b/test/test_mllm.py
@@ -42,12 +42,12 @@ def tearDownClass(self):
     def test_tune(self):
         tokenizer = AutoTokenizer.from_pretrained(self.model_name)
         processor = AutoProcessor.from_pretrained(self.model_name, trust_remote_code=True)
-        tokenizer.processor = processor
         model = Qwen2VLForConditionalGeneration.from_pretrained(
             self.model_name, trust_remote_code=True, device_map="auto")
         bits, group_size = 4, 128
         autoround = AutoRoundMLLM(
-            model, tokenizer, bits=bits, group_size=group_size,
+            model, tokenizer, processor=processor, 
+            bits=bits, group_size=group_size,
             nsamples=1,
             batch_size=1, iters=2, dataset=self.dataset,seqlen=256)
         autoround.quantize()
@@ -57,12 +57,12 @@ def test_tune(self):
     def test_quant_vision(self): ## bug need to fix
         tokenizer = AutoTokenizer.from_pretrained(self.model_name)
         processor = AutoProcessor.from_pretrained(self.model_name, trust_remote_code=True)
-        tokenizer.processor = processor
         model = Qwen2VLForConditionalGeneration.from_pretrained(
             self.model_name, trust_remote_code=True, device_map="auto")
         bits, group_size = 4, 128
         autoround = AutoRoundMLLM(
-            model, tokenizer, bits=bits, group_size=group_size,
+            model, tokenizer, processor=processor,
+            bits=bits, group_size=group_size,
             nsamples=5,
             batch_size=3, iters=2, dataset=self.dataset, quant_nontext_module=False,seqlen=256)
         autoround.quantize()
@@ -72,7 +72,6 @@ def test_quant_block_names(self):
         from auto_round.utils import get_multimodal_block_names,find_matching_blocks
         tokenizer = AutoTokenizer.from_pretrained(self.model_name)
         processor = AutoProcessor.from_pretrained(self.model_name, trust_remote_code=True)
-        tokenizer.processor = processor
         model = Qwen2VLForConditionalGeneration.from_pretrained(
             self.model_name, trust_remote_code=True, device_map="auto")
         to_quant_block_names = 'visual.*12,layers.0,model.layers.*9'

From a652d647c8325a6b5c4f72940c108fb839244066 Mon Sep 17 00:00:00 2001
From: n1ck-guo <heng.guo@intel.com>
Date: Fri, 22 Nov 2024 03:39:33 -0500
Subject: [PATCH 6/6] move lmms_eval requirement check into lmms_eval()

Signed-off-by: n1ck-guo <heng.guo@intel.com>
---
 auto_round/__main__.py  | 2 --
 auto_round/mllm/eval.py | 3 ++-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/auto_round/__main__.py b/auto_round/__main__.py
index 17691ab4..7785b812 100644
--- a/auto_round/__main__.py
+++ b/auto_round/__main__.py
@@ -47,8 +47,6 @@ def run_mllm():
         tune(args)
 
 def run_lmms():
-    from transformers.utils.versions import require_version
-    require_version("lmms_eval", "lmms_eval need to be installed, `pip install lmms_eval`")
     # from auto_round.script.lmms_eval import setup_lmms_args, eval
     from auto_round.script.mllm import setup_lmms_parser, lmms_eval
     args = setup_lmms_parser()
diff --git a/auto_round/mllm/eval.py b/auto_round/mllm/eval.py
index 887286de..109ac834 100644
--- a/auto_round/mllm/eval.py
+++ b/auto_round/mllm/eval.py
@@ -349,7 +349,8 @@ def lmms_eval(
         apply_chat_template=False
         ):
     from auto_round import AutoRoundConfig
-
+    from transformers.utils.versions import require_version
+    require_version("lmms_eval", "lmms_eval need to be installed, `pip install lmms_eval`")
     if isinstance(tasks, str):
         tasks = tasks.replace(' ', '').split(',')