
fix qat commands
Signed-off-by: changwangss <[email protected]>
changwangss committed Jun 20, 2024
1 parent d7e3771 commit f145368
Showing 46 changed files with 26 additions and 9,445 deletions.
7 changes: 0 additions & 7 deletions docs/api_doc/optimization/optimizer.rst

This file was deleted.

7 changes: 5 additions & 2 deletions docs/devcatalog.md
@@ -99,7 +99,8 @@ raw_datasets = raw_datasets.map(lambda e: tokenizer(e['sentence'], truncation=Tr
 Documentation for API usage can be found [here](https://github.com/intel/intel-extension-for-transformers/tree/main/docs)

 ```python
-from intel_extension_for_transformers.transformers import QuantizationConfig, metrics, objectives
+from intel_extension_for_transformers.transformers import metrics, objectives
+from neural_compressor.config import PostTrainingQuantConfig
 from intel_extension_for_transformers.transformers.trainer import NLPTrainer
 # load config, model and metric
 config = AutoConfig.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english",num_labels=2)
@@ -120,7 +121,9 @@ trainer = NLPTrainer(model=model,
                      tokenizer=tokenizer
 )
 # model quantization using trainer
-q_config = QuantizationConfig(metrics=[metrics.Metric(name="eval_accuracy")])
+tune_metric = metrics.Metric(name="eval_accuracy")
+trainer.metrics = tune_metric
+q_config = PostTrainingQuantConfig()
 model = trainer.quantize(quant_config=q_config)

 # test sentiment analysis with quantization
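Pieced together from the two hunks above, the updated flow reads roughly as follows. This is a sketch, not the full devcatalog.md example: the model name and metric come straight from the diff, while the dataset wiring the snippet elides is flagged in the comments.

```python
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer
from intel_extension_for_transformers.transformers import metrics
from intel_extension_for_transformers.transformers.trainer import NLPTrainer
from neural_compressor.config import PostTrainingQuantConfig

# Load config, model, and tokenizer, as in the first hunk.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
config = AutoConfig.from_pretrained(model_name, num_labels=2)
model = AutoModelForSequenceClassification.from_pretrained(model_name, config=config)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# The original example also passes train/eval datasets to NLPTrainer; they are
# elided here, and quantization needs an eval dataset to measure eval_accuracy.
trainer = NLPTrainer(model=model, tokenizer=tokenizer)

# Attach the tuning metric to the trainer, then quantize with the
# Intel Neural Compressor post-training config, as in the second hunk.
tune_metric = metrics.Metric(name="eval_accuracy")
trainer.metrics = tune_metric
q_config = PostTrainingQuantConfig()
model = trainer.quantize(quant_config=q_config)
```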
10 changes: 5 additions & 5 deletions docs/get_started.md
@@ -75,17 +75,17 @@ model = trainer.distill(distillation_config=d_conf, teacher_model=teacher_model)
 ## Quantized Length Adaptive Transformer
 Quantized Length Adaptive Transformer leverages sequence-length reduction and low-bit representation techniques to further enhance model inference performance, enabling adaptive sequence-length sizes to accommodate different computational budget requirements with an optimal accuracy efficiency tradeoff.
 ```python
-from intel_extension_for_transformers.transformers import QuantizationConfig, DynamicLengthConfig, metric, objectives
+from intel_extension_for_transformers.transformers import DynamicLengthConfig, metric, objectives
+from neural_compressor.config import PostTrainingQuantConfig
 from intel_extension_for_transformers.transformers.trainer import NLPTrainer

 # Replace transformers.Trainer with NLPTrainer
 # trainer = transformers.Trainer(...)
 trainer = NLPTrainer(...)
 metric = metrics.Metric(name="eval_f1", is_relative=True, criterion=0.01)
-q_config = QuantizationConfig(
-    approach="static",
-    metrics=[metric],
-    objectives=[objectives.performance]
+trainer.metrics = metric
+q_config = PostTrainingQuantConfig(
+    approach="static"
 )
 # Apply the length config
 dynamic_length_config = DynamicLengthConfig(length_config=length_config)
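With QuantizationConfig gone, the eval_f1 constraint above travels through trainer.metrics. The same relative 1% tolerance can also be stated directly on the Neural Compressor config; a minimal sketch, assuming the AccuracyCriterion class from neural_compressor 2.x:

```python
from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig

# Mirror metrics.Metric(name="eval_f1", is_relative=True, criterion=0.01):
# tolerate at most a 1% relative drop in the tuning metric.
accuracy_criterion = AccuracyCriterion(criterion="relative", tolerable_loss=0.01)

q_config = PostTrainingQuantConfig(
    approach="static",
    accuracy_criterion=accuracy_criterion,
)
```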
10 changes: 1 addition & 9 deletions docs/quantization.md
@@ -157,16 +157,8 @@ In terms of evaluating the status of a specific model during tuning, we should h
 Please refer to [objective document](objectives.md) for the details.

 ### Create an Instance of QuantizationConfig
-The QuantizationConfig contains all the information related to the model quantization behavior. If you have created Metric and Objective instance(default Objective is "performance"), then you can create an instance of QuantizationConfig.
+The QuantizationConfig contains all the information related to the model quantization behavior. If you have created Metric and Objective instance(default Objective is "performance"), then you can create an instance of PostTrainingQuantConfig or QuantizationAwareTrainingConfig.

-- arguments:
-
-    |Argument   |Type              |Description                                          |Default value |
-    |:----------|:-----------------|:-----------------------------------------------------|:-------------|
-    |approach   |string            |Which quantization approach you used                  |"static"      |
-    |timeout    |integer           |Tuning timeout(seconds), 0 means early stop; combine with max_trials field to decide when to exit|0 |
-    |max_trials |integer           |Max tune times                                        |100           |
-    |objective  |list of Objective |Objective with accuracy constraint guaranteed         |performance   |
-
 - example:
 ```python
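The arguments table removed above is not lost functionality: in neural_compressor 2.x the timeout, max_trials, and objective knobs live on TuningCriterion, which plugs into PostTrainingQuantConfig. A hedged sketch of the equivalent setup, restating the old defaults:

```python
from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion

# Old QuantizationConfig defaults, expressed on TuningCriterion:
#   timeout=0: early stop; exit is decided together with max_trials
#   max_trials=100: cap on the number of tuning attempts
#   objective="performance": performance objective with accuracy constraint
tuning_criterion = TuningCriterion(timeout=0, max_trials=100, objective="performance")

q_config = PostTrainingQuantConfig(
    approach="static",  # same default approach as the old table
    tuning_criterion=tuning_criterion,
)
```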
@@ -1000,10 +1000,9 @@
 "    is_relative=True, # Metric tolerance mode, True is for relative, otherwise for absolute.\n",
 "    criterion=0.25, # Performance tolerance when optimizing the model.\n",
 ")\n",
-"quantization_config = QuantizationConfig(\n",
-"    approach=\"static\",\n",
-"    max_trials=200,\n",
-"    metrics=[tune_metric],\n",
+"trainer_static.metrics = tune_metric\n",
+"quantization_config = PostTrainingQuantConfig(\n",
+"    approach=\"static\"\n",
 ")\n",
 "\n",
 "# run quantization\n",
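Note that the notebook drops max_trials=200 along with QuantizationConfig; if that cap matters, it can presumably be restored through the TuningCriterion shown earlier. And since the commit title is "fix qat commands", the training-aware counterpart deserves a mention: a minimal sketch, assuming trainer is an NLPTrainer already wired with train and eval datasets:

```python
from neural_compressor.config import QuantizationAwareTrainingConfig

# Quantization-aware training: fake-quant ops are inserted and the model is
# fine-tuned, instead of being calibrated after the fact as in the static path.
qat_config = QuantizationAwareTrainingConfig()
model = trainer.quantize(quant_config=qat_config)
```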
@@ -84,7 +84,7 @@ function run_tuning {
             --evaluation_strategy steps \
             --save_strategy steps \
             --save_total_limit 1 \
-            --safe_serialization False"
+            --save_safetensors False"
         fi
     elif [ "${topology}" = "gpt_j" ]; then
         if [ "${task}" = "clm" ]; then
@@ -121,7 +121,7 @@ function run_tuning {
             --save_strategy steps \
             --metric_for_best_model accuracy \
             --save_total_limit 1 \
-            --safe_serialization False"
+            --save_safetensors False"
         fi
     elif [ "${topology}" = "xlnet" ]; then
         if [ "${task}" = "plm" ]; then
@@ -146,7 +146,7 @@ function run_tuning {
             --save_strategy steps \
             --metric_for_best_model accuracy \
             --save_total_limit 1 \
-            --safe_serialization False"
+            --save_safetensors False"
         fi
     elif [ "${topology}" = "gpt_neox" ]; then
         if [ "${task}" = "clm" ]; then
@@ -57,7 +57,7 @@ function run_tuning {
             --evaluation_strategy steps \
             --save_strategy steps \
             --save_total_limit 1 \
-            --safe_serialization False"
+            --save_safetensors False"
     fi

     python -u ./run_swag.py \
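The recurring flag swap above is the substance of the "fix qat commands" change: --save_safetensors maps to the save_safetensors field of Hugging Face's TrainingArguments, whereas safe_serialization is a keyword argument of save_pretrained() and not a recognized command-line flag, so HfArgumentParser would presumably reject the old commands. A sketch of the Python-side equivalent, assuming a transformers release that includes save_safetensors:

```python
from transformers import TrainingArguments

# Equivalent of passing "--save_safetensors False" on the command line:
# checkpoints are saved as torch .bin files rather than .safetensors.
training_args = TrainingArguments(
    output_dir="./tmp/tune_output",  # hypothetical path for illustration
    save_safetensors=False,
)
```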

This file was deleted.

This file was deleted.

