intel · kevinintel · Jun 24, 2024 · Jun 12, 2024 · Jun 12, 2024 · Jun 12, 2024
@@ -99,7 +99,8 @@ raw_datasets = raw_datasets.map(lambda e: tokenizer(e['sentence'], truncation=Tr
 Documentation for API usage can be found [here](https://github.com/intel/intel-extension-for-transformers/tree/main/docs)
 
 ```python
-from intel_extension_for_transformers.transformers import QuantizationConfig, metrics, objectives
+from intel_extension_for_transformers.transformers import metrics, objectives
+from neural_compressor.config import PostTrainingQuantConfig
 from intel_extension_for_transformers.transformers.trainer import NLPTrainer
 # load config, model and metric
 config = AutoConfig.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english",num_labels=2)
@@ -120,7 +121,9 @@ trainer = NLPTrainer(model=model,
     tokenizer=tokenizer
 )
 # model quantization using trainer
-q_config = QuantizationConfig(metrics=[metrics.Metric(name="eval_accuracy")])
+tune_metric = metrics.Metric(name="eval_accuracy")
+trainer.metrics = tune_metric
+q_config = PostTrainingQuantConfig()
 model = trainer.quantize(quant_config=q_config)
 
 # test sentiment analysis with quantization

@@ -49,39 +49,20 @@ Where $D$ is a distance measurement as before, $F_t^{n_i}$ the output feature of
 ## usage
 ### Pytorch Script:
 ```python
-from intel_extension_for_transformers.transformers import metric, objectives, DistillationConfig, Criterion
+from intel_extension_for_transformers.transformers import metrics
 from intel_extension_for_transformers.transformers.trainer import NLPTrainer
+from neural_compressor.config import DistillationConfig
 # Replace transformers.Trainer with NLPTrainer
 # trainer = transformers.Trainer(......)
 trainer = NLPTrainer(......)
 metric = metrics.Metric(name="eval_accuracy")
-d_conf = DistillationConfig(metrics=tune_metric)
-model = trainer.distill(
-    distillation_config=d_conf, teacher_model=teacher_model
-)
+trainer.metrics = metric
+d_conf = DistillationConfig(teacher_model=teacher_model, criterion=criterion)
+model = trainer.distill(distillation_config=d_conf)
 ```
 
 Please refer to [example](../examples/huggingface/pytorch/text-classification/distillation/run_glue.py) for the details.
 
-### Tensorflow Script:
-```python
-from intel_extension_for_transformers.transformers import (DistillationConfig, metrics)
-from intel_extension_for_transformers.transformers.distillation import Criterion
-
-optimizer = TFOptimization(...)
-metric_ = metrics.Metric(name="eval_accuracy")
-criterion = Criterion(name='KnowledgeLoss',
-                    layer_mappings=[['classifier', 'classifier']],
-                    loss_types=['CE', 'CE'],
-                    loss_weight_ratio=[0.5, 0.5],
-                    add_origin_loss=False)
-distillation_conf = DistillationConfig(metrics=metric_,
-                                        criterion=criterion)
-distilled_model = optimizer.distill(
-            distillation_config=distillation_conf,
-            teacher_model=teacher_model)
-```
-Please refer to [example](../examples/huggingface/tensorflow/text-classification/distillation/run_glue.py) for the details.
 ### Create an Instance of Metric
 The Metric defines which metric will be used to measure the performance of tuned models.
 - example:
@@ -94,19 +75,23 @@ The Metric defines which metric will be used to measure the performance of tuned
 ### Create an Instance of Criterion(Optional)
 The criterion used in training phase.
 
-- arguments:
+- KnowledgeDistillationLossConfig arguments:
     |Argument   |Type       |Description                                        |Default value    |
     |:----------|:----------|:-----------------------------------------------|:----------------|
-    |name       |String|Name of criterion, like:"KnowledgeLoss", "IntermediateLayersLoss"  |"KnowledgeLoss"|
     |temperature|Float |parameter for KnowledgeDistillationLoss               |1.0             |
     |loss_types|List of string|Type of loss                               |['CE', 'CE']        |
     |loss_weight_ratio|List of float|weight ratio of loss                 |[0.5, 0.5]     |
+
+- IntermediateLayersKnowledgeDistillationLossConfig arguments:
+    |Argument   |Type       |Description                                        |Default value    |
+    |:----------|:----------|:-----------------------------------------------|:----------------|
+    |loss_types|List of string|Type of loss                               |['CE', 'CE']        |
+    |loss_weight_ratio|List of float|weight ratio of loss                 |[0.5, 0.5]     |
     |layer_mappings|List|parameter for IntermediateLayersLoss             |[] |
     |add_origin_loss|bool|parameter for IntermediateLayersLoss            |False |
-
 - example:
     ```python
-    criterion = Criterion(name='KnowledgeLoss')
+    criterion = KnowledgeDistillationLossConfig()
     ```
 
 ### Create an Instance of DistillationConfig
@@ -115,20 +100,18 @@ The DistillationConfig contains all the information related to the model distill
 - arguments:
     |Argument   |Type       |Description                                        |Default value    |
     |:----------|:----------|:-----------------------------------------------|:----------------|
-    |framework  |string     |which framework you used                        |"pytorch"        |
-    |criterion|Criterion |criterion of training                              |"KnowledgeLoss"|
-    |metrics    |Metric    |Used to evaluate accuracy of tuning model, no need for NoTrainerOptimizer|None    |
+    |teacher_model  |torch.nn.Module     | teacher model object                    |None        |
+    |criterion|Criterion |criterion of training                              |KnowledgeLoss object|
+
 
 - example:
     ```python
-    d_conf = DistillationConfig(metrics=metric, criterion=criterion)
+    d_conf = DistillationConfig(teacher_model=teacher_model, criterion=criterion)
     ```
 
 ### Distill with Trainer
 - Distill with Trainer
     NLPTrainer inherits from transformers.Trainer, so you can create a trainer as in examples of Transformers. Then you can distill model with trainer.distill function.
     ```python
-    model = trainer.distill(
-        distillation_config=d_conf, teacher_model=teacher_model
-    )
+    model = trainer.distill(distillation_config=d_conf)
     ```
@@ -37,8 +37,8 @@ Intel Extension for Transformers is a powerful toolkit with multiple model optim
     <th>Model</th>
     <th>Task</th>
     <th>Dataset</th>
-    <th>PostTrainingDynamic</th>
-    <th>PostTrainingStatic</th>
+    <th>dynamic</th>
+    <th>static</th>
   </tr>
 </thead>
 <tbody align="center">
@@ -177,7 +177,7 @@ Intel Extension for Transformers is a powerful toolkit with multiple model optim
     <th>Model</th>
     <th>Task</th>
     <th>Dataset</th>
-    <th>QuantizationAwareTraining</th>
+    <th>qat</th>
     <th>No Trainer quantization</th>
   </tr>
 </thead>
@@ -206,7 +206,7 @@ Intel Extension for Transformers is a powerful toolkit with multiple model optim
     <th>Model</th>
     <th>Task</th>
     <th>Dataset</th>
-    <th>PostTrainingStatic</th>
+    <th>static</th>
   </tr>
 </thead>
 <tbody align="center">
@@ -232,7 +232,7 @@ Intel Extension for Transformers is a powerful toolkit with multiple model optim
     <th>Model</th>
     <th>Task</th>
     <th>Dataset</th>
-    <th>PostTrainingStatic</th>
+    <th>static</th>
   </tr>
 </thead>
 <tbody align="center">

@@ -22,9 +22,9 @@ We support exporting PyTorch models into ONNX models with our well-designed API
 | Input Model | Export FP32 | Export BF16 | Export INT8 |
 | --- | --- | --- | --- |
 | FP32 PyTorch Model | &#10004; | &#10004; | / |
-| INT8 PyTorch Model <br> (PostTrainingDynamic) | / | / | &#10004; |
-| INT8 PyTorch Model <br> (PostTrainingStatic) | / | / | &#10004; |
-| INT8 PyTorch Model <br> (QuantizationAwareTraining) | / | / | &#10004; |
+| INT8 PyTorch Model <br> (dynamic) | / | / | &#10004; |
+| INT8 PyTorch Model <br> (static) | / | / | &#10004; |
+| INT8 PyTorch Model <br> (qat) | / | / | &#10004; |
 
 
 ## Examples

@@ -13,7 +13,7 @@
 
 ## Quantization
 ```python
-from intel_extension_for_transformers.transformers import QuantizationConfig, metrics, objectives
+from neural_compressor.config import PostTrainingQuantConfig
 from intel_extension_for_transformers.transformers.trainer import NLPTrainer
 
 config = AutoConfig.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english",num_labels=2)
@@ -27,7 +27,9 @@ trainer = NLPTrainer(model=model,
     eval_dataset=raw_datasets["validation"],
     tokenizer=tokenizer
 )
-q_config = QuantizationConfig(metrics=[metrics.Metric(name="eval_loss", greater_is_better=False)])
+q_config = PostTrainingQuantConfig(
+    approach="static",
+)
 model = trainer.quantize(quant_config=q_config)
 
 input = tokenizer("I like Intel Extension for Transformers", return_tensors="pt")
@@ -73,17 +75,17 @@ model = trainer.distill(distillation_config=d_conf, teacher_model=teacher_model)
 ## Quantized Length Adaptive Transformer
 Quantized Length Adaptive Transformer leverages sequence-length reduction and low-bit representation techniques to further enhance model inference performance, enabling adaptive sequence-length sizes to accommodate different computational budget requirements with an optimal accuracy efficiency tradeoff.
 ```python
-from intel_extension_for_transformers.transformers import QuantizationConfig, DynamicLengthConfig, metric, objectives
+from intel_extension_for_transformers.transformers import DynamicLengthConfig, metrics, objectives
+from neural_compressor.config import PostTrainingQuantConfig
 from intel_extension_for_transformers.transformers.trainer import NLPTrainer
 
 # Replace transformers.Trainer with NLPTrainer
 # trainer = transformers.Trainer(...)
 trainer = NLPTrainer(...)
 metric = metrics.Metric(name="eval_f1", is_relative=True, criterion=0.01)
-q_config = QuantizationConfig(
-    approach="PostTrainingStatic",
-    metrics=[metric],
-    objectives=[objectives.performance]
+trainer.metrics = metric
+q_config = PostTrainingQuantConfig(
+    approach="static"
 )
 # Apply the length config
 dynamic_length_config = DynamicLengthConfig(length_config=length_config)

@@ -7,32 +7,23 @@ Pruning
 ## Introduction
 Pruning is the process of removing redundant parameters of a network. The idea bears similarity to the ["optimal brain damage"](http://yann.lecun.com/exdb/publis/pdf/lecun-90b.pdf) hypothesis by Yann LeCun. There are two types of pruning: Unstructured and Structured. Unstructured pruning means finding and removing the less salient connection in the model, the place could be anywhere in the matrix. Structured pruning means deleting entire blocks, filters, or channels.
 
-## Pruning types
-
-There are three pruning types in Intel® Extension for Transformers:
-
-- Magnitude (Unstructured)
-  - The algorithm prunes the weight by the lowest absolute value at each layer with a given sparsity target. 
-
-- Group Lasso (Structured)
-  - The algorithm uses Group lasso regularization to prune entire rows, columns, or blocks of parameters that result in a smaller dense network.
-
-- Pattern Lock (Unstructured & Structured)
-  - The algorithm locks the sparsity pattern in fine tune phase by freezing those zero values of the weight tensor during the weight update of training.
-
 ## Usage
 ### Script:
 ```python
 
-from intel_extension_for_transformers.transformers import metrics, objectives, PrunerConfig, PruningConfig,
+from intel_extension_for_transformers.transformers import metrics
+from neural_compressor.config import WeightPruningConfig
 from intel_extension_for_transformers.transformers.trainer import NLPTrainer
 # Replace transformers.Trainer with NLPTrainer
 # trainer = transformers.Trainer(......)
 trainer = NLPTrainer(......)
 metric = metrics.Metric(name="eval_accuracy")
-pruner_config = PrunerConfig(prune_type='BasicMagnitude', target_sparsity_ratio=0.9)
-p_conf = PruningConfig(pruner_config=[pruner_config], metrics=metric)
-model = trainer.prune(pruning_config=p_conf)
+trainer.metrics = metric
+pruning_conf = WeightPruningConfig([{"start_step": 0, "end_step": 2}],
+                                    target_sparsity=0.9,
+                                    pruning_scope="local",
+                                    pruning_type="magnitude")
+model = trainer.prune(pruning_config=pruning_conf)
 ```
 Please refer to [example](../examples/huggingface/pytorch/text-classification/pruning) for the details.
 
@@ -45,41 +36,27 @@ The Metric defines which metric will be used to measure the performance of tuned
 
     Please refer to [metrics document](metrics.md) for the details.
 
-### Create list of an instance of PrunerConfig(Optional)
-PrunerConfig defines which pruning algorithm to use and how to apply it during the training process. Intel® Extension for Transformers supports pruning types "BasicMagnitude", "PatternLock", and "GroupLasso". You can create different pruners for different layers.
+### Create an instance of WeightPruningConfig
+[WeightPruningConfig](https://github.com/intel/neural-compressor/blob/master/neural_compressor/config.py) defines which pruning algorithm to use and how to apply it during the training process. Intel® Extension for Transformers supports pruning types "magnitude", "pattern_lock", and "GroupLasso". You can create different pruners for different layers.
 
 - arguments:
     |Argument   |Type       |Description                                        |Default value    |
     |:----------|:----------|:-----------------------------------------------|:----------------|
-    |epoch_range|list of integer|Which epochs to pruning                     |[0, 4]           |
-    |initial_sparsity_ratio|float |Initial sparsity goal                     |0.0              |
-    |target_sparsity_ratio|float  |Target sparsity goal                      |0.97             |
+    |pruning_configs |list of dicts|Local pruning configs for specific layers; settings given outside this list apply to all layers |[{}]         |
+    |target_sparsity |float |Target sparsity goal                      |0.90            |
     |update_frequency|integer|Frequency to updating sparsity                 |1                |
-    |prune_type|string|Pruning algorithm                                     |'BasicMagnitude' |
-    |method|string|Pruning method                                            |'per_tensor' |
-    |names|list of string|List of weight name to be pruned. If no weight is specified, all weights of the model will be pruned|[]|
-    |parameters|dict of string|The hyper-parameters for pruning, refer to [the link](https://github.com/intel/neural-compressor/blob/master/docs/source/pruning.md)|None|
+    |pruning_type |string|Pruning algorithm |'snip_momentum' |
+
 
-- example:
-    ```python
-    pruner_config = PrunerConfig(prune_type='BasicMagnitude', target_sparsity_ratio=0.9)
-    ```
-
-### Create an instance of PruningConfig
-The PruningConfig contains all the information related to the model pruning behavior. If you have created Metric and PrunerConfig instance, then you can create an instance of PruningConfig. Metric and pruner are optional.
-
-- arguments:
-    |Argument   |Type       |Description                                        |Default value    |
-    |:----------|:----------|:-----------------------------------------------|:----------------|
-    |framework  |string     |Which framework you used                        |"pytorch"        |
-    |initial_sparsity_ratio|float |Initial sparsity goal, if pruner_config argument is defined, it didn't need                       |0.0|
-    |target_sparsity_ratio|float |Target sparsity goal, if pruner argument is defined, it didn't need                       |0.97|
-    |metrics    |Metric    |Used to evaluate accuracy of tuning model, no need for NoTrainerOptimizer|None    |
-    |pruner_config |PrunerConfig    |Defined pruning behavior, if it is None, then NLP will create a default a pruner with 'BasicMagnitude' pruning type                                  |None              |
+The WeightPruningConfig contains all the information related to the model pruning behavior. The Metric is optional: if you have created a Metric instance, assign it to `trainer.metrics` rather than passing it to WeightPruningConfig.
 
 - example:
     ```python
-    pruning_conf = PruningConfig(pruner_config=[pruner_config], metrics=tune_metric)
+    from neural_compressor.config import WeightPruningConfig
+    pruning_conf = WeightPruningConfig([{"start_step": 0, "end_step": 2}],
+                                        target_sparsity=0.9,
+                                        pruning_scope="local",
+                                        pruning_type="magnitude")
     ```
 
 ### Prune with Trainer