From 423b0dd8f78d9ca1b77f3db08e4e91f9a59322b9 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Fri, 9 Aug 2024 16:20:21 +0800 Subject: [PATCH 1/3] update accuracy Signed-off-by: Sun, Xuehao --- docs/source/llm_recipes.md | 168 ++++++++++++++++++------------------- 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/docs/source/llm_recipes.md b/docs/source/llm_recipes.md index 328bba3ba09..8a9c17e7cd7 100644 --- a/docs/source/llm_recipes.md +++ b/docs/source/llm_recipes.md @@ -17,8 +17,8 @@ This document aims to publish the specific recipes we achieved for the popular L | EleutherAI/gpt-j-6b | ✔ | ✔ | ✔ | | facebook/opt-1.3b | ✔ | ✔ | ✔ | | facebook/opt-30b | ✔ | ✔ | ✔ | -| meta-llama/Llama-2-7b-hf | ✔ | ✔ | ✔ | -| meta-llama/Llama-2-13b-hf | ✔ | ✔ | ✔ | +| meta-llama/Llama-2-7b-hf | WIP | ✔ | ✔ | +| meta-llama/Llama-2-13b-hf | WIP | ✔ | ✔ | | meta-llama/Llama-2-70b-hf | ✔ | ✔ | ✔ | | tiiuae/falcon-7b | ✔ | ✔ | ✔ | | tiiuae/falcon-40b | ✔ | ✔ | ✔ | @@ -29,8 +29,8 @@ This document aims to publish the specific recipes we achieved for the popular L | databricks/dolly-v2-12b | ✖ | ✔ | ✖ | | EleutherAI/gpt-neox-20b | ✖ | ✔ | ✔ | | mistralai/Mistral-7B-v0.1 | ✖ | ✔ | ✔ | -| THUDM/chatglm2-6b | ✔ | ✔ | ✔ | -| THUDM/chatglm3-6b | WIP | ✔ | WIP | +| THUDM/chatglm2-6b | WIP | ✔ | WIP | +| THUDM/chatglm3-6b | WIP | ✔ | ✔ | **Detail recipes can be found [HERE](https://github.com/intel/intel-extension-for-transformers/blob/main/examples/huggingface/pytorch/text-generation/quantization/llm_quantization_recipes.md).** @@ -68,48 +68,48 @@ This document aims to publish the specific recipes we achieved for the popular L baichuan-inc/Baichuan-13B-Chat 67.57% - 69.07% - 1.0222 + 67.86% + 1.0043 67.55% 0.9997 - 68.12% - 1.0081 - 66.93% - 0.9905 + 67.46% + 0.9984 + N/A + N/A baichuan-inc/Baichuan2-13B-Chat 71.51% - 75.57% - 1.0568 + 75.51% + 1.0559 71.57% 1.0008 - 70.81% - 0.9902 - N/A - N/A + 71.45% + 0.9992 + 70.87% + 0.9911 baichuan-inc/Baichuan2-7B-Chat 67.67% - 68.06% - 
1.0058 + 67.51% + 0.9976 67.61% 0.9991 - 67.90% - 1.0034 - N/A - N/A + 68.08% + 1.0061 + 67.18% + 0.9928 bigscience/bloom-1b7 46.34% - 47.99% - 1.0356 + 47.97% + 1.0352 46.21% 0.9972 - 46.90% - 1.0121 + 47.00% + 1.0142 N/A N/A @@ -128,14 +128,14 @@ This document aims to publish the specific recipes we achieved for the popular L EleutherAI/gpt-j-6b 68.31% + 68.00% + 0.9955 68.27% 0.9994 - 68.27% - 0.9994 - 68.35% - 1.0006 - 68.02% - 0.9958 + 68.23% + 0.9988 + 67.40% + 0.9867 EleutherAI/gpt-neox-20b @@ -144,68 +144,68 @@ This document aims to publish the specific recipes we achieved for the popular L N/A 72.29% 0.9994 - 71.74% - 0.9918 + 72.15% + 0.9975 N/A N/A facebook/opt-1.3b 57.89% - 57.68% - 0.9964 + 57.35% + 0.9907 58.12% 1.0040 - 58.26% - 1.0064 + 58.01% + 1.0021 N/A N/A facebook/opt-30b 71.49% - 71.78% - 1.0041 + 71.51% + 1.0003 71.53% 1.0006 - 71.59% - 1.0014 - 71.80% - 1.0043 + 71.82% + 1.0046 + 71.43% + 0.9992 meta-llama/Llama-2-13b-hf 76.77% - 76.25% - 0.9932 + N/A + N/A 76.89% 1.0016 - 77.66% - 1.0116 - 76.60% - 0.9978 + 76.96% + 1.0025 + N/A + N/A meta-llama/Llama-2-70b-hf 79.64% - 79.14% - 0.9937 + 79.53% + 0.9986 79.62% 0.9997 - 80.09% - 1.0057 - 79.68% - 1.0005 + 80.05% + 1.0051 + N/A + N/A meta-llama/Llama-2-7b-hf 73.92% - 73.45% - 0.9936 + N/A + N/A 73.90% 0.9997 - 73.84% - 0.9989 + 73.51% + 0.9945 N/A N/A @@ -216,22 +216,22 @@ This document aims to publish the specific recipes we achieved for the popular L N/A 75.80% 0.9987 - 76.25% - 1.0046 - 75.74% - 0.9979 + 75.37% + 0.9930 + 75.82% + 0.9989 THUDM/chatglm2-6b 53.23% - 52.86% - 0.9930 + N/A + N/A 53.00% 0.9957 - 52.90% - 0.9938 - 52.92% - 0.9942 + N/A + N/A + N/A + N/A THUDM/chatglm3-6b @@ -242,31 +242,31 @@ This document aims to publish the specific recipes we achieved for the popular L 0.9990 N/A N/A - N/A - N/A + 58.59% + 0.9915 tiiuae/falcon-40b 77.22% - 76.95% - 0.9965 + 77.26% + 1.0005 77.18% 0.9995 - 77.55% - 1.0043 - 77.82% - 1.0078 + 77.97% + 1.0097 + N/A + N/A tiiuae/falcon-7b 74.67% - 
76.63% - 1.0262 + 76.17% + 1.0201 74.73% 1.0008 - 75.06% - 1.0052 - 74.00% - 0.9910 + 74.79% + 1.0016 + N/A + N/A From ee875707bd2fa96e58089d020c0ede50e8cedf3b Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Fri, 9 Aug 2024 19:47:33 +0800 Subject: [PATCH 2/3] Add 3x examples readme.md Signed-off-by: Sun, Xuehao --- README.md | 2 +- .../.config/model_params_tensorflow_3x.json | 25 +-- examples/3.x_api/README.md | 205 ++++++++++++++++++ 3 files changed, 211 insertions(+), 21 deletions(-) create mode 100644 examples/3.x_api/README.md diff --git a/README.md b/README.md index bcc9c2fcc96..d7f02d5aa02 100644 --- a/README.md +++ b/README.md @@ -146,7 +146,7 @@ quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloade Workflow APIs LLMs Recipes - Examples + Examples diff --git a/examples/.config/model_params_tensorflow_3x.json b/examples/.config/model_params_tensorflow_3x.json index 74b40ea4f5d..e2a052656f8 100644 --- a/examples/.config/model_params_tensorflow_3x.json +++ b/examples/.config/model_params_tensorflow_3x.json @@ -8,20 +8,6 @@ "batch_size": 64, "fp32_model_url": "https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_7_0/fp32_bert_squad.pb" }, - "distilbert_base": { - "model_src_dir": "nlp/distilbert_base/quantization/ptq", - "dataset_location": "/tf_dataset2/datasets/sst2_validation_dataset", - "input_model": "/tf_dataset2/models/tensorflow/distilbert_base/fp32/distilbert_base_fp32.pb", - "main_script": "main.py", - "batch_size": 128 - }, - "distilbert_base_sq": { - "model_src_dir": "nlp/distilbert_base/quantization/ptq", - "dataset_location": "/tf_dataset2/datasets/sst2_validation_dataset", - "input_model": "/tf_dataset2/models/tensorflow/distilbert_base/fp32/distilbert_base_fp32.pb", - "main_script": "main.py", - "batch_size": 128 - }, "opt_125m_sq": { "model_src_dir": "nlp/large_language_models/quantization/ptq/smoothquant", "dataset_location": "", @@ -97,9 +83,9 @@ "model_src_dir": 
"object_detection/yolo_v5/quantization/ptq", "dataset_location": "/tf_dataset2/datasets/coco_yolov5/coco", "input_model": "/tf_dataset2/models/tensorflow/yolo_v5/yolov5s.pb", - "main_script": "main.py", + "main_script": "main.py", "batch_size": 1 - }, + }, "faster_rcnn_resnet50": { "model_src_dir": "object_detection/faster_rcnn_resnet50/quantization/ptq", "dataset_location": "/tf_dataset/tensorflow/coco_val.record", @@ -125,14 +111,14 @@ "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq", "dataset_location": "/tf_dataset/tensorflow/coco_val.record", "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1/frozen_inference_graph.pb", - "main_script": "main.py", + "main_script": "main.py", "batch_size": 10 }, "ssd_mobilenet_v1_ckpt": { "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq", "dataset_location": "/tf_dataset/tensorflow/coco_val.record", "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1", - "main_script": "main.py", + "main_script": "main.py", "batch_size": 10 }, "wide_deep_large_ds": { @@ -158,5 +144,4 @@ "batch_size": 1 } } -} - +} \ No newline at end of file diff --git a/examples/3.x_api/README.md b/examples/3.x_api/README.md new file mode 100644 index 00000000000..f36d5b45c6e --- /dev/null +++ b/examples/3.x_api/README.md @@ -0,0 +1,205 @@ +# Examples + +Intel® Neural Compressor validated examples with multiple compression techniques, including quantization, pruning, knowledge distillation and orchestration. Part of the validated cases can be found in the example tables, and the release data is available [here](../docs/source/validated_model_list.md). + +# TensorFlow Examples + +## Quantization + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Model | Domain | Method | Examples
bert_large_squad_model_zoo | Natural Language Processing | Post-Training Static Quantization | link
gpt-j-6B | Natural Language Processing | Post-Training Static Quantization | link
transformer_lt | Natural Language Processing | Post-Training Static Quantization | link
inception_v3 | Image Recognition | Post-Training Static Quantization | link
mobilenetv2 | Image Recognition | Post-Training Static Quantization | link
resnetv2_50 | Image Recognition | Post-Training Static Quantization | link
vgg16 | Image Recognition | Post-Training Static Quantization | link
ViT | Image Recognition | Post-Training Static Quantization | link
GraphSage | Graph Networks | Post-Training Static Quantization | link
yolo_v5 | Object Detection | Post-Training Static Quantization | link
faster_rcnn_resnet50 | Object Detection | Post-Training Static Quantization | link
mask_rcnn_inception_v2 | Object Detection | Post-Training Static Quantization | link
mask_rcnn_inception_v2_ckpt | Object Detection | Post-Training Static Quantization | link
ssd_mobilenet_v1 | Object Detection | Post-Training Static Quantization | link
ssd_mobilenet_v1_ckpt | Object Detection | Post-Training Static Quantization | link
wide_deep_large_ds | Recommendation | Post-Training Static Quantization | link
3dunet-mlperf | Semantic Image Segmentation | Post-Training Static Quantization | link
style_transfer | Style Transfer | Post-Training Static Quantization | link
+ +# PyTorch Examples + +## Quantization + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Model | Domain | Method | Algorithm | Examples
gpt_j | Natural Language Processing | Weight-Only Quantization | RTN | link
Weight-Only Quantization | GPTQ | link
Static Quantization (IPEX) | link
llama2_7b | Natural Language Processing | Weight-Only Quantization | GPTQ | link
Static Quantization (IPEX) | link
opt_125m | Natural Language Processing | Static Quantization (IPEX) | link
Weight-Only Quantization | GPTQ | link
Static Quantization (PT2E) | link
resnet18 | Image Recognition | Mixed Precision | link
Static Quantization | link
From 02236c2d42660f0e8681af17c3f262470a74bf8b Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Fri, 9 Aug 2024 21:33:18 +0800 Subject: [PATCH 3/3] update tensorflow model list Signed-off-by: Sun, Xuehao --- examples/3.x_api/README.md | 162 +++++++++++++++---------------------- 1 file changed, 63 insertions(+), 99 deletions(-) diff --git a/examples/3.x_api/README.md b/examples/3.x_api/README.md index f36d5b45c6e..fd79f210533 100644 --- a/examples/3.x_api/README.md +++ b/examples/3.x_api/README.md @@ -2,6 +2,69 @@ Intel® Neural Compressor validated examples with multiple compression techniques, including quantization, pruning, knowledge distillation and orchestration. Part of the validated cases can be found in the example tables, and the release data is available [here](../docs/source/validated_model_list.md). + +# PyTorch Examples + +## Quantization + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Model | Domain | Method | Examples
gpt_j | Natural Language Processing | Weight-Only Quantization | link
Static Quantization (IPEX) | link
llama2_7b | Natural Language Processing | Weight-Only Quantization | link
Static Quantization (IPEX) | link
opt_125m | Natural Language Processing | Static Quantization (IPEX) | link
Static Quantization (PT2E) | link
Weight-Only Quantization | link
resnet18 | Image Recognition | Mixed Precision | link
Static Quantization | link
+ + # TensorFlow Examples ## Quantization @@ -22,12 +85,6 @@ Intel® Neural Compressor validated examples with multiple compression technique Post-Training Static Quantization link - - gpt-j-6B - Natural Language Processing - Post-Training Static Quantization - link - transformer_lt Natural Language Processing @@ -88,24 +145,12 @@ Intel® Neural Compressor validated examples with multiple compression technique Post-Training Static Quantization link - - mask_rcnn_inception_v2_ckpt - Object Detection - Post-Training Static Quantization - link - ssd_mobilenet_v1 Object Detection Post-Training Static Quantization link - - ssd_mobilenet_v1_ckpt - Object Detection - Post-Training Static Quantization - link - wide_deep_large_ds Recommendation @@ -118,88 +163,7 @@ Intel® Neural Compressor validated examples with multiple compression technique Post-Training Static Quantization link - - style_transfer - Style Transfer - Post-Training Static Quantization - link - -# PyTorch Examples - -## Quantization - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Model | Domain | Method | Algorithm | Examples
gpt_j | Natural Language Processing | Weight-Only Quantization | RTN | link
Weight-Only Quantization | GPTQ | link
Static Quantization (IPEX) | link
llama2_7b | Natural Language Processing | Weight-Only Quantization | GPTQ | link
Static Quantization (IPEX) | link
opt_125m | Natural Language Processing | Static Quantization (IPEX) | link
Weight-Only Quantization | GPTQ | link
Static Quantization (PT2E) | link
resnet18 | Image Recognition | Mixed Precision | link
Static Quantization | link