From 651a658fcc22bd1d3033d775d06ff9e2b426763a Mon Sep 17 00:00:00 2001 From: ZhangJianyu Date: Wed, 17 Jul 2024 16:15:22 +0800 Subject: [PATCH 1/3] mv 3x to /docs/source --- README.md | 52 ++++++++++----------- docs/{ => source}/3x/PT_DynamicQuant.md | 0 docs/{ => source}/3x/PT_MXQuant.md | 0 docs/{ => source}/3x/PT_MixedPrecision.md | 0 docs/{ => source}/3x/PT_SmoothQuant.md | 0 docs/{ => source}/3x/PT_StaticQuant.md | 0 docs/{ => source}/3x/PT_WeightOnlyQuant.md | 0 docs/{ => source}/3x/PyTorch.md | 0 docs/{ => source}/3x/TF_Quant.md | 0 docs/{ => source}/3x/TF_SQ.md | 0 docs/{ => source}/3x/TensorFlow.md | 0 docs/{ => source}/3x/autotune.md | 0 docs/{ => source}/3x/benchmark.md | 0 docs/{ => source}/3x/design.md | 0 docs/{ => source}/3x/get_started.md | 0 docs/{ => source}/3x/imgs/architecture.png | Bin docs/{ => source}/3x/imgs/data_format.png | Bin docs/{ => source}/3x/imgs/mx_workflow.png | Bin docs/{ => source}/3x/imgs/smoothquant.png | Bin docs/{ => source}/3x/imgs/sq_convert.png | Bin docs/{ => source}/3x/imgs/sq_pc.png | Bin docs/{ => source}/3x/imgs/workflow.png | Bin docs/{ => source}/3x/llm_recipes.md | 0 docs/{ => source}/3x/quantization.md | 0 24 files changed, 26 insertions(+), 26 deletions(-) rename docs/{ => source}/3x/PT_DynamicQuant.md (100%) rename docs/{ => source}/3x/PT_MXQuant.md (100%) rename docs/{ => source}/3x/PT_MixedPrecision.md (100%) rename docs/{ => source}/3x/PT_SmoothQuant.md (100%) rename docs/{ => source}/3x/PT_StaticQuant.md (100%) rename docs/{ => source}/3x/PT_WeightOnlyQuant.md (100%) rename docs/{ => source}/3x/PyTorch.md (100%) rename docs/{ => source}/3x/TF_Quant.md (100%) rename docs/{ => source}/3x/TF_SQ.md (100%) rename docs/{ => source}/3x/TensorFlow.md (100%) rename docs/{ => source}/3x/autotune.md (100%) rename docs/{ => source}/3x/benchmark.md (100%) rename docs/{ => source}/3x/design.md (100%) rename docs/{ => source}/3x/get_started.md (100%) rename docs/{ => source}/3x/imgs/architecture.png (100%) rename docs/{ 
=> source}/3x/imgs/data_format.png (100%) rename docs/{ => source}/3x/imgs/mx_workflow.png (100%) rename docs/{ => source}/3x/imgs/smoothquant.png (100%) rename docs/{ => source}/3x/imgs/sq_convert.png (100%) rename docs/{ => source}/3x/imgs/sq_pc.png (100%) rename docs/{ => source}/3x/imgs/workflow.png (100%) rename docs/{ => source}/3x/llm_recipes.md (100%) rename docs/{ => source}/3x/quantization.md (100%) diff --git a/README.md b/README.md index 91690432918..54478d43890 100644 --- a/README.md +++ b/README.md @@ -34,21 +34,21 @@ In particular, the tool provides the key features, typical examples, and open co ```Shell pip install neural-compressor ``` -> **Note**: +> **Note**: > Further installation methods can be found under [Installation Guide](https://github.com/intel/neural-compressor/blob/master/docs/source/installation_guide.md). check out our [FAQ](https://github.com/intel/neural-compressor/blob/master/docs/source/faq.md) for more details. ## Getting Started -Setting up the environment: +Setting up the environment: ```bash pip install "neural-compressor>=2.3" "transformers>=4.34.0" torch torchvision ``` After successfully installing these packages, try your first quantization program. ### Weight-Only Quantization (LLMs) -Following example code demonstrates Weight-Only Quantization on LLMs, it supports Intel CPU, Intel Gaudi2 AI Accelerator, Nvidia GPU, best device will be selected automatically. +Following example code demonstrates Weight-Only Quantization on LLMs, it supports Intel CPU, Intel Gaudi2 AI Accelerator, Nvidia GPU, best device will be selected automatically. -To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built). 
+To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built). ```bash # Run a container with an interactive shell docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest @@ -86,9 +86,9 @@ woq_conf = PostTrainingQuantConfig( ) quantized_model = fit(model=float_model, conf=woq_conf, calib_dataloader=dataloader) ``` -**Note:** +**Note:** -To try INT4 model inference, please directly use [Intel Extension for Transformers](https://github.com/intel/intel-extension-for-transformers), which leverages Intel Neural Compressor for model quantization. +To try INT4 model inference, please directly use [Intel Extension for Transformers](https://github.com/intel/intel-extension-for-transformers), which leverages Intel Neural Compressor for model quantization. 
### Static Quantization (Non-LLMs) @@ -116,10 +116,10 @@ quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloade - Architecture - Workflow + Architecture + Workflow APIs - LLMs Recipes + LLMs Recipes Examples @@ -130,15 +130,15 @@ quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloade - Overview - Static Quantization - Dynamic Quantization - Smooth Quantization + Overview + Static Quantization + Dynamic Quantization + Smooth Quantization - Weight-Only Quantization - MX Quantization - Mixed Precision + Weight-Only Quantization + MX Quantization + Mixed Precision @@ -148,9 +148,9 @@ quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloade - Overview - Static Quantization - Smooth Quantization + Overview + Static Quantization + Smooth Quantization @@ -160,24 +160,24 @@ quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloade - Auto Tune - Benchmark + Auto Tune + Benchmark -> **Note**: +> **Note**: > From 3.0 release, we recommend to use 3.X API. Compression techniques during training such as QAT, Pruning, Distillation only available in [2.X API](https://github.com/intel/neural-compressor/blob/master/docs/source/2x_user_guide.md) currently. 
## Selected Publications/Events -* Blog by Intel: [Neural Compressor: Boosting AI Model Efficiency](https://community.intel.com/t5/Blogs/Tech-Innovation/Artificial-Intelligence-AI/Neural-Compressor-Boosting-AI-Model-Efficiency/post/1604740) (June 2024) +* Blog by Intel: [Neural Compressor: Boosting AI Model Efficiency](https://community.intel.com/t5/Blogs/Tech-Innovation/Artificial-Intelligence-AI/Neural-Compressor-Boosting-AI-Model-Efficiency/post/1604740) (June 2024) * Blog by Intel: [Optimization of Intel AI Solutions for Alibaba Cloud’s Qwen2 Large Language Models](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-ai-solutions-accelerate-alibaba-qwen2-llms.html) (June 2024) * Blog by Intel: [Accelerate Meta* Llama 3 with Intel AI Solutions](https://www.intel.com/content/www/us/en/developer/articles/technical/accelerate-meta-llama3-with-intel-ai-solutions.html) (Apr 2024) * EMNLP'2023 (Under Review): [TEQ: Trainable Equivalent Transformation for Quantization of LLMs](https://openreview.net/forum?id=iaI8xEINAf&referrer=%5BAuthor%20Console%5D) (Sep 2023) * arXiv: [Efficient Post-training Quantization with FP8 Formats](https://arxiv.org/abs/2309.14592) (Sep 2023) * arXiv: [Optimize Weight Rounding via Signed Gradient Descent for the Quantization of LLMs](https://arxiv.org/abs/2309.05516) (Sep 2023) -> **Note**: +> **Note**: > View [Full Publication List](https://github.com/intel/neural-compressor/blob/master/docs/source/publication_list.md). ## Additional Content @@ -187,8 +187,8 @@ quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloade * [Legal Information](./docs/source/legal_information.md) * [Security Policy](SECURITY.md) -## Communication +## Communication - [GitHub Issues](https://github.com/intel/neural-compressor/issues): mainly for bug reports, new feature requests, question asking, etc. 
-- [Email](mailto:inc.maintainers@intel.com): welcome to raise any interesting research ideas on model compression techniques by email for collaborations. +- [Email](mailto:inc.maintainers@intel.com): welcome to raise any interesting research ideas on model compression techniques by email for collaborations. - [Discord Channel](https://discord.com/invite/Wxk3J3ZJkU): join the discord channel for more flexible technical discussion. - [WeChat group](/docs/source/imgs/wechat_group.jpg): scan the QA code to join the technical discussion. diff --git a/docs/3x/PT_DynamicQuant.md b/docs/source/3x/PT_DynamicQuant.md similarity index 100% rename from docs/3x/PT_DynamicQuant.md rename to docs/source/3x/PT_DynamicQuant.md diff --git a/docs/3x/PT_MXQuant.md b/docs/source/3x/PT_MXQuant.md similarity index 100% rename from docs/3x/PT_MXQuant.md rename to docs/source/3x/PT_MXQuant.md diff --git a/docs/3x/PT_MixedPrecision.md b/docs/source/3x/PT_MixedPrecision.md similarity index 100% rename from docs/3x/PT_MixedPrecision.md rename to docs/source/3x/PT_MixedPrecision.md diff --git a/docs/3x/PT_SmoothQuant.md b/docs/source/3x/PT_SmoothQuant.md similarity index 100% rename from docs/3x/PT_SmoothQuant.md rename to docs/source/3x/PT_SmoothQuant.md diff --git a/docs/3x/PT_StaticQuant.md b/docs/source/3x/PT_StaticQuant.md similarity index 100% rename from docs/3x/PT_StaticQuant.md rename to docs/source/3x/PT_StaticQuant.md diff --git a/docs/3x/PT_WeightOnlyQuant.md b/docs/source/3x/PT_WeightOnlyQuant.md similarity index 100% rename from docs/3x/PT_WeightOnlyQuant.md rename to docs/source/3x/PT_WeightOnlyQuant.md diff --git a/docs/3x/PyTorch.md b/docs/source/3x/PyTorch.md similarity index 100% rename from docs/3x/PyTorch.md rename to docs/source/3x/PyTorch.md diff --git a/docs/3x/TF_Quant.md b/docs/source/3x/TF_Quant.md similarity index 100% rename from docs/3x/TF_Quant.md rename to docs/source/3x/TF_Quant.md diff --git a/docs/3x/TF_SQ.md b/docs/source/3x/TF_SQ.md similarity index 100% 
rename from docs/3x/TF_SQ.md rename to docs/source/3x/TF_SQ.md diff --git a/docs/3x/TensorFlow.md b/docs/source/3x/TensorFlow.md similarity index 100% rename from docs/3x/TensorFlow.md rename to docs/source/3x/TensorFlow.md diff --git a/docs/3x/autotune.md b/docs/source/3x/autotune.md similarity index 100% rename from docs/3x/autotune.md rename to docs/source/3x/autotune.md diff --git a/docs/3x/benchmark.md b/docs/source/3x/benchmark.md similarity index 100% rename from docs/3x/benchmark.md rename to docs/source/3x/benchmark.md diff --git a/docs/3x/design.md b/docs/source/3x/design.md similarity index 100% rename from docs/3x/design.md rename to docs/source/3x/design.md diff --git a/docs/3x/get_started.md b/docs/source/3x/get_started.md similarity index 100% rename from docs/3x/get_started.md rename to docs/source/3x/get_started.md diff --git a/docs/3x/imgs/architecture.png b/docs/source/3x/imgs/architecture.png similarity index 100% rename from docs/3x/imgs/architecture.png rename to docs/source/3x/imgs/architecture.png diff --git a/docs/3x/imgs/data_format.png b/docs/source/3x/imgs/data_format.png similarity index 100% rename from docs/3x/imgs/data_format.png rename to docs/source/3x/imgs/data_format.png diff --git a/docs/3x/imgs/mx_workflow.png b/docs/source/3x/imgs/mx_workflow.png similarity index 100% rename from docs/3x/imgs/mx_workflow.png rename to docs/source/3x/imgs/mx_workflow.png diff --git a/docs/3x/imgs/smoothquant.png b/docs/source/3x/imgs/smoothquant.png similarity index 100% rename from docs/3x/imgs/smoothquant.png rename to docs/source/3x/imgs/smoothquant.png diff --git a/docs/3x/imgs/sq_convert.png b/docs/source/3x/imgs/sq_convert.png similarity index 100% rename from docs/3x/imgs/sq_convert.png rename to docs/source/3x/imgs/sq_convert.png diff --git a/docs/3x/imgs/sq_pc.png b/docs/source/3x/imgs/sq_pc.png similarity index 100% rename from docs/3x/imgs/sq_pc.png rename to docs/source/3x/imgs/sq_pc.png diff --git a/docs/3x/imgs/workflow.png 
b/docs/source/3x/imgs/workflow.png similarity index 100% rename from docs/3x/imgs/workflow.png rename to docs/source/3x/imgs/workflow.png diff --git a/docs/3x/llm_recipes.md b/docs/source/3x/llm_recipes.md similarity index 100% rename from docs/3x/llm_recipes.md rename to docs/source/3x/llm_recipes.md diff --git a/docs/3x/quantization.md b/docs/source/3x/quantization.md similarity index 100% rename from docs/3x/quantization.md rename to docs/source/3x/quantization.md From 74fdd2b4812f0a06b3553a8ca4a76b42f4be7046 Mon Sep 17 00:00:00 2001 From: ZhangJianyu Date: Thu, 18 Jul 2024 20:51:38 +0800 Subject: [PATCH 2/3] add api3.0, convert get_started to 3x, add comments to the py files for api doc --- docs/build_docs/build.sh | 14 ++- docs/source/3x/get_started.md | 88 ----------------- docs/source/api-doc/api_2.rst | 29 ++++++ docs/source/api-doc/api_3.rst | 27 ++++++ docs/source/api-doc/apis.rst | 21 +--- .../api-doc/tf_quantization_autotune.rst | 6 ++ .../source/api-doc/tf_quantization_common.rst | 7 ++ .../source/api-doc/tf_quantization_config.rst | 6 ++ .../api-doc/torch_quantization_autotune.rst | 6 ++ .../api-doc/torch_quantization_common.rst | 7 ++ .../api-doc/torch_quantization_config.rst | 6 ++ docs/source/get_started.md | 96 ++++++++++++++----- neural_compressor/tensorflow/__init__.py | 1 + .../tensorflow/quantization/__init__.py | 2 + .../tensorflow/quantization/autotune.py | 2 + .../tensorflow/quantization/config.py | 2 + .../tensorflow/quantization/quantize.py | 2 + neural_compressor/torch/__init__.py | 1 + .../torch/quantization/__init__.py | 1 + .../torch/quantization/autotune.py | 2 + .../torch/quantization/config.py | 2 + .../torch/quantization/quantize.py | 1 + 22 files changed, 195 insertions(+), 134 deletions(-) delete mode 100644 docs/source/3x/get_started.md create mode 100644 docs/source/api-doc/api_2.rst create mode 100644 docs/source/api-doc/api_3.rst create mode 100644 docs/source/api-doc/tf_quantization_autotune.rst create mode 100644 
docs/source/api-doc/tf_quantization_common.rst create mode 100644 docs/source/api-doc/tf_quantization_config.rst create mode 100644 docs/source/api-doc/torch_quantization_autotune.rst create mode 100644 docs/source/api-doc/torch_quantization_common.rst create mode 100644 docs/source/api-doc/torch_quantization_config.rst diff --git a/docs/build_docs/build.sh b/docs/build_docs/build.sh index fac266b3872..d533938759c 100755 --- a/docs/build_docs/build.sh +++ b/docs/build_docs/build.sh @@ -84,6 +84,7 @@ cp -rf ../docs/ ./source cp -f "../README.md" "./source/docs/source/Welcome.md" cp -f "../SECURITY.md" "./source/docs/source/SECURITY.md" + all_md_files=`find ./source/docs -name "*.md"` for md_file in ${all_md_files} do @@ -91,10 +92,10 @@ do done -sed -i 's/.\/docs\/source\/_static/./g' ./source/docs/source/Welcome.md ./source/docs/source/user_guide.md -sed -i 's/.md/.html/g; s/.\/docs\/source\//.\//g' ./source/docs/source/Welcome.md ./source/docs/source/user_guide.md -sed -i 's/\/examples\/README.html/https:\/\/github.com\/intel\/neural-compressor\/blob\/master\/examples\/README.md/g' ./source/docs/source/user_guide.md -sed -i 's/https\:\/\/intel.github.io\/neural-compressor\/lates.\/api-doc\/apis.html/https\:\/\/intel.github.io\/neural-compressor\/latest\/docs\/source\/api-doc\/apis.html/g' ./source/docs/source/Welcome.md ./source/docs/source/user_guide.md +# sed -i 's/.\/docs\/source\/_static/./g' ./source/docs/source/Welcome.md ./source/docs/source/user_guide.md +#sed -i 's/.md/.html/g; s/.\/docs\/source\//.\//g' ./source/docs/source/Welcome.md ./source/docs/source/user_guide.md +#sed -i 's/\/examples\/README.html/https:\/\/github.com\/intel\/neural-compressor\/blob\/master\/examples\/README.md/g' ./source/docs/source/user_guide.md +#sed -i 's/https\:\/\/intel.github.io\/neural-compressor\/lates.\/api-doc\/apis.html/https\:\/\/intel.github.io\/neural-compressor\/latest\/docs\/source\/api-doc\/apis.html/g' ./source/docs/source/Welcome.md 
./source/docs/source/user_guide.md sed -i 's/examples\/README.html/https:\/\/github.com\/intel\/neural-compressor\/blob\/master\/examples\/README.md/g' ./source/docs/source/Welcome.md @@ -130,6 +131,8 @@ if [[ ${UPDATE_VERSION_FOLDER} -eq 1 ]]; then cp -r ${SRC_FOLDER}/* ${DST_FOLDER} python update_html.py ${DST_FOLDER} ${VERSION} cp -r ./source/docs/source/imgs ${DST_FOLDER}/docs/source + cp -r ./source/docs/source/3x/imgs ${DST_FOLDER}/docs/source/3x + cp source/_static/index.html ${DST_FOLDER} else @@ -143,6 +146,7 @@ if [[ ${UPDATE_LATEST_FOLDER} -eq 1 ]]; then cp -r ${SRC_FOLDER}/* ${LATEST_FOLDER} python update_html.py ${LATEST_FOLDER} ${VERSION} cp -r ./source/docs/source/imgs ${LATEST_FOLDER}/docs/source + cp -r ./source/docs/source/3x/imgs ${LATEST_FOLDER}/docs/source/3x cp source/_static/index.html ${LATEST_FOLDER} else echo "skip to create ${LATEST_FOLDER}" @@ -152,7 +156,7 @@ echo "Create document is done" if [[ ${CHECKOUT_GH_PAGES} -eq 1 ]]; then git clone -b gh-pages --single-branch https://github.com/intel/neural-compressor.git ${RELEASE_FOLDER} - + if [[ ${UPDATE_VERSION_FOLDER} -eq 1 ]]; then python update_version.py ${ROOT_DST_FOLDER} ${VERSION} cp -rf ${DST_FOLDER} ${RELEASE_FOLDER} diff --git a/docs/source/3x/get_started.md b/docs/source/3x/get_started.md deleted file mode 100644 index 76a43c60924..00000000000 --- a/docs/source/3x/get_started.md +++ /dev/null @@ -1,88 +0,0 @@ -# Getting Started - -1. [Quick Samples](#quick-samples) - -2. 
[Feature Matrix](#feature-matrix) - -## Quick Samples - -```shell -# Install Intel Neural Compressor -pip install neural-compressor-pt -``` -```python -from transformers import AutoModelForCausalLM -from neural_compressor.torch.quantization import RTNConfig, prepare, convert - -user_model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-125m") -quant_config = RTNConfig() -prepared_model = prepare(model=user_model, quant_config=quant_config) -quantized_model = convert(model=prepared_model) -``` - -## Feature Matrix -Intel Neural Compressor 3.X extends PyTorch and TensorFlow's APIs to support compression techniques. -The below table provides a quick overview of the APIs available in Intel Neural Compressor 3.X. -The Intel Neural Compressor 3.X mainly focuses on quantization-related features, especially for algorithms that benefit LLM accuracy and inference. -It also provides some common modules across different frameworks. For example, Auto-tune support accuracy driven quantization and mixed precision, benchmark aimed to measure the multiple instances performance of the quantized model. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Overview
ArchitectureWorkflowAPIsLLMs RecipesExamples
PyTorch Extension APIs
OverviewStatic QuantizationDynamic QuantizationSmooth Quantization
Weight-Only QuantizationMX QuantizationMixed Precision
Tensorflow Extension APIs
OverviewStatic QuantizationSmooth Quantization
Other Modules
Auto TuneBenchmark
- > **Note**: -> From 3.0 release, we recommend to use 3.X API. Compression techniques during training such as QAT, Pruning, Distillation only available in [2.X API](https://github.com/intel/neural-compressor/blob/master/docs/source/2x_user_guide.md) currently. diff --git a/docs/source/api-doc/api_2.rst b/docs/source/api-doc/api_2.rst new file mode 100644 index 00000000000..b5528a0426a --- /dev/null +++ b/docs/source/api-doc/api_2.rst @@ -0,0 +1,29 @@ +2.0 API +####### + +**User facing APIs:** + +.. toctree:: + :maxdepth: 1 + + quantization.rst + mix_precision.rst + training.rst + benchmark.rst + config.rst + objective.rst + + +**Advanced APIs:** + +.. toctree:: + :maxdepth: 1 + + compression.rst + strategy.rst + model.rst + +**API document example:** + +.. toctree:: + api_doc_example.rst diff --git a/docs/source/api-doc/api_3.rst b/docs/source/api-doc/api_3.rst new file mode 100644 index 00000000000..7c01e073f0b --- /dev/null +++ b/docs/source/api-doc/api_3.rst @@ -0,0 +1,27 @@ +3.0 API +####### + +**PyTorch Extension API:** + +.. toctree:: + :maxdepth: 1 + + torch_quantization_common.rst + torch_quantization_config.rst + torch_quantization_autotune.rst + +**Tensorflow Extension API:** + +.. toctree:: + :maxdepth: 1 + + tf_quantization_common.rst + tf_quantization_config.rst + tf_quantization_autotune.rst + +**Other Modules:** + +.. toctree:: + :maxdepth: 1 + + benchmark.rst diff --git a/docs/source/api-doc/apis.rst b/docs/source/api-doc/apis.rst index 63d8f2f5ca8..8f5f2111157 100644 --- a/docs/source/api-doc/apis.rst +++ b/docs/source/api-doc/apis.rst @@ -1,29 +1,12 @@ APIs #### -**User facing APIs:** - .. toctree:: :maxdepth: 1 - quantization.rst - mix_precision.rst - training.rst - benchmark.rst - config.rst - objective.rst - - -**Advanced APIs:** + api_3.rst .. toctree:: :maxdepth: 1 - compression.rst - strategy.rst - model.rst - -**API document example:** - -.. 
toctree:: - api_doc_example.rst + api_2.rst \ No newline at end of file diff --git a/docs/source/api-doc/tf_quantization_autotune.rst b/docs/source/api-doc/tf_quantization_autotune.rst new file mode 100644 index 00000000000..241b7e42c77 --- /dev/null +++ b/docs/source/api-doc/tf_quantization_autotune.rst @@ -0,0 +1,6 @@ +Tensorflow Quantization AutoTune +================================ + +.. autoapisummary:: + + neural_compressor.tensorflow.quantization.autotune diff --git a/docs/source/api-doc/tf_quantization_common.rst b/docs/source/api-doc/tf_quantization_common.rst new file mode 100644 index 00000000000..7542b4a7c63 --- /dev/null +++ b/docs/source/api-doc/tf_quantization_common.rst @@ -0,0 +1,7 @@ +Tensorflow Quantization Base API +################################# + +.. autoapisummary:: + + neural_compressor.tensorflow.quantization.quantize + diff --git a/docs/source/api-doc/tf_quantization_config.rst b/docs/source/api-doc/tf_quantization_config.rst new file mode 100644 index 00000000000..4f5c757c31c --- /dev/null +++ b/docs/source/api-doc/tf_quantization_config.rst @@ -0,0 +1,6 @@ +Tensorflow Quantization Config +============================== + +.. autoapisummary:: + + neural_compressor.tensorflow.quantization.config diff --git a/docs/source/api-doc/torch_quantization_autotune.rst b/docs/source/api-doc/torch_quantization_autotune.rst new file mode 100644 index 00000000000..3466ead4a09 --- /dev/null +++ b/docs/source/api-doc/torch_quantization_autotune.rst @@ -0,0 +1,6 @@ +Pytorch Quantization AutoTune +============================= + +.. autoapisummary:: + + neural_compressor.torch.quantization.autotune diff --git a/docs/source/api-doc/torch_quantization_common.rst b/docs/source/api-doc/torch_quantization_common.rst new file mode 100644 index 00000000000..b4e53d2ef35 --- /dev/null +++ b/docs/source/api-doc/torch_quantization_common.rst @@ -0,0 +1,7 @@ +Pytorch Quantization Base API +################################# + +.. 
autoapisummary:: + + neural_compressor.torch.quantization.quantize + diff --git a/docs/source/api-doc/torch_quantization_config.rst b/docs/source/api-doc/torch_quantization_config.rst new file mode 100644 index 00000000000..cc60be355d6 --- /dev/null +++ b/docs/source/api-doc/torch_quantization_config.rst @@ -0,0 +1,6 @@ +Pytorch Quantization Config +=========================== + +.. autoapisummary:: + + neural_compressor.torch.quantization.config diff --git a/docs/source/get_started.md b/docs/source/get_started.md index 61c22912c41..0ba1e10d111 100644 --- a/docs/source/get_started.md +++ b/docs/source/get_started.md @@ -2,35 +2,87 @@ 1. [Quick Samples](#quick-samples) -2. [Validated Models](#validated-models) +2. [Feature Matrix](#feature-matrix) ## Quick Samples -### Quantization with Python API ```shell -# Install Intel Neural Compressor and TensorFlow -pip install neural-compressor -pip install tensorflow -# Prepare fp32 model -wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb +# Install Intel Neural Compressor +pip install neural-compressor-pt ``` ```python -from neural_compressor.data import DataLoader, Datasets -from neural_compressor.config import PostTrainingQuantConfig +from transformers import AutoModelForCausalLM +from neural_compressor.torch.quantization import RTNConfig, prepare, convert -dataset = Datasets("tensorflow")["dummy"](shape=(1, 224, 224, 3)) -dataloader = DataLoader(framework="tensorflow", dataset=dataset) +user_model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-125m") +quant_config = RTNConfig() +prepared_model = prepare(model=user_model, quant_config=quant_config) +quantized_model = convert(model=prepared_model) +``` -from neural_compressor.quantization import fit +## Feature Matrix +Intel Neural Compressor 3.X extends PyTorch and TensorFlow's APIs to support compression techniques. +The below table provides a quick overview of the APIs available in Intel Neural Compressor 3.X. 
+The Intel Neural Compressor 3.X mainly focuses on quantization-related features, especially for algorithms that benefit LLM accuracy and inference. +It also provides some common modules across different frameworks. For example, Auto-tune support accuracy driven quantization and mixed precision, benchmark aimed to measure the multiple instances performance of the quantized model. -q_model = fit( - model="./mobilenet_v1_1.0_224_frozen.pb", - conf=PostTrainingQuantConfig(), - calib_dataloader=dataloader, -) -``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Overview
ArchitectureWorkflowAPIsLLMs RecipesExamples
PyTorch Extension APIs
OverviewStatic QuantizationDynamic QuantizationSmooth Quantization
Weight-Only QuantizationMX QuantizationMixed Precision
Tensorflow Extension APIs
OverviewStatic QuantizationSmooth Quantization
Other Modules
Auto TuneBenchmark
-## Validated Models -Intel® Neural Compressor validated the quantization for 10K+ models from popular model hubs (e.g., HuggingFace Transformers, Torchvision, TensorFlow Model Hub, ONNX Model Zoo). -Over 30 pruning, knowledge distillation and model export samples are also available. -More details for validated typical models are available [here](/examples/README.md). +> **Note**: +> From 3.0 release, we recommend to use 3.X API. Compression techniques during training such as QAT, Pruning, Distillation only available in [2.X API](https://github.com/intel/neural-compressor/blob/master/docs/source/2x_user_guide.md) currently. diff --git a/neural_compressor/tensorflow/__init__.py b/neural_compressor/tensorflow/__init__.py index 678a02c83ba..738a4baeab8 100644 --- a/neural_compressor/tensorflow/__init__.py +++ b/neural_compressor/tensorflow/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Intel Neural Compressor Tensorflow API""" from neural_compressor.tensorflow.utils import register_algo, Model from neural_compressor.tensorflow.quantization import ( diff --git a/neural_compressor/tensorflow/quantization/__init__.py b/neural_compressor/tensorflow/quantization/__init__.py index e9b0f25ffa4..a7f9cb369b9 100644 --- a/neural_compressor/tensorflow/quantization/__init__.py +++ b/neural_compressor/tensorflow/quantization/__init__.py @@ -11,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+"""Intel Neural Compressor Tensorflow quantization API""" + from neural_compressor.tensorflow.quantization.quantize import quantize_model from neural_compressor.tensorflow.quantization.autotune import autotune, get_all_config_set, TuningConfig diff --git a/neural_compressor/tensorflow/quantization/autotune.py b/neural_compressor/tensorflow/quantization/autotune.py index 55b089b923c..5782b11c3e5 100644 --- a/neural_compressor/tensorflow/quantization/autotune.py +++ b/neural_compressor/tensorflow/quantization/autotune.py @@ -11,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Intel Neural Compressor Tensorflow quantization AutoTune API""" + from copy import deepcopy from typing import Any, Callable, Dict, List, Optional, Tuple, Union diff --git a/neural_compressor/tensorflow/quantization/config.py b/neural_compressor/tensorflow/quantization/config.py index 752f8d4ecbe..b4bc5f6df11 100644 --- a/neural_compressor/tensorflow/quantization/config.py +++ b/neural_compressor/tensorflow/quantization/config.py @@ -14,6 +14,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Intel Neural Compressor Tensorflow quantization config API""" + from __future__ import annotations diff --git a/neural_compressor/tensorflow/quantization/quantize.py b/neural_compressor/tensorflow/quantization/quantize.py index fa613759515..70487313986 100644 --- a/neural_compressor/tensorflow/quantization/quantize.py +++ b/neural_compressor/tensorflow/quantization/quantize.py @@ -11,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+"""Intel Neural Compressor Tensorflow quantization base API""" + from typing import Any, Callable, Dict, Tuple, Union diff --git a/neural_compressor/torch/__init__.py b/neural_compressor/torch/__init__.py index 5024997fd6d..0aae49fff10 100644 --- a/neural_compressor/torch/__init__.py +++ b/neural_compressor/torch/__init__.py @@ -11,4 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Intel Neural Compressor Pytorch API""" from .utils import load_empty_model diff --git a/neural_compressor/torch/quantization/__init__.py b/neural_compressor/torch/quantization/__init__.py index 3bc12580848..a01f2a51365 100644 --- a/neural_compressor/torch/quantization/__init__.py +++ b/neural_compressor/torch/quantization/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Intel Neural Compressor Pytorch quantization API""" from neural_compressor.torch.quantization.quantize import quantize, prepare, convert from neural_compressor.torch.quantization.config import ( diff --git a/neural_compressor/torch/quantization/autotune.py b/neural_compressor/torch/quantization/autotune.py index 79a23aef97a..1b20f1c94fc 100644 --- a/neural_compressor/torch/quantization/autotune.py +++ b/neural_compressor/torch/quantization/autotune.py @@ -11,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+"""Intel Neural Compressor Pytorch quantization AutoTune API""" + from copy import deepcopy from typing import Callable, List, Optional, Union diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index 49238ec2ee5..11c6dd6f447 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -15,6 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # pylint:disable=import-error +"""Intel Neural Compressor Pytorch quantization config API""" + from collections import OrderedDict from typing import Callable, Dict, List, NamedTuple, Optional diff --git a/neural_compressor/torch/quantization/quantize.py b/neural_compressor/torch/quantization/quantize.py index bc3020a942c..1bbae4ae535 100644 --- a/neural_compressor/torch/quantization/quantize.py +++ b/neural_compressor/torch/quantization/quantize.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+"""Intel Neural Compressor Pytorch quantization base API""" import copy from typing import Any, Callable, Dict, Tuple From 3531a786ce9774ae39628a214f7b5edcbbcd935d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 Jul 2024 05:39:44 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/source/api-doc/apis.rst | 2 +- docs/source/api-doc/tf_quantization_common.rst | 1 - docs/source/api-doc/torch_quantization_common.rst | 1 - neural_compressor/tensorflow/__init__.py | 2 +- neural_compressor/tensorflow/quantization/__init__.py | 2 +- neural_compressor/tensorflow/quantization/autotune.py | 2 +- neural_compressor/tensorflow/quantization/config.py | 2 +- neural_compressor/tensorflow/quantization/quantize.py | 2 +- neural_compressor/torch/__init__.py | 2 +- neural_compressor/torch/quantization/__init__.py | 2 +- neural_compressor/torch/quantization/autotune.py | 2 +- neural_compressor/torch/quantization/config.py | 2 +- neural_compressor/torch/quantization/quantize.py | 2 +- 13 files changed, 11 insertions(+), 13 deletions(-) diff --git a/docs/source/api-doc/apis.rst b/docs/source/api-doc/apis.rst index 8f5f2111157..15f92f83501 100644 --- a/docs/source/api-doc/apis.rst +++ b/docs/source/api-doc/apis.rst @@ -9,4 +9,4 @@ APIs .. toctree:: :maxdepth: 1 - api_2.rst \ No newline at end of file + api_2.rst diff --git a/docs/source/api-doc/tf_quantization_common.rst b/docs/source/api-doc/tf_quantization_common.rst index 7542b4a7c63..3b39d2c79cb 100644 --- a/docs/source/api-doc/tf_quantization_common.rst +++ b/docs/source/api-doc/tf_quantization_common.rst @@ -4,4 +4,3 @@ Tensorflow Quantization Base API .. 
autoapisummary:: neural_compressor.tensorflow.quantization.quantize - diff --git a/docs/source/api-doc/torch_quantization_common.rst b/docs/source/api-doc/torch_quantization_common.rst index b4e53d2ef35..d2ad03b933d 100644 --- a/docs/source/api-doc/torch_quantization_common.rst +++ b/docs/source/api-doc/torch_quantization_common.rst @@ -4,4 +4,3 @@ Pytorch Quantization Base API .. autoapisummary:: neural_compressor.torch.quantization.quantize - diff --git a/neural_compressor/tensorflow/__init__.py b/neural_compressor/tensorflow/__init__.py index 738a4baeab8..c40489b0bb0 100644 --- a/neural_compressor/tensorflow/__init__.py +++ b/neural_compressor/tensorflow/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Intel Neural Compressor Tensorflow API""" +"""Intel Neural Compressor Tensorflow API.""" from neural_compressor.tensorflow.utils import register_algo, Model from neural_compressor.tensorflow.quantization import ( diff --git a/neural_compressor/tensorflow/quantization/__init__.py b/neural_compressor/tensorflow/quantization/__init__.py index a7f9cb369b9..4457027e8ff 100644 --- a/neural_compressor/tensorflow/quantization/__init__.py +++ b/neural_compressor/tensorflow/quantization/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Intel Neural Compressor Tensorflow quantization API""" +"""Intel Neural Compressor Tensorflow quantization API.""" from neural_compressor.tensorflow.quantization.quantize import quantize_model diff --git a/neural_compressor/tensorflow/quantization/autotune.py b/neural_compressor/tensorflow/quantization/autotune.py index 0fc355c49cf..8dd051b4d38 100644 --- a/neural_compressor/tensorflow/quantization/autotune.py +++ b/neural_compressor/tensorflow/quantization/autotune.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Intel Neural Compressor Tensorflow quantization AutoTune API""" +"""Intel Neural Compressor Tensorflow quantization AutoTune API.""" from copy import deepcopy diff --git a/neural_compressor/tensorflow/quantization/config.py b/neural_compressor/tensorflow/quantization/config.py index b4bc5f6df11..738cc61f95a 100644 --- a/neural_compressor/tensorflow/quantization/config.py +++ b/neural_compressor/tensorflow/quantization/config.py @@ -14,7 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Intel Neural Compressor Pytorch quantization config API""" +"""Intel Neural Compressor Tensorflow quantization config API.""" from __future__ import annotations diff --git a/neural_compressor/tensorflow/quantization/quantize.py b/neural_compressor/tensorflow/quantization/quantize.py index 17a44ea455e..5a712202dff 100644 --- a/neural_compressor/tensorflow/quantization/quantize.py +++ b/neural_compressor/tensorflow/quantization/quantize.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.
-"""Intel Neural Compressor Tensorflow quantization base API""" +"""Intel Neural Compressor Tensorflow quantization base API.""" from typing import Any, Callable, Dict, Tuple, Union diff --git a/neural_compressor/torch/__init__.py b/neural_compressor/torch/__init__.py index 0aae49fff10..d22aebd52c4 100644 --- a/neural_compressor/torch/__init__.py +++ b/neural_compressor/torch/__init__.py @@ -11,5 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Intel Neural Compressor Pytorch API""" +"""Intel Neural Compressor Pytorch API.""" from .utils import load_empty_model diff --git a/neural_compressor/torch/quantization/__init__.py b/neural_compressor/torch/quantization/__init__.py index a01f2a51365..6c404afba0b 100644 --- a/neural_compressor/torch/quantization/__init__.py +++ b/neural_compressor/torch/quantization/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Intel Neural Compressor Pytorch quantization API""" +"""Intel Neural Compressor Pytorch quantization API.""" from neural_compressor.torch.quantization.quantize import quantize, prepare, convert from neural_compressor.torch.quantization.config import ( diff --git a/neural_compressor/torch/quantization/autotune.py b/neural_compressor/torch/quantization/autotune.py index 1b20f1c94fc..7a53b54b0d5 100644 --- a/neural_compressor/torch/quantization/autotune.py +++ b/neural_compressor/torch/quantization/autotune.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Intel Neural Compressor Pytorch quantization AutoTune API""" +"""Intel Neural Compressor Pytorch quantization AutoTune API.""" from copy import deepcopy diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index 3d9ec5424b9..9804231b39c 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # pylint:disable=import-error -"""Intel Neural Compressor Pytorch quantization config API""" +"""Intel Neural Compressor Pytorch quantization config API.""" from collections import OrderedDict diff --git a/neural_compressor/torch/quantization/quantize.py b/neural_compressor/torch/quantization/quantize.py index 1bbae4ae535..85e73d47078 100644 --- a/neural_compressor/torch/quantization/quantize.py +++ b/neural_compressor/torch/quantization/quantize.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Intel Neural Compressor Pytorch quantization base API""" +"""Intel Neural Compressor Pytorch quantization base API.""" import copy from typing import Any, Callable, Dict, Tuple