From ab3d43f0f40a3b0922e6960318446b48de858254 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Tue, 6 Jun 2023 21:13:38 +0800 Subject: [PATCH 01/23] Support SmoothQuant Signed-off-by: Mengni Wang --- .../python/tools/quantization/quantize.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index 40f9a3630bea0..26371b475d17d 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -144,6 +144,17 @@ def __init__( a DeQuantizeLinear node. If False, it remains floating-point bias and does not insert any quantization nodes associated with biases. This extra option is only effective when quant_format is QuantFormat.QDQ. + SmoothQuant = True/False : + Default is False. If enabled, SmoothQuant algorithm will be applied before quantization to do + fake input channel quantization. + SmoothQuantAlpha = 0.5 : + Default is 0.5. Support 'auto' or a float value, 'auto' means automatic tuning. It only works + if SmoothQuant is True. It controls the difficulty of weight and activation quantization. A larger + alpha value could be used on models with more significant activation outliers to migrate more + quantization difficulty to weights. + SmoothQuantFolding = True/False : + Default is True. It only works if SmoothQuant is True. If enabled, inserted Mul ops during + SmoothQuant will be folded into the previous op if the previous op is foldable. execution_provider : A enum indicates the Execution Provider such as: CPU, TRT, NNAPI, SNE, etc. Raises: ValueError: Raise ValueError if execution provider is unknown @@ -330,6 +341,17 @@ def quantize_static( Default is 0.01. Constant smoothing factor to use when computing the moving average of the minimum and maximum values. Effective only when the calibration method selected is MinMax and when CalibMovingAverage is set to True. + SmoothQuant = True/False : + Default is False. If enabled, SmoothQuant algorithm will be applied before quantization to do + fake input channel quantization. + SmoothQuantAlpha = 0.5 : + Default is 0.5. Support 'auto' or a float value, 'auto' means automatic tuning. It only works + if SmoothQuant is True. It controls the difficulty of weight and activation quantization. A larger + alpha value could be used on models with more significant activation outliers to migrate more + quantization difficulty to weights. + SmoothQuantFolding = True/False : + Default is True. It only works if SmoothQuant is True. If enabled, inserted Mul ops during + SmoothQuant will be folded into the previous op if the previous op is foldable. """ extra_options = extra_options or {} @@ -362,6 +384,18 @@ def quantize_static( key: extra_options.get(name) for (name, key) in calib_extra_options_keys if name in extra_options } + if extra_options.get('SmoothQuant', False): + from neural_compressor.adaptor.ox_utils.smooth_quant import ORTSmoothQuant + import copy + orig_nodes = [i.name for i in model.graph.node] + def dataloader(): + inc_dataloader = copy.deepcopy(calibration_data_reader) + for data in inc_dataloader: + yield data, None + sq = ORTSmoothQuant(model, dataloader(), reduce_range) + model = sq.transform(extra_options.get('SmoothQuantAlpha', 0.5), extra_options.get('SmoothQuantFolding', True)).model + nodes_to_exclude.extend([i.name for i in model.graph.node if i.name not in orig_nodes]) + with tempfile.TemporaryDirectory(prefix="ort.quant.") as quant_tmp_dir: calibrator = create_calibrator( model, From 8eb35200ccf191aed1a1f083e4a5871493944559 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Wed, 7 Jun 2023 22:46:57 +0800 Subject: [PATCH 02/23] add ut and dependence Signed-off-by: Mengni Wang --- .../python/tools/quantization/quantize.py | 18 ++++++++---------- .../quantization/test_quantize_static.py | 12 ++++++++++++ requirements.txt.in | 1 + 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index 26371b475d17d..03f76c899a4e5 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -147,11 +147,10 @@ def __init__( SmoothQuant = True/False : Default is False. If enabled, SmoothQuant algorithm will be applied before quantization to do fake input channel quantization. - SmoothQuantAlpha = 0.5 : - Default is 0.5. Support 'auto' or a float value, 'auto' means automatic tuning. It only works - if SmoothQuant is True. It controls the difficulty of weight and activation quantization. A larger - alpha value could be used on models with more significant activation outliers to migrate more - quantization difficulty to weights. + SmoothQuantAlpha = float : + Default is 0.5. It only works if SmoothQuant is True. It controls the difficulty of weight + and activation quantization. A larger alpha value could be used on models with more significant + activation outliers to migrate more quantization difficulty to weights. SmoothQuantFolding = True/False : Default is True. It only works if SmoothQuant is True. If enabled, inserted Mul ops during SmoothQuant will be folded into the previous op if the previous op is foldable. @@ -344,11 +343,10 @@ def quantize_static( SmoothQuant = True/False : Default is False. If enabled, SmoothQuant algorithm will be applied before quantization to do fake input channel quantization. - SmoothQuantAlpha = 0.5 : - Default is 0.5. Support 'auto' or a float value, 'auto' means automatic tuning. It only works - if SmoothQuant is True. It controls the difficulty of weight and activation quantization. A larger - alpha value could be used on models with more significant activation outliers to migrate more - quantization difficulty to weights. + SmoothQuantAlpha = float : + Default is 0.5. It only works if SmoothQuant is True. It controls the difficulty of weight + and activation quantization. A larger alpha value could be used on models with more significant + activation outliers to migrate more quantization difficulty to weights. SmoothQuantFolding = True/False : Default is True. It only works if SmoothQuant is True. If enabled, inserted Mul ops during SmoothQuant will be folded into the previous op if the previous op is foldable. diff --git a/onnxruntime/test/python/quantization/test_quantize_static.py b/onnxruntime/test/python/quantization/test_quantize_static.py index 1fb7ad2e9efa4..1fc20e2531e41 100644 --- a/onnxruntime/test/python/quantization/test_quantize_static.py +++ b/onnxruntime/test/python/quantization/test_quantize_static.py @@ -98,6 +98,18 @@ def test_static_quant_config(self): check_model_correctness(self, self._model_fp32_path, quant_model_path, data_reader.get_next()) data_reader.rewind() + def test_smooth_quant(self): + data_reader = InputFeedsNegOneZeroOne(10, {"input": [1, self._channel_size, 1, 3]}) + quant_config = StaticQuantConfig(data_reader, extra_options={'SmoothQuant': True}) + quant_model_path = str(Path(self._tmp_model_dir.name) / "quant.config.onnx") + quantize(self._model_fp32_path, quant_model_path, quant_config) + + data_reader.rewind() + check_model_correctness(self, self._model_fp32_path, quant_model_path, data_reader.get_next()) + data_reader.rewind() + + model = onnx.load(quant_model_path) + self.assertTrue('Mul' in [i.op_type for i in model.graph.node]) if __name__ == "__main__": unittest.main() diff --git a/requirements.txt.in b/requirements.txt.in index 89242061fb119..372d82429dc7f 100644 --- a/requirements.txt.in +++ b/requirements.txt.in @@ -4,3 +4,4 @@ numpy >= @Python_NumPy_VERSION@ packaging protobuf sympy +neural-compressor From c1ccdd505a29655a0b3b0ac9284e9546b8298c69 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 8 Jun 2023 09:15:56 +0800 Subject: [PATCH 03/23] fix python format Signed-off-by: Mengni Wang --- onnxruntime/python/tools/quantization/quantize.py | 8 +++++--- .../test/python/quantization/test_quantize_static.py | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index 03f76c899a4e5..cbd3feb718400 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -382,16 +382,18 @@ def quantize_static( key: extra_options.get(name) for (name, key) in calib_extra_options_keys if name in extra_options } - if extra_options.get('SmoothQuant', False): - from neural_compressor.adaptor.ox_utils.smooth_quant import ORTSmoothQuant + if extra_options.get("SmoothQuant", False): import copy + from neural_compressor.adaptor.ox_utils.smooth_quant import ORTSmoothQuant orig_nodes = [i.name for i in model.graph.node] def dataloader(): inc_dataloader = copy.deepcopy(calibration_data_reader) for data in inc_dataloader: yield data, None sq = ORTSmoothQuant(model, dataloader(), reduce_range) - model = sq.transform(extra_options.get('SmoothQuantAlpha', 0.5), extra_options.get('SmoothQuantFolding', True)).model + model = sq.transform( + extra_options.get("SmoothQuantAlpha", 0.5), extra_options.get("SmoothQuantFolding", True) + ).model nodes_to_exclude.extend([i.name for i in model.graph.node if i.name not in orig_nodes]) with tempfile.TemporaryDirectory(prefix="ort.quant.") as quant_tmp_dir: diff --git a/onnxruntime/test/python/quantization/test_quantize_static.py b/onnxruntime/test/python/quantization/test_quantize_static.py index 1fc20e2531e41..97ad4e3acc6f7 100644 --- a/onnxruntime/test/python/quantization/test_quantize_static.py +++ b/onnxruntime/test/python/quantization/test_quantize_static.py @@ -100,7 +100,7 @@ def test_static_quant_config(self): def test_smooth_quant(self): data_reader = InputFeedsNegOneZeroOne(10, {"input": [1, self._channel_size, 1, 3]}) - quant_config = StaticQuantConfig(data_reader, extra_options={'SmoothQuant': True}) + quant_config = StaticQuantConfig(data_reader, extra_options={"SmoothQuant": True}) quant_model_path = str(Path(self._tmp_model_dir.name) / "quant.config.onnx") quantize(self._model_fp32_path, quant_model_path, quant_config) @@ -109,7 +109,7 @@ def test_smooth_quant(self): data_reader.rewind() model = onnx.load(quant_model_path) - self.assertTrue('Mul' in [i.op_type for i in model.graph.node]) + self.assertTrue("Mul" in [i.op_type for i in model.graph.node]) if __name__ == "__main__": unittest.main() From 7b5e7f9286ece4cb5f548ebccb216e0358dbb21a Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 8 Jun 2023 09:43:04 +0800 Subject: [PATCH 04/23] fix python format Signed-off-by: Mengni Wang --- onnxruntime/python/tools/quantization/quantize.py | 5 ++++- onnxruntime/test/python/quantization/test_quantize_static.py | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index cbd3feb718400..e41757125bbc9 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -384,12 +384,15 @@ def quantize_static( if extra_options.get("SmoothQuant", False): import copy + from neural_compressor.adaptor.ox_utils.smooth_quant import ORTSmoothQuant - orig_nodes = [i.name for i in model.graph.node] + def dataloader(): inc_dataloader = copy.deepcopy(calibration_data_reader) for data in inc_dataloader: yield data, None + + orig_nodes = [i.name for i in model.graph.node] sq = ORTSmoothQuant(model, dataloader(), reduce_range) model = sq.transform( extra_options.get("SmoothQuantAlpha", 0.5), extra_options.get("SmoothQuantFolding", True) diff --git a/onnxruntime/test/python/quantization/test_quantize_static.py b/onnxruntime/test/python/quantization/test_quantize_static.py index 97ad4e3acc6f7..f771ee3959344 100644 --- a/onnxruntime/test/python/quantization/test_quantize_static.py +++ b/onnxruntime/test/python/quantization/test_quantize_static.py @@ -111,5 +111,6 @@ def test_smooth_quant(self): model = onnx.load(quant_model_path) self.assertTrue("Mul" in [i.op_type for i in model.graph.node]) + if __name__ == "__main__": unittest.main() From e385a303dc1b85282b49c62c5b79c4ae66b5fbf9 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Wed, 14 Jun 2023 19:58:05 +0800 Subject: [PATCH 05/23] Fix dependency and model Signed-off-by: Mengni Wang --- .../python/tools/quantization/quantize.py | 20 +++++++++++++++---- requirements-dev.txt | 1 + requirements.txt.in | 1 - 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index e41757125bbc9..f57ceec7079c8 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -383,21 +383,33 @@ def quantize_static( } if extra_options.get("SmoothQuant", False): + import importlib + + try: + importlib.import_module("neural_compressor.adaptor.ox_utils.smooth_quant") + except Exception as e: + logger.error("{}.".format(e)) + raise RuntimeError("neural-compressor is not correctly installed. Please check your environment.") + import copy + from .quant_utils import save_and_reload_model from neural_compressor.adaptor.ox_utils.smooth_quant import ORTSmoothQuant - def dataloader(): - inc_dataloader = copy.deepcopy(calibration_data_reader) - for data in inc_dataloader: + def inc_dataloader(): + data_reader = copy.deepcopy(calibration_data_reader) + for data in data_reader: yield data, None orig_nodes = [i.name for i in model.graph.node] - sq = ORTSmoothQuant(model, dataloader(), reduce_range) + dataloader = inc_dataloader() + sq = ORTSmoothQuant(model_input, dataloader, reduce_range) + del dataloader model = sq.transform( extra_options.get("SmoothQuantAlpha", 0.5), extra_options.get("SmoothQuantFolding", True) ).model nodes_to_exclude.extend([i.name for i in model.graph.node if i.name not in orig_nodes]) + model = save_and_reload_model(model) with tempfile.TemporaryDirectory(prefix="ort.quant.") as quant_tmp_dir: calibrator = create_calibrator( diff --git a/requirements-dev.txt b/requirements-dev.txt index 73e04e6b37c0b..6390d08d0bab8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -18,3 +18,4 @@ scipy sympy wheel setuptools>=41.4.0 +neural-compressor diff --git a/requirements.txt.in b/requirements.txt.in index 372d82429dc7f..89242061fb119 100644 --- a/requirements.txt.in +++ b/requirements.txt.in @@ -4,4 +4,3 @@ numpy >= @Python_NumPy_VERSION@ packaging protobuf sympy -neural-compressor From 5094bb48fccfbbf659c5e0e63e79680d964b95bc Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Wed, 14 Jun 2023 21:35:16 +0800 Subject: [PATCH 06/23] fix python format Signed-off-by: Mengni Wang --- onnxruntime/python/tools/quantization/quantize.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index f57ceec7079c8..f42cf94292af6 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -388,14 +388,15 @@ def quantize_static( try: importlib.import_module("neural_compressor.adaptor.ox_utils.smooth_quant") except Exception as e: - logger.error("{}.".format(e)) + logger.error(f"{e}.") raise RuntimeError("neural-compressor is not correctly installed. Please check your environment.") import copy - from .quant_utils import save_and_reload_model from neural_compressor.adaptor.ox_utils.smooth_quant import ORTSmoothQuant + from .quant_utils import save_and_reload_model + def inc_dataloader(): data_reader = copy.deepcopy(calibration_data_reader) for data in data_reader: From 636ffd5245677bf97243e32c43c7a797ad28159c Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 15 Jun 2023 09:39:42 +0800 Subject: [PATCH 07/23] fix python format Signed-off-by: Mengni Wang --- onnxruntime/python/tools/quantization/quantize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index f42cf94292af6..9669b5bf680d3 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -388,8 +388,8 @@ def quantize_static( try: importlib.import_module("neural_compressor.adaptor.ox_utils.smooth_quant") except Exception as e: - logger.error(f"{e}.") - raise RuntimeError("neural-compressor is not correctly installed. Please check your environment.") + logging.error(f"{e}.") + raise RuntimeError("neural-compressor is not correctly installed. Please check your environment.") from e import copy From 13adeabc2421775a0c7ce70c3939c853823ce15a Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Tue, 20 Jun 2023 09:38:23 +0800 Subject: [PATCH 08/23] enhance ut Signed-off-by: Mengni Wang --- onnxruntime/test/python/quantization/test_quantize_static.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/python/quantization/test_quantize_static.py b/onnxruntime/test/python/quantization/test_quantize_static.py index f771ee3959344..5f91eb68783f9 100644 --- a/onnxruntime/test/python/quantization/test_quantize_static.py +++ b/onnxruntime/test/python/quantization/test_quantize_static.py @@ -109,7 +109,7 @@ def test_smooth_quant(self): data_reader.rewind() model = onnx.load(quant_model_path) - self.assertTrue("Mul" in [i.op_type for i in model.graph.node]) + self.assertIn("Mul", [i.op_type for i in model.graph.node]) if __name__ == "__main__": From d7bc8848f4462ee142106cf67e2a3ae17641ee8b Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Mon, 10 Jul 2023 09:04:24 +0800 Subject: [PATCH 09/23] update requirements Signed-off-by: Mengni Wang --- requirements-dev.txt | 1 - tools/ci_build/requirements.txt | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 6390d08d0bab8..73e04e6b37c0b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -18,4 +18,3 @@ scipy sympy wheel setuptools>=41.4.0 -neural-compressor diff --git a/tools/ci_build/requirements.txt b/tools/ci_build/requirements.txt index 5d8e56a70045c..00d17ee81a628 100644 --- a/tools/ci_build/requirements.txt +++ b/tools/ci_build/requirements.txt @@ -5,3 +5,6 @@ numpy==1.24.0 coloredlogs==15.0 transformers==4.24.0 psutil + +# package used by smooth quant test +neural-compressor>=2.2 From ebced60900fa4b2cb950e2086c9b5254e7e036b6 Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Mon, 10 Jul 2023 13:49:06 +0800 Subject: [PATCH 10/23] Update ThirdPartyNotices.txt --- ThirdPartyNotices.txt | 210 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 209 insertions(+), 1 deletion(-) diff --git a/ThirdPartyNotices.txt b/ThirdPartyNotices.txt index e099840e64ce7..6f6faa3a2e56f 100644 --- a/ThirdPartyNotices.txt +++ b/ThirdPartyNotices.txt @@ -6021,4 +6021,212 @@ OR OTHER DEALINGS IN THE SOFTWARE. Except as contained in this notice, the name of a copyright holder shall not be used in advertising or otherwise to promote the sale, use or other dealings -in this Software without prior written authorization of the copyright holder. \ No newline at end of file +in this Software without prior written authorization of the copyright holder. + +_____ + +Intel neural-compressor + +https://github.com/intel/neural-compressor + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + ============================================================================ + + Copyright 2016-2019 Intel Corporation + Copyright 2018 YANDEX LLC + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + This distribution includes third party software ("third party programs"). + This third party software, even if included with the distribution of + the Intel software, may be governed by separate license terms, including + without limitation, third party license terms, other Intel software license + terms, and open source software license terms. These separate license terms + govern your use of the third party programs as set forth in the + "THIRD-PARTY-PROGRAMS" file. From 4a3da03b47dde3de72d416e2628a9a824c6c8203 Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Sat, 15 Jul 2023 10:13:47 +0800 Subject: [PATCH 11/23] Update requirements.txt --- tools/ci_build/requirements.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/ci_build/requirements.txt b/tools/ci_build/requirements.txt index 00d17ee81a628..5d8e56a70045c 100644 --- a/tools/ci_build/requirements.txt +++ b/tools/ci_build/requirements.txt @@ -5,6 +5,3 @@ numpy==1.24.0 coloredlogs==15.0 transformers==4.24.0 psutil - -# package used by smooth quant test -neural-compressor>=2.2 From 0c5e242d4ab64c945ea91c19727f805a83a149df Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Sat, 15 Jul 2023 10:15:53 +0800 Subject: [PATCH 12/23] Update requirements.txt --- .../github/linux/docker/scripts/manylinux/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt index 2a6625cc41482..26e028978ec13 100644 --- a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt @@ -8,3 +8,4 @@ onnx==1.14.0 protobuf==3.20.2 sympy==1.10.1 flatbuffers +neural-compressor>=2.2 From 9af6db2fcdd4190af7dfc69a4c1eca66427b20fd Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Sat, 15 Jul 2023 10:21:57 +0800 Subject: [PATCH 13/23] Update test_quantize_static.py --- onnxruntime/test/python/quantization/test_quantize_static.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/onnxruntime/test/python/quantization/test_quantize_static.py b/onnxruntime/test/python/quantization/test_quantize_static.py index 5f91eb68783f9..eb357dc9badae 100644 --- a/onnxruntime/test/python/quantization/test_quantize_static.py +++ b/onnxruntime/test/python/quantization/test_quantize_static.py @@ -8,6 +8,7 @@ import tempfile import unittest from pathlib import Path +from importlib.util import find_spec import numpy as np import onnx @@ -98,6 +99,7 @@ def test_static_quant_config(self): check_model_correctness(self, self._model_fp32_path, quant_model_path, data_reader.get_next()) data_reader.rewind() + @unittest.skipIf(not find_spec('neural_compressor'), "Skip since neural-compressor is not installed.") def test_smooth_quant(self): data_reader = InputFeedsNegOneZeroOne(10, {"input": [1, self._channel_size, 1, 3]}) quant_config = StaticQuantConfig(data_reader, extra_options={"SmoothQuant": True}) From 4aa01d11703d9ab07e799da1c74ee21fef0020cd Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Sat, 15 Jul 2023 10:28:37 +0800 Subject: [PATCH 14/23] Update test_quantize_static.py --- onnxruntime/test/python/quantization/test_quantize_static.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/python/quantization/test_quantize_static.py b/onnxruntime/test/python/quantization/test_quantize_static.py index eb357dc9badae..8e2e585f3655d 100644 --- a/onnxruntime/test/python/quantization/test_quantize_static.py +++ b/onnxruntime/test/python/quantization/test_quantize_static.py @@ -7,8 +7,8 @@ import tempfile import unittest -from pathlib import Path from importlib.util import find_spec +from pathlib import Path import numpy as np import onnx @@ -99,7 +99,7 @@ def test_static_quant_config(self): check_model_correctness(self, self._model_fp32_path, quant_model_path, data_reader.get_next()) data_reader.rewind() - @unittest.skipIf(not find_spec('neural_compressor'), "Skip since neural-compressor is not installed.") + @unittest.skipIf(not find_spec("neural_compressor"), "Skip since neural-compressor is not installed.") def test_smooth_quant(self): data_reader = InputFeedsNegOneZeroOne(10, {"input": [1, self._channel_size, 1, 3]}) quant_config = StaticQuantConfig(data_reader, extra_options={"SmoothQuant": True}) From a4e1d922799c81ceb11b0e6e400566911b7323dc Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Mon, 17 Jul 2023 15:00:32 +0800 Subject: [PATCH 15/23] Update quantize.py --- onnxruntime/python/tools/quantization/quantize.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index b6337953f7128..8e892a86bb803 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -387,11 +387,10 @@ def quantize_static( raise RuntimeError("neural-compressor is not correctly installed. Please check your environment.") from e import copy + import onnx from neural_compressor.adaptor.ox_utils.smooth_quant import ORTSmoothQuant - from .quant_utils import save_and_reload_model - def inc_dataloader(): data_reader = copy.deepcopy(calibration_data_reader) for data in data_reader: @@ -405,7 +404,10 @@ def inc_dataloader(): extra_options.get("SmoothQuantAlpha", 0.5), extra_options.get("SmoothQuantFolding", True) ).model nodes_to_exclude.extend([i.name for i in model.graph.node if i.name not in orig_nodes]) - model = save_and_reload_model(model) + sq_path = tempfile.TemporaryDirectory(prefix="ort.quant.") + model_input = Path(sq_path.name).joinpath("sq_model.onnx") + onnx.save_model(model, model_input.as_posix(), save_as_external_data=True) + model = load_model_with_shape_infer(Path(model_input)) # use smooth quant model for calibration with tempfile.TemporaryDirectory(prefix="ort.quant.") as quant_tmp_dir: calibrator = create_calibrator( @@ -462,6 +464,8 @@ def inc_dataloader(): "/cpu/ReadMe.md " ) + if extra_options.get("SmoothQuant", False): + sq_path.cleanup() def quantize_dynamic( model_input: Path, From 25fcc9a48a2521c353627fddb235afd7bd07e2cb Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Mon, 17 Jul 2023 15:08:07 +0800 Subject: [PATCH 16/23] Update quantize.py --- onnxruntime/python/tools/quantization/quantize.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index 8e892a86bb803..f819a355f56eb 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -387,8 +387,8 @@ def quantize_static( raise RuntimeError("neural-compressor is not correctly installed. Please check your environment.") from e import copy + import onnx - from neural_compressor.adaptor.ox_utils.smooth_quant import ORTSmoothQuant def inc_dataloader(): @@ -467,6 +467,7 @@ def inc_dataloader(): if extra_options.get("SmoothQuant", False): sq_path.cleanup() + def quantize_dynamic( model_input: Path, model_output: Path, From 93fc0f6d62cb237c2f0ff3d671309ccfbf3a9113 Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Mon, 17 Jul 2023 15:14:54 +0800 Subject: [PATCH 17/23] Update quantize.py --- onnxruntime/python/tools/quantization/quantize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index f819a355f56eb..19a35150eb6f1 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -387,7 +387,7 @@ def quantize_static( raise RuntimeError("neural-compressor is not correctly installed. Please check your environment.") from e import copy - + import onnx from neural_compressor.adaptor.ox_utils.smooth_quant import ORTSmoothQuant From d5a30c74d65cf6909a334b0ac76552dfbf14b269 Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Mon, 17 Jul 2023 16:05:46 +0800 Subject: [PATCH 18/23] Update quantize.py --- onnxruntime/python/tools/quantization/quantize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index 19a35150eb6f1..0dbca28467b30 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -405,8 +405,8 @@ def inc_dataloader(): ).model nodes_to_exclude.extend([i.name for i in model.graph.node if i.name not in orig_nodes]) sq_path = tempfile.TemporaryDirectory(prefix="ort.quant.") - model_input = Path(sq_path.name).joinpath("sq_model.onnx") - onnx.save_model(model, model_input.as_posix(), save_as_external_data=True) + model_input = Path(sq_path.name).joinpath("sq_model.onnx").as_posix() + onnx.save_model(model, model_input, save_as_external_data=True) model = load_model_with_shape_infer(Path(model_input)) # use smooth quant model for calibration with tempfile.TemporaryDirectory(prefix="ort.quant.") as quant_tmp_dir: From b2d7a07f5d09c9986f652c983a0d8e6a93b21c48 Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Thu, 20 Jul 2023 10:54:05 +0800 Subject: [PATCH 19/23] Update Dockerfile.arm64 --- dockerfiles/Dockerfile.arm64 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockerfiles/Dockerfile.arm64 b/dockerfiles/Dockerfile.arm64 index 06ce9c1e38040..f0a00c3e9b66e 100644 --- a/dockerfiles/Dockerfile.arm64 +++ b/dockerfiles/Dockerfile.arm64 @@ -14,4 +14,4 @@ RUN /code/dockerfiles/scripts/install_centos_arm64.sh && cd /code && CC=/opt/rh/ FROM arm64v8/centos:7 COPY --from=0 /code/build/Linux/Release/dist /root COPY --from=0 /code/dockerfiles/LICENSE-IMAGE.txt /code/LICENSE-IMAGE.txt -RUN yum install -y python3-wheel python3-pip && python3 -m pip install --upgrade pip && python3 -m pip install /root/*.whl && rm -rf /root/*.whl +RUN yum install -y python3-wheel python3-pip && python3 -m pip install --upgrade pip && python3 -m pip install /root/*.whl && rm -rf /root/*.whl && yum install libgl1-mesa-glx && yum install libglib2.0-0 From 1677511995ca00d47941349e34fb800479ff3781 Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Thu, 20 Jul 2023 21:47:15 +0800 Subject: [PATCH 20/23] Update Dockerfile.arm64 --- dockerfiles/Dockerfile.arm64 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockerfiles/Dockerfile.arm64 b/dockerfiles/Dockerfile.arm64 index f0a00c3e9b66e..c6a73b6892cea 100644 --- a/dockerfiles/Dockerfile.arm64 +++ b/dockerfiles/Dockerfile.arm64 @@ -14,4 +14,4 @@ RUN /code/dockerfiles/scripts/install_centos_arm64.sh && cd /code && CC=/opt/rh/ FROM arm64v8/centos:7 COPY --from=0 /code/build/Linux/Release/dist /root COPY --from=0 /code/dockerfiles/LICENSE-IMAGE.txt /code/LICENSE-IMAGE.txt -RUN yum install -y python3-wheel python3-pip && python3 -m pip install --upgrade pip && python3 -m pip install /root/*.whl && rm -rf /root/*.whl && yum install libgl1-mesa-glx && yum install libglib2.0-0 +RUN yum install -y python3-wheel python3-pip mesa-libGL && python3 -m pip install --upgrade pip && python3 -m pip install /root/*.whl && rm -rf /root/*.whl From cec50863f545b81b89e6a46e7cad934fe7fd7f7d Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Sat, 22 Jul 2023 09:18:41 +0800 Subject: [PATCH 21/23] Update Dockerfile.arm64 --- dockerfiles/Dockerfile.arm64 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockerfiles/Dockerfile.arm64 b/dockerfiles/Dockerfile.arm64 index c6a73b6892cea..06ce9c1e38040 100644 --- a/dockerfiles/Dockerfile.arm64 +++ b/dockerfiles/Dockerfile.arm64 @@ -14,4 +14,4 @@ RUN /code/dockerfiles/scripts/install_centos_arm64.sh && cd /code && CC=/opt/rh/ FROM arm64v8/centos:7 COPY --from=0 /code/build/Linux/Release/dist /root COPY --from=0 /code/dockerfiles/LICENSE-IMAGE.txt /code/LICENSE-IMAGE.txt -RUN yum install -y python3-wheel python3-pip mesa-libGL && python3 -m pip install --upgrade pip && python3 -m pip install /root/*.whl && rm -rf /root/*.whl +RUN yum install -y python3-wheel python3-pip && python3 -m pip install --upgrade pip && python3 -m pip install /root/*.whl && rm -rf /root/*.whl From ec1ab872aeb31ba4939c5517832d523835b1e2a8 Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Sat, 22 Jul 2023 09:19:58 +0800 Subject: [PATCH 22/23] Update requirements.txt --- .../github/linux/docker/scripts/manylinux/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt index 26e028978ec13..70cf4a0af06e4 100644 --- a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt @@ -8,4 +8,4 @@ onnx==1.14.0 protobuf==3.20.2 sympy==1.10.1 flatbuffers -neural-compressor>=2.2 +neural-compressor>=2.2.1 From 20307c22f691f6381ce4f759285c0b8ccb2fdb83 Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Tue, 25 Jul 2023 15:07:28 +0800 Subject: [PATCH 23/23] Update test_quantize_static.py --- onnxruntime/test/python/quantization/test_quantize_static.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/python/quantization/test_quantize_static.py b/onnxruntime/test/python/quantization/test_quantize_static.py index cb6e6ee5a5846..b5ec931691aad 100644 --- a/onnxruntime/test/python/quantization/test_quantize_static.py +++ b/onnxruntime/test/python/quantization/test_quantize_static.py @@ -101,7 +101,7 @@ def test_static_quant_config(self): @unittest.skipIf(not find_spec("neural_compressor"), "Skip since neural-compressor is not installed.") def test_smooth_quant(self): - data_reader = InputFeedsNegOneZeroOne(10, {"input": [1, self._channel_size, 1, 3]}) + data_reader = input_feeds_neg_one_zero_one(10, {"input": [1, self._channel_size, 1, 3]}) quant_config = StaticQuantConfig(data_reader, extra_options={"SmoothQuant": True}) quant_model_path = str(Path(self._tmp_model_dir.name) / "quant.config.onnx") quantize(self._model_fp32_path, quant_model_path, quant_config)