From be9cc40aa51679daed51c88f9ce8ceecd5e6a800 Mon Sep 17 00:00:00 2001 From: Chi Lo <54722500+chilo-ms@users.noreply.github.com> Date: Mon, 10 Jan 2022 13:00:58 -0800 Subject: [PATCH] Make some parameters configurable for calibration (#10204) --- .../python/tools/quantization/calibrate.py | 50 ++++++++++++++----- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/onnxruntime/python/tools/quantization/calibrate.py b/onnxruntime/python/tools/quantization/calibrate.py index 40b852c6ded43..c902d956acdbb 100644 --- a/onnxruntime/python/tools/quantization/calibrate.py +++ b/onnxruntime/python/tools/quantization/calibrate.py @@ -275,8 +275,9 @@ def __init__(self, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx', method='percentile', - num_quantized_bins=128, - percentile=99.99): + num_bins=128, + num_quantized_bins=2048, + percentile=99.999): ''' :param model: ONNX model to calibrate. It can be a ModelProto or a model path :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors. @@ -292,6 +293,7 @@ def __init__(self, self.model_original_outputs = set(output.name for output in self.model.graph.output) self.collector = None self.method = method + self.num_bins = num_bins self.num_quantized_bins = num_quantized_bins self.percentile = percentile @@ -347,6 +349,7 @@ def collect_data(self, data_reader: CalibrationDataReader): if not self.collector: self.collector = HistogramCollector(method=self.method, + num_bins=self.num_bins, num_quantized_bins=self.num_quantized_bins, percentile=self.percentile) self.collector.collect(clean_merged_dict) @@ -369,16 +372,18 @@ def __init__(self, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx', method='entropy', + num_bins=128, num_quantized_bins=128): ''' :param model: ONNX model to calibrate. It can be a ModelProto or a model path :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors. :param augmented_model_path: save augmented model to this path. :param method: A string. One of ['entropy', 'percentile']. + :param num_bins: number of bins to create a new histogram for collecting tensor values. :param num_quantized_bins: number of quantized bins. Default 128. ''' super(EntropyCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, - method=method, num_quantized_bins=num_quantized_bins) + method=method, num_bins=num_bins, num_quantized_bins=num_quantized_bins) class PercentileCalibrater(HistogramCalibrater): def __init__(self, @@ -386,7 +391,7 @@ def __init__(self, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx', method='percentile', - num_quantized_bins=2048, + num_bins=2048, percentile=99.999): ''' :param model: ONNX model to calibrate. It can be a ModelProto or a model path @@ -397,7 +402,7 @@ def __init__(self, :param percentile: A float number between [0, 100]. Default 99.99. ''' super(PercentileCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, - method=method, num_quantized_bins=num_quantized_bins, + method=method, num_bins=num_bins, percentile=percentile) class CalibrationDataCollector(metaclass=abc.ABCMeta): @@ -429,9 +434,10 @@ class HistogramCollector(CalibrationDataCollector): ref: https://docs.nvidia.com/deeplearning/tensorrt/pytorch-quantization-toolkit/docs/_modules/ pytorch_quantization/calib/histogram.html """ - def __init__(self, method, num_quantized_bins, percentile): + def __init__(self, method, num_bins, num_quantized_bins, percentile): self.histogram_dict = {} self.method = method + self.num_bins = num_bins self.num_quantized_bins= num_quantized_bins self.percentile = percentile @@ -439,7 +445,9 @@ def get_histogram_dict(self): return self.histogram_dict def collect(self, name_to_arr): - # TODO: Currently we have different collect() for percentile and percentile method respectively. + print("Collecting tensor data and making histogram ...") + + # TODO: Currently we have different collect() for entropy and percentile method respectively. # Need unified collect in the future. if self.method == 'entropy': return self.collect_for_entropy(name_to_arr) @@ -455,8 +463,8 @@ def collect_for_percentile(self, name_to_arr): data_arr = np.absolute(data_arr) # only consider absolute value if tensor not in self.histogram_dict: - # first time it uses num_quantized_bins to compute histogram. - hist, hist_edges = np.histogram(data_arr, bins=self.num_quantized_bins) + # first time it uses num_bins to compute histogram. + hist, hist_edges = np.histogram(data_arr, bins=self.num_bins) self.histogram_dict[tensor] = (hist, hist_edges) else: old_histogram = self.histogram_dict[tensor] @@ -491,7 +499,7 @@ def collect_for_entropy(self, name_to_arr): old_histogram = self.histogram_dict[tensor] self.histogram_dict[tensor] = self.merge_histogram(old_histogram, data_arr, min_value, max_value, threshold) else: - hist, hist_edges = np.histogram(data_arr, self.num_quantized_bins, range=(-threshold, threshold)) + hist, hist_edges = np.histogram(data_arr, self.num_bins, range=(-threshold, threshold)) self.histogram_dict[tensor] = (hist, hist_edges, min_value, max_value, threshold) def merge_histogram(self, old_histogram, data_arr, new_min, new_max, new_threshold): @@ -518,6 +526,7 @@ def merge_histogram(self, old_histogram, data_arr, new_min, new_max, new_thresho def compute_collection_result(self): if not self.histogram_dict or len(self.histogram_dict) == 0: raise ValueError("Histogram has not been collected. Please run collect() first.") + print("Finding optimal threshold for each tensor using {} algorithm ...".format(self.method)) if self.method == 'entropy': return self.compute_entropy() @@ -535,6 +544,10 @@ def compute_percentile(self): thresholds_dict = {} # per tensor thresholds + print("Number of tensors : {}".format(len(histogram_dict))) + print("Number of histogram bins : {}".format(self.num_bins)) + print("Percentile : {}".format(percentile)) + for tensor, histogram in histogram_dict.items(): hist = histogram[0] hist_edges = histogram[1] @@ -551,6 +564,10 @@ def compute_entropy(self): thresholds_dict = {} # per tensor thresholds + print("Number of tensors : {}".format(len(histogram_dict))) + print("Number of histogram bins : {} (The number may increase depends on the data it collects)".format(self.num_bins)) + print("Number of quantized bins : {}".format(self.num_quantized_bins)) + for tensor, histogram in histogram_dict.items(): optimal_threshold = self.get_entropy_threshold(histogram, num_quantized_bins) thresholds_dict[tensor] = optimal_threshold @@ -631,12 +648,19 @@ def get_entropy_threshold(self, histogram, num_quantized_bins): def create_calibrator(model, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx', - calibrate_method=CalibrationMethod.MinMax): + calibrate_method=CalibrationMethod.MinMax, + extra_options={}): if calibrate_method == CalibrationMethod.MinMax: return MinMaxCalibrater(model, op_types_to_calibrate, augmented_model_path) elif calibrate_method == CalibrationMethod.Entropy: - return EntropyCalibrater(model, op_types_to_calibrate, augmented_model_path) + # default settings for entropy algorithm + num_bins = 128 if 'num_bins' not in extra_options else extra_options['num_bins'] + num_quantized_bins = 128 if 'num_quantized_bins' not in extra_options else extra_options['num_quantized_bins'] + return EntropyCalibrater(model, op_types_to_calibrate, augmented_model_path, num_bins=num_bins, num_quantized_bins=num_quantized_bins) elif calibrate_method == CalibrationMethod.Percentile: - return PercentileCalibrater(model, op_types_to_calibrate, augmented_model_path) + # default settings for percentile algorithm + num_bins = 2048 if 'num_bins' not in extra_options else extra_options['num_bins'] + percentile = 99.999 if 'percentile' not in extra_options else extra_options['percentile'] + return PercentileCalibrater(model, op_types_to_calibrate, augmented_model_path, num_bins=num_bins, percentile=percentile) raise ValueError('Unsupported calibration method {}'.format(calibrate_method))