diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/requirements.txt index c8d21e74265..d86161032c2 100644 --- a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/requirements.txt +++ b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/requirements.txt @@ -1,2 +1,2 @@ -tensorflow==2.11.0 +tensorflow neural-compressor \ No newline at end of file diff --git a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py index 4f279e20073..160cdb01e44 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py @@ -86,7 +86,7 @@ def __init__(self, framework_specific_info): cfg_yaml_name = "{}.yaml".format(self.__class__.__name__[: -len("Adaptor")].lower()) self.itex_mode = self.backend == "itex" or cfg_yaml_name == "tensorflow_itex.yaml" - if self.itex_mode: + if self.itex_mode: # pragma: no cover self._check_itex() self.query_handler = TensorflowQuery( @@ -109,7 +109,7 @@ def __init__(self, framework_specific_info): self._last_dequantize_ops = None - def _check_itex(self): + def _check_itex(self): # pragma: no cover try: import intel_extension_for_tensorflow except: @@ -133,7 +133,7 @@ def _tuning_cfg_to_fw(self, tuning_cfg): invalid_op_names = [i for i in self.quantize_config["op_wise_config"] if i not in dispatched_op_names] - for op_name in invalid_op_names: + for op_name in invalid_op_names: # pragma: no cover self.quantize_config["op_wise_config"].pop(op_name) for each_op_info in tuning_cfg["op"]: @@ -144,7 +144,7 @@ def _tuning_cfg_to_fw(self, tuning_cfg): self.quantize_config["op_wise_config"].pop(op_name) if tuning_cfg["op"][each_op_info]["activation"]["dtype"] == "fp32": fp32_ops.append(op_name) - if tuning_cfg["op"][each_op_info]["activation"]["dtype"] == "bf16": + if tuning_cfg["op"][each_op_info]["activation"]["dtype"] == "bf16": # pragma: no cover bf16_ops.append(op_name) continue @@ -342,7 +342,7 @@ def _dump_model_op_stats(self, model_graphdef): res[origin_op_type]["INT8"] += 1 if i.op in fp32_op_list: - if "T" not in i.attr and i.op != "Cast": + if "T" not in i.attr and i.op != "Cast": # pragma: no cover continue if i.op == "Cast": if i.attr["DstT"].type == dtypes.bfloat16: @@ -432,7 +432,7 @@ def _query_quantizable_ops(self, matched_nodes): ) and len(first_conv_or_matmul_node) == 0: first_conv_or_matmul_node.append((node_name, self.unify_op_type_mapping[node_op])) self.recipes_ops["first_conv_or_matmul_quantization"] = first_conv_or_matmul_node - if exclude_first_quantizable_op and ( + if exclude_first_quantizable_op and ( # pragma: no cover self.unify_op_type_mapping[node_op].find("conv2d") != -1 or self.unify_op_type_mapping[node_op].find("matmul") != -1 ): @@ -493,7 +493,7 @@ def _filter_unquantizable_concat(self, matched_nodes): concat_nodes = g.query_fusion_pattern_nodes([["ConcatV2"]]) for i in concat_nodes: concat_node_name = i[0] - if concat_node_name not in target_concat_nodes: + if concat_node_name not in target_concat_nodes: # pragma: no cover continue input_positive_status = [] for index in range(graph_info[concat_node_name].node.attr["N"].i): @@ -507,7 +507,7 @@ def _filter_unquantizable_concat(self, matched_nodes): else: positive_input = g.has_positive_input(each_input_node.name) input_positive_status.append(positive_input) - if not any(input_positive_status): + if not any(input_positive_status): # pragma: no cover matched_nodes.remove(i) def _filter_unquantizable_concat_performance_only(self, matched_nodes): @@ -522,7 +522,7 @@ def _filter_unquantizable_concat_performance_only(self, matched_nodes): concat_nodes = g.query_fusion_pattern_nodes([["ConcatV2"]]) for i in concat_nodes: concat_node_name = i[0] - if concat_node_name not in target_concat_nodes: + if concat_node_name not in target_concat_nodes: # pragma: no cover continue input_positive_status = [] control_flow = False @@ -531,9 +531,9 @@ def _filter_unquantizable_concat_performance_only(self, matched_nodes): graph_info[concat_node_name].node.input[index] ) each_input_node = graph_info[each_input_name].node - if each_input_node.op in ("Switch"): + if each_input_node.op in ("Switch"): # pragma: no cover control_flow = True - if control_flow: + if control_flow: # pragma: no cover matched_nodes.remove(i) def parse_quant_config(self, quant_config, model, calib_iteration): @@ -588,7 +588,7 @@ def _query_fw_capability(self, model): def check_match(patterns, input_pattern): for i in patterns: - if input_pattern == [i for i in i.replace("+", " ").strip().split(" ") if i]: + if input_pattern == [i for i in i.replace("+", " ").strip().split(" ") if i]: # pragma: no cover return True return False @@ -641,7 +641,7 @@ def quantize_input(self, model): """ scale = None # quantize input only support tensorflow version > 2.1.0 - if version1_lt_version2(tf.version.VERSION, "2.1.0"): + if version1_lt_version2(tf.version.VERSION, "2.1.0"): # pragma: no cover logger.warning("Quantize input needs tensorflow 2.1.0 and newer.") return model, scale @@ -872,7 +872,7 @@ def precisions(self): return self._precisions @precisions.setter - def precisions(self, precisions): + def precisions(self, precisions): # pragma: no cover """Set precision.""" if not isinstance(precisions, list): precisions = [precisions] @@ -881,7 +881,7 @@ def precisions(self, precisions): self._precisions = precisions @staticmethod - def check_value(name, src, supported_type, supported_value=[]): + def check_value(name, src, supported_type, supported_value=[]): # pragma: no cover """Check if the given object is the given supported type and in the given supported value. Example:: @@ -946,7 +946,7 @@ def _get_specified_version_cfg(self, data): config = None def _compare(version1, version2): - if parse_version(version1) == parse_version(version2): + if parse_version(version1) == parse_version(version2): # pragma: no cover return 0 elif parse_version(version1) < parse_version(version2): return -1 @@ -979,7 +979,7 @@ def _compare(version1, version2): # convention. Replacing them with dot for version comparison. sorted_list = [i.replace("-up", ".") for i in sorted_list] sorted_list = sorted(sorted_list, key=cmp_to_key(_compare), reverse=True) - else: + else: # pragma: no cover assert isinstance(sorted_list, str) sorted_list = list(sorted_list.replace("-up", ".").split()) for i in sorted_list: @@ -1025,7 +1025,7 @@ def _one_shot_query(self): def _update_cfg_with_usr_definition(self): """Add user defined precision configuration.""" tensorflow_config = TensorFlowConfig() - if tensorflow_config.precisions is not None: + if tensorflow_config.precisions is not None: # pragma: no cover self.cur_config["precisions"]["names"] = ",".join(tensorflow_config.precisions) def get_version(self): @@ -1288,7 +1288,7 @@ def get_fuse_patterns(self): elif version1_gte_version2(tf.version.VERSION, "2.1.0"): patterns["int8"] = tf_int8_pattern_list patterns["uint8"] = tf_uint8_pattern_list - if self.itex_mode: + if self.itex_mode: # pragma: no cover patterns["int8"].append("FusedBatchNormV3 + Relu") patterns["int8"].append("FusedBatchNormV3 + LeakyRelu") elif version1_eq_version2(tf.version.VERSION, "1.15.0-up3"): # pragma: no cover @@ -1340,7 +1340,7 @@ def get_op_types_by_precision(self, precision): tf.version.VERSION, "1.15.0-up3" ): return ["Conv2D", "MatMul", "ConcatV2", "MaxPool", "AvgPool"] - return ["MatMul", "ConcatV2", "MaxPool", "AvgPool"] + return ["MatMul", "ConcatV2", "MaxPool", "AvgPool"] # pragma: no cover if precision == "uint8": if tf.version.VERSION in spr_base_verions: return [key for key in self.cur_config["int8"][self.quant_mode].keys() if "Norm" not in key] @@ -1348,7 +1348,7 @@ def get_op_types_by_precision(self, precision): tf.version.VERSION, "1.15.0-up3" ): return ["Conv2D", "MatMul", "ConcatV2", "MaxPool", "AvgPool", "DepthwiseConv2dNative"] - return ["Conv2D", "MatMul", "ConcatV2", "MaxPool", "AvgPool"] + return ["Conv2D", "MatMul", "ConcatV2", "MaxPool", "AvgPool"] # pragma: no cover if precision == "bf16": if tf.version.VERSION in spr_base_verions: return self.cur_config[precision] @@ -1356,7 +1356,7 @@ def get_op_types_by_precision(self, precision): tf.version.VERSION, "1.15.0-up3" ): return self.cur_config[precision] - return [] + return [] # pragma: no cover def get_mixed_precision_combination(self): """Get the valid mixed precisions. diff --git a/neural_compressor/tensorflow/quantization/utils/graph_converter.py b/neural_compressor/tensorflow/quantization/utils/graph_converter.py index a0a924ecbe7..302bfe13717 100644 --- a/neural_compressor/tensorflow/quantization/utils/graph_converter.py +++ b/neural_compressor/tensorflow/quantization/utils/graph_converter.py @@ -204,7 +204,7 @@ def __init__( self.scale_info.update({"bf16_ops": self.bf16_ops}) self.scale_info.update({"fp32_ops": self.fp32_ops}) - if "backend" in self.model.kwargs: + if "backend" in self.model.kwargs: # pragma: no cover self._sampling_model = Model(self.model._model, **self.model.kwargs) else: self._sampling_model = Model( @@ -245,12 +245,12 @@ def _inference(self, model): output_tensor = model.output_tensor # TF table initialization: https://github.com/tensorflow/tensorflow/issues/8665 node_names = [node.name for node in sess.graph.as_graph_def().node] - if "init_all_tables" in node_names: + if "init_all_tables" in node_names: # pragma: no cover init_table_op = sess.graph.get_operation_by_name("init_all_tables") sess.run(init_table_op) logger.info("Start sampling on calibration dataset.") - if hasattr(self.data_loader, "__len__") and len(self.data_loader) == 0: + if hasattr(self.data_loader, "__len__") and len(self.data_loader) == 0: # pragma: no cover feed_dict = {} _ = ( sess.run(output_tensor, feed_dict) @@ -333,7 +333,7 @@ def _inference_llm(self, model): feed_dict = {} if len(input_tensor_names) == 1: feed_dict[input_tensor_names[0]] = inputs - else: + else: # pragma: no cover assert len(input_tensor_names) == len(inputs), "inputs len must equal with input_tensor" for i, input_tensor_name in enumerate(input_tensor_names): feed_dict[input_tensor_name] = inputs[i] @@ -365,7 +365,7 @@ def _check_tf_version(self): # pragma: no cover if version1_gte_version2(tf.version.VERSION, "2.9.0"): is_supported_version = True - if tf.version.VERSION == "1.15.0-up3": + if tf.version.VERSION == "1.15.0-up3": # pragma: no cover is_supported_version = True if tf.version.VERSION in SPR_BASE_VERSIONS: @@ -405,7 +405,7 @@ def _check_tf_version(self): # pragma: no cover ) ) - def _check_args(self): + def _check_args(self): # pragma: no cover """Check model's arguments.""" if ( self.model.workspace_path @@ -429,7 +429,7 @@ def _gen_tmp_filenames(self): self._tmp_model = self._fp32_model else: # to keep temp model - if "backend" in self.model.kwargs: + if "backend" in self.model.kwargs: # pragma: no cover self._tmp_model = Model(self.model._model, **self.model.kwargs) else: self._tmp_model = Model( @@ -707,7 +707,7 @@ def _generate_calibration_data(self, tmp_path, output_data, enable_kl_algo=False if "backend" in self._tmp_model.kwargs: model = Model(tmp_path, **self._tmp_model.kwargs) - else: + else: # pragma: no cover model = Model( tmp_path, **self._tmp_model.kwargs, @@ -755,7 +755,9 @@ def _freeze_requantization_ranges(self, additional_data=None): self.scale_info.update(quantizev2_min) self.scale_info.update(requant_min_max) - if "scale_propagation_max_pooling" in self.recipes and self.recipes["scale_propagation_max_pooling"]: + if ( + "scale_propagation_max_pooling" in self.recipes and self.recipes["scale_propagation_max_pooling"] + ): # pragma: no cover self._tmp_graph_def = ScaleProPagationTransformer(self._tmp_graph_def).do_transformation() if debug and not self.new_api: @@ -817,7 +819,7 @@ def _fuse_requantize_with_fused_quantized_node(self): self._tmp_model.graph_def = self._tmp_graph_def - def _post_clean(self): + def _post_clean(self): # pragma: no cover """Delete the temporarily files generated during the quantization process. :return: None @@ -840,7 +842,7 @@ def quantize_with_qdq_pattern(self): self._insert_qdq_pairs() self._convert_qdq() - except ValueError as e: + except ValueError as e: # pragma: no cover logger.error("Fail to quantize graph due to {}.".format(str(e))) self._tmp_model = None raise @@ -885,10 +887,10 @@ def _insert_qdq_pairs(self): self.itex_mode, ).get_quantized_nodes() - if self.itex_mode: + if self.itex_mode: # pragma: no cover self.quantized_node_info.extend(self._search_y_pattern_for_itex()) - if self._enable_kl_op_names: + if self._enable_kl_op_names: # pragma: no cover self._get_fp32_print_node_names(self._enable_kl_op_names) self._generate_calibration_data(self._fp32_logged_model_path, self._fp32_print_data, True) diff --git a/neural_compressor/tensorflow/utils/model_wrappers.py b/neural_compressor/tensorflow/utils/model_wrappers.py index a2b65b7ad96..e1a58f2f53b 100644 --- a/neural_compressor/tensorflow/utils/model_wrappers.py +++ b/neural_compressor/tensorflow/utils/model_wrappers.py @@ -42,7 +42,7 @@ def get_tf_model_type(model): os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = "-1" model_type = get_model_type(model) - except: + except: # pragma: no cover os.environ.pop("CUDA_DEVICE_ORDER") os.environ.pop("CUDA_VISIBLE_DEVICES") raise TypeError( @@ -77,14 +77,14 @@ def get_model_type(model): model = tf.keras.models.load_model(model) if isinstance(model, tf.keras.Model) and hasattr(model, "to_json"): return "keras" - return "saved_model" + return "saved_model" # pragma: no cover except: pass if isinstance(model, tf.keras.Model) and hasattr(model, "to_json"): if json.loads(model.to_json())["class_name"] in ["Sequential", "Functional"]: # Keras adaptor only support Sequential or Functional model return "keras" - else: + else: # pragma: no cover # otherwise, the backend will fallback to tensorflow_itex return "saved_model" if isinstance(model, tf.Graph): @@ -93,16 +93,16 @@ def get_model_type(model): return "graph_def" elif not version1_gte_version2(tf.version.VERSION, "2.16.1") and isinstance( model, tf.compat.v1.estimator.Estimator - ): + ): # pragma: no cover return "estimator" elif isinstance(model, str): model = os.path.abspath(os.path.expanduser(model)) if model.endswith(".pb") and os.path.isfile(model): - if is_saved_model_format(os.path.dirname(model)): + if is_saved_model_format(os.path.dirname(model)): # pragma: no cover return "saved_model" else: return "frozen_pb" - elif model.endswith(".ckpt") and os.path.isfile(model): + elif model.endswith(".ckpt") and os.path.isfile(model): # pragma: no cover return "slim" elif os.path.isdir(model): if is_ckpt_format(model): @@ -156,7 +156,7 @@ def validate_and_inference_input_output(graph_def, input_tensor_names, output_te output_tensor_names = output_tensor_names elif temp_output_tensor_names: output_tensor_names = temp_output_tensor_names - else: + else: # pragma: no cover _, output_tensor_names = get_input_output_node_names(graph_def) return input_tensor_names, output_tensor_names @@ -254,7 +254,7 @@ def frozen_pb_session(model, input_tensor_names, output_tensor_names, **kwargs): def _contains_function_with_implements_attr(saved_model_proto): meta_graph = saved_model_proto.meta_graphs[0] for function in meta_graph.graph_def.library.function: - if function.attr.get("_implements", None) or function.attr.get("api_implements", None): + if function.attr.get("_implements", None) or function.attr.get("api_implements", None): # pragma: no cover return True return False @@ -339,8 +339,9 @@ def _get_graph_from_saved_model_v3(model, input_tensor_names, output_tensor_name """ from neural_compressor.adaptor.tf_utils.util import parse_saved_model - if isinstance(model, tf.keras.Model): - tmp_dir = DEFAULT_WORKSPACE + "/saved_model" + if isinstance(model, tf.keras.Model): # pragma: no cover + save_folder = "/tmp_model.keras" if version1_gte_version2(tf.version.VERSION, "2.16.1") else "/saved_model" + tmp_dir = DEFAULT_WORKSPACE + save_folder model.save(tmp_dir) model = tmp_dir graph_def, _, _, _, input_names, output_names = parse_saved_model( @@ -474,10 +475,10 @@ def _get_graph_from_saved_model_v1(model): meta_graph = get_meta_graph_def(model, saved_model_tags) signature_def = get_signature_def(meta_graph, signature_key) - inputs, outputs = get_inputs_outputs(signature_def) + inputs, outputs = get_inputs_outputs(signature_def) # pragma: no cover # Check SavedModel for assets directory. collection_def = meta_graph.collection_def - if constants.ASSETS_KEY in collection_def: + if constants.ASSETS_KEY in collection_def: # pragma: no cover raise ValueError("SavedModels with assets/ directory are not supported.") from tensorflow.compat.v1 import graph_util as tf_graph_util @@ -492,13 +493,13 @@ def _get_graph_from_saved_model_v1(model): sess.run(tf.compat.v1.tables_initializer()) output_nodes = list(set([output.split(":")[0] for output in outputs])) node_ops = [node.op for node in graph.as_graph_def().node] - if "MakeIterator" in node_ops: + if "MakeIterator" in node_ops: # pragma: no cover output_nodes.append("MakeIterator") table_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TABLE_INITIALIZERS) # For table initialization - for table_op in table_ops: + for table_op in table_ops: # pragma: no cover output_nodes.append(table_op.name) - if len(table_ops) > 0: + if len(table_ops) > 0: # pragma: no cover output_nodes.append("init_all_tables") graph_def = tf_graph_util.convert_variables_to_constants(sess, graph.as_graph_def(), output_nodes) return graph_def, inputs, outputs @@ -962,10 +963,10 @@ def _load_sess(self, model, **kwargs): def iter_op(self): """Return model iter op list.""" self._iter_op = [] - if self._sess is None: + if self._sess is None: # pragma: no cover self._load_sess(self._model, **self.kwargs) op_list = [node.op for node in self._sess.graph.as_graph_def().node] - if "MakeIterator" in op_list: + if "MakeIterator" in op_list: # pragma: no cover self._iter_op.append(self._sess.graph.get_operation_by_name("MakeIterator")) return self._iter_op @@ -979,7 +980,7 @@ def input_tensor_names(self): @input_tensor_names.setter def input_tensor_names(self, tensor_names): """Set input tensor names.""" - if len(tensor_names) == 0: + if len(tensor_names) == 0: # pragma: no cover logger.warning("Input tensor names is empty.") return if self._sess is not None: @@ -998,7 +999,7 @@ def output_tensor_names(self): @output_tensor_names.setter def output_tensor_names(self, tensor_names): """Set output tensor names.""" - if len(tensor_names) == 0: + if len(tensor_names) == 0: # pragma: no cover logger.warning("Output tensor names should not be empty.") return if self._sess is not None: @@ -1019,7 +1020,7 @@ def output_node_names(self): """Return output node names.""" output_node_names = tensor_to_node(self.output_tensor_names) iter_op_list = self.iter_op - if iter_op_list != []: + if iter_op_list != []: # pragma: no cover output_node_names += [iter_op.name for iter_op in iter_op_list] return copy.deepcopy(output_node_names) @@ -1039,7 +1040,7 @@ def output_tensor(self): def save(self, root=None): """Save Tensorflow model.""" - if not root: + if not root: # pragma: no cover root = DEFAULT_WORKSPACE + "/save.pb" root = os.path.abspath(os.path.expanduser(root)) # if not have suffix, default append .pb @@ -1091,12 +1092,12 @@ def get_weight(self, tensor_name): @property def model(self): """Return model in AutoTrackable object.""" - if self._auto_trackable: + if self._auto_trackable: # pragma: no cover return self._auto_trackable root = os.path.abspath(os.path.expanduser(DEFAULT_WORKSPACE)) root += str(time.time()) - if os.path.exists(root): + if os.path.exists(root): # pragma: no cover shutil.rmtree(root) os.makedirs(root, exist_ok=True) if not self._sess: @@ -1124,7 +1125,7 @@ def build_saved_model(self, root=None): builder (tf.compat.v1.saved_model.builder.SavedModelBuilder): builds the SavedModel protocol buffer and saves variables and assets. """ - if not root: + if not root: # pragma: no cover root = DEFAULT_WORKSPACE root = os.path.abspath(os.path.expanduser(root)) if os.path.exists(root): @@ -1231,7 +1232,7 @@ def model(self): @property def weight_name_mapping(self): """Return weight_name_mapping function.""" - if not self._weight_name_mapping: + if not self._weight_name_mapping: # pragma: no cover self._weight_name_mapping = self.kwargs.get("weight_name_mapping", None) assert self._weight_name_mapping is not None, "weight_name_mapping should not be None!" return self._weight_name_mapping @@ -1245,7 +1246,7 @@ def weight_name_mapping(self, weight_name_mapping): @property def sq_weight_scale_dict(self): """Return dict of weight scaler for smooth quantization.""" - if not self._sq_weight_scale_dict: + if not self._sq_weight_scale_dict: # pragma: no cover self._sq_weight_scale_dict = self.kwargs.get("sq_weight_scale_dict", None) assert self._weight_name_mapping is not None, "sq_weight_scale_dict should not be None!" return self._sq_weight_scale_dict @@ -1313,7 +1314,7 @@ def adjust_weight(self, graph_def): for idx, weight_tensor in enumerate(model.variables): parsed_weight_name = self.weight_name_mapping(weight_tensor.name) if parsed_weight_name in self.sq_weight_scale_dict: - if len(weight_tensor.shape) == 4: + if len(weight_tensor.shape) == 4: # pragma: no cover shape_parm = [0, 1, 3, 2] elif len(weight_tensor.shape) == 2: shape_parm = [1, 0] @@ -1334,10 +1335,10 @@ def save(self, root=None): from neural_compressor.tensorflow.quantization.utils.utility import parse_saved_model, reconstruct_saved_model - if not root: + if not root: # pragma: no cover root = DEFAULT_WORKSPACE root = os.path.abspath(os.path.expanduser(root)) - if os.path.exists(root): + if os.path.exists(root): # pragma: no cover shutil.rmtree(root) os.makedirs(root, exist_ok=True) @@ -1355,7 +1356,7 @@ class TensorflowCheckpointModel(TensorflowBaseModel): @property def graph_def(self): """Return graph definition.""" - if self.model_type == "graph_def": + if self.model_type == "graph_def": # pragma: no cover return self.sess.graph.as_graph_def() from tensorflow.compat.v1 import graph_util