From 214917151ace07ed07a22a52360b5cf0078fc0b6 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Wed, 23 Nov 2022 17:57:02 +0800 Subject: [PATCH 01/43] draft verion for tf2onnx int8 --- .../tf_utils/graph_rewriter/onnx/__init__.py | 16 + .../graph_rewriter/onnx/onnx_graph.py | 1380 +++++++++++++++++ .../tf_utils/graph_rewriter/onnx/onnx_node.py | 432 ++++++ .../graph_rewriter/onnx/onnx_schema.py | 123 ++ .../graph_rewriter/onnx/qdq_rewriter.py | 176 +++ .../graph_rewriter/onnx/tf2onnx_utils.py | 401 +++++ .../adaptor/tf_utils/tf2onnx_converter.py | 122 ++ neural_compressor/model/base_model.py | 1 - neural_compressor/model/model.py | 27 + ...test_tensorflow_qdq_convert_to_onnx_qdq.py | 98 ++ 10 files changed, 2775 insertions(+), 1 deletion(-) create mode 100644 neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/__init__.py create mode 100644 neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py create mode 100644 neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py create mode 100644 neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py create mode 100644 neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/qdq_rewriter.py create mode 100644 neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py create mode 100644 neural_compressor/adaptor/tf_utils/tf2onnx_converter.py create mode 100644 test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/__init__.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/__init__.py new file mode 100644 index 00000000000..369707c0ef6 --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py new file mode 100644 index 00000000000..af25eb75c9e --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py @@ -0,0 +1,1380 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import collections +import logging +import six +import numpy as np + +from onnx import helper, numpy_helper, shape_inference, AttributeProto, TensorProto +from . 
import tf2onnx_utils as utils +from .onnx_node import OnnxNode + +logger = logging.getLogger("neural_compressor") + +class OnnxGraph: + """"Class that provides graph manipulation and matching.""" + + def __init__(self, nodes, output_shapes=None, dtypes=None, target=None, opset=None, extra_opset=None, + input_names=None, output_names=None, is_subgraph=False, graph_name=None): + """Create Graph. + Args: + nodes: list of Node() + output_shapes: dict of tensorflow output shapes + dtypes: dict of tensorflow dtype + """ + if target is None: + target = [] + self._nodes = [] + self._nodes_by_name = {} + self._output_to_node_name = {} + self._output_to_consumers = {} + self._input_to_graph = {} + self.shapes = {} + self.graph_name = graph_name or utils.set_name("tfqdq_2_onnxqdq") + self._is_subgraph = is_subgraph + self.ta_reads = [] + # A list of index, output tuples of potential scan outputs in this graph + # Used by the tflite while loop handler + self.scan_outputs = [] + # Used by lstm_tf2_rewriter to indicate this subgraph is an LSTM cell + self.lstm_rewriter_context = None + self.gru_rewriter_context = None + self.func_inputs = [] + self.ragged_variant_list_reads = [] + self.ragged_variant_list_writes = [] + + self._dtypes = dtypes + self._output_shapes = output_shapes + + self.set_config(target, opset, extra_opset) + + self.outputs = output_names if output_names is not None else [] + + self.parent_graph = None + self.contained_graphs = {} # {node_name: {node_attribute_name: Graph}} + + ops = [OnnxNode(node, self) for node in nodes] + if input_names is not None: + input_names_set = set(input_names) + for n in ops: + for i, out in enumerate(n.output): + if out in input_names_set and not n.is_graph_input(): + n.output[i] = utils.set_name("@@ALLOC") + ops.append(OnnxNode(helper.make_node("Placeholder", [], outputs=[out], name=out), self)) + logger.info("Created placeholder for input %s", out) + + input_nodes = {n.output[0]: n for n in ops if n.is_graph_input()} + if input_names is not None: + self.inputs = [input_nodes[n] for n in input_names] + else: + self.inputs = list(input_nodes.values()) + + self.reset_nodes(ops) + + # add identity node after each output, in case it is renamed during conversion. + for o in self.outputs: + n = self.get_node_by_output_in_current_graph(o) + if n.is_graph_input(): + # Don't add identity if the node is also an input. We want to keep input names the same. 
+ continue + new_output_name = utils.add_port_to_name(n.name + "_" + utils.set_name("raw_output_")) + n_shapes = n.output_shapes + n_dtypes = n.output_dtypes + o_shape = self.get_shape(o) + o_dtype = self.get_dtype(o) + body_graphs = n.graph.contained_graphs.pop(n.name, None) + self.remove_node(n.name) + + new_outputs = [output if output != o else new_output_name for output in n.output] + # domain should be passed to new node + branches = {} + if body_graphs: + for attr_name, body_graph in body_graphs.items(): + body_graph.parent_graph = self + branches[attr_name] = body_graph + + _ = self.make_node(n.type, n.input, outputs=new_outputs, attr=n.attr, name=n.name, + skip_conversion=n._skip_conversion, dtypes=n_dtypes, shapes=n_shapes, + domain=n.domain, branches=branches) + + self.replace_all_inputs(o, new_output_name, ops=self.get_nodes()) + self.make_node("Identity", [new_output_name], outputs=[o], op_name_scope=n.name + "_" + "graph_outputs", + dtypes=[o_dtype], shapes=[o_shape]) + self.copy_shape(new_output_name, o) + self.copy_dtype(new_output_name, o) + + def create_new_graph_with_same_config(self): + """Create a clean graph inheriting current graph's configuration.""" + return OnnxGraph([], output_shapes={}, dtypes={}, target=self._target, opset=self._opset, + extra_opset=self.extra_opset, output_names=[]) + + def set_config(self, target=None, opset=None, extra_opset=None): + """Set graph fields containing conversion options""" + if target is None: + target = utils.DEFAULT_TARGET + + self._opset = utils.find_opset(opset) + self._target = set(target) + + if extra_opset is not None: + utils.assert_error(isinstance(extra_opset, list), "invalid extra_opset") + self._extra_opset = extra_opset + + @property + def input_names(self): + """Placeholder node outputs""" + return [node.output[0] for node in self.inputs] + + @property + def opset(self): + return self._opset + + @property + def extra_opset(self): + return self._extra_opset + + def is_target(self, *names): + """Return True if target platform contains any name.""" + return any(name in self._target for name in names) + + def make_consts(self, values, np_type=np.int64, skip_conversion=False, raw=True): + """create list of consts of same type""" + consts = [] + for value in values: + np_val = np.array(value).astype(np_type) + consts.append(self.make_const(utils.set_name("const"), np_val, skip_conversion, raw)) + return consts + + def make_const(self, name, np_val, skip_conversion=False, raw=True): + """Make a new constant in the graph. + Args: + name: const node name, must be unique. + np_val: value of type numpy ndarray. + skip_conversion: bool, indicate whether this created node would be mapped during conversion. 
+ raw: whether to store data at field of raw_data or the specific field according to its dtype + """ + np_val_flat = np_val.flatten() + is_bytes = np_val.dtype == object and len(np_val_flat) > 0 and isinstance(np_val_flat[0], bytes) + if raw and not is_bytes: + onnx_tensor = numpy_helper.from_array(np_val, name) + else: + onnx_tensor = helper.make_tensor(name, utils.map_numpy_to_onnx_dtype(np_val.dtype), + np_val.shape, np_val_flat, raw=False) + dtype = onnx_tensor.data_type + node = self.make_node("Const", [], outputs=[name], name=name, attr={"value": onnx_tensor}, + skip_conversion=skip_conversion, dtypes=[dtype], infer_shape_dtype=False) + self.set_shape(name, np_val.shape) + self.set_dtype(name, utils.map_numpy_to_onnx_dtype(np_val.dtype)) + return node + + def copy_const(self, node, name=None): + """Copy a const node, using name if specified""" + # TODO: support attr copy starting at opset 12 + if name is None: + name = utils.set_name(node.name) + return self.make_const(name, node.get_tensor_value(as_list=False)) + + def make_node(self, op_type, inputs, attr=None, output_count=1, outputs=None, skip_conversion=True, + op_name_scope=None, name=None, shapes=None, dtypes=None, domain=utils.ONNX_DOMAIN, + infer_shape_dtype=True, branches=None): + """Make a new onnx node in the graph""" + if attr is None: + attr = {} + if shapes is None: + shapes = [] + if dtypes is None: + dtypes = [] + if branches is None: + branches = {} + if name is None: + name = utils.set_name(op_type) + + if op_name_scope: + name = "_".join([op_name_scope, name]) + + logger.debug("Making node: Name=%s, OP=%s", name, op_type) + + if outputs is None: + outputs = [name + ":" + str(i) for i in range(output_count)] + + output_count = len(outputs) + raw_attr = {} + onnx_attrs = [] + for a, v in attr.items(): + if isinstance(v, AttributeProto): + onnx_attrs.append(v) + else: + raw_attr[a] = v + + n = self.get_node_by_name(name) + utils.assert_error(n is None, "name %s already exists in node: \n%s", name, n) + for o in outputs: + n = self.get_node_by_output_in_current_graph(o) + utils.assert_error(n is None, "output tensor named %s already exists in node: \n%s", o, n) + + onnx_node = helper.make_node(op_type, inputs, outputs, name=name, domain=domain, **raw_attr) + + for name2 in onnx_node.input: + self._register_input_name(name2, onnx_node) + + if op_type in ["If", "Loop", "Scan"]: + # we force the op containing inner graphs not skipped during conversion. 
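+            # If/Loop/Scan carry their body graphs as node attributes, so they must stay
+            # visible to the conversion pass that rewrites those nested graphs.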
+ skip_conversion = False + + node = OnnxNode(onnx_node, self, skip_conversion=skip_conversion) + if onnx_attrs: + _ = [node.set_attr_onnx(a) for a in onnx_attrs] + + for branch, body in branches.items(): + node.set_body_graph_as_attr(branch, body) + + if shapes: + utils.assert_error(len(shapes) == output_count, + "output shape count %s not equal to output count %s", len(shapes), output_count) + for i in range(output_count): + self.set_shape(node.output[i], shapes[i]) + + if dtypes: + utils.assert_error(len(dtypes) == output_count, + "output dtypes count %s not equal to output count %s", len(dtypes), output_count) + for i in range(output_count): + self.set_dtype(node.output[i], dtypes[i]) + + if (not shapes or not dtypes) and infer_shape_dtype: + self.update_node_shape_dtype(node, override=False) + + logger.debug("Made node: %s\n%s", node.name, node.summary) + self._nodes.append(node) + return node + + def append_node(self, node): + """Add a node to the graph.""" + output_shapes = node.output_shapes + output_dtypes = node.output_dtypes + node.graph = self + self._nodes.append(node) + self._nodes_by_name[node.name] = node + for i, name in enumerate(node.output): + self._output_to_node_name[name] = node.name + self.set_dtype(name, output_dtypes[i]) + self.set_shape(name, output_shapes[i]) + for name in node.input: + self._register_input_name(name, node) + + def remove_node(self, node_name): + """Remove node in current graph.""" + utils.assert_error(node_name in self._nodes_by_name, "node %s not in current graph, cannot remove", node_name) + node = self.get_node_by_name(node_name) + del self._nodes_by_name[node_name] + if node_name in self.contained_graphs: + del self.contained_graphs[node_name] + + if node in self.inputs: + self.inputs.remove(node) + + for op_output in node.output: + if op_output == "": + continue + del self._output_to_node_name[op_output] + + if op_output in self._output_shapes: + del self._output_shapes[op_output] + if op_output in self._dtypes: + del self._dtypes[op_output] + + for op_input in node.input: + if op_input == "": + continue + utils.assert_error( + op_input in self._output_to_consumers, + "Input %r of node %r not found.", op_input, node_name) + self._unregister_input_name(op_input, node) + + self._nodes.remove(node) + node.graph = None + + def reset_nodes(self, ops): + """Reset the graph with node list.""" + remained_dtypes = {} + remained_shapes = {} + remained_sub_graphs = {} + for op in ops: + for op_output in op.output: + # this check should be removed once we make sure all output tensors have dtype/shape. 
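+                # keep dtype/shape metadata only for tensors still produced by the remaining ops;
+                # entries for outputs that disappear with the reset are dropped.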
+ if op_output in self._dtypes: + remained_dtypes[op_output] = self._dtypes[op_output] + if op_output in self._output_shapes: + remained_shapes[op_output] = self._output_shapes[op_output] + + if op.name in self.contained_graphs: + remained_sub_graphs[op.name] = self.contained_graphs[op.name] + + self._nodes = ops + self.contained_graphs = remained_sub_graphs + self._nodes_by_name = {op.name: op for op in ops} + self._output_to_node_name = {} + self._output_to_consumers = {} + for op in ops: + for op_output in op.output: + self._output_to_node_name[op_output] = op.name + inps = op.input + for op_input in inps: + self._register_input_name(op_input, op) + + for n in self.inputs: + if n not in ops: + raise ValueError("graph input '" + n.name + "' not exist") + for o in self.outputs: + if o not in self._output_to_node_name: + raise ValueError("graph output '" + o.name + "' not exist") + + self._dtypes = remained_dtypes + self._output_shapes = remained_shapes + + def is_empty_input(self, name): + # in ONNX, operation may have optional input and an empty string may be used + # in the place of an actual argument's name to indicate a missing argument + return name == utils.ONNX_EMPTY_INPUT + + def check_integrity(self): + """ + Check graph integrity. Every node's input needs to associate with a node. + Return broken outputs. + """ + broken_outputs = set() + for node in self.get_nodes(): + for inp in node.input: + if self.get_node_by_output(inp) is None and not self.is_empty_input(inp): + broken_outputs.add(inp) + return list(broken_outputs) + + def update_node_shape_dtype(self, node, override=False): + """Try the best to infer shapes and dtypes for outputs of the node, + by default, we respect TF shapes and dtypes. + """ + if node.is_const() or node.is_graph_input(): + return + # NOTE: only support onnx node for now + if not utils.is_onnx_domain(node.domain): + return + + logger.debug("Infer shape and dtype for [%s]", node.name) + # NOTE: shape inference for some ops need the input values of the op, e.g., Reshape + # op needs the "Shape" value to infer output shape. 
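+        # collect the values of constant inputs below so they can be handed to
+        # ONNX shape inference as initializers.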
+ initializers = [] + for i, inp in enumerate(node.inputs): + if inp is None: + if not self.is_empty_input(node.input[i]): + if logger.isEnabledFor(logging.INFO): + logger.warning( + "[%s] infer a inexistent node: [%s], please check the code", + node.name, node.input[i] + ) + continue + if inp.is_const(): + t = inp.get_attr("value") + tensor = helper.get_attribute_value(t) + tensor.name = inp.output[0] + initializers.append(tensor) + + input_shapes = [self.get_shape(i) for i in node.input] + input_dtypes = [self.get_dtype(i) for i in node.input] + + shapes, dtypes = utils.infer_onnx_shape_dtype(node, self._opset, input_shapes, input_dtypes, initializers) + if not shapes or not dtypes: + return + + for output, shape, dtype in zip(node.output, shapes, dtypes): + if dtype == TensorProto.UNDEFINED: + logger.debug("Inferred dtype for [%s, type: %s] is UNDEFINED, SKIP", node.name, node.type) + else: + existing_dtype = self.get_dtype(output) + if existing_dtype is not None and existing_dtype != dtype and not override: + dtype = existing_dtype + self.set_dtype(output, dtype) + logger.debug("Set dtype of [%s] to %s", output, dtype) + + if shape is None: + logger.debug("Inferred shape for [%s, type: %s] is None, SKIP", node.name, node.type) + else: + existing_shape = self.get_shape(output) + if existing_shape is not None and not utils.are_shapes_equal(existing_shape, shape) and not override: + shape = existing_shape + self.set_shape(output, shape) + logger.debug("Set shape of [%s] to %s", output, shape) + + def update_proto(self, external_tensor_storage=None): + """Update the onnx protobuf from out internal Node structure.""" + for node in self._nodes: + node.update_proto(external_tensor_storage) + + def get_nodes(self): + """Get node list.""" + return self._nodes + + def get_node_by_output(self, output, search_in_parent_graphs=True): + """Get node by node output id recursively going through nested graphs. 
+ Args: + search_in_parent_graphs: search in all parent graphs + """ + ret = None + g = self + while not ret and g: + ret = g.get_node_by_output_in_current_graph(output) + if ret: + return ret + + if not search_in_parent_graphs: + break + g = g.parent_graph + return ret + + def get_node_by_output_in_current_graph(self, output): + """Get node by node output id.""" + name = self._output_to_node_name.get(output) + ret = None + if name: + ret = self._nodes_by_name.get(name) + return ret + + def get_node_by_name(self, name): + """Get node by name.""" + ret = self._nodes_by_name.get(name) + return ret + + def set_node_by_name(self, node): + """Set node by name.""" + self._nodes_by_name[node.name] = node + for op_output in node.output: + self._output_to_node_name[op_output] = node.name + for name in node.input: + self._register_input_name(name, node) + + def is_const(self, output): + return self.get_node_by_output(output).is_const() + + def get_tensor_value(self, output, as_list=True): + return self.get_node_by_output(output).get_tensor_value(as_list) + + def rename_tensors(self, tensors_to_rename): + """Replace tensor names within nodes and graph inputs/outputs""" + def rename_list(l): + return [tensors_to_rename.get(t, t) for t in l] + + def rename_keys(d): + return {tensors_to_rename.get(k, k): v for k, v in d.items()} + + self._output_to_node_name = rename_keys(self._output_to_node_name) + self._output_to_consumers = rename_keys(self._output_to_consumers) + self._dtypes = rename_keys(self._dtypes) + self._output_shapes = rename_keys(self._output_shapes) + self.outputs = rename_list(self.outputs) + for node in self._nodes: + node._input = rename_list(node._input) + node._output = rename_list(node._output) + + def change_node_name(self, node, new_name): + """Remove node in current graph.""" + utils.assert_error(new_name not in self._nodes_by_name, "node %s not unique ", new_name) + dtypes = node.output_dtypes + shapes = node.output_shapes + self.remove_node(node.name) + new_node = self.make_node(node.type, node.input, output_count=len(node.output), + attr=node.attr, dtypes=dtypes, shapes=shapes, name=new_name) + for i, old_output in enumerate(node.output): + new_output = utils.add_port_to_name(new_name, i) + for j, k in enumerate(self.outputs): + if k == old_output: + self.outputs[j] = new_output + break + self.replace_all_inputs(old_output, new_output, ops=self.get_nodes()) + return new_node + + def add_graph_input(self, name, dtype=None, shape=None): + """Add placeholder node as graph's input. Order matters only for subgraph. + Placeholders in original graph are assumed for main graph, order not matters. 
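+        Illustrative example (tensor name and shape are made up):
+            g.add_graph_input("input_1:0", TensorProto.FLOAT, [-1, 224, 224, 3])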
+ """ + if dtype is None: + dtype = self.get_dtype(name) + + if shape is None: + shape = self.get_shape(name) + + new_node = self.make_node("Placeholder", [], outputs=[name], dtypes=[dtype], shapes=[shape]) + self.inputs.append(new_node) + + def add_graph_input_with_default(self, name, default_const, dtype=None, shape=None): + """Add placeholderwithdefault.""" + if dtype is None: + dtype = self.get_dtype(name) + + if shape is None: + shape = self.get_shape(name) + + default_const_name = utils.add_port_to_name(utils.set_name("{}_default".format(name))) + default_const.output = [default_const_name] + new_node = self.make_node("PlaceholderWithDefault", [default_const_name], outputs=[name], + dtypes=[dtype], shapes=[shape]) + self.inputs.append(new_node) + + def add_graph_output(self, name, dtype=None, shape=None): + """Add node output as graph's output.""" + utils.assert_error(name in self._output_to_node_name, "output %s not exist in the graph", name) + + if dtype is None: + dtype = self.get_dtype(name) + + if shape is None: + shape = self.get_shape(name) + + if name not in self.outputs: + utils.assert_error(shape is not None, "shape for output %s should not be None", name) + utils.assert_error(dtype is not None, "dtype for output %s should not be None", name) + self.outputs.append(name) + self.set_shape(name, shape) + self.set_dtype(name, dtype) + else: + raise ValueError("graph output " + name + " already exists") + + def get_dtype(self, name): + """Get dtype for node.""" + node = self.get_node_by_output(name, search_in_parent_graphs=True) + return node.graph._dtypes.get(name) if node else None + + def set_dtype(self, name, dtype): + """Set dtype for node.""" + node = self.get_node_by_output(name, search_in_parent_graphs=True) + node.graph._dtypes[name] = dtype + + def copy_dtype(self, src_name, dst_name): + """Copy dtype from another node.""" + dtype = self.get_dtype(src_name) + self.set_dtype(dst_name, dtype) + + def get_shape(self, name): + """Get shape for node.""" + utils.assert_error(isinstance(name, six.text_type), "get_shape name is invalid type: %s", name) + node = self.get_node_by_output(name, search_in_parent_graphs=True) + shape = node.graph._output_shapes.get(name) if node else None + if shape: + for i, v in enumerate(shape): + if v is None: + # pylint: disable=unsupported-assignment-operation + shape[i] = -1 + # hack to allow utils.ONNX_UNKNOWN_DIMENSION to override batchsize if needed. + # default is -1. 
+ if shape[0] == -1: + # pylint: disable=unsupported-assignment-operation + shape[0] = utils.ONNX_UNKNOWN_DIMENSION + return shape + return shape + + def get_rank(self, name): + """Returns len(get_shape(name)) or None if shape is None""" + shape = self.get_shape(name) + if shape is None: + return None + return len(shape) + + def set_shape(self, name, val): + """Set new shape of node.""" + if isinstance(val, np.ndarray): + val = val.tolist() + if isinstance(val, tuple): + val = list(val) + node = self.get_node_by_output(name, search_in_parent_graphs=True) + utils.assert_error(node is not None, "cannot find node by output id %s", name) + node.graph._output_shapes[name] = val + + def copy_shape(self, input_name, output_name): + """Copy shape from another node.""" + shape = self.get_shape(input_name) + # assert shape is not None + if shape is not None: + self.set_shape(output_name, shape) + + def topological_sort(self, ops): + """Topological sort of graph.""" + # sort by name, the result will be reversed alphabeta + ops.sort(key=lambda op: op.name) + + def _push_stack(stack, node, in_stack): + stack.append(node) + if node in in_stack: + raise ValueError('Graph has cycles, node.name=%r.' % ops[node].name) + in_stack[node] = True + + def _get_unvisited_child(g, node, not_visited): + for child in g[node]: + if child in not_visited: + return child + return -1 + + n = len(ops) + g = [[] for _ in range(n)] + op_name_to_index = {} + for i, op in enumerate(ops): + op_name_to_index[op.name] = i + + for i, op in enumerate(ops): + all_input = set(op.input) + implicit_inputs = op.get_implicit_inputs() + all_input |= set(implicit_inputs) + # remove those empty inputs + all_input = list(filter(lambda a: a != '', all_input)) + for inp in sorted(all_input): + j = self.get_node_by_output(inp) + utils.assert_error(j is not None, "Cannot find node with output %r in graph %r", inp, self.graph_name) + if self.parent_graph and j.name not in op_name_to_index: + # there might be some outer-scoped inputs for an inner Graph. + pass + else: + g[op_name_to_index[j.name]].append(i) + + # label for each op. highest = sink nodes. + label = [-1 for _ in range(n)] + stack = [] + in_stack = dict() + not_visited = dict.fromkeys(range(n)) + label_counter = n - 1 + + while not_visited: + node = list(not_visited.keys())[0] + _push_stack(stack, node, in_stack) + while stack: + node = _get_unvisited_child(g, stack[-1], not_visited) + if node != -1: + _push_stack(stack, node, in_stack) + else: + node = stack.pop() + in_stack.pop(node) + not_visited.pop(node) + label[node] = label_counter + label_counter -= 1 + + ret = [x for _, x in sorted(zip(label, ops))] + self.reset_nodes(ret) + + def make_graph(self, doc, graph_name=None, external_tensor_storage=None): + """ + Create GraphProto for onnx from internal graph. 
+ Args: + optimize: optimize graph via onnx + doc: text for doc string of the graph + """ + graph_name = graph_name or self.graph_name + self.delete_unused_nodes(self.outputs) + self.topological_sort(self.get_nodes()) + self.update_proto(external_tensor_storage) + + ops = [] + const_ops = [] + graph_inputs = self.inputs.copy() + for op in self.get_nodes(): + if op.is_const(): + const_ops.append(op) + elif op.is_graph_input(): + if op not in graph_inputs: + graph_inputs.append(op) + else: + ops.append(op) + + # create initializers for placeholder with default nodes + initializers = [] + placeholder_default_const_ops = [] + for op in graph_inputs: + if op.type == "PlaceholderWithDefault": + utils.assert_error(op.inputs[0] is not None, "Cannot find node with output {}".format(op.input[0])) + utils.assert_error(op.inputs[0].is_const(), + "non-const default value for PlaceholderWithDefault node '%s' is not supported. " + "Use the --use_default or --ignore_default flags to convert this node.", op.name) + # copy the tensor value, set its name to current node's output, add as initializer + value = op.inputs[0].get_tensor_value(as_list=False) + tensor = numpy_helper.from_array(value, op.output[0]) + initializers.append(tensor) + placeholder_default_const_ops.append(op.inputs[0]) + + # create initializers for constant nodes + const_ops = [op for op in const_ops if op not in placeholder_default_const_ops] + for op in const_ops: + # not to use numpy_helper.from_array to create a new tensor + # because sometimes onnx will have a bug that only check the tensor data in specific field + # such as at upsample it only checks the float_data field. + t = op.get_value_attr(external_tensor_storage) + tensor = helper.get_attribute_value(t) + tensor.name = op.output[0] + initializers.append(tensor) + + # create input_tensor_values + input_ids = [op.output[0] for op in graph_inputs] + # onnx with IR version below 4 requires initializer should be in inputs. + # here we check opset version rather than IR version for the reason: + # https://github.com/onnx/tensorflow-onnx/pull/557 + # opset 9 come with IR 4. + if self.opset < 9: + input_ids += [op.output[0] for op in const_ops] + + input_tensor_values = self.make_onnx_graph_io(input_ids) + + # create output_tensor_values + output_tensor_values = self.make_onnx_graph_io(self.outputs) + + tensor_value_info = [] + + for op in ops: + if op.domain in [utils.ONNX_DOMAIN, utils.AI_ONNX_ML_DOMAIN]: + continue + # We still don't 100% trust the accuracy of all the shapes in graph.py, but for custom ops they are + # almost certainly accurate and onnx has no other way of knowing them. + for out in op.output: + if out == '' or out in self.outputs: + continue + dtype = self.get_dtype(out) + shape = self.get_shape(out) + v = utils.make_onnx_inputs_outputs(out, dtype, shape) + tensor_value_info.append(v) + + # create graph proto + graph = helper.make_graph([op.op for op in ops], + graph_name, + input_tensor_values, + output_tensor_values, + initializer=initializers, + doc_string=doc, + value_info=tensor_value_info) + + return graph + + def make_model(self, graph_doc, graph_name="tfqdq_to_onnxqdq", **kwargs): + """ + Create final ModelProto for onnx from internal graph. 
+ Args: + optimize: optimize graph via onnx + doc: text for doc string of the model + """ + graph = self.make_graph(graph_doc, graph_name) + + if "producer_name" not in kwargs: + kwargs = { + "producer_name": "neural compressor", + "producer_version": "1.0.0" + } + if "opset_imports" not in kwargs: + opsets = [helper.make_opsetid(utils.ONNX_DOMAIN, self._opset)] + opsets.append(utils.AI_ONNX_ML_OPSET) + if self.extra_opset is not None: + opsets.extend(self.extra_opset) + kwargs["opset_imports"] = opsets + model_proto = helper.make_model(graph, **kwargs) + + utils.assert_error(self.opset in utils.OPSET_TO_IR_VERSION, + "Opset %s is not supported yet. Please use a lower opset" % self.opset) + + # set the IR version based on opset + try: + model_proto.ir_version = utils.OPSET_TO_IR_VERSION.get(self.opset, model_proto.ir_version) + except: # pylint: disable=bare-except + logger.error("ir_version override failed - install the latest onnx version") + + return model_proto + + def make_onnx_graph_io(self, ids): + """Create tensor_value_info for passed input/output ids.""" + tensor_value_infos = [] + for name in ids: + dtype = self.get_dtype(name) + shape = self.get_shape(name) + + utils.assert_error(dtype is not None, "missing output dtype for " + name) + # TODO: allow None output shape or not? e.g. shape=(?,) + #utils.assert_error(shape is not None, "missing output shape for " + name) + if shape is None: logger.warning("missing output shape for %s", name) + + v = utils.make_onnx_inputs_outputs(name, dtype, shape) + tensor_value_infos.append(v) + return tensor_value_infos + + def dump_graph(self): + """Dump graph with shapes (helpful for debugging).""" + for node in self.get_nodes(): + input_names = ["{}{}".format(n, self.get_shape(n)) for n in node.input] + logger.debug("%s %s %s %s", + node.type, + self.get_shape(node.output[0]), + node.name, + ", ".join(input_names)) + + def follow_inputs(self, node, num, space=""): + """Follow inputs for (helpful for debugging).""" + val = [] + top = space == "" + if num == 0: + return [] + val.append("{}{} {} {}".format(space, node.type, node.name, self.get_shape(port_name(node.name)))) + space += " " + for j in node.inputs: + val.extend(self.follow_inputs(j, num - 1, space)) + if top: + print("\n".join(reversed(val))) + print() + return [] + return val + + def dump_node_statistics(self, include_attrs=False, include_subgraphs=True): + """Return a counter of op types (and optionally attribute names) within the graph""" + op_cnt = collections.Counter() + attr_cnt = collections.Counter() + for n in self.get_nodes(): + op_cnt[n.type] += 1 + for k in n.attr.keys(): + attr_cnt[k] += 1 + body_graphs = n.get_body_graphs() + if body_graphs and include_subgraphs: + for b_g in body_graphs.values(): + g_op_cnt, g_attr_cnt = b_g.dump_node_statistics(include_attrs=True, include_subgraphs=True) + op_cnt += g_op_cnt + attr_cnt += g_attr_cnt + + if include_attrs: + return op_cnt, attr_cnt + return op_cnt + + def remove_input(self, node, to_be_removed, input_index=None): + """Remove input from Node. + Args: + node: the node we expect the input on + to_be_removed: the node name we want to remove + input_index: if not None, index of the input to be removed, + the method is more efficient if *input_index* is specified, + otherwise, it has to look for every input named *old_input*. 
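+            Illustrative example: g.remove_input(node, node.input[1], input_index=1)
+            removes the second input of *node*.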
+ """ + assert isinstance(node, OnnxNode) and isinstance(to_be_removed, six.text_type) + if input_index is not None: + assert node.input[input_index] == to_be_removed + if node.input[input_index] in self._output_to_consumers: + to_ops = self._output_to_consumers[node.input[input_index]] + if node.name in to_ops: + to_ops.remove(node.name) + del node.input[input_index] + return + + for i, name in enumerate(node.input): + if name == to_be_removed: + utils.assert_error( + node.input.count(node.input[i]) <= 1, + "Node %r takes multiple times the same input %r. This case is not handled.", + node.name, node.input[i]) + self._unregister_input_name(node.input[i], node) + del node.input[i] + break + + # don't remove output from parent since others might depend on it + + def insert_new_node_on_input(self, node, op_type, input_name, name=None, domain=None, input_index=None, **kwargs): + """Create and insert a new node into the graph. + Args: + node: we want to replace the input for this node + op_type: type for new operation + input_name: the name(s) of the outputs above us + if scalar, new node placed above input_name + if list, new node placed above input_name[0]. list is inputs into new node + name: the name of the new op + kwargs: attributes of the new node + + Returns: + node that was inserted + """ + if name is None: + name = utils.set_name(node.name) + new_output = utils.add_port_to_name(name) + if not isinstance(input_name, list): + input_name = [input_name] + + new_node = self.make_node(op_type, input_name, attr=kwargs, outputs=[new_output], name=name, domain=domain) + if input_index is None: + for i, n in enumerate(node.input): + if n == input_name[0]: + self.replace_input(node, node.input[i], new_output, i) + break + else: + self.replace_input(node, node.input[input_index], new_output, input_index) + return new_node + + def insert_node_on_output(self, node, output_name=None): + """ + The inserted node takes the *output_name* as input and produces a + new output. The function goes through every node taking *output_name* + and replaces it by the new output name. + """ + if output_name is None: + output_name = node.input[0] + new_output = node.output[0] + + to_replace = [self.get_node_by_name(n) for n in self._output_to_consumers[output_name]] + to_replace = [n for n in to_replace if n != node] + self.replace_all_inputs(output_name, new_output, ops=to_replace) + return node + + def insert_new_node_on_output(self, op_type, output_name=None, name=None, inputs=None, domain=None, **kwargs): + """Create and insert a new node into the graph. + It then calls insert_node_on_output. 
+ + Args: + op_type: type for new operation + output_name: the names of the outputs above us + name: the name of the new op + kwargs: attributes of the new node + + Returns: + node that was inserted + """ + utils.assert_error(isinstance(output_name, six.text_type), "output_name's type is not expected: %s", + type(output_name)) + utils.assert_error(isinstance(op_type, six.text_type), "op_type's type is not expected: %s", + type(op_type)) + utils.assert_error(output_name is not None, "output_name cannot be None for op_type=%r.", op_type) + + if inputs is None: + inputs = [output_name] + if name is None: + name = utils.set_name(op_type) + + new_output = utils.add_port_to_name(name) + new_node = self.make_node(op_type, inputs, attr=kwargs, outputs=[new_output], name=name, domain=domain) + return self.insert_node_on_output(new_node, output_name) + + def find_output_consumers(self, output_name): + """Find all nodes consuming a given output.""" + if output_name in self._output_to_consumers: + ops = self._output_to_consumers[output_name] + ops = [self.get_node_by_name(n) for n in ops] + else: + ops = [] # self.get_nodes() + nodes = [] + for node in ops: + if node is None: + continue + if output_name in node.input: + nodes.append(node) + + # find consumers in sub graphs + if output_name in self._input_to_graph: + for g in self._input_to_graph[output_name].values(): + nodes.extend(g.find_output_consumers(output_name)) + return nodes + + def _register_input_name(self, input_name, node, only_graph=False): + "Register node taking a specific input." + if not only_graph: + if input_name not in self._output_to_consumers: + self._output_to_consumers[input_name] = set() + self._output_to_consumers[input_name].add(node.name) + if self.parent_graph is not None: + if input_name not in self.parent_graph._input_to_graph: + self.parent_graph._input_to_graph[input_name] = {} + self.parent_graph._input_to_graph[input_name][id(self)] = self + self.parent_graph._register_input_name(input_name, node, only_graph=True) + + def _unregister_input_name(self, input_name, node, only_graph=False): + "Unregister node taking a specific input." + node_name = node.name + if not only_graph: + if input_name in self._output_to_consumers[input_name]: + if node_name in self._output_to_consumers[input_name]: + self._output_to_consumers[input_name].remove(node_name) + if (self.parent_graph is not None and + input_name in self.parent_graph._input_to_graph and + id(self) in self.parent_graph._input_to_graph[input_name]): + del self.parent_graph._input_to_graph[input_name][id(self)] + self.parent_graph._unregister_input_name(input_name, node, only_graph=True) + + def replace_all_inputs(self, old_input, new_input, ops=None): + """ + Replace all inputs pointing to old_input with new_input. + *ops* is used if defined, otherwise `_output_to_consumers` + is used to determine the impacted nodes. 
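+        Illustrative example (tensor names are made up):
+            g.replace_all_inputs("conv1:0", "conv1_dequantized:0")
+        rewires every consumer of "conv1:0" to read "conv1_dequantized:0" instead.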
+ """ + if old_input == new_input: + return + if new_input not in self._output_to_consumers: + self._output_to_consumers[new_input] = set() + + if ops is not None: + keep_ops = True + elif old_input in self._output_to_consumers: + ops = list( + filter(lambda a: a is not None, + map(self.get_node_by_name, self._output_to_consumers[old_input]))) + keep_ops = False + else: + ops = [] + keep_ops = False + + for node in ops: + assert node is not None + if old_input in node.input and new_input in node.output: + raise RuntimeError("creating a circle in the graph is not allowed: " + node.name) + self._register_input_name(new_input, node) + + for i, input_name in enumerate(node.input): + if input_name == old_input: + self.replace_input(node, node.input[i], new_input, i) + + # modify references in sub graphs + if old_input in self._input_to_graph: + for g in self._input_to_graph[old_input].values(): + g.replace_all_inputs(old_input, new_input, + ops=g.get_nodes() if keep_ops else None) + + def replace_input(self, node, old_input, new_input, input_index=None): + """ + Replace one input in a node. + The method is more efficient if *input_index* is specified. + Otherwise, it renames every output named *old_input*. + """ + assert isinstance(node, OnnxNode) and isinstance(old_input, six.text_type) \ + and isinstance(new_input, six.text_type) + is_replaced = False + if input_index is None: + for i, input_name in enumerate(node.input): + if input_name == old_input: + node.input[i] = new_input + is_replaced = True + elif node.input[input_index] == old_input: + node.input[input_index] = new_input + is_replaced = True + else: + raise RuntimeError("Unable to replace input %r into %r for node %r." % (old_input, new_input, node.name)) + + to_ops = self._output_to_consumers.get(old_input, None) + if to_ops is not None: + if node.name in to_ops: + # A node may take twice the same entry. + to_ops.remove(node.name) + + self._register_input_name(new_input, node) + return is_replaced + + def replace_inputs(self, node, new_inputs): + """Replace node inputs.""" + assert isinstance(node, Node) and isinstance(new_inputs, list) + + for old_input in node.input: + to_ops = self._output_to_consumers.get(old_input, None) + if to_ops is not None and old_input in to_ops: + # To avoid issues when a node + # takes twice the same entry. + to_ops.remove(old_input) + + for input_name in new_inputs: + assert isinstance(input_name, six.text_type) + self._register_input_name(input_name, node) + + node.input = new_inputs + return True + + def _extract_sub_graph_nodes(self, dest_node, input_checker=None): + """Return nodes of subgraph ending with dest_node. + Args: + dest_node: output node of the subgraph to find + input_checker: customized input check function: bool func(node) + + Return: + a set of nodes + """ + res_set = set() + if not dest_node or (input_checker and input_checker(dest_node) is False): + return res_set + + processing_set = set([dest_node]) + while processing_set: + top_node = processing_set.pop() + res_set.add(top_node) + all_inputs = top_node.input + list(top_node.get_implicit_inputs()) + for input_id in all_inputs: + # we don't care about nested graph here, just handle current graph cropping. + node = self.get_node_by_output(input_id, search_in_parent_graphs=False) + if not node: + # some nodes (for example Scan) have optional inputs, which + # might have empty input. 
+ # subgraph might have input defined in outer graph + continue + if node not in res_set: + if input_checker and input_checker(node) is False: + continue + processing_set.add(node) + return res_set + + def extract_sub_graph_nodes(self, outputs_name, input_checker=None, remove_unused_inputs=True): + """Return nodes of subgraph having output_ids as outputs. + Args: + output_ids: output node output id of the subgraph to find + input_checker: customized input check function: bool func(node) + remove_unused_inputs: bool, indicates whether unused placeholder inputs will be removed + in the resulting nodes. + Return: + a list of nodes + """ + res_set = set() + + outputs_to_keep = list(outputs_name) + if not remove_unused_inputs: + # add placeholder nodes even if they are not connected to outputs. + # placeholder nodes with defaults can have inputs themselves + outputs_to_keep += [inp.output[0] for inp in self.inputs] + + for output in outputs_to_keep: + node = self.get_node_by_output(output, search_in_parent_graphs=False) + res_set = res_set.union(self._extract_sub_graph_nodes(node, input_checker)) + + return list(res_set) + + def delete_unused_nodes(self, outputs_name): + """Delete nodes not in subgraph ending with output_names.""" + if not outputs_name: + logger.debug("Outputs not specified, delete_unused_nodes not taking effect.") + return + + # we need keep those placeholders that are used as input of Loop's body graph. + # some of them are not used in the graph, but still need be there to keep the graph complete. + related_nodes = self.extract_sub_graph_nodes(outputs_name, remove_unused_inputs=False) + for node in related_nodes: + attr_body_graphs = node.get_body_graphs() + if attr_body_graphs: + for body_graph in attr_body_graphs.values(): + body_graph.delete_unused_nodes(body_graph.outputs) + self.reset_nodes(related_nodes) + + def safe_to_remove_nodes(self, to_delete): + """ List of nodes that safe to delete (i.e. 
outputs not consumed by other nodes.)""" + safe_to_remove = [] + delete_set = set(to_delete) + for n in delete_set: + out_consumers = set() + for out in n.output: + out_consumers |= set(self.find_output_consumers(out)) + if out_consumers.issubset(delete_set): + safe_to_remove.append(n) + return safe_to_remove + + # TODO(tomwildenhain): Remove this function + def safe_remove_nodes(self, to_delete): + """Delete nodes in `to_delete` without third-party node consuming it.""" + delete_set = set(to_delete) + for n in delete_set: + out_consumers = set() + for out in n.output: + out_consumers |= set(self.find_output_consumers(out)) + if out_consumers.issubset(delete_set): + self.remove_node(n.name) + + def is_safe_to_remove_nodes(self, to_delete, outputs_to_ignore=None): + """Returns true if the outputs of all the nodes in to_delete have no third-party nodes consuming them.""" + delete_set = set(to_delete) + outputs_to_ignore_set = set(outputs_to_ignore or []) + for n in delete_set: + out_consumers = set() + for out in n.output: + if out in outputs_to_ignore_set: + continue + out_consumers |= set(self.find_output_consumers(out)) + if not out_consumers.issubset(delete_set): + return False + return True + + +class GraphUtil(object): + """Utilities for Graph manipulation.""" + + @staticmethod + def optimize_graph(graph, catch_errors=True, optimizers=None): + return optimizer.optimize_graph(graph, catch_errors, optimizers=optimizers) + + @staticmethod + def optimize_model_proto(onnx_model_proto, catch_errors=True, return_graph=False, + optimizers=None): + """Optimize the model proto, for example: eliminating all useless Transpose pairs. + + Returns: + model proto (and possibly graph) after optimization, if optimizer run successfully + or onnx_model_proto, if exceptions happens + """ + try: + kwargs = GraphUtil.get_onnx_model_properties(onnx_model_proto) + graph = GraphUtil.create_graph_from_onnx_model(onnx_model_proto) + graph = GraphUtil.optimize_graph(graph, catch_errors, optimizers=optimizers) + model_proto = graph.make_model(onnx_model_proto.graph.doc_string, + graph_name=onnx_model_proto.graph.name, **kwargs) + + if onnx_model_proto.metadata_props: + metadata_props = {p.key: p.value for p in onnx_model_proto.metadata_props} + helper.set_model_props(model_proto, metadata_props) + if return_graph: + return model_proto, graph + return model_proto + except Exception as e: + if not catch_errors: + raise e + # sometimes, onnx shape inference will fail for some reason, + # return onnx_model_proto for this case + logger.warning("Failed to optimize model proto", exc_info=1) + if return_graph: + return onnx_model_proto, None + return onnx_model_proto + + @staticmethod + def get_onnx_model_properties(onnx_model_proto): + """Get ModelProto properties.""" + kwargs = {} + if onnx_model_proto.HasField('ir_version'): + kwargs["ir_version"] = onnx_model_proto.ir_version + if onnx_model_proto.HasField('producer_name'): + kwargs["producer_name"] = onnx_model_proto.producer_name + if onnx_model_proto.HasField('producer_version'): + kwargs["producer_version"] = onnx_model_proto.producer_version + if onnx_model_proto.HasField('domain'): + kwargs["domain"] = onnx_model_proto.domain + if onnx_model_proto.HasField('model_version'): + kwargs["model_version"] = onnx_model_proto.model_version + if onnx_model_proto.HasField('doc_string'): + kwargs["doc_string"] = onnx_model_proto.doc_string + kwargs["opset_imports"] = onnx_model_proto.opset_import + + return kwargs + + @staticmethod + def 
create_graph_from_onnx_model(onnx_model_proto, target=None): + """Create Graph loading onnx model proto.""" + # apply shape inference on the model + inferred_model = shape_inference.infer_shapes(onnx_model_proto) + utils.initialize_name_counter(inferred_model) + graph_proto = inferred_model.graph + + opset_version = None + extra_opset = [] + for opset in onnx_model_proto.opset_import: + if not opset.domain: + # domain field is None or empty means it is onnx domain + opset_version = opset.version + else: + extra_opset.append(opset) + + utils.assert_error(opset_version is not None, "opset version is not specified for onnx domain") + main_graph = GraphUtil.create_graph_from_onnx_graph(graph_proto, opset_version, extra_opset, target) + return main_graph + + @staticmethod + def create_graph_from_onnx_graph(graph_proto, opset_version=None, extra_opset=None, target=None): + """Create Graph loading onnx graph proto.""" + output_shapes = {} + output_dtypes = {} + + shapes, dtypes = GraphUtil._parse_shape_and_type_from_value_infos(graph_proto.value_info) + output_shapes.update(shapes) + output_dtypes.update(dtypes) + + shapes, dtypes = GraphUtil._parse_shape_and_type_from_value_infos(graph_proto.output) + output_shapes.update(shapes) + output_dtypes.update(dtypes) + + nodes_to_append = [] + for n in graph_proto.node: + if n.op_type == "Constant": + n.op_type = "Const" + + # some pytorch model had empty names - make one up + if not n.name: + n.name = utils.set_name("was_empty") + nodes_to_append.append(n) + + output_names = [] + for n in graph_proto.output: + output_names.append(n.name) + + g = OnnxGraph(nodes_to_append, output_shapes, output_dtypes, target, opset_version, extra_opset, None, output_names) + const_nodes = GraphUtil._parse_graph_initializer(g, graph_proto) + GraphUtil._parse_graph_input(g, graph_proto, [n.name for n in const_nodes]) + + for n in g.get_nodes(): + for attr_name, attr_val in n.attr.items(): + if attr_val.HasField('g'): + # it was assumed that the a.g has inferred shapes/dtypes. + sub_g = GraphUtil.create_graph_from_onnx_graph(attr_val.g, opset_version, extra_opset) + n.set_body_graph_as_attr(attr_name, sub_g) + return g + + @staticmethod + def get_node_count_from_onnx_graph(graph_proto): + op_cnt = collections.Counter() + for n in graph_proto.node: + op_cnt[n.op_type] += 1 + return op_cnt + + @staticmethod + def _parse_shape_and_type_from_value_infos(value_infos): + """Get nodes output shapes and types from value infos.""" + output_shapes = {} + output_dtypes = {} + for shape_info in value_infos: + type_proto = shape_info.type + elem_type = type_proto.tensor_type.elem_type + output_dtypes[shape_info.name] = elem_type + if not type_proto.tensor_type.HasField("shape"): + output_shapes[shape_info.name] = None + continue + shape = type_proto.tensor_type.shape + tuned_shape = [] + for d in shape.dim: + if d.HasField('dim_param'): + tuned_shape.append(-1) + elif d.HasField('dim_value'): + tuned_shape.append(d.dim_value) + else: + # it is found, some unknown dims is missing after inference. 
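+                    # (the dim has neither dim_param nor dim_value set, so treat it as unknown)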
+ tuned_shape.append(-1) + output_shapes[shape_info.name] = tuned_shape + + return output_shapes, output_dtypes + + @staticmethod + def _parse_graph_initializer(g, graph_proto): + """Get graph initializers and put into Graph object.""" + const_nodes = [] + for initializer in graph_proto.initializer: + np_val = numpy_helper.to_array(initializer) + const_nodes.append(g.make_const(initializer.name, np_val)) + + return const_nodes + + @staticmethod + def _parse_graph_input(g, graph_proto, const_node_names): + """Get graph inputs not defined as initializers and put into Graph object.""" + shapes, dtypes = GraphUtil._parse_shape_and_type_from_value_infos(graph_proto.input) + # make sure the input is added in order we read from graph_proto, + # because for subgraphs, the input orders matter. + for graph_input in graph_proto.input: + name = graph_input.name + const_initializer_node = g.get_node_by_output_in_current_graph(name) + if const_initializer_node is None: # is actual input rather than initializer + shape = shapes[name] + dtype = dtypes[name] + if name not in const_node_names: + g.add_graph_input(name, dtype, shape) + else: + g.add_graph_input_with_default(name, g.get_node_by_name(name), dtype, shape) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py new file mode 100644 index 00000000000..bd97146a2fb --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py @@ -0,0 +1,432 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import copy +import logging +import numpy as np + +from onnx import helper, numpy_helper, AttributeProto, TensorProto +from .onnx_schema import get_schema +from . import tf2onnx_utils as utils + +logger = logging.getLogger("neural_compressor") + +class OnnxNode: + """A ONNX Node Wrapper use for graph manipulations.""" + + def __init__(self, node, graph, skip_conversion=False): + """Create ONNX Node. + Args: + node: Onnx node in NodeProto + graph: OnnxGraph + """ + self._op = node + self.graph = graph + self._input = list(node.input) + self._output = list(node.output) + self._attr = {} + + graph.set_node_by_name(self) + # dict to original attributes + for attr in node.attribute: + self._attr[attr.name] = attr + self._skip_conversion = skip_conversion + + @property + def input(self): + return self._input + + @input.setter + def input(self, val): + # The setter can catch that all inputs are change + # but it cannot catch that one input is changed. + # That's method replace_input and replace_inputs must + # be used to change inputs to let the graph instance + # update its internal indices. + self._input = copy.deepcopy(val) + + @property + def output(self): + return self._output + + @output.setter + def output(self, val): + """Set op output. Output should be updated explicitly, + changing it would require output mapping changed. 
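+        Illustrative example: node.output = ["my_node:0"]; the graph's
+        output-to-node mapping is refreshed accordingly below.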
+ """ + self._graph_check() + for each_output in self._output: + del self.graph._output_to_node_name[each_output] + + self._output = val.copy() + for each_output in self._output: + self.graph._output_to_node_name[each_output] = self.name + + @property + def inputs(self): + """Input node objects.""" + self._graph_check() + val = [self.graph.get_node_by_output(n) for n in self._input] + return val + + @property + def attr(self): + return self._attr + + def get_value_attr(self, external_tensor_storage=None): + """Return onnx attr for value property of node. + Attr is modified to point to external tensor data stored in external_tensor_storage, if included. + """ + a = self._attr["value"] + if external_tensor_storage is not None and self in external_tensor_storage.node_to_modified_value_attr: + return external_tensor_storage.node_to_modified_value_attr[self] + if external_tensor_storage is None or a.type != AttributeProto.TENSOR: + return a + if np.product(a.t.dims) > external_tensor_storage.external_tensor_size_threshold: + a = copy.deepcopy(a) + tensor_name = self.name.strip() + "_" + str(external_tensor_storage.name_counter) + for c in '~"#%&*:<>?/\\{|}': + tensor_name = tensor_name.replace(c, '_') + external_tensor_storage.name_counter += 1 + external_tensor_storage.name_to_tensor_data[tensor_name] = a.t.raw_data + external_tensor_storage.node_to_modified_value_attr[self] = a + a.t.raw_data = b'' + a.t.ClearField("raw_data") + location = a.t.external_data.add() + location.key = "location" + location.value = tensor_name + a.t.data_location = TensorProto.EXTERNAL + return a + + def get_onnx_attrs(self, external_tensor_storage=None): + """Return onnx valid attributes. + Attrs point to external tensor data stored in external_tensor_storage, if included.""" + schema = get_schema(self.type, self.graph.opset, self.domain) + if schema is None and not (self.is_const() or self.is_graph_input()): + logger.debug("Node %s uses non-stardard onnx op <%s, %s>, skip attribute check", + self.name, self.domain, self.type) + onnx_attrs = {} + for a in self._attr.values(): + if a.name == "value": + onnx_attrs[a.name] = self.get_value_attr(external_tensor_storage) + elif schema is None or schema.has_attribute(a.name): + onnx_attrs[a.name] = a + return onnx_attrs + + @property + def name(self): + return self._op.name + + def child_name(self): + return utils.set_name(self.name) + + @property + def op(self): + """TODO: have a better interface for this.""" + return self._op + + @property + def type(self): + """Return Op type.""" + return self._op.op_type + + @type.setter + def type(self, val): + """Set Op type.""" + self._op.op_type = val + + @property + def domain(self): + """Return Op type.""" + return self._op.domain + + @domain.setter + def domain(self, val): + """Set Op type.""" + self._op.domain = val + + @property + def data_format(self): + """Return data_format.""" + attr_str = self.get_attr_value("data_format") + return "unkown" if attr_str is None else attr_str.decode("utf-8") + + @data_format.setter + def data_format(self, val): + """Set data_format.""" + self.set_attr("data_format", val) + + def is_nhwc(self): + """Return True if node is in NHWC format.""" + utils.assert_error('D' not in self.data_format, "is_nhwc called on %s with spatial=2 but data_format=%s", + self.name, self.data_format) + return self.data_format == "NHWC" + + def is_const(self): + """Return True if node is a constant.""" + return self.type in ["Const", "ConstV2"] + + def is_scalar(self): + """Return True if node is a constant with 
a scalar value.""" + if not self.is_const(): + return False + t = self.get_attr("value", default=None) + if t is None: + return False + t = numpy_helper.to_array(helper.get_attribute_value(t)) + return t.shape == tuple() + + def is_graph_input(self): + return self.type in ["Placeholder", "PlaceholderWithDefault", "PlaceholderV2"] + + def is_graph_input_default_const(self): + return self.is_const() and any( + out.is_graph_input() for out in self.graph.find_output_consumers(self.output[0]) + ) + + def is_while(self): + return self.type in ["While", "StatelessWhile", "Loop"] + + def __str__(self): + return str(self._op) + + def __repr__(self): + return "" % (self.type, self._op.name) + + @property + def summary(self): + """Return node summary information.""" + lines = [] + lines.append("OP={}".format(self.type)) + lines.append("Name={}".format(self.name)) + + g = self.graph + if self.input: + lines.append("Inputs:") + for name in self.input: + node = g.get_node_by_output(name) + op = node.type if node else "N/A" + lines.append("\t{}={}, {}, {}".format(name, op, g.get_shape(name), g.get_dtype(name))) + + if self.output: + for name in self.output: + lines.append("Outpus:") + lines.append("\t{}={}, {}".format(name, g.get_shape(name), g.get_dtype(name))) + + return '\n'.join(lines) + + def get_attr(self, name, default=None): + """Get raw attribute value.""" + attr = self.attr.get(name, default) + return attr + + def get_attr_value(self, name, default=None): + attr = self.get_attr(name) + if attr: + return helper.get_attribute_value(attr) + return default + + def get_attr_int(self, name): + """Get attribute value as int.""" + attr_int = self.get_attr_value(name) + utils.assert_error( + attr_int is not None and isinstance(attr_int, int), + "attribute %s is None", name + ) + return attr_int + + def get_attr_str(self, name, encoding="utf-8"): + """Get attribute value as string.""" + attr_str = self.get_attr_value(name) + utils.assert_error( + attr_str is not None and isinstance(attr_str, bytes), + "attribute %s is None", name + ) + return attr_str.decode(encoding) + + def set_attr(self, name, value): + self.attr[name] = helper.make_attribute(name, value) + + def set_attr_onnx(self, value): + self.attr[value.name] = value + + @property + def skip_conversion(self): + return self._skip_conversion + + @skip_conversion.setter + def skip_conversion(self, val): + self._skip_conversion = val + + # If some Node is created as onnx_node, then we don't need convert it + def need_skip(self): + return self._skip_conversion + + @property + def output_shapes(self): + """Get output shapes.""" + self._graph_check() + val = [self.graph.get_shape(n) for n in self._output] + return val + + @property + def output_dtypes(self): + """Get output dtypes.""" + self._graph_check() + val = [self.graph.get_dtype(n) for n in self._output] + return val + + def get_tensor_value(self, as_list=True): + """Get value for onnx tensor. + Args: + as_list: whether return numpy ndarray in list. + Returns: + If as_list=True, return the array as a (possibly nested) list. + Otherwise, return data of type np.ndarray. + + If a tensor is a scalar having value 1, + when as_list=False, return np.array(1), type is + when as_list=True, return 1, type is . 
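+            Illustrative example for a Const node holding the scalar 1:
+                node.get_tensor_value()               # returns 1, a Python int
+                node.get_tensor_value(as_list=False)  # returns np.array(1), a numpy ndarray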
+ """ + if not self.is_const(): + raise ValueError("get tensor value: '{}' must be Const".format(self.name)) + + t = self.get_attr("value") + if t: + t = numpy_helper.to_array(helper.get_attribute_value(t)) + if as_list is True: + t = t.tolist() # t might be scalar after tolist() + return t + + def scalar_to_dim1(self): + """Get value for onnx tensor.""" + if not self.is_const(): + raise ValueError("get tensor value: {} must be Const".format(self.name)) + + t = self.get_attr("value") + if t: + t = helper.get_attribute_value(t) + if not t.dims: + t.dims.extend([1]) + return t.dims + + def set_tensor_value(self, new_val): + """Set new value for existing onnx tensor. + Args: + new_val: value of type numpy ndarray + """ + if not self.is_const(): + raise ValueError("set tensor value: {} must be Const".format(self.name)) + t = self.get_attr("value") + if not t: + raise ValueError("set tensor value: {} is None".format(self.name)) + t = helper.get_attribute_value(t) + onnx_tensor = numpy_helper.from_array(new_val, t.name) + del t + self.set_attr("value", onnx_tensor) + # track shapes in _output_shapes + self._graph_check() + self.graph.set_shape(onnx_tensor.name, list(onnx_tensor.dims)) + + def get_body_graphs(self): + self._graph_check() + return self.graph.contained_graphs.get(self.name, None) + + def set_body_graph_as_attr(self, attr_name, graph): + self._graph_check() + if self.name not in self.graph.contained_graphs: + self.graph.contained_graphs[self.name] = {} + + self.graph.contained_graphs[self.name].update({attr_name: graph}) + graph.parent_graph = self.graph + + def update_proto(self, external_tensor_storage=None): + """Update protobuf from internal structure.""" + nodes = list(self._op.input) + for node in nodes: + self._op.input.remove(node) + self._op.input.extend(self.input) + nodes = list(self._op.output) + for node in nodes: + self._op.output.remove(node) + self._op.output.extend(self.output) + + # update attributes to proto + del self._op.attribute[:] + + # check attribute of type GraphProto + attr_graphs = self.get_body_graphs() + if attr_graphs: + for attr_name, sub_graph in attr_graphs.items(): + graph_proto = sub_graph.make_graph("graph for " + self.name + " " + attr_name, + external_tensor_storage=external_tensor_storage) + self.set_attr(attr_name, graph_proto) + + attr = list(self.get_onnx_attrs(external_tensor_storage).values()) + if attr: + self._op.attribute.extend(attr) + + def get_implicit_inputs(self, recursive=True): + """Get implicit inputs if the node has attributes being GraphProto.""" + output_available_in_cur_graph = set() + all_node_inputs = set() + + graphs = [] + body_graphs = self.get_body_graphs() + if body_graphs: + graphs.extend(body_graphs.values()) + + while graphs: + graph = graphs.pop() + for n in graph.get_nodes(): + output_available_in_cur_graph |= set(n.output) + for i in n.input: + all_node_inputs.add(i) + + if recursive: + b_graphs = n.get_body_graphs() + if b_graphs: + graphs.extend(b_graphs.values()) + + outer_scope_node_input_ids = all_node_inputs - output_available_in_cur_graph + return list(outer_scope_node_input_ids) + + def _graph_check(self): + utils.assert_error(self.graph is not None, "Node %s not belonging any graph", + self.name) + + def maybe_cast_input(self, supported, type_map): + """.maybe_cast_input + Args: + supported: list of supported types for inputs + type_map: dict type to supported type mapping + """ + did_cast = False + for i, name in enumerate(self.input): + dtype = self.graph.get_dtype(name) + if dtype not in 
supported[i]: + tdtype = type_map.get(dtype) + if tdtype is None: + raise RuntimeError("don't know how to cast type {} on node {}".format(dtype, name)) + shape = self.graph.get_shape(name) + cast_node = self.graph.insert_new_node_on_input( + self, "Cast", name, to=tdtype) + self.graph.set_dtype(cast_node.output[0], tdtype) + self.graph.set_shape(cast_node.output[0], shape) + did_cast = True + return did_cast diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py new file mode 100644 index 00000000000..895600432ee --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py @@ -0,0 +1,123 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import copy +from collections import defaultdict, OrderedDict +from onnx import defs, helper, TensorProto, OperatorSetIdProto, shape_inference + +from . import tf2onnx_utils as utils + +logger = logging.getLogger("neural_compressor") + + +class OnnxOpSchema(object): + """Wrapper for Onnx schema.""" + + def __init__(self, name, domain, since_version, attributes): + """Create a Onnx schema + Args: + name (str): op name + attributes (List[str]): valid attributes + domain (str): default value "" means it's Onnx domain + since_version (int): opset version, default is 1 + """ + self._name = name + self._domain = domain + self._attributes = attributes + self._since_version = since_version + + @property + def attributes(self): + return self._attributes + + @property + def domain(self): + return self._domain + + @property + def name(self): + return self._name + + @property + def since_version(self): + return self._since_version + + @staticmethod + def from_onnx_schema(onnx_schema): + name = onnx_schema.name + domain = onnx_schema.domain + since_version = int(onnx_schema.since_version) + attributes = onnx_schema.attributes + return OnnxOpSchema(name, domain, since_version, attributes) + + def has_attribute(self, attr): + return attr in self.attributes + + +def _register_all_schemas_with_history(): + """Register all schemas with history""" + onnx_schemas = defs.get_all_schemas_with_history() + name_domain_version_schema_map = defaultdict(lambda: defaultdict(dict)) + for s in onnx_schemas: + schema = OnnxOpSchema.from_onnx_schema(s) + name_domain_version_schema_map[schema.name][schema.domain][schema.since_version] = schema + + ordered_map = defaultdict(lambda: defaultdict(OrderedDict)) + for name, domain_version_schema_map in name_domain_version_schema_map.items(): + for domain, version_schema_map in domain_version_schema_map.items(): + ordered_map[name][domain] = OrderedDict( + sorted(version_schema_map.items(), key=lambda x: -x[0]) + ) + return ordered_map + + +def _parse_domain_opset_versions(schemas): + """ Get max opset version among all schemas within each domain. 
""" + domain_opset_versions = dict() + for domain_version_schema_map in schemas.values(): + for domain, version_schema_map in domain_version_schema_map.items(): + # version_schema_map is sorted by since_version in descend order + max_version = next(iter(version_schema_map)) + if domain not in domain_opset_versions: + domain_opset_versions[domain] = int(max_version) + else: + domain_opset_versions[domain] = max(domain_opset_versions[domain], int(max_version)) + return domain_opset_versions + + +# format is >> +# SinceVersion is sorted from high to low +_schemas = _register_all_schemas_with_history() + +_domain_opset_versions = _parse_domain_opset_versions(_schemas) + +def get_schema(name, max_inclusive_opset_version, domain=None): + """Get schema by name within specific version.""" + domain = domain or utils.ONNX_DOMAIN + domain_version_schema_map = _schemas[name] + version_schema_map = domain_version_schema_map[domain] + for version, schema in version_schema_map.items(): + if version <= max_inclusive_opset_version: + return schema + return None + +def get_max_supported_opset_version(domain=None): + """Get max supported opset version by current onnx package given a domain.""" + domain = domain or utils.ONNX_DOMAIN + return _domain_opset_versions.get(domain, None) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/qdq_rewriter.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/qdq_rewriter.py new file mode 100644 index 00000000000..75e2940fa81 --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/qdq_rewriter.py @@ -0,0 +1,176 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +from onnx import TensorProto, helper +from . import tf2onnx_utils as utils + +# pylint: disable=missing-docstring + +def extract_numpy_array(node): + return np.frombuffer(node.attr["value"].t.raw_data, dtype="float32") + +def convert_qdq_nodes(onnx_graph, match_results): + + for match in match_results: + qdq_node = match.get_op('output') + qdq_node_output_dtype = onnx_graph.get_dtype(qdq_node.output[0]) + qdq_node_output_shape = onnx_graph.get_shape(qdq_node.output[0]) + + # Get the attributes of qdq node + narrow_range = qdq_node.attr['narrow_range'].i + signed_input = qdq_node.attr['signed_input'].i + range_given = qdq_node.get_attr_value("range_given", qdq_node.type != "QuantizeAndDequantizeV2" or \ + qdq_node.type != "QuantizeAndDequantizeV4") + + min_quantized, max_quantized = [-127, 127] + if not narrow_range and signed_input: + min_quantized = -128 + + if not signed_input: + min_quantized, max_quantized = [0, 255] + + # Get axis attribute for per channel implementation. 
+ axis = qdq_node.get_attr_value('axis', -1) + q_attrs = {} + + quantized_np_dtype = np.int8 if signed_input else np.uint8 + quantized_dtype = TensorProto.INT8 if signed_input else TensorProto.UINT8 + + if axis != -1: + utils.assert_error(onnx_graph.opset >= 13, "Opset >= 13 is required for per channel quantization") + q_attrs['axis'] = axis + + if not range_given: + min_np = np.array(min_quantized, np.float32) + max_np = np.array(max_quantized, np.float32) + max_quantized_const = onnx_graph.make_const(utils.set_name("max_const"), max_np).output[0] + if signed_input: + min_quantized_const = onnx_graph.make_const(utils.set_name("min_const"), min_np).output[0] + reduce_attr = {'keepdims': 0} + if axis != -1: + inp_rank = onnx_graph.get_rank(qdq_node.input[0]) + utils.assert_error(inp_rank is not None, "Input rank cannot be unknown for qdq op %s", qdq_node.name) + reduce_axes = [i for i in range(inp_rank) if i != axis] + reduce_attr['axes'] = reduce_axes + + max_value = onnx_graph.make_node("ReduceMax", [qdq_node.input[0]], attr=reduce_attr).output[0] + if signed_input: + min_value = onnx_graph.make_node("ReduceMin", [qdq_node.input[0]], attr=reduce_attr).output[0] + + scale_from_max_side = onnx_graph.make_node("Div", [max_value, max_quantized_const]).output[0] + if signed_input: + scale_from_min_side = onnx_graph.make_node("Div", [min_value, min_quantized_const]).output[0] + scale = onnx_graph.make_node("Max", [scale_from_min_side, scale_from_max_side]).output[0] + else: + scale = scale_from_max_side + + if axis == -1: + zero_point_np = np.zeros([], dtype=quantized_np_dtype) + zero_point = onnx_graph.make_const(utils.set_name("zero_point"), zero_point_np).output[0] + else: + zero_tensor = helper.make_tensor("value", quantized_dtype, dims=[1], vals=[0]) + scale_shape = onnx_graph.make_node("Shape", [scale]).output[0] + zero_point = onnx_graph.make_node("ConstantOfShape", inputs=[scale_shape], attr={"value": zero_tensor}).output[0] + else: + # Get the min and max value of the inputs to QDQ op + min_value = extract_numpy_array(qdq_node.inputs[1]) + max_value = extract_numpy_array(qdq_node.inputs[2]) + + num_channels = min_value.shape[0] + scales = np.zeros(num_channels, dtype=np.float32) + + for i in range(num_channels): + # Calculate scales from the min and max values + scale_from_min_side = min_value[i] / min_quantized if min_quantized < 0 else 0 + scale_from_max_side = max_value[i] / max_quantized if max_quantized > 0 else 0 + + if scale_from_min_side > scale_from_max_side: + scale = scale_from_min_side + else: + scale = scale_from_max_side + + utils.assert_error(scale > 0, "Quantize/Dequantize scale must be greater than zero") + scales[i] = np.float32(scale) + + # Set scalars for scale and zero point for per layer quantization + if num_channels == 1: + scales = scales[0] + zero_point_np = np.zeros([], dtype=quantized_np_dtype) + else: + utils.assert_error(axis != -1, "Axis must be specified for per channel quantization") + zero_point_np = np.zeros([num_channels], dtype=quantized_np_dtype) + + # Split it into QuantizeLinear and DequantizeLinear and remove the QDQ node reference + cast_scale = scales.astype(np.float32) + scale = onnx_graph.make_const(name=utils.set_name("quant_scale"), np_val=cast_scale).output[0] + zero_point = onnx_graph.make_const(utils.set_name("zero_point"), zero_point_np).output[0] + + quant_node = onnx_graph.make_node(op_type="QuantizeLinear", + inputs=[qdq_node.input[0], scale, zero_point], + shapes=[qdq_node_output_shape], + attr=q_attrs, + dtypes=[quantized_dtype], 
+ name=utils.set_name("QuantLinearNode")) + + onnx_graph.set_shape(quant_node.output[0], qdq_node_output_shape) + + onnx_graph.remove_node(qdq_node.name) + + dequant_node = onnx_graph.make_node(op_type="DequantizeLinear", + inputs=[quant_node.output[0], scale, zero_point], + outputs=[qdq_node.output[0]], + shapes=[qdq_node_output_shape], + attr=q_attrs, + dtypes=[qdq_node_output_dtype], + name=utils.set_name("DequantLinearNode")) + onnx_graph.set_shape(dequant_node.output[0], qdq_node_output_shape) + + return onnx_graph.get_nodes() + +def rewrite_quantize_and_dequantize(g, ops): + + pattern_for_qdq_v2 = \ + OpTypePattern('QuantizeAndDequantizeV2', name='output', inputs=[ + OpTypePattern("*"), + OpTypePattern(None), + OpTypePattern(None), + ]) + pattern_for_qdq_v3 = \ + OpTypePattern('QuantizeAndDequantizeV3', name='output', inputs=[ + OpTypePattern("*"), + OpTypePattern(None), + OpTypePattern(None), + OpTypePattern(None), + ]) + pattern_for_qdq_v4 = \ + OpTypePattern('QuantizeAndDequantizeV4', name='output', inputs=[ + OpTypePattern("*"), + OpTypePattern(None), + OpTypePattern(None), + ]) + + # Match all the patterns for QDQ ops + patterns = [pattern_for_qdq_v2, pattern_for_qdq_v3, pattern_for_qdq_v4] + match_results = [] + for pattern in patterns: + matcher = GraphMatcher(pattern) + results = list(matcher.match_ops(ops)) + match_results.extend(results) + + return create_qdq_nodes(g, match_results) \ No newline at end of file diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py new file mode 100644 index 00000000000..721f1a32c2c --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py @@ -0,0 +1,401 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import os +import copy +import logging +import re +import tensorflow as tf +from google.protobuf import text_format +import numpy as np +from tensorflow.core.framework import types_pb2, tensor_pb2, graph_pb2 +from tensorflow.python.framework import tensor_util +from onnx import helper, onnx_pb, numpy_helper, defs, TensorProto, OperatorSetIdProto, shape_inference + +logger = logging.getLogger("neural_compressor") + +DEFAULT_OPSET_VERSION = 14 + +PREFERRED_OPSET = 14 + +ONNX_UNKNOWN_DIMENSION = -1 + + +# Built-in supported domains +ONNX_DOMAIN = "" +AI_ONNX_ML_DOMAIN = "ai.onnx.ml" + +# Built-in supported opset +AI_ONNX_ML_OPSET = helper.make_opsetid(AI_ONNX_ML_DOMAIN, 2) + +ONNX_EMPTY_INPUT = "" + +# ignore the following attributes +TF2ONNX_IGNORED_NODE_ATTRS = { + "T", "unknown_rank", "_class", "Tshape", "use_cudnn_on_gpu", "Index", "Tpaddings", + "TI", "Tparams", "Tindices", "Tlen", "Tdim", "Tin", "dynamic_size", "Tmultiples", + "Tblock_shape", "Tcrops", "index_type", "Taxis", "U", "maxval", + "Tout", "Tlabels", "Tindex", "element_shape", "Targmax", "Tperm", "Tcond", + "T_threshold", "shape_type", "_lower_using_switch_merge", + "parallel_iterations", "_num_original_outputs", "output_types", "output_shapes", + "key_dtype", "value_dtype" "capacity", "component_types", "shapes", "SrcT", "Treal", + "Toutput_types", "dense_shapes", "Tdense", "Tsegmentids", "Tshift", "Tnumsegments" +} + +TF2ONNX_SUBGRAPH_ATTRS = { + "body", "cond", "then_branch", "else_branch", "f" +} + +TF2ONNX_DTYPE_MAP = { + types_pb2.DT_FLOAT: onnx_pb.TensorProto.FLOAT, + types_pb2.DT_DOUBLE: onnx_pb.TensorProto.DOUBLE, + types_pb2.DT_HALF: onnx_pb.TensorProto.FLOAT16, + types_pb2.DT_BFLOAT16: onnx_pb.TensorProto.FLOAT16, + types_pb2.DT_INT8: onnx_pb.TensorProto.INT8, + types_pb2.DT_INT16: onnx_pb.TensorProto.INT16, + types_pb2.DT_INT32: onnx_pb.TensorProto.INT32, + types_pb2.DT_UINT8: onnx_pb.TensorProto.UINT8, + types_pb2.DT_QUINT8: onnx_pb.TensorProto.UINT8, + types_pb2.DT_UINT16: onnx_pb.TensorProto.UINT16, + types_pb2.DT_UINT32: onnx_pb.TensorProto.UINT32, + types_pb2.DT_UINT64: onnx_pb.TensorProto.UINT64, + types_pb2.DT_INT64: onnx_pb.TensorProto.INT64, + types_pb2.DT_STRING: onnx_pb.TensorProto.STRING, + types_pb2.DT_COMPLEX64: onnx_pb.TensorProto.COMPLEX64, + types_pb2.DT_COMPLEX128: onnx_pb.TensorProto.COMPLEX128, + types_pb2.DT_BOOL: onnx_pb.TensorProto.BOOL, + types_pb2.DT_RESOURCE: onnx_pb.TensorProto.INT64, + types_pb2.DT_VARIANT: onnx_pb.TensorProto.UNDEFINED +} + + +# +# mapping dtypes from onnx to numpy +# +ONNX_TO_NUMPY_DTYPE = { + onnx_pb.TensorProto.FLOAT: np.float32, + onnx_pb.TensorProto.FLOAT16: np.float16, + onnx_pb.TensorProto.DOUBLE: np.float64, + onnx_pb.TensorProto.INT32: np.int32, + onnx_pb.TensorProto.INT16: np.int16, + onnx_pb.TensorProto.INT8: np.int8, + onnx_pb.TensorProto.UINT8: np.uint8, + onnx_pb.TensorProto.UINT16: np.uint16, + onnx_pb.TensorProto.UINT32: np.uint32, + onnx_pb.TensorProto.UINT64: np.uint64, + onnx_pb.TensorProto.INT64: np.int64, + onnx_pb.TensorProto.BOOL: bool, + onnx_pb.TensorProto.COMPLEX64: np.complex64, + onnx_pb.TensorProto.COMPLEX128: np.complex128, + onnx_pb.TensorProto.STRING: object, +} + +# Mapping opset to IR version. 
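+# Each entry gives the IR version to record for a model that imports the given opset.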
+# Note: opset 7 and opset 8 came out with IR3 but we need IR4 because of PlaceholderWithDefault +# Refer from https://github.com/onnx/onnx/blob/main/docs/Versioning.md#released-versions +OPSET_TO_IR_VERSION = { + 1: 3, 2: 3, 3: 3, 4: 3, 5: 3, 6: 3, 7: 4, 8: 4, 9: 4, 10: 5, 11: 6, 12: 7, 13: 7, 14: 7, 15: 8, 16: 8, 17: 8 +} + + +DEFAULT_TARGET = [] + +INSERTED_OP_NAME = 1 +def set_name(name): + """Set op name for inserted ops.""" + global INSERTED_OP_NAME + INSERTED_OP_NAME += 1 + return "{}__{}".format(name, INSERTED_OP_NAME) + +def find_opset(opset): + """Find opset.""" + if opset is None or opset == 0: + opset = defs.onnx_opset_version() + if opset > PREFERRED_OPSET: + # if we use a newer onnx opset than most runtimes support, default to the one most supported + opset = PREFERRED_OPSET + return opset + +def assert_error(bool_val, error_msg, *args): + """Raise error message.""" + if not bool_val: + raise ValueError("Assert failure: " + error_msg % args) + +def map_numpy_to_onnx_dtype(np_dtype): + """Map numpy dtype to ONNX dtype""" + for onnx_dtype, numpy_dtype in ONNX_TO_NUMPY_DTYPE.items(): + if numpy_dtype == np_dtype: + return onnx_dtype + raise ValueError("unsupported numpy dtype '%s' for mapping to onnx" % np_dtype) + +def add_port_to_name(name, nr=0): + """Map node output number to name.""" + return name + ":" + str(nr) + +def get_tensorflow_node_attr(node, name): + """Parse tensorflow node attribute.""" + return node.get_attr(name) + +def get_tensorflow_tensor_shape(tensor): + """Get shape from tensorflow tensor.""" + shape = [] + try: + shape = tensor.get_shape().as_list() + except Exception: # pylint: disable=broad-except + shape = None + return shape + +def get_tensorflow_node_shape_attr(node): + """Get shape from tensorflow attr "shape".""" + dims = None + try: + shape = get_tensorflow_node_attr(node, "shape") + if not shape.unknown_rank: + dims = [int(d.size) for d in shape.dim] + except: # pylint: disable=bare-except + pass + return dims + +def map_tensorflow_dtype(dtype): + """Convert tensorflow dtype to ONNX.""" + if dtype: + dtype = TF2ONNX_DTYPE_MAP[dtype] + return dtype + +def get_tensorflow_tensor_data(tensor): + """Get data from tensorflow tensor.""" + if not isinstance(tensor, tensor_pb2.TensorProto): + raise ValueError("Require the tensor is instance of TensorProto") + np_data = tensor_util.MakeNdarray(tensor) + if not isinstance(np_data, np.ndarray): + raise ValueError("np_data=", np_data, " isn't ndarray") + return np_data + +def convert_tensorflow_tensor_to_onnx(tensor, name=""): + """Convert tensorflow tensor to onnx tensor.""" + np_data = get_tensorflow_tensor_data(tensor) + if np_data.dtype == object: + # assume np_data is string, numpy_helper.from_array accepts ndarray, + # in which each item is of str while the whole dtype is of object. 
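+        # The vectorized astype() call below is the fast path; tensors holding non-ASCII
+        # bytes raise UnicodeDecodeError there and are decoded element by element instead.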
+ try: + # Faster but fails on Unicode + np_data = np_data.astype(np.str).astype(object) + except UnicodeDecodeError: + decode = np.vectorize(lambda x: x.decode('UTF-8')) + np_data = decode(np_data).astype(object) + except: # pylint: disable=bare-except + raise RuntimeError("Not support type: {}".format(type(np_data.flat[0]))) + return numpy_helper.from_array(np_data, name=name) + +def read_tensorflow_node_attrs(node): + """Read tensorflow node attribute names.""" + attr = {} + + for each_attr in node.node_def.attr: + value = get_tensorflow_node_attr(node, each_attr) + if each_attr in TF2ONNX_IGNORED_NODE_ATTRS or each_attr in TF2ONNX_SUBGRAPH_ATTRS or \ + isinstance(value, tensor_pb2.TensorProto): + pass + elif each_attr == "shape": + shape = get_tensorflow_node_shape_attr(node) + if shape is not None: + attr[each_attr] = shape + elif each_attr == "DstT": + attr["to"] = map_tensorflow_dtype(value) + elif isinstance(value, tf.DType): + attr[each_attr] = map_tensorflow_dtype(value) + elif isinstance(value, list) and len(value) > 0 and isinstance(value[0], tf.DType): + attr[each_attr] = [map_tensorflow_dtype(v) for v in value] + else: + attr[each_attr] = get_tensorflow_node_attr(node, each_attr) + + return attr + +def infer_onnx_shape_dtype(node, opset_version, input_shapes, input_dtypes, initializers=None): + """ + Infer shapes and dtypes for outputs of the node. + Sometimes, shape inference needs the values of node's inputs, so initializers are used. + """ + + def build_onnx_op(node): + """Build onnx op""" + onnx_node = helper.make_node(node.type, node.input, node.output, name=node.name) + # deal with attributes + attr = [] + attr_graphs = node.get_body_graphs() + if attr_graphs: + for attr_name, sub_graph in attr_graphs.items(): + copied_sub_graph = copy.deepcopy(sub_graph) + graph_proto = copied_sub_graph.make_graph("graph for " + node.name + " " + attr_name) + attr.append(helper.make_attribute(attr_name, graph_proto)) + attr.extend(node.get_onnx_attrs().values()) + if attr: + onnx_node.attribute.extend(attr) + return onnx_node + + inputs = [] + outputs = [] + for inp, shape, dtype in zip(node.input, input_shapes, input_dtypes): + inputs.append(utils.make_onnx_inputs_outputs(inp, dtype, shape)) + for output in node.output: + outputs.append(utils.make_onnx_inputs_outputs(output, TensorProto.UNDEFINED, None)) + graph_proto = helper.make_graph([build_onnx_op(node)], "infer-graph", inputs, outputs, initializer=initializers) + imp = OperatorSetIdProto() + imp.version = opset_version + model_proto = helper.make_model(graph_proto, opset_imports=[imp]) + + inferred_model = None + try: + try: + inferred_model = shape_inference.infer_shapes(model_proto, strict_mode=True) + except TypeError: + # strict_mode arg doesn't exist in old onnx packages + inferred_model = shape_inference.infer_shapes(model_proto) + except Exception: # pylint: disable=broad-except + logger.warning( + "ONNX Failed to infer shapes and dtypes for [%s, type: %s]", + node.name, node.type, exc_info=1 + ) + return None, None + + shapes = {} + dtypes = {} + for output in inferred_model.graph.output: + tensor_type = output.type.tensor_type + if tensor_type.HasField("elem_type"): + dtypes[output.name] = tensor_type.elem_type + else: + dtypes[output.name] = TensorProto.UNDEFINED + # Missing dim_value in shapes of onnx means unknown which is -1 in our convertor + if tensor_type.HasField("shape"): + shapes[output.name] = [ + dim.dim_value if dim.HasField("dim_value") else -1 for dim in tensor_type.shape.dim + ] + else: + 
shapes[output.name] = None + output_shapes = [] + output_dtypes = [] + for output in node.output: + if output in shapes: + output_shapes.append(shapes[output]) + else: + output_shapes.append(None) + if output in dtypes: + output_dtypes.append(dtypes[output]) + else: + output_dtypes.append(TensorProto.UNDEFINED) + return output_shapes, output_dtypes + +def make_onnx_shape(shape): + """shape with -1 is not valid in onnx ... make it a name.""" + if shape: + # don't do this if input is a scalar + return [set_name("unk") if i == -1 else i for i in shape] + return shape + +class SeqType: + """Wrap around TensorProto.* to signify a tensor sequence of a given type""" + def __init__(self, tensor_dtype): + self.dtype = tensor_dtype + + def __eq__(self, other): + if isinstance(other, SeqType): + return self.dtype == other.dtype + return NotImplemented + + def __repr__(self): + return "SeqType(%r)" % self.dtype + +def make_onnx_inputs_outputs(name, elem_type, shape, **kwargs): + """Wrapper for creating onnx graph inputs or outputs + name, # type: Text + elem_type, # type: TensorProto.DataType + shape, # type: Optional[Sequence[int]] + """ + if elem_type is None: + elem_type = onnx_pb.TensorProto.UNDEFINED + elif isinstance(elem_type, SeqType): + return helper.make_tensor_sequence_value_info(name, elem_type.dtype, make_onnx_shape(shape), **kwargs) + return helper.make_tensor_value_info( + name, + elem_type, + make_onnx_shape(shape), + **kwargs + ) + +def save_protobuf(path, message, as_text=False): + """Save ONNX protobuf file.""" + dir_name = os.path.dirname(path) + if dir_name: + os.makedirs(dir_name, exist_ok=True) + if as_text: + with open(path, "w") as f: + f.write(text_format.MessageToString(message)) + else: + with open(path, "wb") as f: + f.write(message.SerializeToString()) + +def is_onnx_domain(domain): + """Check if it's onnx domain.""" + if domain is None or domain == "": + return True + return False + +def is_list_or_tuple(obj): + """Check the object is list or tuple.""" + return isinstance(obj, (list, tuple)) + +def are_shapes_equal(src, dest): + """ Check whether 2 shapes are equal. 
""" + if src is None: + return dest is None + if dest is None: + return src is None + + assert_error(is_list_or_tuple(src), "invalid type for src") + assert_error(is_list_or_tuple(dest), "invalid type for dest") + + if len(src) != len(dest): + return False + return all(i == j for i, j in zip(src, dest)) + +def get_subgraphs_from_onnx(model_proto): + """Returns an iterator over the graphs/subgraphs of a model (using dfs)""" + stack = [model_proto.graph] + while stack: + g = stack.pop() + yield g + for node in g.node: + for attr in node.attribute: + if hasattr(attr, "g"): + stack.append(attr.g) + if hasattr(attr, "graphs"): + stack.extend(attr.graphs) + +def initialize_name_counter(model_proto): + """Avoid name conflicts by initializing the counter used by make_name based on the provided model""" + suffix_regex = re.compile(r"__(\d+)(:\d+)?$") + def avoid_name(name): + global INSERTED_OP_NAME + suffix = suffix_regex.search(name) + if suffix: + INSERTED_OP_NAME = max(INSERTED_OP_NAME, int(suffix.group(1)) + 1) + for g in get_subgraphs_from_onnx(model_proto): + for n in g.node: + avoid_name(n.name) + for out in n.output: + avoid_name(out) diff --git a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py new file mode 100644 index 00000000000..847b7979a0b --- /dev/null +++ b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py @@ -0,0 +1,122 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import logging +import tensorflow as tf +from onnx import helper +from tensorflow.core.framework import tensor_pb2 + +from neural_compressor.utils.utility import dump_elapsed_time +from .graph_rewriter.onnx import tf2onnx_utils as utils +from .graph_rewriter.onnx.onnx_graph import OnnxGraph + + +logger = logging.getLogger("neural_compressor") + +class TensorflowQDQToOnnxQDQConverter: + """Convert tensorflow QDQ graph to ONNX QDQ graph.""" + def __init__(self, model, input_names, output_names, opset_version=utils.DEFAULT_OPSET_VERSION): + """constructor + + Args: + model (graphdef): tensorflow QDQ graphdef + """ + graph = tf.Graph() + with graph.as_default(): + tf.import_graph_def(model) + + self.graph = graph + self.opset_version = opset_version + self.input_names = input_names + self.output_names = output_names + + @dump_elapsed_time("Pass TensorflowQDQToOnnxQDQConverter") + def convert(self, save_path): + """ convert tensorflow QDQ model to onnx QDQ model + + Args: + input_graph_def (graphdef): tensorflow QDQ graphdef object + + Returns: + onnx QDQ graph + """ + onnx_nodes = [] + output_shapes = {} + dtypes = {} + functions = {} + logger.info("Using ONNX opset %s", self.opset_version) + + node_list = self.graph.get_operations() + + # create dict with output to shape mappings + for node in node_list: + for out in node.outputs: + shape = utils.get_tensorflow_tensor_shape(out) + dtypes[out.name] = utils.map_tensorflow_dtype(out.dtype) + output_shapes[out.name] = shape + + # Convert the TF FP32 node to ONNX FP32 node + for node in node_list: + attr_dict = utils.read_tensorflow_node_attrs(node) + convert_to_onnx = True + for each_attr in node.node_def.attr: + value = utils.get_tensorflow_node_attr(node, each_attr) + if each_attr == "T": + if value and not isinstance(value, list): + dtypes[node.name] = utils.map_tensorflow_dtype(value) + elif each_attr in utils.TF2ONNX_SUBGRAPH_ATTRS: + input_shapes = [input.get_shape() for input in node.inputs] + nattr = utils.get_tensorflow_node_attr(node, each_attr) + attr_dict[each_attr] = nattr.name + functions[nattr.name] = input_shapes + elif isinstance(value, tensor_pb2.TensorProto): + onnx_tensor = utils.convert_tensorflow_tensor_to_onnx( + value, name=utils.add_port_to_name(node.name)) + attr_dict[each_attr] = onnx_tensor + node_type = node.type + node_input_names = [i.name for i in node.inputs] + node_output_names = [i.name for i in node.outputs] + + if convert_to_onnx: + try: + onnx_node = helper.make_node(node_type, node_input_names, node_output_names, + name=node.name, **attr_dict) + onnx_nodes.append(onnx_node) + except Exception as ex: + logger.error("tf2onnx node convert failed for %s, ex=%s", node.name, ex) + raise + + # Build ONNX Graph using onnx_nodes, output_shapes and dtypes + onnx_graph = OnnxGraph(onnx_nodes, output_shapes, dtypes) + + # Convert QDQ pattern and Insert them into the ONNX Graph + + + + + # + + + # Build ONNX model + model_proto = onnx_graph.make_model("converted from neural compressor") + # Save ONNX model + utils.save_protobuf(save_path, model_proto) + + logger.info("Successfully converted TensorFlow model to ONNX") + logger.info("Model inputs: %s", [n.name for n in model_proto.graph.input]) + logger.info("Model outputs: %s", [n.name for n in model_proto.graph.output]) + diff --git a/neural_compressor/model/base_model.py b/neural_compressor/model/base_model.py index 029723ad821..9d94877cd7f 100644 --- a/neural_compressor/model/base_model.py +++ b/neural_compressor/model/base_model.py @@ -42,7 +42,6 @@ def save(self, 
root, *args, **kwargs): def export( self, save_path: str, - input, target_model_type: str = 'ONNX', quant_format: str = 'QDQ', opset_version: int = 14, diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py index 13629a19038..8075befddc5 100644 --- a/neural_compressor/model/model.py +++ b/neural_compressor/model/model.py @@ -954,6 +954,33 @@ def convert(self, src_type="QDQ", dst_type="TFDO", *args, **kwargs): ''' abstract method of model saving, Tensorflow model only''' raise NotImplementedError + def export( + self, + save_path: str, + target_model_type: str = 'ONNX', + quant_format: str = 'QDQ', + opset_version: int = 14, + *args, + **kwargs + ): + if target_model_type != 'ONNX': + logger.warning("We only support target model type ONNX currently.") + sys.exit(0) + + if quant_format == 'QDQ' and opset_version < 13: # pragma: no cover + opset_version = 13 + logger.warning("QDQ format requires opset_version >= 13, " + + "we reset opset_version={} here".format(opset_version)) + + from neural_compressor.adaptor.tf_utils.tf2onnx_converter import TensorflowQDQToOnnxQDQConverter + TensorflowQDQToOnnxQDQConverter(self.graph_def, self.input_tensor_names, \ + self.output_tensor_names, opset_version).convert(save_path) + + info = "The QDQ ONNX Model is exported to path: {0}".format(save_path) + logger.info("*"*len(info)) + logger.info(info) + logger.info("*"*len(info)) + class TensorflowSavedModelModel(TensorflowBaseModel): def get_all_weight_names(self): diff --git a/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py b/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py new file mode 100644 index 00000000000..718f6f825ee --- /dev/null +++ b/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py @@ -0,0 +1,98 @@ +# +# -*- coding: utf-8 -*- +# +import unittest +import os +import shutil +import yaml + +from neural_compressor.adaptor.tf_utils.util import disable_random +from neural_compressor.experimental import Quantization, common + +from neural_compressor.adaptor.tf_utils.util import version1_lt_version2, version1_gte_version2 + +import tensorflow as tf +from tensorflow.python.framework import graph_util + +def build_fake_yaml(fake_yaml, save_path, **kwargs): + y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open(file=save_path, mode=kwargs['mode'], encoding=kwargs['encoding']) as f: + yaml.dump(y, f) + +@unittest.skipIf(tf.version.VERSION.find('up') == -1 and tf.version.VERSION < '2.0', "Only supports tf 1.15.up2/up3 and 2.x") +class TestConvertTensorflowQDQToOnnxQDQ(unittest.TestCase): + @classmethod + def setUpClass(self): + fake_yaml = ''' + model: + name: fake_model_cpu + framework: tensorflow_itex + inputs: input + device: cpu + quantization: + model_wise: + weight: + granularity: per_tensor + scheme: sym + dtype: int8 + algorithm: minmax + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: basic + accuracy_criterion: + relative: 0.1 + exit_policy: + performance_only: True + workspace: + path: workspace + ''' + build_fake_yaml(fake_yaml, 'fake_yaml.yaml', mode="w", encoding="utf-8") + + @classmethod + def tearDownClass(self): + os.remove('fake_yaml.yaml') + if version1_gte_version2(tf.version.VERSION, '2.8.0'): + shutil.rmtree('workspace') + + @disable_random() + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0'), "Only supports tf greater 2.7.0") + def test_convert_tf_qdq_to_onnx_qdq(self): + x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") + top_relu = tf.nn.relu(x) + paddings = tf.constant([[0, 
0], [1, 1], [1, 1], [0, 0]]) + x_pad = tf.pad(top_relu, paddings, "CONSTANT") + conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], + initializer=tf.compat.v1.random_normal_initializer()) + conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") + normed = tf.compat.v1.layers.batch_normalization(conv) + + conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], + initializer=tf.compat.v1.random_normal_initializer()) + conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") + normed2 = tf.compat.v1.layers.batch_normalization(conv2) + add = tf.raw_ops.Add(x=normed, y=normed2, name='addv2') + relu = tf.nn.relu(add) + relu6 = tf.nn.relu6(relu, name='op_to_store') + + out_name = relu6.name.split(':')[0] + with tf.compat.v1.Session() as sess: + sess.run(tf.compat.v1.global_variables_initializer()) + output_graph_def = graph_util.convert_variables_to_constants( + sess=sess, + input_graph_def=sess.graph_def, + output_node_names=[out_name]) + + quantizer = Quantization('fake_yaml.yaml') + dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = output_graph_def + output_graph = quantizer.fit() + output_graph.export("workspace/tf_qdq_to_onnx_qdq.onxx") + +if __name__ == '__main__': + unittest.main() From 2da01df2998cec8fe5a8b484ea3a9947fc033f81 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Thu, 24 Nov 2022 17:27:44 +0800 Subject: [PATCH 02/43] convert qdq nodes Signed-off-by: Lv, Liang1 --- .../graph_rewriter/onnx/onnx_graph.py | 79 ++++++++ .../graph_rewriter/onnx/qdq_rewriter.py | 176 ------------------ .../graph_rewriter/onnx/tf2onnx_utils.py | 26 +-- .../adaptor/tf_utils/tf2onnx_converter.py | 89 +++++++-- ...test_tensorflow_qdq_convert_to_onnx_qdq.py | 8 +- 5 files changed, 177 insertions(+), 201 deletions(-) delete mode 100644 neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/qdq_rewriter.py diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py index af25eb75c9e..61d8ffa6d0d 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py @@ -1198,6 +1198,85 @@ def is_safe_to_remove_nodes(self, to_delete, outputs_to_ignore=None): return False return True + def convert_qdq_nodes(self, q_node, dq_node): + """Convert tensorflow QuantizeV2/Dequantize nodes to QuantizeLinear/DequantizeLinear.""" + qdq_node_output_dtype = self.get_dtype(dq_node.output[0]) + qdq_node_output_shape = self.get_shape(dq_node.output[0]) + + # Get the attributes of qdq node + narrow_range = q_node.attr['narrow_range'].i + signed_input = bool(q_node.get_attr_value('T', TensorProto.INT8) == TensorProto.INT8) + + min_quantized, max_quantized = [-127, 127] + if not narrow_range and signed_input: + min_quantized = -128 + + if not signed_input: + min_quantized, max_quantized = [0, 255] + + # Get axis attribute for per channel implementation. 
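+        # With axis == -1 a scalar scale/zero point is produced (per-tensor); otherwise the
+        # ReduceMax/ReduceMin reductions below keep the chosen channel dimension so
+        # per-channel Q/DQ nodes are emitted, which requires opset >= 13.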
+ axis = q_node.get_attr_value('axis', -1) + q_attrs = {} + + quantized_np_dtype = np.int8 if signed_input else np.uint8 + quantized_dtype = TensorProto.INT8 if signed_input else TensorProto.UINT8 + + if axis != -1: + utils.assert_error(self.opset >= 13, "Opset >= 13 is required for per channel quantization") + q_attrs['axis'] = axis + + min_np = np.array(min_quantized, np.float32) + max_np = np.array(max_quantized, np.float32) + max_quantized_const = self.make_const(utils.set_name("max_const"), max_np).output[0] + if signed_input: + min_quantized_const = self.make_const(utils.set_name("min_const"), min_np).output[0] + reduce_attr = {'keepdims': 0} + if axis != -1: + inp_rank = self.get_rank(q_node.input[0]) + utils.assert_error(inp_rank is not None, "Input rank cannot be unknown for qdq op %s", q_node.name) + reduce_axes = [i for i in range(inp_rank) if i != axis] + reduce_attr['axes'] = reduce_axes + + max_value = self.make_node("ReduceMax", [q_node.input[0]], attr=reduce_attr).output[0] + if signed_input: + min_value = self.make_node("ReduceMin", [q_node.input[0]], attr=reduce_attr).output[0] + + scale_from_max_side = self.make_node("Div", [max_value, max_quantized_const]).output[0] + if signed_input: + scale_from_min_side = self.make_node("Div", [min_value, min_quantized_const]).output[0] + scale = self.make_node("Max", [scale_from_min_side, scale_from_max_side]).output[0] + else: + scale = scale_from_max_side + + if axis == -1: + zero_point_np = np.zeros([], dtype=quantized_np_dtype) + zero_point = self.make_const(utils.set_name("zero_point"), zero_point_np).output[0] + else: + zero_tensor = helper.make_tensor("value", quantized_dtype, dims=[1], vals=[0]) + scale_shape = self.make_node("Shape", [scale]).output[0] + zero_point = self.make_node("ConstantOfShape", inputs=[scale_shape], + attr={"value": zero_tensor}).output[0] + + quant_node = self.make_node(op_type="QuantizeLinear", + inputs=[q_node.input[0], scale, zero_point], + shapes=[qdq_node_output_shape], + attr=q_attrs, + dtypes=[quantized_dtype], + name=utils.set_name("QuantLinearNode")) + + self.set_shape(quant_node.output[0], qdq_node_output_shape) + + self.remove_node(q_node.name) + self.remove_node(dq_node.name) + + dequant_node = self.make_node(op_type="DequantizeLinear", + inputs=[quant_node.output[0], scale, zero_point], + outputs=[dq_node.output[0]], + shapes=[qdq_node_output_shape], + attr=q_attrs, + dtypes=[qdq_node_output_dtype], + name=utils.set_name("DequantLinearNode")) + self.set_shape(dequant_node.output[0], qdq_node_output_shape) class GraphUtil(object): """Utilities for Graph manipulation.""" diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/qdq_rewriter.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/qdq_rewriter.py deleted file mode 100644 index 75e2940fa81..00000000000 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/qdq_rewriter.py +++ /dev/null @@ -1,176 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -from onnx import TensorProto, helper -from . import tf2onnx_utils as utils - -# pylint: disable=missing-docstring - -def extract_numpy_array(node): - return np.frombuffer(node.attr["value"].t.raw_data, dtype="float32") - -def convert_qdq_nodes(onnx_graph, match_results): - - for match in match_results: - qdq_node = match.get_op('output') - qdq_node_output_dtype = onnx_graph.get_dtype(qdq_node.output[0]) - qdq_node_output_shape = onnx_graph.get_shape(qdq_node.output[0]) - - # Get the attributes of qdq node - narrow_range = qdq_node.attr['narrow_range'].i - signed_input = qdq_node.attr['signed_input'].i - range_given = qdq_node.get_attr_value("range_given", qdq_node.type != "QuantizeAndDequantizeV2" or \ - qdq_node.type != "QuantizeAndDequantizeV4") - - min_quantized, max_quantized = [-127, 127] - if not narrow_range and signed_input: - min_quantized = -128 - - if not signed_input: - min_quantized, max_quantized = [0, 255] - - # Get axis attribute for per channel implementation. - axis = qdq_node.get_attr_value('axis', -1) - q_attrs = {} - - quantized_np_dtype = np.int8 if signed_input else np.uint8 - quantized_dtype = TensorProto.INT8 if signed_input else TensorProto.UINT8 - - if axis != -1: - utils.assert_error(onnx_graph.opset >= 13, "Opset >= 13 is required for per channel quantization") - q_attrs['axis'] = axis - - if not range_given: - min_np = np.array(min_quantized, np.float32) - max_np = np.array(max_quantized, np.float32) - max_quantized_const = onnx_graph.make_const(utils.set_name("max_const"), max_np).output[0] - if signed_input: - min_quantized_const = onnx_graph.make_const(utils.set_name("min_const"), min_np).output[0] - reduce_attr = {'keepdims': 0} - if axis != -1: - inp_rank = onnx_graph.get_rank(qdq_node.input[0]) - utils.assert_error(inp_rank is not None, "Input rank cannot be unknown for qdq op %s", qdq_node.name) - reduce_axes = [i for i in range(inp_rank) if i != axis] - reduce_attr['axes'] = reduce_axes - - max_value = onnx_graph.make_node("ReduceMax", [qdq_node.input[0]], attr=reduce_attr).output[0] - if signed_input: - min_value = onnx_graph.make_node("ReduceMin", [qdq_node.input[0]], attr=reduce_attr).output[0] - - scale_from_max_side = onnx_graph.make_node("Div", [max_value, max_quantized_const]).output[0] - if signed_input: - scale_from_min_side = onnx_graph.make_node("Div", [min_value, min_quantized_const]).output[0] - scale = onnx_graph.make_node("Max", [scale_from_min_side, scale_from_max_side]).output[0] - else: - scale = scale_from_max_side - - if axis == -1: - zero_point_np = np.zeros([], dtype=quantized_np_dtype) - zero_point = onnx_graph.make_const(utils.set_name("zero_point"), zero_point_np).output[0] - else: - zero_tensor = helper.make_tensor("value", quantized_dtype, dims=[1], vals=[0]) - scale_shape = onnx_graph.make_node("Shape", [scale]).output[0] - zero_point = onnx_graph.make_node("ConstantOfShape", inputs=[scale_shape], attr={"value": zero_tensor}).output[0] - else: - # Get the min and max value of the inputs to QDQ op - min_value = extract_numpy_array(qdq_node.inputs[1]) - max_value = extract_numpy_array(qdq_node.inputs[2]) - - num_channels = min_value.shape[0] - scales = np.zeros(num_channels, dtype=np.float32) - - for i in range(num_channels): - # Calculate scales from the min and max values - scale_from_min_side = min_value[i] / min_quantized if min_quantized < 0 else 0 - scale_from_max_side = max_value[i] / 
max_quantized if max_quantized > 0 else 0 - - if scale_from_min_side > scale_from_max_side: - scale = scale_from_min_side - else: - scale = scale_from_max_side - - utils.assert_error(scale > 0, "Quantize/Dequantize scale must be greater than zero") - scales[i] = np.float32(scale) - - # Set scalars for scale and zero point for per layer quantization - if num_channels == 1: - scales = scales[0] - zero_point_np = np.zeros([], dtype=quantized_np_dtype) - else: - utils.assert_error(axis != -1, "Axis must be specified for per channel quantization") - zero_point_np = np.zeros([num_channels], dtype=quantized_np_dtype) - - # Split it into QuantizeLinear and DequantizeLinear and remove the QDQ node reference - cast_scale = scales.astype(np.float32) - scale = onnx_graph.make_const(name=utils.set_name("quant_scale"), np_val=cast_scale).output[0] - zero_point = onnx_graph.make_const(utils.set_name("zero_point"), zero_point_np).output[0] - - quant_node = onnx_graph.make_node(op_type="QuantizeLinear", - inputs=[qdq_node.input[0], scale, zero_point], - shapes=[qdq_node_output_shape], - attr=q_attrs, - dtypes=[quantized_dtype], - name=utils.set_name("QuantLinearNode")) - - onnx_graph.set_shape(quant_node.output[0], qdq_node_output_shape) - - onnx_graph.remove_node(qdq_node.name) - - dequant_node = onnx_graph.make_node(op_type="DequantizeLinear", - inputs=[quant_node.output[0], scale, zero_point], - outputs=[qdq_node.output[0]], - shapes=[qdq_node_output_shape], - attr=q_attrs, - dtypes=[qdq_node_output_dtype], - name=utils.set_name("DequantLinearNode")) - onnx_graph.set_shape(dequant_node.output[0], qdq_node_output_shape) - - return onnx_graph.get_nodes() - -def rewrite_quantize_and_dequantize(g, ops): - - pattern_for_qdq_v2 = \ - OpTypePattern('QuantizeAndDequantizeV2', name='output', inputs=[ - OpTypePattern("*"), - OpTypePattern(None), - OpTypePattern(None), - ]) - pattern_for_qdq_v3 = \ - OpTypePattern('QuantizeAndDequantizeV3', name='output', inputs=[ - OpTypePattern("*"), - OpTypePattern(None), - OpTypePattern(None), - OpTypePattern(None), - ]) - pattern_for_qdq_v4 = \ - OpTypePattern('QuantizeAndDequantizeV4', name='output', inputs=[ - OpTypePattern("*"), - OpTypePattern(None), - OpTypePattern(None), - ]) - - # Match all the patterns for QDQ ops - patterns = [pattern_for_qdq_v2, pattern_for_qdq_v3, pattern_for_qdq_v4] - match_results = [] - for pattern in patterns: - matcher = GraphMatcher(pattern) - results = list(matcher.match_ops(ops)) - match_results.extend(results) - - return create_qdq_nodes(g, match_results) \ No newline at end of file diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py index 721f1a32c2c..3ec0551366f 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py @@ -70,6 +70,7 @@ types_pb2.DT_INT32: onnx_pb.TensorProto.INT32, types_pb2.DT_UINT8: onnx_pb.TensorProto.UINT8, types_pb2.DT_QUINT8: onnx_pb.TensorProto.UINT8, + types_pb2.DT_QINT8: onnx_pb.TensorProto.INT8, types_pb2.DT_UINT16: onnx_pb.TensorProto.UINT16, types_pb2.DT_UINT32: onnx_pb.TensorProto.UINT32, types_pb2.DT_UINT64: onnx_pb.TensorProto.UINT64, @@ -205,23 +206,26 @@ def read_tensorflow_node_attrs(node): """Read tensorflow node attribute names.""" attr = {} - for each_attr in node.node_def.attr: - value = get_tensorflow_node_attr(node, each_attr) - if each_attr in TF2ONNX_IGNORED_NODE_ATTRS or each_attr in 
TF2ONNX_SUBGRAPH_ATTRS or \ + for attr_name in node.node_def.attr: + value = get_tensorflow_node_attr(node, attr_name) + if attr_name == 'T' and node.type in ('QuantizeV2', 'Dequantize'): + attr[attr_name] = TensorProto.INT8 if get_tensorflow_node_attr(node, attr_name) == 'qint8' \ + else TensorProto.UINT8 + elif attr_name in TF2ONNX_IGNORED_NODE_ATTRS or attr_name in TF2ONNX_SUBGRAPH_ATTRS or \ isinstance(value, tensor_pb2.TensorProto): pass - elif each_attr == "shape": + elif attr_name == "shape": shape = get_tensorflow_node_shape_attr(node) if shape is not None: - attr[each_attr] = shape - elif each_attr == "DstT": + attr[attr_name] = shape + elif attr_name == "DstT": attr["to"] = map_tensorflow_dtype(value) elif isinstance(value, tf.DType): - attr[each_attr] = map_tensorflow_dtype(value) + attr[attr_name] = map_tensorflow_dtype(value) elif isinstance(value, list) and len(value) > 0 and isinstance(value[0], tf.DType): - attr[each_attr] = [map_tensorflow_dtype(v) for v in value] + attr[attr_name] = [map_tensorflow_dtype(v) for v in value] else: - attr[each_attr] = get_tensorflow_node_attr(node, each_attr) + attr[attr_name] = get_tensorflow_node_attr(node, attr_name) return attr @@ -250,9 +254,9 @@ def build_onnx_op(node): inputs = [] outputs = [] for inp, shape, dtype in zip(node.input, input_shapes, input_dtypes): - inputs.append(utils.make_onnx_inputs_outputs(inp, dtype, shape)) + inputs.append(make_onnx_inputs_outputs(inp, dtype, shape)) for output in node.output: - outputs.append(utils.make_onnx_inputs_outputs(output, TensorProto.UNDEFINED, None)) + outputs.append(make_onnx_inputs_outputs(output, TensorProto.UNDEFINED, None)) graph_proto = helper.make_graph([build_onnx_op(node)], "infer-graph", inputs, outputs, initializer=initializers) imp = OperatorSetIdProto() imp.version = opset_version diff --git a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py index 847b7979a0b..a87355794ba 100644 --- a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +++ b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py @@ -18,8 +18,9 @@ import logging import tensorflow as tf from onnx import helper -from tensorflow.core.framework import tensor_pb2 +from tensorflow.core.framework import tensor_pb2, node_def_pb2 +from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.utils.utility import dump_elapsed_time from .graph_rewriter.onnx import tf2onnx_utils as utils from .graph_rewriter.onnx.onnx_graph import OnnxGraph @@ -35,15 +36,83 @@ def __init__(self, model, input_names, output_names, opset_version=utils.DEFAULT Args: model (graphdef): tensorflow QDQ graphdef """ + graph_def = self.pre_optimize(model) graph = tf.Graph() with graph.as_default(): - tf.import_graph_def(model) + tf.import_graph_def(graph_def) self.graph = graph self.opset_version = opset_version self.input_names = input_names self.output_names = output_names + def duplicate_quantizev2_nodes(self, model): + """Duplicate QuantizeV2 nodes if the Dequantize nodes share the same QuantizeV2.""" + cur_graph = GraphAnalyzer() + cur_graph.graph = model + graph_info = cur_graph.parse_graph() + + # Scan the QDQ pairs + patterns = [['QuantizeV2'], ['Dequantize']] + matched_nodes = cur_graph.query_fusion_pattern_nodes(patterns) + + # Append the QDQ pairs to QuantizeV2 nodes map and Dequantize nodes map + quantizev2_map = {} + dequantize_map = {} + + for i in matched_nodes: + quantizev2_input_node_name = graph_info[i[0]].node.input[0] + if 
quantizev2_input_node_name in quantizev2_map: + quantizev2_map[quantizev2_input_node_name].append(graph_info[i[0]].node) + dequantize_map[quantizev2_input_node_name].append(graph_info[i[1]].node) + else: + quantizev2_map[quantizev2_input_node_name] = [graph_info[i[0]].node] + dequantize_map[quantizev2_input_node_name] = [graph_info[i[1]].node] + + # Find out the QuantizeV2 nodes which needs to be duplicated + for input_map_node_name, quantizev2_nodes in quantizev2_map.items(): + if input_map_node_name not in cur_graph.node_name_details: + continue + + dequantize_nodes = dequantize_map[input_map_node_name] + if len(dequantize_nodes) == 1: + continue + + do_duplicate = True + quantizev2_node_name = quantizev2_nodes[0].name + for index, node in enumerate(quantizev2_nodes): + if index == 0: + continue + if node.name != quantizev2_node_name: + do_duplicate = False + + # Duplicate the QuantizeV2 nodes + if do_duplicate: + for index in range(len(dequantize_nodes) - 1): + dequantize_node = dequantize_nodes[index + 1] + new_quantizev2_node = node_def_pb2.NodeDef() + new_quantizev2_node.CopyFrom(quantizev2_nodes[0]) + new_quantizev2_node.name = quantizev2_nodes[0].name + '_copy_' + str(index+1) + cur_graph.add_node( + new_quantizev2_node, input_map_node_name, [dequantize_node.name]) + cur_graph.node_name_details[dequantize_node.name].node.ClearField('input') + cur_graph.node_name_details[dequantize_node.name].node.input.extend([ + new_quantizev2_node.name, new_quantizev2_node.name + ':1', + new_quantizev2_node.name + ':2']) + + return cur_graph.dump_graph() + + def pre_optimize(self, model): + """Pre optimize the tensorflow graphdef to make ONNX QDQ model convert more easier.""" + # Convert HostConst to Const + for node in model.node: + if node.op == 'HostConst': + node.op = 'Const' + + # Duplicat the QuantizeV2 node if it has multi Dequantize nodes + model = self.duplicate_quantizev2_nodes(model) + return model + @dump_elapsed_time("Pass TensorflowQDQToOnnxQDQConverter") def convert(self, save_path): """ convert tensorflow QDQ model to onnx QDQ model @@ -103,20 +172,18 @@ def convert(self, save_path): # Build ONNX Graph using onnx_nodes, output_shapes and dtypes onnx_graph = OnnxGraph(onnx_nodes, output_shapes, dtypes) - # Convert QDQ pattern and Insert them into the ONNX Graph - - - - - # - + # Convert TF QDQ pattern to ONNX QDQ format + for node in onnx_graph.get_nodes(): + if node.type == 'Dequantize': + parent_node = onnx_graph.get_node_by_name(node.input[0].rsplit(':', 1)[0]) + if parent_node: + if parent_node.type == 'QuantizeV2': + onnx_graph.convert_qdq_nodes(parent_node, node) # Build ONNX model model_proto = onnx_graph.make_model("converted from neural compressor") # Save ONNX model utils.save_protobuf(save_path, model_proto) - logger.info("Successfully converted TensorFlow model to ONNX") logger.info("Model inputs: %s", [n.name for n in model_proto.graph.input]) logger.info("Model outputs: %s", [n.name for n in model_proto.graph.output]) - diff --git a/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py b/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py index 718f6f825ee..9419abb72fa 100644 --- a/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py +++ b/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py @@ -55,8 +55,6 @@ def setUpClass(self): @classmethod def tearDownClass(self): os.remove('fake_yaml.yaml') - if version1_gte_version2(tf.version.VERSION, '2.8.0'): - shutil.rmtree('workspace') @disable_random() @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0'), 
"Only supports tf greater 2.7.0") @@ -92,7 +90,11 @@ def test_convert_tf_qdq_to_onnx_qdq(self): quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() - output_graph.export("workspace/tf_qdq_to_onnx_qdq.onxx") + output_graph.save("/home/lvl/tf_itex_qdq.pb") + output_graph.export("workspace/tf_qdq_to_onnx_qdq.onnx") + import onnx + onnx_model = onnx.load("workspace/tf_qdq_to_onnx_qdq.onnx") + onnx.checker.check_model(onnx_model) if __name__ == '__main__': unittest.main() From 7063653013cd7b85d9727bf48612970e7f0dd3dd Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Fri, 25 Nov 2022 15:59:31 +0800 Subject: [PATCH 03/43] support tf onnx mapping Signed-off-by: Lv, Liang1 --- .../graph_rewriter/onnx/onnx_graph.py | 8 ++- .../graph_rewriter/onnx/tf2onnx_utils.py | 2 +- .../adaptor/tf_utils/tf2onnx_converter.py | 56 ++++++++++++++++--- 3 files changed, 56 insertions(+), 10 deletions(-) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py index 61d8ffa6d0d..6ff7adfc187 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py @@ -64,7 +64,12 @@ def __init__(self, nodes, output_shapes=None, dtypes=None, target=None, opset=No self.set_config(target, opset, extra_opset) - self.outputs = output_names if output_names is not None else [] + self.inputs = [] + self.outputs = [] + for output_name in output_names: + self.outputs.append(output_name +':0') + input_ops_names = [input_name + ':0' for input_name in input_names] + input_names = input_ops_names self.parent_graph = None self.contained_graphs = {} # {node_name: {node_attribute_name: Graph}} @@ -340,6 +345,7 @@ def reset_nodes(self, ops): for n in self.inputs: if n not in ops: raise ValueError("graph input '" + n.name + "' not exist") + for o in self.outputs: if o not in self._output_to_node_name: raise ValueError("graph output '" + o.name + "' not exist") diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py index 3ec0551366f..6ed7b2e761c 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py @@ -22,7 +22,7 @@ import tensorflow as tf from google.protobuf import text_format import numpy as np -from tensorflow.core.framework import types_pb2, tensor_pb2, graph_pb2 +from tensorflow.core.framework import types_pb2, tensor_pb2 from tensorflow.python.framework import tensor_util from onnx import helper, onnx_pb, numpy_helper, defs, TensorProto, OperatorSetIdProto, shape_inference diff --git a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py index a87355794ba..42cf99cf56b 100644 --- a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +++ b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py @@ -24,7 +24,7 @@ from neural_compressor.utils.utility import dump_elapsed_time from .graph_rewriter.onnx import tf2onnx_utils as utils from .graph_rewriter.onnx.onnx_graph import OnnxGraph - +import tf2onnx as t2o logger = logging.getLogger("neural_compressor") @@ -36,17 +36,18 @@ def __init__(self, model, input_names, output_names, opset_version=utils.DEFAULT Args: model (graphdef): tensorflow QDQ graphdef """ - graph_def = 
self.pre_optimize(model) + graph_def = self.tf_graph_optimize(model) + graph = tf.Graph() with graph.as_default(): - tf.import_graph_def(graph_def) + tf.import_graph_def(graph_def, name='') self.graph = graph self.opset_version = opset_version self.input_names = input_names self.output_names = output_names - def duplicate_quantizev2_nodes(self, model): + def duplicate_tf_quantizev2_nodes(self, model): """Duplicate QuantizeV2 nodes if the Dequantize nodes share the same QuantizeV2.""" cur_graph = GraphAnalyzer() cur_graph.graph = model @@ -102,7 +103,7 @@ def duplicate_quantizev2_nodes(self, model): return cur_graph.dump_graph() - def pre_optimize(self, model): + def tf_graph_optimize(self, model): """Pre optimize the tensorflow graphdef to make ONNX QDQ model convert more easier.""" # Convert HostConst to Const for node in model.node: @@ -110,7 +111,7 @@ def pre_optimize(self, model): node.op = 'Const' # Duplicat the QuantizeV2 node if it has multi Dequantize nodes - model = self.duplicate_quantizev2_nodes(model) + model = self.duplicate_tf_quantizev2_nodes(model) return model @dump_elapsed_time("Pass TensorflowQDQToOnnxQDQConverter") @@ -166,11 +167,12 @@ def convert(self, save_path): name=node.name, **attr_dict) onnx_nodes.append(onnx_node) except Exception as ex: - logger.error("tf2onnx node convert failed for %s, ex=%s", node.name, ex) + logger.error("tensorflow node convert to onnx failed for %s, ex=%s", node.name, ex) raise # Build ONNX Graph using onnx_nodes, output_shapes and dtypes - onnx_graph = OnnxGraph(onnx_nodes, output_shapes, dtypes) + onnx_graph = OnnxGraph(onnx_nodes, output_shapes, dtypes, input_names=self.input_names, + output_names=self.output_names) # Convert TF QDQ pattern to ONNX QDQ format for node in onnx_graph.get_nodes(): @@ -180,6 +182,44 @@ def convert(self, save_path): if parent_node.type == 'QuantizeV2': onnx_graph.convert_qdq_nodes(parent_node, node) + rewriters = [ + t2o.rewriter.rewrite_biasadd_with_conv2d + ] + + t2o.tfonnx.run_rewriters(onnx_graph, rewriters, False) + + # some nodes may already copied into inner Graph, so remove them from main Graph. + onnx_graph.delete_unused_nodes(onnx_graph.outputs) + t2o.tfonnx.topological_sort(onnx_graph, False) + + # create ops mapping for the desired opsets + ops_mapping = t2o.handler.tf_op.create_mapping(onnx_graph.opset, onnx_graph.extra_opset) + mapped_op, unmapped_op, exceptions = \ + t2o.tfonnx.tensorflow_onnx_mapping(onnx_graph, ops_mapping) + if unmapped_op: + logger.error("Unsupported ops: %s", unmapped_op) + if exceptions: + raise exceptions[0] + + # onnx requires topological sorting + t2o.tfonnx.topological_sort(onnx_graph, False) + + onnx_graph.update_proto() + + op_cnt, attr_cnt = onnx_graph.dump_node_statistics(include_attrs=True, include_subgraphs=False) + logger.info( + "Summay Stats:\n" + "\ttensorflow ops: {}\n" + "\ttensorflow attr: {}\n" + "\tonnx mapped: {}\n" + "\tonnx unmapped: {}".format(op_cnt, attr_cnt, mapped_op, unmapped_op)) + + onnx_graph = t2o.optimizer.optimize_graph(onnx_graph) + + # some nodes may already copied into inner Graph, so remove them from main Graph. 
+ onnx_graph.delete_unused_nodes(onnx_graph.outputs) + t2o.tfonnx.topological_sort(onnx_graph, False) + # Build ONNX model model_proto = onnx_graph.make_model("converted from neural compressor") # Save ONNX model From c5594c59caa4003d794fe41cb2513a7ddf5fe0f6 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Mon, 2 Jan 2023 22:06:32 +0800 Subject: [PATCH 04/43] add tf fp32 to onnx fp32 convert support Signed-off-by: Lv, Liang1 --- .../experimental/export/tf2onnx.py | 21 +++---- neural_compressor/model/tensorflow_model.py | 4 +- ...test_tensorflow_qdq_convert_to_onnx_qdq.py | 57 +++++++++++++++++++ 3 files changed, 68 insertions(+), 14 deletions(-) diff --git a/neural_compressor/experimental/export/tf2onnx.py b/neural_compressor/experimental/export/tf2onnx.py index 254504e0cd0..d66aabe2307 100644 --- a/neural_compressor/experimental/export/tf2onnx.py +++ b/neural_compressor/experimental/export/tf2onnx.py @@ -17,32 +17,29 @@ """Helper functions to export model from TensorFlow to ONNX.""" -import os from neural_compressor.utils import logger +import tf2onnx as t2o def tf_to_fp32_onnx( - fp32_model, + graph_def, save_path, opset_version=14, input_names=None, output_names=None ): - """Export FP32 Tensorflow model into FP32 ONNX model. + """Export FP32 Tensorflow model into FP32 ONNX model using tf2onnx tool. Args: - fp32_model (torch.nn.module): fp32 model. - int8_model (torch.nn.module): int8 model. + graph_def (graph_def to convert): fp32 graph_def. save_path (str): save path of ONNX model. - example_inputs (dict|list|tuple|torch.Tensor): used to trace torch model. opset_version (int, optional): opset version. Defaults to 14. - dynamic_axes (dict, optional): dynamic axes. Defaults to {"input": {0: "batch_size"}, - "output": {0: "batch_size"}}. input_names (list, optional): input names. Defaults to None. output_names (list, optional): output names. Defaults to None. - do_constant_folding (bool, optional): do constant folding or not. Defaults to True. - verbose (bool, optional): dump verbose or not. Defaults to True. 
""" - + input_names[:] = [i+":0" for i in input_names] + output_names[:] = [o+":0" for o in output_names] + t2o.convert.from_graph_def(graph_def=graph_def, input_names=input_names, + output_names=output_names, opset=opset_version, output_path=save_path) info = "The FP32 ONNX Model exported to path: {0}".format(save_path) logger.info("*"*len(info)) logger.info(info) @@ -84,4 +81,4 @@ def tf_to_int8_onnx( info = "The INT8 ONNX Model is exported to path: {0}".format(save_path) logger.info("*"*len(info)) logger.info(info) - logger.info("*"*len(info)) \ No newline at end of file + logger.info("*"*len(info)) diff --git a/neural_compressor/model/tensorflow_model.py b/neural_compressor/model/tensorflow_model.py index 767c9004f0e..1300c52ed62 100644 --- a/neural_compressor/model/tensorflow_model.py +++ b/neural_compressor/model/tensorflow_model.py @@ -871,8 +871,8 @@ def export(self, save_path, conf): self.graph_def, save_path, opset_version=conf.opset_version, - input_names=conf.input_names, - output_names=conf.output_names + input_names=conf.input_names if conf.input_names else self.input_tensor_names, + output_names=conf.output_names if conf.output_names else self.output_tensor_names ) else: # pragma: no cover assert False, "Not allowed dtype: {}, pleas use 'fp32' or 'int8'.".format(conf.dtype) diff --git a/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py b/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py index 5c1623e990f..42e7559cfc4 100644 --- a/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py +++ b/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py @@ -101,6 +101,63 @@ def test_convert_tf_qdq_to_onnx_qdq(self): onnx_model = onnx.load("workspace/tf_qdq_to_onnx_qdq.onnx") onnx.checker.check_model(onnx_model) + import onnxruntime as ort + from neural_compressor.data import Datasets, DATALOADERS + ort_session = ort.InferenceSession("workspace/tf_qdq_to_onnx_qdq.onnx") + dataset = Datasets("tensorflow")["dummy"]((100, 56, 56, 16)) + dataloader = DATALOADERS["tensorflow"](dataset) + it = iter(dataloader) + input = next(it) + input_dict = {'input:0': input[0]} + ort_session.run(None, input_dict) + + @disable_random() + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0'), "Only supports tf greater 2.7.0") + def test_convert_tf_fp32_to_onnx_fp32(self): + x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") + top_relu = tf.nn.relu(x) + paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) + x_pad = tf.pad(top_relu, paddings, "CONSTANT") + conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], + initializer=tf.compat.v1.random_normal_initializer()) + conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") + normed = tf.compat.v1.layers.batch_normalization(conv) + + conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], + initializer=tf.compat.v1.random_normal_initializer()) + conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") + normed2 = tf.compat.v1.layers.batch_normalization(conv2) + add = tf.raw_ops.Add(x=normed, y=normed2, name='addv2') + relu = tf.nn.relu(add) + relu6 = tf.nn.relu6(relu, name='op_to_store') + + out_name = relu6.name.split(':')[0] + with tf.compat.v1.Session() as sess: + sess.run(tf.compat.v1.global_variables_initializer()) + output_graph_def = graph_util.convert_variables_to_constants( + sess=sess, + input_graph_def=sess.graph_def, + output_node_names=[out_name]) + + from neural_compressor.model.model import Model + from neural_compressor.config 
import TF2ONNXConfig + inc_model = Model(output_graph_def) + config = TF2ONNXConfig(dtype="fp32") + inc_model.export("workspace/tf_fp32_to_onnx_fp32.onnx", config) + + import onnx + onnx_model = onnx.load("workspace/tf_fp32_to_onnx_fp32.onnx") + onnx.checker.check_model(onnx_model) + + import onnxruntime as ort + from neural_compressor.data import Datasets, DATALOADERS + ort_session = ort.InferenceSession("workspace/tf_fp32_to_onnx_fp32.onnx") + dataset = Datasets("tensorflow")["dummy"]((100, 56, 56, 16)) + dataloader = DATALOADERS["tensorflow"](dataset) + it = iter(dataloader) + input = next(it) + input_dict = {'input:0': input[0]} + ort_session.run(None, input_dict) if __name__ == '__main__': unittest.main() From 7e2eb78032f1b78fae916a0288733a7f3373d8ae Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Tue, 3 Jan 2023 09:32:10 +0800 Subject: [PATCH 05/43] update code for review comments Signed-off-by: Lv, Liang1 --- neural_compressor/experimental/export/tf2onnx.py | 14 +------------- neural_compressor/model/tensorflow_model.py | 2 +- .../test_tensorflow_qdq_convert_to_onnx_qdq.py | 2 +- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/neural_compressor/experimental/export/tf2onnx.py b/neural_compressor/experimental/export/tf2onnx.py index d66aabe2307..86aa21b096d 100644 --- a/neural_compressor/experimental/export/tf2onnx.py +++ b/neural_compressor/experimental/export/tf2onnx.py @@ -56,23 +56,11 @@ def tf_to_int8_onnx( """Export INT8 Tensorflow model into INT8 ONNX model. Args: - fp32_model (torch.nn.module): fp32 model. - int8_model (torch.nn.module): int8 model. - q_config (dict): containing quantization configuration. + int8_model (tensorflow ITEX QDQ model): int8 model. save_path (str): save path of ONNX model. - example_inputs (dict|list|tuple|torch.Tensor): used to trace torch model. opset_version (int, optional): opset version. Defaults to 14. - dynamic_axes (dict, optional): dynamic axes. Defaults to {"input": {0: "batch_size"}, - "output": {0: "batch_size"}}. input_names (list, optional): input names. Defaults to None. output_names (list, optional): output names. Defaults to None. - quant_format (str, optional): quantization format of ONNX model. Defaults to 'QDQ'. - dtype (str, optional): data types of activation and weight of ONNX model. Defaults to 'U8S8'. - recipe (str, optionl): Recipe for processing nn.quantized.Linear module. - 'QDQ_OP_FP32_BIAS': inserting QDQ before quantizable op and using fp32 bias. - 'QDQ_OP_INT32_BIAS': inserting QDQ before quantizable op and using int32 bias. - 'QDQ_OP_FP32_BIAS_QDQ': inserting QDQ before and after quantizable op and using fp32 bias. - Defaults to 'QDQ_OP_FP32_BIAS'. 
""" from neural_compressor.adaptor.tf_utils.tf2onnx_converter import TensorflowQDQToOnnxQDQConverter TensorflowQDQToOnnxQDQConverter(int8_model, input_names, \ diff --git a/neural_compressor/model/tensorflow_model.py b/neural_compressor/model/tensorflow_model.py index 1300c52ed62..d0daa25e165 100644 --- a/neural_compressor/model/tensorflow_model.py +++ b/neural_compressor/model/tensorflow_model.py @@ -848,7 +848,7 @@ def save(self, root=None): logger.info("Save quantized model to {}.".format(pb_file)) def export(self, save_path, conf): - """Export the Tensorflow int8 model to ONNX int8 model.""" + """Export the Tensorflow fp32/int8 model to ONNX fp32/int8 model.""" from neural_compressor.config import TF2ONNXConfig if isinstance(conf, TF2ONNXConfig): diff --git a/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py b/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py index 42e7559cfc4..e14afad73a8 100644 --- a/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py +++ b/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py @@ -139,7 +139,7 @@ def test_convert_tf_fp32_to_onnx_fp32(self): input_graph_def=sess.graph_def, output_node_names=[out_name]) - from neural_compressor.model.model import Model + from neural_compressor.model import Model from neural_compressor.config import TF2ONNXConfig inc_model = Model(output_graph_def) config = TF2ONNXConfig(dtype="fp32") From 02f2ea2129cac9bc31fbc00809ca509749032cbd Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Thu, 5 Jan 2023 22:17:25 +0800 Subject: [PATCH 06/43] add fp32 export examples and update docstring Signed-off-by: Lv, Liang1 --- examples/tensorflow/tf2onnx/README.md | 9 + .../resnet50v1.5/fp32_export/README.md | 45 ++ .../tf2onnx/resnet50v1.5/fp32_export/main.py | 104 ++++ .../resnet50v1.5/fp32_export/requirements.txt | 8 + .../resnet50v1.5/fp32_export/run_benchmark.sh | 39 ++ .../resnet50v1.5/fp32_export/run_export.sh | 35 ++ .../ssd_mobilenet_v1/fp32_export/README.md | 44 ++ .../ssd_mobilenet_v1/fp32_export/main.py | 132 +++++ .../fp32_export/requirements.txt | 8 + .../fp32_export/run_benchmark.sh | 39 ++ .../fp32_export/run_export.sh | 35 ++ .../tf_utils/graph_rewriter/onnx/__init__.py | 1 + .../graph_rewriter/onnx/onnx_graph.py | 495 +++--------------- .../tf_utils/graph_rewriter/onnx/onnx_node.py | 119 ++--- .../graph_rewriter/onnx/onnx_schema.py | 20 +- .../graph_rewriter/onnx/tf2onnx_utils.py | 33 +- .../adaptor/tf_utils/tf2onnx_converter.py | 14 +- .../experimental/export/tf2onnx.py | 7 +- neural_compressor/metric/__init__.py | 4 +- neural_compressor/model/tensorflow_model.py | 4 +- 20 files changed, 661 insertions(+), 534 deletions(-) create mode 100644 examples/tensorflow/tf2onnx/README.md create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/requirements.txt create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_benchmark.sh create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_export.sh create mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md create mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py create mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/requirements.txt create mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_benchmark.sh create mode 100644 
examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_export.sh
 diff --git a/examples/tensorflow/tf2onnx/README.md b/examples/tensorflow/tf2onnx/README.md
new file mode 100644
index 00000000000..b18c7940ebe
--- /dev/null
+++ b/examples/tensorflow/tf2onnx/README.md
@@ -0,0 +1,9 @@
+# Tensorflow Models Export to ONNX Models Examples
+
+These examples show how to export Tensorflow models to ONNX models, including FP32 and INT8.
+Please note that we only support exporting the ONNX QDQ format for INT8 now.
+
+The following examples are available:
+
+* [resnet50v1.5](/examples/tensorflow/tf2onnx/resnet50v1.5): image recognition ResNet50 model from MLPerf.
+* [ssd_mobilenet_v1](/examples/tensorflow/tf2onnx/ssd_mobilenet_v1): object detection ssd_mobilenet_v1 model.
diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md
new file mode 100644
index 00000000000..8c4fea27f9c
--- /dev/null
+++ b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md
@@ -0,0 +1,45 @@
+Step-by-Step
+============
+
+This document shows how to export a Tensorflow ResNet50 FP32 model to an ONNX FP32 model using Intel® Neural Compressor.
+
+
+## Prerequisite
+
+### 1. Installation
+```shell
+# Install Intel® Neural Compressor
+pip install neural-compressor
+```
+### 2. Install requirements
+```shell
+pip install -r requirements.txt
+```
+
+### 3. Prepare Pretrained model
+
+```bash
+wget https://zenodo.org/record/2535873/files/resnet50_v1.pb
+```
+
+### 4. Prepare Dataset
+
+Download the [ImageNet](http://www.image-net.org/) raw images to a directory such as /path/to/ImageNet. The directory includes the folder and file below:
+
+```bash
+ls /path/to/ImageNet
+ILSVRC2012_img_val val.txt
+```
+
+## Run Command
+
+### Export Tensorflow FP32 model to ONNX FP32 model
+```shell
+bash run_export.sh --input_model=./resnet50_v1.pb --output_model=./resnet50_v1.onnx
+```
+
+### Run benchmark for ONNX FP32 model
+```shell
+bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/
+bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=performance --dataset_location=/path/to/ImageNet/
+```
diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py
new file mode 100644
index 00000000000..c8a0ca8f678
--- /dev/null
+++ b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py
@@ -0,0 +1,104 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2022 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# + +from argparse import ArgumentParser +import tensorflow as tf +import onnx +import os +import onnxruntime as ort + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def eval_func(model, dataloader, metric, postprocess): + metric.reset() + sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + input_names = [i.name for i in sess.get_inputs()] + for input_data, label in dataloader: + output = sess.run(None, dict(zip(input_names, [input_data]))) + output, label = postprocess((output, label)) + metric.update(output[1], label) + return metric.result() + + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') + arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include tuning and benchmark option.""" + if self.args.export: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + config = TF2ONNXConfig(dtype="fp32", inputs_as_nchw="input_tensor:0") + inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + model = onnx.load(self.args.input_graph) + data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') + label_path = os.path.join(self.args.dataset_location, 'val.txt') + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, + 'transform': {'ResizeWithAspectRatio': {'height': 224, 'width': 224}, + 'CenterCrop': {'size': 224}, + 'Normalize': {'mean': [123.68, 116.78, 103.94]}, + 'Cast': {'dtype': 'float32'}, + 'Transpose': {'perm': [2, 0, 1]}}, + 'filter': None + } + dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + + from neural_compressor.metric import GeneralTopK + top1 = GeneralTopK(k=1) + from neural_compressor.data.transforms.imagenet_transform import LabelShift + postprocess = LabelShift(label_shift=-1) + def eval(onnx_model): + return eval_func(onnx_model, dataloader, top1, postprocess) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git 
a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/requirements.txt
new file mode 100644
index 00000000000..8d1eeb068a8
--- /dev/null
+++ b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/requirements.txt
@@ -0,0 +1,8 @@
+tf2onnx==1.13.0
+onnx==1.9.0; python_version < '3.10'
+onnx==1.12.0; python_version == '3.10'
+onnxruntime==1.10.0; python_version < '3.10'
+onnxruntime==1.12.0; python_version == '3.10'
+onnxruntime-extensions; python_version < '3.10'
+
+pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_benchmark.sh
new file mode 100644
index 00000000000..6f154088752
--- /dev/null
+++ b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_benchmark.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+set -x
+
+function main {
+
+  init_params "$@"
+  run_benchmark
+
+}
+
+# init params
+function init_params {
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --mode=*)
+          mode=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+# run_tuning
+function run_benchmark {
+    python main.py \
+            --input-graph ${input_model} \
+            --mode ${mode} \
+            --dataset_location ${dataset_location} \
+            --benchmark
+}
+
+main "$@"
diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_export.sh b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_export.sh
new file mode 100644
index 00000000000..1c6d1c908fe
--- /dev/null
+++ b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_export.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+set -x
+
+function main {
+  init_params "$@"
+  run_export
+
+}
+
+# init params
+function init_params {
+
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --output_model=*)
+          output_model=$(echo $var |cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+# run_export
+function run_export {
+    python main.py \
+            --input-graph ${input_model} \
+            --output-graph ${output_model} \
+            --export
+}
+
+main "$@"
diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md
new file mode 100644
index 00000000000..13f3d32848a
--- /dev/null
+++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md
@@ -0,0 +1,44 @@
+Step-by-Step
+============
+
+This document shows how to export a Tensorflow ssd_mobilenet_v1 FP32 model to an ONNX FP32 model using Intel® Neural Compressor.
+
+
+## Prerequisite
+
+### 1. Installation
+```shell
+# Install Intel® Neural Compressor
+pip install neural-compressor
+```
+
+### 2. Install requirements
+```shell
+pip install -r requirements.txt
+```
+
+### 3. Prepare Pretrained model
+
+```shell
+export MODEL=ssd_mobilenet_v1_coco_2018_01_28
+wget http://download.tensorflow.org/models/object_detection/$MODEL.tar.gz
+tar -xvf $MODEL.tar.gz
+```
+
+### 4. Prepare Dataset
+
+Download the COCO dataset from the [official website](https://cocodataset.org/#download).
+ + +## Run Command + +### Export Tensorflow FP32 model to ONNX FP32 model +```shell +bash run_export.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --output_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx +``` + +### Run benchmark for ONNX FP32 model +```shell +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ +``` diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py new file mode 100644 index 00000000000..69cdac86fff --- /dev/null +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py @@ -0,0 +1,132 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from argparse import ArgumentParser +import tensorflow as tf +import onnx +import os +import onnxruntime as ort +import numpy as np + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def eval_func(model, dataloader, metric, postprocess=None): + metric.reset() + session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + # input_names = [i.name for i in sess.get_inputs()] + # for input_data, label in dataloader: + # output = sess.run(None, dict(zip(input_names, [input_data]))) + # metric.update(output, label) + # return metric.result() + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for inputs, labels in dataloader: + if not isinstance(labels, list): + labels = [labels] + if len_inputs == 1: + ort_inputs.update( + inputs if isinstance(inputs, dict) else {inputs_names[0]: inputs} + ) + else: + assert len_inputs == len(inputs), \ + 'number of input tensors must align with graph inputs' + + if isinstance(inputs, dict): # pragma: no cover + ort_inputs.update(inputs) + else: + for i in range(len_inputs): + # in case dataloader contains non-array input + if not isinstance(inputs[i], np.ndarray): + ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) + else: + ort_inputs.update({inputs_names[i]: inputs[i]}) + + predictions = session.run(None, ort_inputs) + + if postprocess is not None: + predictions, labels = postprocess((predictions, labels)) + + if not hasattr(metric, "compare_label") or \ + (hasattr(metric, "compare_label") and metric.compare_label): + metric.update(predictions, labels) + acc = metric.result() + return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + 
dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') + arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include tuning and benchmark option.""" + if self.args.export: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + inc_model.input_tensor_names = ["image_tensor"] + inc_model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] + config = TF2ONNXConfig(dtype="fp32") + inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + model = onnx.load(self.args.input_graph) + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': 16, + 'dataset': {"COCORaw": {'root':self.args.dataset_location}}, + 'transform': {'Resize': {'size': 300}}, + 'filter': None + } + dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + def eval(onnx_model): + return eval_func(onnx_model, dataloader, mAP2) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/requirements.txt new file mode 100644 index 00000000000..8d1eeb068a8 --- /dev/null +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/requirements.txt @@ -0,0 +1,8 @@ +tf2onnx==1.13.0 +onnx==1.9.0; python_version < '3.10' +onnx==1.12.0; python_version == '3.10' +onnxruntime==1.10.0; python_version < '3.10' +onnxruntime==1.12.0; python_version == '3.10' +onnxruntime-extensions; python_version < '3.10' + +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_benchmark.sh new file mode 100644 index 00000000000..6f154088752 --- /dev/null +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_benchmark.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + 
mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_export.sh b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_export.sh new file mode 100644 index 00000000000..1c6d1c908fe --- /dev/null +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_export.sh @@ -0,0 +1,35 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --export +} + +main "$@" diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/__init__.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/__init__.py index 369707c0ef6..f0bba5393ef 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/__init__.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/__init__.py @@ -14,3 +14,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Tensorflow model export to ONNX model Graph Rewriters.""" diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py index 6ff7adfc187..1d671cc7513 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py @@ -15,6 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +"""ONNX Graph wrapper for Tensorflow model converting to ONNX model.""" import collections import logging @@ -28,15 +29,23 @@ logger = logging.getLogger("neural_compressor") class OnnxGraph: - """"Class that provides graph manipulation and matching.""" + """Class that provides graph manipulation and matching.""" def __init__(self, nodes, output_shapes=None, dtypes=None, target=None, opset=None, extra_opset=None, input_names=None, output_names=None, is_subgraph=False, graph_name=None): - """Create Graph. + """Create ONNX Graph. + Args: nodes: list of Node() output_shapes: dict of tensorflow output shapes dtypes: dict of tensorflow dtype + target: list of workarounds applied to help certain platforms + opset: the opset to be used (int, default is latest) + extra_opset: list of extra opset's, for example the opset's used by custom ops + input_names: list of input node names in graph, input name format as node_name:port_id. Optional. + output_names: list of output node names in graph, format is node_name:port_id. Optional. + is_subgraph: bool, check subgraph. + graph_name: str, graph name. 
""" if target is None: target = [] @@ -124,13 +133,9 @@ def __init__(self, nodes, output_shapes=None, dtypes=None, target=None, opset=No self.copy_shape(new_output_name, o) self.copy_dtype(new_output_name, o) - def create_new_graph_with_same_config(self): - """Create a clean graph inheriting current graph's configuration.""" - return OnnxGraph([], output_shapes={}, dtypes={}, target=self._target, opset=self._opset, - extra_opset=self.extra_opset, output_names=[]) def set_config(self, target=None, opset=None, extra_opset=None): - """Set graph fields containing conversion options""" + """Set graph fields containing conversion options.""" if target is None: target = utils.DEFAULT_TARGET @@ -143,31 +148,26 @@ def set_config(self, target=None, opset=None, extra_opset=None): @property def input_names(self): - """Placeholder node outputs""" + """Placeholder node outputs.""" return [node.output[0] for node in self.inputs] @property def opset(self): + """Get opset.""" return self._opset @property def extra_opset(self): + """Get extra opset.""" return self._extra_opset def is_target(self, *names): """Return True if target platform contains any name.""" return any(name in self._target for name in names) - def make_consts(self, values, np_type=np.int64, skip_conversion=False, raw=True): - """create list of consts of same type""" - consts = [] - for value in values: - np_val = np.array(value).astype(np_type) - consts.append(self.make_const(utils.set_name("const"), np_val, skip_conversion, raw)) - return consts - def make_const(self, name, np_val, skip_conversion=False, raw=True): - """Make a new constant in the graph. + """Make a new constant node in the graph. + Args: name: const node name, must be unique. np_val: value of type numpy ndarray. @@ -188,17 +188,10 @@ def make_const(self, name, np_val, skip_conversion=False, raw=True): self.set_dtype(name, utils.map_numpy_to_onnx_dtype(np_val.dtype)) return node - def copy_const(self, node, name=None): - """Copy a const node, using name if specified""" - # TODO: support attr copy starting at opset 12 - if name is None: - name = utils.set_name(node.name) - return self.make_const(name, node.get_tensor_value(as_list=False)) - def make_node(self, op_type, inputs, attr=None, output_count=1, outputs=None, skip_conversion=True, op_name_scope=None, name=None, shapes=None, dtypes=None, domain=utils.ONNX_DOMAIN, infer_shape_dtype=True, branches=None): - """Make a new onnx node in the graph""" + """Make a new onnx node in the graph.""" if attr is None: attr = {} if shapes is None: @@ -354,26 +347,15 @@ def reset_nodes(self, ops): self._output_shapes = remained_shapes def is_empty_input(self, name): - # in ONNX, operation may have optional input and an empty string may be used - # in the place of an actual argument's name to indicate a missing argument - return name == utils.ONNX_EMPTY_INPUT + """Check if the input is empty. - def check_integrity(self): - """ - Check graph integrity. Every node's input needs to associate with a node. - Return broken outputs. + in ONNX, operation may have optional input and an empty string may be used + in the place of an actual argument's name to indicate a missing argument. 
""" - broken_outputs = set() - for node in self.get_nodes(): - for inp in node.input: - if self.get_node_by_output(inp) is None and not self.is_empty_input(inp): - broken_outputs.add(inp) - return list(broken_outputs) + return name == utils.ONNX_EMPTY_INPUT def update_node_shape_dtype(self, node, override=False): - """Try the best to infer shapes and dtypes for outputs of the node, - by default, we respect TF shapes and dtypes. - """ + """Try the best to infer shapes and dtypes for outputs of the node.""" if node.is_const() or node.is_graph_input(): return # NOTE: only support onnx node for now @@ -425,10 +407,10 @@ def update_node_shape_dtype(self, node, override=False): self.set_shape(output, shape) logger.debug("Set shape of [%s] to %s", output, shape) - def update_proto(self, external_tensor_storage=None): + def update_proto(self): """Update the onnx protobuf from out internal Node structure.""" for node in self._nodes: - node.update_proto(external_tensor_storage) + node.update_proto() def get_nodes(self): """Get node list.""" @@ -436,7 +418,9 @@ def get_nodes(self): def get_node_by_output(self, output, search_in_parent_graphs=True): """Get node by node output id recursively going through nested graphs. + Args: + output: node's output search_in_parent_graphs: search in all parent graphs """ ret = None @@ -473,91 +457,13 @@ def set_node_by_name(self, node): self._register_input_name(name, node) def is_const(self, output): + """Check if the node is const.""" return self.get_node_by_output(output).is_const() def get_tensor_value(self, output, as_list=True): + """Get the tensor value of the node.""" return self.get_node_by_output(output).get_tensor_value(as_list) - def rename_tensors(self, tensors_to_rename): - """Replace tensor names within nodes and graph inputs/outputs""" - def rename_list(l): - return [tensors_to_rename.get(t, t) for t in l] - - def rename_keys(d): - return {tensors_to_rename.get(k, k): v for k, v in d.items()} - - self._output_to_node_name = rename_keys(self._output_to_node_name) - self._output_to_consumers = rename_keys(self._output_to_consumers) - self._dtypes = rename_keys(self._dtypes) - self._output_shapes = rename_keys(self._output_shapes) - self.outputs = rename_list(self.outputs) - for node in self._nodes: - node._input = rename_list(node._input) - node._output = rename_list(node._output) - - def change_node_name(self, node, new_name): - """Remove node in current graph.""" - utils.assert_error(new_name not in self._nodes_by_name, "node %s not unique ", new_name) - dtypes = node.output_dtypes - shapes = node.output_shapes - self.remove_node(node.name) - new_node = self.make_node(node.type, node.input, output_count=len(node.output), - attr=node.attr, dtypes=dtypes, shapes=shapes, name=new_name) - for i, old_output in enumerate(node.output): - new_output = utils.add_port_to_name(new_name, i) - for j, k in enumerate(self.outputs): - if k == old_output: - self.outputs[j] = new_output - break - self.replace_all_inputs(old_output, new_output, ops=self.get_nodes()) - return new_node - - def add_graph_input(self, name, dtype=None, shape=None): - """Add placeholder node as graph's input. Order matters only for subgraph. - Placeholders in original graph are assumed for main graph, order not matters. 
- """ - if dtype is None: - dtype = self.get_dtype(name) - - if shape is None: - shape = self.get_shape(name) - - new_node = self.make_node("Placeholder", [], outputs=[name], dtypes=[dtype], shapes=[shape]) - self.inputs.append(new_node) - - def add_graph_input_with_default(self, name, default_const, dtype=None, shape=None): - """Add placeholderwithdefault.""" - if dtype is None: - dtype = self.get_dtype(name) - - if shape is None: - shape = self.get_shape(name) - - default_const_name = utils.add_port_to_name(utils.set_name("{}_default".format(name))) - default_const.output = [default_const_name] - new_node = self.make_node("PlaceholderWithDefault", [default_const_name], outputs=[name], - dtypes=[dtype], shapes=[shape]) - self.inputs.append(new_node) - - def add_graph_output(self, name, dtype=None, shape=None): - """Add node output as graph's output.""" - utils.assert_error(name in self._output_to_node_name, "output %s not exist in the graph", name) - - if dtype is None: - dtype = self.get_dtype(name) - - if shape is None: - shape = self.get_shape(name) - - if name not in self.outputs: - utils.assert_error(shape is not None, "shape for output %s should not be None", name) - utils.assert_error(dtype is not None, "dtype for output %s should not be None", name) - self.outputs.append(name) - self.set_shape(name, shape) - self.set_dtype(name, dtype) - else: - raise ValueError("graph output " + name + " already exists") - def get_dtype(self, name): """Get dtype for node.""" node = self.get_node_by_output(name, search_in_parent_graphs=True) @@ -592,7 +498,7 @@ def get_shape(self, name): return shape def get_rank(self, name): - """Returns len(get_shape(name)) or None if shape is None""" + """Returns len(get_shape(name)) or None if shape is None.""" shape = self.get_shape(name) if shape is None: return None @@ -677,17 +583,17 @@ def _get_unvisited_child(g, node, not_visited): ret = [x for _, x in sorted(zip(label, ops))] self.reset_nodes(ret) - def make_graph(self, doc, graph_name=None, external_tensor_storage=None): - """ - Create GraphProto for onnx from internal graph. + def make_graph(self, doc, graph_name=None): + """Create GraphProto for onnx from internal graph. + Args: - optimize: optimize graph via onnx doc: text for doc string of the graph + graph_name: optimize graph name """ graph_name = graph_name or self.graph_name self.delete_unused_nodes(self.outputs) self.topological_sort(self.get_nodes()) - self.update_proto(external_tensor_storage) + self.update_proto() ops = [] const_ops = [] @@ -722,7 +628,7 @@ def make_graph(self, doc, graph_name=None, external_tensor_storage=None): # not to use numpy_helper.from_array to create a new tensor # because sometimes onnx will have a bug that only check the tensor data in specific field # such as at upsample it only checks the float_data field. - t = op.get_value_attr(external_tensor_storage) + t = op.get_value_attr() tensor = helper.get_attribute_value(t) tensor.name = op.output[0] initializers.append(tensor) @@ -768,11 +674,11 @@ def make_graph(self, doc, graph_name=None, external_tensor_storage=None): return graph def make_model(self, graph_doc, graph_name="tfqdq_to_onnxqdq", **kwargs): - """ - Create final ModelProto for onnx from internal graph. + """Create final ModelProto for onnx from internal graph. 
+ Args: - optimize: optimize graph via onnx - doc: text for doc string of the model + graph_doc: text for doc string of the model + graph_name: optimize graph name """ graph = self.make_graph(graph_doc, graph_name) @@ -826,24 +732,8 @@ def dump_graph(self): node.name, ", ".join(input_names)) - def follow_inputs(self, node, num, space=""): - """Follow inputs for (helpful for debugging).""" - val = [] - top = space == "" - if num == 0: - return [] - val.append("{}{} {} {}".format(space, node.type, node.name, self.get_shape(port_name(node.name)))) - space += " " - for j in node.inputs: - val.extend(self.follow_inputs(j, num - 1, space)) - if top: - print("\n".join(reversed(val))) - print() - return [] - return val - def dump_node_statistics(self, include_attrs=False, include_subgraphs=True): - """Return a counter of op types (and optionally attribute names) within the graph""" + """Return a counter of op types (and optionally attribute names) within the graph.""" op_cnt = collections.Counter() attr_cnt = collections.Counter() for n in self.get_nodes(): @@ -861,39 +751,9 @@ def dump_node_statistics(self, include_attrs=False, include_subgraphs=True): return op_cnt, attr_cnt return op_cnt - def remove_input(self, node, to_be_removed, input_index=None): - """Remove input from Node. - Args: - node: the node we expect the input on - to_be_removed: the node name we want to remove - input_index: if not None, index of the input to be removed, - the method is more efficient if *input_index* is specified, - otherwise, it has to look for every input named *old_input*. - """ - assert isinstance(node, OnnxNode) and isinstance(to_be_removed, six.text_type) - if input_index is not None: - assert node.input[input_index] == to_be_removed - if node.input[input_index] in self._output_to_consumers: - to_ops = self._output_to_consumers[node.input[input_index]] - if node.name in to_ops: - to_ops.remove(node.name) - del node.input[input_index] - return - - for i, name in enumerate(node.input): - if name == to_be_removed: - utils.assert_error( - node.input.count(node.input[i]) <= 1, - "Node %r takes multiple times the same input %r. This case is not handled.", - node.name, node.input[i]) - self._unregister_input_name(node.input[i], node) - del node.input[i] - break - - # don't remove output from parent since others might depend on it - def insert_new_node_on_input(self, node, op_type, input_name, name=None, domain=None, input_index=None, **kwargs): """Create and insert a new node into the graph. + Args: node: we want to replace the input for this node op_type: type for new operation @@ -923,7 +783,8 @@ def insert_new_node_on_input(self, node, op_type, input_name, name=None, domain= return new_node def insert_node_on_output(self, node, output_name=None): - """ + """Insert a node into the graph. + The inserted node takes the *output_name* as input and produces a new output. The function goes through every node taking *output_name* and replaces it by the new output name. @@ -938,8 +799,7 @@ def insert_node_on_output(self, node, output_name=None): return node def insert_new_node_on_output(self, op_type, output_name=None, name=None, inputs=None, domain=None, **kwargs): - """Create and insert a new node into the graph. - It then calls insert_node_on_output. + """Create and insert a new node into the graph. It then calls insert_node_on_output. 
Args: op_type: type for new operation @@ -986,7 +846,7 @@ def find_output_consumers(self, output_name): return nodes def _register_input_name(self, input_name, node, only_graph=False): - "Register node taking a specific input." + """Register node taking a specific input.""" if not only_graph: if input_name not in self._output_to_consumers: self._output_to_consumers[input_name] = set() @@ -998,7 +858,7 @@ def _register_input_name(self, input_name, node, only_graph=False): self.parent_graph._register_input_name(input_name, node, only_graph=True) def _unregister_input_name(self, input_name, node, only_graph=False): - "Unregister node taking a specific input." + """Unregister node taking a specific input.""" node_name = node.name if not only_graph: if input_name in self._output_to_consumers[input_name]: @@ -1011,10 +871,9 @@ def _unregister_input_name(self, input_name, node, only_graph=False): self.parent_graph._unregister_input_name(input_name, node, only_graph=True) def replace_all_inputs(self, old_input, new_input, ops=None): - """ - Replace all inputs pointing to old_input with new_input. - *ops* is used if defined, otherwise `_output_to_consumers` - is used to determine the impacted nodes. + """Replace all inputs pointing to old_input with new_input. + + *ops* is used if defined, otherwise `_output_to_consumers` is used to determine the impacted nodes. """ if old_input == new_input: return @@ -1049,8 +908,8 @@ def replace_all_inputs(self, old_input, new_input, ops=None): ops=g.get_nodes() if keep_ops else None) def replace_input(self, node, old_input, new_input, input_index=None): - """ - Replace one input in a node. + """Replace one input in a node. + The method is more efficient if *input_index* is specified. Otherwise, it renames every output named *old_input*. """ @@ -1077,32 +936,15 @@ def replace_input(self, node, old_input, new_input, input_index=None): self._register_input_name(new_input, node) return is_replaced - def replace_inputs(self, node, new_inputs): - """Replace node inputs.""" - assert isinstance(node, Node) and isinstance(new_inputs, list) - - for old_input in node.input: - to_ops = self._output_to_consumers.get(old_input, None) - if to_ops is not None and old_input in to_ops: - # To avoid issues when a node - # takes twice the same entry. - to_ops.remove(old_input) - - for input_name in new_inputs: - assert isinstance(input_name, six.text_type) - self._register_input_name(input_name, node) - - node.input = new_inputs - return True - def _extract_sub_graph_nodes(self, dest_node, input_checker=None): """Return nodes of subgraph ending with dest_node. + Args: - dest_node: output node of the subgraph to find + dest_node: output node of the subgraph to find. input_checker: customized input check function: bool func(node) Return: - a set of nodes + a set of nodes. """ res_set = set() if not dest_node or (input_checker and input_checker(dest_node) is False): @@ -1129,13 +971,15 @@ def _extract_sub_graph_nodes(self, dest_node, input_checker=None): def extract_sub_graph_nodes(self, outputs_name, input_checker=None, remove_unused_inputs=True): """Return nodes of subgraph having output_ids as outputs. + Args: - output_ids: output node output id of the subgraph to find - input_checker: customized input check function: bool func(node) - remove_unused_inputs: bool, indicates whether unused placeholder inputs will be removed + outputs_name: output node name of the subgraph to find. + input_checker: customized input check function: bool func(node). 
+ remove_unused_inputs: bool, indicates whether unused placeholder inputs will be removed. in the resulting nodes. + Return: - a list of nodes + a list of nodes. """ res_set = set() @@ -1167,43 +1011,6 @@ def delete_unused_nodes(self, outputs_name): body_graph.delete_unused_nodes(body_graph.outputs) self.reset_nodes(related_nodes) - def safe_to_remove_nodes(self, to_delete): - """ List of nodes that safe to delete (i.e. outputs not consumed by other nodes.)""" - safe_to_remove = [] - delete_set = set(to_delete) - for n in delete_set: - out_consumers = set() - for out in n.output: - out_consumers |= set(self.find_output_consumers(out)) - if out_consumers.issubset(delete_set): - safe_to_remove.append(n) - return safe_to_remove - - # TODO(tomwildenhain): Remove this function - def safe_remove_nodes(self, to_delete): - """Delete nodes in `to_delete` without third-party node consuming it.""" - delete_set = set(to_delete) - for n in delete_set: - out_consumers = set() - for out in n.output: - out_consumers |= set(self.find_output_consumers(out)) - if out_consumers.issubset(delete_set): - self.remove_node(n.name) - - def is_safe_to_remove_nodes(self, to_delete, outputs_to_ignore=None): - """Returns true if the outputs of all the nodes in to_delete have no third-party nodes consuming them.""" - delete_set = set(to_delete) - outputs_to_ignore_set = set(outputs_to_ignore or []) - for n in delete_set: - out_consumers = set() - for out in n.output: - if out in outputs_to_ignore_set: - continue - out_consumers |= set(self.find_output_consumers(out)) - if not out_consumers.issubset(delete_set): - return False - return True - def convert_qdq_nodes(self, q_node, dq_node): """Convert tensorflow QuantizeV2/Dequantize nodes to QuantizeLinear/DequantizeLinear.""" qdq_node_output_dtype = self.get_dtype(dq_node.output[0]) @@ -1283,183 +1090,3 @@ def convert_qdq_nodes(self, q_node, dq_node): dtypes=[qdq_node_output_dtype], name=utils.set_name("DequantLinearNode")) self.set_shape(dequant_node.output[0], qdq_node_output_shape) - -class GraphUtil(object): - """Utilities for Graph manipulation.""" - - @staticmethod - def optimize_graph(graph, catch_errors=True, optimizers=None): - return optimizer.optimize_graph(graph, catch_errors, optimizers=optimizers) - - @staticmethod - def optimize_model_proto(onnx_model_proto, catch_errors=True, return_graph=False, - optimizers=None): - """Optimize the model proto, for example: eliminating all useless Transpose pairs. 
- - Returns: - model proto (and possibly graph) after optimization, if optimizer run successfully - or onnx_model_proto, if exceptions happens - """ - try: - kwargs = GraphUtil.get_onnx_model_properties(onnx_model_proto) - graph = GraphUtil.create_graph_from_onnx_model(onnx_model_proto) - graph = GraphUtil.optimize_graph(graph, catch_errors, optimizers=optimizers) - model_proto = graph.make_model(onnx_model_proto.graph.doc_string, - graph_name=onnx_model_proto.graph.name, **kwargs) - - if onnx_model_proto.metadata_props: - metadata_props = {p.key: p.value for p in onnx_model_proto.metadata_props} - helper.set_model_props(model_proto, metadata_props) - if return_graph: - return model_proto, graph - return model_proto - except Exception as e: - if not catch_errors: - raise e - # sometimes, onnx shape inference will fail for some reason, - # return onnx_model_proto for this case - logger.warning("Failed to optimize model proto", exc_info=1) - if return_graph: - return onnx_model_proto, None - return onnx_model_proto - - @staticmethod - def get_onnx_model_properties(onnx_model_proto): - """Get ModelProto properties.""" - kwargs = {} - if onnx_model_proto.HasField('ir_version'): - kwargs["ir_version"] = onnx_model_proto.ir_version - if onnx_model_proto.HasField('producer_name'): - kwargs["producer_name"] = onnx_model_proto.producer_name - if onnx_model_proto.HasField('producer_version'): - kwargs["producer_version"] = onnx_model_proto.producer_version - if onnx_model_proto.HasField('domain'): - kwargs["domain"] = onnx_model_proto.domain - if onnx_model_proto.HasField('model_version'): - kwargs["model_version"] = onnx_model_proto.model_version - if onnx_model_proto.HasField('doc_string'): - kwargs["doc_string"] = onnx_model_proto.doc_string - kwargs["opset_imports"] = onnx_model_proto.opset_import - - return kwargs - - @staticmethod - def create_graph_from_onnx_model(onnx_model_proto, target=None): - """Create Graph loading onnx model proto.""" - # apply shape inference on the model - inferred_model = shape_inference.infer_shapes(onnx_model_proto) - utils.initialize_name_counter(inferred_model) - graph_proto = inferred_model.graph - - opset_version = None - extra_opset = [] - for opset in onnx_model_proto.opset_import: - if not opset.domain: - # domain field is None or empty means it is onnx domain - opset_version = opset.version - else: - extra_opset.append(opset) - - utils.assert_error(opset_version is not None, "opset version is not specified for onnx domain") - main_graph = GraphUtil.create_graph_from_onnx_graph(graph_proto, opset_version, extra_opset, target) - return main_graph - - @staticmethod - def create_graph_from_onnx_graph(graph_proto, opset_version=None, extra_opset=None, target=None): - """Create Graph loading onnx graph proto.""" - output_shapes = {} - output_dtypes = {} - - shapes, dtypes = GraphUtil._parse_shape_and_type_from_value_infos(graph_proto.value_info) - output_shapes.update(shapes) - output_dtypes.update(dtypes) - - shapes, dtypes = GraphUtil._parse_shape_and_type_from_value_infos(graph_proto.output) - output_shapes.update(shapes) - output_dtypes.update(dtypes) - - nodes_to_append = [] - for n in graph_proto.node: - if n.op_type == "Constant": - n.op_type = "Const" - - # some pytorch model had empty names - make one up - if not n.name: - n.name = utils.set_name("was_empty") - nodes_to_append.append(n) - - output_names = [] - for n in graph_proto.output: - output_names.append(n.name) - - g = OnnxGraph(nodes_to_append, output_shapes, output_dtypes, target, 
opset_version, extra_opset, None, output_names) - const_nodes = GraphUtil._parse_graph_initializer(g, graph_proto) - GraphUtil._parse_graph_input(g, graph_proto, [n.name for n in const_nodes]) - - for n in g.get_nodes(): - for attr_name, attr_val in n.attr.items(): - if attr_val.HasField('g'): - # it was assumed that the a.g has inferred shapes/dtypes. - sub_g = GraphUtil.create_graph_from_onnx_graph(attr_val.g, opset_version, extra_opset) - n.set_body_graph_as_attr(attr_name, sub_g) - return g - - @staticmethod - def get_node_count_from_onnx_graph(graph_proto): - op_cnt = collections.Counter() - for n in graph_proto.node: - op_cnt[n.op_type] += 1 - return op_cnt - - @staticmethod - def _parse_shape_and_type_from_value_infos(value_infos): - """Get nodes output shapes and types from value infos.""" - output_shapes = {} - output_dtypes = {} - for shape_info in value_infos: - type_proto = shape_info.type - elem_type = type_proto.tensor_type.elem_type - output_dtypes[shape_info.name] = elem_type - if not type_proto.tensor_type.HasField("shape"): - output_shapes[shape_info.name] = None - continue - shape = type_proto.tensor_type.shape - tuned_shape = [] - for d in shape.dim: - if d.HasField('dim_param'): - tuned_shape.append(-1) - elif d.HasField('dim_value'): - tuned_shape.append(d.dim_value) - else: - # it is found, some unknown dims is missing after inference. - tuned_shape.append(-1) - output_shapes[shape_info.name] = tuned_shape - - return output_shapes, output_dtypes - - @staticmethod - def _parse_graph_initializer(g, graph_proto): - """Get graph initializers and put into Graph object.""" - const_nodes = [] - for initializer in graph_proto.initializer: - np_val = numpy_helper.to_array(initializer) - const_nodes.append(g.make_const(initializer.name, np_val)) - - return const_nodes - - @staticmethod - def _parse_graph_input(g, graph_proto, const_node_names): - """Get graph inputs not defined as initializers and put into Graph object.""" - shapes, dtypes = GraphUtil._parse_shape_and_type_from_value_infos(graph_proto.input) - # make sure the input is added in order we read from graph_proto, - # because for subgraphs, the input orders matter. - for graph_input in graph_proto.input: - name = graph_input.name - const_initializer_node = g.get_node_by_output_in_current_graph(name) - if const_initializer_node is None: # is actual input rather than initializer - shape = shapes[name] - dtype = dtypes[name] - if name not in const_node_names: - g.add_graph_input(name, dtype, shape) - else: - g.add_graph_input_with_default(name, g.get_node_by_name(name), dtype, shape) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py index bd97146a2fb..876729886c6 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py @@ -15,6 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +"""ONNX Node wrapper for Tensorflow model converting to ONNX model.""" import copy import logging @@ -27,13 +28,15 @@ logger = logging.getLogger("neural_compressor") class OnnxNode: - """A ONNX Node Wrapper use for graph manipulations.""" + """A ONNX Node Wrapper used for graph manipulations.""" def __init__(self, node, graph, skip_conversion=False): """Create ONNX Node. + Args: - node: Onnx node in NodeProto - graph: OnnxGraph + node: Onnx node in NodeProto. + graph: OnnxGraph. 
+ skip_conversion: bool for skipping node conversion. """ self._op = node self.graph = graph @@ -49,24 +52,29 @@ def __init__(self, node, graph, skip_conversion=False): @property def input(self): + """Get node input.""" return self._input @input.setter def input(self, val): - # The setter can catch that all inputs are change - # but it cannot catch that one input is changed. - # That's method replace_input and replace_inputs must - # be used to change inputs to let the graph instance - # update its internal indices. + """Node input setter. + + The setter can catch that all inputs are change but it cannot catch that one input is changed. + That's method replace_input and replace_inputs must be used to change inputs to let + the graph instance update its internal indices. + """ self._input = copy.deepcopy(val) @property def output(self): + """Get node output.""" return self._output @output.setter def output(self, val): - """Set op output. Output should be updated explicitly, + """Set op output. + + Output should be updated explicitly, changing it would require output mapping changed. """ self._graph_check() @@ -86,36 +94,15 @@ def inputs(self): @property def attr(self): + """Get node attributes.""" return self._attr - def get_value_attr(self, external_tensor_storage=None): - """Return onnx attr for value property of node. - Attr is modified to point to external tensor data stored in external_tensor_storage, if included. - """ - a = self._attr["value"] - if external_tensor_storage is not None and self in external_tensor_storage.node_to_modified_value_attr: - return external_tensor_storage.node_to_modified_value_attr[self] - if external_tensor_storage is None or a.type != AttributeProto.TENSOR: - return a - if np.product(a.t.dims) > external_tensor_storage.external_tensor_size_threshold: - a = copy.deepcopy(a) - tensor_name = self.name.strip() + "_" + str(external_tensor_storage.name_counter) - for c in '~"#%&*:<>?/\\{|}': - tensor_name = tensor_name.replace(c, '_') - external_tensor_storage.name_counter += 1 - external_tensor_storage.name_to_tensor_data[tensor_name] = a.t.raw_data - external_tensor_storage.node_to_modified_value_attr[self] = a - a.t.raw_data = b'' - a.t.ClearField("raw_data") - location = a.t.external_data.add() - location.key = "location" - location.value = tensor_name - a.t.data_location = TensorProto.EXTERNAL - return a - - def get_onnx_attrs(self, external_tensor_storage=None): - """Return onnx valid attributes. 
- Attrs point to external tensor data stored in external_tensor_storage, if included.""" + def get_value_attr(self): + """Return onnx attr for value property of node.""" + return self._attr["value"] + + def get_onnx_attrs(self): + """Return onnx valid attributes.""" schema = get_schema(self.type, self.graph.opset, self.domain) if schema is None and not (self.is_const() or self.is_graph_input()): logger.debug("Node %s uses non-stardard onnx op <%s, %s>, skip attribute check", @@ -123,21 +110,23 @@ def get_onnx_attrs(self, external_tensor_storage=None): onnx_attrs = {} for a in self._attr.values(): if a.name == "value": - onnx_attrs[a.name] = self.get_value_attr(external_tensor_storage) + onnx_attrs[a.name] = self.get_value_attr() elif schema is None or schema.has_attribute(a.name): onnx_attrs[a.name] = a return onnx_attrs @property def name(self): + """Get op name.""" return self._op.name def child_name(self): + """Set child name.""" return utils.set_name(self.name) @property def op(self): - """TODO: have a better interface for this.""" + """Get node's op.""" return self._op @property @@ -152,12 +141,12 @@ def type(self, val): @property def domain(self): - """Return Op type.""" + """Return Op domain.""" return self._op.domain @domain.setter def domain(self, val): - """Set Op type.""" + """Set Op domain.""" self._op.domain = val @property @@ -192,20 +181,25 @@ def is_scalar(self): return t.shape == tuple() def is_graph_input(self): + """Check if the node is the input of the graph.""" return self.type in ["Placeholder", "PlaceholderWithDefault", "PlaceholderV2"] def is_graph_input_default_const(self): + """Check if the node is the input of the graph and const.""" return self.is_const() and any( out.is_graph_input() for out in self.graph.find_output_consumers(self.output[0]) ) def is_while(self): + """Check if the node is while op.""" return self.type in ["While", "StatelessWhile", "Loop"] def __str__(self): + """Return string of the node op.""" return str(self._op) def __repr__(self): + """Return string of op type and name.""" return "" % (self.type, self._op.name) @property @@ -236,6 +230,7 @@ def get_attr(self, name, default=None): return attr def get_attr_value(self, name, default=None): + """Get attribute value.""" attr = self.get_attr(name) if attr: return helper.get_attribute_value(attr) @@ -260,21 +255,26 @@ def get_attr_str(self, name, encoding="utf-8"): return attr_str.decode(encoding) def set_attr(self, name, value): + """Set node's attribute.""" self.attr[name] = helper.make_attribute(name, value) def set_attr_onnx(self, value): + """Set node's onnx attributes.""" self.attr[value.name] = value @property def skip_conversion(self): + """Get skip conversion setting.""" return self._skip_conversion @skip_conversion.setter def skip_conversion(self, val): + """Set skip conversion.""" self._skip_conversion = val # If some Node is created as onnx_node, then we don't need convert it def need_skip(self): + """Check if need to skip conversion.""" return self._skip_conversion @property @@ -293,8 +293,10 @@ def output_dtypes(self): def get_tensor_value(self, as_list=True): """Get value for onnx tensor. + Args: as_list: whether return numpy ndarray in list. + Returns: If as_list=True, return the array as a (possibly nested) list. Otherwise, return data of type np.ndarray. @@ -327,6 +329,7 @@ def scalar_to_dim1(self): def set_tensor_value(self, new_val): """Set new value for existing onnx tensor. 
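For the attribute accessors above (`set_attr`, `get_attr_value`, `get_attr_str`), the heavy lifting is done by `onnx.helper`; the wrapper mostly stores the resulting AttributeProto in its `attr` dict. A small standalone illustration, not tied to the wrapper class:

```python
from onnx import helper

# set_attr(name, value) boils down to helper.make_attribute(...).
attr = helper.make_attribute("axis", 1)

# get_attr_value(name) decodes it via helper.get_attribute_value(...).
assert helper.get_attribute_value(attr) == 1

# String attributes come back as bytes, which is why get_attr_str decodes them.
s_attr = helper.make_attribute("mode", "constant")
assert helper.get_attribute_value(s_attr).decode("utf-8") == "constant"
```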
+ Args: new_val: value of type numpy ndarray """ @@ -344,10 +347,12 @@ def set_tensor_value(self, new_val): self.graph.set_shape(onnx_tensor.name, list(onnx_tensor.dims)) def get_body_graphs(self): + """Get body graphs.""" self._graph_check() return self.graph.contained_graphs.get(self.name, None) def set_body_graph_as_attr(self, attr_name, graph): + """Set body graphs as attribute.""" self._graph_check() if self.name not in self.graph.contained_graphs: self.graph.contained_graphs[self.name] = {} @@ -355,7 +360,7 @@ def set_body_graph_as_attr(self, attr_name, graph): self.graph.contained_graphs[self.name].update({attr_name: graph}) graph.parent_graph = self.graph - def update_proto(self, external_tensor_storage=None): + def update_proto(self): """Update protobuf from internal structure.""" nodes = list(self._op.input) for node in nodes: @@ -373,11 +378,10 @@ def update_proto(self, external_tensor_storage=None): attr_graphs = self.get_body_graphs() if attr_graphs: for attr_name, sub_graph in attr_graphs.items(): - graph_proto = sub_graph.make_graph("graph for " + self.name + " " + attr_name, - external_tensor_storage=external_tensor_storage) + graph_proto = sub_graph.make_graph("graph for " + self.name + " " + attr_name) self.set_attr(attr_name, graph_proto) - attr = list(self.get_onnx_attrs(external_tensor_storage).values()) + attr = list(self.get_onnx_attrs().values()) if attr: self._op.attribute.extend(attr) @@ -407,26 +411,5 @@ def get_implicit_inputs(self, recursive=True): return list(outer_scope_node_input_ids) def _graph_check(self): - utils.assert_error(self.graph is not None, "Node %s not belonging any graph", - self.name) - - def maybe_cast_input(self, supported, type_map): - """.maybe_cast_input - Args: - supported: list of supported types for inputs - type_map: dict type to supported type mapping - """ - did_cast = False - for i, name in enumerate(self.input): - dtype = self.graph.get_dtype(name) - if dtype not in supported[i]: - tdtype = type_map.get(dtype) - if tdtype is None: - raise RuntimeError("don't know how to cast type {} on node {}".format(dtype, name)) - shape = self.graph.get_shape(name) - cast_node = self.graph.insert_new_node_on_input( - self, "Cast", name, to=tdtype) - self.graph.set_dtype(cast_node.output[0], tdtype) - self.graph.set_shape(cast_node.output[0], shape) - did_cast = True - return did_cast + """Check the graph is None.""" + utils.assert_error(self.graph is not None, "Node %s not belonging any graph", self.name) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py index 895600432ee..2538f6457cc 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py @@ -15,27 +15,27 @@ # See the License for the specific language governing permissions and # limitations under the License. # +"""ONNX Operator Schemas for Tensorflow model converting to ONNX model.""" import logging -import copy from collections import defaultdict, OrderedDict -from onnx import defs, helper, TensorProto, OperatorSetIdProto, shape_inference +from onnx import defs from . import tf2onnx_utils as utils logger = logging.getLogger("neural_compressor") - class OnnxOpSchema(object): """Wrapper for Onnx schema.""" def __init__(self, name, domain, since_version, attributes): - """Create a Onnx schema + """Create a Onnx schema. 
+ Args: name (str): op name - attributes (List[str]): valid attributes domain (str): default value "" means it's Onnx domain since_version (int): opset version, default is 1 + attributes (List[str]): valid attributes """ self._name = name self._domain = domain @@ -44,22 +44,27 @@ def __init__(self, name, domain, since_version, attributes): @property def attributes(self): + """Get valid attributes.""" return self._attributes @property def domain(self): + """Get domain info.""" return self._domain @property def name(self): + """Get op name.""" return self._name @property def since_version(self): + """Get opset version.""" return self._since_version @staticmethod def from_onnx_schema(onnx_schema): + """Static method to construct OnnxOpSchema.""" name = onnx_schema.name domain = onnx_schema.domain since_version = int(onnx_schema.since_version) @@ -67,11 +72,12 @@ def from_onnx_schema(onnx_schema): return OnnxOpSchema(name, domain, since_version, attributes) def has_attribute(self, attr): + """Check if has the attribute.""" return attr in self.attributes def _register_all_schemas_with_history(): - """Register all schemas with history""" + """Register all schemas with history.""" onnx_schemas = defs.get_all_schemas_with_history() name_domain_version_schema_map = defaultdict(lambda: defaultdict(dict)) for s in onnx_schemas: @@ -88,7 +94,7 @@ def _register_all_schemas_with_history(): def _parse_domain_opset_versions(schemas): - """ Get max opset version among all schemas within each domain. """ + """Get max opset version among all schemas within each domain.""" domain_opset_versions = dict() for domain_version_schema_map in schemas.values(): for domain, version_schema_map in domain_version_schema_map.items(): diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py index 6ed7b2e761c..c56fa57b7ae 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py @@ -15,6 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # +"""Utils for Tensorflow model converting to ONNX model.""" + import os import copy import logging @@ -137,7 +139,7 @@ def assert_error(bool_val, error_msg, *args): raise ValueError("Assert failure: " + error_msg % args) def map_numpy_to_onnx_dtype(np_dtype): - """Map numpy dtype to ONNX dtype""" + """Map numpy dtype to ONNX dtype.""" for onnx_dtype, numpy_dtype in ONNX_TO_NUMPY_DTYPE.items(): if numpy_dtype == np_dtype: return onnx_dtype @@ -230,13 +232,13 @@ def read_tensorflow_node_attrs(node): return attr def infer_onnx_shape_dtype(node, opset_version, input_shapes, input_dtypes, initializers=None): - """ - Infer shapes and dtypes for outputs of the node. + """Infer shapes and dtypes for outputs of the node. + Sometimes, shape inference needs the values of node's inputs, so initializers are used. """ def build_onnx_op(node): - """Build onnx op""" + """Build onnx op.""" onnx_node = helper.make_node(node.type, node.input, node.output, name=node.name) # deal with attributes attr = [] @@ -305,30 +307,35 @@ def build_onnx_op(node): return output_shapes, output_dtypes def make_onnx_shape(shape): - """shape with -1 is not valid in onnx ... make it a name.""" + """Shape with -1 is not valid in onnx ... 
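The schema registration helpers above mostly reshape what `onnx.defs` already exposes. A quick illustration of the underlying calls (the exact values printed depend on the installed onnx version):

```python
from collections import defaultdict
from onnx import defs

# _register_all_schemas_with_history iterates this same list and buckets
# schemas by (name, domain, since_version).
schemas = defs.get_all_schemas_with_history()

# _parse_domain_opset_versions keeps the highest since_version seen per domain.
max_opset = defaultdict(int)
for s in schemas:
    max_opset[s.domain] = max(max_opset[s.domain], s.since_version)

print(max_opset[""])  # max opset of the default onnx domain for this install
```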
make it a name.""" if shape: # don't do this if input is a scalar return [set_name("unk") if i == -1 else i for i in shape] return shape class SeqType: - """Wrap around TensorProto.* to signify a tensor sequence of a given type""" + """Wrap around TensorProto.* to signify a tensor sequence of a given type.""" def __init__(self, tensor_dtype): + """Initlization.""" self.dtype = tensor_dtype def __eq__(self, other): + """Check if the SeqType is same.""" if isinstance(other, SeqType): return self.dtype == other.dtype return NotImplemented def __repr__(self): + """Return string of SeqType's dtype.""" return "SeqType(%r)" % self.dtype def make_onnx_inputs_outputs(name, elem_type, shape, **kwargs): - """Wrapper for creating onnx graph inputs or outputs - name, # type: Text - elem_type, # type: TensorProto.DataType - shape, # type: Optional[Sequence[int]] + """Wrapper for creating onnx graph inputs or outputs. + + Args: + name: Text + elem_type: TensorProto.DataType + shape: Optional[Sequence[int]] """ if elem_type is None: elem_type = onnx_pb.TensorProto.UNDEFINED @@ -364,7 +371,7 @@ def is_list_or_tuple(obj): return isinstance(obj, (list, tuple)) def are_shapes_equal(src, dest): - """ Check whether 2 shapes are equal. """ + """Check whether 2 shapes are equal.""" if src is None: return dest is None if dest is None: @@ -378,7 +385,7 @@ def are_shapes_equal(src, dest): return all(i == j for i, j in zip(src, dest)) def get_subgraphs_from_onnx(model_proto): - """Returns an iterator over the graphs/subgraphs of a model (using dfs)""" + """Returns an iterator over the graphs/subgraphs of a model (using dfs).""" stack = [model_proto.graph] while stack: g = stack.pop() @@ -391,7 +398,7 @@ def get_subgraphs_from_onnx(model_proto): stack.extend(attr.graphs) def initialize_name_counter(model_proto): - """Avoid name conflicts by initializing the counter used by make_name based on the provided model""" + """Avoid name conflicts by initializing the counter used by make_name based on the provided model.""" suffix_regex = re.compile(r"__(\d+)(:\d+)?$") def avoid_name(name): global INSERTED_OP_NAME diff --git a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py index 42cf99cf56b..b2cd7b283b3 100644 --- a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +++ b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py @@ -15,6 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # +"""Tensorflow QDQ model convert to ONNX QDQ model.""" + import logging import tensorflow as tf from onnx import helper @@ -31,10 +33,13 @@ class TensorflowQDQToOnnxQDQConverter: """Convert tensorflow QDQ graph to ONNX QDQ graph.""" def __init__(self, model, input_names, output_names, opset_version=utils.DEFAULT_OPSET_VERSION): - """constructor + """Constructor, initilization. Args: model (graphdef): tensorflow QDQ graphdef + input_names (list, optional): input names. Defaults to None. + output_names (list, optional): output names. Defaults to None. + opset_version (int, optional): opset version. Defaults to 14. """ graph_def = self.tf_graph_optimize(model) @@ -116,13 +121,10 @@ def tf_graph_optimize(self, model): @dump_elapsed_time("Pass TensorflowQDQToOnnxQDQConverter") def convert(self, save_path): - """ convert tensorflow QDQ model to onnx QDQ model + """Convert tensorflow QDQ model to onnx QDQ model. 
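`make_onnx_shape` and `make_onnx_inputs_outputs` above wrap the standard value-info helpers; unknown (-1) dimensions become named `dim_param`s instead of illegal negative `dim_value`s. A minimal standalone equivalent (the tensor name and dimension label are made up for the example):

```python
from onnx import helper, TensorProto

# -1 is not a legal dim_value in ONNX, so an unknown dim is given a symbolic
# name instead, which is what make_onnx_shape does with set_name("unk").
shape = [-1, 3, 224, 224]
onnx_shape = ["unk__0" if d == -1 else d for d in shape]

value_info = helper.make_tensor_value_info("input_tensor", TensorProto.FLOAT, onnx_shape)
print(value_info.type.tensor_type.shape.dim[0].dim_param)  # "unk__0"
```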
Args: - input_graph_def (graphdef): tensorflow QDQ graphdef object - - Returns: - onnx QDQ graph + save_path (str): save path of ONNX QDQ model. """ onnx_nodes = [] output_shapes = {} diff --git a/neural_compressor/experimental/export/tf2onnx.py b/neural_compressor/experimental/export/tf2onnx.py index 86aa21b096d..ea7e5ba7df2 100644 --- a/neural_compressor/experimental/export/tf2onnx.py +++ b/neural_compressor/experimental/export/tf2onnx.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Helper functions to export model from TensorFlow to ONNX.""" from neural_compressor.utils import logger @@ -25,7 +24,8 @@ def tf_to_fp32_onnx( save_path, opset_version=14, input_names=None, - output_names=None + output_names=None, + inputs_as_nchw=None ): """Export FP32 Tensorflow model into FP32 ONNX model using tf2onnx tool. @@ -39,7 +39,8 @@ def tf_to_fp32_onnx( input_names[:] = [i+":0" for i in input_names] output_names[:] = [o+":0" for o in output_names] t2o.convert.from_graph_def(graph_def=graph_def, input_names=input_names, - output_names=output_names, opset=opset_version, output_path=save_path) + output_names=output_names, inputs_as_nchw=inputs_as_nchw, + opset=opset_version, output_path=save_path) info = "The FP32 ONNX Model exported to path: {0}".format(save_path) logger.info("*"*len(info)) logger.info(info) diff --git a/neural_compressor/metric/__init__.py b/neural_compressor/metric/__init__.py index 2866945f1c3..9aa6325b1b0 100644 --- a/neural_compressor/metric/__init__.py +++ b/neural_compressor/metric/__init__.py @@ -18,7 +18,7 @@ """Intel Neural Compressor Metric.""" -from .metric import METRICS, Metric, BaseMetric, metric_registry +from .metric import METRICS, Metric, BaseMetric, metric_registry, COCOmAPv2, GeneralTopK from os.path import dirname, basename, isfile, join import glob @@ -29,4 +29,4 @@ __import__(basename(f)[:-3], globals(), locals(), level=1) -__all__ = ["METRICS", "Metric", "BaseMetric", "metric_registry"] +__all__ = ["METRICS", "Metric", "BaseMetric", "metric_registry", "COCOmAPv2", "GeneralTopK"] diff --git a/neural_compressor/model/tensorflow_model.py b/neural_compressor/model/tensorflow_model.py index d0daa25e165..32e59ad7907 100644 --- a/neural_compressor/model/tensorflow_model.py +++ b/neural_compressor/model/tensorflow_model.py @@ -867,12 +867,14 @@ def export(self, save_path, conf): output_names=conf.output_names if conf.output_names else self.output_tensor_names ) elif conf.dtype == 'fp32': + inputs_as_nchw = conf.kwargs.get("inputs_as_nchw", None) tf_to_fp32_onnx( self.graph_def, save_path, opset_version=conf.opset_version, input_names=conf.input_names if conf.input_names else self.input_tensor_names, - output_names=conf.output_names if conf.output_names else self.output_tensor_names + output_names=conf.output_names if conf.output_names else self.output_tensor_names, + inputs_as_nchw=inputs_as_nchw ) else: # pragma: no cover assert False, "Not allowed dtype: {}, pleas use 'fp32' or 'int8'.".format(conf.dtype) From 09ed6f3e79edc871fc241cc5191cda196544106f Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Sun, 15 Jan 2023 23:30:02 +0800 Subject: [PATCH 07/43] enable tf resnet50_v1.5 int8 model exporting to onnx int8 Signed-off-by: Lv, Liang1 --- .../resnet50v1.5/fp32_export/README.md | 4 +- .../tf2onnx/resnet50v1.5/fp32_export/main.py | 5 +- .../resnet50v1.5/fp32_export/run_benchmark.sh | 3 + 
.../resnet50v1.5/int8_export/README.md | 57 ++++++ .../tf2onnx/resnet50v1.5/int8_export/main.py | 142 +++++++++++++++ .../resnet50v1.5/int8_export/requirements.txt | 9 + .../resnet50v1.5/int8_export/run_benchmark.sh | 42 +++++ .../resnet50v1.5/int8_export/run_export.sh | 35 ++++ .../resnet50v1.5/int8_export/run_tuning.sh | 39 ++++ .../ssd_mobilenet_v1/fp32_export/README.md | 6 +- .../ssd_mobilenet_v1/fp32_export/main.py | 5 +- .../fp32_export/run_benchmark.sh | 3 + .../graph_rewriter/onnx/onnx_graph.py | 172 ++++++++++++++---- .../graph_rewriter/onnx/tf2onnx_utils.py | 3 + .../adaptor/tf_utils/tf2onnx_converter.py | 44 ++++- .../experimental/export/tf2onnx.py | 7 +- neural_compressor/model/tensorflow_model.py | 5 +- 17 files changed, 532 insertions(+), 49 deletions(-) create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/requirements.txt create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_benchmark.sh create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_export.sh create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_tuning.sh diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md index 8c4fea27f9c..e1e91116b11 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md @@ -40,6 +40,6 @@ bash run_export.sh --input_model=./resnet50_v1.pb --output_model=./resnet50_v1.o ### Run benchmark for ONNX FP32 model ```shell -bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ -bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=performance --dataset_location=/path/to/ImageNet/ +bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 ``` diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py index c8a0ca8f678..340132e48d2 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py @@ -53,10 +53,11 @@ def __init__(self): arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') arg_parser.add_argument('--dataset_location', dest='dataset_location', help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') self.args = arg_parser.parse_args() def run(self): - """This is neural_compressor function include tuning and benchmark option.""" + """This is neural_compressor function include export and benchmark option.""" if self.args.export: from neural_compressor.model import Model from neural_compressor.config import TF2ONNXConfig @@ -71,7 +72,7 @@ def run(self): from neural_compressor.utils.create_obj_from_config import create_dataloader dataloader_args = { - 'batch_size': 32, + 'batch_size': self.args.batch_size, 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, 'transform': 
{'ResizeWithAspectRatio': {'height': 224, 'width': 224}, 'CenterCrop': {'size': 224}, diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_benchmark.sh index 6f154088752..e83a029e800 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_benchmark.sh +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_benchmark.sh @@ -22,6 +22,8 @@ function init_params { --dataset_location=*) dataset_location=$(echo $var |cut -f2 -d=) ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) esac done @@ -33,6 +35,7 @@ function run_benchmark { --input-graph ${input_model} \ --mode ${mode} \ --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ --benchmark } diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md new file mode 100644 index 00000000000..7d82ca4fa0f --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md @@ -0,0 +1,57 @@ +Step-by-Step +============ + +This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. + + +## Prerequisite + +### 1. Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` +### 2. Install requirements +The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this export ONNX INT8 model example. +The Intel Extension for Tensorflow for Intel CPUs is installed as default. +```shell +pip install -r requirements.txt +``` + +### 3. Prepare Pretrained model + +```bash +wget https://zenodo.org/record/2535873/files/resnet50_v1.pb +``` + +### 4. Prepare Dataset + +Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/ImageNet. The dir include below folder and files: + +```bash +ls /path/to/ImageNet +ILSVRC2012_img_val val.txt +``` +The Raw image dataset is used for running benchmarking for ONNX model. + +TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. The TF records format dataset is used for quantizing Tensorflow FP32 model to Tensorflow INT8 QDQ model. + +## Run Command + +### Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model +```shell +bash run_tuning.sh --input_model=./resnet50_v1.pb --output_model=./resnet50_v1_int8.pb --dataset_location=/path/to/imagenet/ +``` +Please note this dataset is TF records format. + +### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model +```shell +bash run_export.sh --input_model=./resnet50_v1_int8.pb --output_model=./resnet50_v1_int8.onnx +``` + +### Run benchmark for ONNX INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 +``` +Please note this dataset is Raw image dataset. 
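After the export step above produces `resnet50_v1_int8.onnx`, a quick standalone sanity check with onnxruntime can confirm the QDQ model loads and runs before kicking off the full benchmark. This is only a sketch with random data; check your model's actual input name and shape via `sess.get_inputs()` first:

```python
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("resnet50_v1_int8.onnx", providers=["CPUExecutionProvider"])
inp = sess.get_inputs()[0]
print(inp.name, inp.shape)  # inspect the real input name/shape

# Feed one random image; NCHW layout assumed here because of inputs_as_nchw.
dummy = np.random.rand(1, 3, 224, 224).astype(np.float32)
outputs = sess.run(None, {inp.name: dummy})
print([o.shape for o in outputs])
```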
diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py new file mode 100644 index 00000000000..8f93af6fdfc --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py @@ -0,0 +1,142 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from argparse import ArgumentParser +import tensorflow as tf +import onnx +import os +import onnxruntime as ort + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def eval_func(model, dataloader, metric, postprocess): + metric.reset() + sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + input_names = [i.name for i in sess.get_inputs()] + for input_data, label in dataloader: + output = sess.run(None, dict(zip(input_names, [input_data]))) + output, label = postprocess((output, label)) + metric.update(output, label) + return metric.result() + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') + arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') + arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + if self.args.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion + from neural_compressor.utils.create_obj_from_config import create_dataloader + calib_dataloader_args = { + 'batch_size': 10, + 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + calib_dataloader = create_dataloader('tensorflow', calib_dataloader_args) + eval_dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': 
[123.68, 116.78, 103.94]}}, + 'filter': None + } + eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) + op_name_list = { + 'resnet_model/dense/MatMul': + { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']}, + } + } + conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100], + outputs=['softmax_tensor'], + accuracy_criterion = AccuracyCriterion(tolerable_loss=0.3), + op_name_list=op_name_list) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, eval_metric=top1) + q_model.save(self.args.output_graph) + + if self.args.export: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + config = TF2ONNXConfig(dtype="int8", inputs_as_nchw="input_tensor:0") + inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + model = onnx.load(self.args.input_graph) + data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') + label_path = os.path.join(self.args.dataset_location, 'val.txt') + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, + 'transform': {'ResizeWithAspectRatio': {'height': 224, 'width': 224}, + 'CenterCrop': {'size': 224}, + 'Normalize': {'mean': [123.68, 116.78, 103.94]}, + 'Cast': {'dtype': 'float32'}, + 'Transpose': {'perm': [2, 0, 1]}}, + 'filter': None + } + dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + + from neural_compressor.metric import GeneralTopK + top1 = GeneralTopK(k=1) + from neural_compressor.data.transforms.imagenet_transform import LabelShift + postprocess = LabelShift(label_shift=-1) + def eval(onnx_model): + return eval_func(onnx_model, dataloader, top1, postprocess) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/requirements.txt new file mode 100644 index 00000000000..9bdc24cb87b --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/requirements.txt @@ -0,0 +1,9 @@ +tensorflow=2.10.0 +intel-extension-for-tensorflow[cpu] +onnx==1.9.0; python_version < '3.10' +onnx==1.12.0; python_version == '3.10' +onnxruntime==1.10.0; python_version < '3.10' +onnxruntime==1.12.0; python_version == '3.10' +onnxruntime-extensions; python_version < '3.10' + +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_benchmark.sh new file mode 100644 index 00000000000..e83a029e800 --- /dev/null +++ 
b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_export.sh b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_export.sh new file mode 100644 index 00000000000..1c6d1c908fe --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_export.sh @@ -0,0 +1,35 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --export +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_tuning.sh b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_tuning.sh new file mode 100644 index 00000000000..6a9e1b859c9 --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_tuning.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dataset_location ${dataset_location} \ + --tune +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md index 13f3d32848a..b47df35f25d 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md @@ -1,7 +1,7 @@ Step-by-Step ============ -This document is used to show how to export Tensorflow VGG16 FP32 model to ONNX FP32 model using Intel® Neural Compressor. +This document is used to show how to export Tensorflow ssd_mobilenet_v1 FP32 model to ONNX FP32 model using Intel® Neural Compressor. 
## Prerequisite @@ -39,6 +39,6 @@ bash run_export.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --output_mod ### Run benchmark for ONNX FP32 model ```shell -bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ -bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 ``` diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py index 69cdac86fff..1a1582c6ff8 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py @@ -86,10 +86,11 @@ def __init__(self): arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') arg_parser.add_argument('--dataset_location', dest='dataset_location', help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') self.args = arg_parser.parse_args() def run(self): - """This is neural_compressor function include tuning and benchmark option.""" + """This is neural_compressor function include export and benchmark option.""" if self.args.export: from neural_compressor.model import Model from neural_compressor.config import TF2ONNXConfig @@ -104,7 +105,7 @@ def run(self): from neural_compressor.utils.create_obj_from_config import create_dataloader dataloader_args = { - 'batch_size': 16, + 'batch_size': self.args.batch_size, 'dataset': {"COCORaw": {'root':self.args.dataset_location}}, 'transform': {'Resize': {'size': 300}}, 'filter': None diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_benchmark.sh index 6f154088752..e83a029e800 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_benchmark.sh +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_benchmark.sh @@ -22,6 +22,8 @@ function init_params { --dataset_location=*) dataset_location=$(echo $var |cut -f2 -d=) ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) esac done @@ -33,6 +35,7 @@ function run_benchmark { --input-graph ${input_model} \ --mode ${mode} \ --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ --benchmark } diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py index 1d671cc7513..f6410b02557 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py @@ -77,8 +77,10 @@ def __init__(self, nodes, output_shapes=None, dtypes=None, target=None, opset=No self.outputs = [] for output_name in output_names: self.outputs.append(output_name +':0') - input_ops_names = [input_name + ':0' for input_name in input_names] - input_names = input_ops_names + input_names = None + if input_names: + input_ops_names = [input_name + ':0' for input_name in 
input_names] + input_names = input_ops_names self.parent_graph = None self.contained_graphs = {} # {node_name: {node_attribute_name: Graph}} @@ -307,6 +309,16 @@ def remove_node(self, node_name): self._nodes.remove(node) node.graph = None + def safe_remove_nodes(self, to_delete): + """Delete nodes in `to_delete` without third-party node consuming it.""" + delete_set = set(to_delete) + for n in delete_set: + out_consumers = set() + for out in n.output: + out_consumers |= set(self.find_output_consumers(out)) + if out_consumers.issubset(delete_set): + self.remove_node(n.name) + def reset_nodes(self, ops): """Reset the graph with node list.""" remained_dtypes = {} @@ -346,6 +358,11 @@ def reset_nodes(self, ops): self._dtypes = remained_dtypes self._output_shapes = remained_shapes + def create_new_graph_with_same_config(self): + """Create a clean graph inheriting current graph's configuration.""" + return OnnxGraph([], output_shapes={}, dtypes={}, target=self._target, opset=self._opset, + extra_opset=self.extra_opset, output_names=[]) + def is_empty_input(self, name): """Check if the input is empty. @@ -751,6 +768,36 @@ def dump_node_statistics(self, include_attrs=False, include_subgraphs=True): return op_cnt, attr_cnt return op_cnt + def remove_input(self, node, to_be_removed, input_index=None): + """Remove input from Node. + + Args: + node: the node we expect the input on + to_be_removed: the node name we want to remove + input_index: if not None, index of the input to be removed, + the method is more efficient if *input_index* is specified, + otherwise, it has to look for every input named *old_input*. + """ + assert isinstance(node, OnnxNode) and isinstance(to_be_removed, six.text_type) + if input_index is not None: + assert node.input[input_index] == to_be_removed + if node.input[input_index] in self._output_to_consumers: + to_ops = self._output_to_consumers[node.input[input_index]] + if node.name in to_ops: + to_ops.remove(node.name) + del node.input[input_index] + return + + for i, name in enumerate(node.input): + if name == to_be_removed: + utils.assert_error( + node.input.count(node.input[i]) <= 1, + "Node %r takes multiple times the same input %r. This case is not handled.", + node.name, node.input[i]) + self._unregister_input_name(node.input[i], node) + del node.input[i] + break + def insert_new_node_on_input(self, node, op_type, input_name, name=None, domain=None, input_index=None, **kwargs): """Create and insert a new node into the graph. @@ -782,6 +829,20 @@ def insert_new_node_on_input(self, node, op_type, input_name, name=None, domain= self.replace_input(node, node.input[input_index], new_output, input_index) return new_node + def add_graph_input(self, name, dtype=None, shape=None): + """Add placeholder node as graph's input. Order matters only for subgraph. + + Placeholders in original graph are assumed for main graph, order not matters. + """ + if dtype is None: + dtype = self.get_dtype(name) + + if shape is None: + shape = self.get_shape(name) + + new_node = self.make_node("Placeholder", [], outputs=[name], dtypes=[dtype], shapes=[shape]) + self.inputs.append(new_node) + def insert_node_on_output(self, node, output_name=None): """Insert a node into the graph. 
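`insert_new_node_on_input` and `remove_input` above are again edge rewiring on top of NodeProto input lists. A compact sketch of the insertion case using plain `onnx.helper` (the Cast-on-input scenario and all names are hypothetical, chosen only to show the rewiring order):

```python
from onnx import helper, TensorProto

conv = helper.make_node("Conv", ["x:0", "w:0"], ["conv:0"], name="conv")

# Create the new node so it consumes the original edge...
cast = helper.make_node("Cast", ["x:0"], ["conv_input_cast:0"],
                        name="conv_input_cast", to=TensorProto.FLOAT)
# ...then point the downstream node at the new node's output instead.
conv.input[0] = cast.output[0]

assert list(conv.input) == ["conv_input_cast:0", "w:0"]
```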
@@ -936,6 +997,24 @@ def replace_input(self, node, old_input, new_input, input_index=None): self._register_input_name(new_input, node) return is_replaced + def replace_inputs(self, node, new_inputs): + """Replace node inputs.""" + assert isinstance(node, OnnxNode) and isinstance(new_inputs, list) + + for old_input in node.input: + to_ops = self._output_to_consumers.get(old_input, None) + if to_ops is not None and old_input in to_ops: + # To avoid issues when a node + # takes twice the same entry. + to_ops.remove(old_input) + + for input_name in new_inputs: + assert isinstance(input_name, six.text_type) + self._register_input_name(input_name, node) + + node.input = new_inputs + return True + def _extract_sub_graph_nodes(self, dest_node, input_checker=None): """Return nodes of subgraph ending with dest_node. @@ -1020,55 +1099,74 @@ def convert_qdq_nodes(self, q_node, dq_node): narrow_range = q_node.attr['narrow_range'].i signed_input = bool(q_node.get_attr_value('T', TensorProto.INT8) == TensorProto.INT8) - min_quantized, max_quantized = [-127, 127] - if not narrow_range and signed_input: - min_quantized = -128 + max_quantized = 127 if not signed_input: - min_quantized, max_quantized = [0, 255] + max_quantized = 255 # Get axis attribute for per channel implementation. axis = q_node.get_attr_value('axis', -1) q_attrs = {} - quantized_np_dtype = np.int8 if signed_input else np.uint8 quantized_dtype = TensorProto.INT8 if signed_input else TensorProto.UINT8 if axis != -1: utils.assert_error(self.opset >= 13, "Opset >= 13 is required for per channel quantization") q_attrs['axis'] = axis - min_np = np.array(min_quantized, np.float32) - max_np = np.array(max_quantized, np.float32) - max_quantized_const = self.make_const(utils.set_name("max_const"), max_np).output[0] - if signed_input: - min_quantized_const = self.make_const(utils.set_name("min_const"), min_np).output[0] - reduce_attr = {'keepdims': 0} - if axis != -1: inp_rank = self.get_rank(q_node.input[0]) utils.assert_error(inp_rank is not None, "Input rank cannot be unknown for qdq op %s", q_node.name) - reduce_axes = [i for i in range(inp_rank) if i != axis] - reduce_attr['axes'] = reduce_axes - max_value = self.make_node("ReduceMax", [q_node.input[0]], attr=reduce_attr).output[0] - if signed_input: - min_value = self.make_node("ReduceMin", [q_node.input[0]], attr=reduce_attr).output[0] + # Get the min and max value of the inputs to QDQ op + min_value = self.get_tensor_value(q_node.input[1]) + max_value = self.get_tensor_value(q_node.input[2]) - scale_from_max_side = self.make_node("Div", [max_value, max_quantized_const]).output[0] - if signed_input: - scale_from_min_side = self.make_node("Div", [min_value, min_quantized_const]).output[0] - scale = self.make_node("Max", [scale_from_min_side, scale_from_max_side]).output[0] + if isinstance(min_value, list): + num_channels = len(min_value) else: - scale = scale_from_max_side + num_channels = 1 + + scales = np.zeros(num_channels, dtype=np.float32) - if axis == -1: - zero_point_np = np.zeros([], dtype=quantized_np_dtype) - zero_point = self.make_const(utils.set_name("zero_point"), zero_point_np).output[0] + # Per-Tensor + if num_channels == 1: + # sing U8 as default for per tensor + max_quantized = 255 + # Calculate scale from the min and max values + scale = (float(max_value) - min_value) / max_quantized if min_value != max_value else 1 + + zero_point = round((0 - min_value) / scale) + zero_point = np.uint8(round(max(0, min(255, zero_point)))) + + utils.assert_error(scale > 0, 
"Quantize/Dequantize scale must be greater than zero") + scales = np.float32(scale) + zero_point_np = zero_point + # Per-Channel else: - zero_tensor = helper.make_tensor("value", quantized_dtype, dims=[1], vals=[0]) - scale_shape = self.make_node("Shape", [scale]).output[0] - zero_point = self.make_node("ConstantOfShape", inputs=[scale_shape], - attr={"value": zero_tensor}).output[0] + zero_point = np.zeros(num_channels, dtype=np.int8 if signed_input else np.uint8) + for i in range(num_channels): + # Calculate scales from the min and max values + if signed_input: + max_range = max(abs(min_value[i]), abs(max_value[i])) + scale = (float(max_range) * 2) / max_quantized if max_range > 0 else 1 + else: + scale = (float(max_value[i]) - min_value[i]) / max_quantized if min_value[i] != max_value[i] else 1 + + if scale == 1 or signed_input: + zero_point[i] = np.int8(0) + else: + zero_point[i] = round((0 - min_value[i]) / scale) + zero_point[i] = np.uint8(round(max(0, min(255, zero_point[i])))) + + utils.assert_error(scale > 0, "Quantize/Dequantize scale must be greater than zero") + scales[i] = np.float32(scale) + utils.assert_error(axis != -1, "Axis must be specified for per channel quantization") + zero_point_np = zero_point + + # Add QuantizeLinear and DequantizeLinear and remove the TF QDQ node reference + cast_scale = scales.astype(np.float32) + scale = self.make_const(name=utils.set_name("quant_scale"), np_val=cast_scale).output[0] + zero_point = self.make_const(utils.set_name("zero_point"), zero_point_np).output[0] quant_node = self.make_node(op_type="QuantizeLinear", inputs=[q_node.input[0], scale, zero_point], @@ -1090,3 +1188,15 @@ def convert_qdq_nodes(self, q_node, dq_node): dtypes=[qdq_node_output_dtype], name=utils.set_name("DequantLinearNode")) self.set_shape(dequant_node.output[0], qdq_node_output_shape) + + def delete_qdq_nodes(self, q_node, dq_node): + """Delete tensorflow QuantizeV2/Dequantize in the onnx graph.""" + qdq_input = q_node.input[0] + qdq_output = dq_node.output[0] + + qdq_output_consumers = self.find_output_consumers(qdq_output) + for consumer in qdq_output_consumers: + self.replace_input(consumer, qdq_output, qdq_input) + + self.remove_node(dq_node.name) + self.remove_node(q_node.name) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py index c56fa57b7ae..2d3d2ff1c88 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py @@ -36,6 +36,9 @@ ONNX_UNKNOWN_DIMENSION = -1 +NCHW_TO_NHWC = [0, 2, 3, 1] +NHWC_TO_NCHW = [0, 3, 1, 2] + # Built-in supported domains ONNX_DOMAIN = "" diff --git a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py index b2cd7b283b3..3ca0a3d132d 100644 --- a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +++ b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py @@ -19,6 +19,7 @@ import logging import tensorflow as tf +import numpy as np from onnx import helper from tensorflow.core.framework import tensor_pb2, node_def_pb2 @@ -30,9 +31,10 @@ logger = logging.getLogger("neural_compressor") + class TensorflowQDQToOnnxQDQConverter: """Convert tensorflow QDQ graph to ONNX QDQ graph.""" - def __init__(self, model, input_names, output_names, opset_version=utils.DEFAULT_OPSET_VERSION): + def __init__(self, model, input_names, output_names, inputs_as_nchw=None, 
opset_version=utils.DEFAULT_OPSET_VERSION): """Constructor, initilization. Args: @@ -51,6 +53,7 @@ def __init__(self, model, input_names, output_names, opset_version=utils.DEFAULT self.opset_version = opset_version self.input_names = input_names self.output_names = output_names + self.inputs_as_nchw = inputs_as_nchw def duplicate_tf_quantizev2_nodes(self, model): """Duplicate QuantizeV2 nodes if the Dequantize nodes share the same QuantizeV2.""" @@ -119,6 +122,30 @@ def tf_graph_optimize(self, model): model = self.duplicate_tf_quantizev2_nodes(model) return model + def transpose_inputs(self, ctx, inputs_as_nchw): + """Insert a transpose from NHWC to NCHW on model input on users request.""" + + ops = [] + for node in ctx.get_nodes(): + for _, output_name in enumerate(node.output): + if output_name in inputs_as_nchw: + shape = ctx.get_shape(output_name) + if len(shape) != len(utils.NCHW_TO_NHWC): + logger.warning("transpose_input for %s: shape must be rank 4, ignored" % output_name) + ops.append(node) + continue + # insert transpose + op_name = utils.set_name(node.name) + transpose = ctx.insert_new_node_on_output("Transpose", output_name, name=op_name) + transpose.set_attr("perm", utils.NCHW_TO_NHWC) + ctx.copy_shape(output_name, transpose.output[0]) + ctx.set_shape(output_name, np.array(shape)[utils.NHWC_TO_NCHW]) + ops.append(transpose) + ops.append(node) + continue + ops.append(node) + ctx.reset_nodes(ops) + @dump_elapsed_time("Pass TensorflowQDQToOnnxQDQConverter") def convert(self, save_path): """Convert tensorflow QDQ model to onnx QDQ model. @@ -140,6 +167,8 @@ def convert(self, save_path): shape = utils.get_tensorflow_tensor_shape(out) dtypes[out.name] = utils.map_tensorflow_dtype(out.dtype) output_shapes[out.name] = shape + if output_shapes[out.name] is None: + output_shapes[out.name] = [] # Convert the TF FP32 node to ONNX FP32 node for node in node_list: @@ -175,6 +204,8 @@ def convert(self, save_path): # Build ONNX Graph using onnx_nodes, output_shapes and dtypes onnx_graph = OnnxGraph(onnx_nodes, output_shapes, dtypes, input_names=self.input_names, output_names=self.output_names) + if self.inputs_as_nchw: + self.transpose_inputs(onnx_graph, self.inputs_as_nchw) # Convert TF QDQ pattern to ONNX QDQ format for node in onnx_graph.get_nodes(): @@ -184,11 +215,14 @@ def convert(self, save_path): if parent_node.type == 'QuantizeV2': onnx_graph.convert_qdq_nodes(parent_node, node) - rewriters = [ - t2o.rewriter.rewrite_biasadd_with_conv2d - ] + # rewriters = [ + # t2o.rewriter.rewrite_transpose, + # t2o.rewriter.rnn.rewrite_generic_loop, + # t2o.rewriter.cond_rewriter.rewrite_cond, + # ] + + # t2o.tfonnx.run_rewriters(onnx_graph, rewriters, False) - t2o.tfonnx.run_rewriters(onnx_graph, rewriters, False) # some nodes may already copied into inner Graph, so remove them from main Graph. onnx_graph.delete_unused_nodes(onnx_graph.outputs) diff --git a/neural_compressor/experimental/export/tf2onnx.py b/neural_compressor/experimental/export/tf2onnx.py index ea7e5ba7df2..3ffca50bdcf 100644 --- a/neural_compressor/experimental/export/tf2onnx.py +++ b/neural_compressor/experimental/export/tf2onnx.py @@ -18,6 +18,8 @@ from neural_compressor.utils import logger import tf2onnx as t2o +from neural_compressor.utils.utility import LazyImport + def tf_to_fp32_onnx( graph_def, @@ -52,7 +54,8 @@ def tf_to_int8_onnx( save_path, opset_version: int = 14, input_names=None, - output_names=None + output_names=None, + inputs_as_nchw=None ): """Export INT8 Tensorflow model into INT8 ONNX model. 
@@ -65,7 +68,7 @@ def tf_to_int8_onnx( """ from neural_compressor.adaptor.tf_utils.tf2onnx_converter import TensorflowQDQToOnnxQDQConverter TensorflowQDQToOnnxQDQConverter(int8_model, input_names, \ - output_names, opset_version).convert(save_path) + output_names, inputs_as_nchw, opset_version).convert(save_path) info = "The INT8 ONNX Model is exported to path: {0}".format(save_path) logger.info("*"*len(info)) diff --git a/neural_compressor/model/tensorflow_model.py b/neural_compressor/model/tensorflow_model.py index 32e59ad7907..ec888b05f98 100644 --- a/neural_compressor/model/tensorflow_model.py +++ b/neural_compressor/model/tensorflow_model.py @@ -858,16 +858,17 @@ def export(self, save_path, conf): "we reset opset_version={} here".format(conf.opset_version)) from neural_compressor.experimental.export import tf_to_fp32_onnx, tf_to_int8_onnx + inputs_as_nchw = conf.kwargs.get("inputs_as_nchw", None) if conf.dtype == 'int8': tf_to_int8_onnx( self.graph_def, save_path, opset_version=conf.opset_version, input_names=conf.input_names if conf.input_names else self.input_tensor_names, - output_names=conf.output_names if conf.output_names else self.output_tensor_names + output_names=conf.output_names if conf.output_names else self.output_tensor_names, + inputs_as_nchw=inputs_as_nchw ) elif conf.dtype == 'fp32': - inputs_as_nchw = conf.kwargs.get("inputs_as_nchw", None) tf_to_fp32_onnx( self.graph_def, save_path, From 2db77fffc67d3bc45b6321bc48fbe67935a7e916 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Mon, 16 Jan 2023 23:06:50 +0800 Subject: [PATCH 08/43] add mobilenet_v2 example Signed-off-by: Lv, Liang1 --- .../mobilenet_v2/fp32_export/README.md | 79 +++++++++++ .../tf2onnx/mobilenet_v2/fp32_export/main.py | 108 +++++++++++++++ .../mobilenet_v2/fp32_export/requirements.txt | 8 ++ .../mobilenet_v2/fp32_export/run_benchmark.sh | 42 ++++++ .../mobilenet_v2/fp32_export/run_export.sh | 35 +++++ .../mobilenet_v2/int8_export/README.md | 90 +++++++++++++ .../tf2onnx/mobilenet_v2/int8_export/main.py | 125 ++++++++++++++++++ .../mobilenet_v2/int8_export/requirements.txt | 9 ++ .../mobilenet_v2/int8_export/run_benchmark.sh | 42 ++++++ .../mobilenet_v2/int8_export/run_export.sh | 35 +++++ .../mobilenet_v2/int8_export/run_tuning.sh | 39 ++++++ .../graph_rewriter/onnx/onnx_graph.py | 8 +- .../adaptor/tf_utils/tf2onnx_converter.py | 21 ++- .../experimental/export/tf2onnx.py | 38 +++++- 14 files changed, 666 insertions(+), 13 deletions(-) create mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md create mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py create mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/requirements.txt create mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_benchmark.sh create mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_export.sh create mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md create mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py create mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/requirements.txt create mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_benchmark.sh create mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_export.sh create mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_tuning.sh diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md new 
file mode 100644
index 00000000000..77b884e7314
--- /dev/null
+++ b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md
@@ -0,0 +1,79 @@
+Step-by-Step
+============
+
+This document shows how to export a Tensorflow Mobilenet_v2 FP32 model to an ONNX FP32 model using Intel® Neural Compressor.
+
+
+## Prerequisite
+
+### 1. Installation
+```shell
+# Install Intel® Neural Compressor
+pip install neural-compressor
+```
+### 2. Install requirements
+```shell
+pip install -r requirements.txt
+```
+
+### 3. Prepare Pretrained model
+
+The mobilenet_v2 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models).
+We can get the pb file by converting the checkpoint file.
+
+ 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models)
+ ```shell
+ wget https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz
+ tar -xvf mobilenet_v2_1.4_224.tgz
+ ```
+
+ 2. Exporting the Inference Graph
+ ```shell
+ git clone https://github.com/tensorflow/models
+ cd models/research/slim
+ python export_inference_graph.py \
+ --alsologtostderr \
+ --model_name=mobilenet_v2 \
+ --output_file=/tmp/mobilenet_v2_inf_graph.pb
+ ```
+ Make sure to use intel-tensorflow v1.15, and pip install tf_slim.
+ #### Install Intel Tensorflow 1.15 up2
+ Check your python version and use pip to install 1.15.0 up2 from the links below:
+ https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl
+ https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl
+ https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl
+ > Please note: The ImageNet dataset has 1001 classes, while the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001, so we need to add the `--labels_offset=1` flag to the inference graph exporting command.
+ 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer names of the inference graph pb; for mobilenet_v2 the output layer name is `MobilenetV2/Predictions/Reshape_1`
+
+ 4. Freeze the exported graph with the tool `freeze_graph.py` from the [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo
+ ```shell
+ python freeze_graph.py \
+ --input_graph=/tmp/mobilenet_v2_inf_graph.pb \
+ --input_checkpoint=./mobilenet_v2.ckpt \
+ --input_binary=true \
+ --output_graph=./frozen_mobilenet_v2.pb \
+ --output_node_names=MobilenetV2/Predictions/Reshape_1
+ ```
+
+
+### 4. Prepare Dataset
+
+Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/ImageNet.
The dir include below folder and files: + +```bash +ls /path/to/ImageNet +ILSVRC2012_img_val val.txt +``` + +## Run Command + +### Export Tensorflow FP32 model to ONNX FP32 model +```shell +bash run_export.sh --input_model=./frozen_mobilenet_v2.pb --output_model=./mobilenet_v2.onnx +``` + +### Run benchmark for ONNX FP32 model +```shell +bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 +bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 +``` diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py new file mode 100644 index 00000000000..09fb192f1ba --- /dev/null +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py @@ -0,0 +1,108 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from argparse import ArgumentParser +import tensorflow as tf +import onnx +import os +import onnxruntime as ort + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def eval_func(model, dataloader, metric, postprocess): + metric.reset() + sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + input_names = [i.name for i in sess.get_inputs()] + for input_data, label in dataloader: + output = sess.run(None, dict(zip(input_names, [input_data]))) + output, label = postprocess((output, label)) + metric.update(output, label) + return metric.result() + + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') + arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include export and benchmark option.""" + if self.args.export: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + config = TF2ONNXConfig(dtype="fp32", input_names='input[-1,224,224,3]', + inputs_as_nchw="input:0") + 
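+            # 'input[-1,224,224,3]' is the "name[shape]" form parsed by _split_nodename_and_shape()
+            # in neural_compressor/experimental/export/tf2onnx.py (-1 keeps that dimension dynamic), and
+            # inputs_as_nchw="input:0" makes the converter insert an NHWC->NCHW transpose on that input.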
inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + model = onnx.load(self.args.input_graph) + data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') + label_path = os.path.join(self.args.dataset_location, 'val.txt') + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, + 'transform': {'Rescale': {}, + 'Resize': {'size':256}, + 'CenterCrop': {'size': 224}, + 'Normalize': {'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225]}, + 'Cast': {'dtype': 'float32'}, + 'Transpose': {'perm': [2, 0, 1]}}, + 'filter': None + } + dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + + from neural_compressor.metric import GeneralTopK + top1 = GeneralTopK(k=1) + from neural_compressor.data.transforms.imagenet_transform import LabelShift + postprocess = LabelShift(label_shift=-1) + def eval(onnx_model): + return eval_func(onnx_model, dataloader, top1, postprocess) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/requirements.txt new file mode 100644 index 00000000000..8d1eeb068a8 --- /dev/null +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/requirements.txt @@ -0,0 +1,8 @@ +tf2onnx==1.13.0 +onnx==1.9.0; python_version < '3.10' +onnx==1.12.0; python_version == '3.10' +onnxruntime==1.10.0; python_version < '3.10' +onnxruntime==1.12.0; python_version == '3.10' +onnxruntime-extensions; python_version < '3.10' + +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_benchmark.sh new file mode 100644 index 00000000000..e83a029e800 --- /dev/null +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_export.sh b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_export.sh new file mode 100644 index 00000000000..1c6d1c908fe --- /dev/null +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_export.sh @@ -0,0 +1,35 @@ +#!/bin/bash 
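+# Export the Tensorflow FP32 frozen graph to an ONNX FP32 model by running main.py with --export.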
+set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --export +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md new file mode 100644 index 00000000000..26c12d4eae2 --- /dev/null +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md @@ -0,0 +1,90 @@ +Step-by-Step +============ + +This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. + + +## Prerequisite + +### 1. Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` +### 2. Install requirements +The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this export ONNX INT8 model example. +The Intel Extension for Tensorflow for Intel CPUs is installed as default. +```shell +pip install -r requirements.txt +``` + +### 3. Prepare Pretrained model + +The mobilenet_v2 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). +We can get the pb file by convert the checkpoint file. + + 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models) + ```shell + wget https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz + tar -xvf mobilenet_v2_1.4_224.tgz + ``` + + 2. Exporting the Inference Graph + ```shell + git clone https://github.com/tensorflow/models + cd models/research/slim + python export_inference_graph.py \ + --alsologtostderr \ + --model_name=mobilenet_v2 \ + --output_file=/tmp/mobilenet_v2_inf_graph.pb + ``` + Make sure to use intel-tensorflow v1.15, and pip install tf_slim. + #### Install Intel Tensorflow 1.15 up2 + Check your python version and use pip install 1.15.0 up2 from links below: + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl + > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. + 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `MobilenetV2/Predictions/Reshape_1` + + 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo + ```shell + python freeze_graph.py \ + --input_graph=/tmp/mobilenet_v2_inf_graph.pb \ + --input_checkpoint=./mobilenet_v2.ckpt \ + --input_binary=true \ + --output_graph=./frozen_mobilenet_v2.pb \ + --output_node_names=MobilenetV2/Predictions/Reshape_1 + ``` + +### 4. 
Prepare Dataset + +Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/ImageNet. The dir include below folder and files: + +```bash +ls /path/to/ImageNet +ILSVRC2012_img_val val.txt +``` +The Raw image dataset is used for running benchmarking for ONNX model. + +TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. The TF records format dataset is used for quantizing Tensorflow FP32 model to Tensorflow INT8 QDQ model. + +## Run Command + +### Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model +```shell +bash run_tuning.sh --input_model=./frozen_mobilenet_v2.pb --output_model=./mobilenet_v2_int8.pb --dataset_location=/path/to/imagenet/ +``` +Please note this dataset is TF records format. + +### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model +```shell +bash run_export.sh --input_model=./mobilenet_v2_int8.pb --output_model=./mobilenet_v2_int8.onnx +``` + +### Run benchmark for ONNX INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./mobilenet_v2_int8.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 +bash run_benchmark.sh --input_model=./mobilenet_v2_int8.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 +``` +Please note this dataset is Raw image dataset. diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py new file mode 100644 index 00000000000..d295f934349 --- /dev/null +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py @@ -0,0 +1,125 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from argparse import ArgumentParser +import tensorflow as tf +import onnx +import os +import onnxruntime as ort + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def eval_func(model, dataloader, metric, postprocess): + metric.reset() + sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + input_names = [i.name for i in sess.get_inputs()] + for input_data, label in dataloader: + output = sess.run(None, dict(zip(input_names, [input_data]))) + output, label = postprocess((output, label)) + metric.update(output, label) + return metric.result() + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') + arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') + arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + if self.args.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': 10, + 'dataset': {"ImageRecord": {'root': self.args.dataset_location}}, + 'transform': {'BilinearImagenet': + {'height': 224, 'width': 224}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100]) + q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=dataloader, + eval_dataloader=dataloader) + q_model.save(self.args.output_graph) + + if self.args.export: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + config = TF2ONNXConfig(dtype="int8", input_names='input[-1,224,224,3]', + inputs_as_nchw="input:0") + inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + model = onnx.load(self.args.input_graph) + data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') + label_path = os.path.join(self.args.dataset_location, 'val.txt') + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, + 'transform': {'Rescale': {}, + 'Resize': {'size':256}, + 'CenterCrop': {'size': 224}, + 'Normalize': {'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225]}, + 'Cast': {'dtype': 
'float32'}, + 'Transpose': {'perm': [2, 0, 1]}}, + 'filter': None + } + dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + + from neural_compressor.metric import GeneralTopK + top1 = GeneralTopK(k=1) + from neural_compressor.data.transforms.imagenet_transform import LabelShift + postprocess = LabelShift(label_shift=-1) + def eval(onnx_model): + return eval_func(onnx_model, dataloader, top1, postprocess) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/requirements.txt new file mode 100644 index 00000000000..9bdc24cb87b --- /dev/null +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/requirements.txt @@ -0,0 +1,9 @@ +tensorflow=2.10.0 +intel-extension-for-tensorflow[cpu] +onnx==1.9.0; python_version < '3.10' +onnx==1.12.0; python_version == '3.10' +onnxruntime==1.10.0; python_version < '3.10' +onnxruntime==1.12.0; python_version == '3.10' +onnxruntime-extensions; python_version < '3.10' + +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_benchmark.sh new file mode 100644 index 00000000000..e83a029e800 --- /dev/null +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_export.sh b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_export.sh new file mode 100644 index 00000000000..1c6d1c908fe --- /dev/null +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_export.sh @@ -0,0 +1,35 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --export +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_tuning.sh b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_tuning.sh new file mode 
100644 index 00000000000..6a9e1b859c9 --- /dev/null +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_tuning.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dataset_location ${dataset_location} \ + --tune +} + +main "$@" diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py index f6410b02557..16728a3db84 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py @@ -74,13 +74,7 @@ def __init__(self, nodes, output_shapes=None, dtypes=None, target=None, opset=No self.set_config(target, opset, extra_opset) self.inputs = [] - self.outputs = [] - for output_name in output_names: - self.outputs.append(output_name +':0') - input_names = None - if input_names: - input_ops_names = [input_name + ':0' for input_name in input_names] - input_names = input_ops_names + self.outputs = output_names if output_names is not None else [] self.parent_graph = None self.contained_graphs = {} # {node_name: {node_attribute_name: Graph}} diff --git a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py index 3ca0a3d132d..eabe02b2974 100644 --- a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +++ b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py @@ -34,7 +34,7 @@ class TensorflowQDQToOnnxQDQConverter: """Convert tensorflow QDQ graph to ONNX QDQ graph.""" - def __init__(self, model, input_names, output_names, inputs_as_nchw=None, opset_version=utils.DEFAULT_OPSET_VERSION): + def __init__(self, model, input_names, output_names, shape_override, inputs_as_nchw=None, opset_version=utils.DEFAULT_OPSET_VERSION): """Constructor, initilization. 
Args: @@ -53,8 +53,20 @@ def __init__(self, model, input_names, output_names, inputs_as_nchw=None, opset_ self.opset_version = opset_version self.input_names = input_names self.output_names = output_names + self.shape_override = shape_override self.inputs_as_nchw = inputs_as_nchw + if self.shape_override: + logger.info("Apply shape override:") + for name, shape in self.shape_override.items(): + logger.info("\tSet %s shape to %s", name, shape) + self.graph.get_tensor_by_name(name).set_shape(shape) + graph_def = self.graph.as_graph_def(add_shapes=True) + with tf.Graph().as_default() as inferred_graph: + tf.import_graph_def(graph_def, name="") + self.graph = inferred_graph + + def duplicate_tf_quantizev2_nodes(self, model): """Duplicate QuantizeV2 nodes if the Dequantize nodes share the same QuantizeV2.""" cur_graph = GraphAnalyzer() @@ -164,7 +176,12 @@ def convert(self, save_path): # create dict with output to shape mappings for node in node_list: for out in node.outputs: - shape = utils.get_tensorflow_tensor_shape(out) + shape = None + if self.shape_override: + shape = self.shape_override.get(out.name) + if shape is None: + shape = utils.get_tensorflow_tensor_shape(out) + dtypes[out.name] = utils.map_tensorflow_dtype(out.dtype) output_shapes[out.name] = shape if output_shapes[out.name] is None: diff --git a/neural_compressor/experimental/export/tf2onnx.py b/neural_compressor/experimental/export/tf2onnx.py index 3ffca50bdcf..ae59356bb78 100644 --- a/neural_compressor/experimental/export/tf2onnx.py +++ b/neural_compressor/experimental/export/tf2onnx.py @@ -18,9 +18,29 @@ from neural_compressor.utils import logger import tf2onnx as t2o -from neural_compressor.utils.utility import LazyImport +import re +def _split_nodename_and_shape(name): + """input name with shape into name and shape.""" + # pattern for a node name + inputs = [] + shapes = {} + # input takes in most cases the format name:0, where 0 is the output number + # in some cases placeholders don't have a rank which onnx can't handle so we let uses override the shape + # by appending the same, ie : [1,28,28,3] + name_pattern = r"(?:([\w\d/\-\._:]+)(\[[\-\d,]+\])?),?" + splits = re.split(name_pattern, name) + for i in range(1, len(splits), 3): + inputs.append(splits[i]+':0') + if splits[i + 1] is not None: + shape = [int(n) for n in splits[i + 1][1:-1].split(",")] + shape = [n if n >= 0 else None for n in shape] + shapes[splits[i]+':0'] = shape + if not shapes: + shapes = None + return inputs, shapes + def tf_to_fp32_onnx( graph_def, save_path, @@ -38,11 +58,15 @@ def tf_to_fp32_onnx( input_names (list, optional): input names. Defaults to None. output_names (list, optional): output names. Defaults to None. """ - input_names[:] = [i+":0" for i in input_names] + shape_override = None + if isinstance(input_names, str): + input_names, shape_override = _split_nodename_and_shape(input_names) + else: + input_names[:] = [o+":0" for o in input_names] output_names[:] = [o+":0" for o in output_names] t2o.convert.from_graph_def(graph_def=graph_def, input_names=input_names, output_names=output_names, inputs_as_nchw=inputs_as_nchw, - opset=opset_version, output_path=save_path) + shape_override=shape_override, opset=opset_version, output_path=save_path) info = "The FP32 ONNX Model exported to path: {0}".format(save_path) logger.info("*"*len(info)) logger.info(info) @@ -66,9 +90,15 @@ def tf_to_int8_onnx( input_names (list, optional): input names. Defaults to None. output_names (list, optional): output names. Defaults to None. 
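+        inputs_as_nchw (list, optional): input names whose layout should be transposed from NHWC to NCHW in the exported model. Defaults to None.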
""" + shape_override = None + if isinstance(input_names, str): + input_names, shape_override = _split_nodename_and_shape(input_names) + else: + input_names[:] = [o+":0" for o in input_names] + output_names[:] = [o+":0" for o in output_names] from neural_compressor.adaptor.tf_utils.tf2onnx_converter import TensorflowQDQToOnnxQDQConverter TensorflowQDQToOnnxQDQConverter(int8_model, input_names, \ - output_names, inputs_as_nchw, opset_version).convert(save_path) + output_names, shape_override, inputs_as_nchw, opset_version).convert(save_path) info = "The INT8 ONNX Model is exported to path: {0}".format(save_path) logger.info("*"*len(info)) From e6f4598196f40b01f9a3024d007098107ab08915 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Tue, 31 Jan 2023 17:26:43 +0800 Subject: [PATCH 09/43] Enable ssd_mobilenet_v1 onnx int8 export Signed-off-by: Lv, Liang1 --- .../ssd_mobilenet_v1/int8_export/README.md | 47 +++++ .../ssd_mobilenet_v1/int8_export/main.py | 161 ++++++++++++++++++ .../int8_export/requirements.txt | 9 + .../int8_export/run_benchmark.sh | 42 +++++ .../int8_export/run_export.sh | 35 ++++ .../int8_export/run_tuning.sh | 39 +++++ .../graph_rewriter/onnx/tf2onnx_utils.py | 146 ++++++++++++++++ .../adaptor/tf_utils/tf2onnx_converter.py | 64 +++++-- 8 files changed, 531 insertions(+), 12 deletions(-) create mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md create mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py create mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/requirements.txt create mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_benchmark.sh create mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_export.sh create mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_tuning.sh diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md new file mode 100644 index 00000000000..66fffb0d400 --- /dev/null +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md @@ -0,0 +1,47 @@ +Step-by-Step +============ + +This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. + + +## Prerequisite + +### 1. Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` +### 2. Install requirements +```shell +pip install -r requirements.txt +``` + +### 3. Prepare Pretrained model + +```bash +export MODEL=ssd_mobilenet_v1_coco_2018_01_28 +wget http://download.tensorflow.org/models/object_detection/$MODEL.tar.gz +tar -xvf $MODEL.tar.gz +``` + +### 4. Prepare Dataset + +Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). 
+ +## Run Command + +### Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model +```shell +bash run_tuning.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --output_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --dataset_location=/path/to/coco_dataset/ +``` + +### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model +```shell +bash run_export.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --output_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx +``` + +### Run benchmark for ONNX INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 +``` diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py new file mode 100644 index 00000000000..069152e9d02 --- /dev/null +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py @@ -0,0 +1,161 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from argparse import ArgumentParser +import tensorflow as tf +import onnx +import os +import onnxruntime as ort +import numpy as np + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def eval_func(model, dataloader, metric, postprocess=None): + metric.reset() + session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + # input_names = [i.name for i in sess.get_inputs()] + # for input_data, label in dataloader: + # output = sess.run(None, dict(zip(input_names, [input_data]))) + # metric.update(output, label) + # return metric.result() + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for inputs, labels in dataloader: + if not isinstance(labels, list): + labels = [labels] + if len_inputs == 1: + ort_inputs.update( + inputs if isinstance(inputs, dict) else {inputs_names[0]: inputs} + ) + else: + assert len_inputs == len(inputs), \ + 'number of input tensors must align with graph inputs' + + if isinstance(inputs, dict): # pragma: no cover + ort_inputs.update(inputs) + else: + for i in range(len_inputs): + # in case dataloader contains non-array input + if not isinstance(inputs[i], np.ndarray): + ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) + else: + ort_inputs.update({inputs_names[i]: inputs[i]}) + + predictions = session.run(None, ort_inputs) + + if postprocess is not None: + predictions, labels = postprocess((predictions, labels)) + + if not hasattr(metric, "compare_label") or \ + (hasattr(metric, "compare_label") and metric.compare_label): + metric.update(predictions, labels) + acc = metric.result() + return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') + arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') + arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + if self.args.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion + from neural_compressor.utils.create_obj_from_config import create_dataloader + calib_dataloader_args = { + 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, + 'transform': None, + 'filter': None + } + calib_dataloader = create_dataloader('tensorflow', calib_dataloader_args) + eval_dataloader_args = { + 'batch_size': 10, + 'dataset': {"COCORecord": 
{'root':self.args.dataset_location}}, + 'transform': {'Resize': {'size': 300}}, + 'filter': None + } + eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) + conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[10, 50, 100, 200], inputs=['image_tensor'], + outputs=['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes'], + accuracy_criterion = AccuracyCriterion(tolerable_loss=0.3)) + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, eval_metric=mAP2) + q_model.save(self.args.output_graph) + + if self.args.export: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + inc_model.input_tensor_names = ["image_tensor"] + inc_model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] + config = TF2ONNXConfig(dtype="int8") + inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + model = onnx.load(self.args.input_graph) + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"COCORaw": {'root':self.args.dataset_location}}, + 'transform': {'Resize': {'size': 300}}, + 'filter': None + } + dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + def eval(onnx_model): + return eval_func(onnx_model, dataloader, mAP2) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/requirements.txt new file mode 100644 index 00000000000..9bdc24cb87b --- /dev/null +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/requirements.txt @@ -0,0 +1,9 @@ +tensorflow=2.10.0 +intel-extension-for-tensorflow[cpu] +onnx==1.9.0; python_version < '3.10' +onnx==1.12.0; python_version == '3.10' +onnxruntime==1.10.0; python_version < '3.10' +onnxruntime==1.12.0; python_version == '3.10' +onnxruntime-extensions; python_version < '3.10' + +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_benchmark.sh new file mode 100644 index 00000000000..e83a029e800 --- /dev/null +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + + 
init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_export.sh b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_export.sh new file mode 100644 index 00000000000..1c6d1c908fe --- /dev/null +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_export.sh @@ -0,0 +1,35 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --export +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_tuning.sh b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_tuning.sh new file mode 100644 index 00000000000..6a9e1b859c9 --- /dev/null +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_tuning.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dataset_location ${dataset_location} \ + --tune +} + +main "$@" diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py index 2d3d2ff1c88..9cb3e8bbd95 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py @@ -148,6 +148,10 @@ def map_numpy_to_onnx_dtype(np_dtype): return onnx_dtype raise ValueError("unsupported numpy dtype '%s' for mapping to onnx" % np_dtype) +def map_onnx_to_numpy_type(onnx_type): + """Map ONNX dtype to numpy dtype.""" + return ONNX_TO_NUMPY_DTYPE[onnx_type] + def add_port_to_name(name, nr=0): """Map node output number to name.""" return name + ":" + str(nr) @@ -413,3 +417,145 @@ def avoid_name(name): avoid_name(n.name) for out in n.output: avoid_name(out) + +def get_index_from_strided_slice_of_shape(node, outputs_to_values): + """Returns the index of the dimension that the strided slice is reading from the shape node or None.""" + attr_vals = { + 'shrink_axis_mask': 1, + 'ellipsis_mask': 0, + 'begin_mask': 0, + 'new_axis_mask': 0, + 'end_mask': 0 + } + for a in node.node_def.attr: + if a in attr_vals: + i = get_tensorflow_node_attr(node, a) + if i != attr_vals[a]: + return None + i1 = 
outputs_to_values.get(node.inputs[1].name) + i2 = outputs_to_values.get(node.inputs[2].name) + i3 = outputs_to_values.get(node.inputs[3].name) + if i1 is None or i2 is None or i3 is None: + return None + if i1.shape != (1,) or i2.shape != (1,) or i3.shape != (1,): + return None + i1, i2, i3 = i1[0], i2[0], i3[0] + if i1 + 1 != i2 or i3 != 1: + return None + return i1 + +def compute_const_folding_using_tf(g, const_node_values, graph_outputs): + """Find nodes with constant inputs and compute their values using TF.""" + if const_node_values is None: + const_node_values = {} + graph_outputs = set(graph_outputs) + from tf2onnx.tf_loader import tf_session, tf_placeholder + + ops = g.get_operations() + outputs_to_values = {} + outputs_to_dtypes = {} + outputs_to_shapes = {} + shape_node_outputs = {} + + def is_small_shape(x): + return np.product(x) <= 1000 + + def is_huge_shape(x): + return np.product(x) >= 1000000 + + for node in ops: + # Load values of constants. Use const_node_values if possible + if node.type in ["Const", "ConstV2"]: + tensor = node.node_def.attr["value"].tensor + if node.name in const_node_values: + tensor.tensor_content = const_node_values[node.name] + outputs_to_values[node.outputs[0].name] = get_tensorflow_tensor_data(tensor) + outputs_to_dtypes[node.outputs[0].name] = node.outputs[0].dtype + for out in node.outputs: + outputs_to_shapes[out.name] = get_tensorflow_tensor_shape(out) + + for node in ops: + if node.type == "Shape": + shape = outputs_to_shapes.get(node.inputs[0].name) + if shape is not None: + shape_node_outputs[node.outputs[0].name] = shape + + unneeded_outputs = set() + progress = True + while progress: + progress = False + for node in ops: + # Find ops with constant inputs and compute their values + input_names = [i.name for i in node.inputs] + output_names = [i.name for i in node.outputs] + if node.type == 'StridedSlice' and input_names[0] in shape_node_outputs \ + and output_names[0] not in outputs_to_values: + shape = shape_node_outputs[input_names[0]] + i = get_index_from_strided_slice_of_shape(node, outputs_to_values) + if i is not None and 0 <= i < len(shape) and shape[i] is not None: + np_dtype = map_onnx_to_numpy_type(map_tensorflow_dtype(node.outputs[0].dtype)) + outputs_to_values[output_names[0]] = np.array(shape[i], dtype=np_dtype) + outputs_to_dtypes[node.outputs[0].name] = node.outputs[0].dtype + progress = True + can_fold = node.type not in ['Enter', 'Placeholder', 'PlaceholderWithDefault', 'Switch', 'Merge', + 'NextIteration', 'Exit', 'QuantizeAndDequantizeV2', 'QuantizeAndDequantizeV3', + 'QuantizeAndDequantizeV4'] + can_fold = can_fold and not node.type.startswith('Random') + can_fold = can_fold and len(input_names) > 0 and all(inp in outputs_to_values for inp in input_names) + # We can only fold nodes with a single output + can_fold = can_fold and len(output_names) == 1 and output_names[0] not in outputs_to_values + # Skip if value already computed, used, and discarded + can_fold = can_fold and output_names[0] not in unneeded_outputs and output_names[0] not in graph_outputs + if can_fold: + # Make a mini graph containing just the node to fold + g2 = tf.Graph() + with g2.as_default(): + for inp in input_names: + tf_placeholder(outputs_to_dtypes[inp], name=inp.split(':')[0]) + mini_graph_def = g2.as_graph_def() + mini_graph_def.node.append(node.node_def) + g3 = tf.Graph() + with g3.as_default(): + feed_dict = {} + inp_shapes = [] + for inp in input_names: + inp_np = outputs_to_values[inp] + feed_dict[inp] = inp_np + 
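+                        # Input shapes are recorded so folding is skipped below when the computed
+                        # result is huge while every input is small (see is_huge_shape / is_small_shape).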
inp_shapes.append(inp_np.shape) + try: + with tf_session() as sess: + tf.import_graph_def(mini_graph_def, name='') + results = sess.run(output_names, feed_dict=feed_dict) + if is_huge_shape(results[0].shape) and all(is_small_shape(inp) for inp in inp_shapes): + logger.debug("Skipping folding of node %s since result shape %s is much larger " + "than input shapes %s", node.name, results[0].shape, inp_shapes) + else: + outputs_to_values[output_names[0]] = results[0] + outputs_to_dtypes[output_names[0]] = node.outputs[0].dtype + progress = True + except Exception: # pylint: disable=broad-except + logger.debug("Could not fold node %s", node.name) + unneeded_outputs.update(outputs_to_values.keys()) + for node in ops: + # Mark values we need to keep + input_names = [i.name for i in node.inputs] + output_names = [i.name for i in node.outputs] + if len(output_names) == 1 and output_names[0] in outputs_to_values: + continue + for i in input_names: + if i in unneeded_outputs: + unneeded_outputs.remove(i) + for node in unneeded_outputs: + # Remove unneeded values to prevent memory usage explosion + if node in outputs_to_values: + del outputs_to_values[node] + del outputs_to_dtypes[node] + + for node in ops: + # We don't need the constants any more + if node.type in ["Const", "ConstV2"] and node.outputs[0].name in outputs_to_values: + del outputs_to_values[node.outputs[0].name] + del outputs_to_dtypes[node.outputs[0].name] + + logger.info("Computed %d values for constant folding", len(outputs_to_values)) + return outputs_to_values, outputs_to_dtypes diff --git a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py index eabe02b2974..a9210e3fce3 100644 --- a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +++ b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py @@ -21,6 +21,7 @@ import tensorflow as tf import numpy as np from onnx import helper +from packaging.version import Version from tensorflow.core.framework import tensor_pb2, node_def_pb2 from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer @@ -171,9 +172,21 @@ def convert(self, save_path): functions = {} logger.info("Using ONNX opset %s", self.opset_version) + self.graph = t2o.shape_inference.infer_shape_for_graph(self.graph) + + op_outputs_with_none_shape = t2o.shape_inference.check_shape_for_tf_graph(self.graph) + if op_outputs_with_none_shape: + if Version(tf.__version__) > Version("1.5.0"): + for op, outs in op_outputs_with_none_shape.items(): + logger.warning("Cannot infer shape for %s: %s", op, ",".join(outs)) + self.graph = t2o.shape_inference.infer_shape_for_graph_legacy(self.graph) + node_list = self.graph.get_operations() - # create dict with output to shape mappings + outputs_to_values, _ = utils.compute_const_folding_using_tf( + self.graph, None, self.output_names) + + # Create dict with output to shape mappings for node in node_list: for out in node.outputs: shape = None @@ -221,6 +234,8 @@ def convert(self, save_path): # Build ONNX Graph using onnx_nodes, output_shapes and dtypes onnx_graph = OnnxGraph(onnx_nodes, output_shapes, dtypes, input_names=self.input_names, output_names=self.output_names) + t2o.tfonnx.fold_constants_using_tf(onnx_graph, outputs_to_values) + if self.inputs_as_nchw: self.transpose_inputs(onnx_graph, self.inputs_as_nchw) @@ -232,21 +247,46 @@ def convert(self, save_path): if parent_node.type == 'QuantizeV2': onnx_graph.convert_qdq_nodes(parent_node, node) - # rewriters = [ - # t2o.rewriter.rewrite_transpose, - # 
t2o.rewriter.rnn.rewrite_generic_loop, - # t2o.rewriter.cond_rewriter.rewrite_cond, - # ] - - # t2o.tfonnx.run_rewriters(onnx_graph, rewriters, False) - + # Create ops mapping for the desired opsets + ops_mapping = t2o.handler.tf_op.create_mapping(onnx_graph.opset, onnx_graph.extra_opset) - # some nodes may already copied into inner Graph, so remove them from main Graph. + # Run tf2onnx rewriters + rewriters = [ + # single directional + t2o.tfonnx.rewrite_constant_fold, + t2o.rewriter.rewrite_transpose, + t2o.rewriter.rewrite_flatten, + t2o.rewriter.rewrite_random_uniform, + t2o.rewriter.rewrite_random_uniform_fold_const, + t2o.rewriter.rewrite_random_normal, + t2o.rewriter.rewrite_dropout, + t2o.rewriter.rewrite_conv_dilations, + t2o.rewriter.rewrite_eye, + t2o.rewriter.rewrite_leakyrelu, + t2o.rewriter.rewrite_thresholded_relu, + t2o.rewriter.rewrite_conv2d_with_pad, + t2o.rewriter.rewriter_lstm_tf2, + t2o.rewriter.rewrite_gru_tf2, + t2o.rewriter.rewrite_single_direction_lstm, + # bi-directional + t2o.rewriter.rewrite_bi_direction_lstm, + t2o.rewriter.rewrite_single_direction_gru, + t2o.rewriter.rewrite_bi_direction_gru, + t2o.rewriter.rewrite_custom_rnn_cell, + t2o.rewriter.rewrite_generic_loop, + t2o.rewriter.rewrite_cond, + t2o.rewriter.rewrite_biasadd_with_conv2d, + t2o.rewriter.rewrite_layer_normalization, + t2o.rewriter.rewrite_gemm, + t2o.rewriter.rewrite_ragged_variant_shape, + ] + + t2o.tfonnx.run_rewriters(onnx_graph, rewriters, False) + + # Some nodes may already copied into inner Graph, so remove them from main Graph. onnx_graph.delete_unused_nodes(onnx_graph.outputs) t2o.tfonnx.topological_sort(onnx_graph, False) - # create ops mapping for the desired opsets - ops_mapping = t2o.handler.tf_op.create_mapping(onnx_graph.opset, onnx_graph.extra_opset) mapped_op, unmapped_op, exceptions = \ t2o.tfonnx.tensorflow_onnx_mapping(onnx_graph, ops_mapping) if unmapped_op: From 08c05dde35d62347a9e2a84e1a2c7f5905f685ab Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Tue, 31 Jan 2023 22:48:58 +0800 Subject: [PATCH 10/43] fix accuracy issue Signed-off-by: Lv, Liang1 --- neural_compressor/adaptor/tf_utils/tf2onnx_converter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py index a9210e3fce3..248cc30aa9d 100644 --- a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +++ b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py @@ -275,7 +275,8 @@ def convert(self, save_path): t2o.rewriter.rewrite_custom_rnn_cell, t2o.rewriter.rewrite_generic_loop, t2o.rewriter.rewrite_cond, - t2o.rewriter.rewrite_biasadd_with_conv2d, + # rewrite_biasadd_with_conv2d introduces accuracy issue + #t2o.rewriter.rewrite_biasadd_with_conv2d, t2o.rewriter.rewrite_layer_normalization, t2o.rewriter.rewrite_gemm, t2o.rewriter.rewrite_ragged_variant_shape, From 7727e83515e9d2d2d60639708232f2fba46ed812 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Wed, 1 Feb 2023 14:17:40 +0800 Subject: [PATCH 11/43] add support tf benchmark for resnet50v1.5 Signed-off-by: Lv, Liang1 --- .../resnet50v1.5/fp32_export/README.md | 8 ++ .../tf2onnx/resnet50v1.5/fp32_export/main.py | 105 ++++++++++++----- .../resnet50v1.5/int8_export/README.md | 7 ++ .../tf2onnx/resnet50v1.5/int8_export/main.py | 110 ++++++++++++------ 4 files changed, 166 insertions(+), 64 deletions(-) diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md 
b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md index e1e91116b11..7dc0335868e 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md @@ -38,8 +38,16 @@ ILSVRC2012_img_val val.txt bash run_export.sh --input_model=./resnet50_v1.pb --output_model=./resnet50_v1.onnx ``` +### Run benchmark for Tensorflow FP32 model +```shell +bash run_benchmark.sh --input_model=./resnet50_v1.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_v1.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` +Please note this dataset is TF records format. + ### Run benchmark for ONNX FP32 model ```shell bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 ``` +Please note this dataset is Raw image dataset. diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py index 340132e48d2..bc33067cba1 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py @@ -21,11 +21,12 @@ import onnx import os import onnxruntime as ort +import numpy as np tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) -def eval_func(model, dataloader, metric, postprocess): +def eval_func_onnx(model, dataloader, metric, postprocess): metric.reset() sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) input_names = [i.name for i in sess.get_inputs()] @@ -35,6 +36,21 @@ def eval_func(model, dataloader, metric, postprocess): metric.update(output[1], label) return metric.result() +def eval_func_tf(model, dataloader, metric, postprocess=None): + from neural_compressor.model import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + metric.update(predictions[1], labels) + acc = metric.result() + return acc class eval_classifier_optimized_graph: """Evaluate image classifier with optimized TensorFlow graph.""" @@ -66,39 +82,66 @@ def run(self): inc_model.export(self.args.output_graph, config) if self.args.benchmark: - model = onnx.load(self.args.input_graph) - data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') - label_path = os.path.join(self.args.dataset_location, 'val.txt') + # ONNX FP32 Benchmark + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') + label_path = os.path.join(self.args.dataset_location, 'val.txt') + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, + 'transform': {'ResizeWithAspectRatio': {'height': 224, 'width': 224}, + 'CenterCrop': {'size': 224}, + 
'Normalize': {'mean': [123.68, 116.78, 103.94]}, + 'Cast': {'dtype': 'float32'}, + 'Transpose': {'perm': [2, 0, 1]}}, + 'filter': None + } + dataloader = create_dataloader('onnxrt_integerops', dataloader_args) - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, - 'transform': {'ResizeWithAspectRatio': {'height': 224, 'width': 224}, - 'CenterCrop': {'size': 224}, - 'Normalize': {'mean': [123.68, 116.78, 103.94]}, - 'Cast': {'dtype': 'float32'}, - 'Transpose': {'perm': [2, 0, 1]}}, - 'filter': None - } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + from neural_compressor.metric import GeneralTopK + top1 = GeneralTopK(k=1) + from neural_compressor.data.transforms.imagenet_transform import LabelShift + postprocess = LabelShift(label_shift=-1) + def eval(onnx_model): + return eval_func_onnx(onnx_model, dataloader, top1, postprocess) - from neural_compressor.metric import GeneralTopK - top1 = GeneralTopK(k=1) - from neural_compressor.data.transforms.imagenet_transform import LabelShift - postprocess = LabelShift(label_shift=-1) - def eval(onnx_model): - return eval_func(onnx_model, dataloader, top1, postprocess) + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + # Tensorflow FP32 Benchmark + else: + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"ImageRecord": {'root': self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': {'height': 224, 'width': 224, + 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + def eval(model): + return eval_func_tf(model, dataloader, top1) - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(self.args.input_graph, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(self.args.input_graph) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) if __name__ == "__main__": evaluate_opt_graph = eval_classifier_optimized_graph() diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md index 7d82ca4fa0f..7e851f1a5dc 100644 --- 
a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md @@ -44,6 +44,13 @@ bash run_tuning.sh --input_model=./resnet50_v1.pb --output_model=./resnet50_v1_i ``` Please note this dataset is TF records format. +### Run benchmark for Tensorflow INT8 model +```shell +bash run_benchmark.sh --input_model=./resnet50_v1_int8.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_v1_int8.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` +Please note this dataset is TF records format. + ### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model ```shell bash run_export.sh --input_model=./resnet50_v1_int8.pb --output_model=./resnet50_v1_int8.onnx diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py index 8f93af6fdfc..6ccb0dd7066 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py @@ -21,11 +21,12 @@ import onnx import os import onnxruntime as ort +import numpy as np tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) -def eval_func(model, dataloader, metric, postprocess): +def eval_func_onnx(model, dataloader, metric, postprocess): metric.reset() sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) input_names = [i.name for i in sess.get_inputs()] @@ -35,6 +36,22 @@ def eval_func(model, dataloader, metric, postprocess): metric.update(output, label) return metric.result() +def eval_func_tf(model, dataloader, metric, postprocess=None): + from neural_compressor.model import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + metric.update(predictions, labels) + acc = metric.result() + return acc + class eval_classifier_optimized_graph: """Evaluate image classifier with optimized TensorFlow graph.""" @@ -103,39 +120,66 @@ def run(self): inc_model.export(self.args.output_graph, config) if self.args.benchmark: - model = onnx.load(self.args.input_graph) - data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') - label_path = os.path.join(self.args.dataset_location, 'val.txt') + # ONNX INT8 Benchmark + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') + label_path = os.path.join(self.args.dataset_location, 'val.txt') - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, - 'transform': {'ResizeWithAspectRatio': {'height': 224, 'width': 224}, - 'CenterCrop': {'size': 224}, - 'Normalize': {'mean': [123.68, 116.78, 103.94]}, - 'Cast': {'dtype': 'float32'}, - 'Transpose': {'perm': [2, 0, 1]}}, - 'filter': None - } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) - - from neural_compressor.metric import GeneralTopK - top1 = 
GeneralTopK(k=1) - from neural_compressor.data.transforms.imagenet_transform import LabelShift - postprocess = LabelShift(label_shift=-1) - def eval(onnx_model): - return eval_func(onnx_model, dataloader, top1, postprocess) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, + 'transform': {'ResizeWithAspectRatio': {'height': 224, 'width': 224}, + 'CenterCrop': {'size': 224}, + 'Normalize': {'mean': [123.68, 116.78, 103.94]}, + 'Cast': {'dtype': 'float32'}, + 'Transpose': {'perm': [2, 0, 1]}}, + 'filter': None + } + dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + + from neural_compressor.metric import GeneralTopK + top1 = GeneralTopK(k=1) + from neural_compressor.data.transforms.imagenet_transform import LabelShift + postprocess = LabelShift(label_shift=-1) + def eval(onnx_model): + return eval_func_onnx(onnx_model, dataloader, top1, postprocess) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + # Tensorflow INT8 Benchmark + else: + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"ImageRecord": {'root': self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': {'height': 224, 'width': 224, + 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + def eval(model): + return eval_func_tf(model, dataloader, top1) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(self.args.input_graph, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(self.args.input_graph) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) if __name__ == "__main__": evaluate_opt_graph = eval_classifier_optimized_graph() From 3319941bb75f4a35b4e82dd6310a8c9f50416cad Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Wed, 1 Feb 2023 14:52:23 +0800 Subject: [PATCH 12/43] add support tf benchmark for ssd_mobilenet_v1 Signed-off-by: Lv, Liang1 --- .../ssd_mobilenet_v1/fp32_export/README.md | 8 ++ .../ssd_mobilenet_v1/fp32_export/main.py | 115 +++++++++++++----- .../ssd_mobilenet_v1/int8_export/README.md | 12 +- .../ssd_mobilenet_v1/int8_export/main.py | 110 +++++++++++++---- 4 files changed, 183 insertions(+), 62 
deletions(-) diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md index b47df35f25d..dd0badc5288 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md @@ -37,8 +37,16 @@ Download CoCo Dataset from [Official Website](https://cocodataset.org/#download) bash run_export.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --output_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx ``` +### Run benchmark for Tensorflow FP32 model +```shell +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 +``` +Please note this dataset is TF records format. + ### Run benchmark for ONNX FP32 model ```shell bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 ``` +Please note this dataset is Raw Coco dataset. diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py index 1a1582c6ff8..82b8cc599f1 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py @@ -19,21 +19,15 @@ from argparse import ArgumentParser import tensorflow as tf import onnx -import os import onnxruntime as ort import numpy as np tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) -def eval_func(model, dataloader, metric, postprocess=None): +def eval_func_onnx(model, dataloader, metric, postprocess=None): metric.reset() session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - # input_names = [i.name for i in sess.get_inputs()] - # for input_data, label in dataloader: - # output = sess.run(None, dict(zip(input_names, [input_data]))) - # metric.update(output, label) - # return metric.result() ort_inputs = {} len_inputs = len(session.get_inputs()) inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] @@ -69,6 +63,29 @@ def eval_func(model, dataloader, metric, postprocess=None): acc = metric.result() return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] +def eval_func_tf(model, dataloader, metric, postprocess=None): + metric.reset() + + from neural_compressor.model import Model + if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph): + model = Model(model) + model.input_tensor_names = ["image_tensor:0"] + model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \ + "detection_scores:0", "detection_classes:0"] + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + metric.update(predictions, labels) + + acc = metric.result() + return acc + class 
eval_classifier_optimized_graph: """Evaluate image classifier with optimized TensorFlow graph.""" @@ -101,32 +118,64 @@ def run(self): inc_model.export(self.args.output_graph, config) if self.args.benchmark: - model = onnx.load(self.args.input_graph) - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"COCORaw": {'root':self.args.dataset_location}}, - 'transform': {'Resize': {'size': 300}}, - 'filter': None - } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) - - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - def eval(onnx_model): - return eval_func(onnx_model, dataloader, mAP2) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) + # ONNX FP32 Benchmark + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"COCORaw": {'root':self.args.dataset_location}}, + 'transform': {'Resize': {'size': 300}}, + 'filter': None + } + dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + def eval(onnx_model): + return eval_func_onnx(onnx_model, dataloader, mAP2) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + # Tensorflow FP32 Benchmark + else: + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, + 'transform': {'Resize': {'size': 300}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + def eval(model): + return eval_func_tf(model, dataloader, mAP2) + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig( + inputs=["image_tensor"], + outputs=["num_detections", "detection_boxes", "detection_scores", "detection_classes"], + warmup=10, + iteration=100, + cores_per_instance=4, + num_of_instance=7) + fit(self.args.input_graph, conf, b_dataloader=dataloader) + else: + accuracy = eval(self.args.input_graph) + print('Batch 
size = %d' % self.args.batch_size) + print("Accuracy: %.5f" % accuracy) if __name__ == "__main__": evaluate_opt_graph = eval_classifier_optimized_graph() diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md index 66fffb0d400..add10df208c 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md @@ -35,6 +35,13 @@ Download CoCo Dataset from [Official Website](https://cocodataset.org/#download) bash run_tuning.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --output_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --dataset_location=/path/to/coco_dataset/ ``` +### Run benchmark for Tensorflow INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 +``` +Please note this dataset is TF records format. + ### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model ```shell bash run_export.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --output_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx @@ -42,6 +49,7 @@ bash run_export.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --ou ### Run benchmark for ONNX INT8 QDQ model ```shell -bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 -bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset_raw/ --batch_size=16 +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=performance --dataset_location=/path/to/coco_dataset_raw/ --batch_size=16 ``` +Please note this dataset is Raw Coco dataset. 
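+
+For reference, the exported ONNX QDQ model can be loaded and spot-checked with ONNX Runtime in the same way the accuracy benchmark above does. This is a minimal sketch only; the input name, 300x300 shape and uint8 dtype are assumptions for ssd_mobilenet_v1's `image_tensor` input, not verified properties of the exported graph:
+```python
+import numpy as np
+import onnx
+import onnxruntime as ort
+
+# Load the exported ONNX QDQ model and run one dummy batch through it.
+model = onnx.load("./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx")
+sess = ort.InferenceSession(model.SerializeToString(),
+                            providers=ort.get_available_providers())
+input_name = sess.get_inputs()[0].name
+dummy = np.random.randint(0, 255, size=(1, 300, 300, 3), dtype=np.uint8)
+outputs = sess.run(None, {input_name: dummy})
+print([out.shape for out in outputs])  # num_detections, boxes, scores, classes
+```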
diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py index 069152e9d02..5714b8d6d9b 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py @@ -26,7 +26,7 @@ tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) -def eval_func(model, dataloader, metric, postprocess=None): +def eval_func_onnx(model, dataloader, metric, postprocess=None): metric.reset() session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) # input_names = [i.name for i in sess.get_inputs()] @@ -69,6 +69,29 @@ def eval_func(model, dataloader, metric, postprocess=None): acc = metric.result() return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] +def eval_func_tf(model, dataloader, metric, postprocess=None): + metric.reset() + + from neural_compressor.model import Model + if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph): + model = Model(model) + model.input_tensor_names = ["image_tensor:0"] + model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \ + "detection_scores:0", "detection_classes:0"] + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + metric.update(predictions, labels) + + acc = metric.result() + return acc + class eval_classifier_optimized_graph: """Evaluate image classifier with optimized TensorFlow graph.""" @@ -129,32 +152,65 @@ def run(self): inc_model.export(self.args.output_graph, config) if self.args.benchmark: - model = onnx.load(self.args.input_graph) - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"COCORaw": {'root':self.args.dataset_location}}, - 'transform': {'Resize': {'size': 300}}, - 'filter': None - } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) - - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - def eval(onnx_model): - return eval_func(onnx_model, dataloader, mAP2) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) + # ONNX INT8 Benchmark + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"COCORaw": {'root':self.args.dataset_location}}, + 'transform': {'Resize': {'size': 300}}, + 'filter': None + } + dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + + from neural_compressor.metric import COCOmAPv2 + output_index_mapping 
= {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + def eval(onnx_model): + return eval_func_onnx(onnx_model, dataloader, mAP2) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + # Tensorflow INT8 Benchmark + else: + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, + 'transform': {'Resize': {'size': 300}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + def eval(model): + return eval_func_tf(model, dataloader, mAP2) + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig( + inputs=["image_tensor"], + outputs=["num_detections", "detection_boxes", "detection_scores", "detection_classes"], + warmup=10, + iteration=100, + cores_per_instance=4, + num_of_instance=7) + fit(self.args.input_graph, conf, b_dataloader=dataloader) + else: + accuracy = eval(self.args.input_graph) + print('Batch size = %d' % self.args.batch_size) + print("Accuracy: %.5f" % accuracy) + if __name__ == "__main__": evaluate_opt_graph = eval_classifier_optimized_graph() From e2bfb75ecb5b8b678a4a91f7c5190a73eef85071 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Wed, 1 Feb 2023 15:32:31 +0800 Subject: [PATCH 13/43] add tensorflow benchmark support for mobilenet_v2 Signed-off-by: Lv, Liang1 --- examples/tensorflow/tf2onnx/README.md | 1 + .../mobilenet_v2/fp32_export/README.md | 8 ++ .../tf2onnx/mobilenet_v2/fp32_export/main.py | 108 ++++++++++++----- .../mobilenet_v2/int8_export/README.md | 7 ++ .../tf2onnx/mobilenet_v2/int8_export/main.py | 113 ++++++++++++------ 5 files changed, 169 insertions(+), 68 deletions(-) diff --git a/examples/tensorflow/tf2onnx/README.md b/examples/tensorflow/tf2onnx/README.md index b18c7940ebe..61a1a5fdab4 100644 --- a/examples/tensorflow/tf2onnx/README.md +++ b/examples/tensorflow/tf2onnx/README.md @@ -6,4 +6,5 @@ Please note that we only support exporting ONNX QDQ format for INT8 now. The following examples are available: * [resnet50v1.5](/examples/tensorflow/tf2onnx/resnet50v1.5): image recognition ResNet50 model from MLPerf. +* [mobilenet_v2](/examples/tensorflow/tf2onnx/mobilenet_v2): image recognition mobilenet_v2 model. * [ssd_mobilenet_v1](/examples/tensorflow/tf2onnx/ssd_mobilenet_v1): object detection ssd_mobilenet_v1 model. 
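+
+All of the examples above can also benchmark the original TensorFlow model. Each example's `main.py` uses the same small evaluation loop for the TensorFlow path; the condensed sketch below is illustrative only and mirrors that per-example code (`dataloader` and `metric` stand for whatever objects the example builds):
+```python
+import numpy as np
+from neural_compressor.model import Model
+
+def eval_func_tf(graph_or_path, dataloader, metric):
+    """Condensed sketch of the shared TensorFlow benchmark loop."""
+    model = Model(graph_or_path)  # accepts a frozen .pb path or a tf.Graph
+    output_tensor = model.output_tensor if len(model.output_tensor) > 1 \
+        else model.output_tensor[0]
+    for inputs, labels in dataloader:
+        # keep the order and length of inputs aligned with model.input_tensor
+        feed_dict = dict(zip(model.input_tensor, np.array([inputs])))
+        predictions = model.sess.run(output_tensor, feed_dict)
+        metric.update(predictions, labels)
+    return metric.result()
+```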
diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md index 77b884e7314..7936c7ed2d3 100644 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md @@ -72,8 +72,16 @@ ILSVRC2012_img_val val.txt bash run_export.sh --input_model=./frozen_mobilenet_v2.pb --output_model=./mobilenet_v2.onnx ``` +### Run benchmark for Tensorflow FP32 model +```shell +bash run_benchmark.sh --input_model=./frozen_mobilenet_v2.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./frozen_mobilenet_v2.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` +Please note this dataset is TF records format. + ### Run benchmark for ONNX FP32 model ```shell bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 ``` +Please note this dataset is Raw image dataset. diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py index 09fb192f1ba..e231b948224 100644 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py @@ -21,11 +21,12 @@ import onnx import os import onnxruntime as ort +import numpy as np tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) -def eval_func(model, dataloader, metric, postprocess): +def eval_func_onnx(model, dataloader, metric, postprocess): metric.reset() sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) input_names = [i.name for i in sess.get_inputs()] @@ -35,6 +36,21 @@ def eval_func(model, dataloader, metric, postprocess): metric.update(output, label) return metric.result() +def eval_func_tf(model, dataloader, metric, postprocess=None): + from neural_compressor.model import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + metric.update(predictions, labels) + acc = metric.result() + return acc class eval_classifier_optimized_graph: """Evaluate image classifier with optimized TensorFlow graph.""" @@ -67,41 +83,67 @@ def run(self): inc_model.export(self.args.output_graph, config) if self.args.benchmark: - model = onnx.load(self.args.input_graph) - data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') - label_path = os.path.join(self.args.dataset_location, 'val.txt') + # ONNX FP32 Benchmark + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') + label_path = os.path.join(self.args.dataset_location, 'val.txt') + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, + 'transform': 
{'Rescale': {}, + 'Resize': {'size':256}, + 'CenterCrop': {'size': 224}, + 'Normalize': {'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225]}, + 'Cast': {'dtype': 'float32'}, + 'Transpose': {'perm': [2, 0, 1]}}, + 'filter': None + } + dataloader = create_dataloader('onnxrt_integerops', dataloader_args) - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, - 'transform': {'Rescale': {}, - 'Resize': {'size':256}, - 'CenterCrop': {'size': 224}, - 'Normalize': {'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225]}, - 'Cast': {'dtype': 'float32'}, - 'Transpose': {'perm': [2, 0, 1]}}, - 'filter': None - } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + from neural_compressor.metric import GeneralTopK + top1 = GeneralTopK(k=1) + from neural_compressor.data.transforms.imagenet_transform import LabelShift + postprocess = LabelShift(label_shift=-1) + def eval(onnx_model): + return eval_func_onnx(onnx_model, dataloader, top1, postprocess) - from neural_compressor.metric import GeneralTopK - top1 = GeneralTopK(k=1) - from neural_compressor.data.transforms.imagenet_transform import LabelShift - postprocess = LabelShift(label_shift=-1) - def eval(onnx_model): - return eval_func(onnx_model, dataloader, top1, postprocess) + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + # Tensorflow FP32 Benchmark + else: + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"ImageRecord": {'root': self.args.dataset_location}}, + 'transform': {'BilinearImagenet': {'height': 224, 'width': 224}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + def eval(model): + return eval_func_tf(model, dataloader, top1) - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(self.args.input_graph, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(self.args.input_graph) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) if __name__ == "__main__": evaluate_opt_graph = eval_classifier_optimized_graph() diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md index 26c12d4eae2..ec69f6f5eda 
100644 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md @@ -77,6 +77,13 @@ bash run_tuning.sh --input_model=./frozen_mobilenet_v2.pb --output_model=./mobil ``` Please note this dataset is TF records format. +### Run benchmark for Tensorflow INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./mobilenet_v2_int8.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./mobilenet_v2_int8.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` +Please note this dataset is Raw image dataset. + ### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model ```shell bash run_export.sh --input_model=./mobilenet_v2_int8.pb --output_model=./mobilenet_v2_int8.onnx diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py index d295f934349..57d803ed288 100644 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py @@ -21,11 +21,12 @@ import onnx import os import onnxruntime as ort +import numpy as np tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) -def eval_func(model, dataloader, metric, postprocess): +def eval_func_onnx(model, dataloader, metric, postprocess): metric.reset() sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) input_names = [i.name for i in sess.get_inputs()] @@ -35,6 +36,22 @@ def eval_func(model, dataloader, metric, postprocess): metric.update(output, label) return metric.result() +def eval_func_tf(model, dataloader, metric, postprocess=None): + from neural_compressor.model import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + metric.update(predictions, labels) + acc = metric.result() + return acc + class eval_classifier_optimized_graph: """Evaluate image classifier with optimized TensorFlow graph.""" @@ -84,41 +101,67 @@ def run(self): inc_model.export(self.args.output_graph, config) if self.args.benchmark: - model = onnx.load(self.args.input_graph) - data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') - label_path = os.path.join(self.args.dataset_location, 'val.txt') + # ONNX INT8 Benchmark + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') + label_path = os.path.join(self.args.dataset_location, 'val.txt') - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, - 'transform': {'Rescale': {}, - 'Resize': {'size':256}, - 'CenterCrop': {'size': 224}, - 'Normalize': {'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225]}, - 'Cast': {'dtype': 'float32'}, - 'Transpose': {'perm': [2, 0, 1]}}, - 'filter': None - } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) - - from neural_compressor.metric import 
GeneralTopK - top1 = GeneralTopK(k=1) - from neural_compressor.data.transforms.imagenet_transform import LabelShift - postprocess = LabelShift(label_shift=-1) - def eval(onnx_model): - return eval_func(onnx_model, dataloader, top1, postprocess) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, + 'transform': {'Rescale': {}, + 'Resize': {'size':256}, + 'CenterCrop': {'size': 224}, + 'Normalize': {'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225]}, + 'Cast': {'dtype': 'float32'}, + 'Transpose': {'perm': [2, 0, 1]}}, + 'filter': None + } + dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + + from neural_compressor.metric import GeneralTopK + top1 = GeneralTopK(k=1) + from neural_compressor.data.transforms.imagenet_transform import LabelShift + postprocess = LabelShift(label_shift=-1) + def eval(onnx_model): + return eval_func_onnx(onnx_model, dataloader, top1, postprocess) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + # Tensorflow INT8 Benchmark + else: + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"ImageRecord": {'root': self.args.dataset_location}}, + 'transform': {'BilinearImagenet': {'height': 224, 'width': 224}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + def eval(model): + return eval_func_tf(model, dataloader, top1) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(self.args.input_graph, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(self.args.input_graph) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) if __name__ == "__main__": evaluate_opt_graph = eval_classifier_optimized_graph() From 8c9dc16f810a21403169475b5b7882f6ed6a668d Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Wed, 1 Feb 2023 20:34:07 +0800 Subject: [PATCH 14/43] enable faster_rcnn_resnet50 example Signed-off-by: Lv, Liang1 --- .../fp32_export/README.md | 60 +++++ .../faster_rcnn_resnet50/fp32_export/main.py | 182 +++++++++++++++ .../fp32_export/requirements.txt | 9 + .../fp32_export/run_benchmark.sh | 42 ++++ .../fp32_export/run_export.sh | 35 +++ .../int8_export/README.md | 65 ++++++ 
.../faster_rcnn_resnet50/int8_export/main.py | 216 ++++++++++++++++++ .../int8_export/requirements.txt | 9 + .../int8_export/run_benchmark.sh | 42 ++++ .../int8_export/run_export.sh | 35 +++ .../int8_export/run_tuning.sh | 39 ++++ .../mobilenet_v2/fp32_export/README.md | 28 ++- .../mobilenet_v2/fp32_export/requirements.txt | 1 + .../mobilenet_v2/int8_export/README.md | 30 ++- .../mobilenet_v2/int8_export/requirements.txt | 4 +- .../resnet50v1.5/fp32_export/README.md | 28 ++- .../resnet50v1.5/fp32_export/requirements.txt | 1 + .../resnet50v1.5/int8_export/README.md | 30 ++- .../resnet50v1.5/int8_export/requirements.txt | 4 +- .../ssd_mobilenet_v1/fp32_export/README.md | 29 ++- .../fp32_export/requirements.txt | 1 + .../ssd_mobilenet_v1/int8_export/README.md | 31 ++- .../int8_export/requirements.txt | 4 +- .../graph_rewriter/onnx/onnx_graph.py | 31 +++ 24 files changed, 892 insertions(+), 64 deletions(-) create mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/README.md create mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/main.py create mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/requirements.txt create mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_benchmark.sh create mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_export.sh create mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/README.md create mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/main.py create mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/requirements.txt create mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_benchmark.sh create mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_export.sh create mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_tuning.sh diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/README.md b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/README.md new file mode 100644 index 00000000000..51181da12f2 --- /dev/null +++ b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/README.md @@ -0,0 +1,60 @@ +Step-by-Step +============ + +This document is used to show how to export Tensorflow faster_rcnn_resnet50 FP32 model to ONNX FP32 model using Intel® Neural Compressor. + + +# Prerequisite + +## 1. Environment + +### Installation +Recommend python 3.8 or higher version. +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### Install requirements +```shell +pip install -r requirements.txt +``` + +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Pretrained model + +```shell +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz +tar -xvf faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz +``` + +## 3. Prepare Dataset + +Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). +The dataset can be converted into tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. 
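+
+The `run_export.sh` command in the next section is a thin wrapper around this example's `main.py`, which performs roughly the following Intel® Neural Compressor call (shown here for reference; see `main.py` for the full version):
+```python
+from neural_compressor.model import Model
+from neural_compressor.config import TF2ONNXConfig
+
+# Wrap the frozen FP32 graph and export it to ONNX FP32.
+inc_model = Model("./faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb")
+inc_model.input_tensor_names = ["image_tensor"]
+inc_model.output_tensor_names = ["num_detections", "detection_boxes",
+                                 "detection_scores", "detection_classes"]
+inc_model.export("./faster_rcnn_resnet50_fp32_coco.onnx", TF2ONNXConfig(dtype="fp32"))
+```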
+ +# Run Command + +## Export Tensorflow FP32 model to ONNX FP32 model +```shell +bash run_export.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb --output_model=./faster_rcnn_resnet50_fp32_coco.onnx +``` + +## Run benchmark for Tensorflow FP32 model +```shell +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 +``` +Please note this dataset is TF records format. + +## Run benchmark for ONNX FP32 model +```shell +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_fp32_coco.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_fp32_coco.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 +``` +Please note this dataset is Raw Coco dataset. diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/main.py b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/main.py new file mode 100644 index 00000000000..55bb1a378cb --- /dev/null +++ b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/main.py @@ -0,0 +1,182 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from argparse import ArgumentParser +import tensorflow as tf +import onnx +import onnxruntime as ort +import numpy as np + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def eval_func_onnx(model, dataloader, metric, postprocess=None): + metric.reset() + session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for inputs, labels in dataloader: + if not isinstance(labels, list): + labels = [labels] + if len_inputs == 1: + ort_inputs.update( + inputs if isinstance(inputs, dict) else {inputs_names[0]: inputs} + ) + else: + assert len_inputs == len(inputs), \ + 'number of input tensors must align with graph inputs' + + if isinstance(inputs, dict): # pragma: no cover + ort_inputs.update(inputs) + else: + for i in range(len_inputs): + # in case dataloader contains non-array input + if not isinstance(inputs[i], np.ndarray): + ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) + else: + ort_inputs.update({inputs_names[i]: inputs[i]}) + + predictions = session.run(None, ort_inputs) + + if postprocess is not None: + predictions, labels = postprocess((predictions, labels)) + + if not hasattr(metric, "compare_label") or \ + (hasattr(metric, "compare_label") and metric.compare_label): + metric.update(predictions, labels) + acc = metric.result() + return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] + +def eval_func_tf(model, dataloader, metric, postprocess=None): + metric.reset() + + from neural_compressor.model import Model + if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph): + model = Model(model) + model.input_tensor_names = ["image_tensor:0"] + model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \ + "detection_scores:0", "detection_classes:0"] + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + metric.update(predictions, labels) + + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') + arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include export and benchmark option.""" 
+ if self.args.export: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + inc_model.input_tensor_names = ["image_tensor"] + inc_model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] + config = TF2ONNXConfig(dtype="fp32") + inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + # ONNX FP32 Benchmark + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"COCORaw": {'root':self.args.dataset_location}}, + 'transform': {'Resize': {'size': 600}}, + 'filter': None + } + dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + def eval(onnx_model): + return eval_func_onnx(onnx_model, dataloader, mAP2) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + # Tensorflow FP32 Benchmark + else: + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, + 'transform': {'Resize': {'size': 600}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + def eval(model): + return eval_func_tf(model, dataloader, mAP2) + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig( + inputs=["image_tensor"], + outputs=["num_detections", "detection_boxes", "detection_scores", "detection_classes"], + warmup=10, + iteration=100, + cores_per_instance=4, + num_of_instance=7) + fit(self.args.input_graph, conf, b_dataloader=dataloader) + else: + accuracy = eval(self.args.input_graph) + print('Batch size = %d' % self.args.batch_size) + print("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/requirements.txt new file mode 100644 index 00000000000..16783f94457 --- /dev/null +++ b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/requirements.txt @@ -0,0 +1,9 @@ +tensorflow==2.11.0 +tf2onnx==1.13.0 +onnx==1.9.0; python_version < '3.10' +onnx==1.12.0; python_version == '3.10' +onnxruntime==1.10.0; python_version < '3.10' +onnxruntime==1.12.0; python_version == '3.10' +onnxruntime-extensions; python_version < '3.10' + +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid 
a vulnerability diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_benchmark.sh new file mode 100644 index 00000000000..e83a029e800 --- /dev/null +++ b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_export.sh b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_export.sh new file mode 100644 index 00000000000..1c6d1c908fe --- /dev/null +++ b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_export.sh @@ -0,0 +1,35 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --export +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/README.md b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/README.md new file mode 100644 index 00000000000..237f369b5eb --- /dev/null +++ b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/README.md @@ -0,0 +1,65 @@ +Step-by-Step +============ + +This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. + + +# Prerequisite + +## 1. Environment + +### Installation +Recommend python 3.8 or higher version. +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### Install requirements +```shell +pip install -r requirements.txt +``` + +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Pretrained model + +```shell +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz +tar -xvf faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz +``` + +## 3. Prepare Dataset + +Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). +The dataset can be converted into tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. 
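
For reference, the sketch below mirrors how `main.py` in this example consumes the two dataset layouts: the TF records directory feeds the TensorFlow dataloaders used for quantization and the TensorFlow benchmark, while the raw COCO images feed the ONNX Runtime dataloader. The dataset paths are placeholders.

```python
# Mirrors the dataloader setup in main.py of this example; dataset paths are placeholders.
from neural_compressor.utils.create_obj_from_config import create_dataloader

# TF records format (quantization calibration and TensorFlow benchmark)
tf_dataloader = create_dataloader('tensorflow', {
    'batch_size': 16,
    'dataset': {"COCORecord": {'root': '/path/to/coco_dataset/'}},
    'transform': {'Resize': {'size': 600}},
    'filter': None})

# Raw COCO images (ONNX benchmark)
onnx_dataloader = create_dataloader('onnxrt_integerops', {
    'batch_size': 16,
    'dataset': {"COCORaw": {'root': '/path/to/coco_dataset_raw/'}},
    'transform': {'Resize': {'size': 600}},
    'filter': None})
```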
+ +# Run Command + +## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model +```shell +bash run_tuning.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb --output_model=./faster_rcnn_resnet50_coco_int8.pb --dataset_location=/path/to/coco_dataset/ +``` + +## Run benchmark for Tensorflow INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_coco_int8.pb --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_coco_int8.pb --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 +``` +Please note this dataset is TF records format. + +## Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model +```shell +bash run_export.sh --input_model=./faster_rcnn_resnet50_coco_int8.pb --output_model=./faster_rcnn_resnet50_coco_int8.onnx +``` + +## Run benchmark for ONNX INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_coco_int8.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset_raw/ --batch_size=16 +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_coco_int8.onnx --mode=performance --dataset_location=/path/to/coco_dataset_raw/ --batch_size=16 +``` +Please note this dataset is Raw Coco dataset. diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/main.py b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/main.py new file mode 100644 index 00000000000..eb757a414cd --- /dev/null +++ b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/main.py @@ -0,0 +1,216 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from argparse import ArgumentParser +import tensorflow as tf +import onnx +import os +import onnxruntime as ort +import numpy as np + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def eval_func_onnx(model, dataloader, metric, postprocess=None): + metric.reset() + session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + # input_names = [i.name for i in sess.get_inputs()] + # for input_data, label in dataloader: + # output = sess.run(None, dict(zip(input_names, [input_data]))) + # metric.update(output, label) + # return metric.result() + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for inputs, labels in dataloader: + if not isinstance(labels, list): + labels = [labels] + if len_inputs == 1: + ort_inputs.update( + inputs if isinstance(inputs, dict) else {inputs_names[0]: inputs} + ) + else: + assert len_inputs == len(inputs), \ + 'number of input tensors must align with graph inputs' + + if isinstance(inputs, dict): # pragma: no cover + ort_inputs.update(inputs) + else: + for i in range(len_inputs): + # in case dataloader contains non-array input + if not isinstance(inputs[i], np.ndarray): + ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) + else: + ort_inputs.update({inputs_names[i]: inputs[i]}) + + predictions = session.run(None, ort_inputs) + + if postprocess is not None: + predictions, labels = postprocess((predictions, labels)) + + if not hasattr(metric, "compare_label") or \ + (hasattr(metric, "compare_label") and metric.compare_label): + metric.update(predictions, labels) + acc = metric.result() + return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] + +def eval_func_tf(model, dataloader, metric, postprocess=None): + metric.reset() + + from neural_compressor.model import Model + if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph): + model = Model(model) + model.input_tensor_names = ["image_tensor:0"] + model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \ + "detection_scores:0", "detection_classes:0"] + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + metric.update(predictions, labels) + + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') + arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') + arg_parser.add_argument('--dataset_location', 
dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + if self.args.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion + from neural_compressor.utils.create_obj_from_config import create_dataloader + calib_dataloader_args = { + 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, + 'transform': None, + 'filter': None + } + calib_dataloader = create_dataloader('tensorflow', calib_dataloader_args) + eval_dataloader_args = { + 'batch_size': 10, + 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, + 'transform': {'Resize': {'size': 600}}, + 'filter': None + } + eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) + conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[10, 50, 100, 200], inputs=['image_tensor'], + outputs=['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes'], + accuracy_criterion = AccuracyCriterion(tolerable_loss=0.32)) + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, eval_metric=mAP2) + q_model.save(self.args.output_graph) + + if self.args.export: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + inc_model.input_tensor_names = ["image_tensor"] + inc_model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] + config = TF2ONNXConfig(dtype="int8") + inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + # ONNX INT8 Benchmark + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"COCORaw": {'root':self.args.dataset_location}}, + 'transform': {'Resize': {'size': 600}}, + 'filter': None + } + dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + def eval(onnx_model): + return eval_func_onnx(onnx_model, dataloader, mAP2) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + # Tensorflow INT8 Benchmark + else: + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, + 'transform': 
{'Resize': {'size': 600}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + def eval(model): + return eval_func_tf(model, dataloader, mAP2) + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig( + inputs=["image_tensor"], + outputs=["num_detections", "detection_boxes", "detection_scores", "detection_classes"], + warmup=10, + iteration=100, + cores_per_instance=4, + num_of_instance=7) + fit(self.args.input_graph, conf, b_dataloader=dataloader) + else: + accuracy = eval(self.args.input_graph) + print('Batch size = %d' % self.args.batch_size) + print("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/requirements.txt new file mode 100644 index 00000000000..b964010af83 --- /dev/null +++ b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/requirements.txt @@ -0,0 +1,9 @@ +tensorflow==2.11.0 +tf2onnx==1.13.0 +onnx==1.9.0; python_version < '3.10' +onnx==1.12.0; python_version == '3.10' +onnxruntime==1.10.0; python_version < '3.10' +onnxruntime==1.12.0; python_version == '3.10' +onnxruntime-extensions; python_version < '3.10' + +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_benchmark.sh new file mode 100644 index 00000000000..e83a029e800 --- /dev/null +++ b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_export.sh b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_export.sh new file mode 100644 index 00000000000..1c6d1c908fe --- /dev/null +++ b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_export.sh @@ -0,0 +1,35 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --export +} + +main "$@" diff --git 
a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_tuning.sh b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_tuning.sh new file mode 100644 index 00000000000..6a9e1b859c9 --- /dev/null +++ b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_tuning.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dataset_location ${dataset_location} \ + --tune +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md index 7936c7ed2d3..67a64f3db0d 100644 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md @@ -4,19 +4,29 @@ Step-by-Step This document is used to show how to export Tensorflow Mobilenet_v2 FP32 model to ONNX FP32 model using Intel® Neural Compressor. -## Prerequisite +# Prerequisite -### 1. Installation +## 1. Environment + +### Installation +Recommend python 3.8 or higher version. ```shell # Install Intel® Neural Compressor pip install neural-compressor ``` -### 2. Install requirements + +### Install requirements ```shell pip install -r requirements.txt ``` -### 3. Prepare Pretrained model +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Pretrained model The mobilenet_v2 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). We can get the pb file by convert the checkpoint file. @@ -56,7 +66,7 @@ We can get the pb file by convert the checkpoint file. ``` -### 4. Prepare Dataset +## 3. Prepare Dataset Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/ImageNet. The dir include below folder and files: @@ -65,21 +75,21 @@ ls /path/to/ImageNet ILSVRC2012_img_val val.txt ``` -## Run Command +# Run Command -### Export Tensorflow FP32 model to ONNX FP32 model +## Export Tensorflow FP32 model to ONNX FP32 model ```shell bash run_export.sh --input_model=./frozen_mobilenet_v2.pb --output_model=./mobilenet_v2.onnx ``` -### Run benchmark for Tensorflow FP32 model +## Run benchmark for Tensorflow FP32 model ```shell bash run_benchmark.sh --input_model=./frozen_mobilenet_v2.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 bash run_benchmark.sh --input_model=./frozen_mobilenet_v2.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` Please note this dataset is TF records format. 
-### Run benchmark for ONNX FP32 model +## Run benchmark for ONNX FP32 model ```shell bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/requirements.txt index 8d1eeb068a8..16783f94457 100644 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/requirements.txt +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/requirements.txt @@ -1,3 +1,4 @@ +tensorflow==2.11.0 tf2onnx==1.13.0 onnx==1.9.0; python_version < '3.10' onnx==1.12.0; python_version == '3.10' diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md index ec69f6f5eda..bc57ea3cfb0 100644 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md @@ -4,21 +4,31 @@ Step-by-Step This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. -## Prerequisite +# Prerequisite -### 1. Installation +## 1. Environment + +### Installation +Recommend python 3.8 or higher version. ```shell # Install Intel® Neural Compressor pip install neural-compressor ``` -### 2. Install requirements + +### Install requirements The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this export ONNX INT8 model example. The Intel Extension for Tensorflow for Intel CPUs is installed as default. ```shell pip install -r requirements.txt ``` -### 3. Prepare Pretrained model +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Pretrained model The mobilenet_v2 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). We can get the pb file by convert the checkpoint file. @@ -57,7 +67,7 @@ We can get the pb file by convert the checkpoint file. --output_node_names=MobilenetV2/Predictions/Reshape_1 ``` -### 4. Prepare Dataset +## 3. Prepare Dataset Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/ImageNet. The dir include below folder and files: @@ -69,27 +79,27 @@ The Raw image dataset is used for running benchmarking for ONNX model. TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. The TF records format dataset is used for quantizing Tensorflow FP32 model to Tensorflow INT8 QDQ model. -## Run Command +# Run Command -### Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model +## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model ```shell bash run_tuning.sh --input_model=./frozen_mobilenet_v2.pb --output_model=./mobilenet_v2_int8.pb --dataset_location=/path/to/imagenet/ ``` Please note this dataset is TF records format. 
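
Under the hood, `run_tuning.sh` calls `main.py`, which performs post-training quantization through the Python API. Below is a minimal sketch modeled on the faster_rcnn_resnet50 example in this patch series; the dataloader transform, sampling sizes, and paths are illustrative assumptions and may differ from what this example's `main.py` actually uses.

```python
# Illustrative sketch of post-training quantization with the ITEX backend, which
# produces a QDQ graph. Modeled on the faster_rcnn_resnet50 example; the transform
# and sampling sizes below are assumptions, and paths are placeholders.
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig
from neural_compressor.utils.create_obj_from_config import create_dataloader

calib_dataloader = create_dataloader('tensorflow', {
    'batch_size': 10,
    'dataset': {"ImageRecord": {'root': '/path/to/imagenet/'}},
    'transform': {'ResizeCropImagenet': {'height': 224, 'width': 224}},
    'filter': None})

conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100])
q_model = quantization.fit("./frozen_mobilenet_v2.pb", conf=conf,
                           calib_dataloader=calib_dataloader)
q_model.save("./mobilenet_v2_int8.pb")
```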
-### Run benchmark for Tensorflow INT8 QDQ model +## Run benchmark for Tensorflow INT8 QDQ model ```shell bash run_benchmark.sh --input_model=./mobilenet_v2_int8.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 bash run_benchmark.sh --input_model=./mobilenet_v2_int8.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` Please note this dataset is Raw image dataset. -### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model +## Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model ```shell bash run_export.sh --input_model=./mobilenet_v2_int8.pb --output_model=./mobilenet_v2_int8.onnx ``` -### Run benchmark for ONNX INT8 QDQ model +## Run benchmark for ONNX INT8 QDQ model ```shell bash run_benchmark.sh --input_model=./mobilenet_v2_int8.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 bash run_benchmark.sh --input_model=./mobilenet_v2_int8.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/requirements.txt index 9bdc24cb87b..b964010af83 100644 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/requirements.txt +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/requirements.txt @@ -1,5 +1,5 @@ -tensorflow=2.10.0 -intel-extension-for-tensorflow[cpu] +tensorflow==2.11.0 +tf2onnx==1.13.0 onnx==1.9.0; python_version < '3.10' onnx==1.12.0; python_version == '3.10' onnxruntime==1.10.0; python_version < '3.10' diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md index 7dc0335868e..139c6b55c6d 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md @@ -4,25 +4,35 @@ Step-by-Step This document is used to show how to export Tensorflow RestNet50 FP32 model to ONNX FP32 model using Intel® Neural Compressor. -## Prerequisite +# Prerequisite -### 1. Installation +## 1. Environment + +### Installation +Recommend python 3.8 or higher version. ```shell # Install Intel® Neural Compressor pip install neural-compressor ``` -### 2. Install requirements + +### Install requirements ```shell pip install -r requirements.txt ``` -### 3. Prepare Pretrained model +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Pretrained model ```bash wget https://zenodo.org/record/2535873/files/resnet50_v1.pb ``` -### 4. Prepare Dataset +## 3. Prepare Dataset Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/ImageNet. 
The dir include below folder and files: @@ -31,21 +41,21 @@ ls /path/to/ImageNet ILSVRC2012_img_val val.txt ``` -## Run Command +# Run Command -### Export Tensorflow FP32 model to ONNX FP32 model +## Export Tensorflow FP32 model to ONNX FP32 model ```shell bash run_export.sh --input_model=./resnet50_v1.pb --output_model=./resnet50_v1.onnx ``` -### Run benchmark for Tensorflow FP32 model +## Run benchmark for Tensorflow FP32 model ```shell bash run_benchmark.sh --input_model=./resnet50_v1.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 bash run_benchmark.sh --input_model=./resnet50_v1.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` Please note this dataset is TF records format. -### Run benchmark for ONNX FP32 model +## Run benchmark for ONNX FP32 model ```shell bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/requirements.txt index 8d1eeb068a8..16783f94457 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/requirements.txt +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/requirements.txt @@ -1,3 +1,4 @@ +tensorflow==2.11.0 tf2onnx==1.13.0 onnx==1.9.0; python_version < '3.10' onnx==1.12.0; python_version == '3.10' diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md index 7e851f1a5dc..756fe3c48ae 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md @@ -4,27 +4,37 @@ Step-by-Step This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. -## Prerequisite +# Prerequisite -### 1. Installation +## 1. Environment + +### Installation +Recommend python 3.8 or higher version. ```shell # Install Intel® Neural Compressor pip install neural-compressor ``` -### 2. Install requirements + +### Install requirements The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this export ONNX INT8 model example. The Intel Extension for Tensorflow for Intel CPUs is installed as default. ```shell pip install -r requirements.txt ``` -### 3. Prepare Pretrained model +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Pretrained model ```bash wget https://zenodo.org/record/2535873/files/resnet50_v1.pb ``` -### 4. Prepare Dataset +## 3. Prepare Dataset Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/ImageNet. The dir include below folder and files: @@ -36,27 +46,27 @@ The Raw image dataset is used for running benchmarking for ONNX model. TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. 
The TF records format dataset is used for quantizing Tensorflow FP32 model to Tensorflow INT8 QDQ model. -## Run Command +# Run Command -### Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model +## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model ```shell bash run_tuning.sh --input_model=./resnet50_v1.pb --output_model=./resnet50_v1_int8.pb --dataset_location=/path/to/imagenet/ ``` Please note this dataset is TF records format. -### Run benchmark for Tensorflow INT8 model +## Run benchmark for Tensorflow INT8 model ```shell bash run_benchmark.sh --input_model=./resnet50_v1_int8.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 bash run_benchmark.sh --input_model=./resnet50_v1_int8.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` Please note this dataset is TF records format. -### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model +## Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model ```shell bash run_export.sh --input_model=./resnet50_v1_int8.pb --output_model=./resnet50_v1_int8.onnx ``` -### Run benchmark for ONNX INT8 QDQ model +## Run benchmark for ONNX INT8 QDQ model ```shell bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/requirements.txt index 9bdc24cb87b..b964010af83 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/requirements.txt +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/requirements.txt @@ -1,5 +1,5 @@ -tensorflow=2.10.0 -intel-extension-for-tensorflow[cpu] +tensorflow==2.11.0 +tf2onnx==1.13.0 onnx==1.9.0; python_version < '3.10' onnx==1.12.0; python_version == '3.10' onnxruntime==1.10.0; python_version < '3.10' diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md index dd0badc5288..c597c1a77c2 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md @@ -4,20 +4,29 @@ Step-by-Step This document is used to show how to export Tensorflow ssd_mobilenet_v1 FP32 model to ONNX FP32 model using Intel® Neural Compressor. -## Prerequisite +# Prerequisite -### 1. Installation +## 1. Environment + +### Installation +Recommend python 3.8 or higher version. ```shell # Install Intel® Neural Compressor pip install neural-compressor ``` -### 2. Install requirements +### Install requirements ```shell pip install -r requirements.txt ``` -### 3. Prepare Pretrained model +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Pretrained model ```shell export MODEL=ssd_mobilenet_v1_coco_2018_01_28 @@ -25,26 +34,26 @@ wget http://download.tensorflow.org/models/object_detection/$MODEL.tar.gz tar -xvf $MODEL.tar.gz ``` -### 4. Prepare Dataset +## 3. Prepare Dataset Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). +The dataset can be converted into tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. 
+# Run Command -## Run Command - -### Export Tensorflow FP32 model to ONNX FP32 model +## Export Tensorflow FP32 model to ONNX FP32 model ```shell bash run_export.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --output_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx ``` -### Run benchmark for Tensorflow FP32 model +## Run benchmark for Tensorflow FP32 model ```shell bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 ``` Please note this dataset is TF records format. -### Run benchmark for ONNX FP32 model +## Run benchmark for ONNX FP32 model ```shell bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/requirements.txt index 8d1eeb068a8..16783f94457 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/requirements.txt +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/requirements.txt @@ -1,3 +1,4 @@ +tensorflow==2.11.0 tf2onnx==1.13.0 onnx==1.9.0; python_version < '3.10' onnx==1.12.0; python_version == '3.10' diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md index add10df208c..d89a887cd8c 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md @@ -4,19 +4,29 @@ Step-by-Step This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. -## Prerequisite +# Prerequisite -### 1. Installation +## 1. Environment + +### Installation +Recommend python 3.8 or higher version. ```shell # Install Intel® Neural Compressor pip install neural-compressor ``` -### 2. Install requirements + +### Install requirements ```shell pip install -r requirements.txt ``` -### 3. Prepare Pretrained model +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Pretrained model ```bash export MODEL=ssd_mobilenet_v1_coco_2018_01_28 @@ -24,30 +34,31 @@ wget http://download.tensorflow.org/models/object_detection/$MODEL.tar.gz tar -xvf $MODEL.tar.gz ``` -### 4. Prepare Dataset +## 3. Prepare Dataset Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). +The dataset can be converted into tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. 
-## Run Command +# Run Command -### Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model +## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model ```shell bash run_tuning.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --output_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --dataset_location=/path/to/coco_dataset/ ``` -### Run benchmark for Tensorflow INT8 QDQ model +## Run benchmark for Tensorflow INT8 QDQ model ```shell bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 ``` Please note this dataset is TF records format. -### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model +## Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model ```shell bash run_export.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --output_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx ``` -### Run benchmark for ONNX INT8 QDQ model +## Run benchmark for ONNX INT8 QDQ model ```shell bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset_raw/ --batch_size=16 bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=performance --dataset_location=/path/to/coco_dataset_raw/ --batch_size=16 diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/requirements.txt index 9bdc24cb87b..b964010af83 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/requirements.txt +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/requirements.txt @@ -1,5 +1,5 @@ -tensorflow=2.10.0 -intel-extension-for-tensorflow[cpu] +tensorflow==2.11.0 +tf2onnx==1.13.0 onnx==1.9.0; python_version < '3.10' onnx==1.12.0; python_version == '3.10' onnxruntime==1.10.0; python_version < '3.10' diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py index 16728a3db84..86bb26de3bc 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py @@ -532,6 +532,25 @@ def copy_shape(self, input_name, output_name): if shape is not None: self.set_shape(output_name, shape) + def add_graph_output(self, name, dtype=None, shape=None): + """Add node output as graph's output.""" + utils.assert_error(name in self._output_to_node_name, "output %s not exist in the graph", name) + + if dtype is None: + dtype = self.get_dtype(name) + + if shape is None: + shape = self.get_shape(name) + + if name not in self.outputs: + utils.assert_error(shape is not None, "shape for output %s should not be None", name) + utils.assert_error(dtype is not None, "dtype for output %s should not be None", name) + self.outputs.append(name) + self.set_shape(name, shape) + self.set_dtype(name, dtype) + else: + raise ValueError("graph output " + name + " already exists") + def topological_sort(self, ops): """Topological sort of graph.""" # sort by name, the result will be reversed alphabeta @@ -1084,6 +1103,18 @@ def delete_unused_nodes(self, outputs_name): body_graph.delete_unused_nodes(body_graph.outputs) self.reset_nodes(related_nodes) + def safe_to_remove_nodes(self, to_delete): + """ List 
of nodes that safe to delete (i.e. outputs not consumed by other nodes.)""" + safe_to_remove = [] + delete_set = set(to_delete) + for n in delete_set: + out_consumers = set() + for out in n.output: + out_consumers |= set(self.find_output_consumers(out)) + if out_consumers.issubset(delete_set): + safe_to_remove.append(n) + return safe_to_remove + def convert_qdq_nodes(self, q_node, dq_node): """Convert tensorflow QuantizeV2/Dequantize nodes to QuantizeLinear/DequantizeLinear.""" qdq_node_output_dtype = self.get_dtype(dq_node.output[0]) From 7c1480001de165413185f5b7233df98ecbea2175 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Wed, 1 Feb 2023 20:39:41 +0800 Subject: [PATCH 15/43] update README Signed-off-by: Lv, Liang1 --- examples/tensorflow/tf2onnx/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/tensorflow/tf2onnx/README.md b/examples/tensorflow/tf2onnx/README.md index 61a1a5fdab4..22e18332e3d 100644 --- a/examples/tensorflow/tf2onnx/README.md +++ b/examples/tensorflow/tf2onnx/README.md @@ -1,10 +1,11 @@ # Tensorflow models export to ONNX moldes Examples These examples show how to export Tensorflow models to ONNX models including FP32 and INT8. -Please note that we only support exporting ONNX QDQ format for INT8 now. +Please note that we only support to export ONNX QDQ format for INT8 now. The following examples are available: * [resnet50v1.5](/examples/tensorflow/tf2onnx/resnet50v1.5): image recognition ResNet50 model from MLPerf. * [mobilenet_v2](/examples/tensorflow/tf2onnx/mobilenet_v2): image recognition mobilenet_v2 model. * [ssd_mobilenet_v1](/examples/tensorflow/tf2onnx/ssd_mobilenet_v1): object detection ssd_mobilenet_v1 model. +* [faster_rcnn_resnet50](/examples/tensorflow/tf2onnx/faster_rcnn_resnet50): object detection faster_rcnn_resnet50 model. 
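
All of these examples share the same export path: wrap the frozen graph in a neural_compressor `Model`, optionally set the input/output tensor names (as the object-detection examples do), and call `export` with a `TF2ONNXConfig`. A minimal sketch with placeholder paths and tensor names:

```python
# Minimal sketch of the shared export flow; paths and tensor names are placeholders.
# Use dtype="int8" for Tensorflow INT8 QDQ graphs and dtype="fp32" for FP32 graphs.
from neural_compressor.model import Model
from neural_compressor.config import TF2ONNXConfig

inc_model = Model("./model_int8.pb")
inc_model.input_tensor_names = ["image_tensor"]
inc_model.output_tensor_names = ["num_detections", "detection_boxes",
                                 "detection_scores", "detection_classes"]
inc_model.export("./model_int8.onnx", TF2ONNXConfig(dtype="int8"))
```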
From 10cb6f1120b10cd2abc29f701d67c03124fedcb4 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Thu, 2 Feb 2023 17:27:25 +0800 Subject: [PATCH 16/43] update vgg16 Signed-off-by: zehao-intel --- .../tf2onnx/vgg16/fp32_export/README.md | 93 + .../tf2onnx/vgg16/fp32_export/main.py | 124 + .../vgg16/fp32_export/requirements.txt | 8 + .../vgg16/fp32_export/run_benchmark.sh | 42 + .../tf2onnx/vgg16/fp32_export/run_export.sh | 35 + .../imagenet_prepare/build_imagenet_data.py | 567 + .../download_and_convert_imagenet.sh | 100 + .../imagenet_prepare/download_imagenet.sh | 99 + .../imagenet_lsvrc_2015_synsets.txt | 1000 + .../imagenet_prepare/imagenet_metadata.txt | 21842 ++++++++++++++++ .../tf2onnx/vgg16/int8_export/README.md | 101 + .../tf2onnx/vgg16/int8_export/main.py | 168 + .../vgg16/int8_export/requirements.txt | 9 + .../vgg16/int8_export/run_benchmark.sh | 42 + .../tf2onnx/vgg16/int8_export/run_export.sh | 35 + .../tf2onnx/vgg16/int8_export/run_tuning.sh | 39 + .../tf2onnx/vgg16/prepare_dataset.sh | 71 + 17 files changed, 24375 insertions(+) create mode 100644 examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md create mode 100644 examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py create mode 100644 examples/tensorflow/tf2onnx/vgg16/fp32_export/requirements.txt create mode 100644 examples/tensorflow/tf2onnx/vgg16/fp32_export/run_benchmark.sh create mode 100644 examples/tensorflow/tf2onnx/vgg16/fp32_export/run_export.sh create mode 100644 examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/build_imagenet_data.py create mode 100644 examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/download_and_convert_imagenet.sh create mode 100644 examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/download_imagenet.sh create mode 100644 examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt create mode 100644 examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/imagenet_metadata.txt create mode 100644 examples/tensorflow/tf2onnx/vgg16/int8_export/README.md create mode 100644 examples/tensorflow/tf2onnx/vgg16/int8_export/main.py create mode 100644 examples/tensorflow/tf2onnx/vgg16/int8_export/requirements.txt create mode 100644 examples/tensorflow/tf2onnx/vgg16/int8_export/run_benchmark.sh create mode 100644 examples/tensorflow/tf2onnx/vgg16/int8_export/run_export.sh create mode 100644 examples/tensorflow/tf2onnx/vgg16/int8_export/run_tuning.sh create mode 100644 examples/tensorflow/tf2onnx/vgg16/prepare_dataset.sh diff --git a/examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md b/examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md new file mode 100644 index 00000000000..62d5f41c726 --- /dev/null +++ b/examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md @@ -0,0 +1,93 @@ +Step-by-Step +============ + +This document is used to show how to export Tensorflow VGG16 FP32 model to ONNX FP32 model using Intel® Neural Compressor. + + +# Prerequisite + +## 1. Environment + +### Install Intel® Neural Compressor +```shell +pip install neural-compressor +``` +### Install requirements +```shell +pip install -r requirements.txt +``` + +## 2. Prepare Model + +The vgg16 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). +We can get the pb file by convert the checkpoint file. + + 1. 
Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models) + ```shell + wget http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz + tar -xvf vgg_16_2016_08_28.tar.gz + ``` + + 2. Exporting the Inference Graph + ```shell + git clone https://github.com/tensorflow/models + cd models/research/slim + python export_inference_graph.py \ + --alsologtostderr \ + --model_name=vgg_16 \ + --output_file=/tmp/vgg_16_inf_graph.pb + ``` + Make sure to use intel-tensorflow v1.15, and pip install tf_slim. + #### Install Intel Tensorflow 1.15 up2 + Check your python version and use pip install 1.15.0 up2 from links below: + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl + > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. + + 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `vgg_16/fc8/squeezed` + + 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo + ```shell + python freeze_graph.py \ + --input_graph=/tmp/vgg_16_inf_graph.pb \ + --input_checkpoint=./vgg_16.ckpt \ + --input_binary=true \ + --output_graph=./frozen_vgg16.pb \ + --output_node_names=vgg_16/fc8/squeezed + ``` + +### 3. Prepare Dataset + + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + + ```shell + cd examples/tensorflow/tf2onnx/vgg16 + # convert validation subset + bash prepare_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd fp32_export + ``` + +## Run Command + +### Export Tensorflow FP32 model to ONNX FP32 model +```shell +bash run_export.sh --input_model=./frozen_vgg16.pb --output_model=./vgg_16.onnx +``` + +## Run benchmark for Tensorflow FP32 model +```shell +bash run_benchmark.sh --input_model=./frozen_vgg16.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./frozen_vgg16.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` +Please note this dataset is TF records format. 
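
The ONNX benchmark in the next section evaluates the exported model with ONNX Runtime. A simplified sketch of the accuracy path in `main.py` (the model path and dataset root are placeholders):

```python
# Simplified sketch of the ONNX accuracy evaluation performed by main.py;
# model path and dataset root are placeholders.
import onnx
import onnxruntime as ort
from neural_compressor.data import LabelShift
from neural_compressor.metric import TensorflowTopK
from neural_compressor.utils.create_obj_from_config import create_dataloader

dataloader = create_dataloader('tensorflow', {
    'batch_size': 32,
    'dataset': {"ImageRecord": {'root': '/path/to/imagenet/'}},
    'transform': {'ResizeCropImagenet': {'height': 224, 'width': 224,
                                         'mean_value': [123.68, 116.78, 103.94]}},
    'filter': None})

model = onnx.load("./vgg_16.onnx")
session = ort.InferenceSession(model.SerializeToString(),
                               providers=ort.get_available_providers())
input_name = session.get_inputs()[0].name

top1 = TensorflowTopK(k=1)
postprocess = LabelShift(label_shift=1)  # VGG has 1000 outputs, so ImageNet labels are shifted by 1
for images, labels in dataloader:
    outputs = session.run(None, {input_name: images})
    outputs, labels = postprocess((outputs, labels))
    top1.update(outputs, labels)
print("Accuracy: %.5f" % top1.result())
```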
+ +### Run benchmark for ONNX FP32 model +```shell +bash run_benchmark.sh --input_model=./vgg_16.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 +bash run_benchmark.sh --input_model=./vgg_16.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 +``` +Please note this dataset is TF records format. diff --git a/examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py b/examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py new file mode 100644 index 00000000000..7520f37201c --- /dev/null +++ b/examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py @@ -0,0 +1,124 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import onnx +import numpy as np +import tensorflow as tf +import onnxruntime as ort +from argparse import ArgumentParser +from neural_compressor.data import LabelShift +from neural_compressor.metric import TensorflowTopK +from neural_compressor.utils.create_obj_from_config import create_dataloader + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +def eval_func_onnx(model, dataloader, metric, postprocess=None): + metric.reset() + sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + input_names = [i.name for i in sess.get_inputs()] + + for input_data, label in dataloader: + output = sess.run(None, dict(zip(input_names, [input_data]))) + + output, label = postprocess((output, label)) + metric.update(output, label) + + acc = metric.result() + return acc + +def eval_func_tf(model, dataloader, metric, postprocess=None): + from neural_compressor.model import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') + 
arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include export and benchmark option.""" + top1 = TensorflowTopK(k=1) + postprocess = LabelShift(label_shift=1) + if self.args.export: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + config = TF2ONNXConfig(dtype="fp32") + inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + else: + model = self.args.input_graph + eval_dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) + + def eval(model): + if isinstance(model, str): + return eval_func_tf(model, eval_dataloader, top1, postprocess) + else: + return eval_func_onnx(model, eval_dataloader, top1, postprocess) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=eval_dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % eval_dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/vgg16/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/vgg16/fp32_export/requirements.txt new file mode 100644 index 00000000000..8d1eeb068a8 --- /dev/null +++ b/examples/tensorflow/tf2onnx/vgg16/fp32_export/requirements.txt @@ -0,0 +1,8 @@ +tf2onnx==1.13.0 +onnx==1.9.0; python_version < '3.10' +onnx==1.12.0; python_version == '3.10' +onnxruntime==1.10.0; python_version < '3.10' +onnxruntime==1.12.0; python_version == '3.10' +onnxruntime-extensions; python_version < '3.10' + +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/tensorflow/tf2onnx/vgg16/fp32_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/vgg16/fp32_export/run_benchmark.sh new file mode 100644 index 00000000000..e83a029e800 --- /dev/null +++ b/examples/tensorflow/tf2onnx/vgg16/fp32_export/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/vgg16/fp32_export/run_export.sh 
b/examples/tensorflow/tf2onnx/vgg16/fp32_export/run_export.sh new file mode 100644 index 00000000000..1c6d1c908fe --- /dev/null +++ b/examples/tensorflow/tf2onnx/vgg16/fp32_export/run_export.sh @@ -0,0 +1,35 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --export +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/build_imagenet_data.py b/examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/build_imagenet_data.py new file mode 100644 index 00000000000..c52d2bd4218 --- /dev/null +++ b/examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/build_imagenet_data.py @@ -0,0 +1,567 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Converts ImageNet data to TFRecords file format with Example protos. + +The raw ImageNet data set is expected to reside in JPEG files located in the +following directory structure. + + data_dir/n01440764/ILSVRC2012_val_00000293.JPEG + data_dir/n01440764/ILSVRC2012_val_00000543.JPEG + ... + +where 'n01440764' is the unique synset label associated with +these images. + +The training data set consists of 1000 sub-directories (i.e. labels) +each containing 1200 JPEG images for a total of 1.2M JPEG images. + +The evaluation data set consists of 1000 sub-directories (i.e. labels) +each containing 50 JPEG images for a total of 50K JPEG images. + +This TensorFlow script converts the training and evaluation data into +a sharded data set consisting of 1024 and 128 TFRecord files, respectively. + + train_directory/train-00000-of-01024 + train_directory/train-00001-of-01024 + ... + train_directory/train-00127-of-01024 + +and + + validation_directory/validation-00000-of-00128 + validation_directory/validation-00001-of-00128 + ... + validation_directory/validation-00127-of-00128 + +Each validation TFRecord file contains ~390 records. Each training TFREcord +file contains ~1250 records. Each record within the TFRecord file is a +serialized Example proto. The Example proto contains the following fields: + + image/encoded: string containing JPEG encoded image in RGB colorspace + image/height: integer, image height in pixels + image/width: integer, image width in pixels + image/colorspace: string, specifying the colorspace, always 'RGB' + image/channels: integer, specifying the number of channels, always 3 + image/format: string, specifying the format, always'JPEG' + + image/filename: string containing the basename of the image file + e.g. 
'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG' + image/class/label: integer specifying the index in a classification layer. + The label ranges from [1, 1000] where 0 is not used. + image/class/synset: string specifying the unique ID of the label, + e.g. 'n01440764' + image/class/text: string specifying the human-readable version of the label + e.g. 'red fox, Vulpes vulpes' + +Note that the length of xmin is identical to the length of xmax, ymin and ymax +for each example. + +Running this script using 16 threads may take around ~2.5 hours on a HP Z420. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from datetime import datetime +import os +import random +import sys +import threading + +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf +tf.compat.v1.disable_eager_execution() + + +tf.compat.v1.app.flags.DEFINE_string('raw_directory', None, + 'Raw data directory') + +tf.compat.v1.app.flags.DEFINE_string('output_directory', None, + 'Output data directory') + +tf.compat.v1.app.flags.DEFINE_integer('shards', 1, + 'Number of shards in TFRecord files.') + +tf.compat.v1.app.flags.DEFINE_string('subset', 'validation', + 'Subset of imagenet, can be validation/train') + +tf.compat.v1.app.flags.DEFINE_integer('num_threads', 1, + 'Number of threads to preprocess the images.') + +# The labels file contains a list of valid labels are held in this file. +# Assumes that the file contains entries as such: +# n01440764 +# n01443537 +# n01484850 +# where each line corresponds to a label expressed as a synset. We map +# each synset contained in the file to an integer (based on the alphabetical +# ordering). See below for details. +tf.compat.v1.app.flags.DEFINE_string('labels_file', + 'imagenet_lsvrc_2015_synsets.txt', + 'Labels file') + +# This file containing mapping from synset to human-readable label. +# Assumes each line of the file looks like: +# +# n02119247 black fox +# n02119359 silver fox +# n02119477 red fox, Vulpes fulva +# +# where each line corresponds to a unique mapping. Note that each line is +# formatted as \t. +tf.compat.v1.app.flags.DEFINE_string('imagenet_metadata_file', + 'imagenet_metadata.txt', + 'ImageNet metadata file') + +FLAGS = tf.compat.v1.app.flags.FLAGS + + +def _int64_feature(value): + """Wrapper for inserting int64 features into Example proto.""" + if not isinstance(value, list): + value = [value] + return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) + + +def _float_feature(value): + """Wrapper for inserting float features into Example proto.""" + if not isinstance(value, list): + value = [value] + return tf.train.Feature(float_list=tf.train.FloatList(value=value)) + + +def _bytes_feature(value): + """Wrapper for inserting bytes features into Example proto.""" + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + +def _convert_to_example(filename, image_buffer, label, synset, human, + height, width): + """Build an Example proto for an example. 
+ + Args: + filename: string, path to an image file, e.g., '/path/to/example.JPG' + image_buffer: string, JPEG encoding of RGB image + label: integer, identifier for the ground truth for the network + synset: string, unique WordNet ID specifying the label, e.g., 'n02323233' + human: string, human-readable label, e.g., 'red fox, Vulpes vulpes' + height: integer, image height in pixels + width: integer, image width in pixels + Returns: + Example proto + """ + + colorspace = b'RGB' + channels = 3 + image_format = b'JPEG' + + example = tf.train.Example(features=tf.train.Features(feature={ + 'image/height': _int64_feature(height), + 'image/width': _int64_feature(width), + 'image/colorspace': _bytes_feature(colorspace), + 'image/channels': _int64_feature(channels), + 'image/class/label': _int64_feature(label), + 'image/class/synset': _bytes_feature(bytes(synset,'utf-8')), + 'image/class/text': _bytes_feature(bytes(human,'utf-8')), + 'image/format': _bytes_feature(image_format), + 'image/filename': _bytes_feature(bytes(os.path.basename(filename),'utf-8')), + 'image/encoded': _bytes_feature(image_buffer)})) + return example + + +class ImageCoder(object): + """Helper class that provides TensorFlow image coding utilities.""" + + def __init__(self): + # Create a single Session to run all image coding calls. + self._sess = tf.compat.v1.Session() + + # Initializes function that converts PNG to JPEG data. + self._png_data = tf.compat.v1.placeholder(dtype=tf.string) + image = tf.image.decode_png(self._png_data, channels=3) + self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100) + + # Initializes function that converts CMYK JPEG data to RGB JPEG data. + self._cmyk_data = tf.compat.v1.placeholder(dtype=tf.string) + image = tf.image.decode_jpeg(self._cmyk_data, channels=0) + self._cmyk_to_rgb = tf.image.encode_jpeg(image, format='rgb', quality=100) + + # Initializes function that decodes RGB JPEG data. + self._decode_jpeg_data = tf.compat.v1.placeholder(dtype=tf.string) + self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3) + + def png_to_jpeg(self, image_data): + return self._sess.run(self._png_to_jpeg, + feed_dict={self._png_data: image_data}) + + def cmyk_to_rgb(self, image_data): + return self._sess.run(self._cmyk_to_rgb, + feed_dict={self._cmyk_data: image_data}) + + def decode_jpeg(self, image_data): + image = self._sess.run(self._decode_jpeg, + feed_dict={self._decode_jpeg_data: image_data}) + assert len(image.shape) == 3 + assert image.shape[2] == 3 + return image + + +def _is_png(filename): + """Determine if a file contains a PNG format image. + + Args: + filename: string, path of the image file. + + Returns: + boolean indicating if the image is a PNG. + """ + # File list from: + # https://groups.google.com/forum/embed/?place=forum/torch7#!topic/torch7/fOSTXHIESSU + return 'n02105855_2933.JPEG' in filename + + +def _is_cmyk(filename): + """Determine if file contains a CMYK JPEG format image. + + Args: + filename: string, path of the image file. + + Returns: + boolean indicating if the image is a JPEG encoded with CMYK color space. 
+ """ + # File list from: + # https://github.com/cytsai/ilsvrc-cmyk-image-list + blacklist = ['n01739381_1309.JPEG', 'n02077923_14822.JPEG', + 'n02447366_23489.JPEG', 'n02492035_15739.JPEG', + 'n02747177_10752.JPEG', 'n03018349_4028.JPEG', + 'n03062245_4620.JPEG', 'n03347037_9675.JPEG', + 'n03467068_12171.JPEG', 'n03529860_11437.JPEG', + 'n03544143_17228.JPEG', 'n03633091_5218.JPEG', + 'n03710637_5125.JPEG', 'n03961711_5286.JPEG', + 'n04033995_2932.JPEG', 'n04258138_17003.JPEG', + 'n04264628_27969.JPEG', 'n04336792_7448.JPEG', + 'n04371774_5854.JPEG', 'n04596742_4225.JPEG', + 'n07583066_647.JPEG', 'n13037406_4650.JPEG'] + return filename.split('/')[-1] in blacklist + + +def _process_image(filename, coder): + """Process a single image file. + + Args: + filename: string, path to an image file e.g., '/path/to/example.JPG'. + coder: instance of ImageCoder to provide TensorFlow image coding utils. + Returns: + image_buffer: string, JPEG encoding of RGB image. + height: integer, image height in pixels. + width: integer, image width in pixels. + """ + # Read the image file. + image_data = tf.io.gfile.GFile(filename, 'rb').read() + + # Clean the dirty data. + if _is_png(filename): + # 1 image is a PNG. + print('Converting PNG to JPEG for %s' % filename) + image_data = coder.png_to_jpeg(image_data) + elif _is_cmyk(filename): + # 22 JPEG images are in CMYK colorspace. + print('Converting CMYK to RGB for %s' % filename) + image_data = coder.cmyk_to_rgb(image_data) + + # Decode the RGB JPEG. + image = coder.decode_jpeg(image_data) + + # Check that image converted to RGB + assert len(image.shape) == 3 + height = image.shape[0] + width = image.shape[1] + assert image.shape[2] == 3 + + return image_data, height, width + + +def _process_image_files_batch(coder, thread_index, ranges, name, filenames, + synsets, labels, humans, num_shards): + """Processes and saves list of images as TFRecord in 1 thread. + + Args: + coder: instance of ImageCoder to provide TensorFlow image coding utils. + thread_index: integer, unique batch to run index is within [0, len(ranges)). + ranges: list of pairs of integers specifying ranges of each batches to + analyze in parallel. + name: string, unique identifier specifying the data set + filenames: list of strings; each string is a path to an image file + synsets: list of strings; each string is a unique WordNet ID + labels: list of integer; each integer identifies the ground truth + humans: list of strings; each string is a human-readable label + num_shards: integer number of shards for this data set. + """ + # Each thread produces N shards where N = int(num_shards / num_threads). + # For instance, if num_shards = 128, and the num_threads = 2, then the first + # thread would produce shards [0, 64). + num_threads = len(ranges) + assert not num_shards % num_threads + num_shards_per_batch = int(num_shards / num_threads) + + shard_ranges = np.linspace(ranges[thread_index][0], + ranges[thread_index][1], + num_shards_per_batch + 1).astype(int) + num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0] + + counter = 0 + for s in xrange(num_shards_per_batch): + # Generate a sharded version of the file name, e.g. 
'train-00002-of-00010' + shard = thread_index * num_shards_per_batch + s + output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards) + output_file = os.path.join(FLAGS.output_directory, output_filename) + writer = tf.io.TFRecordWriter(output_file) + + shard_counter = 0 + files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int) # HERE + for i in files_in_shard: + filename = filenames[i] + label = labels[i] + synset = synsets[i] + human = humans[i] + + image_buffer, height, width = _process_image(filename, coder) + + example = _convert_to_example(filename, image_buffer, label, synset, human, height, width) + writer.write(example.SerializeToString()) + shard_counter += 1 + counter += 1 + + if not counter % 1000: + print('%s [thread %d]: Processed %d of %d images in thread batch.' % + (datetime.now(), thread_index, counter, num_files_in_thread)) + sys.stdout.flush() + + writer.close() + print('%s [thread %d]: Wrote %d images to %s' % + (datetime.now(), thread_index, shard_counter, output_file)) + sys.stdout.flush() + shard_counter = 0 + print('%s [thread %d]: Wrote %d images to %d shards.' % + (datetime.now(), thread_index, counter, num_files_in_thread)) + sys.stdout.flush() + + +def _process_image_files(name, filenames, synsets, labels, humans, num_shards): + """Process and save list of images as TFRecord of Example protos. + + Args: + name: string, unique identifier specifying the data set + filenames: list of strings; each string is a path to an image file + synsets: list of strings; each string is a unique WordNet ID + labels: list of integer; each integer identifies the ground truth + humans: list of strings; each string is a human-readable label + num_shards: integer number of shards for this data set. + """ + assert len(filenames) == len(synsets) + assert len(filenames) == len(labels) + assert len(filenames) == len(humans) + + # Break all images into batches with a [ranges[i][0], ranges[i][1]]. + spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(np.int) + ranges = [] + threads = [] + for i in xrange(len(spacing) - 1): + ranges.append([spacing[i], spacing[i+1]]) + + # Launch a thread for each batch. + print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges)) + sys.stdout.flush() + + # Create a mechanism for monitoring when all threads are finished. + coord = tf.train.Coordinator() + + # Create a generic TensorFlow-based utility for converting all image codings. + coder = ImageCoder() + + threads = [] + for thread_index in xrange(len(ranges)): + args = (coder, thread_index, ranges, name, filenames, + synsets, labels, humans, num_shards) + t = threading.Thread(target=_process_image_files_batch, args=args) + t.start() + threads.append(t) + + # Wait for all the threads to terminate. + coord.join(threads) + print('%s: Finished writing all %d images in data set.' % + (datetime.now(), len(filenames))) + sys.stdout.flush() + + +def _find_image_files(data_dir, labels_file): + """Build a list of all images files and labels in the data set. + + Args: + data_dir: string, path to the root directory of images. + + Assumes that the ImageNet data set resides in JPEG files located in + the following directory structure. + + data_dir/n01440764/ILSVRC2012_val_00000293.JPEG + data_dir/n01440764/ILSVRC2012_val_00000543.JPEG + + where 'n01440764' is the unique synset label associated with these images. + + labels_file: string, path to the labels file. + + The list of valid labels are held in this file. 
Assumes that the file + contains entries as such: + n01440764 + n01443537 + n01484850 + where each line corresponds to a label expressed as a synset. We map + each synset contained in the file to an integer (based on the alphabetical + ordering) starting with the integer 1 corresponding to the synset + contained in the first line. + + The reason we start the integer labels at 1 is to reserve label 0 as an + unused background class. + + Returns: + filenames: list of strings; each string is a path to an image file. + synsets: list of strings; each string is a unique WordNet ID. + labels: list of integer; each integer identifies the ground truth. + """ + print('Determining list of input files and labels from %s.' % data_dir) + challenge_synsets = [l.strip() for l in + tf.compat.v1.gfile.FastGFile(labels_file, 'r').readlines()] + + labels = [] + filenames = [] + synsets = [] + + # Leave label index 0 empty as a background class. + label_index = 1 + + # Construct the list of JPEG files and labels. + for synset in challenge_synsets: + jpeg_file_path = '%s/%s/*.JPEG' % (data_dir, synset) + matching_files = tf.io.gfile.glob(jpeg_file_path) + + labels.extend([label_index] * len(matching_files)) + synsets.extend([synset] * len(matching_files)) + filenames.extend(matching_files) + + if not label_index % 100: + print('Finished finding files in %d of %d classes.' % ( + label_index, len(challenge_synsets))) + label_index += 1 + + # Shuffle the ordering of all image files in order to guarantee + # random ordering of the images with respect to label in the + # saved TFRecord files. Make the randomization repeatable. + shuffled_index = range(len(filenames)) + random.seed(12345) + + random.shuffle(list(range(len(shuffled_index)))) + + filenames = [filenames[i] for i in shuffled_index] + synsets = [synsets[i] for i in shuffled_index] + labels = [labels[i] for i in shuffled_index] + + print('Found %d JPEG files across %d labels inside %s.' % + (len(filenames), len(challenge_synsets), data_dir)) + return filenames, synsets, labels + + +def _find_human_readable_labels(synsets, synset_to_human): + """Build a list of human-readable labels. + + Args: + synsets: list of strings; each string is a unique WordNet ID. + synset_to_human: dict of synset to human labels, e.g., + 'n02119022' --> 'red fox, Vulpes vulpes' + + Returns: + List of human-readable strings corresponding to each synset. + """ + humans = [] + for s in synsets: + assert s in synset_to_human, ('Failed to find: %s' % s) + humans.append(synset_to_human[s]) + return humans + + +def _process_dataset(name, directory, num_shards, synset_to_human): + """Process a complete data set and save it as a TFRecord. + + Args: + name: string, unique identifier specifying the data set. + directory: string, root path to the data set. + num_shards: integer number of shards for this data set. + synset_to_human: dict of synset to human labels, e.g., + 'n02119022' --> 'red fox, Vulpes vulpes' + """ + filenames, synsets, labels = _find_image_files(directory, FLAGS.labels_file) + humans = _find_human_readable_labels(synsets, synset_to_human) + + _process_image_files(name, filenames, synsets, labels, + humans, num_shards) + + +def _build_synset_lookup(imagenet_metadata_file): + """Build lookup for synset to human-readable label. + + Args: + imagenet_metadata_file: string, path to file containing mapping from + synset to human-readable label. 
+ + Assumes each line of the file looks like: + + n02119247 black fox + n02119359 silver fox + n02119477 red fox, Vulpes fulva + + where each line corresponds to a unique mapping. Note that each line is + formatted as \t. + + Returns: + Dictionary of synset to human labels, such as: + 'n02119022' --> 'red fox, Vulpes vulpes' + """ + lines = tf.compat.v1.gfile.FastGFile(imagenet_metadata_file, 'r').readlines() + synset_to_human = {} + for l in lines: + if l: + parts = l.strip().split('\t') + assert len(parts) == 2 + synset = parts[0] + human = parts[1] + synset_to_human[synset] = human + return synset_to_human + + +def main(unused_argv): + assert not FLAGS.shards % FLAGS.num_threads, ( + 'Please make the FLAGS.num_threads commensurate with FLAGS.shards') + + print('Saving results to %s' % FLAGS.output_directory) + + # Build a map from synset to human-readable label. + synset_to_human = _build_synset_lookup(FLAGS.imagenet_metadata_file) + + if(FLAGS.raw_directory != None): + _process_dataset(FLAGS.subset, FLAGS.raw_directory,FLAGS.shards, synset_to_human) + +if __name__ == '__main__': + tf.compat.v1.app.run() diff --git a/examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/download_and_convert_imagenet.sh b/examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/download_and_convert_imagenet.sh new file mode 100644 index 00000000000..f9baa85ab07 --- /dev/null +++ b/examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/download_and_convert_imagenet.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# Script to download and preprocess ImageNet Challenge 2012 +# training and validation data set. +# +# The final output of this script are sharded TFRecord files containing +# serialized Example protocol buffers. See build_imagenet_data.py for +# details of how the Example protocol buffers contain the ImageNet data. +# +# The final output of this script appears as such: +# +# data_dir/train-00000-of-01024 +# data_dir/train-00001-of-01024 +# ... +# data_dir/train-00127-of-01024 +# +# and +# +# data_dir/validation-00000-of-00128 +# data_dir/validation-00001-of-00128 +# ... +# data_dir/validation-00127-of-00128 +# +# Note that this script may take several hours to run to completion. The +# conversion of the ImageNet data to TFRecords alone takes 2-3 hours depending +# on the speed of your machine. Please be patient. +# +# **IMPORTANT** +# To download the raw images, the user must create an account with image-net.org +# and generate a username and access_key. The latter two are required for +# downloading the raw images. +# + +set -e + +if [ -z "$1" ]; then + echo "usage download_and_convert_imagenet.sh [data dir]" + exit +fi + +# Create the output and temporary directories. +DATA_DIR="${1%/}" +SCRATCH_DIR="${DATA_DIR}/raw-data/" +mkdir -p "${DATA_DIR}" +mkdir -p "${SCRATCH_DIR}" +WORK_DIR="$0.runfiles/__main__" + +# Download the ImageNet data. 
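+# Note: the download step below relies on download_imagenet.sh, which checks that the
+# IMAGENET_USERNAME and IMAGENET_ACCESS_KEY environment variables are set before it
+# fetches the raw images; export both credentials before running this script.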
+LABELS_FILE="${WORK_DIR}/datasets/imagenet_lsvrc_2015_synsets.txt" +DOWNLOAD_SCRIPT="${WORK_DIR}/datasets/download_imagenet.sh" +"${DOWNLOAD_SCRIPT}" "${SCRATCH_DIR}" "${LABELS_FILE}" + +# Note the locations of the train and validation data. +TRAIN_DIRECTORY="${SCRATCH_DIR}train/" +VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/" + +# Preprocess the validation data by moving the images into the appropriate +# sub-directory based on the label (synset) of the image. +echo "Organizing the validation data into sub-directories." +PREPROCESS_VAL_SCRIPT="${WORK_DIR}/datasets/preprocess_imagenet_validation_data.py" +VAL_LABELS_FILE="${WORK_DIR}/datasets/imagenet_2012_validation_synset_labels.txt" + +"${PREPROCESS_VAL_SCRIPT}" "${VALIDATION_DIRECTORY}" "${VAL_LABELS_FILE}" + +# Convert the XML files for bounding box annotations into a single CSV. +echo "Extracting bounding box information from XML." +BOUNDING_BOX_SCRIPT="${WORK_DIR}/datasets/process_bounding_boxes.py" +BOUNDING_BOX_FILE="${SCRATCH_DIR}/imagenet_2012_bounding_boxes.csv" +BOUNDING_BOX_DIR="${SCRATCH_DIR}bounding_boxes/" + +"${BOUNDING_BOX_SCRIPT}" "${BOUNDING_BOX_DIR}" "${LABELS_FILE}" \ + | sort >"${BOUNDING_BOX_FILE}" +echo "Finished downloading and preprocessing the ImageNet data." + +# Build the TFRecords version of the ImageNet data. +BUILD_SCRIPT="${WORK_DIR}/build_imagenet_data" +OUTPUT_DIRECTORY="${DATA_DIR}" +IMAGENET_METADATA_FILE="${WORK_DIR}/datasets/imagenet_metadata.txt" + +"${BUILD_SCRIPT}" \ + --train_directory="${TRAIN_DIRECTORY}" \ + --validation_directory="${VALIDATION_DIRECTORY}" \ + --output_directory="${OUTPUT_DIRECTORY}" \ + --imagenet_metadata_file="${IMAGENET_METADATA_FILE}" \ + --labels_file="${LABELS_FILE}" \ + --bounding_box_file="${BOUNDING_BOX_FILE}" diff --git a/examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/download_imagenet.sh b/examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/download_imagenet.sh new file mode 100644 index 00000000000..c780e179f93 --- /dev/null +++ b/examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/download_imagenet.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# Script to download ImageNet Challenge 2012 training and validation data set. +# +# Downloads and decompresses raw images and bounding boxes. +# +# **IMPORTANT** +# To download the raw images, the user must create an account with image-net.org +# and generate a username and access_key. The latter two are required for +# downloading the raw images. +# +# usage: +# ./download_imagenet.sh [dirname] +set -e + +if [ "x$IMAGENET_ACCESS_KEY" == x -o "x$IMAGENET_USERNAME" == x ]; then + cat < Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. + + 3. 
Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer names of the inference graph pb. For vgg_16 the output layer name is `vgg_16/fc8/squeezed` + + 4. To freeze the exported graph, please use the tool `freeze_graph.py` in the [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo + ```shell + python freeze_graph.py \ + --input_graph=/tmp/vgg_16_inf_graph.pb \ + --input_checkpoint=./vgg_16.ckpt \ + --input_binary=true \ + --output_graph=./frozen_vgg16.pb \ + --output_node_names=vgg_16/fc8/squeezed + ``` + +
### 3. Prepare Dataset + + The TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in the `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directories based on the label (synset) of each image, you can use the command below to convert them to the TF records format. + + ```shell + cd examples/tensorflow/tf2onnx/vgg16 + # convert validation subset + bash prepare_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd int8_export + ``` + +
## Run Command + +### Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model +```shell +bash run_tuning.sh --input_model=./frozen_vgg16.pb --output_model=./frozen_vgg16_int8.pb --dataset_location=/path/to/imagenet/ +``` +Please note this dataset is in TF records format. + +### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model +```shell +bash run_export.sh --input_model=./frozen_vgg16_int8.pb --output_model=./frozen_vgg16_int8.onnx +``` + +
### Run benchmark for Tensorflow INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./frozen_vgg16_int8.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./frozen_vgg16_int8.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` +Please note this dataset is in TF records format. + +### Run benchmark for ONNX INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./frozen_vgg16_int8.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 +bash run_benchmark.sh --input_model=./frozen_vgg16_int8.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 +``` +Please note this dataset is in TF records format. \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/main.py b/examples/tensorflow/tf2onnx/vgg16/int8_export/main.py new file mode 100644 index 00000000000..4566fe912a6 --- /dev/null +++ b/examples/tensorflow/tf2onnx/vgg16/int8_export/main.py @@ -0,0 +1,168 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import onnx +import numpy as np +import tensorflow as tf +import onnxruntime as ort +from argparse import ArgumentParser +from neural_compressor.data import LabelShift +from neural_compressor.metric import TensorflowTopK +from neural_compressor.utils.create_obj_from_config import create_dataloader + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +arg_parser = ArgumentParser(description='Parse args') +arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') +arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') +arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') +arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') +arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') +arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') +arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') +arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') +args = arg_parser.parse_args() + +def eval_func_onnx(model, dataloader, metric, postprocess=None): + metric.reset() + sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + input_names = [i.name for i in sess.get_inputs()] + + for input_data, label in dataloader: + output = sess.run(None, dict(zip(input_names, [input_data]))) + + output, label = postprocess((output, label)) + metric.update(output, label) + + acc = metric.result() + return acc + +def eval_func_tf(model): + from neural_compressor.model import Model + metric = TensorflowTopK(k=1) + postprocess = LabelShift(label_shift=1) + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + eval_dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', eval_dataloader_args) + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + top1 = TensorflowTopK(k=1) + postprocess = LabelShift(label_shift=1) + if 
args.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion + calib_dataloader_args = { + 'batch_size': 10, + 'dataset': {"ImageRecord": {'root':args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + calib_dataloader = create_dataloader('tensorflow', calib_dataloader_args) + eval_dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) + op_name_list = { + 'resnet_model/dense/MatMul': + { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']}, + } + } + conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100], + outputs=['softmax_tensor'], + accuracy_criterion = AccuracyCriterion(tolerable_loss=0.3), + op_name_list=op_name_list) + q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, + eval_func=eval_func_tf) + q_model.save(args.output_graph) + + if args.export: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(args.input_graph) + config = TF2ONNXConfig(dtype="int8") + inc_model.export(args.output_graph, config) + + if args.benchmark: + if args.input_graph.endswith('.onnx'): + model = onnx.load(args.input_graph) + else: + model = args.input_graph + eval_dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) + + def eval(model): + if isinstance(model, str): + return eval_func_tf(model) + else: + return eval_func_onnx(model, eval_dataloader, top1, postprocess) + + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=eval_dataloader) + elif args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % eval_dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/vgg16/int8_export/requirements.txt new file mode 100644 index 00000000000..9bdc24cb87b --- /dev/null +++ b/examples/tensorflow/tf2onnx/vgg16/int8_export/requirements.txt @@ -0,0 +1,9 @@ +tensorflow=2.10.0 +intel-extension-for-tensorflow[cpu] +onnx==1.9.0; python_version < '3.10' +onnx==1.12.0; python_version == '3.10' +onnxruntime==1.10.0; python_version < '3.10' +onnxruntime==1.12.0; python_version == '3.10' +onnxruntime-extensions; python_version < '3.10' + +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/vgg16/int8_export/run_benchmark.sh new file mode 100644 index 00000000000..e83a029e800 --- /dev/null +++ 
b/examples/tensorflow/tf2onnx/vgg16/int8_export/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/run_export.sh b/examples/tensorflow/tf2onnx/vgg16/int8_export/run_export.sh new file mode 100644 index 00000000000..1c6d1c908fe --- /dev/null +++ b/examples/tensorflow/tf2onnx/vgg16/int8_export/run_export.sh @@ -0,0 +1,35 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --export +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/run_tuning.sh b/examples/tensorflow/tf2onnx/vgg16/int8_export/run_tuning.sh new file mode 100644 index 00000000000..6a9e1b859c9 --- /dev/null +++ b/examples/tensorflow/tf2onnx/vgg16/int8_export/run_tuning.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dataset_location ${dataset_location} \ + --tune +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/vgg16/prepare_dataset.sh b/examples/tensorflow/tf2onnx/vgg16/prepare_dataset.sh new file mode 100644 index 00000000000..4aad5d69a3f --- /dev/null +++ b/examples/tensorflow/tf2onnx/vgg16/prepare_dataset.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# set -x + +OUTPUT_DIR="./data" +SUBSET="validation" +SHARDS=1 + +help() +{ + cat <<- EOF + Desc: Convert prepared raw imagnet dataset to tfrecord + -h --help help info + --output_dir Output data directory + default: './data' + --raw_dir Raw data directory + --shards Number of shards in TFRecord files. + default: '1' + --subset Subset of imagenet, can be validation/train. + default: 'validation' +EOF + exit 0 +} + +function main { + init_params "$@" + convert_dataset +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --output_dir=*) + OUTPUT_DIR=$(echo $var |cut -f2 -d=) + ;; + --raw_dir=*) + RAW_DIR=$(echo $var |cut -f2 -d=) + ;; + --shards=*) + SHARDS=$(echo $var |cut -f2 -d=) + ;; + --subset=*) + SUBSET=$(echo $var |cut -f2 -d=) + ;; + -h|--help) help + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done +} + +# convert dataset +function convert_dataset { + if [ ! 
-d ${OUTPUT_DIR} ]; then + mkdir ${OUTPUT_DIR} + fi + python imagenet_prepare/build_imagenet_data.py \ + --imagenet_metadata_file "imagenet_prepare/imagenet_metadata.txt" \ + --labels_file "imagenet_prepare/imagenet_lsvrc_2015_synsets.txt" \ + --output_directory ${OUTPUT_DIR} \ + --subset ${SUBSET} \ + --raw_directory ${RAW_DIR} \ + --shards ${SHARDS} +} + +main "$@" + From cce878bd840a0e176b4aae81d89e590c355d4d6c Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Thu, 2 Feb 2023 21:48:22 +0800 Subject: [PATCH 17/43] enable resnet50v1.0 example Signed-off-by: Lv, Liang1 --- examples/tensorflow/tf2onnx/README.md | 4 +- .../resnet50v1.0/fp32_export/README.md | 60 +++++++ .../tf2onnx/resnet50v1.0/fp32_export/main.py | 123 +++++++++++++ .../resnet50v1.0/fp32_export/requirements.txt | 8 + .../resnet50v1.0/fp32_export/run_benchmark.sh | 42 +++++ .../resnet50v1.0/fp32_export/run_export.sh | 35 ++++ .../resnet50v1.0/int8_export/README.md | 72 ++++++++ .../tf2onnx/resnet50v1.0/int8_export/main.py | 161 ++++++++++++++++++ .../resnet50v1.0/int8_export/requirements.txt | 9 + .../resnet50v1.0/int8_export/run_benchmark.sh | 42 +++++ .../resnet50v1.0/int8_export/run_export.sh | 35 ++++ .../resnet50v1.0/int8_export/run_tuning.sh | 39 +++++ .../tf2onnx/vgg16/fp32_export/README.md | 7 + .../tf2onnx/vgg16/fp32_export/main.py | 1 + .../vgg16/fp32_export/requirements.txt | 1 + .../tf2onnx/vgg16/int8_export/README.md | 7 + .../vgg16/int8_export/requirements.txt | 4 +- 17 files changed, 647 insertions(+), 3 deletions(-) create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/main.py create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/requirements.txt create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_benchmark.sh create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_export.sh create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/main.py create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/requirements.txt create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_benchmark.sh create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_export.sh create mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_tuning.sh diff --git a/examples/tensorflow/tf2onnx/README.md b/examples/tensorflow/tf2onnx/README.md index 22e18332e3d..c8985ae328f 100644 --- a/examples/tensorflow/tf2onnx/README.md +++ b/examples/tensorflow/tf2onnx/README.md @@ -3,9 +3,11 @@ These examples show how to export Tensorflow models to ONNX models including FP32 and INT8. Please note that we only support to export ONNX QDQ format for INT8 now. -The following examples are available: +The following examples are available for reference: +* [resnet50v1.0](/examples/tensorflow/tf2onnx/resnet50v1.0): image recognition ResNet50 model. * [resnet50v1.5](/examples/tensorflow/tf2onnx/resnet50v1.5): image recognition ResNet50 model from MLPerf. * [mobilenet_v2](/examples/tensorflow/tf2onnx/mobilenet_v2): image recognition mobilenet_v2 model. +* [vgg16](/examples/tensorflow/tf2onnx/vgg16): image recognition vgg16 model. * [ssd_mobilenet_v1](/examples/tensorflow/tf2onnx/ssd_mobilenet_v1): object detection ssd_mobilenet_v1 model. 
* [faster_rcnn_resnet50](/examples/tensorflow/tf2onnx/faster_rcnn_resnet50): object detection faster_rcnn_resnet50 model. diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md new file mode 100644 index 00000000000..4fc441d3ac3 --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md @@ -0,0 +1,60 @@ +Step-by-Step +============ + +This document is used to show how to export Tensorflow RestNet50_V1.0 FP32 model to ONNX FP32 model using Intel® Neural Compressor. + + +# Prerequisite + +## 1. Environment + +### Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### Install requirements +```shell +pip install -r requirements.txt +``` + +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +### 2. Prepare Pretrained model + +```shell +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb +``` + +### 3. Prepare Dataset + +Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/ImageNet. The dir include below folder and files: + +```bash +ls /path/to/ImageNet +ILSVRC2012_img_val val.txt +``` + +# Run Command + +## Export Tensorflow FP32 model to ONNX FP32 model +```shell +bash run_export.sh --input_model=./resnet50_fp32_pretrained_model.pb --output_model=./resnet50_v1.onnx +``` + +## Run benchmark for Tensorflow FP32 model +```shell +bash run_benchmark.sh --input_model=./resnet50_fp32_pretrained_model.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_fp32_pretrained_model.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` + +## Run benchmark for ONNX FP32 model +```shell +bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/main.py b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/main.py new file mode 100644 index 00000000000..e7c9da59897 --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/main.py @@ -0,0 +1,123 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from argparse import ArgumentParser +import tensorflow as tf +import onnx +import numpy as np +import onnxruntime as ort + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def eval_func_onnx(model, dataloader, metric, postprocess=None): + metric.reset() + sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + input_names = [i.name for i in sess.get_inputs()] + for input_data, label in dataloader: + output = sess.run(None, dict(zip(input_names, [input_data]))) + if postprocess: + output, label = postprocess((output, label)) + metric.update(output, label) + return metric.result() + +def eval_func_tf(model, dataloader, metric, postprocess=None): + from neural_compressor.model import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + if postprocess: + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') + arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include export and benchmark option.""" + if self.args.export: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + config = TF2ONNXConfig(dtype="fp32") + inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + else: + model = self.args.input_graph + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + + def eval(model): + if isinstance(model, str): + return eval_func_tf(model, dataloader, top1) + else: + return eval_func_onnx(model, dataloader, top1) + + if self.args.mode == 'performance': + from 
neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/requirements.txt new file mode 100644 index 00000000000..8d1eeb068a8 --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/requirements.txt @@ -0,0 +1,8 @@ +tf2onnx==1.13.0 +onnx==1.9.0; python_version < '3.10' +onnx==1.12.0; python_version == '3.10' +onnxruntime==1.10.0; python_version < '3.10' +onnxruntime==1.12.0; python_version == '3.10' +onnxruntime-extensions; python_version < '3.10' + +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_benchmark.sh new file mode 100644 index 00000000000..e83a029e800 --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_export.sh b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_export.sh new file mode 100644 index 00000000000..1c6d1c908fe --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_export.sh @@ -0,0 +1,35 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --export +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md new file mode 100644 index 00000000000..3900dc308c0 --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md @@ -0,0 +1,72 @@ +Step-by-Step +============ + +This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. + + +# Prerequisite + +## 1. 
Environment + +### Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### Install requirements +The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this export ONNX INT8 model example. +The Intel Extension for Tensorflow for Intel CPUs is installed as default. +```shell +pip install -r requirements.txt +``` + +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2 Prepare Pretrained model + +```shell +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb +``` + + +## 3. Prepare Dataset + +Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/ImageNet. The dir include below folder and files: + +```bash +ls /path/to/ImageNet +ILSVRC2012_img_val val.txt +``` +The Raw image dataset is used for running benchmarking for ONNX model. + +TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. The TF records format dataset is used for quantizing Tensorflow FP32 model to Tensorflow INT8 QDQ model. + +# Run Command + +## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model +```shell +bash run_tuning.sh --input_model=./resnet50_fp32_pretrained_model.pb --output_model=./resnet50_v1_int8.pb --dataset_location=/path/to/imagenet/ +``` + +## Run benchmark for Tensorflow INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./resnet50_v1_int8.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_v1_int8.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` + +## Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model +```shell +bash run_export.sh --input_model=./resnet50_v1_int8.pb --output_model=./resnet50_v1_int8.onnx +``` + +## Run benchmark for ONNX INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` + diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/main.py b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/main.py new file mode 100644 index 00000000000..d842bca8847 --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/main.py @@ -0,0 +1,161 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from argparse import ArgumentParser +import tensorflow as tf +import onnx +import numpy as np +import onnxruntime as ort + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def eval_func_onnx(model, dataloader, metric, postprocess=None): + metric.reset() + sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + input_names = [i.name for i in sess.get_inputs()] + for input_data, label in dataloader: + output = sess.run(None, dict(zip(input_names, [input_data]))) + if postprocess: + output, label = postprocess((output, label)) + metric.update(output, label) + return metric.result() + +def eval_func_tf(model, dataloader, metric, postprocess=None): + from neural_compressor.model import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + if postprocess: + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') + arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') + arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + if self.args.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion + from neural_compressor.utils.create_obj_from_config import create_dataloader + calib_dataloader_args = { + 'batch_size': 10, + 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224}}, + 'filter': None + } + calib_dataloader = create_dataloader('tensorflow', calib_dataloader_args) + eval_dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224}}, + 'filter': None + } + eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) + op_name_list = { + 'resnet_model/dense/MatMul': + { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']}, + } + } + conf = 
PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100], + outputs=['softmax_tensor'], + accuracy_criterion = AccuracyCriterion(tolerable_loss=0.3), + op_name_list=op_name_list) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, eval_metric=top1) + q_model.save(self.args.output_graph) + + if self.args.export: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + config = TF2ONNXConfig(dtype="int8", inputs_as_nchw="input_tensor:0") + inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + else: + model = self.args.input_graph + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + + def eval(model): + if isinstance(model, str): + return eval_func_tf(model, dataloader, top1) + else: + return eval_func_onnx(model, dataloader, top1) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/requirements.txt new file mode 100644 index 00000000000..9bdc24cb87b --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/requirements.txt @@ -0,0 +1,9 @@ +tensorflow=2.10.0 +intel-extension-for-tensorflow[cpu] +onnx==1.9.0; python_version < '3.10' +onnx==1.12.0; python_version == '3.10' +onnxruntime==1.10.0; python_version < '3.10' +onnxruntime==1.12.0; python_version == '3.10' +onnxruntime-extensions; python_version < '3.10' + +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_benchmark.sh new file mode 100644 index 00000000000..e83a029e800 --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + 
--input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_export.sh b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_export.sh new file mode 100644 index 00000000000..1c6d1c908fe --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_export.sh @@ -0,0 +1,35 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --export +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_tuning.sh b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_tuning.sh new file mode 100644 index 00000000000..6a9e1b859c9 --- /dev/null +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_tuning.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dataset_location ${dataset_location} \ + --tune +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md b/examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md index 62d5f41c726..4cd9568a640 100644 --- a/examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md @@ -12,11 +12,18 @@ This document is used to show how to export Tensorflow VGG16 FP32 model to ONNX ```shell pip install neural-compressor ``` + ### Install requirements ```shell pip install -r requirements.txt ``` +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + ## 2. Prepare Model The vgg16 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). 
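For reference, the `run_export.sh` scripts above only forward their arguments to the `--export` branch of `main.py`, which reduces to a short neural_compressor call. A minimal sketch with placeholder paths, not taken verbatim from any single example:

```python
# Minimal sketch of the export step driven by run_export.sh; paths are placeholders.
from neural_compressor.model import Model
from neural_compressor.config import TF2ONNXConfig

inc_model = Model("./resnet50_v1_int8.pb")            # quantized TF QDQ graph produced by run_tuning.sh
config = TF2ONNXConfig(dtype="int8")                  # inputs_as_nchw="input_tensor:0" may also be passed
inc_model.export("./resnet50_v1_int8.onnx", config)   # writes the ONNX INT8 QDQ model
```

For an FP32 graph the same call is used with `dtype="fp32"`, as the fp32_export examples show.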
diff --git a/examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py b/examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py index 7520f37201c..aa3bee82034 100644 --- a/examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py +++ b/examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py @@ -103,6 +103,7 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) + breakpoint() def eval(model): if isinstance(model, str): return eval_func_tf(model, eval_dataloader, top1, postprocess) diff --git a/examples/tensorflow/tf2onnx/vgg16/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/vgg16/fp32_export/requirements.txt index 8d1eeb068a8..16783f94457 100644 --- a/examples/tensorflow/tf2onnx/vgg16/fp32_export/requirements.txt +++ b/examples/tensorflow/tf2onnx/vgg16/fp32_export/requirements.txt @@ -1,3 +1,4 @@ +tensorflow==2.11.0 tf2onnx==1.13.0 onnx==1.9.0; python_version < '3.10' onnx==1.12.0; python_version == '3.10' diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/README.md b/examples/tensorflow/tf2onnx/vgg16/int8_export/README.md index 647323d6292..60bf1dca502 100644 --- a/examples/tensorflow/tf2onnx/vgg16/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/vgg16/int8_export/README.md @@ -12,6 +12,7 @@ This document is used to show how to export Tensorflow INT8 QDQ model to ONNX IN ```shell pip install neural-compressor ``` + ### Install requirements The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this export ONNX INT8 model example. The Intel Extension for Tensorflow for Intel CPUs is installed as default. @@ -19,6 +20,12 @@ The Intel Extension for Tensorflow for Intel CPUs is installed as default. pip install -r requirements.txt ``` +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + ## 2. Prepare Model The vgg16 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). 
diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/vgg16/int8_export/requirements.txt index 9bdc24cb87b..b964010af83 100644 --- a/examples/tensorflow/tf2onnx/vgg16/int8_export/requirements.txt +++ b/examples/tensorflow/tf2onnx/vgg16/int8_export/requirements.txt @@ -1,5 +1,5 @@ -tensorflow=2.10.0 -intel-extension-for-tensorflow[cpu] +tensorflow==2.11.0 +tf2onnx==1.13.0 onnx==1.9.0; python_version < '3.10' onnx==1.12.0; python_version == '3.10' onnxruntime==1.10.0; python_version < '3.10' From 6f0bd90c23974b863abf098e80c32091d42213f7 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Thu, 2 Feb 2023 23:00:48 +0800 Subject: [PATCH 18/43] refine imagenet dataset for 4 examples Signed-off-by: Lv, Liang1 --- .../imagenet_prepare/build_imagenet_data.py | 0 .../download_and_convert_imagenet.sh | 0 .../imagenet_prepare/download_imagenet.sh | 0 .../imagenet_lsvrc_2015_synsets.txt | 0 .../imagenet_prepare/imagenet_metadata.txt | 0 .../mobilenet_v2/fp32_export/README.md | 22 +++-- .../tf2onnx/mobilenet_v2/fp32_export/main.py | 89 ++++++------------ .../mobilenet_v2/int8_export/README.md | 26 +++--- .../tf2onnx/mobilenet_v2/int8_export/main.py | 91 ++++++------------ ...dataset.sh => prepare_imagenet_dataset.sh} | 0 .../resnet50v1.0/fp32_export/README.md | 18 ++-- .../resnet50v1.0/int8_export/README.md | 21 +++-- .../tf2onnx/resnet50v1.0/int8_export/main.py | 1 - .../resnet50v1.5/fp32_export/README.md | 24 +++-- .../tf2onnx/resnet50v1.5/fp32_export/main.py | 86 ++++++----------- .../resnet50v1.5/int8_export/README.md | 28 +++--- .../tf2onnx/resnet50v1.5/int8_export/main.py | 92 +++++++------------ .../tf2onnx/vgg16/fp32_export/README.md | 11 +-- .../tf2onnx/vgg16/int8_export/README.md | 16 ++-- .../tf2onnx/vgg16/int8_export/main.py | 1 - 20 files changed, 212 insertions(+), 314 deletions(-) rename examples/tensorflow/tf2onnx/{vgg16 => }/imagenet_prepare/build_imagenet_data.py (100%) rename examples/tensorflow/tf2onnx/{vgg16 => }/imagenet_prepare/download_and_convert_imagenet.sh (100%) rename examples/tensorflow/tf2onnx/{vgg16 => }/imagenet_prepare/download_imagenet.sh (100%) rename examples/tensorflow/tf2onnx/{vgg16 => }/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt (100%) rename examples/tensorflow/tf2onnx/{vgg16 => }/imagenet_prepare/imagenet_metadata.txt (100%) rename examples/tensorflow/tf2onnx/{vgg16/prepare_dataset.sh => prepare_imagenet_dataset.sh} (100%) diff --git a/examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/build_imagenet_data.py b/examples/tensorflow/tf2onnx/imagenet_prepare/build_imagenet_data.py similarity index 100% rename from examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/build_imagenet_data.py rename to examples/tensorflow/tf2onnx/imagenet_prepare/build_imagenet_data.py diff --git a/examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/download_and_convert_imagenet.sh b/examples/tensorflow/tf2onnx/imagenet_prepare/download_and_convert_imagenet.sh similarity index 100% rename from examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/download_and_convert_imagenet.sh rename to examples/tensorflow/tf2onnx/imagenet_prepare/download_and_convert_imagenet.sh diff --git a/examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/download_imagenet.sh b/examples/tensorflow/tf2onnx/imagenet_prepare/download_imagenet.sh similarity index 100% rename from examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/download_imagenet.sh rename to examples/tensorflow/tf2onnx/imagenet_prepare/download_imagenet.sh diff --git 
a/examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt b/examples/tensorflow/tf2onnx/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt similarity index 100% rename from examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt rename to examples/tensorflow/tf2onnx/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt diff --git a/examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/imagenet_metadata.txt b/examples/tensorflow/tf2onnx/imagenet_prepare/imagenet_metadata.txt similarity index 100% rename from examples/tensorflow/tf2onnx/vgg16/imagenet_prepare/imagenet_metadata.txt rename to examples/tensorflow/tf2onnx/imagenet_prepare/imagenet_metadata.txt diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md index 67a64f3db0d..755ba78677e 100644 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md @@ -68,14 +68,20 @@ We can get the pb file by convert the checkpoint file. ## 3. Prepare Dataset -Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/ImageNet. The dir include below folder and files: + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. -```bash -ls /path/to/ImageNet -ILSVRC2012_img_val val.txt -``` + ```shell + cd examples/tensorflow/tf2onnx/ + # convert validation subset + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd mobilenet_v2/fp32_export + ``` # Run Command +Please note the dataset is TF records format for running benchmark. ## Export Tensorflow FP32 model to ONNX FP32 model ```shell @@ -87,11 +93,9 @@ bash run_export.sh --input_model=./frozen_mobilenet_v2.pb --output_model=./mobil bash run_benchmark.sh --input_model=./frozen_mobilenet_v2.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 bash run_benchmark.sh --input_model=./frozen_mobilenet_v2.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` -Please note this dataset is TF records format. ## Run benchmark for ONNX FP32 model ```shell -bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 -bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 +bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` -Please note this dataset is Raw image dataset. 
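The benchmark commands above read the TF records produced by `prepare_imagenet_dataset.sh`. Inside `main.py` the dataloader over those records is built with the config-driven helper; a minimal sketch, assuming the records live under `/path/to/imagenet/`:

```python
# Minimal sketch of the TF-records dataloader used for benchmarking; the dataset path is a placeholder.
from neural_compressor.utils.create_obj_from_config import create_dataloader

dataloader_args = {
    'batch_size': 32,
    'dataset': {"ImageRecord": {'root': '/path/to/imagenet/'}},
    'transform': {'BilinearImagenet': {'height': 224, 'width': 224}},
    'filter': None
}
dataloader = create_dataloader('tensorflow', dataloader_args)

inputs, labels = next(iter(dataloader))  # one preprocessed batch of images and labels
```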
diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py index e231b948224..69561478cfc 100644 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py @@ -26,13 +26,14 @@ tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) -def eval_func_onnx(model, dataloader, metric, postprocess): +def eval_func_onnx(model, dataloader, metric, postprocess=None): metric.reset() sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) input_names = [i.name for i in sess.get_inputs()] for input_data, label in dataloader: output = sess.run(None, dict(zip(input_names, [input_data]))) - output, label = postprocess((output, label)) + if postprocess: + output, label = postprocess((output, label)) metric.update(output, label) return metric.result() @@ -78,72 +79,40 @@ def run(self): from neural_compressor.model import Model from neural_compressor.config import TF2ONNXConfig inc_model = Model(self.args.input_graph) - config = TF2ONNXConfig(dtype="fp32", input_names='input[-1,224,224,3]', - inputs_as_nchw="input:0") + config = TF2ONNXConfig(dtype="fp32", input_names='input[-1,224,224,3]') inc_model.export(self.args.output_graph, config) if self.args.benchmark: - # ONNX FP32 Benchmark if self.args.input_graph.endswith('.onnx'): model = onnx.load(self.args.input_graph) - data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') - label_path = os.path.join(self.args.dataset_location, 'val.txt') - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, - 'transform': {'Rescale': {}, - 'Resize': {'size':256}, - 'CenterCrop': {'size': 224}, - 'Normalize': {'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225]}, - 'Cast': {'dtype': 'float32'}, - 'Transpose': {'perm': [2, 0, 1]}}, - 'filter': None - } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) - - from neural_compressor.metric import GeneralTopK - top1 = GeneralTopK(k=1) - from neural_compressor.data.transforms.imagenet_transform import LabelShift - postprocess = LabelShift(label_shift=-1) - def eval(onnx_model): - return eval_func_onnx(onnx_model, dataloader, top1, postprocess) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - # Tensorflow FP32 Benchmark else: - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"ImageRecord": {'root': self.args.dataset_location}}, - 'transform': {'BilinearImagenet': {'height': 224, 'width': 224}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) - def eval(model): + model = self.args.input_graph + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': 
self.args.batch_size, + 'dataset': {"ImageRecord": {'root': self.args.dataset_location}}, + 'transform': {'BilinearImagenet': {'height': 224, 'width': 224}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + def eval(model): + if isinstance(model, str): return eval_func_tf(model, dataloader, top1) + else: + return eval_func_onnx(model, dataloader, top1) - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(self.args.input_graph, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(self.args.input_graph) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(self.args.input_graph, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) if __name__ == "__main__": evaluate_opt_graph = eval_classifier_optimized_graph() diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md index bc57ea3cfb0..9f6e9374c55 100644 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md @@ -69,30 +69,31 @@ We can get the pb file by convert the checkpoint file. ## 3. Prepare Dataset -Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/ImageNet. The dir include below folder and files: + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. -```bash -ls /path/to/ImageNet -ILSVRC2012_img_val val.txt -``` -The Raw image dataset is used for running benchmarking for ONNX model. - -TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. The TF records format dataset is used for quantizing Tensorflow FP32 model to Tensorflow INT8 QDQ model. 
+ ```shell + cd examples/tensorflow/tf2onnx/ + # convert validation subset + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd mobilenet_v2/int8_export + ``` # Run Command +Please note the dataset is TF records format for running quantization and benchmark. ## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model ```shell bash run_tuning.sh --input_model=./frozen_mobilenet_v2.pb --output_model=./mobilenet_v2_int8.pb --dataset_location=/path/to/imagenet/ ``` -Please note this dataset is TF records format. ## Run benchmark for Tensorflow INT8 QDQ model ```shell bash run_benchmark.sh --input_model=./mobilenet_v2_int8.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 bash run_benchmark.sh --input_model=./mobilenet_v2_int8.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` -Please note this dataset is Raw image dataset. ## Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model ```shell @@ -101,7 +102,6 @@ bash run_export.sh --input_model=./mobilenet_v2_int8.pb --output_model=./mobilen ## Run benchmark for ONNX INT8 QDQ model ```shell -bash run_benchmark.sh --input_model=./mobilenet_v2_int8.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 -bash run_benchmark.sh --input_model=./mobilenet_v2_int8.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 +bash run_benchmark.sh --input_model=./mobilenet_v2_int8.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./mobilenet_v2_int8.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` -Please note this dataset is Raw image dataset. 
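The quantization command above maps to the ITEX backend of neural_compressor's post-training quantization API. A minimal sketch with placeholder paths, reusing one TF-records dataloader for both calibration and evaluation (the actual scripts configure them separately):

```python
# Minimal sketch of the ITEX-backed post-training quantization behind run_tuning.sh.
# Paths are placeholders; main.py builds separate calibration/evaluation dataloaders.
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig
from neural_compressor.metric import TensorflowTopK
from neural_compressor.utils.create_obj_from_config import create_dataloader

dataloader = create_dataloader('tensorflow', {
    'batch_size': 32,
    'dataset': {"ImageRecord": {'root': '/path/to/imagenet/'}},
    'transform': {'BilinearImagenet': {'height': 224, 'width': 224}},
    'filter': None
})

conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100])
top1 = TensorflowTopK(k=1)
q_model = quantization.fit("./frozen_mobilenet_v2.pb", conf=conf,
                           calib_dataloader=dataloader,
                           eval_dataloader=dataloader, eval_metric=top1)
q_model.save("./mobilenet_v2_int8.pb")  # TF INT8 QDQ graph consumed by run_export.sh
```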
diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py index 57d803ed288..b60042374ca 100644 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py +++ b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py @@ -26,13 +26,14 @@ tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) -def eval_func_onnx(model, dataloader, metric, postprocess): +def eval_func_onnx(model, dataloader, metric, postprocess=None): metric.reset() sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) input_names = [i.name for i in sess.get_inputs()] for input_data, label in dataloader: output = sess.run(None, dict(zip(input_names, [input_data]))) - output, label = postprocess((output, label)) + if postprocess: + output, label = postprocess((output, label)) metric.update(output, label) return metric.result() @@ -96,72 +97,40 @@ def run(self): from neural_compressor.model import Model from neural_compressor.config import TF2ONNXConfig inc_model = Model(self.args.input_graph) - config = TF2ONNXConfig(dtype="int8", input_names='input[-1,224,224,3]', - inputs_as_nchw="input:0") + config = TF2ONNXConfig(dtype="int8", input_names='input[-1,224,224,3]') inc_model.export(self.args.output_graph, config) if self.args.benchmark: - # ONNX INT8 Benchmark if self.args.input_graph.endswith('.onnx'): model = onnx.load(self.args.input_graph) - data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') - label_path = os.path.join(self.args.dataset_location, 'val.txt') - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, - 'transform': {'Rescale': {}, - 'Resize': {'size':256}, - 'CenterCrop': {'size': 224}, - 'Normalize': {'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225]}, - 'Cast': {'dtype': 'float32'}, - 'Transpose': {'perm': [2, 0, 1]}}, - 'filter': None - } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) - - from neural_compressor.metric import GeneralTopK - top1 = GeneralTopK(k=1) - from neural_compressor.data.transforms.imagenet_transform import LabelShift - postprocess = LabelShift(label_shift=-1) - def eval(onnx_model): - return eval_func_onnx(onnx_model, dataloader, top1, postprocess) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - # Tensorflow INT8 Benchmark else: - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"ImageRecord": {'root': self.args.dataset_location}}, - 'transform': {'BilinearImagenet': {'height': 224, 'width': 224}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) - def eval(model): - return eval_func_tf(model, dataloader, top1) + model = self.args.input_graph - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from 
neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(self.args.input_graph, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(self.args.input_graph) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"ImageRecord": {'root': self.args.dataset_location}}, + 'transform': {'BilinearImagenet': {'height': 224, 'width': 224}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + def eval(model): + if isinstance(model, str): + return eval_func_tf(model, dataloader, top1) + else: + return eval_func_onnx(model, dataloader, top1) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(self.args.input_graph, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) if __name__ == "__main__": evaluate_opt_graph = eval_classifier_optimized_graph() diff --git a/examples/tensorflow/tf2onnx/vgg16/prepare_dataset.sh b/examples/tensorflow/tf2onnx/prepare_imagenet_dataset.sh similarity index 100% rename from examples/tensorflow/tf2onnx/vgg16/prepare_dataset.sh rename to examples/tensorflow/tf2onnx/prepare_imagenet_dataset.sh diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md index 4fc441d3ac3..fcc1c9de9ae 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md @@ -33,14 +33,20 @@ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resne ### 3. Prepare Dataset -Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/ImageNet. The dir include below folder and files: - -```bash -ls /path/to/ImageNet -ILSVRC2012_img_val val.txt -``` + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + + ```shell + cd examples/tensorflow/tf2onnx/ + # convert validation subset + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd resnet50_v1.0/fp32_export + ``` # Run Command +Please note the dataset is TF records format for running benchmark. 
## Export Tensorflow FP32 model to ONNX FP32 model ```shell diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md index 3900dc308c0..5875dc21f5e 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md @@ -36,17 +36,20 @@ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resne ## 3. Prepare Dataset -Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/ImageNet. The dir include below folder and files: - -```bash -ls /path/to/ImageNet -ILSVRC2012_img_val val.txt -``` -The Raw image dataset is used for running benchmarking for ONNX model. - -TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. The TF records format dataset is used for quantizing Tensorflow FP32 model to Tensorflow INT8 QDQ model. + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + + ```shell + cd examples/tensorflow/tf2onnx/ + # convert validation subset + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd resnet50_v1.0/int8_export + ``` # Run Command +Please note the dataset is TF records format for running quantization and benchmark. ## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model ```shell diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/main.py b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/main.py index d842bca8847..a8564cc3e84 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/main.py +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/main.py @@ -106,7 +106,6 @@ def run(self): } conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100], outputs=['softmax_tensor'], - accuracy_criterion = AccuracyCriterion(tolerable_loss=0.3), op_name_list=op_name_list) from neural_compressor.metric import TensorflowTopK top1 = TensorflowTopK(k=1) diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md index 139c6b55c6d..f25ce9a572d 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md @@ -34,14 +34,20 @@ wget https://zenodo.org/record/2535873/files/resnet50_v1.pb ## 3. Prepare Dataset -Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/ImageNet. 
The dir include below folder and files: - -```bash -ls /path/to/ImageNet -ILSVRC2012_img_val val.txt -``` + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + + ```shell + cd examples/tensorflow/tf2onnx/ + # convert validation subset + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd resnet50_v1.5/fp32_export + ``` # Run Command +Please note the dataset is TF records format for running benchmark. ## Export Tensorflow FP32 model to ONNX FP32 model ```shell @@ -53,11 +59,9 @@ bash run_export.sh --input_model=./resnet50_v1.pb --output_model=./resnet50_v1.o bash run_benchmark.sh --input_model=./resnet50_v1.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 bash run_benchmark.sh --input_model=./resnet50_v1.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` -Please note this dataset is TF records format. ## Run benchmark for ONNX FP32 model ```shell -bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 -bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 +bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` -Please note this dataset is Raw image dataset. 
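As in the other examples, the ONNX benchmark loads the exported model into an onnxruntime session, mirroring `eval_func_onnx` in `main.py`. A minimal sketch with a placeholder model path and a dummy batch, assuming the exported graph keeps the NHWC input layout:

```python
# Minimal sketch of running the exported ONNX model with onnxruntime, following eval_func_onnx.
# The model path and the zero-filled NHWC batch are placeholders for illustration only.
import numpy as np
import onnx
import onnxruntime as ort

model = onnx.load("./resnet50_v1.onnx")
sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers())
input_names = [i.name for i in sess.get_inputs()]

dummy_batch = np.zeros((1, 224, 224, 3), dtype=np.float32)  # assumed NHWC input layout
outputs = sess.run(None, dict(zip(input_names, [dummy_batch])))
print([o.shape for o in outputs])
```

The real accuracy run feeds batches from the TF-records dataloader instead of a dummy tensor and accumulates the results into the TopK metric.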
diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py index bc33067cba1..77a34568144 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py @@ -26,13 +26,14 @@ tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) -def eval_func_onnx(model, dataloader, metric, postprocess): +def eval_func_onnx(model, dataloader, metric, postprocess=None): metric.reset() sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) input_names = [i.name for i in sess.get_inputs()] for input_data, label in dataloader: output = sess.run(None, dict(zip(input_names, [input_data]))) - output, label = postprocess((output, label)) + if postprocess: + output, label = postprocess((output, label)) metric.update(output[1], label) return metric.result() @@ -78,70 +79,43 @@ def run(self): from neural_compressor.model import Model from neural_compressor.config import TF2ONNXConfig inc_model = Model(self.args.input_graph) - config = TF2ONNXConfig(dtype="fp32", inputs_as_nchw="input_tensor:0") + config = TF2ONNXConfig(dtype="fp32") inc_model.export(self.args.output_graph, config) if self.args.benchmark: - # ONNX FP32 Benchmark if self.args.input_graph.endswith('.onnx'): model = onnx.load(self.args.input_graph) - data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') - label_path = os.path.join(self.args.dataset_location, 'val.txt') + else: + model = self.args.input_graph - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, - 'transform': {'ResizeWithAspectRatio': {'height': 224, 'width': 224}, - 'CenterCrop': {'size': 224}, - 'Normalize': {'mean': [123.68, 116.78, 103.94]}, - 'Cast': {'dtype': 'float32'}, - 'Transpose': {'perm': [2, 0, 1]}}, - 'filter': None - } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import GeneralTopK - top1 = GeneralTopK(k=1) - from neural_compressor.data.transforms.imagenet_transform import LabelShift - postprocess = LabelShift(label_shift=-1) - def eval(onnx_model): - return eval_func_onnx(onnx_model, dataloader, top1, postprocess) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - # Tensorflow FP32 Benchmark - else: - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"ImageRecord": {'root': 
self.args.dataset_location}}, - 'transform': {'ResizeCropImagenet': {'height': 224, 'width': 224, - 'mean_value': [123.68, 116.78, 103.94]}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) - def eval(model): + def eval(model): + if isinstance(model, str): return eval_func_tf(model, dataloader, top1) + else: + return eval_func_onnx(model, dataloader, top1) - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(self.args.input_graph, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(self.args.input_graph) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) if __name__ == "__main__": evaluate_opt_graph = eval_classifier_optimized_graph() diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md index 756fe3c48ae..69811e0e347 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md @@ -36,30 +36,31 @@ wget https://zenodo.org/record/2535873/files/resnet50_v1.pb ## 3. Prepare Dataset -Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/ImageNet. The dir include below folder and files: - -```bash -ls /path/to/ImageNet -ILSVRC2012_img_val val.txt -``` -The Raw image dataset is used for running benchmarking for ONNX model. - -TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. The TF records format dataset is used for quantizing Tensorflow FP32 model to Tensorflow INT8 QDQ model. + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. 
+ + ```shell + cd examples/tensorflow/tf2onnx/ + # convert validation subset + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd resnet50_v1.5/int8_export + ``` # Run Command +Please note the dataset is TF records format for running quantization and benchmark. ## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model ```shell bash run_tuning.sh --input_model=./resnet50_v1.pb --output_model=./resnet50_v1_int8.pb --dataset_location=/path/to/imagenet/ ``` -Please note this dataset is TF records format. ## Run benchmark for Tensorflow INT8 model ```shell bash run_benchmark.sh --input_model=./resnet50_v1_int8.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 bash run_benchmark.sh --input_model=./resnet50_v1_int8.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` -Please note this dataset is TF records format. ## Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model ```shell @@ -68,7 +69,6 @@ bash run_export.sh --input_model=./resnet50_v1_int8.pb --output_model=./resnet50 ## Run benchmark for ONNX INT8 QDQ model ```shell -bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 -bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 +bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` -Please note this dataset is Raw image dataset. 
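The performance mode of `run_benchmark.sh` maps to the neural_compressor benchmark API; a minimal sketch, where the exported ONNX model and the ImageNet TF records paths are placeholders:

```python
# Minimal sketch of the performance benchmark behind run_benchmark.sh --mode=performance.
# Model and dataset paths are placeholders; see main.py for the complete flow.
import onnx
from neural_compressor.benchmark import fit
from neural_compressor.config import BenchmarkConfig
from neural_compressor.utils.create_obj_from_config import create_dataloader

dataloader = create_dataloader('tensorflow', {
    'batch_size': 1,
    'dataset': {"ImageRecord": {'root': '/path/to/imagenet/'}},
    'transform': {'ResizeCropImagenet': {'height': 224, 'width': 224,
                                         'mean_value': [123.68, 116.78, 103.94]}},
    'filter': None
})

model = onnx.load("./resnet50_v1_int8.onnx")
conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7)
fit(model, conf, b_dataloader=dataloader)
```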
diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py index 6ccb0dd7066..511abcc0901 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py @@ -26,13 +26,14 @@ tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) -def eval_func_onnx(model, dataloader, metric, postprocess): +def eval_func_onnx(model, dataloader, metric, postprocess=None): metric.reset() sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) input_names = [i.name for i in sess.get_inputs()] for input_data, label in dataloader: output = sess.run(None, dict(zip(input_names, [input_data]))) - output, label = postprocess((output, label)) + if postprocess: + output, label = postprocess((output, label)) metric.update(output, label) return metric.result() @@ -116,70 +117,43 @@ def run(self): from neural_compressor.model import Model from neural_compressor.config import TF2ONNXConfig inc_model = Model(self.args.input_graph) - config = TF2ONNXConfig(dtype="int8", inputs_as_nchw="input_tensor:0") + config = TF2ONNXConfig(dtype="int8") inc_model.export(self.args.output_graph, config) if self.args.benchmark: - # ONNX INT8 Benchmark if self.args.input_graph.endswith('.onnx'): model = onnx.load(self.args.input_graph) - data_path = os.path.join(self.args.dataset_location, 'ILSVRC2012_img_val') - label_path = os.path.join(self.args.dataset_location, 'val.txt') - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"ImagenetRaw": {'data_path':data_path, 'image_list':label_path}}, - 'transform': {'ResizeWithAspectRatio': {'height': 224, 'width': 224}, - 'CenterCrop': {'size': 224}, - 'Normalize': {'mean': [123.68, 116.78, 103.94]}, - 'Cast': {'dtype': 'float32'}, - 'Transpose': {'perm': [2, 0, 1]}}, - 'filter': None - } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) - - from neural_compressor.metric import GeneralTopK - top1 = GeneralTopK(k=1) - from neural_compressor.data.transforms.imagenet_transform import LabelShift - postprocess = LabelShift(label_shift=-1) - def eval(onnx_model): - return eval_func_onnx(onnx_model, dataloader, top1, postprocess) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - # Tensorflow INT8 Benchmark else: - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"ImageRecord": {'root': self.args.dataset_location}}, - 'transform': {'ResizeCropImagenet': {'height': 224, 'width': 224, - 'mean_value': [123.68, 116.78, 103.94]}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) - def eval(model): - return eval_func_tf(model, dataloader, top1) + model = self.args.input_graph - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import 
BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(self.args.input_graph, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(self.args.input_graph) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + + def eval(model): + if isinstance(model, str): + return eval_func_tf(model, dataloader, top1) + else: + return eval_func_onnx(model, dataloader, top1) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) if __name__ == "__main__": evaluate_opt_graph = eval_classifier_optimized_graph() diff --git a/examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md b/examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md index 4cd9568a640..fed776aa9d0 100644 --- a/examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md @@ -70,15 +70,16 @@ We can get the pb file by convert the checkpoint file. We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. ```shell - cd examples/tensorflow/tf2onnx/vgg16 + cd examples/tensorflow/tf2onnx/ # convert validation subset - bash prepare_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset - bash prepare_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd fp32_export + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd vgg16/fp32_export ``` ## Run Command +Please note the dataset is TF records format for running benchmark. ### Export Tensorflow FP32 model to ONNX FP32 model ```shell @@ -90,11 +91,9 @@ bash run_export.sh --input_model=./frozen_vgg16.pb --output_model=./vgg_16.onnx bash run_benchmark.sh --input_model=./frozen_vgg16.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 bash run_benchmark.sh --input_model=./frozen_vgg16.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` -Please note this dataset is TF records format. 
### Run benchmark for ONNX FP32 model ```shell bash run_benchmark.sh --input_model=./vgg_16.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 bash run_benchmark.sh --input_model=./vgg_16.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 ``` -Please note this dataset is TF records format. diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/README.md b/examples/tensorflow/tf2onnx/vgg16/int8_export/README.md index 60bf1dca502..6bd25e9a383 100644 --- a/examples/tensorflow/tf2onnx/vgg16/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/vgg16/int8_export/README.md @@ -72,21 +72,21 @@ We can get the pb file by convert the checkpoint file. We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. ```shell - cd examples/tensorflow/tf2onnx/vgg16 + cd examples/tensorflow/tf2onnx/ # convert validation subset - bash prepare_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset - bash prepare_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd int8_export + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd vgg16/int8_export ``` ## Run Command +Please note the dataset is TF records format for running quantization and benchmark. ### Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model ```shell -bash run_tuning.sh --input_model=./frozen_vgg16 --output_model=./frozen_vgg16_int8.pb --dataset_location=/path/to/imagenet/ +bash run_tuning.sh --input_model=./frozen_vgg16.pb --output_model=./frozen_vgg16_int8.pb --dataset_location=/path/to/imagenet/ ``` -Please note this dataset is TF records format. ### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model ```shell @@ -98,11 +98,9 @@ bash run_export.sh --input_model=./frozen_vgg16_int8.pb --output_model=./frozen_ bash run_benchmark.sh --input_model=./rozen_vgg16_int8.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 bash run_benchmark.sh --input_model=./rozen_vgg16_int8.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` -Please note this dataset is TF records format. ### Run benchmark for ONNX INT8 QDQ model ```shell bash run_benchmark.sh --input_model=./frozen_vgg16_int8.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 bash run_benchmark.sh --input_model=./frozen_vgg16_int8.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 -``` -Please note this dataset is TF records format. 
\ No newline at end of file +``` \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/main.py b/examples/tensorflow/tf2onnx/vgg16/int8_export/main.py index 4566fe912a6..80e111258c8 100644 --- a/examples/tensorflow/tf2onnx/vgg16/int8_export/main.py +++ b/examples/tensorflow/tf2onnx/vgg16/int8_export/main.py @@ -120,7 +120,6 @@ def run(self): } conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100], outputs=['softmax_tensor'], - accuracy_criterion = AccuracyCriterion(tolerable_loss=0.3), op_name_list=op_name_list) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_func=eval_func_tf) From 61cf0c437ed40981d759c2d2c335f7da8ce18410 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Thu, 2 Feb 2023 23:47:14 +0800 Subject: [PATCH 19/43] update coco dataset for ssd_mobilenet_v1 Signed-off-by: Lv, Liang1 --- .../tf2onnx/prepare_coco_dataset.sh | 136 ++++++++++++++++++ .../ssd_mobilenet_v1/fp32_export/README.md | 21 ++- .../ssd_mobilenet_v1/fp32_export/main.py | 75 ++++------ .../ssd_mobilenet_v1/int8_export/README.md | 25 +++- .../ssd_mobilenet_v1/int8_export/main.py | 84 ++++------- 5 files changed, 226 insertions(+), 115 deletions(-) create mode 100644 examples/tensorflow/tf2onnx/prepare_coco_dataset.sh diff --git a/examples/tensorflow/tf2onnx/prepare_coco_dataset.sh b/examples/tensorflow/tf2onnx/prepare_coco_dataset.sh new file mode 100644 index 00000000000..fea0ff1c373 --- /dev/null +++ b/examples/tensorflow/tf2onnx/prepare_coco_dataset.sh @@ -0,0 +1,136 @@ +!/bin/bash +# set -x + +DATA_DIR="${PWD}/data" +DATA_NAME="val2017" +DATA_URL_LIST='http://images.cocodataset.org/zips/val2017.zip http://images.cocodataset.org/annotations/annotations_trainval2017.zip' +PACKAGES_LIST='val2017.zip annotations_trainval2017.zip' +VAL_IMAGE_DIR=$DATA_DIR/val2017 +TRAIN_ANNOTATIONS_FILE=$DATA_DIR/annotations/empty.json +VAL_ANNOTATIONS_FILE=$DATA_DIR/annotations/instances_val2017.json +TESTDEV_ANNOTATIONS_FILE=$DATA_DIR/annotations/empty.json +OUTPUT_DIR=$DATA_DIR + +help() +{ + cat <<- EOF + + Desc: Prepare dataset for Tensorflow COCO object detection. + + -h --help help info + + --dataset_location set dataset location, default is ./data + +EOF + exit 0 +} + +function main { + init_params "$@" + download_dataset + convert_to_tf_record +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --dataset_location=*) + DATA_DIR=$(echo "$var" |cut -f2 -d=) + ;; + -h|--help) help + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done + +} + +# removes files that will not be used anymore +function remove_zipped_packages { + for package in $PACKAGES_LIST; do + rm "$package" + done +} + +function download_tf_models_repo { + if [ ! -d models ]; then + git clone https://github.com/tensorflow/models.git + fi + cd models || exit + git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 + cd .. +} + +function divide_tf_records_by_dataset { + if [ ! -d "${DATA_DIR}/tf_test2017" ]; then + mkdir "${DATA_DIR}/tf_test2017" + fi + if [ ! -d "${DATA_DIR}/tf_train2017" ]; then + mkdir "${DATA_DIR}/tf_train2017" + fi + if [ ! 
-d "${DATA_DIR}/tf_val2017" ]; then + mkdir "${DATA_DIR}/tf_val2017" + fi + mv ${DATA_DIR}/coco_testdev.record* ${DATA_DIR}/tf_test2017 + mv ${DATA_DIR}/coco_train.record* ${DATA_DIR}/tf_train2017 + mv ${DATA_DIR}/coco_val.record* ${DATA_DIR}/tf_val2017 +} + +function convert { + cd models/research + protoc object_detection/protos/*.proto --python_out=. + export PYTHONPATH=$PYTHONPATH:$(pwd) + export PYTHONPATH=$PYTHONPATH:$(pwd)/slim + python ./object_detection/dataset_tools/create_coco_tf_record.py --logtostderr \ + --train_image_dir=empty_dir \ + --val_image_dir="${VAL_IMAGE_DIR}" \ + --test_image_dir=empty_dir \ + --train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \ + --val_annotations_file="${VAL_ANNOTATIONS_FILE}" \ + --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \ + --output_dir="${OUTPUT_DIR}" +} + +function convert_to_tf_record { + download_tf_models_repo + convert + divide_tf_records_by_dataset +} + +# download_dataset +function download_dataset { + if [ ! -d "${DATA_DIR}" ]; then + mkdir "${DATA_DIR}" + fi + + cd "${DATA_DIR}" || exit + if [ ! -f "${VAL_IMAGE_DIR}" ]; then + + for dataset_dowload_link in $DATA_URL_LIST; do + wget "$dataset_dowload_link" + done + for package in $PACKAGES_LIST; do + unzip -o "$package" + done + remove_zipped_packages + if [ ! -d empty_dir ]; then + mkdir empty_dir + fi + + cd annotations || exit + echo "{ \"images\": {}, \"categories\": {}}" > empty.json + cd .. + else + echo "Dataset ${DATA_NAME} is exist!" + fi + + cd ../ +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md index c597c1a77c2..c48d59b35a2 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md @@ -36,10 +36,27 @@ tar -xvf $MODEL.tar.gz ## 3. Prepare Dataset +### Automatic dataset download + +> **_Note: `prepare_coco_dataset.sh` script works with TF version 1.x._** + +Run the `prepare_coco_dataset.sh` script located in `examples/tensorflow/tf2onnx`. + +Usage: +```shell +cd examples/tensorflow/tf2onnx/ +bash prepare_coco_dataset.sh +cd ssd_mobilenet_v1/fp32_export +``` + +This script will download the *train*, *validation* and *test* COCO datasets. Furthermore it will convert them to +tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. + +### Manual dataset download Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). -The dataset can be converted into tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. # Run Command +Please note the dataset is TF records format for running benchmark. ## Export Tensorflow FP32 model to ONNX FP32 model ```shell @@ -51,11 +68,9 @@ bash run_export.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --output_mod bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 ``` -Please note this dataset is TF records format. 
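The TensorFlow and ONNX benchmark commands in this example are both driven by the same `main.py`, which picks the evaluation path from the file extension of `--input_model` and reuses one COCO TF-records dataloader and mAP metric. The sketch below illustrates that shared setup; the helper name `build_benchmark_inputs` and the model/dataset paths are placeholders, not part of the example itself.

```python
# Minimal sketch of the shared benchmark setup in main.py: load an ONNX model
# eagerly, or keep a TensorFlow frozen graph as a path for Neural Compressor.
import onnx
from neural_compressor.metric import COCOmAPv2
from neural_compressor.utils.create_obj_from_config import create_dataloader

def build_benchmark_inputs(input_graph, dataset_location, batch_size=16):
    model = onnx.load(input_graph) if input_graph.endswith('.onnx') else input_graph

    # COCO validation set in TF-records format, resized to the SSD input size.
    dataloader = create_dataloader('tensorflow', {
        'batch_size': batch_size,
        'dataset': {'COCORecord': {'root': dataset_location}},
        'transform': {'Resize': {'size': 300}},
        'filter': None,
    })

    # mAP metric; the mapping ties each detection field to its output index.
    metric = COCOmAPv2(output_index_mapping={
        'num_detections': 0, 'boxes': 1, 'scores': 2, 'classes': 3})
    return model, dataloader, metric

model, dataloader, metric = build_benchmark_inputs(
    './ssd_mobilenet_v1_coco_2018_01_28.onnx', '/path/to/coco_dataset/')
```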
## Run benchmark for ONNX FP32 model ```shell bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 ``` -Please note this dataset is Raw Coco dataset. diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py index 82b8cc599f1..dedd3db6ee3 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py @@ -36,7 +36,7 @@ def eval_func_onnx(model, dataloader, metric, postprocess=None): labels = [labels] if len_inputs == 1: ort_inputs.update( - inputs if isinstance(inputs, dict) else {inputs_names[0]: inputs} + inputs if isinstance(inputs, dict) else {inputs_names[0]: np.array(inputs,dtype=np.uint8)} ) else: assert len_inputs == len(inputs), \ @@ -118,64 +118,39 @@ def run(self): inc_model.export(self.args.output_graph, config) if self.args.benchmark: - # ONNX FP32 Benchmark if self.args.input_graph.endswith('.onnx'): model = onnx.load(self.args.input_graph) - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"COCORaw": {'root':self.args.dataset_location}}, - 'transform': {'Resize': {'size': 300}}, - 'filter': None - } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) - - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - def eval(onnx_model): - return eval_func_onnx(onnx_model, dataloader, mAP2) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - # Tensorflow FP32 Benchmark else: - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { + model = self.args.input_graph + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { 'batch_size': self.args.batch_size, 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, 'transform': {'Resize': {'size': 300}}, 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - def eval(model): + } + dataloader = create_dataloader('tensorflow', dataloader_args) + + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + + def eval(model): + if isinstance(model, str): return eval_func_tf(model, dataloader, mAP2) - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = 
BenchmarkConfig( - inputs=["image_tensor"], - outputs=["num_detections", "detection_boxes", "detection_scores", "detection_classes"], - warmup=10, - iteration=100, - cores_per_instance=4, - num_of_instance=7) - fit(self.args.input_graph, conf, b_dataloader=dataloader) else: - accuracy = eval(self.args.input_graph) - print('Batch size = %d' % self.args.batch_size) - print("Accuracy: %.5f" % accuracy) + return eval_func_onnx(model, dataloader, mAP2) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) if __name__ == "__main__": evaluate_opt_graph = eval_classifier_optimized_graph() diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md index d89a887cd8c..d5b2066a160 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md @@ -36,10 +36,27 @@ tar -xvf $MODEL.tar.gz ## 3. Prepare Dataset +### Automatic dataset download + +> **_Note: `prepare_coco_dataset.sh` script works with TF version 1.x._** + +Run the `prepare_coco_dataset.sh` script located in `examples/tensorflow/tf2onnx`. + +Usage: +```shell +cd examples/tensorflow/tf2onnx/ +bash prepare_coco_dataset.sh +cd ssd_mobilenet_v1/int8_export +``` + +This script will download the *train*, *validation* and *test* COCO datasets. Furthermore it will convert them to +tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. + +### Manual dataset download Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). -The dataset can be converted into tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. # Run Command +Please note the dataset is TF records format for running benchmark. ## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model ```shell @@ -51,7 +68,6 @@ bash run_tuning.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --output_mod bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 ``` -Please note this dataset is TF records format. 
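For reference, the quantization step behind `run_tuning.sh` above is configured in `main.py` roughly as sketched below. The values mirror this example's script; the dataset path and the constant stand-in `eval_func` are placeholders, since the real script evaluates COCO mAP during tuning.

```python
# Sketch of the INT8 QDQ quantization step behind run_tuning.sh.
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion
from neural_compressor.utils.create_obj_from_config import create_dataloader

calib_dataloader = create_dataloader('tensorflow', {
    'batch_size': 16,
    'dataset': {'COCORecord': {'root': '/path/to/coco_dataset/'}},
    'transform': {'Resize': {'size': 300}},
    'filter': None,
})

# The ITEX backend emits a QDQ graph; tuning accepts at most 10% relative accuracy loss.
conf = PostTrainingQuantConfig(
    backend='itex',
    calibration_sampling_size=[10, 50, 100, 200],
    inputs=['image_tensor'],
    outputs=['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes'],
    accuracy_criterion=AccuracyCriterion(tolerable_loss=0.1))

q_model = quantization.fit('./ssd_mobilenet_v1_coco_2018_01_28',
                           conf=conf,
                           calib_dataloader=calib_dataloader,
                           eval_func=lambda model: 1.0)  # stand-in for the COCO mAP eval_func
q_model.save('./ssd_mobilenet_v1_coco_2018_01_28_int8.pb')
```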
## Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model ```shell @@ -60,7 +76,6 @@ bash run_export.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --ou ## Run benchmark for ONNX INT8 QDQ model ```shell -bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset_raw/ --batch_size=16 -bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=performance --dataset_location=/path/to/coco_dataset_raw/ --batch_size=16 +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 ``` -Please note this dataset is Raw Coco dataset. diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py index 5714b8d6d9b..10e0cb1e754 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py @@ -29,11 +29,6 @@ def eval_func_onnx(model, dataloader, metric, postprocess=None): metric.reset() session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - # input_names = [i.name for i in sess.get_inputs()] - # for input_data, label in dataloader: - # output = sess.run(None, dict(zip(input_names, [input_data]))) - # metric.update(output, label) - # return metric.result() ort_inputs = {} len_inputs = len(session.get_inputs()) inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] @@ -42,7 +37,7 @@ def eval_func_onnx(model, dataloader, metric, postprocess=None): labels = [labels] if len_inputs == 1: ort_inputs.update( - inputs if isinstance(inputs, dict) else {inputs_names[0]: inputs} + inputs if isinstance(inputs, dict) else {inputs_names[0]: np.array(inputs,dtype=np.uint8)} ) else: assert len_inputs == len(inputs), \ @@ -134,7 +129,7 @@ def run(self): eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[10, 50, 100, 200], inputs=['image_tensor'], outputs=['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes'], - accuracy_criterion = AccuracyCriterion(tolerable_loss=0.3)) + accuracy_criterion = AccuracyCriterion(tolerable_loss=0.1)) from neural_compressor.metric import COCOmAPv2 output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) @@ -152,65 +147,40 @@ def run(self): inc_model.export(self.args.output_graph, config) if self.args.benchmark: - # ONNX INT8 Benchmark if self.args.input_graph.endswith('.onnx'): model = onnx.load(self.args.input_graph) - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"COCORaw": {'root':self.args.dataset_location}}, - 'transform': {'Resize': {'size': 300}}, - 'filter': None - } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) - - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - def eval(onnx_model): - return 
eval_func_onnx(onnx_model, dataloader, mAP2) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - # Tensorflow INT8 Benchmark else: - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { + model = self.args.input_graph + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { 'batch_size': self.args.batch_size, 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, 'transform': {'Resize': {'size': 300}}, 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - def eval(model): + } + dataloader = create_dataloader('tensorflow', dataloader_args) + + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + + def eval(model): + if isinstance(model, str): return eval_func_tf(model, dataloader, mAP2) - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig( - inputs=["image_tensor"], - outputs=["num_detections", "detection_boxes", "detection_scores", "detection_classes"], - warmup=10, - iteration=100, - cores_per_instance=4, - num_of_instance=7) - fit(self.args.input_graph, conf, b_dataloader=dataloader) else: - accuracy = eval(self.args.input_graph) - print('Batch size = %d' % self.args.batch_size) - print("Accuracy: %.5f" % accuracy) - + return eval_func_onnx(model, dataloader, mAP2) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + if __name__ == "__main__": evaluate_opt_graph = eval_classifier_optimized_graph() From a7d841740b36c7ce978b09ff708014afce99bacc Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Fri, 3 Feb 2023 09:41:55 +0800 Subject: [PATCH 20/43] remove redundant eval_dataloader in vgg16 Signed-off-by: zehao-intel --- examples/tensorflow/tf2onnx/vgg16/int8_export/main.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/main.py b/examples/tensorflow/tf2onnx/vgg16/int8_export/main.py index 80e111258c8..1ae9aa666fb 100644 --- a/examples/tensorflow/tf2onnx/vgg16/int8_export/main.py +++ b/examples/tensorflow/tf2onnx/vgg16/int8_export/main.py @@ -103,14 +103,6 @@ def run(self): 'filter': None } calib_dataloader = create_dataloader('tensorflow', calib_dataloader_args) - eval_dataloader_args = { - 'batch_size': 32, - 'dataset': {"ImageRecord": {'root':args.dataset_location}}, - 'transform': {'ResizeCropImagenet': - {'height': 224, 
'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, - 'filter': None - } - eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) op_name_list = { 'resnet_model/dense/MatMul': { From 75db36936a180b79f1cd99c9cdfd8798f129f659 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Fri, 3 Feb 2023 10:12:56 +0800 Subject: [PATCH 21/43] refine faster_rcnn_resnet50 example Signed-off-by: Lv, Liang1 --- .../fp32_export/README.md | 21 ++++- .../faster_rcnn_resnet50/fp32_export/main.py | 75 ++++++----------- .../int8_export/README.md | 25 ++++-- .../faster_rcnn_resnet50/int8_export/main.py | 81 ++++++------------- .../ssd_mobilenet_v1/int8_export/README.md | 2 +- 5 files changed, 90 insertions(+), 114 deletions(-) diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/README.md b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/README.md index 51181da12f2..c398a72165f 100644 --- a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/README.md @@ -35,10 +35,27 @@ tar -xvf faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz ## 3. Prepare Dataset +### Automatic dataset download + +> **_Note: `prepare_coco_dataset.sh` script works with TF version 1.x._** + +Run the `prepare_coco_dataset.sh` script located in `examples/tensorflow/tf2onnx`. + +Usage: +```shell +cd examples/tensorflow/tf2onnx/ +bash prepare_coco_dataset.sh +cd faster_rcnn_resnet50/fp32_export +``` + +This script will download the *train*, *validation* and *test* COCO datasets. Furthermore it will convert them to +tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. + +### Manual dataset download Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). -The dataset can be converted into tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. # Run Command +Please note the dataset is TF records format for running benchmark. ## Export Tensorflow FP32 model to ONNX FP32 model ```shell @@ -50,11 +67,9 @@ bash run_export.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inferen bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 ``` -Please note this dataset is TF records format. ## Run benchmark for ONNX FP32 model ```shell bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_fp32_coco.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_fp32_coco.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 ``` -Please note this dataset is Raw Coco dataset. 
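When driving the exported ONNX model with onnxruntime directly, note that this example's `eval_func_onnx` casts the image batch to uint8, matching the `image_tensor` input carried over from the TensorFlow graph. Below is a minimal sketch with a random batch; the model path and the assumption of a single uint8 NHWC input with the four detection outputs in the mapped order come from this example, not from a general guarantee.

```python
# Minimal sketch: run the exported detector with onnxruntime on a uint8 NHWC batch.
import numpy as np
import onnx
import onnxruntime as ort

model = onnx.load('./faster_rcnn_resnet50_fp32_coco.onnx')
session = ort.InferenceSession(model.SerializeToString(),
                               providers=ort.get_available_providers())

input_name = session.get_inputs()[0].name
# The Resize transform used by the dataloader produces 600x600 images for this model.
batch = np.random.randint(0, 256, size=(1, 600, 600, 3), dtype=np.uint8)

# Output order follows the example's output_index_mapping:
# num_detections, boxes, scores, classes.
num_detections, boxes, scores, classes = session.run(None, {input_name: batch})[:4]
print(num_detections.shape, boxes.shape, scores.shape, classes.shape)
```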
diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/main.py b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/main.py index 55bb1a378cb..a2d5990f1d3 100644 --- a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/main.py +++ b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/main.py @@ -36,7 +36,7 @@ def eval_func_onnx(model, dataloader, metric, postprocess=None): labels = [labels] if len_inputs == 1: ort_inputs.update( - inputs if isinstance(inputs, dict) else {inputs_names[0]: inputs} + inputs if isinstance(inputs, dict) else {inputs_names[0]: np.array(inputs,dtype=np.uint8)} ) else: assert len_inputs == len(inputs), \ @@ -118,64 +118,39 @@ def run(self): inc_model.export(self.args.output_graph, config) if self.args.benchmark: - # ONNX FP32 Benchmark if self.args.input_graph.endswith('.onnx'): model = onnx.load(self.args.input_graph) - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"COCORaw": {'root':self.args.dataset_location}}, - 'transform': {'Resize': {'size': 600}}, - 'filter': None - } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) - - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - def eval(onnx_model): - return eval_func_onnx(onnx_model, dataloader, mAP2) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - # Tensorflow FP32 Benchmark else: - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { + model = self.args.input_graph + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { 'batch_size': self.args.batch_size, 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, 'transform': {'Resize': {'size': 600}}, 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - def eval(model): + } + dataloader = create_dataloader('tensorflow', dataloader_args) + + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + + def eval(model): + if isinstance(model, str): return eval_func_tf(model, dataloader, mAP2) - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig( - inputs=["image_tensor"], - outputs=["num_detections", "detection_boxes", "detection_scores", "detection_classes"], - warmup=10, - iteration=100, - cores_per_instance=4, - num_of_instance=7) - fit(self.args.input_graph, conf, b_dataloader=dataloader) else: - accuracy = eval(self.args.input_graph) - print('Batch size = %d' % self.args.batch_size) - print("Accuracy: %.5f" % 
accuracy) + return eval_func_onnx(model, dataloader, mAP2) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) if __name__ == "__main__": evaluate_opt_graph = eval_classifier_optimized_graph() diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/README.md b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/README.md index 237f369b5eb..846a87fb238 100644 --- a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/README.md @@ -35,10 +35,27 @@ tar -xvf faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz ## 3. Prepare Dataset +### Automatic dataset download + +> **_Note: `prepare_coco_dataset.sh` script works with TF version 1.x._** + +Run the `prepare_coco_dataset.sh` script located in `examples/tensorflow/tf2onnx`. + +Usage: +```shell +cd examples/tensorflow/tf2onnx/ +bash prepare_coco_dataset.sh +cd faster_rcnn_resnet50/int8_export +``` + +This script will download the *train*, *validation* and *test* COCO datasets. Furthermore it will convert them to +tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. + +### Manual dataset download Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). -The dataset can be converted into tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. # Run Command +Please note the dataset is TF records format for running quantization and benchmark. ## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model ```shell @@ -50,7 +67,6 @@ bash run_tuning.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inferen bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_coco_int8.pb --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_coco_int8.pb --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 ``` -Please note this dataset is TF records format. ## Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model ```shell @@ -59,7 +75,6 @@ bash run_export.sh --input_model=./faster_rcnn_resnet50_coco_int8.pb --output_mo ## Run benchmark for ONNX INT8 QDQ model ```shell -bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_coco_int8.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset_raw/ --batch_size=16 -bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_coco_int8.onnx --mode=performance --dataset_location=/path/to/coco_dataset_raw/ --batch_size=16 +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_coco_int8.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_coco_int8.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 ``` -Please note this dataset is Raw Coco dataset. 
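After `run_export.sh` finishes, a quick way to confirm the result really is in QDQ format is to count the `QuantizeLinear`/`DequantizeLinear` nodes in the exported graph. A small, optional check (the model path is a placeholder):

```python
# Optional sanity check: the INT8 export should contain QuantizeLinear/DequantizeLinear pairs.
from collections import Counter
import onnx

model = onnx.load('./faster_rcnn_resnet50_coco_int8.onnx')
op_counts = Counter(node.op_type for node in model.graph.node)
print('QuantizeLinear nodes:  ', op_counts['QuantizeLinear'])
print('DequantizeLinear nodes:', op_counts['DequantizeLinear'])
assert op_counts['QuantizeLinear'] > 0 and op_counts['DequantizeLinear'] > 0, \
    'exported model does not look like ONNX QDQ format'
```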
diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/main.py b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/main.py index eb757a414cd..80ffc2a910f 100644 --- a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/main.py +++ b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/main.py @@ -29,11 +29,6 @@ def eval_func_onnx(model, dataloader, metric, postprocess=None): metric.reset() session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - # input_names = [i.name for i in sess.get_inputs()] - # for input_data, label in dataloader: - # output = sess.run(None, dict(zip(input_names, [input_data]))) - # metric.update(output, label) - # return metric.result() ort_inputs = {} len_inputs = len(session.get_inputs()) inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] @@ -42,7 +37,7 @@ def eval_func_onnx(model, dataloader, metric, postprocess=None): labels = [labels] if len_inputs == 1: ort_inputs.update( - inputs if isinstance(inputs, dict) else {inputs_names[0]: inputs} + inputs if isinstance(inputs, dict) else {inputs_names[0]: np.array(inputs,dtype=np.uint8)} ) else: assert len_inputs == len(inputs), \ @@ -152,64 +147,40 @@ def run(self): inc_model.export(self.args.output_graph, config) if self.args.benchmark: - # ONNX INT8 Benchmark if self.args.input_graph.endswith('.onnx'): model = onnx.load(self.args.input_graph) - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"COCORaw": {'root':self.args.dataset_location}}, - 'transform': {'Resize': {'size': 600}}, - 'filter': None - } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) - - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - def eval(onnx_model): - return eval_func_onnx(onnx_model, dataloader, mAP2) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - # Tensorflow INT8 Benchmark else: - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { + model = self.args.input_graph + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { 'batch_size': self.args.batch_size, 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, 'transform': {'Resize': {'size': 600}}, 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - def eval(model): + } + dataloader = create_dataloader('tensorflow', dataloader_args) + + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + + def eval(model): + if isinstance(model, str): return eval_func_tf(model, dataloader, 
mAP2) - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig( - inputs=["image_tensor"], - outputs=["num_detections", "detection_boxes", "detection_scores", "detection_classes"], - warmup=10, - iteration=100, - cores_per_instance=4, - num_of_instance=7) - fit(self.args.input_graph, conf, b_dataloader=dataloader) else: - accuracy = eval(self.args.input_graph) - print('Batch size = %d' % self.args.batch_size) - print("Accuracy: %.5f" % accuracy) + return eval_func_onnx(model, dataloader, mAP2) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + if __name__ == "__main__": evaluate_opt_graph = eval_classifier_optimized_graph() diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md index d5b2066a160..83d4edbabf4 100644 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md @@ -56,7 +56,7 @@ tensorflow records using the `https://github.com/tensorflow/models.git` dedicate Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). # Run Command -Please note the dataset is TF records format for running benchmark. +Please note the dataset is TF records format for running quantization and benchmark. ## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model ```shell From a121a32ec8afe3e0f3d21686c2b3dda16ccfa8fb Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Fri, 3 Feb 2023 20:29:30 +0800 Subject: [PATCH 22/43] fix pylint issue Signed-off-by: Lv, Liang1 --- .../adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py | 4 ++-- neural_compressor/adaptor/tf_utils/tf2onnx_converter.py | 3 ++- neural_compressor/experimental/export/tf2onnx.py | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py index 86bb26de3bc..3f27c8f4dfe 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py @@ -845,7 +845,7 @@ def insert_new_node_on_input(self, node, op_type, input_name, name=None, domain= def add_graph_input(self, name, dtype=None, shape=None): """Add placeholder node as graph's input. Order matters only for subgraph. - Placeholders in original graph are assumed for main graph, order not matters. + Placeholders in original graph are assumed for main graph, order not matters. """ if dtype is None: dtype = self.get_dtype(name) @@ -1104,7 +1104,7 @@ def delete_unused_nodes(self, outputs_name): self.reset_nodes(related_nodes) def safe_to_remove_nodes(self, to_delete): - """ List of nodes that safe to delete (i.e. outputs not consumed by other nodes.)""" + """List of nodes that safe to delete, i.e. 
outputs not consumed by other nodes.""" safe_to_remove = [] delete_set = set(to_delete) for n in delete_set: diff --git a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py index 248cc30aa9d..032cea32816 100644 --- a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +++ b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py @@ -42,7 +42,9 @@ def __init__(self, model, input_names, output_names, shape_override, inputs_as_n model (graphdef): tensorflow QDQ graphdef input_names (list, optional): input names. Defaults to None. output_names (list, optional): output names. Defaults to None. + shape_override: dict with inputs that override the shapes given by tensorflow. opset_version (int, optional): opset version. Defaults to 14. + inputs_as_nchw (list, optional): transpose the input. Defaults to None. """ graph_def = self.tf_graph_optimize(model) @@ -137,7 +139,6 @@ def tf_graph_optimize(self, model): def transpose_inputs(self, ctx, inputs_as_nchw): """Insert a transpose from NHWC to NCHW on model input on users request.""" - ops = [] for node in ctx.get_nodes(): for _, output_name in enumerate(node.output): diff --git a/neural_compressor/experimental/export/tf2onnx.py b/neural_compressor/experimental/export/tf2onnx.py index ae59356bb78..9a6cb7abc14 100644 --- a/neural_compressor/experimental/export/tf2onnx.py +++ b/neural_compressor/experimental/export/tf2onnx.py @@ -22,7 +22,7 @@ def _split_nodename_and_shape(name): - """input name with shape into name and shape.""" + """Split input name with shape into name and shape.""" # pattern for a node name inputs = [] shapes = {} @@ -57,6 +57,7 @@ def tf_to_fp32_onnx( opset_version (int, optional): opset version. Defaults to 14. input_names (list, optional): input names. Defaults to None. output_names (list, optional): output names. Defaults to None. + inputs_as_nchw (list, optional): transpose the input. Defaults to None. """ shape_override = None if isinstance(input_names, str): @@ -89,6 +90,7 @@ def tf_to_int8_onnx( opset_version (int, optional): opset version. Defaults to 14. input_names (list, optional): input names. Defaults to None. output_names (list, optional): output names. Defaults to None. + inputs_as_nchw (list, optional): transpose the input. Defaults to None. """ shape_override = None if isinstance(input_names, str): From 15fcb782924b5a071d19725a3621f9c0dd3e0ed5 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Fri, 3 Feb 2023 20:41:17 +0800 Subject: [PATCH 23/43] fix spelling check issue Signed-off-by: Lv, Liang1 --- examples/tensorflow/tf2onnx/README.md | 2 +- examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md | 2 +- examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/tensorflow/tf2onnx/README.md b/examples/tensorflow/tf2onnx/README.md index c8985ae328f..8d1cf56639a 100644 --- a/examples/tensorflow/tf2onnx/README.md +++ b/examples/tensorflow/tf2onnx/README.md @@ -1,4 +1,4 @@ -# Tensorflow models export to ONNX moldes Examples +# Tensorflow models export to ONNX models Examples These examples show how to export Tensorflow models to ONNX models including FP32 and INT8. Please note that we only support to export ONNX QDQ format for INT8 now. 
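As the docstrings above note, the export helpers `tf_to_fp32_onnx` and `tf_to_int8_onnx` accept input names that may carry a shape override in the `name[dims]` form used by the tf2onnx command line, which `_split_nodename_and_shape` separates into a node name and a shape dict. The snippet below is a simplified, self-contained illustration of that convention, not the library's actual implementation:

```python
# Simplified illustration of splitting "name[1,224,224,3]" style input specs
# into a node name plus a shape-override dict (mirrors the tf2onnx convention).
import re

def split_nodename_and_shape(name):
    match = re.match(r"^(?P<name>[^\[\]]+)(\[(?P<shape>[\d,\-]+)\])?$", name)
    if match is None:
        raise ValueError(f"invalid input spec: {name}")
    node_name = match.group("name")
    shape = match.group("shape")
    if shape is None:
        return node_name, None
    dims = [int(dim) for dim in shape.split(",")]
    return node_name, {node_name: dims}

print(split_nodename_and_shape("input:0"))               # ('input:0', None)
print(split_nodename_and_shape("input:0[1,224,224,3]"))  # ('input:0', {'input:0': [1, 224, 224, 3]})
```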
diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md index fcc1c9de9ae..d771da3df65 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md @@ -1,7 +1,7 @@ Step-by-Step ============ -This document is used to show how to export Tensorflow RestNet50_V1.0 FP32 model to ONNX FP32 model using Intel® Neural Compressor. +This document is used to show how to export Tensorflow ResNet50_V1.0 FP32 model to ONNX FP32 model using Intel® Neural Compressor. # Prerequisite diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md index f25ce9a572d..77b97e92057 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md @@ -1,7 +1,7 @@ Step-by-Step ============ -This document is used to show how to export Tensorflow RestNet50 FP32 model to ONNX FP32 model using Intel® Neural Compressor. +This document is used to show how to export Tensorflow ResNet50 FP32 model to ONNX FP32 model using Intel® Neural Compressor. # Prerequisite From 87aa2f077f2788979dda5783c0e6b7008bec7acb Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Fri, 3 Feb 2023 21:27:24 +0800 Subject: [PATCH 24/43] fix pylint issues Signed-off-by: Lv, Liang1 --- .azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt | 1 + examples/tensorflow/tf2onnx/vgg16/int8_export/README.md | 4 ++-- .../adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py | 5 ++--- neural_compressor/adaptor/tf_utils/tf2onnx_converter.py | 3 ++- requirements.txt | 1 + 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt index 703d84679c2..2763d399be1 100644 --- a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt +++ b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt @@ -1101,6 +1101,7 @@ mobilenet MobileNet mobilenetv Mobilenetv +MobilenetV MobileNetv MobileNetV modalities diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/README.md b/examples/tensorflow/tf2onnx/vgg16/int8_export/README.md index 6bd25e9a383..17d5150779e 100644 --- a/examples/tensorflow/tf2onnx/vgg16/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/vgg16/int8_export/README.md @@ -95,8 +95,8 @@ bash run_export.sh --input_model=./frozen_vgg16_int8.pb --output_model=./frozen_ ## Run benchmark for Tensorflow INT8 model ```shell -bash run_benchmark.sh --input_model=./rozen_vgg16_int8.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 -bash run_benchmark.sh --input_model=./rozen_vgg16_int8.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +bash run_benchmark.sh --input_model=./frozen_vgg16_int8.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./frozen_vgg16_int8.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` ### Run benchmark for ONNX INT8 QDQ model diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py index 3f27c8f4dfe..931335a609c 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py @@ 
-22,7 +22,7 @@ import six import numpy as np -from onnx import helper, numpy_helper, shape_inference, AttributeProto, TensorProto +from onnx import helper, numpy_helper, AttributeProto, TensorProto from . import tf2onnx_utils as utils from .onnx_node import OnnxNode @@ -502,7 +502,7 @@ def get_shape(self, name): shape[i] = -1 # hack to allow utils.ONNX_UNKNOWN_DIMENSION to override batchsize if needed. # default is -1. - if shape[0] == -1: + if shape[0] == -1: # pylint: disable=E1136 # pylint/issues/3139 # pylint: disable=unsupported-assignment-operation shape[0] = utils.ONNX_UNKNOWN_DIMENSION return shape @@ -1121,7 +1121,6 @@ def convert_qdq_nodes(self, q_node, dq_node): qdq_node_output_shape = self.get_shape(dq_node.output[0]) # Get the attributes of qdq node - narrow_range = q_node.attr['narrow_range'].i signed_input = bool(q_node.get_attr_value('T', TensorProto.INT8) == TensorProto.INT8) max_quantized = 127 diff --git a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py index 032cea32816..86875ea8648 100644 --- a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +++ b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py @@ -35,7 +35,8 @@ class TensorflowQDQToOnnxQDQConverter: """Convert tensorflow QDQ graph to ONNX QDQ graph.""" - def __init__(self, model, input_names, output_names, shape_override, inputs_as_nchw=None, opset_version=utils.DEFAULT_OPSET_VERSION): + def __init__(self, model, input_names, output_names, shape_override, + inputs_as_nchw=None, opset_version=utils.DEFAULT_OPSET_VERSION): """Constructor, initilization. Args: diff --git a/requirements.txt b/requirements.txt index e34d03112d5..59e54044aa8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,4 @@ sqlalchemy==1.4.27 alembic==1.7.7 pywin32; sys_platform != 'linux' deprecated +tf2onnx From aa3e2339d0980bb2330c0491a362dfca2c403cdb Mon Sep 17 00:00:00 2001 From: chensuyue Date: Wed, 8 Feb 2023 14:40:23 +0800 Subject: [PATCH 25/43] add tf2onnx val json Signed-off-by: chensuyue --- examples/.config/model_params_tf2onnx.json | 20 +++++++++++++++++++ .../resnet50v1.0/fp32_export/README.md | 4 ++-- .../resnet50v1.0/fp32_export/requirements.txt | 12 +++++------ .../resnet50v1.0/int8_export/README.md | 2 +- .../resnet50v1.0/int8_export/requirements.txt | 13 +++++------- 5 files changed, 33 insertions(+), 18 deletions(-) create mode 100644 examples/.config/model_params_tf2onnx.json diff --git a/examples/.config/model_params_tf2onnx.json b/examples/.config/model_params_tf2onnx.json new file mode 100644 index 00000000000..a2e152254b8 --- /dev/null +++ b/examples/.config/model_params_tf2onnx.json @@ -0,0 +1,20 @@ +{ + "tf2onnx": { + "resnet50v1.0": { + "model_src_dir": "tf2onnx/resnet50v1.0", + "source_model_dataset": "/tf_dataset/dataset/imagenet", + "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ImagenetRaw_small_5000", + "input_model": "/tf_dataset/pre-trained-models/resnet50/fp32/freezed_resnet50.pb", + "main_script": "main.py", + "batch_size": 32 + }, + "resnet50v1.5": { + "model_src_dir": "tf2onnx/resnet50v1.5", + "source_model_dataset": "/tf_dataset/dataset/imagenet", + "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ImagenetRaw_small_5000", + "input_model": "/tf_dataset/pre-trained-models/resnet50v1_5/fp32/resnet50_v1.pb", + "main_script": "main.py", + "batch_size": 32 + } + } +} \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md 
b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md index d771da3df65..c2afde82d4c 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md @@ -1,8 +1,8 @@ Step-by-Step ============ -This document is used to show how to export Tensorflow ResNet50_V1.0 FP32 model to ONNX FP32 model using Intel® Neural Compressor. - +This document is used to show how to export Tensorflow ResNet50_V1.0 FP32 model to ONNX FP32 model using Intel® Neural Compressor. +> Note: Validated Framework [Versions](/docs/source/installation_guide.md#validated-software-environment). # Prerequisite diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/requirements.txt index 8d1eeb068a8..f9eecbb4d57 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/requirements.txt +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/requirements.txt @@ -1,8 +1,6 @@ -tf2onnx==1.13.0 -onnx==1.9.0; python_version < '3.10' -onnx==1.12.0; python_version == '3.10' -onnxruntime==1.10.0; python_version < '3.10' -onnxruntime==1.12.0; python_version == '3.10' +tensorflow +intel-extension-for-tensorflow[cpu] +tf2onnx +onnx +onnxruntime onnxruntime-extensions; python_version < '3.10' - -pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md index 5875dc21f5e..6cdaf98ab2b 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md @@ -2,7 +2,7 @@ Step-by-Step ============ This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. - +> Note: Validated Framework [Versions](/docs/source/installation_guide.md#validated-software-environment). 
# Prerequisite diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/requirements.txt index 9bdc24cb87b..b5e9f4ddb52 100644 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/requirements.txt +++ b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/requirements.txt @@ -1,9 +1,6 @@ -tensorflow=2.10.0 +tensorflow intel-extension-for-tensorflow[cpu] -onnx==1.9.0; python_version < '3.10' -onnx==1.12.0; python_version == '3.10' -onnxruntime==1.10.0; python_version < '3.10' -onnxruntime==1.12.0; python_version == '3.10' -onnxruntime-extensions; python_version < '3.10' - -pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file +tf2onnx +onnx +onnxruntime +onnxruntime-extensions; python_version < '3.10' \ No newline at end of file From b64c58f07e6cce13cf96a664d16c310ea7065299 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Thu, 9 Feb 2023 09:49:49 +0800 Subject: [PATCH 26/43] fix import tf2onnx issue Signed-off-by: Lv, Liang1 --- neural_compressor/adaptor/tf_utils/tf2onnx_converter.py | 5 +++-- neural_compressor/experimental/export/tf2onnx.py | 3 ++- requirements.txt | 1 - test/requirements.txt | 1 + 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py index 86875ea8648..a1c52d96a3e 100644 --- a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +++ b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py @@ -25,10 +25,11 @@ from tensorflow.core.framework import tensor_pb2, node_def_pb2 from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer -from neural_compressor.utils.utility import dump_elapsed_time +from neural_compressor.utils.utility import dump_elapsed_time, LazyImport from .graph_rewriter.onnx import tf2onnx_utils as utils from .graph_rewriter.onnx.onnx_graph import OnnxGraph -import tf2onnx as t2o + +t2o = LazyImport('tf2onnx') logger = logging.getLogger("neural_compressor") diff --git a/neural_compressor/experimental/export/tf2onnx.py b/neural_compressor/experimental/export/tf2onnx.py index 9a6cb7abc14..f2fbd8b9cf1 100644 --- a/neural_compressor/experimental/export/tf2onnx.py +++ b/neural_compressor/experimental/export/tf2onnx.py @@ -17,9 +17,10 @@ """Helper functions to export model from TensorFlow to ONNX.""" from neural_compressor.utils import logger -import tf2onnx as t2o +from neural_compressor.utils.utility import LazyImport import re +t2o = LazyImport('tf2onnx') def _split_nodename_and_shape(name): """Split input name with shape into name and shape.""" diff --git a/requirements.txt b/requirements.txt index 59e54044aa8..e34d03112d5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,4 +22,3 @@ sqlalchemy==1.4.27 alembic==1.7.7 pywin32; sys_platform != 'linux' deprecated -tf2onnx diff --git a/test/requirements.txt b/test/requirements.txt index 32535567cc6..32484046a56 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -21,3 +21,4 @@ fvcore==0.1.5.post20220119 ofa==0.1.0.post202203231606 pymoo==0.5.0 intel-extension-for-pytorch +tf2onnx From 6f10283a621f788ae9d19a61de607cbb047e0ebc Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Thu, 9 Feb 2023 11:02:32 +0800 Subject: [PATCH 27/43] lazyimport tf2onnx for tf2onnx_utils Signed-off-by: Lv, Liang1 --- .../adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff 
--git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py index 9cb3e8bbd95..89b01e60bea 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py @@ -27,6 +27,8 @@ from tensorflow.core.framework import types_pb2, tensor_pb2 from tensorflow.python.framework import tensor_util from onnx import helper, onnx_pb, numpy_helper, defs, TensorProto, OperatorSetIdProto, shape_inference +from neural_compressor.utils.utility import LazyImport +t2o = LazyImport('tf2onnx') logger = logging.getLogger("neural_compressor") @@ -449,7 +451,6 @@ def compute_const_folding_using_tf(g, const_node_values, graph_outputs): if const_node_values is None: const_node_values = {} graph_outputs = set(graph_outputs) - from tf2onnx.tf_loader import tf_session, tf_placeholder ops = g.get_operations() outputs_to_values = {} @@ -511,7 +512,7 @@ def is_huge_shape(x): g2 = tf.Graph() with g2.as_default(): for inp in input_names: - tf_placeholder(outputs_to_dtypes[inp], name=inp.split(':')[0]) + t2o.tf_loader.tf_placeholder(outputs_to_dtypes[inp], name=inp.split(':')[0]) mini_graph_def = g2.as_graph_def() mini_graph_def.node.append(node.node_def) g3 = tf.Graph() @@ -523,7 +524,7 @@ def is_huge_shape(x): feed_dict[inp] = inp_np inp_shapes.append(inp_np.shape) try: - with tf_session() as sess: + with t2o.tf_loader.tf_session() as sess: tf.import_graph_def(mini_graph_def, name='') results = sess.run(output_names, feed_dict=feed_dict) if is_huge_shape(results[0].shape) and all(is_small_shape(inp) for inp in inp_shapes): From 3494918efdf08e440a9f24232ea008cd131af02c Mon Sep 17 00:00:00 2001 From: chensuyue Date: Thu, 9 Feb 2023 13:17:44 +0800 Subject: [PATCH 28/43] install onnx for itex ut Signed-off-by: chensuyue --- .azure-pipelines/scripts/ut/run_basic_itex.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.azure-pipelines/scripts/ut/run_basic_itex.sh b/.azure-pipelines/scripts/ut/run_basic_itex.sh index 81d9c98336f..d786588bb23 100644 --- a/.azure-pipelines/scripts/ut/run_basic_itex.sh +++ b/.azure-pipelines/scripts/ut/run_basic_itex.sh @@ -5,6 +5,8 @@ echo "run basic itex" echo "specify fwk version..." export itex_version='1.1.0' export tensorflow_version='2.11.0-official' +export onnx_version='1.13.0' +export onnxruntime_version='1.13.1' echo "set up UT env..." 
bash /neural-compressor/.azure-pipelines/scripts/ut/env_setup.sh From 03ad3a1aa3fcee2ff2e55c173d402078598d5770 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Fri, 10 Feb 2023 18:18:17 +0800 Subject: [PATCH 29/43] move vgg16 export path Signed-off-by: Lv, Liang1 --- .../tensorflow_models/vgg16/export/README.md | 118 ++++++++++++ .../tensorflow_models/vgg16/export/main.py | 168 ++++++++++++++++++ .../vgg16/export/requirements.txt | 6 + .../vgg16/export/run_benchmark.sh | 42 +++++ .../vgg16/export/run_export.sh | 57 ++++++ .../tf2onnx/vgg16/fp32_export/main.py | 1 - 6 files changed, 391 insertions(+), 1 deletion(-) create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/README.md create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/requirements.txt create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/run_benchmark.sh create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/run_export.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/README.md b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/README.md new file mode 100644 index 00000000000..86a9ffcbd38 --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/README.md @@ -0,0 +1,118 @@ +Step-by-Step +============ + +This document is used to show how to export Tensorflow FP32/INT8 QDQ model to ONNX FP32/INT8 QDQ model using Intel® Neural Compressor. + + +# Prerequisite + +## 1. Environment + +### Install Intel® Neural Compressor +```shell +pip install neural-compressor +``` + +### Install requirements +The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this export ONNX INT8 model example. +The Intel Extension for Tensorflow for Intel CPUs is installed as default. +```shell +pip install -r requirements.txt +``` + +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Model + +The vgg16 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). +We can get the pb file by convert the checkpoint file. + + 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models) + ```shell + wget http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz + tar -xvf vgg_16_2016_08_28.tar.gz + ``` + + 2. Exporting the Inference Graph + ```shell + git clone https://github.com/tensorflow/models + cd models/research/slim + python export_inference_graph.py \ + --alsologtostderr \ + --model_name=vgg_16 \ + --output_file=/tmp/vgg_16_inf_graph.pb + ``` + Make sure to use intel-tensorflow v1.15, and pip install tf_slim. 
+    #### Install Intel Tensorflow 1.15 up2
+    Check your python version and use pip to install the 1.15.0 up2 wheel from the links below:
+    https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl
+    https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl
+    https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl
+    > Please note: The ImageNet dataset has 1001 classes, while the **VGG** and **ResNet V1** final layers have only 1000 outputs, so we need to add the `--labels_offset=1` flag to the inference graph exporting command.
+
+ 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer names of the inference graph pb; for vgg_16 the output layer name is `vgg_16/fc8/squeezed`.
+
+ 4. Freeze the exported graph with the `freeze_graph.py` tool from the [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo:
+    ```shell
+    python freeze_graph.py \
+    --input_graph=/tmp/vgg_16_inf_graph.pb \
+    --input_checkpoint=./vgg_16.ckpt \
+    --input_binary=true \
+    --output_graph=./frozen_vgg16.pb \
+    --output_node_names=vgg_16/fc8/squeezed
+    ```
+
+## 3. Prepare Dataset
+
+  The TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
+  We also prepared related scripts in the `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directories based on the label (synset) of each image, you can use the command below to convert it to the TF records format.
+
+  ```shell
+  cd examples/tensorflow/image_recognition/tensorflow_models/
+  # convert validation subset
+  bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+  # convert train subset
+  bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+  cd vgg16/export
+  ```
+
+## Run Command
+Please note the dataset must be in TF records format for running quantization and benchmark.
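Both the quantization and benchmark paths in `main.py` read the ImageNet validation set through the same TF-records dataloader and score Top-1 accuracy after shifting the labels by one (the TF-records labels cover 1001 classes while VGG predicts 1000). The sketch below shows that setup; the dataset path and the synthetic logits/label pair are placeholders.

```python
# Sketch of the ImageNet TF-records dataloader and Top-1 metric used by main.py.
import numpy as np
from neural_compressor.data import LabelShift
from neural_compressor.metric import TensorflowTopK
from neural_compressor.utils.create_obj_from_config import create_dataloader

dataloader = create_dataloader('tensorflow', {
    'batch_size': 32,
    'dataset': {'ImageRecord': {'root': '/path/to/imagenet/'}},
    'transform': {'ResizeCropImagenet': {
        'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}},
    'filter': None,
})

postprocess = LabelShift(label_shift=1)   # align 1001-class labels with 1000-class outputs
metric = TensorflowTopK(k=1)

# Synthetic prediction/label pair just to show the update flow.
logits = np.random.rand(1, 1000).astype(np.float32)
label = np.array([43])
predictions, labels = postprocess((logits, label))
metric.update(predictions, labels)
print('Top-1 accuracy:', metric.result())
```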
+ +### Export Tensorflow FP32 model to ONNX FP32 model +```shell +bash run_export.sh --input_model=./frozen_vgg16.pb --output_model=./vgg_16.onnx --dtype=fp32 --quant_format=qdq +``` + +## Run benchmark for Tensorflow FP32 model +```shell +bash run_benchmark.sh --input_model=./frozen_vgg16.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./frozen_vgg16.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` + +### Run benchmark for ONNX FP32 model +```shell +bash run_benchmark.sh --input_model=./vgg_16.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./vgg_16.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` + +### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model +```shell +bash run_export.sh --input_model=./frozen_vgg16.pb --output_model=./frozen_vgg16_int8.onnx --dtype=int8 --quant_format=qdq --dataset_location=/path/to/imagenet/ +``` + +## Run benchmark for Tensorflow INT8 model +```shell +bash run_benchmark.sh --input_model=./tf-quant.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./tf-quant.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` + +### Run benchmark for ONNX INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./frozen_vgg16_int8.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 +bash run_benchmark.sh --input_model=./frozen_vgg16_int8.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 +``` \ No newline at end of file diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py new file mode 100644 index 00000000000..a4063466c1d --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py @@ -0,0 +1,168 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import onnx +import numpy as np +import tensorflow as tf +import onnxruntime as ort +from argparse import ArgumentParser +from neural_compressor.data import LabelShift +from neural_compressor.metric import TensorflowTopK +from neural_compressor.utils.create_obj_from_config import create_dataloader + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +arg_parser = ArgumentParser(description='Parse args') +arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') +arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') +arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') +arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') +arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') +arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') +arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') +arg_parser.add_argument('--dtype', dest='dtype', default='fp32', help='the data type of export') +arg_parser.add_argument('--quant_format', dest='quant_format', default='qdq', help='the quant format of export') +args = arg_parser.parse_args() + +def eval_func_onnx(model, dataloader, metric, postprocess=None): + metric.reset() + sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + input_names = [i.name for i in sess.get_inputs()] + + for input_data, label in dataloader: + output = sess.run(None, dict(zip(input_names, [input_data]))) + + output, label = postprocess((output, label)) + metric.update(output, label) + + acc = metric.result() + return acc + +def eval_func_tf(model): + from neural_compressor.model import Model + metric = TensorflowTopK(k=1) + postprocess = LabelShift(label_shift=1) + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + eval_dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', eval_dataloader_args) + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + if args.quant_format != 'qdq': + raise "Only support tensorflow export to ONNX for QDQ format, \ + please make sure input the corret quanti_format." 
+ + top1 = TensorflowTopK(k=1) + postprocess = LabelShift(label_shift=1) + + if args.export: + if args.dtype == 'int8': + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig + calib_dataloader_args = { + 'batch_size': 10, + 'dataset': {"ImageRecord": {'root':args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + calib_dataloader = create_dataloader('tensorflow', calib_dataloader_args) + op_name_list = { + 'resnet_model/dense/MatMul': + { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']}, + } + } + conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100], + outputs=['softmax_tensor'], + op_name_list=op_name_list) + q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, + eval_func=eval_func_tf) + q_model.save("./tf-quant.pb") + from neural_compressor.config import TF2ONNXConfig + config = TF2ONNXConfig(dtype=args.dtype) + q_model.export(args.output_graph, config) + else: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(args.input_graph) + config = TF2ONNXConfig(dtype=args.dtype) + inc_model.export(args.output_graph, config) + + if args.benchmark: + if args.input_graph.endswith('.onnx'): + model = onnx.load(args.input_graph) + else: + model = args.input_graph + eval_dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) + + def eval(model): + if isinstance(model, str): + return eval_func_tf(model) + else: + return eval_func_onnx(model, eval_dataloader, top1, postprocess) + + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=eval_dataloader) + elif args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % eval_dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/requirements.txt b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/requirements.txt new file mode 100644 index 00000000000..b5e9f4ddb52 --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/requirements.txt @@ -0,0 +1,6 @@ +tensorflow +intel-extension-for-tensorflow[cpu] +tf2onnx +onnx +onnxruntime +onnxruntime-extensions; python_version < '3.10' \ No newline at end of file diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/run_benchmark.sh b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/run_benchmark.sh new file mode 100644 index 00000000000..e83a029e800 --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + 
--input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/run_export.sh b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/run_export.sh new file mode 100644 index 00000000000..2faf034fc88 --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/run_export.sh @@ -0,0 +1,57 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + quant_format=qdq + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --dtype=*) + dtype=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + if [ ${dtype} == 'int8' ] + then + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dtype ${dtype} \ + --quant_format ${quant_format} \ + --dataset_location ${dataset_location} \ + --export + else + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dtype ${dtype} \ + --quant_format ${quant_format} \ + --export + fi +} + +main "$@" diff --git a/examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py b/examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py index aa3bee82034..7520f37201c 100644 --- a/examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py +++ b/examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py @@ -103,7 +103,6 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) - breakpoint() def eval(model): if isinstance(model, str): return eval_func_tf(model, eval_dataloader, top1, postprocess) From 96332712d1a80144c0f076f2867e352d4d999ff9 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Sat, 11 Feb 2023 11:36:30 +0800 Subject: [PATCH 30/43] move resnet50_v1 export path Signed-off-by: Lv, Liang1 --- .../resnet50_v1/export/README.md | 85 +++++++++ .../resnet50_v1/export/main.py | 165 ++++++++++++++++++ .../resnet50_v1/export/requirements.txt | 6 + .../resnet50_v1/export/run_benchmark.sh | 42 +++++ .../resnet50_v1/export/run_export.sh | 57 ++++++ .../tensorflow_models/vgg16/export/README.md | 4 +- 6 files changed, 357 insertions(+), 2 deletions(-) create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/README.md create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/requirements.txt create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/run_benchmark.sh create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/run_export.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/README.md 
b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/README.md
new file mode 100644
index 00000000000..0cbaba97fec
--- /dev/null
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/README.md
@@ -0,0 +1,85 @@
+Step-by-Step
+============
+
+This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor.
+> Note: Validated Framework [Versions](/docs/source/installation_guide.md#validated-software-environment).
+
+# Prerequisite
+
+## 1. Environment
+
+### Installation
+```shell
+# Install Intel® Neural Compressor
+pip install neural-compressor
+```
+
+### Install requirements
+Tensorflow and intel-extension-for-tensorflow are required to run this ONNX INT8 model export example.
+The Intel Extension for Tensorflow for Intel CPUs is installed by default.
+```shell
+pip install -r requirements.txt
+```
+
+### Install Intel Extension for Tensorflow
+Intel Extension for Tensorflow must be installed to export a Tensorflow model to ONNX.
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+## 2. Prepare Pretrained model
+
+```shell
+wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb
+```
+
+
+## 3. Prepare Dataset
+
+ TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
+ We also prepared related scripts in the `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. Once you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directories based on the label (synset) of each image, you can use the commands below to convert them to TF records format.
+
+ ```shell
+ cd examples/tensorflow/image_recognition/tensorflow_models/
+ # convert validation subset
+ bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+ # convert train subset
+ bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+ cd resnet50_v1/export
+ ```
+
+# Run Command
+Please note the dataset must be in TF records format for running quantization and benchmark.
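+
+After an export finishes, a quick way to confirm the ONNX file is usable is to load it with onnxruntime, as sketched below. This is only a sanity check, not part of the example scripts; the output file name and the NHWC 224x224 input shape are assumptions based on the preprocessing used in this example.
+```python
+import numpy as np
+import onnx
+import onnxruntime as ort
+
+# Load and structurally validate the exported model, then run one dummy inference.
+model = onnx.load("./resnet50_v1.onnx")                      # illustrative output of run_export.sh
+onnx.checker.check_model(model)
+sess = ort.InferenceSession(model.SerializeToString(),
+                            providers=ort.get_available_providers())
+input_name = sess.get_inputs()[0].name
+dummy = np.random.rand(1, 224, 224, 3).astype(np.float32)    # assumed NHWC input shape
+print(sess.run(None, {input_name: dummy})[0].shape)
+```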
+ +### Export Tensorflow FP32 model to ONNX FP32 model +```shell +bash run_export.sh --input_model=./resnet50_fp32_pretrained_model.pb --output_model=./resnet50_v1.onnx --dtype=fp32 --quant_format=qdq +``` + +## Run benchmark for Tensorflow FP32 model +```shell +bash run_benchmark.sh --input_model=./resnet50_fp32_pretrained_model.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_fp32_pretrained_model.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` + +### Run benchmark for ONNX FP32 model +```shell +bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` + +### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model +```shell +bash run_export.sh --input_model=./resnet50_fp32_pretrained_model.pb --output_model=./resnet50_v1_int8.onnx --dtype=int8 --quant_format=qdq --dataset_location=/path/to/imagenet/ +``` + +## Run benchmark for Tensorflow INT8 model +```shell +bash run_benchmark.sh --input_model=./tf-quant.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./tf-quant.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` + +### Run benchmark for ONNX INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py new file mode 100644 index 00000000000..263a0f67a40 --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py @@ -0,0 +1,165 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from argparse import ArgumentParser +import tensorflow as tf +import onnx +import numpy as np +import onnxruntime as ort + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def eval_func_onnx(model, dataloader, metric, postprocess=None): + metric.reset() + sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + input_names = [i.name for i in sess.get_inputs()] + for input_data, label in dataloader: + output = sess.run(None, dict(zip(input_names, [input_data]))) + if postprocess: + output, label = postprocess((output, label)) + metric.update(output, label) + return metric.result() + +def eval_func_tf(model, dataloader, metric, postprocess=None): + from neural_compressor.model import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + if postprocess: + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') + arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') + arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') + arg_parser.add_argument('--dtype', dest='dtype', default='fp32', help='the data type of export') + arg_parser.add_argument('--quant_format', dest='quant_format', default='qdq', help='the quant format of export') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + if self.args.export: + if self.args.dtype == 'int8': + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion + from neural_compressor.utils.create_obj_from_config import create_dataloader + calib_dataloader_args = { + 'batch_size': 10, + 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224}}, + 'filter': None + } + calib_dataloader = create_dataloader('tensorflow', calib_dataloader_args) + eval_dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224}}, + 
'filter': None + } + eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) + op_name_list = { + 'resnet_model/dense/MatMul': + { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']}, + } + } + conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100], + outputs=['softmax_tensor'], + op_name_list=op_name_list) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, eval_metric=top1) + q_model.save("./tf-quant.pb") + from neural_compressor.config import TF2ONNXConfig + config = TF2ONNXConfig(dtype=self.args.dtype) + q_model.export(self.args.output_graph, config) + else: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + config = TF2ONNXConfig(dtype=self.args.dtype) + inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + else: + model = self.args.input_graph + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + + def eval(model): + if isinstance(model, str): + return eval_func_tf(model, dataloader, top1) + else: + return eval_func_onnx(model, dataloader, top1) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/requirements.txt b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/requirements.txt new file mode 100644 index 00000000000..b5e9f4ddb52 --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/requirements.txt @@ -0,0 +1,6 @@ +tensorflow +intel-extension-for-tensorflow[cpu] +tf2onnx +onnx +onnxruntime +onnxruntime-extensions; python_version < '3.10' \ No newline at end of file diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/run_benchmark.sh b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/run_benchmark.sh new file mode 100644 index 00000000000..e83a029e800 --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + 
dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/run_export.sh b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/run_export.sh new file mode 100644 index 00000000000..2faf034fc88 --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/run_export.sh @@ -0,0 +1,57 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + quant_format=qdq + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --dtype=*) + dtype=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + if [ ${dtype} == 'int8' ] + then + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dtype ${dtype} \ + --quant_format ${quant_format} \ + --dataset_location ${dataset_location} \ + --export + else + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dtype ${dtype} \ + --quant_format ${quant_format} \ + --export + fi +} + +main "$@" diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/README.md b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/README.md index 86a9ffcbd38..5e73d7a075e 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/README.md +++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/README.md @@ -113,6 +113,6 @@ bash run_benchmark.sh --input_model=./tf-quant.pb --mode=performance --dataset_l ### Run benchmark for ONNX INT8 QDQ model ```shell -bash run_benchmark.sh --input_model=./frozen_vgg16_int8.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 -bash run_benchmark.sh --input_model=./frozen_vgg16_int8.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 +bash run_benchmark.sh --input_model=./frozen_vgg16_int8.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./frozen_vgg16_int8.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 ``` \ No newline at end of file From 6cda256e5c80bf5ab75e8c44100c0dd3ec184ce7 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Sat, 11 Feb 2023 12:20:35 +0800 Subject: [PATCH 31/43] move resnet50_v1.5 path Signed-off-by: Lv, Liang1 --- .../resnet50_v1_5/export/README.md | 85 +++++++++ .../resnet50_v1_5/export/main.py | 176 ++++++++++++++++++ .../resnet50_v1_5/export/requirements.txt | 6 + .../resnet50_v1_5/export/run_benchmark.sh | 42 +++++ .../resnet50_v1_5/export/run_export.sh | 57 ++++++ .../resnet50_v1_5/export/run_tuning.sh | 39 ++++ 6 files changed, 405 insertions(+) create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/README.md create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py create mode 
100644 examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/requirements.txt create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_benchmark.sh create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_export.sh create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_tuning.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/README.md b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/README.md new file mode 100644 index 00000000000..ed8cb77c38a --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/README.md @@ -0,0 +1,85 @@ +Step-by-Step +============ + +This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. + + +# Prerequisite + +## 1. Environment + +### Installation +Recommend python 3.8 or higher version. +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### Install requirements +The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this export ONNX INT8 model example. +The Intel Extension for Tensorflow for Intel CPUs is installed as default. +```shell +pip install -r requirements.txt +``` + +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Pretrained model + +```bash +wget https://zenodo.org/record/2535873/files/resnet50_v1.pb +``` + +## 3. Prepare Dataset + + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + + ```shell + cd examples/tensorflow/tf2onnx/ + # convert validation subset + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd resnet50_v1.5/int8_export + ``` + +# Run Command +Please note the dataset is TF records format for running quantization and benchmark. 
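+
+For reference, the INT8 export driven by `run_export.sh --dtype=int8` boils down to the sketch below. It mirrors the int8 branch of `main.py` in this directory, but it skips the accuracy evaluation and the per-op fp32 fallback that the full script configures, and it assumes `quantization.fit` is run without an accuracy target; the dataset path is a placeholder.
+```python
+from neural_compressor import quantization
+from neural_compressor.config import PostTrainingQuantConfig, TF2ONNXConfig
+from neural_compressor.utils.create_obj_from_config import create_dataloader
+
+# Calibration dataloader over the ImageNet TF records (placeholder path).
+calib_dataloader = create_dataloader('tensorflow', {
+    'batch_size': 10,
+    'dataset': {"ImageRecord": {'root': '/path/to/imagenet/'}},
+    'transform': {'ResizeCropImagenet':
+                  {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}},
+    'filter': None})
+
+# Quantize with the ITEX backend, then export the QDQ graph to ONNX INT8.
+conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100])
+q_model = quantization.fit("./resnet50_v1.pb", conf=conf, calib_dataloader=calib_dataloader)
+q_model.save("./tf-quant.pb")
+q_model.export("./resnet50_v1_int8.onnx", TF2ONNXConfig(dtype="int8"))
+```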
+ +### Export Tensorflow FP32 model to ONNX FP32 model +```shell +bash run_export.sh --input_model=./resnet50_v1.pb --output_model=./resnet50_v1.onnx --dtype=fp32 --quant_format=qdq +``` + +## Run benchmark for Tensorflow FP32 model +```shell +bash run_benchmark.sh --input_model=./resnet50_v1.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_v1.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` + +### Run benchmark for ONNX FP32 model +```shell +bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` + +### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model +```shell +bash run_export.sh --input_model=./resnet50_v1.pb --output_model=./resnet50_v1_int8.onnx --dtype=int8 --quant_format=qdq --dataset_location=/path/to/imagenet/ +``` + +## Run benchmark for Tensorflow INT8 model +```shell +bash run_benchmark.sh --input_model=./tf-quant.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./tf-quant.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` + +### Run benchmark for ONNX INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py new file mode 100644 index 00000000000..394e0e9d353 --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py @@ -0,0 +1,176 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from argparse import ArgumentParser +import tensorflow as tf +import onnx +import os +import onnxruntime as ort +import numpy as np + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def eval_func_onnx(model, dataloader, metric, postprocess=None): + metric.reset() + sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + input_names = [i.name for i in sess.get_inputs()] + for input_data, label in dataloader: + output = sess.run(None, dict(zip(input_names, [input_data]))) + if postprocess: + output, label = postprocess((output, label)) + if isinstance(output, list): + if len(output) == 1: + output = output[0] + else: + output = output[1] + metric.update(output, label) + return metric.result() + +def eval_func_tf(model, dataloader, metric, postprocess=None): + from neural_compressor.model import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + if isinstance(predictions, list): + if len(model.output_tensor_names) == 1: + predictions = predictions[0] + else: + predictions = predictions[1] + metric.update(predictions, labels) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') + arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') + arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') + arg_parser.add_argument('--dtype', dest='dtype', default='fp32', help='the data type of export') + arg_parser.add_argument('--quant_format', dest='quant_format', default='qdq', help='the quant format of export') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + + if self.args.export: + if self.args.dtype == 'int8': + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion + from neural_compressor.utils.create_obj_from_config import create_dataloader + calib_dataloader_args = { + 'batch_size': 10, + 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + calib_dataloader = 
create_dataloader('tensorflow', calib_dataloader_args) + eval_dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) + op_name_list = { + 'resnet_model/dense/MatMul': + { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']}, + } + } + conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100], + outputs=['softmax_tensor'], + accuracy_criterion = AccuracyCriterion(tolerable_loss=0.3), + op_name_list=op_name_list) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, eval_metric=top1) + q_model.save("./tf-quant.pb") + from neural_compressor.config import TF2ONNXConfig + config = TF2ONNXConfig(dtype=self.args.dtype) + q_model.export(self.args.output_graph, config) + else: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + config = TF2ONNXConfig(dtype=self.args.dtype) + inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + else: + model = self.args.input_graph + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + + def eval(model): + if isinstance(model, str): + return eval_func_tf(model, dataloader, top1) + else: + return eval_func_onnx(model, dataloader, top1) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/requirements.txt b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/requirements.txt new file mode 100644 index 00000000000..b5e9f4ddb52 --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/requirements.txt @@ -0,0 +1,6 @@ +tensorflow +intel-extension-for-tensorflow[cpu] +tf2onnx +onnx +onnxruntime +onnxruntime-extensions; python_version < '3.10' \ No newline at end of file diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_benchmark.sh b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_benchmark.sh new file mode 100644 index 00000000000..e83a029e800 --- /dev/null +++ 
b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_export.sh b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_export.sh new file mode 100644 index 00000000000..2faf034fc88 --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_export.sh @@ -0,0 +1,57 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + quant_format=qdq + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --dtype=*) + dtype=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + if [ ${dtype} == 'int8' ] + then + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dtype ${dtype} \ + --quant_format ${quant_format} \ + --dataset_location ${dataset_location} \ + --export + else + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dtype ${dtype} \ + --quant_format ${quant_format} \ + --export + fi +} + +main "$@" diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_tuning.sh b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_tuning.sh new file mode 100644 index 00000000000..6a9e1b859c9 --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_tuning.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dataset_location ${dataset_location} \ + --tune +} + +main "$@" From 6e5cd176f331ab6c979a774c7f5a50514fcac8fe Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Sat, 11 Feb 2023 13:05:29 +0800 Subject: [PATCH 32/43] move mobilenet_v2 export path Signed-off-by: Lv, Liang1 --- .../mobilenet_v2/export/README.md | 119 +++++++++++++++ .../mobilenet_v2/export/main.py | 142 ++++++++++++++++++ .../mobilenet_v2/export/requirements.txt | 6 + .../export/run_benchmark.sh} | 17 ++- .../mobilenet_v2/export/run_export.sh | 57 +++++++ .../resnet50_v1_5/export/README.md | 4 +- 
6 files changed, 336 insertions(+), 9 deletions(-) create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/README.md create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/requirements.txt rename examples/tensorflow/image_recognition/tensorflow_models/{resnet50_v1_5/export/run_tuning.sh => mobilenet_v2/export/run_benchmark.sh} (64%) create mode 100644 examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/run_export.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/README.md b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/README.md new file mode 100644 index 00000000000..4fc65137092 --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/README.md @@ -0,0 +1,119 @@ +Step-by-Step +============ + +This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. + + +# Prerequisite + +## 1. Environment + +### Installation +Recommend python 3.8 or higher version. +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### Install requirements +The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this export ONNX INT8 model example. +The Intel Extension for Tensorflow for Intel CPUs is installed as default. +```shell +pip install -r requirements.txt +``` + +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Pretrained model + +The mobilenet_v2 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). +We can get the pb file by convert the checkpoint file. + + 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models) + ```shell + wget https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz + tar -xvf mobilenet_v2_1.4_224.tgz + ``` + + 2. Exporting the Inference Graph + ```shell + git clone https://github.com/tensorflow/models + cd models/research/slim + python export_inference_graph.py \ + --alsologtostderr \ + --model_name=mobilenet_v2 \ + --output_file=/tmp/mobilenet_v2_inf_graph.pb + ``` + Make sure to use intel-tensorflow v1.15, and pip install tf_slim. + #### Install Intel Tensorflow 1.15 up2 + Check your python version and use pip install 1.15.0 up2 from links below: + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl + > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. + 3. 
Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer names of the inference graph pb; for mobilenet_v2 the output layer name is `MobilenetV2/Predictions/Reshape_1`.
+
+ 4. Freeze the exported graph with the tool `freeze_graph.py` from the [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo
+ ```shell
+ python freeze_graph.py \
+ --input_graph=/tmp/mobilenet_v2_inf_graph.pb \
+ --input_checkpoint=./mobilenet_v2.ckpt \
+ --input_binary=true \
+ --output_graph=./frozen_mobilenet_v2.pb \
+ --output_node_names=MobilenetV2/Predictions/Reshape_1
+ ```
+
+## 3. Prepare Dataset
+
+ TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
+ We also prepared related scripts in the `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. Once you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directories based on the label (synset) of each image, you can use the commands below to convert them to TF records format.
+
+ ```shell
+ cd examples/tensorflow/image_recognition/tensorflow_models/
+ # convert validation subset
+ bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+ # convert train subset
+ bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+ cd mobilenet_v2/export
+ ```
+
+# Run Command
+Please note the dataset must be in TF records format for running quantization and benchmark.
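+
+Note that, unlike the other image recognition export examples in this patch, `main.py` in this directory passes `input_names` to `TF2ONNXConfig`, specifying the input tensor name and shape to use for the export. A minimal sketch of that call (paths are illustrative placeholders):
+```python
+from neural_compressor.model import Model
+from neural_compressor.config import TF2ONNXConfig
+
+# Export the frozen graph, pinning the input tensor name and NHWC shape.
+inc_model = Model("./frozen_mobilenet_v2.pb")
+config = TF2ONNXConfig(dtype="fp32", input_names='input[-1,224,224,3]')
+inc_model.export("./mobilenet_v2.onnx", config)
+```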
+ + +### Export Tensorflow FP32 model to ONNX FP32 model +```shell +bash run_export.sh --input_model=./frozen_mobilenet_v2.pb --output_model=./mobilenet_v2.onnx --dtype=fp32 --quant_format=qdq +``` + +## Run benchmark for Tensorflow FP32 model +```shell +bash run_benchmark.sh --input_model=./frozen_mobilenet_v2.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./frozen_mobilenet_v2.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` + +### Run benchmark for ONNX FP32 model +```shell +bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` + +### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model +```shell +bash run_export.sh --input_model=./frozen_mobilenet_v2.pb --output_model=./mobilenet_v2_int8.onnx --dtype=int8 --quant_format=qdq --dataset_location=/path/to/imagenet/ +``` + +## Run benchmark for Tensorflow INT8 model +```shell +bash run_benchmark.sh --input_model=./tf-quant.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./tf-quant.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 +``` + +### Run benchmark for ONNX INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./mobilenet_v2_int8.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 +bash run_benchmark.sh --input_model=./mobilenet_v2_int8.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 \ No newline at end of file diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py new file mode 100644 index 00000000000..6a704d148dc --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py @@ -0,0 +1,142 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from argparse import ArgumentParser +import tensorflow as tf +import onnx +import os +import onnxruntime as ort +import numpy as np + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def eval_func_onnx(model, dataloader, metric, postprocess=None): + metric.reset() + sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + input_names = [i.name for i in sess.get_inputs()] + for input_data, label in dataloader: + output = sess.run(None, dict(zip(input_names, [input_data]))) + if postprocess: + output, label = postprocess((output, label)) + metric.update(output, label) + return metric.result() + +def eval_func_tf(model, dataloader, metric, postprocess=None): + from neural_compressor.model import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + metric.update(predictions, labels) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') + arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') + arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') + arg_parser.add_argument('--dtype', dest='dtype', default='fp32', help='the data type of export') + arg_parser.add_argument('--quant_format', dest='quant_format', default='qdq', help='the quant format of export') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + if self.args.export: + if self.args.dtype == 'int8': + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': 10, + 'dataset': {"ImageRecord": {'root': self.args.dataset_location}}, + 'transform': {'BilinearImagenet': + {'height': 224, 'width': 224}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100]) + q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=dataloader, + eval_dataloader=dataloader) + q_model.save("./tf-quant.pb") + from neural_compressor.config import 
TF2ONNXConfig + config = TF2ONNXConfig(dtype=self.args.dtype, input_names='input[-1,224,224,3]') + q_model.export(self.args.output_graph, config) + else: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + config = TF2ONNXConfig(dtype="fp32", input_names='input[-1,224,224,3]') + inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + else: + model = self.args.input_graph + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"ImageRecord": {'root': self.args.dataset_location}}, + 'transform': {'BilinearImagenet': {'height': 224, 'width': 224}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) + def eval(model): + if isinstance(model, str): + return eval_func_tf(model, dataloader, top1) + else: + return eval_func_onnx(model, dataloader, top1) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(self.args.input_graph, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/requirements.txt b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/requirements.txt new file mode 100644 index 00000000000..b5e9f4ddb52 --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/requirements.txt @@ -0,0 +1,6 @@ +tensorflow +intel-extension-for-tensorflow[cpu] +tf2onnx +onnx +onnxruntime +onnxruntime-extensions; python_version < '3.10' \ No newline at end of file diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_tuning.sh b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/run_benchmark.sh similarity index 64% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_tuning.sh rename to examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/run_benchmark.sh index 6a9e1b859c9..e83a029e800 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_tuning.sh +++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/run_benchmark.sh @@ -2,38 +2,41 @@ set -x function main { + init_params "$@" - run_tuning + run_benchmark } # init params function init_params { - for var in "$@" do case $var in --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) + --mode=*) + mode=$(echo $var |cut -f2 -d=) ;; --dataset_location=*) dataset_location=$(echo $var |cut -f2 -d=) ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) esac done } # run_tuning -function run_tuning { +function run_benchmark { python main.py \ --input-graph ${input_model} \ - --output-graph ${output_model} \ + --mode 
${mode} \ --dataset_location ${dataset_location} \ - --tune + --batch_size ${batch_size} \ + --benchmark } main "$@" diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/run_export.sh b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/run_export.sh new file mode 100644 index 00000000000..2faf034fc88 --- /dev/null +++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/run_export.sh @@ -0,0 +1,57 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + quant_format=qdq + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --dtype=*) + dtype=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + if [ ${dtype} == 'int8' ] + then + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dtype ${dtype} \ + --quant_format ${quant_format} \ + --dataset_location ${dataset_location} \ + --export + else + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dtype ${dtype} \ + --quant_format ${quant_format} \ + --export + fi +} + +main "$@" diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/README.md b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/README.md index ed8cb77c38a..10eabd7160e 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/README.md +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/README.md @@ -40,12 +40,12 @@ wget https://zenodo.org/record/2535873/files/resnet50_v1.pb We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. 
```shell - cd examples/tensorflow/tf2onnx/ + cd examples/tensorflow/image_recognition/tensorflow_models/ # convert validation subset bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd resnet50_v1.5/int8_export + cd resnet50_v1.5/export ``` # Run Command From 04482d11fcabff6c9468f56e3e7e8851610c4e8a Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Sat, 11 Feb 2023 20:07:24 +0800 Subject: [PATCH 33/43] move ssd_mobilenet_v1 and resnet50_v1 path Signed-off-by: Lv, Liang1 --- .../mobilenet_v2/export/main.py | 4 + .../resnet50_v1/export/main.py | 4 + .../resnet50_v1_5/export/main.py | 3 + .../tensorflow_models/vgg16/export/main.py | 4 +- .../faster_rcnn_resnet50/export/README.md | 92 ++++++++ .../faster_rcnn_resnet50/export/main.py | 199 ++++++++++++++++++ .../export/requirements.txt | 6 + .../export/run_benchmark.sh | 42 ++++ .../faster_rcnn_resnet50/export/run_export.sh | 57 +++++ .../ssd_mobilenet_v1/export/README.md | 93 ++++++++ .../ssd_mobilenet_v1/export/main.py | 199 ++++++++++++++++++ .../ssd_mobilenet_v1/export/requirements.txt | 6 + .../ssd_mobilenet_v1/export/run_benchmark.sh | 42 ++++ .../ssd_mobilenet_v1/export/run_export.sh | 57 +++++ 14 files changed, 806 insertions(+), 2 deletions(-) create mode 100644 examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/README.md create mode 100644 examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/main.py create mode 100644 examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/requirements.txt create mode 100644 examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/run_benchmark.sh create mode 100644 examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/run_export.sh create mode 100644 examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/README.md create mode 100644 examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/main.py create mode 100644 examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/requirements.txt create mode 100644 examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/run_benchmark.sh create mode 100644 examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/run_export.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py index 6a704d148dc..ac00f5121b0 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py @@ -78,6 +78,10 @@ def __init__(self): def run(self): """This is neural_compressor function include tuning, export and benchmark option.""" + if self.args.quant_format != 'qdq': + raise ValueError("Only support tensorflow export to ONNX for QDQ format, " + "please make sure input the corret quant_format.") + if self.args.export: if self.args.dtype == 'int8': from neural_compressor import quantization diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py index 
263a0f67a40..839844d8a3a 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py @@ -79,6 +79,10 @@ def __init__(self): def run(self): """This is neural_compressor function include tuning, export and benchmark option.""" + if self.args.quant_format != 'qdq': + raise ValueError("Only support tensorflow export to ONNX for QDQ format, " + "please make sure input the corret quant_format.") + if self.args.export: if self.args.dtype == 'int8': from neural_compressor import quantization diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py index 394e0e9d353..9b47720f7ee 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py @@ -88,6 +88,9 @@ def __init__(self): def run(self): """This is neural_compressor function include tuning, export and benchmark option.""" + if self.args.quant_format != 'qdq': + raise ValueError("Only support tensorflow export to ONNX for QDQ format, " + "please make sure input the corret quant_format.") if self.args.export: if self.args.dtype == 'int8': diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py index a4063466c1d..adf61b6a10f 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py @@ -92,8 +92,8 @@ class eval_classifier_optimized_graph: def run(self): """This is neural_compressor function include tuning, export and benchmark option.""" if args.quant_format != 'qdq': - raise "Only support tensorflow export to ONNX for QDQ format, \ - please make sure input the corret quanti_format." + raise ValueError("Only support tensorflow export to ONNX for QDQ format, " + "please make sure input the corret quant_format.") top1 = TensorflowTopK(k=1) postprocess = LabelShift(label_shift=1) diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/README.md b/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/README.md new file mode 100644 index 00000000000..cc85aa25f38 --- /dev/null +++ b/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/README.md @@ -0,0 +1,92 @@ +Step-by-Step +============ + +This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. + + +# Prerequisite + +## 1. Environment + +### Installation +Recommend python 3.8 or higher version. +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### Install requirements +```shell +pip install -r requirements.txt +``` + +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Pretrained model + +```shell +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz +tar -xvf faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz +``` + +## 3. 
Prepare Dataset + +### Automatic dataset download + +> **_Note: `prepare_dataset.sh` script works with TF version 1.x._** + +Run the `prepare_dataset.sh` script located in `examples/tensorflow/object_detection/tensorflow_models`. + +Usage: +```shell +cd ./examples/tensorflow/object_detection/tensorflow_models +bash prepare_dataset.sh +cd faster_rcnn_resnet50/export +``` + +This script will download the *train*, *validation* and *test* COCO datasets. Furthermore it will convert them to +tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. + +### Manual dataset download +Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). + +# Run Command +Please note the dataset is TF records format for running quantization and benchmark. + +### Export Tensorflow FP32 model to ONNX FP32 model +```shell +bash run_export.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb --output_model=./faster_rcnn_resnet50.onnx --dtype=fp32 --quant_format=qdq +``` + +## Run benchmark for Tensorflow FP32 model +```shell +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=32 +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=1 +``` + +### Run benchmark for ONNX FP32 model +```shell +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=32 +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=1 +``` + +### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model +```shell +bash run_export.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb --output_model=./faster_rcnn_resnet50_int8.onnx --dtype=int8 --quant_format=qdq --dataset_location=/path/to/coco_dataset/ +``` + +## Run benchmark for Tensorflow INT8 model +```shell +bash run_benchmark.sh --input_model=./tf-quant.pb --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=32 +bash run_benchmark.sh --input_model=./tf-quant.pb --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=1 +``` + +### Run benchmark for ONNX INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_int8.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=32 +bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_int8.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=1 +``` \ No newline at end of file diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/main.py b/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/main.py new file mode 100644 index 00000000000..381ea592207 --- /dev/null +++ b/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/main.py @@ -0,0 +1,199 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from argparse import ArgumentParser +import tensorflow as tf +import onnx +import os +import onnxruntime as ort +import numpy as np + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def eval_func_onnx(model, dataloader, metric, postprocess=None): + metric.reset() + session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for inputs, labels in dataloader: + if not isinstance(labels, list): + labels = [labels] + if len_inputs == 1: + ort_inputs.update( + inputs if isinstance(inputs, dict) else {inputs_names[0]: np.array(inputs,dtype=np.uint8)} + ) + else: + assert len_inputs == len(inputs), \ + 'number of input tensors must align with graph inputs' + + if isinstance(inputs, dict): # pragma: no cover + ort_inputs.update(inputs) + else: + for i in range(len_inputs): + # in case dataloader contains non-array input + if not isinstance(inputs[i], np.ndarray): + ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) + else: + ort_inputs.update({inputs_names[i]: inputs[i]}) + + predictions = session.run(None, ort_inputs) + + if postprocess is not None: + predictions, labels = postprocess((predictions, labels)) + + if not hasattr(metric, "compare_label") or \ + (hasattr(metric, "compare_label") and metric.compare_label): + metric.update(predictions, labels) + acc = metric.result() + return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] + +def eval_func_tf(model, dataloader, metric, postprocess=None): + metric.reset() + + from neural_compressor.model import Model + if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph): + model = Model(model) + model.input_tensor_names = ["image_tensor:0"] + model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \ + "detection_scores:0", "detection_classes:0"] + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + metric.update(predictions, labels) + + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', 
dest='export', action='store_true', help='use neural_compressor to export.') + arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') + arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') + arg_parser.add_argument('--dtype', dest='dtype', default='fp32', help='the data type of export') + arg_parser.add_argument('--quant_format', dest='quant_format', default='qdq', help='the quant format of export') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + if self.args.quant_format != 'qdq': + raise ValueError("Only support tensorflow export to ONNX for QDQ format, " + "please make sure input the corret quant_format.") + + if self.args.export: + if self.args.dtype == 'int8': + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion + from neural_compressor.utils.create_obj_from_config import create_dataloader + calib_dataloader_args = { + 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, + 'transform': None, + 'filter': None + } + calib_dataloader = create_dataloader('tensorflow', calib_dataloader_args) + eval_dataloader_args = { + 'batch_size': 10, + 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, + 'transform': {'Resize': {'size': 600}}, + 'filter': None + } + eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) + conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[10, 50, 100, 200], inputs=['image_tensor'], + outputs=['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes'], + accuracy_criterion = AccuracyCriterion(tolerable_loss=0.32)) + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, eval_metric=mAP2) + q_model.save("./tf-quant.pb") + + from neural_compressor.config import TF2ONNXConfig + q_model.input_tensor_names = ["image_tensor"] + q_model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] + config = TF2ONNXConfig(dtype="int8") + q_model.export(self.args.output_graph, config) + else: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + inc_model.input_tensor_names = ["image_tensor"] + inc_model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] + config = TF2ONNXConfig(dtype="fp32") + inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + else: + model = self.args.input_graph + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, + 'transform': {'Resize': {'size': 600}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + + from 
neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + + def eval(model): + if isinstance(model, str): + return eval_func_tf(model, dataloader, mAP2) + else: + return eval_func_onnx(model, dataloader, mAP2) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/requirements.txt b/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/requirements.txt new file mode 100644 index 00000000000..b5e9f4ddb52 --- /dev/null +++ b/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/requirements.txt @@ -0,0 +1,6 @@ +tensorflow +intel-extension-for-tensorflow[cpu] +tf2onnx +onnx +onnxruntime +onnxruntime-extensions; python_version < '3.10' \ No newline at end of file diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/run_benchmark.sh b/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/run_benchmark.sh new file mode 100644 index 00000000000..e83a029e800 --- /dev/null +++ b/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/run_export.sh b/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/run_export.sh new file mode 100644 index 00000000000..2faf034fc88 --- /dev/null +++ b/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/run_export.sh @@ -0,0 +1,57 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + quant_format=qdq + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --dtype=*) + dtype=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + if [ ${dtype} == 'int8' ] + then + python main.py \ + --input-graph ${input_model} \ + --output-graph 
${output_model} \ + --dtype ${dtype} \ + --quant_format ${quant_format} \ + --dataset_location ${dataset_location} \ + --export + else + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dtype ${dtype} \ + --quant_format ${quant_format} \ + --export + fi +} + +main "$@" diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/README.md b/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/README.md new file mode 100644 index 00000000000..43a3215dc9a --- /dev/null +++ b/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/README.md @@ -0,0 +1,93 @@ +Step-by-Step +============ + +This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. + + +# Prerequisite + +## 1. Environment + +### Installation +Recommend python 3.8 or higher version. +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### Install requirements +```shell +pip install -r requirements.txt +``` + +### Install Intel Extension for Tensorflow +Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Pretrained model + +```bash +export MODEL=ssd_mobilenet_v1_coco_2018_01_28 +wget http://download.tensorflow.org/models/object_detection/$MODEL.tar.gz +tar -xvf $MODEL.tar.gz +``` + +## 3. Prepare Dataset + +### Automatic dataset download + +> **_Note: `prepare_dataset.sh` script works with TF version 1.x._** + +Run the `prepare_dataset.sh` script located in `examples/tensorflow/object_detection/tensorflow_models`. + +Usage: +```shell +cd ./examples/tensorflow/object_detection/tensorflow_models +bash prepare_dataset.sh +cd ssd_mobilenet_v1/export +``` + +This script will download the *train*, *validation* and *test* COCO datasets. Furthermore it will convert them to +tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. + +### Manual dataset download +Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). + +# Run Command +Please note the dataset is TF records format for running quantization and benchmark. 
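+
+The `run_export.sh` commands below forward their flags to `main.py`. As a rough sketch (not the full script), the FP32 export path in `main.py` boils down to the Neural Compressor export API, assuming the pretrained model extracted in step 2:
+
+```python
+from neural_compressor.model import Model
+from neural_compressor.config import TF2ONNXConfig
+
+# Wrap the pretrained TensorFlow model and declare its input/output tensors.
+inc_model = Model("./ssd_mobilenet_v1_coco_2018_01_28")
+inc_model.input_tensor_names = ["image_tensor"]
+inc_model.output_tensor_names = ["num_detections", "detection_boxes",
+                                 "detection_scores", "detection_classes"]
+
+# Convert the TensorFlow graph to an ONNX model on disk.
+config = TF2ONNXConfig(dtype="fp32")
+inc_model.export("./ssd_mobilenet_v1_coco_2018_01_28.onnx", config)
+```
+
+For `--dtype=int8`, `main.py` first quantizes the model with `PostTrainingQuantConfig(backend='itex')` using calibration and evaluation dataloaders built from the COCO TF records, then exports the resulting quantized model the same way.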
+ +### Export Tensorflow FP32 model to ONNX FP32 model +```shell +bash run_export.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --output_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --dtype=fp32 --quant_format=qdq +``` + +## Run benchmark for Tensorflow FP32 model +```shell +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=32 +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=1 +``` + +### Run benchmark for ONNX FP32 model +```shell +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=32 +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=1 +``` + +### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model +```shell +bash run_export.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --output_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --dtype=int8 --quant_format=qdq --dataset_location=/path/to/coco_dataset/ +``` + +## Run benchmark for Tensorflow INT8 model +```shell +bash run_benchmark.sh --input_model=./tf-quant.pb --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=32 +bash run_benchmark.sh --input_model=./tf-quant.pb --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=1 +``` + +### Run benchmark for ONNX INT8 QDQ model +```shell +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=32 +bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=1 +``` \ No newline at end of file diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/main.py b/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/main.py new file mode 100644 index 00000000000..019d75ae2e8 --- /dev/null +++ b/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/main.py @@ -0,0 +1,199 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from argparse import ArgumentParser +import tensorflow as tf +import onnx +import os +import onnxruntime as ort +import numpy as np + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def eval_func_onnx(model, dataloader, metric, postprocess=None): + metric.reset() + session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for inputs, labels in dataloader: + if not isinstance(labels, list): + labels = [labels] + if len_inputs == 1: + ort_inputs.update( + inputs if isinstance(inputs, dict) else {inputs_names[0]: np.array(inputs,dtype=np.uint8)} + ) + else: + assert len_inputs == len(inputs), \ + 'number of input tensors must align with graph inputs' + + if isinstance(inputs, dict): # pragma: no cover + ort_inputs.update(inputs) + else: + for i in range(len_inputs): + # in case dataloader contains non-array input + if not isinstance(inputs[i], np.ndarray): + ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) + else: + ort_inputs.update({inputs_names[i]: inputs[i]}) + + predictions = session.run(None, ort_inputs) + + if postprocess is not None: + predictions, labels = postprocess((predictions, labels)) + + if not hasattr(metric, "compare_label") or \ + (hasattr(metric, "compare_label") and metric.compare_label): + metric.update(predictions, labels) + acc = metric.result() + return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] + +def eval_func_tf(model, dataloader, metric, postprocess=None): + metric.reset() + + from neural_compressor.model import Model + if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph): + model = Model(model) + model.input_tensor_names = ["image_tensor:0"] + model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \ + "detection_scores:0", "detection_classes:0"] + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + for _, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + predictions = model.sess.run(output_tensor, feed_dict) + metric.update(predictions, labels) + + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def __init__(self): + """Initilization.""" + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') + arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') + arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') + arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') + arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') + arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') + arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of 
benchmark') + arg_parser.add_argument('--dtype', dest='dtype', default='fp32', help='the data type of export') + arg_parser.add_argument('--quant_format', dest='quant_format', default='qdq', help='the quant format of export') + self.args = arg_parser.parse_args() + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + if self.args.quant_format != 'qdq': + raise ValueError("Only support tensorflow export to ONNX for QDQ format, " + "please make sure input the corret quant_format.") + + if self.args.export: + if self.args.dtype == 'int8': + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion + from neural_compressor.utils.create_obj_from_config import create_dataloader + calib_dataloader_args = { + 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, + 'transform': None, + 'filter': None + } + calib_dataloader = create_dataloader('tensorflow', calib_dataloader_args) + eval_dataloader_args = { + 'batch_size': 10, + 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, + 'transform': {'Resize': {'size': 300}}, + 'filter': None + } + eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) + conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[10, 50, 100, 200], inputs=['image_tensor'], + outputs=['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes'], + accuracy_criterion = AccuracyCriterion(tolerable_loss=0.1)) + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, eval_metric=mAP2) + q_model.save("./tf-quant.pb") + + from neural_compressor.config import TF2ONNXConfig + q_model.input_tensor_names = ["image_tensor"] + q_model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] + config = TF2ONNXConfig(dtype="int8") + q_model.export(self.args.output_graph, config) + else: + from neural_compressor.model import Model + from neural_compressor.config import TF2ONNXConfig + inc_model = Model(self.args.input_graph) + inc_model.input_tensor_names = ["image_tensor"] + inc_model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] + config = TF2ONNXConfig(dtype="fp32") + inc_model.export(self.args.output_graph, config) + + if self.args.benchmark: + if self.args.input_graph.endswith('.onnx'): + model = onnx.load(self.args.input_graph) + else: + model = self.args.input_graph + + from neural_compressor.utils.create_obj_from_config import create_dataloader + dataloader_args = { + 'batch_size': self.args.batch_size, + 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, + 'transform': {'Resize': {'size': 300}}, + 'filter': None + } + dataloader = create_dataloader('tensorflow', dataloader_args) + + from neural_compressor.metric import COCOmAPv2 + output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} + mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) + + def eval(model): + if isinstance(model, str): + return eval_func_tf(model, dataloader, mAP2) + else: + return eval_func_onnx(model, dataloader, mAP2) + + if self.args.mode == 'performance': + from neural_compressor.benchmark import fit + from 
neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model, conf, b_dataloader=dataloader) + elif self.args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/requirements.txt b/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/requirements.txt new file mode 100644 index 00000000000..b5e9f4ddb52 --- /dev/null +++ b/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/requirements.txt @@ -0,0 +1,6 @@ +tensorflow +intel-extension-for-tensorflow[cpu] +tf2onnx +onnx +onnxruntime +onnxruntime-extensions; python_version < '3.10' \ No newline at end of file diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/run_benchmark.sh b/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/run_benchmark.sh new file mode 100644 index 00000000000..e83a029e800 --- /dev/null +++ b/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/run_export.sh b/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/run_export.sh new file mode 100644 index 00000000000..2faf034fc88 --- /dev/null +++ b/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/run_export.sh @@ -0,0 +1,57 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_export + +} + +# init params +function init_params { + quant_format=qdq + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --dtype=*) + dtype=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_export +function run_export { + if [ ${dtype} == 'int8' ] + then + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dtype ${dtype} \ + --quant_format ${quant_format} \ + --dataset_location ${dataset_location} \ + --export + else + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dtype ${dtype} \ + --quant_format ${quant_format} \ + --export + fi +} + +main "$@" From 14b08f2511ebc1ec2cbf189e96a75f6d981d850f Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Sat, 11 Feb 2023 20:12:23 +0800 Subject: [PATCH 34/43] remove tf2onnx 
folder Signed-off-by: Lv, Liang1 --- .../mobilenet_v2/export/main.py | 2 +- .../resnet50_v1/export/main.py | 2 +- .../resnet50_v1_5/export/main.py | 2 +- .../tensorflow_models/vgg16/export/main.py | 2 +- .../faster_rcnn_resnet50/export/main.py | 2 +- .../ssd_mobilenet_v1/export/main.py | 2 +- examples/tensorflow/tf2onnx/README.md | 13 - .../fp32_export/README.md | 75 - .../faster_rcnn_resnet50/fp32_export/main.py | 157 - .../fp32_export/requirements.txt | 9 - .../fp32_export/run_benchmark.sh | 42 - .../fp32_export/run_export.sh | 35 - .../int8_export/README.md | 80 - .../faster_rcnn_resnet50/int8_export/main.py | 187 - .../int8_export/requirements.txt | 9 - .../int8_export/run_benchmark.sh | 42 - .../int8_export/run_export.sh | 35 - .../int8_export/run_tuning.sh | 39 - .../imagenet_prepare/build_imagenet_data.py | 567 - .../download_and_convert_imagenet.sh | 100 - .../imagenet_prepare/download_imagenet.sh | 99 - .../imagenet_lsvrc_2015_synsets.txt | 1000 - .../imagenet_prepare/imagenet_metadata.txt | 21842 ---------------- .../mobilenet_v2/fp32_export/README.md | 101 - .../tf2onnx/mobilenet_v2/fp32_export/main.py | 119 - .../mobilenet_v2/fp32_export/requirements.txt | 9 - .../mobilenet_v2/fp32_export/run_benchmark.sh | 42 - .../mobilenet_v2/fp32_export/run_export.sh | 35 - .../mobilenet_v2/int8_export/README.md | 107 - .../tf2onnx/mobilenet_v2/int8_export/main.py | 137 - .../mobilenet_v2/int8_export/requirements.txt | 9 - .../mobilenet_v2/int8_export/run_benchmark.sh | 42 - .../mobilenet_v2/int8_export/run_export.sh | 35 - .../mobilenet_v2/int8_export/run_tuning.sh | 39 - .../tf2onnx/prepare_coco_dataset.sh | 136 - .../tf2onnx/prepare_imagenet_dataset.sh | 71 - .../resnet50v1.0/fp32_export/README.md | 66 - .../tf2onnx/resnet50v1.0/fp32_export/main.py | 123 - .../resnet50v1.0/fp32_export/requirements.txt | 6 - .../resnet50v1.0/fp32_export/run_benchmark.sh | 42 - .../resnet50v1.0/fp32_export/run_export.sh | 35 - .../resnet50v1.0/int8_export/README.md | 75 - .../tf2onnx/resnet50v1.0/int8_export/main.py | 160 - .../resnet50v1.0/int8_export/requirements.txt | 6 - .../resnet50v1.0/int8_export/run_benchmark.sh | 42 - .../resnet50v1.0/int8_export/run_export.sh | 35 - .../resnet50v1.0/int8_export/run_tuning.sh | 39 - .../resnet50v1.5/fp32_export/README.md | 67 - .../tf2onnx/resnet50v1.5/fp32_export/main.py | 122 - .../resnet50v1.5/fp32_export/requirements.txt | 9 - .../resnet50v1.5/fp32_export/run_benchmark.sh | 42 - .../resnet50v1.5/fp32_export/run_export.sh | 35 - .../resnet50v1.5/int8_export/README.md | 74 - .../tf2onnx/resnet50v1.5/int8_export/main.py | 160 - .../resnet50v1.5/int8_export/requirements.txt | 9 - .../resnet50v1.5/int8_export/run_benchmark.sh | 42 - .../resnet50v1.5/int8_export/run_export.sh | 35 - .../resnet50v1.5/int8_export/run_tuning.sh | 39 - .../ssd_mobilenet_v1/fp32_export/README.md | 76 - .../ssd_mobilenet_v1/fp32_export/main.py | 157 - .../fp32_export/requirements.txt | 9 - .../fp32_export/run_benchmark.sh | 42 - .../fp32_export/run_export.sh | 35 - .../ssd_mobilenet_v1/int8_export/README.md | 81 - .../ssd_mobilenet_v1/int8_export/main.py | 187 - .../int8_export/requirements.txt | 9 - .../int8_export/run_benchmark.sh | 42 - .../int8_export/run_export.sh | 35 - .../int8_export/run_tuning.sh | 39 - .../tf2onnx/vgg16/fp32_export/README.md | 99 - .../tf2onnx/vgg16/fp32_export/main.py | 124 - .../vgg16/fp32_export/requirements.txt | 9 - .../vgg16/fp32_export/run_benchmark.sh | 42 - .../tf2onnx/vgg16/fp32_export/run_export.sh | 35 - .../tf2onnx/vgg16/int8_export/README.md | 106 
- .../tf2onnx/vgg16/int8_export/main.py | 159 - .../vgg16/int8_export/requirements.txt | 9 - .../vgg16/int8_export/run_benchmark.sh | 42 - .../tf2onnx/vgg16/int8_export/run_export.sh | 35 - .../tf2onnx/vgg16/int8_export/run_tuning.sh | 39 - 80 files changed, 6 insertions(+), 27893 deletions(-) delete mode 100644 examples/tensorflow/tf2onnx/README.md delete mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/README.md delete mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/main.py delete mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/requirements.txt delete mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_benchmark.sh delete mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_export.sh delete mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/README.md delete mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/main.py delete mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/requirements.txt delete mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_benchmark.sh delete mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_export.sh delete mode 100644 examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_tuning.sh delete mode 100644 examples/tensorflow/tf2onnx/imagenet_prepare/build_imagenet_data.py delete mode 100644 examples/tensorflow/tf2onnx/imagenet_prepare/download_and_convert_imagenet.sh delete mode 100644 examples/tensorflow/tf2onnx/imagenet_prepare/download_imagenet.sh delete mode 100644 examples/tensorflow/tf2onnx/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt delete mode 100644 examples/tensorflow/tf2onnx/imagenet_prepare/imagenet_metadata.txt delete mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/README.md delete mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py delete mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/requirements.txt delete mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_benchmark.sh delete mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_export.sh delete mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md delete mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py delete mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/requirements.txt delete mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_benchmark.sh delete mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_export.sh delete mode 100644 examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_tuning.sh delete mode 100644 examples/tensorflow/tf2onnx/prepare_coco_dataset.sh delete mode 100644 examples/tensorflow/tf2onnx/prepare_imagenet_dataset.sh delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/main.py delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/requirements.txt delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_benchmark.sh delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_export.sh delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/main.py delete mode 100644 
examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/requirements.txt delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_benchmark.sh delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_export.sh delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_tuning.sh delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/requirements.txt delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_benchmark.sh delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_export.sh delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/requirements.txt delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_benchmark.sh delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_export.sh delete mode 100644 examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_tuning.sh delete mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md delete mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py delete mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/requirements.txt delete mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_benchmark.sh delete mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_export.sh delete mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md delete mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py delete mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/requirements.txt delete mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_benchmark.sh delete mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_export.sh delete mode 100644 examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_tuning.sh delete mode 100644 examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md delete mode 100644 examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py delete mode 100644 examples/tensorflow/tf2onnx/vgg16/fp32_export/requirements.txt delete mode 100644 examples/tensorflow/tf2onnx/vgg16/fp32_export/run_benchmark.sh delete mode 100644 examples/tensorflow/tf2onnx/vgg16/fp32_export/run_export.sh delete mode 100644 examples/tensorflow/tf2onnx/vgg16/int8_export/README.md delete mode 100644 examples/tensorflow/tf2onnx/vgg16/int8_export/main.py delete mode 100644 examples/tensorflow/tf2onnx/vgg16/int8_export/requirements.txt delete mode 100644 examples/tensorflow/tf2onnx/vgg16/int8_export/run_benchmark.sh delete mode 100644 examples/tensorflow/tf2onnx/vgg16/int8_export/run_export.sh delete mode 100644 examples/tensorflow/tf2onnx/vgg16/int8_export/run_tuning.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py index ac00f5121b0..19f6df22318 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py @@ -80,7 +80,7 @@ def run(self): """This 
is neural_compressor function include tuning, export and benchmark option.""" if self.args.quant_format != 'qdq': raise ValueError("Only support tensorflow export to ONNX for QDQ format, " - "please make sure input the corret quant_format.") + "please make sure input the correct quant_format.") if self.args.export: if self.args.dtype == 'int8': diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py index 839844d8a3a..cbd3d0a2532 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py @@ -81,7 +81,7 @@ def run(self): """This is neural_compressor function include tuning, export and benchmark option.""" if self.args.quant_format != 'qdq': raise ValueError("Only support tensorflow export to ONNX for QDQ format, " - "please make sure input the corret quant_format.") + "please make sure input the correct quant_format.") if self.args.export: if self.args.dtype == 'int8': diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py index 9b47720f7ee..9ccfba39970 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py @@ -90,7 +90,7 @@ def run(self): """This is neural_compressor function include tuning, export and benchmark option.""" if self.args.quant_format != 'qdq': raise ValueError("Only support tensorflow export to ONNX for QDQ format, " - "please make sure input the corret quant_format.") + "please make sure input the correct quant_format.") if self.args.export: if self.args.dtype == 'int8': diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py index adf61b6a10f..6a641d602b6 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py @@ -93,7 +93,7 @@ def run(self): """This is neural_compressor function include tuning, export and benchmark option.""" if args.quant_format != 'qdq': raise ValueError("Only support tensorflow export to ONNX for QDQ format, " - "please make sure input the corret quant_format.") + "please make sure input the correct quant_format.") top1 = TensorflowTopK(k=1) postprocess = LabelShift(label_shift=1) diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/main.py b/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/main.py index 381ea592207..3fd1f8b97a5 100644 --- a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/main.py +++ b/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/main.py @@ -114,7 +114,7 @@ def run(self): """This is neural_compressor function include tuning, export and benchmark option.""" if self.args.quant_format != 'qdq': raise ValueError("Only support tensorflow export to ONNX for QDQ format, " - "please make sure input the corret quant_format.") + "please make sure input the correct quant_format.") if self.args.export: if self.args.dtype == 'int8': diff --git 
a/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/main.py b/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/main.py index 019d75ae2e8..719ba13f2f0 100644 --- a/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/main.py +++ b/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/main.py @@ -114,7 +114,7 @@ def run(self): """This is neural_compressor function include tuning, export and benchmark option.""" if self.args.quant_format != 'qdq': raise ValueError("Only support tensorflow export to ONNX for QDQ format, " - "please make sure input the corret quant_format.") + "please make sure input the correct quant_format.") if self.args.export: if self.args.dtype == 'int8': diff --git a/examples/tensorflow/tf2onnx/README.md b/examples/tensorflow/tf2onnx/README.md deleted file mode 100644 index 8d1cf56639a..00000000000 --- a/examples/tensorflow/tf2onnx/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# Tensorflow models export to ONNX models Examples - -These examples show how to export Tensorflow models to ONNX models including FP32 and INT8. -Please note that we only support to export ONNX QDQ format for INT8 now. - -The following examples are available for reference: - -* [resnet50v1.0](/examples/tensorflow/tf2onnx/resnet50v1.0): image recognition ResNet50 model. -* [resnet50v1.5](/examples/tensorflow/tf2onnx/resnet50v1.5): image recognition ResNet50 model from MLPerf. -* [mobilenet_v2](/examples/tensorflow/tf2onnx/mobilenet_v2): image recognition mobilenet_v2 model. -* [vgg16](/examples/tensorflow/tf2onnx/vgg16): image recognition vgg16 model. -* [ssd_mobilenet_v1](/examples/tensorflow/tf2onnx/ssd_mobilenet_v1): object detection ssd_mobilenet_v1 model. -* [faster_rcnn_resnet50](/examples/tensorflow/tf2onnx/faster_rcnn_resnet50): object detection faster_rcnn_resnet50 model. diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/README.md b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/README.md deleted file mode 100644 index c398a72165f..00000000000 --- a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/README.md +++ /dev/null @@ -1,75 +0,0 @@ -Step-by-Step -============ - -This document is used to show how to export Tensorflow faster_rcnn_resnet50 FP32 model to ONNX FP32 model using Intel® Neural Compressor. - - -# Prerequisite - -## 1. Environment - -### Installation -Recommend python 3.8 or higher version. -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` - -### Install requirements -```shell -pip install -r requirements.txt -``` - -### Install Intel Extension for Tensorflow -Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -## 2. Prepare Pretrained model - -```shell -wget https://storage.googleapis.com/intel-optimized-tensorflow/models/faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz -tar -xvf faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz -``` - -## 3. Prepare Dataset - -### Automatic dataset download - -> **_Note: `prepare_coco_dataset.sh` script works with TF version 1.x._** - -Run the `prepare_coco_dataset.sh` script located in `examples/tensorflow/tf2onnx`. - -Usage: -```shell -cd examples/tensorflow/tf2onnx/ -bash prepare_coco_dataset.sh -cd faster_rcnn_resnet50/fp32_export -``` - -This script will download the *train*, *validation* and *test* COCO datasets. 
Furthermore it will convert them to -tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. - -### Manual dataset download -Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). - -# Run Command -Please note the dataset is TF records format for running benchmark. - -## Export Tensorflow FP32 model to ONNX FP32 model -```shell -bash run_export.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb --output_model=./faster_rcnn_resnet50_fp32_coco.onnx -``` - -## Run benchmark for Tensorflow FP32 model -```shell -bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 -bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 -``` - -## Run benchmark for ONNX FP32 model -```shell -bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_fp32_coco.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 -bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_fp32_coco.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 -``` diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/main.py b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/main.py deleted file mode 100644 index a2d5990f1d3..00000000000 --- a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/main.py +++ /dev/null @@ -1,157 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from argparse import ArgumentParser -import tensorflow as tf -import onnx -import onnxruntime as ort -import numpy as np - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - - -def eval_func_onnx(model, dataloader, metric, postprocess=None): - metric.reset() - session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - ort_inputs = {} - len_inputs = len(session.get_inputs()) - inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - for inputs, labels in dataloader: - if not isinstance(labels, list): - labels = [labels] - if len_inputs == 1: - ort_inputs.update( - inputs if isinstance(inputs, dict) else {inputs_names[0]: np.array(inputs,dtype=np.uint8)} - ) - else: - assert len_inputs == len(inputs), \ - 'number of input tensors must align with graph inputs' - - if isinstance(inputs, dict): # pragma: no cover - ort_inputs.update(inputs) - else: - for i in range(len_inputs): - # in case dataloader contains non-array input - if not isinstance(inputs[i], np.ndarray): - ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) - else: - ort_inputs.update({inputs_names[i]: inputs[i]}) - - predictions = session.run(None, ort_inputs) - - if postprocess is not None: - predictions, labels = postprocess((predictions, labels)) - - if not hasattr(metric, "compare_label") or \ - (hasattr(metric, "compare_label") and metric.compare_label): - metric.update(predictions, labels) - acc = metric.result() - return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] - -def eval_func_tf(model, dataloader, metric, postprocess=None): - metric.reset() - - from neural_compressor.model import Model - if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph): - model = Model(model) - model.input_tensor_names = ["image_tensor:0"] - model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \ - "detection_scores:0", "detection_classes:0"] - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] - - for _, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - predictions = model.sess.run(output_tensor, feed_dict) - metric.update(predictions, labels) - - acc = metric.result() - return acc - -class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph.""" - - def __init__(self): - """Initilization.""" - arg_parser = ArgumentParser(description='Parse args') - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - arg_parser.add_argument("--output-graph", - help='Specify tune result model save dir', - dest='output_graph') - arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') - arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') - arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') - arg_parser.add_argument('--dataset_location', dest='dataset_location', - help='location of calibration dataset and evaluate dataset') - arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') - self.args = arg_parser.parse_args() - - def run(self): - """This is neural_compressor function include export 
and benchmark option.""" - if self.args.export: - from neural_compressor.model import Model - from neural_compressor.config import TF2ONNXConfig - inc_model = Model(self.args.input_graph) - inc_model.input_tensor_names = ["image_tensor"] - inc_model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] - config = TF2ONNXConfig(dtype="fp32") - inc_model.export(self.args.output_graph, config) - - if self.args.benchmark: - if self.args.input_graph.endswith('.onnx'): - model = onnx.load(self.args.input_graph) - else: - model = self.args.input_graph - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, - 'transform': {'Resize': {'size': 600}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - - def eval(model): - if isinstance(model, str): - return eval_func_tf(model, dataloader, mAP2) - else: - return eval_func_onnx(model, dataloader, mAP2) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - -if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/requirements.txt deleted file mode 100644 index 16783f94457..00000000000 --- a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -tensorflow==2.11.0 -tf2onnx==1.13.0 -onnx==1.9.0; python_version < '3.10' -onnx==1.12.0; python_version == '3.10' -onnxruntime==1.10.0; python_version < '3.10' -onnxruntime==1.12.0; python_version == '3.10' -onnxruntime-extensions; python_version < '3.10' - -pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_benchmark.sh deleted file mode 100644 index e83a029e800..00000000000 --- a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_benchmark.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - esac - done - -} - -# run_tuning -function run_benchmark { - python main.py \ - --input-graph ${input_model} \ - --mode ${mode} \ - --dataset_location ${dataset_location} \ - --batch_size ${batch_size} \ - --benchmark -} - -main "$@" diff --git 
a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_export.sh b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_export.sh deleted file mode 100644 index 1c6d1c908fe..00000000000 --- a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/fp32_export/run_export.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_export - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_export -function run_export { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --export -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/README.md b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/README.md deleted file mode 100644 index 846a87fb238..00000000000 --- a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/README.md +++ /dev/null @@ -1,80 +0,0 @@ -Step-by-Step -============ - -This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. - - -# Prerequisite - -## 1. Environment - -### Installation -Recommend python 3.8 or higher version. -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` - -### Install requirements -```shell -pip install -r requirements.txt -``` - -### Install Intel Extension for Tensorflow -Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -## 2. Prepare Pretrained model - -```shell -wget https://storage.googleapis.com/intel-optimized-tensorflow/models/faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz -tar -xvf faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz -``` - -## 3. Prepare Dataset - -### Automatic dataset download - -> **_Note: `prepare_coco_dataset.sh` script works with TF version 1.x._** - -Run the `prepare_coco_dataset.sh` script located in `examples/tensorflow/tf2onnx`. - -Usage: -```shell -cd examples/tensorflow/tf2onnx/ -bash prepare_coco_dataset.sh -cd faster_rcnn_resnet50/int8_export -``` - -This script will download the *train*, *validation* and *test* COCO datasets. Furthermore it will convert them to -tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. - -### Manual dataset download -Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). - -# Run Command -Please note the dataset is TF records format for running quantization and benchmark. 
- -## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model -```shell -bash run_tuning.sh --input_model=./faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb --output_model=./faster_rcnn_resnet50_coco_int8.pb --dataset_location=/path/to/coco_dataset/ -``` - -## Run benchmark for Tensorflow INT8 QDQ model -```shell -bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_coco_int8.pb --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 -bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_coco_int8.pb --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 -``` - -## Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model -```shell -bash run_export.sh --input_model=./faster_rcnn_resnet50_coco_int8.pb --output_model=./faster_rcnn_resnet50_coco_int8.onnx -``` - -## Run benchmark for ONNX INT8 QDQ model -```shell -bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_coco_int8.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 -bash run_benchmark.sh --input_model=./faster_rcnn_resnet50_coco_int8.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 -``` diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/main.py b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/main.py deleted file mode 100644 index 80ffc2a910f..00000000000 --- a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/main.py +++ /dev/null @@ -1,187 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from argparse import ArgumentParser -import tensorflow as tf -import onnx -import os -import onnxruntime as ort -import numpy as np - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - - -def eval_func_onnx(model, dataloader, metric, postprocess=None): - metric.reset() - session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - ort_inputs = {} - len_inputs = len(session.get_inputs()) - inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - for inputs, labels in dataloader: - if not isinstance(labels, list): - labels = [labels] - if len_inputs == 1: - ort_inputs.update( - inputs if isinstance(inputs, dict) else {inputs_names[0]: np.array(inputs,dtype=np.uint8)} - ) - else: - assert len_inputs == len(inputs), \ - 'number of input tensors must align with graph inputs' - - if isinstance(inputs, dict): # pragma: no cover - ort_inputs.update(inputs) - else: - for i in range(len_inputs): - # in case dataloader contains non-array input - if not isinstance(inputs[i], np.ndarray): - ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) - else: - ort_inputs.update({inputs_names[i]: inputs[i]}) - - predictions = session.run(None, ort_inputs) - - if postprocess is not None: - predictions, labels = postprocess((predictions, labels)) - - if not hasattr(metric, "compare_label") or \ - (hasattr(metric, "compare_label") and metric.compare_label): - metric.update(predictions, labels) - acc = metric.result() - return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] - -def eval_func_tf(model, dataloader, metric, postprocess=None): - metric.reset() - - from neural_compressor.model import Model - if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph): - model = Model(model) - model.input_tensor_names = ["image_tensor:0"] - model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \ - "detection_scores:0", "detection_classes:0"] - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] - - for _, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - predictions = model.sess.run(output_tensor, feed_dict) - metric.update(predictions, labels) - - acc = metric.result() - return acc - -class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph.""" - - def __init__(self): - """Initilization.""" - arg_parser = ArgumentParser(description='Parse args') - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - arg_parser.add_argument("--output-graph", - help='Specify tune result model save dir', - dest='output_graph') - arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') - arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') - arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') - arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') - arg_parser.add_argument('--dataset_location', dest='dataset_location', - help='location of calibration dataset and evaluate dataset') - arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of 
benchmark') - self.args = arg_parser.parse_args() - - def run(self): - """This is neural_compressor function include tuning, export and benchmark option.""" - if self.args.tune: - from neural_compressor import quantization - from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion - from neural_compressor.utils.create_obj_from_config import create_dataloader - calib_dataloader_args = { - 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, - 'transform': None, - 'filter': None - } - calib_dataloader = create_dataloader('tensorflow', calib_dataloader_args) - eval_dataloader_args = { - 'batch_size': 10, - 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, - 'transform': {'Resize': {'size': 600}}, - 'filter': None - } - eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) - conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[10, 50, 100, 200], inputs=['image_tensor'], - outputs=['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes'], - accuracy_criterion = AccuracyCriterion(tolerable_loss=0.32)) - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader, eval_metric=mAP2) - q_model.save(self.args.output_graph) - - if self.args.export: - from neural_compressor.model import Model - from neural_compressor.config import TF2ONNXConfig - inc_model = Model(self.args.input_graph) - inc_model.input_tensor_names = ["image_tensor"] - inc_model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] - config = TF2ONNXConfig(dtype="int8") - inc_model.export(self.args.output_graph, config) - - if self.args.benchmark: - if self.args.input_graph.endswith('.onnx'): - model = onnx.load(self.args.input_graph) - else: - model = self.args.input_graph - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, - 'transform': {'Resize': {'size': 600}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - - def eval(model): - if isinstance(model, str): - return eval_func_tf(model, dataloader, mAP2) - else: - return eval_func_onnx(model, dataloader, mAP2) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - - -if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/requirements.txt deleted file mode 100644 index b964010af83..00000000000 --- 
a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -tensorflow==2.11.0 -tf2onnx==1.13.0 -onnx==1.9.0; python_version < '3.10' -onnx==1.12.0; python_version == '3.10' -onnxruntime==1.10.0; python_version < '3.10' -onnxruntime==1.12.0; python_version == '3.10' -onnxruntime-extensions; python_version < '3.10' - -pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_benchmark.sh deleted file mode 100644 index e83a029e800..00000000000 --- a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_benchmark.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - esac - done - -} - -# run_tuning -function run_benchmark { - python main.py \ - --input-graph ${input_model} \ - --mode ${mode} \ - --dataset_location ${dataset_location} \ - --batch_size ${batch_size} \ - --benchmark -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_export.sh b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_export.sh deleted file mode 100644 index 1c6d1c908fe..00000000000 --- a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_export.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_export - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_export -function run_export { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --export -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_tuning.sh b/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_tuning.sh deleted file mode 100644 index 6a9e1b859c9..00000000000 --- a/examples/tensorflow/tf2onnx/faster_rcnn_resnet50/int8_export/run_tuning.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_tuning - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --dataset_location ${dataset_location} \ - --tune -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/imagenet_prepare/build_imagenet_data.py b/examples/tensorflow/tf2onnx/imagenet_prepare/build_imagenet_data.py deleted file mode 100644 index c52d2bd4218..00000000000 --- a/examples/tensorflow/tf2onnx/imagenet_prepare/build_imagenet_data.py +++ /dev/null @@ -1,567 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Converts ImageNet data to TFRecords file format with Example protos. - -The raw ImageNet data set is expected to reside in JPEG files located in the -following directory structure. - - data_dir/n01440764/ILSVRC2012_val_00000293.JPEG - data_dir/n01440764/ILSVRC2012_val_00000543.JPEG - ... - -where 'n01440764' is the unique synset label associated with -these images. - -The training data set consists of 1000 sub-directories (i.e. labels) -each containing 1200 JPEG images for a total of 1.2M JPEG images. - -The evaluation data set consists of 1000 sub-directories (i.e. labels) -each containing 50 JPEG images for a total of 50K JPEG images. - -This TensorFlow script converts the training and evaluation data into -a sharded data set consisting of 1024 and 128 TFRecord files, respectively. - - train_directory/train-00000-of-01024 - train_directory/train-00001-of-01024 - ... - train_directory/train-00127-of-01024 - -and - - validation_directory/validation-00000-of-00128 - validation_directory/validation-00001-of-00128 - ... - validation_directory/validation-00127-of-00128 - -Each validation TFRecord file contains ~390 records. Each training TFREcord -file contains ~1250 records. Each record within the TFRecord file is a -serialized Example proto. The Example proto contains the following fields: - - image/encoded: string containing JPEG encoded image in RGB colorspace - image/height: integer, image height in pixels - image/width: integer, image width in pixels - image/colorspace: string, specifying the colorspace, always 'RGB' - image/channels: integer, specifying the number of channels, always 3 - image/format: string, specifying the format, always'JPEG' - - image/filename: string containing the basename of the image file - e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG' - image/class/label: integer specifying the index in a classification layer. - The label ranges from [1, 1000] where 0 is not used. - image/class/synset: string specifying the unique ID of the label, - e.g. 'n01440764' - image/class/text: string specifying the human-readable version of the label - e.g. 'red fox, Vulpes vulpes' - -Note that the length of xmin is identical to the length of xmax, ymin and ymax -for each example. - -Running this script using 16 threads may take around ~2.5 hours on a HP Z420. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from datetime import datetime -import os -import random -import sys -import threading - -import numpy as np -from six.moves import xrange # pylint: disable=redefined-builtin -import tensorflow as tf -tf.compat.v1.disable_eager_execution() - - -tf.compat.v1.app.flags.DEFINE_string('raw_directory', None, - 'Raw data directory') - -tf.compat.v1.app.flags.DEFINE_string('output_directory', None, - 'Output data directory') - -tf.compat.v1.app.flags.DEFINE_integer('shards', 1, - 'Number of shards in TFRecord files.') - -tf.compat.v1.app.flags.DEFINE_string('subset', 'validation', - 'Subset of imagenet, can be validation/train') - -tf.compat.v1.app.flags.DEFINE_integer('num_threads', 1, - 'Number of threads to preprocess the images.') - -# The labels file contains a list of valid labels are held in this file. -# Assumes that the file contains entries as such: -# n01440764 -# n01443537 -# n01484850 -# where each line corresponds to a label expressed as a synset. We map -# each synset contained in the file to an integer (based on the alphabetical -# ordering). See below for details. -tf.compat.v1.app.flags.DEFINE_string('labels_file', - 'imagenet_lsvrc_2015_synsets.txt', - 'Labels file') - -# This file containing mapping from synset to human-readable label. -# Assumes each line of the file looks like: -# -# n02119247 black fox -# n02119359 silver fox -# n02119477 red fox, Vulpes fulva -# -# where each line corresponds to a unique mapping. Note that each line is -# formatted as \t. -tf.compat.v1.app.flags.DEFINE_string('imagenet_metadata_file', - 'imagenet_metadata.txt', - 'ImageNet metadata file') - -FLAGS = tf.compat.v1.app.flags.FLAGS - - -def _int64_feature(value): - """Wrapper for inserting int64 features into Example proto.""" - if not isinstance(value, list): - value = [value] - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) - - -def _float_feature(value): - """Wrapper for inserting float features into Example proto.""" - if not isinstance(value, list): - value = [value] - return tf.train.Feature(float_list=tf.train.FloatList(value=value)) - - -def _bytes_feature(value): - """Wrapper for inserting bytes features into Example proto.""" - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - -def _convert_to_example(filename, image_buffer, label, synset, human, - height, width): - """Build an Example proto for an example. 
- - Args: - filename: string, path to an image file, e.g., '/path/to/example.JPG' - image_buffer: string, JPEG encoding of RGB image - label: integer, identifier for the ground truth for the network - synset: string, unique WordNet ID specifying the label, e.g., 'n02323233' - human: string, human-readable label, e.g., 'red fox, Vulpes vulpes' - height: integer, image height in pixels - width: integer, image width in pixels - Returns: - Example proto - """ - - colorspace = b'RGB' - channels = 3 - image_format = b'JPEG' - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/height': _int64_feature(height), - 'image/width': _int64_feature(width), - 'image/colorspace': _bytes_feature(colorspace), - 'image/channels': _int64_feature(channels), - 'image/class/label': _int64_feature(label), - 'image/class/synset': _bytes_feature(bytes(synset,'utf-8')), - 'image/class/text': _bytes_feature(bytes(human,'utf-8')), - 'image/format': _bytes_feature(image_format), - 'image/filename': _bytes_feature(bytes(os.path.basename(filename),'utf-8')), - 'image/encoded': _bytes_feature(image_buffer)})) - return example - - -class ImageCoder(object): - """Helper class that provides TensorFlow image coding utilities.""" - - def __init__(self): - # Create a single Session to run all image coding calls. - self._sess = tf.compat.v1.Session() - - # Initializes function that converts PNG to JPEG data. - self._png_data = tf.compat.v1.placeholder(dtype=tf.string) - image = tf.image.decode_png(self._png_data, channels=3) - self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100) - - # Initializes function that converts CMYK JPEG data to RGB JPEG data. - self._cmyk_data = tf.compat.v1.placeholder(dtype=tf.string) - image = tf.image.decode_jpeg(self._cmyk_data, channels=0) - self._cmyk_to_rgb = tf.image.encode_jpeg(image, format='rgb', quality=100) - - # Initializes function that decodes RGB JPEG data. - self._decode_jpeg_data = tf.compat.v1.placeholder(dtype=tf.string) - self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3) - - def png_to_jpeg(self, image_data): - return self._sess.run(self._png_to_jpeg, - feed_dict={self._png_data: image_data}) - - def cmyk_to_rgb(self, image_data): - return self._sess.run(self._cmyk_to_rgb, - feed_dict={self._cmyk_data: image_data}) - - def decode_jpeg(self, image_data): - image = self._sess.run(self._decode_jpeg, - feed_dict={self._decode_jpeg_data: image_data}) - assert len(image.shape) == 3 - assert image.shape[2] == 3 - return image - - -def _is_png(filename): - """Determine if a file contains a PNG format image. - - Args: - filename: string, path of the image file. - - Returns: - boolean indicating if the image is a PNG. - """ - # File list from: - # https://groups.google.com/forum/embed/?place=forum/torch7#!topic/torch7/fOSTXHIESSU - return 'n02105855_2933.JPEG' in filename - - -def _is_cmyk(filename): - """Determine if file contains a CMYK JPEG format image. - - Args: - filename: string, path of the image file. - - Returns: - boolean indicating if the image is a JPEG encoded with CMYK color space. 
- """ - # File list from: - # https://github.com/cytsai/ilsvrc-cmyk-image-list - blacklist = ['n01739381_1309.JPEG', 'n02077923_14822.JPEG', - 'n02447366_23489.JPEG', 'n02492035_15739.JPEG', - 'n02747177_10752.JPEG', 'n03018349_4028.JPEG', - 'n03062245_4620.JPEG', 'n03347037_9675.JPEG', - 'n03467068_12171.JPEG', 'n03529860_11437.JPEG', - 'n03544143_17228.JPEG', 'n03633091_5218.JPEG', - 'n03710637_5125.JPEG', 'n03961711_5286.JPEG', - 'n04033995_2932.JPEG', 'n04258138_17003.JPEG', - 'n04264628_27969.JPEG', 'n04336792_7448.JPEG', - 'n04371774_5854.JPEG', 'n04596742_4225.JPEG', - 'n07583066_647.JPEG', 'n13037406_4650.JPEG'] - return filename.split('/')[-1] in blacklist - - -def _process_image(filename, coder): - """Process a single image file. - - Args: - filename: string, path to an image file e.g., '/path/to/example.JPG'. - coder: instance of ImageCoder to provide TensorFlow image coding utils. - Returns: - image_buffer: string, JPEG encoding of RGB image. - height: integer, image height in pixels. - width: integer, image width in pixels. - """ - # Read the image file. - image_data = tf.io.gfile.GFile(filename, 'rb').read() - - # Clean the dirty data. - if _is_png(filename): - # 1 image is a PNG. - print('Converting PNG to JPEG for %s' % filename) - image_data = coder.png_to_jpeg(image_data) - elif _is_cmyk(filename): - # 22 JPEG images are in CMYK colorspace. - print('Converting CMYK to RGB for %s' % filename) - image_data = coder.cmyk_to_rgb(image_data) - - # Decode the RGB JPEG. - image = coder.decode_jpeg(image_data) - - # Check that image converted to RGB - assert len(image.shape) == 3 - height = image.shape[0] - width = image.shape[1] - assert image.shape[2] == 3 - - return image_data, height, width - - -def _process_image_files_batch(coder, thread_index, ranges, name, filenames, - synsets, labels, humans, num_shards): - """Processes and saves list of images as TFRecord in 1 thread. - - Args: - coder: instance of ImageCoder to provide TensorFlow image coding utils. - thread_index: integer, unique batch to run index is within [0, len(ranges)). - ranges: list of pairs of integers specifying ranges of each batches to - analyze in parallel. - name: string, unique identifier specifying the data set - filenames: list of strings; each string is a path to an image file - synsets: list of strings; each string is a unique WordNet ID - labels: list of integer; each integer identifies the ground truth - humans: list of strings; each string is a human-readable label - num_shards: integer number of shards for this data set. - """ - # Each thread produces N shards where N = int(num_shards / num_threads). - # For instance, if num_shards = 128, and the num_threads = 2, then the first - # thread would produce shards [0, 64). - num_threads = len(ranges) - assert not num_shards % num_threads - num_shards_per_batch = int(num_shards / num_threads) - - shard_ranges = np.linspace(ranges[thread_index][0], - ranges[thread_index][1], - num_shards_per_batch + 1).astype(int) - num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0] - - counter = 0 - for s in xrange(num_shards_per_batch): - # Generate a sharded version of the file name, e.g. 
'train-00002-of-00010' - shard = thread_index * num_shards_per_batch + s - output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards) - output_file = os.path.join(FLAGS.output_directory, output_filename) - writer = tf.io.TFRecordWriter(output_file) - - shard_counter = 0 - files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int) # HERE - for i in files_in_shard: - filename = filenames[i] - label = labels[i] - synset = synsets[i] - human = humans[i] - - image_buffer, height, width = _process_image(filename, coder) - - example = _convert_to_example(filename, image_buffer, label, synset, human, height, width) - writer.write(example.SerializeToString()) - shard_counter += 1 - counter += 1 - - if not counter % 1000: - print('%s [thread %d]: Processed %d of %d images in thread batch.' % - (datetime.now(), thread_index, counter, num_files_in_thread)) - sys.stdout.flush() - - writer.close() - print('%s [thread %d]: Wrote %d images to %s' % - (datetime.now(), thread_index, shard_counter, output_file)) - sys.stdout.flush() - shard_counter = 0 - print('%s [thread %d]: Wrote %d images to %d shards.' % - (datetime.now(), thread_index, counter, num_files_in_thread)) - sys.stdout.flush() - - -def _process_image_files(name, filenames, synsets, labels, humans, num_shards): - """Process and save list of images as TFRecord of Example protos. - - Args: - name: string, unique identifier specifying the data set - filenames: list of strings; each string is a path to an image file - synsets: list of strings; each string is a unique WordNet ID - labels: list of integer; each integer identifies the ground truth - humans: list of strings; each string is a human-readable label - num_shards: integer number of shards for this data set. - """ - assert len(filenames) == len(synsets) - assert len(filenames) == len(labels) - assert len(filenames) == len(humans) - - # Break all images into batches with a [ranges[i][0], ranges[i][1]]. - spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(np.int) - ranges = [] - threads = [] - for i in xrange(len(spacing) - 1): - ranges.append([spacing[i], spacing[i+1]]) - - # Launch a thread for each batch. - print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges)) - sys.stdout.flush() - - # Create a mechanism for monitoring when all threads are finished. - coord = tf.train.Coordinator() - - # Create a generic TensorFlow-based utility for converting all image codings. - coder = ImageCoder() - - threads = [] - for thread_index in xrange(len(ranges)): - args = (coder, thread_index, ranges, name, filenames, - synsets, labels, humans, num_shards) - t = threading.Thread(target=_process_image_files_batch, args=args) - t.start() - threads.append(t) - - # Wait for all the threads to terminate. - coord.join(threads) - print('%s: Finished writing all %d images in data set.' % - (datetime.now(), len(filenames))) - sys.stdout.flush() - - -def _find_image_files(data_dir, labels_file): - """Build a list of all images files and labels in the data set. - - Args: - data_dir: string, path to the root directory of images. - - Assumes that the ImageNet data set resides in JPEG files located in - the following directory structure. - - data_dir/n01440764/ILSVRC2012_val_00000293.JPEG - data_dir/n01440764/ILSVRC2012_val_00000543.JPEG - - where 'n01440764' is the unique synset label associated with these images. - - labels_file: string, path to the labels file. - - The list of valid labels are held in this file. 
Assumes that the file - contains entries as such: - n01440764 - n01443537 - n01484850 - where each line corresponds to a label expressed as a synset. We map - each synset contained in the file to an integer (based on the alphabetical - ordering) starting with the integer 1 corresponding to the synset - contained in the first line. - - The reason we start the integer labels at 1 is to reserve label 0 as an - unused background class. - - Returns: - filenames: list of strings; each string is a path to an image file. - synsets: list of strings; each string is a unique WordNet ID. - labels: list of integer; each integer identifies the ground truth. - """ - print('Determining list of input files and labels from %s.' % data_dir) - challenge_synsets = [l.strip() for l in - tf.compat.v1.gfile.FastGFile(labels_file, 'r').readlines()] - - labels = [] - filenames = [] - synsets = [] - - # Leave label index 0 empty as a background class. - label_index = 1 - - # Construct the list of JPEG files and labels. - for synset in challenge_synsets: - jpeg_file_path = '%s/%s/*.JPEG' % (data_dir, synset) - matching_files = tf.io.gfile.glob(jpeg_file_path) - - labels.extend([label_index] * len(matching_files)) - synsets.extend([synset] * len(matching_files)) - filenames.extend(matching_files) - - if not label_index % 100: - print('Finished finding files in %d of %d classes.' % ( - label_index, len(challenge_synsets))) - label_index += 1 - - # Shuffle the ordering of all image files in order to guarantee - # random ordering of the images with respect to label in the - # saved TFRecord files. Make the randomization repeatable. - shuffled_index = range(len(filenames)) - random.seed(12345) - - random.shuffle(list(range(len(shuffled_index)))) - - filenames = [filenames[i] for i in shuffled_index] - synsets = [synsets[i] for i in shuffled_index] - labels = [labels[i] for i in shuffled_index] - - print('Found %d JPEG files across %d labels inside %s.' % - (len(filenames), len(challenge_synsets), data_dir)) - return filenames, synsets, labels - - -def _find_human_readable_labels(synsets, synset_to_human): - """Build a list of human-readable labels. - - Args: - synsets: list of strings; each string is a unique WordNet ID. - synset_to_human: dict of synset to human labels, e.g., - 'n02119022' --> 'red fox, Vulpes vulpes' - - Returns: - List of human-readable strings corresponding to each synset. - """ - humans = [] - for s in synsets: - assert s in synset_to_human, ('Failed to find: %s' % s) - humans.append(synset_to_human[s]) - return humans - - -def _process_dataset(name, directory, num_shards, synset_to_human): - """Process a complete data set and save it as a TFRecord. - - Args: - name: string, unique identifier specifying the data set. - directory: string, root path to the data set. - num_shards: integer number of shards for this data set. - synset_to_human: dict of synset to human labels, e.g., - 'n02119022' --> 'red fox, Vulpes vulpes' - """ - filenames, synsets, labels = _find_image_files(directory, FLAGS.labels_file) - humans = _find_human_readable_labels(synsets, synset_to_human) - - _process_image_files(name, filenames, synsets, labels, - humans, num_shards) - - -def _build_synset_lookup(imagenet_metadata_file): - """Build lookup for synset to human-readable label. - - Args: - imagenet_metadata_file: string, path to file containing mapping from - synset to human-readable label. 
- - Assumes each line of the file looks like: - - n02119247 black fox - n02119359 silver fox - n02119477 red fox, Vulpes fulva - - where each line corresponds to a unique mapping. Note that each line is - formatted as \t. - - Returns: - Dictionary of synset to human labels, such as: - 'n02119022' --> 'red fox, Vulpes vulpes' - """ - lines = tf.compat.v1.gfile.FastGFile(imagenet_metadata_file, 'r').readlines() - synset_to_human = {} - for l in lines: - if l: - parts = l.strip().split('\t') - assert len(parts) == 2 - synset = parts[0] - human = parts[1] - synset_to_human[synset] = human - return synset_to_human - - -def main(unused_argv): - assert not FLAGS.shards % FLAGS.num_threads, ( - 'Please make the FLAGS.num_threads commensurate with FLAGS.shards') - - print('Saving results to %s' % FLAGS.output_directory) - - # Build a map from synset to human-readable label. - synset_to_human = _build_synset_lookup(FLAGS.imagenet_metadata_file) - - if(FLAGS.raw_directory != None): - _process_dataset(FLAGS.subset, FLAGS.raw_directory,FLAGS.shards, synset_to_human) - -if __name__ == '__main__': - tf.compat.v1.app.run() diff --git a/examples/tensorflow/tf2onnx/imagenet_prepare/download_and_convert_imagenet.sh b/examples/tensorflow/tf2onnx/imagenet_prepare/download_and_convert_imagenet.sh deleted file mode 100644 index f9baa85ab07..00000000000 --- a/examples/tensorflow/tf2onnx/imagenet_prepare/download_and_convert_imagenet.sh +++ /dev/null @@ -1,100 +0,0 @@ -#!/bin/bash -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# Script to download and preprocess ImageNet Challenge 2012 -# training and validation data set. -# -# The final output of this script are sharded TFRecord files containing -# serialized Example protocol buffers. See build_imagenet_data.py for -# details of how the Example protocol buffers contain the ImageNet data. -# -# The final output of this script appears as such: -# -# data_dir/train-00000-of-01024 -# data_dir/train-00001-of-01024 -# ... -# data_dir/train-00127-of-01024 -# -# and -# -# data_dir/validation-00000-of-00128 -# data_dir/validation-00001-of-00128 -# ... -# data_dir/validation-00127-of-00128 -# -# Note that this script may take several hours to run to completion. The -# conversion of the ImageNet data to TFRecords alone takes 2-3 hours depending -# on the speed of your machine. Please be patient. -# -# **IMPORTANT** -# To download the raw images, the user must create an account with image-net.org -# and generate a username and access_key. The latter two are required for -# downloading the raw images. -# - -set -e - -if [ -z "$1" ]; then - echo "usage download_and_convert_imagenet.sh [data dir]" - exit -fi - -# Create the output and temporary directories. -DATA_DIR="${1%/}" -SCRATCH_DIR="${DATA_DIR}/raw-data/" -mkdir -p "${DATA_DIR}" -mkdir -p "${SCRATCH_DIR}" -WORK_DIR="$0.runfiles/__main__" - -# Download the ImageNet data. 
-LABELS_FILE="${WORK_DIR}/datasets/imagenet_lsvrc_2015_synsets.txt" -DOWNLOAD_SCRIPT="${WORK_DIR}/datasets/download_imagenet.sh" -"${DOWNLOAD_SCRIPT}" "${SCRATCH_DIR}" "${LABELS_FILE}" - -# Note the locations of the train and validation data. -TRAIN_DIRECTORY="${SCRATCH_DIR}train/" -VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/" - -# Preprocess the validation data by moving the images into the appropriate -# sub-directory based on the label (synset) of the image. -echo "Organizing the validation data into sub-directories." -PREPROCESS_VAL_SCRIPT="${WORK_DIR}/datasets/preprocess_imagenet_validation_data.py" -VAL_LABELS_FILE="${WORK_DIR}/datasets/imagenet_2012_validation_synset_labels.txt" - -"${PREPROCESS_VAL_SCRIPT}" "${VALIDATION_DIRECTORY}" "${VAL_LABELS_FILE}" - -# Convert the XML files for bounding box annotations into a single CSV. -echo "Extracting bounding box information from XML." -BOUNDING_BOX_SCRIPT="${WORK_DIR}/datasets/process_bounding_boxes.py" -BOUNDING_BOX_FILE="${SCRATCH_DIR}/imagenet_2012_bounding_boxes.csv" -BOUNDING_BOX_DIR="${SCRATCH_DIR}bounding_boxes/" - -"${BOUNDING_BOX_SCRIPT}" "${BOUNDING_BOX_DIR}" "${LABELS_FILE}" \ - | sort >"${BOUNDING_BOX_FILE}" -echo "Finished downloading and preprocessing the ImageNet data." - -# Build the TFRecords version of the ImageNet data. -BUILD_SCRIPT="${WORK_DIR}/build_imagenet_data" -OUTPUT_DIRECTORY="${DATA_DIR}" -IMAGENET_METADATA_FILE="${WORK_DIR}/datasets/imagenet_metadata.txt" - -"${BUILD_SCRIPT}" \ - --train_directory="${TRAIN_DIRECTORY}" \ - --validation_directory="${VALIDATION_DIRECTORY}" \ - --output_directory="${OUTPUT_DIRECTORY}" \ - --imagenet_metadata_file="${IMAGENET_METADATA_FILE}" \ - --labels_file="${LABELS_FILE}" \ - --bounding_box_file="${BOUNDING_BOX_FILE}" diff --git a/examples/tensorflow/tf2onnx/imagenet_prepare/download_imagenet.sh b/examples/tensorflow/tf2onnx/imagenet_prepare/download_imagenet.sh deleted file mode 100644 index c780e179f93..00000000000 --- a/examples/tensorflow/tf2onnx/imagenet_prepare/download_imagenet.sh +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/bash -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# Script to download ImageNet Challenge 2012 training and validation data set. -# -# Downloads and decompresses raw images and bounding boxes. -# -# **IMPORTANT** -# To download the raw images, the user must create an account with image-net.org -# and generate a username and access_key. The latter two are required for -# downloading the raw images. -# -# usage: -# ./download_imagenet.sh [dirname] -set -e - -if [ "x$IMAGENET_ACCESS_KEY" == x -o "x$IMAGENET_USERNAME" == x ]; then - cat < Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. - 3. 
Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `MobilenetV2/Predictions/Reshape_1` - - 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo - ```shell - python freeze_graph.py \ - --input_graph=/tmp/mobilenet_v2_inf_graph.pb \ - --input_checkpoint=./mobilenet_v2.ckpt \ - --input_binary=true \ - --output_graph=./frozen_mobilenet_v2.pb \ - --output_node_names=MobilenetV2/Predictions/Reshape_1 - ``` - - -## 3. Prepare Dataset - - TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. - - ```shell - cd examples/tensorflow/tf2onnx/ - # convert validation subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd mobilenet_v2/fp32_export - ``` - -# Run Command -Please note the dataset is TF records format for running benchmark. - -## Export Tensorflow FP32 model to ONNX FP32 model -```shell -bash run_export.sh --input_model=./frozen_mobilenet_v2.pb --output_model=./mobilenet_v2.onnx -``` - -## Run benchmark for Tensorflow FP32 model -```shell -bash run_benchmark.sh --input_model=./frozen_mobilenet_v2.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 -bash run_benchmark.sh --input_model=./frozen_mobilenet_v2.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 -``` - -## Run benchmark for ONNX FP32 model -```shell -bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 -bash run_benchmark.sh --input_model=./mobilenet_v2.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 -``` diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py deleted file mode 100644 index 69561478cfc..00000000000 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/main.py +++ /dev/null @@ -1,119 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from argparse import ArgumentParser -import tensorflow as tf -import onnx -import os -import onnxruntime as ort -import numpy as np - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - - -def eval_func_onnx(model, dataloader, metric, postprocess=None): - metric.reset() - sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - input_names = [i.name for i in sess.get_inputs()] - for input_data, label in dataloader: - output = sess.run(None, dict(zip(input_names, [input_data]))) - if postprocess: - output, label = postprocess((output, label)) - metric.update(output, label) - return metric.result() - -def eval_func_tf(model, dataloader, metric, postprocess=None): - from neural_compressor.model import Model - model = Model(model) - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] - - for _, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - predictions = model.sess.run(output_tensor, feed_dict) - metric.update(predictions, labels) - acc = metric.result() - return acc - -class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph.""" - - def __init__(self): - """Initilization.""" - arg_parser = ArgumentParser(description='Parse args') - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - arg_parser.add_argument("--output-graph", - help='Specify tune result model save dir', - dest='output_graph') - arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') - arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') - arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') - arg_parser.add_argument('--dataset_location', dest='dataset_location', - help='location of calibration dataset and evaluate dataset') - arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') - self.args = arg_parser.parse_args() - - def run(self): - """This is neural_compressor function include export and benchmark option.""" - if self.args.export: - from neural_compressor.model import Model - from neural_compressor.config import TF2ONNXConfig - inc_model = Model(self.args.input_graph) - config = TF2ONNXConfig(dtype="fp32", input_names='input[-1,224,224,3]') - inc_model.export(self.args.output_graph, config) - - if self.args.benchmark: - if self.args.input_graph.endswith('.onnx'): - model = onnx.load(self.args.input_graph) - else: - model = self.args.input_graph - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"ImageRecord": {'root': self.args.dataset_location}}, - 'transform': {'BilinearImagenet': {'height': 224, 'width': 224}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) - def eval(model): - if isinstance(model, str): - return eval_func_tf(model, dataloader, top1) - else: - return eval_func_onnx(model, dataloader, top1) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - 
from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(self.args.input_graph, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - -if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/requirements.txt deleted file mode 100644 index 16783f94457..00000000000 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -tensorflow==2.11.0 -tf2onnx==1.13.0 -onnx==1.9.0; python_version < '3.10' -onnx==1.12.0; python_version == '3.10' -onnxruntime==1.10.0; python_version < '3.10' -onnxruntime==1.12.0; python_version == '3.10' -onnxruntime-extensions; python_version < '3.10' - -pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_benchmark.sh deleted file mode 100644 index e83a029e800..00000000000 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_benchmark.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - esac - done - -} - -# run_tuning -function run_benchmark { - python main.py \ - --input-graph ${input_model} \ - --mode ${mode} \ - --dataset_location ${dataset_location} \ - --batch_size ${batch_size} \ - --benchmark -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_export.sh b/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_export.sh deleted file mode 100644 index 1c6d1c908fe..00000000000 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/fp32_export/run_export.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_export - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_export -function run_export { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --export -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md deleted file mode 100644 index 9f6e9374c55..00000000000 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/README.md +++ /dev/null @@ -1,107 +0,0 @@ -Step-by-Step -============ - -This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. - - -# Prerequisite - -## 1. Environment - -### Installation -Recommend python 3.8 or higher version. 
-```shell
-# Install Intel® Neural Compressor
-pip install neural-compressor
-```
-
-### Install requirements
-TensorFlow and intel-extension-for-tensorflow must be installed to run this ONNX INT8 model export example.
-The Intel Extension for TensorFlow for Intel CPUs is installed by default.
-```shell
-pip install -r requirements.txt
-```
-
-### Install Intel Extension for Tensorflow
-Intel Extension for TensorFlow must be installed to export the TensorFlow model to ONNX.
-```shell
-pip install --upgrade intel-extension-for-tensorflow[cpu]
-```
-
-## 2. Prepare Pretrained model
-
-The mobilenet_v2 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models).
-We can get the pb file by converting the checkpoint file.
-
- 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models)
- ```shell
- wget https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz
- tar -xvf mobilenet_v2_1.4_224.tgz
- ```
-
- 2. Exporting the Inference Graph
- ```shell
- git clone https://github.com/tensorflow/models
- cd models/research/slim
- python export_inference_graph.py \
- --alsologtostderr \
- --model_name=mobilenet_v2 \
- --output_file=/tmp/mobilenet_v2_inf_graph.pb
- ```
- Make sure to use intel-tensorflow v1.15, and `pip install tf_slim`.
- #### Install Intel Tensorflow 1.15 up2
- Check your Python version and pip install intel-tensorflow 1.15.0 up2 from the links below:
- https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl
- https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl
- https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl
- > Please note: The ImageNet dataset has 1001 classes, while the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need to add the `--labels_offset=1` flag in the inference graph export command for those models.
- 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer names of the inference graph pb; for mobilenet_v2 the output layer name is `MobilenetV2/Predictions/Reshape_1`
-
- 4. To freeze the exported graph, use the `freeze_graph.py` tool from the [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo
- ```shell
- python freeze_graph.py \
- --input_graph=/tmp/mobilenet_v2_inf_graph.pb \
- --input_checkpoint=./mobilenet_v2.ckpt \
- --input_binary=true \
- --output_graph=./frozen_mobilenet_v2.pb \
- --output_node_names=MobilenetV2/Predictions/Reshape_1
- ```
-
-## 3. Prepare Dataset
-
- TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
- We also prepared related scripts in the `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. Once you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directories based on the label (synset) of each image, you can use the commands below to convert it to the TF records format.
- - ```shell - cd examples/tensorflow/tf2onnx/ - # convert validation subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd mobilenet_v2/int8_export - ``` - -# Run Command -Please note the dataset is TF records format for running quantization and benchmark. - -## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model -```shell -bash run_tuning.sh --input_model=./frozen_mobilenet_v2.pb --output_model=./mobilenet_v2_int8.pb --dataset_location=/path/to/imagenet/ -``` - -## Run benchmark for Tensorflow INT8 QDQ model -```shell -bash run_benchmark.sh --input_model=./mobilenet_v2_int8.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 -bash run_benchmark.sh --input_model=./mobilenet_v2_int8.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 -``` - -## Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model -```shell -bash run_export.sh --input_model=./mobilenet_v2_int8.pb --output_model=./mobilenet_v2_int8.onnx -``` - -## Run benchmark for ONNX INT8 QDQ model -```shell -bash run_benchmark.sh --input_model=./mobilenet_v2_int8.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 -bash run_benchmark.sh --input_model=./mobilenet_v2_int8.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 -``` diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py deleted file mode 100644 index b60042374ca..00000000000 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/main.py +++ /dev/null @@ -1,137 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from argparse import ArgumentParser -import tensorflow as tf -import onnx -import os -import onnxruntime as ort -import numpy as np - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - - -def eval_func_onnx(model, dataloader, metric, postprocess=None): - metric.reset() - sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - input_names = [i.name for i in sess.get_inputs()] - for input_data, label in dataloader: - output = sess.run(None, dict(zip(input_names, [input_data]))) - if postprocess: - output, label = postprocess((output, label)) - metric.update(output, label) - return metric.result() - -def eval_func_tf(model, dataloader, metric, postprocess=None): - from neural_compressor.model import Model - model = Model(model) - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] - - for _, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - predictions = model.sess.run(output_tensor, feed_dict) - metric.update(predictions, labels) - acc = metric.result() - return acc - -class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph.""" - - def __init__(self): - """Initilization.""" - arg_parser = ArgumentParser(description='Parse args') - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - arg_parser.add_argument("--output-graph", - help='Specify tune result model save dir', - dest='output_graph') - arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') - arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') - arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') - arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') - arg_parser.add_argument('--dataset_location', dest='dataset_location', - help='location of calibration dataset and evaluate dataset') - arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') - self.args = arg_parser.parse_args() - - def run(self): - """This is neural_compressor function include tuning, export and benchmark option.""" - if self.args.tune: - from neural_compressor import quantization - from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': 10, - 'dataset': {"ImageRecord": {'root': self.args.dataset_location}}, - 'transform': {'BilinearImagenet': - {'height': 224, 'width': 224}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100]) - q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=dataloader, - eval_dataloader=dataloader) - q_model.save(self.args.output_graph) - - if self.args.export: - from neural_compressor.model import Model - from neural_compressor.config import TF2ONNXConfig - inc_model = Model(self.args.input_graph) - config = TF2ONNXConfig(dtype="int8", input_names='input[-1,224,224,3]') - inc_model.export(self.args.output_graph, config) - 
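The export branch above writes an ONNX file to `self.args.output_graph`. As an optional sanity check, a minimal sketch such as the one below (not part of the original script; the helper name and the NHWC `[1, 224, 224, 3]` dummy input inferred from `input_names='input[-1,224,224,3]'` are assumptions) can confirm that the exported graph loads and runs under ONNX Runtime before it is benchmarked:

```python
import numpy as np
import onnx
import onnxruntime as ort

def check_exported_onnx(onnx_path, input_shape=(1, 224, 224, 3)):
    """Validate the exported model and run a single dummy inference."""
    model = onnx.load(onnx_path)
    onnx.checker.check_model(model)  # structural/schema validation
    sess = ort.InferenceSession(model.SerializeToString(),
                                providers=ort.get_available_providers())
    input_name = sess.get_inputs()[0].name
    dummy = np.random.rand(*input_shape).astype(np.float32)
    outputs = sess.run(None, {input_name: dummy})
    print("Exported ONNX model runs, first output shape:", outputs[0].shape)

# Example (hypothetical path): check_exported_onnx("./mobilenet_v2_int8.onnx")
```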
- if self.args.benchmark: - if self.args.input_graph.endswith('.onnx'): - model = onnx.load(self.args.input_graph) - else: - model = self.args.input_graph - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"ImageRecord": {'root': self.args.dataset_location}}, - 'transform': {'BilinearImagenet': {'height': 224, 'width': 224}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) - def eval(model): - if isinstance(model, str): - return eval_func_tf(model, dataloader, top1) - else: - return eval_func_onnx(model, dataloader, top1) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(self.args.input_graph, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - -if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/requirements.txt deleted file mode 100644 index b964010af83..00000000000 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -tensorflow==2.11.0 -tf2onnx==1.13.0 -onnx==1.9.0; python_version < '3.10' -onnx==1.12.0; python_version == '3.10' -onnxruntime==1.10.0; python_version < '3.10' -onnxruntime==1.12.0; python_version == '3.10' -onnxruntime-extensions; python_version < '3.10' - -pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_benchmark.sh deleted file mode 100644 index e83a029e800..00000000000 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_benchmark.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - esac - done - -} - -# run_tuning -function run_benchmark { - python main.py \ - --input-graph ${input_model} \ - --mode ${mode} \ - --dataset_location ${dataset_location} \ - --batch_size ${batch_size} \ - --benchmark -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_export.sh b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_export.sh deleted file mode 100644 index 1c6d1c908fe..00000000000 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_export.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_export - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; 
- esac - done - -} - -# run_export -function run_export { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --export -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_tuning.sh b/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_tuning.sh deleted file mode 100644 index 6a9e1b859c9..00000000000 --- a/examples/tensorflow/tf2onnx/mobilenet_v2/int8_export/run_tuning.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_tuning - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --dataset_location ${dataset_location} \ - --tune -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/prepare_coco_dataset.sh b/examples/tensorflow/tf2onnx/prepare_coco_dataset.sh deleted file mode 100644 index fea0ff1c373..00000000000 --- a/examples/tensorflow/tf2onnx/prepare_coco_dataset.sh +++ /dev/null @@ -1,136 +0,0 @@ -!/bin/bash -# set -x - -DATA_DIR="${PWD}/data" -DATA_NAME="val2017" -DATA_URL_LIST='http://images.cocodataset.org/zips/val2017.zip http://images.cocodataset.org/annotations/annotations_trainval2017.zip' -PACKAGES_LIST='val2017.zip annotations_trainval2017.zip' -VAL_IMAGE_DIR=$DATA_DIR/val2017 -TRAIN_ANNOTATIONS_FILE=$DATA_DIR/annotations/empty.json -VAL_ANNOTATIONS_FILE=$DATA_DIR/annotations/instances_val2017.json -TESTDEV_ANNOTATIONS_FILE=$DATA_DIR/annotations/empty.json -OUTPUT_DIR=$DATA_DIR - -help() -{ - cat <<- EOF - - Desc: Prepare dataset for Tensorflow COCO object detection. - - -h --help help info - - --dataset_location set dataset location, default is ./data - -EOF - exit 0 -} - -function main { - init_params "$@" - download_dataset - convert_to_tf_record -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --dataset_location=*) - DATA_DIR=$(echo "$var" |cut -f2 -d=) - ;; - -h|--help) help - ;; - *) - echo "Error: No such parameter: ${var}" - exit 1 - ;; - esac - done - -} - -# removes files that will not be used anymore -function remove_zipped_packages { - for package in $PACKAGES_LIST; do - rm "$package" - done -} - -function download_tf_models_repo { - if [ ! -d models ]; then - git clone https://github.com/tensorflow/models.git - fi - cd models || exit - git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 - cd .. -} - -function divide_tf_records_by_dataset { - if [ ! -d "${DATA_DIR}/tf_test2017" ]; then - mkdir "${DATA_DIR}/tf_test2017" - fi - if [ ! -d "${DATA_DIR}/tf_train2017" ]; then - mkdir "${DATA_DIR}/tf_train2017" - fi - if [ ! -d "${DATA_DIR}/tf_val2017" ]; then - mkdir "${DATA_DIR}/tf_val2017" - fi - mv ${DATA_DIR}/coco_testdev.record* ${DATA_DIR}/tf_test2017 - mv ${DATA_DIR}/coco_train.record* ${DATA_DIR}/tf_train2017 - mv ${DATA_DIR}/coco_val.record* ${DATA_DIR}/tf_val2017 -} - -function convert { - cd models/research - protoc object_detection/protos/*.proto --python_out=. 
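The `protoc` call above compiles the Object Detection API's `.proto` definitions into `*_pb2.py` modules, and the `PYTHONPATH` exports that follow make `object_detection` and `slim` importable for `create_coco_tf_record.py`. A minimal check of that step (illustrative only, not part of the script; the particular proto module named here is an assumption) could be run from `models/research`:

```python
# Illustrative check, run from models/research after the protoc step:
# this import only succeeds once the *_pb2.py modules have been generated.
from object_detection.protos import string_int_label_map_pb2
print("object_detection protos compiled OK")
```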
- export PYTHONPATH=$PYTHONPATH:$(pwd) - export PYTHONPATH=$PYTHONPATH:$(pwd)/slim - python ./object_detection/dataset_tools/create_coco_tf_record.py --logtostderr \ - --train_image_dir=empty_dir \ - --val_image_dir="${VAL_IMAGE_DIR}" \ - --test_image_dir=empty_dir \ - --train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \ - --val_annotations_file="${VAL_ANNOTATIONS_FILE}" \ - --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \ - --output_dir="${OUTPUT_DIR}" -} - -function convert_to_tf_record { - download_tf_models_repo - convert - divide_tf_records_by_dataset -} - -# download_dataset -function download_dataset { - if [ ! -d "${DATA_DIR}" ]; then - mkdir "${DATA_DIR}" - fi - - cd "${DATA_DIR}" || exit - if [ ! -f "${VAL_IMAGE_DIR}" ]; then - - for dataset_dowload_link in $DATA_URL_LIST; do - wget "$dataset_dowload_link" - done - for package in $PACKAGES_LIST; do - unzip -o "$package" - done - remove_zipped_packages - if [ ! -d empty_dir ]; then - mkdir empty_dir - fi - - cd annotations || exit - echo "{ \"images\": {}, \"categories\": {}}" > empty.json - cd .. - else - echo "Dataset ${DATA_NAME} is exist!" - fi - - cd ../ -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/prepare_imagenet_dataset.sh b/examples/tensorflow/tf2onnx/prepare_imagenet_dataset.sh deleted file mode 100644 index 4aad5d69a3f..00000000000 --- a/examples/tensorflow/tf2onnx/prepare_imagenet_dataset.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash -# set -x - -OUTPUT_DIR="./data" -SUBSET="validation" -SHARDS=1 - -help() -{ - cat <<- EOF - Desc: Convert prepared raw imagnet dataset to tfrecord - -h --help help info - --output_dir Output data directory - default: './data' - --raw_dir Raw data directory - --shards Number of shards in TFRecord files. - default: '1' - --subset Subset of imagenet, can be validation/train. - default: 'validation' -EOF - exit 0 -} - -function main { - init_params "$@" - convert_dataset -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --output_dir=*) - OUTPUT_DIR=$(echo $var |cut -f2 -d=) - ;; - --raw_dir=*) - RAW_DIR=$(echo $var |cut -f2 -d=) - ;; - --shards=*) - SHARDS=$(echo $var |cut -f2 -d=) - ;; - --subset=*) - SUBSET=$(echo $var |cut -f2 -d=) - ;; - -h|--help) help - ;; - *) - echo "Error: No such parameter: ${var}" - exit 1 - ;; - esac - done -} - -# convert dataset -function convert_dataset { - if [ ! -d ${OUTPUT_DIR} ]; then - mkdir ${OUTPUT_DIR} - fi - python imagenet_prepare/build_imagenet_data.py \ - --imagenet_metadata_file "imagenet_prepare/imagenet_metadata.txt" \ - --labels_file "imagenet_prepare/imagenet_lsvrc_2015_synsets.txt" \ - --output_directory ${OUTPUT_DIR} \ - --subset ${SUBSET} \ - --raw_directory ${RAW_DIR} \ - --shards ${SHARDS} -} - -main "$@" - diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md deleted file mode 100644 index c2afde82d4c..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/README.md +++ /dev/null @@ -1,66 +0,0 @@ -Step-by-Step -============ - -This document is used to show how to export Tensorflow ResNet50_V1.0 FP32 model to ONNX FP32 model using Intel® Neural Compressor. -> Note: Validated Framework [Versions](/docs/source/installation_guide.md#validated-software-environment). - -# Prerequisite - -## 1. 
Environment - -### Installation -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` - -### Install requirements -```shell -pip install -r requirements.txt -``` - -### Install Intel Extension for Tensorflow -Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -### 2. Prepare Pretrained model - -```shell -wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb -``` - -### 3. Prepare Dataset - - TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. - - ```shell - cd examples/tensorflow/tf2onnx/ - # convert validation subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd resnet50_v1.0/fp32_export - ``` - -# Run Command -Please note the dataset is TF records format for running benchmark. - -## Export Tensorflow FP32 model to ONNX FP32 model -```shell -bash run_export.sh --input_model=./resnet50_fp32_pretrained_model.pb --output_model=./resnet50_v1.onnx -``` - -## Run benchmark for Tensorflow FP32 model -```shell -bash run_benchmark.sh --input_model=./resnet50_fp32_pretrained_model.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 -bash run_benchmark.sh --input_model=./resnet50_fp32_pretrained_model.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 -``` - -## Run benchmark for ONNX FP32 model -```shell -bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 -bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 -``` diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/main.py b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/main.py deleted file mode 100644 index e7c9da59897..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/main.py +++ /dev/null @@ -1,123 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from argparse import ArgumentParser -import tensorflow as tf -import onnx -import numpy as np -import onnxruntime as ort - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - - -def eval_func_onnx(model, dataloader, metric, postprocess=None): - metric.reset() - sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - input_names = [i.name for i in sess.get_inputs()] - for input_data, label in dataloader: - output = sess.run(None, dict(zip(input_names, [input_data]))) - if postprocess: - output, label = postprocess((output, label)) - metric.update(output, label) - return metric.result() - -def eval_func_tf(model, dataloader, metric, postprocess=None): - from neural_compressor.model import Model - model = Model(model) - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] - - for _, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - predictions = model.sess.run(output_tensor, feed_dict) - if postprocess: - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - acc = metric.result() - return acc - -class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph.""" - - def __init__(self): - """Initilization.""" - arg_parser = ArgumentParser(description='Parse args') - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - arg_parser.add_argument("--output-graph", - help='Specify tune result model save dir', - dest='output_graph') - arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') - arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') - arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') - arg_parser.add_argument('--dataset_location', dest='dataset_location', - help='location of calibration dataset and evaluate dataset') - arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') - self.args = arg_parser.parse_args() - - def run(self): - """This is neural_compressor function include export and benchmark option.""" - if self.args.export: - from neural_compressor.model import Model - from neural_compressor.config import TF2ONNXConfig - inc_model = Model(self.args.input_graph) - config = TF2ONNXConfig(dtype="fp32") - inc_model.export(self.args.output_graph, config) - - if self.args.benchmark: - if self.args.input_graph.endswith('.onnx'): - model = onnx.load(self.args.input_graph) - else: - model = self.args.input_graph - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': 32, - 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, - 'transform': {'ResizeCropImagenet': - {'height': 224, 'width': 224}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) - - def eval(model): - if isinstance(model, str): - return eval_func_tf(model, dataloader, top1) - else: - return eval_func_onnx(model, dataloader, top1) - - if self.args.mode == 'performance': - from 
neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - -if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/requirements.txt deleted file mode 100644 index f9eecbb4d57..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -tensorflow -intel-extension-for-tensorflow[cpu] -tf2onnx -onnx -onnxruntime -onnxruntime-extensions; python_version < '3.10' diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_benchmark.sh deleted file mode 100644 index e83a029e800..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_benchmark.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - esac - done - -} - -# run_tuning -function run_benchmark { - python main.py \ - --input-graph ${input_model} \ - --mode ${mode} \ - --dataset_location ${dataset_location} \ - --batch_size ${batch_size} \ - --benchmark -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_export.sh b/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_export.sh deleted file mode 100644 index 1c6d1c908fe..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/fp32_export/run_export.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_export - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_export -function run_export { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --export -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md deleted file mode 100644 index 6cdaf98ab2b..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/README.md +++ /dev/null @@ -1,75 +0,0 @@ -Step-by-Step -============ - -This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. -> Note: Validated Framework [Versions](/docs/source/installation_guide.md#validated-software-environment). - -# Prerequisite - -## 1. Environment - -### Installation -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` - -### Install requirements -The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this export ONNX INT8 model example. 
-The Intel Extension for Tensorflow for Intel CPUs is installed as default. -```shell -pip install -r requirements.txt -``` - -### Install Intel Extension for Tensorflow -Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -## 2 Prepare Pretrained model - -```shell -wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb -``` - - -## 3. Prepare Dataset - - TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. - - ```shell - cd examples/tensorflow/tf2onnx/ - # convert validation subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd resnet50_v1.0/int8_export - ``` - -# Run Command -Please note the dataset is TF records format for running quantization and benchmark. - -## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model -```shell -bash run_tuning.sh --input_model=./resnet50_fp32_pretrained_model.pb --output_model=./resnet50_v1_int8.pb --dataset_location=/path/to/imagenet/ -``` - -## Run benchmark for Tensorflow INT8 QDQ model -```shell -bash run_benchmark.sh --input_model=./resnet50_v1_int8.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 -bash run_benchmark.sh --input_model=./resnet50_v1_int8.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 -``` - -## Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model -```shell -bash run_export.sh --input_model=./resnet50_v1_int8.pb --output_model=./resnet50_v1_int8.onnx -``` - -## Run benchmark for ONNX INT8 QDQ model -```shell -bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 -bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 -``` - diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/main.py b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/main.py deleted file mode 100644 index a8564cc3e84..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/main.py +++ /dev/null @@ -1,160 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from argparse import ArgumentParser -import tensorflow as tf -import onnx -import numpy as np -import onnxruntime as ort - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - - -def eval_func_onnx(model, dataloader, metric, postprocess=None): - metric.reset() - sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - input_names = [i.name for i in sess.get_inputs()] - for input_data, label in dataloader: - output = sess.run(None, dict(zip(input_names, [input_data]))) - if postprocess: - output, label = postprocess((output, label)) - metric.update(output, label) - return metric.result() - -def eval_func_tf(model, dataloader, metric, postprocess=None): - from neural_compressor.model import Model - model = Model(model) - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] - - for _, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - predictions = model.sess.run(output_tensor, feed_dict) - if postprocess: - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - acc = metric.result() - return acc - -class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph.""" - - def __init__(self): - """Initilization.""" - arg_parser = ArgumentParser(description='Parse args') - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - arg_parser.add_argument("--output-graph", - help='Specify tune result model save dir', - dest='output_graph') - arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') - arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') - arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') - arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') - arg_parser.add_argument('--dataset_location', dest='dataset_location', - help='location of calibration dataset and evaluate dataset') - arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') - self.args = arg_parser.parse_args() - - def run(self): - """This is neural_compressor function include tuning, export and benchmark option.""" - if self.args.tune: - from neural_compressor import quantization - from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion - from neural_compressor.utils.create_obj_from_config import create_dataloader - calib_dataloader_args = { - 'batch_size': 10, - 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, - 'transform': {'ResizeCropImagenet': - {'height': 224, 'width': 224}}, - 'filter': None - } - calib_dataloader = create_dataloader('tensorflow', calib_dataloader_args) - 
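The calibration dataloader built above yields `(inputs, labels)` batches, the same iteration pattern that `eval_func_onnx` and `eval_func_tf` rely on. A small sketch like the following (illustrative only, not part of the original script) can confirm the batch shape before quantization is launched:

```python
# Illustrative only: inspect one calibration batch (batch_size=10, 224x224x3 images assumed).
for images, labels in calib_dataloader:
    print("calibration batch:", np.asarray(images).shape,
          "labels:", np.asarray(labels).shape)
    break
```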
eval_dataloader_args = { - 'batch_size': 32, - 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, - 'transform': {'ResizeCropImagenet': - {'height': 224, 'width': 224}}, - 'filter': None - } - eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) - op_name_list = { - 'resnet_model/dense/MatMul': - { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']}, - } - } - conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100], - outputs=['softmax_tensor'], - op_name_list=op_name_list) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) - q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader, eval_metric=top1) - q_model.save(self.args.output_graph) - - if self.args.export: - from neural_compressor.model import Model - from neural_compressor.config import TF2ONNXConfig - inc_model = Model(self.args.input_graph) - config = TF2ONNXConfig(dtype="int8", inputs_as_nchw="input_tensor:0") - inc_model.export(self.args.output_graph, config) - - if self.args.benchmark: - if self.args.input_graph.endswith('.onnx'): - model = onnx.load(self.args.input_graph) - else: - model = self.args.input_graph - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': 32, - 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, - 'transform': {'ResizeCropImagenet': - {'height': 224, 'width': 224}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) - - def eval(model): - if isinstance(model, str): - return eval_func_tf(model, dataloader, top1) - else: - return eval_func_onnx(model, dataloader, top1) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - -if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/requirements.txt deleted file mode 100644 index b5e9f4ddb52..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -tensorflow -intel-extension-for-tensorflow[cpu] -tf2onnx -onnx -onnxruntime -onnxruntime-extensions; python_version < '3.10' \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_benchmark.sh deleted file mode 100644 index e83a029e800..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_benchmark.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - 
--batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - esac - done - -} - -# run_tuning -function run_benchmark { - python main.py \ - --input-graph ${input_model} \ - --mode ${mode} \ - --dataset_location ${dataset_location} \ - --batch_size ${batch_size} \ - --benchmark -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_export.sh b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_export.sh deleted file mode 100644 index 1c6d1c908fe..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_export.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_export - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_export -function run_export { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --export -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_tuning.sh b/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_tuning.sh deleted file mode 100644 index 6a9e1b859c9..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.0/int8_export/run_tuning.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_tuning - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --dataset_location ${dataset_location} \ - --tune -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md deleted file mode 100644 index 77b97e92057..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/README.md +++ /dev/null @@ -1,67 +0,0 @@ -Step-by-Step -============ - -This document is used to show how to export Tensorflow ResNet50 FP32 model to ONNX FP32 model using Intel® Neural Compressor. - - -# Prerequisite - -## 1. Environment - -### Installation -Recommend python 3.8 or higher version. -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` - -### Install requirements -```shell -pip install -r requirements.txt -``` - -### Install Intel Extension for Tensorflow -Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -## 2. Prepare Pretrained model - -```bash -wget https://zenodo.org/record/2535873/files/resnet50_v1.pb -``` - -## 3. Prepare Dataset - - TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. 
If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. - - ```shell - cd examples/tensorflow/tf2onnx/ - # convert validation subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd resnet50_v1.5/fp32_export - ``` - -# Run Command -Please note the dataset is TF records format for running benchmark. - -## Export Tensorflow FP32 model to ONNX FP32 model -```shell -bash run_export.sh --input_model=./resnet50_v1.pb --output_model=./resnet50_v1.onnx -``` - -## Run benchmark for Tensorflow FP32 model -```shell -bash run_benchmark.sh --input_model=./resnet50_v1.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 -bash run_benchmark.sh --input_model=./resnet50_v1.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 -``` - -## Run benchmark for ONNX FP32 model -```shell -bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 -bash run_benchmark.sh --input_model=./resnet50_v1.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 -``` diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py deleted file mode 100644 index 77a34568144..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/main.py +++ /dev/null @@ -1,122 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from argparse import ArgumentParser -import tensorflow as tf -import onnx -import os -import onnxruntime as ort -import numpy as np - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - - -def eval_func_onnx(model, dataloader, metric, postprocess=None): - metric.reset() - sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - input_names = [i.name for i in sess.get_inputs()] - for input_data, label in dataloader: - output = sess.run(None, dict(zip(input_names, [input_data]))) - if postprocess: - output, label = postprocess((output, label)) - metric.update(output[1], label) - return metric.result() - -def eval_func_tf(model, dataloader, metric, postprocess=None): - from neural_compressor.model import Model - model = Model(model) - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] - - for _, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - predictions = model.sess.run(output_tensor, feed_dict) - metric.update(predictions[1], labels) - acc = metric.result() - return acc - -class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph.""" - - def __init__(self): - """Initilization.""" - arg_parser = ArgumentParser(description='Parse args') - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - arg_parser.add_argument("--output-graph", - help='Specify tune result model save dir', - dest='output_graph') - arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') - arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') - arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') - arg_parser.add_argument('--dataset_location', dest='dataset_location', - help='location of calibration dataset and evaluate dataset') - arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') - self.args = arg_parser.parse_args() - - def run(self): - """This is neural_compressor function include export and benchmark option.""" - if self.args.export: - from neural_compressor.model import Model - from neural_compressor.config import TF2ONNXConfig - inc_model = Model(self.args.input_graph) - config = TF2ONNXConfig(dtype="fp32") - inc_model.export(self.args.output_graph, config) - - if self.args.benchmark: - if self.args.input_graph.endswith('.onnx'): - model = onnx.load(self.args.input_graph) - else: - model = self.args.input_graph - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': 32, - 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, - 'transform': {'ResizeCropImagenet': - {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) - - def eval(model): - if isinstance(model, str): - return eval_func_tf(model, dataloader, top1) - else: - return eval_func_onnx(model, dataloader, top1) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - 
from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - -if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/requirements.txt deleted file mode 100644 index 16783f94457..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -tensorflow==2.11.0 -tf2onnx==1.13.0 -onnx==1.9.0; python_version < '3.10' -onnx==1.12.0; python_version == '3.10' -onnxruntime==1.10.0; python_version < '3.10' -onnxruntime==1.12.0; python_version == '3.10' -onnxruntime-extensions; python_version < '3.10' - -pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_benchmark.sh deleted file mode 100644 index e83a029e800..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_benchmark.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - esac - done - -} - -# run_tuning -function run_benchmark { - python main.py \ - --input-graph ${input_model} \ - --mode ${mode} \ - --dataset_location ${dataset_location} \ - --batch_size ${batch_size} \ - --benchmark -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_export.sh b/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_export.sh deleted file mode 100644 index 1c6d1c908fe..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/fp32_export/run_export.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_export - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_export -function run_export { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --export -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md deleted file mode 100644 index 69811e0e347..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/README.md +++ /dev/null @@ -1,74 +0,0 @@ -Step-by-Step -============ - -This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. - - -# Prerequisite - -## 1. Environment - -### Installation -Recommend python 3.8 or higher version. 
-```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` - -### Install requirements -The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this export ONNX INT8 model example. -The Intel Extension for Tensorflow for Intel CPUs is installed as default. -```shell -pip install -r requirements.txt -``` - -### Install Intel Extension for Tensorflow -Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -## 2. Prepare Pretrained model - -```bash -wget https://zenodo.org/record/2535873/files/resnet50_v1.pb -``` - -## 3. Prepare Dataset - - TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. - - ```shell - cd examples/tensorflow/tf2onnx/ - # convert validation subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd resnet50_v1.5/int8_export - ``` - -# Run Command -Please note the dataset is TF records format for running quantization and benchmark. - -## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model -```shell -bash run_tuning.sh --input_model=./resnet50_v1.pb --output_model=./resnet50_v1_int8.pb --dataset_location=/path/to/imagenet/ -``` - -## Run benchmark for Tensorflow INT8 model -```shell -bash run_benchmark.sh --input_model=./resnet50_v1_int8.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 -bash run_benchmark.sh --input_model=./resnet50_v1_int8.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 -``` - -## Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model -```shell -bash run_export.sh --input_model=./resnet50_v1_int8.pb --output_model=./resnet50_v1_int8.onnx -``` - -## Run benchmark for ONNX INT8 QDQ model -```shell -bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 -bash run_benchmark.sh --input_model=./resnet50_v1_int8.onnx --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 -``` diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py deleted file mode 100644 index 511abcc0901..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/main.py +++ /dev/null @@ -1,160 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from argparse import ArgumentParser -import tensorflow as tf -import onnx -import os -import onnxruntime as ort -import numpy as np - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - - -def eval_func_onnx(model, dataloader, metric, postprocess=None): - metric.reset() - sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - input_names = [i.name for i in sess.get_inputs()] - for input_data, label in dataloader: - output = sess.run(None, dict(zip(input_names, [input_data]))) - if postprocess: - output, label = postprocess((output, label)) - metric.update(output, label) - return metric.result() - -def eval_func_tf(model, dataloader, metric, postprocess=None): - from neural_compressor.model import Model - model = Model(model) - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] - - for _, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - predictions = model.sess.run(output_tensor, feed_dict) - metric.update(predictions, labels) - acc = metric.result() - return acc - -class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph.""" - - def __init__(self): - """Initilization.""" - arg_parser = ArgumentParser(description='Parse args') - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - arg_parser.add_argument("--output-graph", - help='Specify tune result model save dir', - dest='output_graph') - arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') - arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') - arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') - arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') - arg_parser.add_argument('--dataset_location', dest='dataset_location', - help='location of calibration dataset and evaluate dataset') - arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') - self.args = arg_parser.parse_args() - - def run(self): - """This is neural_compressor function include tuning, export and benchmark option.""" - if self.args.tune: - from neural_compressor import quantization - from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion - from neural_compressor.utils.create_obj_from_config import create_dataloader - calib_dataloader_args = { - 'batch_size': 10, - 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, - 'transform': {'ResizeCropImagenet': - {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, - 'filter': None - } - calib_dataloader = create_dataloader('tensorflow', calib_dataloader_args) - eval_dataloader_args = { - 'batch_size': 
32, - 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, - 'transform': {'ResizeCropImagenet': - {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, - 'filter': None - } - eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) - op_name_list = { - 'resnet_model/dense/MatMul': - { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']}, - } - } - conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100], - outputs=['softmax_tensor'], - accuracy_criterion = AccuracyCriterion(tolerable_loss=0.3), - op_name_list=op_name_list) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) - q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader, eval_metric=top1) - q_model.save(self.args.output_graph) - - if self.args.export: - from neural_compressor.model import Model - from neural_compressor.config import TF2ONNXConfig - inc_model = Model(self.args.input_graph) - config = TF2ONNXConfig(dtype="int8") - inc_model.export(self.args.output_graph, config) - - if self.args.benchmark: - if self.args.input_graph.endswith('.onnx'): - model = onnx.load(self.args.input_graph) - else: - model = self.args.input_graph - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': 32, - 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, - 'transform': {'ResizeCropImagenet': - {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) - - def eval(model): - if isinstance(model, str): - return eval_func_tf(model, dataloader, top1) - else: - return eval_func_onnx(model, dataloader, top1) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - -if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/requirements.txt deleted file mode 100644 index b964010af83..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -tensorflow==2.11.0 -tf2onnx==1.13.0 -onnx==1.9.0; python_version < '3.10' -onnx==1.12.0; python_version == '3.10' -onnxruntime==1.10.0; python_version < '3.10' -onnxruntime==1.12.0; python_version == '3.10' -onnxruntime-extensions; python_version < '3.10' - -pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_benchmark.sh deleted file mode 100644 index e83a029e800..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_benchmark.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - 
-} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - esac - done - -} - -# run_tuning -function run_benchmark { - python main.py \ - --input-graph ${input_model} \ - --mode ${mode} \ - --dataset_location ${dataset_location} \ - --batch_size ${batch_size} \ - --benchmark -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_export.sh b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_export.sh deleted file mode 100644 index 1c6d1c908fe..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_export.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_export - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_export -function run_export { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --export -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_tuning.sh b/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_tuning.sh deleted file mode 100644 index 6a9e1b859c9..00000000000 --- a/examples/tensorflow/tf2onnx/resnet50v1.5/int8_export/run_tuning.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_tuning - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --dataset_location ${dataset_location} \ - --tune -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md deleted file mode 100644 index c48d59b35a2..00000000000 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/README.md +++ /dev/null @@ -1,76 +0,0 @@ -Step-by-Step -============ - -This document is used to show how to export Tensorflow ssd_mobilenet_v1 FP32 model to ONNX FP32 model using Intel® Neural Compressor. - - -# Prerequisite - -## 1. Environment - -### Installation -Recommend python 3.8 or higher version. -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` - -### Install requirements -```shell -pip install -r requirements.txt -``` - -### Install Intel Extension for Tensorflow -Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -## 2. Prepare Pretrained model - -```shell -export MODEL=ssd_mobilenet_v1_coco_2018_01_28 -wget http://download.tensorflow.org/models/object_detection/$MODEL.tar.gz -tar -xvf $MODEL.tar.gz -``` - -## 3. 
Prepare Dataset - -### Automatic dataset download - -> **_Note: `prepare_coco_dataset.sh` script works with TF version 1.x._** - -Run the `prepare_coco_dataset.sh` script located in `examples/tensorflow/tf2onnx`. - -Usage: -```shell -cd examples/tensorflow/tf2onnx/ -bash prepare_coco_dataset.sh -cd ssd_mobilenet_v1/fp32_export -``` - -This script will download the *train*, *validation* and *test* COCO datasets. Furthermore it will convert them to -tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. - -### Manual dataset download -Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). - -# Run Command -Please note the dataset is TF records format for running benchmark. - -## Export Tensorflow FP32 model to ONNX FP32 model -```shell -bash run_export.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --output_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx -``` - -## Run benchmark for Tensorflow FP32 model -```shell -bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 -bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 -``` - -## Run benchmark for ONNX FP32 model -```shell -bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 -bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 -``` diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py deleted file mode 100644 index dedd3db6ee3..00000000000 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/main.py +++ /dev/null @@ -1,157 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from argparse import ArgumentParser -import tensorflow as tf -import onnx -import onnxruntime as ort -import numpy as np - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - - -def eval_func_onnx(model, dataloader, metric, postprocess=None): - metric.reset() - session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - ort_inputs = {} - len_inputs = len(session.get_inputs()) - inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - for inputs, labels in dataloader: - if not isinstance(labels, list): - labels = [labels] - if len_inputs == 1: - ort_inputs.update( - inputs if isinstance(inputs, dict) else {inputs_names[0]: np.array(inputs,dtype=np.uint8)} - ) - else: - assert len_inputs == len(inputs), \ - 'number of input tensors must align with graph inputs' - - if isinstance(inputs, dict): # pragma: no cover - ort_inputs.update(inputs) - else: - for i in range(len_inputs): - # in case dataloader contains non-array input - if not isinstance(inputs[i], np.ndarray): - ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) - else: - ort_inputs.update({inputs_names[i]: inputs[i]}) - - predictions = session.run(None, ort_inputs) - - if postprocess is not None: - predictions, labels = postprocess((predictions, labels)) - - if not hasattr(metric, "compare_label") or \ - (hasattr(metric, "compare_label") and metric.compare_label): - metric.update(predictions, labels) - acc = metric.result() - return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] - -def eval_func_tf(model, dataloader, metric, postprocess=None): - metric.reset() - - from neural_compressor.model import Model - if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph): - model = Model(model) - model.input_tensor_names = ["image_tensor:0"] - model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \ - "detection_scores:0", "detection_classes:0"] - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] - - for _, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - predictions = model.sess.run(output_tensor, feed_dict) - metric.update(predictions, labels) - - acc = metric.result() - return acc - -class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph.""" - - def __init__(self): - """Initilization.""" - arg_parser = ArgumentParser(description='Parse args') - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - arg_parser.add_argument("--output-graph", - help='Specify tune result model save dir', - dest='output_graph') - arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') - arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') - arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') - arg_parser.add_argument('--dataset_location', dest='dataset_location', - help='location of calibration dataset and evaluate dataset') - arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') - self.args = arg_parser.parse_args() - - def run(self): - """This is neural_compressor function include export 
and benchmark option.""" - if self.args.export: - from neural_compressor.model import Model - from neural_compressor.config import TF2ONNXConfig - inc_model = Model(self.args.input_graph) - inc_model.input_tensor_names = ["image_tensor"] - inc_model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] - config = TF2ONNXConfig(dtype="fp32") - inc_model.export(self.args.output_graph, config) - - if self.args.benchmark: - if self.args.input_graph.endswith('.onnx'): - model = onnx.load(self.args.input_graph) - else: - model = self.args.input_graph - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, - 'transform': {'Resize': {'size': 300}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - - def eval(model): - if isinstance(model, str): - return eval_func_tf(model, dataloader, mAP2) - else: - return eval_func_onnx(model, dataloader, mAP2) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - -if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/requirements.txt deleted file mode 100644 index 16783f94457..00000000000 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -tensorflow==2.11.0 -tf2onnx==1.13.0 -onnx==1.9.0; python_version < '3.10' -onnx==1.12.0; python_version == '3.10' -onnxruntime==1.10.0; python_version < '3.10' -onnxruntime==1.12.0; python_version == '3.10' -onnxruntime-extensions; python_version < '3.10' - -pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_benchmark.sh deleted file mode 100644 index e83a029e800..00000000000 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_benchmark.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - esac - done - -} - -# run_tuning -function run_benchmark { - python main.py \ - --input-graph ${input_model} \ - --mode ${mode} \ - --dataset_location ${dataset_location} \ - --batch_size ${batch_size} \ - --benchmark -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_export.sh 
b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_export.sh deleted file mode 100644 index 1c6d1c908fe..00000000000 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/fp32_export/run_export.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_export - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_export -function run_export { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --export -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md deleted file mode 100644 index 83d4edbabf4..00000000000 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/README.md +++ /dev/null @@ -1,81 +0,0 @@ -Step-by-Step -============ - -This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. - - -# Prerequisite - -## 1. Environment - -### Installation -Recommend python 3.8 or higher version. -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` - -### Install requirements -```shell -pip install -r requirements.txt -``` - -### Install Intel Extension for Tensorflow -Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -## 2. Prepare Pretrained model - -```bash -export MODEL=ssd_mobilenet_v1_coco_2018_01_28 -wget http://download.tensorflow.org/models/object_detection/$MODEL.tar.gz -tar -xvf $MODEL.tar.gz -``` - -## 3. Prepare Dataset - -### Automatic dataset download - -> **_Note: `prepare_coco_dataset.sh` script works with TF version 1.x._** - -Run the `prepare_coco_dataset.sh` script located in `examples/tensorflow/tf2onnx`. - -Usage: -```shell -cd examples/tensorflow/tf2onnx/ -bash prepare_coco_dataset.sh -cd ssd_mobilenet_v1/int8_export -``` - -This script will download the *train*, *validation* and *test* COCO datasets. Furthermore it will convert them to -tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. - -### Manual dataset download -Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). - -# Run Command -Please note the dataset is TF records format for running quantization and benchmark. 
- -## Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model -```shell -bash run_tuning.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28 --output_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --dataset_location=/path/to/coco_dataset/ -``` - -## Run benchmark for Tensorflow INT8 QDQ model -```shell -bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 -bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 -``` - -## Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model -```shell -bash run_export.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.pb --output_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx -``` - -## Run benchmark for ONNX INT8 QDQ model -```shell -bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=accuracy --dataset_location=/path/to/coco_dataset/ --batch_size=16 -bash run_benchmark.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28_int8.onnx --mode=performance --dataset_location=/path/to/coco_dataset/ --batch_size=16 -``` diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py deleted file mode 100644 index 10e0cb1e754..00000000000 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/main.py +++ /dev/null @@ -1,187 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from argparse import ArgumentParser -import tensorflow as tf -import onnx -import os -import onnxruntime as ort -import numpy as np - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - - -def eval_func_onnx(model, dataloader, metric, postprocess=None): - metric.reset() - session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - ort_inputs = {} - len_inputs = len(session.get_inputs()) - inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - for inputs, labels in dataloader: - if not isinstance(labels, list): - labels = [labels] - if len_inputs == 1: - ort_inputs.update( - inputs if isinstance(inputs, dict) else {inputs_names[0]: np.array(inputs,dtype=np.uint8)} - ) - else: - assert len_inputs == len(inputs), \ - 'number of input tensors must align with graph inputs' - - if isinstance(inputs, dict): # pragma: no cover - ort_inputs.update(inputs) - else: - for i in range(len_inputs): - # in case dataloader contains non-array input - if not isinstance(inputs[i], np.ndarray): - ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) - else: - ort_inputs.update({inputs_names[i]: inputs[i]}) - - predictions = session.run(None, ort_inputs) - - if postprocess is not None: - predictions, labels = postprocess((predictions, labels)) - - if not hasattr(metric, "compare_label") or \ - (hasattr(metric, "compare_label") and metric.compare_label): - metric.update(predictions, labels) - acc = metric.result() - return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] - -def eval_func_tf(model, dataloader, metric, postprocess=None): - metric.reset() - - from neural_compressor.model import Model - if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph): - model = Model(model) - model.input_tensor_names = ["image_tensor:0"] - model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \ - "detection_scores:0", "detection_classes:0"] - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] - - for _, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - predictions = model.sess.run(output_tensor, feed_dict) - metric.update(predictions, labels) - - acc = metric.result() - return acc - -class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph.""" - - def __init__(self): - """Initilization.""" - arg_parser = ArgumentParser(description='Parse args') - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - arg_parser.add_argument("--output-graph", - help='Specify tune result model save dir', - dest='output_graph') - arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') - arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') - arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') - arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') - arg_parser.add_argument('--dataset_location', dest='dataset_location', - help='location of calibration dataset and evaluate dataset') - arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of 
benchmark') - self.args = arg_parser.parse_args() - - def run(self): - """This is neural_compressor function include tuning, export and benchmark option.""" - if self.args.tune: - from neural_compressor import quantization - from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion - from neural_compressor.utils.create_obj_from_config import create_dataloader - calib_dataloader_args = { - 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, - 'transform': None, - 'filter': None - } - calib_dataloader = create_dataloader('tensorflow', calib_dataloader_args) - eval_dataloader_args = { - 'batch_size': 10, - 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, - 'transform': {'Resize': {'size': 300}}, - 'filter': None - } - eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) - conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[10, 50, 100, 200], inputs=['image_tensor'], - outputs=['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes'], - accuracy_criterion = AccuracyCriterion(tolerable_loss=0.1)) - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader, eval_metric=mAP2) - q_model.save(self.args.output_graph) - - if self.args.export: - from neural_compressor.model import Model - from neural_compressor.config import TF2ONNXConfig - inc_model = Model(self.args.input_graph) - inc_model.input_tensor_names = ["image_tensor"] - inc_model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] - config = TF2ONNXConfig(dtype="int8") - inc_model.export(self.args.output_graph, config) - - if self.args.benchmark: - if self.args.input_graph.endswith('.onnx'): - model = onnx.load(self.args.input_graph) - else: - model = self.args.input_graph - - from neural_compressor.utils.create_obj_from_config import create_dataloader - dataloader_args = { - 'batch_size': self.args.batch_size, - 'dataset': {"COCORecord": {'root':self.args.dataset_location}}, - 'transform': {'Resize': {'size': 300}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', dataloader_args) - - from neural_compressor.metric import COCOmAPv2 - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) - - def eval(model): - if isinstance(model, str): - return eval_func_tf(model, dataloader, mAP2) - else: - return eval_func_onnx(model, dataloader, mAP2) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - - -if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/requirements.txt deleted file mode 100644 index b964010af83..00000000000 --- 
a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -tensorflow==2.11.0 -tf2onnx==1.13.0 -onnx==1.9.0; python_version < '3.10' -onnx==1.12.0; python_version == '3.10' -onnxruntime==1.10.0; python_version < '3.10' -onnxruntime==1.12.0; python_version == '3.10' -onnxruntime-extensions; python_version < '3.10' - -pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_benchmark.sh deleted file mode 100644 index e83a029e800..00000000000 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_benchmark.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - esac - done - -} - -# run_tuning -function run_benchmark { - python main.py \ - --input-graph ${input_model} \ - --mode ${mode} \ - --dataset_location ${dataset_location} \ - --batch_size ${batch_size} \ - --benchmark -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_export.sh b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_export.sh deleted file mode 100644 index 1c6d1c908fe..00000000000 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_export.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_export - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_export -function run_export { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --export -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_tuning.sh b/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_tuning.sh deleted file mode 100644 index 6a9e1b859c9..00000000000 --- a/examples/tensorflow/tf2onnx/ssd_mobilenet_v1/int8_export/run_tuning.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_tuning - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --dataset_location ${dataset_location} \ - --tune -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md b/examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md deleted file mode 100644 index fed776aa9d0..00000000000 --- a/examples/tensorflow/tf2onnx/vgg16/fp32_export/README.md +++ /dev/null @@ -1,99 +0,0 @@ -Step-by-Step -============ - -This document is used to show how to export Tensorflow VGG16 FP32 model to ONNX FP32 model using Intel® Neural Compressor. 
- - -# Prerequisite - -## 1. Environment - -### Install Intel® Neural Compressor -```shell -pip install neural-compressor -``` - -### Install requirements -```shell -pip install -r requirements.txt -``` - -### Install Intel Extension for Tensorflow -Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -## 2. Prepare Model - -The vgg16 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). -We can get the pb file by convert the checkpoint file. - - 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models) - ```shell - wget http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz - tar -xvf vgg_16_2016_08_28.tar.gz - ``` - - 2. Exporting the Inference Graph - ```shell - git clone https://github.com/tensorflow/models - cd models/research/slim - python export_inference_graph.py \ - --alsologtostderr \ - --model_name=vgg_16 \ - --output_file=/tmp/vgg_16_inf_graph.pb - ``` - Make sure to use intel-tensorflow v1.15, and pip install tf_slim. - #### Install Intel Tensorflow 1.15 up2 - Check your python version and use pip install 1.15.0 up2 from links below: - https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl - https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl - https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl - > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. - - 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `vgg_16/fc8/squeezed` - - 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo - ```shell - python freeze_graph.py \ - --input_graph=/tmp/vgg_16_inf_graph.pb \ - --input_checkpoint=./vgg_16.ckpt \ - --input_binary=true \ - --output_graph=./frozen_vgg16.pb \ - --output_node_names=vgg_16/fc8/squeezed - ``` - -### 3. Prepare Dataset - - TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. 
- - ```shell - cd examples/tensorflow/tf2onnx/ - # convert validation subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd vgg16/fp32_export - ``` - -## Run Command -Please note the dataset is TF records format for running benchmark. - -### Export Tensorflow FP32 model to ONNX FP32 model -```shell -bash run_export.sh --input_model=./frozen_vgg16.pb --output_model=./vgg_16.onnx -``` - -## Run benchmark for Tensorflow FP32 model -```shell -bash run_benchmark.sh --input_model=./frozen_vgg16.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 -bash run_benchmark.sh --input_model=./frozen_vgg16.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 -``` - -### Run benchmark for ONNX FP32 model -```shell -bash run_benchmark.sh --input_model=./vgg_16.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 -bash run_benchmark.sh --input_model=./vgg_16.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 -``` diff --git a/examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py b/examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py deleted file mode 100644 index 7520f37201c..00000000000 --- a/examples/tensorflow/tf2onnx/vgg16/fp32_export/main.py +++ /dev/null @@ -1,124 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import os -import onnx -import numpy as np -import tensorflow as tf -import onnxruntime as ort -from argparse import ArgumentParser -from neural_compressor.data import LabelShift -from neural_compressor.metric import TensorflowTopK -from neural_compressor.utils.create_obj_from_config import create_dataloader - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - -def eval_func_onnx(model, dataloader, metric, postprocess=None): - metric.reset() - sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - input_names = [i.name for i in sess.get_inputs()] - - for input_data, label in dataloader: - output = sess.run(None, dict(zip(input_names, [input_data]))) - - output, label = postprocess((output, label)) - metric.update(output, label) - - acc = metric.result() - return acc - -def eval_func_tf(model, dataloader, metric, postprocess=None): - from neural_compressor.model import Model - model = Model(model) - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] - - for _, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - predictions = model.sess.run(output_tensor, feed_dict) - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - acc = metric.result() - return acc - -class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph.""" - - def __init__(self): - """Initilization.""" - arg_parser = ArgumentParser(description='Parse args') - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - arg_parser.add_argument("--output-graph", - help='Specify tune result model save dir', - dest='output_graph') - arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') - arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') - arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') - arg_parser.add_argument('--dataset_location', dest='dataset_location', - help='location of calibration dataset and evaluate dataset') - arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') - self.args = arg_parser.parse_args() - - def run(self): - """This is neural_compressor function include export and benchmark option.""" - top1 = TensorflowTopK(k=1) - postprocess = LabelShift(label_shift=1) - if self.args.export: - from neural_compressor.model import Model - from neural_compressor.config import TF2ONNXConfig - inc_model = Model(self.args.input_graph) - config = TF2ONNXConfig(dtype="fp32") - inc_model.export(self.args.output_graph, config) - - if self.args.benchmark: - if self.args.input_graph.endswith('.onnx'): - model = onnx.load(self.args.input_graph) - else: - model = self.args.input_graph - eval_dataloader_args = { - 'batch_size': 32, - 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, - 'transform': {'ResizeCropImagenet': - {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, - 'filter': None - } - eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) - - def eval(model): - if isinstance(model, str): - return eval_func_tf(model, 
eval_dataloader, top1, postprocess) - else: - return eval_func_onnx(model, eval_dataloader, top1, postprocess) - - if self.args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=eval_dataloader) - elif self.args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % eval_dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - -if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/vgg16/fp32_export/requirements.txt b/examples/tensorflow/tf2onnx/vgg16/fp32_export/requirements.txt deleted file mode 100644 index 16783f94457..00000000000 --- a/examples/tensorflow/tf2onnx/vgg16/fp32_export/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -tensorflow==2.11.0 -tf2onnx==1.13.0 -onnx==1.9.0; python_version < '3.10' -onnx==1.12.0; python_version == '3.10' -onnxruntime==1.10.0; python_version < '3.10' -onnxruntime==1.12.0; python_version == '3.10' -onnxruntime-extensions; python_version < '3.10' - -pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/tensorflow/tf2onnx/vgg16/fp32_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/vgg16/fp32_export/run_benchmark.sh deleted file mode 100644 index e83a029e800..00000000000 --- a/examples/tensorflow/tf2onnx/vgg16/fp32_export/run_benchmark.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - esac - done - -} - -# run_tuning -function run_benchmark { - python main.py \ - --input-graph ${input_model} \ - --mode ${mode} \ - --dataset_location ${dataset_location} \ - --batch_size ${batch_size} \ - --benchmark -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/vgg16/fp32_export/run_export.sh b/examples/tensorflow/tf2onnx/vgg16/fp32_export/run_export.sh deleted file mode 100644 index 1c6d1c908fe..00000000000 --- a/examples/tensorflow/tf2onnx/vgg16/fp32_export/run_export.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_export - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_export -function run_export { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --export -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/README.md b/examples/tensorflow/tf2onnx/vgg16/int8_export/README.md deleted file mode 100644 index 17d5150779e..00000000000 --- a/examples/tensorflow/tf2onnx/vgg16/int8_export/README.md +++ /dev/null @@ -1,106 +0,0 @@ -Step-by-Step -============ - -This document is used to show how to export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model using Intel® Neural Compressor. - - -# Prerequisite - -## 1. 
Environment - -### Install Intel® Neural Compressor -```shell -pip install neural-compressor -``` - -### Install requirements -The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this export ONNX INT8 model example. -The Intel Extension for Tensorflow for Intel CPUs is installed as default. -```shell -pip install -r requirements.txt -``` - -### Install Intel Extension for Tensorflow -Intel Extension for Tensorflow is mandatory to be installed for exporting Tensorflow model to ONNX. -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -## 2. Prepare Model - -The vgg16 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). -We can get the pb file by convert the checkpoint file. - - 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models) - ```shell - wget http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz - tar -xvf vgg_16_2016_08_28.tar.gz - ``` - - 2. Exporting the Inference Graph - ```shell - git clone https://github.com/tensorflow/models - cd models/research/slim - python export_inference_graph.py \ - --alsologtostderr \ - --model_name=vgg_16 \ - --output_file=/tmp/vgg_16_inf_graph.pb - ``` - Make sure to use intel-tensorflow v1.15, and pip install tf_slim. - #### Install Intel Tensorflow 1.15 up2 - Check your python version and use pip install 1.15.0 up2 from links below: - https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl - https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl - https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl - > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. - - 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `vgg_16/fc8/squeezed` - - 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo - ```shell - python freeze_graph.py \ - --input_graph=/tmp/vgg_16_inf_graph.pb \ - --input_checkpoint=./vgg_16.ckpt \ - --input_binary=true \ - --output_graph=./frozen_vgg16.pb \ - --output_node_names=vgg_16/fc8/squeezed - ``` - -### 3. Prepare Dataset - - TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. 
- - ```shell - cd examples/tensorflow/tf2onnx/ - # convert validation subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_imagenet_dataset.sh --output_dir=/path/to/imagenet/ --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd vgg16/int8_export - ``` - -## Run Command -Please note the dataset is TF records format for running quantization and benchmark. - -### Quantize Tensorflow FP32 model to Tensorflow INT8 QDQ model -```shell -bash run_tuning.sh --input_model=./frozen_vgg16.pb --output_model=./frozen_vgg16_int8.pb --dataset_location=/path/to/imagenet/ -``` - -### Export Tensorflow INT8 QDQ model to ONNX INT8 QDQ model -```shell -bash run_export.sh --input_model=./frozen_vgg16_int8.pb --output_model=./frozen_vgg16_int8.onnx -``` - -## Run benchmark for Tensorflow INT8 model -```shell -bash run_benchmark.sh --input_model=./frozen_vgg16_int8.pb --mode=accuracy --dataset_location=/path/to/imagenet/ --batch_size=32 -bash run_benchmark.sh --input_model=./frozen_vgg16_int8.pb --mode=performance --dataset_location=/path/to/imagenet/ --batch_size=1 -``` - -### Run benchmark for ONNX INT8 QDQ model -```shell -bash run_benchmark.sh --input_model=./frozen_vgg16_int8.onnx --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 -bash run_benchmark.sh --input_model=./frozen_vgg16_int8.onnx --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 -``` \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/main.py b/examples/tensorflow/tf2onnx/vgg16/int8_export/main.py deleted file mode 100644 index 1ae9aa666fb..00000000000 --- a/examples/tensorflow/tf2onnx/vgg16/int8_export/main.py +++ /dev/null @@ -1,159 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import os -import onnx -import numpy as np -import tensorflow as tf -import onnxruntime as ort -from argparse import ArgumentParser -from neural_compressor.data import LabelShift -from neural_compressor.metric import TensorflowTopK -from neural_compressor.utils.create_obj_from_config import create_dataloader - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - -arg_parser = ArgumentParser(description='Parse args') -arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') -arg_parser.add_argument("--output-graph", - help='Specify tune result model save dir', - dest='output_graph') -arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') -arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') -arg_parser.add_argument('--export', dest='export', action='store_true', help='use neural_compressor to export.') -arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') -arg_parser.add_argument('--dataset_location', dest='dataset_location', - help='location of calibration dataset and evaluate dataset') -arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') -args = arg_parser.parse_args() - -def eval_func_onnx(model, dataloader, metric, postprocess=None): - metric.reset() - sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - input_names = [i.name for i in sess.get_inputs()] - - for input_data, label in dataloader: - output = sess.run(None, dict(zip(input_names, [input_data]))) - - output, label = postprocess((output, label)) - metric.update(output, label) - - acc = metric.result() - return acc - -def eval_func_tf(model): - from neural_compressor.model import Model - metric = TensorflowTopK(k=1) - postprocess = LabelShift(label_shift=1) - model = Model(model) - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] - eval_dataloader_args = { - 'batch_size': 32, - 'dataset': {"ImageRecord": {'root':args.dataset_location}}, - 'transform': {'ResizeCropImagenet': - {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', eval_dataloader_args) - - for _, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - predictions = model.sess.run(output_tensor, feed_dict) - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - acc = metric.result() - return acc - -class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph.""" - - def run(self): - """This is neural_compressor function include tuning, export and benchmark option.""" - top1 = TensorflowTopK(k=1) - postprocess = LabelShift(label_shift=1) - if args.tune: - from neural_compressor import quantization - from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion - calib_dataloader_args = { - 'batch_size': 10, - 'dataset': {"ImageRecord": {'root':args.dataset_location}}, - 'transform': {'ResizeCropImagenet': - {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, - 'filter': None - } - calib_dataloader = 
create_dataloader('tensorflow', calib_dataloader_args) - op_name_list = { - 'resnet_model/dense/MatMul': - { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']}, - } - } - conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100], - outputs=['softmax_tensor'], - op_name_list=op_name_list) - q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_func=eval_func_tf) - q_model.save(args.output_graph) - - if args.export: - from neural_compressor.model import Model - from neural_compressor.config import TF2ONNXConfig - inc_model = Model(args.input_graph) - config = TF2ONNXConfig(dtype="int8") - inc_model.export(args.output_graph, config) - - if args.benchmark: - if args.input_graph.endswith('.onnx'): - model = onnx.load(args.input_graph) - else: - model = args.input_graph - eval_dataloader_args = { - 'batch_size': 32, - 'dataset': {"ImageRecord": {'root':args.dataset_location}}, - 'transform': {'ResizeCropImagenet': - {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, - 'filter': None - } - eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) - - def eval(model): - if isinstance(model, str): - return eval_func_tf(model) - else: - return eval_func_onnx(model, eval_dataloader, top1, postprocess) - - if args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=eval_dataloader) - elif args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % eval_dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - -if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/requirements.txt b/examples/tensorflow/tf2onnx/vgg16/int8_export/requirements.txt deleted file mode 100644 index b964010af83..00000000000 --- a/examples/tensorflow/tf2onnx/vgg16/int8_export/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -tensorflow==2.11.0 -tf2onnx==1.13.0 -onnx==1.9.0; python_version < '3.10' -onnx==1.12.0; python_version == '3.10' -onnxruntime==1.10.0; python_version < '3.10' -onnxruntime==1.12.0; python_version == '3.10' -onnxruntime-extensions; python_version < '3.10' - -pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/run_benchmark.sh b/examples/tensorflow/tf2onnx/vgg16/int8_export/run_benchmark.sh deleted file mode 100644 index e83a029e800..00000000000 --- a/examples/tensorflow/tf2onnx/vgg16/int8_export/run_benchmark.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - esac - done - -} - -# run_tuning -function run_benchmark { - python main.py \ - --input-graph ${input_model} \ - --mode ${mode} \ - --dataset_location ${dataset_location} \ - --batch_size ${batch_size} \ - --benchmark -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/run_export.sh 
b/examples/tensorflow/tf2onnx/vgg16/int8_export/run_export.sh deleted file mode 100644 index 1c6d1c908fe..00000000000 --- a/examples/tensorflow/tf2onnx/vgg16/int8_export/run_export.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_export - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_export -function run_export { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --export -} - -main "$@" diff --git a/examples/tensorflow/tf2onnx/vgg16/int8_export/run_tuning.sh b/examples/tensorflow/tf2onnx/vgg16/int8_export/run_tuning.sh deleted file mode 100644 index 6a9e1b859c9..00000000000 --- a/examples/tensorflow/tf2onnx/vgg16/int8_export/run_tuning.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_tuning - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --dataset_location ${dataset_location} \ - --tune -} - -main "$@" From 2322a55fb285ddf92cd179db3df3c10dc9eb7155 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Sun, 12 Feb 2023 22:39:20 +0800 Subject: [PATCH 35/43] update model path Signed-off-by: chensuyue --- examples/.config/model_params_tf2onnx.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/.config/model_params_tf2onnx.json b/examples/.config/model_params_tf2onnx.json index a2e152254b8..d6f6916d334 100644 --- a/examples/.config/model_params_tf2onnx.json +++ b/examples/.config/model_params_tf2onnx.json @@ -1,7 +1,7 @@ { "tf2onnx": { "resnet50v1.0": { - "model_src_dir": "tf2onnx/resnet50v1.0", + "model_src_dir": "image_recognition/tensorflow_models/resnet50_v1/export", "source_model_dataset": "/tf_dataset/dataset/imagenet", "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ImagenetRaw_small_5000", "input_model": "/tf_dataset/pre-trained-models/resnet50/fp32/freezed_resnet50.pb", @@ -9,7 +9,7 @@ "batch_size": 32 }, "resnet50v1.5": { - "model_src_dir": "tf2onnx/resnet50v1.5", + "model_src_dir": "image_recognition/tensorflow_models/resnet50_v1_5/export", "source_model_dataset": "/tf_dataset/dataset/imagenet", "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ImagenetRaw_small_5000", "input_model": "/tf_dataset/pre-trained-models/resnet50v1_5/fp32/resnet50_v1.pb", From f824642c0e0a6b560a17f425a3f72e3cb22781b9 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 14 Feb 2023 00:34:25 +0800 Subject: [PATCH 36/43] add more tf2onnx models Signed-off-by: chensuyue --- examples/.config/model_params_tf2onnx.json | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/examples/.config/model_params_tf2onnx.json b/examples/.config/model_params_tf2onnx.json index d6f6916d334..1e5e7824b92 100644 --- a/examples/.config/model_params_tf2onnx.json +++ b/examples/.config/model_params_tf2onnx.json @@ -15,6 +15,38 @@ "input_model": "/tf_dataset/pre-trained-models/resnet50v1_5/fp32/resnet50_v1.pb", "main_script": "main.py", 
"batch_size": 32 + }, + "mobilenetv2": { + "model_src_dir": "image_recognition/tensorflow_models/mobilenet_v2/export", + "source_model_dataset": "/tf_dataset/dataset/imagenet", + "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ImagenetRaw_small_5000", + "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_mobilenet_v2.pb", + "main_script": "main.py", + "batch_size": 32 + }, + "vgg16": { + "model_src_dir": "image_recognition/tensorflow_models/vgg16/export", + "source_model_dataset": "/tf_dataset/dataset/imagenet", + "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ImagenetRaw_small_5000", + "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_vgg16.pb", + "main_script": "main.py", + "batch_size": 32 + }, + "faster_rcnn_resnet50": { + "model_src_dir": "object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq", + "source_model_dataset": "/tf_dataset/tensorflow/coco_val.record", + "target_model_dataset": "/tf_dataset2/datasets/coco2017/coco/", + "input_model": "/tf_dataset/pre-train-model-oob/object_detection/faster_rcnn_resnet50/frozen_inference_graph.pb", + "main_script": "main.py", + "batch_size": 10 + }, + "ssd_mobilenet_v1": { + "model_src_dir": "object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq", + "source_model_dataset": "/tf_dataset/tensorflow/coco_val.record", + "target_model_dataset": "/tf_dataset2/datasets/coco2017/coco/", + "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1/frozen_inference_graph.pb", + "main_script": "main.py", + "batch_size": 10 } } } \ No newline at end of file From a68f88593c298bb8fc4faf79e87fe37a2a70170e Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 14 Feb 2023 13:06:50 +0800 Subject: [PATCH 37/43] fix path Signed-off-by: chensuyue --- examples/.config/model_params_tf2onnx.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/.config/model_params_tf2onnx.json b/examples/.config/model_params_tf2onnx.json index 1e5e7824b92..c781e2c2462 100644 --- a/examples/.config/model_params_tf2onnx.json +++ b/examples/.config/model_params_tf2onnx.json @@ -33,7 +33,7 @@ "batch_size": 32 }, "faster_rcnn_resnet50": { - "model_src_dir": "object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq", + "model_src_dir": "object_detection/tensorflow_models/faster_rcnn_resnet50/export", "source_model_dataset": "/tf_dataset/tensorflow/coco_val.record", "target_model_dataset": "/tf_dataset2/datasets/coco2017/coco/", "input_model": "/tf_dataset/pre-train-model-oob/object_detection/faster_rcnn_resnet50/frozen_inference_graph.pb", @@ -41,7 +41,7 @@ "batch_size": 10 }, "ssd_mobilenet_v1": { - "model_src_dir": "object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq", + "model_src_dir": "object_detection/tensorflow_models/ssd_mobilenet_v1/export", "source_model_dataset": "/tf_dataset/tensorflow/coco_val.record", "target_model_dataset": "/tf_dataset2/datasets/coco2017/coco/", "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1/frozen_inference_graph.pb", From 5586ea80a2375caecf15459a69fd98627f8da933 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 14 Feb 2023 14:54:36 +0800 Subject: [PATCH 38/43] fix onnx path Signed-off-by: chensuyue --- examples/.config/model_params_tf2onnx.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/.config/model_params_tf2onnx.json b/examples/.config/model_params_tf2onnx.json index 
c781e2c2462..e093be172b3 100644 --- a/examples/.config/model_params_tf2onnx.json +++ b/examples/.config/model_params_tf2onnx.json @@ -3,7 +3,7 @@ "resnet50v1.0": { "model_src_dir": "image_recognition/tensorflow_models/resnet50_v1/export", "source_model_dataset": "/tf_dataset/dataset/imagenet", - "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ImagenetRaw_small_5000", + "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ILSVRC2012_img_val", "input_model": "/tf_dataset/pre-trained-models/resnet50/fp32/freezed_resnet50.pb", "main_script": "main.py", "batch_size": 32 @@ -11,7 +11,7 @@ "resnet50v1.5": { "model_src_dir": "image_recognition/tensorflow_models/resnet50_v1_5/export", "source_model_dataset": "/tf_dataset/dataset/imagenet", - "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ImagenetRaw_small_5000", + "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ILSVRC2012_img_val", "input_model": "/tf_dataset/pre-trained-models/resnet50v1_5/fp32/resnet50_v1.pb", "main_script": "main.py", "batch_size": 32 @@ -19,7 +19,7 @@ "mobilenetv2": { "model_src_dir": "image_recognition/tensorflow_models/mobilenet_v2/export", "source_model_dataset": "/tf_dataset/dataset/imagenet", - "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ImagenetRaw_small_5000", + "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ILSVRC2012_img_val", "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_mobilenet_v2.pb", "main_script": "main.py", "batch_size": 32 @@ -27,7 +27,7 @@ "vgg16": { "model_src_dir": "image_recognition/tensorflow_models/vgg16/export", "source_model_dataset": "/tf_dataset/dataset/imagenet", - "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ImagenetRaw_small_5000", + "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ILSVRC2012_img_val", "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_vgg16.pb", "main_script": "main.py", "batch_size": 32 From c9bf6d15325c815d4e8c79013ec23abc636a6344 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 14 Feb 2023 17:54:14 +0800 Subject: [PATCH 39/43] use the same dataset for tf2onnx source and target model Signed-off-by: chensuyue --- examples/.config/model_params_tf2onnx.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/.config/model_params_tf2onnx.json b/examples/.config/model_params_tf2onnx.json index e093be172b3..b5b80de0aa2 100644 --- a/examples/.config/model_params_tf2onnx.json +++ b/examples/.config/model_params_tf2onnx.json @@ -3,7 +3,7 @@ "resnet50v1.0": { "model_src_dir": "image_recognition/tensorflow_models/resnet50_v1/export", "source_model_dataset": "/tf_dataset/dataset/imagenet", - "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ILSVRC2012_img_val", + "target_model_dataset": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset/pre-trained-models/resnet50/fp32/freezed_resnet50.pb", "main_script": "main.py", "batch_size": 32 @@ -11,7 +11,7 @@ "resnet50v1.5": { "model_src_dir": "image_recognition/tensorflow_models/resnet50_v1_5/export", "source_model_dataset": "/tf_dataset/dataset/imagenet", - "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ILSVRC2012_img_val", + "target_model_dataset": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset/pre-trained-models/resnet50v1_5/fp32/resnet50_v1.pb", "main_script": "main.py", "batch_size": 32 @@ -19,7 +19,7 @@ "mobilenetv2": { 
"model_src_dir": "image_recognition/tensorflow_models/mobilenet_v2/export", "source_model_dataset": "/tf_dataset/dataset/imagenet", - "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ILSVRC2012_img_val", + "target_model_dataset": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_mobilenet_v2.pb", "main_script": "main.py", "batch_size": 32 @@ -27,7 +27,7 @@ "vgg16": { "model_src_dir": "image_recognition/tensorflow_models/vgg16/export", "source_model_dataset": "/tf_dataset/dataset/imagenet", - "target_model_dataset": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ILSVRC2012_img_val", + "target_model_dataset": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_vgg16.pb", "main_script": "main.py", "batch_size": 32 @@ -35,7 +35,7 @@ "faster_rcnn_resnet50": { "model_src_dir": "object_detection/tensorflow_models/faster_rcnn_resnet50/export", "source_model_dataset": "/tf_dataset/tensorflow/coco_val.record", - "target_model_dataset": "/tf_dataset2/datasets/coco2017/coco/", + "target_model_dataset": "/tf_dataset/tensorflow/coco_val.record", "input_model": "/tf_dataset/pre-train-model-oob/object_detection/faster_rcnn_resnet50/frozen_inference_graph.pb", "main_script": "main.py", "batch_size": 10 @@ -43,7 +43,7 @@ "ssd_mobilenet_v1": { "model_src_dir": "object_detection/tensorflow_models/ssd_mobilenet_v1/export", "source_model_dataset": "/tf_dataset/tensorflow/coco_val.record", - "target_model_dataset": "/tf_dataset2/datasets/coco2017/coco/", + "target_model_dataset": "/tf_dataset/tensorflow/coco_val.record", "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1/frozen_inference_graph.pb", "main_script": "main.py", "batch_size": 10 From 1a7a23ab82e309dbf1f4164b4fa0b43d4eef4d32 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Thu, 16 Feb 2023 20:58:17 +0800 Subject: [PATCH 40/43] use the input batch_size Signed-off-by: Lv, Liang1 --- .../resnet50_v1_5/export/main.py | 2 +- .../tensorflow_models/vgg16/export/main.py | 75 +++++++++++++------ 2 files changed, 52 insertions(+), 25 deletions(-) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py index 9ccfba39970..917745d80de 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py @@ -147,7 +147,7 @@ def run(self): from neural_compressor.utils.create_obj_from_config import create_dataloader dataloader_args = { - 'batch_size': 32, + 'batch_size': self.args.batch_size, 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, 'transform': {'ResizeCropImagenet': {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py index 6a641d602b6..404f766428c 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py @@ -16,7 +16,7 @@ # limitations under the License. 
# -import os +import time import onnx import numpy as np import tensorflow as tf @@ -59,30 +59,48 @@ def eval_func_onnx(model, dataloader, metric, postprocess=None): acc = metric.result() return acc -def eval_func_tf(model): +def eval_func_tf(model, eval_dataloader, metric, postprocess=None): + """Custom evaluation function to estimate the accuracy of the model. + + Args: + model (graph_def or str): the input TensorFlow model graph or the path to a frozen pb + + Returns: + accuracy (float): evaluation result; a higher value is better. + """ from neural_compressor.model import Model - metric = TensorflowTopK(k=1) - postprocess = LabelShift(label_shift=1) model = Model(model) input_tensor = model.input_tensor output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ model.output_tensor[0] - eval_dataloader_args = { - 'batch_size': 32, - 'dataset': {"ImageRecord": {'root':args.dataset_location}}, - 'transform': {'ResizeCropImagenet': - {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, - 'filter': None - } - dataloader = create_dataloader('tensorflow', eval_dataloader_args) - - for _, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - predictions = model.sess.run(output_tensor, feed_dict) - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) + iteration = -1 + if args.benchmark and args.mode == 'performance': + iteration = args.iters + + def eval_func(dataloader): + latency_list = [] + for idx, (inputs, labels) in enumerate(dataloader): + # the dataloader should keep the order and length of inputs consistent with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + if postprocess: + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + latency_list.append(end-start) + if idx + 1 == iteration: + break + latency = np.array(latency_list).mean() / args.batch_size + return latency + + latency = eval_func(eval_dataloader) + if args.benchmark and args.mode == 'performance': + print("Batch size = {}".format(args.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1.
/ latency)) acc = metric.result() return acc @@ -110,6 +128,14 @@ def run(self): 'filter': None } calib_dataloader = create_dataloader('tensorflow', calib_dataloader_args) + eval_dataloader_args = { + 'batch_size': 32, + 'dataset': {"ImageRecord": {'root':args.dataset_location}}, + 'transform': {'ResizeCropImagenet': + {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, + 'filter': None + } + eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) op_name_list = { 'resnet_model/dense/MatMul': { @@ -120,8 +146,10 @@ def run(self): conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100], outputs=['softmax_tensor'], op_name_list=op_name_list) + def eval(model): + return eval_func_tf(model, eval_dataloader, top1, postprocess) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_func=eval_func_tf) + eval_func=eval) q_model.save("./tf-quant.pb") from neural_compressor.config import TF2ONNXConfig config = TF2ONNXConfig(dtype=args.dtype) @@ -139,17 +167,16 @@ def run(self): else: model = args.input_graph eval_dataloader_args = { - 'batch_size': 32, + 'batch_size': args.batch_size, 'dataset': {"ImageRecord": {'root':args.dataset_location}}, 'transform': {'ResizeCropImagenet': {'height': 224, 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, 'filter': None } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) - def eval(model): if isinstance(model, str): - return eval_func_tf(model) + return eval_func_tf(model, eval_dataloader, top1, postprocess) else: return eval_func_onnx(model, eval_dataloader, top1, postprocess) From 665f396eefb8e81ca83ee6422d41d21e36c1726f Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Thu, 16 Feb 2023 21:25:26 +0800 Subject: [PATCH 41/43] Update main.py --- .../tensorflow_models/resnet50_v1/export/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py index cbd3d0a2532..13ab3c57f06 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py @@ -137,7 +137,7 @@ def run(self): from neural_compressor.utils.create_obj_from_config import create_dataloader dataloader_args = { - 'batch_size': 32, + 'batch_size': self.args.batch_size, 'dataset': {"ImageRecord": {'root':self.args.dataset_location}}, 'transform': {'ResizeCropImagenet': {'height': 224, 'width': 224}}, From d488e83c141a70d845840780fac6e27439d690cf Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Fri, 17 Feb 2023 18:00:52 +0800 Subject: [PATCH 42/43] fix ssd_mobilenet_v1 and faster_rcnn benchmark issue Signed-off-by: Lv, Liang1 --- .../faster_rcnn_resnet50/export/main.py | 38 +++++++++++++---- .../ssd_mobilenet_v1/export/main.py | 42 +++++++++++++++---- 2 files changed, 65 insertions(+), 15 deletions(-) diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/main.py b/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/main.py index 3fd1f8b97a5..75e2b99c4e9 100644 --- a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/main.py +++ b/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/export/main.py @@ -19,19 +19,21 @@ from argparse import ArgumentParser 
import tensorflow as tf import onnx -import os +import time import onnxruntime as ort import numpy as np tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) -def eval_func_onnx(model, dataloader, metric, postprocess=None): +def eval_func_onnx(model, dataloader, metric, postprocess=None, batch_size=32, mode='accuracy'): metric.reset() session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) ort_inputs = {} len_inputs = len(session.get_inputs()) inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + + latency_list = [] for inputs, labels in dataloader: if not isinstance(labels, list): labels = [labels] @@ -53,7 +55,9 @@ def eval_func_onnx(model, dataloader, metric, postprocess=None): else: ort_inputs.update({inputs_names[i]: inputs[i]}) + start = time.time() predictions = session.run(None, ort_inputs) + end = time.time() if postprocess is not None: predictions, labels = postprocess((predictions, labels)) @@ -61,10 +65,17 @@ def eval_func_onnx(model, dataloader, metric, postprocess=None): if not hasattr(metric, "compare_label") or \ (hasattr(metric, "compare_label") and metric.compare_label): metric.update(predictions, labels) + latency_list.append(end-start) + latency = np.array(latency_list[:]).mean() / batch_size + if mode == 'performance': + print("Batch size = {}".format(batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. / latency)) + acc = metric.result() return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] -def eval_func_tf(model, dataloader, metric, postprocess=None): +def eval_func_tf(model, dataloader, metric, postprocess=None, batch_size=32, mode='accuracy'): metric.reset() from neural_compressor.model import Model @@ -77,12 +88,23 @@ def eval_func_tf(model, dataloader, metric, postprocess=None): output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ model.output_tensor[0] + latency_list = [] for _, (inputs, labels) in enumerate(dataloader): # dataloader should keep the order and len of inputs same with input_tensor inputs = np.array([inputs]) feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() metric.update(predictions, labels) + latency_list.append(end-start) + latency = np.array(latency_list[:]).mean() / batch_size + + if mode == 'performance': + print("Batch size = {}".format(batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) acc = metric.result() return acc @@ -178,16 +200,18 @@ def run(self): mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) def eval(model): - if isinstance(model, str): - return eval_func_tf(model, dataloader, mAP2) + if self.args.input_graph.endswith('.onnx'): + return eval_func_onnx(model, dataloader, mAP2, + batch_size=self.args.batch_size, mode='performance') else: - return eval_func_onnx(model, dataloader, mAP2) + return eval_func_tf(model, dataloader, mAP2, + batch_size=self.args.batch_size, mode='performance') if self.args.mode == 'performance': from neural_compressor.benchmark import fit from neural_compressor.config import BenchmarkConfig conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) + fit(model, conf, b_dataloader=dataloader, b_func=eval) elif self.args.mode == 'accuracy': acc_result = eval(model) print("Batch size = %d" % dataloader.batch_size) diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/main.py b/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/main.py index 719ba13f2f0..4fe1a484f00 100644 --- a/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/main.py +++ b/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/export/main.py @@ -19,19 +19,21 @@ from argparse import ArgumentParser import tensorflow as tf import onnx -import os +import time import onnxruntime as ort import numpy as np tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) -def eval_func_onnx(model, dataloader, metric, postprocess=None): +def eval_func_onnx(model, dataloader, metric, postprocess=None, batch_size=32, mode='accuracy'): metric.reset() session = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) ort_inputs = {} len_inputs = len(session.get_inputs()) inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + + latency_list = [] for inputs, labels in dataloader: if not isinstance(labels, list): labels = [labels] @@ -53,7 +55,9 @@ def eval_func_onnx(model, dataloader, metric, postprocess=None): else: ort_inputs.update({inputs_names[i]: inputs[i]}) + start = time.time() predictions = session.run(None, ort_inputs) + end = time.time() if postprocess is not None: predictions, labels = postprocess((predictions, labels)) @@ -61,10 +65,17 @@ def eval_func_onnx(model, dataloader, metric, postprocess=None): if not hasattr(metric, "compare_label") or \ (hasattr(metric, "compare_label") and metric.compare_label): metric.update(predictions, labels) + latency_list.append(end-start) + latency = np.array(latency_list[:]).mean() / batch_size + if mode == 'performance': + print("Batch size = {}".format(batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] -def eval_func_tf(model, dataloader, metric, postprocess=None): +def eval_func_tf(model, dataloader, metric, postprocess=None, batch_size=32, mode='accuracy'): metric.reset() from neural_compressor.model import Model @@ -77,12 +88,23 @@ def eval_func_tf(model, dataloader, metric, postprocess=None): output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ model.output_tensor[0] + latency_list = [] for _, (inputs, labels) in enumerate(dataloader): # dataloader should keep the order and len of inputs same with input_tensor inputs = np.array([inputs]) feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() metric.update(predictions, labels) + latency_list.append(end-start) + latency = np.array(latency_list[:]).mean() / batch_size + + if mode == 'performance': + print("Batch size = {}".format(batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. / latency)) acc = metric.result() return acc @@ -178,16 +200,20 @@ def run(self): mAP2 = COCOmAPv2(output_index_mapping=output_index_mapping) def eval(model): - if isinstance(model, str): - return eval_func_tf(model, dataloader, mAP2) + if self.args.input_graph.endswith('.onnx'): + return eval_func_onnx(model, dataloader, mAP2, + batch_size=self.args.batch_size, mode='performance') else: - return eval_func_onnx(model, dataloader, mAP2) + return eval_func_tf(model, dataloader, mAP2, + batch_size=self.args.batch_size, mode='performance') if self.args.mode == 'performance': from neural_compressor.benchmark import fit from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model, conf, b_dataloader=dataloader) + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7, + inputs=['image_tensor'], + outputs=['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes']) + fit(model, conf, b_dataloader=dataloader, b_func=eval) elif self.args.mode == 'accuracy': acc_result = eval(model) print("Batch size = %d" % dataloader.batch_size) From ea3cd4a00bf19a3947bbd76c2edf5d01938125b3 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Thu, 23 Feb 2023 09:52:51 +0800 Subject: [PATCH 43/43] fix resnet50_v1.5 accuracy issue running on itex Signed-off-by: Lv, Liang1 --- .../tensorflow_models/resnet50_v1_5/export/main.py | 1 - .../tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py | 7 +++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py index 917745d80de..1de9f18597c 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py @@ -122,7 +122,6 @@ def run(self): } conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100], outputs=['softmax_tensor'], - accuracy_criterion = AccuracyCriterion(tolerable_loss=0.3), op_name_list=op_name_list) from neural_compressor.metric import TensorflowTopK top1 = TensorflowTopK(k=1) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py 
b/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py index 3727755b6ea..270f3e37342 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py @@ -207,6 +207,13 @@ def _find_relu_node(self, node): ) and ((node.op.find("Relu") == -1 and node.op.find("Elu") == -1) or \ ('alpha' in node.attr and node.attr['alpha'].f > 0)): return False + elif self.itex_mode and node.op in ('Add', 'AddV2', 'AddN'): + if re.search(r"\w+:\d+", node.input[1]): + input_node = self.node_name_mapping[node.input[1].rsplit(':', 1)[0]].node + else: + input_node = self.node_name_mapping[node.input[1]].node + if input_node.op in ('BiasAdd', 'Add', 'AddV2', 'AddN'): + return False elif self._check_op_list(node.op) or (self.itex_mode and node.op in ('Add', 'AddV2')): if node.op == 'ConcatV2': find_relu = False
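
Editor's note on the final hunk above: in ITEX mode the QDQ rewriter now refuses to treat an Add/AddV2/AddN node as relu-backed when its second input is produced by another bias/add node. Before the producer can be looked up, an optional output-port suffix (e.g. "conv_bias:1") has to be stripped from the TensorFlow input name. The snippet below is a minimal, self-contained sketch of that resolution step only; the `node_name_mapping` dict, node names, and helper functions are hypothetical stand-ins for illustration, not the converter's real data structures.

```python
import re

# Hypothetical name -> node-definition mapping, standing in for the rewriter's
# node_name_mapping (real entries would be wrapped NodeDef objects).
node_name_mapping = {
    "conv_bias": {"op": "BiasAdd"},
    "relu_out": {"op": "Relu"},
}

def resolve_input_node(input_name, mapping):
    """Strip an optional ':<port>' suffix from a TF input name, then look it up."""
    if re.search(r"\w+:\d+", input_name):
        # e.g. "conv_bias:1" -> "conv_bias"
        input_name = input_name.rsplit(":", 1)[0]
    return mapping[input_name]

def add_feeds_from_bias_add(add_node_inputs, mapping):
    """Return True when the second input of an Add-like node comes from a bias/add op."""
    producer = resolve_input_node(add_node_inputs[1], mapping)
    return producer["op"] in ("BiasAdd", "Add", "AddV2", "AddN")

if __name__ == "__main__":
    print(add_feeds_from_bias_add(["feature_map", "conv_bias:1"], node_name_mapping))  # True
    print(add_feeds_from_bias_add(["feature_map", "relu_out"], node_name_mapping))     # False
```

When the check returns True, the pattern is skipped, which is what restores the resnet50_v1.5 accuracy mentioned in the commit message.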