Skip to content

Commit

Permalink
Fix for mixed precision run and their input parameters
Browse files Browse the repository at this point in the history
Fixes GPU faults seen when running mixed precision inference workloads with Bert v1.1 (fp16 + int8 quantization of Conv + MatMul).

We were hitting an edge case with mixed precision where the input parameters were not being populated, so uninitialized values were used for the parameters. This would "work" silently, since no issue arises during inference for most models. For Bert, though, segment_ids is pushed through a Gather ONNX operator, which uses these values as an index.

Using uninitialized memory made this error non-obvious: it was unclear why we were getting failures between runs, and the issue appeared intermittently across machines/cards/etc.

Fixes here are as follows to the MIGraphX Execution Provider

- Perform fp16 quantization after int8 quantization
- Add debug logging for the loading/quantization workflow
- Set input/output parameters in a separate pass prior to int8 calibration
- Set all dynamic data as input parameters so int8 static calibration can be performed by MIGraphX

Without these changes, models will fail to copy input parameters on mixed precision runs when we decide to quantize, because MIGraphX assumes all inputs will be used for calibration — not just the input data read in from a calibration table.
  • Loading branch information
TedThemistokleous committed Jun 8, 2024
1 parent 328bc48 commit 0fda866
Showing 1 changed file with 65 additions and 16 deletions.
81 changes: 65 additions & 16 deletions onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1117,39 +1117,52 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
}

std::vector<std::string> input_names, output_names;
no_input_shape = get_input_output_names(graph_body_viewer, input_names, output_names);
no_input_shape = no_input_shape or get_input_output_names(graph_body_viewer, input_names, output_names);

// by parsing the model_proto, create a program corresponding to
// the input fused_node
migraphx::program prog;

if (!no_input_shape) {
LOGS_DEFAULT(INFO) << "No Input shapes detected quantizing model" << std::endl;
prog = migraphx::parse_onnx_buffer(onnx_string_buffer, options);
if (fp16_enable_) {
migraphx::quantize_fp16(prog);
}

// Read in the calibration data and map it to an migraphx paramater map for the calibration ops
if (int8_enable_ && int8_calibration_cache_available_) {
LOGS_DEFAULT(INFO) << "Quantizing input program to int8" << std::endl;
migraphx::quantize_int8_options quant_opts;
migraphx::program_parameters quant_params;

auto param_shapes = prog.get_parameter_shapes();

for (auto&& name : param_shapes.names()) {
auto dynamic_range_i = dynamic_range_map.find(name);
if (dynamic_range_i != dynamic_range_map.end()) {
quant_params.add(name, migraphx::argument(param_shapes[name], &(dynamic_range_i->second)));
}
// Add all calibration data read in from int8 table
for (auto& [cal_key, cal_val] : dynamic_range_map) {
auto cal_val_shape = migraphx::shape(migraphx_shape_float_type);
quant_params.add(cal_key.c_str(), migraphx::argument(cal_val_shape, static_cast<void*>(std::move(&cal_val))));
}

quant_opts.add_calibration_data(quant_params);

// specify thing we want to int8 quantize
quant_opts.add_op_name("convolution");
quant_opts.add_op_name("dot");

// perform static quantization on the programs
migraphx::quantize_int8(prog, t_, quant_opts);
LOGS_DEFAULT(INFO) << "Quantizing input program to int8: Complete" << std::endl;
}

if (fp16_enable_) {
LOGS_DEFAULT(INFO) << "Quantizing input program to fp16" << std::endl;
migraphx::quantize_fp16(prog);
LOGS_DEFAULT(INFO) << "Quantizing input program to fp16: Complete" << std::endl;
}


migraphx::compile_options co;
co.set_fast_math(false);
LOGS_DEFAULT(INFO) << "Model Compile: Begin" << std::endl;
prog.compile(t_, co);
LOGS_DEFAULT(INFO) << "Model Compile: Complete" << std::endl;
auto prog_output_shapes = prog.get_output_shapes();
for (std::size_t i = 0; i < output_names.size(); ++i) {
auto out_len = prog_output_shapes[i].lengths();
Expand Down Expand Up @@ -1199,6 +1212,7 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
bool input_shape_match = true;
migraphx::program_parameter_shapes param_shapes;
if (no_input_shape) {
LOGS_DEFAULT(VERBOSE) << "Missing input shape setting input parameters again" << std::endl;
for (auto& it : map_input_name_index) {
auto& name = it.first;
auto& index = it.second;
Expand All @@ -1210,6 +1224,7 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
input_shape_match = false;
}
} else {
LOGS_DEFAULT(VERBOSE) << "Assigning inputs, and parameters from compiled model" << std::endl;
param_shapes = prog.get_parameter_shapes();
auto prog_output_shapes = prog.get_output_shapes();

Expand Down Expand Up @@ -1243,33 +1258,64 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
// input shapes are different, needs to re-parse onnx and
// re-compile the program
if (!input_shape_match) {
LOGS_DEFAULT(VERBOSE) << "No Input shapes mismatch detected. Recompiling" << std::endl;
prog = migraphx::parse_onnx_buffer(onnx_string, cmp_options);
if (fp16_enable) {
migraphx::quantize_fp16(prog);
}

// Read in the calibration data and map it to an migraphx paramater map for the calibration ops
if (int8_enable && int8_calibration_cache_available) {
LOGS_DEFAULT(INFO) << "Quantize Int8: Begin" << std::endl;
migraphx::quantize_int8_options quant_opts;
migraphx::program_parameters quant_params;

auto param_shapes = prog.get_parameter_shapes();

// Add input parameter data and the values they're set to
for (auto&& name : param_shapes.names()) {
auto dynamic_range_i = map_dynamic_range.find(name);
if (dynamic_range_i != map_dynamic_range.end()) {
quant_params.add(name, migraphx::argument(param_shapes[name], &(dynamic_range_i->second)));
if (map_input_name_index.count(name) > 0) {
auto input_tensor = ctx.GetInput(map_input_name_index[name]);
auto tensor_info = input_tensor.GetTensorTypeAndShapeInfo();
const auto tensor_shape = tensor_info.GetShape();
const auto tensor_type = tensor_info.GetElementType();

migraphx_shape_datatype_t mgx_type;
getMIGraphXType(tensor_type, mgx_type);
auto mgx_s = param_shapes[name];

if (mgx_type != mgx_s.type()) {
LOGS_DEFAULT(FATAL) << "MIGraphX: param type mismatch";
}
quant_params.add(name, migraphx::argument(param_shapes[name], const_cast<void*>(input_tensor.GetTensorRawData())));
}
}

// Add all calibration data read in from int8 table
for (auto &[cal_key, cal_val] : map_dynamic_range) {
auto cal_val_shape = migraphx::shape(migraphx_shape_float_type);
quant_params.add(cal_key.c_str(), migraphx::argument(cal_val_shape, static_cast<void*>(std::move(&cal_val))));
}
quant_opts.add_calibration_data(quant_params);

// specify thing we want to int8 quantize
quant_opts.add_op_name("convolution");
quant_opts.add_op_name("dot");

// perform static quantization on the programs
migraphx::quantize_int8(prog, t, quant_opts);
LOGS_DEFAULT(INFO) << "Quantize Int8: Completed" << std::endl;
}

if (fp16_enable) {
LOGS_DEFAULT(INFO) << "Quantize fp16: Begin" << std::endl;
migraphx::quantize_fp16(prog);
LOGS_DEFAULT(INFO) << "Quantize fp16: Completed" << std::endl;
}

LOGS_DEFAULT(INFO) << "Model Compile: Begin" << std::endl;
migraphx::compile_options co;
co.set_fast_math(false);
prog.compile(t, co);

LOGS_DEFAULT(INFO) << "Model Compile: Completed" << std::endl;
mgx_state->prog = prog;
param_shapes = prog.get_parameter_shapes();
no_input_shape = false;
Expand All @@ -1281,6 +1327,7 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
if (param_shapes.size() > 0) {
for (auto&& name : param_shapes.names()) {
if (map_input_name_index.count(name) > 0) {
LOGS_DEFAULT(INFO) << "Setting parameters for:" << name << std::endl;
auto input_tensor = ctx.GetInput(map_input_name_index[name]);
auto tensor_info = input_tensor.GetTensorTypeAndShapeInfo();
const auto tensor_shape = tensor_info.GetShape();
Expand All @@ -1293,6 +1340,8 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
if (mgx_type != mgx_s.type()) {
LOGS_DEFAULT(FATAL) << "MIGraphX: param type mismatch";
}

LOGS_DEFAULT(INFO) << "Writing Raw tensor data " << std::endl;
m.add(name, migraphx::argument(param_shapes[name],
const_cast<void*>(input_tensor.GetTensorRawData())));
}
Expand Down

0 comments on commit 0fda866

Please sign in to comment.