Skip to content

Commit

Permalink
Fix for mixed precision run and their input parameters
Browse files Browse the repository at this point in the history
Fixes GPU faults seen when running mixed precision inference workloads with Bert v1.1 (fp16 + int8 quantization of Conv + MatMul).

We were hitting an edge case with mixed precision where the input parameters were not being populated, so uninitialized values were used for the parameters. This would "work" silently, since no issue arises during inference for most models. For Bert, though, segment_ids is pushed through a Gather ONNX operator, which uses these values as an index.

Using uninitialized memory made this error non-obvious: it was unclear why we were getting failures between runs, and the issue appeared intermittently across machines/cards/etc.

Fixes here are as follows to the MIGraphX Execution Provider

- Perform fp16 quantization after int8 quantization
- Add debug logging for the loading/quantization workflow
- Set input/output parameters in a separate pass prior to int8 calibration
- Set all dynamic data as input parameters so int8 static calibration can be performed by MIGraphX

Without these changes, models will fail to copy input parameters on mixed precision runs when we decide to quantize, because MIGraphX assumes all inputs will be used for calibration — not just the input data read in from a calibration table.
  • Loading branch information
TedThemistokleous committed Jun 8, 2024
1 parent 328bc48 commit 0fda866
Showing 1 changed file with 65 additions and 16 deletions.
81 changes: 65 additions & 16 deletions onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1117,39 +1117,52 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
}

std::vector<std::string> input_names, output_names;
no_input_shape = get_input_output_names(graph_body_viewer, input_names, output_names);
no_input_shape = no_input_shape or get_input_output_names(graph_body_viewer, input_names, output_names);

// by parsing the model_proto, create a program corresponding to
// the input fused_node
migraphx::program prog;

if (!no_input_shape) {
LOGS_DEFAULT(INFO) << "No Input shapes detected quantizing model" << std::endl;
prog = migraphx::parse_onnx_buffer(onnx_string_buffer, options);
if (fp16_enable_) {
migraphx::quantize_fp16(prog);
}

// Read in the calibration data and map it to an migraphx paramater map for the calibration ops
if (int8_enable_ && int8_calibration_cache_available_) {
LOGS_DEFAULT(INFO) << "Quantizing input program to int8" << std::endl;
migraphx::quantize_int8_options quant_opts;
migraphx::program_parameters quant_params;

auto param_shapes = prog.get_parameter_shapes();

for (auto&& name : param_shapes.names()) {
auto dynamic_range_i = dynamic_range_map.find(name);
if (dynamic_range_i != dynamic_range_map.end()) {
quant_params.add(name, migraphx::argument(param_shapes[name], &(dynamic_range_i->second)));
}
// Add all calibration data read in from int8 table
for (auto& [cal_key, cal_val] : dynamic_range_map) {
auto cal_val_shape = migraphx::shape(migraphx_shape_float_type);
quant_params.add(cal_key.c_str(), migraphx::argument(cal_val_shape, static_cast<void*>(std::move(&cal_val))));
}

quant_opts.add_calibration_data(quant_params);

// specify thing we want to int8 quantize
quant_opts.add_op_name("convolution");
quant_opts.add_op_name("dot");

// perform static quantization on the programs
migraphx::quantize_int8(prog, t_, quant_opts);
LOGS_DEFAULT(INFO) << "Quantizing input program to int8: Complete" << std::endl;
}

if (fp16_enable_) {
LOGS_DEFAULT(INFO) << "Quantizing input program to fp16" << std::endl;
migraphx::quantize_fp16(prog);
LOGS_DEFAULT(INFO) << "Quantizing input program to fp16: Complete" << std::endl;
}


migraphx::compile_options co;
co.set_fast_math(false);
LOGS_DEFAULT(INFO) << "Model Compile: Begin" << std::endl;
prog.compile(t_, co);
LOGS_DEFAULT(INFO) << "Model Compile: Complete" << std::endl;
auto prog_output_shapes = prog.get_output_shapes();
for (std::size_t i = 0; i < output_names.size(); ++i) {
auto out_len = prog_output_shapes[i].lengths();
Expand Down Expand Up @@ -1199,6 +1212,7 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
bool input_shape_match = true;
migraphx::program_parameter_shapes param_shapes;
if (no_input_shape) {
LOGS_DEFAULT(VERBOSE) << "Missing input shape setting input parameters again" << std::endl;
for (auto& it : map_input_name_index) {
auto& name = it.first;
auto& index = it.second;
Expand All @@ -1210,6 +1224,7 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
input_shape_match = false;
}
} else {
LOGS_DEFAULT(VERBOSE) << "Assigning inputs, and parameters from compiled model" << std::endl;
param_shapes = prog.get_parameter_shapes();
auto prog_output_shapes = prog.get_output_shapes();

Expand Down Expand Up @@ -1243,33 +1258,64 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
// input shapes are different, needs to re-parse onnx and
// re-compile the program
if (!input_shape_match) {
LOGS_DEFAULT(VERBOSE) << "No Input shapes mismatch detected. Recompiling" << std::endl;
prog = migraphx::parse_onnx_buffer(onnx_string, cmp_options);
if (fp16_enable) {
migraphx::quantize_fp16(prog);
}

// Read in the calibration data and map it to an migraphx paramater map for the calibration ops
if (int8_enable && int8_calibration_cache_available) {
LOGS_DEFAULT(INFO) << "Quantize Int8: Begin" << std::endl;
migraphx::quantize_int8_options quant_opts;
migraphx::program_parameters quant_params;

auto param_shapes = prog.get_parameter_shapes();

// Add input parameter data and the values they're set to
for (auto&& name : param_shapes.names()) {
auto dynamic_range_i = map_dynamic_range.find(name);
if (dynamic_range_i != map_dynamic_range.end()) {
quant_params.add(name, migraphx::argument(param_shapes[name], &(dynamic_range_i->second)));
if (map_input_name_index.count(name) > 0) {
auto input_tensor = ctx.GetInput(map_input_name_index[name]);
auto tensor_info = input_tensor.GetTensorTypeAndShapeInfo();
const auto tensor_shape = tensor_info.GetShape();
const auto tensor_type = tensor_info.GetElementType();

migraphx_shape_datatype_t mgx_type;
getMIGraphXType(tensor_type, mgx_type);
auto mgx_s = param_shapes[name];

if (mgx_type != mgx_s.type()) {
LOGS_DEFAULT(FATAL) << "MIGraphX: param type mismatch";
}
quant_params.add(name, migraphx::argument(param_shapes[name], const_cast<void*>(input_tensor.GetTensorRawData())));
}
}

// Add all calibration data read in from int8 table
for (auto &[cal_key, cal_val] : map_dynamic_range) {
auto cal_val_shape = migraphx::shape(migraphx_shape_float_type);
quant_params.add(cal_key.c_str(), migraphx::argument(cal_val_shape, static_cast<void*>(std::move(&cal_val))));
}
quant_opts.add_calibration_data(quant_params);

// specify thing we want to int8 quantize
quant_opts.add_op_name("convolution");
quant_opts.add_op_name("dot");

// perform static quantization on the programs
migraphx::quantize_int8(prog, t, quant_opts);
LOGS_DEFAULT(INFO) << "Quantize Int8: Completed" << std::endl;
}

if (fp16_enable) {
LOGS_DEFAULT(INFO) << "Quantize fp16: Begin" << std::endl;
migraphx::quantize_fp16(prog);
LOGS_DEFAULT(INFO) << "Quantize fp16: Completed" << std::endl;
}

LOGS_DEFAULT(INFO) << "Model Compile: Begin" << std::endl;
migraphx::compile_options co;
co.set_fast_math(false);
prog.compile(t, co);

LOGS_DEFAULT(INFO) << "Model Compile: Completed" << std::endl;
mgx_state->prog = prog;
param_shapes = prog.get_parameter_shapes();
no_input_shape = false;
Expand All @@ -1281,6 +1327,7 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
if (param_shapes.size() > 0) {
for (auto&& name : param_shapes.names()) {
if (map_input_name_index.count(name) > 0) {
LOGS_DEFAULT(INFO) << "Setting parameters for:" << name << std::endl;
auto input_tensor = ctx.GetInput(map_input_name_index[name]);
auto tensor_info = input_tensor.GetTensorTypeAndShapeInfo();
const auto tensor_shape = tensor_info.GetShape();
Expand All @@ -1293,6 +1340,8 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
if (mgx_type != mgx_s.type()) {
LOGS_DEFAULT(FATAL) << "MIGraphX: param type mismatch";
}

LOGS_DEFAULT(INFO) << "Writing Raw tensor data " << std::endl;
m.add(name, migraphx::argument(param_shapes[name],
const_cast<void*>(input_tensor.GetTensorRawData())));
}
Expand Down

0 comments on commit 0fda866

Please sign in to comment.