
Commit

resolve conflict
RandyShuai committed Aug 9, 2023
2 parents 92de13a + 2c5d4dc commit ec27703
Showing 222 changed files with 10,299 additions and 1,469 deletions.
1 change: 0 additions & 1 deletion .vscode/settings.json
@@ -40,4 +40,3 @@
"-build/include_subdir",
"-runtime/references"
]
}
17 changes: 13 additions & 4 deletions cmake/CMakeLists.txt
@@ -257,15 +257,21 @@ option(onnxruntime_USE_LOCK_FREE_QUEUE "Build with lock-free task queue for thre
# Some features are only enabled when onnxruntime_ENABLE_PYTHON is ON as they are only relevant
# when using python env
if (onnxruntime_ENABLE_TRAINING)
set(onnxruntime_ENABLE_TRAINING_OPS ON)
set(onnxruntime_ENABLE_TRAINING_APIS ON)
set(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP ON)
set(onnxruntime_ENABLE_TRAINING_OPS ON)
set(onnxruntime_ENABLE_ATEN ON)
set(onnxruntime_ENABLE_TRITON ON)
if (NOT APPLE)
set(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP ON)
endif()
endif()

if (onnxruntime_ENABLE_TRAINING_APIS)
set(onnxruntime_ENABLE_TRAINING_OPS ON)
if (onnxruntime_ENABLE_PYTHON AND NOT onnxruntime_ENABLE_TRAINING)
message(FATAL_ERROR "Standalone On-Device Training build is not supported with Python bindings! "
"Please use the --enable_training flag instead of the --enable_training_apis flag.")
endif()
endif()

if (onnxruntime_USE_CUDA)
@@ -1230,9 +1236,12 @@ if (onnxruntime_USE_OPENVINO)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0")
set(OPENVINO_VERSION "2023.0")
add_definitions(-DOPENVINO_2023_0=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.1")
set(OPENVINO_VERSION "2023.1")
add_definitions(-DOPENVINO_2023_1=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino")
set(OPENVINO_VERSION "2023.0")
add_definitions(-DOPENVINO_2023_0=1)
set(OPENVINO_VERSION "2023.1")
add_definitions(-DOPENVINO_2023_1=1)
else()
message(FATAL_ERROR "Unsupported OpenVINO version: ${INTEL_OPENVINO_DIR}")
endif()
39 changes: 21 additions & 18 deletions cmake/external/onnx_protobuf.natvis
@@ -50,7 +50,7 @@
<Item Name="[value]">_impl_.value_.tagged_ptr_</Item>
</Expand>
</Type>

<Type Name="onnx::TensorAnnotation">
<DisplayString>{{ tensor_name={_impl_.tensor_name_.tagged_ptr_} }}</DisplayString>
<Expand>
@@ -80,7 +80,7 @@
<Intrinsic Name="_has_graph" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000020u) != 0"/>
<Intrinsic Name="_has_type_proto" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000040u) != 0"/>
<Intrinsic Name="_has_sparse_tensor" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000080u) != 0"/>
<DisplayString>{{ name={ name_.tagged_ptr_ }, type={ (AttributeProto_AttributeType)_impl_.type_ } }}</DisplayString>
<DisplayString>{{ name={ _impl_.name_.tagged_ptr_ }, type={ (AttributeProto_AttributeType)_impl_.type_ } }}</DisplayString>
<Expand>
<Item Name="[name]">_impl_.name_.tagged_ptr_</Item>
<Item Name="[type]">(AttributeProto_AttributeType)_impl_.type_</Item>
@@ -118,18 +118,21 @@
<Item Name="[attribute]">_impl_.attribute_</Item>
</Expand>
</Type>

<Type Name="onnx::FunctionProto">
<DisplayString>{{ name={ _impl_.name_.tagged_ptr_ }, domain={ _impl_.domain_.tagged_ptr_ } }}</DisplayString>
<DisplayString>{{ name={ _impl_.name_.tagged_ptr_ } }}</DisplayString>
<Expand>
<Item Name="[opset_import]">_impl_.opset_import_</Item>
<Item Name="[input]">_impl_.input_</Item>
<Item Name="[output]">_impl_.output_</Item>
<Item Name="[node]">_impl_.node_</Item>
<Item Name="[doc]" ExcludeView="simple">_impl_.doc_string_.tagged_ptr_</Item>
<Item Name="[attribute]">_impl_.attribute_</Item>
<Item Name="[attribute_proto]" ExcludeView="simple">_impl_.attribute_proto_</Item>
<Item Name="[doc]">_impl_.doc_string_.tagged_ptr_</Item>
<Item Name="[opset_import]">_impl_.opset_import_</Item>
<Item Name="[domain]">_impl_.domain_.tagged_ptr_</Item>
</Expand>
</Type>

<Type Name="onnx::GraphProto">
<DisplayString>{{ name={ _impl_.name_.tagged_ptr_ } }}</DisplayString>
<Expand>
@@ -143,7 +146,7 @@
<Item Name="[doc]" ExcludeView="simple">_impl_.doc_string_.tagged_ptr_</Item>
</Expand>
</Type>

<Type Name="onnx::ModelProto">
<DisplayString>{{ producer={_impl_.producer_name_.tagged_ptr_}, domain={ _impl_.domain_.tagged_ptr_ } }}</DisplayString>
<Expand>
@@ -156,7 +159,7 @@
<Item Name="[graph]" ExcludeView="simple">_impl_.graph_</Item>
</Expand>
</Type>

<Type Name="onnx::TensorShapeProto_Dimension">
<DisplayString Condition="*_impl_._oneof_case_ == 0">empty</DisplayString>
<DisplayString Condition="*_impl_._oneof_case_ == kDimValue">{{ v = {_impl_.value_.dim_value_} }}</DisplayString>
@@ -171,7 +174,7 @@
<Item Name="[dims]" ExcludeView="simple">_impl_.dim_</Item>
</Expand>
</Type>

<!--Type Protos-->
<Type Name="onnx::TypeProto">
<DisplayString>{{ type={ (ValueCase)*_impl_._oneof_case_ } }}</DisplayString>
@@ -185,7 +188,7 @@
<Item Name="[denotation]" ExcludeView="simple">_impl_.denotation_.tagged_ptr_</Item>
</Expand>
</Type>

<Type Name="onnx::TypeProto_Tensor">
<Intrinsic Name="_has_shape" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000001u) != 0"/>
<DisplayString>{{ elem_type={ (TensorProto_DataType)_impl_.elem_type_ } }}</DisplayString>
@@ -195,7 +198,7 @@
<Item Name="[has_shape]" ExcludeView="simple">_has_shape()</Item>
</Expand>
</Type>

<Type Name="onnx::TypeProto_Sequence">
<Intrinsic Name="_has_element_type" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000001u) != 0"/>
<DisplayString Condition="_has_element_type()">{{ elem_type={ *_impl_.elem_type_ } }}</DisplayString>
@@ -204,7 +207,7 @@
<Item Name="[has_element_type]" ExcludeView="simple">_has_element_type()</Item>
</Expand>
</Type>

<Type Name="onnx::TypeProto_Map">
<Intrinsic Name="_has_value_type" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000001u) != 0"/>
<DisplayString>{{ key_type={ (TensorProto_DataType)_impl_.key_type_ } }}</DisplayString>
@@ -239,7 +242,7 @@
<DisplayString>{{ begin={ begin_ }, end={ end_ } }}</DisplayString>
<Expand></Expand>
</Type>

<!--TODO Figure out how to calculate the size of the tensor given its shape. Loops are only possible in custom lists. -->
<Type Name="onnx::TensorProto">
<Intrinsic Name="_has_raw_data" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000002u) != 0" />
@@ -252,7 +255,7 @@
<Intrinsic Name="_shape_size_3" Expression="_shape_size_2() * _shape_array()[2]" />
<Intrinsic Name="_shape_size_4" Expression="_shape_size_3() * _shape_array()[3]" />
<Intrinsic Name="_shape_size_5" Expression="_shape_size_4() * _shape_array()[4]" />
<DisplayString>{{ name={name_.tagged_ptr_}, data_type={ (TensorProto_DataType)_impl_.data_type_ } }}</DisplayString>
<DisplayString>{{ name={_impl_.name_.tagged_ptr_}, data_type={ (TensorProto_DataType)_impl_.data_type_ } }}</DisplayString>
<Expand>
<Item Name="[name]" ExcludeView="simple">_impl_.name_.tagged_ptr_</Item>
<Item Name="[data_type]" ExcludeView="simple">(TensorProto_DataType)_impl_.data_type_</Item>
@@ -281,10 +284,10 @@
<Item Name="[has_segment]" ExcludeView="simple">_has_segment()</Item>
</Expand>
</Type>

<Type Name="onnx::SparseTensorProto">
<Intrinsic Name="_has_values" Expression="(_has_bits_.has_bits_[0] &amp; 0x00000001u) != 0"/>
<Intrinsic Name="_has_indices" Expression="(_has_bits_.has_bits_[0] &amp; 0x00000002u) != 0"/>
<Intrinsic Name="_has_values" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000001u) != 0"/>
<Intrinsic Name="_has_indices" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000002u) != 0"/>
<DisplayString>{{ SparseTensorProto }}</DisplayString>
<Expand>
<Item Name="[dense_shape]" ExcludeView="simple">_impl_.dims_</Item>
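The <Intrinsic> expressions in this visualizer all test a single presence bit against a mask (for example 0x00000020u selects the sixth optional field's has-bit). Below is a minimal C++ sketch of that has-bits idea for context; the struct and member names are illustrative, not the actual protobuf-generated layout.

// Minimal sketch of a has-bits presence check, the pattern the natvis
// <Intrinsic> expressions evaluate. Names here are illustrative only.
#include <cstdint>
#include <cstdio>

struct HasBits {
    uint32_t has_bits_[1] = {0};

    void set(int field_index) { has_bits_[0] |= (1u << field_index); }
    bool has(int field_index) const { return (has_bits_[0] & (1u << field_index)) != 0; }
};

int main() {
    HasBits bits;
    bits.set(5);  // the bit a visualizer would probe with mask 0x00000020u
    std::printf("has field 5: %d, has field 6: %d\n", bits.has(5), bits.has(6));
    return 0;
}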
69 changes: 66 additions & 3 deletions cmake/onnxruntime_mlas.cmake
@@ -35,6 +35,13 @@ onnxruntime_add_static_library(onnxruntime_mlas
${MLAS_SRC_DIR}/qdwconv_kernelsize.cpp
)

if (NOT onnxruntime_ORT_MINIMAL_BUILD)
target_sources(onnxruntime_mlas PRIVATE
${MLAS_SRC_DIR}/q4_dq.cpp
${MLAS_SRC_DIR}/q4gemm.cpp
)
endif()

set(ONNXRUNTIME_MLAS_LIBS onnxruntime_mlas)

#TODO: set MASM flags properly
@@ -186,6 +193,12 @@ function(setup_mlas_source_for_windows)
${MLAS_SRC_DIR}/amd64/TanhKernelFma3.asm
${MLAS_SRC_DIR}/amd64/ErfKernelFma3.asm
)
if (NOT onnxruntime_ORT_MINIMAL_BUILD)
target_sources(onnxruntime_mlas PRIVATE
${MLAS_SRC_DIR}/q4gemm_avx512.cpp
)
endif()

else()
target_sources(onnxruntime_mlas PRIVATE
${MLAS_SRC_DIR}/qgemm_kernel_sse.cpp
@@ -526,16 +539,23 @@ else()
${mlas_platform_srcs_avx512core}
)

if(NOT APPLE)
if (NOT onnxruntime_ORT_MINIMAL_BUILD)
set(mlas_platform_srcs
${mlas_platform_srcs}
${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmxCommon.S
${MLAS_SRC_DIR}/q4gemm_avx512.cpp
)
set_source_files_properties(${MLAS_SRC_DIR}/q4gemm_avx512.cpp PROPERTIES COMPILE_FLAGS "-mfma -mavx512vnni -mavx512bw -mavx512dq -mavx512vl -mavx512f")
endif()
if(NOT APPLE)
set(mlas_platform_srcs
${mlas_platform_srcs}
${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmxCommon.S
${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp
${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S
)
set_source_files_properties(${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
set_source_files_properties(${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
endif()
endif()

if(ONNXRUNTIME_MLAS_MULTI_ARCH)
onnxruntime_add_static_library(onnxruntime_mlas_x86_64 ${mlas_platform_srcs})
@@ -572,3 +592,46 @@ if (NOT onnxruntime_BUILD_SHARED_LIB)
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()


if (NOT onnxruntime_ORT_MINIMAL_BUILD)

#
# Command line tool for quantization and de-quantization of 2-D fp32 tensors
# based on block-wise quantization of int4
#

onnxruntime_add_executable(onnxruntime_mlas_q4dq
${MLAS_SRC_DIR}/q4_dq_cli.cpp
)
target_include_directories(onnxruntime_mlas_q4dq PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${MLAS_SRC_DIR})
set_target_properties(onnxruntime_mlas_q4dq PROPERTIES FOLDER "ONNXRuntimeTest")

target_link_libraries(onnxruntime_mlas_q4dq PRIVATE ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common)
if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
target_link_libraries(onnxruntime_mlas_q4dq PRIVATE cpuinfo)
endif()
if(NOT WIN32)
target_link_libraries(onnxruntime_mlas_q4dq PRIVATE nsync::nsync_cpp ${CMAKE_DL_LIBS})
endif()
if (CMAKE_SYSTEM_NAME STREQUAL "Android")
target_link_libraries(onnxruntime_mlas_q4dq PRIVATE ${android_shared_libs})
endif()

if(WIN32)
target_link_libraries(onnxruntime_mlas_q4dq PRIVATE debug Dbghelp Advapi32)
endif()
if (onnxruntime_LINK_LIBATOMIC)
target_link_libraries(onnxruntime_mlas_q4dq PRIVATE atomic)
endif()
target_link_libraries(onnxruntime_mlas_q4dq PRIVATE Threads::Threads)

if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
set_target_properties(onnxruntime_mlas_q4dq PROPERTIES LINK_FLAGS "-s ALLOW_MEMORY_GROWTH=1 -s PROXY_TO_PTHREAD=1 -s EXIT_RUNTIME=1")
else()
set_target_properties(onnxruntime_mlas_q4dq PROPERTIES LINK_FLAGS "-s ALLOW_MEMORY_GROWTH=1")
endif()
endif()

endif()
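For context on what the onnxruntime_mlas_q4dq tool operates on, here is a minimal C++ sketch of block-wise int4 quantization and de-quantization of fp32 data. The block size, packing layout, and function names are illustrative assumptions for this sketch, not the MLAS implementation.

// Illustrative block-wise int4 round trip: each block of 32 fp32 values is
// mapped symmetrically onto [-7, 7] with one scale per block, and two 4-bit
// values are packed per byte. Hypothetical layout, not the MLAS format.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

constexpr size_t kBlockSize = 32;  // fp32 values quantized per block

struct Q4Block {
    float scale;                     // per-block scale factor
    uint8_t packed[kBlockSize / 2];  // two 4-bit values per byte
};

// Quantize one block: symmetric mapping of [-max|x|, +max|x|] onto [-7, 7].
Q4Block QuantizeBlock(const float* x, size_t n) {
    float absmax = 0.0f;
    for (size_t i = 0; i < n; ++i) absmax = std::max(absmax, std::fabs(x[i]));
    Q4Block blk{};
    blk.scale = absmax / 7.0f;
    float inv = blk.scale != 0.0f ? 1.0f / blk.scale : 0.0f;
    for (size_t i = 0; i < kBlockSize; ++i) {
        float v = i < n ? x[i] * inv : 0.0f;
        int q = std::clamp(static_cast<int>(std::lround(v)), -7, 7) + 8;  // bias into an unsigned nibble
        if (i % 2 == 0) blk.packed[i / 2] = static_cast<uint8_t>(q);
        else            blk.packed[i / 2] |= static_cast<uint8_t>(q << 4);
    }
    return blk;
}

// De-quantize one block back to fp32.
void DequantizeBlock(const Q4Block& blk, float* out, size_t n) {
    for (size_t i = 0; i < n; ++i) {
        uint8_t byte = blk.packed[i / 2];
        int q = ((i % 2 == 0) ? (byte & 0x0F) : (byte >> 4)) - 8;  // undo the bias
        out[i] = q * blk.scale;
    }
}

int main() {
    std::vector<float> row(40);
    for (size_t i = 0; i < row.size(); ++i) row[i] = 0.1f * static_cast<float>(i) - 2.0f;

    // Quantize a row block by block, then round-trip it.
    std::vector<float> restored(row.size());
    for (size_t off = 0; off < row.size(); off += kBlockSize) {
        size_t n = std::min(kBlockSize, row.size() - off);
        Q4Block blk = QuantizeBlock(row.data() + off, n);
        DequantizeBlock(blk, restored.data() + off, n);
    }
    std::printf("orig %.3f -> restored %.3f\n", row[5], restored[5]);
    return 0;
}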
