
Commit

resolve conflict
RandyShuai committed Aug 9, 2023
2 parents 92de13a + 2c5d4dc commit ec27703
Showing 222 changed files with 10,299 additions and 1,469 deletions.
1 change: 0 additions & 1 deletion .vscode/settings.json
@@ -40,4 +40,3 @@
"-build/include_subdir",
"-runtime/references"
]
}
17 changes: 13 additions & 4 deletions cmake/CMakeLists.txt
@@ -257,15 +257,21 @@ option(onnxruntime_USE_LOCK_FREE_QUEUE "Build with lock-free task queue for thre
# Some features are only enabled when onnxruntime_ENABLE_PYTHON is ON as they are only relevant
# when using python env
if (onnxruntime_ENABLE_TRAINING)
set(onnxruntime_ENABLE_TRAINING_OPS ON)
set(onnxruntime_ENABLE_TRAINING_APIS ON)
set(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP ON)
set(onnxruntime_ENABLE_TRAINING_OPS ON)
set(onnxruntime_ENABLE_ATEN ON)
set(onnxruntime_ENABLE_TRITON ON)
if (NOT APPLE)
set(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP ON)
endif()
endif()

if (onnxruntime_ENABLE_TRAINING_APIS)
set(onnxruntime_ENABLE_TRAINING_OPS ON)
if (onnxruntime_ENABLE_PYTHON AND NOT onnxruntime_ENABLE_TRAINING)
message(FATAL_ERROR "Standalone On-Device Training build is not supported with Python bindings! "
"Please use the --enable_training flag instead of the --enable_training_apis flag.")
endif()
endif()

if (onnxruntime_USE_CUDA)
@@ -1230,9 +1236,12 @@ if (onnxruntime_USE_OPENVINO)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0")
set(OPENVINO_VERSION "2023.0")
add_definitions(-DOPENVINO_2023_0=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.1")
set(OPENVINO_VERSION "2023.1")
add_definitions(-DOPENVINO_2023_1=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino")
set(OPENVINO_VERSION "2023.0")
add_definitions(-DOPENVINO_2023_0=1)
set(OPENVINO_VERSION "2023.1")
add_definitions(-DOPENVINO_2023_1=1)
else()
message(FATAL_ERROR "Unsupported OpenVINO version: ${INTEL_OPENVINO_DIR}")
endif()
39 changes: 21 additions & 18 deletions cmake/external/onnx_protobuf.natvis
@@ -50,7 +50,7 @@
<Item Name="[value]">_impl_.value_.tagged_ptr_</Item>
</Expand>
</Type>

<Type Name="onnx::TensorAnnotation">
<DisplayString>{{ tensor_name={_impl_.tensor_name_.tagged_ptr_} }}</DisplayString>
<Expand>
@@ -80,7 +80,7 @@
<Intrinsic Name="_has_graph" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000020u) != 0"/>
<Intrinsic Name="_has_type_proto" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000040u) != 0"/>
<Intrinsic Name="_has_sparse_tensor" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000080u) != 0"/>
<DisplayString>{{ name={ name_.tagged_ptr_ }, type={ (AttributeProto_AttributeType)_impl_.type_ } }}</DisplayString>
<DisplayString>{{ name={ _impl_.name_.tagged_ptr_ }, type={ (AttributeProto_AttributeType)_impl_.type_ } }}</DisplayString>
<Expand>
<Item Name="[name]">_impl_.name_.tagged_ptr_</Item>
<Item Name="[type]">(AttributeProto_AttributeType)_impl_.type_</Item>
@@ -118,18 +118,21 @@
<Item Name="[attribute]">_impl_.attribute_</Item>
</Expand>
</Type>

<Type Name="onnx::FunctionProto">
<DisplayString>{{ name={ _impl_.name_.tagged_ptr_ }, domain={ _impl_.domain_.tagged_ptr_ } }}</DisplayString>
<DisplayString>{{ name={ _impl_.name_.tagged_ptr_ } }}</DisplayString>
<Expand>
<Item Name="[opset_import]">_impl_.opset_import_</Item>
<Item Name="[input]">_impl_.input_</Item>
<Item Name="[output]">_impl_.output_</Item>
<Item Name="[node]">_impl_.node_</Item>
<Item Name="[doc]" ExcludeView="simple">_impl_.doc_string_.tagged_ptr_</Item>
<Item Name="[attribute]">_impl_.attribute_</Item>
<Item Name="[attribute_proto]" ExcludeView="simple">_impl_.attribute_proto_</Item>
<Item Name="[doc]">_impl_.doc_string_.tagged_ptr_</Item>
<Item Name="[opset_import]">_impl_.opset_import_</Item>
<Item Name="[domain]">_impl_.domain_.tagged_ptr_</Item>
</Expand>
</Type>

<Type Name="onnx::GraphProto">
<DisplayString>{{ name={ _impl_.name_.tagged_ptr_ } }}</DisplayString>
<Expand>
@@ -143,7 +146,7 @@
<Item Name="[doc]" ExcludeView="simple">_impl_.doc_string_.tagged_ptr_</Item>
</Expand>
</Type>

<Type Name="onnx::ModelProto">
<DisplayString>{{ producer={_impl_.producer_name_.tagged_ptr_}, domain={ _impl_.domain_.tagged_ptr_ } }}</DisplayString>
<Expand>
@@ -156,7 +159,7 @@
<Item Name="[graph]" ExcludeView="simple">_impl_.graph_</Item>
</Expand>
</Type>

<Type Name="onnx::TensorShapeProto_Dimension">
<DisplayString Condition="*_impl_._oneof_case_ == 0">empty</DisplayString>
<DisplayString Condition="*_impl_._oneof_case_ == kDimValue">{{ v = {_impl_.value_.dim_value_} }}</DisplayString>
@@ -171,7 +174,7 @@
<Item Name="[dims]" ExcludeView="simple">_impl_.dim_</Item>
</Expand>
</Type>

<!--Type Protos-->
<Type Name="onnx::TypeProto">
<DisplayString>{{ type={ (ValueCase)*_impl_._oneof_case_ } }}</DisplayString>
@@ -185,7 +188,7 @@
<Item Name="[denotation]" ExcludeView="simple">_impl_.denotation_.tagged_ptr_</Item>
</Expand>
</Type>

<Type Name="onnx::TypeProto_Tensor">
<Intrinsic Name="_has_shape" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000001u) != 0"/>
<DisplayString>{{ elem_type={ (TensorProto_DataType)_impl_.elem_type_ } }}</DisplayString>
@@ -195,7 +198,7 @@
<Item Name="[has_shape]" ExcludeView="simple">_has_shape()</Item>
</Expand>
</Type>

<Type Name="onnx::TypeProto_Sequence">
<Intrinsic Name="_has_element_type" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000001u) != 0"/>
<DisplayString Condition="_has_element_type()">{{ elem_type={ *_impl_.elem_type_ } }}</DisplayString>
@@ -204,7 +207,7 @@
<Item Name="[has_element_type]" ExcludeView="simple">_has_element_type()</Item>
</Expand>
</Type>

<Type Name="onnx::TypeProto_Map">
<Intrinsic Name="_has_value_type" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000001u) != 0"/>
<DisplayString>{{ key_type={ (TensorProto_DataType)_impl_.key_type_ } }}</DisplayString>
@@ -239,7 +242,7 @@
<DisplayString>{{ begin={ begin_ }, end={ end_ } }}</DisplayString>
<Expand></Expand>
</Type>

<!--TODO Figure out how to calculate the size of the tensor given its shape. Loops are only possible in custom lists. -->
<Type Name="onnx::TensorProto">
<Intrinsic Name="_has_raw_data" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000002u) != 0" />
@@ -252,7 +255,7 @@
<Intrinsic Name="_shape_size_3" Expression="_shape_size_2() * _shape_array()[2]" />
<Intrinsic Name="_shape_size_4" Expression="_shape_size_3() * _shape_array()[3]" />
<Intrinsic Name="_shape_size_5" Expression="_shape_size_4() * _shape_array()[4]" />
<DisplayString>{{ name={name_.tagged_ptr_}, data_type={ (TensorProto_DataType)_impl_.data_type_ } }}</DisplayString>
<DisplayString>{{ name={_impl_.name_.tagged_ptr_}, data_type={ (TensorProto_DataType)_impl_.data_type_ } }}</DisplayString>
<Expand>
<Item Name="[name]" ExcludeView="simple">_impl_.name_.tagged_ptr_</Item>
<Item Name="[data_type]" ExcludeView="simple">(TensorProto_DataType)_impl_.data_type_</Item>
@@ -281,10 +284,10 @@
<Item Name="[has_segment]" ExcludeView="simple">_has_segment()</Item>
</Expand>
</Type>

<Type Name="onnx::SparseTensorProto">
<Intrinsic Name="_has_values" Expression="(_has_bits_.has_bits_[0] &amp; 0x00000001u) != 0"/>
<Intrinsic Name="_has_indices" Expression="(_has_bits_.has_bits_[0] &amp; 0x00000002u) != 0"/>
<Intrinsic Name="_has_values" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000001u) != 0"/>
<Intrinsic Name="_has_indices" Expression="(_impl_._has_bits_.has_bits_[0] &amp; 0x00000002u) != 0"/>
<DisplayString>{{ SparseTensorProto }}</DisplayString>
<Expand>
<Item Name="[dense_shape]" ExcludeView="simple">_impl_.dims_</Item>
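The <Intrinsic> expressions in this visualizer all test a single presence bit against a mask (for example 0x00000020u selects the sixth optional field's has-bit). Below is a minimal C++ sketch of that has-bits idea for context; the struct and member names are illustrative, not the actual protobuf-generated layout.

// Minimal sketch of a has-bits presence check, the pattern the natvis
// <Intrinsic> expressions evaluate. Names here are illustrative only.
#include <cstdint>
#include <cstdio>

struct HasBits {
    uint32_t has_bits_[1] = {0};

    void set(int field_index) { has_bits_[0] |= (1u << field_index); }
    bool has(int field_index) const { return (has_bits_[0] & (1u << field_index)) != 0; }
};

int main() {
    HasBits bits;
    bits.set(5);  // the bit a visualizer would probe with mask 0x00000020u
    std::printf("has field 5: %d, has field 6: %d\n", bits.has(5), bits.has(6));
    return 0;
}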
69 changes: 66 additions & 3 deletions cmake/onnxruntime_mlas.cmake
@@ -35,6 +35,13 @@ onnxruntime_add_static_library(onnxruntime_mlas
${MLAS_SRC_DIR}/qdwconv_kernelsize.cpp
)

if (NOT onnxruntime_ORT_MINIMAL_BUILD)
target_sources(onnxruntime_mlas PRIVATE
${MLAS_SRC_DIR}/q4_dq.cpp
${MLAS_SRC_DIR}/q4gemm.cpp
)
endif()

set(ONNXRUNTIME_MLAS_LIBS onnxruntime_mlas)

#TODO: set MASM flags properly
@@ -186,6 +193,12 @@ function(setup_mlas_source_for_windows)
${MLAS_SRC_DIR}/amd64/TanhKernelFma3.asm
${MLAS_SRC_DIR}/amd64/ErfKernelFma3.asm
)
if (NOT onnxruntime_ORT_MINIMAL_BUILD)
target_sources(onnxruntime_mlas PRIVATE
${MLAS_SRC_DIR}/q4gemm_avx512.cpp
)
endif()

else()
target_sources(onnxruntime_mlas PRIVATE
${MLAS_SRC_DIR}/qgemm_kernel_sse.cpp
@@ -526,16 +539,23 @@ else()
${mlas_platform_srcs_avx512core}
)

if(NOT APPLE)
if (NOT onnxruntime_ORT_MINIMAL_BUILD)
set(mlas_platform_srcs
${mlas_platform_srcs}
${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmxCommon.S
${MLAS_SRC_DIR}/q4gemm_avx512.cpp
)
set_source_files_properties(${MLAS_SRC_DIR}/q4gemm_avx512.cpp PROPERTIES COMPILE_FLAGS "-mfma -mavx512vnni -mavx512bw -mavx512dq -mavx512vl -mavx512f")
endif()
if(NOT APPLE)
set(mlas_platform_srcs
${mlas_platform_srcs}
${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmxCommon.S
${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp
${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S
)
set_source_files_properties(${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
set_source_files_properties(${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
endif()
endif()

if(ONNXRUNTIME_MLAS_MULTI_ARCH)
onnxruntime_add_static_library(onnxruntime_mlas_x86_64 ${mlas_platform_srcs})
@@ -572,3 +592,46 @@ if (NOT onnxruntime_BUILD_SHARED_LIB)
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()


if (NOT onnxruntime_ORT_MINIMAL_BUILD)

#
# Command line tool for quantization and de-quantization of 2-D fp32 tensors
# based on block-wise quantization of int4
#

onnxruntime_add_executable(onnxruntime_mlas_q4dq
${MLAS_SRC_DIR}/q4_dq_cli.cpp
)
target_include_directories(onnxruntime_mlas_q4dq PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${MLAS_SRC_DIR})
set_target_properties(onnxruntime_mlas_q4dq PROPERTIES FOLDER "ONNXRuntimeTest")

target_link_libraries(onnxruntime_mlas_q4dq PRIVATE ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common)
if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
target_link_libraries(onnxruntime_mlas_q4dq PRIVATE cpuinfo)
endif()
if(NOT WIN32)
target_link_libraries(onnxruntime_mlas_q4dq PRIVATE nsync::nsync_cpp ${CMAKE_DL_LIBS})
endif()
if (CMAKE_SYSTEM_NAME STREQUAL "Android")
target_link_libraries(onnxruntime_mlas_q4dq PRIVATE ${android_shared_libs})
endif()

if(WIN32)
target_link_libraries(onnxruntime_mlas_q4dq PRIVATE debug Dbghelp Advapi32)
endif()
if (onnxruntime_LINK_LIBATOMIC)
target_link_libraries(onnxruntime_mlas_q4dq PRIVATE atomic)
endif()
target_link_libraries(onnxruntime_mlas_q4dq PRIVATE Threads::Threads)

if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
set_target_properties(onnxruntime_mlas_q4dq PROPERTIES LINK_FLAGS "-s ALLOW_MEMORY_GROWTH=1 -s PROXY_TO_PTHREAD=1 -s EXIT_RUNTIME=1")
else()
set_target_properties(onnxruntime_mlas_q4dq PROPERTIES LINK_FLAGS "-s ALLOW_MEMORY_GROWTH=1")
endif()
endif()

endif()
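For context on what the onnxruntime_mlas_q4dq tool operates on, here is a minimal C++ sketch of block-wise int4 quantization and de-quantization of fp32 data. The block size, packing layout, and function names are illustrative assumptions for this sketch, not the MLAS implementation.

// Illustrative block-wise int4 round trip: each block of 32 fp32 values is
// mapped symmetrically onto [-7, 7] with one scale per block, and two 4-bit
// values are packed per byte. Hypothetical layout, not the MLAS format.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

constexpr size_t kBlockSize = 32;  // fp32 values quantized per block

struct Q4Block {
    float scale;                     // per-block scale factor
    uint8_t packed[kBlockSize / 2];  // two 4-bit values per byte
};

// Quantize one block: symmetric mapping of [-max|x|, +max|x|] onto [-7, 7].
Q4Block QuantizeBlock(const float* x, size_t n) {
    float absmax = 0.0f;
    for (size_t i = 0; i < n; ++i) absmax = std::max(absmax, std::fabs(x[i]));
    Q4Block blk{};
    blk.scale = absmax / 7.0f;
    float inv = blk.scale != 0.0f ? 1.0f / blk.scale : 0.0f;
    for (size_t i = 0; i < kBlockSize; ++i) {
        float v = i < n ? x[i] * inv : 0.0f;
        int q = std::clamp(static_cast<int>(std::lround(v)), -7, 7) + 8;  // bias into an unsigned nibble
        if (i % 2 == 0) blk.packed[i / 2] = static_cast<uint8_t>(q);
        else            blk.packed[i / 2] |= static_cast<uint8_t>(q << 4);
    }
    return blk;
}

// De-quantize one block back to fp32.
void DequantizeBlock(const Q4Block& blk, float* out, size_t n) {
    for (size_t i = 0; i < n; ++i) {
        uint8_t byte = blk.packed[i / 2];
        int q = ((i % 2 == 0) ? (byte & 0x0F) : (byte >> 4)) - 8;  // undo the bias
        out[i] = q * blk.scale;
    }
}

int main() {
    std::vector<float> row(40);
    for (size_t i = 0; i < row.size(); ++i) row[i] = 0.1f * static_cast<float>(i) - 2.0f;

    // Quantize a row block by block, then round-trip it.
    std::vector<float> restored(row.size());
    for (size_t off = 0; off < row.size(); off += kBlockSize) {
        size_t n = std::min(kBlockSize, row.size() - off);
        Q4Block blk = QuantizeBlock(row.data() + off, n);
        DequantizeBlock(blk, restored.data() + off, n);
    }
    std::printf("orig %.3f -> restored %.3f\n", row[5], restored[5]);
    return 0;
}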
