Fix build issues
yuslepukhin committed Mar 25, 2024
1 parent 3d0debc commit 1546d79
Showing 3 changed files with 22 additions and 16 deletions.
@@ -114,11 +114,14 @@ void compute_gemm_ref(
template <
typename Element,
typename LayoutCutlass,
- typename Layout = std::conditional_t<std::is_same<LayoutCutlass, cutlass::layout::ColumnMajor>::value, ColumnMajorLayout, RowMajorLayout>>
+ typename Layout = std::conditional_t<std::is_same<LayoutCutlass,
+                                                    cutlass::layout::ColumnMajor>::value,
+                                       ColumnMajorLayout, RowMajorLayout>>
__forceinline__
MatrixRef<Element, Layout, true>
make_MatrixRef(cutlass::HostTensor<Element, LayoutCutlass> const& tensor) {
- static_assert(std::is_same<LayoutCutlass, cutlass::layout::ColumnMajor>::value || std::is_same<LayoutCutlass, cutlass::layout::RowMajor>::value);
+ static_assert(std::is_same<LayoutCutlass, cutlass::layout::ColumnMajor>::value ||
+               std::is_same<LayoutCutlass, cutlass::layout::RowMajor>::value);
auto shape = make_Position(tensor.extent().row(), tensor.extent().column());
auto* ptr = const_cast<typename std::remove_const<Element>::type*>(tensor.host_data());
return MatrixRef<Element, Layout, true>(ptr, tensor.capacity(), shape);
@@ -127,11 +130,13 @@ __forceinline__
template <
typename Element,
typename LayoutCutlass,
- typename Layout = std::conditional_t<std::is_same<LayoutCutlass, cutlass::layout::ColumnMajor>::value, ColumnMajorLayout, RowMajorLayout>>
+ typename Layout = std::conditional_t<std::is_same<LayoutCutlass, cutlass::layout::ColumnMajor>::value,
+                                      ColumnMajorLayout, RowMajorLayout>>
__forceinline__
MatrixRef<Element const, Layout, true>
make_ConstMatrixRef(cutlass::HostTensor<Element, LayoutCutlass> const& tensor) {
- static_assert(std::is_same<LayoutCutlass, cutlass::layout::ColumnMajor>::value || std::is_same<LayoutCutlass, cutlass::layout::RowMajor>::value);
+ static_assert(std::is_same<LayoutCutlass, cutlass::layout::ColumnMajor>::value ||
+               std::is_same<LayoutCutlass, cutlass::layout::RowMajor>::value);
auto shape = make_Position(tensor.extent().row(), tensor.extent().column());
return MatrixRef<Element const, Layout, true>(tensor.host_data(), tensor.capacity(), shape);
}
@@ -175,7 +180,8 @@ void run_blkq4_gemm(int m, int n, int k) {

const cutlass::gemm::GemmCoord problem_size = {m, n, k};
const auto q_weight_shape = cutlass::make_Coord(problem_size.k() / 2, problem_size.n());
- const auto meta_shape = cutlass::make_Coord(problem_size.k() / QuantBlocking::kRow, problem_size.n() / QuantBlocking::kColumn);
+ const auto meta_shape = cutlass::make_Coord(problem_size.k() / QuantBlocking::kRow, problem_size.n() /
+                                             QuantBlocking::kColumn);

//
// Generate quantized and dequantizeed input matrix B [K, N]
@@ -150,8 +150,8 @@ stages:
--enable_cuda_profiling --enable_cuda_nhwc_ops \
--enable_pybind --build_java \
--use_cache \
- --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75; \
- --cmake_extra_defines onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON \
+ --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75 \
+ --cmake_extra_defines onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON; \
ccache -sv; \
ccache -z"
workingDirectory: $(Build.SourcesDirectory)
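The flag reordering above matters because the whole docker build invocation is passed as a single backslash-continued shell command, and a `;` terminates a shell command at that point: with the semicolon after CMAKE_CUDA_ARCHITECTURES=75, the build ran without the last --cmake_extra_defines, which was then glued to the following line and executed as a separate, invalid command. A minimal sketch of the behaviour, using a hypothetical ./build.sh in place of the pipeline's real build script:

    # Broken: the ';' ends the command here, so the last define never reaches
    # ./build.sh and the continuation line runs as its own (invalid) command
    ./build.sh \
      --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75; \
      --cmake_extra_defines onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON

    # Fixed: both defines stay on the continued command; the ';' terminates it
    # only after the last flag, so the next command starts cleanly
    ./build.sh \
      --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75 \
      --cmake_extra_defines onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON; \
    ccache -sv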
18 changes: 9 additions & 9 deletions tools/ci_build/github/azure-pipelines/win-gpu-ci-pipeline.yml
@@ -43,8 +43,8 @@ stages:
EnvSetupScript: setup_env_cuda.bat
buildArch: x64
additionalBuildFlags: --enable_pybind --build_java --build_nodejs --use_cuda --cuda_home="$(Agent.TempDirectory)\v11.8" \
- --enable_cuda_profiling \
- --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 \
+ --enable_cuda_profiling ^
+ --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 ^
--cmake_extra_defines onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON
msbuildPlatform: x64
isX86: false
@@ -62,9 +62,9 @@ stages:
BuildConfig: 'RelWithDebInfo'
EnvSetupScript: setup_env_cuda.bat
buildArch: x64
- additionalBuildFlags: --enable_pybind --enable_training --use_cuda --cuda_home="$(Agent.TempDirectory)\v11.8" \
- --skip_onnx_tests \
- --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 \
+ additionalBuildFlags: --enable_pybind --enable_training --use_cuda --cuda_home="$(Agent.TempDirectory)\v11.8" ^
+ --skip_onnx_tests ^
+ --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 ^
--cmake_extra_defines onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON
msbuildPlatform: x64
isX86: false
@@ -101,10 +101,10 @@ stages:
EnvSetupScript: setup_env_cuda.bat
buildArch: x64
# note: need to specify `--gen_doc` when creating the build config so it has to be in additionalBuildFlags
- additionalBuildFlags: --gen_doc validate --skip_tests --enable_pybind --use_dml --use_cuda \
- --cuda_home="$(Agent.TempDirectory)\v11.8" \
- --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 \
- --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF \
+ additionalBuildFlags: --gen_doc validate --skip_tests --enable_pybind --use_dml --use_cuda ^
+ --cuda_home="$(Agent.TempDirectory)\v11.8" ^
+ --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 ^
+ --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF ^
--cmake_extra_defines onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON
msbuildPlatform: x64
isX86: false
