Merge branch 'PaddlePaddle:develop' into add_diagonal
bapijun authored Oct 19, 2023
2 parents 0ea1cce + cfd1d0e commit e402e9c
Showing 1,458 changed files with 65,998 additions and 39,009 deletions.
8 changes: 4 additions & 4 deletions .clang-tidy
@@ -20,7 +20,7 @@ bugprone-integer-division,
 bugprone-misplaced-widening-cast,
 -bugprone-move-forwarding-reference,
 -bugprone-multiple-statement-macro,
--bugprone-narrowing-conversions,
+bugprone-narrowing-conversions,
 -bugprone-not-null-terminated-result,
 -bugprone-parent-virtual-call,
 -bugprone-posix-return,
@@ -155,7 +155,7 @@ cppcoreguidelines-avoid-c-arrays,
 -cppcoreguidelines-avoid-goto,
 cppcoreguidelines-c-copy-assignment-signature,
 cppcoreguidelines-explicit-virtual-functions,
--cppcoreguidelines-init-variables,
+cppcoreguidelines-init-variables,
 cppcoreguidelines-narrowing-conversions,
 cppcoreguidelines-no-malloc,
 -cppcoreguidelines-pro-type-const-cast,
@@ -189,12 +189,12 @@ modernize-use-override,
 modernize-use-transparent-functors,
 -modernize-use-uncaught-exceptions,
 performance-faster-string-find,
--performance-for-range-copy,
+performance-for-range-copy,
 -performance-implicit-conversion-in-loop,
 -performance-inefficient-algorithm,
 performance-inefficient-string-concatenation,
 -performance-inefficient-vector-operation,
--performance-move-const-arg,
+performance-move-const-arg,
 -performance-move-constructor-init,
 -performance-no-automatic-move,
 performance-noexcept-move-constructor,
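
For reference, a minimal C++ sketch (hypothetical code, not from the repository) of the patterns the four newly enabled checks diagnose:

    #include <string>
    #include <utility>
    #include <vector>

    void demo(const std::vector<std::string>& names) {
      // bugprone-narrowing-conversions: the 64-bit size is silently truncated.
      int n = names.size();

      // cppcoreguidelines-init-variables: declared but never initialized.
      int counter;

      // performance-for-range-copy: each element is copied; use const auto&.
      for (auto name : names) {
        (void)name;
      }

      // performance-move-const-arg: moving a const object still copies it.
      const std::string src = "id";
      std::string dst = std::move(src);
      (void)n;
      (void)counter;
      (void)dst;
    }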
3 changes: 3 additions & 0 deletions .flake8
@@ -26,6 +26,9 @@ per-file-ignores =
     # These files need tabs for testing.
     test/dygraph_to_static/test_error.py:E101,W191
 
+    # Ignore comparisons with True in SOT unit tests
+    test/sot/test_dup_top.py:E712
+
     # temp ignore base directory
     python/paddle/base/*:
         E712,
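
E712 is pycodestyle's "comparison to True/False" rule; a hypothetical sketch of the pattern the new exemption allows in test/sot/test_dup_top.py, where the explicit comparison is presumably written on purpose:

    flag = (1 == 1)

    # flake8 E712: comparison to True should be 'if flag:' or 'if flag is True:'
    if flag == True:  # kept verbatim, e.g. to exercise the compare bytecode in SOT
        print("taken")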
4 changes: 4 additions & 0 deletions .gitmodules
@@ -106,3 +106,7 @@
   path = third_party/jitify
   url = https://github.com/NVIDIA/jitify.git
   ignore = dirty
+[submodule "third_party/cccl"]
+  path = third_party/cccl
+  url = https://github.com/NVIDIA/cccl.git
+  ignore = dirty
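
A checkout made before this commit will not have the new submodule on disk; fetching it is the usual one-liner (a sketch, assuming a default clone):

    git submodule update --init third_party/cccl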
4 changes: 3 additions & 1 deletion cmake/external/brpc.cmake
@@ -13,7 +13,9 @@
 # limitations under the License.
 
 include(ExternalProject)
-set(OPENSSL_USE_STATIC_LIBS ON)
+if(NOT WITH_ARM)
+  set(OPENSSL_USE_STATIC_LIBS ON)
+endif()
 find_package(OpenSSL REQUIRED)
 
 message(STATUS "ssl:" ${OPENSSL_SSL_LIBRARY})
31 changes: 31 additions & 0 deletions cmake/external/cccl.cmake
@@ -0,0 +1,31 @@
+include(ExternalProject)
+
+set(CCCL_PATH
+    "${THIRD_PARTY_PATH}/cccl"
+    CACHE STRING "A path setting for external_cccl path.")
+set(CCCL_PREFIX_DIR ${CCCL_PATH})
+set(CCCL_SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/cccl)
+
+# The latest commit has bugs on Windows, so we pin a fixed commit.
+set(CCCL_TAG 1f6e4bcae0fbf1bbed87f88544d8d2161c490fc1)
+execute_process(COMMAND git --git-dir=${CCCL_SOURCE_DIR}/.git
+                        --work-tree=${CCCL_SOURCE_DIR} checkout ${CCCL_TAG})
+
+set(CCCL_INCLUDE_DIR ${CCCL_SOURCE_DIR})
+message("CCCL_INCLUDE_DIR is ${CCCL_INCLUDE_DIR}")
+include_directories(${CCCL_INCLUDE_DIR})
+
+ExternalProject_Add(
+  extern_cccl
+  ${EXTERNAL_PROJECT_LOG_ARGS}
+  SOURCE_DIR ${CCCL_SOURCE_DIR}
+  PREFIX ${CCCL_PREFIX_DIR}
+  UPDATE_COMMAND ""
+  CONFIGURE_COMMAND ""
+  BUILD_COMMAND ""
+  INSTALL_COMMAND ""
+  TEST_COMMAND "")
+
+add_library(cccl INTERFACE)
+
+add_dependencies(cccl extern_cccl)
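
CCCL is consumed header-only here, so the ExternalProject has empty configure/build/install steps and merely pins the checkout; a downstream target only needs the dependency edge. A sketch with a hypothetical target name:

    # Hypothetical consumer of the INTERFACE library declared above.
    add_library(my_kernels STATIC my_kernels.cu)
    add_dependencies(my_kernels extern_cccl)  # ensure sources are checked out at CCCL_TAG
    target_link_libraries(my_kernels cccl)    # adds no libraries; headers already come
                                              # from the global include_directories()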
14 changes: 9 additions & 5 deletions cmake/external/openblas.cmake
@@ -19,12 +19,16 @@ set(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
 set(CBLAS_SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/openblas)
 set(CBLAS_TAG v0.3.7)
 
-# OpenBLAS support Raptor Lake from v0.3.22
-if(UNIX
-   AND NOT APPLE
-   AND NOT WITH_ROCM
+# Why use v0.3.18? The IDG business line encountered a random openblas error,
+# which is resolved by upgrading openblas.
+# And why compile only when gcc > 8.2? Please refer to
+# https://github.com/spack/spack/issues/19932#issuecomment-733452619
+# v0.3.18 only supports gcc >= 8.3 or gcc >= 7.4
+if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+   AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.2
    AND NOT WITH_XPU)
-  set(CBLAS_TAG v0.3.23)
+  # We only compile with openblas 0.3.18 when gcc >= 8.3
+  set(CBLAS_TAG v0.3.18)
 endif()
 
 if(APPLE AND WITH_ARM)
5 changes: 4 additions & 1 deletion cmake/generic.cmake
@@ -88,7 +88,7 @@
 # To build a unit test binary, which is an executable binary with libpaddle.so
 # automatically linked:
 #
-#   paddle_test(example SHARED)
+#   paddle_test(example SRCS example_test.cc)
 #
 
 # including binary directory for generated headers.
@@ -1345,6 +1345,9 @@ function(math_library TARGET)
   if(WITH_GPU)
     if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0)
       list(APPEND math_common_deps cub)
+    elseif(${CMAKE_CUDA_COMPILER_VERSION} EQUAL 12.0
+           OR ${CMAKE_CUDA_COMPILER_VERSION} GREATER 12.0)
+      list(APPEND math_common_deps cccl)
     else()
       list(APPEND math_common_deps)
     endif()
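
The EQUAL 12.0 OR GREATER 12.0 pair (here and in cmake/third_party.cmake below) behaves as a greater-or-equal test; with CMake >= 3.7 it could arguably be written more tightly as:

    if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0)
      list(APPEND math_common_deps cccl)
    endif()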
12 changes: 12 additions & 0 deletions cmake/third_party.cmake
@@ -247,6 +247,14 @@ if(NOT DEFINED WITH_MKLDNN)
   endif()
 endif()
 
+if(WIN32)
+  if(MSVC)
+    if(MSVC_VERSION LESS 1920)
+      set(WITH_MKLDNN OFF)
+    endif()
+  endif()
+endif()
+
 if(WIN32
    OR APPLE
    OR NOT WITH_GPU
@@ -375,6 +383,10 @@ if(WITH_GPU)
   if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0)
     include(external/cub) # download cub
     list(APPEND third_party_deps extern_cub)
+  elseif(${CMAKE_CUDA_COMPILER_VERSION} EQUAL 12.0
+         OR ${CMAKE_CUDA_COMPILER_VERSION} GREATER 12.0)
+    include(external/cccl)
+    list(APPEND third_party_deps extern_cccl)
   endif()
   set(URL
       "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg_20210928.tar.gz"
83 changes: 83 additions & 0 deletions paddle/cinn/ast_gen_ius/ast_gen.cc
@@ -19,6 +19,7 @@
#include "paddle/cinn/ir/operation.h"
#include "paddle/cinn/ir/tensor.h"
#include "paddle/cinn/lang/compute.h"
#include "paddle/cinn/optim/replace_var_with_expr.h"

namespace cinn {
namespace ast_gen_ius {
@@ -84,11 +85,75 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
     tensor_group->MarkShareMemBuffer(tensor, init_tensor);
     tensor_group->CtrlDepend(tensor, init_tensor);
     Expr init_body = ir::Store::Make(init_tensor, init_value, axis_exprs);
+    // create schedule block itervars, i0,i1...
+    std::vector<ir::Var> block_vars;
+    std::vector<ir::Expr> iter_values;
+    // reduce body and reduce init schedule block should have different objects
+    // for same axis so we re-create objects
+    std::vector<Var> axis_vars = common::GenDefaultAxis(axis_len);
+    for (int i = 0; i < shape.size(); ++i) {
+      block_vars.push_back(Var(Expr(0),
+                               shape[i],
+                               cinn::UniqName("i" + std::to_string(i)),
+                               /*is_reduce = */ false));
+      optim::ReplaceVarWithExpr(&init_body, axis[i], block_vars[i]);
+      axis_vars[i]->is_reduce_axis = false;
+      if (shape[i] == Expr(1)) {
+        iter_values.push_back(Expr(0));
+      } else {
+        iter_values.push_back(axis_vars[i]);
+      }
+    }
+    init_body = ir::ScheduleBlockRealize::Make(
+        iter_values,
+        ir::ScheduleBlock::Make(
+            block_vars, {}, {}, reduce_init_name, init_body));
 
     // For the remaining reduce axis, make reduce body
     const std::vector<ir::Var>& reduce_axis = tensor->reduce_axis;
     ir::Expr reduce_body =
         ConvertReduceBody(tensor->body(), tensor, axis_exprs);
+    // create schedule block itervars, i0,i1...
+    std::vector<ir::Var> reduce_block_vars;
+    std::vector<ir::Expr> reduce_iter_values;
+    // reduce body and reduce init schedule block should have different objects
+    // for same axis so we re-create objects
+    std::vector<Var> reduce_axis_vars = common::GenDefaultAxis(axis_len);
+    for (int i = 0; i < shape.size(); ++i) {
+      reduce_block_vars.push_back(Var(Expr(0),
+                                      shape[i],
+                                      cinn::UniqName("i" + std::to_string(i)),
+                                      /*is_reduce = */ false));
+      reduce_axis_vars[i]->is_reduce_axis = false;
+      if (shape[i] == Expr(1)) {
+        reduce_iter_values.push_back(Expr(0));
+      } else {
+        reduce_iter_values.push_back(axis_vars[i]);
+      }
+    }
+    for (int i = 0; i < reduce_axis.size(); ++i) {
+      int count = shape.size() + i;
+      reduce_block_vars.push_back(
+          Var(reduce_axis[i]->lower_bound,
+              reduce_axis[i]->upper_bound,
+              cinn::UniqName("i" + std::to_string(count)),
+              /*is_reduce = */ true));
+      ir::Var reduce_axis_var = reduce_axis[i];
+      reduce_axis_var->is_reduce_axis = true;
+      reduce_iter_values.push_back(reduce_axis_var);
+    }
+    for (int i = 0; i < axis.size(); ++i) {
+      optim::ReplaceVarWithExpr(&reduce_body, axis[i], reduce_block_vars[i]);
+    }
+    for (int i = axis.size(); i < reduce_block_vars.size(); ++i) {
+      optim::ReplaceVarWithExpr(
+          &reduce_body, reduce_axis[i - axis.size()], reduce_block_vars[i]);
+    }
+
+    reduce_body = ir::ScheduleBlockRealize::Make(
+        reduce_iter_values,
+        ir::ScheduleBlock::Make(
+            reduce_block_vars, {}, {}, tensor->name, reduce_body));
     for (int i = static_cast<int>(reduce_axis.size()) - 1; i >= 0; --i) {
       reduce_body = ir::For::Make(reduce_axis[i],
                                   reduce_axis[i]->lower_bound,
@@ -114,6 +179,24 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
     return body;
   } else {
     ir::Expr body = ir::Store::Make(tensor, tensor->body(), axis_exprs);
+    // create schedule block itervars, i0,i1...
+    std::vector<ir::Var> block_vars;
+    std::vector<ir::Expr> iter_values;
+    std::vector<Var> axis_vars = common::GenDefaultAxis(axis_len);
+    for (int i = 0; i < shape.size(); ++i) {
+      block_vars.push_back(Var(
+          Expr(0), shape[i], cinn::UniqName("i" + std::to_string(i)), false));
+      optim::ReplaceVarWithExpr(&body, axis[i], block_vars[i]);
+      axis_vars[i]->is_reduce_axis = false;
+      if (shape[i] == Expr(1)) {
+        iter_values.push_back(Expr(0));
+      } else {
+        iter_values.push_back(axis_vars[i]);
+      }
+    }
+    body = ir::ScheduleBlockRealize::Make(
+        iter_values,
+        ir::ScheduleBlock::Make(block_vars, {}, {}, tensor->name, body));
     for (int i = static_cast<int>(axis_len) - 1; i >= 0; --i) {
       ir::Var loop_var = axis[i];
       ir::Expr loop_extent = shape[i];
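
For intuition, a hedged sketch of what this branch now emits for a simple non-reduction compute (hypothetical 16x32 tensor B = A + 1, CINN's textual IR paraphrased): each store is wrapped in a ScheduleBlockRealize whose block itervars bind to the loop axes, or to 0 wherever an axis extent is 1.

    for (i, 0, 16) {
      for (j, 0, 32) {
        ScheduleBlock(B) {
          i0, i1 = axis.bind(i, j)
          B[i0, i1] = A[i0, i1] + 1.0f
        }
      }
    }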
(The remaining changed files are not shown here.)
