Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/fuse_reduce_op
PaddlePaddle · May 16, 2018 · 71a6997
2 parents f0f00c5 + e528862
commit 71a6997
Show file tree

Hide file tree

Showing 15 changed files with 64 additions and 54 deletions.
diff --git a/cmake/external/snappy.cmake b/cmake/external/snappy.cmake
@@ -47,8 +47,6 @@ ExternalProject_Add(
                      -DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPY_INSTALL_DIR}/lib
                      -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
                      -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-    BUILD_COMMAND   make -j8
-    INSTALL_COMMAND make install
 )
 
 add_library(snappy STATIC IMPORTED GLOBAL)

diff --git a/cmake/external/snappystream.cmake b/cmake/external/snappystream.cmake
@@ -46,8 +46,6 @@ ExternalProject_Add(
                         -DCMAKE_INSTALL_PREFIX:PATH=${SNAPPYSTREAM_INSTALL_DIR}
                         -DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPYSTREAM_INSTALL_DIR}/lib
                         -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-        BUILD_COMMAND   make -j8
-        INSTALL_COMMAND make install
         DEPENDS snappy
 )
 

diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt
@@ -24,6 +24,6 @@ if(NOT WITH_FLUID_ONLY)
 endif()
 
 add_subdirectory(testing)
-if(NOT MOBILE_INFERENCE AND NOT RPI)
+if(NOT MOBILE_INFERENCE AND NOT RPI AND NOT WITH_C_API)
   add_subdirectory(fluid)
 endif()
diff --git a/paddle/fluid/framework/data_type.cc b/paddle/fluid/framework/data_type.cc
@@ -58,6 +58,7 @@ static DataTypeMap* InitDataTypeMap() {
   RegType(bool, proto::VarType::BOOL);
   RegType(size_t, proto::VarType::SIZE_T);
   RegType(int16_t, proto::VarType::INT16);
+  RegType(uint8_t, proto::VarType::UINT8);
 
 #undef RegType
   return retv;

diff --git a/paddle/fluid/framework/data_type.h b/paddle/fluid/framework/data_type.h
@@ -47,8 +47,14 @@ inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
     case proto::VarType::BOOL:
       visitor.template operator()<bool>();
       break;
+    case proto::VarType::UINT8:
+      visitor.template operator()<uint8_t>();
+      break;
+    case proto::VarType::INT16:
+      visitor.template operator()<int16_t>();
+      break;
     default:
-      PADDLE_THROW("Not supported");
+      PADDLE_THROW("Not supported %d", type);
   }
 }
 

diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc
@@ -48,17 +48,18 @@ void FetchOpHandle::RunImpl() {
   WaitInputVarGenerated(platform::CPUPlace());
 
   tensors_.resize(inputs_.size());
-  auto *var_handle = static_cast<VarHandle *>(inputs_[0]);
-  auto &var_name = var_handle->name_;
   platform::CPUPlace cpu;
   auto &scopes = *local_scopes_;
 
-  for (size_t i = 0; i < scopes.size(); ++i) {
-    auto &scope = scopes[i];
-    auto *var =
-        scope->FindVar(kLocalExecScopeName)->Get<Scope *>()->FindVar(var_name);
+  for (size_t i = 0; i < inputs_.size(); ++i) {
+    auto *var_handle = static_cast<VarHandle *>(inputs_[i]);
+    auto &scope = scopes.at(var_handle->scope_idx_);
+    auto *var = scope->FindVar(kLocalExecScopeName)
+                    ->Get<Scope *>()
+                    ->FindVar(var_handle->name_);
     PADDLE_ENFORCE_NOT_NULL(var, "Cannot find variable %s in execution scope",
-                            var_name);
+                            var_handle->name_);
+
     auto &t = var->Get<framework::LoDTensor>();
     if (platform::is_gpu_place(t.place())) {
 #ifdef PADDLE_WITH_CUDA

diff --git a/paddle/fluid/framework/framework.proto b/paddle/fluid/framework/framework.proto
@@ -103,6 +103,7 @@ message VarType {
     FP64 = 6;
     // Tensor<size_t> is used in C++.
     SIZE_T = 19;
+    UINT8 = 20;
 
     // Other types that may need additional descriptions
     LOD_TENSOR = 7;

diff --git a/paddle/fluid/framework/lod_tensor_test.cc b/paddle/fluid/framework/lod_tensor_test.cc
@@ -228,11 +228,12 @@ TEST(LoD, CheckAbsLoD) {
   ASSERT_FALSE(CheckAbsLoD(abs_lod0));
 }
 
-TEST(LoDTensor, RecordIO) {
+template <typename T>
+static void TestRecordIO() {
   LoDTensor tensor;
-  int* tmp = tensor.mutable_data<int>(make_ddim({4, 5}), platform::CPUPlace());
+  T* tmp = tensor.mutable_data<T>(make_ddim({4, 5}), platform::CPUPlace());
   for (int i = 0; i < 20; ++i) {
-    tmp[i] = i;
+    tmp[i] = static_cast<T>(i);
   }
 
   std::stringstream* stream = new std::stringstream();
@@ -247,7 +248,7 @@ TEST(LoDTensor, RecordIO) {
 
   auto assert_tensor_ok = [](const LoDTensor& tensor) {
     for (int i = 0; i < 20; ++i) {
-      ASSERT_EQ(tensor.data<int>()[i], i);
+      ASSERT_EQ(tensor.data<T>()[i], static_cast<T>(i));
     }
   };
 
@@ -265,5 +266,13 @@ TEST(LoDTensor, RecordIO) {
   }
 }
 
+TEST(LoDTensor, RecordIO) {
+  TestRecordIO<int>();
+  TestRecordIO<int16_t>();
+  TestRecordIO<uint8_t>();
+  TestRecordIO<float>();
+  TestRecordIO<double>();
+}
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
@@ -33,7 +33,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/variant.h"
-#include "paddle/utils/Error.h"
 
 namespace paddle {
 namespace framework {

diff --git a/paddle/fluid/operators/math/math_function.cc b/paddle/fluid/operators/math/math_function.cc
@@ -38,7 +38,9 @@ template struct SetConstant<platform::CPUDeviceContext, bool>;
   template struct Transpose<platform::CPUDeviceContext, double, RANK>;     \
   template struct Transpose<platform::CPUDeviceContext, int, RANK>;        \
   template struct Transpose<platform::CPUDeviceContext, int64_t, RANK>;    \
-  template struct Transpose<platform::CPUDeviceContext, bool, RANK>;
+  template struct Transpose<platform::CPUDeviceContext, bool, RANK>;       \
+  template struct Transpose<platform::CPUDeviceContext, int16_t, RANK>;    \
+  template struct Transpose<platform::CPUDeviceContext, uint8_t, RANK>;
 
 DEFINE_CPU_TRANS(1);
 DEFINE_CPU_TRANS(2);

diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt
@@ -1,4 +1,4 @@
-proto_library(profiler_proto SRCS profiler.proto)
+proto_library(profiler_proto SRCS profiler.proto DEPS framework_proto)
 py_proto_compile(profiler_py_proto SRCS profiler.proto)
 
 add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)

diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
@@ -504,6 +504,7 @@ function main() {
         ;;
       capi)
         cmake_gen ${PYTHON_ABI:-""}
+        build
         gen_capi_package
         ;;
       fluid_inference_lib)

diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -66,6 +66,7 @@ list(REMOVE_ITEM TEST_OPS test_fetch_var)
 list(REMOVE_ITEM TEST_OPS test_parallel_op)
 list(REMOVE_ITEM TEST_OPS test_dynrnn_static_input)
 list(REMOVE_ITEM TEST_OPS test_dist_train)
+list(REMOVE_ITEM TEST_OPS test_network_with_dtype)
 
 # tests that can be bundled together in one python process for speed.
 if(WITH_FAST_BUNDLE_TEST)
@@ -83,6 +84,7 @@ py_test_modules(test_parallel_executor MODULES test_parallel_executor)
 py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=${WARPCTC_LIB_DIR})
 py_test_modules(test_train_dyn_rnn MODULES test_dyn_rnn)
 py_test_modules(test_mul_op MODULES test_mul_op)
+py_test_modules(test_network_with_dtype MODULES test_network_with_dtype)
 
 # tests that need to be run in separate process.
 py_test_modules(test_multihead_attention MODULES test_multihead_attention)

diff --git a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py
@@ -24,48 +24,43 @@
 
 
 class TestNetWithDtype(unittest.TestCase):
-    def set_network(self):
+    def setUp(self):
         self.dtype = "float64"
         self.init_dtype()
-        main = fluid.Program()
-        with fluid.program_guard(main):
-            self.x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype)
-            self.y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype)
-            y_predict = fluid.layers.fc(input=self.x, size=1, act=None)
 
-            cost = fluid.layers.square_error_cost(input=y_predict, label=self.y)
+    def run_net_on_place(self, place):
+        main = fluid.Program()
+        startup = fluid.Program()
+        with fluid.program_guard(main, startup):
+            x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype)
+            y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype)
+            y_predict = fluid.layers.fc(input=x, size=1, act=None)
+            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
             avg_cost = fluid.layers.mean(cost)
-        self.program = main
-        self.fetch_list = [avg_cost]
+            sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+            sgd_optimizer.minimize(avg_cost)
 
-        sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
-        sgd_optimizer.minimize(avg_cost)
-
-    def run_net_on_place(self, place):
+        fetch_list = [avg_cost]
         train_reader = paddle.batch(
             paddle.dataset.uci_housing.train(), batch_size=BATCH_SIZE)
-        feeder = fluid.DataFeeder(place=place, feed_list=[self.x, self.y])
+        feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
         exe = fluid.Executor(place)
-        exe.run(fluid.default_startup_program())
+        exe.run(startup)
         for data in train_reader():
-            exe.run(self.program,
-                    feed=feeder.feed(data),
-                    fetch_list=self.fetch_list)
+            exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list)
             # the main program is runable, the datatype is fully supported
             break
 
     def init_dtype(self):
         pass
 
     def test_cpu(self):
-        self.set_network()
         place = fluid.CPUPlace()
         self.run_net_on_place(place)
 
     def test_gpu(self):
         if not core.is_compiled_with_cuda():
             return
-        self.set_network()
         place = fluid.CUDAPlace(0)
         self.run_net_on_place(place)
 

diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py
@@ -771,19 +771,17 @@ def test_update_dense_parameter_all_reduce(self):
         self.check_network_convergence(
             is_sparse=False, build_strategy=build_strategy)
 
-    # def test_update_sparse_parameter_reduce(self):
-    #     build_strategy = fluid.BuildStrategy()
-    #     build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
-    #     self.check_network_convergence(
-    #         is_sparse=True, build_strategy=build_strategy)
-    #
-    # def test_update_dense_parameter_reduce(self):
-    #     build_strategy = fluid.BuildStrategy()
-    #     build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
-    #     self.check_network_convergence(
-    #         is_sparse=False, build_strategy=build_strategy)
+    def test_update_sparse_parameter_reduce(self):
+        build_strategy = fluid.BuildStrategy()
+        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
+        self.check_network_convergence(
+            is_sparse=True, build_strategy=build_strategy)
 
-    # test fetch all the variables of global_block
+    def test_update_dense_parameter_reduce(self):
+        build_strategy = fluid.BuildStrategy()
+        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
+        self.check_network_convergence(
+            is_sparse=False, build_strategy=build_strategy)
 
 
 import paddle.dataset.flowers as flowers
@@ -849,8 +847,7 @@ def parallel_exe(self, train_inputs, seed):
                     assert not math.isnan(np.sum(ret[i])) and \
                            not math.isinf(np.sum(ret[i]))
 
-    @unittest.skip("this test is buggy")
-    def test_feed(self):
+    def test_fetch_op(self):
         tst_reader = paddle.batch(flowers.test(use_xmap=False), batch_size=16)
         tst_reader_iter = tst_reader()
diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
@@ -504,6 +504,7 @@ function main() {
             ;;
           capi)
             cmake_gen ${PYTHON_ABI:-""}
+            build
             gen_capi_package
             ;;
           fluid_inference_lib)
@@ Expand Down @@