From 8114cb5ecd7add0eac0595410de37c0598154dfc Mon Sep 17 00:00:00 2001
From: chengduoZH
Date: Mon, 16 Apr 2018 17:06:57 +0800
Subject: [PATCH] dash

---
 .../framework/details/reduce_and_gather.h | 15 +++++++------
 .../framework/details/reduce_op_handle.cc | 22 +++++++++----------
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/paddle/fluid/framework/details/reduce_and_gather.h b/paddle/fluid/framework/details/reduce_and_gather.h
index a945ffb6da817..1591477356472 100644
--- a/paddle/fluid/framework/details/reduce_and_gather.h
+++ b/paddle/fluid/framework/details/reduce_and_gather.h
@@ -53,6 +53,7 @@ struct GatherSelectedRows {
   void operator()(
       const std::vector<SelectedRows> &src_selecte_rows_,
       const std::vector<platform::Place> &in_places,
+      const platform::Place &out_place,
       const std::unordered_map<platform::Place, platform::DeviceContext *,
                                platform::PlaceHash> &dev_ctxes,
       SelectedRows *dst_selecte_rows) const {
@@ -63,20 +64,20 @@ struct GatherSelectedRows {
 
     for (auto &in_sr : src_selecte_rows_) {
       in_tensors.emplace_back(in_sr.value());
-      out_rows.insert(out_rows.end(), in_sr.rows.begin(), in_sr.rows.end());
+      out_rows.insert(out_rows.end(), in_sr.rows().begin(), in_sr.rows().end());
     }
 
     auto &pre_in = src_selecte_rows_[0];
 
-    dst_tensor_.set_height(pre_in.height());
-    dst_selecte_rows.set_rows(out_rows);
+    dst_selecte_rows->set_height(pre_in.height());
+    dst_selecte_rows->set_rows(out_rows);
     size_t rows = out_rows.size();
     DDim out_dim = pre_in.GetCompleteDims();
     out_dim[0] = static_cast<int64_t>(rows);
-    dst_selecte_rows.mutable_value()->Resize(out_dim);
-    dst_selecte_rows.mutable_value()->mutable_data(out_place,
-                                                   pre_in.value().type());
-    Tensor *out_tensor = dst_selecte_rows.mutable_value();
+    dst_selecte_rows->mutable_value()->Resize(out_dim);
+    dst_selecte_rows->mutable_value()->mutable_data(out_place,
+                                                    pre_in.value().type());
+    Tensor *out_tensor = dst_selecte_rows->mutable_value();
 
     // copy
     int s = 0, e = 0;
diff --git a/paddle/fluid/framework/details/reduce_op_handle.cc b/paddle/fluid/framework/details/reduce_op_handle.cc
index d67371ee3128a..5148c40d6370e 100644
--- a/paddle/fluid/framework/details/reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/reduce_op_handle.cc
@@ -61,7 +61,7 @@ void ReduceOpHandle::RunImpl() {
                     "The number of output should be one.");
 
   // Wait input done, this Wait is asynchronous operation
-  auto &in_place = in_var_handles[0]->place_;
+  auto in_place = in_var_handles[0]->place_;
   if (in_var_handles[0]->generated_op_) {
     for (auto *in : in_var_handles) {
       auto &in_p = in->place_;
@@ -103,7 +103,8 @@ void ReduceOpHandle::RunImpl() {
       in_selected_rows.emplace_back(in_sr);
     }
     auto trg = out_var->GetMutable<framework::SelectedRows>();
-    gather(in_selected_rows, in_places, dev_ctxes_, trg);
+    gather(in_selected_rows, in_places, out_var_handles[0]->place_, dev_ctxes_,
+           trg);
   } else {
     // reduce tensor
     auto pre_in = pre_in_var->Get<framework::LoDTensor>();
@@ -139,22 +140,21 @@ void ReduceOpHandle::RunImpl() {
       auto &p = in_places[i];
       auto &lod_tensor = lod_tensors[i];
 
-      void *buffer = const_cast<void *>(lod_tensor.data<void>());
-      int gpu_id = static_cast<platform::CUDAPlace>(p).GetDeviceId();
-
-      if (dtype == -1) {
-        dtype = platform::ToNCCLDataType(lod_tensor.type());
-      }
-
-      T *recvbuffer = nullptr;
-      if (root == gpu_id) {
-        recvbuffer = trg->mutable_data<T>(out_var_handles[0]->place_);
-      }
+      void *buffer = const_cast<void *>(lod_tensor.data<void>());
 
       int dev_id = boost::get<platform::CUDAPlace>(p).device;
       auto &nccl_ctx = nccl_ctxs_.at(dev_id);
       auto stream = nccl_ctx.stream();
       auto comm = nccl_ctx.comm_;
 
+      void *recvbuffer = nullptr;
+      if (root == dev_id) {
+        recvbuffer = trg->mutable_data(out_var_handles[0]->place_);
+      }
+
+      // error: get the sizeof of var.type()
       all_reduce_calls.emplace_back([=] {
         PADDLE_ENFORCE(platform::dynload::ncclReduce(
             buffer, recvbuffer, static_cast<size_t>(lod_tensor.numel()),
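
Note: the reduce_and_gather.h change passes the output place into
GatherSelectedRows and concatenates every input's rows() into a single index
list before copying the stacked values to out_place. Below is a minimal
standalone model of that row bookkeeping; it is a sketch, not Paddle code:
FakeSelectedRows and Gather are hypothetical names, and plain vectors stand
in for the value tensors.

#include <cstdint>
#include <iostream>
#include <vector>

// Stand-in for framework::SelectedRows: a list of row indices plus one
// value per row (the real SelectedRows stores a tensor of row values).
struct FakeSelectedRows {
  std::vector<int64_t> rows;
  std::vector<float> value;
};

// Concatenate rows and values in input order, the same shape as the
// out_rows.insert(..., in_sr.rows().begin(), in_sr.rows().end()) loop in
// the patch. Height and output placement are ignored in this model.
FakeSelectedRows Gather(const std::vector<FakeSelectedRows> &srcs) {
  FakeSelectedRows dst;
  for (const auto &src : srcs) {
    dst.rows.insert(dst.rows.end(), src.rows.begin(), src.rows.end());
    dst.value.insert(dst.value.end(), src.value.begin(), src.value.end());
  }
  return dst;
}

int main() {
  std::vector<FakeSelectedRows> inputs = {{{0, 2}, {1.f, 2.f}},
                                          {{1, 2}, {3.f, 4.f}}};
  FakeSelectedRows out = Gather(inputs);
  for (size_t i = 0; i < out.rows.size(); ++i) {
    std::cout << "row " << out.rows[i] << " -> " << out.value[i] << "\n";
  }
  return 0;
}

Row index 2 appears twice in the output: the gather step only concatenates,
so merging duplicate rows is left to whatever op consumes the result.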
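Note: in the NCCL branch above, each device contributes its local tensor and
only the root device (the one that owns the reduced output) supplies a real
receive buffer; the others pass nullptr. The following is a self-contained
sketch of that call pattern against the stock NCCL API, assuming two visible
GPUs and with most error handling omitted; it illustrates the pattern rather
than reproducing the Paddle handler.

#include <cuda_runtime.h>
#include <nccl.h>

#include <cstdio>
#include <vector>

int main() {
  const int ndev = 2;        // assumption: two visible GPUs
  const size_t count = 1024; // elements reduced per device
  int devs[2] = {0, 1};
  ncclComm_t comms[2];
  if (ncclCommInitAll(comms, ndev, devs) != ncclSuccess) {
    std::fprintf(stderr, "ncclCommInitAll failed\n");
    return 1;
  }

  const int root = 0;  // in the patch, derived from the output var's place
  std::vector<float *> sendbuff(ndev);
  std::vector<float *> recvbuff(ndev, nullptr);
  std::vector<cudaStream_t> streams(ndev);
  for (int i = 0; i < ndev; ++i) {
    cudaSetDevice(i);
    cudaStreamCreate(&streams[i]);
    cudaMalloc(&sendbuff[i], count * sizeof(float));
    // Only the root needs a receive buffer, mirroring the patch's
    // `if (root == dev_id) recvbuffer = trg->mutable_data(...)`.
    if (i == root) cudaMalloc(&recvbuff[i], count * sizeof(float));
  }

  // Group the per-device calls so NCCL treats them as one collective,
  // much like collecting all_reduce_calls and launching them together.
  ncclGroupStart();
  for (int i = 0; i < ndev; ++i) {
    ncclReduce(sendbuff[i], recvbuff[i], count, ncclFloat, ncclSum, root,
               comms[i], streams[i]);
  }
  ncclGroupEnd();

  for (int i = 0; i < ndev; ++i) {
    cudaSetDevice(i);
    cudaStreamSynchronize(streams[i]);
  }
  for (int i = 0; i < ndev; ++i) ncclCommDestroy(comms[i]);
  return 0;
}

ncclReduce accepts a null recvbuff on every rank except the root, which is
what lets the patch allocate the target tensor's memory on the root device
only.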