From fa5b154a69655484e3ae2257aa5527277ed88267 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 4 Oct 2017 17:10:15 -0700 Subject: [PATCH 01/29] "add model format design doc" --- doc/design/model_format.md | 36 +++++ paddle/framework/framework.proto | 3 + paddle/framework/scope.cc | 17 +++ paddle/framework/scope.h | 4 + paddle/framework/scope_test.cc | 13 ++ paddle/framework/tensor.h | 18 +++ paddle/framework/tensor_impl.h | 9 ++ paddle/operators/save_restore_op.cc | 143 ++++++++++++++++++ .../framework/tests/test_save_restore_op.py | 46 ++++++ 9 files changed, 289 insertions(+) create mode 100644 doc/design/model_format.md create mode 100644 paddle/operators/save_restore_op.cc create mode 100644 python/paddle/v2/framework/tests/test_save_restore_op.py diff --git a/doc/design/model_format.md b/doc/design/model_format.md new file mode 100644 index 0000000000000..03531d0e38b39 --- /dev/null +++ b/doc/design/model_format.md @@ -0,0 +1,36 @@ +# Design Doc: Model Format + +## Motivation + +The model is the output of training process. One complete model consists of two parts, namely, the **topology** and the **parameters**. To support business deployment, we need to make the model format must be self-completed and do not expose any training source code. + +As a result, In PaddlePaddle, the **topology** represents as a [ProgramDesc](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/doc/design/program.md), which describes the model structure. The **parameters** contain all the trainable weights in the model, we must support large size parameter, and high efficiency read/write for speed. + +## Implementation + +The topology is saved as a plain text, in detail, a self-complete protobuf file. + +The parameters are saved as a binary file. 
As we all know, the protobuf message has the limits of [64M size](https://developers.google.com/protocol-buffers/docs/reference/cpp/google.protobuf.io.coded_stream#CodedInputStream.SetTotalBytesLimit.details). So we design a particular format for tensor serialization, for speed we core dump the memory to disk and save the necessary information, such as the`dims`, `name` of the tensor, Even the `LoD` information in [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/paddle/framework/lod_tensor.md). In detail, as the table shows + +```text +[offset] [type] [value] [description] +0000 32 bit integer 1 1 for little endian, 0 for big endian +0004 32 bit integer 0x0000006E(0110) version number, 0110 for paddle version 0.11.0 +0008 32 bit integer ?? md5checksum of Tensors +0009 unsigned byte 0 0 for tensor, 1 for LoDTensor +0013 32 bit integer 28 Tensor Name length +0017 unsigned byte ?? Tensor Name chars +0018 unsigned byte ?? .. +... +00100 32 bit integer 3 Tensor dims count +00104 32 bit integer ?? Tensor dims 0 +00108 32 bit integer ?? .. +... +00150 unsigned byte ?? Tensor value +00151 unsigned byte ?? .. + +``` + +## Summary + +We introduce the model format, the `ProgramDesc` describe the **topology**, and a particular binary format for **parameters**. diff --git a/paddle/framework/framework.proto b/paddle/framework/framework.proto index 951c7afbc14e2..a55d2bcf8143a 100644 --- a/paddle/framework/framework.proto +++ b/paddle/framework/framework.proto @@ -106,6 +106,9 @@ message LoDTensorDesc { message VarDesc { required string name = 1; optional LoDTensorDesc lod_tensor = 2; + + // If the variable is stateful, it needs to be stored in checkpoint. 
+ optional bool is_persistent = 3; } message BlockDesc { diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc index 080b4ac621c1b..3a4e4d7d284f2 100644 --- a/paddle/framework/scope.cc +++ b/paddle/framework/scope.cc @@ -62,5 +62,22 @@ void Scope::DropKids() { kids_.clear(); } +std::unordered_set Scope::Introspection(bool recursive) const { + std::unordered_set known_vars; + + if (recursive) { + for (auto& kid : kids_) { + auto kid_vars = kid->Introspection(); + for (auto& p : kid_vars) { + known_vars.insert(p); + } + } + } + for (auto& p : vars_) { + known_vars.insert(p.first); + } + return known_vars; +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h index 7047f0d55e984..0b1a45b0df349 100644 --- a/paddle/framework/scope.h +++ b/paddle/framework/scope.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include +#include #include "paddle/framework/variable.h" #include "paddle/platform/macros.h" @@ -62,6 +63,9 @@ class Scope { /// Drop all kids scopes belonged to this scope. void DropKids(); + // enumerate all the variables current contains. + std::unordered_set Introspection(bool recursive = false) const; + private: // Call Scope::NewScope for a sub-scope. explicit Scope(Scope const* parent) : parent_(parent) {} diff --git a/paddle/framework/scope_test.cc b/paddle/framework/scope_test.cc index 9d51e355b0f63..9261658effd11 100644 --- a/paddle/framework/scope_test.cc +++ b/paddle/framework/scope_test.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/framework/scope.h" +#include "glog/logging.h" #include "gtest/gtest.h" using paddle::framework::Scope; @@ -54,3 +55,15 @@ TEST(Scope, FindScope) { EXPECT_EQ(&s, s.FindScope(v)); EXPECT_EQ(&s, ss.FindScope(v)); } + +TEST(Scope, Introspection) { + Scope s; + Variable* v = s.NewVar("a"); + EXPECT_EQ(&s, s.FindScope(v)); + std::unordered_set ans = s.Introspection(); + std::string str; + for (auto& var : ans) { + str += var; + } + EXPECT_STREQ("a", str.c_str()); +} diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 80a3f0a3935ef..352fb08ab331a 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -111,6 +111,24 @@ class Tensor { std::type_index type() const { return holder_->type(); } + /** + * @brief Serialize tensor to char bytes. Please check model_format.md for + * detail. + * + * @param + * @return return string + */ + std::string SerializeToString() const; + + void SerializeToString(std::string* s) const; + + /** + * @brief Deserialize char bytes to tensor. 
+ * @param + * @return return string + */ + void DeserializeFromString(const std::string& s); + private: template inline void check_memory_size() const; diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index a5405f9c31543..fc9254d37751c 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -162,5 +162,14 @@ inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) { return res; } +inline std::string SerializeToString() const { + std::string ret; + return SerializeToString(s); +} + +inline void SerializeToString(std::string* s) const {} + +inline void DeserializeFromString(const std::string& s) {} + } // namespace framework } // namespace paddle diff --git a/paddle/operators/save_restore_op.cc b/paddle/operators/save_restore_op.cc new file mode 100644 index 0000000000000..ff9e3adb4038d --- /dev/null +++ b/paddle/operators/save_restore_op.cc @@ -0,0 +1,143 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +#include +#include + +namespace paddle { +namespace operators { + +using framework::Tensor; + +class SaveOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContextBase* ctx) const override { + PADDLE_ENFORCE(ctx->HasInputs("X"), + "Input(X) of SaveOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("absolute_path"), + "Input(absolute_path) of SaveOp should not be null."); + } +}; + +class SaveOpMaker : public framework::OpProtoAndCheckerMaker { + public: + SaveOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "(tensor), the tensor count can be 1~INT_MAX, tensors names which " + "values will be saved.") + .AsDuplicable() + .NotInGradient(); + AddAttr("absolute_path", "the absolute_path for save model.") + .NotInGradient(); + AddComment(R"DOC( +Save the input tensors to a tar file based on input names and absolute path. + +All the inputs can carry the LoD (Level of Details) information, +or not. 
+)DOC"); + } +}; + +template +class SaveKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto ins = context.MultiInput("X"); + std::string absolute_path = ctx.Attr("absolute_path"); + + auto place = context.GetEigenDevice(); + FILE* fp; + fp = fopen(absolute_path.c_str()) + PADDLE_ENFORCE(fp != nullptr, "open file for model failed."); + int N = ins.size(); + for (int i = 0; i < N; i++) { + // at present, we only support tensor serialization instead of variable + std::string bytes = ins[i].SerializeToString(); + size_t count = + fwrite(bytes.c_str(), sizeof(char), sizeof(char) * bytes.size(), fp); + PADDLE_ENFORCE(count == bytes.size(), "write to model file failed."); + } + } +}; + +class RestoreOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContextBase* ctx) const override { + PADDLE_ENFORCE(ctx->HasInputs("X"), + "Input(X) of RestoreOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("absolute_path"), + "Input(absolute_path) of Restore Op should not be null."); + } +}; + +class RestoreOpMaker : public framework::OpProtoAndCheckerMaker { + public: + RestoreOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "(tensor), the tensor count can be 1~INT_MAX, tensors names which " + "values will be saved.") + .AsDuplicable() + .NotInGradient(); + AddAttr("absolute_path", "the absolute_path for save model.") + .NotInGradient(); + AddComment(R"DOC( +Save the input tensors to a tar file based on input names and absolute path. + +All the inputs can carry the LoD (Level of Details) information, +or not. 
+)DOC"); + } +}; + +template +class RestoreKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto ins = context.MultiInput("X"); + std::string absolute_path = ctx.Attr("absolute_path"); + + auto place = context.GetEigenDevice(); + FILE* fp; + fp = fopen(absolute_path.c_str()) + PADDLE_ENFORCE(fp != nullptr, "open file for model failed."); + int N = ins.size(); + for (int i = 0; i < N; i++) { + // at present, we only support tensor serialization instead of variable + std::string bytes = ins[i].SerializeToString(); + size_t count = + fwrite(bytes.c_str(), sizeof(char), sizeof(char) * bytes.size(), fp); + PADDLE_ENFORCE(count == bytes.size(), "write to model file failed."); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(save, ops::SaveOp, ops::SaveOpMaker); +REGISTER_OP_CPU_KERNEL(save, + ops::SaveKernel); +REGISTER_OP_WITHOUT_GRADIENT(save, ops::SaveOp, ops::SaveOpMaker); +REGISTER_OP_CPU_KERNEL(save, + ops::RestoreKernel); diff --git a/python/paddle/v2/framework/tests/test_save_restore_op.py b/python/paddle/v2/framework/tests/test_save_restore_op.py new file mode 100644 index 0000000000000..e81bcb25f54bd --- /dev/null +++ b/python/paddle/v2/framework/tests/test_save_restore_op.py @@ -0,0 +1,46 @@ +import paddle.v2 as paddle +from paddle.v2.framework import core +from paddle.v2.framework.op import Operator +from op_test import OpTest +import numpy as np +import unittest +import os, sys + +ABSOLUTE_PATH = "/tmp/" # directory for save parameter files. 
+ + +class TestSaveOp(OpTest): + def setUp(self): + self.op_type = "save" + x0 = np.random.random((2, 3)).astype("float32") + x1 = np.random.random((1, 2)).astype("float32") + x2 = np.random.random((2, 1)).astype("float32") + + self.inputs = { + "X": [("x0", x0), ("x1", x1), ("x2", x2)], + "absolute_path": ABSOLUTE_PATH + } + + def test_check_output(self): + self.check_output() + self.assertTrue( + os.path.exists(os.path.join(ABSOLUTE_PATH, "x0")), + " parameter not saved") + + +class TestRestoreOp(OpTest): + def setUp(self): + self.op_type = "restore" + x0 = np.random.random((2, 3)).astype("float32") + x1 = np.random.random((1, 2)).astype("float32") + x2 = np.random.random((2, 1)).astype("float32") + + self.inputs = { + "X": [("x0", x0), ("x1", x1), ("x2", x2)], + "absolute_path": ABSOLUTE_PATH + } + + # def test_check_output(self): + # self.check_output() + # self.assertTrue(os.path.exists(os.path.join(ABSOLUTE_PATH, "x0")), + # " parameter not saved") From 5c617f6562704a1801b0de3ca1626927e785f9b2 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 4 Oct 2017 18:33:39 -0700 Subject: [PATCH 02/29] "add restore function" --- doc/design/model_format.md | 5 ++-- paddle/operators/save_restore_op.cc | 36 ++++++++++++++--------------- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/doc/design/model_format.md b/doc/design/model_format.md index 03531d0e38b39..4928cb1eb358b 100644 --- a/doc/design/model_format.md +++ b/doc/design/model_format.md @@ -17,7 +17,7 @@ The parameters are saved as a binary file. As we all know, the protobuf message 0000 32 bit integer 1 1 for little endian, 0 for big endian 0004 32 bit integer 0x0000006E(0110) version number, 0110 for paddle version 0.11.0 0008 32 bit integer ?? md5checksum of Tensors -0009 unsigned byte 0 0 for tensor, 1 for LoDTensor +0009 unsigned byte 0 0 for tensor, 1 for LoDTensor 0013 32 bit integer 28 Tensor Name length 0017 unsigned byte ?? Tensor Name chars 0018 unsigned byte ?? .. 
@@ -28,9 +28,8 @@ The parameters are saved as a binary file. As we all know, the protobuf message ... 00150 unsigned byte ?? Tensor value 00151 unsigned byte ?? .. - ``` ## Summary -We introduce the model format, the `ProgramDesc` describe the **topology**, and a particular binary format for **parameters**. +We introduce the model format, the `ProgramDesc` describe the **topology**, and a bunch of particular format binary tensors describe the **parameters**. diff --git a/paddle/operators/save_restore_op.cc b/paddle/operators/save_restore_op.cc index ff9e3adb4038d..4a88d676f3de1 100644 --- a/paddle/operators/save_restore_op.cc +++ b/paddle/operators/save_restore_op.cc @@ -42,10 +42,9 @@ class SaveOpMaker : public framework::OpProtoAndCheckerMaker { "values will be saved.") .AsDuplicable() .NotInGradient(); - AddAttr("absolute_path", "the absolute_path for save model.") - .NotInGradient(); + AddAttr("absolute_path", "the absolute_path for save model."); AddComment(R"DOC( -Save the input tensors to a tar file based on input names and absolute path. +Save the input tensors to a binary file based on input tensor names and absolute path. All the inputs can carry the LoD (Level of Details) information, or not. @@ -53,25 +52,26 @@ or not. 
} }; -template +template class SaveKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto ins = context.MultiInput("X"); + auto ins = context.MultiInput("X"); std::string absolute_path = ctx.Attr("absolute_path"); - auto place = context.GetEigenDevice(); FILE* fp; fp = fopen(absolute_path.c_str()) PADDLE_ENFORCE(fp != nullptr, "open file for model failed."); int N = ins.size(); for (int i = 0; i < N; i++) { // at present, we only support tensor serialization instead of variable + std::string bytes = ins[i].SerializeToString(); size_t count = fwrite(bytes.c_str(), sizeof(char), sizeof(char) * bytes.size(), fp); PADDLE_ENFORCE(count == bytes.size(), "write to model file failed."); } + fclose(fp); } }; @@ -93,37 +93,35 @@ class RestoreOpMaker : public framework::OpProtoAndCheckerMaker { RestoreOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", - "(tensor), the tensor count can be 1~INT_MAX, tensors names which " - "values will be saved.") + AddOutput("Out", + "(tensor), the tensor count can be 1~INT_MAX, tensors which " + "values will be restores.") .AsDuplicable() .NotInGradient(); - AddAttr("absolute_path", "the absolute_path for save model.") - .NotInGradient(); + AddAttr("absolute_path", "the absolute_path for model file."); AddComment(R"DOC( -Save the input tensors to a tar file based on input names and absolute path. +Restore the tensors from model file based on absolute path. -All the inputs can carry the LoD (Level of Details) information, +All the tensors outputs may carry the LoD (Level of Details) information, or not. 
)DOC"); } }; -template +template class RestoreKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto ins = context.MultiInput("X"); + auto outs = context.MultiOutput("Out"); std::string absolute_path = ctx.Attr("absolute_path"); - auto place = context.GetEigenDevice(); FILE* fp; fp = fopen(absolute_path.c_str()) - PADDLE_ENFORCE(fp != nullptr, "open file for model failed."); - int N = ins.size(); + PADDLE_ENFORCE(fp != nullptr, "open model file failed."); for (int i = 0; i < N; i++) { // at present, we only support tensor serialization instead of variable - std::string bytes = ins[i].SerializeToString(); + + std::string bytes = ins[i].DeserializeFromString(); size_t count = fwrite(bytes.c_str(), sizeof(char), sizeof(char) * bytes.size(), fp); PADDLE_ENFORCE(count == bytes.size(), "write to model file failed."); From e63a2b347615649fbba5f466ce2a61830624190e Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Fri, 6 Oct 2017 09:41:38 -0700 Subject: [PATCH 03/29] "add parse protobuf" --- paddle/framework/lod_tensor.cc | 28 ++++++++++++++++++++++++++++ paddle/framework/lod_tensor.h | 15 +++++++++++++++ paddle/framework/tensor.h | 18 ------------------ paddle/framework/tensor_impl.h | 9 --------- 4 files changed, 43 insertions(+), 27 deletions(-) diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 5b7badf89c171..20b403d7d3dd9 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -13,8 +13,10 @@ limitations under the License. 
*/ #include "paddle/framework/lod_tensor.h" +#include "paddle/framework/framework.pb.h" #include +#include namespace paddle { namespace framework { @@ -103,5 +105,31 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin, lod_ = new_lod; } +std::string LoDTensor::SerializeToString() const { + LoDTensorDesc desc; + // set data_type + if (this->type() == typeid(bool)) desc.set_data_type(DataType::BOOL); + if (this->type() == typeid(int16_t)) desc.set_data_type(DataType::INT16); + if (this->type() == typeid(int32_t)) desc.set_data_type(DataType::INT32); + if (this->type() == typeid(int64_t)) desc.set_data_type(DataType::INT64); + // FIXME(dzh): there is no fp16 in standard c++ + if (this->type() == typeid(int_fast16_t)) desc.set_data_type(DataType::FP16); + if (this->type() == typeid(float)) desc.set_data_type(DataType::FP16); + if (this->type() == typeid(double)) desc.set_data_type(DataType::FP16); + + // set dims + std::vector dims = vectorize(this->dims()); + for (auto& dim : dims) { + desc.add_dims(dim); + } + + // set lod information + desc.set_lod_level(this->NumLevels()); +} + +void LoDTensor::SerializeToString(std::string* s) const {} + +void LoDTensor::DeserializeFromString(const std::string& s) {} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 49786a4a6635f..2576d66b0e3a4 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -119,6 +119,21 @@ class LoDTensor : public Tensor { */ void ShrinkInLevel(size_t level, size_t elem_begin, size_t elem_end); + /** + * @brief Serialize tensor to char bytes. + * Please check model_format.md for the format detail. + * @return return string + */ + std::string SerializeToString() const; + + void SerializeToString(std::string* s) const; + + /** + * @brief Deserialize char bytes to tensor. 
+ * @return return string + */ + void DeserializeFromString(const std::string& s); + private: LoD lod_; }; diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 352fb08ab331a..80a3f0a3935ef 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -111,24 +111,6 @@ class Tensor { std::type_index type() const { return holder_->type(); } - /** - * @brief Serialize tensor to char bytes. Please check model_format.md for - * detail. - * - * @param - * @return return string - */ - std::string SerializeToString() const; - - void SerializeToString(std::string* s) const; - - /** - * @brief Deserialize char bytes to tensor. - * @param - * @return return string - */ - void DeserializeFromString(const std::string& s); - private: template inline void check_memory_size() const; diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index fc9254d37751c..a5405f9c31543 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -162,14 +162,5 @@ inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) { return res; } -inline std::string SerializeToString() const { - std::string ret; - return SerializeToString(s); -} - -inline void SerializeToString(std::string* s) const {} - -inline void DeserializeFromString(const std::string& s) {} - } // namespace framework } // namespace paddle From 425a6b69384a9af36ce578add8c1f3296b20efa6 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Fri, 6 Oct 2017 12:10:13 -0700 Subject: [PATCH 04/29] "move necessary information to saver.proto" --- paddle/framework/CMakeLists.txt | 6 ++- paddle/framework/framework.proto | 4 +- paddle/framework/lod_tensor.cc | 64 ++++++++++++++++++++++++++--- paddle/framework/lod_tensor.h | 1 + paddle/framework/lod_tensor_test.cc | 5 +++ paddle/framework/saver.proto | 43 +++++++++++++++++++ paddle/framework/tensor.h | 11 ++++- 7 files changed, 122 insertions(+), 12 deletions(-) create mode 100644 paddle/framework/saver.proto diff 
--git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 5d394132b7f3d..e9c7be85a6b30 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -1,4 +1,7 @@ # ddim lib +proto_library(framework_proto SRCS framework.proto) +proto_library(saver_proto SRCS framework.proto saver.proto) + cc_library(ddim SRCS ddim.cc DEPS eigen3) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) @@ -7,7 +10,7 @@ cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context) cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) -cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor) +cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor saver_proto framework_proto) cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor) nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor) @@ -16,7 +19,6 @@ cc_test(variable_test SRCS variable_test.cc) cc_library(scope SRCS scope.cc) cc_test(scope_test SRCS scope_test.cc DEPS scope) -proto_library(framework_proto SRCS framework.proto) cc_library(attribute SRCS attribute.cc DEPS framework_proto) cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute) diff --git a/paddle/framework/framework.proto b/paddle/framework/framework.proto index a55d2bcf8143a..d696cdf5ad681 100644 --- a/paddle/framework/framework.proto +++ b/paddle/framework/framework.proto @@ -106,9 +106,7 @@ message LoDTensorDesc { message VarDesc { required string name = 1; optional LoDTensorDesc lod_tensor = 2; - - // If the variable is stateful, it needs to be stored in checkpoint. 
- optional bool is_persistent = 3; + optional bool persistable = 3 [ default = false ]; } message BlockDesc { diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 20b403d7d3dd9..df51c7181f315 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -13,10 +13,16 @@ limitations under the License. */ #include "paddle/framework/lod_tensor.h" -#include "paddle/framework/framework.pb.h" +#include "paddle/framework/saver.pb.h" +#include "paddle/memory/memcpy.h" +#include "paddle/memory/memory.h" -#include #include +#include +#include +#include + +#include namespace paddle { namespace framework { @@ -106,7 +112,8 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin, } std::string LoDTensor::SerializeToString() const { - LoDTensorDesc desc; + LoDTensorProto desc; + // set data_type if (this->type() == typeid(bool)) desc.set_data_type(DataType::BOOL); if (this->type() == typeid(int16_t)) desc.set_data_type(DataType::INT16); @@ -125,11 +132,56 @@ std::string LoDTensor::SerializeToString() const { // set lod information desc.set_lod_level(this->NumLevels()); + std::string desc_bytes = desc.SerializeAsString(); + + // FIXME(dzh) : implement fix chunk size buffer. + size_t DESC_SIZE = desc_bytes.size(); + size_t DATA_SIZE = holder_->size() - offset_; + + const size_t BUFFER_SIZE = DESC_SIZE + DATA_SIZE + 2 * sizeof(size_t); + char* buffer = + static_cast(memory::Alloc(platform::CPUPlace(), BUFFER_SIZE)); + + // format: desc_size data_size, desc_bytes, data_bytes. 
+ platform::Place place = holder_->place(); + platform::Place src_place = platform::CPUPlace(); + platform::Place dst_place = platform::CPUPlace(); + + memory::Copy(dst_place, buffer, src_place, &DESC_SIZE, sizeof(size_t)); + memory::Copy(dst_place, buffer + sizeof(size_t), src_place, &DATA_SIZE, + sizeof(size_t)); + memory::Copy(dst_place, buffer + sizeof(size_t) * 2, src_place, + desc_bytes.c_str(), desc_bytes.size()); + + // copy gpu data to cpu implicitly. + PADDLE_ENFORCE(this->numel() != 0, " Serialize a empty Tensor!"); + int element_width = holder->size() / this->numel(); + memory::Copy( + dst_place, buffer + sizeof(size_t) * 2 + desc_bytes.size(), place, + static_cast(holder_->ptr()) + offset_ / element_width, DATA_SIZE); + + return std::string(buffer, BUFFER_SIZE); } -void LoDTensor::SerializeToString(std::string* s) const {} - -void LoDTensor::DeserializeFromString(const std::string& s) {} +void LoDTensor::DeserializeFromString(const std::string& s) { + size_t DESC_SIZE, DATA_SIZE; + platform::Place src_place = platform::CPUPlace(); + platform::Place dst_place = holder_->place(); + memory::Copy(dst_place, &DESC_SIZE, src_place, s.c_str(), sizeof(size_t)); + memory::Copy(dst_place, &DATA_SIZE, src_place, s.c_str() + sizeof(size_t), + sizeof(size_t)); + + // parse LoDTensorDesc + LoDTensorProto desc; + desc.ParseFromArray(s.c_str() + sizeof(size_t) * 2, DESC_SIZE); + + std::vector dims; + std::copy(desc.dims.begin(), desc.dims().end(), std::back_inserter(dims)); + this->Resize(dims); + auto* ptr = this->mutable_data(dst_place); + memory::Copy(dst_place, ptr, src_place, + s.c_str() + sizeof(size_t) * 2 + DESC_SIZE, DATA_SIZE); +} } // namespace framework } // namespace paddle diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index b1b668f2cdf13..3b9765554c6d5 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -122,6 +122,7 @@ class LoDTensor : public Tensor { /** * @brief Serialize tensor to char 
bytes. * Please check model_format.md for the format detail. + * NOTE: GPUTensor will copy data to cpu implicitly. * @return return string */ std::string SerializeToString() const; diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index 44f09f584fb75..b42fec746ba70 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -102,5 +102,10 @@ TEST_F(LoDTensorTester, ShrinkInLevel) { ASSERT_EQ(new_lod_tensor.data(), lod_tensor_.data()); } +TEST_F(LoDTensorTester, SerializeDeserialize) { + LoDTensor new_lod_tensor = lod_tensor_; + std::string s = lod_tensor_.SerializeToString(); +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/saver.proto b/paddle/framework/saver.proto new file mode 100644 index 0000000000000..5a61831751ee9 --- /dev/null +++ b/paddle/framework/saver.proto @@ -0,0 +1,43 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +syntax = "proto2"; +package paddle.framework; + +import "framework.proto"; + +/** + * This file contains necessary information for tensorboard, model, checkpoint. + * etc. + */ + +message LoDInfo { repeated int64 level = 1; } + +/** + * Save the LoDTensor Meta information through LoDTensorProto, its memory data + * is copyed to c buffer + * immediately for speed. 
+ */ +message LoDTensorProto { + optional DataType data_type = 1; + repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] + repeated LoDInfo levels = 3; + optional int32 lod_level = 4 [ default = 0 ]; + optional int32 place = 5; // device information +} + +message ModelProto { + optional int32 version = 1; // version number + optional int64 md5sum = 2; // checksum +} \ No newline at end of file diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 80a3f0a3935ef..8d0548db86a18 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -31,6 +31,8 @@ namespace paddle { namespace framework { +class LoDTensor; + class Tensor { public: template @@ -116,6 +118,8 @@ class Tensor { inline void check_memory_size() const; private: + friend class LoDTensor; + /** * @note Placeholder hides type T, so it doesn't appear as a template * parameter of Variable. @@ -157,7 +161,12 @@ class Tensor { /*! holds the memory block if allocated. */ std::shared_ptr holder_; - /*! points to dimensions of memory block. */ + /** + * @brief points to elements dimensions. + * + * @note dims_ do not indicate the memory block size. 
+ */ + DDim dims_; /** From 2f8eb957cc87785d2ebb03693b7ef0c6eed6ce6d Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Fri, 6 Oct 2017 16:13:46 -0700 Subject: [PATCH 05/29] "format code" --- paddle/framework/lod_tensor.cc | 52 ++++++++++++++++++++++++----- paddle/framework/lod_tensor_test.cc | 12 ++++++- paddle/framework/saver.proto | 10 ++++-- 3 files changed, 61 insertions(+), 13 deletions(-) diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index df51c7181f315..b053de4325d20 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -120,7 +120,8 @@ std::string LoDTensor::SerializeToString() const { if (this->type() == typeid(int32_t)) desc.set_data_type(DataType::INT32); if (this->type() == typeid(int64_t)) desc.set_data_type(DataType::INT64); // FIXME(dzh): there is no fp16 in standard c++ - if (this->type() == typeid(int_fast16_t)) desc.set_data_type(DataType::FP16); + // if (this->type() == typeid(int_fast16_t)) + // desc.set_data_type(DataType::FP16); if (this->type() == typeid(float)) desc.set_data_type(DataType::FP16); if (this->type() == typeid(double)) desc.set_data_type(DataType::FP16); @@ -132,6 +133,18 @@ std::string LoDTensor::SerializeToString() const { // set lod information desc.set_lod_level(this->NumLevels()); + for (int i = 0; i < this->NumLevels(); ++i) { + LoDInfo* lod = desc.add_levels(); + for (int j = 0; j < lod_[i].size(); ++j) { + lod->add_level(this->lod_element(i, j)); + } + } + + // set place information + platform::Place place = holder_->place(); + if (platform::is_gpu_place(place)) desc.set_place(DevicePlace::GPUPlace); + if (platform::is_cpu_place(place)) desc.set_place(DevicePlace::CPUPlace); + std::string desc_bytes = desc.SerializeAsString(); // FIXME(dzh) : implement fix chunk size buffer. @@ -143,7 +156,6 @@ std::string LoDTensor::SerializeToString() const { static_cast(memory::Alloc(platform::CPUPlace(), BUFFER_SIZE)); // format: desc_size data_size, desc_bytes, data_bytes. 
- platform::Place place = holder_->place(); platform::Place src_place = platform::CPUPlace(); platform::Place dst_place = platform::CPUPlace(); @@ -155,7 +167,7 @@ std::string LoDTensor::SerializeToString() const { // copy gpu data to cpu implicitly. PADDLE_ENFORCE(this->numel() != 0, " Serialize a empty Tensor!"); - int element_width = holder->size() / this->numel(); + int element_width = holder_->size() / this->numel(); memory::Copy( dst_place, buffer + sizeof(size_t) * 2 + desc_bytes.size(), place, static_cast(holder_->ptr()) + offset_ / element_width, DATA_SIZE); @@ -166,19 +178,41 @@ std::string LoDTensor::SerializeToString() const { void LoDTensor::DeserializeFromString(const std::string& s) { size_t DESC_SIZE, DATA_SIZE; platform::Place src_place = platform::CPUPlace(); - platform::Place dst_place = holder_->place(); - memory::Copy(dst_place, &DESC_SIZE, src_place, s.c_str(), sizeof(size_t)); - memory::Copy(dst_place, &DATA_SIZE, src_place, s.c_str() + sizeof(size_t), + memory::Copy(platform::CPUPlace(), &DESC_SIZE, src_place, s.c_str(), sizeof(size_t)); + memory::Copy(platform::CPUPlace(), &DATA_SIZE, src_place, + s.c_str() + sizeof(size_t), sizeof(size_t)); // parse LoDTensorDesc LoDTensorProto desc; desc.ParseFromArray(s.c_str() + sizeof(size_t) * 2, DESC_SIZE); std::vector dims; - std::copy(desc.dims.begin(), desc.dims().end(), std::back_inserter(dims)); - this->Resize(dims); - auto* ptr = this->mutable_data(dst_place); + std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims)); + this->Resize(make_ddim(dims)); + + platform::Place dst_place; + if (desc.place() == DevicePlace::CPUPlace) dst_place = platform::CPUPlace(); + if (desc.place() == DevicePlace::GPUPlace) dst_place = platform::GPUPlace(); + + // parse data type + void* ptr; + if (desc.data_type() == DataType::BOOL) + ptr = this->mutable_data(dst_place); + if (desc.data_type() == DataType::INT16) + ptr = this->mutable_data(dst_place); + if (desc.data_type() == DataType::INT32) + 
ptr = this->mutable_data(dst_place); + if (desc.data_type() == DataType::INT64) + ptr = this->mutable_data(dst_place); + // FIXME(dzh): there is no fp16 in standard c++ + // if (desc.data_type() == DataType::FP16) ptr = + // this->mutable_data(dst_place); + if (desc.data_type() == DataType::FP32) + ptr = this->mutable_data(dst_place); + if (desc.data_type() == DataType::FP64) + ptr = this->mutable_data(dst_place); + memory::Copy(dst_place, ptr, src_place, s.c_str() + sizeof(size_t) * 2 + DESC_SIZE, DATA_SIZE); } diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index b42fec746ba70..e19b2248c6c08 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -38,7 +38,10 @@ class LoDTensorTester : public ::testing::Test { lod_tensor_.Resize({20 /*batch size*/, 128 /*dim*/}); // malloc memory - lod_tensor_.mutable_data(place); + float* dst_ptr = lod_tensor_.mutable_data(place); + for (int i = 0; i < 20 * 128; ++i) { + dst_ptr[i] = i; + } lod_tensor_.set_lod(lod); } @@ -104,7 +107,14 @@ TEST_F(LoDTensorTester, ShrinkInLevel) { TEST_F(LoDTensorTester, SerializeDeserialize) { LoDTensor new_lod_tensor = lod_tensor_; + float* src_ptr = lod_tensor_.data(); std::string s = lod_tensor_.SerializeToString(); + LoDTensor dst; + dst.DeserializeFromString(s); + float* dst_ptr = dst.data(); + for (int i = 0; i < 20 * 128, ++i) { + EXPECT_EQ(dst_ptr[i], src_ptr[i]); + } } } // namespace framework diff --git a/paddle/framework/saver.proto b/paddle/framework/saver.proto index 5a61831751ee9..75284000859e8 100644 --- a/paddle/framework/saver.proto +++ b/paddle/framework/saver.proto @@ -26,15 +26,19 @@ message LoDInfo { repeated int64 level = 1; } /** * Save the LoDTensor Meta information through LoDTensorProto, its memory data - * is copyed to c buffer - * immediately for speed. + * is copyed to c buffer immediately. 
*/ +enum DevicePlace { + CPUPlace = 0; + GPUPlace = 1; +} + message LoDTensorProto { optional DataType data_type = 1; repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] repeated LoDInfo levels = 3; optional int32 lod_level = 4 [ default = 0 ]; - optional int32 place = 5; // device information + optional DevicePlace place = 5; // device information } message ModelProto { From f69e444646e7cc8ba3bee07d14caae86639117c3 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Fri, 6 Oct 2017 18:08:21 -0700 Subject: [PATCH 06/29] "add gpu option" --- paddle/framework/CMakeLists.txt | 2 +- paddle/framework/lod_tensor.cc | 76 ++++++++++++++++++----------- paddle/framework/lod_tensor.h | 4 +- paddle/framework/lod_tensor_test.cc | 4 +- paddle/framework/saver.proto | 10 ++-- 5 files changed, 58 insertions(+), 38 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index e9c7be85a6b30..29c227d91c159 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -11,7 +11,7 @@ cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor saver_proto framework_proto) -cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor) +cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor paddle_memory) nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor) cc_test(variable_test SRCS variable_test.cc) diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index b053de4325d20..2ddd9f20d51f1 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -115,15 +115,16 @@ std::string LoDTensor::SerializeToString() const { LoDTensorProto desc; // set data_type - if (this->type() == typeid(bool)) desc.set_data_type(DataType::BOOL); + if (this->type() == typeid(int8_t)) desc.set_data_type(DataType::BOOL); if (this->type() == typeid(int16_t)) 
desc.set_data_type(DataType::INT16); if (this->type() == typeid(int32_t)) desc.set_data_type(DataType::INT32); if (this->type() == typeid(int64_t)) desc.set_data_type(DataType::INT64); // FIXME(dzh): there is no fp16 in standard c++ - // if (this->type() == typeid(int_fast16_t)) - // desc.set_data_type(DataType::FP16); - if (this->type() == typeid(float)) desc.set_data_type(DataType::FP16); - if (this->type() == typeid(double)) desc.set_data_type(DataType::FP16); + + if (this->type() == typeid(float)) + desc.set_data_type(DataType::FP32); // NOLINT + if (this->type() == typeid(double)) + desc.set_data_type(DataType::FP64); // NOLINT // set dims std::vector dims = vectorize(this->dims()); @@ -133,17 +134,15 @@ std::string LoDTensor::SerializeToString() const { // set lod information desc.set_lod_level(this->NumLevels()); - for (int i = 0; i < this->NumLevels(); ++i) { + for (size_t i = 0; i < this->NumLevels(); ++i) { LoDInfo* lod = desc.add_levels(); - for (int j = 0; j < lod_[i].size(); ++j) { + for (size_t j = 0; j < lod_[i].size(); ++j) { lod->add_level(this->lod_element(i, j)); } } // set place information platform::Place place = holder_->place(); - if (platform::is_gpu_place(place)) desc.set_place(DevicePlace::GPUPlace); - if (platform::is_cpu_place(place)) desc.set_place(DevicePlace::CPUPlace); std::string desc_bytes = desc.SerializeAsString(); @@ -156,8 +155,8 @@ std::string LoDTensor::SerializeToString() const { static_cast(memory::Alloc(platform::CPUPlace(), BUFFER_SIZE)); // format: desc_size data_size, desc_bytes, data_bytes. 
- platform::Place src_place = platform::CPUPlace(); - platform::Place dst_place = platform::CPUPlace(); + platform::CPUPlace src_place; + platform::CPUPlace dst_place; memory::Copy(dst_place, buffer, src_place, &DESC_SIZE, sizeof(size_t)); memory::Copy(dst_place, buffer + sizeof(size_t), src_place, &DATA_SIZE, @@ -165,23 +164,38 @@ std::string LoDTensor::SerializeToString() const { memory::Copy(dst_place, buffer + sizeof(size_t) * 2, src_place, desc_bytes.c_str(), desc_bytes.size()); - // copy gpu data to cpu implicitly. PADDLE_ENFORCE(this->numel() != 0, " Serialize a empty Tensor!"); + int element_width = holder_->size() / this->numel(); - memory::Copy( - dst_place, buffer + sizeof(size_t) * 2 + desc_bytes.size(), place, - static_cast(holder_->ptr()) + offset_ / element_width, DATA_SIZE); + if (platform::is_cpu_place(place)) { + memory::Copy(dst_place, buffer + sizeof(size_t) * 2 + desc_bytes.size(), + boost::get(place), + static_cast(holder_->ptr()) + offset_ / element_width, + DATA_SIZE); + } +#ifdef PADDLE_WITH_GPU + else if (platform::is_gpu_place(place)) { + memory::Copy(dst_place, buffer + sizeof(size_t) * 2 + desc_bytes.size(), + boost::get(place), + static_cast(holder_->ptr()) + offset_ / element_width, + DATA_SIZE); + } +#endif - return std::string(buffer, BUFFER_SIZE); + std::string ret(buffer, BUFFER_SIZE); + memory::Free(platform::CPUPlace(), buffer); + return ret; } -void LoDTensor::DeserializeFromString(const std::string& s) { +void LoDTensor::DeserializeFromString(const std::string& s, + const platform::Place& dst_place) { size_t DESC_SIZE, DATA_SIZE; + DESC_SIZE = DATA_SIZE = 100; platform::Place src_place = platform::CPUPlace(); - memory::Copy(platform::CPUPlace(), &DESC_SIZE, src_place, s.c_str(), - sizeof(size_t)); - memory::Copy(platform::CPUPlace(), &DATA_SIZE, src_place, - s.c_str() + sizeof(size_t), sizeof(size_t)); + // memory::Copy(src_place, &DESC_SIZE, src_place, s.c_str(), + // sizeof(size_t)); + // memory::Copy(src_place, &DATA_SIZE, 
src_place, + // s.c_str() + sizeof(size_t), sizeof(size_t)); // parse LoDTensorDesc LoDTensorProto desc; @@ -191,10 +205,6 @@ void LoDTensor::DeserializeFromString(const std::string& s) { std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims)); this->Resize(make_ddim(dims)); - platform::Place dst_place; - if (desc.place() == DevicePlace::CPUPlace) dst_place = platform::CPUPlace(); - if (desc.place() == DevicePlace::GPUPlace) dst_place = platform::GPUPlace(); - // parse data type void* ptr; if (desc.data_type() == DataType::BOOL) @@ -206,15 +216,23 @@ void LoDTensor::DeserializeFromString(const std::string& s) { if (desc.data_type() == DataType::INT64) ptr = this->mutable_data(dst_place); // FIXME(dzh): there is no fp16 in standard c++ - // if (desc.data_type() == DataType::FP16) ptr = - // this->mutable_data(dst_place); + if (desc.data_type() == DataType::FP32) ptr = this->mutable_data(dst_place); if (desc.data_type() == DataType::FP64) ptr = this->mutable_data(dst_place); - memory::Copy(dst_place, ptr, src_place, - s.c_str() + sizeof(size_t) * 2 + DESC_SIZE, DATA_SIZE); + // GPU + if (platform::is_cpu_place(dst_place)) { + memory::Copy(boost::get(dst_place), ptr, src_place, + s.c_str() + sizeof(size_t) * 2 + DESC_SIZE, DATA_SIZE); + } +#ifdef PADDLE_WITH_GPU + else if (platform::is_gpu_place(dst_place)) { + memory::Copy(boost::get(dst_place), ptr, src_place, + s.c_str() + sizeof(size_t) * 2 + DESC_SIZE, DATA_SIZE); + } +#endif } } // namespace framework diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 3b9765554c6d5..6e4c4b2fcbe85 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -25,6 +25,7 @@ #include "paddle/framework/ddim.h" #include "paddle/framework/tensor.h" #include "paddle/platform/enforce.h" +#include "paddle/platform/place.h" namespace paddle { namespace framework { @@ -133,7 +134,8 @@ class LoDTensor : public Tensor { * @brief Deserialize char bytes to tensor. 
* @return return string */ - void DeserializeFromString(const std::string& s); + void DeserializeFromString(const std::string& s, + const platform::Place& dst_place); private: LoD lod_; diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index e19b2248c6c08..ee3b3c8a8b9c3 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -110,9 +110,9 @@ TEST_F(LoDTensorTester, SerializeDeserialize) { float* src_ptr = lod_tensor_.data(); std::string s = lod_tensor_.SerializeToString(); LoDTensor dst; - dst.DeserializeFromString(s); + dst.DeserializeFromString(s, platform::CPUPlace()); float* dst_ptr = dst.data(); - for (int i = 0; i < 20 * 128, ++i) { + for (int i = 0; i < 20 * 128; ++i) { EXPECT_EQ(dst_ptr[i], src_ptr[i]); } } diff --git a/paddle/framework/saver.proto b/paddle/framework/saver.proto index 75284000859e8..ff7592c1f0901 100644 --- a/paddle/framework/saver.proto +++ b/paddle/framework/saver.proto @@ -28,17 +28,17 @@ message LoDInfo { repeated int64 level = 1; } * Save the LoDTensor Meta information through LoDTensorProto, its memory data * is copyed to c buffer immediately. 
*/ -enum DevicePlace { - CPUPlace = 0; - GPUPlace = 1; -} +// enum DevicePlace { +// CPUPlace = 0; +// GPUPlace = 1; +// } message LoDTensorProto { optional DataType data_type = 1; repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] repeated LoDInfo levels = 3; optional int32 lod_level = 4 [ default = 0 ]; - optional DevicePlace place = 5; // device information + // optional DevicePlace place = 5; // device information } message ModelProto { From 5a6e6b27bf8a1224c19e0cf711c06f14efb28dd8 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Sun, 8 Oct 2017 23:37:09 -0700 Subject: [PATCH 07/29] "add lod info" --- paddle/framework/lod_tensor.cc | 36 ++++++++++++++++++----------- paddle/framework/lod_tensor_test.cc | 4 ++++ paddle/framework/saver.proto | 5 ---- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 2ddd9f20d51f1..365912f4e6b32 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -121,10 +121,10 @@ std::string LoDTensor::SerializeToString() const { if (this->type() == typeid(int64_t)) desc.set_data_type(DataType::INT64); // FIXME(dzh): there is no fp16 in standard c++ - if (this->type() == typeid(float)) - desc.set_data_type(DataType::FP32); // NOLINT - if (this->type() == typeid(double)) - desc.set_data_type(DataType::FP64); // NOLINT + if (this->type() == typeid(float)) // NOLINT + desc.set_data_type(DataType::FP32); + if (this->type() == typeid(double)) // NOLINT + desc.set_data_type(DataType::FP64); // set dims std::vector dims = vectorize(this->dims()); @@ -137,7 +137,7 @@ std::string LoDTensor::SerializeToString() const { for (size_t i = 0; i < this->NumLevels(); ++i) { LoDInfo* lod = desc.add_levels(); for (size_t j = 0; j < lod_[i].size(); ++j) { - lod->add_level(this->lod_element(i, j)); + lod->add_level(lod_[i][j]); } } @@ -174,7 +174,7 @@ std::string LoDTensor::SerializeToString() const { DATA_SIZE); } #ifdef 
PADDLE_WITH_GPU - else if (platform::is_gpu_place(place)) { + if (platform::is_gpu_place(place)) { memory::Copy(dst_place, buffer + sizeof(size_t) * 2 + desc_bytes.size(), boost::get(place), static_cast(holder_->ptr()) + offset_ / element_width, @@ -191,11 +191,10 @@ void LoDTensor::DeserializeFromString(const std::string& s, const platform::Place& dst_place) { size_t DESC_SIZE, DATA_SIZE; DESC_SIZE = DATA_SIZE = 100; - platform::Place src_place = platform::CPUPlace(); - // memory::Copy(src_place, &DESC_SIZE, src_place, s.c_str(), - // sizeof(size_t)); - // memory::Copy(src_place, &DATA_SIZE, src_place, - // s.c_str() + sizeof(size_t), sizeof(size_t)); + platform::CPUPlace src_place; + memory::Copy(src_place, &DESC_SIZE, src_place, s.c_str(), sizeof(size_t)); + memory::Copy(src_place, &DATA_SIZE, src_place, s.c_str() + sizeof(size_t), + sizeof(size_t)); // parse LoDTensorDesc LoDTensorProto desc; @@ -222,13 +221,24 @@ void LoDTensor::DeserializeFromString(const std::string& s, if (desc.data_type() == DataType::FP64) ptr = this->mutable_data(dst_place); - // GPU + LoD lod; + std::vector levels; + for (int i = 0; i < desc.levels().size(); ++i) { + auto current_level = desc.levels()[i].level(); + std::copy(current_level.begin(), current_level.end(), + std::back_inserter(levels)); + lod.emplace_back(levels); + levels.clear(); + } + + this->set_lod(lod); + if (platform::is_cpu_place(dst_place)) { memory::Copy(boost::get(dst_place), ptr, src_place, s.c_str() + sizeof(size_t) * 2 + DESC_SIZE, DATA_SIZE); } #ifdef PADDLE_WITH_GPU - else if (platform::is_gpu_place(dst_place)) { + if (platform::is_gpu_place(dst_place)) { memory::Copy(boost::get(dst_place), ptr, src_place, s.c_str() + sizeof(size_t) * 2 + DESC_SIZE, DATA_SIZE); } diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index ee3b3c8a8b9c3..d8bf335b5b23f 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -115,6 +115,10 @@ 
TEST_F(LoDTensorTester, SerializeDeserialize) { for (int i = 0; i < 20 * 128; ++i) { EXPECT_EQ(dst_ptr[i], src_ptr[i]); } + + ASSERT_EQ(dst.NumElements(0), 2UL); + ASSERT_EQ(dst.NumElements(1), 4UL); + ASSERT_EQ(dst.NumElements(2), 8UL); } } // namespace framework diff --git a/paddle/framework/saver.proto b/paddle/framework/saver.proto index ff7592c1f0901..e733de3cd74c7 100644 --- a/paddle/framework/saver.proto +++ b/paddle/framework/saver.proto @@ -28,17 +28,12 @@ message LoDInfo { repeated int64 level = 1; } * Save the LoDTensor Meta information through LoDTensorProto, its memory data * is copyed to c buffer immediately. */ -// enum DevicePlace { -// CPUPlace = 0; -// GPUPlace = 1; -// } message LoDTensorProto { optional DataType data_type = 1; repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] repeated LoDInfo levels = 3; optional int32 lod_level = 4 [ default = 0 ]; - // optional DevicePlace place = 5; // device information } message ModelProto { From d111c7aea59d4fd0303e0c8a44747719b9df38e7 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 9 Oct 2017 11:44:59 -0700 Subject: [PATCH 08/29] "add saveop python test wrapper" --- paddle/framework/lod_tensor.h | 4 + paddle/framework/saver.proto | 5 - paddle/operators/checkpoint_op.cc | 49 ++++++++ paddle/operators/save_restore_op.cc | 116 +++++++++++------- .../framework/tests/test_save_restore_op.py | 49 ++++++-- 5 files changed, 163 insertions(+), 60 deletions(-) create mode 100644 paddle/operators/checkpoint_op.cc diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 6e4c4b2fcbe85..846de64c362a6 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -126,6 +126,10 @@ class LoDTensor : public Tensor { * NOTE: GPUTensor will copy data to cpu implicitly. * @return return string */ + + // FIXME(dzh) : currently, this interface should only use in + // save/restore checkpoint/model. Tensor in pserver do not use shape + // information. 
So should serialize tensor to string in the trainer. std::string SerializeToString() const; void SerializeToString(std::string* s) const; diff --git a/paddle/framework/saver.proto b/paddle/framework/saver.proto index e733de3cd74c7..4cadb3c990b4d 100644 --- a/paddle/framework/saver.proto +++ b/paddle/framework/saver.proto @@ -35,8 +35,3 @@ message LoDTensorProto { repeated LoDInfo levels = 3; optional int32 lod_level = 4 [ default = 0 ]; } - -message ModelProto { - optional int32 version = 1; // version number - optional int64 md5sum = 2; // checksum -} \ No newline at end of file diff --git a/paddle/operators/checkpoint_op.cc b/paddle/operators/checkpoint_op.cc new file mode 100644 index 0000000000000..5cc3cb5366282 --- /dev/null +++ b/paddle/operators/checkpoint_op.cc @@ -0,0 +1,49 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using framework::Tensor; +using framework::LoDTensor; + +class CheckpointOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContextBase* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("absolute_path"), + "Input(absolute_path) of Checkpoint should not be null."); + } +}; + +class SaveOpMaker : public framework::OpProtoAndCheckerMaker { + public: + SaveOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "(tensor), the tensor count can be 1~INT_MAX, tensors names which " + "values will be saved.") + .AsDuplicable() + .NotInGradient(); + AddAttr("absolute_path", "the absolute_path for save model."); + AddComment(R"DOC( +Save the input tensors to a binary file based on input tensor names and absolute path. + +All the inputs can carry the LoD (Level of Details) information, +or not. 
+)DOC"); + } +}; diff --git a/paddle/operators/save_restore_op.cc b/paddle/operators/save_restore_op.cc index 4a88d676f3de1..c4f80661f750d 100644 --- a/paddle/operators/save_restore_op.cc +++ b/paddle/operators/save_restore_op.cc @@ -14,11 +14,13 @@ #include #include +#include namespace paddle { namespace operators { using framework::Tensor; +using framework::LoDTensor; class SaveOp : public framework::OperatorWithKernel { public: @@ -28,8 +30,8 @@ class SaveOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContextBase* ctx) const override { PADDLE_ENFORCE(ctx->HasInputs("X"), "Input(X) of SaveOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("absolute_path"), - "Input(absolute_path) of SaveOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("absolutePath"), + "Input(absolutePath) of SaveOp should not be null."); } }; @@ -42,7 +44,7 @@ class SaveOpMaker : public framework::OpProtoAndCheckerMaker { "values will be saved.") .AsDuplicable() .NotInGradient(); - AddAttr("absolute_path", "the absolute_path for save model."); + AddAttr("absolutePath", "the absolutePath for save model."); AddComment(R"DOC( Save the input tensors to a binary file based on input tensor names and absolute path. @@ -55,23 +57,40 @@ or not. 
template class SaveKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& context) const override { - auto ins = context.MultiInput("X"); - std::string absolute_path = ctx.Attr("absolute_path"); - - FILE* fp; - fp = fopen(absolute_path.c_str()) - PADDLE_ENFORCE(fp != nullptr, "open file for model failed."); - int N = ins.size(); - for (int i = 0; i < N; i++) { - // at present, we only support tensor serialization instead of variable - - std::string bytes = ins[i].SerializeToString(); - size_t count = - fwrite(bytes.c_str(), sizeof(char), sizeof(char) * bytes.size(), fp); - PADDLE_ENFORCE(count == bytes.size(), "write to model file failed."); + void Compute(const framework::ExecutionContext& ctx) const override { + auto ins = ctx.MultiInput("X"); + std::string absolutePath = ctx.template Attr("absolutePath"); + + // FILE* fp; + // fp = fopen(absolutePath.c_str(), "a"); + // PADDLE_ENFORCE(fp != nullptr, "open file for model failed."); + + std::ofstream fout(absolutePath, std::fstream::app); + PADDLE_ENFORCE(!fout.is_open(), "open file for model failed."); + for (size_t i = 0; i < ins.size(); ++i) { + std::string bytes = ins[i]->SerializeToString(); + fout << bytes << '\n'; + // size_t count = + // fwrite(bytes.c_str(), sizeof(char), sizeof(char) * bytes.size(), fp); + // PADDLE_ENFORCE(count == bytes.size(), "write to model file failed."); + // PADDLE_ENFORCE(fputc('\n', fp) == 0, "write delimiter failed"); } - fclose(fp); + + fout.close(); + + // int N = ins.size(); + // for (int i = 0; i < N; i++) { + // // at present, we only support lodtensor serialization instead of + // tensor + + // std::string bytes = ins[i]->SerializeToString(); + // size_t count = + // fwrite(bytes.c_str(), sizeof(char), sizeof(char) * bytes.size(), + // fp); + // PADDLE_ENFORCE(count == bytes.size(), "write to model file failed."); + // PADDLE_ENFORCE(fputc('\n', fp) == 0, "write delimiter failed"); + // } + // fclose(fp); } }; @@ -81,10 +100,10 @@ 
class RestoreOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContextBase* ctx) const override { - PADDLE_ENFORCE(ctx->HasInputs("X"), - "Input(X) of RestoreOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("absolute_path"), - "Input(absolute_path) of Restore Op should not be null."); + PADDLE_ENFORCE(ctx->HasOutputs("Out"), + "Output(X) of RestoreOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("absolutePath"), + "Input(absolutePath) of Restore Op should not be null."); } }; @@ -98,7 +117,7 @@ class RestoreOpMaker : public framework::OpProtoAndCheckerMaker { "values will be restores.") .AsDuplicable() .NotInGradient(); - AddAttr("absolute_path", "the absolute_path for model file."); + AddAttr("absolutePath", "the absolutePath for model file."); AddComment(R"DOC( Restore the tensors from model file based on absolute path. @@ -108,24 +127,34 @@ or not. } }; -template +template class RestoreKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& context) const override { - auto outs = context.MultiOutput("Out"); - std::string absolute_path = ctx.Attr("absolute_path"); - - FILE* fp; - fp = fopen(absolute_path.c_str()) - PADDLE_ENFORCE(fp != nullptr, "open model file failed."); - for (int i = 0; i < N; i++) { - // at present, we only support tensor serialization instead of variable - - std::string bytes = ins[i].DeserializeFromString(); - size_t count = - fwrite(bytes.c_str(), sizeof(char), sizeof(char) * bytes.size(), fp); - PADDLE_ENFORCE(count == bytes.size(), "write to model file failed."); + void Compute(const framework::ExecutionContext& ctx) const override { + auto outs = ctx.MultiOutput("Out"); + std::string absolutePath = ctx.template Attr("absolutePath"); + + // FILE* fp; + // fp = fopen(absolutePath.c_str(), "r"); + // PADDLE_ENFORCE(fp != nullptr, "open model file failed."); + + std::ifstream fin(absolutePath); + PADDLE_ENFORCE(!fin.is_open(), "open model file 
failed."); + std::string line; + int i = 0; + while (std::getline(fin, line)) { + outs[i++]->DeserializeFromString(line, ctx.GetPlace()); } + + // while(fgets()) { + // // at present, we only support tensor serialization instead of variable + + // std::string bytes = outs[i].DeserializeFromString(); + // size_t count = + // fwrite(bytes.c_str(), sizeof(char), sizeof(char) * bytes.size(), + // fp); + // PADDLE_ENFORCE(count == bytes.size(), "write to model file failed."); + // } } }; @@ -133,9 +162,10 @@ class RestoreKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; + REGISTER_OP_WITHOUT_GRADIENT(save, ops::SaveOp, ops::SaveOpMaker); -REGISTER_OP_CPU_KERNEL(save, - ops::SaveKernel); -REGISTER_OP_WITHOUT_GRADIENT(save, ops::SaveOp, ops::SaveOpMaker); -REGISTER_OP_CPU_KERNEL(save, +REGISTER_OP_CPU_KERNEL(save, ops::SaveKernel); + +REGISTER_OP_WITHOUT_GRADIENT(restore, ops::RestoreOp, ops::RestoreOpMaker); +REGISTER_OP_CPU_KERNEL(restore, ops::RestoreKernel); diff --git a/python/paddle/v2/framework/tests/test_save_restore_op.py b/python/paddle/v2/framework/tests/test_save_restore_op.py index e81bcb25f54bd..d7f5e5de15f7b 100644 --- a/python/paddle/v2/framework/tests/test_save_restore_op.py +++ b/python/paddle/v2/framework/tests/test_save_restore_op.py @@ -6,7 +6,33 @@ import unittest import os, sys -ABSOLUTE_PATH = "/tmp/" # directory for save parameter files. +ABSOLUTE_PATH = "/tmp/PADDLE_TEST_MODEL" # directory for save parameter files. 
+ +scope = core.Scope() +place = core.CPUPlace() +dev_ctx = core.DeviceContext.create(place) + + +# Saver is a demo for saveing arbitrary tensor +# it is a wrapper of c++ save/restore op +def Saver(var_list=None): + net = core.Net.create() + save_tensors = [] + for var in var_list: + tensor = scope.find_var(var) + save_tensors.append(tensor) + save_op = Operator("save", X=save_tensors, absolutePath=ABSOLUTE_PATH) + net.append_op(save_op) + net.infer_shape(scope) + net.run(scope, dev_ctx) + + +class TestSaver(unittest.TestCase): + def test_save_tensors(self): + a = scope.new_var("a") + b = scope.new_var("b") + Saver(["a", "b"]) + self.assertTrue(os.path.exists(ABSOLUTE_PATH)) class TestSaveOp(OpTest): @@ -22,25 +48,24 @@ def setUp(self): } def test_check_output(self): + if os.path.exists(ABSOLUTE_PATH): + os.rmdir(ABSOLUTE_PATH) self.check_output() - self.assertTrue( - os.path.exists(os.path.join(ABSOLUTE_PATH, "x0")), - " parameter not saved") + self.assertTrue(os.path.exists(ABSOLUTE_PATH)) +# Must run savetest first class TestRestoreOp(OpTest): def setUp(self): self.op_type = "restore" x0 = np.random.random((2, 3)).astype("float32") x1 = np.random.random((1, 2)).astype("float32") x2 = np.random.random((2, 1)).astype("float32") + self.check_results = [x0, x1, x2] - self.inputs = { - "X": [("x0", x0), ("x1", x1), ("x2", x2)], - "absolute_path": ABSOLUTE_PATH - } + self.outputs = {"Out": [], "absolute_path": ABSOLUTE_PATH} - # def test_check_output(self): - # self.check_output() - # self.assertTrue(os.path.exists(os.path.join(ABSOLUTE_PATH, "x0")), - # " parameter not saved") + def test_check_output(self): + self.check_output() + for e in self.check_results: + self.assertTrue(e in self.outputs["Out"]) From 70786ee19b43316fc054622f5830a64658d0d6fe Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 9 Oct 2017 14:27:49 -0700 Subject: [PATCH 09/29] "checkpoint reuse save operator" --- paddle/framework/scope.cc | 10 ++-- paddle/framework/scope.h | 4 +- 
paddle/framework/scope_test.cc | 6 ++- paddle/operators/checkpoint_op.cc | 50 ++++++++++++++----- paddle/operators/save_restore_op.cc | 37 +------------- .../v2/framework/tests/test_checkpoint_op.py | 32 ++++++++++++ .../framework/tests/test_save_restore_op.py | 22 ++++---- 7 files changed, 95 insertions(+), 66 deletions(-) create mode 100644 python/paddle/v2/framework/tests/test_checkpoint_op.py diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc index 3a4e4d7d284f2..2552433fb0364 100644 --- a/paddle/framework/scope.cc +++ b/paddle/framework/scope.cc @@ -62,19 +62,19 @@ void Scope::DropKids() { kids_.clear(); } -std::unordered_set Scope::Introspection(bool recursive) const { - std::unordered_set known_vars; +std::vector Scope::GetAllNames(bool recursive) const { + std::vector known_vars(vars_.size()); if (recursive) { for (auto& kid : kids_) { - auto kid_vars = kid->Introspection(); + auto kid_vars = kid->GetAllNames(); for (auto& p : kid_vars) { - known_vars.insert(p); + known_vars.emplace_back(p); } } } for (auto& p : vars_) { - known_vars.insert(p.first); + known_vars.emplace_back(p.first); } return known_vars; } diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h index 0b1a45b0df349..b585dd938d98a 100644 --- a/paddle/framework/scope.h +++ b/paddle/framework/scope.h @@ -17,7 +17,7 @@ limitations under the License. */ #include #include #include -#include +#include #include "paddle/framework/variable.h" #include "paddle/platform/macros.h" @@ -64,7 +64,7 @@ class Scope { void DropKids(); // enumerate all the variables current contains. - std::unordered_set Introspection(bool recursive = false) const; + std::vector GetAllNames(bool recursive = false) const; private: // Call Scope::NewScope for a sub-scope. 
diff --git a/paddle/framework/scope_test.cc b/paddle/framework/scope_test.cc index 9261658effd11..3cced548e7e09 100644 --- a/paddle/framework/scope_test.cc +++ b/paddle/framework/scope_test.cc @@ -56,14 +56,16 @@ TEST(Scope, FindScope) { EXPECT_EQ(&s, ss.FindScope(v)); } -TEST(Scope, Introspection) { +TEST(Scope, GetAllNames) { Scope s; Variable* v = s.NewVar("a"); EXPECT_EQ(&s, s.FindScope(v)); - std::unordered_set ans = s.Introspection(); + + std::vector ans = s.GetAllNames(); std::string str; for (auto& var : ans) { str += var; } + EXPECT_STREQ("a", str.c_str()); } diff --git a/paddle/operators/checkpoint_op.cc b/paddle/operators/checkpoint_op.cc index 5cc3cb5366282..e78550ac82971 100644 --- a/paddle/operators/checkpoint_op.cc +++ b/paddle/operators/checkpoint_op.cc @@ -24,26 +24,52 @@ class CheckpointOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContextBase* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("absolute_path"), - "Input(absolute_path) of Checkpoint should not be null."); + PADDLE_ENFORCE(ctx->HasInput("absolutePath"), + "Input(absolutePath) of Checkpoint should not be null."); } }; -class SaveOpMaker : public framework::OpProtoAndCheckerMaker { +class CheckpointOpMaker : public framework::OpProtoAndCheckerMaker { public: - SaveOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) + CheckpointOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", - "(tensor), the tensor count can be 1~INT_MAX, tensors names which " - "values will be saved.") - .AsDuplicable() - .NotInGradient(); - AddAttr("absolute_path", "the absolute_path for save model."); + AddAttr("absolutePath", "the absolutePath for save model."); + AddAttr("interval", + "(int, default 0) time seconds interval for saving " + "checkpoint. 
0 means only save chekcpoint once"); AddComment(R"DOC( -Save the input tensors to a binary file based on input tensor names and absolute path. +Save the workload environment to the absolute path. -All the inputs can carry the LoD (Level of Details) information, +All the tensors can carry the LoD (Level of Details) information, or not. )DOC"); } }; + +template +class CheckpointKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + std::string absolutePath = ctx.template Attr("absolutePath"); + int interval = ctx.template Attr("interval"); + auto& scope = ctx.scope(); + std::vector ins = scope.GetAllNames(); + std::vector inputs; + for (auto& name : ins) { + inputs.emplace_back(scope.FindVar(name)); + } + + framework::OpRegistry::CreateOp("save", {{"X", inputs}}, + {"absolutePath", absolutePath}); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_WITHOUT_GRADIENT(checkpoint, ops::CheckpointOp, + ops::CheckpointOpMaker); +REGISTER_OP_CPU_KERNEL(checkpoint, ops::CheckpointKernel); diff --git a/paddle/operators/save_restore_op.cc b/paddle/operators/save_restore_op.cc index c4f80661f750d..7bf9c19f4e652 100644 --- a/paddle/operators/save_restore_op.cc +++ b/paddle/operators/save_restore_op.cc @@ -61,36 +61,14 @@ class SaveKernel : public framework::OpKernel { auto ins = ctx.MultiInput("X"); std::string absolutePath = ctx.template Attr("absolutePath"); - // FILE* fp; - // fp = fopen(absolutePath.c_str(), "a"); - // PADDLE_ENFORCE(fp != nullptr, "open file for model failed."); - std::ofstream fout(absolutePath, std::fstream::app); PADDLE_ENFORCE(!fout.is_open(), "open file for model failed."); for (size_t i = 0; i < ins.size(); ++i) { std::string bytes = ins[i]->SerializeToString(); fout << bytes << '\n'; - // size_t count = - // fwrite(bytes.c_str(), sizeof(char), sizeof(char) * bytes.size(), fp); - // PADDLE_ENFORCE(count == bytes.size(), "write 
to model file failed."); - // PADDLE_ENFORCE(fputc('\n', fp) == 0, "write delimiter failed"); } fout.close(); - - // int N = ins.size(); - // for (int i = 0; i < N; i++) { - // // at present, we only support lodtensor serialization instead of - // tensor - - // std::string bytes = ins[i]->SerializeToString(); - // size_t count = - // fwrite(bytes.c_str(), sizeof(char), sizeof(char) * bytes.size(), - // fp); - // PADDLE_ENFORCE(count == bytes.size(), "write to model file failed."); - // PADDLE_ENFORCE(fputc('\n', fp) == 0, "write delimiter failed"); - // } - // fclose(fp); } }; @@ -134,27 +112,14 @@ class RestoreKernel : public framework::OpKernel { auto outs = ctx.MultiOutput("Out"); std::string absolutePath = ctx.template Attr("absolutePath"); - // FILE* fp; - // fp = fopen(absolutePath.c_str(), "r"); - // PADDLE_ENFORCE(fp != nullptr, "open model file failed."); - std::ifstream fin(absolutePath); PADDLE_ENFORCE(!fin.is_open(), "open model file failed."); + std::string line; int i = 0; while (std::getline(fin, line)) { outs[i++]->DeserializeFromString(line, ctx.GetPlace()); } - - // while(fgets()) { - // // at present, we only support tensor serialization instead of variable - - // std::string bytes = outs[i].DeserializeFromString(); - // size_t count = - // fwrite(bytes.c_str(), sizeof(char), sizeof(char) * bytes.size(), - // fp); - // PADDLE_ENFORCE(count == bytes.size(), "write to model file failed."); - // } } }; diff --git a/python/paddle/v2/framework/tests/test_checkpoint_op.py b/python/paddle/v2/framework/tests/test_checkpoint_op.py new file mode 100644 index 0000000000000..2d5c8c6fc1687 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_checkpoint_op.py @@ -0,0 +1,32 @@ +import paddle.v2 as paddle +from paddle.v2.framework import core +from paddle.v2.framework.op import Operator +from op_test import OpTest +import numpy as np +import unittest +import os + +ABSOLUTE_PATH = "/tmp/PADDLE_MODEL" # directory for save parameter files. 
+ +scope = core.Scope() +place = core.CPUPlace() +dev_ctx = core.DeviceContext.create(place) + + +class TestCheckpointOp(OpTest): + def setUp(self): + self.op_type = "checkpoint" + x0 = np.random.random((2, 3)).astype("float32") + x1 = np.random.random((1, 2)).astype("float32") + x2 = np.random.random((2, 1)).astype("float32") + + self.attrs = {"absolute_path": ABSOLUTE_PATH, "interval": 100} + + def test_check_output(self): + self.assertTrue(os.path.exists(ABSOLUTE_PATH)) + + +#TODO(dzh): add a joint testing case to checkpoint/save + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_save_restore_op.py b/python/paddle/v2/framework/tests/test_save_restore_op.py index d7f5e5de15f7b..cd454119003e8 100644 --- a/python/paddle/v2/framework/tests/test_save_restore_op.py +++ b/python/paddle/v2/framework/tests/test_save_restore_op.py @@ -1,12 +1,11 @@ -import paddle.v2 as paddle from paddle.v2.framework import core from paddle.v2.framework.op import Operator -from op_test import OpTest import numpy as np +from op_test import OpTest import unittest -import os, sys +import os -ABSOLUTE_PATH = "/tmp/PADDLE_TEST_MODEL" # directory for save parameter files. +ABSOLUTE_PATH = "/tmp/PADDLE_MODEL" # directory for save parameter files. 
scope = core.Scope() place = core.CPUPlace() @@ -42,10 +41,9 @@ def setUp(self): x1 = np.random.random((1, 2)).astype("float32") x2 = np.random.random((2, 1)).astype("float32") - self.inputs = { - "X": [("x0", x0), ("x1", x1), ("x2", x2)], - "absolute_path": ABSOLUTE_PATH - } + self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)], } + + self.attrs = {"absolute_path": ABSOLUTE_PATH} def test_check_output(self): if os.path.exists(ABSOLUTE_PATH): @@ -63,9 +61,15 @@ def setUp(self): x2 = np.random.random((2, 1)).astype("float32") self.check_results = [x0, x1, x2] - self.outputs = {"Out": [], "absolute_path": ABSOLUTE_PATH} + self.outputs = {"Out": []} + + self.attrs = {"absolute_path": ABSOLUTE_PATH} def test_check_output(self): self.check_output() for e in self.check_results: self.assertTrue(e in self.outputs["Out"]) + + +if __name__ == "__main__": + unittest.main() From 1f3126520c890470d6d177c3d802c26f06cd96e2 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 9 Oct 2017 16:01:58 -0700 Subject: [PATCH 10/29] "rewrite model format design doc" --- doc/design/model_format.md | 27 ++++++++++--------- paddle/framework/lod_tensor.h | 10 +++---- paddle/framework/saver.proto | 6 ++--- paddle/operators/checkpoint_op.cc | 19 ++++++++++--- paddle/operators/save_restore_op.cc | 3 +-- .../framework/tests/test_save_restore_op.py | 6 +++-- 6 files changed, 42 insertions(+), 29 deletions(-) diff --git a/doc/design/model_format.md b/doc/design/model_format.md index 4928cb1eb358b..491302e14a615 100644 --- a/doc/design/model_format.md +++ b/doc/design/model_format.md @@ -10,26 +10,27 @@ As a result, In PaddlePaddle, the **topology** represents as a [ProgramDesc](ht The topology is saved as a plain text, in detail, a self-complete protobuf file. -The parameters are saved as a binary file. 
As we all know, the protobuf message has the limits of [64M size](https://developers.google.com/protocol-buffers/docs/reference/cpp/google.protobuf.io.coded_stream#CodedInputStream.SetTotalBytesLimit.details). So we design a particular format for tensor serialization, for speed we core dump the memory to disk and save the necessary information, such as the`dims`, `name` of the tensor, Even the `LoD` information in [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/paddle/framework/lod_tensor.md). In detail, as the table shows +The parameters are saved as a binary file. As we all know, the protobuf message has the limits of [64M size](https://developers.google.com/protocol-buffers/docs/reference/cpp/google.protobuf.io.coded_stream#CodedInputStream.SetTotalBytesLimit.details). We do a benchmark experiment + + So we design a particular format for tensor serialization. By default, arbitrary tensor in Paddle is a [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/lod_tensor.md), and has a description information proto of (LoDTensorDesc)[https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto#L99]. We save the DescProto as the byte string header, it contains the necessary information, such as the `dims`, the `name` of the tensor, and the `LoD` information in [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/paddle/framework/lod_tensor.md). Tensor stores value in a continuous memory buffer, for speed we dump the raw memory to disk and save it as the byte string content. 
So, the binary format of one tensor is, + +|HeaderLength|ContentLength|**LoDTensorDesc**|**TensorValue**| + +In detail, tensor's byte view as the table shows ```text [offset] [type] [value] [description] -0000 32 bit integer 1 1 for little endian, 0 for big endian -0004 32 bit integer 0x0000006E(0110) version number, 0110 for paddle version 0.11.0 -0008 32 bit integer ?? md5checksum of Tensors -0009 unsigned byte 0 0 for tensor, 1 for LoDTensor -0013 32 bit integer 28 Tensor Name length -0017 unsigned byte ?? Tensor Name chars -0018 unsigned byte ?? .. +0000 32 bit integer ?? HeaderLength, the length of LoDTensorDesc +0004 32 bit integer ?? ContentLength, the length of LodTensor Buffer +0008 32 bit integer ?? TensorDesc +0012 32 bit integer ?? TensorDesc ... -00100 32 bit integer 3 Tensor dims count -00104 32 bit integer ?? Tensor dims 0 +00100 32 bit integer ?? Tensor Value +00104 32 bit integer ?? Tensor Value 00108 32 bit integer ?? .. ... -00150 unsigned byte ?? Tensor value -00151 unsigned byte ?? .. ``` ## Summary -We introduce the model format, the `ProgramDesc` describe the **topology**, and a bunch of particular format binary tensors describe the **parameters**. +We introduce the model format, the `ProgramDesc` describe the **topology**, and a bunch of particular format binary tensors describes the **parameters**. diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 846de64c362a6..e69cd327437ec 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -127,13 +127,13 @@ class LoDTensor : public Tensor { * @return return string */ - // FIXME(dzh) : currently, this interface should only use in - // save/restore checkpoint/model. Tensor in pserver do not use shape - // information. So should serialize tensor to string in the trainer. + // FIXME(dzh) : Currently, this interface should only be used in + // save/restore model and checkpoint. 
ParameterServer do not use shape + // information to do the optimization, as a result, when we serialize + // parameter/gradient to string, we should serialize the tensor + // to string in the ps trainer instead of LoDTensor. std::string SerializeToString() const; - void SerializeToString(std::string* s) const; - /** * @brief Deserialize char bytes to tensor. * @return return string diff --git a/paddle/framework/saver.proto b/paddle/framework/saver.proto index 4cadb3c990b4d..30f361a882539 100644 --- a/paddle/framework/saver.proto +++ b/paddle/framework/saver.proto @@ -18,15 +18,15 @@ package paddle.framework; import "framework.proto"; /** - * This file contains necessary information for tensorboard, model, checkpoint. + * This file contains necessary information for model, checkpoint. * etc. */ message LoDInfo { repeated int64 level = 1; } /** - * Save the LoDTensor Meta information through LoDTensorProto, its memory data - * is copyed to c buffer immediately. + * Save the LoDTensorDesc information through LoDTensorProto, its data memory + * is copyed to c buffer immediately. See model_format.md for details. */ message LoDTensorProto { diff --git a/paddle/operators/checkpoint_op.cc b/paddle/operators/checkpoint_op.cc index e78550ac82971..3f9a47922ecd3 100644 --- a/paddle/operators/checkpoint_op.cc +++ b/paddle/operators/checkpoint_op.cc @@ -9,9 +9,10 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" +#include + namespace paddle { namespace operators { @@ -52,7 +53,12 @@ class CheckpointKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { std::string absolutePath = ctx.template Attr("absolutePath"); - int interval = ctx.template Attr("interval"); + // TODO(dzh) : checkpoint kernel need executor support : + // 1. asynchronously call of operator + // 2. 
checkpoint op need at least two thread. + // Because checkpoint will happen per-interval, so need a thread wait + // on the timer. + // int interval = ctx.template Attr("interval"); auto& scope = ctx.scope(); std::vector ins = scope.GetAllNames(); std::vector inputs; @@ -60,8 +66,13 @@ class CheckpointKernel : public framework::OpKernel { inputs.emplace_back(scope.FindVar(name)); } - framework::OpRegistry::CreateOp("save", {{"X", inputs}}, - {"absolutePath", absolutePath}); + std::ofstream fout(absolutePath, std::fstream::app); + PADDLE_ENFORCE(!fout.is_open(), "open file for model failed."); + for (size_t i = 0; i < inputs.size(); ++i) { + std::string bytes = inputs[i]->Get().SerializeToString(); + fout << bytes << '\n'; + } + fout.close(); } }; diff --git a/paddle/operators/save_restore_op.cc b/paddle/operators/save_restore_op.cc index 7bf9c19f4e652..c7097a44a240e 100644 --- a/paddle/operators/save_restore_op.cc +++ b/paddle/operators/save_restore_op.cc @@ -12,8 +12,6 @@ #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" -#include -#include #include namespace paddle { @@ -120,6 +118,7 @@ class RestoreKernel : public framework::OpKernel { while (std::getline(fin, line)) { outs[i++]->DeserializeFromString(line, ctx.GetPlace()); } + fin.close(); } }; diff --git a/python/paddle/v2/framework/tests/test_save_restore_op.py b/python/paddle/v2/framework/tests/test_save_restore_op.py index cd454119003e8..a9fdc638b7f3f 100644 --- a/python/paddle/v2/framework/tests/test_save_restore_op.py +++ b/python/paddle/v2/framework/tests/test_save_restore_op.py @@ -11,9 +11,11 @@ place = core.CPUPlace() dev_ctx = core.DeviceContext.create(place) +# Saver is a demo for saving arbitrary tensor. +# TODO(dzh): Saver also need to save ProgramDesc. Should be done after +# python API done. 
+ -# Saver is a demo for saveing arbitrary tensor -# it is a wrapper of c++ save/restore op def Saver(var_list=None): net = core.Net.create() save_tensors = [] From 1e92448c3e47ea9ec420df967c74cfedf1a5a30b Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 9 Oct 2017 16:19:19 -0700 Subject: [PATCH 11/29] "async support needed" --- paddle/operators/checkpoint_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/operators/checkpoint_op.cc b/paddle/operators/checkpoint_op.cc index 3f9a47922ecd3..4bd8f54082455 100644 --- a/paddle/operators/checkpoint_op.cc +++ b/paddle/operators/checkpoint_op.cc @@ -57,7 +57,7 @@ class CheckpointKernel : public framework::OpKernel { // 1. asynchronously call of operator // 2. checkpoint op need at least two thread. // Because checkpoint will happen per-interval, so need a thread wait - // on the timer. + // the timer/steps to reach the condition. // int interval = ctx.template Attr("interval"); auto& scope = ctx.scope(); std::vector ins = scope.GetAllNames(); From 6d24d23bb1e8b29298ed89fca912cfe260e238c3 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 9 Oct 2017 17:20:15 -0700 Subject: [PATCH 12/29] "fix run once" --- paddle/operators/checkpoint_op.cc | 32 ++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/paddle/operators/checkpoint_op.cc b/paddle/operators/checkpoint_op.cc index 4bd8f54082455..492736cd4ed80 100644 --- a/paddle/operators/checkpoint_op.cc +++ b/paddle/operators/checkpoint_op.cc @@ -25,7 +25,10 @@ class CheckpointOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContextBase* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("absolutePath"), + PADDLE_ENFORCE(ctx->HasInput("Step"), + "Input(Step) of Checkpoint should not be null."); + std::string absolutePath = ctx->Attrs().Get("absolutePath"); + PADDLE_ENFORCE(absolutePath != "", "Input(absolutePath) of Checkpoint should not be null."); } }; @@ 
-35,10 +38,13 @@ class CheckpointOpMaker : public framework::OpProtoAndCheckerMaker { CheckpointOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Step", "(Tensor), indicate the training process."); AddAttr("absolutePath", "the absolutePath for save model."); - AddAttr("interval", - "(int, default 0) time seconds interval for saving " - "checkpoint. 0 means only save chekcpoint once"); + AddAttr( + "everyNStep", + "(int, default 0) interval for saving checkpoint in every n step." + "0 means only save checkpoint once") + .SetDefault(0); AddComment(R"DOC( Save the workload environment to the absolute path. @@ -54,12 +60,21 @@ class CheckpointKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { std::string absolutePath = ctx.template Attr("absolutePath"); // TODO(dzh) : checkpoint kernel need executor support : - // 1. asynchronously call of operator + // 1. operator should support asynchronously running // 2. checkpoint op need at least two thread. - // Because checkpoint will happen per-interval, so need a thread wait + // Because checkpoint will happen every, so need a thread wait // the timer/steps to reach the condition. - // int interval = ctx.template Attr("interval"); + auto* Step = ctx.Input("Step"); + const int* curr_step = Step->data(); + int every_n_step = ctx.template Attr("everyNStep"); auto& scope = ctx.scope(); + + if (every_n_step == 0 && !run_once) { + run_once = true; + } + + if (run_once || *curr_step % every_n_step != 0) return; + std::vector ins = scope.GetAllNames(); std::vector inputs; for (auto& name : ins) { @@ -74,6 +89,9 @@ class CheckpointKernel : public framework::OpKernel { } fout.close(); } + + // flag indicate this op may be skipped. 
+ mutable bool run_once = false; }; } // namespace operators From e5974d12e073b019005cae16b7479ad19c52d8d1 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 9 Oct 2017 19:39:53 -0700 Subject: [PATCH 13/29] "fix doc based on comments" --- doc/design/model_format.md | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/doc/design/model_format.md b/doc/design/model_format.md index 491302e14a615..d0130f8a41ef1 100644 --- a/doc/design/model_format.md +++ b/doc/design/model_format.md @@ -4,30 +4,31 @@ The model is the output of training process. One complete model consists of two parts, namely, the **topology** and the **parameters**. To support business deployment, we need to make the model format must be self-completed and do not expose any training source code. -As a result, In PaddlePaddle, the **topology** represents as a [ProgramDesc](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/doc/design/program.md), which describes the model structure. The **parameters** contain all the trainable weights in the model, we must support large size parameter, and high efficiency read/write for speed. +As a result, In PaddlePaddle, the **topology** represents as a [ProgramDesc](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/doc/design/program.md), which describes the model structure. The **parameters** contain all the trainable weights in the model, we must support large size parameter, and efficient serialization/deserialization. ## Implementation -The topology is saved as a plain text, in detail, a self-complete protobuf file. +The topology is saved as a plain text, in detail, a self-contain protobuf file. -The parameters are saved as a binary file. 
As we all know, the protobuf message has the limits of [64M size](https://developers.google.com/protocol-buffers/docs/reference/cpp/google.protobuf.io.coded_stream#CodedInputStream.SetTotalBytesLimit.details). We do a benchmark experiment +The parameters are saved as a binary file. As we all know, the protobuf message has the limits of [64M size](https://developers.google.com/protocol-buffers/docs/reference/cpp/google.protobuf.io.coded_stream#CodedInputStream.SetTotalBytesLimit.details). We do a (benchmark experiment)[https://github.com/PaddlePaddle/Paddle/pull/4610], its result shows protobuf is not fit in this scene. - So we design a particular format for tensor serialization. By default, arbitrary tensor in Paddle is a [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/lod_tensor.md), and has a description information proto of (LoDTensorDesc)[https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto#L99]. We save the DescProto as the byte string header, it contains the necessary information, such as the `dims`, the `name` of the tensor, and the `LoD` information in [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/paddle/framework/lod_tensor.md). Tensor stores value in a continuous memory buffer, for speed we dump the raw memory to disk and save it as the byte string content. So, the binary format of one tensor is, +As a result, we design a particular format for tensor serialization. By default, arbitrary tensor in Paddle is a [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/lod_tensor.md), and has a description information proto of (LoDTensorDesc)[https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto#L99]. 
We save the DescProto as the byte string header, it contains the necessary information, such as the `dims`, the `name` of the tensor, and the `LoD` information in [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/paddle/framework/lod_tensor.md). Tensor stores value in a continuous memory buffer, for speed we dump the raw memory to disk and save it as the byte string content. So, the binary format of one tensor is, |HeaderLength|ContentLength|**LoDTensorDesc**|**TensorValue**| -In detail, tensor's byte view as the table shows +In detail, tensor's byte view as the table shows. Note that all the signed value written in little-endian. ```text -[offset] [type] [value] [description] -0000 32 bit integer ?? HeaderLength, the length of LoDTensorDesc -0004 32 bit integer ?? ContentLength, the length of LodTensor Buffer -0008 32 bit integer ?? TensorDesc -0012 32 bit integer ?? TensorDesc +[offset] [type] [description] +0000 32 bit integer version number +0004 32 bit integer HeaderLength, the length of LoDTensorDesc +0008 64 bit integer ContentLength, the length of LodTensor Buffer +0009 8 bit char TensorDesc +00010 8 bit char TensorDesc ... -00100 32 bit integer ?? Tensor Value -00104 32 bit integer ?? Tensor Value -00108 32 bit integer ?? .. +00100 8 bit char TensorValue +00101 8 bit char TensorValue +00102 8 bit char TensorValue .. ... 
``` From d756fe1dcb87125705099e7ed9e1aa7bbcd216fe Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 9 Oct 2017 20:54:38 -0700 Subject: [PATCH 14/29] "refine based on comments" --- paddle/framework/lod_tensor.cc | 1 + paddle/operators/checkpoint_op.cc | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 365912f4e6b32..d09836bcbe3ff 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -14,6 +14,7 @@ #include "paddle/framework/lod_tensor.h" #include "paddle/framework/saver.pb.h" + #include "paddle/memory/memcpy.h" #include "paddle/memory/memory.h" diff --git a/paddle/operators/checkpoint_op.cc b/paddle/operators/checkpoint_op.cc index 492736cd4ed80..f7995c7aa2d02 100644 --- a/paddle/operators/checkpoint_op.cc +++ b/paddle/operators/checkpoint_op.cc @@ -28,7 +28,7 @@ class CheckpointOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(ctx->HasInput("Step"), "Input(Step) of Checkpoint should not be null."); std::string absolutePath = ctx->Attrs().Get("absolutePath"); - PADDLE_ENFORCE(absolutePath != "", + PADDLE_ENFORCE(!absolutePath.empty(), "Input(absolutePath) of Checkpoint should not be null."); } }; @@ -64,16 +64,16 @@ class CheckpointKernel : public framework::OpKernel { // 2. checkpoint op need at least two thread. // Because checkpoint will happen every, so need a thread wait // the timer/steps to reach the condition. 
- auto* Step = ctx.Input("Step"); - const int* curr_step = Step->data(); + auto* step = ctx.Input("Step"); + const int curr_step = *step->data(); int every_n_step = ctx.template Attr("everyNStep"); auto& scope = ctx.scope(); - if (every_n_step == 0 && !run_once) { - run_once = true; + if (every_n_step == 0 && !run_once_) { + run_once_ = true; } - if (run_once || *curr_step % every_n_step != 0) return; + if (run_once_ || curr_step % every_n_step != 0) return; std::vector ins = scope.GetAllNames(); std::vector inputs; @@ -90,8 +90,9 @@ class CheckpointKernel : public framework::OpKernel { fout.close(); } + private: // flag indicate this op may be skipped. - mutable bool run_once = false; + mutable bool run_once_ = false; }; } // namespace operators From 54819ccd333ad069b46fce00339db6ad22d09bb6 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 9 Oct 2017 21:16:00 -0700 Subject: [PATCH 15/29] "fix based comments" --- doc/design/model_format.md | 3 +-- paddle/framework/lod_tensor.cc | 16 ++++++++++------ paddle/framework/lod_tensor_test.cc | 7 +++++-- paddle/framework/saver.proto | 1 + 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/doc/design/model_format.md b/doc/design/model_format.md index d0130f8a41ef1..1c3ca9c4d3bef 100644 --- a/doc/design/model_format.md +++ b/doc/design/model_format.md @@ -20,8 +20,7 @@ In detail, tensor's byte view as the table shows. 
Note that all the signed valu ```text [offset] [type] [description] -0000 32 bit integer version number -0004 32 bit integer HeaderLength, the length of LoDTensorDesc +0004 64 bit integer HeaderLength, the length of LoDTensorDesc 0008 64 bit integer ContentLength, the length of LodTensor Buffer 0009 8 bit char TensorDesc 00010 8 bit char TensorDesc diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index d09836bcbe3ff..9a315c8b630dc 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -128,10 +128,13 @@ std::string LoDTensor::SerializeToString() const { desc.set_data_type(DataType::FP64); // set dims - std::vector dims = vectorize(this->dims()); - for (auto& dim : dims) { - desc.add_dims(dim); + // std::vector dims = vectorize(this->dims()); + for (int i = 0; i < dims().size(); ++i) { + desc.add_dims(dims()[i]); } + // for (auto& dim : dims) { + // desc.add_dims(dims()dwim); + // } // set lod information desc.set_lod_level(this->NumLevels()); @@ -142,8 +145,7 @@ std::string LoDTensor::SerializeToString() const { } } - // set place information - platform::Place place = holder_->place(); + desc.set_version(0); std::string desc_bytes = desc.SerializeAsString(); @@ -167,7 +169,9 @@ std::string LoDTensor::SerializeToString() const { PADDLE_ENFORCE(this->numel() != 0, " Serialize a empty Tensor!"); + platform::Place place = holder_->place(); int element_width = holder_->size() / this->numel(); + if (platform::is_cpu_place(place)) { memory::Copy(dst_place, buffer + sizeof(size_t) * 2 + desc_bytes.size(), boost::get(place), @@ -191,8 +195,8 @@ std::string LoDTensor::SerializeToString() const { void LoDTensor::DeserializeFromString(const std::string& s, const platform::Place& dst_place) { size_t DESC_SIZE, DATA_SIZE; - DESC_SIZE = DATA_SIZE = 100; platform::CPUPlace src_place; + memory::Copy(src_place, &DESC_SIZE, src_place, s.c_str(), sizeof(size_t)); memory::Copy(src_place, &DATA_SIZE, src_place, s.c_str() + 
sizeof(size_t), sizeof(size_t)); diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index d8bf335b5b23f..5798ff5eaa99e 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -17,10 +17,13 @@ #include #include #include +#include namespace paddle { namespace framework { +const int kLodTensorSize = 20 * 128; + class LoDTensorTester : public ::testing::Test { public: virtual void SetUp() override { @@ -39,7 +42,7 @@ class LoDTensorTester : public ::testing::Test { lod_tensor_.Resize({20 /*batch size*/, 128 /*dim*/}); // malloc memory float* dst_ptr = lod_tensor_.mutable_data(place); - for (int i = 0; i < 20 * 128; ++i) { + for (int i = 0; i < kLodTensorSize; ++i) { dst_ptr[i] = i; } @@ -112,7 +115,7 @@ TEST_F(LoDTensorTester, SerializeDeserialize) { LoDTensor dst; dst.DeserializeFromString(s, platform::CPUPlace()); float* dst_ptr = dst.data(); - for (int i = 0; i < 20 * 128; ++i) { + for (int i = 0; i < kLodTensorSize; ++i) { EXPECT_EQ(dst_ptr[i], src_ptr[i]); } diff --git a/paddle/framework/saver.proto b/paddle/framework/saver.proto index 30f361a882539..326900473f7e9 100644 --- a/paddle/framework/saver.proto +++ b/paddle/framework/saver.proto @@ -34,4 +34,5 @@ message LoDTensorProto { repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] repeated LoDInfo levels = 3; optional int32 lod_level = 4 [ default = 0 ]; + optional int32 version = 5; } From 7407afd8cf39540f15f53442869c727e7dc4365c Mon Sep 17 00:00:00 2001 From: Dong Zhihong Date: Fri, 20 Oct 2017 11:21:47 -0700 Subject: [PATCH 16/29] "remove persistable flag from framework.proto" --- doc/design/model_format.md | 16 ++++++++-------- paddle/framework/lod_tensor.cc | 4 +--- paddle/framework/lod_tensor_test.cu | 27 +++++++++++++++++++++++++++ paddle/framework/saver.proto | 1 + paddle/operators/CMakeLists.txt | 7 +++++++ 5 files changed, 44 insertions(+), 11 deletions(-) diff --git a/doc/design/model_format.md 
b/doc/design/model_format.md index 1c3ca9c4d3bef..db8c36e5f5dca 100644 --- a/doc/design/model_format.md +++ b/doc/design/model_format.md @@ -2,7 +2,7 @@ ## Motivation -The model is the output of training process. One complete model consists of two parts, namely, the **topology** and the **parameters**. To support business deployment, we need to make the model format must be self-completed and do not expose any training source code. +The model is the output of training process. One complete model consists of two parts, namely, the **topology** and the **parameters**. To support industrial deployment, we need to make the model format must be self-completed and do not expose any training source code. As a result, In PaddlePaddle, the **topology** represents as a [ProgramDesc](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/doc/design/program.md), which describes the model structure. The **parameters** contain all the trainable weights in the model, we must support large size parameter, and efficient serialization/deserialization. @@ -20,14 +20,14 @@ In detail, tensor's byte view as the table shows. Note that all the signed valu ```text [offset] [type] [description] -0004 64 bit integer HeaderLength, the length of LoDTensorDesc -0008 64 bit integer ContentLength, the length of LodTensor Buffer -0009 8 bit char TensorDesc -00010 8 bit char TensorDesc +0004 4 bytes integer HeaderLength, the length of LoDTensorDesc +0008 4 bytes integer ContentLength, the length of LodTensor Buffer +0009 1 bytes char TensorDesc +00010 1 bytes char TensorDesc ... -00100 8 bit char TensorValue -00101 8 bit char TensorValue -00102 8 bit char TensorValue .. +00100 1 bytes char TensorValue +00101 1 bytes char TensorValue +00102 1 bytes char TensorValue .. ... 
``` diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 9a315c8b630dc..3c940c31dd378 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -127,8 +127,6 @@ std::string LoDTensor::SerializeToString() const { if (this->type() == typeid(double)) // NOLINT desc.set_data_type(DataType::FP64); - // set dims - // std::vector dims = vectorize(this->dims()); for (int i = 0; i < dims().size(); ++i) { desc.add_dims(dims()[i]); } @@ -210,7 +208,7 @@ void LoDTensor::DeserializeFromString(const std::string& s, this->Resize(make_ddim(dims)); // parse data type - void* ptr; + void* ptr = nullptr; if (desc.data_type() == DataType::BOOL) ptr = this->mutable_data(dst_place); if (desc.data_type() == DataType::INT16) diff --git a/paddle/framework/lod_tensor_test.cu b/paddle/framework/lod_tensor_test.cu index 25041024cb51d..11659be02ac34 100644 --- a/paddle/framework/lod_tensor_test.cu +++ b/paddle/framework/lod_tensor_test.cu @@ -48,3 +48,30 @@ TEST(LoDTensor, LoDInGPU) { CHECK_EQ(lod[0].data()[i], src_lod[0].data()[i] * 2); } } + +TEST(LoDTensor, SerializeDeserialize) { + paddle::framework::LoDTensor lod_tensor; + paddle::platform::GPUPlace place(0); + + paddle::framework::LoD src_lod; + src_lod.push_back(std::vector{0, 2, 4, 6, 8, 10, 12, 14}); + + lod_tensor.Resize({14, 16}); + lod_tensor.mutable_data(place); + + lod_tensor.set_lod(src_lod); + CHECK_EQ(lod_tensor.lod_element(0, 2).first, 4UL); + CHECK_EQ(lod_tensor.lod_element(0, 4).first, 8UL); + + test<<<1, 8>>>(src_lod[0].data(), src_lod[0].size()); + cudaDeviceSynchronize(); + + std::string s = lod_tensor.SerializeToString(); + paddle::framework::LoDTensor dst; + dst.DeserializeFromString(s, place); + paddle::framework::LoD dst_lod = dst.lod(); + + for (size_t i = 0; i < dst_lod[0].size(); ++i) { + CHECK_EQ(src_lod[0].data()[i], dst_lod[0].data()[i] * 2); + } +} diff --git a/paddle/framework/saver.proto b/paddle/framework/saver.proto index 326900473f7e9..90a191a6a7925 
100644 --- a/paddle/framework/saver.proto +++ b/paddle/framework/saver.proto @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ syntax = "proto2"; +option optimize_for = LITE_RUNTIME; package paddle.framework; import "framework.proto"; diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 75fcc1cda1651..78944468f9f14 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -69,6 +69,13 @@ function(op_library TARGET) file(APPEND ${pybind_file} "USE_OP(max_pool2d_with_index);\n") endif() + # save_restore_op contains several operators + if ("${TARGET}" STREQUAL "save_restore_op") + set(pybind_flag 1) + # It's enough to just adding one operator to pybind + file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(save);\n") + endif() + # activation_op contains several operators if ("${TARGET}" STREQUAL "activation_op") set(pybind_flag 1) From 15fd027160313487a73207545787442c0788e81f Mon Sep 17 00:00:00 2001 From: Dong Zhihong Date: Fri, 20 Oct 2017 17:18:03 -0700 Subject: [PATCH 17/29] "add IndicateDataType to restore op" --- paddle/operators/checkpoint_op.cc | 4 +- paddle/operators/save_restore_op.cc | 98 ++++++++++--------- .../framework/tests/test_save_restore_op.py | 83 +++++++++------- 3 files changed, 103 insertions(+), 82 deletions(-) diff --git a/paddle/operators/checkpoint_op.cc b/paddle/operators/checkpoint_op.cc index f7995c7aa2d02..efb79b2347d93 100644 --- a/paddle/operators/checkpoint_op.cc +++ b/paddle/operators/checkpoint_op.cc @@ -24,7 +24,7 @@ class CheckpointOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; protected: - void InferShape(framework::InferShapeContextBase* ctx) const override { + void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("Step"), "Input(Step) of Checkpoint should not be null."); std::string absolutePath = 
ctx->Attrs().Get("absolutePath"); @@ -82,7 +82,7 @@ class CheckpointKernel : public framework::OpKernel { } std::ofstream fout(absolutePath, std::fstream::app); - PADDLE_ENFORCE(!fout.is_open(), "open file for model failed."); + PADDLE_ENFORCE(fout.is_open(), "open file for model failed."); for (size_t i = 0; i < inputs.size(); ++i) { std::string bytes = inputs[i]->Get().SerializeToString(); fout << bytes << '\n'; diff --git a/paddle/operators/save_restore_op.cc b/paddle/operators/save_restore_op.cc index c7097a44a240e..6cbc10285ae84 100644 --- a/paddle/operators/save_restore_op.cc +++ b/paddle/operators/save_restore_op.cc @@ -20,15 +20,54 @@ namespace operators { using framework::Tensor; using framework::LoDTensor; +template +class SaveKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto ins = ctx.MultiInput("X"); + std::string absolutePath = ctx.template Attr("absolutePath"); + + std::ofstream fout(absolutePath, std::ofstream::out | std::ofstream::app); + VLOG(1) << " open file for save model in " << absolutePath; + PADDLE_ENFORCE(fout.is_open(), "open file for model failed."); + for (size_t i = 0; i < ins.size(); ++i) { + std::string bytes = ins[i]->SerializeToString(); + fout << bytes << '\n'; + } + + fout.close(); + } +}; + +template +class RestoreKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto outs = ctx.MultiOutput("Out"); + std::string absolutePath = ctx.template Attr("absolutePath"); + + std::ifstream fin(absolutePath); + PADDLE_ENFORCE(fin.is_open(), "open model file failed."); + + std::string line; + int i = 0; + while (std::getline(fin, line)) { + outs[i++]->DeserializeFromString(line, ctx.GetPlace()); + } + fin.close(); + } +}; + class SaveOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; protected: - void 
InferShape(framework::InferShapeContextBase* ctx) const override { + void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInputs("X"), "Input(X) of SaveOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("absolutePath"), + auto absolutePath = ctx->Attrs().Get("absolutePath"); + PADDLE_ENFORCE(!absolutePath.empty(), "Input(absolutePath) of SaveOp should not be null."); } }; @@ -40,8 +79,7 @@ class SaveOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "(tensor), the tensor count can be 1~INT_MAX, tensors names which " "values will be saved.") - .AsDuplicable() - .NotInGradient(); + .AsDuplicable(); AddAttr("absolutePath", "the absolutePath for save model."); AddComment(R"DOC( Save the input tensors to a binary file based on input tensor names and absolute path. @@ -52,35 +90,23 @@ or not. } }; -template -class SaveKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto ins = ctx.MultiInput("X"); - std::string absolutePath = ctx.template Attr("absolutePath"); - - std::ofstream fout(absolutePath, std::fstream::app); - PADDLE_ENFORCE(!fout.is_open(), "open file for model failed."); - for (size_t i = 0; i < ins.size(); ++i) { - std::string bytes = ins[i]->SerializeToString(); - fout << bytes << '\n'; - } - - fout.close(); - } -}; - class RestoreOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; protected: - void InferShape(framework::InferShapeContextBase* ctx) const override { + void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasOutputs("Out"), "Output(X) of RestoreOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("absolutePath"), + auto absolutePath = ctx->Attrs().Get("absolutePath"); + PADDLE_ENFORCE(!absolutePath.empty(), "Input(absolutePath) of Restore Op should not be null."); } + + framework::DataType IndicateDataType( + const 
framework::ExecutionContext& ctx) const override { + return static_cast(Attr("data_type")); + } }; class RestoreOpMaker : public framework::OpProtoAndCheckerMaker { @@ -91,9 +117,10 @@ class RestoreOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("Out", "(tensor), the tensor count can be 1~INT_MAX, tensors which " "values will be restores.") - .AsDuplicable() - .NotInGradient(); + .AsDuplicable(); AddAttr("absolutePath", "the absolutePath for model file."); + AddAttr("data_type", "output tensor data type") + .SetDefault(framework::DataType::FP32); AddComment(R"DOC( Restore the tensors from model file based on absolute path. @@ -103,25 +130,6 @@ or not. } }; -template -class RestoreKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto outs = ctx.MultiOutput("Out"); - std::string absolutePath = ctx.template Attr("absolutePath"); - - std::ifstream fin(absolutePath); - PADDLE_ENFORCE(!fin.is_open(), "open model file failed."); - - std::string line; - int i = 0; - while (std::getline(fin, line)) { - outs[i++]->DeserializeFromString(line, ctx.GetPlace()); - } - fin.close(); - } -}; - } // namespace operators } // namespace paddle diff --git a/python/paddle/v2/framework/tests/test_save_restore_op.py b/python/paddle/v2/framework/tests/test_save_restore_op.py index a9fdc638b7f3f..881c30bbc3f1c 100644 --- a/python/paddle/v2/framework/tests/test_save_restore_op.py +++ b/python/paddle/v2/framework/tests/test_save_restore_op.py @@ -1,9 +1,9 @@ from paddle.v2.framework import core from paddle.v2.framework.op import Operator import numpy as np -from op_test import OpTest +from op_test import OpTest, create_op import unittest -import os +import os, sys ABSOLUTE_PATH = "/tmp/PADDLE_MODEL" # directory for save parameter files. @@ -15,62 +15,75 @@ # TODO(dzh): Saver also need to save ProgramDesc. Should be done after # python API done. 
- -def Saver(var_list=None): - net = core.Net.create() - save_tensors = [] - for var in var_list: - tensor = scope.find_var(var) - save_tensors.append(tensor) - save_op = Operator("save", X=save_tensors, absolutePath=ABSOLUTE_PATH) - net.append_op(save_op) - net.infer_shape(scope) - net.run(scope, dev_ctx) - - -class TestSaver(unittest.TestCase): - def test_save_tensors(self): - a = scope.new_var("a") - b = scope.new_var("b") - Saver(["a", "b"]) - self.assertTrue(os.path.exists(ABSOLUTE_PATH)) +# def Saver(var_list=None): +# net = core.Net.create() +# save_tensors = [] +# # for var in var_list: +# # tensor = scope.find_var(var) +# # save_tensors.append(tensor) +# # save_op = Operator("save", X=save_tensors, absolutePath=ABSOLUTE_PATH) +# save_op = Operator("save", X=var_list, absolutePath=ABSOLUTE_PATH) +# net.append_op(save_op) +# net.infer_shape(scope) +# net.run(scope, dev_ctx) + +# class TestSaver(unittest.TestCase): +# def test_save_tensors(self): +# a = scope.var("a") +# b = scope.var("b") +# Saver(["a", "b"]) +# self.assertTrue(os.path.exists(ABSOLUTE_PATH)) class TestSaveOp(OpTest): def setUp(self): self.op_type = "save" - x0 = np.random.random((2, 3)).astype("float32") - x1 = np.random.random((1, 2)).astype("float32") - x2 = np.random.random((2, 1)).astype("float32") + x0 = np.ones((2, 3)).astype("float32") + x1 = np.ones((1, 2)).astype("float32") + x2 = np.ones((2, 1)).astype("float32") self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)], } - self.attrs = {"absolute_path": ABSOLUTE_PATH} + self.attrs = {"absolutePath": ABSOLUTE_PATH} def test_check_output(self): if os.path.exists(ABSOLUTE_PATH): - os.rmdir(ABSOLUTE_PATH) + try: + if os.path.isdir(ABSOLUTE_PATH): + os.rmdir(ABSOLUTE_PATH) + elif os.path.isfile(ABSOLUTE_PATH): + os.remove(ABSOLUTE_PATH) + except OSError: + pass self.check_output() self.assertTrue(os.path.exists(ABSOLUTE_PATH)) -# Must run savetest first +# must run saveTest first class TestRestoreOp(OpTest): def setUp(self): 
self.op_type = "restore" - x0 = np.random.random((2, 3)).astype("float32") - x1 = np.random.random((1, 2)).astype("float32") - x2 = np.random.random((2, 1)).astype("float32") - self.check_results = [x0, x1, x2] - self.outputs = {"Out": []} + src_x0 = np.ones((2, 3)).astype("float32") + src_x1 = np.ones((1, 2)).astype("float32") + src_x2 = np.ones((2, 1)).astype("float32") + + x0 = scope.var('x0').get_tensor() + x1 = scope.var('x1').get_tensor() + x2 = scope.var('x2').get_tensor() + + self.src_tensors = [src_x0, src_x1, src_x2] + # self.outputs = {"Out": [x0, x1, x2]} + self.inputs = {} + self.outputs = {"Out": [("x0", x0), ("x1", x1), ("x2", x2)]} - self.attrs = {"absolute_path": ABSOLUTE_PATH} + self.attrs = {"absolutePath": ABSOLUTE_PATH} def test_check_output(self): self.check_output() - for e in self.check_results: - self.assertTrue(e in self.outputs["Out"]) + # self.dst_tensors = [scope.find_var("x0"), scope.find_var("x1"), scope.find_var("x2")] + # for src, dst in zip(self.save_tensors, self.dst_tensors): + # self.assertTrue(np.array_equal(src, dst)) if __name__ == "__main__": From d7e25aab63cc4895c30d85072db696e3f10dcc1a Mon Sep 17 00:00:00 2001 From: Dong Zhihong Date: Sun, 22 Oct 2017 16:48:39 -0700 Subject: [PATCH 18/29] "add save test" --- paddle/framework/lod_tensor.cc | 5 +- paddle/framework/lod_tensor_test.cc | 2 +- paddle/operators/save_restore_op.cc | 17 ++-- .../framework/tests/test_save_restore_op.py | 82 +++++++++++-------- 4 files changed, 61 insertions(+), 45 deletions(-) diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 8bc39a03d3d54..bcd292a5300c4 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -139,9 +139,6 @@ std::string LoDTensor::SerializeToString() const { for (int i = 0; i < dims().size(); ++i) { desc.add_dims(dims()[i]); } - // for (auto& dim : dims) { - // desc.add_dims(dims()dwim); - // } // set lod information desc.set_lod_level(this->NumLevels()); @@ -174,7 +171,7 
@@ std::string LoDTensor::SerializeToString() const { memory::Copy(dst_place, buffer + sizeof(size_t) * 2, src_place, desc_bytes.c_str(), desc_bytes.size()); - PADDLE_ENFORCE(this->numel() != 0, " Serialize a empty Tensor!"); + PADDLE_ENFORCE(this->numel() != 0, "Serialize a empty Tensor!"); platform::Place place = holder_->place(); int element_width = holder_->size() / this->numel(); diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index b5f90ac5327ff..b984d62071745 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -119,7 +119,7 @@ TEST_F(LoDTensorTester, SerializeDeserialize) { } ASSERT_EQ(dst.NumElements(0), 2UL); - ASSERT_EQ(dst.NumElements(1), 4UL); + ASSERT_EQ(dst.NumElements(1), 3UL); ASSERT_EQ(dst.NumElements(2), 8UL); } diff --git a/paddle/operators/save_restore_op.cc b/paddle/operators/save_restore_op.cc index 6cbc10285ae84..04465c1ecd02d 100644 --- a/paddle/operators/save_restore_op.cc +++ b/paddle/operators/save_restore_op.cc @@ -27,15 +27,17 @@ class SaveKernel : public framework::OpKernel { auto ins = ctx.MultiInput("X"); std::string absolutePath = ctx.template Attr("absolutePath"); - std::ofstream fout(absolutePath, std::ofstream::out | std::ofstream::app); - VLOG(1) << " open file for save model in " << absolutePath; - PADDLE_ENFORCE(fout.is_open(), "open file for model failed."); + std::ofstream fout(absolutePath, std::ofstream::out); + VLOG(1) << "Open model file : " << absolutePath; + PADDLE_ENFORCE(fout.is_open(), "Open model file failed."); for (size_t i = 0; i < ins.size(); ++i) { std::string bytes = ins[i]->SerializeToString(); - fout << bytes << '\n'; + // fout << bytes << '\n'; + fout << bytes; } fout.close(); + VLOG(1) << "Save model finished. 
Items count : " << ins.size(); } }; @@ -46,12 +48,15 @@ class RestoreKernel : public framework::OpKernel { auto outs = ctx.MultiOutput("Out"); std::string absolutePath = ctx.template Attr("absolutePath"); - std::ifstream fin(absolutePath); - PADDLE_ENFORCE(fin.is_open(), "open model file failed."); + std::ifstream fin(absolutePath, std::ifstream::in); + VLOG(1) << "Open model file : " << absolutePath; + PADDLE_ENFORCE(fin.is_open(), "Open model file failed."); std::string line; int i = 0; while (std::getline(fin, line)) { + VLOG(1) << "Item " << i << " size " << line.size() << " content " << line; + // if (line.empty()) continue; outs[i++]->DeserializeFromString(line, ctx.GetPlace()); } fin.close(); diff --git a/python/paddle/v2/framework/tests/test_save_restore_op.py b/python/paddle/v2/framework/tests/test_save_restore_op.py index 881c30bbc3f1c..642aec5881bce 100644 --- a/python/paddle/v2/framework/tests/test_save_restore_op.py +++ b/python/paddle/v2/framework/tests/test_save_restore_op.py @@ -15,17 +15,31 @@ # TODO(dzh): Saver also need to save ProgramDesc. Should be done after # python API done. 
-# def Saver(var_list=None): -# net = core.Net.create() -# save_tensors = [] -# # for var in var_list: -# # tensor = scope.find_var(var) -# # save_tensors.append(tensor) -# # save_op = Operator("save", X=save_tensors, absolutePath=ABSOLUTE_PATH) -# save_op = Operator("save", X=var_list, absolutePath=ABSOLUTE_PATH) -# net.append_op(save_op) -# net.infer_shape(scope) -# net.run(scope, dev_ctx) + +def Saver(var_list=None): + net = core.Net.create() + save_tensors = [] + # for var in var_list: + # tensor = scope.find_var(var) + # save_tensors.append(tensor) + # save_op = Operator("save", X=save_tensors, absolutePath=ABSOLUTE_PATH) + save_op = Operator("save", X=var_list, absolutePath="/tmp/test_model.model") + net.append_op(save_op) + # net.complete_add_op() + net.run(scope, dev_ctx) + + +x = np.ones((3, 10)).astype("float32") +a = scope.var("x").get_tensor() +a.set_dims(x.shape) +a.set(x, place) +b = scope.var("y").get_tensor() + +s = a.tobytes() +# print s +b.frombytes(s, place) + +# Saver("x") # class TestSaver(unittest.TestCase): # def test_save_tensors(self): @@ -34,29 +48,29 @@ # Saver(["a", "b"]) # self.assertTrue(os.path.exists(ABSOLUTE_PATH)) - -class TestSaveOp(OpTest): - def setUp(self): - self.op_type = "save" - x0 = np.ones((2, 3)).astype("float32") - x1 = np.ones((1, 2)).astype("float32") - x2 = np.ones((2, 1)).astype("float32") - - self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)], } - - self.attrs = {"absolutePath": ABSOLUTE_PATH} - - def test_check_output(self): - if os.path.exists(ABSOLUTE_PATH): - try: - if os.path.isdir(ABSOLUTE_PATH): - os.rmdir(ABSOLUTE_PATH) - elif os.path.isfile(ABSOLUTE_PATH): - os.remove(ABSOLUTE_PATH) - except OSError: - pass - self.check_output() - self.assertTrue(os.path.exists(ABSOLUTE_PATH)) +# class TestSaveOp(OpTest): +# def setUp(self): +# self.op_type = "save" +# x0 = np.ones((1, 1)).astype("float32") +# x1 = np.ones((1, 1)).astype("float32") +# # x2 = np.ones((2, 1)).astype("float32") + +# # self.inputs = 
{"X": [("x0", x0), ("x1", x1), ("x2", x2)], } +# self.inputs = {"X": [("x0", x0), ("x1", x1)], } + +# self.attrs = {"absolutePath": ABSOLUTE_PATH} + +# def test_check_output(self): +# if os.path.exists(ABSOLUTE_PATH): +# try: +# if os.path.isdir(ABSOLUTE_PATH): +# os.rmdir(ABSOLUTE_PATH) +# elif os.path.isfile(ABSOLUTE_PATH): +# os.remove(ABSOLUTE_PATH) +# except OSError: +# pass +# self.check_output() +# self.assertTrue(os.path.exists(ABSOLUTE_PATH)) # must run saveTest first From a05883f396fad54aeb51e74757f0f34d2532638b Mon Sep 17 00:00:00 2001 From: Dong Zhihong Date: Sun, 22 Oct 2017 16:53:11 -0700 Subject: [PATCH 19/29] "modify save restore code" --- .../framework/tests/test_save_restore_op.py | 80 ++++++++----------- 1 file changed, 33 insertions(+), 47 deletions(-) diff --git a/python/paddle/v2/framework/tests/test_save_restore_op.py b/python/paddle/v2/framework/tests/test_save_restore_op.py index 642aec5881bce..610cdc3f39bff 100644 --- a/python/paddle/v2/framework/tests/test_save_restore_op.py +++ b/python/paddle/v2/framework/tests/test_save_restore_op.py @@ -5,7 +5,7 @@ import unittest import os, sys -ABSOLUTE_PATH = "/tmp/PADDLE_MODEL" # directory for save parameter files. +ABSOLUTE_PATH = "/tmp/MODEL" # directory for save parameter files. 
scope = core.Scope() place = core.CPUPlace() @@ -19,58 +19,42 @@ def Saver(var_list=None): net = core.Net.create() save_tensors = [] - # for var in var_list: - # tensor = scope.find_var(var) - # save_tensors.append(tensor) - # save_op = Operator("save", X=save_tensors, absolutePath=ABSOLUTE_PATH) - save_op = Operator("save", X=var_list, absolutePath="/tmp/test_model.model") + save_op = Operator("save", X=save_tensors, absolutePath=ABSOLUTE_PATH) net.append_op(save_op) - # net.complete_add_op() net.run(scope, dev_ctx) -x = np.ones((3, 10)).astype("float32") -a = scope.var("x").get_tensor() -a.set_dims(x.shape) -a.set(x, place) -b = scope.var("y").get_tensor() +class TestSaver(unittest.TestCase): + def test_save_tensors(self): + a = scope.var("a") + b = scope.var("b") + Saver(["a", "b"]) + self.assertTrue(os.path.exists(ABSOLUTE_PATH)) -s = a.tobytes() -# print s -b.frombytes(s, place) -# Saver("x") - -# class TestSaver(unittest.TestCase): -# def test_save_tensors(self): -# a = scope.var("a") -# b = scope.var("b") -# Saver(["a", "b"]) -# self.assertTrue(os.path.exists(ABSOLUTE_PATH)) - -# class TestSaveOp(OpTest): -# def setUp(self): -# self.op_type = "save" -# x0 = np.ones((1, 1)).astype("float32") -# x1 = np.ones((1, 1)).astype("float32") -# # x2 = np.ones((2, 1)).astype("float32") +class TestSaveOp(OpTest): + def setUp(self): + self.op_type = "save" + x0 = np.ones((1, 1)).astype("float32") + x1 = np.ones((1, 1)).astype("float32") + # x2 = np.ones((2, 1)).astype("float32") -# # self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)], } -# self.inputs = {"X": [("x0", x0), ("x1", x1)], } + # self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)], } + self.inputs = {"X": [("x0", x0), ("x1", x1)], } -# self.attrs = {"absolutePath": ABSOLUTE_PATH} + self.attrs = {"absolutePath": ABSOLUTE_PATH} -# def test_check_output(self): -# if os.path.exists(ABSOLUTE_PATH): -# try: -# if os.path.isdir(ABSOLUTE_PATH): -# os.rmdir(ABSOLUTE_PATH) -# elif 
os.path.isfile(ABSOLUTE_PATH): -# os.remove(ABSOLUTE_PATH) -# except OSError: -# pass -# self.check_output() -# self.assertTrue(os.path.exists(ABSOLUTE_PATH)) + def test_check_output(self): + if os.path.exists(ABSOLUTE_PATH): + try: + if os.path.isdir(ABSOLUTE_PATH): + os.rmdir(ABSOLUTE_PATH) + elif os.path.isfile(ABSOLUTE_PATH): + os.remove(ABSOLUTE_PATH) + except OSError: + pass + self.check_output() + self.assertTrue(os.path.exists(ABSOLUTE_PATH)) # must run saveTest first @@ -95,9 +79,11 @@ def setUp(self): def test_check_output(self): self.check_output() - # self.dst_tensors = [scope.find_var("x0"), scope.find_var("x1"), scope.find_var("x2")] - # for src, dst in zip(self.save_tensors, self.dst_tensors): - # self.assertTrue(np.array_equal(src, dst)) + self.dst_tensors = [ + scope.find_var("x0"), scope.find_var("x1"), scope.find_var("x2") + ] + for src, dst in zip(self.save_tensors, self.dst_tensors): + self.assertTrue(np.array_equal(src, dst)) if __name__ == "__main__": From f918bfc7da5ed11cef4fea6dc5f735eb53538580 Mon Sep 17 00:00:00 2001 From: Dong Zhihong Date: Sun, 22 Oct 2017 17:25:22 -0700 Subject: [PATCH 20/29] "modified the restore logic" --- paddle/framework/lod_tensor.cc | 12 +++++---- paddle/operators/save_restore_op.cc | 26 ++++++++++++++----- .../framework/tests/test_save_restore_op.py | 6 ++--- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index bcd292a5300c4..f53dd1c1858b4 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -165,8 +165,8 @@ std::string LoDTensor::SerializeToString() const { platform::CPUPlace src_place; platform::CPUPlace dst_place; - memory::Copy(dst_place, buffer, src_place, &DESC_SIZE, sizeof(size_t)); - memory::Copy(dst_place, buffer + sizeof(size_t), src_place, &DATA_SIZE, + memory::Copy(dst_place, buffer, src_place, &BUFFER_SIZE, sizeof(size_t)); + memory::Copy(dst_place, buffer + sizeof(size_t), src_place, 
&DESC_SIZE, sizeof(size_t)); memory::Copy(dst_place, buffer + sizeof(size_t) * 2, src_place, desc_bytes.c_str(), desc_bytes.size()); @@ -198,13 +198,15 @@ std::string LoDTensor::SerializeToString() const { void LoDTensor::DeserializeFromString(const std::string& s, const platform::Place& dst_place) { - size_t DESC_SIZE, DATA_SIZE; + size_t DESC_SIZE, BUFFER_SIZE; platform::CPUPlace src_place; - memory::Copy(src_place, &DESC_SIZE, src_place, s.c_str(), sizeof(size_t)); - memory::Copy(src_place, &DATA_SIZE, src_place, s.c_str() + sizeof(size_t), + memory::Copy(src_place, &BUFFER_SIZE, src_place, s.c_str(), sizeof(size_t)); + memory::Copy(src_place, &DESC_SIZE, src_place, s.c_str() + sizeof(size_t), sizeof(size_t)); + const size_t DATA_SIZE = BUFFER_SIZE - DESC_SIZE - sizeof(size_t) * 2; + // parse LoDTensorDesc LoDTensorProto desc; desc.ParseFromArray(s.c_str() + sizeof(size_t) * 2, DESC_SIZE); diff --git a/paddle/operators/save_restore_op.cc b/paddle/operators/save_restore_op.cc index 04465c1ecd02d..e3d6b7b5c73d8 100644 --- a/paddle/operators/save_restore_op.cc +++ b/paddle/operators/save_restore_op.cc @@ -32,7 +32,6 @@ class SaveKernel : public framework::OpKernel { PADDLE_ENFORCE(fout.is_open(), "Open model file failed."); for (size_t i = 0; i < ins.size(); ++i) { std::string bytes = ins[i]->SerializeToString(); - // fout << bytes << '\n'; fout << bytes; } @@ -52,12 +51,25 @@ class RestoreKernel : public framework::OpKernel { VLOG(1) << "Open model file : " << absolutePath; PADDLE_ENFORCE(fin.is_open(), "Open model file failed."); - std::string line; - int i = 0; - while (std::getline(fin, line)) { - VLOG(1) << "Item " << i << " size " << line.size() << " content " << line; - // if (line.empty()) continue; - outs[i++]->DeserializeFromString(line, ctx.GetPlace()); + int tensor_idx = 0; + while (fin) { + std::string line; + size_t tensor_size = 0; + const size_t kBufferSize = + 4096; // read chuck by chuck for switch pageing. 
+ char buffer[kBufferSize]; + + if (fin.read((char*)(tensor_size), sizeof(size_t))) { + size_t read_size = std::min(kBufferSize, tensor_size); + tensor_size -= kBufferSize; + while (read_size > 0 && fin.read(buffer, read_size)) { + line.append(buffer, read_size); + } + } + PADDLE_ENFORCE(tensor_size == line.size(), "Read tensor error."); + VLOG(1) << "Item " << tensor_idx << " size " << line.size() << " content " + << line; + outs[tensor_idx++]->DeserializeFromString(line, ctx.GetPlace()); } fin.close(); } diff --git a/python/paddle/v2/framework/tests/test_save_restore_op.py b/python/paddle/v2/framework/tests/test_save_restore_op.py index 610cdc3f39bff..28cbcddf35415 100644 --- a/python/paddle/v2/framework/tests/test_save_restore_op.py +++ b/python/paddle/v2/framework/tests/test_save_restore_op.py @@ -37,10 +37,9 @@ def setUp(self): self.op_type = "save" x0 = np.ones((1, 1)).astype("float32") x1 = np.ones((1, 1)).astype("float32") - # x2 = np.ones((2, 1)).astype("float32") + x2 = np.ones((2, 1)).astype("float32") - # self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)], } - self.inputs = {"X": [("x0", x0), ("x1", x1)], } + self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)], } self.attrs = {"absolutePath": ABSOLUTE_PATH} @@ -71,7 +70,6 @@ def setUp(self): x2 = scope.var('x2').get_tensor() self.src_tensors = [src_x0, src_x1, src_x2] - # self.outputs = {"Out": [x0, x1, x2]} self.inputs = {} self.outputs = {"Out": [("x0", x0), ("x1", x1), ("x2", x2)]} From feb23f4f121fd96ab9e83b50752ceaca21a90a9f Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 23 Oct 2017 14:34:29 -0700 Subject: [PATCH 21/29] rm checkpoint_op.cc --- paddle/operators/checkpoint_op.cc | 105 ------------------------------ 1 file changed, 105 deletions(-) delete mode 100644 paddle/operators/checkpoint_op.cc diff --git a/paddle/operators/checkpoint_op.cc b/paddle/operators/checkpoint_op.cc deleted file mode 100644 index efb79b2347d93..0000000000000 --- a/paddle/operators/checkpoint_op.cc +++ 
/dev/null @@ -1,105 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/framework/op_registry.h" - -#include - -namespace paddle { -namespace operators { - -using framework::Tensor; -using framework::LoDTensor; - -class CheckpointOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Step"), - "Input(Step) of Checkpoint should not be null."); - std::string absolutePath = ctx->Attrs().Get("absolutePath"); - PADDLE_ENFORCE(!absolutePath.empty(), - "Input(absolutePath) of Checkpoint should not be null."); - } -}; - -class CheckpointOpMaker : public framework::OpProtoAndCheckerMaker { - public: - CheckpointOpMaker(framework::OpProto* proto, - framework::OpAttrChecker* op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("Step", "(Tensor), indicate the training process."); - AddAttr("absolutePath", "the absolutePath for save model."); - AddAttr( - "everyNStep", - "(int, default 0) interval for saving checkpoint in every n step." - "0 means only save checkpoint once") - .SetDefault(0); - AddComment(R"DOC( -Save the workload environment to the absolute path. - -All the tensors can carry the LoD (Level of Details) information, -or not. 
-)DOC"); - } -}; - -template -class CheckpointKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - std::string absolutePath = ctx.template Attr("absolutePath"); - // TODO(dzh) : checkpoint kernel need executor support : - // 1. operator should support asynchronously running - // 2. checkpoint op need at least two thread. - // Because checkpoint will happen every, so need a thread wait - // the timer/steps to reach the condition. - auto* step = ctx.Input("Step"); - const int curr_step = *step->data(); - int every_n_step = ctx.template Attr("everyNStep"); - auto& scope = ctx.scope(); - - if (every_n_step == 0 && !run_once_) { - run_once_ = true; - } - - if (run_once_ || curr_step % every_n_step != 0) return; - - std::vector ins = scope.GetAllNames(); - std::vector inputs; - for (auto& name : ins) { - inputs.emplace_back(scope.FindVar(name)); - } - - std::ofstream fout(absolutePath, std::fstream::app); - PADDLE_ENFORCE(fout.is_open(), "open file for model failed."); - for (size_t i = 0; i < inputs.size(); ++i) { - std::string bytes = inputs[i]->Get().SerializeToString(); - fout << bytes << '\n'; - } - fout.close(); - } - - private: - // flag indicate this op may be skipped. 
- mutable bool run_once_ = false; -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_WITHOUT_GRADIENT(checkpoint, ops::CheckpointOp, - ops::CheckpointOpMaker); -REGISTER_OP_CPU_KERNEL(checkpoint, ops::CheckpointKernel); From c5e6307f61b1743d7cab50a1a5ea691a01d7362e Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 23 Oct 2017 14:35:50 -0700 Subject: [PATCH 22/29] rm test_checkpoint_op.py --- .../v2/framework/tests/test_checkpoint_op.py | 32 ------------------- 1 file changed, 32 deletions(-) delete mode 100644 python/paddle/v2/framework/tests/test_checkpoint_op.py diff --git a/python/paddle/v2/framework/tests/test_checkpoint_op.py b/python/paddle/v2/framework/tests/test_checkpoint_op.py deleted file mode 100644 index 2d5c8c6fc1687..0000000000000 --- a/python/paddle/v2/framework/tests/test_checkpoint_op.py +++ /dev/null @@ -1,32 +0,0 @@ -import paddle.v2 as paddle -from paddle.v2.framework import core -from paddle.v2.framework.op import Operator -from op_test import OpTest -import numpy as np -import unittest -import os - -ABSOLUTE_PATH = "/tmp/PADDLE_MODEL" # directory for save parameter files. 
- -scope = core.Scope() -place = core.CPUPlace() -dev_ctx = core.DeviceContext.create(place) - - -class TestCheckpointOp(OpTest): - def setUp(self): - self.op_type = "checkpoint" - x0 = np.random.random((2, 3)).astype("float32") - x1 = np.random.random((1, 2)).astype("float32") - x2 = np.random.random((2, 1)).astype("float32") - - self.attrs = {"absolute_path": ABSOLUTE_PATH, "interval": 100} - - def test_check_output(self): - self.assertTrue(os.path.exists(ABSOLUTE_PATH)) - - -#TODO(dzh): add a joint testing case to checkpoint/save - -if __name__ == "__main__": - unittest.main() From 0961597391ebda1b9c474b893552697fa137ab4f Mon Sep 17 00:00:00 2001 From: Dong Zhihong Date: Mon, 23 Oct 2017 15:15:10 -0700 Subject: [PATCH 23/29] "get inputs outputs name from execution context" --- paddle/framework/operator.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 0d0304ac9e130..aca663ffc6fc6 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -290,6 +290,15 @@ class ExecutionContext { return device_context_; } + //! Get a input which has multiple variables. + const std::vector& Inputs(const std::string& name) const { + return op_.Inputs(name); + } + //! Get an output which has multiple variables. 
+ const std::vector& Outputs(const std::string& name) const { + return op_.Outputs(name); + } + #ifdef PADDLE_WITH_CUDA const platform::CUDADeviceContext& cuda_device_context() const { PADDLE_ENFORCE(platform::is_gpu_place(device_context_.GetPlace())); From 9e8ddc1ac5aa45a386d4ced27082a9fb5cf9cd87 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 23 Oct 2017 16:03:56 -0700 Subject: [PATCH 24/29] Saving each variable to a independent file --- paddle/framework/operator.h | 4 +- paddle/operators/save_restore_op.cc | 86 ++++++++++++++--------------- 2 files changed, 44 insertions(+), 46 deletions(-) diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index aca663ffc6fc6..353b1e559d5e6 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -241,7 +241,7 @@ class ExecutionContext { template const std::vector MultiInput(const std::string& name) const { - auto names = op_.Inputs(name); + auto names = Inputs(name); std::vector res; res.reserve(names.size()); std::transform(names.begin(), names.end(), std::back_inserter(res), @@ -254,7 +254,7 @@ class ExecutionContext { template std::vector MultiOutput(const std::string& name) const { - auto names = op_.Outputs(name); + auto names = Outputs(name); std::vector res; res.reserve(names.size()); std::transform(names.begin(), names.end(), std::back_inserter(res), diff --git a/paddle/operators/save_restore_op.cc b/paddle/operators/save_restore_op.cc index e3d6b7b5c73d8..a9125e429920e 100644 --- a/paddle/operators/save_restore_op.cc +++ b/paddle/operators/save_restore_op.cc @@ -20,23 +20,29 @@ namespace operators { using framework::Tensor; using framework::LoDTensor; +inline static std::string VarToFileName(const std::string& folder_path, + const std::string& var_name) { + return folder_path + "/__" + var_name + "__"; +} + template class SaveKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto ins = ctx.MultiInput("X"); 
- std::string absolutePath = ctx.template Attr("absolutePath"); - - std::ofstream fout(absolutePath, std::ofstream::out); - VLOG(1) << "Open model file : " << absolutePath; - PADDLE_ENFORCE(fout.is_open(), "Open model file failed."); - for (size_t i = 0; i < ins.size(); ++i) { - std::string bytes = ins[i]->SerializeToString(); + std::string folder_path = ctx.template Attr("folderPath"); + auto tensors = ctx.MultiInput("X"); + auto& var_names = ctx.Inputs("X"); + + VLOG(1) << "Save variables to folder: " << folder_path; + for (size_t i = 0; i < tensors.size(); ++i) { + std::string file_name = VarToFileName(folder_path, var_names.at(i)); + std::ofstream fout(file_name, std::ofstream::out); + PADDLE_ENFORCE(fout.is_open(), "Fail to create file %s.", file_name); + std::string bytes = tensors[i]->SerializeToString(); fout << bytes; + fout.close(); } - - fout.close(); - VLOG(1) << "Save model finished. Items count : " << ins.size(); + VLOG(1) << "Compelete saving variables. Items count: " << ins.size(); } }; @@ -44,34 +50,26 @@ template class RestoreKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto outs = ctx.MultiOutput("Out"); - std::string absolutePath = ctx.template Attr("absolutePath"); - - std::ifstream fin(absolutePath, std::ifstream::in); - VLOG(1) << "Open model file : " << absolutePath; - PADDLE_ENFORCE(fin.is_open(), "Open model file failed."); - - int tensor_idx = 0; - while (fin) { - std::string line; - size_t tensor_size = 0; - const size_t kBufferSize = - 4096; // read chuck by chuck for switch pageing. 
+ std::string folder_path = ctx.template Attr("folderPath"); + VLOG(1) << "Try loading variables from folder: " << folder_path; + auto tensors = ctx.MultiOutput("Out"); + auto& var_names = ctx.Outputs("Out"); + + for (size_t i = 0; i < var_names.size(); ++i) { + std::string file_name = VarToFileName(folder_path, var_names[i]); + std::ifstream fin(file_name, std::ifstream::in); + PADDLE_ENFORCE(fin.is_open(), "Fail to open file %s.", file_name); + const size_t kBufferSize = 4096; // equal to linux page size char buffer[kBufferSize]; - - if (fin.read((char*)(tensor_size), sizeof(size_t))) { - size_t read_size = std::min(kBufferSize, tensor_size); - tensor_size -= kBufferSize; - while (read_size > 0 && fin.read(buffer, read_size)) { - line.append(buffer, read_size); - } + std::string cache; + while (!fin.eof()) { + fin.read(buffer, kBufferSize); + cache.append(buffer, fin.gcount()); } - PADDLE_ENFORCE(tensor_size == line.size(), "Read tensor error."); - VLOG(1) << "Item " << tensor_idx << " size " << line.size() << " content " - << line; - outs[tensor_idx++]->DeserializeFromString(line, ctx.GetPlace()); + tensors.at(i)->DeserializeFromString(cache, ctx.GetPlace()); + fin.close(); } - fin.close(); + VLOG(1) << "Complete loading variables."; } }; @@ -83,9 +81,9 @@ class SaveOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInputs("X"), "Input(X) of SaveOp should not be null."); - auto absolutePath = ctx->Attrs().Get("absolutePath"); - PADDLE_ENFORCE(!absolutePath.empty(), - "Input(absolutePath) of SaveOp should not be null."); + auto folder_path = ctx->Attrs().Get("folderPath"); + PADDLE_ENFORCE(!folder_path.empty(), + "Input(folder_path) of SaveOp should not be null."); } }; @@ -97,7 +95,7 @@ class SaveOpMaker : public framework::OpProtoAndCheckerMaker { "(tensor), the tensor count can be 1~INT_MAX, tensors names which " "values will be saved.") .AsDuplicable(); - 
AddAttr("absolutePath", "the absolutePath for save model."); + AddAttr("folderPath", "the folderPath for save model."); AddComment(R"DOC( Save the input tensors to a binary file based on input tensor names and absolute path. @@ -115,9 +113,9 @@ class RestoreOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasOutputs("Out"), "Output(X) of RestoreOp should not be null."); - auto absolutePath = ctx->Attrs().Get("absolutePath"); - PADDLE_ENFORCE(!absolutePath.empty(), - "Input(absolutePath) of Restore Op should not be null."); + auto folder_path = ctx->Attrs().Get("folderPath"); + PADDLE_ENFORCE(!folder_path.empty(), + "Input(folder_path) of Restore Op should not be null."); } framework::DataType IndicateDataType( @@ -135,7 +133,7 @@ class RestoreOpMaker : public framework::OpProtoAndCheckerMaker { "(tensor), the tensor count can be 1~INT_MAX, tensors which " "values will be restores.") .AsDuplicable(); - AddAttr("absolutePath", "the absolutePath for model file."); + AddAttr("folderPath", "the folderPath for model file."); AddAttr("data_type", "output tensor data type") .SetDefault(framework::DataType::FP32); AddComment(R"DOC( From e1c1e2c2f17e679f19a6ae80f63f271a31f4088f Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 23 Oct 2017 16:20:43 -0700 Subject: [PATCH 25/29] Fix bugs --- paddle/framework/scope_test.cc | 2 +- paddle/operators/save_restore_op.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/framework/scope_test.cc b/paddle/framework/scope_test.cc index 0b48460d2a885..f738d5ba9ecda 100644 --- a/paddle/framework/scope_test.cc +++ b/paddle/framework/scope_test.cc @@ -58,7 +58,7 @@ TEST(Scope, FindScope) { TEST(Scope, GetAllNames) { Scope s; - Variable* v = s.NewVar("a"); + Variable* v = s.Var("a"); EXPECT_EQ(&s, s.FindScope(v)); std::vector ans = s.GetAllNames(); diff --git a/paddle/operators/save_restore_op.cc b/paddle/operators/save_restore_op.cc 
index a9125e429920e..b98ccf904a9a8 100644 --- a/paddle/operators/save_restore_op.cc +++ b/paddle/operators/save_restore_op.cc @@ -42,7 +42,7 @@ class SaveKernel : public framework::OpKernel { fout << bytes; fout.close(); } - VLOG(1) << "Compelete saving variables. Items count: " << ins.size(); + VLOG(1) << "Compelete saving variables. Items count: " << tensors.size(); } }; From 4bc80d94cb2d31861125a13c66dec2e684ea1232 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 23 Oct 2017 18:31:19 -0700 Subject: [PATCH 26/29] Rewrite save_restore_op_test with new Python framework --- .../framework/tests/test_save_restore_op.py | 147 ++++++++---------- 1 file changed, 65 insertions(+), 82 deletions(-) diff --git a/python/paddle/v2/framework/tests/test_save_restore_op.py b/python/paddle/v2/framework/tests/test_save_restore_op.py index 28cbcddf35415..75ae6a237cade 100644 --- a/python/paddle/v2/framework/tests/test_save_restore_op.py +++ b/python/paddle/v2/framework/tests/test_save_restore_op.py @@ -1,87 +1,70 @@ -from paddle.v2.framework import core -from paddle.v2.framework.op import Operator +import paddle.v2.framework.core as core +import paddle.v2.framework.framework as framework +import paddle.v2.framework.executor as executor + import numpy as np -from op_test import OpTest, create_op import unittest -import os, sys - -ABSOLUTE_PATH = "/tmp/MODEL" # directory for save parameter files. - -scope = core.Scope() -place = core.CPUPlace() -dev_ctx = core.DeviceContext.create(place) - -# Saver is a demo for saving arbitrary tensor. -# TODO(dzh): Saver also need to save ProgramDesc. Should be done after -# python API done. 
- - -def Saver(var_list=None): - net = core.Net.create() - save_tensors = [] - save_op = Operator("save", X=save_tensors, absolutePath=ABSOLUTE_PATH) - net.append_op(save_op) - net.run(scope, dev_ctx) - - -class TestSaver(unittest.TestCase): - def test_save_tensors(self): - a = scope.var("a") - b = scope.var("b") - Saver(["a", "b"]) - self.assertTrue(os.path.exists(ABSOLUTE_PATH)) - - -class TestSaveOp(OpTest): - def setUp(self): - self.op_type = "save" - x0 = np.ones((1, 1)).astype("float32") - x1 = np.ones((1, 1)).astype("float32") - x2 = np.ones((2, 1)).astype("float32") - - self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)], } - - self.attrs = {"absolutePath": ABSOLUTE_PATH} - - def test_check_output(self): - if os.path.exists(ABSOLUTE_PATH): - try: - if os.path.isdir(ABSOLUTE_PATH): - os.rmdir(ABSOLUTE_PATH) - elif os.path.isfile(ABSOLUTE_PATH): - os.remove(ABSOLUTE_PATH) - except OSError: - pass - self.check_output() - self.assertTrue(os.path.exists(ABSOLUTE_PATH)) - - -# must run saveTest first -class TestRestoreOp(OpTest): - def setUp(self): - self.op_type = "restore" - - src_x0 = np.ones((2, 3)).astype("float32") - src_x1 = np.ones((1, 2)).astype("float32") - src_x2 = np.ones((2, 1)).astype("float32") - - x0 = scope.var('x0').get_tensor() - x1 = scope.var('x1').get_tensor() - x2 = scope.var('x2').get_tensor() - - self.src_tensors = [src_x0, src_x1, src_x2] - self.inputs = {} - self.outputs = {"Out": [("x0", x0), ("x1", x1), ("x2", x2)]} - - self.attrs = {"absolutePath": ABSOLUTE_PATH} - - def test_check_output(self): - self.check_output() - self.dst_tensors = [ - scope.find_var("x0"), scope.find_var("x1"), scope.find_var("x2") - ] - for src, dst in zip(self.save_tensors, self.dst_tensors): - self.assertTrue(np.array_equal(src, dst)) +import os +import sys +import shutil + +FOLDER_PATH = "./tmp_test_dir" + + +class TestSaveRestoreOp(unittest.TestCase): + def test_save_restore_op(self): + tensor_1_val = np.random.rand(3, 9).astype("float32") + 
tensor_2_val = np.random.rand(4, 2).astype("float32") + place = core.CPUPlace() + + program = framework.Program() + block = program.global_block() + v_a = block.create_var( + dtype="float32", shape=[3, 9], lod_level=0, name="tensor_1") + v_b = block.create_var( + dtype="float32", shape=[4, 2], lod_level=0, name="tensor_2") + + t_1 = core.LoDTensor() + t_1.set(tensor_1_val, place) + t_2 = core.LoDTensor() + t_2.set(tensor_2_val, place) + block.append_op( + type="save", + inputs={"X": [v_a, v_b]}, + attrs={"folderPath": FOLDER_PATH}) + block.append_op( + type="fill_constant", + outputs={"Out": [v_a]}, + attrs={"shape": [2, 2], + "value": 0.0}) + block.append_op( + type="fill_constant", + outputs={"Out": [v_b]}, + attrs={"shape": [2, 2], + "value": 0.0}) + block.append_op( + type="restore", + outputs={"Out": [v_a, v_b]}, + attrs={"folderPath": FOLDER_PATH}) + + if os.path.exists(FOLDER_PATH): + shutil.rmtree(FOLDER_PATH) + os.makedirs(FOLDER_PATH) + + exe = executor.Executor(place) + out = exe.run(program, + feed={"tensor_1": t_1, + "tensor_2": t_2}, + fetch_list=[v_a, v_b]) + + self.assertTrue(os.path.isdir(FOLDER_PATH)) + self.assertTrue(os.path.isfile(FOLDER_PATH + "/__tensor_1__")) + self.assertTrue(os.path.isfile(FOLDER_PATH + "/__tensor_2__")) + + self.assertTrue(np.array_equal(np.array(out[0]), tensor_1_val)) + self.assertTrue(np.array_equal(np.array(out[1]), tensor_2_val)) + + shutil.rmtree(FOLDER_PATH) if __name__ == "__main__": From 4150e273be40e0687ae4bd988411badecc95f5d7 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 23 Oct 2017 21:05:18 -0700 Subject: [PATCH 27/29] Move `SaveOp` and `RestoreOp` from OpWithKernel to OpBase --- paddle/framework/operator.h | 13 +-- paddle/operators/save_restore_op.cc | 131 +++++++++++------------- python/paddle/v2/framework/framework.py | 3 +- 3 files changed, 64 insertions(+), 83 deletions(-) diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 353b1e559d5e6..0d0304ac9e130 100644 --- 
a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -241,7 +241,7 @@ class ExecutionContext { template const std::vector MultiInput(const std::string& name) const { - auto names = Inputs(name); + auto names = op_.Inputs(name); std::vector res; res.reserve(names.size()); std::transform(names.begin(), names.end(), std::back_inserter(res), @@ -254,7 +254,7 @@ class ExecutionContext { template std::vector MultiOutput(const std::string& name) const { - auto names = Outputs(name); + auto names = op_.Outputs(name); std::vector res; res.reserve(names.size()); std::transform(names.begin(), names.end(), std::back_inserter(res), @@ -290,15 +290,6 @@ class ExecutionContext { return device_context_; } - //! Get a input which has multiple variables. - const std::vector& Inputs(const std::string& name) const { - return op_.Inputs(name); - } - //! Get an output which has multiple variables. - const std::vector& Outputs(const std::string& name) const { - return op_.Outputs(name); - } - #ifdef PADDLE_WITH_CUDA const platform::CUDADeviceContext& cuda_device_context() const { PADDLE_ENFORCE(platform::is_gpu_place(device_context_.GetPlace())); diff --git a/paddle/operators/save_restore_op.cc b/paddle/operators/save_restore_op.cc index b98ccf904a9a8..314e4e927924b 100644 --- a/paddle/operators/save_restore_op.cc +++ b/paddle/operators/save_restore_op.cc @@ -25,65 +25,35 @@ inline static std::string VarToFileName(const std::string& folder_path, return folder_path + "/__" + var_name + "__"; } -template -class SaveKernel : public framework::OpKernel { +class SaveOp : public framework::OperatorBase { public: - void Compute(const framework::ExecutionContext& ctx) const override { - std::string folder_path = ctx.template Attr("folderPath"); - auto tensors = ctx.MultiInput("X"); - auto& var_names = ctx.Inputs("X"); + SaveOp(const std::string& type, const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs) + : 
OperatorBase(type, inputs, outputs, attrs) {} + + void Run(const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const override { + const auto& var_names = this->Inputs("X"); + for (const auto& name : var_names) { + PADDLE_ENFORCE_NOT_NULL(scope.FindVar(name), + "Can not find variable '%s' in the scope.", name); + } + std::string folder_path = this->Attr("folderPath"); + PADDLE_ENFORCE(!folder_path.empty(), + "'folderPath' of SaveOp shouldn't be empty."); VLOG(1) << "Save variables to folder: " << folder_path; - for (size_t i = 0; i < tensors.size(); ++i) { - std::string file_name = VarToFileName(folder_path, var_names.at(i)); + for (const auto& name : var_names) { + std::string file_name = VarToFileName(folder_path, name); std::ofstream fout(file_name, std::ofstream::out); PADDLE_ENFORCE(fout.is_open(), "Fail to create file %s.", file_name); - std::string bytes = tensors[i]->SerializeToString(); + const LoDTensor& tensor = scope.FindVar(name)->Get(); + std::string bytes = tensor.SerializeToString(); fout << bytes; fout.close(); } - VLOG(1) << "Compelete saving variables. 
Items count: " << tensors.size(); - } -}; - -template -class RestoreKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - std::string folder_path = ctx.template Attr("folderPath"); - VLOG(1) << "Try loading variables from folder: " << folder_path; - auto tensors = ctx.MultiOutput("Out"); - auto& var_names = ctx.Outputs("Out"); - - for (size_t i = 0; i < var_names.size(); ++i) { - std::string file_name = VarToFileName(folder_path, var_names[i]); - std::ifstream fin(file_name, std::ifstream::in); - PADDLE_ENFORCE(fin.is_open(), "Fail to open file %s.", file_name); - const size_t kBufferSize = 4096; // equal to linux page size - char buffer[kBufferSize]; - std::string cache; - while (!fin.eof()) { - fin.read(buffer, kBufferSize); - cache.append(buffer, fin.gcount()); - } - tensors.at(i)->DeserializeFromString(cache, ctx.GetPlace()); - fin.close(); - } - VLOG(1) << "Complete loading variables."; - } -}; - -class SaveOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInputs("X"), - "Input(X) of SaveOp should not be null."); - auto folder_path = ctx->Attrs().Get("folderPath"); - PADDLE_ENFORCE(!folder_path.empty(), - "Input(folder_path) of SaveOp should not be null."); + VLOG(1) << "Compelete saving variables. Items count: " << var_names.size(); } }; @@ -105,22 +75,42 @@ or not. 
} }; -class RestoreOp : public framework::OperatorWithKernel { +class RestoreOp : public framework::OperatorBase { public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasOutputs("Out"), - "Output(X) of RestoreOp should not be null."); - auto folder_path = ctx->Attrs().Get("folderPath"); + RestoreOp(const std::string& type, const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + + void Run(const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const override { + const auto& var_names = this->Outputs("Out"); + for (const auto& name : var_names) { + PADDLE_ENFORCE_NOT_NULL(scope.FindVar(name), + "Can not find variable '%s' in the scope.", name); + } + std::string folder_path = this->Attr("folderPath"); PADDLE_ENFORCE(!folder_path.empty(), - "Input(folder_path) of Restore Op should not be null."); - } + "'folderPath' of RestoreOp shouldn't be empty."); + + VLOG(1) << "Try loading variables from folder: " << folder_path; - framework::DataType IndicateDataType( - const framework::ExecutionContext& ctx) const override { - return static_cast(Attr("data_type")); + for (const auto& name : var_names) { + std::string file_name = VarToFileName(folder_path, name); + std::ifstream fin(file_name, std::ifstream::in); + PADDLE_ENFORCE(fin.is_open(), "Fail to open file %s.", file_name); + const size_t kBufferSize = 4096; // equal to linux page size + char buffer[kBufferSize]; + std::string cache; + while (!fin.eof()) { + fin.read(buffer, kBufferSize); + cache.append(buffer, fin.gcount()); + } + LoDTensor* tensor = scope.FindVar(name)->GetMutable(); + tensor->DeserializeFromString(cache, dev_ctx.GetPlace()); + fin.close(); + } + VLOG(1) << "Complete loading variables."; } }; @@ -148,11 +138,10 @@ or not. 
} // namespace operators } // namespace paddle -namespace ops = paddle::operators; - -REGISTER_OP_WITHOUT_GRADIENT(save, ops::SaveOp, ops::SaveOpMaker); -REGISTER_OP_CPU_KERNEL(save, ops::SaveKernel); +REGISTER_OPERATOR(save, paddle::operators::SaveOp, + paddle::framework::EmptyGradOpMaker, + paddle::operators::SaveOpMaker); -REGISTER_OP_WITHOUT_GRADIENT(restore, ops::RestoreOp, ops::RestoreOpMaker); -REGISTER_OP_CPU_KERNEL(restore, - ops::RestoreKernel); +REGISTER_OPERATOR(restore, paddle::operators::RestoreOp, + paddle::framework::EmptyGradOpMaker, + paddle::operators::RestoreOpMaker); diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py index 813e25816d479..fdd132b398f23 100644 --- a/python/paddle/v2/framework/framework.py +++ b/python/paddle/v2/framework/framework.py @@ -257,7 +257,8 @@ def find_name(var_list, name): self.desc.set_attr(attr_name, attrs[attr_name]) self.desc.check_attrs() - if type not in {'feed', 'fetch'}: + no_kernel_op_set = {'feed', 'fetch', 'save', 'restore'} + if type not in no_kernel_op_set: self.desc.infer_var_type(self.block.desc) self.desc.infer_shape(self.block.desc) From 7fdc53668a86715f03f7a3d3b323d2de220e6aaf Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 23 Oct 2017 21:08:19 -0700 Subject: [PATCH 28/29] Refine unit test of SaveOp and RestoreOp --- python/paddle/v2/framework/tests/test_save_restore_op.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/v2/framework/tests/test_save_restore_op.py b/python/paddle/v2/framework/tests/test_save_restore_op.py index 75ae6a237cade..3a36d03f62a7a 100644 --- a/python/paddle/v2/framework/tests/test_save_restore_op.py +++ b/python/paddle/v2/framework/tests/test_save_restore_op.py @@ -14,7 +14,7 @@ class TestSaveRestoreOp(unittest.TestCase): def test_save_restore_op(self): tensor_1_val = np.random.rand(3, 9).astype("float32") - tensor_2_val = np.random.rand(4, 2).astype("float32") + tensor_2_val = 
np.random.randint(0, 20, size=(4, 2)).astype("int32") place = core.CPUPlace() program = framework.Program() @@ -22,7 +22,7 @@ def test_save_restore_op(self): v_a = block.create_var( dtype="float32", shape=[3, 9], lod_level=0, name="tensor_1") v_b = block.create_var( - dtype="float32", shape=[4, 2], lod_level=0, name="tensor_2") + dtype="int32", shape=[4, 2], lod_level=0, name="tensor_2") t_1 = core.LoDTensor() t_1.set(tensor_1_val, place) From ad08120b5ae22f7a50080fa562eeffcc00612db8 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Tue, 24 Oct 2017 11:20:48 -0700 Subject: [PATCH 29/29] fix compile error --- paddle/operators/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index acd302f0c7a4f..d2d70d8be7120 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -73,7 +73,7 @@ function(op_library TARGET) if ("${TARGET}" STREQUAL "save_restore_op") set(pybind_flag 1) # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(save);\n") + file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(save);\n") endif() # activation_op contains several operators