PaddlePaddle · reyoung · Mar 21, 2017 · Mar 21, 2017 · Mar 21, 2017 · Mar 21, 2017
@@ -91,6 +91,7 @@ include(external/openblas)  # download, build, install openblas
 include(external/swig)      # download, build, install swig
 include(external/warpctc)   # download, build, install warpctc
 include(external/any)       # download libn::any
+include(external/nlohmann_json)  # download json
 
 include(generic)            # simplify cmake module
 include(package)            # set paddle packages

diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake
@@ -52,7 +52,10 @@ macro(add_style_check_target TARGET_NAME)
             endforeach()
             if(LINT MATCHES ON)
                 get_filename_component(base_filename ${filename} NAME)
-                set(CUR_GEN ${CMAKE_CURRENT_BINARY_DIR}/${base_filename}.cpplint)
+                get_filename_component(dirname ${filename} DIRECTORY)
+                set(CUR_GEN ${dirname}_${base_filename}.cpplint)
+                string(REGEX REPLACE "paddle/([^p][^a][^d].*)" "\\1" CUR_GEN ${CUR_GEN})
+                set(CUR_GEN ${CMAKE_CURRENT_BINARY_DIR}/${CUR_GEN})
                 add_custom_command(OUTPUT ${CUR_GEN}
                     PRE_BUILD
                     COMMAND env ${py_env} "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py"

diff --git a/cmake/external/any.cmake b/cmake/external/any.cmake
@@ -18,3 +18,4 @@ ExternalProject_Add(
 )
 
 add_definitions(-DANY_IMPL_ANY_CAST_MOVEABLE)
+LIST(APPEND external_project_dependencies linb_any)
diff --git a/cmake/external/nlohmann_json.cmake b/cmake/external/nlohmann_json.cmake
@@ -0,0 +1,20 @@
+INCLUDE(ExternalProject)  # provides ExternalProject_Add()
+
+SET(NLOHMANN_JSON_SOURCE_DIR ${THIRD_PARTY_PATH}/nlohmann_json)  # used below as the ExternalProject PREFIX
+
+INCLUDE_DIRECTORIES(${NLOHMANN_JSON_SOURCE_DIR}/src/nlohmann_json/src/)  # src/ inside the checkout; ExternalProject's default source dir is <PREFIX>/src/<name>
+
+ExternalProject_Add(
+    nlohmann_json
+    ${EXTERNAL_PROJECT_LOG_ARGS}
+    GIT_REPOSITORY  "https://github.com/nlohmann/json.git"
+    GIT_TAG         "v2.1.1"  # pinned tag
+    PREFIX          ${NLOHMANN_JSON_SOURCE_DIR}
+    UPDATE_COMMAND  ""  # the empty commands below disable every step after the checkout:
+    CONFIGURE_COMMAND ""  # no configure,
+    BUILD_COMMAND     ""  # no build,
+    INSTALL_COMMAND   ""  # no install,
+    TEST_COMMAND      ""  # no test -- presumably because only the headers are needed (confirm)
+)
+
+LIST(APPEND external_project_dependencies nlohmann_json)  # add this target to the external_project_dependencies list
diff --git a/cmake/flags.cmake b/cmake/flags.cmake
@@ -109,7 +109,8 @@ set(COMMON_FLAGS
     -Wno-unused-function
     -Wno-error=literal-suffix
     -Wno-error=sign-compare
-    -Wno-error=unused-local-typedefs)
+    -Wno-error=unused-local-typedefs
+    -Wno-error=deprecated-declarations)
 
 set(GPU_COMMON_FLAGS
     -fPIC

diff --git a/cmake/util.cmake b/cmake/util.cmake
@@ -76,6 +76,7 @@ function(link_paddle_exe TARGET_NAME)
         paddle_gserver
         paddle_function
         ARCHIVE_END
+        paddle_topology
         paddle_pserver
         paddle_trainer_lib
         paddle_network

diff --git a/doc/design/topology.md b/doc/design/topology.md
@@ -0,0 +1,119 @@
+# Topology Overview
+Topology is a concept in Paddle for representing neural networks.  A neural network contains one topology, which describes how layers are connected to each other, and many parameters. Other deep learning frameworks may call this concept a computation graph or a neural network configuration.
+
+The topology is not only an API-level concept but also how we organize the computation code for each `Layer` or `Function` in Paddle. Paddle should maintain a dictionary from `Layer Type` to Layer implementation, e.g.  from the string `mul` to the function `void tensor_multiply(Tensor& ins, Tensor& outs)`. How users manipulate a topology, and how Paddle maps a user's topology to implementations of `Layer` and `Function`, are fundamental problems for refactoring Paddle.
+
+## User Stories and Examples
+
+### Kernel Developers
+
+Alan is a professional CPU and GPU developer. He can write kernel functions of a new `Layer` with the best performance. However, he is not familiar with Python, the language of the Paddle API. Alan just needs to write the kernel functions and register them in Paddle, and then Paddle should generate the user-side APIs for these kernel functions without any additional code written by Alan.
+
+```cpp
+template <DeviceType devType>
+void cos_kernel(std::vector<Tensor>& ins, std::vector<Tensor>& outs,  double scale) {
+    // implementation here.
+}
+
+BEGIN_REGISTER_FUNCTION(cos, cos_kernel)
+// The parameter of cos function. 
+func.addAttribute("scale", "The scale of cos layer").defaultValue(1.0).largerThan(0.0);
+
+// Two inputs
+func.addInput().dataType(Dense).dimension(2).supportSeqType();
+func.addInput().dataType(Dense).dimension(2).supportSeqType();
+
+// One output
+func.addOutput().dataType(Dense).dimension(2).supportSeqType();
+
+// Tell Paddle how to infer the output shape.
+func.setShapeInferer([](std::vector<Dims>& ins, std::vector<Dims>& outs){
+    outs[0] = {ins[0][0], 1};  // output dimension = batch_size * 1
+});
+
+END_REGISTER_FUNCTION()
+```
+
+### QA Developers
+
+Bob is a QA developer of Paddle.  He wants to test all Paddle-supported `Function` and `Layer` types.  However, each layer has different configuration attributes, e.g. `scale` in the `cosine` function. Each configuration attribute has a different value range and data type. With the new topology mechanism, Bob should be able to easily test all boundary conditions of a Layer or Function.
+
+```
+auto cos = function::Register("cos");
+
+for each_attribute in cos.attributes:
+    each_attribute = each_attribute.min_value
+
+test(cos);
+
+for each_attribute in cos.attributes:
+    each_attribute = each_attribute.max_value
+test(cos);
+```
+
+### Language Binding Developers
+
+Carol is a language binding developer of Paddle. She wants to develop a language binding of Paddle. She is not familiar with the Paddle C++ core and does not want to go so deep into Paddle. She just wants a clear list of the Layers Paddle supports and the configuration parameters of each Layer.
+
+Also as a language binding developer, Carol does not want to write any topology validation code in language binding because Paddle C++ Core could be in flux and layer's API could be changed.
+
+She only needs to access the registered information of `Topology` and use this information in another language. She can use either reflection or code generation in that language to generate end-user APIs.
+
+```python
+import paddle
+
+for layer_name in paddle.topology.meta.all_registed_layers:
+    def __func__(**kwargs):
+        layer_meta = paddle.topology.meta.all_registed_layers["layer_name"]
+        return layer_meta.new_configration(kwargs)
+
+    globals()[layer_name] = __func__
+```
+
+### API End-Users
+
+David is a new user of Paddle, who is not familiar with Paddle and deep learning. He writes a Python program and configures a neural network. When he runs this program, he expects a clear error message when his configuration is wrong. The error message should be like `cosine layer's scale parameter should be larger than 0.0.`, not just a `check error` in our computation kernel. Because we register every parameter's meta information, it is easy to achieve this goal.
+
+
+## Goals
+
+After considering these user stories, we conclude what we want from the Topology design.
+
+* Users should operate directly on the C++ topology configuration, because we should maintain consistency across language bindings and keep the language binding layer thin and easy to develop.
+* Our topology configuration should be able to validate user's input and give a reasonable error message. Also, we should maintain some meta information of each configuration attribute, e.g. `scale` attribute in `cos` layer is a `double` value, should be larger than 0.0, and the default value is 1.0.
+* We should serialize our topology into a portable format, so users can use the model they trained before for inference.
+* We should let kernel developers easily register their kernel functions in Paddle without making them write configuration APIs in Python.
+
+## Implementation
+
+### Meta Information
+To achieve the goals above, we need a place to store the meta information of each layer. The meta information describes how a layer can be configured: which attributes of the layer can be set and what the input types can be.
+
+For example, the cosine layer should have two inputs, and the two inputs should be the same shape. The two inputs should both be the dense matrix. The cosine layer should have only one output, and the output shape should be [batch_size, 1] because, for each pair of input sample, the cosine similarity should be a scalar. The cosine layer has one configurable argument, `scale`. It is the scalar number multiplied to the cosine similarity.  `scale` should be a `double` value,  the default value is 1.0,  and should be larger than 0.0.
+
+All of this meta information should be written in the namespace `paddle::topology::meta`. There are several basic classes in this namespace.
+
+* Constraints:  It is a function list which stores the constraints of one attribute. It is used to validate that user input is correct.
+* AttributeMeta:  It represents the meta information of an attribute, e.g. `scale`. It contains the attribute name,  description, type information and `Constraints`.
+* TensorMeta: Tensor is the input/output of the Layer or Function. It contains a vector of `AttributeMeta`. The data type and sequence type are just attributes of the tensor.
+* FunctionMeta: It represents the meta information of a paddle::Function. It contains two vectors of TensorMeta, the inputs and the outputs. The FunctionMeta also contains a vector of AttributeMeta, to which kernel developers can add the attributes used by their kernel.
+* LayerMeta: A concept similar to FunctionMeta, but used to represent a `Layer`.
+* TopologyMeta: A topology meta contains a vector of `AttributeMeta`, which represents the attributes that can be set globally in a topology.
+
+### Topology information
+
+The topology information is the actual information of a neural network. It has a one-to-one correspondence with the meta information. We use `std::any` (a.k.a. `boost::any`) to represent the value of each attribute because an attribute could be of any type (double/int/vector<int>, etc).
+
+So the `topology::Tensor` contains an attribute map, e.g. `map<string, any>`.  The `Function` contains an attribute map, input tensors, and output tensors. The remaining types of topology information correspond to their meta information in the same way.
+
+## Step by step approach
+
+After building the `Topology` concept in C++, Paddle's Python code could be cleaned up. However, the development process should be broken down into steps, each carefully completed, to keep the Paddle code stable and avoid introducing bugs.
+
+The steps are:
+
+1. Add `Constraints`, `AttributeMeta` , `TensorMeta`, `FunctionMeta` to refactor the `paddle::Function` package. Make `paddle::Function` just a plain function registered to `FunctionMeta`. Use a small-scope experiment to make sure we can use `topology::meta` and `topology` to represent a piece of a neural network.
+
+2. Complete the `LayerMeta`, `TopologyMeta`, etc. Also write a conversion method from `protobuf::LayerConfig`/`protobuf::ModelConfig` to `topology::Layer`/`topology::Topology`, so that `paddle_trainer` can use and test the `topology` package. A side effect of this job is to let `paddle_trainer` validate users' `trainer_config.conf` files, and give a reasonable error message when a user gives a wrong configuration.
+
+3. Clean up the implementation of `paddle.v2` topology. Let `v2` package not invoke `trainer_config_helper`, just invoke `topology` package directly from C-API.
diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt
@@ -4,6 +4,7 @@ add_subdirectory(utils)
 add_subdirectory(testing)
 add_subdirectory(math)
 add_subdirectory(parameter)
+add_subdirectory(topology)
 add_subdirectory(gserver)
 add_subdirectory(pserver)
 add_subdirectory(trainer)

diff --git a/paddle/api/CMakeLists.txt b/paddle/api/CMakeLists.txt
@@ -35,6 +35,7 @@ SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS
     paddle_utils
     paddle_gserver
     paddle_pserver
+    paddle_topology
     paddle_api
     paddle_cuda
     paddle_trainer_lib
@@ -61,6 +62,7 @@ SWIG_LINK_LIBRARIES(swig_paddle
     paddle_function
     ${METRIC_LIBS}
     ${ARCHIVE_END}
+    paddle_topology
     paddle_pserver
     paddle_trainer_lib
     paddle_network

diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h
@@ -15,7 +15,6 @@ limitations under the License. */
 #pragma once
 
 #include <glog/logging.h>
-
 #include "TensorShape.h"
 #include "TensorType.h"
 #include "paddle/math/Matrix.h"

diff --git a/paddle/function/BufferArgs.h b/paddle/function/BufferArgs.h
@@ -0,0 +1,99 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "BufferArg.h"
+
+namespace paddle {
+/**
+ * Argument type for Function::calc().
+ * A BufferArgs holds a set of BufferArg pointers,
+ * because a Function can have multiple inputs and outputs.
+ *
+ * addArg() taking a Matrix, Vector or IVector (the inline overloads below)
+ * allocates a BufferArg with new; the destructor deletes it.
+ * NOTE(review): copying is not disabled and would double-delete -- confirm.
+ *
+ * addArg() taking a BufferArg, SequenceIdArg, SequenceArg or SparseMatrixArg
+ * reference only stores its address; the caller must keep that object valid
+ * for the life time of the BufferArgs.
+ */
+class BufferArgs {
+public:
+  BufferArgs() {}
+
+  ~BufferArgs() {
+    for (auto arg : _args_) {
+      delete arg;
+    }
+  }
+
+  size_t size() const { return args_.size(); }
+
+  // Add an argument to the BufferArgs.
+  // The tensor can be a Matrix, Vector or IVector.
+  // For inputs, argType does not need to be given.
+  // For outputs, argType needs to be specified as ASSIGN_TO or ADD_TO.
+  void addArg(const Matrix& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
+  }
+
+  void addArg(const Vector& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
+  }
+
+  void addArg(const IVector& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
+  }
+
+  // Add arg to the BufferArgs and reshape it.
+  //
+  // For example, arg may represent an image buffer,
+  // but a Matrix can only represent a two-dimensional Tensor,
+  // so an extra argument is needed to describe the shape of the image buffer.
+  void addArg(const Matrix& arg,
+              const TensorShape& shape,
+              ArgType argType = UNSPECIFIED);
+
+  void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
+  void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
+
+  void addArg(const Matrix& matrix,
+              const IVector& vector,
+              ArgType argType = UNSPECIFIED);
+
+  // Return the num-th argument; CHECK_LT guards the index against args_.size().
+  const BufferArg& operator[](size_t num) const {
+    CHECK_LT(num, args_.size());
+    return *args_[num];
+  }
+
+  void addArg(BufferArg& arg) { args_.push_back(&arg); }
+
+  void addArg(SequenceIdArg& arg) { args_.push_back(&arg); }
+
+  void addArg(SequenceArg& arg) { args_.push_back(&arg); }
+
+  void addArg(SparseMatrixArg& arg) { args_.push_back(&arg); }
+
+private:
+  std::vector<BufferArg*> args_;
+  // The BufferArg objects constructed by, and freed by, this BufferArgs.
+  std::vector<BufferArg*> _args_;
+};
+
+}  // namespace paddle
diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt
@@ -1,9 +1,11 @@
 file(GLOB h_files . *Op.h)
 file(GLOB cpp_files . *Op.cpp)
 
-list(APPEND h_files Function.h)
-list(APPEND cpp_files Function.cpp)
+list(APPEND h_files FunctionList.h BufferArgs.h)
+list(APPEND cpp_files FunctionList.cpp)
 list(APPEND cpp_files BufferArg.cpp)
+list(APPEND h_files Register.h)
+list(APPEND cpp_files Register.cpp)
 
 if(WITH_GPU)
     file(GLOB cu_files . *OpGpu.cu)