From 016995e91bcba6c90384a4592f5866d7248b2eea Mon Sep 17 00:00:00 2001 From: sidgoyal78 Date: Thu, 7 Jun 2018 10:39:32 -0700 Subject: [PATCH 1/4] Add python files and edit readme --- 06.understand_sentiment/README.md | 125 +++++-------- 06.understand_sentiment/index.html | 125 +++++-------- 06.understand_sentiment/train_conv.py | 155 ++++++++++++++++ 06.understand_sentiment/train_dyn_rnn.py | 171 ++++++++++++++++++ 06.understand_sentiment/train_stacked_lstm.py | 162 +++++++++++++++++ 5 files changed, 570 insertions(+), 168 deletions(-) create mode 100644 06.understand_sentiment/train_conv.py create mode 100644 06.understand_sentiment/train_dyn_rnn.py create mode 100644 06.understand_sentiment/train_stacked_lstm.py diff --git a/06.understand_sentiment/README.md b/06.understand_sentiment/README.md index f682f30f..c43b811c 100644 --- a/06.understand_sentiment/README.md +++ b/06.understand_sentiment/README.md @@ -102,14 +102,11 @@ After issuing a command `python train.py`, training will start immediately. The ### Initialize PaddlePaddle -We must import and initialize PaddlePaddle (enable/disable GPU, set the number of trainers, etc). +We must import and initialize Paddle. ```python -import sys -import paddle.v2 as paddle - -# PaddlePaddle init -paddle.init(use_gpu=False, trainer_count=1) +import paddle +import paddle.fluid as fluid ``` As alluded to in section [Model Overview](#model-overview), here we provide the implementations of both Text CNN and Stacked-bidirectional LSTM models. @@ -121,20 +118,26 @@ We create a neural network `convolution_net` as the following snippet code. Note: `paddle.networks.sequence_conv_pool` includes both convolution and pooling layer operations. ```python -def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128): - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) - conv_3 = paddle.networks.sequence_conv_pool( - input=emb, context_len=3, hidden_size=hid_dim) - conv_4 = paddle.networks.sequence_conv_pool( - input=emb, context_len=4, hidden_size=hid_dim) - output = paddle.layer.fc(input=[conv_3, conv_4], - size=class_dim, - act=paddle.activation.Softmax()) - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost, output +def convolution_net(data, input_dim, class_dim=2, emb_dim=128, hid_dim=128): + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) + conv_3 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=3, + act="tanh", + pool_type="sqrt") + conv_4 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=4, + act="tanh", + pool_type="sqrt") + prediction = fluid.layers.fc(input=[conv_3, conv_4], + size=class_dim, + act="softmax") + return prediction + ``` 1. Define input data and its dimension @@ -154,70 +157,31 @@ def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128): We create a neural network `stacked_lstm_net` as below. ```python -def stacked_lstm_net(input_dim, - class_dim=2, - emb_dim=128, - hid_dim=512, - stacked_num=3): - """ - A Wrapper for sentiment classification task. - This network uses a bi-directional recurrent network, - consisting of three LSTM layers. This configuration is - motivated from the following paper, but uses few layers. - http://www.aclweb.org/anthology/P15-1109 - input_dim: here is word dictionary dimension. 
- class_dim: number of categories. - emb_dim: dimension of word embedding. - hid_dim: dimension of hidden layer. - stacked_num: number of stacked lstm-hidden layer. - """ +def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num): assert stacked_num % 2 == 1 - fc_para_attr = paddle.attr.Param(learning_rate=1e-3) - lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.) - para_attr = [fc_para_attr, lstm_para_attr] - bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.) - relu = paddle.activation.Relu() - linear = paddle.activation.Linear() - - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) - fc1 = paddle.layer.fc(input=emb, - size=hid_dim, - act=linear, - bias_attr=bias_attr) - lstm1 = paddle.layer.lstmemory( - input=fc1, act=relu, bias_attr=bias_attr) + fc1 = fluid.layers.fc(input=emb, size=hid_dim) + lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim) inputs = [fc1, lstm1] + for i in range(2, stacked_num + 1): - fc = paddle.layer.fc(input=inputs, - size=hid_dim, - act=linear, - param_attr=para_attr, - bias_attr=bias_attr) - lstm = paddle.layer.lstmemory( - input=fc, - reverse=(i % 2) == 0, - act=relu, - bias_attr=bias_attr) + fc = fluid.layers.fc(input=inputs, size=hid_dim) + lstm, cell = fluid.layers.dynamic_lstm( + input=fc, size=hid_dim, is_reverse=(i % 2) == 0) inputs = [fc, lstm] - fc_last = paddle.layer.pooling( - input=inputs[0], pooling_type=paddle.pooling.Max()) - lstm_last = paddle.layer.pooling( - input=inputs[1], pooling_type=paddle.pooling.Max()) - output = paddle.layer.fc(input=[fc_last, lstm_last], - size=class_dim, - act=paddle.activation.Softmax(), - bias_attr=bias_attr, - param_attr=para_attr) - - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost, output + fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max') + lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max') + + prediction = fluid.layers.fc(input=[fc_last, lstm_last], + size=class_dim, + act='softmax') + return prediction + ``` 1. Define input data and its dimension @@ -236,14 +200,7 @@ def stacked_lstm_net(input_dim, To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. ```python -word_dict = paddle.dataset.imdb.word_dict() -dict_dim = len(word_dict) -class_dim = 2 - -# option 1 -[cost, output] = convolution_net(dict_dim, class_dim=class_dim) -# option 2 -# [cost, output] = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3) +TODO ``` ## Model Training diff --git a/06.understand_sentiment/index.html b/06.understand_sentiment/index.html index 76b4eb38..1304bba1 100644 --- a/06.understand_sentiment/index.html +++ b/06.understand_sentiment/index.html @@ -144,14 +144,11 @@ ### Initialize PaddlePaddle -We must import and initialize PaddlePaddle (enable/disable GPU, set the number of trainers, etc). +We must import and initialize Paddle. ```python -import sys -import paddle.v2 as paddle - -# PaddlePaddle init -paddle.init(use_gpu=False, trainer_count=1) +import paddle +import paddle.fluid as fluid ``` As alluded to in section [Model Overview](#model-overview), here we provide the implementations of both Text CNN and Stacked-bidirectional LSTM models. 
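The Text CNN introduced next is built around `fluid.nets.sequence_conv_pool`, which bundles a sequence convolution with a sequence pooling step. As a rough sketch of what that helper composes (assuming only the lower-level `fluid.layers` API; `conv_pool_sketch` is an illustrative name, not something defined in this patch), the block could be written by hand as:

```python
import paddle.fluid as fluid

def conv_pool_sketch(emb, num_filters, filter_size):
    # Convolve over the embedded word sequence, then pool the per-window
    # activations into one fixed-size vector per sentence.
    conv = fluid.layers.sequence_conv(
        input=emb, num_filters=num_filters, filter_size=filter_size, act="tanh")
    return fluid.layers.sequence_pool(input=conv, pool_type="sqrt")
```

`convolution_net` below applies two such blocks, with window sizes 3 and 4, to the same embedding and feeds both pooled vectors into the final softmax layer.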
@@ -163,20 +160,26 @@ Note: `paddle.networks.sequence_conv_pool` includes both convolution and pooling layer operations. ```python -def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128): - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) - conv_3 = paddle.networks.sequence_conv_pool( - input=emb, context_len=3, hidden_size=hid_dim) - conv_4 = paddle.networks.sequence_conv_pool( - input=emb, context_len=4, hidden_size=hid_dim) - output = paddle.layer.fc(input=[conv_3, conv_4], - size=class_dim, - act=paddle.activation.Softmax()) - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost, output +def convolution_net(data, input_dim, class_dim=2, emb_dim=128, hid_dim=128): + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) + conv_3 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=3, + act="tanh", + pool_type="sqrt") + conv_4 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=4, + act="tanh", + pool_type="sqrt") + prediction = fluid.layers.fc(input=[conv_3, conv_4], + size=class_dim, + act="softmax") + return prediction + ``` 1. Define input data and its dimension @@ -196,70 +199,31 @@ We create a neural network `stacked_lstm_net` as below. ```python -def stacked_lstm_net(input_dim, - class_dim=2, - emb_dim=128, - hid_dim=512, - stacked_num=3): - """ - A Wrapper for sentiment classification task. - This network uses a bi-directional recurrent network, - consisting of three LSTM layers. This configuration is - motivated from the following paper, but uses few layers. - http://www.aclweb.org/anthology/P15-1109 - input_dim: here is word dictionary dimension. - class_dim: number of categories. - emb_dim: dimension of word embedding. - hid_dim: dimension of hidden layer. - stacked_num: number of stacked lstm-hidden layer. - """ +def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num): assert stacked_num % 2 == 1 - fc_para_attr = paddle.attr.Param(learning_rate=1e-3) - lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.) - para_attr = [fc_para_attr, lstm_para_attr] - bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.) 
- relu = paddle.activation.Relu() - linear = paddle.activation.Linear() - - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) - fc1 = paddle.layer.fc(input=emb, - size=hid_dim, - act=linear, - bias_attr=bias_attr) - lstm1 = paddle.layer.lstmemory( - input=fc1, act=relu, bias_attr=bias_attr) + fc1 = fluid.layers.fc(input=emb, size=hid_dim) + lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim) inputs = [fc1, lstm1] + for i in range(2, stacked_num + 1): - fc = paddle.layer.fc(input=inputs, - size=hid_dim, - act=linear, - param_attr=para_attr, - bias_attr=bias_attr) - lstm = paddle.layer.lstmemory( - input=fc, - reverse=(i % 2) == 0, - act=relu, - bias_attr=bias_attr) + fc = fluid.layers.fc(input=inputs, size=hid_dim) + lstm, cell = fluid.layers.dynamic_lstm( + input=fc, size=hid_dim, is_reverse=(i % 2) == 0) inputs = [fc, lstm] - fc_last = paddle.layer.pooling( - input=inputs[0], pooling_type=paddle.pooling.Max()) - lstm_last = paddle.layer.pooling( - input=inputs[1], pooling_type=paddle.pooling.Max()) - output = paddle.layer.fc(input=[fc_last, lstm_last], - size=class_dim, - act=paddle.activation.Softmax(), - bias_attr=bias_attr, - param_attr=para_attr) - - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost, output + fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max') + lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max') + + prediction = fluid.layers.fc(input=[fc_last, lstm_last], + size=class_dim, + act='softmax') + return prediction + ``` 1. Define input data and its dimension @@ -278,14 +242,7 @@ To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. ```python -word_dict = paddle.dataset.imdb.word_dict() -dict_dim = len(word_dict) -class_dim = 2 - -# option 1 -[cost, output] = convolution_net(dict_dim, class_dim=class_dim) -# option 2 -# [cost, output] = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3) +TODO ``` ## Model Training diff --git a/06.understand_sentiment/train_conv.py b/06.understand_sentiment/train_conv.py new file mode 100644 index 00000000..b7bf4a85 --- /dev/null +++ b/06.understand_sentiment/train_conv.py @@ -0,0 +1,155 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
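+
+# This script trains the Text CNN model (convolution_net) for IMDB sentiment
+# classification with PaddlePaddle Fluid and then runs inference with the
+# saved parameters.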
+ +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +from functools import partial +import numpy as np + +CLASS_DIM = 2 +EMB_DIM = 128 +HID_DIM = 512 +BATCH_SIZE = 128 + + +def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim): + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) + conv_3 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=3, + act="tanh", + pool_type="sqrt") + conv_4 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=4, + act="tanh", + pool_type="sqrt") + prediction = fluid.layers.fc( + input=[conv_3, conv_4], size=class_dim, act="softmax") + return prediction + + +def inference_program(word_dict): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + + dict_dim = len(word_dict) + net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM) + return net + + +def train_program(word_dict): + prediction = inference_program(word_dict) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(cost) + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return [avg_cost, accuracy] + + +def optimizer_func(): + return fluid.optimizer.Adagrad(learning_rate=0.002) + + +def train(use_cuda, train_program, params_dirname): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + + word_dict = paddle.dataset.imdb.word_dict() + trainer = fluid.Trainer( + train_func=partial(train_program, word_dict), + place=place, + optimizer_func=optimizer_func) + + def event_handler(event): + if isinstance(event, fluid.EndEpochEvent): + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + avg_cost, acc = trainer.test( + reader=test_reader, feed_order=['words', 'label']) + + print("avg_cost: %s" % avg_cost) + print("acc : %s" % acc) + + if acc > 0.2: # Smaller value to increase CI speed + trainer.save_params(params_dirname) + trainer.stop() + + else: + print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.epoch + 1, avg_cost, acc)) + if math.isnan(avg_cost): + sys.exit("got NaN loss, training failed.") + elif isinstance(event, fluid.EndStepEvent): + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + if event.step == 1: # Run 2 iterations to speed CI + trainer.save_params(params_dirname) + trainer.stop() + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) + + trainer.train( + num_epochs=1, + event_handler=event_handler, + reader=train_reader, + feed_order=['words', 'label']) + + +def infer(use_cuda, inference_program, params_dirname=None): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + word_dict = paddle.dataset.imdb.word_dict() + + inferencer = fluid.Inferencer( + infer_func=partial(inference_program, word_dict), + param_path=params_dirname, + place=place) + + # Setup input by creating LoDTensor to represent sequence of words. + # Here each word is the basic element of the LoDTensor and the shape of + # each word (base_shape) should be [1] since it is simply an index to + # look up for the corresponding word vector. + # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], + # which has only one lod level. 
Then the created LoDTensor will have only + # one higher level structure (sequence of words, or sentence) than the basic + # element (word). Hence the LoDTensor will hold data for three sentences of + # length 3, 4 and 2, respectively. + # Note that lod info should be a list of lists. + lod = [[3, 4, 2]] + base_shape = [1] + # The range of random integers is [low, high] + tensor_words = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=len(word_dict) - 1) + results = inferencer.infer({'words': tensor_words}) + print("infer results: ", results) + + +def main(use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + params_dirname = "understand_sentiment_conv.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) + + +if __name__ == '__main__': + for use_cuda in (False, True): + main(use_cuda=use_cuda) diff --git a/06.understand_sentiment/train_dyn_rnn.py b/06.understand_sentiment/train_dyn_rnn.py new file mode 100644 index 00000000..aa7c567b --- /dev/null +++ b/06.understand_sentiment/train_dyn_rnn.py @@ -0,0 +1,171 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +from functools import partial +import numpy as np + +CLASS_DIM = 2 +EMB_DIM = 128 +BATCH_SIZE = 128 +LSTM_SIZE = 128 + + +def dynamic_rnn_lstm(data, input_dim, class_dim, emb_dim, lstm_size): + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) + sentence = fluid.layers.fc(input=emb, size=lstm_size, act='tanh') + + rnn = fluid.layers.DynamicRNN() + with rnn.block(): + word = rnn.step_input(sentence) + prev_hidden = rnn.memory(value=0.0, shape=[lstm_size]) + prev_cell = rnn.memory(value=0.0, shape=[lstm_size]) + + def gate_common(ipt, hidden, size): + gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True) + gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False) + return gate0 + gate1 + + forget_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, + lstm_size)) + input_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, + lstm_size)) + output_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, + lstm_size)) + cell_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, + lstm_size)) + + cell = forget_gate * prev_cell + input_gate * cell_gate + hidden = output_gate * fluid.layers.tanh(x=cell) + rnn.update_memory(prev_cell, cell) + rnn.update_memory(prev_hidden, hidden) + rnn.output(hidden) + + last = fluid.layers.sequence_last_step(rnn()) + prediction = fluid.layers.fc(input=last, size=class_dim, act="softmax") + return prediction + + +def inference_program(word_dict): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + + dict_dim = len(word_dict) + pred = dynamic_rnn_lstm(data, dict_dim, CLASS_DIM, EMB_DIM, LSTM_SIZE) + return pred + + +def 
train_program(word_dict): + prediction = inference_program(word_dict) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(cost) + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return [avg_cost, accuracy] + + +def optimizer_func(): + return fluid.optimizer.Adagrad(learning_rate=0.002) + + +def train(use_cuda, train_program, params_dirname): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + + word_dict = paddle.dataset.imdb.word_dict() + trainer = fluid.Trainer( + train_func=partial(train_program, word_dict), + place=place, + optimizer_func=optimizer_func) + + def event_handler(event): + if isinstance(event, fluid.EndEpochEvent): + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + avg_cost, acc = trainer.test( + reader=test_reader, feed_order=['words', 'label']) + + print("avg_cost: %s" % avg_cost) + print("acc : %s" % acc) + + if acc > 0.2: # Smaller value to increase CI speed + trainer.save_params(params_dirname) + trainer.stop() + + else: + print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.epoch + 1, avg_cost, acc)) + if math.isnan(avg_cost): + sys.exit("got NaN loss, training failed.") + elif isinstance(event, fluid.EndStepEvent): + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + if event.step == 1: # Run 2 iterations to speed CI + trainer.save_params(params_dirname) + trainer.stop() + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) + + trainer.train( + num_epochs=1, + event_handler=event_handler, + reader=train_reader, + feed_order=['words', 'label']) + + +def infer(use_cuda, inference_program, params_dirname=None): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + word_dict = paddle.dataset.imdb.word_dict() + + inferencer = fluid.Inferencer( + infer_func=partial(inference_program, word_dict), + param_path=params_dirname, + place=place) + + # Setup input by creating LoDTensor to represent sequence of words. + # Here each word is the basic element of the LoDTensor and the shape of + # each word (base_shape) should be [1] since it is simply an index to + # look up for the corresponding word vector. + # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], + # which has only one lod level. Then the created LoDTensor will have only + # one higher level structure (sequence of words, or sentence) than the basic + # element (word). Hence the LoDTensor will hold data for three sentences of + # length 3, 4 and 2, respectively. + # Note that lod info should be a list of lists. 
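+    # With lod = [[3, 4, 2]] the tensor created below holds 3 + 4 + 2 = 9
+    # random word ids in total, grouped into three sequences of lengths 3, 4
+    # and 2 that stand in for three short reviews.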
+ lod = [[3, 4, 2]] + base_shape = [1] + # The range of random integers is [low, high] + tensor_words = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=len(word_dict) - 1) + results = inferencer.infer({'words': tensor_words}) + print("infer results: ", results) + + +def main(use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + params_dirname = "understand_sentiment_conv.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) + + +if __name__ == '__main__': + for use_cuda in (False, True): + main(use_cuda=use_cuda) diff --git a/06.understand_sentiment/train_stacked_lstm.py b/06.understand_sentiment/train_stacked_lstm.py new file mode 100644 index 00000000..923e7107 --- /dev/null +++ b/06.understand_sentiment/train_stacked_lstm.py @@ -0,0 +1,162 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +from functools import partial +import numpy as np + +CLASS_DIM = 2 +EMB_DIM = 128 +HID_DIM = 512 +STACKED_NUM = 3 +BATCH_SIZE = 128 + + +def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num): + assert stacked_num % 2 == 1 + + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) + + fc1 = fluid.layers.fc(input=emb, size=hid_dim) + lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim) + + inputs = [fc1, lstm1] + + for i in range(2, stacked_num + 1): + fc = fluid.layers.fc(input=inputs, size=hid_dim) + lstm, cell = fluid.layers.dynamic_lstm( + input=fc, size=hid_dim, is_reverse=(i % 2) == 0) + inputs = [fc, lstm] + + fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max') + lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max') + + prediction = fluid.layers.fc( + input=[fc_last, lstm_last], size=class_dim, act='softmax') + return prediction + + +def inference_program(word_dict): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + + dict_dim = len(word_dict) + net = stacked_lstm_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM, + STACKED_NUM) + return net + + +def train_program(word_dict): + prediction = inference_program(word_dict) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(cost) + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return [avg_cost, accuracy] + + +def optimizer_func(): + return fluid.optimizer.Adagrad(learning_rate=0.002) + + +def train(use_cuda, train_program, params_dirname): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + + word_dict = paddle.dataset.imdb.word_dict() + trainer = fluid.Trainer( + train_func=partial(train_program, word_dict), + place=place, + optimizer_func=optimizer_func) + + def event_handler(event): + if 
isinstance(event, fluid.EndEpochEvent): + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + avg_cost, acc = trainer.test( + reader=test_reader, feed_order=['words', 'label']) + + print("avg_cost: %s" % avg_cost) + print("acc : %s" % acc) + + if acc > 0.2: # Smaller value to increase CI speed + trainer.save_params(params_dirname) + trainer.stop() + + else: + print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.epoch + 1, avg_cost, acc)) + if math.isnan(avg_cost): + sys.exit("got NaN loss, training failed.") + elif isinstance(event, fluid.EndStepEvent): + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + if event.step == 1: # Run 2 iterations to speed CI + trainer.save_params(params_dirname) + trainer.stop() + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) + + trainer.train( + num_epochs=1, + event_handler=event_handler, + reader=train_reader, + feed_order=['words', 'label']) + + +def infer(use_cuda, inference_program, params_dirname=None): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + word_dict = paddle.dataset.imdb.word_dict() + + inferencer = fluid.Inferencer( + infer_func=partial(inference_program, word_dict), + param_path=params_dirname, + place=place) + + # Setup input by creating LoDTensor to represent sequence of words. + # Here each word is the basic element of the LoDTensor and the shape of + # each word (base_shape) should be [1] since it is simply an index to + # look up for the corresponding word vector. + # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], + # which has only one lod level. Then the created LoDTensor will have only + # one higher level structure (sequence of words, or sentence) than the basic + # element (word). Hence the LoDTensor will hold data for three sentences of + # length 3, 4 and 2, respectively. + # Note that lod info should be a list of lists. 
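+    # The random ids below only exercise the inference path; real input would
+    # first be converted from review text to word ids using word_dict.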
+ lod = [[3, 4, 2]] + base_shape = [1] + # The range of random integers is [low, high] + tensor_words = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=len(word_dict) - 1) + results = inferencer.infer({'words': tensor_words}) + print("infer results: ", results) + + +def main(use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + params_dirname = "understand_sentiment_stacked_lstm.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) + + +if __name__ == '__main__': + for use_cuda in (False, True): + main(use_cuda=use_cuda) From 6c82719911b1a5106dcbf5caafff4c30f8fed9be Mon Sep 17 00:00:00 2001 From: Nicky Date: Wed, 13 Jun 2018 14:52:54 -0700 Subject: [PATCH 2/4] Update README and confirm everything runs with jupyter for sentimental analysis --- 06.understand_sentiment/README.md | 194 +++++++++++------- 06.understand_sentiment/index.html | 194 +++++++++++------- 06.understand_sentiment/train.py | 161 --------------- 06.understand_sentiment/train_conv.py | 53 ++--- 06.understand_sentiment/train_dyn_rnn.py | 5 +- 06.understand_sentiment/train_stacked_lstm.py | 5 +- 6 files changed, 269 insertions(+), 343 deletions(-) delete mode 100644 06.understand_sentiment/train.py diff --git a/06.understand_sentiment/README.md b/06.understand_sentiment/README.md index c43b811c..6d3a6657 100644 --- a/06.understand_sentiment/README.md +++ b/06.understand_sentiment/README.md @@ -98,15 +98,21 @@ We use [IMDB](http://ai.stanford.edu/%7Eamaas/data/sentiment/) dataset for senti After issuing a command `python train.py`, training will start immediately. The details will be unpacked by the following sessions to see how it works. -## Model Structure +## Model Configuration -### Initialize PaddlePaddle - -We must import and initialize Paddle. +Our program starts with importing necessary packages and initializing some global variables: ```python import paddle import paddle.fluid as fluid +from functools import partial +import numpy as np + +CLASS_DIM = 2 +EMB_DIM = 128 +HID_DIM = 512 +BATCH_SIZE = 128 +USE_GPU = False ``` As alluded to in section [Model Overview](#model-overview), here we provide the implementations of both Text CNN and Stacked-bidirectional LSTM models. @@ -115,10 +121,10 @@ As alluded to in section [Model Overview](#model-overview), here we provide the We create a neural network `convolution_net` as the following snippet code. -Note: `paddle.networks.sequence_conv_pool` includes both convolution and pooling layer operations. +Note: `fluid.nets.sequence_conv_pool` includes both convolution and pooling layer operations. ```python -def convolution_net(data, input_dim, class_dim=2, emb_dim=128, hid_dim=128): +def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim): emb = fluid.layers.embedding( input=data, size=[input_dim, emb_dim], is_sparse=True) conv_3 = fluid.nets.sequence_conv_pool( @@ -133,32 +139,22 @@ def convolution_net(data, input_dim, class_dim=2, emb_dim=128, hid_dim=128): filter_size=4, act="tanh", pool_type="sqrt") - prediction = fluid.layers.fc(input=[conv_3, conv_4], - size=class_dim, - act="softmax") + prediction = fluid.layers.fc( + input=[conv_3, conv_4], size=class_dim, act="softmax") return prediction ``` +Parameter `input_dim` denotes the dictionary size, and `class_dim` is the number of categories. -1. Define input data and its dimension - - Parameter `input_dim` denotes the dictionary size, and `class_dim` is the number of categories. 
In `convolution_net`, the input to the network is defined in `paddle.layer.data`. - -1. Define Classifier - - The above Text CNN network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. - -1. Define Loss Function +The above Text CNN network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. - In the context of supervised learning, labels of the training set are defined in `paddle.layer.data`, too. During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. -#### Stacked bidirectional LSTM +### Stacked bidirectional LSTM We create a neural network `stacked_lstm_net` as below. ```python def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num): - assert stacked_num % 2 == 1 emb = fluid.layers.embedding( input=data, size=[input_dim, emb_dim], is_sparse=True) @@ -183,104 +179,150 @@ def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num): return prediction ``` +The above stacked bidirectional LSTM network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. -1. Define input data and its dimension +To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. In below steps, we will go with `convolution_net`. - Parameter `input_dim` denotes the dictionary size, and `class_dim` is the number of categories. In `stacked_lstm_net`, the input to the network is defined in `paddle.layer.data`. +Next we define a `inference_program` that simply uses `convolution_net` to predict output with the input from `fluid.layer.data`. -1. Define Classifier - - The above stacked bidirectional LSTM network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. +```python +def inference_program(word_dict): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) -1. Define Loss Function + dict_dim = len(word_dict) + net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM) + return net +``` - In the context of supervised learning, labels of the training set are defined in `paddle.layer.data`, too. During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. +Then we define a `training_program` that uses the result from `inference_program` to compute the cost with label data. +Also define `optimizer_func` to specify the optimizer. -To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. +In the context of supervised learning, labels of the training set are defined in `paddle.layer.data` too. 
During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. ```python -TODO +def train_program(word_dict): + prediction = inference_program(word_dict) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(cost) + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return [avg_cost, accuracy] + + +def optimizer_func(): + return fluid.optimizer.Adagrad(learning_rate=0.002) ``` ## Model Training -### Define Parameters +### Specify training environment -First, we create the model parameters according to the previous model configuration `cost`. +Specify your training environment, you should specify if the training is on CPU or GPU. ```python -# create parameters -parameters = paddle.parameters.create(cost) +use_cuda = False +place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() ``` -### Create Trainer +### Datafeeder Configuration -Before jumping into creating a training module, algorithm setting is also necessary. -Here we specified `Adam` optimization algorithm via `paddle.optimizer`. +Next we define data feeders for test and train. The feeder reads a `buf_size` of data each time and feed them to the training/testing process. +`paddle.dataset.imdb.train` will yield records during each pass, after shuffling, a batch input of `BATCH_SIZE` is generated for training. -```python -# create optimizer -adam_optimizer = paddle.optimizer.Adam( - learning_rate=2e-3, - regularization=paddle.optimizer.L2Regularization(rate=8e-4), - model_average=paddle.optimizer.ModelAverage(average_window=0.5)) - -# create trainer -trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=adam_optimizer) -``` +Notice for loading and reading IMDB data, it could take up to 1 minute. Please be patient. -### Training +```python -`paddle.dataset.imdb.train()` will yield records during each pass, after shuffling, a batch input is generated for training. +print("Loading IMDB word dict....") +word_dict = paddle.dataset.imdb.word_dict() -```python +print ("Reading training data....") train_reader = paddle.batch( paddle.reader.shuffle( - lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000), - batch_size=100) + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) +``` + + +### Create Trainer -test_reader = paddle.batch( - lambda: paddle.dataset.imdb.test(word_dict), batch_size=100) +Create a trainer that takes `train_program` as input and specify optimizer function. + +```python +trainer = fluid.Trainer( + train_func=partial(train_program, word_dict), + place=place, + optimizer_func=optimizer_func) ``` -`feeding` is devoted to specifying the correspondence between each yield record and `paddle.layer.data`. For instance, the first column of data generated by `paddle.dataset.imdb.train()` corresponds to `word` feature. +### Feeding Data + +`feed_order` is devoted to specifying the correspondence between each yield record and `paddle.layer.data`. For instance, the first column of data generated by `imdb.train` corresponds to `words`. ```python -feeding = {'word': 0, 'label': 1} +feed_order = ['words', 'label'] ``` -Callback function `event_handler` will be invoked to track training progress when a pre-defined event happens. 
+### Event Handler + +Callback function `event_handler` will be called during training when a pre-defined event happens. +For example, we can check the cost by `trainer.test` when `EndStepEvent` occurs ```python +# Specify the directory path to save the parameters +params_dirname = "understand_sentiment_conv.inference.model" + def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "\nPass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) - else: - sys.stdout.write('.') - sys.stdout.flush() - if isinstance(event, paddle.event.EndPass): - with open('./params_pass_%d.tar' % event.pass_id, 'w') as f: - trainer.save_parameter_to_tar(f) - - result = trainer.test(reader=test_reader, feeding=feeding) - print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) + if isinstance(event, fluid.EndStepEvent): + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + + if event.step == 10: + trainer.save_params(params_dirname) + trainer.stop() ``` -Finally, we can invoke `trainer.train` to start training: +### Training + +Finally, we invoke `trainer.train` to start training with `num_epochs` and other parameters. ```python trainer.train( - reader=train_reader, + num_epochs=1, event_handler=event_handler, - feeding=feeding, - num_passes=10) + reader=train_reader, + feed_order=feed_order) ``` +## Inference + +### Create Inferencer + +Initialize Inferencer with `inference_program` and `params_dirname` which is where we save params from training. + +```python +inferencer = fluid.Inferencer( + infer_func=partial(inference_program, word_dict), + param_path=params_dirname, + place=place) +``` + +### Infer + +Now we can infer with inputs that we provide in `feed_order` during training. + +```python +lod = [[3, 4, 2]] +base_shape = [1] +# The range of random integers is [low, high] +tensor_words = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=len(word_dict) - 1) +results = inferencer.infer({'words': tensor_words}) +print("infer results: ", results) + +``` ## Conclusion diff --git a/06.understand_sentiment/index.html b/06.understand_sentiment/index.html index 1304bba1..b9f19828 100644 --- a/06.understand_sentiment/index.html +++ b/06.understand_sentiment/index.html @@ -140,15 +140,21 @@ After issuing a command `python train.py`, training will start immediately. The details will be unpacked by the following sessions to see how it works. -## Model Structure +## Model Configuration -### Initialize PaddlePaddle - -We must import and initialize Paddle. +Our program starts with importing necessary packages and initializing some global variables: ```python import paddle import paddle.fluid as fluid +from functools import partial +import numpy as np + +CLASS_DIM = 2 +EMB_DIM = 128 +HID_DIM = 512 +BATCH_SIZE = 128 +USE_GPU = False ``` As alluded to in section [Model Overview](#model-overview), here we provide the implementations of both Text CNN and Stacked-bidirectional LSTM models. @@ -157,10 +163,10 @@ We create a neural network `convolution_net` as the following snippet code. -Note: `paddle.networks.sequence_conv_pool` includes both convolution and pooling layer operations. +Note: `fluid.nets.sequence_conv_pool` includes both convolution and pooling layer operations. 
```python -def convolution_net(data, input_dim, class_dim=2, emb_dim=128, hid_dim=128): +def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim): emb = fluid.layers.embedding( input=data, size=[input_dim, emb_dim], is_sparse=True) conv_3 = fluid.nets.sequence_conv_pool( @@ -175,32 +181,22 @@ filter_size=4, act="tanh", pool_type="sqrt") - prediction = fluid.layers.fc(input=[conv_3, conv_4], - size=class_dim, - act="softmax") + prediction = fluid.layers.fc( + input=[conv_3, conv_4], size=class_dim, act="softmax") return prediction ``` +Parameter `input_dim` denotes the dictionary size, and `class_dim` is the number of categories. -1. Define input data and its dimension - - Parameter `input_dim` denotes the dictionary size, and `class_dim` is the number of categories. In `convolution_net`, the input to the network is defined in `paddle.layer.data`. - -1. Define Classifier - - The above Text CNN network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. - -1. Define Loss Function +The above Text CNN network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. - In the context of supervised learning, labels of the training set are defined in `paddle.layer.data`, too. During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. -#### Stacked bidirectional LSTM +### Stacked bidirectional LSTM We create a neural network `stacked_lstm_net` as below. ```python def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num): - assert stacked_num % 2 == 1 emb = fluid.layers.embedding( input=data, size=[input_dim, emb_dim], is_sparse=True) @@ -225,104 +221,150 @@ return prediction ``` +The above stacked bidirectional LSTM network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. -1. Define input data and its dimension +To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. In below steps, we will go with `convolution_net`. - Parameter `input_dim` denotes the dictionary size, and `class_dim` is the number of categories. In `stacked_lstm_net`, the input to the network is defined in `paddle.layer.data`. +Next we define a `inference_program` that simply uses `convolution_net` to predict output with the input from `fluid.layer.data`. -1. Define Classifier - - The above stacked bidirectional LSTM network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. +```python +def inference_program(word_dict): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) -1. 
Define Loss Function + dict_dim = len(word_dict) + net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM) + return net +``` - In the context of supervised learning, labels of the training set are defined in `paddle.layer.data`, too. During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. +Then we define a `training_program` that uses the result from `inference_program` to compute the cost with label data. +Also define `optimizer_func` to specify the optimizer. -To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. +In the context of supervised learning, labels of the training set are defined in `paddle.layer.data` too. During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. ```python -TODO +def train_program(word_dict): + prediction = inference_program(word_dict) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(cost) + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return [avg_cost, accuracy] + + +def optimizer_func(): + return fluid.optimizer.Adagrad(learning_rate=0.002) ``` ## Model Training -### Define Parameters +### Specify training environment -First, we create the model parameters according to the previous model configuration `cost`. +Specify your training environment, you should specify if the training is on CPU or GPU. ```python -# create parameters -parameters = paddle.parameters.create(cost) +use_cuda = False +place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() ``` -### Create Trainer +### Datafeeder Configuration -Before jumping into creating a training module, algorithm setting is also necessary. -Here we specified `Adam` optimization algorithm via `paddle.optimizer`. +Next we define data feeders for test and train. The feeder reads a `buf_size` of data each time and feed them to the training/testing process. +`paddle.dataset.imdb.train` will yield records during each pass, after shuffling, a batch input of `BATCH_SIZE` is generated for training. -```python -# create optimizer -adam_optimizer = paddle.optimizer.Adam( - learning_rate=2e-3, - regularization=paddle.optimizer.L2Regularization(rate=8e-4), - model_average=paddle.optimizer.ModelAverage(average_window=0.5)) - -# create trainer -trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=adam_optimizer) -``` +Notice for loading and reading IMDB data, it could take up to 1 minute. Please be patient. -### Training +```python -`paddle.dataset.imdb.train()` will yield records during each pass, after shuffling, a batch input is generated for training. +print("Loading IMDB word dict....") +word_dict = paddle.dataset.imdb.word_dict() -```python +print ("Reading training data....") train_reader = paddle.batch( paddle.reader.shuffle( - lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000), - batch_size=100) + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) +``` + + +### Create Trainer -test_reader = paddle.batch( - lambda: paddle.dataset.imdb.test(word_dict), batch_size=100) +Create a trainer that takes `train_program` as input and specify optimizer function. 
+ +```python +trainer = fluid.Trainer( + train_func=partial(train_program, word_dict), + place=place, + optimizer_func=optimizer_func) ``` -`feeding` is devoted to specifying the correspondence between each yield record and `paddle.layer.data`. For instance, the first column of data generated by `paddle.dataset.imdb.train()` corresponds to `word` feature. +### Feeding Data + +`feed_order` is devoted to specifying the correspondence between each yield record and `paddle.layer.data`. For instance, the first column of data generated by `imdb.train` corresponds to `words`. ```python -feeding = {'word': 0, 'label': 1} +feed_order = ['words', 'label'] ``` -Callback function `event_handler` will be invoked to track training progress when a pre-defined event happens. +### Event Handler + +Callback function `event_handler` will be called during training when a pre-defined event happens. +For example, we can check the cost by `trainer.test` when `EndStepEvent` occurs ```python +# Specify the directory path to save the parameters +params_dirname = "understand_sentiment_conv.inference.model" + def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "\nPass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) - else: - sys.stdout.write('.') - sys.stdout.flush() - if isinstance(event, paddle.event.EndPass): - with open('./params_pass_%d.tar' % event.pass_id, 'w') as f: - trainer.save_parameter_to_tar(f) - - result = trainer.test(reader=test_reader, feeding=feeding) - print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) + if isinstance(event, fluid.EndStepEvent): + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + + if event.step == 10: + trainer.save_params(params_dirname) + trainer.stop() ``` -Finally, we can invoke `trainer.train` to start training: +### Training + +Finally, we invoke `trainer.train` to start training with `num_epochs` and other parameters. ```python trainer.train( - reader=train_reader, + num_epochs=1, event_handler=event_handler, - feeding=feeding, - num_passes=10) + reader=train_reader, + feed_order=feed_order) ``` +## Inference + +### Create Inferencer + +Initialize Inferencer with `inference_program` and `params_dirname` which is where we save params from training. + +```python +inferencer = fluid.Inferencer( + infer_func=partial(inference_program, word_dict), + param_path=params_dirname, + place=place) +``` + +### Infer + +Now we can infer with inputs that we provide in `feed_order` during training. + +```python +lod = [[3, 4, 2]] +base_shape = [1] +# The range of random integers is [low, high] +tensor_words = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=len(word_dict) - 1) +results = inferencer.infer({'words': tensor_words}) +print("infer results: ", results) + +``` ## Conclusion diff --git a/06.understand_sentiment/train.py b/06.understand_sentiment/train.py deleted file mode 100644 index 58f61700..00000000 --- a/06.understand_sentiment/train.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys, os -import paddle.v2 as paddle - -with_gpu = os.getenv('WITH_GPU', '0') != '0' - - -def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128): - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) - conv_3 = paddle.networks.sequence_conv_pool( - input=emb, context_len=3, hidden_size=hid_dim) - conv_4 = paddle.networks.sequence_conv_pool( - input=emb, context_len=4, hidden_size=hid_dim) - output = paddle.layer.fc( - input=[conv_3, conv_4], size=class_dim, act=paddle.activation.Softmax()) - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost, output - - -def stacked_lstm_net(input_dim, - class_dim=2, - emb_dim=128, - hid_dim=512, - stacked_num=3): - """ - A Wrapper for sentiment classification task. - This network uses bi-directional recurrent network, - consisting three LSTM layers. This configure is referred to - the paper as following url, but use fewer layrs. - http://www.aclweb.org/anthology/P15-1109 - - input_dim: here is word dictionary dimension. - class_dim: number of categories. - emb_dim: dimension of word embedding. - hid_dim: dimension of hidden layer. - stacked_num: number of stacked lstm-hidden layer. - """ - assert stacked_num % 2 == 1 - - fc_para_attr = paddle.attr.Param(learning_rate=1e-3) - lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.) - para_attr = [fc_para_attr, lstm_para_attr] - bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.) - relu = paddle.activation.Relu() - linear = paddle.activation.Linear() - - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) - - fc1 = paddle.layer.fc( - input=emb, size=hid_dim, act=linear, bias_attr=bias_attr) - lstm1 = paddle.layer.lstmemory(input=fc1, act=relu, bias_attr=bias_attr) - - inputs = [fc1, lstm1] - for i in range(2, stacked_num + 1): - fc = paddle.layer.fc( - input=inputs, - size=hid_dim, - act=linear, - param_attr=para_attr, - bias_attr=bias_attr) - lstm = paddle.layer.lstmemory( - input=fc, reverse=(i % 2) == 0, act=relu, bias_attr=bias_attr) - inputs = [fc, lstm] - - fc_last = paddle.layer.pooling( - input=inputs[0], pooling_type=paddle.pooling.Max()) - lstm_last = paddle.layer.pooling( - input=inputs[1], pooling_type=paddle.pooling.Max()) - output = paddle.layer.fc( - input=[fc_last, lstm_last], - size=class_dim, - act=paddle.activation.Softmax(), - bias_attr=bias_attr, - param_attr=para_attr) - - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost, output - - -if __name__ == '__main__': - # init - paddle.init(use_gpu=with_gpu) - - #data - print 'load dictionary...' 
- word_dict = paddle.dataset.imdb.word_dict() - dict_dim = len(word_dict) - class_dim = 2 - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=1000), - batch_size=100) - test_reader = paddle.batch( - paddle.dataset.imdb.test(word_dict), batch_size=100) - - feeding = {'word': 0, 'label': 1} - - # network config - # Please choose the way to build the network - # by uncommenting the corresponding line. - [cost, output] = convolution_net(dict_dim, class_dim=class_dim) - # [cost, output] = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3) - - # create parameters - parameters = paddle.parameters.create(cost) - - # create optimizer - adam_optimizer = paddle.optimizer.Adam( - learning_rate=2e-3, - regularization=paddle.optimizer.L2Regularization(rate=8e-4), - model_average=paddle.optimizer.ModelAverage(average_window=0.5)) - - # create trainer - trainer = paddle.trainer.SGD( - cost=cost, parameters=parameters, update_equation=adam_optimizer) - - # End batch and end pass event handler - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "\nPass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) - else: - sys.stdout.write('.') - sys.stdout.flush() - if isinstance(event, paddle.event.EndPass): - with open('./params_pass_%d.tar' % event.pass_id, 'w') as f: - trainer.save_parameter_to_tar(f) - - result = trainer.test(reader=test_reader, feeding=feeding) - print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) - - # Save the inference topology to protobuf. - inference_topology = paddle.topology.Topology(layers=output) - with open("./inference_topology.pkl", 'wb') as f: - inference_topology.serialize_for_inference(f) - - trainer.train( - reader=train_reader, - event_handler=event_handler, - feeding=feeding, - num_passes=20) diff --git a/06.understand_sentiment/train_conv.py b/06.understand_sentiment/train_conv.py index b7bf4a85..860a8011 100644 --- a/06.understand_sentiment/train_conv.py +++ b/06.understand_sentiment/train_conv.py @@ -14,6 +14,7 @@ from __future__ import print_function +import os import paddle import paddle.fluid as fluid from functools import partial @@ -68,50 +69,50 @@ def optimizer_func(): def train(use_cuda, train_program, params_dirname): - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + import time + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + print("Loading IMDB word dict....") word_dict = paddle.dataset.imdb.word_dict() + + print("Reading training data....") + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) + + print("Reading testing data....") + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + trainer = fluid.Trainer( train_func=partial(train_program, word_dict), place=place, optimizer_func=optimizer_func) + feed_order = ['words', 'label'] + def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): - test_reader = paddle.batch( - paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + if isinstance(event, fluid.EndStepEvent): avg_cost, acc = trainer.test( - reader=test_reader, feed_order=['words', 'label']) + reader=test_reader, feed_order=feed_order) - print("avg_cost: %s" % avg_cost) - print("acc : %s" % acc) + print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.step, avg_cost, acc)) - if acc > 0.2: # 
Smaller value to increase CI speed - trainer.save_params(params_dirname) - trainer.stop() - - else: - print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( - event.epoch + 1, avg_cost, acc)) - if math.isnan(avg_cost): - sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, map(np.array, event.metrics))) - if event.step == 1: # Run 2 iterations to speed CI + + if event.step == 10: # Adjust this number for accuracy trainer.save_params(params_dirname) trainer.stop() - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=25000), - batch_size=BATCH_SIZE) - trainer.train( num_epochs=1, event_handler=event_handler, reader=train_reader, - feed_order=['words', 'label']) + feed_order=feed_order) def infer(use_cuda, inference_program, params_dirname=None): @@ -151,5 +152,5 @@ def main(use_cuda): if __name__ == '__main__': - for use_cuda in (False, True): - main(use_cuda=use_cuda) + use_cuda = os.getenv('WITH_GPU', '0') != '0' + main(use_cuda) diff --git a/06.understand_sentiment/train_dyn_rnn.py b/06.understand_sentiment/train_dyn_rnn.py index aa7c567b..e139d670 100644 --- a/06.understand_sentiment/train_dyn_rnn.py +++ b/06.understand_sentiment/train_dyn_rnn.py @@ -14,6 +14,7 @@ from __future__ import print_function +import os import paddle import paddle.fluid as fluid from functools import partial @@ -167,5 +168,5 @@ def main(use_cuda): if __name__ == '__main__': - for use_cuda in (False, True): - main(use_cuda=use_cuda) + use_cuda = os.getenv('WITH_GPU', '0') != '0' + main(use_cuda) diff --git a/06.understand_sentiment/train_stacked_lstm.py b/06.understand_sentiment/train_stacked_lstm.py index 923e7107..0a3b2015 100644 --- a/06.understand_sentiment/train_stacked_lstm.py +++ b/06.understand_sentiment/train_stacked_lstm.py @@ -14,6 +14,7 @@ from __future__ import print_function +import os import paddle import paddle.fluid as fluid from functools import partial @@ -158,5 +159,5 @@ def main(use_cuda): if __name__ == '__main__': - for use_cuda in (False, True): - main(use_cuda=use_cuda) + use_cuda = os.getenv('WITH_GPU', '0') != '0' + main(use_cuda) From cec2c1da543d93754a575c6b28f718c2d0c50a02 Mon Sep 17 00:00:00 2001 From: Nicky Date: Wed, 13 Jun 2018 15:07:25 -0700 Subject: [PATCH 3/4] clean up --- 06.understand_sentiment/train_dyn_rnn.py | 1 + 06.understand_sentiment/train_stacked_lstm.py | 1 + 2 files changed, 2 insertions(+) diff --git a/06.understand_sentiment/train_dyn_rnn.py b/06.understand_sentiment/train_dyn_rnn.py index e139d670..f548a27f 100644 --- a/06.understand_sentiment/train_dyn_rnn.py +++ b/06.understand_sentiment/train_dyn_rnn.py @@ -24,6 +24,7 @@ EMB_DIM = 128 BATCH_SIZE = 128 LSTM_SIZE = 128 +USE_GPU = False def dynamic_rnn_lstm(data, input_dim, class_dim, emb_dim, lstm_size): diff --git a/06.understand_sentiment/train_stacked_lstm.py b/06.understand_sentiment/train_stacked_lstm.py index 0a3b2015..104215d7 100644 --- a/06.understand_sentiment/train_stacked_lstm.py +++ b/06.understand_sentiment/train_stacked_lstm.py @@ -25,6 +25,7 @@ HID_DIM = 512 STACKED_NUM = 3 BATCH_SIZE = 128 +USE_GPU = False def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num): From 89d558cc5166e7413e1736cf84d8865b5dc7af1c Mon Sep 17 00:00:00 2001 From: Nicky Date: Thu, 14 Jun 2018 16:32:58 -0700 Subject: [PATCH 4/4] Add inference example instead of random data and update per team's comments --- 
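As the commit message above says, inference now runs on hand-written reviews instead of a random integer LoD tensor. The condensed flow that the `infer()` functions below add looks roughly like the following sketch. It is only a sketch: it assumes the IMDB dictionary keys its unknown-word id as `'<unk>'`, and that `inferencer` has already been built with `fluid.Inferencer` from the parameters saved by one of the training scripts in this series.

```python
import paddle
import paddle.fluid as fluid

word_dict = paddle.dataset.imdb.word_dict()   # word -> integer id
place = fluid.CPUPlace()

# Three hand-written reviews replace the random ids used before.
reviews_str = ['read the book forget the movie',
               'this is a great movie',
               'this is very bad']
reviews = [s.split() for s in reviews_str]

UNK = word_dict['<unk>']   # assumed key for the out-of-vocabulary id
lod = [[word_dict.get(w, UNK) for w in r] for r in reviews]

# One LoD level holds the length of each review, so the LoDTensor knows where
# every variable-length sequence begins and ends.
base_shape = [[len(r) for r in lod]]
tensor_words = fluid.create_lod_tensor(lod, base_shape, place)

# `inferencer` is assumed to be a fluid.Inferencer restored from the saved
# parameters (see the training scripts below).
results = inferencer.infer({'words': tensor_words})
for review, probs in zip(reviews_str, results[0]):
    # probs[0]: probability of the positive class, probs[1]: negative class
    print("'%s' -> %.2f positive, %.2f negative" % (review, probs[0], probs[1]))
```

A single LoD level is enough here because every sample is just one variable-length word sequence; the per-review lengths in `base_shape` are all the layout information `create_lod_tensor` needs beyond the ids themselves.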
06.understand_sentiment/README.md | 37 +++++++-- 06.understand_sentiment/index.html | 37 +++++++-- 06.understand_sentiment/train_conv.py | 46 +++++++---- 06.understand_sentiment/train_dyn_rnn.py | 82 +++++++++++-------- 06.understand_sentiment/train_stacked_lstm.py | 82 +++++++++++-------- 5 files changed, 178 insertions(+), 106 deletions(-) diff --git a/06.understand_sentiment/README.md b/06.understand_sentiment/README.md index 6d3a6657..f427f134 100644 --- a/06.understand_sentiment/README.md +++ b/06.understand_sentiment/README.md @@ -183,7 +183,7 @@ The above stacked bidirectional LSTM network extracts high-level features and ma To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. In below steps, we will go with `convolution_net`. -Next we define a `inference_program` that simply uses `convolution_net` to predict output with the input from `fluid.layer.data`. +Next we define an `inference_program` that simply uses `convolution_net` to predict output with the input from `fluid.layer.data`. ```python def inference_program(word_dict): @@ -200,6 +200,7 @@ Also define `optimizer_func` to specify the optimizer. In the context of supervised learning, labels of the training set are defined in `paddle.layer.data` too. During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. +First result that returns from the list must be cost. ```python def train_program(word_dict): @@ -309,18 +310,38 @@ inferencer = fluid.Inferencer( place=place) ``` +### Create Lod Tensor with test data + +To do inference, we pick 3 potential reviews out of our mind as testing data. Feel free to modify any of them. +We map each word in the reviews to id from `word_dict`, replaced by 'unknown' if the word is not in `word_dict`. +Then we create lod data with the id list and use `create_lod_tensor` to create lod tensor. + +```python +reviews_str = [ + 'read the book forget the movie', 'this is a great movie', 'this is very bad' +] +reviews = [c.split() for c in reviews_str] + +UNK = word_dict[''] +lod = [] +for c in reviews: + lod.append([word_dict.get(words, UNK) for words in c]) + +base_shape = [[len(c) for c in lod]] + +tensor_words = fluid.create_lod_tensor(lod, base_shape, place) +``` + ### Infer -Now we can infer with inputs that we provide in `feed_order` during training. +Now we can infer and predict probability of positive or negative from each review above. ```python -lod = [[3, 4, 2]] -base_shape = [1] -# The range of random integers is [low, high] -tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) results = inferencer.infer({'words': tensor_words}) -print("infer results: ", results) + +for i, r in enumerate(results[0]): + print("Predict probability of ", r[0], " to be positive and ", r[1], " to be negative for review \'", reviews_str[i], "\'") + ``` diff --git a/06.understand_sentiment/index.html b/06.understand_sentiment/index.html index b9f19828..54f0a5b2 100644 --- a/06.understand_sentiment/index.html +++ b/06.understand_sentiment/index.html @@ -225,7 +225,7 @@ To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. In below steps, we will go with `convolution_net`. -Next we define a `inference_program` that simply uses `convolution_net` to predict output with the input from `fluid.layer.data`. 
+Next we define an `inference_program` that simply uses `convolution_net` to predict output with the input from `fluid.layer.data`. ```python def inference_program(word_dict): @@ -242,6 +242,7 @@ In the context of supervised learning, labels of the training set are defined in `paddle.layer.data` too. During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. +First result that returns from the list must be cost. ```python def train_program(word_dict): @@ -351,18 +352,38 @@ place=place) ``` +### Create Lod Tensor with test data + +To do inference, we pick 3 potential reviews out of our mind as testing data. Feel free to modify any of them. +We map each word in the reviews to id from `word_dict`, replaced by 'unknown' if the word is not in `word_dict`. +Then we create lod data with the id list and use `create_lod_tensor` to create lod tensor. + +```python +reviews_str = [ + 'read the book forget the movie', 'this is a great movie', 'this is very bad' +] +reviews = [c.split() for c in reviews_str] + +UNK = word_dict[''] +lod = [] +for c in reviews: + lod.append([word_dict.get(words, UNK) for words in c]) + +base_shape = [[len(c) for c in lod]] + +tensor_words = fluid.create_lod_tensor(lod, base_shape, place) +``` + ### Infer -Now we can infer with inputs that we provide in `feed_order` during training. +Now we can infer and predict probability of positive or negative from each review above. ```python -lod = [[3, 4, 2]] -base_shape = [1] -# The range of random integers is [low, high] -tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) results = inferencer.infer({'words': tensor_words}) -print("infer results: ", results) + +for i, r in enumerate(results[0]): + print("Predict probability of ", r[0], " to be positive and ", r[1], " to be negative for review \'", reviews_str[i], "\'") + ``` diff --git a/06.understand_sentiment/train_conv.py b/06.understand_sentiment/train_conv.py index 860a8011..61fe18e4 100644 --- a/06.understand_sentiment/train_conv.py +++ b/06.understand_sentiment/train_conv.py @@ -69,14 +69,11 @@ def optimizer_func(): def train(use_cuda, train_program, params_dirname): - import time - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() print("Loading IMDB word dict....") word_dict = paddle.dataset.imdb.word_dict() print("Reading training data....") - train_reader = paddle.batch( paddle.reader.shuffle( paddle.dataset.imdb.train(word_dict), buf_size=25000), @@ -95,18 +92,18 @@ def train(use_cuda, train_program, params_dirname): def event_handler(event): if isinstance(event, fluid.EndStepEvent): - avg_cost, acc = trainer.test( - reader=test_reader, feed_order=feed_order) + if event.step % 10 == 0: + avg_cost, acc = trainer.test( + reader=test_reader, feed_order=feed_order) - print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( - event.step, avg_cost, acc)) + print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.step, avg_cost, acc)) - print("Step {0}, Epoch {1} Metrics {2}".format( - event.step, event.epoch, map(np.array, event.metrics))) + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) - if event.step == 10: # Adjust this number for accuracy - trainer.save_params(params_dirname) - trainer.stop() + elif isinstance(event, fluid.EndEpochEvent): + trainer.save_params(params_dirname) trainer.train( 
num_epochs=1, @@ -134,13 +131,26 @@ def infer(use_cuda, inference_program, params_dirname=None): # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] - base_shape = [1] - # The range of random integers is [low, high] - tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) + + reviews_str = [ + 'read the book forget the movie', 'this is a great movie', + 'this is very bad' + ] + reviews = [c.split() for c in reviews_str] + + UNK = word_dict[''] + lod = [] + for c in reviews: + lod.append([word_dict.get(words, UNK) for words in c]) + + base_shape = [[len(c) for c in lod]] + + tensor_words = fluid.create_lod_tensor(lod, base_shape, place) results = inferencer.infer({'words': tensor_words}) - print("infer results: ", results) + + for i, r in enumerate(results[0]): + print("Predict probability of ", r[0], " to be positive and ", r[1], + " to be negative for review \'", reviews_str[i], "\'") def main(use_cuda): diff --git a/06.understand_sentiment/train_dyn_rnn.py b/06.understand_sentiment/train_dyn_rnn.py index f548a27f..ef3be26f 100644 --- a/06.understand_sentiment/train_dyn_rnn.py +++ b/06.understand_sentiment/train_dyn_rnn.py @@ -87,49 +87,46 @@ def optimizer_func(): def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - + print("Loading IMDB word dict....") word_dict = paddle.dataset.imdb.word_dict() + + print("Reading training data....") + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) + + print("Reading testing data....") + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + trainer = fluid.Trainer( train_func=partial(train_program, word_dict), place=place, optimizer_func=optimizer_func) + feed_order = ['words', 'label'] + def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): - test_reader = paddle.batch( - paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) - avg_cost, acc = trainer.test( - reader=test_reader, feed_order=['words', 'label']) - - print("avg_cost: %s" % avg_cost) - print("acc : %s" % acc) - - if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(params_dirname) - trainer.stop() - - else: - print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( - event.epoch + 1, avg_cost, acc)) - if math.isnan(avg_cost): - sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): - print("Step {0}, Epoch {1} Metrics {2}".format( - event.step, event.epoch, map(np.array, event.metrics))) - if event.step == 1: # Run 2 iterations to speed CI - trainer.save_params(params_dirname) - trainer.stop() + if isinstance(event, fluid.EndStepEvent): + if event.step % 10 == 0: + avg_cost, acc = trainer.test( + reader=test_reader, feed_order=feed_order) - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=25000), - batch_size=BATCH_SIZE) + print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.step, avg_cost, acc)) + + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + + elif isinstance(event, fluid.EndEpochEvent): + trainer.save_params(params_dirname) trainer.train( num_epochs=1, event_handler=event_handler, reader=train_reader, - 
feed_order=['words', 'label']) + feed_order=feed_order) def infer(use_cuda, inference_program, params_dirname=None): @@ -151,13 +148,26 @@ def infer(use_cuda, inference_program, params_dirname=None): # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] - base_shape = [1] - # The range of random integers is [low, high] - tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) + + reviews_str = [ + 'read the book forget the movie', 'this is a great movie', + 'this is very bad' + ] + reviews = [c.split() for c in reviews_str] + + UNK = word_dict[''] + lod = [] + for c in reviews: + lod.append([word_dict.get(words, UNK) for words in c]) + + base_shape = [[len(c) for c in lod]] + + tensor_words = fluid.create_lod_tensor(lod, base_shape, place) results = inferencer.infer({'words': tensor_words}) - print("infer results: ", results) + + for i, r in enumerate(results[0]): + print("Predict probability of ", r[0], " to be positive and ", r[1], + " to be negative for review \'", reviews_str[i], "\'") def main(use_cuda): diff --git a/06.understand_sentiment/train_stacked_lstm.py b/06.understand_sentiment/train_stacked_lstm.py index 104215d7..351994f8 100644 --- a/06.understand_sentiment/train_stacked_lstm.py +++ b/06.understand_sentiment/train_stacked_lstm.py @@ -78,49 +78,46 @@ def optimizer_func(): def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - + print("Loading IMDB word dict....") word_dict = paddle.dataset.imdb.word_dict() + + print("Reading training data....") + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) + + print("Reading testing data....") + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + trainer = fluid.Trainer( train_func=partial(train_program, word_dict), place=place, optimizer_func=optimizer_func) + feed_order = ['words', 'label'] + def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): - test_reader = paddle.batch( - paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) - avg_cost, acc = trainer.test( - reader=test_reader, feed_order=['words', 'label']) - - print("avg_cost: %s" % avg_cost) - print("acc : %s" % acc) - - if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(params_dirname) - trainer.stop() - - else: - print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( - event.epoch + 1, avg_cost, acc)) - if math.isnan(avg_cost): - sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): - print("Step {0}, Epoch {1} Metrics {2}".format( - event.step, event.epoch, map(np.array, event.metrics))) - if event.step == 1: # Run 2 iterations to speed CI - trainer.save_params(params_dirname) - trainer.stop() + if isinstance(event, fluid.EndStepEvent): + if event.step % 10 == 0: + avg_cost, acc = trainer.test( + reader=test_reader, feed_order=feed_order) - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=25000), - batch_size=BATCH_SIZE) + print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.step, avg_cost, acc)) + + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + + elif isinstance(event, fluid.EndEpochEvent): + 
trainer.save_params(params_dirname) trainer.train( num_epochs=1, event_handler=event_handler, reader=train_reader, - feed_order=['words', 'label']) + feed_order=feed_order) def infer(use_cuda, inference_program, params_dirname=None): @@ -142,13 +139,26 @@ def infer(use_cuda, inference_program, params_dirname=None): # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] - base_shape = [1] - # The range of random integers is [low, high] - tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) + + reviews_str = [ + 'read the book forget the movie', 'this is a great movie', + 'this is very bad' + ] + reviews = [c.split() for c in reviews_str] + + UNK = word_dict[''] + lod = [] + for c in reviews: + lod.append([word_dict.get(words, UNK) for words in c]) + + base_shape = [[len(c) for c in lod]] + + tensor_words = fluid.create_lod_tensor(lod, base_shape, place) results = inferencer.infer({'words': tensor_words}) - print("infer results: ", results) + + for i, r in enumerate(results[0]): + print("Predict probability of ", r[0], " to be positive and ", r[1], + " to be negative for review \'", reviews_str[i], "\'") def main(use_cuda):