From 016995e91bcba6c90384a4592f5866d7248b2eea Mon Sep 17 00:00:00 2001 From: sidgoyal78 Date: Thu, 7 Jun 2018 10:39:32 -0700 Subject: [PATCH 1/4] Add python files and edit readme --- 06.understand_sentiment/README.md | 125 +++++-------- 06.understand_sentiment/index.html | 125 +++++-------- 06.understand_sentiment/train_conv.py | 155 ++++++++++++++++ 06.understand_sentiment/train_dyn_rnn.py | 171 ++++++++++++++++++ 06.understand_sentiment/train_stacked_lstm.py | 162 +++++++++++++++++ 5 files changed, 570 insertions(+), 168 deletions(-) create mode 100644 06.understand_sentiment/train_conv.py create mode 100644 06.understand_sentiment/train_dyn_rnn.py create mode 100644 06.understand_sentiment/train_stacked_lstm.py diff --git a/06.understand_sentiment/README.md b/06.understand_sentiment/README.md index f682f30f..c43b811c 100644 --- a/06.understand_sentiment/README.md +++ b/06.understand_sentiment/README.md @@ -102,14 +102,11 @@ After issuing a command `python train.py`, training will start immediately. The ### Initialize PaddlePaddle -We must import and initialize PaddlePaddle (enable/disable GPU, set the number of trainers, etc). +We must import and initialize Paddle. ```python -import sys -import paddle.v2 as paddle - -# PaddlePaddle init -paddle.init(use_gpu=False, trainer_count=1) +import paddle +import paddle.fluid as fluid ``` As alluded to in section [Model Overview](#model-overview), here we provide the implementations of both Text CNN and Stacked-bidirectional LSTM models. @@ -121,20 +118,26 @@ We create a neural network `convolution_net` as the following snippet code. Note: `paddle.networks.sequence_conv_pool` includes both convolution and pooling layer operations. ```python -def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128): - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) - conv_3 = paddle.networks.sequence_conv_pool( - input=emb, context_len=3, hidden_size=hid_dim) - conv_4 = paddle.networks.sequence_conv_pool( - input=emb, context_len=4, hidden_size=hid_dim) - output = paddle.layer.fc(input=[conv_3, conv_4], - size=class_dim, - act=paddle.activation.Softmax()) - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost, output +def convolution_net(data, input_dim, class_dim=2, emb_dim=128, hid_dim=128): + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) + conv_3 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=3, + act="tanh", + pool_type="sqrt") + conv_4 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=4, + act="tanh", + pool_type="sqrt") + prediction = fluid.layers.fc(input=[conv_3, conv_4], + size=class_dim, + act="softmax") + return prediction + ``` 1. Define input data and its dimension @@ -154,70 +157,31 @@ def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128): We create a neural network `stacked_lstm_net` as below. ```python -def stacked_lstm_net(input_dim, - class_dim=2, - emb_dim=128, - hid_dim=512, - stacked_num=3): - """ - A Wrapper for sentiment classification task. - This network uses a bi-directional recurrent network, - consisting of three LSTM layers. This configuration is - motivated from the following paper, but uses few layers. - http://www.aclweb.org/anthology/P15-1109 - input_dim: here is word dictionary dimension. 
- class_dim: number of categories. - emb_dim: dimension of word embedding. - hid_dim: dimension of hidden layer. - stacked_num: number of stacked lstm-hidden layer. - """ +def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num): assert stacked_num % 2 == 1 - fc_para_attr = paddle.attr.Param(learning_rate=1e-3) - lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.) - para_attr = [fc_para_attr, lstm_para_attr] - bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.) - relu = paddle.activation.Relu() - linear = paddle.activation.Linear() - - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) - fc1 = paddle.layer.fc(input=emb, - size=hid_dim, - act=linear, - bias_attr=bias_attr) - lstm1 = paddle.layer.lstmemory( - input=fc1, act=relu, bias_attr=bias_attr) + fc1 = fluid.layers.fc(input=emb, size=hid_dim) + lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim) inputs = [fc1, lstm1] + for i in range(2, stacked_num + 1): - fc = paddle.layer.fc(input=inputs, - size=hid_dim, - act=linear, - param_attr=para_attr, - bias_attr=bias_attr) - lstm = paddle.layer.lstmemory( - input=fc, - reverse=(i % 2) == 0, - act=relu, - bias_attr=bias_attr) + fc = fluid.layers.fc(input=inputs, size=hid_dim) + lstm, cell = fluid.layers.dynamic_lstm( + input=fc, size=hid_dim, is_reverse=(i % 2) == 0) inputs = [fc, lstm] - fc_last = paddle.layer.pooling( - input=inputs[0], pooling_type=paddle.pooling.Max()) - lstm_last = paddle.layer.pooling( - input=inputs[1], pooling_type=paddle.pooling.Max()) - output = paddle.layer.fc(input=[fc_last, lstm_last], - size=class_dim, - act=paddle.activation.Softmax(), - bias_attr=bias_attr, - param_attr=para_attr) - - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost, output + fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max') + lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max') + + prediction = fluid.layers.fc(input=[fc_last, lstm_last], + size=class_dim, + act='softmax') + return prediction + ``` 1. Define input data and its dimension @@ -236,14 +200,7 @@ def stacked_lstm_net(input_dim, To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. ```python -word_dict = paddle.dataset.imdb.word_dict() -dict_dim = len(word_dict) -class_dim = 2 - -# option 1 -[cost, output] = convolution_net(dict_dim, class_dim=class_dim) -# option 2 -# [cost, output] = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3) +TODO ``` ## Model Training diff --git a/06.understand_sentiment/index.html b/06.understand_sentiment/index.html index 76b4eb38..1304bba1 100644 --- a/06.understand_sentiment/index.html +++ b/06.understand_sentiment/index.html @@ -144,14 +144,11 @@ ### Initialize PaddlePaddle -We must import and initialize PaddlePaddle (enable/disable GPU, set the number of trainers, etc). +We must import and initialize Paddle. ```python -import sys -import paddle.v2 as paddle - -# PaddlePaddle init -paddle.init(use_gpu=False, trainer_count=1) +import paddle +import paddle.fluid as fluid ``` As alluded to in section [Model Overview](#model-overview), here we provide the implementations of both Text CNN and Stacked-bidirectional LSTM models. 
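The Text CNN introduced next is built around `fluid.nets.sequence_conv_pool`, which bundles a sequence convolution with a sequence pooling step. As a rough sketch of what that helper composes (assuming only the lower-level `fluid.layers` API; `conv_pool_sketch` is an illustrative name, not something defined in this patch), the block could be written by hand as:

```python
import paddle.fluid as fluid

def conv_pool_sketch(emb, num_filters, filter_size):
    # Convolve over the embedded word sequence, then pool the per-window
    # activations into one fixed-size vector per sentence.
    conv = fluid.layers.sequence_conv(
        input=emb, num_filters=num_filters, filter_size=filter_size, act="tanh")
    return fluid.layers.sequence_pool(input=conv, pool_type="sqrt")
```

`convolution_net` below applies two such blocks, with window sizes 3 and 4, to the same embedding and feeds both pooled vectors into the final softmax layer.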
@@ -163,20 +160,26 @@ Note: `paddle.networks.sequence_conv_pool` includes both convolution and pooling layer operations. ```python -def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128): - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) - conv_3 = paddle.networks.sequence_conv_pool( - input=emb, context_len=3, hidden_size=hid_dim) - conv_4 = paddle.networks.sequence_conv_pool( - input=emb, context_len=4, hidden_size=hid_dim) - output = paddle.layer.fc(input=[conv_3, conv_4], - size=class_dim, - act=paddle.activation.Softmax()) - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost, output +def convolution_net(data, input_dim, class_dim=2, emb_dim=128, hid_dim=128): + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) + conv_3 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=3, + act="tanh", + pool_type="sqrt") + conv_4 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=4, + act="tanh", + pool_type="sqrt") + prediction = fluid.layers.fc(input=[conv_3, conv_4], + size=class_dim, + act="softmax") + return prediction + ``` 1. Define input data and its dimension @@ -196,70 +199,31 @@ We create a neural network `stacked_lstm_net` as below. ```python -def stacked_lstm_net(input_dim, - class_dim=2, - emb_dim=128, - hid_dim=512, - stacked_num=3): - """ - A Wrapper for sentiment classification task. - This network uses a bi-directional recurrent network, - consisting of three LSTM layers. This configuration is - motivated from the following paper, but uses few layers. - http://www.aclweb.org/anthology/P15-1109 - input_dim: here is word dictionary dimension. - class_dim: number of categories. - emb_dim: dimension of word embedding. - hid_dim: dimension of hidden layer. - stacked_num: number of stacked lstm-hidden layer. - """ +def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num): assert stacked_num % 2 == 1 - fc_para_attr = paddle.attr.Param(learning_rate=1e-3) - lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.) - para_attr = [fc_para_attr, lstm_para_attr] - bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.) 
- relu = paddle.activation.Relu() - linear = paddle.activation.Linear() - - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) - fc1 = paddle.layer.fc(input=emb, - size=hid_dim, - act=linear, - bias_attr=bias_attr) - lstm1 = paddle.layer.lstmemory( - input=fc1, act=relu, bias_attr=bias_attr) + fc1 = fluid.layers.fc(input=emb, size=hid_dim) + lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim) inputs = [fc1, lstm1] + for i in range(2, stacked_num + 1): - fc = paddle.layer.fc(input=inputs, - size=hid_dim, - act=linear, - param_attr=para_attr, - bias_attr=bias_attr) - lstm = paddle.layer.lstmemory( - input=fc, - reverse=(i % 2) == 0, - act=relu, - bias_attr=bias_attr) + fc = fluid.layers.fc(input=inputs, size=hid_dim) + lstm, cell = fluid.layers.dynamic_lstm( + input=fc, size=hid_dim, is_reverse=(i % 2) == 0) inputs = [fc, lstm] - fc_last = paddle.layer.pooling( - input=inputs[0], pooling_type=paddle.pooling.Max()) - lstm_last = paddle.layer.pooling( - input=inputs[1], pooling_type=paddle.pooling.Max()) - output = paddle.layer.fc(input=[fc_last, lstm_last], - size=class_dim, - act=paddle.activation.Softmax(), - bias_attr=bias_attr, - param_attr=para_attr) - - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost, output + fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max') + lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max') + + prediction = fluid.layers.fc(input=[fc_last, lstm_last], + size=class_dim, + act='softmax') + return prediction + ``` 1. Define input data and its dimension @@ -278,14 +242,7 @@ To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. ```python -word_dict = paddle.dataset.imdb.word_dict() -dict_dim = len(word_dict) -class_dim = 2 - -# option 1 -[cost, output] = convolution_net(dict_dim, class_dim=class_dim) -# option 2 -# [cost, output] = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3) +TODO ``` ## Model Training diff --git a/06.understand_sentiment/train_conv.py b/06.understand_sentiment/train_conv.py new file mode 100644 index 00000000..b7bf4a85 --- /dev/null +++ b/06.understand_sentiment/train_conv.py @@ -0,0 +1,155 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
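+
+# This script trains the Text CNN model (convolution_net) for IMDB sentiment
+# classification with PaddlePaddle Fluid and then runs inference with the
+# saved parameters.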
+ +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +from functools import partial +import numpy as np + +CLASS_DIM = 2 +EMB_DIM = 128 +HID_DIM = 512 +BATCH_SIZE = 128 + + +def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim): + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) + conv_3 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=3, + act="tanh", + pool_type="sqrt") + conv_4 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=4, + act="tanh", + pool_type="sqrt") + prediction = fluid.layers.fc( + input=[conv_3, conv_4], size=class_dim, act="softmax") + return prediction + + +def inference_program(word_dict): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + + dict_dim = len(word_dict) + net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM) + return net + + +def train_program(word_dict): + prediction = inference_program(word_dict) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(cost) + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return [avg_cost, accuracy] + + +def optimizer_func(): + return fluid.optimizer.Adagrad(learning_rate=0.002) + + +def train(use_cuda, train_program, params_dirname): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + + word_dict = paddle.dataset.imdb.word_dict() + trainer = fluid.Trainer( + train_func=partial(train_program, word_dict), + place=place, + optimizer_func=optimizer_func) + + def event_handler(event): + if isinstance(event, fluid.EndEpochEvent): + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + avg_cost, acc = trainer.test( + reader=test_reader, feed_order=['words', 'label']) + + print("avg_cost: %s" % avg_cost) + print("acc : %s" % acc) + + if acc > 0.2: # Smaller value to increase CI speed + trainer.save_params(params_dirname) + trainer.stop() + + else: + print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.epoch + 1, avg_cost, acc)) + if math.isnan(avg_cost): + sys.exit("got NaN loss, training failed.") + elif isinstance(event, fluid.EndStepEvent): + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + if event.step == 1: # Run 2 iterations to speed CI + trainer.save_params(params_dirname) + trainer.stop() + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) + + trainer.train( + num_epochs=1, + event_handler=event_handler, + reader=train_reader, + feed_order=['words', 'label']) + + +def infer(use_cuda, inference_program, params_dirname=None): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + word_dict = paddle.dataset.imdb.word_dict() + + inferencer = fluid.Inferencer( + infer_func=partial(inference_program, word_dict), + param_path=params_dirname, + place=place) + + # Setup input by creating LoDTensor to represent sequence of words. + # Here each word is the basic element of the LoDTensor and the shape of + # each word (base_shape) should be [1] since it is simply an index to + # look up for the corresponding word vector. + # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], + # which has only one lod level. 
Then the created LoDTensor will have only + # one higher level structure (sequence of words, or sentence) than the basic + # element (word). Hence the LoDTensor will hold data for three sentences of + # length 3, 4 and 2, respectively. + # Note that lod info should be a list of lists. + lod = [[3, 4, 2]] + base_shape = [1] + # The range of random integers is [low, high] + tensor_words = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=len(word_dict) - 1) + results = inferencer.infer({'words': tensor_words}) + print("infer results: ", results) + + +def main(use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + params_dirname = "understand_sentiment_conv.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) + + +if __name__ == '__main__': + for use_cuda in (False, True): + main(use_cuda=use_cuda) diff --git a/06.understand_sentiment/train_dyn_rnn.py b/06.understand_sentiment/train_dyn_rnn.py new file mode 100644 index 00000000..aa7c567b --- /dev/null +++ b/06.understand_sentiment/train_dyn_rnn.py @@ -0,0 +1,171 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +from functools import partial +import numpy as np + +CLASS_DIM = 2 +EMB_DIM = 128 +BATCH_SIZE = 128 +LSTM_SIZE = 128 + + +def dynamic_rnn_lstm(data, input_dim, class_dim, emb_dim, lstm_size): + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) + sentence = fluid.layers.fc(input=emb, size=lstm_size, act='tanh') + + rnn = fluid.layers.DynamicRNN() + with rnn.block(): + word = rnn.step_input(sentence) + prev_hidden = rnn.memory(value=0.0, shape=[lstm_size]) + prev_cell = rnn.memory(value=0.0, shape=[lstm_size]) + + def gate_common(ipt, hidden, size): + gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True) + gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False) + return gate0 + gate1 + + forget_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, + lstm_size)) + input_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, + lstm_size)) + output_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, + lstm_size)) + cell_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, + lstm_size)) + + cell = forget_gate * prev_cell + input_gate * cell_gate + hidden = output_gate * fluid.layers.tanh(x=cell) + rnn.update_memory(prev_cell, cell) + rnn.update_memory(prev_hidden, hidden) + rnn.output(hidden) + + last = fluid.layers.sequence_last_step(rnn()) + prediction = fluid.layers.fc(input=last, size=class_dim, act="softmax") + return prediction + + +def inference_program(word_dict): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + + dict_dim = len(word_dict) + pred = dynamic_rnn_lstm(data, dict_dim, CLASS_DIM, EMB_DIM, LSTM_SIZE) + return pred + + +def 
train_program(word_dict): + prediction = inference_program(word_dict) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(cost) + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return [avg_cost, accuracy] + + +def optimizer_func(): + return fluid.optimizer.Adagrad(learning_rate=0.002) + + +def train(use_cuda, train_program, params_dirname): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + + word_dict = paddle.dataset.imdb.word_dict() + trainer = fluid.Trainer( + train_func=partial(train_program, word_dict), + place=place, + optimizer_func=optimizer_func) + + def event_handler(event): + if isinstance(event, fluid.EndEpochEvent): + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + avg_cost, acc = trainer.test( + reader=test_reader, feed_order=['words', 'label']) + + print("avg_cost: %s" % avg_cost) + print("acc : %s" % acc) + + if acc > 0.2: # Smaller value to increase CI speed + trainer.save_params(params_dirname) + trainer.stop() + + else: + print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.epoch + 1, avg_cost, acc)) + if math.isnan(avg_cost): + sys.exit("got NaN loss, training failed.") + elif isinstance(event, fluid.EndStepEvent): + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + if event.step == 1: # Run 2 iterations to speed CI + trainer.save_params(params_dirname) + trainer.stop() + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) + + trainer.train( + num_epochs=1, + event_handler=event_handler, + reader=train_reader, + feed_order=['words', 'label']) + + +def infer(use_cuda, inference_program, params_dirname=None): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + word_dict = paddle.dataset.imdb.word_dict() + + inferencer = fluid.Inferencer( + infer_func=partial(inference_program, word_dict), + param_path=params_dirname, + place=place) + + # Setup input by creating LoDTensor to represent sequence of words. + # Here each word is the basic element of the LoDTensor and the shape of + # each word (base_shape) should be [1] since it is simply an index to + # look up for the corresponding word vector. + # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], + # which has only one lod level. Then the created LoDTensor will have only + # one higher level structure (sequence of words, or sentence) than the basic + # element (word). Hence the LoDTensor will hold data for three sentences of + # length 3, 4 and 2, respectively. + # Note that lod info should be a list of lists. 
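+    # With lod = [[3, 4, 2]] the tensor created below holds 3 + 4 + 2 = 9
+    # random word ids in total, grouped into three sequences of lengths 3, 4
+    # and 2 that stand in for three short reviews.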
+ lod = [[3, 4, 2]] + base_shape = [1] + # The range of random integers is [low, high] + tensor_words = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=len(word_dict) - 1) + results = inferencer.infer({'words': tensor_words}) + print("infer results: ", results) + + +def main(use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + params_dirname = "understand_sentiment_conv.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) + + +if __name__ == '__main__': + for use_cuda in (False, True): + main(use_cuda=use_cuda) diff --git a/06.understand_sentiment/train_stacked_lstm.py b/06.understand_sentiment/train_stacked_lstm.py new file mode 100644 index 00000000..923e7107 --- /dev/null +++ b/06.understand_sentiment/train_stacked_lstm.py @@ -0,0 +1,162 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +from functools import partial +import numpy as np + +CLASS_DIM = 2 +EMB_DIM = 128 +HID_DIM = 512 +STACKED_NUM = 3 +BATCH_SIZE = 128 + + +def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num): + assert stacked_num % 2 == 1 + + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) + + fc1 = fluid.layers.fc(input=emb, size=hid_dim) + lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim) + + inputs = [fc1, lstm1] + + for i in range(2, stacked_num + 1): + fc = fluid.layers.fc(input=inputs, size=hid_dim) + lstm, cell = fluid.layers.dynamic_lstm( + input=fc, size=hid_dim, is_reverse=(i % 2) == 0) + inputs = [fc, lstm] + + fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max') + lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max') + + prediction = fluid.layers.fc( + input=[fc_last, lstm_last], size=class_dim, act='softmax') + return prediction + + +def inference_program(word_dict): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + + dict_dim = len(word_dict) + net = stacked_lstm_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM, + STACKED_NUM) + return net + + +def train_program(word_dict): + prediction = inference_program(word_dict) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(cost) + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return [avg_cost, accuracy] + + +def optimizer_func(): + return fluid.optimizer.Adagrad(learning_rate=0.002) + + +def train(use_cuda, train_program, params_dirname): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + + word_dict = paddle.dataset.imdb.word_dict() + trainer = fluid.Trainer( + train_func=partial(train_program, word_dict), + place=place, + optimizer_func=optimizer_func) + + def event_handler(event): + if 
isinstance(event, fluid.EndEpochEvent): + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + avg_cost, acc = trainer.test( + reader=test_reader, feed_order=['words', 'label']) + + print("avg_cost: %s" % avg_cost) + print("acc : %s" % acc) + + if acc > 0.2: # Smaller value to increase CI speed + trainer.save_params(params_dirname) + trainer.stop() + + else: + print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.epoch + 1, avg_cost, acc)) + if math.isnan(avg_cost): + sys.exit("got NaN loss, training failed.") + elif isinstance(event, fluid.EndStepEvent): + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + if event.step == 1: # Run 2 iterations to speed CI + trainer.save_params(params_dirname) + trainer.stop() + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) + + trainer.train( + num_epochs=1, + event_handler=event_handler, + reader=train_reader, + feed_order=['words', 'label']) + + +def infer(use_cuda, inference_program, params_dirname=None): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + word_dict = paddle.dataset.imdb.word_dict() + + inferencer = fluid.Inferencer( + infer_func=partial(inference_program, word_dict), + param_path=params_dirname, + place=place) + + # Setup input by creating LoDTensor to represent sequence of words. + # Here each word is the basic element of the LoDTensor and the shape of + # each word (base_shape) should be [1] since it is simply an index to + # look up for the corresponding word vector. + # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], + # which has only one lod level. Then the created LoDTensor will have only + # one higher level structure (sequence of words, or sentence) than the basic + # element (word). Hence the LoDTensor will hold data for three sentences of + # length 3, 4 and 2, respectively. + # Note that lod info should be a list of lists. 
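+    # The random ids below only exercise the inference path; real input would
+    # first be converted from review text to word ids using word_dict.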
+ lod = [[3, 4, 2]] + base_shape = [1] + # The range of random integers is [low, high] + tensor_words = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=len(word_dict) - 1) + results = inferencer.infer({'words': tensor_words}) + print("infer results: ", results) + + +def main(use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + params_dirname = "understand_sentiment_stacked_lstm.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) + + +if __name__ == '__main__': + for use_cuda in (False, True): + main(use_cuda=use_cuda) From 6c82719911b1a5106dcbf5caafff4c30f8fed9be Mon Sep 17 00:00:00 2001 From: Nicky Date: Wed, 13 Jun 2018 14:52:54 -0700 Subject: [PATCH 2/4] Update README and confirm everything runs with jupyter for sentimental analysis --- 06.understand_sentiment/README.md | 194 +++++++++++------- 06.understand_sentiment/index.html | 194 +++++++++++------- 06.understand_sentiment/train.py | 161 --------------- 06.understand_sentiment/train_conv.py | 53 ++--- 06.understand_sentiment/train_dyn_rnn.py | 5 +- 06.understand_sentiment/train_stacked_lstm.py | 5 +- 6 files changed, 269 insertions(+), 343 deletions(-) delete mode 100644 06.understand_sentiment/train.py diff --git a/06.understand_sentiment/README.md b/06.understand_sentiment/README.md index c43b811c..6d3a6657 100644 --- a/06.understand_sentiment/README.md +++ b/06.understand_sentiment/README.md @@ -98,15 +98,21 @@ We use [IMDB](http://ai.stanford.edu/%7Eamaas/data/sentiment/) dataset for senti After issuing a command `python train.py`, training will start immediately. The details will be unpacked by the following sessions to see how it works. -## Model Structure +## Model Configuration -### Initialize PaddlePaddle - -We must import and initialize Paddle. +Our program starts with importing necessary packages and initializing some global variables: ```python import paddle import paddle.fluid as fluid +from functools import partial +import numpy as np + +CLASS_DIM = 2 +EMB_DIM = 128 +HID_DIM = 512 +BATCH_SIZE = 128 +USE_GPU = False ``` As alluded to in section [Model Overview](#model-overview), here we provide the implementations of both Text CNN and Stacked-bidirectional LSTM models. @@ -115,10 +121,10 @@ As alluded to in section [Model Overview](#model-overview), here we provide the We create a neural network `convolution_net` as the following snippet code. -Note: `paddle.networks.sequence_conv_pool` includes both convolution and pooling layer operations. +Note: `fluid.nets.sequence_conv_pool` includes both convolution and pooling layer operations. ```python -def convolution_net(data, input_dim, class_dim=2, emb_dim=128, hid_dim=128): +def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim): emb = fluid.layers.embedding( input=data, size=[input_dim, emb_dim], is_sparse=True) conv_3 = fluid.nets.sequence_conv_pool( @@ -133,32 +139,22 @@ def convolution_net(data, input_dim, class_dim=2, emb_dim=128, hid_dim=128): filter_size=4, act="tanh", pool_type="sqrt") - prediction = fluid.layers.fc(input=[conv_3, conv_4], - size=class_dim, - act="softmax") + prediction = fluid.layers.fc( + input=[conv_3, conv_4], size=class_dim, act="softmax") return prediction ``` +Parameter `input_dim` denotes the dictionary size, and `class_dim` is the number of categories. -1. Define input data and its dimension - - Parameter `input_dim` denotes the dictionary size, and `class_dim` is the number of categories. 
In `convolution_net`, the input to the network is defined in `paddle.layer.data`. - -1. Define Classifier - - The above Text CNN network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. - -1. Define Loss Function +The above Text CNN network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. - In the context of supervised learning, labels of the training set are defined in `paddle.layer.data`, too. During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. -#### Stacked bidirectional LSTM +### Stacked bidirectional LSTM We create a neural network `stacked_lstm_net` as below. ```python def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num): - assert stacked_num % 2 == 1 emb = fluid.layers.embedding( input=data, size=[input_dim, emb_dim], is_sparse=True) @@ -183,104 +179,150 @@ def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num): return prediction ``` +The above stacked bidirectional LSTM network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. -1. Define input data and its dimension +To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. In below steps, we will go with `convolution_net`. - Parameter `input_dim` denotes the dictionary size, and `class_dim` is the number of categories. In `stacked_lstm_net`, the input to the network is defined in `paddle.layer.data`. +Next we define a `inference_program` that simply uses `convolution_net` to predict output with the input from `fluid.layer.data`. -1. Define Classifier - - The above stacked bidirectional LSTM network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. +```python +def inference_program(word_dict): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) -1. Define Loss Function + dict_dim = len(word_dict) + net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM) + return net +``` - In the context of supervised learning, labels of the training set are defined in `paddle.layer.data`, too. During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. +Then we define a `training_program` that uses the result from `inference_program` to compute the cost with label data. +Also define `optimizer_func` to specify the optimizer. -To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. +In the context of supervised learning, labels of the training set are defined in `paddle.layer.data` too. 
During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. ```python -TODO +def train_program(word_dict): + prediction = inference_program(word_dict) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(cost) + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return [avg_cost, accuracy] + + +def optimizer_func(): + return fluid.optimizer.Adagrad(learning_rate=0.002) ``` ## Model Training -### Define Parameters +### Specify training environment -First, we create the model parameters according to the previous model configuration `cost`. +Specify your training environment, you should specify if the training is on CPU or GPU. ```python -# create parameters -parameters = paddle.parameters.create(cost) +use_cuda = False +place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() ``` -### Create Trainer +### Datafeeder Configuration -Before jumping into creating a training module, algorithm setting is also necessary. -Here we specified `Adam` optimization algorithm via `paddle.optimizer`. +Next we define data feeders for test and train. The feeder reads a `buf_size` of data each time and feed them to the training/testing process. +`paddle.dataset.imdb.train` will yield records during each pass, after shuffling, a batch input of `BATCH_SIZE` is generated for training. -```python -# create optimizer -adam_optimizer = paddle.optimizer.Adam( - learning_rate=2e-3, - regularization=paddle.optimizer.L2Regularization(rate=8e-4), - model_average=paddle.optimizer.ModelAverage(average_window=0.5)) - -# create trainer -trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=adam_optimizer) -``` +Notice for loading and reading IMDB data, it could take up to 1 minute. Please be patient. -### Training +```python -`paddle.dataset.imdb.train()` will yield records during each pass, after shuffling, a batch input is generated for training. +print("Loading IMDB word dict....") +word_dict = paddle.dataset.imdb.word_dict() -```python +print ("Reading training data....") train_reader = paddle.batch( paddle.reader.shuffle( - lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000), - batch_size=100) + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) +``` + + +### Create Trainer -test_reader = paddle.batch( - lambda: paddle.dataset.imdb.test(word_dict), batch_size=100) +Create a trainer that takes `train_program` as input and specify optimizer function. + +```python +trainer = fluid.Trainer( + train_func=partial(train_program, word_dict), + place=place, + optimizer_func=optimizer_func) ``` -`feeding` is devoted to specifying the correspondence between each yield record and `paddle.layer.data`. For instance, the first column of data generated by `paddle.dataset.imdb.train()` corresponds to `word` feature. +### Feeding Data + +`feed_order` is devoted to specifying the correspondence between each yield record and `paddle.layer.data`. For instance, the first column of data generated by `imdb.train` corresponds to `words`. ```python -feeding = {'word': 0, 'label': 1} +feed_order = ['words', 'label'] ``` -Callback function `event_handler` will be invoked to track training progress when a pre-defined event happens. 
+### Event Handler + +Callback function `event_handler` will be called during training when a pre-defined event happens. +For example, we can check the cost by `trainer.test` when `EndStepEvent` occurs ```python +# Specify the directory path to save the parameters +params_dirname = "understand_sentiment_conv.inference.model" + def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "\nPass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) - else: - sys.stdout.write('.') - sys.stdout.flush() - if isinstance(event, paddle.event.EndPass): - with open('./params_pass_%d.tar' % event.pass_id, 'w') as f: - trainer.save_parameter_to_tar(f) - - result = trainer.test(reader=test_reader, feeding=feeding) - print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) + if isinstance(event, fluid.EndStepEvent): + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + + if event.step == 10: + trainer.save_params(params_dirname) + trainer.stop() ``` -Finally, we can invoke `trainer.train` to start training: +### Training + +Finally, we invoke `trainer.train` to start training with `num_epochs` and other parameters. ```python trainer.train( - reader=train_reader, + num_epochs=1, event_handler=event_handler, - feeding=feeding, - num_passes=10) + reader=train_reader, + feed_order=feed_order) ``` +## Inference + +### Create Inferencer + +Initialize Inferencer with `inference_program` and `params_dirname` which is where we save params from training. + +```python +inferencer = fluid.Inferencer( + infer_func=partial(inference_program, word_dict), + param_path=params_dirname, + place=place) +``` + +### Infer + +Now we can infer with inputs that we provide in `feed_order` during training. + +```python +lod = [[3, 4, 2]] +base_shape = [1] +# The range of random integers is [low, high] +tensor_words = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=len(word_dict) - 1) +results = inferencer.infer({'words': tensor_words}) +print("infer results: ", results) + +``` ## Conclusion diff --git a/06.understand_sentiment/index.html b/06.understand_sentiment/index.html index 1304bba1..b9f19828 100644 --- a/06.understand_sentiment/index.html +++ b/06.understand_sentiment/index.html @@ -140,15 +140,21 @@ After issuing a command `python train.py`, training will start immediately. The details will be unpacked by the following sessions to see how it works. -## Model Structure +## Model Configuration -### Initialize PaddlePaddle - -We must import and initialize Paddle. +Our program starts with importing necessary packages and initializing some global variables: ```python import paddle import paddle.fluid as fluid +from functools import partial +import numpy as np + +CLASS_DIM = 2 +EMB_DIM = 128 +HID_DIM = 512 +BATCH_SIZE = 128 +USE_GPU = False ``` As alluded to in section [Model Overview](#model-overview), here we provide the implementations of both Text CNN and Stacked-bidirectional LSTM models. @@ -157,10 +163,10 @@ We create a neural network `convolution_net` as the following snippet code. -Note: `paddle.networks.sequence_conv_pool` includes both convolution and pooling layer operations. +Note: `fluid.nets.sequence_conv_pool` includes both convolution and pooling layer operations. 
```python -def convolution_net(data, input_dim, class_dim=2, emb_dim=128, hid_dim=128): +def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim): emb = fluid.layers.embedding( input=data, size=[input_dim, emb_dim], is_sparse=True) conv_3 = fluid.nets.sequence_conv_pool( @@ -175,32 +181,22 @@ filter_size=4, act="tanh", pool_type="sqrt") - prediction = fluid.layers.fc(input=[conv_3, conv_4], - size=class_dim, - act="softmax") + prediction = fluid.layers.fc( + input=[conv_3, conv_4], size=class_dim, act="softmax") return prediction ``` +Parameter `input_dim` denotes the dictionary size, and `class_dim` is the number of categories. -1. Define input data and its dimension - - Parameter `input_dim` denotes the dictionary size, and `class_dim` is the number of categories. In `convolution_net`, the input to the network is defined in `paddle.layer.data`. - -1. Define Classifier - - The above Text CNN network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. - -1. Define Loss Function +The above Text CNN network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. - In the context of supervised learning, labels of the training set are defined in `paddle.layer.data`, too. During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. -#### Stacked bidirectional LSTM +### Stacked bidirectional LSTM We create a neural network `stacked_lstm_net` as below. ```python def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num): - assert stacked_num % 2 == 1 emb = fluid.layers.embedding( input=data, size=[input_dim, emb_dim], is_sparse=True) @@ -225,104 +221,150 @@ return prediction ``` +The above stacked bidirectional LSTM network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. -1. Define input data and its dimension +To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. In below steps, we will go with `convolution_net`. - Parameter `input_dim` denotes the dictionary size, and `class_dim` is the number of categories. In `stacked_lstm_net`, the input to the network is defined in `paddle.layer.data`. +Next we define a `inference_program` that simply uses `convolution_net` to predict output with the input from `fluid.layer.data`. -1. Define Classifier - - The above stacked bidirectional LSTM network extracts high-level features and maps them to a vector of the same size as the categories. `paddle.activation.Softmax` function or classifier is then used for calculating the probability of the sentence belonging to each category. +```python +def inference_program(word_dict): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) -1. 
Define Loss Function + dict_dim = len(word_dict) + net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM) + return net +``` - In the context of supervised learning, labels of the training set are defined in `paddle.layer.data`, too. During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. +Then we define a `training_program` that uses the result from `inference_program` to compute the cost with label data. +Also define `optimizer_func` to specify the optimizer. -To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. +In the context of supervised learning, labels of the training set are defined in `paddle.layer.data` too. During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. ```python -TODO +def train_program(word_dict): + prediction = inference_program(word_dict) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(cost) + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return [avg_cost, accuracy] + + +def optimizer_func(): + return fluid.optimizer.Adagrad(learning_rate=0.002) ``` ## Model Training -### Define Parameters +### Specify training environment -First, we create the model parameters according to the previous model configuration `cost`. +Specify your training environment, you should specify if the training is on CPU or GPU. ```python -# create parameters -parameters = paddle.parameters.create(cost) +use_cuda = False +place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() ``` -### Create Trainer +### Datafeeder Configuration -Before jumping into creating a training module, algorithm setting is also necessary. -Here we specified `Adam` optimization algorithm via `paddle.optimizer`. +Next we define data feeders for test and train. The feeder reads a `buf_size` of data each time and feed them to the training/testing process. +`paddle.dataset.imdb.train` will yield records during each pass, after shuffling, a batch input of `BATCH_SIZE` is generated for training. -```python -# create optimizer -adam_optimizer = paddle.optimizer.Adam( - learning_rate=2e-3, - regularization=paddle.optimizer.L2Regularization(rate=8e-4), - model_average=paddle.optimizer.ModelAverage(average_window=0.5)) - -# create trainer -trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=adam_optimizer) -``` +Notice for loading and reading IMDB data, it could take up to 1 minute. Please be patient. -### Training +```python -`paddle.dataset.imdb.train()` will yield records during each pass, after shuffling, a batch input is generated for training. +print("Loading IMDB word dict....") +word_dict = paddle.dataset.imdb.word_dict() -```python +print ("Reading training data....") train_reader = paddle.batch( paddle.reader.shuffle( - lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000), - batch_size=100) + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) +``` + + +### Create Trainer -test_reader = paddle.batch( - lambda: paddle.dataset.imdb.test(word_dict), batch_size=100) +Create a trainer that takes `train_program` as input and specify optimizer function. 
+ +```python +trainer = fluid.Trainer( + train_func=partial(train_program, word_dict), + place=place, + optimizer_func=optimizer_func) ``` -`feeding` is devoted to specifying the correspondence between each yield record and `paddle.layer.data`. For instance, the first column of data generated by `paddle.dataset.imdb.train()` corresponds to `word` feature. +### Feeding Data + +`feed_order` is devoted to specifying the correspondence between each yield record and `paddle.layer.data`. For instance, the first column of data generated by `imdb.train` corresponds to `words`. ```python -feeding = {'word': 0, 'label': 1} +feed_order = ['words', 'label'] ``` -Callback function `event_handler` will be invoked to track training progress when a pre-defined event happens. +### Event Handler + +Callback function `event_handler` will be called during training when a pre-defined event happens. +For example, we can check the cost by `trainer.test` when `EndStepEvent` occurs ```python +# Specify the directory path to save the parameters +params_dirname = "understand_sentiment_conv.inference.model" + def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "\nPass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) - else: - sys.stdout.write('.') - sys.stdout.flush() - if isinstance(event, paddle.event.EndPass): - with open('./params_pass_%d.tar' % event.pass_id, 'w') as f: - trainer.save_parameter_to_tar(f) - - result = trainer.test(reader=test_reader, feeding=feeding) - print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) + if isinstance(event, fluid.EndStepEvent): + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + + if event.step == 10: + trainer.save_params(params_dirname) + trainer.stop() ``` -Finally, we can invoke `trainer.train` to start training: +### Training + +Finally, we invoke `trainer.train` to start training with `num_epochs` and other parameters. ```python trainer.train( - reader=train_reader, + num_epochs=1, event_handler=event_handler, - feeding=feeding, - num_passes=10) + reader=train_reader, + feed_order=feed_order) ``` +## Inference + +### Create Inferencer + +Initialize Inferencer with `inference_program` and `params_dirname` which is where we save params from training. + +```python +inferencer = fluid.Inferencer( + infer_func=partial(inference_program, word_dict), + param_path=params_dirname, + place=place) +``` + +### Infer + +Now we can infer with inputs that we provide in `feed_order` during training. + +```python +lod = [[3, 4, 2]] +base_shape = [1] +# The range of random integers is [low, high] +tensor_words = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=len(word_dict) - 1) +results = inferencer.infer({'words': tensor_words}) +print("infer results: ", results) + +``` ## Conclusion diff --git a/06.understand_sentiment/train.py b/06.understand_sentiment/train.py deleted file mode 100644 index 58f61700..00000000 --- a/06.understand_sentiment/train.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys, os -import paddle.v2 as paddle - -with_gpu = os.getenv('WITH_GPU', '0') != '0' - - -def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128): - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) - conv_3 = paddle.networks.sequence_conv_pool( - input=emb, context_len=3, hidden_size=hid_dim) - conv_4 = paddle.networks.sequence_conv_pool( - input=emb, context_len=4, hidden_size=hid_dim) - output = paddle.layer.fc( - input=[conv_3, conv_4], size=class_dim, act=paddle.activation.Softmax()) - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost, output - - -def stacked_lstm_net(input_dim, - class_dim=2, - emb_dim=128, - hid_dim=512, - stacked_num=3): - """ - A Wrapper for sentiment classification task. - This network uses bi-directional recurrent network, - consisting three LSTM layers. This configure is referred to - the paper as following url, but use fewer layrs. - http://www.aclweb.org/anthology/P15-1109 - - input_dim: here is word dictionary dimension. - class_dim: number of categories. - emb_dim: dimension of word embedding. - hid_dim: dimension of hidden layer. - stacked_num: number of stacked lstm-hidden layer. - """ - assert stacked_num % 2 == 1 - - fc_para_attr = paddle.attr.Param(learning_rate=1e-3) - lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.) - para_attr = [fc_para_attr, lstm_para_attr] - bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.) - relu = paddle.activation.Relu() - linear = paddle.activation.Linear() - - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) - - fc1 = paddle.layer.fc( - input=emb, size=hid_dim, act=linear, bias_attr=bias_attr) - lstm1 = paddle.layer.lstmemory(input=fc1, act=relu, bias_attr=bias_attr) - - inputs = [fc1, lstm1] - for i in range(2, stacked_num + 1): - fc = paddle.layer.fc( - input=inputs, - size=hid_dim, - act=linear, - param_attr=para_attr, - bias_attr=bias_attr) - lstm = paddle.layer.lstmemory( - input=fc, reverse=(i % 2) == 0, act=relu, bias_attr=bias_attr) - inputs = [fc, lstm] - - fc_last = paddle.layer.pooling( - input=inputs[0], pooling_type=paddle.pooling.Max()) - lstm_last = paddle.layer.pooling( - input=inputs[1], pooling_type=paddle.pooling.Max()) - output = paddle.layer.fc( - input=[fc_last, lstm_last], - size=class_dim, - act=paddle.activation.Softmax(), - bias_attr=bias_attr, - param_attr=para_attr) - - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost, output - - -if __name__ == '__main__': - # init - paddle.init(use_gpu=with_gpu) - - #data - print 'load dictionary...' 
- word_dict = paddle.dataset.imdb.word_dict() - dict_dim = len(word_dict) - class_dim = 2 - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=1000), - batch_size=100) - test_reader = paddle.batch( - paddle.dataset.imdb.test(word_dict), batch_size=100) - - feeding = {'word': 0, 'label': 1} - - # network config - # Please choose the way to build the network - # by uncommenting the corresponding line. - [cost, output] = convolution_net(dict_dim, class_dim=class_dim) - # [cost, output] = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3) - - # create parameters - parameters = paddle.parameters.create(cost) - - # create optimizer - adam_optimizer = paddle.optimizer.Adam( - learning_rate=2e-3, - regularization=paddle.optimizer.L2Regularization(rate=8e-4), - model_average=paddle.optimizer.ModelAverage(average_window=0.5)) - - # create trainer - trainer = paddle.trainer.SGD( - cost=cost, parameters=parameters, update_equation=adam_optimizer) - - # End batch and end pass event handler - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "\nPass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) - else: - sys.stdout.write('.') - sys.stdout.flush() - if isinstance(event, paddle.event.EndPass): - with open('./params_pass_%d.tar' % event.pass_id, 'w') as f: - trainer.save_parameter_to_tar(f) - - result = trainer.test(reader=test_reader, feeding=feeding) - print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) - - # Save the inference topology to protobuf. - inference_topology = paddle.topology.Topology(layers=output) - with open("./inference_topology.pkl", 'wb') as f: - inference_topology.serialize_for_inference(f) - - trainer.train( - reader=train_reader, - event_handler=event_handler, - feeding=feeding, - num_passes=20) diff --git a/06.understand_sentiment/train_conv.py b/06.understand_sentiment/train_conv.py index b7bf4a85..860a8011 100644 --- a/06.understand_sentiment/train_conv.py +++ b/06.understand_sentiment/train_conv.py @@ -14,6 +14,7 @@ from __future__ import print_function +import os import paddle import paddle.fluid as fluid from functools import partial @@ -68,50 +69,50 @@ def optimizer_func(): def train(use_cuda, train_program, params_dirname): - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + import time + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + print("Loading IMDB word dict....") word_dict = paddle.dataset.imdb.word_dict() + + print("Reading training data....") + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) + + print("Reading testing data....") + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + trainer = fluid.Trainer( train_func=partial(train_program, word_dict), place=place, optimizer_func=optimizer_func) + feed_order = ['words', 'label'] + def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): - test_reader = paddle.batch( - paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + if isinstance(event, fluid.EndStepEvent): avg_cost, acc = trainer.test( - reader=test_reader, feed_order=['words', 'label']) + reader=test_reader, feed_order=feed_order) - print("avg_cost: %s" % avg_cost) - print("acc : %s" % acc) + print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.step, avg_cost, acc)) - if acc > 0.2: # 
Smaller value to increase CI speed - trainer.save_params(params_dirname) - trainer.stop() - - else: - print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( - event.epoch + 1, avg_cost, acc)) - if math.isnan(avg_cost): - sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, map(np.array, event.metrics))) - if event.step == 1: # Run 2 iterations to speed CI + + if event.step == 10: # Adjust this number for accuracy trainer.save_params(params_dirname) trainer.stop() - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=25000), - batch_size=BATCH_SIZE) - trainer.train( num_epochs=1, event_handler=event_handler, reader=train_reader, - feed_order=['words', 'label']) + feed_order=feed_order) def infer(use_cuda, inference_program, params_dirname=None): @@ -151,5 +152,5 @@ def main(use_cuda): if __name__ == '__main__': - for use_cuda in (False, True): - main(use_cuda=use_cuda) + use_cuda = os.getenv('WITH_GPU', '0') != '0' + main(use_cuda) diff --git a/06.understand_sentiment/train_dyn_rnn.py b/06.understand_sentiment/train_dyn_rnn.py index aa7c567b..e139d670 100644 --- a/06.understand_sentiment/train_dyn_rnn.py +++ b/06.understand_sentiment/train_dyn_rnn.py @@ -14,6 +14,7 @@ from __future__ import print_function +import os import paddle import paddle.fluid as fluid from functools import partial @@ -167,5 +168,5 @@ def main(use_cuda): if __name__ == '__main__': - for use_cuda in (False, True): - main(use_cuda=use_cuda) + use_cuda = os.getenv('WITH_GPU', '0') != '0' + main(use_cuda) diff --git a/06.understand_sentiment/train_stacked_lstm.py b/06.understand_sentiment/train_stacked_lstm.py index 923e7107..0a3b2015 100644 --- a/06.understand_sentiment/train_stacked_lstm.py +++ b/06.understand_sentiment/train_stacked_lstm.py @@ -14,6 +14,7 @@ from __future__ import print_function +import os import paddle import paddle.fluid as fluid from functools import partial @@ -158,5 +159,5 @@ def main(use_cuda): if __name__ == '__main__': - for use_cuda in (False, True): - main(use_cuda=use_cuda) + use_cuda = os.getenv('WITH_GPU', '0') != '0' + main(use_cuda) From cec2c1da543d93754a575c6b28f718c2d0c50a02 Mon Sep 17 00:00:00 2001 From: Nicky Date: Wed, 13 Jun 2018 15:07:25 -0700 Subject: [PATCH 3/4] clean up --- 06.understand_sentiment/train_dyn_rnn.py | 1 + 06.understand_sentiment/train_stacked_lstm.py | 1 + 2 files changed, 2 insertions(+) diff --git a/06.understand_sentiment/train_dyn_rnn.py b/06.understand_sentiment/train_dyn_rnn.py index e139d670..f548a27f 100644 --- a/06.understand_sentiment/train_dyn_rnn.py +++ b/06.understand_sentiment/train_dyn_rnn.py @@ -24,6 +24,7 @@ EMB_DIM = 128 BATCH_SIZE = 128 LSTM_SIZE = 128 +USE_GPU = False def dynamic_rnn_lstm(data, input_dim, class_dim, emb_dim, lstm_size): diff --git a/06.understand_sentiment/train_stacked_lstm.py b/06.understand_sentiment/train_stacked_lstm.py index 0a3b2015..104215d7 100644 --- a/06.understand_sentiment/train_stacked_lstm.py +++ b/06.understand_sentiment/train_stacked_lstm.py @@ -25,6 +25,7 @@ HID_DIM = 512 STACKED_NUM = 3 BATCH_SIZE = 128 +USE_GPU = False def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num): From 89d558cc5166e7413e1736cf84d8865b5dc7af1c Mon Sep 17 00:00:00 2001 From: Nicky Date: Thu, 14 Jun 2018 16:32:58 -0700 Subject: [PATCH 4/4] Add inference example instead of random data and update per team's comments --- 
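As the commit message above says, inference now runs on hand-written reviews instead of a random integer LoD tensor. The condensed flow that the `infer()` functions below add looks roughly like the following sketch. It is only a sketch: it assumes the IMDB dictionary keys its unknown-word id as `'<unk>'`, and that `inferencer` has already been built with `fluid.Inferencer` from the parameters saved by one of the training scripts in this series.

```python
import paddle
import paddle.fluid as fluid

word_dict = paddle.dataset.imdb.word_dict()   # word -> integer id
place = fluid.CPUPlace()

# Three hand-written reviews replace the random ids used before.
reviews_str = ['read the book forget the movie',
               'this is a great movie',
               'this is very bad']
reviews = [s.split() for s in reviews_str]

UNK = word_dict['<unk>']   # assumed key for the out-of-vocabulary id
lod = [[word_dict.get(w, UNK) for w in r] for r in reviews]

# One LoD level holds the length of each review, so the LoDTensor knows where
# every variable-length sequence begins and ends.
base_shape = [[len(r) for r in lod]]
tensor_words = fluid.create_lod_tensor(lod, base_shape, place)

# `inferencer` is assumed to be a fluid.Inferencer restored from the saved
# parameters (see the training scripts below).
results = inferencer.infer({'words': tensor_words})
for review, probs in zip(reviews_str, results[0]):
    # probs[0]: probability of the positive class, probs[1]: negative class
    print("'%s' -> %.2f positive, %.2f negative" % (review, probs[0], probs[1]))
```

A single LoD level is enough here because every sample is just one variable-length word sequence; the per-review lengths in `base_shape` are all the layout information `create_lod_tensor` needs beyond the ids themselves.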
06.understand_sentiment/README.md | 37 +++++++-- 06.understand_sentiment/index.html | 37 +++++++-- 06.understand_sentiment/train_conv.py | 46 +++++++---- 06.understand_sentiment/train_dyn_rnn.py | 82 +++++++++++-------- 06.understand_sentiment/train_stacked_lstm.py | 82 +++++++++++-------- 5 files changed, 178 insertions(+), 106 deletions(-) diff --git a/06.understand_sentiment/README.md b/06.understand_sentiment/README.md index 6d3a6657..f427f134 100644 --- a/06.understand_sentiment/README.md +++ b/06.understand_sentiment/README.md @@ -183,7 +183,7 @@ The above stacked bidirectional LSTM network extracts high-level features and ma To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. In below steps, we will go with `convolution_net`. -Next we define a `inference_program` that simply uses `convolution_net` to predict output with the input from `fluid.layer.data`. +Next we define an `inference_program` that simply uses `convolution_net` to predict output with the input from `fluid.layer.data`. ```python def inference_program(word_dict): @@ -200,6 +200,7 @@ Also define `optimizer_func` to specify the optimizer. In the context of supervised learning, labels of the training set are defined in `paddle.layer.data` too. During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. +First result that returns from the list must be cost. ```python def train_program(word_dict): @@ -309,18 +310,38 @@ inferencer = fluid.Inferencer( place=place) ``` +### Create Lod Tensor with test data + +To do inference, we pick 3 potential reviews out of our mind as testing data. Feel free to modify any of them. +We map each word in the reviews to id from `word_dict`, replaced by 'unknown' if the word is not in `word_dict`. +Then we create lod data with the id list and use `create_lod_tensor` to create lod tensor. + +```python +reviews_str = [ + 'read the book forget the movie', 'this is a great movie', 'this is very bad' +] +reviews = [c.split() for c in reviews_str] + +UNK = word_dict[''] +lod = [] +for c in reviews: + lod.append([word_dict.get(words, UNK) for words in c]) + +base_shape = [[len(c) for c in lod]] + +tensor_words = fluid.create_lod_tensor(lod, base_shape, place) +``` + ### Infer -Now we can infer with inputs that we provide in `feed_order` during training. +Now we can infer and predict probability of positive or negative from each review above. ```python -lod = [[3, 4, 2]] -base_shape = [1] -# The range of random integers is [low, high] -tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) results = inferencer.infer({'words': tensor_words}) -print("infer results: ", results) + +for i, r in enumerate(results[0]): + print("Predict probability of ", r[0], " to be positive and ", r[1], " to be negative for review \'", reviews_str[i], "\'") + ``` diff --git a/06.understand_sentiment/index.html b/06.understand_sentiment/index.html index b9f19828..54f0a5b2 100644 --- a/06.understand_sentiment/index.html +++ b/06.understand_sentiment/index.html @@ -225,7 +225,7 @@ To reiterate, we can either invoke `convolution_net` or `stacked_lstm_net`. In below steps, we will go with `convolution_net`. -Next we define a `inference_program` that simply uses `convolution_net` to predict output with the input from `fluid.layer.data`. 
+Next we define an `inference_program` that simply uses `convolution_net` to predict output with the input from `fluid.layer.data`. ```python def inference_program(word_dict): @@ -242,6 +242,7 @@ In the context of supervised learning, labels of the training set are defined in `paddle.layer.data` too. During training, cross-entropy is used as loss function in `paddle.layer.classification_cost` and as the output of the network; During testing, the outputs are the probabilities calculated in the classifier. +First result that returns from the list must be cost. ```python def train_program(word_dict): @@ -351,18 +352,38 @@ place=place) ``` +### Create Lod Tensor with test data + +To do inference, we pick 3 potential reviews out of our mind as testing data. Feel free to modify any of them. +We map each word in the reviews to id from `word_dict`, replaced by 'unknown' if the word is not in `word_dict`. +Then we create lod data with the id list and use `create_lod_tensor` to create lod tensor. + +```python +reviews_str = [ + 'read the book forget the movie', 'this is a great movie', 'this is very bad' +] +reviews = [c.split() for c in reviews_str] + +UNK = word_dict[''] +lod = [] +for c in reviews: + lod.append([word_dict.get(words, UNK) for words in c]) + +base_shape = [[len(c) for c in lod]] + +tensor_words = fluid.create_lod_tensor(lod, base_shape, place) +``` + ### Infer -Now we can infer with inputs that we provide in `feed_order` during training. +Now we can infer and predict probability of positive or negative from each review above. ```python -lod = [[3, 4, 2]] -base_shape = [1] -# The range of random integers is [low, high] -tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) results = inferencer.infer({'words': tensor_words}) -print("infer results: ", results) + +for i, r in enumerate(results[0]): + print("Predict probability of ", r[0], " to be positive and ", r[1], " to be negative for review \'", reviews_str[i], "\'") + ``` diff --git a/06.understand_sentiment/train_conv.py b/06.understand_sentiment/train_conv.py index 860a8011..61fe18e4 100644 --- a/06.understand_sentiment/train_conv.py +++ b/06.understand_sentiment/train_conv.py @@ -69,14 +69,11 @@ def optimizer_func(): def train(use_cuda, train_program, params_dirname): - import time - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() print("Loading IMDB word dict....") word_dict = paddle.dataset.imdb.word_dict() print("Reading training data....") - train_reader = paddle.batch( paddle.reader.shuffle( paddle.dataset.imdb.train(word_dict), buf_size=25000), @@ -95,18 +92,18 @@ def train(use_cuda, train_program, params_dirname): def event_handler(event): if isinstance(event, fluid.EndStepEvent): - avg_cost, acc = trainer.test( - reader=test_reader, feed_order=feed_order) + if event.step % 10 == 0: + avg_cost, acc = trainer.test( + reader=test_reader, feed_order=feed_order) - print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( - event.step, avg_cost, acc)) + print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.step, avg_cost, acc)) - print("Step {0}, Epoch {1} Metrics {2}".format( - event.step, event.epoch, map(np.array, event.metrics))) + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) - if event.step == 10: # Adjust this number for accuracy - trainer.save_params(params_dirname) - trainer.stop() + elif isinstance(event, fluid.EndEpochEvent): + trainer.save_params(params_dirname) trainer.train( 
num_epochs=1, @@ -134,13 +131,26 @@ def infer(use_cuda, inference_program, params_dirname=None): # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] - base_shape = [1] - # The range of random integers is [low, high] - tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) + + reviews_str = [ + 'read the book forget the movie', 'this is a great movie', + 'this is very bad' + ] + reviews = [c.split() for c in reviews_str] + + UNK = word_dict[''] + lod = [] + for c in reviews: + lod.append([word_dict.get(words, UNK) for words in c]) + + base_shape = [[len(c) for c in lod]] + + tensor_words = fluid.create_lod_tensor(lod, base_shape, place) results = inferencer.infer({'words': tensor_words}) - print("infer results: ", results) + + for i, r in enumerate(results[0]): + print("Predict probability of ", r[0], " to be positive and ", r[1], + " to be negative for review \'", reviews_str[i], "\'") def main(use_cuda): diff --git a/06.understand_sentiment/train_dyn_rnn.py b/06.understand_sentiment/train_dyn_rnn.py index f548a27f..ef3be26f 100644 --- a/06.understand_sentiment/train_dyn_rnn.py +++ b/06.understand_sentiment/train_dyn_rnn.py @@ -87,49 +87,46 @@ def optimizer_func(): def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - + print("Loading IMDB word dict....") word_dict = paddle.dataset.imdb.word_dict() + + print("Reading training data....") + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) + + print("Reading testing data....") + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + trainer = fluid.Trainer( train_func=partial(train_program, word_dict), place=place, optimizer_func=optimizer_func) + feed_order = ['words', 'label'] + def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): - test_reader = paddle.batch( - paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) - avg_cost, acc = trainer.test( - reader=test_reader, feed_order=['words', 'label']) - - print("avg_cost: %s" % avg_cost) - print("acc : %s" % acc) - - if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(params_dirname) - trainer.stop() - - else: - print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( - event.epoch + 1, avg_cost, acc)) - if math.isnan(avg_cost): - sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): - print("Step {0}, Epoch {1} Metrics {2}".format( - event.step, event.epoch, map(np.array, event.metrics))) - if event.step == 1: # Run 2 iterations to speed CI - trainer.save_params(params_dirname) - trainer.stop() + if isinstance(event, fluid.EndStepEvent): + if event.step % 10 == 0: + avg_cost, acc = trainer.test( + reader=test_reader, feed_order=feed_order) - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=25000), - batch_size=BATCH_SIZE) + print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.step, avg_cost, acc)) + + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + + elif isinstance(event, fluid.EndEpochEvent): + trainer.save_params(params_dirname) trainer.train( num_epochs=1, event_handler=event_handler, reader=train_reader, - 
feed_order=['words', 'label']) + feed_order=feed_order) def infer(use_cuda, inference_program, params_dirname=None): @@ -151,13 +148,26 @@ def infer(use_cuda, inference_program, params_dirname=None): # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] - base_shape = [1] - # The range of random integers is [low, high] - tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) + + reviews_str = [ + 'read the book forget the movie', 'this is a great movie', + 'this is very bad' + ] + reviews = [c.split() for c in reviews_str] + + UNK = word_dict[''] + lod = [] + for c in reviews: + lod.append([word_dict.get(words, UNK) for words in c]) + + base_shape = [[len(c) for c in lod]] + + tensor_words = fluid.create_lod_tensor(lod, base_shape, place) results = inferencer.infer({'words': tensor_words}) - print("infer results: ", results) + + for i, r in enumerate(results[0]): + print("Predict probability of ", r[0], " to be positive and ", r[1], + " to be negative for review \'", reviews_str[i], "\'") def main(use_cuda): diff --git a/06.understand_sentiment/train_stacked_lstm.py b/06.understand_sentiment/train_stacked_lstm.py index 104215d7..351994f8 100644 --- a/06.understand_sentiment/train_stacked_lstm.py +++ b/06.understand_sentiment/train_stacked_lstm.py @@ -78,49 +78,46 @@ def optimizer_func(): def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - + print("Loading IMDB word dict....") word_dict = paddle.dataset.imdb.word_dict() + + print("Reading training data....") + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) + + print("Reading testing data....") + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + trainer = fluid.Trainer( train_func=partial(train_program, word_dict), place=place, optimizer_func=optimizer_func) + feed_order = ['words', 'label'] + def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): - test_reader = paddle.batch( - paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) - avg_cost, acc = trainer.test( - reader=test_reader, feed_order=['words', 'label']) - - print("avg_cost: %s" % avg_cost) - print("acc : %s" % acc) - - if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(params_dirname) - trainer.stop() - - else: - print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( - event.epoch + 1, avg_cost, acc)) - if math.isnan(avg_cost): - sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): - print("Step {0}, Epoch {1} Metrics {2}".format( - event.step, event.epoch, map(np.array, event.metrics))) - if event.step == 1: # Run 2 iterations to speed CI - trainer.save_params(params_dirname) - trainer.stop() + if isinstance(event, fluid.EndStepEvent): + if event.step % 10 == 0: + avg_cost, acc = trainer.test( + reader=test_reader, feed_order=feed_order) - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=25000), - batch_size=BATCH_SIZE) + print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.step, avg_cost, acc)) + + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + + elif isinstance(event, fluid.EndEpochEvent): + 
trainer.save_params(params_dirname) trainer.train( num_epochs=1, event_handler=event_handler, reader=train_reader, - feed_order=['words', 'label']) + feed_order=feed_order) def infer(use_cuda, inference_program, params_dirname=None): @@ -142,13 +139,26 @@ def infer(use_cuda, inference_program, params_dirname=None): # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] - base_shape = [1] - # The range of random integers is [low, high] - tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) + + reviews_str = [ + 'read the book forget the movie', 'this is a great movie', + 'this is very bad' + ] + reviews = [c.split() for c in reviews_str] + + UNK = word_dict[''] + lod = [] + for c in reviews: + lod.append([word_dict.get(words, UNK) for words in c]) + + base_shape = [[len(c) for c in lod]] + + tensor_words = fluid.create_lod_tensor(lod, base_shape, place) results = inferencer.infer({'words': tensor_words}) - print("infer results: ", results) + + for i, r in enumerate(results[0]): + print("Predict probability of ", r[0], " to be positive and ", r[1], + " to be negative for review \'", reviews_str[i], "\'") def main(use_cuda):