Skip to content
This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

Commit

Permalink
Merge pull request #532 from weixing02/fix
Browse files Browse the repository at this point in the history
Fix 04 & 06
  • Loading branch information
luotao1 authored Jun 5, 2018
2 parents 3cac9f3 + 97855b5 commit f4b5cc8
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 26 deletions.
2 changes: 1 addition & 1 deletion 04.word2vec/README.cn.md
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def wordemb(inlayer):
- 定义输入层接受的数据类型以及名字。

```python
paddle.init(use_gpu=False, trainer_count=3) # 初始化PaddlePaddle
paddle.init(use_gpu=False, trainer_count=1) # 初始化PaddlePaddle
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
# 每个输入层都接受整型数据,这些数据的范围是[0, dict_size)
Expand Down
2 changes: 1 addition & 1 deletion 04.word2vec/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def wordemb(inlayer):
- Define name and type for input to data layer.

```python
paddle.init(use_gpu=False, trainer_count=3)
paddle.init(use_gpu=False, trainer_count=1)
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
# Every layer takes integer values in the range [0, dict_size)
Expand Down
2 changes: 1 addition & 1 deletion 04.word2vec/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def load_dict_and_embedding():


def main():
paddle.init(use_gpu=with_gpu, trainer_count=3)
paddle.init(use_gpu=with_gpu, trainer_count=1)
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
# Every layer takes integer values in the range [0, dict_size)
Expand Down
35 changes: 14 additions & 21 deletions 06.understand_sentiment/README.cn.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,12 +129,9 @@ def convolution_net(input_dim,
output = paddle.layer.fc(input=[conv_3, conv_4],
size=class_dim,
act=paddle.activation.Softmax())
if not is_predict:
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
else:
return output
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost, output
```
网络的输入`input_dim`表示的是词典的大小,`class_dim`表示类别数。这里,我们使用[`sequence_conv_pool`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/trainer_config_helpers/networks.py) API实现了卷积和池化操作。

Expand Down Expand Up @@ -202,12 +199,9 @@ def stacked_lstm_net(input_dim,
bias_attr=bias_attr,
param_attr=para_attr)

if not is_predict:
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
else:
return output
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost, output
```
网络的输入`stacked_num`表示的是LSTM的层数,需要是奇数,确保最高层LSTM正向。Paddle里面是通过一个fc和一个lstmemory来实现基于LSTM的循环神经网络。

Expand All @@ -233,10 +227,10 @@ if __name__ == '__main__':
```python
train_reader = paddle.batch(
paddle.reader.shuffle(
lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
paddle.dataset.imdb.train(word_dict), buf_size=1000),
batch_size=100)
test_reader = paddle.batch(
lambda: paddle.dataset.imdb.test(word_dict),
paddle.dataset.imdb.test(word_dict),
batch_size=100)
```
这里,`dataset.imdb.train()`和`dataset.imdb.test()`分别是`dataset.imdb`中的训练数据和测试数据API。`train_reader`在训练时使用,作用是将读取的训练数据进行shuffle后,组成一个batch数据。同理,`test_reader`是在测试的时候使用,将读取的测试数据组成一个batch。
Expand All @@ -249,9 +243,10 @@ if __name__ == '__main__':

```python
# Please choose the way to build the network
# by uncommenting the corresponding line.
cost = convolution_net(dict_dim, class_dim=class_dim)
# cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)
# option 1
[cost, output] = convolution_net(dict_dim, class_dim=class_dim)
# option 2
# [cost, output] = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)
```
该示例中默认使用`convolution_net`网络;如果要使用`stacked_lstm_net`网络,将`convolution_net`所在的行注释掉,并取消`stacked_lstm_net`所在行的注释即可。其中cost是网络的优化目标,同时cost包含了整个网络的拓扑信息。

Expand Down Expand Up @@ -350,10 +345,8 @@ Test with Pass 0, {'classification_error_evaluator': 0.11432000249624252}

# 0 stands for positive sample, 1 stands for negative sample
label = {0:'pos', 1:'neg'}
# Use the network used by trainer
out = convolution_net(dict_dim, class_dim=class_dim, is_predict=True)
# out = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3, is_predict=True)
probs = paddle.infer(output_layer=out, parameters=parameters, input=input)

probs = paddle.infer(output_layer=output, parameters=parameters, input=input)

labs = np.argsort(-probs)
for idx, lab in enumerate(labs):
Expand Down
4 changes: 2 additions & 2 deletions 06.understand_sentiment/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -282,11 +282,11 @@ trainer = paddle.trainer.SGD(cost=cost,
```python
train_reader = paddle.batch(
paddle.reader.shuffle(
lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
paddle.dataset.imdb.train(word_dict), buf_size=1000),
batch_size=100)

test_reader = paddle.batch(
lambda: paddle.dataset.imdb.test(word_dict), batch_size=100)
paddle.dataset.imdb.test(word_dict), batch_size=100)
```

`feeding` specifies the correspondence between the columns of each yielded record and the `paddle.layer.data` layers. For instance, the first column of the data generated by `paddle.dataset.imdb.train()` corresponds to the `word` feature.
Expand Down

0 comments on commit f4b5cc8

Please sign in to comment.