Skip to content
This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

Commit

Permalink
Chap 4 word2vec: polish chapter content (#541)
Browse files Browse the repository at this point in the history
  • Loading branch information
daming-lu authored Jun 8, 2018
1 parent 1b3bb17 commit 928d178
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 42 deletions.
37 changes: 21 additions & 16 deletions 04.word2vec/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,9 @@ def train(use_cuda, train_program, params_dirname):
if event.step % 10 == 0:
print "Step %d: Average Cost %f" % (event.step, avg_cost)

# If average cost is lower than 5.0, we consider the model good enough to stop.
# If average cost is lower than 5.8, we consider the model good enough to stop.
# Note that 5.8 is a relatively high value. To get a better model, one should
# aim for an avg_cost lower than 3.5, although training will then take longer.
if avg_cost < 5.8:
trainer.save_params(params_dirname)
trainer.stop()
Expand Down Expand Up @@ -383,16 +385,17 @@ def infer(use_cuda, inference_program, params_dirname=None):
# detail (lod) info of each LoDtensor should be [[1]] meaning there is only
# one lod_level and there is only one sequence of one word on this level.
# Note that lod info should be a list of lists.
lod1 = [[211]] # 'among'
lod2 = [[6]] # 'a'
lod3 = [[96]] # 'group'
lod4 = [[4]] # 'of'
base_shape = [1]

first_word = fluid.create_lod_tensor(lod1, base_shape, place)
second_word = fluid.create_lod_tensor(lod2, base_shape, place)
third_word = fluid.create_lod_tensor(lod3, base_shape, place)
fourth_word = fluid.create_lod_tensor(lod4, base_shape, place)
data1 = [[211]] # 'among'
data2 = [[6]] # 'a'
data3 = [[96]] # 'group'
data4 = [[4]] # 'of'
lod = [[1]]

first_word = fluid.create_lod_tensor(data1, lod, place)
second_word = fluid.create_lod_tensor(data2, lod, place)
third_word = fluid.create_lod_tensor(data3, lod, place)
fourth_word = fluid.create_lod_tensor(data4, lod, place)

result = inferencer.infer(
{
Expand All @@ -406,16 +409,18 @@ def infer(use_cuda, inference_program, params_dirname=None):
print(numpy.array(result[0]))
most_possible_word_index = numpy.argmax(result[0])
print(most_possible_word_index)
print([key for key, value in word_dict.iteritems() if value == most_possible_word_index][0])
print([
key for key, value in word_dict.iteritems()
if value == most_possible_word_index
][0])
```

When we spent 30 mins in training, the output is like below, which means the next word for `among a group of` is `unknown`. After several hours training, it gives a meaningful prediction as `workers`.
After about 3 minutes of training, the output looks like the following, which means the predicted next word for `among a group of` is `a`. If we train the model for longer, it will give a more meaningful prediction, such as `workers`.

```text
[[4.0056456e-02 5.4810006e-02 5.3107393e-05 ... 1.0061498e-04
8.9233123e-05 1.5757295e-01]]
2072
<unk>
[[0.00106646 0.0007907 0.00072041 ... 0.00049024 0.00041355 0.00084464]]
6
a
```

The main entrance of the program is fairly simple:
Expand Down
37 changes: 21 additions & 16 deletions 04.word2vec/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,9 @@
if event.step % 10 == 0:
print "Step %d: Average Cost %f" % (event.step, avg_cost)

# If average cost is lower than 5.0, we consider the model good enough to stop.
# If average cost is lower than 5.8, we consider the model good enough to stop.
# Note that 5.8 is a relatively high value. To get a better model, one should
# aim for an avg_cost lower than 3.5, although training will then take longer.
if avg_cost < 5.8:
trainer.save_params(params_dirname)
trainer.stop()
Expand Down Expand Up @@ -425,16 +427,17 @@
# detail (lod) info of each LoDtensor should be [[1]] meaning there is only
# one lod_level and there is only one sequence of one word on this level.
# Note that lod info should be a list of lists.
lod1 = [[211]] # 'among'
lod2 = [[6]] # 'a'
lod3 = [[96]] # 'group'
lod4 = [[4]] # 'of'
base_shape = [1]

first_word = fluid.create_lod_tensor(lod1, base_shape, place)
second_word = fluid.create_lod_tensor(lod2, base_shape, place)
third_word = fluid.create_lod_tensor(lod3, base_shape, place)
fourth_word = fluid.create_lod_tensor(lod4, base_shape, place)
data1 = [[211]] # 'among'
data2 = [[6]] # 'a'
data3 = [[96]] # 'group'
data4 = [[4]] # 'of'
lod = [[1]]

first_word = fluid.create_lod_tensor(data1, lod, place)
second_word = fluid.create_lod_tensor(data2, lod, place)
third_word = fluid.create_lod_tensor(data3, lod, place)
fourth_word = fluid.create_lod_tensor(data4, lod, place)

result = inferencer.infer(
{
Expand All @@ -448,16 +451,18 @@
print(numpy.array(result[0]))
most_possible_word_index = numpy.argmax(result[0])
print(most_possible_word_index)
print([key for key, value in word_dict.iteritems() if value == most_possible_word_index][0])
print([
key for key, value in word_dict.iteritems()
if value == most_possible_word_index
][0])
```

When we spent 30 mins in training, the output is like below, which means the next word for `among a group of` is `unknown`. After several hours training, it gives a meaningful prediction as `workers`.
After about 3 minutes of training, the output looks like the following, which means the predicted next word for `among a group of` is `board`. If we train the model for longer, it will give a more meaningful prediction, such as `workers`.

```text
[[4.0056456e-02 5.4810006e-02 5.3107393e-05 ... 1.0061498e-04
8.9233123e-05 1.5757295e-01]]
2072
<unk>
[[0.00144043 0.00073983 0.00042264 ... 0.00061815 0.00038701 0.00099838]]
142
board
```

The main entrance of the program is fairly simple:
Expand Down
23 changes: 13 additions & 10 deletions 04.word2vec/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ def event_handler(event):
if event.step % 10 == 0:
print "Step %d: Average Cost %f" % (event.step, avg_cost)

# If average cost is lower than 5.8, we consider the model good enough to stop.
# Note that 5.8 is a relatively high value. To get a better model, one should
# aim for an avg_cost lower than 3.5, although training will then take longer.
if avg_cost < 5.8:
trainer.save_params(params_dirname)
trainer.stop()
Expand Down Expand Up @@ -138,17 +141,17 @@ def infer(use_cuda, inference_program, params_dirname=None):
# detail (lod) info of each LoDtensor should be [[1]] meaning there is only
# one lod_level and there is only one sequence of one word on this level.
# Note that lod info should be a list of lists.

data1 = [[211]] # 'among'
data2 = [[6]] # 'a'
data3 = [[96]] # 'group'
data4 = [[4]] # 'of'
lod = [[1]]
base_shape = [1]
# The range of random integers is [low, high]
first_word = fluid.create_random_int_lodtensor(
lod, base_shape, place, low=0, high=dict_size - 1)
second_word = fluid.create_random_int_lodtensor(
lod, base_shape, place, low=0, high=dict_size - 1)
third_word = fluid.create_random_int_lodtensor(
lod, base_shape, place, low=0, high=dict_size - 1)
fourth_word = fluid.create_random_int_lodtensor(
lod, base_shape, place, low=0, high=dict_size - 1)

first_word = fluid.create_lod_tensor(data1, lod, place)
second_word = fluid.create_lod_tensor(data2, lod, place)
third_word = fluid.create_lod_tensor(data3, lod, place)
fourth_word = fluid.create_lod_tensor(data4, lod, place)

result = inferencer.infer(
{
Expand Down

0 comments on commit 928d178

Please sign in to comment.