From d2aba22ae291a19e1b8d433210f1b516a992918c Mon Sep 17 00:00:00 2001 From: Terrorblade <42961051+BeyonderXX@users.noreply.github.com> Date: Fri, 7 Dec 2018 19:43:51 +0800 Subject: [PATCH 1/6] Fix the bug of BidirectionalCell I did hybridize( ) and pass "valid_length" to the unroll( ) function of BidirectionalCell, then returned AssertionError in line 79. Because symbol.split( ) return a symbol but not a symbol list. Result in the length of inputs dont equal parameter "length" when call unroll( ) to compute r_outputs and r_states. --- python/mxnet/gluon/rnn/rnn_cell.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/mxnet/gluon/rnn/rnn_cell.py b/python/mxnet/gluon/rnn/rnn_cell.py index 98e96fc6da17..fd345ddbbcfa 100644 --- a/python/mxnet/gluon/rnn/rnn_cell.py +++ b/python/mxnet/gluon/rnn/rnn_cell.py @@ -1041,8 +1041,8 @@ def unroll(self, length, inputs, begin_state=None, layout='NTC', merge_outputs=N reversed_inputs = F.SequenceReverse(F.stack(*inputs, axis=0), sequence_length=valid_length, use_sequence_length=True) - reversed_inputs = _as_list(F.split(reversed_inputs, axis=0, num_outputs=length, - squeeze_axis=True)) + reversed_inputs = list(F.split(reversed_inputs, axis=0, num_outputs=length, + squeeze_axis=True)) begin_state = _get_begin_state(self, F, begin_state, inputs, batch_size) states = begin_state @@ -1063,8 +1063,8 @@ def unroll(self, length, inputs, begin_state=None, layout='NTC', merge_outputs=N sequence_length=valid_length, use_sequence_length=True, axis=0) - reversed_r_outputs = _as_list(F.split(reversed_r_outputs, axis=0, num_outputs=length, - squeeze_axis=True)) + reversed_r_outputs = list(F.split(reversed_r_outputs, axis=0, num_outputs=length, + squeeze_axis=True)) if merge_outputs is None: merge_outputs = isinstance(l_outputs, tensor_types) l_outputs, _, _, _ = _format_sequence(None, l_outputs, layout, merge_outputs) From 0af9aafd9c6e30a3025da842d784037b65af4781 Mon Sep 17 00:00:00 2001 From: Terrorblade Date: Mon, 10 Dec 2018 00:00:48 +0800 Subject: [PATCH 2/6] add a test for BidirectionalCell --- CONTRIBUTORS.md | 1 + tests/python/unittest/test_gluon_rnn.py | 28 +++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index d1dd9b90708a..b9f84d592a70 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -192,6 +192,7 @@ List of Contributors * [Rahul Padmanabhan](https://github.com/rahul3) * [Yuxi Hu](https://github.com/yuxihu) * [Harsh Patel](https://github.com/harshp8l) +* [Xiao Wang](https://github.com/BeyonderXX) Label Bot --------- diff --git a/tests/python/unittest/test_gluon_rnn.py b/tests/python/unittest/test_gluon_rnn.py index eee3adda2c65..840db3a4521e 100644 --- a/tests/python/unittest/test_gluon_rnn.py +++ b/tests/python/unittest/test_gluon_rnn.py @@ -243,6 +243,34 @@ def test_bidirectional(): assert outs == [(10, 200), (10, 200), (10, 200)] +def test_bidirectional_unroll_valid_length(): + # Test BidirectionalCell. + # In 1.3.1 version, after hybridize( ), BidirectionalCell would failed when pass valid_length to unroll( ). + class BiLSTM(gluon.nn.HybridBlock): + def __init__(self, rnn_size, time_step, **kwargs): + super(BiLSTM, self).__init__(**kwargs) + self.time_step = time_step + with self.name_scope(): + self.bi_lstm = gluon.rnn.BidirectionalCell( + gluon.rnn.LSTMCell(rnn_size, prefix='rnn_l0_'), + gluon.rnn.LSTMCell(rnn_size, prefix='rnn_r0_'), + output_prefix='lstm_bi_') + + def hybrid_forward(self, F, inputs, valid_len): + outputs, states = self.bi_lstm.unroll(self.time_step, inputs, valid_length=valid_len, + layout='TNC', merge_outputs='True') + return outputs, states + + rnn_size, time_step = 100, 3 + net = BiLSTM(rnn_size, time_step) + net.initialize() + net.hybridize() + inputs_data = mx.nd.random.uniform(shape=(3, 10, 50)) + valid_len = mx.nd.array(range(1, 11)) + outputs, _ = net(inputs_data, valid_len) + assert outputs.shape == (3, 10, 200) + + @assert_raises_cudnn_not_satisfied(min_version='5.1.10') def test_layer_bidirectional(): class RefBiLSTM(gluon.Block): From 1a3ff5d1d6205974199c1ac208d97e8601784204 Mon Sep 17 00:00:00 2001 From: Terrorblade <42961051+BeyonderXX@users.noreply.github.com> Date: Fri, 7 Dec 2018 19:43:51 +0800 Subject: [PATCH 3/6] Fix the bug of BidirectionalCell I did hybridize( ) and pass "valid_length" to the unroll( ) function of BidirectionalCell, then returned AssertionError in line 79. Because symbol.split( ) return a symbol but not a symbol list. Result in the length of inputs dont equal parameter "length" when call unroll( ) to compute r_outputs and r_states. --- CONTRIBUTORS.md | 1 + python/mxnet/gluon/rnn/rnn_cell.py | 8 +++---- tests/python/unittest/test_gluon_rnn.py | 28 +++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index d1dd9b90708a..b9f84d592a70 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -192,6 +192,7 @@ List of Contributors * [Rahul Padmanabhan](https://github.com/rahul3) * [Yuxi Hu](https://github.com/yuxihu) * [Harsh Patel](https://github.com/harshp8l) +* [Xiao Wang](https://github.com/BeyonderXX) Label Bot --------- diff --git a/python/mxnet/gluon/rnn/rnn_cell.py b/python/mxnet/gluon/rnn/rnn_cell.py index 98e96fc6da17..fd345ddbbcfa 100644 --- a/python/mxnet/gluon/rnn/rnn_cell.py +++ b/python/mxnet/gluon/rnn/rnn_cell.py @@ -1041,8 +1041,8 @@ def unroll(self, length, inputs, begin_state=None, layout='NTC', merge_outputs=N reversed_inputs = F.SequenceReverse(F.stack(*inputs, axis=0), sequence_length=valid_length, use_sequence_length=True) - reversed_inputs = _as_list(F.split(reversed_inputs, axis=0, num_outputs=length, - squeeze_axis=True)) + reversed_inputs = list(F.split(reversed_inputs, axis=0, num_outputs=length, + squeeze_axis=True)) begin_state = _get_begin_state(self, F, begin_state, inputs, batch_size) states = begin_state @@ -1063,8 +1063,8 @@ def unroll(self, length, inputs, begin_state=None, layout='NTC', merge_outputs=N sequence_length=valid_length, use_sequence_length=True, axis=0) - reversed_r_outputs = _as_list(F.split(reversed_r_outputs, axis=0, num_outputs=length, - squeeze_axis=True)) + reversed_r_outputs = list(F.split(reversed_r_outputs, axis=0, num_outputs=length, + squeeze_axis=True)) if merge_outputs is None: merge_outputs = isinstance(l_outputs, tensor_types) l_outputs, _, _, _ = _format_sequence(None, l_outputs, layout, merge_outputs) diff --git a/tests/python/unittest/test_gluon_rnn.py b/tests/python/unittest/test_gluon_rnn.py index eee3adda2c65..840db3a4521e 100644 --- a/tests/python/unittest/test_gluon_rnn.py +++ b/tests/python/unittest/test_gluon_rnn.py @@ -243,6 +243,34 @@ def test_bidirectional(): assert outs == [(10, 200), (10, 200), (10, 200)] +def test_bidirectional_unroll_valid_length(): + # Test BidirectionalCell. + # In 1.3.1 version, after hybridize( ), BidirectionalCell would failed when pass valid_length to unroll( ). + class BiLSTM(gluon.nn.HybridBlock): + def __init__(self, rnn_size, time_step, **kwargs): + super(BiLSTM, self).__init__(**kwargs) + self.time_step = time_step + with self.name_scope(): + self.bi_lstm = gluon.rnn.BidirectionalCell( + gluon.rnn.LSTMCell(rnn_size, prefix='rnn_l0_'), + gluon.rnn.LSTMCell(rnn_size, prefix='rnn_r0_'), + output_prefix='lstm_bi_') + + def hybrid_forward(self, F, inputs, valid_len): + outputs, states = self.bi_lstm.unroll(self.time_step, inputs, valid_length=valid_len, + layout='TNC', merge_outputs='True') + return outputs, states + + rnn_size, time_step = 100, 3 + net = BiLSTM(rnn_size, time_step) + net.initialize() + net.hybridize() + inputs_data = mx.nd.random.uniform(shape=(3, 10, 50)) + valid_len = mx.nd.array(range(1, 11)) + outputs, _ = net(inputs_data, valid_len) + assert outputs.shape == (3, 10, 200) + + @assert_raises_cudnn_not_satisfied(min_version='5.1.10') def test_layer_bidirectional(): class RefBiLSTM(gluon.Block): From 14157e6f364b73567922018e3f94c5665815738d Mon Sep 17 00:00:00 2001 From: Terrorblade <42961051+BeyonderXX@users.noreply.github.com> Date: Mon, 10 Dec 2018 13:19:03 +0800 Subject: [PATCH 4/6] fix test_bidirectional_unroll_valid_length( ) Fix the error of parameter. --- tests/python/unittest/test_gluon_rnn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/unittest/test_gluon_rnn.py b/tests/python/unittest/test_gluon_rnn.py index 840db3a4521e..60bdc7d28202 100644 --- a/tests/python/unittest/test_gluon_rnn.py +++ b/tests/python/unittest/test_gluon_rnn.py @@ -258,7 +258,7 @@ def __init__(self, rnn_size, time_step, **kwargs): def hybrid_forward(self, F, inputs, valid_len): outputs, states = self.bi_lstm.unroll(self.time_step, inputs, valid_length=valid_len, - layout='TNC', merge_outputs='True') + layout='TNC', merge_outputs=True) return outputs, states rnn_size, time_step = 100, 3 From c2dd9d43cce90695140276e11ad790ee11234547 Mon Sep 17 00:00:00 2001 From: Terrorblade <42961051+BeyonderXX@users.noreply.github.com> Date: Fri, 7 Dec 2018 19:43:51 +0800 Subject: [PATCH 5/6] Fix the bug of BidirectionalCell I did hybridize( ) and pass "valid_length" to the unroll( ) function of BidirectionalCell, then returned AssertionError in line 79. Because symbol.split( ) return a symbol but not a symbol list. Result in the length of inputs dont equal parameter "length" when call unroll( ) to compute r_outputs and r_states. --- CONTRIBUTORS.md | 1 + python/mxnet/gluon/rnn/rnn_cell.py | 8 +++---- tests/python/unittest/test_gluon_rnn.py | 28 +++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index d1dd9b90708a..b9f84d592a70 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -192,6 +192,7 @@ List of Contributors * [Rahul Padmanabhan](https://github.com/rahul3) * [Yuxi Hu](https://github.com/yuxihu) * [Harsh Patel](https://github.com/harshp8l) +* [Xiao Wang](https://github.com/BeyonderXX) Label Bot --------- diff --git a/python/mxnet/gluon/rnn/rnn_cell.py b/python/mxnet/gluon/rnn/rnn_cell.py index 98e96fc6da17..fd345ddbbcfa 100644 --- a/python/mxnet/gluon/rnn/rnn_cell.py +++ b/python/mxnet/gluon/rnn/rnn_cell.py @@ -1041,8 +1041,8 @@ def unroll(self, length, inputs, begin_state=None, layout='NTC', merge_outputs=N reversed_inputs = F.SequenceReverse(F.stack(*inputs, axis=0), sequence_length=valid_length, use_sequence_length=True) - reversed_inputs = _as_list(F.split(reversed_inputs, axis=0, num_outputs=length, - squeeze_axis=True)) + reversed_inputs = list(F.split(reversed_inputs, axis=0, num_outputs=length, + squeeze_axis=True)) begin_state = _get_begin_state(self, F, begin_state, inputs, batch_size) states = begin_state @@ -1063,8 +1063,8 @@ def unroll(self, length, inputs, begin_state=None, layout='NTC', merge_outputs=N sequence_length=valid_length, use_sequence_length=True, axis=0) - reversed_r_outputs = _as_list(F.split(reversed_r_outputs, axis=0, num_outputs=length, - squeeze_axis=True)) + reversed_r_outputs = list(F.split(reversed_r_outputs, axis=0, num_outputs=length, + squeeze_axis=True)) if merge_outputs is None: merge_outputs = isinstance(l_outputs, tensor_types) l_outputs, _, _, _ = _format_sequence(None, l_outputs, layout, merge_outputs) diff --git a/tests/python/unittest/test_gluon_rnn.py b/tests/python/unittest/test_gluon_rnn.py index eee3adda2c65..60bdc7d28202 100644 --- a/tests/python/unittest/test_gluon_rnn.py +++ b/tests/python/unittest/test_gluon_rnn.py @@ -243,6 +243,34 @@ def test_bidirectional(): assert outs == [(10, 200), (10, 200), (10, 200)] +def test_bidirectional_unroll_valid_length(): + # Test BidirectionalCell. + # In 1.3.1 version, after hybridize( ), BidirectionalCell would failed when pass valid_length to unroll( ). + class BiLSTM(gluon.nn.HybridBlock): + def __init__(self, rnn_size, time_step, **kwargs): + super(BiLSTM, self).__init__(**kwargs) + self.time_step = time_step + with self.name_scope(): + self.bi_lstm = gluon.rnn.BidirectionalCell( + gluon.rnn.LSTMCell(rnn_size, prefix='rnn_l0_'), + gluon.rnn.LSTMCell(rnn_size, prefix='rnn_r0_'), + output_prefix='lstm_bi_') + + def hybrid_forward(self, F, inputs, valid_len): + outputs, states = self.bi_lstm.unroll(self.time_step, inputs, valid_length=valid_len, + layout='TNC', merge_outputs=True) + return outputs, states + + rnn_size, time_step = 100, 3 + net = BiLSTM(rnn_size, time_step) + net.initialize() + net.hybridize() + inputs_data = mx.nd.random.uniform(shape=(3, 10, 50)) + valid_len = mx.nd.array(range(1, 11)) + outputs, _ = net(inputs_data, valid_len) + assert outputs.shape == (3, 10, 200) + + @assert_raises_cudnn_not_satisfied(min_version='5.1.10') def test_layer_bidirectional(): class RefBiLSTM(gluon.Block): From 2f412e5b4345116d006ed698631c3e6acd7a393c Mon Sep 17 00:00:00 2001 From: Terrorblade <42961051+BeyonderXX@users.noreply.github.com> Date: Mon, 10 Dec 2018 20:54:24 +0800 Subject: [PATCH 6/6] fix test_bidirectional_unroll_valid_length( ) --- python/mxnet/gluon/rnn/rnn_cell.py | 37 ++++++++-------- tests/python/unittest/test_gluon_rnn.py | 56 ++++++++++++------------- 2 files changed, 48 insertions(+), 45 deletions(-) diff --git a/python/mxnet/gluon/rnn/rnn_cell.py b/python/mxnet/gluon/rnn/rnn_cell.py index fd345ddbbcfa..6ef3604eb973 100644 --- a/python/mxnet/gluon/rnn/rnn_cell.py +++ b/python/mxnet/gluon/rnn/rnn_cell.py @@ -102,6 +102,23 @@ def _mask_sequence_variable_length(F, data, length, valid_length, time_axis, mer squeeze_axis=True)) return outputs +def _reverse_sequences(sequences, unroll_step, valid_length=None): + if isinstance(sequences[0], symbol.Symbol): + F = symbol + else: + F = ndarray + + if valid_length is None: + reversed_sequences = list(reversed(sequences)) + else: + reversed_sequences = F.SequenceReverse(F.stack(*sequences, axis=0), + sequence_length=valid_length, + use_sequence_length=True) + reversed_sequences = F.split(reversed_sequences, axis=0, num_outputs=unroll_step, squeeze_axis=True) + + return reversed_sequences + + class RecurrentCell(Block): """Abstract base class for RNN cells @@ -1035,14 +1052,7 @@ def unroll(self, length, inputs, begin_state=None, layout='NTC', merge_outputs=N self.reset() inputs, axis, F, batch_size = _format_sequence(length, inputs, layout, False) - if valid_length is None: - reversed_inputs = list(reversed(inputs)) - else: - reversed_inputs = F.SequenceReverse(F.stack(*inputs, axis=0), - sequence_length=valid_length, - use_sequence_length=True) - reversed_inputs = list(F.split(reversed_inputs, axis=0, num_outputs=length, - squeeze_axis=True)) + reversed_inputs = list(_reverse_sequences(inputs, length, valid_length)) begin_state = _get_begin_state(self, F, begin_state, inputs, batch_size) states = begin_state @@ -1056,15 +1066,8 @@ def unroll(self, length, inputs, begin_state=None, layout='NTC', merge_outputs=N begin_state=states[len(l_cell.state_info(batch_size)):], layout=layout, merge_outputs=False, valid_length=valid_length) - if valid_length is None: - reversed_r_outputs = list(reversed(r_outputs)) - else: - reversed_r_outputs = F.SequenceReverse(F.stack(*r_outputs, axis=0), - sequence_length=valid_length, - use_sequence_length=True, - axis=0) - reversed_r_outputs = list(F.split(reversed_r_outputs, axis=0, num_outputs=length, - squeeze_axis=True)) + reversed_r_outputs = _reverse_sequences(r_outputs, length, valid_length) + if merge_outputs is None: merge_outputs = isinstance(l_outputs, tensor_types) l_outputs, _, _, _ = _format_sequence(None, l_outputs, layout, merge_outputs) diff --git a/tests/python/unittest/test_gluon_rnn.py b/tests/python/unittest/test_gluon_rnn.py index 60bdc7d28202..edc43d21b36b 100644 --- a/tests/python/unittest/test_gluon_rnn.py +++ b/tests/python/unittest/test_gluon_rnn.py @@ -243,34 +243,6 @@ def test_bidirectional(): assert outs == [(10, 200), (10, 200), (10, 200)] -def test_bidirectional_unroll_valid_length(): - # Test BidirectionalCell. - # In 1.3.1 version, after hybridize( ), BidirectionalCell would failed when pass valid_length to unroll( ). - class BiLSTM(gluon.nn.HybridBlock): - def __init__(self, rnn_size, time_step, **kwargs): - super(BiLSTM, self).__init__(**kwargs) - self.time_step = time_step - with self.name_scope(): - self.bi_lstm = gluon.rnn.BidirectionalCell( - gluon.rnn.LSTMCell(rnn_size, prefix='rnn_l0_'), - gluon.rnn.LSTMCell(rnn_size, prefix='rnn_r0_'), - output_prefix='lstm_bi_') - - def hybrid_forward(self, F, inputs, valid_len): - outputs, states = self.bi_lstm.unroll(self.time_step, inputs, valid_length=valid_len, - layout='TNC', merge_outputs=True) - return outputs, states - - rnn_size, time_step = 100, 3 - net = BiLSTM(rnn_size, time_step) - net.initialize() - net.hybridize() - inputs_data = mx.nd.random.uniform(shape=(3, 10, 50)) - valid_len = mx.nd.array(range(1, 11)) - outputs, _ = net(inputs_data, valid_len) - assert outputs.shape == (3, 10, 200) - - @assert_raises_cudnn_not_satisfied(min_version='5.1.10') def test_layer_bidirectional(): class RefBiLSTM(gluon.Block): @@ -628,6 +600,34 @@ def test_layer_fill_shape(): assert layer.l0_i2h_weight.shape[1] == 7, layer.l0_i2h_weight.shape[1] +def test_bidirectional_unroll_valid_length(): + # Test BidirectionalCell. + # In 1.3.1 version, after hybridize( ), BidirectionalCell would failed when pass valid_length to unroll( ). + class BiLSTM(gluon.nn.HybridBlock): + def __init__(self, rnn_size, time_step, **kwargs): + super(BiLSTM, self).__init__(**kwargs) + self.time_step = time_step + with self.name_scope(): + self.bi_lstm = gluon.rnn.BidirectionalCell( + gluon.rnn.LSTMCell(rnn_size, prefix='rnn_l0_'), + gluon.rnn.LSTMCell(rnn_size, prefix='rnn_r0_'), + output_prefix='lstm_bi_') + + def hybrid_forward(self, F, inputs, valid_len): + outputs, states = self.bi_lstm.unroll(self.time_step, inputs, valid_length=valid_len, + layout='NTC', merge_outputs=True) + return outputs, states + + rnn_size, time_step = 100, 3 + net = BiLSTM(rnn_size, time_step) + net.initialize() + net.hybridize() + inputs_data = mx.nd.random.uniform(shape=(10, 3, 50)) + valid_len = mx.nd.array([1]*10) + outputs, _ = net(inputs_data, valid_len) + assert outputs.shape == (10, 3, 200) + + if __name__ == '__main__': import nose nose.runmodule()