From bf774382e805d5fc9e55b164c755c160b9963fc9 Mon Sep 17 00:00:00 2001 From: Ray Smith Date: Fri, 8 Sep 2017 10:24:00 +0100 Subject: [PATCH] Updated comments on RemapOutputs --- lstm/fullyconnected.cpp | 8 +++----- lstm/fullyconnected.h | 7 ++----- lstm/lstm.cpp | 7 ++----- lstm/lstm.h | 7 ++----- lstm/lstmtrainer.cpp | 2 +- lstm/lstmtrainer.h | 2 +- lstm/network.h | 16 +++++++++++----- lstm/plumbing.cpp | 7 ++----- lstm/plumbing.h | 7 ++----- lstm/series.cpp | 7 ++----- lstm/series.h | 7 ++----- lstm/weightmatrix.cpp | 5 ++++- lstm/weightmatrix.h | 5 ++++- 13 files changed, 38 insertions(+), 49 deletions(-) diff --git a/lstm/fullyconnected.cpp b/lstm/fullyconnected.cpp index 38d7432f53..9589582cfa 100644 --- a/lstm/fullyconnected.cpp +++ b/lstm/fullyconnected.cpp @@ -84,11 +84,9 @@ int FullyConnected::InitWeights(float range, TRand* randomizer) { return num_weights_; } -// Changes the number of outputs to the size of the given code_map, copying -// the old weight matrix entries for each output from code_map[output] where -// non-negative, and uses the mean (over all outputs) of the existing weights -// for all outputs with negative code_map entries. Returns the new number of -// weights. Only operates on Softmax layers with old_no outputs. +// Recursively searches the network for softmaxes with old_no outputs, +// and remaps their outputs according to code_map. See network.h for details. + int FullyConnected::RemapOutputs(int old_no, const std::vector<int>& code_map) { if (type_ == NT_SOFTMAX && no_ == old_no) { num_weights_ = weights_.RemapOutputs(code_map); diff --git a/lstm/fullyconnected.h b/lstm/fullyconnected.h index a5f830e1ae..d9a13650d0 100644 --- a/lstm/fullyconnected.h +++ b/lstm/fullyconnected.h @@ -68,11 +68,8 @@ class FullyConnected : public Network { // Sets up the network for training. Initializes weights using weights of // scale `range` picked according to the random number generator `randomizer`. 
virtual int InitWeights(float range, TRand* randomizer); - // Changes the number of outputs to the size of the given code_map, copying - // the old weight matrix entries for each output from code_map[output] where - // non-negative, and uses the mean (over all outputs) of the existing weights - // for all outputs with negative code_map entries. Returns the new number of - // weights. Only operates on Softmax layers with old_no outputs. + // Recursively searches the network for softmaxes with old_no outputs, + // and remaps their outputs according to code_map. See network.h for details. int RemapOutputs(int old_no, const std::vector<int>& code_map) override; // Converts a float network to an int network. diff --git a/lstm/lstm.cpp b/lstm/lstm.cpp index aa1a02b22f..2660f877b9 100644 --- a/lstm/lstm.cpp +++ b/lstm/lstm.cpp @@ -140,11 +140,8 @@ int LSTM::InitWeights(float range, TRand* randomizer) { return num_weights_; } -// Changes the number of outputs to the size of the given code_map, copying -// the old weight matrix entries for each output from code_map[output] where -// non-negative, and uses the mean (over all outputs) of the existing weights -// for all outputs with negative code_map entries. Returns the new number of -// weights. Only operates on Softmax layers with old_no outputs. +// Recursively searches the network for softmaxes with old_no outputs, +// and remaps their outputs according to code_map. See network.h for details. int LSTM::RemapOutputs(int old_no, const std::vector<int>& code_map) { if (softmax_ != NULL) { num_weights_ -= softmax_->num_weights(); diff --git a/lstm/lstm.h b/lstm/lstm.h index 6fde9cb5a0..f0563a4b76 100644 --- a/lstm/lstm.h +++ b/lstm/lstm.h @@ -76,11 +76,8 @@ class LSTM : public Network { // Sets up the network for training. Initializes weights using weights of // scale `range` picked according to the random number generator `randomizer`. 
virtual int InitWeights(float range, TRand* randomizer); - // Changes the number of outputs to the size of the given code_map, copying - // the old weight matrix entries for each output from code_map[output] where - // non-negative, and uses the mean (over all outputs) of the existing weights - // for all outputs with negative code_map entries. Returns the new number of - // weights. Only operates on Softmax layers with old_no outputs. + // Recursively searches the network for softmaxes with old_no outputs, + // and remaps their outputs according to code_map. See network.h for details. int RemapOutputs(int old_no, const std::vector<int>& code_map) override; // Converts a float network to an int network. diff --git a/lstm/lstmtrainer.cpp b/lstm/lstmtrainer.cpp index 6e67a39ecb..a50a972d0c 100644 --- a/lstm/lstmtrainer.cpp +++ b/lstm/lstmtrainer.cpp @@ -135,7 +135,7 @@ bool LSTMTrainer::TryLoadingCheckpoint(const char* filename, filename == old_traineddata) { return true; // Normal checkpoint load complete. } - tprintf("Code range changed from %d to %d!!\n", network_->NumOutputs(), + tprintf("Code range changed from %d to %d!\n", network_->NumOutputs(), recoder_.code_range()); if (old_traineddata == nullptr || *old_traineddata == '\0') { tprintf("Must supply the old traineddata for code conversion!\n"); diff --git a/lstm/lstmtrainer.h b/lstm/lstmtrainer.h index b7a5ec771a..f4a8b5f4ce 100644 --- a/lstm/lstmtrainer.h +++ b/lstm/lstmtrainer.h @@ -99,7 +99,7 @@ class LSTMTrainer : public LSTMRecognizer { // Tries to deserialize a trainer from the given file and silently returns // false in case of failure. If old_traineddata is not null, then it is - // assumed that the character set is to be re-mapped from old_traininddata to + // assumed that the character set is to be re-mapped from old_traineddata to // the new, with consequent change in weight matrices etc. 
bool TryLoadingCheckpoint(const char* filename, const char* old_traineddata); diff --git a/lstm/network.h b/lstm/network.h index 03a8f66943..d4635b649f 100644 --- a/lstm/network.h +++ b/lstm/network.h @@ -172,11 +172,17 @@ class Network { // and should not be deleted by any of the networks. // Returns the number of weights initialized. virtual int InitWeights(float range, TRand* randomizer); - // Changes the number of outputs to the size of the given code_map, copying - // the old weight matrix entries for each output from code_map[output] where - // non-negative, and uses the mean (over all outputs) of the existing weights - // for all outputs with negative code_map entries. Returns the new number of - // weights. Only operates on Softmax layers with old_no outputs. + // Changes the number of outputs to the outside world to the size of the given + // code_map. Recursively searches the entire network for Softmax layers that + // have exactly old_no outputs, and operates only on those, leaving all others + // unchanged. This enables networks with multiple output layers to get all + // their softmaxes updated, but if an internal layer uses one of those + // softmaxes for input, then the inputs will effectively be scrambled. + // TODO(rays) Fix this before any such network is implemented. + // The softmaxes are resized by copying the old weight matrix entries for each + // output from code_map[output] where non-negative, and using the mean (over + // all outputs) of the existing weights for all outputs with negative code_map + // entries. Returns the new number of weights. 
virtual int RemapOutputs(int old_no, const std::vector<int>& code_map) { return 0; } diff --git a/lstm/plumbing.cpp b/lstm/plumbing.cpp index 0f3b1fccd0..7e43903fee 100644 --- a/lstm/plumbing.cpp +++ b/lstm/plumbing.cpp @@ -57,11 +57,8 @@ int Plumbing::InitWeights(float range, TRand* randomizer) { return num_weights_; } -// Changes the number of outputs to the size of the given code_map, copying -// the old weight matrix entries for each output from code_map[output] where -// non-negative, and uses the mean (over all outputs) of the existing weights -// for all outputs with negative code_map entries. Returns the new number of -// weights. Only operates on Softmax layers with old_no outputs. +// Recursively searches the network for softmaxes with old_no outputs, +// and remaps their outputs according to code_map. See network.h for details. int Plumbing::RemapOutputs(int old_no, const std::vector<int>& code_map) { num_weights_ = 0; for (int i = 0; i < stack_.size(); ++i) { diff --git a/lstm/plumbing.h b/lstm/plumbing.h index 73b6fe8cc8..6ce995e678 100644 --- a/lstm/plumbing.h +++ b/lstm/plumbing.h @@ -57,11 +57,8 @@ class Plumbing : public Network { // and should not be deleted by any of the networks. // Returns the number of weights initialized. virtual int InitWeights(float range, TRand* randomizer); - // Changes the number of outputs to the size of the given code_map, copying - // the old weight matrix entries for each output from code_map[output] where - // non-negative, and uses the mean (over all outputs) of the existing weights - // for all outputs with negative code_map entries. Returns the new number of - // weights. Only operates on Softmax layers with old_no outputs. + // Recursively searches the network for softmaxes with old_no outputs, + // and remaps their outputs according to code_map. See network.h for details. int RemapOutputs(int old_no, const std::vector<int>& code_map) override; // Converts a float network to an int network. 
diff --git a/lstm/series.cpp b/lstm/series.cpp index b3a0522a1e..a5e64072e9 100644 --- a/lstm/series.cpp +++ b/lstm/series.cpp @@ -60,11 +60,8 @@ int Series::InitWeights(float range, TRand* randomizer) { return num_weights_; } -// Changes the number of outputs to the size of the given code_map, copying -// the old weight matrix entries for each output from code_map[output] where -// non-negative, and uses the mean (over all outputs) of the existing weights -// for all outputs with negative code_map entries. Returns the new number of -// weights. Only operates on Softmax layers with old_no outputs. +// Recursively searches the network for softmaxes with old_no outputs, +// and remaps their outputs according to code_map. See network.h for details. int Series::RemapOutputs(int old_no, const std::vector<int>& code_map) { num_weights_ = 0; tprintf("Num (Extended) outputs,weights in Series:\n"); diff --git a/lstm/series.h b/lstm/series.h index 2af705ecc7..5b787e5426 100644 --- a/lstm/series.h +++ b/lstm/series.h @@ -46,11 +46,8 @@ class Series : public Plumbing { // scale `range` picked according to the random number generator `randomizer`. // Returns the number of weights initialized. virtual int InitWeights(float range, TRand* randomizer); - // Changes the number of outputs to the size of the given code_map, copying - // the old weight matrix entries for each output from code_map[output] where - // non-negative, and uses the mean (over all outputs) of the existing weights - // for all outputs with negative code_map entries. Returns the new number of - // weights. Only operates on Softmax layers with old_no outputs. + // Recursively searches the network for softmaxes with old_no outputs, + // and remaps their outputs according to code_map. See network.h for details. 
int RemapOutputs(int old_no, const std::vector<int>& code_map) override; // Sets needs_to_backprop_ to needs_backprop and returns true if diff --git a/lstm/weightmatrix.cpp b/lstm/weightmatrix.cpp index 7d296952b0..ea41413bb8 100644 --- a/lstm/weightmatrix.cpp +++ b/lstm/weightmatrix.cpp @@ -61,7 +61,10 @@ int WeightMatrix::InitWeightsFloat(int no, int ni, bool use_adam, // the old weight matrix entries for each output from code_map[output] where // non-negative, and uses the mean (over all outputs) of the existing weights // for all outputs with negative code_map entries. Returns the new number of -// weights. +// weights. Can be used to change the character set addressed by an output +// softmax. +// TODO(rays) A RemapInputs would also be useful, so a change can be made +// in the middle of a network. int WeightMatrix::RemapOutputs(const std::vector<int>& code_map) { GENERIC_2D_ARRAY<double> old_wf(wf_); int old_no = wf_.dim1(); diff --git a/lstm/weightmatrix.h b/lstm/weightmatrix.h index 4806c5d55c..0805e0b253 100644 --- a/lstm/weightmatrix.h +++ b/lstm/weightmatrix.h @@ -74,7 +74,10 @@ class WeightMatrix { // the old weight matrix entries for each output from code_map[output] where // non-negative, and uses the mean (over all outputs) of the existing weights // for all outputs with negative code_map entries. Returns the new number of - // weights. + // weights. Can be used to change the character set addressed by an output + // softmax. + // TODO(rays) A RemapInputs would also be useful, so a change can be made + // in the middle of a network. int RemapOutputs(const std::vector<int>& code_map); // Converts a float network to an int network. Each set of input weights that