From 26155ac582f225f7a350620ed15b7e744a1e0268 Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Mon, 31 Aug 2020 16:57:04 -0400 Subject: [PATCH 01/48] enable hot-word boosting --- native_client/args.h | 20 +++++++++-- native_client/client.cc | 17 ++++++++++ .../ctcdecode/ctc_beam_search_decoder.cpp | 33 +++++++++++++++++-- .../ctcdecode/ctc_beam_search_decoder.h | 18 +++++++++- native_client/deepspeech.cc | 33 ++++++++++++++++++- native_client/deepspeech.h | 25 ++++++++++++++ native_client/modelstate.h | 2 ++ 7 files changed, 141 insertions(+), 7 deletions(-) diff --git a/native_client/args.h b/native_client/args.h index baa9b7ffa3..fad3923c40 100644 --- a/native_client/args.h +++ b/native_client/args.h @@ -38,6 +38,10 @@ int json_candidate_transcripts = 3; int stream_size = 0; +char* hot_words = NULL; + +float boost_coefficient = 0.f; + void PrintHelp(const char* bin) { std::cout << @@ -57,7 +61,9 @@ void PrintHelp(const char* bin) "\t--candidate_transcripts NUMBER\tNumber of candidate transcripts to include in JSON output\n" "\t--stream size\t\t\tRun in stream mode, output intermediate results\n" "\t--help\t\t\t\tShow help\n" - "\t--version\t\t\tPrint version and exits\n"; + "\t--version\t\t\tPrint version and exits\n" + "\t--hot_words\t\t\tHot words separated by commas\n" + "\t--boost_coefficient\t\t\tThe coefficient to boost the hot_words\n"; char* version = DS_Version(); std::cerr << "DeepSpeech " << version << "\n"; DS_FreeString(version); @@ -66,7 +72,7 @@ void PrintHelp(const char* bin) bool ProcessArgs(int argc, char** argv) { - const char* const short_opts = "m:l:a:b:c:d:tejs:vh"; + const char* const short_opts = "m:l:a:b:c:d:tejs:vh:w:f"; const option long_opts[] = { {"model", required_argument, nullptr, 'm'}, {"scorer", required_argument, nullptr, 'l'}, @@ -79,6 +85,8 @@ bool ProcessArgs(int argc, char** argv) {"json", no_argument, nullptr, 'j'}, {"candidate_transcripts", required_argument, nullptr, 150}, {"stream", required_argument, nullptr, 's'}, + 
{"hot_words", required_argument, nullptr, 'w'}, + {"boost_coefficient", required_argument, nullptr, 'f'}, {"version", no_argument, nullptr, 'v'}, {"help", no_argument, nullptr, 'h'}, {nullptr, no_argument, nullptr, 0} @@ -144,6 +152,14 @@ bool ProcessArgs(int argc, char** argv) has_versions = true; break; + case 'w': + hot_words = optarg; + break; + + case 'f': + boost_coefficient = atof(optarg); + break; + case 'h': // -h or --help case '?': // Unrecognized option default: diff --git a/native_client/client.cc b/native_client/client.cc index 46a16115c5..71c10ec2d3 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -416,6 +416,23 @@ main(int argc, char **argv) } } + + if (hot_words) { + status = DS_EnableHotWords(ctx, hot_words); + if (status != 0) { + fprintf(stderr, "Could not enable hot words.\n"); + return 1; + } + } + + if (boost_coefficient) { + status = DS_EnableBoostCoefficient(ctx, boost_coefficient); + if (status != 0) { + fprintf(stderr, "Could not set boost coefficient.\n"); + return 1; + } + } + if (scorer) { status = DS_EnableExternalScorer(ctx, scorer); if (status != 0) { diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.cpp b/native_client/ctcdecode/ctc_beam_search_decoder.cpp index 580ee51c67..38ccb14a58 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.cpp +++ b/native_client/ctcdecode/ctc_beam_search_decoder.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include "decoder_utils.h" #include "ThreadPool.h" @@ -18,7 +19,9 @@ DecoderState::init(const Alphabet& alphabet, size_t beam_size, double cutoff_prob, size_t cutoff_top_n, - std::shared_ptr ext_scorer) + std::shared_ptr ext_scorer, + std::set hot_words, + float boost_coefficient) { // assign special ids abs_time_step_ = 0; @@ -29,6 +32,8 @@ DecoderState::init(const Alphabet& alphabet, cutoff_prob_ = cutoff_prob; cutoff_top_n_ = cutoff_top_n; ext_scorer_ = ext_scorer; + hot_words_ = hot_words; + boost_coefficient_ = boost_coefficient; 
start_expanding_ = false; // init prefixes' root @@ -160,8 +165,24 @@ DecoderState::next(const double *probs, float score = 0.0; std::vector ngram; ngram = ext_scorer_->make_ngram(prefix_to_score); + + // hot_boost == 1.0 == no boost at all + float hot_boost = 1.0; + if (!hot_words_.empty()) { + // increase prob of prefix for every word + // that matches a word in the hot-words list + for (std::string word : ngram) { + if ( hot_words_.find(word) != hot_words_.end() ) { + // increase the log_cond_prob(prefix|LM) + // since the log_cond_prob is negative, we multiply by + // a float <1.0 to increase. + hot_boost *= boost_coefficient_; + } + } + } + bool bos = ngram.size() < ext_scorer_->get_max_order(); - score = ext_scorer_->get_log_cond_prob(ngram, bos) * ext_scorer_->alpha; + score = ( ext_scorer_->get_log_cond_prob(ngram, bos) * hot_boost ) * ext_scorer_->alpha; log_p += score; log_p += ext_scorer_->beta; } @@ -256,11 +277,13 @@ std::vector ctc_beam_search_decoder( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, + std::set hot_words, + float boost_coefficient, size_t num_results) { VALID_CHECK_EQ(alphabet.GetSize()+1, class_dim, "Number of output classes in acoustic model does not match number of labels in the alphabet file. 
Alphabet file must be the same one that was used to train the acoustic model."); DecoderState state; - state.init(alphabet, beam_size, cutoff_prob, cutoff_top_n, ext_scorer); + state.init(alphabet, beam_size, cutoff_prob, cutoff_top_n, ext_scorer, hot_words, boost_coefficient); state.next(probs, time_dim, class_dim); return state.decode(num_results); } @@ -279,6 +302,8 @@ ctc_beam_search_decoder_batch( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, + std::set hot_words, + float boost_coefficient, size_t num_results) { VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!"); @@ -298,6 +323,8 @@ ctc_beam_search_decoder_batch( cutoff_prob, cutoff_top_n, ext_scorer, + hot_words, + boost_coefficient, num_results)); } diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.h b/native_client/ctcdecode/ctc_beam_search_decoder.h index 65e7497dc6..ef6527abc8 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.h +++ b/native_client/ctcdecode/ctc_beam_search_decoder.h @@ -22,6 +22,8 @@ class DecoderState { std::vector prefixes_; std::unique_ptr prefix_root_; TimestepTreeNode timestep_tree_root_{nullptr, 0}; + std::set hot_words_; + float boost_coefficient_; public: DecoderState() = default; @@ -48,7 +50,9 @@ class DecoderState { size_t beam_size, double cutoff_prob, size_t cutoff_top_n, - std::shared_ptr ext_scorer); + std::shared_ptr ext_scorer, + std::set hot_words, + float boost_coefficient); /* Send data to the decoder * @@ -88,6 +92,10 @@ class DecoderState { * ext_scorer: External scorer to evaluate a prefix, which consists of * n-gram language model scoring and word insertion term. * Default null, decoding the input sample without scorer. + * hot_words: A list of hot-words, which will get their probs boosted + * boost_coefficient: A floating-point number between (0,1). + * This is used to scale the score from the scorer. + * 0.0 == 100% probability (because using neg. logs). * num_results: Number of beams to return. 
* Return: * A vector where each element is a pair of score and decoding result, @@ -103,6 +111,8 @@ std::vector ctc_beam_search_decoder( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, + std::set hot_words, + float boost_coefficient, size_t num_results=1); /* CTC Beam Search Decoder for batch data @@ -117,6 +127,10 @@ std::vector ctc_beam_search_decoder( * ext_scorer: External scorer to evaluate a prefix, which consists of * n-gram language model scoring and word insertion term. * Default null, decoding the input sample without scorer. + * hot_words: A list of hot-words, which will get their probs boosted + * boost_coefficient: A floating-point number between (0,1). + * This is used to scale the score from the scorer. + * 0.0 == 100% probability (because using neg. logs). * num_results: Number of beams to return. * Return: * A 2-D vector where each element is a vector of beam search decoding @@ -136,6 +150,8 @@ ctc_beam_search_decoder_batch( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, + std::set hot_words, + float boost_coefficient, size_t num_results=1); #endif // CTC_BEAM_SEARCH_DECODER_H_ diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index 38868d4b5f..bb06a63125 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -8,6 +8,7 @@ #include #include #include +#include #include "deepspeech.h" #include "alphabet.h" @@ -342,6 +343,34 @@ DS_EnableExternalScorer(ModelState* aCtx, return DS_ERR_OK; } + +int +DS_EnableHotWords(ModelState* aCtx, + const char* aHotWords) +{ + + std::string hot_words_(aHotWords); + std::size_t last = 0; + std::size_t next = 0; + std::string delim = ","; + while ((next = hot_words_.find(delim, last)) != string::npos) { + aCtx->hot_words_.insert(hot_words_.substr(last, next-last)); + last = next + 1; + } + aCtx->hot_words_.insert(hot_words_.substr(last)); + + return DS_ERR_OK; +} + +int +DS_EnableBoostCoefficient(ModelState* aCtx, + float 
aBoostCoefficient) +{ + aCtx->boost_coefficient_ = aBoostCoefficient; + + return DS_ERR_OK; +} + int DS_DisableExternalScorer(ModelState* aCtx) { @@ -390,7 +419,9 @@ DS_CreateStream(ModelState* aCtx, aCtx->beam_width_, cutoff_prob, cutoff_top_n, - aCtx->scorer_); + aCtx->scorer_, + aCtx->hot_words_, + aCtx->boost_coefficient_); *retval = ctx.release(); return DS_ERR_OK; diff --git a/native_client/deepspeech.h b/native_client/deepspeech.h index 1df3cf2e43..756852d7e1 100644 --- a/native_client/deepspeech.h +++ b/native_client/deepspeech.h @@ -157,7 +157,32 @@ DEEPSPEECH_EXPORT int DS_EnableExternalScorer(ModelState* aCtx, const char* aScorerPath); + /** + * @brief Enable decoding with hot-word boosting. + * + * @param aCtx The ModelState pointer for the model being changed. + * @param aHotWords A list of hot words. + * + * @return Zero on success, non-zero on failure (invalid arguments). + */ +DEEPSPEECH_EXPORT +int DS_EnableHotWords(ModelState* aCtx, + const char* aHotWords); + /** + * @brief Set boost co-efficient for hot-words + * + * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * @param aBoostCoefficient A floating point number used to increas the probability + * of a candidate prefix during decoding which contains of of the hot-words + * + * @return Zero on success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_EnableBoostCoefficient(ModelState* aCtx, + float aBoostCoefficient); + + /** * @brief Disable decoding using an external scorer. * * @param aCtx The ModelState pointer for the model being changed. 
diff --git a/native_client/modelstate.h b/native_client/modelstate.h index 0dbe108ae1..b77b60541d 100644 --- a/native_client/modelstate.h +++ b/native_client/modelstate.h @@ -18,6 +18,8 @@ struct ModelState { Alphabet alphabet_; std::shared_ptr scorer_; unsigned int beam_width_; + std::set hot_words_; + float boost_coefficient_; unsigned int n_steps_; unsigned int n_context_; unsigned int n_features_; From 3745a5157a2afd4f3d24cd5756161dc5a4676354 Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Tue, 1 Sep 2020 16:05:41 -0400 Subject: [PATCH 02/48] more consistent ordering of CLI arguments --- native_client/args.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/native_client/args.h b/native_client/args.h index fad3923c40..5a644b637a 100644 --- a/native_client/args.h +++ b/native_client/args.h @@ -60,10 +60,10 @@ void PrintHelp(const char* bin) "\t--json\t\t\t\tExtended output, shows word timings as JSON\n" "\t--candidate_transcripts NUMBER\tNumber of candidate transcripts to include in JSON output\n" "\t--stream size\t\t\tRun in stream mode, output intermediate results\n" - "\t--help\t\t\t\tShow help\n" - "\t--version\t\t\tPrint version and exits\n" "\t--hot_words\t\t\tHot words separated by commas\n" - "\t--boost_coefficient\t\t\tThe coefficient to boost the hot_words\n"; + "\t--boost_coefficient\t\t\tThe coefficient to boost the hot_words\n" + "\t--help\t\t\t\tShow help\n" + "\t--version\t\t\tPrint version and exits\n"; char* version = DS_Version(); std::cerr << "DeepSpeech " << version << "\n"; DS_FreeString(version); @@ -72,7 +72,7 @@ void PrintHelp(const char* bin) bool ProcessArgs(int argc, char** argv) { - const char* const short_opts = "m:l:a:b:c:d:tejs:vh:w:f"; + const char* const short_opts = "m:l:a:b:c:d:tejs:w:f:vh"; const option long_opts[] = { {"model", required_argument, nullptr, 'm'}, {"scorer", required_argument, nullptr, 'l'}, From 9a3be21c5985a303d7fec04076f23bb448a526eb Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Wed, 
2 Sep 2020 07:40:12 -0400 Subject: [PATCH 03/48] progress on review --- native_client/deepspeech.cc | 6 +++--- native_client/deepspeech.h | 25 +++++++------------------ 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index bb06a63125..db890da714 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -345,10 +345,10 @@ DS_EnableExternalScorer(ModelState* aCtx, int -DS_EnableHotWords(ModelState* aCtx, - const char* aHotWords) +DS_AddHotWord(ModelState* aCtx, + const char* word, + float boostCoefficient) { - std::string hot_words_(aHotWords); std::size_t last = 0; std::size_t next = 0; diff --git a/native_client/deepspeech.h b/native_client/deepspeech.h index 756852d7e1..382fb85199 100644 --- a/native_client/deepspeech.h +++ b/native_client/deepspeech.h @@ -157,32 +157,21 @@ DEEPSPEECH_EXPORT int DS_EnableExternalScorer(ModelState* aCtx, const char* aScorerPath); - /** - * @brief Enable decoding with hot-word boosting. +/** + * @brief Add a hot-word and its boosting coefficient. * * @param aCtx The ModelState pointer for the model being changed. - * @param aHotWords A list of hot words. + * @param word The hot word. + * @param boostCoefficient the boosting coefficient. * * @return Zero on success, non-zero on failure (invalid arguments). */ DEEPSPEECH_EXPORT -int DS_EnableHotWords(ModelState* aCtx, - const char* aHotWords); +int DS_AddHotWord(ModelState* aCtx, + const char* word, + float boostCoefficient); /** - * @brief Set boost co-efficient for hot-words - * - * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. - * @param aBoostCoefficient A floating point number used to increas the probability - * of a candidate prefix during decoding which contains of of the hot-words - * - * @return Zero on success, non-zero on failure. 
- */ -DEEPSPEECH_EXPORT -int DS_EnableBoostCoefficient(ModelState* aCtx, - float aBoostCoefficient); - - /** * @brief Disable decoding using an external scorer. * * @param aCtx The ModelState pointer for the model being changed. From 2b44f74951135895a6fb5f9cb3278466016b4f7d Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Wed, 2 Sep 2020 15:20:00 -0400 Subject: [PATCH 04/48] use map instead of set for hot-words, move string logic to client.cc --- native_client/client.cc | 34 ++++++++++++------- .../ctcdecode/ctc_beam_search_decoder.cpp | 23 ++++++------- .../ctcdecode/ctc_beam_search_decoder.h | 26 +++++++------- native_client/deepspeech.cc | 23 ++----------- native_client/modelstate.h | 3 +- 5 files changed, 47 insertions(+), 62 deletions(-) diff --git a/native_client/client.cc b/native_client/client.cc index 71c10ec2d3..d0dea7f5b9 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -390,6 +390,18 @@ ProcessFile(ModelState* context, const char* path, bool show_times) } } +std::vector +splitString(std::string in_string, std::string delim) +{ + std::vector out_vector; + std::size_t last = 0; + std::size_t next = 0; + while ((next = in_string.find(delim, last)) != string::npos) { + out_vector.push_back(in_string.substr(last, next-last)); + } + return out_vector; +} + int main(int argc, char **argv) { @@ -418,18 +430,16 @@ main(int argc, char **argv) if (hot_words) { - status = DS_EnableHotWords(ctx, hot_words); - if (status != 0) { - fprintf(stderr, "Could not enable hot words.\n"); - return 1; - } - } - - if (boost_coefficient) { - status = DS_EnableBoostCoefficient(ctx, boost_coefficient); - if (status != 0) { - fprintf(stderr, "Could not set boost coefficient.\n"); - return 1; + std::vector hot_words_ = splitString(hot_words, ","); + for(string hot_word_ : hot_words_){ + std::vector pair_ = splitString(hot_word_, ":"); + std::string word = pair_[0]; + float boost_coefficient = pair_[1]; + status = DS_AddHotWord(ctx, word, boost_coefficient); + if 
(status != 0) { + fprintf(stderr, "Could not enable hot words.\n"); + return 1; + } } } diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.cpp b/native_client/ctcdecode/ctc_beam_search_decoder.cpp index 38ccb14a58..eef31c8ede 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.cpp +++ b/native_client/ctcdecode/ctc_beam_search_decoder.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include "decoder_utils.h" #include "ThreadPool.h" @@ -20,8 +19,7 @@ DecoderState::init(const Alphabet& alphabet, double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::set hot_words, - float boost_coefficient) + std::map hot_words) { // assign special ids abs_time_step_ = 0; @@ -33,7 +31,6 @@ DecoderState::init(const Alphabet& alphabet, cutoff_top_n_ = cutoff_top_n; ext_scorer_ = ext_scorer; hot_words_ = hot_words; - boost_coefficient_ = boost_coefficient; start_expanding_ = false; // init prefixes' root @@ -166,17 +163,20 @@ DecoderState::next(const double *probs, std::vector ngram; ngram = ext_scorer_->make_ngram(prefix_to_score); - // hot_boost == 1.0 == no boost at all + // when hot_boost is 1.0, there is no boost at all float hot_boost = 1.0; if (!hot_words_.empty()) { + std::map::iterator iter; // increase prob of prefix for every word // that matches a word in the hot-words list for (std::string word : ngram) { - if ( hot_words_.find(word) != hot_words_.end() ) { + iter = hot_words_.find(word); + if ( iter != hot_words_.end() ) { // increase the log_cond_prob(prefix|LM) // since the log_cond_prob is negative, we multiply by // a float <1.0 to increase. 
- hot_boost *= boost_coefficient_; + float boost iter->second; + hot_boost *= boost } } } @@ -277,13 +277,12 @@ std::vector ctc_beam_search_decoder( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::set hot_words, - float boost_coefficient, + std::map hot_words, size_t num_results) { VALID_CHECK_EQ(alphabet.GetSize()+1, class_dim, "Number of output classes in acoustic model does not match number of labels in the alphabet file. Alphabet file must be the same one that was used to train the acoustic model."); DecoderState state; - state.init(alphabet, beam_size, cutoff_prob, cutoff_top_n, ext_scorer, hot_words, boost_coefficient); + state.init(alphabet, beam_size, cutoff_prob, cutoff_top_n, ext_scorer, hot_words); state.next(probs, time_dim, class_dim); return state.decode(num_results); } @@ -302,8 +301,7 @@ ctc_beam_search_decoder_batch( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::set hot_words, - float boost_coefficient, + std::map hot_words, size_t num_results) { VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!"); @@ -324,7 +322,6 @@ ctc_beam_search_decoder_batch( cutoff_top_n, ext_scorer, hot_words, - boost_coefficient, num_results)); } diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.h b/native_client/ctcdecode/ctc_beam_search_decoder.h index ef6527abc8..f4e7d56bc5 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.h +++ b/native_client/ctcdecode/ctc_beam_search_decoder.h @@ -24,6 +24,7 @@ class DecoderState { TimestepTreeNode timestep_tree_root_{nullptr, 0}; std::set hot_words_; float boost_coefficient_; + std::map hot_words_; public: DecoderState() = default; @@ -51,8 +52,7 @@ class DecoderState { double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::set hot_words, - float boost_coefficient); + std::map hot_words); /* Send data to the decoder * @@ -92,10 +92,10 @@ class DecoderState { * ext_scorer: External scorer to evaluate a prefix, which 
consists of * n-gram language model scoring and word insertion term. * Default null, decoding the input sample without scorer. - * hot_words: A list of hot-words, which will get their probs boosted - * boost_coefficient: A floating-point number between (0,1). - * This is used to scale the score from the scorer. - * 0.0 == 100% probability (because using neg. logs). + * hot_words: A map of hot-words and their corresponding boost co-efficient + * The hot-word is a string and the boost coefficient is a + * floating-point number between (0,1). The boost is used + * to scale the score from the scorer. * num_results: Number of beams to return. * Return: * A vector where each element is a pair of score and decoding result, @@ -111,8 +111,7 @@ std::vector ctc_beam_search_decoder( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::set hot_words, - float boost_coefficient, + std::map hot_words, size_t num_results=1); /* CTC Beam Search Decoder for batch data @@ -127,10 +126,10 @@ std::vector ctc_beam_search_decoder( * ext_scorer: External scorer to evaluate a prefix, which consists of * n-gram language model scoring and word insertion term. * Default null, decoding the input sample without scorer. - * hot_words: A list of hot-words, which will get their probs boosted - * boost_coefficient: A floating-point number between (0,1). - * This is used to scale the score from the scorer. - * 0.0 == 100% probability (because using neg. logs). + * hot_words: A map of hot-words and their corresponding boost co-efficient + * The hot-word is a string and the boost coefficient is a + * floating-point number between (0,1). The boost is used + * to scale the score from the scorer. * num_results: Number of beams to return. 
* Return: * A 2-D vector where each element is a vector of beam search decoding @@ -150,8 +149,7 @@ ctc_beam_search_decoder_batch( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::set hot_words, - float boost_coefficient, + std::set hot_words, size_t num_results=1); #endif // CTC_BEAM_SEARCH_DECODER_H_ diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index db890da714..e30823a101 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -349,25 +349,7 @@ DS_AddHotWord(ModelState* aCtx, const char* word, float boostCoefficient) { - std::string hot_words_(aHotWords); - std::size_t last = 0; - std::size_t next = 0; - std::string delim = ","; - while ((next = hot_words_.find(delim, last)) != string::npos) { - aCtx->hot_words_.insert(hot_words_.substr(last, next-last)); - last = next + 1; - } - aCtx->hot_words_.insert(hot_words_.substr(last)); - - return DS_ERR_OK; -} - -int -DS_EnableBoostCoefficient(ModelState* aCtx, - float aBoostCoefficient) -{ - aCtx->boost_coefficient_ = aBoostCoefficient; - + aCtx->hot_words_.insert(word, boostCoefficient); return DS_ERR_OK; } @@ -420,8 +402,7 @@ DS_CreateStream(ModelState* aCtx, cutoff_prob, cutoff_top_n, aCtx->scorer_, - aCtx->hot_words_, - aCtx->boost_coefficient_); + aCtx->hot_words_); *retval = ctx.release(); return DS_ERR_OK; diff --git a/native_client/modelstate.h b/native_client/modelstate.h index b77b60541d..88d7038a86 100644 --- a/native_client/modelstate.h +++ b/native_client/modelstate.h @@ -18,8 +18,7 @@ struct ModelState { Alphabet alphabet_; std::shared_ptr scorer_; unsigned int beam_width_; - std::set hot_words_; - float boost_coefficient_; + std::map hot_words_; unsigned int n_steps_; unsigned int n_context_; unsigned int n_features_; From 42f1f3af2182667f9f2afc2cad3b683a7930f4a2 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Thu, 3 Sep 2020 10:37:29 -0700 Subject: [PATCH 05/48] typo bug --- native_client/ctcdecode/ctc_beam_search_decoder.cpp | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.cpp b/native_client/ctcdecode/ctc_beam_search_decoder.cpp index eef31c8ede..439ab4d5d1 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.cpp +++ b/native_client/ctcdecode/ctc_beam_search_decoder.cpp @@ -175,7 +175,7 @@ DecoderState::next(const double *probs, // increase the log_cond_prob(prefix|LM) // since the log_cond_prob is negative, we multiply by // a float <1.0 to increase. - float boost iter->second; + float boost = iter->second; hot_boost *= boost } } From 103ee935b99283295c77db36e184070225e1dd61 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Thu, 3 Sep 2020 10:57:31 -0700 Subject: [PATCH 06/48] pointer things? --- native_client/deepspeech.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index e30823a101..235eca2a59 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -349,7 +349,7 @@ DS_AddHotWord(ModelState* aCtx, const char* word, float boostCoefficient) { - aCtx->hot_words_.insert(word, boostCoefficient); + aCtx->hot_words_->insert(word, boostCoefficient); return DS_ERR_OK; } From 81157f5e1b887c18f8b5f54c450d21170f9771c0 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Wed, 9 Sep 2020 17:13:31 -0700 Subject: [PATCH 07/48] use map for hotwords, better string splitting --- native_client/args.h | 10 +------ native_client/client.cc | 27 ++++++++++++------- .../ctcdecode/ctc_beam_search_decoder.cpp | 8 +++--- native_client/deepspeech.cc | 2 +- 4 files changed, 23 insertions(+), 24 deletions(-) diff --git a/native_client/args.h b/native_client/args.h index 5a644b637a..75cade2b97 100644 --- a/native_client/args.h +++ b/native_client/args.h @@ -40,8 +40,6 @@ int stream_size = 0; char* hot_words = NULL; -float boost_coefficient = 0.f; - void PrintHelp(const char* bin) { std::cout << @@ -61,7 +59,6 @@ void PrintHelp(const char* bin) 
"\t--candidate_transcripts NUMBER\tNumber of candidate transcripts to include in JSON output\n" "\t--stream size\t\t\tRun in stream mode, output intermediate results\n" "\t--hot_words\t\t\tHot words separated by commas\n" - "\t--boost_coefficient\t\t\tThe coefficient to boost the hot_words\n" "\t--help\t\t\t\tShow help\n" "\t--version\t\t\tPrint version and exits\n"; char* version = DS_Version(); @@ -72,7 +69,7 @@ void PrintHelp(const char* bin) bool ProcessArgs(int argc, char** argv) { - const char* const short_opts = "m:l:a:b:c:d:tejs:w:f:vh"; + const char* const short_opts = "m:l:a:b:c:d:tejs:w:vh"; const option long_opts[] = { {"model", required_argument, nullptr, 'm'}, {"scorer", required_argument, nullptr, 'l'}, @@ -86,7 +83,6 @@ bool ProcessArgs(int argc, char** argv) {"candidate_transcripts", required_argument, nullptr, 150}, {"stream", required_argument, nullptr, 's'}, {"hot_words", required_argument, nullptr, 'w'}, - {"boost_coefficient", required_argument, nullptr, 'f'}, {"version", no_argument, nullptr, 'v'}, {"help", no_argument, nullptr, 'h'}, {nullptr, no_argument, nullptr, 0} @@ -156,10 +152,6 @@ bool ProcessArgs(int argc, char** argv) hot_words = optarg; break; - case 'f': - boost_coefficient = atof(optarg); - break; - case 'h': // -h or --help case '?': // Unrecognized option default: diff --git a/native_client/client.cc b/native_client/client.cc index d0dea7f5b9..244d4c59af 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -391,14 +391,20 @@ ProcessFile(ModelState* context, const char* path, bool show_times) } std::vector -splitString(std::string in_string, std::string delim) +SplitStringOnDelim(std::string in_string, std::string delim) { std::vector out_vector; - std::size_t last = 0; - std::size_t next = 0; - while ((next = in_string.find(delim, last)) != string::npos) { - out_vector.push_back(in_string.substr(last, next-last)); + char * tmp_str = new char[in_string.size() + 1]; + std::copy(in_string.begin(), 
in_string.end(), tmp_str); + tmp_str[in_string.size()] = '\0'; + const char* token; + token = strtok(tmp_str, delim.c_str()); + // out_vector.push_back(token); + while( token != NULL ) { + out_vector.push_back(token); + token = strtok(NULL, delim.c_str()); } + delete[] tmp_str; return out_vector; } @@ -430,11 +436,12 @@ main(int argc, char **argv) if (hot_words) { - std::vector hot_words_ = splitString(hot_words, ","); - for(string hot_word_ : hot_words_){ - std::vector pair_ = splitString(hot_word_, ":"); - std::string word = pair_[0]; - float boost_coefficient = pair_[1]; + std::vector hot_words_ = SplitStringOnDelim(hot_words, ","); + for ( std::string hot_word_ : hot_words_ ) { + std::vector pair_ = SplitStringOnDelim(hot_word_, ":"); + const char* word = (pair_[0]).c_str(); + const char* boost = (pair_[1]).c_str(); + float boost_coefficient = strtof(boost,0); status = DS_AddHotWord(ctx, word, boost_coefficient); if (status != 0) { fprintf(stderr, "Could not enable hot words.\n"); diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.cpp b/native_client/ctcdecode/ctc_beam_search_decoder.cpp index 439ab4d5d1..8b80d29a82 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.cpp +++ b/native_client/ctcdecode/ctc_beam_search_decoder.cpp @@ -166,17 +166,17 @@ DecoderState::next(const double *probs, // when hot_boost is 1.0, there is no boost at all float hot_boost = 1.0; if (!hot_words_.empty()) { - std::map::iterator iter; + std::map::iterator iter; // increase prob of prefix for every word // that matches a word in the hot-words list for (std::string word : ngram) { - iter = hot_words_.find(word); + iter = hot_words_.find(word); if ( iter != hot_words_.end() ) { // increase the log_cond_prob(prefix|LM) // since the log_cond_prob is negative, we multiply by // a float <1.0 to increase. 
- float boost = iter->second; - hot_boost *= boost + float boost = iter->second; + hot_boost *= boost; } } } diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index 235eca2a59..adb46f63b0 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -349,7 +349,7 @@ DS_AddHotWord(ModelState* aCtx, const char* word, float boostCoefficient) { - aCtx->hot_words_->insert(word, boostCoefficient); + aCtx->hot_words_.insert( std::pair (word, boostCoefficient) ); return DS_ERR_OK; } From ba809434d42289cf86045fc1019d54dec53f0ea6 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Wed, 9 Sep 2020 17:48:51 -0700 Subject: [PATCH 08/48] add the boost, not multiply --- native_client/client.cc | 3 +-- native_client/ctcdecode/ctc_beam_search_decoder.cpp | 10 +++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/native_client/client.cc b/native_client/client.cc index 244d4c59af..57c0d62b76 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -440,8 +440,7 @@ main(int argc, char **argv) for ( std::string hot_word_ : hot_words_ ) { std::vector pair_ = SplitStringOnDelim(hot_word_, ":"); const char* word = (pair_[0]).c_str(); - const char* boost = (pair_[1]).c_str(); - float boost_coefficient = strtof(boost,0); + float boost_coefficient = strtof((pair_[1]).c_str(),0); status = DS_AddHotWord(ctx, word, boost_coefficient); if (status != 0) { fprintf(stderr, "Could not enable hot words.\n"); diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.cpp b/native_client/ctcdecode/ctc_beam_search_decoder.cpp index 8b80d29a82..be0e303e8c 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.cpp +++ b/native_client/ctcdecode/ctc_beam_search_decoder.cpp @@ -163,8 +163,7 @@ DecoderState::next(const double *probs, std::vector ngram; ngram = ext_scorer_->make_ngram(prefix_to_score); - // when hot_boost is 1.0, there is no boost at all - float hot_boost = 1.0; + float hot_boost = 0.0; if (!hot_words_.empty()) { 
std::map::iterator iter; // increase prob of prefix for every word @@ -173,16 +172,13 @@ DecoderState::next(const double *probs, iter = hot_words_.find(word); if ( iter != hot_words_.end() ) { // increase the log_cond_prob(prefix|LM) - // since the log_cond_prob is negative, we multiply by - // a float <1.0 to increase. - float boost = iter->second; - hot_boost *= boost; + hot_boost += iter->second; } } } bool bos = ngram.size() < ext_scorer_->get_max_order(); - score = ( ext_scorer_->get_log_cond_prob(ngram, bos) * hot_boost ) * ext_scorer_->alpha; + score = ( ext_scorer_->get_log_cond_prob(ngram, bos) + hot_boost ) * ext_scorer_->alpha; log_p += score; log_p += ext_scorer_->beta; } From ff74bd5b66a769ebd244bed5e7948b4a9d132174 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Wed, 9 Sep 2020 17:59:58 -0700 Subject: [PATCH 09/48] cleaning up --- native_client/client.cc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/native_client/client.cc b/native_client/client.cc index 57c0d62b76..d79a17c6c8 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -397,9 +397,7 @@ SplitStringOnDelim(std::string in_string, std::string delim) char * tmp_str = new char[in_string.size() + 1]; std::copy(in_string.begin(), in_string.end(), tmp_str); tmp_str[in_string.size()] = '\0'; - const char* token; - token = strtok(tmp_str, delim.c_str()); - // out_vector.push_back(token); + const char* token = strtok(tmp_str, delim.c_str()); while( token != NULL ) { out_vector.push_back(token); token = strtok(NULL, delim.c_str()); @@ -443,8 +441,8 @@ main(int argc, char **argv) float boost_coefficient = strtof((pair_[1]).c_str(),0); status = DS_AddHotWord(ctx, word, boost_coefficient); if (status != 0) { - fprintf(stderr, "Could not enable hot words.\n"); - return 1; + fprintf(stderr, "Could not enable hot words.\n"); + return 1; } } } From b997babc6df06428c1b1424491d7dee547faf2b1 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Wed, 9 Sep 2020 18:22:49 -0700 
Subject: [PATCH 10/48] cleaning whitespace --- native_client/client.cc | 1 - native_client/ctcdecode/ctc_beam_search_decoder.h | 6 ++---- native_client/deepspeech.cc | 1 - 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/native_client/client.cc b/native_client/client.cc index d79a17c6c8..85b922c3e7 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -432,7 +432,6 @@ main(int argc, char **argv) } } - if (hot_words) { std::vector hot_words_ = SplitStringOnDelim(hot_words, ","); for ( std::string hot_word_ : hot_words_ ) { diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.h b/native_client/ctcdecode/ctc_beam_search_decoder.h index f4e7d56bc5..d2e36698fa 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.h +++ b/native_client/ctcdecode/ctc_beam_search_decoder.h @@ -94,8 +94,7 @@ class DecoderState { * Default null, decoding the input sample without scorer. * hot_words: A map of hot-words and their corresponding boost co-efficient * The hot-word is a string and the boost coefficient is a - * floating-point number between (0,1). The boost is used - * to scale the score from the scorer. + * floating-point number. * num_results: Number of beams to return. * Return: * A vector where each element is a pair of score and decoding result, @@ -128,8 +127,7 @@ std::vector ctc_beam_search_decoder( * Default null, decoding the input sample without scorer. * hot_words: A map of hot-words and their corresponding boost co-efficient * The hot-word is a string and the boost coefficient is a - * floating-point number between (0,1). The boost is used - * to scale the score from the scorer. + * floating-point number. * num_results: Number of beams to return. 
* Return: * A 2-D vector where each element is a vector of beam search decoding diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index adb46f63b0..25a4ebea3b 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -343,7 +343,6 @@ DS_EnableExternalScorer(ModelState* aCtx, return DS_ERR_OK; } - int DS_AddHotWord(ModelState* aCtx, const char* word, From 6fbad1679690f6952b7af7c829e866b0ce421987 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Thu, 10 Sep 2020 10:25:31 -0700 Subject: [PATCH 11/48] remove inclusion --- native_client/deepspeech.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index 25a4ebea3b..0aec13d8b0 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -8,7 +8,6 @@ #include #include #include -#include #include "deepspeech.h" #include "alphabet.h" From 96cd43ddc48a55846011d125d5e15cc78ab63249 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Thu, 10 Sep 2020 10:47:26 -0700 Subject: [PATCH 12/48] change typo set-->map --- native_client/ctcdecode/ctc_beam_search_decoder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.h b/native_client/ctcdecode/ctc_beam_search_decoder.h index d2e36698fa..9745205a1c 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.h +++ b/native_client/ctcdecode/ctc_beam_search_decoder.h @@ -147,7 +147,7 @@ ctc_beam_search_decoder_batch( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::set hot_words, + std::map hot_words, size_t num_results=1); #endif // CTC_BEAM_SEARCH_DECODER_H_ From d3a53784b672764a8c17a1103b8b297950dfe9d9 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Thu, 10 Sep 2020 11:08:31 -0700 Subject: [PATCH 13/48] rename boost_coefficient to boost X-DeepSpeech: NOBUILD --- native_client/client.cc | 4 ++-- native_client/ctcdecode/ctc_beam_search_decoder.h | 10 ++++------ 
native_client/deepspeech.cc | 4 ++-- native_client/deepspeech.h | 6 +++--- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/native_client/client.cc b/native_client/client.cc index 85b922c3e7..d0b23d8fd9 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -437,8 +437,8 @@ main(int argc, char **argv) for ( std::string hot_word_ : hot_words_ ) { std::vector pair_ = SplitStringOnDelim(hot_word_, ":"); const char* word = (pair_[0]).c_str(); - float boost_coefficient = strtof((pair_[1]).c_str(),0); - status = DS_AddHotWord(ctx, word, boost_coefficient); + float boost = strtof((pair_[1]).c_str(),0); + status = DS_AddHotWord(ctx, word, boost); if (status != 0) { fprintf(stderr, "Could not enable hot words.\n"); return 1; diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.h b/native_client/ctcdecode/ctc_beam_search_decoder.h index 9745205a1c..d2530779bf 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.h +++ b/native_client/ctcdecode/ctc_beam_search_decoder.h @@ -92,9 +92,8 @@ class DecoderState { * ext_scorer: External scorer to evaluate a prefix, which consists of * n-gram language model scoring and word insertion term. * Default null, decoding the input sample without scorer. - * hot_words: A map of hot-words and their corresponding boost co-efficient - * The hot-word is a string and the boost coefficient is a - * floating-point number. + * hot_words: A map of hot-words and their corresponding boosts + * The hot-word is a string and the boost is a float. * num_results: Number of beams to return. * Return: * A vector where each element is a pair of score and decoding result, @@ -125,9 +124,8 @@ std::vector ctc_beam_search_decoder( * ext_scorer: External scorer to evaluate a prefix, which consists of * n-gram language model scoring and word insertion term. * Default null, decoding the input sample without scorer. 
- * hot_words: A map of hot-words and their corresponding boost co-efficient - * The hot-word is a string and the boost coefficient is a - * floating-point number. + * hot_words: A map of hot-words and their corresponding boosts + * The hot-word is a string and the boost is a float. * num_results: Number of beams to return. * Return: * A 2-D vector where each element is a vector of beam search decoding diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index 0aec13d8b0..888133865b 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -345,9 +345,9 @@ DS_EnableExternalScorer(ModelState* aCtx, int DS_AddHotWord(ModelState* aCtx, const char* word, - float boostCoefficient) + float boost) { - aCtx->hot_words_.insert( std::pair (word, boostCoefficient) ); + aCtx->hot_words_.insert( std::pair (word, boost) ); return DS_ERR_OK; } diff --git a/native_client/deepspeech.h b/native_client/deepspeech.h index 382fb85199..2f70316ff5 100644 --- a/native_client/deepspeech.h +++ b/native_client/deepspeech.h @@ -158,18 +158,18 @@ int DS_EnableExternalScorer(ModelState* aCtx, const char* aScorerPath); /** - * @brief Add a hot-word and its boosting coefficient. + * @brief Add a hot-word and its boost. * * @param aCtx The ModelState pointer for the model being changed. * @param word The hot word. - * @param boostCoefficient the boosting coefficient. + * @param boost The additive boost. * * @return Zero on success, non-zero on failure (invalid arguments). */ DEEPSPEECH_EXPORT int DS_AddHotWord(ModelState* aCtx, const char* word, - float boostCoefficient); + float boost); /** * @brief Disable decoding using an external scorer. 
From cdf44aab765e973160ffdea4787960bb498f7074 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Thu, 10 Sep 2020 11:54:32 -0700 Subject: [PATCH 14/48] add hot_words to python bindings --- native_client/ctcdecode/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/native_client/ctcdecode/__init__.py b/native_client/ctcdecode/__init__.py index fd897b3bd0..77bfd46873 100644 --- a/native_client/ctcdecode/__init__.py +++ b/native_client/ctcdecode/__init__.py @@ -96,6 +96,7 @@ def ctc_beam_search_decoder(probs_seq, cutoff_prob=1.0, cutoff_top_n=40, scorer=None, + hot_words=None, num_results=1): """Wrapper for the CTC Beam Search Decoder. @@ -116,6 +117,8 @@ def ctc_beam_search_decoder(probs_seq, :param scorer: External scorer for partially decoded sentence, e.g. word count or language model. :type scorer: Scorer + :param hot_words: Map of words (keys) to their assigned boosts (values) + :type hot_words: map{string:float} :param num_results: Number of beams to return. :type num_results: int :return: List of tuples of confidence and sentence as decoding @@ -137,6 +140,7 @@ def ctc_beam_search_decoder_batch(probs_seq, cutoff_prob=1.0, cutoff_top_n=40, scorer=None, + hot_words=None, num_results=1): """Wrapper for the batched CTC beam search decoder. @@ -161,13 +165,15 @@ def ctc_beam_search_decoder_batch(probs_seq, :param scorer: External scorer for partially decoded sentence, e.g. word count or language model. :type scorer: Scorer + :param hot_words: Map of words (keys) to their assigned boosts (values) + :type hot_words: map{string:float} :param num_results: Number of beams to return. :type num_results: int :return: List of tuples of confidence and sentence as decoding results, in descending order of the confidence. 
:rtype: list """ - batch_beam_results = swigwrapper.ctc_beam_search_decoder_batch(probs_seq, seq_lengths, alphabet, beam_size, num_processes, cutoff_prob, cutoff_top_n, scorer, num_results) + batch_beam_results = swigwrapper.ctc_beam_search_decoder_batch(probs_seq, seq_lengths, alphabet, beam_size, num_processes, cutoff_prob, cutoff_top_n, scorer, hot_words, num_results) batch_beam_results = [ [(res.confidence, alphabet.Decode(res.tokens)) for res in beam_results] for beam_results in batch_beam_results From d8a779d4d8a894cdbed154f42abfc7f936c3c727 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Thu, 10 Sep 2020 12:22:56 -0700 Subject: [PATCH 15/48] missing hot_words --- native_client/ctcdecode/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/native_client/ctcdecode/__init__.py b/native_client/ctcdecode/__init__.py index 77bfd46873..329ef54064 100644 --- a/native_client/ctcdecode/__init__.py +++ b/native_client/ctcdecode/__init__.py @@ -127,7 +127,7 @@ def ctc_beam_search_decoder(probs_seq, """ beam_results = swigwrapper.ctc_beam_search_decoder( probs_seq, alphabet, beam_size, cutoff_prob, cutoff_top_n, - scorer, num_results) + scorer, hot_words, num_results) beam_results = [(res.confidence, alphabet.Decode(res.tokens)) for res in beam_results] return beam_results From b047db2ea27ed891b0849867d70b07e193e21bb0 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Thu, 10 Sep 2020 13:10:12 -0700 Subject: [PATCH 16/48] include map in swigwrapper.i --- native_client/ctcdecode/swigwrapper.i | 1 + 1 file changed, 1 insertion(+) diff --git a/native_client/ctcdecode/swigwrapper.i b/native_client/ctcdecode/swigwrapper.i index dbe67c689c..1c754597ad 100644 --- a/native_client/ctcdecode/swigwrapper.i +++ b/native_client/ctcdecode/swigwrapper.i @@ -11,6 +11,7 @@ %include %include %include +%include %include "numpy.i" %init %{ From 9e8ff99ab70e68a4a2b702b1e779ca1ac492f714 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Thu, 10 Sep 2020 14:10:00 -0700 
Subject: [PATCH 17/48] add Map template to swigwrapper.i --- .../ctcdecode/#ctc_beam_search_decoder.h# | 148 ++++++++++++++++++ native_client/ctcdecode/swigwrapper.i | 1 + 2 files changed, 149 insertions(+) create mode 100644 native_client/ctcdecode/#ctc_beam_search_decoder.h# diff --git a/native_client/ctcdecode/#ctc_beam_search_decoder.h# b/native_client/ctcdecode/#ctc_beam_search_decoder.h# new file mode 100644 index 0000000000..e80fcda52b --- /dev/null +++ b/native_client/ctcdecode/#ctc_beam_search_decoder.h# @@ -0,0 +1,148 @@ +#ifndef CTC_BEAM_SEARCH_DECODER_H_ +#define CTC_BEAM_SEARCH_DECODER_H_ + +#include +#include +#include + +#include "scorer.h" +#include "output.h" +#include "alphabet.h" + +class DecoderState { + int abs_time_step_; + int space_id_; + int blank_id_; + size_t beam_size_; + double cutoff_prob_; + size_t cutoff_top_n_; + bool start_expanding_; + + std::shared_ptr ext_scorer_; + std::vector prefixes_; + std::unique_ptr prefix_root_; + std::map hot_words_; + +public: + DecoderState() = default; + ~DecoderState() = default; + + // Disallow copying + DecoderState(const DecoderState&) = delete; + DecoderState& operator=(DecoderState&) = delete; + + /* Initialize CTC beam search decoder + * + * Parameters: + * alphabet: The alphabet. + * beam_size: The width of beam search. + * cutoff_prob: Cutoff probability for pruning. + * cutoff_top_n: Cutoff number for pruning. + * ext_scorer: External scorer to evaluate a prefix, which consists of + * n-gram language model scoring and word insertion term. + * Default null, decoding the input sample without scorer. + * Return: + * Zero on success, non-zero on failure. + */ + int init(const Alphabet& alphabet, + size_t beam_size, + double cutoff_prob, + size_t cutoff_top_n, + std::shared_ptr ext_scorer, + std::map hot_words); + + /* Send data to the decoder + * + * Parameters: + * probs: 2-D vector where each element is a vector of probabilities + * over alphabet of one time step. 
+ * time_dim: Number of timesteps. + * class_dim: Number of classes (alphabet length + 1 for space character). + */ + void next(const double *probs, + int time_dim, + int class_dim); + + /* Get up to num_results transcriptions from current decoder state. + * + * Parameters: + * num_results: Number of beams to return. + * + * Return: + * A vector where each element is a pair of score and decoding result, + * in descending order. + */ + std::vector decode(size_t num_results=1) const; +}; + + +/* CTC Beam Search Decoder + * Parameters: + * probs: 2-D vector where each element is a vector of probabilities + * over alphabet of one time step. + * time_dim: Number of timesteps. + * class_dim: Alphabet length (plus 1 for space character). + * alphabet: The alphabet. + * beam_size: The width of beam search. + * cutoff_prob: Cutoff probability for pruning. + * cutoff_top_n: Cutoff number for pruning. + * ext_scorer: External scorer to evaluate a prefix, which consists of + * n-gram language model scoring and word insertion term. + * Default null, decoding the input sample without scorer. + * hot_words: A map of hot-words and their corresponding boosts + * The hot-word is a string and the boost is a float. + * num_results: Number of beams to return. + * Return: + * A vector where each element is a pair of score and decoding result, + * in descending order. +*/ + +std::vector ctc_beam_search_decoder( + const double* probs, + int time_dim, + int class_dim, + const Alphabet &alphabet, + size_t beam_size, + double cutoff_prob, + size_t cutoff_top_n, + std::shared_ptr ext_scorer, + std::map hot_words, + size_t num_results=1); + +/* CTC Beam Search Decoder for batch data + * Parameters: + * probs: 3-D vector where each element is a 2-D vector that can be used + * by ctc_beam_search_decoder(). + * alphabet: The alphabet. + * beam_size: The width of beam search. + * num_processes: Number of threads for beam search. + * cutoff_prob: Cutoff probability for pruning. 
+ * cutoff_top_n: Cutoff number for pruning. + * ext_scorer: External scorer to evaluate a prefix, which consists of + * n-gram language model scoring and word insertion term. + * Default null, decoding the input sample without scorer. + * hot_words: A map of hot-words and their corresponding boosts + * The hot-word is a string and the boost is a float. + * num_results: Number of beams to return. + * Return: + * A 2-D vector where each element is a vector of beam search decoding + * result for one audio sample. +*/ +std::vector> +ctc_beam_search_decoder_batch( + const double* probs, + int batch_size, + int time_dim, + int class_dim, + const int* seq_lengths, + int seq_lengths_size, + const Alphabet &alphabet, + size_t beam_size, + size_t num_processes, + double cutoff_prob, + size_t cutoff_top_n, + std::shared_ptr ext_scorer, + std::map hot_words, + size_t num_results=1); + +#endif // CTC_BEAM_SEARCH_DECODER_H_ diff --git a/native_client/ctcdecode/swigwrapper.i b/native_client/ctcdecode/swigwrapper.i index 1c754597ad..49c106f06f 100644 --- a/native_client/ctcdecode/swigwrapper.i +++ b/native_client/ctcdecode/swigwrapper.i @@ -23,6 +23,7 @@ namespace std { %template(UnsignedIntVector) vector; %template(OutputVector) vector; %template(OutputVectorVector) vector>; + %template(Map) map; } %shared_ptr(Scorer); From 0fc3521a47086f05af1b09bbddae3200cf409d52 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Mon, 14 Sep 2020 07:30:23 -0700 Subject: [PATCH 18/48] emacs intermediate file --- .../ctcdecode/#ctc_beam_search_decoder.h# | 148 ------------------ 1 file changed, 148 deletions(-) delete mode 100644 native_client/ctcdecode/#ctc_beam_search_decoder.h# diff --git a/native_client/ctcdecode/#ctc_beam_search_decoder.h# b/native_client/ctcdecode/#ctc_beam_search_decoder.h# deleted file mode 100644 index e80fcda52b..0000000000 --- a/native_client/ctcdecode/#ctc_beam_search_decoder.h# +++ /dev/null @@ -1,148 +0,0 @@ -#ifndef CTC_BEAM_SEARCH_DECODER_H_ -#define 
CTC_BEAM_SEARCH_DECODER_H_ - -#include -#include -#include - -#include "scorer.h" -#include "output.h" -#include "alphabet.h" - -class DecoderState { - int abs_time_step_; - int space_id_; - int blank_id_; - size_t beam_size_; - double cutoff_prob_; - size_t cutoff_top_n_; - bool start_expanding_; - - std::shared_ptr ext_scorer_; - std::vector prefixes_; - std::unique_ptr prefix_root_; - std::map hot_words_; - -public: - DecoderState() = default; - ~DecoderState() = default; - - // Disallow copying - DecoderState(const DecoderState&) = delete; - DecoderState& operator=(DecoderState&) = delete; - - /* Initialize CTC beam search decoder - * - * Parameters: - * alphabet: The alphabet. - * beam_size: The width of beam search. - * cutoff_prob: Cutoff probability for pruning. - * cutoff_top_n: Cutoff number for pruning. - * ext_scorer: External scorer to evaluate a prefix, which consists of - * n-gram language model scoring and word insertion term. - * Default null, decoding the input sample without scorer. - * Return: - * Zero on success, non-zero on failure. - */ - int init(const Alphabet& alphabet, - size_t beam_size, - double cutoff_prob, - size_t cutoff_top_n, - std::shared_ptr ext_scorer, - std::map hot_words); - - /* Send data to the decoder - * - * Parameters: - * probs: 2-D vector where each element is a vector of probabilities - * over alphabet of one time step. - * time_dim: Number of timesteps. - * class_dim: Number of classes (alphabet length + 1 for space character). - */ - void next(const double *probs, - int time_dim, - int class_dim); - - /* Get up to num_results transcriptions from current decoder state. - * - * Parameters: - * num_results: Number of beams to return. - * - * Return: - * A vector where each element is a pair of score and decoding result, - * in descending order. 
- */ - std::vector decode(size_t num_results=1) const; -}; - - -/* CTC Beam Search Decoder - * Parameters: - * probs: 2-D vector where each element is a vector of probabilities - * over alphabet of one time step. - * time_dim: Number of timesteps. - * class_dim: Alphabet length (plus 1 for space character). - * alphabet: The alphabet. - * beam_size: The width of beam search. - * cutoff_prob: Cutoff probability for pruning. - * cutoff_top_n: Cutoff number for pruning. - * ext_scorer: External scorer to evaluate a prefix, which consists of - * n-gram language model scoring and word insertion term. - * Default null, decoding the input sample without scorer. - * hot_words: A map of hot-words and their corresponding boosts - * The hot-word is a string and the boost is a float. - * num_results: Number of beams to return. - * Return: - * A vector where each element is a pair of score and decoding result, - * in descending order. -*/ - -std::vector ctc_beam_search_decoder( - const double* probs, - int time_dim, - int class_dim, - const Alphabet &alphabet, - size_t beam_size, - double cutoff_prob, - size_t cutoff_top_n, - std::shared_ptr ext_scorer, - std::map hot_words, - size_t num_results=1); - -/* CTC Beam Search Decoder for batch data - * Parameters: - * probs: 3-D vector where each element is a 2-D vector that can be used - * by ctc_beam_search_decoder(). - * alphabet: The alphabet. - * beam_size: The width of beam search. - * num_processes: Number of threads for beam search. - * cutoff_prob: Cutoff probability for pruning. - * cutoff_top_n: Cutoff number for pruning. - * ext_scorer: External scorer to evaluate a prefix, which consists of - * n-gram language model scoring and word insertion term. - * Default null, decoding the input sample without scorer. - * hot_words: A map of hot-words and their corresponding boosts - * The hot-word is a string and the boost is a float. - * num_results: Number of beams to return. 
- * Return: - * A 2-D vector where each element is a vector of beam search decoding - * result for one audio sample. -*/ -std::vector> -ctc_beam_search_decoder_batch( - const double* probs, - int batch_size, - int time_dim, - int class_dim, - const int* seq_lengths, - int seq_lengths_size, - const Alphabet &alphabet, - size_t beam_size, - size_t num_processes, - double cutoff_prob, - size_t cutoff_top_n, - std::shared_ptr ext_scorer, - std::map hot_words, - size_t num_results=1); - -#endif // CTC_BEAM_SEARCH_DECODER_H_ From c64c68b33f97434784ba01d30b9f336b91fb9d63 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Mon, 14 Sep 2020 07:58:54 -0700 Subject: [PATCH 19/48] map things --- native_client/ctcdecode/__init__.py | 1 + native_client/ctcdecode/swigwrapper.i | 1 + 2 files changed, 2 insertions(+) diff --git a/native_client/ctcdecode/__init__.py b/native_client/ctcdecode/__init__.py index 329ef54064..bdf826dd63 100644 --- a/native_client/ctcdecode/__init__.py +++ b/native_client/ctcdecode/__init__.py @@ -2,6 +2,7 @@ from . import swigwrapper # pylint: disable=import-self from .swigwrapper import UTF8Alphabet +from .swigwrapper import Map # This module is built with SWIG_PYTHON_STRICT_BYTE_CHAR so we must handle # string encoding explicitly, here and throughout this file. 
diff --git a/native_client/ctcdecode/swigwrapper.i b/native_client/ctcdecode/swigwrapper.i index 49c106f06f..aca4f12c48 100644 --- a/native_client/ctcdecode/swigwrapper.i +++ b/native_client/ctcdecode/swigwrapper.i @@ -27,6 +27,7 @@ namespace std { } %shared_ptr(Scorer); +%shared_ptr(Map); // Convert NumPy arrays to pointer+lengths %apply (double* IN_ARRAY2, int DIM1, int DIM2) {(const double *probs, int time_dim, int class_dim)}; From c805ab7dafb0477330c2e943ddc8d30b5cc6b678 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Mon, 14 Sep 2020 08:31:15 -0700 Subject: [PATCH 20/48] map-->unordered_map --- native_client/ctcdecode/ctc_beam_search_decoder.cpp | 10 +++++----- native_client/ctcdecode/ctc_beam_search_decoder.h | 7 ++++--- native_client/ctcdecode/swigwrapper.i | 4 ++-- native_client/modelstate.h | 2 +- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.cpp b/native_client/ctcdecode/ctc_beam_search_decoder.cpp index be0e303e8c..845b1c91b4 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.cpp +++ b/native_client/ctcdecode/ctc_beam_search_decoder.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include "decoder_utils.h" @@ -19,7 +19,7 @@ DecoderState::init(const Alphabet& alphabet, double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::map hot_words) + std::unorded_map hot_words) { // assign special ids abs_time_step_ = 0; @@ -165,7 +165,7 @@ DecoderState::next(const double *probs, float hot_boost = 0.0; if (!hot_words_.empty()) { - std::map::iterator iter; + std::unorded_map::iterator iter; // increase prob of prefix for every word // that matches a word in the hot-words list for (std::string word : ngram) { @@ -273,7 +273,7 @@ std::vector ctc_beam_search_decoder( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::map hot_words, + std::unorded_map hot_words, size_t num_results) { VALID_CHECK_EQ(alphabet.GetSize()+1, 
class_dim, "Number of output classes in acoustic model does not match number of labels in the alphabet file. Alphabet file must be the same one that was used to train the acoustic model."); @@ -297,7 +297,7 @@ ctc_beam_search_decoder_batch( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::map hot_words, + std::unorded_map hot_words, size_t num_results) { VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!"); diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.h b/native_client/ctcdecode/ctc_beam_search_decoder.h index d2530779bf..547490292c 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.h +++ b/native_client/ctcdecode/ctc_beam_search_decoder.h @@ -25,6 +25,7 @@ class DecoderState { std::set hot_words_; float boost_coefficient_; std::map hot_words_; + std::unorded_map hot_words_; public: DecoderState() = default; @@ -52,7 +53,7 @@ class DecoderState { double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::map hot_words); + std::unorded_map hot_words); /* Send data to the decoder * @@ -109,7 +110,7 @@ std::vector ctc_beam_search_decoder( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::map hot_words, + std::unorded_map hot_words, size_t num_results=1); /* CTC Beam Search Decoder for batch data @@ -145,7 +146,7 @@ ctc_beam_search_decoder_batch( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::map hot_words, + std::unorded_map hot_words, size_t num_results=1); #endif // CTC_BEAM_SEARCH_DECODER_H_ diff --git a/native_client/ctcdecode/swigwrapper.i b/native_client/ctcdecode/swigwrapper.i index aca4f12c48..c71f1bae19 100644 --- a/native_client/ctcdecode/swigwrapper.i +++ b/native_client/ctcdecode/swigwrapper.i @@ -11,7 +11,7 @@ %include %include %include -%include +%include %include "numpy.i" %init %{ @@ -23,7 +23,7 @@ namespace std { %template(UnsignedIntVector) vector; %template(OutputVector) vector; %template(OutputVectorVector) 
vector>; - %template(Map) map; + %template(Map) unordered_map; } %shared_ptr(Scorer); diff --git a/native_client/modelstate.h b/native_client/modelstate.h index 88d7038a86..60a63b7bee 100644 --- a/native_client/modelstate.h +++ b/native_client/modelstate.h @@ -18,7 +18,7 @@ struct ModelState { Alphabet alphabet_; std::shared_ptr scorer_; unsigned int beam_width_; - std::map hot_words_; + std::unorded_map hot_words_; unsigned int n_steps_; unsigned int n_context_; unsigned int n_features_; From 8c611bc2e040d6772754ae61650af83fa7937b6e Mon Sep 17 00:00:00 2001 From: josh meyer Date: Mon, 14 Sep 2020 08:41:18 -0700 Subject: [PATCH 21/48] typu --- native_client/ctcdecode/ctc_beam_search_decoder.cpp | 8 ++++---- native_client/ctcdecode/ctc_beam_search_decoder.h | 11 ++++------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.cpp b/native_client/ctcdecode/ctc_beam_search_decoder.cpp index 845b1c91b4..2f6dd17a16 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.cpp +++ b/native_client/ctcdecode/ctc_beam_search_decoder.cpp @@ -19,7 +19,7 @@ DecoderState::init(const Alphabet& alphabet, double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::unorded_map hot_words) + std::unordered_map hot_words) { // assign special ids abs_time_step_ = 0; @@ -165,7 +165,7 @@ DecoderState::next(const double *probs, float hot_boost = 0.0; if (!hot_words_.empty()) { - std::unorded_map::iterator iter; + std::unordered_map::iterator iter; // increase prob of prefix for every word // that matches a word in the hot-words list for (std::string word : ngram) { @@ -273,7 +273,7 @@ std::vector ctc_beam_search_decoder( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::unorded_map hot_words, + std::unordered_map hot_words, size_t num_results) { VALID_CHECK_EQ(alphabet.GetSize()+1, class_dim, "Number of output classes in acoustic model does not match number of labels in the alphabet 
file. Alphabet file must be the same one that was used to train the acoustic model."); @@ -297,7 +297,7 @@ ctc_beam_search_decoder_batch( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::unorded_map hot_words, + std::unordered_map hot_words, size_t num_results) { VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!"); diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.h b/native_client/ctcdecode/ctc_beam_search_decoder.h index 547490292c..dc19555cca 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.h +++ b/native_client/ctcdecode/ctc_beam_search_decoder.h @@ -22,10 +22,7 @@ class DecoderState { std::vector prefixes_; std::unique_ptr prefix_root_; TimestepTreeNode timestep_tree_root_{nullptr, 0}; - std::set hot_words_; - float boost_coefficient_; - std::map hot_words_; - std::unorded_map hot_words_; + std::unordered_map hot_words_; public: DecoderState() = default; @@ -53,7 +50,7 @@ class DecoderState { double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::unorded_map hot_words); + std::unordered_map hot_words); /* Send data to the decoder * @@ -110,7 +107,7 @@ std::vector ctc_beam_search_decoder( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::unorded_map hot_words, + std::unordered_map hot_words, size_t num_results=1); /* CTC Beam Search Decoder for batch data @@ -146,7 +143,7 @@ ctc_beam_search_decoder_batch( double cutoff_prob, size_t cutoff_top_n, std::shared_ptr ext_scorer, - std::unorded_map hot_words, + std::unordered_map hot_words, size_t num_results=1); #endif // CTC_BEAM_SEARCH_DECODER_H_ From 97b0416d5887ea13b0627091a3be2edade6f393f Mon Sep 17 00:00:00 2001 From: josh meyer Date: Mon, 14 Sep 2020 08:42:21 -0700 Subject: [PATCH 22/48] typu --- native_client/modelstate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/native_client/modelstate.h b/native_client/modelstate.h index 60a63b7bee..16e066d6eb 100644 --- 
a/native_client/modelstate.h +++ b/native_client/modelstate.h @@ -18,7 +18,7 @@ struct ModelState { Alphabet alphabet_; std::shared_ptr scorer_; unsigned int beam_width_; - std::unorded_map hot_words_; + std::unordered_map hot_words_; unsigned int n_steps_; unsigned int n_context_; unsigned int n_features_; From 82a582d0b40182436a6ae4b91929463f151ab5e6 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Mon, 14 Sep 2020 12:43:32 -0700 Subject: [PATCH 23/48] use dict() not None --- native_client/ctcdecode/__init__.py | 5 ++--- native_client/ctcdecode/swigwrapper.i | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/native_client/ctcdecode/__init__.py b/native_client/ctcdecode/__init__.py index bdf826dd63..94e03b1504 100644 --- a/native_client/ctcdecode/__init__.py +++ b/native_client/ctcdecode/__init__.py @@ -2,7 +2,6 @@ from . import swigwrapper # pylint: disable=import-self from .swigwrapper import UTF8Alphabet -from .swigwrapper import Map # This module is built with SWIG_PYTHON_STRICT_BYTE_CHAR so we must handle # string encoding explicitly, here and throughout this file. @@ -97,7 +96,7 @@ def ctc_beam_search_decoder(probs_seq, cutoff_prob=1.0, cutoff_top_n=40, scorer=None, - hot_words=None, + hot_words=dict(), num_results=1): """Wrapper for the CTC Beam Search Decoder. @@ -141,7 +140,7 @@ def ctc_beam_search_decoder_batch(probs_seq, cutoff_prob=1.0, cutoff_top_n=40, scorer=None, - hot_words=None, + hot_words=dict(), num_results=1): """Wrapper for the batched CTC beam search decoder. 
diff --git a/native_client/ctcdecode/swigwrapper.i b/native_client/ctcdecode/swigwrapper.i index c71f1bae19..683a342625 100644 --- a/native_client/ctcdecode/swigwrapper.i +++ b/native_client/ctcdecode/swigwrapper.i @@ -27,7 +27,6 @@ namespace std { } %shared_ptr(Scorer); -%shared_ptr(Map); // Convert NumPy arrays to pointer+lengths %apply (double* IN_ARRAY2, int DIM1, int DIM2) {(const double *probs, int time_dim, int class_dim)}; From 6155e43ebee0762db1b09bce92b1ef04baa110e2 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Mon, 14 Sep 2020 13:22:00 -0700 Subject: [PATCH 24/48] error out if hot_words without scorer --- native_client/client.cc | 28 ++++++++++++++-------------- native_client/deepspeech.cc | 7 +++++-- native_client/modelstate.h | 2 +- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/native_client/client.cc b/native_client/client.cc index d0b23d8fd9..4c0685a0d8 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -432,20 +432,6 @@ main(int argc, char **argv) } } - if (hot_words) { - std::vector hot_words_ = SplitStringOnDelim(hot_words, ","); - for ( std::string hot_word_ : hot_words_ ) { - std::vector pair_ = SplitStringOnDelim(hot_word_, ":"); - const char* word = (pair_[0]).c_str(); - float boost = strtof((pair_[1]).c_str(),0); - status = DS_AddHotWord(ctx, word, boost); - if (status != 0) { - fprintf(stderr, "Could not enable hot words.\n"); - return 1; - } - } - } - if (scorer) { status = DS_EnableExternalScorer(ctx, scorer); if (status != 0) { @@ -462,6 +448,20 @@ main(int argc, char **argv) } // sphinx-doc: c_ref_model_stop + if (hot_words) { + std::vector hot_words_ = SplitStringOnDelim(hot_words, ","); + for ( std::string hot_word_ : hot_words_ ) { + std::vector pair_ = SplitStringOnDelim(hot_word_, ":"); + const char* word = (pair_[0]).c_str(); + float boost = strtof((pair_[1]).c_str(),0); + status = DS_AddHotWord(ctx, word, boost); + if (status != 0) { + fprintf(stderr, "Could not enable hot words.\n"); + 
return 1; + } + } + } + #ifndef NO_SOX // Initialise SOX assert(sox_init() == SOX_SUCCESS); diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index 888133865b..ba969d247b 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -347,8 +347,11 @@ DS_AddHotWord(ModelState* aCtx, const char* word, float boost) { - aCtx->hot_words_.insert( std::pair (word, boost) ); - return DS_ERR_OK; + if (aCtx->scorer_) { + aCtx->hot_words_.insert( std::pair (word, boost) ); + return DS_ERR_OK; + } + return DS_ERR_SCORER_NOT_ENABLED; } int diff --git a/native_client/modelstate.h b/native_client/modelstate.h index 16e066d6eb..4beb78b472 100644 --- a/native_client/modelstate.h +++ b/native_client/modelstate.h @@ -17,8 +17,8 @@ struct ModelState { Alphabet alphabet_; std::shared_ptr scorer_; - unsigned int beam_width_; std::unordered_map hot_words_; + unsigned int beam_width_; unsigned int n_steps_; unsigned int n_context_; unsigned int n_features_; From 6df4297ba26962feeead6e8fb743b09ca3c2652c Mon Sep 17 00:00:00 2001 From: josh meyer Date: Tue, 15 Sep 2020 06:14:53 -0700 Subject: [PATCH 25/48] two new functions: remove hot-word and clear all hot-words --- native_client/deepspeech.cc | 25 +++++++++++++++++++++++-- native_client/deepspeech.h | 26 ++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index ba969d247b..03fd819f58 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -344,8 +344,8 @@ DS_EnableExternalScorer(ModelState* aCtx, int DS_AddHotWord(ModelState* aCtx, - const char* word, - float boost) + const char* word, + float boost) { if (aCtx->scorer_) { aCtx->hot_words_.insert( std::pair (word, boost) ); @@ -354,6 +354,27 @@ DS_AddHotWord(ModelState* aCtx, return DS_ERR_SCORER_NOT_ENABLED; } +int +DS_EraseHotWord(ModelState* aCtx, + const char* word) +{ + if (aCtx->scorer_) { + aCtx->hot_words_.erase(word); + return 
DS_ERR_OK; + } + return DS_ERR_SCORER_NOT_ENABLED; +} + +int +DS_ClearHotWords(ModelState* aCtx) +{ + if (aCtx->scorer_) { + aCtx->hot_words_.clear(); + return DS_ERR_OK; + } + return DS_ERR_SCORER_NOT_ENABLED; +} + int DS_DisableExternalScorer(ModelState* aCtx) { diff --git a/native_client/deepspeech.h b/native_client/deepspeech.h index 2f70316ff5..8a8ec89559 100644 --- a/native_client/deepspeech.h +++ b/native_client/deepspeech.h @@ -168,8 +168,30 @@ int DS_EnableExternalScorer(ModelState* aCtx, */ DEEPSPEECH_EXPORT int DS_AddHotWord(ModelState* aCtx, - const char* word, - float boost); + const char* word, + float boost); + +/** + * @brief Remove entry for a hot-word from the hot-words map. + * + * @param aCtx The ModelState pointer for the model being changed. + * @param word The hot word. + * + * @return Zero on success, non-zero on failure (invalid arguments). + */ +DEEPSPEECH_EXPORT +int DS_EraseHotWord(ModelState* aCtx, + const char* word); + +/** + * @brief Removes all elements from the hot-words map. + * + * @param aCtx The ModelState pointer for the model being changed. + * + * @return Zero on success, non-zero on failure (invalid arguments). + */ +DEEPSPEECH_EXPORT +int DS_ClearHotWords(ModelState* aCtx); /** * @brief Disable decoding using an external scorer. 
From 22af3c6bef679b14eff7e15f072bbf1c07a7a37f Mon Sep 17 00:00:00 2001 From: josh meyer Date: Fri, 18 Sep 2020 13:32:41 -0700 Subject: [PATCH 26/48] starting to work on better error messages X-DeepSpeech: NOBUILD --- native_client/deepspeech.cc | 15 ++++++++++++--- native_client/deepspeech.h | 5 ++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index 03fd819f58..391a69b337 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -348,7 +348,10 @@ DS_AddHotWord(ModelState* aCtx, float boost) { if (aCtx->scorer_) { - aCtx->hot_words_.insert( std::pair (word, boost) ); + std::unordered_map::iterator iter, bool err = aCtx->hot_words_.insert( std::pair (word, boost) ); + if (err != 0) { + return DS_ERR_FAIL_INSERT_HOTWORD; + } return DS_ERR_OK; } return DS_ERR_SCORER_NOT_ENABLED; @@ -359,7 +362,10 @@ DS_EraseHotWord(ModelState* aCtx, const char* word) { if (aCtx->scorer_) { - aCtx->hot_words_.erase(word); + int err = aCtx->hot_words_.erase(word); + if (err != 0) { + return DS_ERR_FAIL_ERASE_HOTWORD; + } return DS_ERR_OK; } return DS_ERR_SCORER_NOT_ENABLED; @@ -369,7 +375,10 @@ int DS_ClearHotWords(ModelState* aCtx) { if (aCtx->scorer_) { - aCtx->hot_words_.clear(); + int err = aCtx->hot_words_.clear(); + if (err != 0) { + return DS_ERR_FAIL_CLEAR_HOTWORD; + } return DS_ERR_OK; } return DS_ERR_SCORER_NOT_ENABLED; diff --git a/native_client/deepspeech.h b/native_client/deepspeech.h index 8a8ec89559..231dc78e4b 100644 --- a/native_client/deepspeech.h +++ b/native_client/deepspeech.h @@ -81,7 +81,10 @@ typedef struct Metadata { APPLY(DS_ERR_FAIL_CREATE_STREAM, 0x3004, "Error creating the stream.") \ APPLY(DS_ERR_FAIL_READ_PROTOBUF, 0x3005, "Error reading the proto buffer model file.") \ APPLY(DS_ERR_FAIL_CREATE_SESS, 0x3006, "Failed to create session.") \ - APPLY(DS_ERR_FAIL_CREATE_MODEL, 0x3007, "Could not allocate model state.") + APPLY(DS_ERR_FAIL_CREATE_MODEL, 0x3007, 
"Could not allocate model state.") \ + APPLY(DS_ERR_FAIL_INSERT_HOTWORD, 0x3008, "Could not insert hot-word.") \ + APPLY(DS_ERR_FAIL_CLEAR_HOTWORD, 0x3009, "Could not clear hot-words.") \ + APPLY(DS_ERR_FAIL_ERASE_HOTWORD, 0x3010, "Could not erase hot-word.") // sphinx-doc: error_code_listing_end From c90b05478fed932c88a9254f96700c9be9136c19 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Mon, 21 Sep 2020 10:06:39 -0700 Subject: [PATCH 27/48] better error handling + .Net ERR codes --- native_client/client.cc | 5 ++++- native_client/deepspeech.cc | 15 ++++++++++----- .../dotnet/DeepSpeechClient/Enums/ErrorCodes.cs | 3 +++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/native_client/client.cc b/native_client/client.cc index 4c0685a0d8..3b160c4ec6 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -453,9 +453,12 @@ main(int argc, char **argv) for ( std::string hot_word_ : hot_words_ ) { std::vector pair_ = SplitStringOnDelim(hot_word_, ":"); const char* word = (pair_[0]).c_str(); + // the strtof function will return 0 in case of non numeric characters + // so, check the boost string before we turn it into a float + bool boost_is_valid = (pair_[1].find_first_not_of(".0123456789") == std::string::npos); float boost = strtof((pair_[1]).c_str(),0); status = DS_AddHotWord(ctx, word, boost); - if (status != 0) { + if (status != 0 || !boost_is_valid) { fprintf(stderr, "Could not enable hot words.\n"); return 1; } diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index 391a69b337..57f77ba119 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -348,8 +348,10 @@ DS_AddHotWord(ModelState* aCtx, float boost) { if (aCtx->scorer_) { - std::unordered_map::iterator iter, bool err = aCtx->hot_words_.insert( std::pair (word, boost) ); - if (err != 0) { + const int size_before = aCtx->hot_words_.size(); + aCtx->hot_words_.insert( std::pair (word, boost) ); + const int size_after = aCtx->hot_words_.size(); 
+ if (size_before == size_after) { return DS_ERR_FAIL_INSERT_HOTWORD; } return DS_ERR_OK; @@ -362,8 +364,10 @@ DS_EraseHotWord(ModelState* aCtx, const char* word) { if (aCtx->scorer_) { + const int size_before = aCtx->hot_words_.size(); int err = aCtx->hot_words_.erase(word); - if (err != 0) { + const int size_after = aCtx->hot_words_.size(); + if (size_before == size_after) { return DS_ERR_FAIL_ERASE_HOTWORD; } return DS_ERR_OK; @@ -375,8 +379,9 @@ int DS_ClearHotWords(ModelState* aCtx) { if (aCtx->scorer_) { - int err = aCtx->hot_words_.clear(); - if (err != 0) { + aCtx->hot_words_.clear(); + const int size_after = aCtx->hot_words_.size(); + if (size_after != 0) { return DS_ERR_FAIL_CLEAR_HOTWORD; } return DS_ERR_OK; diff --git a/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs b/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs index 30660add2a..cbcb8f43bf 100644 --- a/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs +++ b/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs @@ -26,5 +26,8 @@ internal enum ErrorCodes DS_ERR_FAIL_CREATE_STREAM = 0x3004, DS_ERR_FAIL_READ_PROTOBUF = 0x3005, DS_ERR_FAIL_CREATE_SESS = 0x3006, + DS_ERR_FAIL_INSERT_HOTWORD = 0x3008, + DS_ERR_FAIL_CLEAR_HOTWORD = 0x3009, + DS_ERR_FAIL_ERASE_HOTWORD = 0x3010 } } From b69a99c007cb077268733cdf10e2556311642155 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Mon, 21 Sep 2020 10:13:24 -0700 Subject: [PATCH 28/48] allow for negative boosts:) --- native_client/client.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/native_client/client.cc b/native_client/client.cc index 3b160c4ec6..c139129e53 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -455,7 +455,7 @@ main(int argc, char **argv) const char* word = (pair_[0]).c_str(); // the strtof function will return 0 in case of non numeric characters // so, check the boost string before we turn it into a float - bool boost_is_valid = (pair_[1].find_first_not_of(".0123456789") == 
std::string::npos); + bool boost_is_valid = (pair_[1].find_first_not_of("-.0123456789") == std::string::npos); float boost = strtof((pair_[1]).c_str(),0); status = DS_AddHotWord(ctx, word, boost); if (status != 0 || !boost_is_valid) { From 753b62ffe0de7fb03de9c36861356a98c8479e37 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Mon, 21 Sep 2020 10:36:44 -0700 Subject: [PATCH 29/48] adding TC test for hot-words --- taskcluster/tc-asserts.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/taskcluster/tc-asserts.sh b/taskcluster/tc-asserts.sh index 7a164b07e2..8bd398b29f 100755 --- a/taskcluster/tc-asserts.sh +++ b/taskcluster/tc-asserts.sh @@ -531,6 +531,12 @@ run_cpp_only_inference_tests() status=$? set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_intermediate_decode}" "$status" + + set +e + hotwords_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" --stream 1280 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) + status=$? 
+ set -e + assert_correct_ldc93s1_lm "${hotwords_decode}" "$status" } run_js_streaming_inference_tests() From a7a6dccb3a3d30b7b97b459373a9e73c04947028 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Mon, 21 Sep 2020 12:39:51 -0700 Subject: [PATCH 30/48] add hot-words to python client, make TC test hot-words everywhere --- native_client/python/__init__.py | 16 ++++++++++++++++ native_client/python/client.py | 8 ++++++++ taskcluster/tc-asserts.sh | 12 ++++++------ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/native_client/python/__init__.py b/native_client/python/__init__.py index 8dec3f0c44..8637e99ae8 100644 --- a/native_client/python/__init__.py +++ b/native_client/python/__init__.py @@ -95,6 +95,22 @@ def disableExternalScorer(self): """ return deepspeech.impl.DisableExternalScorer(self._impl) + def addHotWord(self, word, boost): + """ + Add a word and its boost for decoding. + + :param word: the hot-word + :type word: str + + :param word: the boost + :type word: float + + :throws: RuntimeError on error + """ + status = deepspeech.impl.AddHotWord(self._impl, word, boost) + if status != 0: + raise RuntimeError("AddHotWord failed with '{}' (0x{:X})".format(deepspeech.impl.ErrorCodeToErrorMessage(status),status)) + def setScorerAlphaBeta(self, alpha, beta): """ Set hyperparameters alpha and beta of the external scorer. 
diff --git a/native_client/python/client.py b/native_client/python/client.py index 6ebf7bcdf8..156adc300b 100644 --- a/native_client/python/client.py +++ b/native_client/python/client.py @@ -109,6 +109,8 @@ def main(): help='Output json from metadata with timestamp of each word') parser.add_argument('--candidate_transcripts', type=int, default=3, help='Number of candidate transcripts to include in JSON output') + parser.add_argument('--hot_words', type=str, + help='Hot words and their boosts.') args = parser.parse_args() print('Loading model from file {}'.format(args.model), file=sys.stderr) @@ -134,6 +136,12 @@ def main(): if args.lm_alpha and args.lm_beta: ds.setScorerAlphaBeta(args.lm_alpha, args.lm_beta) + if args.hot_words: + print('Adding hot-words', file=sys.stderr) + for word_boost in args.hot_words.split(','): + word,boost = word_boost.split(':') + ds.addHotWord(word,float(boost)) + fin = wave.open(args.audio, 'rb') fs_orig = fin.getframerate() if fs_orig != desired_sample_rate: diff --git a/taskcluster/tc-asserts.sh b/taskcluster/tc-asserts.sh index 8bd398b29f..a62c58e707 100755 --- a/taskcluster/tc-asserts.sh +++ b/taskcluster/tc-asserts.sh @@ -395,6 +395,12 @@ run_basic_inference_tests() status=$? set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}" "$status" + + set +e + hotwords_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${TASKCLUSTER_TMP_DIR}/stderr) + status=$? + set -e + assert_correct_ldc93s1_lm "${hotwords_decode}" "$status" } run_all_inference_tests() @@ -531,12 +537,6 @@ run_cpp_only_inference_tests() status=$? 
set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_intermediate_decode}" "$status" - - set +e - hotwords_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" --stream 1280 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) - status=$? - set -e - assert_correct_ldc93s1_lm "${hotwords_decode}" "$status" } run_js_streaming_inference_tests() From 9c437c3dcad2aff6d4308df446c4527d9d8e6749 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Mon, 21 Sep 2020 13:12:21 -0700 Subject: [PATCH 31/48] only run TC tests for C++ and Python --- taskcluster/tc-asserts.sh | 15 +++++++++------ taskcluster/tc-cpp-ds-tests.sh | 2 ++ taskcluster/tc-cpp_tflite-ds-tests.sh | 2 ++ taskcluster/tc-python-tests.sh | 2 ++ taskcluster/tc-python_tflite-tests.sh | 2 ++ 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/taskcluster/tc-asserts.sh b/taskcluster/tc-asserts.sh index a62c58e707..4fd8162ba0 100755 --- a/taskcluster/tc-asserts.sh +++ b/taskcluster/tc-asserts.sh @@ -395,12 +395,6 @@ run_basic_inference_tests() status=$? set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}" "$status" - - set +e - hotwords_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${TASKCLUSTER_TMP_DIR}/stderr) - status=$? 
- set -e - assert_correct_ldc93s1_lm "${hotwords_decode}" "$status" } run_all_inference_tests() @@ -530,6 +524,15 @@ run_multi_inference_tests() assert_correct_multi_ldc93s1 "${multi_phrase_pbmodel_withlm}" "$status" } +run_hotword_tests() +{ + set +e + hotwords_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${TASKCLUSTER_TMP_DIR}/stderr) + status=$? + set -e + assert_correct_ldc93s1_lm "${hotwords_decode}" "$status" +} + run_cpp_only_inference_tests() { set +e diff --git a/taskcluster/tc-cpp-ds-tests.sh b/taskcluster/tc-cpp-ds-tests.sh index 67d5d92fc4..eabfcfa8aa 100644 --- a/taskcluster/tc-cpp-ds-tests.sh +++ b/taskcluster/tc-cpp-ds-tests.sh @@ -18,3 +18,5 @@ run_all_inference_tests run_multi_inference_tests run_cpp_only_inference_tests + +run_hotword_tests diff --git a/taskcluster/tc-cpp_tflite-ds-tests.sh b/taskcluster/tc-cpp_tflite-ds-tests.sh index 313475ef8b..6e7f9d8c79 100644 --- a/taskcluster/tc-cpp_tflite-ds-tests.sh +++ b/taskcluster/tc-cpp_tflite-ds-tests.sh @@ -23,3 +23,5 @@ run_all_inference_tests run_multi_inference_tests run_cpp_only_inference_tests + +run_hotword_tests diff --git a/taskcluster/tc-python-tests.sh b/taskcluster/tc-python-tests.sh index d55a30978b..4e6b284099 100644 --- a/taskcluster/tc-python-tests.sh +++ b/taskcluster/tc-python-tests.sh @@ -28,4 +28,6 @@ ensure_cuda_usage "$3" run_all_inference_tests +run_hotword_tests + virtualenv_deactivate "${pyalias}" "deepspeech" diff --git a/taskcluster/tc-python_tflite-tests.sh b/taskcluster/tc-python_tflite-tests.sh index a95adf4008..7f00ea1205 100644 --- a/taskcluster/tc-python_tflite-tests.sh +++ b/taskcluster/tc-python_tflite-tests.sh @@ -33,4 +33,6 @@ deepspeech --version run_all_inference_tests +run_hotword_tests + virtualenv_deactivate "${pyalias}" "deepspeech" From 63ccc4a27f2f7eb05b9c16328a329fd98707f8bc Mon Sep 17 00:00:00 
2001 From: josh meyer Date: Tue, 22 Sep 2020 06:20:50 -0700 Subject: [PATCH 32/48] fully expose API in python bindings --- native_client/python/__init__.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/native_client/python/__init__.py b/native_client/python/__init__.py index 8637e99ae8..4b55da5acd 100644 --- a/native_client/python/__init__.py +++ b/native_client/python/__init__.py @@ -111,6 +111,29 @@ def addHotWord(self, word, boost): if status != 0: raise RuntimeError("AddHotWord failed with '{}' (0x{:X})".format(deepspeech.impl.ErrorCodeToErrorMessage(status),status)) + def eraseHotWord(self, word): + """ + Remove entry for word from hot-words dict. + + :param word: the hot-word + :type word: str + + :throws: RuntimeError on error + """ + status = deepspeech.impl.EraseHotWord(self._impl, word) + if status != 0: + raise RuntimeError("EraseHotWord failed with '{}' (0x{:X})".format(deepspeech.impl.ErrorCodeToErrorMessage(status),status)) + + def clearHotWords(self): + """ + Remove all entries from hot-words dict. + + :throws: RuntimeError on error + """ + status = deepspeech.impl.ClearHotWords(self._impl) + if status != 0: + raise RuntimeError("ClearHotWords failed with '{}' (0x{:X})".format(deepspeech.impl.ErrorCodeToErrorMessage(status),status)) + def setScorerAlphaBeta(self, alpha, beta): """ Set hyperparameters alpha and beta of the external scorer. From 44bf59d811574abedf8aa77572bc327a30be0f6a Mon Sep 17 00:00:00 2001 From: josh meyer Date: Tue, 22 Sep 2020 07:08:42 -0700 Subject: [PATCH 33/48] expose API in Java (thanks spectie!) 
--- .../libdeepspeech/DeepSpeechModel.java | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java index eafa11e230..a97b4db0f4 100644 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java +++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java @@ -215,4 +215,36 @@ public String finishStream(DeepSpeechStreamingState ctx) { public Metadata finishStreamWithMetadata(DeepSpeechStreamingState ctx, int num_results) { return impl.FinishStreamWithMetadata(ctx.get(), num_results); } + /** + * @brief Add a hot-word + * + * @param word + * @param boost + * + * @throws RuntimeException on failure. + * + */ + public void addHotWord(String word, float boost) { + evaluateErrorCode(impl.addHotWord(word, boost)); + } + /** + * @brief Erase a hot-word + * + * @param word + * + * @throws RuntimeException on failure. + * + */ + public void eraseHotWord(String word) { + evaluateErrorCode(impl.eraseHotWord(word)); + } + /** + * @brief Clear all hot-words. + * + * @throws RuntimeException on failure. + * + */ + public void clearHotWords() { + evaluateErrorCode(impl.clearHotWords()); + } } From 486634112525acb8f6f93ddd9fb7158a63da1bd4 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Tue, 22 Sep 2020 07:21:22 -0700 Subject: [PATCH 34/48] expose API in dotnet (thanks spectie!) 
--- .../dotnet/DeepSpeechClient/DeepSpeech.cs | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs index 08a3808b39..cfda417616 100644 --- a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs +++ b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs @@ -74,6 +74,39 @@ public unsafe void SetModelBeamWidth(uint aBeamWidth) EvaluateResultCode(resultCode); } + /// + /// Add a hot-word. + /// + /// Some word + /// Some boost + /// Thrown on failure. + public unsafe void addHotWord(string aWord, float aBoost) + { + var resultCode = NativeImp.DS_AddHotWord(_modelStatePP, aWord, aBoost); + EvaluateResultCode(resultCode); + } + + /// + /// Erase entry for a hot-word. + /// + /// Some word + /// Thrown on failure. + public unsafe void eraseHotWord(string aWord) + { + var resultCode = NativeImp.DS_EraseHotWord(_modelStatePP, aWord); + EvaluateResultCode(resultCode); + } + + /// + /// Clear all hot-words. + /// + /// Thrown on failure. + public unsafe void clearHotWords() + { + var resultCode = NativeImp.DS_ClearHotWords(_modelStatePP); + EvaluateResultCode(resultCode); + } + /// /// Return the sample rate expected by the model. /// From 3d0b67de9813f3b12532cb99e24f90132daa60eb Mon Sep 17 00:00:00 2001 From: josh meyer Date: Tue, 22 Sep 2020 07:38:11 -0700 Subject: [PATCH 35/48] expose API in javascript (thanks spectie!) 
--- native_client/javascript/index.ts | 41 +++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/native_client/javascript/index.ts b/native_client/javascript/index.ts index 988cbfd504..91fa98d870 100644 --- a/native_client/javascript/index.ts +++ b/native_client/javascript/index.ts @@ -182,6 +182,47 @@ export class Model { } } + /** + * Add a hot-word and its boost + * + * @param aWord word + * @param aBoost boost + * + * @throws on error + */ + addHotWord(aWord: string, aBoost: number): void { + const status = binding.addHotWord(this._impl, aWord, aBoost); + if (status !== 0) { + throw `addHotWord failed: ${binding.ErrorCodeToErrorMessage(status)} (0x${status.toString(16)})`; + } + } + + /** + * Erase entry for hot-word + * + * @param aWord word + * + * @throws on error + */ + addHotWord(aWord: string): void { + const status = binding.eraseHotWord(this._impl, aWord); + if (status !== 0) { + throw `eraseHotWord failed: ${binding.ErrorCodeToErrorMessage(status)} (0x${status.toString(16)})`; + } + } + + /** + * Clear all hot-word entries + * + * @throws on error + */ + clearHotWords(): void { + const status = binding.clearHotWords(this._impl); + if (status !== 0) { + throw `clearHotWord failed: ${binding.ErrorCodeToErrorMessage(status)} (0x${status.toString(16)})`; + } + } + /** * Return the sample rate expected by the model. 
* From 6636a659698eec004aaf4d2557d067a721e1baff Mon Sep 17 00:00:00 2001 From: josh meyer Date: Tue, 22 Sep 2020 07:46:10 -0700 Subject: [PATCH 36/48] java lol --- .../mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java index a97b4db0f4..f461904664 100644 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java +++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java @@ -225,7 +225,7 @@ public Metadata finishStreamWithMetadata(DeepSpeechStreamingState ctx, int num_r * */ public void addHotWord(String word, float boost) { - evaluateErrorCode(impl.addHotWord(word, boost)); + evaluateErrorCode(impl.AddHotWord(word, boost)); } /** * @brief Erase a hot-word @@ -236,7 +236,7 @@ public void addHotWord(String word, float boost) { * */ public void eraseHotWord(String word) { - evaluateErrorCode(impl.eraseHotWord(word)); + evaluateErrorCode(impl.EraseHotWord(word)); } /** * @brief Clear all hot-words. 
@@ -245,6 +245,6 @@ public void eraseHotWord(String word) { * */ public void clearHotWords() { - evaluateErrorCode(impl.clearHotWords()); + evaluateErrorCode(impl.ClearHotWords()); } } From ecb3d2783b13e3e6af39c34d633e9140ed89f198 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Tue, 22 Sep 2020 07:48:24 -0700 Subject: [PATCH 37/48] typo in javascript --- native_client/javascript/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/native_client/javascript/index.ts b/native_client/javascript/index.ts index 91fa98d870..ec7f5686ce 100644 --- a/native_client/javascript/index.ts +++ b/native_client/javascript/index.ts @@ -204,7 +204,7 @@ export class Model { * * @throws on error */ - addHotWord(aWord: string): void { + eraseHotWord(aWord: string): void { const status = binding.eraseHotWord(this._impl, aWord); if (status !== 0) { throw `eraseHotWord failed: ${binding.ErrorCodeToErrorMessage(status)} (0x${status.toString(16)})`; From f742e058ec129b914492d58065ab2b77482a5887 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Tue, 22 Sep 2020 08:01:21 -0700 Subject: [PATCH 38/48] commenting --- native_client/args.h | 2 +- native_client/client.cc | 2 +- native_client/deepspeech.h | 6 +++--- native_client/python/client.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/native_client/args.h b/native_client/args.h index 75cade2b97..856988dd4e 100644 --- a/native_client/args.h +++ b/native_client/args.h @@ -58,7 +58,7 @@ void PrintHelp(const char* bin) "\t--json\t\t\t\tExtended output, shows word timings as JSON\n" "\t--candidate_transcripts NUMBER\tNumber of candidate transcripts to include in JSON output\n" "\t--stream size\t\t\tRun in stream mode, output intermediate results\n" - "\t--hot_words\t\t\tHot words separated by commas\n" + "\t--hot_words\t\t\tHot-words and their boosts. 
Word:Boost pairs are comma-separated\n" "\t--help\t\t\t\tShow help\n" "\t--version\t\t\tPrint version and exits\n"; char* version = DS_Version(); diff --git a/native_client/client.cc b/native_client/client.cc index c139129e53..96e1ff3999 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -459,7 +459,7 @@ main(int argc, char **argv) float boost = strtof((pair_[1]).c_str(),0); status = DS_AddHotWord(ctx, word, boost); if (status != 0 || !boost_is_valid) { - fprintf(stderr, "Could not enable hot words.\n"); + fprintf(stderr, "Could not enable hot-word.\n"); return 1; } } diff --git a/native_client/deepspeech.h b/native_client/deepspeech.h index 231dc78e4b..35e9289a2e 100644 --- a/native_client/deepspeech.h +++ b/native_client/deepspeech.h @@ -164,8 +164,8 @@ int DS_EnableExternalScorer(ModelState* aCtx, * @brief Add a hot-word and its boost. * * @param aCtx The ModelState pointer for the model being changed. - * @param word The hot word. - * @param boost The additive boost. + * @param word The hot-word. + * @param boost The boost. * * @return Zero on success, non-zero on failure (invalid arguments). */ @@ -178,7 +178,7 @@ int DS_AddHotWord(ModelState* aCtx, * @brief Remove entry for a hot-word from the hot-words map. * * @param aCtx The ModelState pointer for the model being changed. - * @param word The hot word. + * @param word The hot-word. * * @return Zero on success, non-zero on failure (invalid arguments). 
*/ diff --git a/native_client/python/client.py b/native_client/python/client.py index 156adc300b..ca1c8e9225 100644 --- a/native_client/python/client.py +++ b/native_client/python/client.py @@ -110,7 +110,7 @@ def main(): parser.add_argument('--candidate_transcripts', type=int, default=3, help='Number of candidate transcripts to include in JSON output') parser.add_argument('--hot_words', type=str, - help='Hot words and their boosts.') + help='Hot-words and their boosts.') args = parser.parse_args() print('Loading model from file {}'.format(args.model), file=sys.stderr) From 37a27f60430a094a9a48a7d29cf5cfda6d07939e Mon Sep 17 00:00:00 2001 From: josh meyer Date: Tue, 22 Sep 2020 08:15:24 -0700 Subject: [PATCH 39/48] java error codes from swig --- .../deepspeech/libdeepspeech_doc/DeepSpeech_Error_Codes.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/DeepSpeech_Error_Codes.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/DeepSpeech_Error_Codes.java index f93f3e8cd1..3fad4553be 100644 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/DeepSpeech_Error_Codes.java +++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/DeepSpeech_Error_Codes.java @@ -28,7 +28,10 @@ public enum DeepSpeech_Error_Codes { ERR_FAIL_CREATE_STREAM(0x3004), ERR_FAIL_READ_PROTOBUF(0x3005), ERR_FAIL_CREATE_SESS(0x3006), - ERR_FAIL_CREATE_MODEL(0x3007); + ERR_FAIL_CREATE_MODEL(0x3007), + ERR_FAIL_INSERT_HOTWORD(0x3008), + ERR_FAIL_CLEAR_HOTWORD(0x3009), + ERR_FAIL_ERASE_HOTWORD(0x3010); public final int swigValue() { return swigValue; From 90f1611f9a327c2bcb581cef0b92f572e2230e66 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Tue, 22 Sep 2020 08:18:11 -0700 Subject: [PATCH 40/48] java docs from SWIG --- .../SWIGTYPE_p_ModelState.java | 26 ++ 
.../SWIGTYPE_p_StreamingState.java | 26 ++ .../SWIGTYPE_p_p_ModelState.java | 26 ++ .../SWIGTYPE_p_p_StreamingState.java | 26 ++ .../deepspeech/libdeepspeech_doc/impl.java | 359 ++++++++++++++++++ .../deepspeech/libdeepspeech_doc/implJNI.java | 55 +++ 6 files changed, 518 insertions(+) create mode 100644 native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_ModelState.java create mode 100644 native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_StreamingState.java create mode 100644 native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_ModelState.java create mode 100644 native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_StreamingState.java create mode 100644 native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/impl.java create mode 100644 native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/implJNI.java diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_ModelState.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_ModelState.java new file mode 100644 index 0000000000..6a0ff56035 --- /dev/null +++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_ModelState.java @@ -0,0 +1,26 @@ +/* ---------------------------------------------------------------------------- + * This file was automatically generated by SWIG (http://www.swig.org). + * Version 4.0.1 + * + * Do not make changes to this file unless you know what you are doing--modify + * the SWIG interface file instead. 
+ * ----------------------------------------------------------------------------- */ + +package org.mozilla.deepspeech.libdeepspeech; + +public class SWIGTYPE_p_ModelState { + private transient long swigCPtr; + + protected SWIGTYPE_p_ModelState(long cPtr, @SuppressWarnings("unused") boolean futureUse) { + swigCPtr = cPtr; + } + + protected SWIGTYPE_p_ModelState() { + swigCPtr = 0; + } + + protected static long getCPtr(SWIGTYPE_p_ModelState obj) { + return (obj == null) ? 0 : obj.swigCPtr; + } +} + diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_StreamingState.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_StreamingState.java new file mode 100644 index 0000000000..75d4f1a530 --- /dev/null +++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_StreamingState.java @@ -0,0 +1,26 @@ +/* ---------------------------------------------------------------------------- + * This file was automatically generated by SWIG (http://www.swig.org). + * Version 4.0.1 + * + * Do not make changes to this file unless you know what you are doing--modify + * the SWIG interface file instead. + * ----------------------------------------------------------------------------- */ + +package org.mozilla.deepspeech.libdeepspeech; + +public class SWIGTYPE_p_StreamingState { + private transient long swigCPtr; + + protected SWIGTYPE_p_StreamingState(long cPtr, @SuppressWarnings("unused") boolean futureUse) { + swigCPtr = cPtr; + } + + protected SWIGTYPE_p_StreamingState() { + swigCPtr = 0; + } + + protected static long getCPtr(SWIGTYPE_p_StreamingState obj) { + return (obj == null) ? 
0 : obj.swigCPtr; + } +} + diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_ModelState.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_ModelState.java new file mode 100644 index 0000000000..97ca54d7c9 --- /dev/null +++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_ModelState.java @@ -0,0 +1,26 @@ +/* ---------------------------------------------------------------------------- + * This file was automatically generated by SWIG (http://www.swig.org). + * Version 4.0.1 + * + * Do not make changes to this file unless you know what you are doing--modify + * the SWIG interface file instead. + * ----------------------------------------------------------------------------- */ + +package org.mozilla.deepspeech.libdeepspeech; + +public class SWIGTYPE_p_p_ModelState { + private transient long swigCPtr; + + protected SWIGTYPE_p_p_ModelState(long cPtr, @SuppressWarnings("unused") boolean futureUse) { + swigCPtr = cPtr; + } + + protected SWIGTYPE_p_p_ModelState() { + swigCPtr = 0; + } + + protected static long getCPtr(SWIGTYPE_p_p_ModelState obj) { + return (obj == null) ? 0 : obj.swigCPtr; + } +} + diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_StreamingState.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_StreamingState.java new file mode 100644 index 0000000000..50b597b429 --- /dev/null +++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_StreamingState.java @@ -0,0 +1,26 @@ +/* ---------------------------------------------------------------------------- + * This file was automatically generated by SWIG (http://www.swig.org). 
+ * Version 4.0.1 + * + * Do not make changes to this file unless you know what you are doing--modify + * the SWIG interface file instead. + * ----------------------------------------------------------------------------- */ + +package org.mozilla.deepspeech.libdeepspeech; + +public class SWIGTYPE_p_p_StreamingState { + private transient long swigCPtr; + + protected SWIGTYPE_p_p_StreamingState(long cPtr, @SuppressWarnings("unused") boolean futureUse) { + swigCPtr = cPtr; + } + + protected SWIGTYPE_p_p_StreamingState() { + swigCPtr = 0; + } + + protected static long getCPtr(SWIGTYPE_p_p_StreamingState obj) { + return (obj == null) ? 0 : obj.swigCPtr; + } +} + diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/impl.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/impl.java new file mode 100644 index 0000000000..c982673ce7 --- /dev/null +++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/impl.java @@ -0,0 +1,359 @@ +/* ---------------------------------------------------------------------------- + * This file was automatically generated by SWIG (http://www.swig.org). + * Version 4.0.1 + * + * Do not make changes to this file unless you know what you are doing--modify + * the SWIG interface file instead. + * ----------------------------------------------------------------------------- */ + +package org.mozilla.deepspeech.libdeepspeech; + +public class impl { + public static SWIGTYPE_p_p_ModelState new_modelstatep() { + long cPtr = implJNI.new_modelstatep(); + return (cPtr == 0) ? null : new SWIGTYPE_p_p_ModelState(cPtr, false); + } + + public static SWIGTYPE_p_p_ModelState copy_modelstatep(SWIGTYPE_p_ModelState value) { + long cPtr = implJNI.copy_modelstatep(SWIGTYPE_p_ModelState.getCPtr(value)); + return (cPtr == 0) ? 
null : new SWIGTYPE_p_p_ModelState(cPtr, false); + } + + public static void delete_modelstatep(SWIGTYPE_p_p_ModelState obj) { + implJNI.delete_modelstatep(SWIGTYPE_p_p_ModelState.getCPtr(obj)); + } + + public static void modelstatep_assign(SWIGTYPE_p_p_ModelState obj, SWIGTYPE_p_ModelState value) { + implJNI.modelstatep_assign(SWIGTYPE_p_p_ModelState.getCPtr(obj), SWIGTYPE_p_ModelState.getCPtr(value)); + } + + public static SWIGTYPE_p_ModelState modelstatep_value(SWIGTYPE_p_p_ModelState obj) { + long cPtr = implJNI.modelstatep_value(SWIGTYPE_p_p_ModelState.getCPtr(obj)); + return (cPtr == 0) ? null : new SWIGTYPE_p_ModelState(cPtr, false); + } + + public static SWIGTYPE_p_p_StreamingState new_streamingstatep() { + long cPtr = implJNI.new_streamingstatep(); + return (cPtr == 0) ? null : new SWIGTYPE_p_p_StreamingState(cPtr, false); + } + + public static SWIGTYPE_p_p_StreamingState copy_streamingstatep(SWIGTYPE_p_StreamingState value) { + long cPtr = implJNI.copy_streamingstatep(SWIGTYPE_p_StreamingState.getCPtr(value)); + return (cPtr == 0) ? null : new SWIGTYPE_p_p_StreamingState(cPtr, false); + } + + public static void delete_streamingstatep(SWIGTYPE_p_p_StreamingState obj) { + implJNI.delete_streamingstatep(SWIGTYPE_p_p_StreamingState.getCPtr(obj)); + } + + public static void streamingstatep_assign(SWIGTYPE_p_p_StreamingState obj, SWIGTYPE_p_StreamingState value) { + implJNI.streamingstatep_assign(SWIGTYPE_p_p_StreamingState.getCPtr(obj), SWIGTYPE_p_StreamingState.getCPtr(value)); + } + + public static SWIGTYPE_p_StreamingState streamingstatep_value(SWIGTYPE_p_p_StreamingState obj) { + long cPtr = implJNI.streamingstatep_value(SWIGTYPE_p_p_StreamingState.getCPtr(obj)); + return (cPtr == 0) ? null : new SWIGTYPE_p_StreamingState(cPtr, false); + } + + /** + * An object providing an interface to a trained DeepSpeech model.
+ *
+ * @param aModelPath The path to the frozen model graph.
+ * @param retval a ModelState pointer
+ *
+ * @return Zero on success, non-zero on failure. + */ + public static int CreateModel(String aModelPath, SWIGTYPE_p_p_ModelState retval) { + return implJNI.CreateModel(aModelPath, SWIGTYPE_p_p_ModelState.getCPtr(retval)); + } + + /** + * Get beam width value used by the model. If {DS_SetModelBeamWidth}
+ * was not called before, will return the default value loaded from the
+ * model file.
+ *
+ * @param aCtx A ModelState pointer created with {DS_CreateModel}.
+ *
+ * @return Beam width value used by the model. + */ + public static long GetModelBeamWidth(SWIGTYPE_p_ModelState aCtx) { + return implJNI.GetModelBeamWidth(SWIGTYPE_p_ModelState.getCPtr(aCtx)); + } + + /** + * Set beam width value used by the model.
+ *
+ * @param aCtx A ModelState pointer created with {DS_CreateModel}.
+ * @param aBeamWidth The beam width used by the model. A larger beam width value
+ * generates better results at the cost of decoding time.
+ *
+ * @return Zero on success, non-zero on failure. + */ + public static int SetModelBeamWidth(SWIGTYPE_p_ModelState aCtx, long aBeamWidth) { + return implJNI.SetModelBeamWidth(SWIGTYPE_p_ModelState.getCPtr(aCtx), aBeamWidth); + } + + /** + * Return the sample rate expected by a model.
+ *
+ * @param aCtx A ModelState pointer created with {DS_CreateModel}.
+ *
+ * @return Sample rate expected by the model for its input. + */ + public static int GetModelSampleRate(SWIGTYPE_p_ModelState aCtx) { + return implJNI.GetModelSampleRate(SWIGTYPE_p_ModelState.getCPtr(aCtx)); + } + + /** + * Frees associated resources and destroys model object. + */ + public static void FreeModel(SWIGTYPE_p_ModelState ctx) { + implJNI.FreeModel(SWIGTYPE_p_ModelState.getCPtr(ctx)); + } + + /** + * Enable decoding using an external scorer.
+ *
+ * @param aCtx The ModelState pointer for the model being changed.
+ * @param aScorerPath The path to the external scorer file.
+ *
+ * @return Zero on success, non-zero on failure (invalid arguments). + */ + public static int EnableExternalScorer(SWIGTYPE_p_ModelState aCtx, String aScorerPath) { + return implJNI.EnableExternalScorer(SWIGTYPE_p_ModelState.getCPtr(aCtx), aScorerPath); + } + + /** + * Add a hot-word and its boost.
+ *
+ * @param aCtx The ModelState pointer for the model being changed.
+ * @param word The hot-word.
+ * @param boost The boost.
+ *
+ * @return Zero on success, non-zero on failure (invalid arguments). + */ + public static int AddHotWord(SWIGTYPE_p_ModelState aCtx, String word, float boost) { + return implJNI.AddHotWord(SWIGTYPE_p_ModelState.getCPtr(aCtx), word, boost); + } + + /** + * Remove entry for a hot-word from the hot-words map.
+ *
+ * @param aCtx The ModelState pointer for the model being changed.
+ * @param word The hot-word.
+ *
+ * @return Zero on success, non-zero on failure (invalid arguments). + */ + public static int EraseHotWord(SWIGTYPE_p_ModelState aCtx, String word) { + return implJNI.EraseHotWord(SWIGTYPE_p_ModelState.getCPtr(aCtx), word); + } + + /** + * Removes all elements from the hot-words map.
+ *
+ * @param aCtx The ModelState pointer for the model being changed.
+ *
+ * @return Zero on success, non-zero on failure (invalid arguments). + */ + public static int ClearHotWords(SWIGTYPE_p_ModelState aCtx) { + return implJNI.ClearHotWords(SWIGTYPE_p_ModelState.getCPtr(aCtx)); + } + + /** + * Disable decoding using an external scorer.
+ *
+ * @param aCtx The ModelState pointer for the model being changed.
+ *
+ * @return Zero on success, non-zero on failure. + */ + public static int DisableExternalScorer(SWIGTYPE_p_ModelState aCtx) { + return implJNI.DisableExternalScorer(SWIGTYPE_p_ModelState.getCPtr(aCtx)); + } + + /** + * Set hyperparameters alpha and beta of the external scorer.
+ *
+ * @param aCtx The ModelState pointer for the model being changed.
+ * @param aAlpha The alpha hyperparameter of the decoder. Language model weight.
+ * @param aBeta The beta hyperparameter of the decoder. Word insertion weight.
+ *
+ * @return Zero on success, non-zero on failure. + */ + public static int SetScorerAlphaBeta(SWIGTYPE_p_ModelState aCtx, float aAlpha, float aBeta) { + return implJNI.SetScorerAlphaBeta(SWIGTYPE_p_ModelState.getCPtr(aCtx), aAlpha, aBeta); + } + + /** + * Use the DeepSpeech model to convert speech to text.
+ *
+ * @param aCtx The ModelState pointer for the model to use.
+ * @param aBuffer A 16-bit, mono raw audio signal at the appropriate
+ * sample rate (matching what the model was trained on).
+ * @param aBufferSize The number of samples in the audio signal.
+ *
+ * @return The STT result. The user is responsible for freeing the string using
+ * {DS_FreeString()}. Returns NULL on error. + */ + public static String SpeechToText(SWIGTYPE_p_ModelState aCtx, short[] aBuffer, long aBufferSize) { + return implJNI.SpeechToText(SWIGTYPE_p_ModelState.getCPtr(aCtx), aBuffer, aBufferSize); + } + + /** + * Use the DeepSpeech model to convert speech to text and output results
+ * including metadata.
+ *
+ * @param aCtx The ModelState pointer for the model to use.
+ * @param aBuffer A 16-bit, mono raw audio signal at the appropriate
+ * sample rate (matching what the model was trained on).
+ * @param aBufferSize The number of samples in the audio signal.
+ * @param aNumResults The maximum number of CandidateTranscript structs to return. Returned value might be smaller than this.
+ *
+ * @return Metadata struct containing multiple CandidateTranscript structs. Each
+ * transcript has per-token metadata including timing information. The
+ * user is responsible for freeing Metadata by calling {DS_FreeMetadata()}.
+ * Returns NULL on error. + */ + public static Metadata SpeechToTextWithMetadata(SWIGTYPE_p_ModelState aCtx, short[] aBuffer, long aBufferSize, long aNumResults) { + long cPtr = implJNI.SpeechToTextWithMetadata(SWIGTYPE_p_ModelState.getCPtr(aCtx), aBuffer, aBufferSize, aNumResults); + return (cPtr == 0) ? null : new Metadata(cPtr, false); + } + + /** + * Create a new streaming inference state. The streaming state returned
+ * by this function can then be passed to {DS_FeedAudioContent()}
+ * and {DS_FinishStream()}.
+ *
+ * @param aCtx The ModelState pointer for the model to use.
+ * @param retval an opaque pointer that represents the streaming state. Can
+ * be NULL if an error occurs.
+ *
+ * @return Zero for success, non-zero on failure. + */ + public static int CreateStream(SWIGTYPE_p_ModelState aCtx, SWIGTYPE_p_p_StreamingState retval) { + return implJNI.CreateStream(SWIGTYPE_p_ModelState.getCPtr(aCtx), SWIGTYPE_p_p_StreamingState.getCPtr(retval)); + } + + /** + * Feed audio samples to an ongoing streaming inference.
+ *
+ * @param aSctx A streaming state pointer returned by {DS_CreateStream()}.
+ * @param aBuffer An array of 16-bit, mono raw audio samples at the
+ * appropriate sample rate (matching what the model was trained on).
+ * @param aBufferSize The number of samples in aBuffer. + */ + public static void FeedAudioContent(SWIGTYPE_p_StreamingState aSctx, short[] aBuffer, long aBufferSize) { + implJNI.FeedAudioContent(SWIGTYPE_p_StreamingState.getCPtr(aSctx), aBuffer, aBufferSize); + } + + /** + * Compute the intermediate decoding of an ongoing streaming inference.
+ *
+ * @param aSctx A streaming state pointer returned by {DS_CreateStream()}.
+ *
+ * @return The STT intermediate result. The user is responsible for freeing the
+ * string using {DS_FreeString()}. + */ + public static String IntermediateDecode(SWIGTYPE_p_StreamingState aSctx) { + return implJNI.IntermediateDecode(SWIGTYPE_p_StreamingState.getCPtr(aSctx)); + } + + /** + * Compute the intermediate decoding of an ongoing streaming inference,
+ * return results including metadata.
+ *
+ * @param aSctx A streaming state pointer returned by {DS_CreateStream()}.
+ * @param aNumResults The number of candidate transcripts to return.
+ *
+ * @return Metadata struct containing multiple candidate transcripts. Each transcript
+ * has per-token metadata including timing information. The user is
+ * responsible for freeing Metadata by calling {DS_FreeMetadata()}.
+ * Returns NULL on error. + */ + public static Metadata IntermediateDecodeWithMetadata(SWIGTYPE_p_StreamingState aSctx, long aNumResults) { + long cPtr = implJNI.IntermediateDecodeWithMetadata(SWIGTYPE_p_StreamingState.getCPtr(aSctx), aNumResults); + return (cPtr == 0) ? null : new Metadata(cPtr, false); + } + + /** + * Compute the final decoding of an ongoing streaming inference and return
+ * the result. Signals the end of an ongoing streaming inference.
+ *
+ * @param aSctx A streaming state pointer returned by {DS_CreateStream()}.
+ *
+ * @return The STT result. The user is responsible for freeing the string using
+ * {DS_FreeString()}.
+ *
+ * Note: This method will free the state pointer (aSctx). + */ + public static String FinishStream(SWIGTYPE_p_StreamingState aSctx) { + return implJNI.FinishStream(SWIGTYPE_p_StreamingState.getCPtr(aSctx)); + } + + /** + * Compute the final decoding of an ongoing streaming inference and return
+ * results including metadata. Signals the end of an ongoing streaming
+ * inference.
+ *
+ * @param aSctx A streaming state pointer returned by {DS_CreateStream()}.
+ * @param aNumResults The number of candidate transcripts to return.
+ *
+ * @return Metadata struct containing multiple candidate transcripts. Each transcript
+ * has per-token metadata including timing information. The user is
+ * responsible for freeing Metadata by calling {DS_FreeMetadata()}.
+ * Returns NULL on error.
+ *
+ * Note: This method will free the state pointer (aSctx). + */ + public static Metadata FinishStreamWithMetadata(SWIGTYPE_p_StreamingState aSctx, long aNumResults) { + long cPtr = implJNI.FinishStreamWithMetadata(SWIGTYPE_p_StreamingState.getCPtr(aSctx), aNumResults); + return (cPtr == 0) ? null : new Metadata(cPtr, false); + } + + /** + * Destroy a streaming state without decoding the computed logits. This
+ * can be used if you no longer need the result of an ongoing streaming
+ * inference and don't want to perform a costly decode operation.
+ *
+ * @param aSctx A streaming state pointer returned by {DS_CreateStream()}.
+ *
+ * Note: This method will free the state pointer (aSctx). + */ + public static void FreeStream(SWIGTYPE_p_StreamingState aSctx) { + implJNI.FreeStream(SWIGTYPE_p_StreamingState.getCPtr(aSctx)); + } + + /** + * Free memory allocated for metadata information. + */ + public static void FreeMetadata(Metadata m) { + implJNI.FreeMetadata(Metadata.getCPtr(m), m); + } + + /** + * Free a char* string returned by the DeepSpeech API. + */ + public static void FreeString(String str) { + implJNI.FreeString(str); + } + + /** + * Returns the version of this library. The returned version is a semantic
+ * version (SemVer 2.0.0). The string returned must be freed with {DS_FreeString()}.
+ *
+ * @return The version string. + */ + public static String Version() { + return implJNI.Version(); + } + + /** + * Returns a textual description corresponding to an error code.
+ * The string returned must be freed with {DS_FreeString()}.
+ *
+ * @return The error description. + */ + public static String ErrorCodeToErrorMessage(int aErrorCode) { + return implJNI.ErrorCodeToErrorMessage(aErrorCode); + } + +} diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/implJNI.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/implJNI.java new file mode 100644 index 0000000000..c461f936b8 --- /dev/null +++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/implJNI.java @@ -0,0 +1,55 @@ +/* ---------------------------------------------------------------------------- + * This file was automatically generated by SWIG (http://www.swig.org). + * Version 4.0.1 + * + * Do not make changes to this file unless you know what you are doing--modify + * the SWIG interface file instead. + * ----------------------------------------------------------------------------- */ + +package org.mozilla.deepspeech.libdeepspeech; + +public class implJNI { + public final static native long new_modelstatep(); + public final static native long copy_modelstatep(long jarg1); + public final static native void delete_modelstatep(long jarg1); + public final static native void modelstatep_assign(long jarg1, long jarg2); + public final static native long modelstatep_value(long jarg1); + public final static native long new_streamingstatep(); + public final static native long copy_streamingstatep(long jarg1); + public final static native void delete_streamingstatep(long jarg1); + public final static native void streamingstatep_assign(long jarg1, long jarg2); + public final static native long streamingstatep_value(long jarg1); + public final static native String TokenMetadata_Text_get(long jarg1, TokenMetadata jarg1_); + public final static native long TokenMetadata_Timestep_get(long jarg1, TokenMetadata jarg1_); + public final static native float TokenMetadata_StartTime_get(long jarg1, TokenMetadata jarg1_); + public 
final static native long CandidateTranscript_NumTokens_get(long jarg1, CandidateTranscript jarg1_); + public final static native double CandidateTranscript_Confidence_get(long jarg1, CandidateTranscript jarg1_); + public final static native long CandidateTranscript_getToken(long jarg1, CandidateTranscript jarg1_, int jarg2); + public final static native long Metadata_NumTranscripts_get(long jarg1, Metadata jarg1_); + public final static native long Metadata_getTranscript(long jarg1, Metadata jarg1_, int jarg2); + public final static native void delete_Metadata(long jarg1); + public final static native int CreateModel(String jarg1, long jarg2); + public final static native long GetModelBeamWidth(long jarg1); + public final static native int SetModelBeamWidth(long jarg1, long jarg2); + public final static native int GetModelSampleRate(long jarg1); + public final static native void FreeModel(long jarg1); + public final static native int EnableExternalScorer(long jarg1, String jarg2); + public final static native int AddHotWord(long jarg1, String jarg2, float jarg3); + public final static native int EraseHotWord(long jarg1, String jarg2); + public final static native int ClearHotWords(long jarg1); + public final static native int DisableExternalScorer(long jarg1); + public final static native int SetScorerAlphaBeta(long jarg1, float jarg2, float jarg3); + public final static native String SpeechToText(long jarg1, short[] jarg2, long jarg3); + public final static native long SpeechToTextWithMetadata(long jarg1, short[] jarg2, long jarg3, long jarg4); + public final static native int CreateStream(long jarg1, long jarg2); + public final static native void FeedAudioContent(long jarg1, short[] jarg2, long jarg3); + public final static native String IntermediateDecode(long jarg1); + public final static native long IntermediateDecodeWithMetadata(long jarg1, long jarg2); + public final static native String FinishStream(long jarg1); + public final static native long 
FinishStreamWithMetadata(long jarg1, long jarg2); + public final static native void FreeStream(long jarg1); + public final static native void FreeMetadata(long jarg1, Metadata jarg1_); + public final static native void FreeString(String jarg1); + public final static native String Version(); + public final static native String ErrorCodeToErrorMessage(int jarg1); +} From 102445cf9f8ff9936cf738f757f6f8abdd97b9e4 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Tue, 22 Sep 2020 10:01:55 -0700 Subject: [PATCH 41/48] java and dotnet issues --- .../dotnet/DeepSpeechClient/NativeImp.cs | 12 + .../libdeepspeech/DeepSpeechModel.java | 6 +- .../SWIGTYPE_p_ModelState.java | 26 -- .../SWIGTYPE_p_StreamingState.java | 26 -- .../SWIGTYPE_p_p_ModelState.java | 26 -- .../SWIGTYPE_p_p_StreamingState.java | 26 -- .../deepspeech/libdeepspeech_doc/impl.java | 359 ------------------ .../deepspeech/libdeepspeech_doc/implJNI.java | 55 --- 8 files changed, 15 insertions(+), 521 deletions(-) delete mode 100644 native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_ModelState.java delete mode 100644 native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_StreamingState.java delete mode 100644 native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_ModelState.java delete mode 100644 native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_StreamingState.java delete mode 100644 native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/impl.java delete mode 100644 native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/implJNI.java diff --git a/native_client/dotnet/DeepSpeechClient/NativeImp.cs b/native_client/dotnet/DeepSpeechClient/NativeImp.cs index bc77cf1b18..1a7dacacf2 100644 --- a/native_client/dotnet/DeepSpeechClient/NativeImp.cs +++ 
b/native_client/dotnet/DeepSpeechClient/NativeImp.cs @@ -41,6 +41,18 @@ internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath, internal static unsafe extern ErrorCodes DS_EnableExternalScorer(IntPtr** aCtx, string aScorerPath); + [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern ErrorCodes DS_AddHotWord(IntPtr** aCtx, + string aWord, + float aBoost); + + [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern ErrorCodes DS_EraseHotWord(IntPtr** aCtx, + string aWord); + + [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern ErrorCodes DS_ClearHotWords(IntPtr** aCtx); + [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] internal static unsafe extern ErrorCodes DS_DisableExternalScorer(IntPtr** aCtx); diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java index f461904664..ce313d20db 100644 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java +++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java @@ -225,7 +225,7 @@ public Metadata finishStreamWithMetadata(DeepSpeechStreamingState ctx, int num_r * */ public void addHotWord(String word, float boost) { - evaluateErrorCode(impl.AddHotWord(word, boost)); + evaluateErrorCode(impl.AddHotWord(this._msp, word, boost)); } /** * @brief Erase a hot-word @@ -236,7 +236,7 @@ public void addHotWord(String word, float boost) { * */ public void eraseHotWord(String word) { - evaluateErrorCode(impl.EraseHotWord(word)); + evaluateErrorCode(impl.EraseHotWord(this._msp, word)); } /** * @brief Clear all hot-words. 
@@ -245,6 +245,6 @@ public void eraseHotWord(String word) { * */ public void clearHotWords() { - evaluateErrorCode(impl.ClearHotWords()); + evaluateErrorCode(impl.ClearHotWords(this._msp)); } } diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_ModelState.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_ModelState.java deleted file mode 100644 index 6a0ff56035..0000000000 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_ModelState.java +++ /dev/null @@ -1,26 +0,0 @@ -/* ---------------------------------------------------------------------------- - * This file was automatically generated by SWIG (http://www.swig.org). - * Version 4.0.1 - * - * Do not make changes to this file unless you know what you are doing--modify - * the SWIG interface file instead. - * ----------------------------------------------------------------------------- */ - -package org.mozilla.deepspeech.libdeepspeech; - -public class SWIGTYPE_p_ModelState { - private transient long swigCPtr; - - protected SWIGTYPE_p_ModelState(long cPtr, @SuppressWarnings("unused") boolean futureUse) { - swigCPtr = cPtr; - } - - protected SWIGTYPE_p_ModelState() { - swigCPtr = 0; - } - - protected static long getCPtr(SWIGTYPE_p_ModelState obj) { - return (obj == null) ? 
0 : obj.swigCPtr; - } -} - diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_StreamingState.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_StreamingState.java deleted file mode 100644 index 75d4f1a530..0000000000 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_StreamingState.java +++ /dev/null @@ -1,26 +0,0 @@ -/* ---------------------------------------------------------------------------- - * This file was automatically generated by SWIG (http://www.swig.org). - * Version 4.0.1 - * - * Do not make changes to this file unless you know what you are doing--modify - * the SWIG interface file instead. - * ----------------------------------------------------------------------------- */ - -package org.mozilla.deepspeech.libdeepspeech; - -public class SWIGTYPE_p_StreamingState { - private transient long swigCPtr; - - protected SWIGTYPE_p_StreamingState(long cPtr, @SuppressWarnings("unused") boolean futureUse) { - swigCPtr = cPtr; - } - - protected SWIGTYPE_p_StreamingState() { - swigCPtr = 0; - } - - protected static long getCPtr(SWIGTYPE_p_StreamingState obj) { - return (obj == null) ? 0 : obj.swigCPtr; - } -} - diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_ModelState.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_ModelState.java deleted file mode 100644 index 97ca54d7c9..0000000000 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_ModelState.java +++ /dev/null @@ -1,26 +0,0 @@ -/* ---------------------------------------------------------------------------- - * This file was automatically generated by SWIG (http://www.swig.org). 
- * Version 4.0.1 - * - * Do not make changes to this file unless you know what you are doing--modify - * the SWIG interface file instead. - * ----------------------------------------------------------------------------- */ - -package org.mozilla.deepspeech.libdeepspeech; - -public class SWIGTYPE_p_p_ModelState { - private transient long swigCPtr; - - protected SWIGTYPE_p_p_ModelState(long cPtr, @SuppressWarnings("unused") boolean futureUse) { - swigCPtr = cPtr; - } - - protected SWIGTYPE_p_p_ModelState() { - swigCPtr = 0; - } - - protected static long getCPtr(SWIGTYPE_p_p_ModelState obj) { - return (obj == null) ? 0 : obj.swigCPtr; - } -} - diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_StreamingState.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_StreamingState.java deleted file mode 100644 index 50b597b429..0000000000 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/SWIGTYPE_p_p_StreamingState.java +++ /dev/null @@ -1,26 +0,0 @@ -/* ---------------------------------------------------------------------------- - * This file was automatically generated by SWIG (http://www.swig.org). - * Version 4.0.1 - * - * Do not make changes to this file unless you know what you are doing--modify - * the SWIG interface file instead. - * ----------------------------------------------------------------------------- */ - -package org.mozilla.deepspeech.libdeepspeech; - -public class SWIGTYPE_p_p_StreamingState { - private transient long swigCPtr; - - protected SWIGTYPE_p_p_StreamingState(long cPtr, @SuppressWarnings("unused") boolean futureUse) { - swigCPtr = cPtr; - } - - protected SWIGTYPE_p_p_StreamingState() { - swigCPtr = 0; - } - - protected static long getCPtr(SWIGTYPE_p_p_StreamingState obj) { - return (obj == null) ? 
0 : obj.swigCPtr; - } -} - diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/impl.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/impl.java deleted file mode 100644 index c982673ce7..0000000000 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/impl.java +++ /dev/null @@ -1,359 +0,0 @@ -/* ---------------------------------------------------------------------------- - * This file was automatically generated by SWIG (http://www.swig.org). - * Version 4.0.1 - * - * Do not make changes to this file unless you know what you are doing--modify - * the SWIG interface file instead. - * ----------------------------------------------------------------------------- */ - -package org.mozilla.deepspeech.libdeepspeech; - -public class impl { - public static SWIGTYPE_p_p_ModelState new_modelstatep() { - long cPtr = implJNI.new_modelstatep(); - return (cPtr == 0) ? null : new SWIGTYPE_p_p_ModelState(cPtr, false); - } - - public static SWIGTYPE_p_p_ModelState copy_modelstatep(SWIGTYPE_p_ModelState value) { - long cPtr = implJNI.copy_modelstatep(SWIGTYPE_p_ModelState.getCPtr(value)); - return (cPtr == 0) ? null : new SWIGTYPE_p_p_ModelState(cPtr, false); - } - - public static void delete_modelstatep(SWIGTYPE_p_p_ModelState obj) { - implJNI.delete_modelstatep(SWIGTYPE_p_p_ModelState.getCPtr(obj)); - } - - public static void modelstatep_assign(SWIGTYPE_p_p_ModelState obj, SWIGTYPE_p_ModelState value) { - implJNI.modelstatep_assign(SWIGTYPE_p_p_ModelState.getCPtr(obj), SWIGTYPE_p_ModelState.getCPtr(value)); - } - - public static SWIGTYPE_p_ModelState modelstatep_value(SWIGTYPE_p_p_ModelState obj) { - long cPtr = implJNI.modelstatep_value(SWIGTYPE_p_p_ModelState.getCPtr(obj)); - return (cPtr == 0) ? 
null : new SWIGTYPE_p_ModelState(cPtr, false); - } - - public static SWIGTYPE_p_p_StreamingState new_streamingstatep() { - long cPtr = implJNI.new_streamingstatep(); - return (cPtr == 0) ? null : new SWIGTYPE_p_p_StreamingState(cPtr, false); - } - - public static SWIGTYPE_p_p_StreamingState copy_streamingstatep(SWIGTYPE_p_StreamingState value) { - long cPtr = implJNI.copy_streamingstatep(SWIGTYPE_p_StreamingState.getCPtr(value)); - return (cPtr == 0) ? null : new SWIGTYPE_p_p_StreamingState(cPtr, false); - } - - public static void delete_streamingstatep(SWIGTYPE_p_p_StreamingState obj) { - implJNI.delete_streamingstatep(SWIGTYPE_p_p_StreamingState.getCPtr(obj)); - } - - public static void streamingstatep_assign(SWIGTYPE_p_p_StreamingState obj, SWIGTYPE_p_StreamingState value) { - implJNI.streamingstatep_assign(SWIGTYPE_p_p_StreamingState.getCPtr(obj), SWIGTYPE_p_StreamingState.getCPtr(value)); - } - - public static SWIGTYPE_p_StreamingState streamingstatep_value(SWIGTYPE_p_p_StreamingState obj) { - long cPtr = implJNI.streamingstatep_value(SWIGTYPE_p_p_StreamingState.getCPtr(obj)); - return (cPtr == 0) ? null : new SWIGTYPE_p_StreamingState(cPtr, false); - } - - /** - * An object providing an interface to a trained DeepSpeech model.
- *
- * @param aModelPath The path to the frozen model graph.
- * retval a ModelState pointer
- *
- * @return Zero on success, non-zero on failure. - */ - public static int CreateModel(String aModelPath, SWIGTYPE_p_p_ModelState retval) { - return implJNI.CreateModel(aModelPath, SWIGTYPE_p_p_ModelState.getCPtr(retval)); - } - - /** - * Get beam width value used by the model. If {DS_SetModelBeamWidth}
- * was not called before, will return the default value loaded from the
- * model file.
- *
- * @param aCtx A ModelState pointer created with {DS_CreateModel}.
- *
- * @return Beam width value used by the model. - */ - public static long GetModelBeamWidth(SWIGTYPE_p_ModelState aCtx) { - return implJNI.GetModelBeamWidth(SWIGTYPE_p_ModelState.getCPtr(aCtx)); - } - - /** - * Set beam width value used by the model.
- *
- * @param aCtx A ModelState pointer created with {DS_CreateModel}.
- * @param aBeamWidth The beam width used by the model. A larger beam width value
- * generates better results at the cost of decoding time.
- *
- * @return Zero on success, non-zero on failure. - */ - public static int SetModelBeamWidth(SWIGTYPE_p_ModelState aCtx, long aBeamWidth) { - return implJNI.SetModelBeamWidth(SWIGTYPE_p_ModelState.getCPtr(aCtx), aBeamWidth); - } - - /** - * Return the sample rate expected by a model.
- *
- * @param aCtx A ModelState pointer created with {DS_CreateModel}.
- *
- * @return Sample rate expected by the model for its input. - */ - public static int GetModelSampleRate(SWIGTYPE_p_ModelState aCtx) { - return implJNI.GetModelSampleRate(SWIGTYPE_p_ModelState.getCPtr(aCtx)); - } - - /** - * Frees associated resources and destroys model object. - */ - public static void FreeModel(SWIGTYPE_p_ModelState ctx) { - implJNI.FreeModel(SWIGTYPE_p_ModelState.getCPtr(ctx)); - } - - /** - * Enable decoding using an external scorer.
- *
- * @param aCtx The ModelState pointer for the model being changed.
- * @param aScorerPath The path to the external scorer file.
- *
- * @return Zero on success, non-zero on failure (invalid arguments). - */ - public static int EnableExternalScorer(SWIGTYPE_p_ModelState aCtx, String aScorerPath) { - return implJNI.EnableExternalScorer(SWIGTYPE_p_ModelState.getCPtr(aCtx), aScorerPath); - } - - /** - * Add a hot-word and its boost.
- *
- * @param aCtx The ModelState pointer for the model being changed.
- * @param word The hot-word.
- * @param boost The boost.
- *
- * @return Zero on success, non-zero on failure (invalid arguments). - */ - public static int AddHotWord(SWIGTYPE_p_ModelState aCtx, String word, float boost) { - return implJNI.AddHotWord(SWIGTYPE_p_ModelState.getCPtr(aCtx), word, boost); - } - - /** - * Remove entry for a hot-word from the hot-words map.
- *
- * @param aCtx The ModelState pointer for the model being changed.
- * @param word The hot-word.
- *
- * @return Zero on success, non-zero on failure (invalid arguments). - */ - public static int EraseHotWord(SWIGTYPE_p_ModelState aCtx, String word) { - return implJNI.EraseHotWord(SWIGTYPE_p_ModelState.getCPtr(aCtx), word); - } - - /** - * Removes all elements from the hot-words map.
- *
- * @param aCtx The ModelState pointer for the model being changed.
- *
- * @return Zero on success, non-zero on failure (invalid arguments). - */ - public static int ClearHotWords(SWIGTYPE_p_ModelState aCtx) { - return implJNI.ClearHotWords(SWIGTYPE_p_ModelState.getCPtr(aCtx)); - } - - /** - * Disable decoding using an external scorer.
- *
- * @param aCtx The ModelState pointer for the model being changed.
- *
- * @return Zero on success, non-zero on failure. - */ - public static int DisableExternalScorer(SWIGTYPE_p_ModelState aCtx) { - return implJNI.DisableExternalScorer(SWIGTYPE_p_ModelState.getCPtr(aCtx)); - } - - /** - * Set hyperparameters alpha and beta of the external scorer.
- *
- * @param aCtx The ModelState pointer for the model being changed.
- * @param aAlpha The alpha hyperparameter of the decoder. Language model weight.
- *
- *
- * @return Zero on success, non-zero on failure. - */ - public static int SetScorerAlphaBeta(SWIGTYPE_p_ModelState aCtx, float aAlpha, float aBeta) { - return implJNI.SetScorerAlphaBeta(SWIGTYPE_p_ModelState.getCPtr(aCtx), aAlpha, aBeta); - } - - /** - * Use the DeepSpeech model to convert speech to text.
- *
- * @param aCtx The ModelState pointer for the model to use.
- * @param aBuffer A 16-bit, mono raw audio signal at the appropriate
- * sample rate (matching what the model was trained on).
- * @param aBufferSize The number of samples in the audio signal.
- *
- * @return The STT result. The user is responsible for freeing the string using
- * {DS_FreeString()}. Returns NULL on error. - */ - public static String SpeechToText(SWIGTYPE_p_ModelState aCtx, short[] aBuffer, long aBufferSize) { - return implJNI.SpeechToText(SWIGTYPE_p_ModelState.getCPtr(aCtx), aBuffer, aBufferSize); - } - - /** - * Use the DeepSpeech model to convert speech to text and output results
- * including metadata.
- *
- * @param aCtx The ModelState pointer for the model to use.
- * @param aBuffer A 16-bit, mono raw audio signal at the appropriate
- * sample rate (matching what the model was trained on).
- * @param aBufferSize The number of samples in the audio signal.
- * @param aNumResults The maximum number of CandidateTranscript structs to return. Returned value might be smaller than this.
- *
- * @return Metadata struct containing multiple CandidateTranscript structs. Each
- * transcript has per-token metadata including timing information. The
- * user is responsible for freeing Metadata by calling {DS_FreeMetadata()}.
- * Returns NULL on error. - */ - public static Metadata SpeechToTextWithMetadata(SWIGTYPE_p_ModelState aCtx, short[] aBuffer, long aBufferSize, long aNumResults) { - long cPtr = implJNI.SpeechToTextWithMetadata(SWIGTYPE_p_ModelState.getCPtr(aCtx), aBuffer, aBufferSize, aNumResults); - return (cPtr == 0) ? null : new Metadata(cPtr, false); - } - - /** - * Create a new streaming inference state. The streaming state returned
- * by this function can then be passed to {DS_FeedAudioContent()}
- * and {DS_FinishStream()}.
- *
- * @param aCtx The ModelState pointer for the model to use.
- * retval an opaque pointer that represents the streaming state. Can
- * be NULL if an error occurs.
- *
- * @return Zero for success, non-zero on failure. - */ - public static int CreateStream(SWIGTYPE_p_ModelState aCtx, SWIGTYPE_p_p_StreamingState retval) { - return implJNI.CreateStream(SWIGTYPE_p_ModelState.getCPtr(aCtx), SWIGTYPE_p_p_StreamingState.getCPtr(retval)); - } - - /** - * Feed audio samples to an ongoing streaming inference.
- *
- * @param aSctx A streaming state pointer returned by {DS_CreateStream()}.
- * @param aBuffer An array of 16-bit, mono raw audio samples at the
- * appropriate sample rate (matching what the model was trained on).
- * @param aBufferSize The number of samples in aBuffer. - */ - public static void FeedAudioContent(SWIGTYPE_p_StreamingState aSctx, short[] aBuffer, long aBufferSize) { - implJNI.FeedAudioContent(SWIGTYPE_p_StreamingState.getCPtr(aSctx), aBuffer, aBufferSize); - } - - /** - * Compute the intermediate decoding of an ongoing streaming inference.
- *
- * @param aSctx A streaming state pointer returned by {DS_CreateStream()}.
- *
- * @return The STT intermediate result. The user is responsible for freeing the
- * string using {DS_FreeString()}. - */ - public static String IntermediateDecode(SWIGTYPE_p_StreamingState aSctx) { - return implJNI.IntermediateDecode(SWIGTYPE_p_StreamingState.getCPtr(aSctx)); - } - - /** - * Compute the intermediate decoding of an ongoing streaming inference,
- * return results including metadata.
- *
- * @param aSctx A streaming state pointer returned by {DS_CreateStream()}.
- * @param aNumResults The number of candidate transcripts to return.
- *
- * @return Metadata struct containing multiple candidate transcripts. Each transcript
- * has per-token metadata including timing information. The user is
- * responsible for freeing Metadata by calling {DS_FreeMetadata()}.
- * Returns NULL on error. - */ - public static Metadata IntermediateDecodeWithMetadata(SWIGTYPE_p_StreamingState aSctx, long aNumResults) { - long cPtr = implJNI.IntermediateDecodeWithMetadata(SWIGTYPE_p_StreamingState.getCPtr(aSctx), aNumResults); - return (cPtr == 0) ? null : new Metadata(cPtr, false); - } - - /** - * Compute the final decoding of an ongoing streaming inference and return
- * the result. Signals the end of an ongoing streaming inference.
- *
- * @param aSctx A streaming state pointer returned by {DS_CreateStream()}.
- *
- * @return The STT result. The user is responsible for freeing the string using
- * {DS_FreeString()}.
- *
- * Note: This method will free the state pointer (aSctx). - */ - public static String FinishStream(SWIGTYPE_p_StreamingState aSctx) { - return implJNI.FinishStream(SWIGTYPE_p_StreamingState.getCPtr(aSctx)); - } - - /** - * Compute the final decoding of an ongoing streaming inference and return
- * results including metadata. Signals the end of an ongoing streaming
- * inference.
- *
- * @param aSctx A streaming state pointer returned by {DS_CreateStream()}.
- * @param aNumResults The number of candidate transcripts to return.
- *
- * @return Metadata struct containing multiple candidate transcripts. Each transcript
- * has per-token metadata including timing information. The user is
- * responsible for freeing Metadata by calling {DS_FreeMetadata()}.
- * Returns NULL on error.
- *
- * Note: This method will free the state pointer (aSctx). - */ - public static Metadata FinishStreamWithMetadata(SWIGTYPE_p_StreamingState aSctx, long aNumResults) { - long cPtr = implJNI.FinishStreamWithMetadata(SWIGTYPE_p_StreamingState.getCPtr(aSctx), aNumResults); - return (cPtr == 0) ? null : new Metadata(cPtr, false); - } - - /** - * Destroy a streaming state without decoding the computed logits. This
- * can be used if you no longer need the result of an ongoing streaming
- * inference and don't want to perform a costly decode operation.
- *
- * @param aSctx A streaming state pointer returned by {DS_CreateStream()}.
- *
- * Note: This method will free the state pointer (aSctx). - */ - public static void FreeStream(SWIGTYPE_p_StreamingState aSctx) { - implJNI.FreeStream(SWIGTYPE_p_StreamingState.getCPtr(aSctx)); - } - - /** - * Free memory allocated for metadata information. - */ - public static void FreeMetadata(Metadata m) { - implJNI.FreeMetadata(Metadata.getCPtr(m), m); - } - - /** - * Free a char* string returned by the DeepSpeech API. - */ - public static void FreeString(String str) { - implJNI.FreeString(str); - } - - /** - * Returns the version of this library. The returned version is a semantic
- * version (SemVer 2.0.0). The string returned must be freed with {DS_FreeString()}.
- *
- * @return The version string. - */ - public static String Version() { - return implJNI.Version(); - } - - /** - * Returns a textual description corresponding to an error code.
- * The string returned must be freed with DS_FreeString()}.
- *
- * @return The error description. - */ - public static String ErrorCodeToErrorMessage(int aErrorCode) { - return implJNI.ErrorCodeToErrorMessage(aErrorCode); - } - -} diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/implJNI.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/implJNI.java deleted file mode 100644 index c461f936b8..0000000000 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/implJNI.java +++ /dev/null @@ -1,55 +0,0 @@ -/* ---------------------------------------------------------------------------- - * This file was automatically generated by SWIG (http://www.swig.org). - * Version 4.0.1 - * - * Do not make changes to this file unless you know what you are doing--modify - * the SWIG interface file instead. - * ----------------------------------------------------------------------------- */ - -package org.mozilla.deepspeech.libdeepspeech; - -public class implJNI { - public final static native long new_modelstatep(); - public final static native long copy_modelstatep(long jarg1); - public final static native void delete_modelstatep(long jarg1); - public final static native void modelstatep_assign(long jarg1, long jarg2); - public final static native long modelstatep_value(long jarg1); - public final static native long new_streamingstatep(); - public final static native long copy_streamingstatep(long jarg1); - public final static native void delete_streamingstatep(long jarg1); - public final static native void streamingstatep_assign(long jarg1, long jarg2); - public final static native long streamingstatep_value(long jarg1); - public final static native String TokenMetadata_Text_get(long jarg1, TokenMetadata jarg1_); - public final static native long TokenMetadata_Timestep_get(long jarg1, TokenMetadata jarg1_); - public final static native float TokenMetadata_StartTime_get(long jarg1, TokenMetadata jarg1_); - public 
final static native long CandidateTranscript_NumTokens_get(long jarg1, CandidateTranscript jarg1_); - public final static native double CandidateTranscript_Confidence_get(long jarg1, CandidateTranscript jarg1_); - public final static native long CandidateTranscript_getToken(long jarg1, CandidateTranscript jarg1_, int jarg2); - public final static native long Metadata_NumTranscripts_get(long jarg1, Metadata jarg1_); - public final static native long Metadata_getTranscript(long jarg1, Metadata jarg1_, int jarg2); - public final static native void delete_Metadata(long jarg1); - public final static native int CreateModel(String jarg1, long jarg2); - public final static native long GetModelBeamWidth(long jarg1); - public final static native int SetModelBeamWidth(long jarg1, long jarg2); - public final static native int GetModelSampleRate(long jarg1); - public final static native void FreeModel(long jarg1); - public final static native int EnableExternalScorer(long jarg1, String jarg2); - public final static native int AddHotWord(long jarg1, String jarg2, float jarg3); - public final static native int EraseHotWord(long jarg1, String jarg2); - public final static native int ClearHotWords(long jarg1); - public final static native int DisableExternalScorer(long jarg1); - public final static native int SetScorerAlphaBeta(long jarg1, float jarg2, float jarg3); - public final static native String SpeechToText(long jarg1, short[] jarg2, long jarg3); - public final static native long SpeechToTextWithMetadata(long jarg1, short[] jarg2, long jarg3, long jarg4); - public final static native int CreateStream(long jarg1, long jarg2); - public final static native void FeedAudioContent(long jarg1, short[] jarg2, long jarg3); - public final static native String IntermediateDecode(long jarg1); - public final static native long IntermediateDecodeWithMetadata(long jarg1, long jarg2); - public final static native String FinishStream(long jarg1); - public final static native long 
FinishStreamWithMetadata(long jarg1, long jarg2); - public final static native void FreeStream(long jarg1); - public final static native void FreeMetadata(long jarg1, Metadata jarg1_); - public final static native void FreeString(String jarg1); - public final static native String Version(); - public final static native String ErrorCodeToErrorMessage(int jarg1); -} From fe7275e654c5e682a67043ed14c42c7d59b65c2d Mon Sep 17 00:00:00 2001 From: josh meyer Date: Tue, 22 Sep 2020 11:08:34 -0700 Subject: [PATCH 42/48] add hotword test to android tests --- taskcluster/tc-android-ds-tests.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/taskcluster/tc-android-ds-tests.sh b/taskcluster/tc-android-ds-tests.sh index 257a949627..bfaa8b0104 100755 --- a/taskcluster/tc-android-ds-tests.sh +++ b/taskcluster/tc-android-ds-tests.sh @@ -30,4 +30,6 @@ android_setup_ndk_data run_tflite_basic_inference_tests +run_hotword_tests + android_stop_emulator From 5432f56a87fae04b438438557716d6aa11edb8d9 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Tue, 22 Sep 2020 15:34:47 -0700 Subject: [PATCH 43/48] dotnet fixes from carlos --- .../dotnet/DeepSpeechClient/DeepSpeech.cs | 6 +++--- .../Interfaces/IDeepSpeech.cs | 21 +++++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs index cfda417616..b9b8e23763 100644 --- a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs +++ b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs @@ -80,7 +80,7 @@ public unsafe void SetModelBeamWidth(uint aBeamWidth) /// Some word /// Some boost /// Thrown on failure. 
- public unsafe void addHotWord(string aWord, float aBoost) + public unsafe void AddHotWord(string aWord, float aBoost) { var resultCode = NativeImp.DS_AddHotWord(_modelStatePP, aWord, aBoost); EvaluateResultCode(resultCode); @@ -91,7 +91,7 @@ public unsafe void addHotWord(string aWord, float aBoost) /// /// Some word /// Thrown on failure. - public unsafe void eraseHotWord(string aWord) + public unsafe void EraseHotWord(string aWord) { var resultCode = NativeImp.DS_EraseHotWord(_modelStatePP, aWord); EvaluateResultCode(resultCode); @@ -101,7 +101,7 @@ public unsafe void eraseHotWord(string aWord) /// Clear all hot-words. /// /// Thrown on failure. - public unsafe void clearHotWords() + public unsafe void ClearHotWords() { var resultCode = NativeImp.DS_ClearHotWords(_modelStatePP); EvaluateResultCode(resultCode); diff --git a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs b/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs index e1ed9cad7e..344b758e74 100644 --- a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs +++ b/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs @@ -44,6 +44,27 @@ public interface IDeepSpeech : IDisposable /// Thrown when cannot find the scorer file. unsafe void EnableExternalScorer(string aScorerPath); + /// + /// Add a hot-word. + /// + /// Some word + /// Some boost + /// Thrown on failure. + unsafe void AddHotWord(string aWord, float aBoost); + + /// + /// Erase entry for a hot-word. + /// + /// Some word + /// Thrown on failure. + unsafe void EraseHotWord(string aWord); + + /// + /// Clear all hot-words. + /// + /// Thrown on failure. + unsafe void ClearHotWords(); + /// /// Disable decoding using an external scorer. 
/// From 4945380e8b5f2487fc68029b9bcfea04dd4b4bb9 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Wed, 23 Sep 2020 05:19:53 -0700 Subject: [PATCH 44/48] add DS_BINARY_PREFIX to tc-asserts.sh for hotwords command --- taskcluster/tc-asserts.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taskcluster/tc-asserts.sh b/taskcluster/tc-asserts.sh index 4fd8162ba0..12bbf4903b 100755 --- a/taskcluster/tc-asserts.sh +++ b/taskcluster/tc-asserts.sh @@ -527,7 +527,7 @@ run_multi_inference_tests() run_hotword_tests() { set +e - hotwords_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${TASKCLUSTER_TMP_DIR}/stderr) + hotwords_decode=$(${DS_BINARY_PREFIX}deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? 
set -e assert_correct_ldc93s1_lm "${hotwords_decode}" "$status" From ae1d39f05c813b6b49ea177a45dbca0d864bf7cf Mon Sep 17 00:00:00 2001 From: josh meyer Date: Thu, 24 Sep 2020 08:33:49 -0700 Subject: [PATCH 45/48] make sure lm is on android for hotword test --- taskcluster/tc-android-ds-tests.sh | 2 +- taskcluster/tc-android-utils.sh | 4 ++++ taskcluster/tc-asserts.sh | 9 +++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/taskcluster/tc-android-ds-tests.sh b/taskcluster/tc-android-ds-tests.sh index bfaa8b0104..c0f2d36ebf 100755 --- a/taskcluster/tc-android-ds-tests.sh +++ b/taskcluster/tc-android-ds-tests.sh @@ -30,6 +30,6 @@ android_setup_ndk_data run_tflite_basic_inference_tests -run_hotword_tests +run_android_hotword_tests android_stop_emulator diff --git a/taskcluster/tc-android-utils.sh b/taskcluster/tc-android-utils.sh index 3bf66927f5..036edfcdd3 100755 --- a/taskcluster/tc-android-utils.sh +++ b/taskcluster/tc-android-utils.sh @@ -206,6 +206,10 @@ android_setup_ndk_data() ${TASKCLUSTER_TMP_DIR}/${model_name} \ ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} \ ${ANDROID_TMP_DIR}/ds/ + + if [ -f "${TASKCLUSTER_TMP_DIR}/kenlm.scorer" ]; then + adb push ${TASKCLUSTER_TMP_DIR}/kenlm.scorer ${ANDROID_TMP_DIR}/ds/ + fi } android_setup_apk_data() diff --git a/taskcluster/tc-asserts.sh b/taskcluster/tc-asserts.sh index 12bbf4903b..d485846e9e 100755 --- a/taskcluster/tc-asserts.sh +++ b/taskcluster/tc-asserts.sh @@ -533,6 +533,15 @@ run_hotword_tests() assert_correct_ldc93s1_lm "${hotwords_decode}" "$status" } +run_android_hotword_tests() +{ + set +e + hotwords_decode=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --scorer ${DATA_TMP_DIR}/kenlm.scorer --audio ${DATA_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${TASKCLUSTER_TMP_DIR}/stderr) + status=$? 
+ set -e + assert_correct_ldc93s1_lm "${hotwords_decode}" "$status" +} + run_cpp_only_inference_tests() { set +e From 90c6a873cc3c20f742a64f9ad8daf5941cc9f938 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Thu, 24 Sep 2020 08:47:26 -0700 Subject: [PATCH 46/48] path to android model + nit --- taskcluster/tc-android-utils.sh | 2 +- taskcluster/tc-asserts.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/taskcluster/tc-android-utils.sh b/taskcluster/tc-android-utils.sh index 036edfcdd3..752d57658d 100755 --- a/taskcluster/tc-android-utils.sh +++ b/taskcluster/tc-android-utils.sh @@ -208,7 +208,7 @@ android_setup_ndk_data() ${ANDROID_TMP_DIR}/ds/ if [ -f "${TASKCLUSTER_TMP_DIR}/kenlm.scorer" ]; then - adb push ${TASKCLUSTER_TMP_DIR}/kenlm.scorer ${ANDROID_TMP_DIR}/ds/ + adb push ${TASKCLUSTER_TMP_DIR}/kenlm.scorer ${ANDROID_TMP_DIR}/ds/ fi } diff --git a/taskcluster/tc-asserts.sh b/taskcluster/tc-asserts.sh index d485846e9e..c67fd7b84b 100755 --- a/taskcluster/tc-asserts.sh +++ b/taskcluster/tc-asserts.sh @@ -536,7 +536,7 @@ run_hotword_tests() run_android_hotword_tests() { set +e - hotwords_decode=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --scorer ${DATA_TMP_DIR}/kenlm.scorer --audio ${DATA_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${TASKCLUSTER_TMP_DIR}/stderr) + hotwords_decode=$(${DS_BINARY_PREFIX}deepspeech --model ${model_name_mmap} --scorer ${DATA_TMP_DIR}/kenlm.scorer --audio ${DATA_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? 
set -e assert_correct_ldc93s1_lm "${hotwords_decode}" "$status" From 019a5149940dcc2dd10d7651dad0e318c36d1725 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Thu, 24 Sep 2020 09:24:57 -0700 Subject: [PATCH 47/48] path --- taskcluster/tc-asserts.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taskcluster/tc-asserts.sh b/taskcluster/tc-asserts.sh index c67fd7b84b..0ff13296fd 100755 --- a/taskcluster/tc-asserts.sh +++ b/taskcluster/tc-asserts.sh @@ -536,7 +536,7 @@ run_hotword_tests() run_android_hotword_tests() { set +e - hotwords_decode=$(${DS_BINARY_PREFIX}deepspeech --model ${model_name_mmap} --scorer ${DATA_TMP_DIR}/kenlm.scorer --audio ${DATA_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${TASKCLUSTER_TMP_DIR}/stderr) + hotwords_decode=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name_mmap} --scorer ${DATA_TMP_DIR}/kenlm.scorer --audio ${DATA_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? 
set -e assert_correct_ldc93s1_lm "${hotwords_decode}" "$status" From a382783a927ff8efa5f8c76028a10c05b1f08918 Mon Sep 17 00:00:00 2001 From: josh meyer Date: Thu, 24 Sep 2020 10:18:24 -0700 Subject: [PATCH 48/48] path --- taskcluster/tc-asserts.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taskcluster/tc-asserts.sh b/taskcluster/tc-asserts.sh index 0ff13296fd..d485846e9e 100755 --- a/taskcluster/tc-asserts.sh +++ b/taskcluster/tc-asserts.sh @@ -536,7 +536,7 @@ run_hotword_tests() run_android_hotword_tests() { set +e - hotwords_decode=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name_mmap} --scorer ${DATA_TMP_DIR}/kenlm.scorer --audio ${DATA_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${TASKCLUSTER_TMP_DIR}/stderr) + hotwords_decode=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --scorer ${DATA_TMP_DIR}/kenlm.scorer --audio ${DATA_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_lm "${hotwords_decode}" "$status"