From 1b7e0449850cac4ef753b0e89822c5666ee53dbf Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Wed, 6 Jun 2018 21:26:41 -0700
Subject: [PATCH 01/10] Adapt the decoder to the new label

---
 fluid/DeepASR/decoder/post_decode_faster.cc   | 145 ---------------
 .../decoder/post_latgen_faster_mapped.cc      | 172 ++++++++++++++++++
 ...e_faster.h => post_latgen_faster_mapped.h} |  22 ++-
 fluid/DeepASR/decoder/pybind.cc               |  10 +-
 fluid/DeepASR/decoder/setup.py                |   8 +-
 fluid/DeepASR/infer_by_ckpt.py                |  41 ++++-
 6 files changed, 231 insertions(+), 167 deletions(-)
 delete mode 100644 fluid/DeepASR/decoder/post_decode_faster.cc
 create mode 100644 fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
 rename fluid/DeepASR/decoder/{post_decode_faster.h => post_latgen_faster_mapped.h} (75%)

diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc
deleted file mode 100644
index ce2b45bc6c..0000000000
--- a/fluid/DeepASR/decoder/post_decode_faster.cc
+++ /dev/null
@@ -1,145 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "post_decode_faster.h"
-
-typedef kaldi::int32 int32;
-using fst::SymbolTable;
-using fst::VectorFst;
-using fst::StdArc;
-
-Decoder::Decoder(std::string word_syms_filename,
-                 std::string fst_in_filename,
-                 std::string logprior_rxfilename,
-                 kaldi::BaseFloat acoustic_scale) {
-  const char* usage =
-      "Decode, reading log-likelihoods (of transition-ids or whatever symbol "
-      "is on the graph) as matrices.";
-
-  kaldi::ParseOptions po(usage);
-  binary = true;
-  this->acoustic_scale = acoustic_scale;
-  allow_partial = true;
-  kaldi::FasterDecoderOptions decoder_opts;
-  decoder_opts.Register(&po, true);  // true == include obscure settings.
-  po.Register("binary", &binary, "Write output in binary mode");
-  po.Register("allow-partial",
-              &allow_partial,
-              "Produce output even when final state was not reached");
-  po.Register("acoustic-scale",
-              &acoustic_scale,
-              "Scaling factor for acoustic likelihoods");
-
-  word_syms = NULL;
-  if (word_syms_filename != "") {
-    word_syms = fst::SymbolTable::ReadText(word_syms_filename);
-    if (!word_syms)
-      KALDI_ERR << "Could not read symbol table from file "
-                << word_syms_filename;
-  }
-
-  std::ifstream is_logprior(logprior_rxfilename);
-  logprior.Read(is_logprior, false);
-
-  // It's important that we initialize decode_fst after loglikes_reader, as it
-  // can prevent crashes on systems installed without enough virtual memory.
-  // It has to do with what happens on UNIX systems if you call fork() on a
-  // large process: the page-table entries are duplicated, which requires a
-  // lot of virtual memory.
-  decode_fst = fst::ReadFstKaldi(fst_in_filename);
-
-  decoder = new kaldi::FasterDecoder(*decode_fst, decoder_opts);
-}
-
-
-Decoder::~Decoder() {
-  if (!word_syms) delete word_syms;
-  delete decode_fst;
-  delete decoder;
-}
-
-std::string Decoder::decode(
-    std::string key,
-    const std::vector<std::vector<kaldi::BaseFloat>>& log_probs) {
-  size_t num_frames = log_probs.size();
-  size_t dim_label = log_probs[0].size();
-
-  kaldi::Matrix<kaldi::BaseFloat> loglikes(
-      num_frames, dim_label, kaldi::kSetZero, kaldi::kStrideEqualNumCols);
-  for (size_t i = 0; i < num_frames; ++i) {
-    memcpy(loglikes.Data() + i * dim_label,
-           log_probs[i].data(),
-           sizeof(kaldi::BaseFloat) * dim_label);
-  }
-
-  return decode(key, loglikes);
-}
-
-
-std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
-  kaldi::SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier);
-  std::vector<std::string> decoding_results;
-
-  for (; !posterior_reader.Done(); posterior_reader.Next()) {
-    std::string key = posterior_reader.Key();
-    kaldi::Matrix<kaldi::BaseFloat> loglikes(posterior_reader.Value());
-
-    decoding_results.push_back(decode(key, loglikes));
-  }
-
-  return decoding_results;
-}
-
-
-std::string Decoder::decode(std::string key,
-                            kaldi::Matrix<kaldi::BaseFloat>& loglikes) {
-  std::string decoding_result;
-
-  if (loglikes.NumRows() == 0) {
-    KALDI_WARN << "Zero-length utterance: " << key;
-  }
-  KALDI_ASSERT(loglikes.NumCols() == logprior.Dim());
-
-  loglikes.ApplyLog();
-  loglikes.AddVecToRows(-1.0, logprior);
-
-  kaldi::DecodableMatrixScaled decodable(loglikes, acoustic_scale);
-  decoder->Decode(&decodable);
-
-  VectorFst<kaldi::LatticeArc> decoded;  // linear FST.
-
-  if ((allow_partial || decoder->ReachedFinal()) &&
-      decoder->GetBestPath(&decoded)) {
-    if (!decoder->ReachedFinal())
-      KALDI_WARN << "Decoder did not reach end-state, outputting partial "
-                    "traceback.";
-
-    std::vector<int32> alignment;
-    std::vector<int32> words;
-    kaldi::LatticeWeight weight;
-
-    GetLinearSymbolSequence(decoded, &alignment, &words, &weight);
-
-    if (word_syms != NULL) {
-      for (size_t i = 0; i < words.size(); i++) {
-        std::string s = word_syms->Find(words[i]);
-        decoding_result += s;
-        if (s == "")
-          KALDI_ERR << "Word-id " << words[i] << " not in symbol table.";
-      }
-    }
-  }
-
-  return decoding_result;
-}
diff --git a/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc b/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
new file mode 100644
index 0000000000..19d5dbea83
--- /dev/null
+++ b/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
@@ -0,0 +1,172 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "post_latgen_faster_mapped.h"
+
+using namespace kaldi;
+typedef kaldi::int32 int32;
+using fst::SymbolTable;
+using fst::Fst;
+using fst::StdArc;
+
+Decoder::Decoder(std::string trans_model_in_filename,
+                 std::string word_syms_filename,
+                 std::string fst_in_filename,
+                 std::string logprior_in_filename,
+                 kaldi::BaseFloat acoustic_scale) {
+  const char *usage =
+      "Generate lattices using neural net model.\n"
+      "Usage: post-latgen-faster-mapped [options] <trans-model> "
+      "<fst-in|fsts-rspecifier> <logprior> <posts-rspecifier>"
+      " <lattice-wspecifier> [ <words-wspecifier> [<alignments-wspecifier>] "
+      "]\n";
+  ParseOptions po(usage);
+  allow_partial = false;
+  this->acoustic_scale = acoustic_scale;
+  LatticeFasterDecoderConfig config;
+
+  config.Register(&po);
+  int32 beam = 11;
+  po.Register("beam", &beam, "Beam size");
+  po.Register("acoustic-scale",
+              &acoustic_scale,
+              "Scaling factor for acoustic likelihoods");
+  po.Register("word-symbol-table",
+              &word_syms_filename,
+              "Symbol table for words [for debug output]");
+  po.Register("allow-partial",
+              &allow_partial,
+              "If true, produce output even if end state was not reached.");
+
+  // int argc = 2;
+  // char *argv[] = {"post-latgen-faster-mapped", "--beam=11"};
+  // po.Read(argc, argv);
+
+  std::ifstream is_logprior(logprior_in_filename);
+  logprior.Read(is_logprior, false);
+
+  {
+    bool binary;
+    Input ki(trans_model_in_filename, &binary);
+    this->trans_model.Read(ki.Stream(), binary);
+  }
+
+  this->determinize = config.determinize_lattice;
+
+  this->word_syms = NULL;
+  if (word_syms_filename != "") {
+    if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename))) {
+      KALDI_ERR << "Could not read symbol table from file "
+                << word_syms_filename;
+    }
+  }
+
+  // Input FST is just one FST, not a table of FSTs.
+  this->decode_fst = fst::ReadFstKaldiGeneric(fst_in_filename);
+
+  this->decoder = new LatticeFasterDecoder(*decode_fst, config);
+
+  std::string lattice_wspecifier =
+      "ark:|gzip -c > mapped_decoder_data/lat.JOB.gz";
+  if (!(determinize ? compact_lattice_writer.Open(lattice_wspecifier)
+                    : lattice_writer.Open(lattice_wspecifier)))
+    KALDI_ERR << "Could not open table for writing lattices: ";
+  // << lattice_wspecifier;
+
+  words_writer = new Int32VectorWriter("");
+  alignment_writer = new Int32VectorWriter("");
+}
+
+Decoder::~Decoder() {  // Releases the objects acquired by the constructor.
+  delete this->word_syms;  // delete on null is a no-op; old `!` test leaked
+  delete this->decode_fst;
+  delete this->decoder;
+  delete words_writer;
+  delete alignment_writer;
+}
+
+
+std::string Decoder::decode(std::string key,
+                            kaldi::Matrix<kaldi::BaseFloat> &loglikes) {
+  std::string decoding_result;
+  if (loglikes.NumRows() == 0) {
+    KALDI_WARN << "Zero-length utterance: " << key;
+    // num_fail++;
+  }
+  KALDI_ASSERT(loglikes.NumCols() == logprior.Dim());
+
+  loglikes.ApplyLog();
+  loglikes.AddVecToRows(-1.0, logprior);
+
+  DecodableMatrixScaledMapped matrix_decodable(
+      trans_model, loglikes, acoustic_scale);
+  double like;
+
+  if (DecodeUtteranceLatticeFaster(*decoder,
+                                   matrix_decodable,
+                                   trans_model,
+                                   word_syms,
+                                   key,
+                                   acoustic_scale,
+                                   determinize,
+                                   allow_partial,
+                                   alignment_writer,
+                                   words_writer,
+                                   &compact_lattice_writer,
+                                   &lattice_writer,
+                                   &like)) {
+    // tot_like += like;
+    // frame_count += loglikes.NumRows();
+    // num_success++;
+    decoding_result = "succeed!";
+  } else {  // else num_fail++;
+    decoding_result = "fail!";
+  }
+  return decoding_result;
+}
+
+std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
+  std::vector<std::string> ret;
+
+  try {
+    double tot_like = 0.0;
+    kaldi::int64 frame_count = 0;
+    // int num_success = 0, num_fail = 0;
+
+    KALDI_ASSERT(ClassifyRspecifier(fst_in_filename, NULL, NULL) ==
+                 kNoRspecifier);
+    SequentialBaseFloatMatrixReader posterior_reader("ark:" +
+                                                     posterior_rspecifier);
+
+    Timer timer;
+    timer.Reset();
+
+    {
+      for (; !posterior_reader.Done(); posterior_reader.Next()) {
+        std::string utt = posterior_reader.Key();
+        Matrix<BaseFloat> &loglikes(posterior_reader.Value());
+        KALDI_LOG << utt << " " << loglikes.NumRows() << " x "
+                  << loglikes.NumCols();
+        ret.push_back(decode(utt, loglikes));
+      }
+    }
+
+    double elapsed = timer.Elapsed();
+    return ret;
+  } catch (const std::exception &e) {
+    std::cerr << e.what();
+    // ret.push_back("error");
+    return ret;
+  }
+}
diff --git a/fluid/DeepASR/decoder/post_decode_faster.h b/fluid/DeepASR/decoder/post_latgen_faster_mapped.h
similarity index 75%
rename from fluid/DeepASR/decoder/post_decode_faster.h
rename to fluid/DeepASR/decoder/post_latgen_faster_mapped.h
index 8bade8d698..4adbf6ba22 100644
--- a/fluid/DeepASR/decoder/post_decode_faster.h
+++ b/fluid/DeepASR/decoder/post_latgen_faster_mapped.h
@@ -17,19 +17,18 @@ limitations under the License. */
 #include "base/kaldi-common.h"
 #include "base/timer.h"
 #include "decoder/decodable-matrix.h"
-#include "decoder/faster-decoder.h"
-#include "fstext/fstext-lib.h"
+#include "decoder/decoder-wrappers.h"
+#include "fstext/kaldi-fst-io.h"
 #include "hmm/transition-model.h"
-#include "lat/kaldi-lattice.h"  // for {Compact}LatticeArc
 #include "tree/context-dep.h"
 #include "util/common-utils.h"
 
-
 class Decoder {
 public:
-  Decoder(std::string word_syms_filename,
+  Decoder(std::string trans_model_in_filename,
+          std::string word_syms_filename,
           std::string fst_in_filename,
-          std::string logprior_rxfilename,
+          std::string logprior_in_filename,
           kaldi::BaseFloat acoustic_scale);
   ~Decoder();
 
@@ -48,11 +47,18 @@ class Decoder {
                      kaldi::Matrix<kaldi::BaseFloat> &loglikes);
 
   fst::SymbolTable *word_syms;
-  fst::VectorFst<fst::StdArc> *decode_fst;
-  kaldi::FasterDecoder *decoder;
+  fst::Fst<fst::StdArc> *decode_fst;
+  kaldi::LatticeFasterDecoder *decoder;
   kaldi::Vector<kaldi::BaseFloat> logprior;
+  kaldi::TransitionModel trans_model;
+
+  kaldi::CompactLatticeWriter compact_lattice_writer;
+  kaldi::LatticeWriter lattice_writer;
+  kaldi::Int32VectorWriter *words_writer;
+  kaldi::Int32VectorWriter *alignment_writer;
 
   bool binary;
+  bool determinize;
   kaldi::BaseFloat acoustic_scale;
   bool allow_partial;
 };
diff --git a/fluid/DeepASR/decoder/pybind.cc b/fluid/DeepASR/decoder/pybind.cc
index 90ea38ffb5..e99050e68d 100644
--- a/fluid/DeepASR/decoder/pybind.cc
+++ b/fluid/DeepASR/decoder/pybind.cc
@@ -15,15 +15,19 @@ limitations under the License. */
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 
-#include "post_decode_faster.h"
+#include "post_latgen_faster_mapped.h"
 
 namespace py = pybind11;
 
-PYBIND11_MODULE(post_decode_faster, m) {
+PYBIND11_MODULE(post_latgen_faster_mapped, m) {
   m.doc() = "Decoder for Deep ASR model";
 
   py::class_<Decoder>(m, "Decoder")
-      .def(py::init<std::string, std::string, std::string, kaldi::BaseFloat>())
+      .def(py::init<std::string,
+                    std::string,
+                    std::string,
+                    std::string,
+                    kaldi::BaseFloat>())
       .def("decode",
            (std::vector<std::string> (Decoder::*)(std::string)) &
                Decoder::decode,
diff --git a/fluid/DeepASR/decoder/setup.py b/fluid/DeepASR/decoder/setup.py
index a98c0b4cc1..74e8aa00fb 100644
--- a/fluid/DeepASR/decoder/setup.py
+++ b/fluid/DeepASR/decoder/setup.py
@@ -49,8 +49,8 @@
 
 ext_modules = [
     Extension(
-        'post_decode_faster',
-        ['pybind.cc', 'post_decode_faster.cc'],
+        'post_latgen_faster_mapped',
+        ['pybind.cc', 'post_latgen_faster_mapped.cc'],
         include_dirs=[
             'pybind11/include', '.', os.path.join(kaldi_root, 'src'),
             os.path.join(kaldi_root, 'tools/openfst/src/include')
@@ -63,8 +63,8 @@
 ]
 
 setup(
-    name='post_decode_faster',
-    version='0.0.1',
+    name='post_latgen_faster_mapped',
+    version='0.1.0',
     author='Paddle',
     author_email='',
     description='Decoder for Deep ASR model',
diff --git a/fluid/DeepASR/infer_by_ckpt.py b/fluid/DeepASR/infer_by_ckpt.py
index 831581924e..36681e9a2b 100644
--- a/fluid/DeepASR/infer_by_ckpt.py
+++ b/fluid/DeepASR/infer_by_ckpt.py
@@ -14,7 +14,7 @@
 import data_utils.augmentor.trans_splice as trans_splice
 import data_utils.augmentor.trans_delay as trans_delay
 import data_utils.async_data_reader as reader
-from decoder.post_decode_faster import Decoder
+from decoder.post_latgen_faster_mapped import Decoder
 from data_utils.util import lodtensor_to_ndarray
 from model_utils.model import stacked_lstmp_model
 from data_utils.util import split_infer_result
@@ -98,20 +98,25 @@ def parse_args():
         type=str,
         default='./checkpoint',
         help="The checkpoint path to init model. (default: %(default)s)")
+    parser.add_argument(
+        '--trans_model',
+        type=str,
+        default='./graph/trans_model',
+        help="The path to transition model. (default: %(default)s)")
     parser.add_argument(
         '--vocabulary',
         type=str,
-        default='./decoder/graph/words.txt',
+        default='./graph/words.txt',
         help="The path to vocabulary. (default: %(default)s)")
     parser.add_argument(
         '--graphs',
         type=str,
-        default='./decoder/graph/TLG.fst',
+        default='./graph/TLG.fst',
         help="The path to TLG graphs for decoding. (default: %(default)s)")
     parser.add_argument(
         '--log_prior',
         type=str,
-        default="./decoder/logprior",
+        default="./logprior",
         help="The log prior probs for training data. (default: %(default)s)")
     parser.add_argument(
         '--acoustic_scale',
@@ -123,6 +128,11 @@ def parse_args():
         type=str,
         default="./decoder/target_trans.txt",
         help="The path to target transcription. (default: %(default)s)")
+    parser.add_argument(
+        '--post_matrix_path',
+        type=str,
+        default=None,
+        help="The path to output post prob matrix. (default: %(default)s)")
     args = parser.parse_args()
     return args
 
@@ -146,6 +156,16 @@ def get_trg_trans(args):
     return trans_dict
 
 
+def out_post_matrix(key, prob):
+    with open(args.post_matrix_path, "a") as post_matrix:
+        post_matrix.write(key + " [\n")
+        for i in range(prob.shape[0]):
+            for j in range(prob.shape[1]):
+                post_matrix.write(str(prob[i][j]) + " ")
+            post_matrix.write("\n")
+        post_matrix.write("]\n")
+
+
 def infer_from_ckpt(args):
     """Inference by using checkpoint."""
 
@@ -174,13 +194,13 @@ def infer_from_ckpt(args):
     fluid.io.load_persistables(exe, args.checkpoint)
 
     # init decoder
-    decoder = Decoder(args.vocabulary, args.graphs, args.log_prior,
-                      args.acoustic_scale)
+    decoder = Decoder(args.trans_model, args.vocabulary, args.graphs,
+                      args.log_prior, args.acoustic_scale)
 
     ltrans = [
         trans_add_delta.TransAddDelta(2, 2),
         trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
-        trans_splice.TransSplice(), trans_delay.TransDelay(5)
+        trans_splice.TransSplice(5, 5), trans_delay.TransDelay(5)
     ]
 
     feature_t = fluid.LoDTensor()
@@ -197,6 +217,8 @@ def infer_from_ckpt(args):
                                              args.minimum_batch_size)):
         # load_data
         (features, labels, lod, name_lst) = batch_data
+        features = np.reshape(features, (-1, 11, 3, args.frame_dim))
+        features = np.transpose(features, (0, 2, 1, 3))
         feature_t.set(features, place)
         feature_t.set_lod([lod])
         label_t.set(labels, place)
@@ -216,6 +238,9 @@ def infer_from_ckpt(args):
         for index, sample in enumerate(infer_batch):
             key = name_lst[index]
             ref = trg_trans[key]
+            if args.post_matrix_path is not None:
+                out_post_matrix(key, sample)
+            '''
             hyp = decoder.decode(key, sample)
             edit_dist, ref_len = char_errors(ref.decode("utf8"), hyp)
             total_edit_dist += edit_dist
@@ -223,6 +248,8 @@ def infer_from_ckpt(args):
             print(key + "|Ref:", ref)
             print(key + "|Hyp:", hyp.encode("utf8"))
             print("Instance CER: ", edit_dist / ref_len)
+            '''
+        print("batch: ", batch_id)
 
     print("Total CER = %f" % (total_edit_dist / total_ref_len))
 

From 28515f698c70a5e16d059f1cca3cb09d6cf7de87 Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Wed, 6 Jun 2018 21:28:00 -0700
Subject: [PATCH 02/10] Add infer by checkpoint script

---
 fluid/DeepASR/examples/aishell/infer_by_ckpt.sh | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 fluid/DeepASR/examples/aishell/infer_by_ckpt.sh

diff --git a/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh b/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
new file mode 100644
index 0000000000..de049d0221
--- /dev/null
+++ b/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
@@ -0,0 +1,16 @@
+export CUDA_VISIBLE_DEVICES=0,1
+python -u ../../infer_by_ckpt.py --batch_size 64  \
+                        --checkpoint deep_asr.pass_20.checkpoint \
+                        --infer_feature_lst data/test_feature.lst  \
+                        --infer_label_lst data/test_label.lst  \
+                        --mean_var data/aishell/global_mean_var \
+                        --frame_dim 80  \
+                        --class_num 3040 \
+                        --post_matrix_path post_matrix.decoded \
+                        --target_trans data/text.test \
+                        --trans_model mapped_decoder_data/exp/tri5a/final.mdl \
+                        --log_prior mapped_decoder_data/logprior \
+                        --vocabulary mapped_decoder_data/exp/tri5a/graph/words.txt \
+                        --graphs mapped_decoder_data/exp/tri5a/graph/HCLG.fst \
+                        --acoustic_scale 0.059 \
+                        --parallel

From b3ba7fda4e1ca2ed1629540e5e7210790c23c6ab Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Thu, 7 Jun 2018 04:39:25 -0700
Subject: [PATCH 03/10] Add missing defined decoding function

---
 .../decoder/post_latgen_faster_mapped.cc      | 89 +++++++++++--------
 1 file changed, 54 insertions(+), 35 deletions(-)

diff --git a/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc b/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
index 19d5dbea83..c9176fb26c 100644
--- a/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
+++ b/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
@@ -97,6 +97,60 @@ Decoder::~Decoder() {
 }
 
 
+std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
+  std::vector<std::string> ret;
+
+  try {
+    double tot_like = 0.0;
+    kaldi::int64 frame_count = 0;
+    // int num_success = 0, num_fail = 0;
+
+    KALDI_ASSERT(ClassifyRspecifier(fst_in_filename, NULL, NULL) ==
+                 kNoRspecifier);
+    SequentialBaseFloatMatrixReader posterior_reader("ark:" +
+                                                     posterior_rspecifier);
+
+    Timer timer;
+    timer.Reset();
+
+    {
+      for (; !posterior_reader.Done(); posterior_reader.Next()) {
+        std::string utt = posterior_reader.Key();
+        Matrix<BaseFloat> &loglikes(posterior_reader.Value());
+        KALDI_LOG << utt << " " << loglikes.NumRows() << " x "
+                  << loglikes.NumCols();
+        ret.push_back(decode(utt, loglikes));
+      }
+    }
+
+    double elapsed = timer.Elapsed();
+    return ret;
+  } catch (const std::exception &e) {
+    std::cerr << e.what();
+    // ret.push_back("error");
+    return ret;
+  }
+}
+
+
+std::string Decoder::decode(
+    std::string key,
+    const std::vector<std::vector<kaldi::BaseFloat>> &log_probs) {
+  size_t num_frames = log_probs.size();
+  // Copies the per-frame rows into one dense matrix, then decodes it.
+  // An empty input has no row 0 to index, so its width is taken as 0.
+  size_t dim_label = log_probs.empty() ? 0 : log_probs[0].size();
+  kaldi::Matrix<kaldi::BaseFloat> loglikes(
+      num_frames, dim_label, kaldi::kSetZero, kaldi::kStrideEqualNumCols);
+  for (size_t i = 0; i < num_frames; ++i) {
+    KALDI_ASSERT(log_probs[i].size() == dim_label);  // memcpy reads dim_label
+    memcpy(loglikes.Data() + i * dim_label,
+           log_probs[i].data(),
+           sizeof(kaldi::BaseFloat) * dim_label);
+  }
+  return decode(key, loglikes);
+}
+
+
 std::string Decoder::decode(std::string key,
                             kaldi::Matrix<kaldi::BaseFloat> &loglikes) {
   std::string decoding_result;
@@ -135,38 +189,3 @@ std::string Decoder::decode(std::string key,
   }
   return decoding_result;
 }
-
-std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
-  std::vector<std::string> ret;
-
-  try {
-    double tot_like = 0.0;
-    kaldi::int64 frame_count = 0;
-    // int num_success = 0, num_fail = 0;
-
-    KALDI_ASSERT(ClassifyRspecifier(fst_in_filename, NULL, NULL) ==
-                 kNoRspecifier);
-    SequentialBaseFloatMatrixReader posterior_reader("ark:" +
-                                                     posterior_rspecifier);
-
-    Timer timer;
-    timer.Reset();
-
-    {
-      for (; !posterior_reader.Done(); posterior_reader.Next()) {
-        std::string utt = posterior_reader.Key();
-        Matrix<BaseFloat> &loglikes(posterior_reader.Value());
-        KALDI_LOG << utt << " " << loglikes.NumRows() << " x "
-                  << loglikes.NumCols();
-        ret.push_back(decode(utt, loglikes));
-      }
-    }
-
-    double elapsed = timer.Elapsed();
-    return ret;
-  } catch (const std::exception &e) {
-    std::cerr << e.what();
-    // ret.push_back("error");
-    return ret;
-  }
-}

From 84152a09cf14219e5d351cf0569922a416320b75 Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Sat, 9 Jun 2018 03:58:19 -0700
Subject: [PATCH 04/10] Disable splitting long sentence in infer

---
 fluid/DeepASR/data_utils/async_data_reader.py | 8 +++++++-
 fluid/DeepASR/infer_by_ckpt.py                | 7 +++++--
 fluid/DeepASR/train.py                        | 2 +-
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/fluid/DeepASR/data_utils/async_data_reader.py b/fluid/DeepASR/data_utils/async_data_reader.py
index 731c55de71..0c8d010755 100644
--- a/fluid/DeepASR/data_utils/async_data_reader.py
+++ b/fluid/DeepASR/data_utils/async_data_reader.py
@@ -185,6 +185,9 @@ class AsyncDataReader(object):
                                corresponding description file.
         drop_frame_len (int): Samples whose label length above the value will be
                               dropped.(Using '-1' to disable the policy)
+        split_sentence_threshold (int): Sentences longer than this value
+                                will be split.
+                                (Assign -1 to disable splitting)
         proc_num (int): Number of processes for processing data.
         sample_buffer_size (int): Buffer size to indicate the maximum samples
                                   cached.
@@ -204,6 +207,7 @@ def __init__(self,
                  feature_file_list,
                  label_file_list="",
                  drop_frame_len=512,
+                 split_sentence_threshold=512,
                  proc_num=10,
                  sample_buffer_size=1024,
                  sample_info_buffer_size=1024,
@@ -214,6 +218,7 @@ def __init__(self,
         self._feature_file_list = feature_file_list
         self._label_file_list = label_file_list
         self._drop_frame_len = drop_frame_len
+        self._split_sentence_threshold = split_sentence_threshold
         self._shuffle_block_num = shuffle_block_num
         self._block_info_list = None
         self._rng = random.Random(random_seed)
@@ -262,7 +267,8 @@ def generate_bucket_list(self, is_shuffle):
                     map(lambda info: info[0], bucket_block_info),
                     map(lambda info: info[1], bucket_block_info),
                     map(lambda info: info[2], bucket_block_info),
-                    map(lambda info: info[3], bucket_block_info)))
+                    map(lambda info: info[3], bucket_block_info),
+                    split_sentence_threshold=self._split_sentence_threshold))
 
     # @TODO make this configurable
     def set_transformers(self, transformers):
diff --git a/fluid/DeepASR/infer_by_ckpt.py b/fluid/DeepASR/infer_by_ckpt.py
index 36681e9a2b..554dd7223d 100644
--- a/fluid/DeepASR/infer_by_ckpt.py
+++ b/fluid/DeepASR/infer_by_ckpt.py
@@ -207,8 +207,11 @@ def infer_from_ckpt(args):
     label_t = fluid.LoDTensor()
 
     # infer data reader
-    infer_data_reader = reader.AsyncDataReader(args.infer_feature_lst,
-                                               args.infer_label_lst)
+    infer_data_reader = reader.AsyncDataReader(
+        args.infer_feature_lst,
+        args.infer_label_lst,
+        drop_frame_len=-1,
+        split_sentence_threshold=-1)
     infer_data_reader.set_transformers(ltrans)
     infer_costs, infer_accs = [], []
     total_edit_dist, total_ref_len = 0.0, 0
diff --git a/fluid/DeepASR/train.py b/fluid/DeepASR/train.py
index 8373c0e04f..6073db0d07 100644
--- a/fluid/DeepASR/train.py
+++ b/fluid/DeepASR/train.py
@@ -187,7 +187,7 @@ def test(exe):
             return -1.0, -1.0
         # test data reader
         test_data_reader = reader.AsyncDataReader(args.val_feature_lst,
-                                                  args.val_label_lst)
+                                                  args.val_label_lst, -1)
         test_data_reader.set_transformers(ltrans)
         test_costs, test_accs = [], []
         for batch_id, batch_data in enumerate(

From 989e6cd58379d89a6959821ab0eb67da978bb2c5 Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Wed, 13 Jun 2018 15:11:41 -0700
Subject: [PATCH 05/10] Return decoding result instead of output directly

---
 .../decoder/post_latgen_faster_mapped.cc      | 110 ++++++++++++++----
 .../decoder/post_latgen_faster_mapped.h       |   9 ++
 .../DeepASR/examples/aishell/infer_by_ckpt.sh |   2 +-
 3 files changed, 97 insertions(+), 24 deletions(-)

diff --git a/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc b/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
index c9176fb26c..448ec358ea 100644
--- a/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
+++ b/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
@@ -117,8 +117,6 @@ std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
       for (; !posterior_reader.Done(); posterior_reader.Next()) {
         std::string utt = posterior_reader.Key();
         Matrix<BaseFloat> &loglikes(posterior_reader.Value());
-        KALDI_LOG << utt << " " << loglikes.NumRows() << " x "
-                  << loglikes.NumCols();
         ret.push_back(decode(utt, loglikes));
       }
     }
@@ -127,11 +125,20 @@ std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
     return ret;
   } catch (const std::exception &e) {
     std::cerr << e.what();
-    // ret.push_back("error");
     return ret;
   }
 }
 
+std::vector<std::string> Decoder::decode_batch(
+    std::vector<std::string> keys,
+    const std::vector<std::vector<std::vector<kaldi::BaseFloat>>>
+        &log_probs_batch) {
+  std::vector<std::string> decoding_results;
+  for (size_t i = 0; i < keys.size(); ++i) {
+    decoding_results.push_back(decode(keys[i], log_probs_batch[i]));
+  }
+  return decoding_results;
+}
 
 std::string Decoder::decode(
     std::string key,
@@ -167,25 +174,82 @@ std::string Decoder::decode(std::string key,
       trans_model, loglikes, acoustic_scale);
   double like;
 
-  if (DecodeUtteranceLatticeFaster(*decoder,
-                                   matrix_decodable,
-                                   trans_model,
-                                   word_syms,
-                                   key,
-                                   acoustic_scale,
-                                   determinize,
-                                   allow_partial,
-                                   alignment_writer,
-                                   words_writer,
-                                   &compact_lattice_writer,
-                                   &lattice_writer,
-                                   &like)) {
-    // tot_like += like;
-    // frame_count += loglikes.NumRows();
-    // num_success++;
-    decoding_result = "succeed!";
-  } else {  // else num_fail++;
-    decoding_result = "fail!";
+  return this->DecodeUtteranceLatticeFaster(matrix_decodable, key, &like);
+}
+
+
+// Takes care of output.  Returns true on success.
+std::string Decoder::DecodeUtteranceLatticeFaster(
+    DecodableInterface &decodable,  // not const but is really an input.
+    std::string utt,
+    double *like_ptr) {  // puts utterance's like in like_ptr on success.
+  using fst::VectorFst;
+
+  if (!decoder->Decode(&decodable)) {
+    KALDI_WARN << "Failed to decode file " << utt;
+    return false;
+  }
+  if (!decoder->ReachedFinal()) {
+    if (allow_partial) {
+      KALDI_WARN << "Outputting partial output for utterance " << utt
+                 << " since no final-state reached\n";
+    } else {
+      KALDI_WARN << "Not producing output for utterance " << utt
+                 << " since no final-state reached and "
+                 << "--allow-partial=false.\n";
+      return false;
+    }
+  }
+
+  double likelihood;
+  LatticeWeight weight;
+  int32 num_frames;
+  std::string ret = utt + ' ';
+  {  // First do some stuff with word-level traceback...
+    VectorFst<LatticeArc> decoded;
+    if (!decoder->GetBestPath(&decoded))
+      // Shouldn't really reach this point as already checked success.
+      KALDI_ERR << "Failed to get traceback for utterance " << utt;
+
+    std::vector<int32> alignment;
+    std::vector<int32> words;
+    GetLinearSymbolSequence(decoded, &alignment, &words, &weight);
+    num_frames = alignment.size();
+    if (alignment_writer->IsOpen()) alignment_writer->Write(utt, alignment);
+    if (word_syms != NULL) {
+      for (size_t i = 0; i < words.size(); i++) {
+        std::string s = word_syms->Find(words[i]);
+        ret += s + ' ';
+      }
+    }
+    likelihood = -(weight.Value1() + weight.Value2());
+  }
+
+  // Get lattice, and do determinization if requested.
+  Lattice lat;
+  decoder->GetRawLattice(&lat);
+  if (lat.NumStates() == 0)
+    KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt;
+  fst::Connect(&lat);
+  if (determinize) {
+    CompactLattice clat;
+    if (!DeterminizeLatticePhonePrunedWrapper(
+            trans_model,
+            &lat,
+            decoder->GetOptions().lattice_beam,
+            &clat,
+            decoder->GetOptions().det_opts))
+      KALDI_WARN << "Determinization finished earlier than the beam for "
+                 << "utterance " << utt;
+    // We'll write the lattice without acoustic scaling.
+    if (acoustic_scale != 0.0)
+      fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat);
+    compact_lattice_writer.Write(utt, clat);
+  } else {
+    // We'll write the lattice without acoustic scaling.
+    if (acoustic_scale != 0.0)
+      fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &lat);
+    lattice_writer.Write(utt, lat);
   }
-  return decoding_result;
+  return ret;
 }
diff --git a/fluid/DeepASR/decoder/post_latgen_faster_mapped.h b/fluid/DeepASR/decoder/post_latgen_faster_mapped.h
index 4adbf6ba22..a3a9e7d293 100644
--- a/fluid/DeepASR/decoder/post_latgen_faster_mapped.h
+++ b/fluid/DeepASR/decoder/post_latgen_faster_mapped.h
@@ -41,10 +41,19 @@ class Decoder {
       std::string key,
       const std::vector<std::vector<kaldi::BaseFloat>> &log_probs);
 
+  // Accept the scores of utterances in batch and return the decoding results
+  std::vector<std::string> decode_batch(
+      std::vector<std::string> key,
+      const std::vector<std::vector<std::vector<kaldi::BaseFloat>>>
+          &log_probs_batch);
+
 private:
   // For decoding one utterance
   std::string decode(std::string key,
                      kaldi::Matrix<kaldi::BaseFloat> &loglikes);
+  std::string DecodeUtteranceLatticeFaster(kaldi::DecodableInterface &decodable,
+                                           std::string utt,
+                                           double *like_ptr);
 
   fst::SymbolTable *word_syms;
   fst::Fst<fst::StdArc> *decode_fst;
diff --git a/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh b/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
index de049d0221..ba7a8fed5f 100644
--- a/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
+++ b/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
@@ -1,5 +1,5 @@
 export CUDA_VISIBLE_DEVICES=0,1
-python -u ../../infer_by_ckpt.py --batch_size 64  \
+python -u ../../infer_by_ckpt.py --batch_size 48  \
                         --checkpoint deep_asr.pass_20.checkpoint \
                         --infer_feature_lst data/test_feature.lst  \
                         --infer_label_lst data/test_label.lst  \

From e1d90fc013b267dc5a61d08141d6f376b26f7071 Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Wed, 20 Jun 2018 08:41:30 -0700
Subject: [PATCH 06/10] Use thread pool for parallel decoding

---
 .../decoder/post_latgen_faster_mapped.cc      | 135 ++++++++++++------
 .../decoder/post_latgen_faster_mapped.h       |  22 +--
 fluid/DeepASR/decoder/pybind.cc               |  17 ++-
 fluid/DeepASR/decoder/setup.py                |   4 +-
 fluid/DeepASR/decoder/setup.sh                |   5 +
 .../DeepASR/examples/aishell/infer_by_ckpt.sh |   1 -
 fluid/DeepASR/infer_by_ckpt.py                |  12 +-
 7 files changed, 131 insertions(+), 65 deletions(-)

diff --git a/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc b/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
index 448ec358ea..87791d5131 100644
--- a/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
+++ b/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "post_latgen_faster_mapped.h"
+#include <limits>
+#include "ThreadPool.h"
 
 using namespace kaldi;
 typedef kaldi::int32 int32;
@@ -34,11 +36,9 @@ Decoder::Decoder(std::string trans_model_in_filename,
   ParseOptions po(usage);
   allow_partial = false;
   this->acoustic_scale = acoustic_scale;
-  LatticeFasterDecoderConfig config;
 
   config.Register(&po);
   int32 beam = 11;
-  po.Register("beam", &beam, "Beam size");
   po.Register("acoustic-scale",
               &acoustic_scale,
               "Scaling factor for acoustic likelihoods");
@@ -49,10 +49,13 @@ Decoder::Decoder(std::string trans_model_in_filename,
               &allow_partial,
               "If true, produce output even if end state was not reached.");
 
-  // int argc = 2;
-  // char *argv[] = {"post-latgen-faster-mapped", "--beam=11"};
-  // po.Read(argc, argv);
+  int argc = 2;  // synthetic command line: program name plus one option
+  char *argv[] = {(char *)"post-latgen-faster-mapped",
+                  (char *)"--beam=11"};  // literals stay valid for po.Read
 
+  po.Read(argc, argv);
+
+  po.PrintConfig(std::cout);
   std::ifstream is_logprior(logprior_in_filename);
   logprior.Read(is_logprior, false);
 
@@ -75,14 +78,16 @@ Decoder::Decoder(std::string trans_model_in_filename,
   // Input FST is just one FST, not a table of FSTs.
   this->decode_fst = fst::ReadFstKaldiGeneric(fst_in_filename);
 
-  this->decoder = new LatticeFasterDecoder(*decode_fst, config);
+  kaldi::LatticeFasterDecoder *decoder =
+      new LatticeFasterDecoder(*decode_fst, config);
+  decoder_pool.emplace_back(decoder);
 
   std::string lattice_wspecifier =
       "ark:|gzip -c > mapped_decoder_data/lat.JOB.gz";
   if (!(determinize ? compact_lattice_writer.Open(lattice_wspecifier)
                     : lattice_writer.Open(lattice_wspecifier)))
-    KALDI_ERR << "Could not open table for writing lattices: ";
-  // << lattice_wspecifier;
+    KALDI_ERR << "Could not open table for writing lattices: "
+              << lattice_wspecifier;
 
   words_writer = new Int32VectorWriter("");
   alignment_writer = new Int32VectorWriter("");
@@ -91,15 +96,16 @@ Decoder::Decoder(std::string trans_model_in_filename,
 Decoder::~Decoder() {
   if (!this->word_syms) delete this->word_syms;
   delete this->decode_fst;
-  delete this->decoder;
+  for (size_t i = 0; i < decoder_pool.size(); ++i) {
+    delete decoder_pool[i];
+  }
   delete words_writer;
   delete alignment_writer;
 }
 
 
-std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
-  std::vector<std::string> ret;
-
+void Decoder::decode_from_file(std::string posterior_rspecifier,
+                               size_t num_processes) {
   try {
     double tot_like = 0.0;
     kaldi::int64 frame_count = 0;
@@ -112,40 +118,41 @@ std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
 
     Timer timer;
     timer.Reset();
+    double elapsed = 0.0;
+    // Zero workers would never advance the reader in the loop below.
+    if (num_processes == 0) num_processes = 1;
+    // Grow the pool so that every worker slot has its own decoder.
+    for (size_t n = decoder_pool.size(); n < num_processes; ++n) {
+      decoder_pool.emplace_back(new LatticeFasterDecoder(*decode_fst, config));
+    }
+    elapsed = timer.Elapsed();
+    ThreadPool thread_pool(num_processes);
 
-    {
-      for (; !posterior_reader.Done(); posterior_reader.Next()) {
+    while (!posterior_reader.Done()) {
+      timer.Reset();
+      std::vector<std::future<std::string>> que;
+      for (size_t i = 0; i < num_processes && !posterior_reader.Done(); ++i) {
         std::string utt = posterior_reader.Key();
         Matrix<BaseFloat> &loglikes(posterior_reader.Value());
-        ret.push_back(decode(utt, loglikes));
+        que.emplace_back(thread_pool.enqueue(std::bind(
+            &Decoder::decode_internal, this, decoder_pool[i], utt, loglikes)));
+        posterior_reader.Next();
+      }
+      timer.Reset();
+      for (size_t i = 0; i < que.size(); ++i) {
+        std::cout << que[i].get() << std::endl;
       }
     }
 
-    double elapsed = timer.Elapsed();
-    return ret;
   } catch (const std::exception &e) {
     std::cerr << e.what();
-    return ret;
   }
 }
 
-std::vector<std::string> Decoder::decode_batch(
-    std::vector<std::string> keys,
-    const std::vector<std::vector<std::vector<kaldi::BaseFloat>>>
-        &log_probs_batch) {
-  std::vector<std::string> decoding_results;
-  for (size_t i = 0; i < keys.size(); ++i) {
-    decoding_results.push_back(decode(keys[i], log_probs_batch[i]));
-  }
-  return decoding_results;
-}
-
-std::string Decoder::decode(
-    std::string key,
+inline kaldi::Matrix<kaldi::BaseFloat> vector2kaldi_mat(
     const std::vector<std::vector<kaldi::BaseFloat>> &log_probs) {
   size_t num_frames = log_probs.size();
   size_t dim_label = log_probs[0].size();
-
   kaldi::Matrix<kaldi::BaseFloat> loglikes(
       num_frames, dim_label, kaldi::kSetZero, kaldi::kStrideEqualNumCols);
   for (size_t i = 0; i < num_frames; ++i) {
@@ -153,14 +160,56 @@ std::string Decoder::decode(
            log_probs[i].data(),
            sizeof(kaldi::BaseFloat) * dim_label);
   }
+  return loglikes;
+}
+
+std::vector<std::string> Decoder::decode_batch(
+    std::vector<std::string> keys,
+    const std::vector<std::vector<std::vector<kaldi::BaseFloat>>>
+        &log_probs_batch,
+    size_t num_processes) {
+  // A zero worker count would make the batching loop below spin forever.
+  if (num_processes == 0) num_processes = 1;
+  ThreadPool thread_pool(num_processes);
+  std::vector<std::string> decoding_results;
+  // Grow the pool so that every worker slot has its own decoder.
+  for (size_t n = decoder_pool.size(); n < num_processes; ++n) {
+    decoder_pool.emplace_back(new LatticeFasterDecoder(*decode_fst, config));
+  }
 
-  return decode(key, loglikes);
+  // Decode only utterances that have both a key and a score matrix.
+  const size_t num_scores = log_probs_batch.size();
+  const size_t total = keys.size() < num_scores ? keys.size() : num_scores;
+  if (keys.size() != num_scores) KALDI_WARN << "keys/scores size mismatch";
+  size_t index = 0;
+  while (index < total) {
+    // Run one wave of at most num_processes jobs and wait for all of them,
+    // so a decoder slot is never used by two jobs at the same time.
+    std::vector<std::future<std::string>> res_in_que;
+    for (size_t t = 0; t < num_processes && index < total; ++t, ++index) {
+      res_in_que.emplace_back(thread_pool.enqueue(
+          std::bind(&Decoder::decode_internal, this, decoder_pool[t],
+                    keys[index], vector2kaldi_mat(log_probs_batch[index]))));
+    }
+    for (size_t i = 0; i < res_in_que.size(); ++i) {
+      decoding_results.emplace_back(res_in_que[i].get());
+    }
+  }
+  return decoding_results;
 }
 
+std::string Decoder::decode(  // decodes the scores of a single utterance
+    std::string key,
+    const std::vector<std::vector<kaldi::BaseFloat>> &log_probs) {
+  kaldi::Matrix<kaldi::BaseFloat> loglikes = vector2kaldi_mat(log_probs);
+  return decode_internal(decoder_pool[0], key, loglikes);  // first decoder
+}
 
-std::string Decoder::decode(std::string key,
-                            kaldi::Matrix<kaldi::BaseFloat> &loglikes) {
-  std::string decoding_result;
+
+std::string Decoder::decode_internal(
+    LatticeFasterDecoder *decoder,
+    std::string key,
+    kaldi::Matrix<kaldi::BaseFloat> &loglikes) {
   if (loglikes.NumRows() == 0) {
     KALDI_WARN << "Zero-length utterance: " << key;
     // num_fail++;
@@ -173,21 +222,22 @@ std::string Decoder::decode(std::string key,
   DecodableMatrixScaledMapped matrix_decodable(
       trans_model, loglikes, acoustic_scale);
   double like;
-
-  return this->DecodeUtteranceLatticeFaster(matrix_decodable, key, &like);
+  return this->DecodeUtteranceLatticeFaster(
+      decoder, matrix_decodable, key, &like);
 }
 
 
-// Takes care of output.  Returns true on success.
 std::string Decoder::DecodeUtteranceLatticeFaster(
+    LatticeFasterDecoder *decoder,
     DecodableInterface &decodable,  // not const but is really an input.
     std::string utt,
     double *like_ptr) {  // puts utterance's like in like_ptr on success.
   using fst::VectorFst;
+  std::string ret = utt + ' ';
 
   if (!decoder->Decode(&decodable)) {
     KALDI_WARN << "Failed to decode file " << utt;
-    return false;
+    return ret;
   }
   if (!decoder->ReachedFinal()) {
     if (allow_partial) {
@@ -197,14 +247,13 @@ std::string Decoder::DecodeUtteranceLatticeFaster(
       KALDI_WARN << "Not producing output for utterance " << utt
                  << " since no final-state reached and "
                  << "--allow-partial=false.\n";
-      return false;
+      return ret;
     }
   }
 
   double likelihood;
   LatticeWeight weight;
   int32 num_frames;
-  std::string ret = utt + ' ';
   {  // First do some stuff with word-level traceback...
     VectorFst<LatticeArc> decoded;
     if (!decoder->GetBestPath(&decoded))
@@ -215,7 +264,7 @@ std::string Decoder::DecodeUtteranceLatticeFaster(
     std::vector<int32> words;
     GetLinearSymbolSequence(decoded, &alignment, &words, &weight);
     num_frames = alignment.size();
-    if (alignment_writer->IsOpen()) alignment_writer->Write(utt, alignment);
+    // if (alignment_writer->IsOpen()) alignment_writer->Write(utt, alignment);
     if (word_syms != NULL) {
       for (size_t i = 0; i < words.size(); i++) {
         std::string s = word_syms->Find(words[i]);
diff --git a/fluid/DeepASR/decoder/post_latgen_faster_mapped.h b/fluid/DeepASR/decoder/post_latgen_faster_mapped.h
index a3a9e7d293..0bbb93065a 100644
--- a/fluid/DeepASR/decoder/post_latgen_faster_mapped.h
+++ b/fluid/DeepASR/decoder/post_latgen_faster_mapped.h
@@ -32,9 +32,10 @@ class Decoder {
           kaldi::BaseFloat acoustic_scale);
   ~Decoder();
 
-  // Interface to accept the scores read from specifier and return
-  // the batch decoding results
-  std::vector<std::string> decode(std::string posterior_rspecifier);
+  // Interface to accept the scores read from specifier and print
+  // the decoding results directly
+  void decode_from_file(std::string posterior_rspecifier,
+                        size_t num_processes = 1);
 
   // Accept the scores of one utterance and return the decoding result
   std::string decode(
@@ -45,21 +46,26 @@ class Decoder {
   std::vector<std::string> decode_batch(
       std::vector<std::string> key,
       const std::vector<std::vector<std::vector<kaldi::BaseFloat>>>
-          &log_probs_batch);
+          &log_probs_batch,
+      size_t num_processes = 1);
 
 private:
   // For decoding one utterance
-  std::string decode(std::string key,
-                     kaldi::Matrix<kaldi::BaseFloat> &loglikes);
-  std::string DecodeUtteranceLatticeFaster(kaldi::DecodableInterface &decodable,
+  std::string decode_internal(kaldi::LatticeFasterDecoder *decoder,
+                              std::string key,
+                              kaldi::Matrix<kaldi::BaseFloat> &loglikes);
+
+  std::string DecodeUtteranceLatticeFaster(kaldi::LatticeFasterDecoder *decoder,
+                                           kaldi::DecodableInterface &decodable,
                                            std::string utt,
                                            double *like_ptr);
 
   fst::SymbolTable *word_syms;
   fst::Fst<fst::StdArc> *decode_fst;
-  kaldi::LatticeFasterDecoder *decoder;
+  std::vector<kaldi::LatticeFasterDecoder *> decoder_pool;
   kaldi::Vector<kaldi::BaseFloat> logprior;
   kaldi::TransitionModel trans_model;
+  kaldi::LatticeFasterDecoderConfig config;
 
   kaldi::CompactLatticeWriter compact_lattice_writer;
   kaldi::LatticeWriter lattice_writer;
diff --git a/fluid/DeepASR/decoder/pybind.cc b/fluid/DeepASR/decoder/pybind.cc
index e99050e68d..93605d214c 100644
--- a/fluid/DeepASR/decoder/pybind.cc
+++ b/fluid/DeepASR/decoder/pybind.cc
@@ -28,16 +28,23 @@ PYBIND11_MODULE(post_latgen_faster_mapped, m) {
                     std::string,
                     std::string,
                     kaldi::BaseFloat>())
-      .def("decode",
-           (std::vector<std::string> (Decoder::*)(std::string)) &
-               Decoder::decode,
+      .def("decode_from_file",
+           (void (Decoder::*)(std::string, size_t)) & Decoder::decode_from_file,
            "Decode for the probability matrices in specifier "
-           "and return the transcriptions.")
+           "and print the transcriptions.")
       .def(
           "decode",
           (std::string (Decoder::*)(
               std::string, const std::vector<std::vector<kaldi::BaseFloat>>&)) &
               Decoder::decode,
           "Decode one input probability matrix "
-          "and return the transcription.");
+          "and return the transcription.")
+      .def("decode_batch",
+           (std::vector<std::string> (Decoder::*)(
+               std::string,
+               const std::vector<std::vector<std::vector<kaldi::BaseFloat>>>&,
+               size_t num_processes)) &
+               Decoder::decode_batch,
+           "Decode one batch of probability matrices "
+           "and return the transcriptions.");
 }
diff --git a/fluid/DeepASR/decoder/setup.py b/fluid/DeepASR/decoder/setup.py
index 74e8aa00fb..81fc857cce 100644
--- a/fluid/DeepASR/decoder/setup.py
+++ b/fluid/DeepASR/decoder/setup.py
@@ -24,7 +24,7 @@
                      "install kaldi and export KALDI_ROOT=<kaldi's root dir> .")
 
 args = [
-    '-std=c++11', '-Wno-sign-compare', '-Wno-unused-variable',
+    '-std=c++11', '-fopenmp', '-Wno-sign-compare', '-Wno-unused-variable',
     '-Wno-unused-local-typedefs', '-Wno-unused-but-set-variable',
     '-Wno-deprecated-declarations', '-Wno-unused-function'
 ]
@@ -53,7 +53,7 @@
         ['pybind.cc', 'post_latgen_faster_mapped.cc'],
         include_dirs=[
             'pybind11/include', '.', os.path.join(kaldi_root, 'src'),
-            os.path.join(kaldi_root, 'tools/openfst/src/include')
+            os.path.join(kaldi_root, 'tools/openfst/src/include'), 'ThreadPool'
         ],
         language='c++',
         libraries=LIBS,
diff --git a/fluid/DeepASR/decoder/setup.sh b/fluid/DeepASR/decoder/setup.sh
index 1471f85f41..238cc64986 100644
--- a/fluid/DeepASR/decoder/setup.sh
+++ b/fluid/DeepASR/decoder/setup.sh
@@ -4,4 +4,9 @@ if [ ! -d pybind11 ]; then
     git clone https://github.com/pybind/pybind11.git
 fi 
 
+if [ ! -d ThreadPool ]; then  # clone only when the checkout is missing
+    git clone https://github.com/progschj/ThreadPool.git
+    echo -e "\n"
+fi
+
 python setup.py build_ext -i 
diff --git a/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh b/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
index ba7a8fed5f..e8e199f923 100644
--- a/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
+++ b/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
@@ -6,7 +6,6 @@ python -u ../../infer_by_ckpt.py --batch_size 48  \
                         --mean_var data/aishell/global_mean_var \
                         --frame_dim 80  \
                         --class_num 3040 \
-                        --post_matrix_path post_matrix.decoded \
                         --target_trans data/text.test \
                         --trans_model mapped_decoder_data/exp/tri5a/final.mdl \
                         --log_prior mapped_decoder_data/logprior \
diff --git a/fluid/DeepASR/infer_by_ckpt.py b/fluid/DeepASR/infer_by_ckpt.py
index 554dd7223d..0498e19aa5 100644
--- a/fluid/DeepASR/infer_by_ckpt.py
+++ b/fluid/DeepASR/infer_by_ckpt.py
@@ -238,10 +238,10 @@ def infer_from_ckpt(args):
         probs, lod = lodtensor_to_ndarray(results[0])
         infer_batch = split_infer_result(probs, lod)
 
-        for index, sample in enumerate(infer_batch):
-            key = name_lst[index]
-            ref = trg_trans[key]
-            if args.post_matrix_path is not None:
+        decoder.decode_batch(name_lst, infer_batch)
+        if args.post_matrix_path is not None:
+            for index, sample in enumerate(infer_batch):
+                key = name_lst[index]
                 out_post_matrix(key, sample)
             '''
             hyp = decoder.decode(key, sample)
@@ -252,9 +252,9 @@ def infer_from_ckpt(args):
             print(key + "|Hyp:", hyp.encode("utf8"))
             print("Instance CER: ", edit_dist / ref_len)
             '''
-        print("batch: ", batch_id)
+        #print("batch: ", batch_id)
 
-    print("Total CER = %f" % (total_edit_dist / total_ref_len))
+        #print("Total CER = %f" % (total_edit_dist / total_ref_len))
 
 
 if __name__ == '__main__':

From 175f36f9e02b53413def7daf3b616d1e58aacbfe Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Wed, 20 Jun 2018 21:15:13 -0700
Subject: [PATCH 07/10] Expose number of threads for decoding

---
 fluid/DeepASR/decoder/pybind.cc                 |  2 +-
 fluid/DeepASR/examples/aishell/infer_by_ckpt.sh |  6 +++---
 fluid/DeepASR/infer_by_ckpt.py                  | 11 +++++++++--
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/fluid/DeepASR/decoder/pybind.cc b/fluid/DeepASR/decoder/pybind.cc
index 93605d214c..151eae3b83 100644
--- a/fluid/DeepASR/decoder/pybind.cc
+++ b/fluid/DeepASR/decoder/pybind.cc
@@ -41,7 +41,7 @@ PYBIND11_MODULE(post_latgen_faster_mapped, m) {
           "and return the transcription.")
       .def("decode_batch",
            (std::vector<std::string> (Decoder::*)(
-               std::string,
+               std::vector<std::string>,
                const std::vector<std::vector<std::vector<kaldi::BaseFloat>>>&,
                size_t num_processes)) &
                Decoder::decode_batch,
diff --git a/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh b/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
index e8e199f923..77eb4ce9cd 100644
--- a/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
+++ b/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
@@ -1,6 +1,6 @@
-export CUDA_VISIBLE_DEVICES=0,1
-python -u ../../infer_by_ckpt.py --batch_size 48  \
-                        --checkpoint deep_asr.pass_20.checkpoint \
+export CUDA_VISIBLE_DEVICES=2,3,4,5
+python -u ../../infer_by_ckpt.py --batch_size 96  \
+                        --checkpoint checkpoints/deep_asr.pass_20.checkpoint \
                         --infer_feature_lst data/test_feature.lst  \
                         --infer_label_lst data/test_label.lst  \
                         --mean_var data/aishell/global_mean_var \
diff --git a/fluid/DeepASR/infer_by_ckpt.py b/fluid/DeepASR/infer_by_ckpt.py
index 0498e19aa5..07e2d6fc56 100644
--- a/fluid/DeepASR/infer_by_ckpt.py
+++ b/fluid/DeepASR/infer_by_ckpt.py
@@ -59,6 +59,11 @@ def parse_args():
         type=int,
         default=1749,
         help='Number of classes in label. (default: %(default)d)')
+    parser.add_argument(
+        '--num_threads',
+        type=int,
+        default=10,
+        help='The number of threads for decoding. (default: %(default)d)')
     parser.add_argument(
         '--learning_rate',
         type=float,
@@ -189,7 +194,7 @@ def infer_from_ckpt(args):
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
 
-    trg_trans = get_trg_trans(args)
+    #trg_trans = get_trg_trans(args)
     # load checkpoint.
     fluid.io.load_persistables(exe, args.checkpoint)
 
@@ -238,7 +243,9 @@ def infer_from_ckpt(args):
         probs, lod = lodtensor_to_ndarray(results[0])
         infer_batch = split_infer_result(probs, lod)
 
-        decoder.decode_batch(name_lst, infer_batch)
+        decoded = decoder.decode_batch(name_lst, infer_batch, args.num_threads)
+        for res in decoded:
+            print(res.encode("utf8"))
         if args.post_matrix_path is not None:
             for index, sample in enumerate(infer_batch):
                 key = name_lst[index]

From c462ab1ae5cbb628a20e6ddd62697ef01f45968a Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Mon, 2 Jul 2018 01:57:21 -0700
Subject: [PATCH 08/10] Refine infer_by_ckpt: code clean & move out cer scoring

---
 .../DeepASR/examples/aishell/infer_by_ckpt.sh |   6 +-
 fluid/DeepASR/examples/aishell/score_cer.sh   |   4 +
 fluid/DeepASR/infer_by_ckpt.py                | 130 ++++++++----------
 fluid/DeepASR/score_error_rate.py             |  68 +++++++++
 4 files changed, 137 insertions(+), 71 deletions(-)
 create mode 100644 fluid/DeepASR/examples/aishell/score_cer.sh
 create mode 100644 fluid/DeepASR/score_error_rate.py

diff --git a/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh b/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
index 77eb4ce9cd..990daee375 100644
--- a/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
+++ b/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
@@ -1,12 +1,14 @@
+decode_to_path=./decoding_result.txt
+
 export CUDA_VISIBLE_DEVICES=2,3,4,5
 python -u ../../infer_by_ckpt.py --batch_size 96  \
                         --checkpoint checkpoints/deep_asr.pass_20.checkpoint \
                         --infer_feature_lst data/test_feature.lst  \
-                        --infer_label_lst data/test_label.lst  \
                         --mean_var data/aishell/global_mean_var \
                         --frame_dim 80  \
                         --class_num 3040 \
-                        --target_trans data/text.test \
+                        --num_threads 24  \
+                        --decode_to_path $decode_to_path \
                         --trans_model mapped_decoder_data/exp/tri5a/final.mdl \
                         --log_prior mapped_decoder_data/logprior \
                         --vocabulary mapped_decoder_data/exp/tri5a/graph/words.txt \
diff --git a/fluid/DeepASR/examples/aishell/score_cer.sh b/fluid/DeepASR/examples/aishell/score_cer.sh
new file mode 100644
index 0000000000..6c60d196a7
--- /dev/null
+++ b/fluid/DeepASR/examples/aishell/score_cer.sh
@@ -0,0 +1,4 @@
+ref_txt=data/text.test
+hyp_txt=decoding_result.txt
+
+python ../../score_error_rate.py --error_rate_type cer --ref $ref_txt --hyp $hyp_txt
diff --git a/fluid/DeepASR/infer_by_ckpt.py b/fluid/DeepASR/infer_by_ckpt.py
index 2461852cd4..881b5ba225 100644
--- a/fluid/DeepASR/infer_by_ckpt.py
+++ b/fluid/DeepASR/infer_by_ckpt.py
@@ -14,10 +14,9 @@
 import data_utils.augmentor.trans_splice as trans_splice
 import data_utils.augmentor.trans_delay as trans_delay
 import data_utils.async_data_reader as reader
-from decoder.post_latgen_faster_mapped import Decoder
-from data_utils.util import lodtensor_to_ndarray
+from data_utils.util import lodtensor_to_ndarray, split_infer_result
 from model_utils.model import stacked_lstmp_model
-from data_utils.util import split_infer_result
+from decoder.post_latgen_faster_mapped import Decoder
 from tools.error_rate import char_errors
 
 
@@ -64,11 +63,6 @@ def parse_args():
         type=int,
         default=10,
         help='The number of threads for decoding. (default: %(default)d)')
-    parser.add_argument(
-        '--learning_rate',
-        type=float,
-        default=0.00016,
-        help='Learning rate used to train. (default: %(default)f)')
     parser.add_argument(
         '--device',
         type=str,
@@ -80,7 +74,7 @@ def parse_args():
     parser.add_argument(
         '--mean_var',
         type=str,
-        default='data/global_mean_var_search26kHr',
+        default='data/global_mean_var',
         help="The path for feature's global mean and variance. "
         "(default: %(default)s)")
     parser.add_argument(
@@ -88,16 +82,6 @@ def parse_args():
         type=str,
         default='data/infer_feature.lst',
         help='The feature list path for inference. (default: %(default)s)')
-    parser.add_argument(
-        '--infer_label_lst',
-        type=str,
-        default='data/infer_label.lst',
-        help='The label list path for inference. (default: %(default)s)')
-    parser.add_argument(
-        '--ref_txt',
-        type=str,
-        default='data/text.test',
-        help='The reference text for decoding. (default: %(default)s)')
     parser.add_argument(
         '--checkpoint',
         type=str,
@@ -128,16 +112,17 @@ def parse_args():
         type=float,
         default=0.2,
         help="Scaling factor for acoustic likelihoods. (default: %(default)f)")
-    parser.add_argument(
-        '--target_trans',
-        type=str,
-        default="./decoder/target_trans.txt",
-        help="The path to target transcription. (default: %(default)s)")
     parser.add_argument(
         '--post_matrix_path',
         type=str,
         default=None,
         help="The path to output post prob matrix. (default: %(default)s)")
+    parser.add_argument(
+        '--decode_to_path',
+        type=str,
+        default='./decoding_result.txt',
+        required=True,
+        help="The path to output the decoding result. (default: %(default)s)")
     args = parser.parse_args()
     return args
 
@@ -149,26 +134,47 @@ def print_arguments(args):
     print('------------------------------------------------')
 
 
-def get_trg_trans(args):
-    trans_dict = {}
-    with open(args.target_trans) as trg_trans:
-        line = trg_trans.readline()
-        while line:
-            items = line.strip().split()
-            key = items[0]
-            trans_dict[key] = ''.join(items[1:])
-            line = trg_trans.readline()
-    return trans_dict
+class PostMatrixWriter:
+    """Writes posterior probability matrices to a text file.
+    """
+
+    def __init__(self, to_path):
+        self._to_path = to_path
+        # Mode "w" creates the file or empties an existing one, so the
+        # appends done by write() always start from a clean file.
+        open(self._to_path, "w").close()
+
+    def write(self, keys, probs):
+        """Append each matrix: a '<key> [' line, its rows, then ']'."""
+        if isinstance(keys, str):
+            keys, probs = [keys], [probs]
+        with open(self._to_path, "a") as post_matrix:
+            for key, prob in zip(keys, probs):
+                post_matrix.write(key + " [\n")
+                for row in prob:
+                    # One write per row; every value is followed by a space.
+                    cells = "".join(str(value) + " " for value in row)
+                    post_matrix.write(cells + "\n")
+                post_matrix.write("]\n")
+
 
+class DecodingResultWriter:
+    """Writes decoding results to a text file, one result per line.
+    """
 
-def out_post_matrix(key, prob):
-    with open(args.post_matrix_path, "a") as post_matrix:
-        post_matrix.write(key + " [\n")
-        for i in range(prob.shape[0]):
-            for j in range(prob.shape[1]):
-                post_matrix.write(str(prob[i][j]) + " ")
-            post_matrix.write("\n")
-        post_matrix.write("]\n")
+    def __init__(self, to_path):
+        self._to_path = to_path
+        # Mode "w" creates the file or empties an existing one, so the
+        # appends done by write() always start from a clean file.
+        open(self._to_path, "w").close()
+
+    def write(self, results):
+        """Append a single result string or an iterable of results."""
+        if isinstance(results, str):
+            results = [results]
+        with open(self._to_path, "a") as decoding_result:
+            for result in results:
+                decoding_result.write(result.encode("utf8") + "\n")
 
 
 def infer_from_ckpt(args):
@@ -187,9 +193,10 @@ def infer_from_ckpt(args):
 
     infer_program = fluid.default_main_program().clone()
 
+    # optimizer, placeholder
     optimizer = fluid.optimizer.Adam(
         learning_rate=fluid.layers.exponential_decay(
-            learning_rate=args.learning_rate,
+            learning_rate=0.0001,
             decay_steps=1879,
             decay_rate=1 / 1.2,
             staircase=True))
@@ -199,7 +206,6 @@ def infer_from_ckpt(args):
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
 
-    #trg_trans = get_trg_trans(args)
     # load checkpoint.
     fluid.io.load_persistables(exe, args.checkpoint)
 
@@ -218,13 +224,13 @@ def infer_from_ckpt(args):
 
     # infer data reader
     infer_data_reader = reader.AsyncDataReader(
-        args.infer_feature_lst,
-        args.infer_label_lst,
-        drop_frame_len=-1,
-        split_sentence_threshold=-1)
+        args.infer_feature_lst, drop_frame_len=-1, split_sentence_threshold=-1)
     infer_data_reader.set_transformers(ltrans)
-    infer_costs, infer_accs = [], []
-    total_edit_dist, total_ref_len = 0.0, 0
+
+    decoding_result_writer = DecodingResultWriter(args.decode_to_path)
+    post_matrix_writer = None if args.post_matrix_path is None \
+                         else PostMatrixWriter(args.post_matrix_path)
+
     for batch_id, batch_data in enumerate(
             infer_data_reader.batch_iterator(args.batch_size,
                                              args.minimum_batch_size)):
@@ -242,31 +248,17 @@ def infer_from_ckpt(args):
                                 "label": label_t},
                           fetch_list=[prediction, avg_cost, accuracy],
                           return_numpy=False)
-        infer_costs.append(lodtensor_to_ndarray(results[1])[0])
-        infer_accs.append(lodtensor_to_ndarray(results[2])[0])
 
         probs, lod = lodtensor_to_ndarray(results[0])
         infer_batch = split_infer_result(probs, lod)
 
+        print("Decoding batch %d ..." % batch_id)
         decoded = decoder.decode_batch(name_lst, infer_batch, args.num_threads)
-        for res in decoded:
-            print(res.encode("utf8"))
+
+        decoding_result_writer.write(decoded)
+
         if args.post_matrix_path is not None:
-            for index, sample in enumerate(infer_batch):
-                key = name_lst[index]
-                out_post_matrix(key, sample)
-            '''
-            hyp = decoder.decode(key, sample)
-            edit_dist, ref_len = char_errors(ref.decode("utf8"), hyp)
-            total_edit_dist += edit_dist
-            total_ref_len += ref_len
-            print(key + "|Ref:", ref)
-            print(key + "|Hyp:", hyp.encode("utf8"))
-            print("Instance CER: ", edit_dist / ref_len)
-            '''
-        #print("batch: ", batch_id)
-
-        #print("Total CER = %f" % (total_edit_dist / total_ref_len))
+            post_matrix_writer.write(name_lst, infer_batch)
 
 
 if __name__ == '__main__':
diff --git a/fluid/DeepASR/score_error_rate.py b/fluid/DeepASR/score_error_rate.py
new file mode 100644
index 0000000000..5ecbca0862
--- /dev/null
+++ b/fluid/DeepASR/score_error_rate.py
@@ -0,0 +1,68 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+from tools.error_rate import char_errors, word_errors
+
+
+def parse_args():
+    """Parse the command line options of the error rate scoring script."""
+    arg_parser = argparse.ArgumentParser(
+        "Score word/character error rate (WER/CER) "
+        "for decoding result.")
+    arg_parser.add_argument(
+        '--error_rate_type',
+        default='cer',
+        choices=['cer', 'wer'],
+        type=str,
+        help="Error rate type. (default: %(default)s)")
+    arg_parser.add_argument(
+        '--ref', required=True, type=str, help="The ground truth text.")
+    arg_parser.add_argument(
+        '--hyp', required=True, type=str, help="The decoding result.")
+    return arg_parser.parse_args()
+
+
+if __name__ == '__main__':
+
+    args = parse_args()
+    ref_dict = {}
+    sum_errors, sum_ref_len = 0.0, 0
+    sent_cnt, not_in_ref_cnt = 0, 0
+
+    # Both files hold one "<key> <sentence>" entry per line. Only the first
+    # space separates the key, so the sentence may contain spaces itself; a
+    # line without any space is a key with an empty sentence.
+    with open(args.ref, "r") as ref_txt:
+        for line in ref_txt:
+            key, _, sent = line.rstrip("\r\n").partition(" ")
+            ref_dict[key] = sent.strip()
+
+    with open(args.hyp, "r") as hyp_txt:
+        for line in hyp_txt:
+            key, _, sent = line.rstrip("\r\n").partition(" ")
+            sent = sent.strip()
+            sent_cnt += 1
+            # Hypotheses without a reference are counted but not scored.
+            if key not in ref_dict:
+                not_in_ref_cnt += 1
+                continue
+
+            if args.error_rate_type == 'cer':
+                errors, ref_len = char_errors(
+                    ref_dict[key].decode("utf8"),
+                    sent.decode("utf8"),
+                    remove_space=True)
+            else:
+                errors, ref_len = word_errors(ref_dict[key].decode("utf8"),
+                                              sent.decode("utf8"))
+            sum_errors += errors
+            sum_ref_len += ref_len
+
+    # With no scored reference text the rate is undefined: print nan, not crash.
+    error_rate = sum_errors / sum_ref_len if sum_ref_len else float('nan')
+    print("Error rate[%s] = %f (%d/%d)," %
+          (args.error_rate_type, error_rate, int(sum_errors), sum_ref_len))
+    print("total %d sentences in hyp, %d not presented in ref." %
+          (sent_cnt, not_in_ref_cnt))

From b88f95a2d2fcb8ba70336824245f1832451c5ce1 Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Mon, 2 Jul 2018 04:13:31 -0700
Subject: [PATCH 09/10] Expose beam size in decoder

---
 fluid/DeepASR/decoder/post_latgen_faster_mapped.cc | 3 ++-
 fluid/DeepASR/decoder/post_latgen_faster_mapped.h  | 1 +
 fluid/DeepASR/decoder/pybind.cc                    | 1 +
 fluid/DeepASR/examples/aishell/infer_by_ckpt.sh    | 3 ++-
 fluid/DeepASR/infer_by_ckpt.py                     | 7 ++++++-
 5 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc b/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
index 87791d5131..f83730ce51 100644
--- a/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
+++ b/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
@@ -26,6 +26,7 @@ Decoder::Decoder(std::string trans_model_in_filename,
                  std::string word_syms_filename,
                  std::string fst_in_filename,
                  std::string logprior_in_filename,
+                 size_t beam_size,
                  kaldi::BaseFloat acoustic_scale) {
   const char *usage =
       "Generate lattices using neural net model.\n"
@@ -51,7 +52,10 @@ Decoder::Decoder(std::string trans_model_in_filename,
 
+  // argv[1] points into this string's buffer, so it has to be a named
+  // object that stays alive until po.Read() below has parsed the option.
+  const std::string beam_arg = "--beam=" + std::to_string(beam_size);
   int argc = 2;
   char *argv[] = {(char *)"post-latgen-faster-mapped",
-                  (char *)("--beam=" + std::string("11")).c_str()};
+                  (char *)beam_arg.c_str()};
 
   po.Read(argc, argv);
 
diff --git a/fluid/DeepASR/decoder/post_latgen_faster_mapped.h b/fluid/DeepASR/decoder/post_latgen_faster_mapped.h
index 0bbb93065a..9c234b8681 100644
--- a/fluid/DeepASR/decoder/post_latgen_faster_mapped.h
+++ b/fluid/DeepASR/decoder/post_latgen_faster_mapped.h
@@ -29,6 +29,7 @@ class Decoder {
           std::string word_syms_filename,
           std::string fst_in_filename,
           std::string logprior_in_filename,
+          size_t beam_size,
           kaldi::BaseFloat acoustic_scale);
   ~Decoder();
 
diff --git a/fluid/DeepASR/decoder/pybind.cc b/fluid/DeepASR/decoder/pybind.cc
index 151eae3b83..4a9b27d4cf 100644
--- a/fluid/DeepASR/decoder/pybind.cc
+++ b/fluid/DeepASR/decoder/pybind.cc
@@ -27,6 +27,7 @@ PYBIND11_MODULE(post_latgen_faster_mapped, m) {
                     std::string,
                     std::string,
                     std::string,
+                    size_t,
                     kaldi::BaseFloat>())
       .def("decode_from_file",
            (void (Decoder::*)(std::string, size_t)) & Decoder::decode_from_file,
diff --git a/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh b/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
index 990daee375..60a48ba5da 100644
--- a/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
+++ b/fluid/DeepASR/examples/aishell/infer_by_ckpt.sh
@@ -4,10 +4,11 @@ export CUDA_VISIBLE_DEVICES=2,3,4,5
 python -u ../../infer_by_ckpt.py --batch_size 96  \
                         --checkpoint checkpoints/deep_asr.pass_20.checkpoint \
                         --infer_feature_lst data/test_feature.lst  \
-                        --mean_var data/aishell/global_mean_var \
+                        --mean_var data/global_mean_var \
                         --frame_dim 80  \
                         --class_num 3040 \
                         --num_threads 24  \
+                        --beam_size 11 \
                         --decode_to_path $decode_to_path \
                         --trans_model mapped_decoder_data/exp/tri5a/final.mdl \
                         --log_prior mapped_decoder_data/logprior \
diff --git a/fluid/DeepASR/infer_by_ckpt.py b/fluid/DeepASR/infer_by_ckpt.py
index 881b5ba225..1e0fb15c6d 100644
--- a/fluid/DeepASR/infer_by_ckpt.py
+++ b/fluid/DeepASR/infer_by_ckpt.py
@@ -27,6 +27,11 @@ def parse_args():
         type=int,
         default=32,
         help='The sequence number of a batch data. (default: %(default)d)')
+    parser.add_argument(
+        '--beam_size',
+        type=int,
+        default=11,
+        help='The beam size for decoding. (default: %(default)d)')
     parser.add_argument(
         '--minimum_batch_size',
         type=int,
@@ -211,7 +216,7 @@ def infer_from_ckpt(args):
 
     # init decoder
     decoder = Decoder(args.trans_model, args.vocabulary, args.graphs,
-                      args.log_prior, args.acoustic_scale)
+                      args.log_prior, args.beam_size, args.acoustic_scale)
 
     ltrans = [
         trans_add_delta.TransAddDelta(2, 2),

From db42a954cdd605b3ecf521e086bd5e7cc00a036f Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Mon, 2 Jul 2018 05:44:40 -0700
Subject: [PATCH 10/10] Disable output config in cpp end

---
 fluid/DeepASR/decoder/post_latgen_faster_mapped.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc b/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
index f83730ce51..ad8aaa8480 100644
--- a/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
+++ b/fluid/DeepASR/decoder/post_latgen_faster_mapped.cc
@@ -56,7 +56,6 @@ Decoder::Decoder(std::string trans_model_in_filename,
 
   po.Read(argc, argv);
 
-  po.PrintConfig(std::cout);
   std::ifstream is_logprior(logprior_in_filename);
   logprior.Read(is_logprior, false);
 
@@ -294,12 +293,13 @@ std::string Decoder::DecodeUtteranceLatticeFaster(
     // We'll write the lattice without acoustic scaling.
     if (acoustic_scale != 0.0)
       fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat);
-    compact_lattice_writer.Write(utt, clat);
+    // disable output lattice temporarily
+    // compact_lattice_writer.Write(utt, clat);
   } else {
     // We'll write the lattice without acoustic scaling.
     if (acoustic_scale != 0.0)
       fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &lat);
-    lattice_writer.Write(utt, lat);
+    // lattice_writer.Write(utt, lat);
   }
   return ret;
 }