Skip to content

Commit

Permalink
Decoding works, but results are still empty
Browse files Browse the repository at this point in the history
  • Loading branch information
nshmyrev committed Dec 13, 2021
1 parent 6977be7 commit 344e137
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 19 deletions.
13 changes: 8 additions & 5 deletions python/example/batch/test_batch.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
#!/usr/bin/env python3

from vosk import Model, BatchRecognizer
from vosk import Model, BatchRecognizer, GpuInit, GpuThreadInit
import sys
import os
import wave

GpuInit()
GpuThreadInit()

model = Model("model")
rec = BatchRecognizer(model, 16000.0)

fnames = open("tedlium.list").readlines()
fds = [open(x) for x in fnames]
fds = [open(x.strip(), "rb") for x in fnames]
ended = set()
while True:
for i, fd in fds:
if i in ended():
for i, fd in enumerate(fds):
if i in ended:
continue
data = fd.read(4000)
data = fd.read(16000)
if len(data) == 0:
rec.FinishStream(i)
ended.add(i)
Expand Down
4 changes: 2 additions & 2 deletions python/vosk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def AcceptWaveform(self, uid, data):
res = _c.vosk_batch_recognizer_accept_waveform(self._handle, uid, data, len(data))

def Results(self):
return _ffi.string(_c.vosk_batch_recognizer_result(self._handle)).decode('utf-8')
return _ffi.string(_c.vosk_batch_recognizer_results(self._handle)).decode('utf-8')

def FinishStream(self, uid):
_c.vosk_recognizer_final_result(self._handle, uid)
_c.vosk_batch_recognizer_finish_stream(self._handle, uid)
8 changes: 4 additions & 4 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,19 @@ CFLAGS=-g -O3 -std=c++17 -Wno-deprecated-declarations -fPIC -DFST_NO_DYNAMIC_LIN
-I. -I$(KALDI_ROOT)/src -I$(OPENFST_ROOT)/include $(EXTRA_CFLAGS)

LIBS= \
$(KALDI_ROOT)/src/cudadecoder/kaldi-cudadecoder.a \
$(KALDI_ROOT)/src/cudafeat/kaldi-cudafeat.a \
$(KALDI_ROOT)/src/online2/kaldi-online2.a \
$(KALDI_ROOT)/src/decoder/kaldi-decoder.a \
$(KALDI_ROOT)/src/ivector/kaldi-ivector.a \
$(KALDI_ROOT)/src/gmm/kaldi-gmm.a \
$(KALDI_ROOT)/src/nnet3/kaldi-nnet3.a \
$(KALDI_ROOT)/src/tree/kaldi-tree.a \
$(KALDI_ROOT)/src/feat/kaldi-feat.a \
$(KALDI_ROOT)/src/lat/kaldi-lat.a \
$(KALDI_ROOT)/src/lm/kaldi-lm.a \
$(KALDI_ROOT)/src/rnnlm/kaldi-rnnlm.a \
$(KALDI_ROOT)/src/hmm/kaldi-hmm.a \
$(KALDI_ROOT)/src/nnet3/kaldi-nnet3.a \
$(KALDI_ROOT)/src/transform/kaldi-transform.a \
$(KALDI_ROOT)/src/cudamatrix/kaldi-cudamatrix.a \
$(KALDI_ROOT)/src/matrix/kaldi-matrix.a \
Expand All @@ -68,7 +70,7 @@ ifeq ($(HAVE_OPENBLAS_CLAPACK), 1)
endif

ifeq ($(HAVE_MKL), 1)
CFLAGS += -I$(MKL_ROOT)/include
CFLAGS += -DHAVE_MKL=1 -I$(MKL_ROOT)/include
LIBS += -L$(MKL_ROOT)/lib/intel64 -Wl,-rpath=$(MKL_ROOT)/lib/intel64 -lmkl_rt -lmkl_intel_lp64 -lmkl_core -lmkl_sequential
endif

Expand All @@ -79,8 +81,6 @@ endif
ifeq ($(HAVE_CUDA), 1)
CFLAGS+=-DHAVE_CUDA=1 -I$(CUDA_ROOT)/include
LIBS+=\
$(KALDI_ROOT)/src/cudadecoder/kaldi-cudadecoder.a \
$(KALDI_ROOT)/src/cudafeat/kaldi-cudafeat.a \
-L$(CUDA_ROOT)/lib64 -lcuda -lcublas -lcusparse -lcudart -lcurand -lcufft -lcusolver -lnvToolsExt
endif

Expand Down
18 changes: 17 additions & 1 deletion src/batch_recognizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,22 @@ BatchRecognizer::BatchRecognizer(Model *model, float sample_frequency) : model_(
model_->Ref();

BatchedThreadedNnet3CudaOnlinePipelineConfig batched_decoder_config;
batched_decoder_config.num_worker_threads = 4;
batched_decoder_config.max_batch_size = 100;

batched_decoder_config.feature_opts.feature_type = "mfcc";
batched_decoder_config.feature_opts.mfcc_config = "model/conf/mfcc.conf";
batched_decoder_config.feature_opts.ivector_extraction_config = "model/conf/ivector.conf";
batched_decoder_config.decoder_opts.max_active = 7000;
batched_decoder_config.decoder_opts.default_beam = 13.0;
batched_decoder_config.decoder_opts.lattice_beam = 8.0;
batched_decoder_config.compute_opts.acoustic_scale = 1.0;
batched_decoder_config.compute_opts.frame_subsampling_factor = 3;
batched_decoder_config.compute_opts.frames_per_chunk = 312;

cuda_pipeline_ = new BatchedThreadedNnet3CudaOnlinePipeline
(batched_decoder_config, *model_->hclg_fst_, *model_->nnet_, *model_->trans_model_);
cuda_pipeline_->SetSymbolTable(*model_->word_syms_);

CudaOnlinePipelineDynamicBatcherConfig dynamic_batcher_config;
dynamic_batcher_ = new CudaOnlinePipelineDynamicBatcher(dynamic_batcher_config,
Expand Down Expand Up @@ -60,6 +73,9 @@ void BatchRecognizer::InitRescoring()

// Finishes the stream identified by `id`: pushes one zero-length chunk for it
// to the dynamic batcher and then removes `id` from streams_.
void BatchRecognizer::FinishStream(uint64_t id)
{
// Default-constructed vector, so there are no samples to send.
Vector<BaseFloat> wave;
// View over wave's data; the 0 is presumably the view length — confirm
// against the SubVector constructor.
SubVector<BaseFloat> chunk(wave.Data(), 0);
// NOTE(review): `false, true` look like "not first chunk" / "last chunk"
// flags — confirm against the declaration of Push.
dynamic_batcher_->Push(id, false, true, chunk);
// Drop this id from the stream bookkeeping.
streams_.erase(id);
}

Expand All @@ -77,7 +93,7 @@ void BatchRecognizer::AcceptWaveform(uint64_t id, const char *data, int len)
[&, id](const std::string &str, bool partial,
bool endpoint_detected) {
if (partial) {
KALDI_LOG << "id #" << id << " [partial] : " << str;
KALDI_LOG << "id #" << id << " [partial] : " << str << ":";
}

if (endpoint_detected) {
Expand Down
6 changes: 3 additions & 3 deletions src/model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,9 @@ void Model::ReadDataFiles()
SetDropoutTestMode(true, &(nnet_->GetNnet()));
nnet3::CollapseModel(nnet3::CollapseModelConfig(), &(nnet_->GetNnet()));
}
decodable_info_ = new nnet3::DecodableNnetSimpleLoopedInfo(decodable_opts_,
nnet_);

/* decodable_info_ = new nnet3::DecodableNnetSimpleLoopedInfo(decodable_opts_,
nnet_);
if (stat(final_ie_rxfilename_.c_str(), &buffer) == 0) {
KALDI_LOG << "Loading i-vector extractor from " << final_ie_rxfilename_;
Expand All @@ -261,7 +261,7 @@ void Model::ReadDataFiles()
} else {
feature_info_.use_ivectors = false;
}

*/
if (stat(global_cmvn_stats_rxfilename_.c_str(), &buffer) == 0) {
KALDI_LOG << "Reading CMVN stats from " << global_cmvn_stats_rxfilename_;
feature_info_.use_cmvn = true;
Expand Down
4 changes: 2 additions & 2 deletions src/vosk_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -193,12 +193,12 @@ void vosk_batch_recognizer_free(VoskBatchRecognizer *recognizer)
delete ((BatchRecognizer *)recognizer);
}

void vosk_batch_recognizer_accept_waveform(VoskRecognizer *recognizer, int id, const char *data, int length)
void vosk_batch_recognizer_accept_waveform(VoskBatchRecognizer *recognizer, int id, const char *data, int length)
{
((BatchRecognizer *)recognizer)->AcceptWaveform(id, data, length);
}

void vosk_batch_recognizer_finish_stream(VoskRecognizer *recognizer, int id)
void vosk_batch_recognizer_finish_stream(VoskBatchRecognizer *recognizer, int id)
{
((BatchRecognizer *)recognizer)->FinishStream(id);
}
Expand Down
4 changes: 2 additions & 2 deletions src/vosk_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,10 +301,10 @@ VoskBatchRecognizer *vosk_batch_recognizer_new(VoskModel *model, float sample_fr
void vosk_batch_recognizer_free(VoskBatchRecognizer *recognizer);

/** Accept batch voice data */
void vosk_batch_recognizer_accept_waveform(VoskRecognizer *recognizer, int id, const char *data, int length);
void vosk_batch_recognizer_accept_waveform(VoskBatchRecognizer *recognizer, int id, const char *data, int length);

/** Closes the stream */
void vosk_batch_recognizer_finish_stream(VoskRecognizer *recognizer, int id);
void vosk_batch_recognizer_finish_stream(VoskBatchRecognizer *recognizer, int id);

/** Return results */
const char *vosk_batch_recognizer_results(VoskBatchRecognizer *recognizer);
Expand Down

0 comments on commit 344e137

Please sign in to comment.