diff --git a/egs/asr/librispeech/shutil/decode/convert_ctm.py b/egs/asr/librispeech/shutil/decode/convert_ctm.py deleted file mode 100644 index 391a9c6..0000000 --- a/egs/asr/librispeech/shutil/decode/convert_ctm.py +++ /dev/null @@ -1,47 +0,0 @@ -""" - A script to convert ctm with word_ids to ctm with words - Amir Harati April 2018 -""" -import argparse - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("-i", "--input_ctm", - default=None, help="input ctm with word ids") - - parser.add_argument("-w", "--input_words", - default=None, help="input words") - parser.add_argument("-o", "--output_ctm", - default=None, help="output ctm with words") - args = parser.parse_args() - - if args.input_ctm is None: - print("You need to specify the input ctm file.") - exit(0) - - if args.input_words is None: - print("You need to specify the input words.") - exit(0) - - if args.output_ctm is None: - print("You need to specify the output ctm file.") - exit(0) - - id2word = {} - lines = [line.strip() for line in open(args.input_words)] - for line in lines: - parts = line.split() - id2word[parts[1]] = parts[0] - - - lines = [line.strip() for line in open(args.input_ctm)] - fo = open(args.output_ctm, "w") - for line in lines: - parts = line.split() - new_line = parts[0] + " " + parts[1] + " " + parts[2] + " " + parts[3] + " " + id2word[parts[4]] - fo.write(new_line + "\n") - fo.close() - -if __name__ == "__main__": - main() diff --git a/egs/asr/librispeech/shutil/decode/map_kaldi_transitionids.py b/egs/asr/librispeech/shutil/decode/map_kaldi_transitionids.py deleted file mode 100644 index 676e585..0000000 --- a/egs/asr/librispeech/shutil/decode/map_kaldi_transitionids.py +++ /dev/null @@ -1,71 +0,0 @@ -# map_kaldi_transitionids.py -# Amir Harati, July 2019 -""" - map transitionids into phoneme states. 
-""" -import argparse -import re - -pat1 = re.compile("\s*Transition-state.*phone\s+=\s+(.*)\s+hmm-state\s+=\s+(.*)\s+forward.*") -pat2 = re.compile("\s*Transition-id\s+=\s+(.*)\s+p.*") - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--input_transitions", default=None, help="transition file") - parser.add_argument("--input", - default=None, help="input transitionid") - parser.add_argument("--output", - default=None, help="output mapped file") - - args = parser.parse_args() - - if args.input_transitions is None: - print("You need to specify the input transitions") - exit(0) - if args.input is None: - print("You need to specify the input") - exit(0) - if args.output is None: - print("You need to specify the output") - exit(0) - - lines = [line.strip() for line in open(args.input)] - input_trs = {} - for line in lines: - parts = line.split() - input_trs[parts[0]] = parts[1:] - - lines = [line.strip() for line in open(args.input_transitions)] - - trans2state = {} - for line in lines: - x = pat1.match(line) - y = pat2.match(line) - if x is not None: - curr_phoneme = x.group(1).split("_")[0] - curr_state = x.group(2) - #print(line) - #print(y) - if y is not None: - ti = y.group(1) - #print(ti) - if curr_phoneme == "SIL": - trans2state[ti] = "_" - else: - trans2state[ti] = curr_phoneme - if len(trans2state[ti]) < 2: - trans2state[ti] += " " - - #print(trans2state) - with open(args.output, "w") as fo: - for key in input_trs: - fo.write(key+"len_"+str(len(input_trs[key])) + " ") - for v in input_trs[key]: - fo.write(trans2state[v] + " ") - fo.write("\n") - - -if __name__ == "__main__": - main() - - diff --git a/egs/asr/librispeech/shutil/decode/show_align.sh b/egs/asr/librispeech/shutil/decode/show_align.sh deleted file mode 100755 index 8f95124..0000000 --- a/egs/asr/librispeech/shutil/decode/show_align.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env bash - -set -e - -mkdir -p ./exp/align/ -TMPFILE=$(mktemp -d ./exp/align/d_XXX) || exit 1 - 
-export show_bn=1 - -model="shutil/decode/chain/e2e/tuning/tdnnf_vq.py --codebook-size 16" -exp_dir="exp/chain/e2e_tdnnf_vq_sizeco_16/" - -# model="shutil/decode/chain/e2e/tuning/tdnnf.py" -# exp_dir="exp/chain/e2e_tdnnf/" - -echo $model - -$model \ - --dir $exp_dir \ - --mode decode --use-gpu True --gpu-id 1 \ - --decode-feats test_one.scp $exp_dir/final.pt \ - | shutil/decode/latgen-faster-mapped.sh \ - --beam 1 \ - exp/chain/e2e_biphone_tree/graph_tgsmall/words.txt \ - $exp_dir/0.trans_mdl \ - exp/chain/e2e_biphone_tree/graph_tgsmall/HCLG.fst \ - $TMPFILE/lat.gz - - -oldlm=./data/lang_lp_test_tgsmall/G.fst -newlm=./data/lang_lp_test_fglarge/G.carpa -oldlmcommand="fstproject --project_output=true $oldlm |" - -lattice-lmrescore --lm-scale=-1.0 \ - "ark:gunzip -c $TMPFILE/lat.gz|" "$oldlmcommand" ark:- | \ - lattice-lmrescore-const-arpa --lm-scale=1.0 \ - ark:- "$newlm" "ark,t:|gzip -c>$TMPFILE/lat_rescore.gz" - -# bypass rescore -# cp $TMPFILE/lat.gz $TMPFILE/lat_rescore.gz - -# Word alin -lattice-align-words-lexicon \ - ./data/lang_lp/phones/align_lexicon.int \ - $exp_dir/0.trans_mdl \ - "ark:gunzip -c $TMPFILE/lat_rescore.gz|" ark:- \ - | lattice-1best ark:- ark:- | nbest-to-ctm ark:- $TMPFILE/align.ctm - -python3 shutil/decode/convert_ctm.py -i $TMPFILE/align.ctm \ - -w exp/chain/e2e_biphone_tree/graph_tgsmall/words.txt \ - -o $TMPFILE/out_ctm - -# cat $TMPFILE/out_ctm - - -# Phone alin - -zcat $TMPFILE/lat_rescore.gz > $TMPFILE/lat -lattice-1best --acoustic-scale=1 ark:$TMPFILE/lat ark:$TMPFILE/1best.lats -nbest-to-linear ark:$TMPFILE/1best.lats ark,t:$TMPFILE/ali -ali-to-phones --ctm-output $exp_dir/0.trans_mdl \ - ark:$TMPFILE/ali \ - $TMPFILE/phone_alined.ctm - -python3 shutil/decode/convert_ctm.py -i $TMPFILE/phone_alined.ctm \ - -w ./data/lang_lp/phones.txt \ - -o $TMPFILE/out_phone_ctm - -# cat $TMPFILE/out_phone_ctm - -copy-int-vector ark,t:$TMPFILE/ali ark,t:$TMPFILE/transids.txt -show-transitions ./data/lang_lp/phones.txt $exp_dir/0.trans_mdl > 
$TMPFILE/transitions.txt -python3 ./shutil/decode/map_kaldi_transitionids.py --input $TMPFILE/transids.txt --input_transitions $TMPFILE/transitions.txt --output $TMPFILE/out_state_seq - -echo "cat $TMPFILE/out_state_seq" -cat $TMPFILE/out_state_seq diff --git a/egs/asr/librispeech/shutil/decode/show_align_fromlat.sh b/egs/asr/librispeech/shutil/decode/show_align_fromlat.sh deleted file mode 100755 index b158bf6..0000000 --- a/egs/asr/librispeech/shutil/decode/show_align_fromlat.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env bash - -set -e - - -# oldlm=./data/lang_lp_test_tgsmall/G.fst -# newlm=./data/lang_lp_test_fglarge/G.carpa -# oldlmcommand="fstproject --project_output=true $oldlm |" - -# lattice-lmrescore --lm-scale=-1.0 \ - # "ark:gunzip -c $1|" "$oldlmcommand" ark:- | \ - # lattice-lmrescore-const-arpa --lm-scale=1.0 \ - # ark:- "$newlm" "ark,t:|gzip -c>$1.res" - -# bypass rescore -cp $1 $1.res - -exp_dir="exp/chain/e2e_tdnnf/" - -# Word alin -lattice-align-words-lexicon \ - ./data/lang_lp/phones/align_lexicon.int \ - $exp_dir/0.trans_mdl \ - "ark:gunzip -c $1.res|" ark:- \ - | lattice-1best ark:- ark:- | nbest-to-ctm ark:- $1.res_align.ctm - -python3 shutil/decode/convert_ctm.py -i $1.res_align.ctm \ - -w exp/chain/e2e_biphone_tree/graph_tgsmall/words.txt \ - -o $1.res_align.ctm_out_ctm - -# cat $TMPFILE/out_ctm - - -# Phone alin - -zcat $1.res > $1.res_lat -lattice-1best --acoustic-scale=1 ark:$1.res_lat ark:$1.res_lat_1best.lats -nbest-to-linear ark:$1.res_lat_1best.lats ark:$1.res_lat_ali -ali-to-phones --ctm-output $exp_dir/0.trans_mdl \ - ark:$1.res_lat_ali \ - $1.phone_alined.ctm - -python3 shutil/decode/convert_ctm.py -i $1.phone_alined.ctm \ - -w ./data/lang_lp/phones.txt \ - -o $1.out_phone_ctm - -# cat $TMPFILE/out_phone_ctm - -copy-int-vector ark,t:$1.res_lat_ali ark,t:$1.transids.txt -show-transitions ./data/lang_lp/phones.txt $exp_dir/0.trans_mdl > $1.transitions.txt -python3 ./shutil/decode/map_kaldi_transitionids.py --input $1.transids.txt 
--input_transitions $1.transitions.txt --output $1.out_state_seq diff --git a/satools/satools/chain/model.py b/satools/satools/chain/model.py index 80dd5a5..08276ae 100644 --- a/satools/satools/chain/model.py +++ b/satools/satools/chain/model.py @@ -467,6 +467,7 @@ def kaldi_decode(loglikes, ): """ Decode loglikes from a tensor, no lm rescoding is done + This function and the one that follows are convenient for decoding a few utterances; for larger sets, use Kaldi with shutil/decode/latgen-faster-mapped.sh Example: import torch