_bibliography/papers.bib

@article{You2023,
  author    = {You, Jiawei and Huang, Ganyu and Han, Tianyuan and Yang, Haoze and Shen, Liping},
  title     = {A Unified Framework From Face Image Restoration to Data Augmentation Using Generative Prior},
  journal   = {{IEEE} Access},
  volume    = {11},
  pages     = {2907--2919},
  year      = {2023},
  doi       = {10.1109/ACCESS.2022.3233868},
  url       = {https://doi.org/10.1109/ACCESS.2022.3233868},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/access/YouHHYS23.bib},
  timestamp = {Tue, 31 Jan 2023 00:00:00 +0100},
}

@inproceedings{Han2023,
  author    = {Han, Tianyuan and Huang, Ganyu and Li, Chunhui and Shen, Liping},
  title     = {Human Pose Estimation with Combined Feature Maps and Joint Embeddings},
  booktitle = {Proceedings of the 2023 International Conference on Advances in Artificial Intelligence and Applications, {AAIA} 2023, Wuhan, China, November 18-20, 2023},
  pages     = {101--106},
  publisher = {{ACM}},
  year      = {2023},
  doi       = {10.1145/3603273.3636495},
  url       = {https://doi.org/10.1145/3603273.3636495},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/aaia/HanHLS23.bib},
  timestamp = {Sat, 13 Jan 2024 00:00:00 +0100},
}

@InProceedings{He2023,
  author    = {Chaofan He and Chunhui Li and Tianyuan Han and Liping Shen},
  booktitle = {Neural Information Processing - 30th International Conference, {ICONIP} 2023, Changsha, China, November 20-23, 2023, Proceedings, Part {XIII}},
  title     = {Assessing and Enhancing {LLMs}: {A} Physics and History Dataset and One-More-Check Pipeline Method},
  year      = {2023},
  editor    = {Biao Luo and Long Cheng and Zheng{-}Guang Wu and Hongyi Li and Chaojie Li},
  pages     = {504--517},
  publisher = {Springer},
  series    = {Communications in Computer and Information Science},
  volume    = {1967},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iconip/HeLHS23.bib},
  doi       = {10.1007/978-981-99-8178-6\_38},
  timestamp = {Mon, 18 Dec 2023 09:47:49 +0100},
  url       = {https://doi.org/10.1007/978-981-99-8178-6\_38},
}

@inproceedings{Xiong2023,
  author    = {Xiong, Yuhan and You, Jiawei and Shen, Liping},
  title     = {{GAN} Latent Space Manipulation Based Augmentation for Unbalanced Emotion Datasets},
  booktitle = {International Joint Conference on Neural Networks, {IJCNN} 2023, Gold Coast, Australia, June 18-23, 2023},
  pages     = {1--8},
  publisher = {{IEEE}},
  year      = {2023},
  doi       = {10.1109/IJCNN54540.2023.10191807},
  url       = {https://doi.org/10.1109/IJCNN54540.2023.10191807},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/ijcnn/XiongYS23.bib},
  timestamp = {Wed, 09 Aug 2023 16:25:09 +0200},
}

@inproceedings{Chen2022,
  author    = {Chen, Jisen and Shen, Jian and Long, Ting and Shen, Liping and Zhang, Weinan and Yu, Yong},
  title     = {Heterogeneous Graph Representation for Knowledge Tracing},
  booktitle = {Neural Information Processing - 29th International Conference, {ICONIP} 2022, Virtual Event, November 22-26, 2022, Proceedings, Part {I}},
  editor    = {Tanveer, Mohammad and Agarwal, Sonali and Ozawa, Seiichi and Ekbal, Asif and Jatowt, Adam},
  series    = {Lecture Notes in Computer Science},
  volume    = {13623},
  pages     = {224--235},
  publisher = {Springer},
  year      = {2022},
  doi       = {10.1007/978-3-031-30105-6\_19},
  url       = {https://doi.org/10.1007/978-3-031-30105-6\_19},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iconip/ChenSLS0022.bib},
  timestamp = {Thu, 20 Apr 2023 15:23:54 +0200},
}

@InProceedings{Yang2022,
  author    = {Haoze Yang and Kunyao Lan and Jiawei You and Liping Shen},
  booktitle = {International Joint Conference on Neural Networks, {IJCNN} 2022, Padua, Italy, July 18-23, 2022},
  title     = {A simple but practical method: How to improve the usage of entities in the {Chinese} question generation},
  year      = {2022},
  pages     = {1--8},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/ijcnn/YangLYS22.bib},
  doi       = {10.1109/IJCNN55064.2022.9891960},
  timestamp = {Mon, 10 Oct 2022 17:40:09 +0200},
  url       = {https://doi.org/10.1109/IJCNN55064.2022.9891960},
}

@inproceedings{You2022,
  author    = {You, Jiawei and Han, Tianyuan and Shen, Liping},
  title     = {From Uniform Models To Generic Representations: Stock Return Prediction With Pre-training},
  booktitle = {International Joint Conference on Neural Networks, {IJCNN} 2022, Padua, Italy, July 18-23, 2022},
  pages     = {1--8},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/IJCNN55064.2022.9892697},
  url       = {https://doi.org/10.1109/IJCNN55064.2022.9892697},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/ijcnn/YouHS22.bib},
  timestamp = {Mon, 10 Oct 2022 01:00:00 +0200},
}

@InProceedings{Gong2023,
  author    = {Xun Gong and Yu Wu and Jinyu Li and Shujie Liu and Rui Zhao and Xie Chen and Yanmin Qian},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  title     = {{LongFNT}: Long-Form Speech Recognition with Factorized Neural Transducer},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/GongWLLZCQ23.bib},
  doi       = {10.1109/ICASSP49357.2023.10096900},
  timestamp = {Sun, 05 Nov 2023 16:51:21 +0100},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096900},
}

@inproceedings{Gong2023a,
  author    = {Gong, Xun and Wang, Wei and Shao, Hang and Chen, Xie and Qian, Yanmin},
  title     = {Factorized {AED:} Factorized Attention-Based Encoder-Decoder for Text-Only Domain Adaptive {ASR}},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  doi       = {10.1109/ICASSP49357.2023.10095937},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10095937},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/GongWSCQ23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}

@inproceedings{Han2023a,
  author    = {Han, Bing and Chen, Zhengyang and Qian, Yanmin},
  title     = {Exploring Binary Classification Loss for Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  doi       = {10.1109/ICASSP49357.2023.10094954},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10094954},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/HanCQ23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}

@InProceedings{Han2023b,
  author    = {Bing Han and Wen Huang and Zhengyang Chen and Yanmin Qian},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2023 - Workshops, Rhodes Island, Greece, June 4-10, 2023},
  title     = {Improving {DINO}-Based Self-Supervised Speaker Verification with Progressive Cluster-Aware Training},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/HanHCQ23.bib},
  doi       = {10.1109/ICASSPW59220.2023.10192957},
  timestamp = {Mon, 07 Aug 2023 15:56:26 +0200},
  url       = {https://doi.org/10.1109/ICASSPW59220.2023.10192957},
}

@inproceedings{Li2023,
  author    = {Li, Jiahong and Li, Chenda and Wu, Yifei and Qian, Yanmin},
  title     = {Robust Audio-Visual {ASR} with Unified Cross-Modal Attention},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  doi       = {10.1109/ICASSP49357.2023.10096893},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096893},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/LiLWQ23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}

@inproceedings{Li2023a,
  author    = {Li, Chenda and Qian, Yao and Chen, Zhuo and Wang, Dongmei and Yoshioka, Takuya and Liu, Shujie and Qian, Yanmin and Zeng, Michael},
  title     = {Target Sound Extraction with Variable Cross-Modality Clues},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  doi       = {10.1109/ICASSP49357.2023.10095266},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10095266},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/LiQCWYLQZ23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}

@InProceedings{Li2023b,
  author    = {Chenda Li and Yifei Wu and Yanmin Qian},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  title     = {Predictive {SkiM}: Contrastive Predictive Coding for Low-Latency Online Speech Separation},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/LiWQ23.bib},
  doi       = {10.1109/ICASSP49357.2023.10097107},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10097107},
}

@inproceedings{Liu2023,
  author    = {Liu, Tao and Chen, Zhengyang and Qian, Yanmin and Yu, Kai},
  title     = {Multi-Speaker End-to-End Multi-Modal Speaker Diarization System for the {MISP} 2022 Challenge},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--2},
  publisher = {{IEEE}},
  year      = {2023},
  doi       = {10.1109/ICASSP49357.2023.10096327},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096327},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/LiuCQY23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}

@inproceedings{Shao2023,
  author    = {Shao, Hang and Tan, Tian and Wang, Wei and Gong, Xun and Qian, Yanmin},
  title     = {Joint Discriminator and Transfer Based Fast Domain Adaptation For End-To-End Speech Recognition},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  doi       = {10.1109/ICASSP49357.2023.10095910},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10095910},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/ShaoTWGQ23.bib},
  timestamp = {Mon, 20 Nov 2023 00:00:00 +0100},
}

@inproceedings{Wang2023,
  author    = {Wang, Haoyu and Liu, Bei and Wu, Yifei and Chen, Zhengyang and Qian, Yanmin},
  title     = {Lowbit Neural Network Quantization for Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2023 - Workshops, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  doi       = {10.1109/ICASSPW59220.2023.10193337},
  url       = {https://doi.org/10.1109/ICASSPW59220.2023.10193337},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/WangLWCQ23.bib},
  timestamp = {Mon, 07 Aug 2023 01:00:00 +0200},
}

@inproceedings{Wang2023a,
  author    = {Wang, Hongji and Liang, Chengdong and Wang, Shuai and Chen, Zhengyang and Zhang, Binbin and Xiang, Xu and Deng, Yanlei and Qian, Yanmin},
  title     = {Wespeaker: {A} Research and Production Oriented Speaker Embedding Learning Toolkit},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  doi       = {10.1109/ICASSP49357.2023.10096626},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096626},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/WangLWCZXDQ23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}

@InProceedings{Wang2023b,
  author    = {Wei Wang and Yanmin Qian},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  title     = {{HuBERT-AGG}: Aggregated Representation Distillation of Hidden-Unit {BERT} for Robust Speech Recognition},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/WangQ23.bib},
  doi       = {10.1109/ICASSP49357.2023.10096308},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096308},
}

@InProceedings{Wu2023,
  author    = {Yifei Wu and Chenda Li and Yanmin Qian},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2023 - Workshops, Rhodes Island, Greece, June 4-10, 2023},
  title     = {Light-Weight {VisualVoice}: Neural Network Quantization On Audio Visual Speech Separation},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/WuLQ23.bib},
  doi       = {10.1109/ICASSPW59220.2023.10193263},
  timestamp = {Mon, 07 Aug 2023 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICASSPW59220.2023.10193263},
}

@InProceedings{Yu2023,
  author    = {Haibin Yu and Yuxuan Hu and Yao Qian and Ma Jin and Linquan Liu and Shujie Liu and Yu Shi and Yanmin Qian and Edward Lin and Michael Zeng},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  title     = {Code-Switching Text Generation and Injection in {Mandarin-English} {ASR}},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/YuHQJLLSQLZ23.bib},
  doi       = {10.1109/ICASSP49357.2023.10096317},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096317},
}

@inproceedings{Zhang2023,
  author    = {Zhang, Leying and Chen, Zhengyang and Qian, Yanmin},
  title     = {Adaptive Large Margin Fine-Tuning For Robust Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  doi       = {10.1109/ICASSP49357.2023.10094744},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10094744},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhangCQ23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}

@InProceedings{Le2023,
  author    = {Chenyang Le and Yao Qian and Long Zhou and Shujie Liu and Yanmin Qian and Michael Zeng and Xuedong Huang},
  booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  title     = {{ComSL}: {A} Composite Speech-Language Model for End-to-End Speech-to-Text Translation},
  year      = {2023},
  editor    = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/nips/LeQZLQ0023.bib},
  timestamp = {Fri, 01 Mar 2024 00:00:00 +0100},
  url       = {http://papers.nips.cc/paper\_files/paper/2023/hash/b6262f7a34e5d641cdb3d33dc9ad1a5a-Abstract-Conference.html},
}

@inproceedings{Masuyama2023,
  author    = {Masuyama, Yoshiki and Chang, Xuankai and Zhang, Wangyou and Cornell, Samuele and Wang, Zhong{-}Qiu and Ono, Nobutaka and Qian, Yanmin and Watanabe, Shinji},
  title     = {Exploring the Integration of Speech Separation and Recognition with Self-Supervised Learning Representation},
  booktitle = {{IEEE} Workshop on Applications of Signal Processing to Audio and Acoustics, {WASPAA} 2023, New Paltz, NY, USA, October 22-25, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  doi       = {10.1109/WASPAA58266.2023.10248096},
  url       = {https://doi.org/10.1109/WASPAA58266.2023.10248096},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/waspaa/MasuyamaCZCWOQW23.bib},
  timestamp = {Sat, 23 Sep 2023 11:31:10 +0200},
}

@Misc{Lu2023,
  author       = {Yen{-}Ju Lu and Xuankai Chang and Chenda Li and Wangyou Zhang and Samuele Cornell and Zhaoheng Ni and Yoshiki Masuyama and Brian Yan and Robin Scheibler and Zhong{-}Qiu Wang and Yu Tsao and Yanmin Qian and Shinji Watanabe},
  howpublished = {\url{https://doi.org/10.5281/zenodo.10048174}},
  month        = oct,
  title        = {Software Design and User Interface of {ESPnet-SE++}: Speech Enhancement for Robust Speech Processing (espnet-v.202310) (Version 1)},
  year         = {2023},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  biburl       = {https://dblp.org/rec/data/10/LuCLZCNMYSWTQW23.bib},
  doi          = {10.5281/ZENODO.10048174},
  publisher    = {Zenodo},
  timestamp    = {Tue, 21 Nov 2023 00:00:00 +0100},
  url          = {https://doi.org/10.5281/zenodo.10048174},
}

@Article{Han2023c,
  author        = {Bing Han and Zhengyang Chen and Yanmin Qian},
  journal       = {CoRR},
  title         = {Self-Supervised Learning with {Cluster-Aware-DINO} for High-Performance Robust Speaker Verification},
  year          = {2023},
  volume        = {abs/2304.05754},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2304-05754.bib},
  doi           = {10.48550/ARXIV.2304.05754},
  eprint        = {2304.05754},
  timestamp     = {Wed, 19 Apr 2023 01:00:00 +0200},
  url           = {https://doi.org/10.48550/arXiv.2304.05754},
}

@article{Chen2023,
  author        = {Chen, Zhengyang and Han, Bing and Wang, Shuai and Qian, Yanmin},
  title         = {Attention-based Encoder-Decoder Network for End-to-End Neural Speaker Diarization with Target Speaker Attractor},
  journal       = {CoRR},
  volume        = {abs/2305.10704},
  year          = {2023},
  eprint        = {2305.10704},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2305.10704},
  url           = {https://doi.org/10.48550/arXiv.2305.10704},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2305-10704.bib},
  timestamp     = {Thu, 25 May 2023 01:00:00 +0200},
}

@Article{Shao2023a,
  author        = {Hang Shao and Wei Wang and Bei Liu and Xun Gong and Haoyu Wang and Yanmin Qian},
  journal       = {CoRR},
  title         = {{Whisper-KDQ}: {A} Lightweight {Whisper} via Guided Knowledge Distillation and Quantization for Efficient {ASR}},
  year          = {2023},
  volume        = {abs/2305.10788},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2305-10788.bib},
  doi           = {10.48550/ARXIV.2305.10788},
  eprint        = {2305.10788},
  timestamp     = {Thu, 25 May 2023 01:00:00 +0200},
  url           = {https://doi.org/10.48550/arXiv.2305.10788},
}

@article{Zhang2023a,
  author        = {Zhang, Wangyou and Qian, Yanmin},
  title         = {Weakly-Supervised Speech Pre-training: {A} Case Study on Target Speech Recognition},
  journal       = {CoRR},
  volume        = {abs/2305.16286},
  year          = {2023},
  eprint        = {2305.16286},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2305.16286},
  url           = {https://doi.org/10.48550/arXiv.2305.16286},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2305-16286.bib},
  timestamp     = {Tue, 06 Jun 2023 01:00:00 +0200},
}

@article{Li2023d,
  author        = {Li, Chenda and Qian, Yao and Chen, Zhuo and Kanda, Naoyuki and Wang, Dongmei and Yoshioka, Takuya and Qian, Yanmin and Zeng, Michael},
  title         = {Adapting Multi-Lingual {ASR} Models for Handling Multiple Talkers},
  journal       = {CoRR},
  volume        = {abs/2305.18747},
  year          = {2023},
  eprint        = {2305.18747},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2305.18747},
  url           = {https://doi.org/10.48550/arXiv.2305.18747},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2305-18747.bib},
  timestamp     = {Sat, 04 Nov 2023 00:00:00 +0100},
}

@Article{Han2023e,
  author        = {Bing Han and Junyu Dai and Xuchen Song and Weituo Hao and Xinyan He and Dong Guo and Jitong Chen and Yuxuan Wang and Yanmin Qian},
  journal       = {CoRR},
  title         = {{InstructME}: An Instruction Guided Music Edit And Remix Framework with Latent Diffusion Models},
  year          = {2023},
  volume        = {abs/2308.14360},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2308-14360.bib},
  doi           = {10.48550/ARXIV.2308.14360},
  eprint        = {2308.14360},
  timestamp     = {Fri, 01 Sep 2023 01:00:00 +0200},
  url           = {https://doi.org/10.48550/arXiv.2308.14360},
}

@article{Chen2023a,
  author        = {Chen, Zhengyang and Han, Bing and Wang, Shuai and Qian, Yanmin},
  title         = {Attention-based Encoder-Decoder End-to-End Neural Diarization with Embedding Enhancer},
  journal       = {CoRR},
  volume        = {abs/2309.06672},
  year          = {2023},
  eprint        = {2309.06672},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2309.06672},
  url           = {https://doi.org/10.48550/arXiv.2309.06672},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-06672.bib},
  timestamp     = {Tue, 19 Sep 2023 01:00:00 +0200},
}

@article{Ao2023,
  author        = {Ao, Junyi and Yildirim, Mehmet Sinan and Ge, Meng and Wang, Shuai and Tao, Ruijie and Qian, Yanmin and Deng, Liqun and Xiao, Longshuai and Li, Haizhou},
  title         = {{USED:} Universal Speaker Extraction and Diarization},
  journal       = {CoRR},
  volume        = {abs/2309.10674},
  year          = {2023},
  eprint        = {2309.10674},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2309.10674},
  url           = {https://doi.org/10.48550/arXiv.2309.10674},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-10674.bib},
  timestamp     = {Mon, 25 Sep 2023 01:00:00 +0200},
}

@article{Wang2023c,
  author        = {Wang, Shuai and Bai, Qibing and Liu, Qi and Yu, Jianwei and Chen, Zhengyang and Han, Bing and Qian, Yanmin and Li, Haizhou},
  title         = {Leveraging In-the-Wild Data for Effective Self-Supervised Pretraining in Speaker Recognition},
  journal       = {CoRR},
  volume        = {abs/2309.11730},
  year          = {2023},
  eprint        = {2309.11730},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2309.11730},
  url           = {https://doi.org/10.48550/arXiv.2309.11730},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-11730.bib},
  timestamp     = {Tue, 26 Sep 2023 01:00:00 +0200},
}

@Article{Liang2023,
  author        = {Yuhao Liang and Mohan Shi and Fan Yu and Yangze Li and Shiliang Zhang and Zhihao Du and Qian Chen and Lei Xie and Yanmin Qian and Jian Wu and Zhuo Chen and Kong Aik Lee and Zhijie Yan and Hui Bu},
  journal       = {CoRR},
  title         = {The second multi-channel multi-party meeting transcription challenge ({M2MeT} 2.0): {A} benchmark for speaker-attributed {ASR}},
  year          = {2023},
  volume        = {abs/2309.13573},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-13573.bib},
  doi           = {10.48550/ARXIV.2309.13573},
  eprint        = {2309.13573},
  timestamp     = {Wed, 27 Sep 2023 01:00:00 +0200},
  url           = {https://doi.org/10.48550/arXiv.2309.13573},
}

@article{Zhang2023b,
  author        = {Zhang, Leying and Qian, Yao and Yu, Linfeng and Wang, Heming and Wang, Xinkai and Yang, Hemin and Zhou, Long and Liu, Shujie and Qian, Yanmin and Zeng, Michael},
  title         = {Diffusion Conditional Expectation Model for Efficient and Robust Target Speech Extraction},
  journal       = {CoRR},
  volume        = {abs/2309.13874},
  year          = {2023},
  eprint        = {2309.13874},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2309.13874},
  url           = {https://doi.org/10.48550/arXiv.2309.13874},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-13874.bib},
  timestamp     = {Sat, 21 Oct 2023 01:00:00 +0200},
}

@article{Zhang2023c,
  author        = {Zhang, Wangyou and Saijo, Kohei and Wang, Zhong{-}Qiu and Watanabe, Shinji and Qian, Yanmin},
  title         = {Toward Universal Speech Enhancement for Diverse Input Conditions},
  journal       = {CoRR},
  volume        = {abs/2309.17384},
  year          = {2023},
  eprint        = {2309.17384},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2309.17384},
  url           = {https://doi.org/10.48550/arXiv.2309.17384},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-17384.bib},
  timestamp     = {Tue, 17 Oct 2023 01:00:00 +0200},
}

@article{Shao2023b,
  author        = {Shao, Hang and Liu, Bei and Qian, Yanmin},
  title         = {One-Shot Sensitivity-Aware Mixed Sparsity Pruning for Large Language Models},
  journal       = {CoRR},
  volume        = {abs/2310.09499},
  year          = {2023},
  eprint        = {2310.09499},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2310.09499},
  url           = {https://doi.org/10.48550/arXiv.2310.09499},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2310-09499.bib},
  timestamp     = {Wed, 25 Oct 2023 01:00:00 +0200},
}

@Article{Yang2023,
  author        = {Dongning Yang and Wei Wang and Yanmin Qian},
  journal       = {CoRR},
  title         = {{FAT-HuBERT}: Front-end Adaptive Training of Hidden-unit {BERT} for Distortion-Invariant Robust Speech Recognition},
  year          = {2023},
  volume        = {abs/2311.17790},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2311-17790.bib},
  doi           = {10.48550/ARXIV.2311.17790},
  eprint        = {2311.17790},
  timestamp     = {Tue, 05 Dec 2023 00:00:00 +0100},
  url           = {https://doi.org/10.48550/arXiv.2311.17790},
}

@Article{Chen2022a,
  author    = {Sanyuan Chen and Chengyi Wang and Zhengyang Chen and Yu Wu and Shujie Liu and Zhuo Chen and Jinyu Li and Naoyuki Kanda and Takuya Yoshioka and Xiong Xiao and Jian Wu and Long Zhou and Shuo Ren and Yanmin Qian and Yao Qian and Jian Wu and Michael Zeng and Xiangzhan Yu and Furu Wei},
  journal   = {{IEEE} J. Sel. Top. Signal Process.},
  title     = {{WavLM}: Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing},
  year      = {2022},
  number    = {6},
  pages     = {1505--1518},
  volume    = {16},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/jstsp/ChenWCWLCLKYXWZ22.bib},
  doi       = {10.1109/JSTSP.2022.3188113},
  timestamp = {Sun, 13 Nov 2022 00:00:00 +0100},
  url       = {https://doi.org/10.1109/JSTSP.2022.3188113},
}

@article{Qian2022,
  author    = {Qian, Yanmin and Zhou, Zhikai},
  title     = {Optimizing Data Usage for Low-Resource Speech Recognition},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {30},
  pages     = {394--403},
  year      = {2022},
  doi       = {10.1109/TASLP.2022.3140552},
  url       = {https://doi.org/10.1109/TASLP.2022.3140552},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/QianZ22.bib},
  timestamp = {Tue, 08 Feb 2022 00:00:00 +0100},
}

@article{Li2022,
  author    = {Li, Chenda and Chen, Zhuo and Qian, Yanmin},
  title     = {Dual-Path Modeling With Memory Embedding Model for Continuous Speech Separation},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {30},
  pages     = {1508--1520},
  year      = {2022},
  doi       = {10.1109/TASLP.2022.3165712},
  url       = {https://doi.org/10.1109/TASLP.2022.3165712},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/LiCQ22.bib},
  timestamp = {Wed, 18 May 2022 01:00:00 +0200},
}

@article{Qian2022a,
  author    = {Qian, Yanmin and Gong, Xun and Huang, Houjun},
  title     = {Layer-Wise Fast Adaptation for End-to-End Multi-Accent Speech Recognition},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {30},
  pages     = {2842--2853},
  year      = {2022},
  doi       = {10.1109/TASLP.2022.3198546},
  url       = {https://doi.org/10.1109/TASLP.2022.3198546},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/QianGH22.bib},
  timestamp = {Fri, 23 Sep 2022 01:00:00 +0200},
}

@article{Zhang2022,
  author    = {Zhang, Wangyou and Chang, Xuankai and B{\"{o}}ddeker, Christoph and Nakatani, Tomohiro and Watanabe, Shinji and Qian, Yanmin},
  title     = {End-to-End Dereverberation, Beamforming, and Speech Recognition in a Cocktail Party},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {30},
  pages     = {3173--3188},
  year      = {2022},
  doi       = {10.1109/TASLP.2022.3209942},
  url       = {https://doi.org/10.1109/TASLP.2022.3209942},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/ZhangCBNWQ22.bib},
  timestamp = {Mon, 05 Dec 2022 00:00:00 +0100},
}

@InProceedings{Wu2022,
  author    = {Wu, Yifei and Li, Chenda and Bai, Jinfeng and Wu, Zhongqin and Qian, Yanmin},
  title     = {Time-Domain Audio-Visual Speech Separation on Low Quality Videos},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  year      = {2022},
  pages     = {256--260},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP43922.2022.9746866},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746866},
  biburl    = {https://dblp.org/rec/conf/icassp/WuLBWQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 07 Jun 2022 17:34:47 +0200},
}

@InProceedings{Li2022a,
  author    = {Li, Chenda and Yang, Lei and Wang, Weiqin and Qian, Yanmin},
  title     = {{SkiM}: Skipping Memory {LSTM} for Low-Latency Real-Time Continuous Speech Separation},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  year      = {2022},
  pages     = {681--685},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP43922.2022.9746372},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746372},
  biburl    = {https://dblp.org/rec/conf/icassp/LiYWQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}

@InProceedings{Chen2022b,
  author    = {Chen, Zhengyang and Chen, Sanyuan and Wu, Yu and Qian, Yao and Wang, Chengyi and Liu, Shujie and Qian, Yanmin and Zeng, Michael},
  title     = {Large-Scale Self-Supervised Speech Representation Learning for Automatic Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  year      = {2022},
  pages     = {6147--6151},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP43922.2022.9747814},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9747814},
  biburl    = {https://dblp.org/rec/conf/icassp/ChenCWQWLQZ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}

@InProceedings{Han2022,
  author    = {Han, Bing and Chen, Zhengyang and Qian, Yanmin},
  title     = {Local Information Modeling with Self-Attention for Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  year      = {2022},
  pages     = {6727--6731},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP43922.2022.9746050},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746050},
  biburl    = {https://dblp.org/rec/conf/icassp/HanCQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}

@InProceedings{Zhou2022,
  author    = {Zhou, Zhikai and Tan, Tian and Qian, Yanmin},
  title     = {Punctuation Prediction for Streaming On-Device Speech Recognition},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  year      = {2022},
  pages     = {7277--7281},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP43922.2022.9746366},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746366},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhouTQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}

@InProceedings{Han2022a,
  author    = {Han, Bing and Chen, Zhengyang and Liu, Bei and Qian, Yanmin},
  title     = {{MLP-SVNET:} {A} Multi-Layer Perceptrons Based Network for Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  year      = {2022},
  pages     = {7522--7526},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP43922.2022.9747172},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9747172},
  biburl    = {https://dblp.org/rec/conf/icassp/HanCLQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}

@InProceedings{Liu2022,
  author    = {Liu, Bei and Wang, Haoyu and Chen, Zhengyang and Wang, Shuai and Qian, Yanmin},
  title     = {Self-Knowledge Distillation via Feature Enhancement for Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  year      = {2022},
  pages     = {7542--7546},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP43922.2022.9746529},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746529},
  biburl    = {https://dblp.org/rec/conf/icassp/LiuWCWQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 10 Mar 2023 00:00:00 +0100},
}

@InProceedings{Wang2022,
  author    = {Wang, Wei and Ren, Shuo and Qian, Yao and Liu, Shujie and Shi, Yu and Qian, Yanmin and Zeng, Michael},
  title     = {Optimizing Alignment of Speech and Language Latent Spaces for End-To-End Speech Recognition and Understanding},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  year      = {2022},
  pages     = {7802--7806},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP43922.2022.9747760},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9747760},
  biburl    = {https://dblp.org/rec/conf/icassp/WangRQLSQZ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}

@InProceedings{Zhou2022a,
  author    = {Zhou, Zhikai and Wang, Wei and Zhang, Wangyou and Qian, Yanmin},
  title     = {Exploring Effective Data Utilization for Low-Resource Speech Recognition},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  year      = {2022},
  pages     = {8192--8196},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP43922.2022.9747543},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9747543},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhouWZQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}

@InProceedings{Yu2022,
  author    = {Yu, Fan and Zhang, Shiliang and Guo, Pengcheng and Fu, Yihui and Du, Zhihao and Zheng, Siqi and Huang, Weilong and Xie, Lei and Tan, Zheng{-}Hua and Wang, DeLiang and Qian, Yanmin and Lee, Kong Aik and Yan, Zhijie and Ma, Bin and Xu, Xin and Bu, Hui},
  title     = {Summary on the {ICASSP} 2022 Multi-Channel Multi-Party Meeting Transcription Grand Challenge},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  year      = {2022},
  pages     = {9156--9160},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP43922.2022.9746270},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746270},
  biburl    = {https://dblp.org/rec/conf/icassp/YuZGFDZHXTWQLYM22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 13 Mar 2023 00:00:00 +0100},
}

@InProceedings{Wang2022a,
  author    = {Wang, Wei and Gong, Xun and Wu, Yifei and Zhou, Zhikai and Li, Chenda and Zhang, Wangyou and Han, Bing and Qian, Yanmin},
  title     = {The {SJTU} System for Multimodal Information Based Speech Processing Challenge 2021},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  year      = {2022},
  pages     = {9261--9265},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP43922.2022.9746874},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746874},
  biburl    = {https://dblp.org/rec/conf/icassp/WangGWZLZHQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 23 Sep 2022 01:00:00 +0200},
}

@InProceedings{Liu2022a,
  author    = {Liu, Bei and Chen, Zhengyang and Qian, Yanmin},
  title     = {Attentive Feature Fusion for Robust Speaker Verification},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Ko, Hanseok and Hansen, John H. L.},
  year      = {2022},
  pages     = {286--290},
  publisher = {{ISCA}},
  doi       = {10.21437/INTERSPEECH.2022-478},
  url       = {https://doi.org/10.21437/Interspeech.2022-478},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiuCQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}

@InProceedings{Liu2022b,
  author    = {Liu, Bei and Chen, Zhengyang and Qian, Yanmin},
  title     = {Dual Path Embedding Learning for Speaker Verification with Triplet Attention},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Ko, Hanseok and Hansen, John H. L.},
  year      = {2022},
  pages     = {291--295},
  publisher = {{ISCA}},
  doi       = {10.21437/INTERSPEECH.2022-481},
  url       = {https://doi.org/10.21437/Interspeech.2022-481},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiuCQ22a.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}

@InProceedings{Liu2022c,
  author    = {Liu, Bei and Chen, Zhengyang and Wang, Shuai and Wang, Haoyu and Han, Bing and Qian, Yanmin},
  title     = {{DF-ResNet}: Boosting Speaker Verification Performance with Depth-First Design},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Ko, Hanseok and Hansen, John H. L.},
  year      = {2022},
  pages     = {296--300},
  publisher = {{ISCA}},
  doi       = {10.21437/INTERSPEECH.2022-484},
  url       = {https://doi.org/10.21437/Interspeech.2022-484},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiuCWWHQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}

@InProceedings{Zhang2022a,
  author    = {Zhang, Leying and Chen, Zhengyang and Qian, Yanmin},
  title     = {Enroll-Aware Attentive Statistics Pooling for Target Speaker Verification},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Ko, Hanseok and Hansen, John H. L.},
  year      = {2022},
  pages     = {311--315},
  publisher = {{ISCA}},
  doi       = {10.21437/INTERSPEECH.2022-645},
  url       = {https://doi.org/10.21437/Interspeech.2022-645},
  biburl    = {https://dblp.org/rec/conf/interspeech/ZhangCQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}

@InProceedings{Liu2022d,
  author    = {Liu, Tao and Fan, Shuai and Xiang, Xu and Song, Hongbo and Lin, Shaoxiong and Sun, Jiaqi and Han, Tianyuan and Chen, Siyuan and Yao, Binwei and Liu, Sen and Wu, Yifei and Qian, Yanmin and Yu, Kai},
  title     = {{MSDWild}: Multi-modal Speaker Diarization Dataset in the Wild},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Ko, Hanseok and Hansen, John H. L.},
  year      = {2022},
  pages     = {1476--1480},
  publisher = {{ISCA}},
  doi       = {10.21437/INTERSPEECH.2022-10466},
  url       = {https://doi.org/10.21437/Interspeech.2022-10466},
  biburl    = {https://dblp.org/rec/conf/interspeech/Liu0XSLSHCYLWQ022.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}

@InProceedings{Gong2022,
  author    = {Gong, Xun and Zhou, Zhikai and Qian, Yanmin},
  title     = {Knowledge Transfer and Distillation from Autoregressive to Non-Autoregressive Speech Recognition},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Ko, Hanseok and Hansen, John H. L.},
  year      = {2022},
  pages     = {2618--2622},
  publisher = {{ISCA}},
  doi       = {10.21437/INTERSPEECH.2022-632},
  url       = {https://doi.org/10.21437/Interspeech.2022-632},
  biburl    = {https://dblp.org/rec/conf/interspeech/0005ZQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}

@InProceedings{Han2022b,
  author    = {Han, Bing and Chen, Zhengyang and Qian, Yanmin},
  title     = {Self-Supervised Speaker Verification Using Dynamic Loss-Gate and Label Correction},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Ko, Hanseok and Hansen, John H. L.},
  year      = {2022},
  pages     = {4780--4784},
  publisher = {{ISCA}},
  doi       = {10.21437/INTERSPEECH.2022-742},
  url       = {https://doi.org/10.21437/Interspeech.2022-742},
  biburl    = {https://dblp.org/rec/conf/interspeech/HanCQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}

@InProceedings{Zhang2022b,
  author    = {Zhang, Wangyou and Chen, Zhuo and Kanda, Naoyuki and Liu, Shujie and Li, Jinyu and Eskimez, Sefik Emre and Yoshioka, Takuya and Xiao, Xiong and Meng, Zhong and Qian, Yanmin and Wei, Furu},
  title     = {Separating Long-Form Speech with Group-wise Permutation Invariant Training},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Ko, Hanseok and Hansen, John H. L.},
  year      = {2022},
  pages     = {5383--5387},
  publisher = {{ISCA}},
  doi       = {10.21437/INTERSPEECH.2022-10362},
  url       = {https://doi.org/10.21437/Interspeech.2022-10362},
  biburl    = {https://dblp.org/rec/conf/interspeech/Zhang0K00EYXMQW22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}

@InProceedings{Lu2022,
  author    = {Lu, Yen{-}Ju and Chang, Xuankai and Li, Chenda and Zhang, Wangyou and Cornell, Samuele and Ni, Zhaoheng and Masuyama, Yoshiki and Yan, Brian and Scheibler, Robin and Wang, Zhong{-}Qiu and Tsao, Yu and Qian, Yanmin and Watanabe, Shinji},
  title     = {{ESPnet-SE++}: Speech Enhancement for Robust Speech Recognition, Translation, and Understanding},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Ko, Hanseok and Hansen, John H. L.},
  year      = {2022},
  pages     = {5458--5462},
  publisher = {{ISCA}},
  doi       = {10.21437/INTERSPEECH.2022-10727},
  url       = {https://doi.org/10.21437/Interspeech.2022-10727},
  biburl    = {https://dblp.org/rec/conf/interspeech/LuCLZCNMYSW0Q022.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}

@InProceedings{Qu2022,
  author    = {Qu, Bowen and Li, Chenda and Bai, Jinfeng and Qian, Yanmin},
  title     = {Improving Speech Separation with Knowledge Distilled from Self-supervised Pre-trained Models},
  booktitle = {13th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2022, Singapore, December 11-14, 2022},
  editor    = {Lee, Kong Aik and Lee, Hung{-}yi and Lu, Yanfeng and Dong, Minghui},
  year      = {2022},
  pages     = {329--333},
  publisher = {{IEEE}},
  doi       = {10.1109/ISCSLP57327.2022.10038203},
  url       = {https://doi.org/10.1109/ISCSLP57327.2022.10038203},
  biburl    = {https://dblp.org/rec/conf/iscslp/QuLBQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 17 Feb 2023 18:27:20 +0100},
}

@InProceedings{Wang2022b,
  author    = {Wang, Wei and Zhang, Wangyou and Lin, Shaoxiong and Qian, Yanmin},
  title     = {Text-Informed Knowledge Distillation for Robust Speech Enhancement and Recognition},
  booktitle = {13th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2022, Singapore, December 11-14, 2022},
  editor    = {Lee, Kong Aik and Lee, Hung{-}yi and Lu, Yanfeng and Dong, Minghui},
  year      = {2022},
  pages     = {334--338},
  publisher = {{IEEE}},
  doi       = {10.1109/ISCSLP57327.2022.10037802},
  url       = {https://doi.org/10.1109/ISCSLP57327.2022.10037802},
  biburl    = {https://dblp.org/rec/conf/iscslp/WangZLQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 17 Feb 2023 00:00:00 +0100},
}

@InProceedings{Zhou2022b,
  author    = {Zhou, Zhikai and Cao, Shuang and Chen, Zhengyang and Liu, Bei and Xia, Ming and Jiang, Hong and Qian, Yanmin},
  title     = {Medical Difficult Airway Detection using Speech Technology},
  booktitle = {13th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2022, Singapore, December 11-14, 2022},
  editor    = {Lee, Kong Aik and Lee, Hung{-}yi and Lu, Yanfeng and Dong, Minghui},
  year      = {2022},
  pages     = {349--353},
  publisher = {{IEEE}},
  doi       = {10.1109/ISCSLP57327.2022.10037911},
  url       = {https://doi.org/10.1109/ISCSLP57327.2022.10037911},
  biburl    = {https://dblp.org/rec/conf/iscslp/ZhouCCLXJQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 17 Feb 2023 00:00:00 +0100},
}

@InProceedings{Huang2022,
  author    = {Huang, Houjun and Qian, Yanmin},
  title     = {Speaking Style Compensation on Synthetic Audio for Robust Keyword Spotting},
  booktitle = {13th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2022, Singapore, December 11-14, 2022},
  editor    = {Lee, Kong Aik and Lee, Hung{-}yi and Lu, Yanfeng and Dong, Minghui},
  year      = {2022},
  pages     = {448--452},
  publisher = {{IEEE}},
  doi       = {10.1109/ISCSLP57327.2022.10038031},
  url       = {https://doi.org/10.1109/ISCSLP57327.2022.10038031},
  biburl    = {https://dblp.org/rec/conf/iscslp/HuangQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 13 Mar 2023 00:00:00 +0100},
}

@InProceedings{Cheng2022,
  author    = {Cheng, Gaofeng and Chen, Yifan and Yang, Runyan and Li, Qingxuan and Yang, Zehui and Ye, Lingxuan and Zhang, Pengyuan and Zhang, Qingqing and Xie, Lei and Qian, Yanmin and Lee, Kong Aik and Yan, Yonghong},
  title     = {The Conversational Short-phrase Speaker Diarization {(CSSD)} Task: Dataset, Evaluation Metric and Baselines},
  booktitle = {13th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2022, Singapore, December 11-14, 2022},
  editor    = {Lee, Kong Aik and Lee, Hung{-}yi and Lu, Yanfeng and Dong, Minghui},
  year      = {2022},
  pages     = {488--492},
  publisher = {{IEEE}},
  doi       = {10.1109/ISCSLP57327.2022.10038258},
  url       = {https://doi.org/10.1109/ISCSLP57327.2022.10038258},
  biburl    = {https://dblp.org/rec/conf/iscslp/ChengCYLYYZZXQLY22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 14 Mar 2023 00:00:00 +0100},
}

@InProceedings{Liu2022e,
  author    = {Liu, Tao and Xiang, Xu and Chen, Zhengyang and Han, Bing and Yu, Kai and Qian, Yanmin},
  title     = {The {X-Lance} Speaker Diarization System for the Conversational Short-phrase Speaker Diarization Challenge 2022},
  booktitle = {13th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2022, Singapore, December 11-14, 2022},
  editor    = {Lee, Kong Aik and Lee, Hung{-}yi and Lu, Yanfeng and Dong, Minghui},
  year      = {2022},
  pages     = {498--501},
  publisher = {{IEEE}},
  doi       = {10.1109/ISCSLP57327.2022.10037955},
  url       = {https://doi.org/10.1109/ISCSLP57327.2022.10037955},
  biburl    = {https://dblp.org/rec/conf/iscslp/LiuXCHYQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 17 Feb 2023 00:00:00 +0100},
}

@InProceedings{Scheibler2022,
  author    = {Scheibler, Robin and Zhang, Wangyou and Chang, Xuankai and Watanabe, Shinji and Qian, Yanmin},
  title     = {End-to-End Multi-Speaker {ASR} with Independent Vector Analysis},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2022, Doha, Qatar, January 9-12, 2023},
  year      = {2022},
  pages     = {496--501},
  publisher = {{IEEE}},
  doi       = {10.1109/SLT54892.2023.10023037},
  url       = {https://doi.org/10.1109/SLT54892.2023.10023037},
  biburl    = {https://dblp.org/rec/conf/slt/ScheiblerZCWQ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 21 Mar 2023 00:00:00 +0100},
}

@InProceedings{Chen2022c,
  author    = {Chen, Zhengyang and Qian, Yao and Han, Bing and Qian, Yanmin and Zeng, Michael},
  title     = {A Comprehensive Study on Self-Supervised Distillation for Speaker Representation Learning},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2022, Doha, Qatar, January 9-12, 2023},
  year      = {2022},
  pages     = {599--604},
  publisher = {{IEEE}},
  doi       = {10.1109/SLT54892.2023.10022470},
  url       = {https://doi.org/10.1109/SLT54892.2023.10022470},
  biburl    = {https://dblp.org/rec/conf/slt/ChenQHQZ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 06 Feb 2023 00:00:00 +0100},
}

@Article{Chen2022d,
  author        = {Chen, Zhengyang and Liu, Bei and Han, Bing and Zhang, Leying and Qian, Yanmin},
  title         = {The {SJTU} {X-LANCE} Lab System for {CNSRC} 2022},
  journal       = {CoRR},
  year          = {2022},
  volume        = {abs/2206.11699},
  archiveprefix = {arXiv},
  eprint        = {2206.11699},
  doi           = {10.48550/ARXIV.2206.11699},
  url           = {https://doi.org/10.48550/arXiv.2206.11699},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2206-11699.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Mon, 27 Jun 2022 01:00:00 +0200},
}

@Article{Chen2022e,
  author        = {Chen, Zhengyang and Han, Bing and Xiang, Xu and Huang, Houjun and Liu, Bei and Qian, Yanmin},
  title         = {{SJTU-AISPEECH} System for {VoxCeleb} Speaker Recognition Challenge 2022},
  journal       = {CoRR},
  year          = {2022},
  volume        = {abs/2209.09076},
  archiveprefix = {arXiv},
  eprint        = {2209.09076},
  doi           = {10.48550/ARXIV.2209.09076},
  url           = {https://doi.org/10.48550/arXiv.2209.09076},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2209-09076.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Wed, 07 Dec 2022 00:00:00 +0100},
}

@Article{Chen2022g,
  author        = {Chen, Zhengyang and Han, Bing and Xiang, Xu and Huang, Houjun and Liu, Bei and Qian, Yanmin},
  title         = {Build a {SRE} Challenge System: Lessons from {VoxSRC} 2022 and {CNSRC} 2022},
  journal       = {CoRR},
  year          = {2022},
  volume        = {abs/2211.00815},
  archiveprefix = {arXiv},
  eprint        = {2211.00815},
  doi           = {10.48550/ARXIV.2211.00815},
  url           = {https://doi.org/10.48550/arXiv.2211.00815},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2211-00815.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Wed, 07 Dec 2022 00:00:00 +0100},
}

@Article{Yang2021,
  author    = {Yang, Jichen and Wang, Hongji and Das, Rohan Kumar and Qian, Yanmin},
  title     = {Modified Magnitude-Phase Spectrum Information for Spoofing Detection},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  year      = {2021},
  volume    = {29},
  pages     = {1065--1078},
  doi       = {10.1109/TASLP.2021.3060810},
  url       = {https://doi.org/10.1109/TASLP.2021.3060810},
  biburl    = {https://dblp.org/rec/journals/taslp/YangWDQ21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 23 Mar 2021 00:00:00 +0100},
}

@Article{Qian2021,
  author    = {Qian, Yanmin and Chen, Zhengyang and Wang, Shuai},
  title     = {Audio-Visual Deep Neural Network for Robust Person Verification},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  year      = {2021},
  volume    = {29},
  pages     = {1079--1092},
  doi       = {10.1109/TASLP.2021.3057230},
  url       = {https://doi.org/10.1109/TASLP.2021.3057230},
  biburl    = {https://dblp.org/rec/journals/taslp/QianCW21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 29 Apr 2021 01:00:00 +0200},
}

@InProceedings{Li2021,
  author    = {Li, Chenda and Chen, Zhuo and Luo, Yi and Han, Cong and Zhou, Tianyan and Kinoshita, Keisuke and Delcroix, Marc and Watanabe, Shinji and Qian, Yanmin},
  title     = {Dual-Path Modeling for Long Recording Speech Separation in Meetings},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  year      = {2021},
  pages     = {5739--5743},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP39728.2021.9414127},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9414127},
  biburl    = {https://dblp.org/rec/conf/icassp/LiCLHZKD0Q21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sun, 12 Nov 2023 00:00:00 +0100},
}

@InProceedings{Chen2021,
  author    = {Chen, Zhengyang and Wang, Shuai and Qian, Yanmin},
  title     = {Self-Supervised Learning Based Domain Adaptation for Robust Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  year      = {2021},
  pages     = {5834--5838},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP39728.2021.9414261},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9414261},
  biburl    = {https://dblp.org/rec/conf/icassp/ChenWQ21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 10 Mar 2023 00:00:00 +0100},
}

@InProceedings{Du2021,
  author    = {Du, Chenpeng and Han, Bing and Wang, Shuai and Qian, Yanmin and Yu, Kai},
  title     = {{SynAug}: Synthesis-Based Data Augmentation for Text-Dependent Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  year      = {2021},
  pages     = {5844--5848},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP39728.2021.9414438},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9414438},
  biburl    = {https://dblp.org/rec/conf/icassp/DuHWQ021.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 14 Dec 2021 00:00:00 +0100},
}

@InProceedings{Huang2021,
  author    = {Huang, Houjun and Xiang, Xu and Zhao, Fei and Wang, Shuai and Qian, Yanmin},
  title     = {Unit Selection Synthesis Based Data Augmentation for Fixed Phrase Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  year      = {2021},
  pages     = {5849--5853},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP39728.2021.9414550},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9414550},
  biburl    = {https://dblp.org/rec/conf/icassp/HuangXZWQ21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 10 Mar 2023 00:00:00 +0100},
}

@InProceedings{Huang2021a,
  author    = {Huang, Houjun and Xiang, Xu and Yang, Yexin and Ma, Rao and Qian, Yanmin},
  title     = {{AISpeech-SJTU} Accent Identification System for the Accented {English} Speech Recognition Challenge},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  year      = {2021},
  pages     = {6254--6258},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP39728.2021.9414292},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9414292},
  biburl    = {https://dblp.org/rec/conf/icassp/HuangXYMQ21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
}

@InProceedings{Tan2021,
  author    = {Tan, Tian and Lu, Yizhou and Ma, Rao and Zhu, Sen and Guo, Jiaqi and Qian, Yanmin},
  title     = {{AISpeech-SJTU} {ASR} System for the Accented {English} Speech Recognition Challenge},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  year      = {2021},
  pages     = {6413--6417},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP39728.2021.9414471},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9414471},
  biburl    = {https://dblp.org/rec/conf/icassp/0002LMZGQ21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 09 Jul 2021 01:00:00 +0200},
}

@InProceedings{Wang2021,
  author    = {Wang, Wei and Zhou, Zhikai and Lu, Yizhou and Wang, Hongji and Du, Chenpeng and Qian, Yanmin},
  title     = {Towards Data Selection on {TTS} Data for Children's Speech Recognition},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  year      = {2021},
  pages     = {6888--6892},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP39728.2021.9413930},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9413930},
  biburl    = {https://dblp.org/rec/conf/icassp/WangZLWDQ21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
}

@InProceedings{Zhang2021,
  author    = {Zhang, Wangyou and B{\"{o}}ddeker, Christoph and Watanabe, Shinji and Nakatani, Tomohiro and Delcroix, Marc and Kinoshita, Keisuke and Ochiai, Tsubasa and Kamo, Naoyuki and Haeb{-}Umbach, Reinhold and Qian, Yanmin},
  title     = {End-to-End Dereverberation, Beamforming, and Speech Recognition with Improved Numerical Stability and Advanced Frontend},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  year      = {2021},
  pages     = {6898--6902},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP39728.2021.9414464},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9414464},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhangB0NDKOKHQ21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sun, 12 Nov 2023 00:00:00 +0100},
}

@InProceedings{Shi2021,
  author    = {Shi, Xian and Yu, Fan and Lu, Yizhou and Liang, Yuhao and Feng, Qiangze and Wang, Daliang and Qian, Yanmin and Xie, Lei},
  title     = {The Accented {English} Speech Recognition Challenge 2020: Open Datasets, Tracks, Baselines, Results and Methods},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  year      = {2021},
  pages     = {6918--6922},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP39728.2021.9413386},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9413386},
  biburl    = {https://dblp.org/rec/conf/icassp/ShiYLLFWQX21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 31 Oct 2022 00:00:00 +0100},
}

@InProceedings{Boeddeker2021,
  author    = {Christoph B{\"{o}}ddeker and Wangyou Zhang and Tomohiro Nakatani and Keisuke Kinoshita and Tsubasa Ochiai and Marc Delcroix and Naoyuki Kamo and Yanmin Qian and Reinhold Haeb{-}Umbach},
  title     = {Convolutive Transfer Function Invariant {SDR} Training Criteria for Multi-Channel Reverberant Speech Separation},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  publisher = {{IEEE}},
  year      = {2021},
  pages     = {8428--8432},
  doi       = {10.1109/ICASSP39728.2021.9414661},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9414661},
  timestamp = {Sun, 12 Nov 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icassp/BoddekerZNKODKQ21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

% Editor names carry their full diacritics; each accent is a brace-wrapped command so BibTeX treats it as one letter.
@InProceedings{Gong2021,
  author    = {Xun Gong and Yizhou Lu and Zhikai Zhou and Yanmin Qian},
  booktitle = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  title     = {Layer-Wise Fast Adaptation for End-to-End Multi-Accent Speech Recognition},
  year      = {2021},
  editor    = {Hynek Hermansky and Honza {\v{C}}ernock{\'{y}} and Luk{\'{a}}{\v{s}} Burget and Lori Lamel and Odette Scharenborg and Petr Motl{\'{\i}}{\v{c}}ek},
  pages     = {1274--1278},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/GongLZQ21.bib},
  doi       = {10.21437/INTERSPEECH.2021-1075},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
  url       = {https://doi.org/10.21437/Interspeech.2021-1075},
}

% Editor names carry their full diacritics; each accent is a brace-wrapped command so BibTeX treats it as one letter.
@InProceedings{Zhang2021a,
  author    = {Leying Zhang and Zhengyang Chen and Yanmin Qian},
  booktitle = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  title     = {Knowledge Distillation from Multi-Modality to Single-Modality for Person Verification},
  year      = {2021},
  editor    = {Hynek Hermansky and Honza {\v{C}}ernock{\'{y}} and Luk{\'{a}}{\v{s}} Burget and Lori Lamel and Odette Scharenborg and Petr Motl{\'{\i}}{\v{c}}ek},
  pages     = {1897--1901},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/ZhangCQ21.bib},
  doi       = {10.21437/INTERSPEECH.2021-2119},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
  url       = {https://doi.org/10.21437/Interspeech.2021-2119},
}

% "Basis-MelGAN" is braced in the title so sentence-casing styles keep its capitals.
% Editor names carry their full diacritics; each accent is a brace-wrapped command so BibTeX treats it as one letter.
@InProceedings{Liu2021,
  author    = {Zhengxi Liu and Yanmin Qian},
  booktitle = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  title     = {{Basis-MelGAN}: Efficient Neural Vocoder Based on Audio Decomposition},
  year      = {2021},
  editor    = {Hynek Hermansky and Honza {\v{C}}ernock{\'{y}} and Luk{\'{a}}{\v{s}} Burget and Lori Lamel and Odette Scharenborg and Petr Motl{\'{\i}}{\v{c}}ek},
  pages     = {2222--2226},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiuQ21.bib},
  doi       = {10.21437/INTERSPEECH.2021-2173},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
  url       = {https://doi.org/10.21437/Interspeech.2021-2173},
}

% Editor names carry their full diacritics; each accent is a brace-wrapped command so BibTeX treats it as one letter.
@InProceedings{Han2021,
  author    = {Bing Han and Zhengyang Chen and Zhikai Zhou and Yanmin Qian},
  booktitle = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  title     = {The {SJTU} System for Short-Duration Speaker Verification Challenge 2021},
  year      = {2021},
  editor    = {Hynek Hermansky and Honza {\v{C}}ernock{\'{y}} and Luk{\'{a}}{\v{s}} Burget and Lori Lamel and Odette Scharenborg and Petr Motl{\'{\i}}{\v{c}}ek},
  pages     = {2332--2336},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/HanCZQ21.bib},
  doi       = {10.21437/INTERSPEECH.2021-2136},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
  url       = {https://doi.org/10.21437/Interspeech.2021-2136},
}

% Editor names carry their full diacritics; each accent is a brace-wrapped command so BibTeX treats it as one letter.
@InProceedings{Wu2021,
  author    = {Yifei Wu and Chenda Li and Song Yang and Zhongqin Wu and Yanmin Qian},
  booktitle = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  title     = {Audio-Visual Multi-Talker Speech Recognition in a Cocktail Party},
  year      = {2021},
  editor    = {Hynek Hermansky and Honza {\v{C}}ernock{\'{y}} and Luk{\'{a}}{\v{s}} Burget and Lori Lamel and Odette Scharenborg and Petr Motl{\'{\i}}{\v{c}}ek},
  pages     = {3021--3025},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/WuLYWQ21.bib},
  doi       = {10.21437/INTERSPEECH.2021-2128},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
  url       = {https://doi.org/10.21437/Interspeech.2021-2128},
}

@InProceedings{Gong2021a,
  author    = {Xun Gong and Zhengyang Chen and Yexin Yang and Shuai Wang and Lan Wang and Yanmin Qian},
  title     = {Speaker Embedding Augmentation with Noise Distribution Matching},
  booktitle = {12th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2021, Hong Kong, January 24-27, 2021},
  publisher = {{IEEE}},
  year      = {2021},
  pages     = {1--5},
  doi       = {10.1109/ISCSLP49672.2021.9362090},
  url       = {https://doi.org/10.1109/ISCSLP49672.2021.9362090},
  timestamp = {Fri, 10 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/iscslp/GongCYWWQ21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@InProceedings{Wang2021a,
  author    = {Shuai Wang and Yexin Yang and Yanmin Qian and Kai Yu},
  title     = {Revisiting the Statistics Pooling Layer in Deep Speaker Embedding Learning},
  booktitle = {12th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2021, Hong Kong, January 24-27, 2021},
  publisher = {{IEEE}},
  year      = {2021},
  pages     = {1--5},
  doi       = {10.1109/ISCSLP49672.2021.9362097},
  url       = {https://doi.org/10.1109/ISCSLP49672.2021.9362097},
  timestamp = {Tue, 14 Dec 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/iscslp/WangYQ021.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

% Title is stored in Title Case throughout; the bibliography style decides the final casing.
@InProceedings{Du2021a,
  author    = {Chenpeng Du and Hao Li and Yizhou Lu and Lan Wang and Yanmin Qian},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  title     = {Data Augmentation for End-to-End Code-Switching Speech Recognition},
  year      = {2021},
  pages     = {194--200},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/slt/DuLLWQ21.bib},
  doi       = {10.1109/SLT48900.2021.9383620},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383620},
}

@InProceedings{Li2021a,
  author    = {Chenda Li and Yi Luo and Cong Han and Jinyu Li and Takuya Yoshioka and Tianyan Zhou and Marc Delcroix and Keisuke Kinoshita and Christoph B{\"{o}}ddeker and Yanmin Qian and Shinji Watanabe and Zhuo Chen},
  title     = {Dual-Path {RNN} for Long Recording Speech Separation},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = {2021},
  pages     = {865--872},
  doi       = {10.1109/SLT48900.2021.9383514},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383514},
  timestamp = {Sun, 12 Nov 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/LiLHLYZDKBQ0C21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@InProceedings{Zhang2021b,
  author    = {Wangyou Zhang and Jing Shi and Chenda Li and Shinji Watanabe and Yanmin Qian},
  title     = {Closing the Gap Between Time-Domain Multi-Channel Speech Enhancement on Real and Simulation Conditions},
  booktitle = {{IEEE} Workshop on Applications of Signal Processing to Audio and Acoustics, {WASPAA} 2021, New Paltz, NY, USA, October 17-20, 2021},
  publisher = {{IEEE}},
  year      = {2021},
  pages     = {146--150},
  doi       = {10.1109/WASPAA52581.2021.9632720},
  url       = {https://doi.org/10.1109/WASPAA52581.2021.9632720},
  timestamp = {Thu, 23 Jun 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/waspaa/ZhangSLWQ21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

% Journal is stored under its full name; abbreviating it is left to the bibliography style.
@Article{Zhang2020,
  author    = {Wangyou Zhang and Xuankai Chang and Yanmin Qian and Shinji Watanabe},
  journal   = {{IEEE/ACM} Transactions on Audio, Speech, and Language Processing},
  title     = {Improving End-to-End Single-Channel Multi-Talker Speech Recognition},
  year      = {2020},
  pages     = {1385--1394},
  volume    = {28},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/ZhangCQW20.bib},
  doi       = {10.1109/TASLP.2020.2988423},
  timestamp = {Fri, 26 Jun 2020 01:00:00 +0200},
  url       = {https://doi.org/10.1109/TASLP.2020.2988423},
}

% Journal is stored under its full name; abbreviating it is left to the bibliography style.
@Article{Wang2020,
  author    = {Shuai Wang and Yexin Yang and Zhanghao Wu and Yanmin Qian and Kai Yu},
  journal   = {{IEEE/ACM} Transactions on Audio, Speech, and Language Processing},
  title     = {Data Augmentation Using Deep Generative Models for Embedding Based Speaker Recognition},
  year      = {2020},
  pages     = {2598--2609},
  volume    = {28},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/WangYWQY20.bib},
  doi       = {10.1109/TASLP.2020.3016498},
  timestamp = {Tue, 06 Oct 2020 01:00:00 +0200},
  url       = {https://doi.org/10.1109/TASLP.2020.3016498},
}

@InProceedings{Chang2020,
  author    = {Xuankai Chang and Wangyou Zhang and Yanmin Qian and Jonathan Le Roux and Shinji Watanabe},
  title     = {End-To-End Multi-Speaker Speech Recognition With Transformer},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  publisher = {{IEEE}},
  year      = {2020},
  pages     = {6134--6138},
  doi       = {10.1109/ICASSP40776.2020.9054029},
  url       = {https://doi.org/10.1109/ICASSP40776.2020.9054029},
  timestamp = {Tue, 29 Dec 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icassp/ChangZQRW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@InProceedings{Yang2020,
  author    = {Yexin Yang and Shuai Wang and Xun Gong and Yanmin Qian and Kai Yu},
  title     = {Text Adaptation for Speaker Verification with Speaker-Text Factorized Embeddings},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  publisher = {{IEEE}},
  year      = {2020},
  pages     = {6454--6458},
  doi       = {10.1109/ICASSP40776.2020.9054333},
  url       = {https://doi.org/10.1109/ICASSP40776.2020.9054333},
  timestamp = {Fri, 23 Sep 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/Yang0GQ020.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@InProceedings{Chen2020,
  author    = {Zhengyang Chen and Shuai Wang and Yanmin Qian and Kai Yu},
  title     = {Channel Invariant Speaker Embedding Learning with Joint Multi-Task and Adversarial Training},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  publisher = {{IEEE}},
  year      = {2020},
  pages     = {6574--6578},
  doi       = {10.1109/ICASSP40776.2020.9053905},
  url       = {https://doi.org/10.1109/ICASSP40776.2020.9053905},
  timestamp = {Thu, 23 Jul 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/Chen0Q020.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@InProceedings{Li2020,
  author    = {Chenda Li and Yanmin Qian},
  title     = {Deep Audio-Visual Speech Separation with Attention Mechanism},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  publisher = {{IEEE}},
  year      = {2020},
  pages     = {7314--7318},
  doi       = {10.1109/ICASSP40776.2020.9054180},
  url       = {https://doi.org/10.1109/ICASSP40776.2020.9054180},
  timestamp = {Thu, 23 Jul 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/LiQ20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@InProceedings{Zhang2020a,
  author    = {Wangyou Zhang and Yanmin Qian},
  title     = {Learning Contextual Language Embeddings for Monaural Multi-Talker Speech Recognition},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  publisher = {{ISCA}},
  year      = {2020},
  pages     = {304--308},
  doi       = {10.21437/INTERSPEECH.2020-2015},
  url       = {https://doi.org/10.21437/Interspeech.2020-2015},
  timestamp = {Fri, 29 Jan 2021 17:40:16 +0100},
  biburl    = {https://dblp.org/rec/conf/interspeech/ZhangQ20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@InProceedings{Zhang2020b,
  author    = {Wangyou Zhang and Aswin Shanmugam Subramanian and Xuankai Chang and Shinji Watanabe and Yanmin Qian},
  title     = {End-to-End Far-Field Speech Recognition with Unified Dereverberation and Beamforming},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  publisher = {{ISCA}},
  year      = {2020},
  pages     = {324--328},
  doi       = {10.21437/INTERSPEECH.2020-2432},
  url       = {https://doi.org/10.21437/Interspeech.2020-2432},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/ZhangSC0Q20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@InProceedings{Wang2020a,
  author    = {Hongji Wang and Heinrich Dinkel and Shuai Wang and Yanmin Qian and Kai Yu},
  title     = {Dual-Adversarial Domain Adaptation for Generalized Replay Attack Detection},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  publisher = {{ISCA}},
  year      = {2020},
  pages     = {1086--1090},
  doi       = {10.21437/INTERSPEECH.2020-1255},
  url       = {https://doi.org/10.21437/Interspeech.2020-1255},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/interspeech/WangD0Q020.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@InProceedings{Li2020a,
  author    = {Chenda Li and Yanmin Qian},
  title     = {Listen, Watch and Understand at the Cocktail Party: Audio-Visual-Contextual Speech Separation},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  publisher = {{ISCA}},
  year      = {2020},
  pages     = {1426--1430},
  doi       = {10.21437/INTERSPEECH.2020-2028},
  url       = {https://doi.org/10.21437/Interspeech.2020-2028},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiQ20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

% "VoxCeleb" is braced in the title so sentence-casing styles keep its capitals.
@InProceedings{Chen2020a,
  author    = {Zhengyang Chen and Shuai Wang and Yanmin Qian},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  title     = {Multi-Modality Matters: {A} Performance Leap on {VoxCeleb}},
  year      = {2020},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  pages     = {2252--2256},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/ChenWQ20.bib},
  doi       = {10.21437/INTERSPEECH.2020-2229},
  timestamp = {Fri, 10 Mar 2023 00:00:00 +0100},
  url       = {https://doi.org/10.21437/Interspeech.2020-2229},
}

@InProceedings{Chen2020b,
  author    = {Zhengyang Chen and Shuai Wang and Yanmin Qian},
  title     = {Adversarial Domain Adaptation for Speaker Verification Using Partially Shared Network},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  publisher = {{ISCA}},
  year      = {2020},
  pages     = {3017--3021},
  doi       = {10.21437/INTERSPEECH.2020-2226},
  url       = {https://doi.org/10.21437/Interspeech.2020-2226},
  timestamp = {Fri, 10 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/interspeech/ChenWQ20a.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

% "Mandarin-English" is braced in the title so sentence-casing styles keep the language names capitalised.
@InProceedings{Lu2020,
  author    = {Yizhou Lu and Mingkun Huang and Hao Li and Jiaqi Guo and Yanmin Qian},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  title     = {Bi-Encoder Transformer Network for {Mandarin-English} Code-Switching Speech Recognition Using Mixture of Experts},
  year      = {2020},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  pages     = {4766--4770},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/LuHLGQ20.bib},
  doi       = {10.21437/INTERSPEECH.2020-2485},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
  url       = {https://doi.org/10.21437/Interspeech.2020-2485},
}

@Article{Chen2020c,
  author        = {Yefei Chen and Shuai Wang and Yanmin Qian and Kai Yu},
  title         = {End-to-End Speaker-Dependent Voice Activity Detection},
  journal       = {CoRR},
  volume        = {abs/2009.09906},
  year          = {2020},
  eprint        = {2009.09906},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2009.09906},
  timestamp     = {Wed, 23 Sep 2020 01:00:00 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2009-09906.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% Journal is stored under its full name; abbreviating it is left to the bibliography style.
@Article{Qian2019,
  author    = {Yanmin Qian and Chao Weng and Xuankai Chang and Shuai Wang and Dong Yu},
  journal   = {Frontiers of Information Technology \& Electronic Engineering},
  title     = {Erratum to: Past review, current progress, and challenges ahead on the cocktail party problem},
  year      = {2019},
  number    = {3},
  pages     = {438},
  volume    = {20},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/jzusc/QianWCWY19.bib},
  doi       = {10.1631/FITEE.19E0001},
  timestamp = {Thu, 05 Mar 2020 00:00:00 +0100},
  url       = {https://doi.org/10.1631/FITEE.19e0001},
}

% Journal is stored under its full name; abbreviating it is left to the bibliography style.
@Article{Qian2019a,
  author    = {Yanmin Qian and Xu Xiang},
  journal   = {Frontiers of Information Technology \& Electronic Engineering},
  title     = {Binary neural networks for speech recognition},
  year      = {2019},
  number    = {5},
  pages     = {701--715},
  volume    = {20},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/jzusc/QianX19.bib},
  doi       = {10.1631/FITEE.1800469},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  url       = {https://doi.org/10.1631/fitee.1800469},
}

% Journal is stored under its full name; abbreviating it is left to the bibliography style.
@Article{Qian2019b,
  author    = {Yanmin Qian and Hu Hu and Tian Tan},
  journal   = {Speech Communication},
  title     = {Data augmentation using generative adversarial networks for robust speech recognition},
  year      = {2019},
  pages     = {1--9},
  volume    = {114},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/speech/QianHT19.bib},
  doi       = {10.1016/J.SPECOM.2019.08.006},
  timestamp = {Sat, 22 Feb 2020 00:00:00 +0100},
  url       = {https://doi.org/10.1016/j.specom.2019.08.006},
}

% Journal is stored under its full name; abbreviating it is left to the bibliography style.
@Article{Wang2019,
  author    = {Shuai Wang and Zili Huang and Yanmin Qian and Kai Yu},
  journal   = {{IEEE/ACM} Transactions on Audio, Speech, and Language Processing},
  title     = {Discriminative Neural Embedding Learning for Short-Duration Text-Independent Speaker Verification},
  year      = {2019},
  number    = {11},
  pages     = {1686--1696},
  volume    = {27},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/WangHQY19.bib},
  doi       = {10.1109/TASLP.2019.2928128},
  timestamp = {Fri, 13 Mar 2020 00:00:00 +0100},
  url       = {https://doi.org/10.1109/TASLP.2019.2928128},
}

@InProceedings{Xiang2019,
  author    = {Xu Xiang and Shuai Wang and Houjun Huang and Yanmin Qian and Kai Yu},
  title     = {Margin Matters: Towards More Discriminative Deep Neural Network Embeddings for Speaker Recognition},
  booktitle = {2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference, {APSIPA} {ASC} 2019, Lanzhou, China, November 18-21, 2019},
  publisher = {{IEEE}},
  year      = {2019},
  pages     = {1652--1656},
  doi       = {10.1109/APSIPAASC47483.2019.9023039},
  url       = {https://doi.org/10.1109/APSIPAASC47483.2019.9023039},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/apsipa/XiangWHQ019.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

% "GANs" is braced in the title so sentence-casing styles keep the acronym's capitals.
@InProceedings{Sheng2019,
  author    = {Peiyao Sheng and Zhuolin Yang and Yanmin Qian},
  booktitle = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2019, Singapore, December 14-18, 2019},
  title     = {{GANs} for Children: {A} Generative Data Augmentation Strategy for Children Speech Recognition},
  year      = {2019},
  pages     = {129--135},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/asru/ShengYQ19.bib},
  doi       = {10.1109/ASRU46091.2019.9003933},
  timestamp = {Mon, 24 Feb 2020 17:51:31 +0100},
  url       = {https://doi.org/10.1109/ASRU46091.2019.9003933},
}

% "MIMO-Speech" is braced in the title so sentence-casing styles keep its capitals.
@InProceedings{Chang2019,
  author    = {Xuankai Chang and Wangyou Zhang and Yanmin Qian and Jonathan Le Roux and Shinji Watanabe},
  booktitle = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2019, Singapore, December 14-18, 2019},
  title     = {{MIMO-Speech}: End-to-End Multi-Channel Multi-Speaker Speech Recognition},
  year      = {2019},
  pages     = {237--244},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/asru/ChangZQRW19.bib},
  doi       = {10.1109/ASRU46091.2019.9003986},
  timestamp = {Tue, 29 Dec 2020 00:00:00 +0100},
  url       = {https://doi.org/10.1109/ASRU46091.2019.9003986},
}

@InProceedings{Huang2019,
  author    = {Mingkun Huang and Yizhou Lu and Lan Wang and Yanmin Qian and Kai Yu},
  title     = {Exploring Model Units and Training Strategies for End-to-End Speech Recognition},
  booktitle = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2019, Singapore, December 14-18, 2019},
  publisher = {{IEEE}},
  year      = {2019},
  pages     = {524--531},
  doi       = {10.1109/ASRU46091.2019.9003834},
  url       = {https://doi.org/10.1109/ASRU46091.2019.9003834},
  timestamp = {Mon, 24 Feb 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/asru/HuangLWQY19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@InProceedings{Zhang2019,
  author    = {Wangyou Zhang and Man Sun and Lan Wang and Yanmin Qian},
  title     = {End-to-End Overlapped Speech Detection and Speaker Counting with Raw Waveform},
  booktitle = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2019, Singapore, December 14-18, 2019},
  publisher = {{IEEE}},
  year      = {2019},
  pages     = {660--666},
  doi       = {10.1109/ASRU46091.2019.9003962},
  url       = {https://doi.org/10.1109/ASRU46091.2019.9003962},
  timestamp = {Mon, 24 Feb 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/asru/ZhangSWQ19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@InProceedings{Wang2019a,
  author    = {Shuai Wang and Yexin Yang and Tianzhe Wang and Yanmin Qian and Kai Yu},
  title     = {Knowledge Distillation for Small Foot-print Deep Speaker Embedding},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  publisher = {{IEEE}},
  year      = {2019},
  pages     = {6021--6025},
  doi       = {10.1109/ICASSP.2019.8683443},
  url       = {https://doi.org/10.1109/ICASSP.2019.8683443},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/WangYWQ019.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@InProceedings{Chang2019a,
  author    = {Xuankai Chang and Yanmin Qian and Kai Yu and Shinji Watanabe},
  title     = {End-to-end Monaural Multi-speaker {ASR} System without Pretraining},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  publisher = {{IEEE}},
  year      = {2019},
  pages     = {6256--6260},
  doi       = {10.1109/ICASSP.2019.8682822},
  url       = {https://doi.org/10.1109/ICASSP.2019.8682822},
  timestamp = {Fri, 26 Jun 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/ChangQ0W19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

% "ASVspoof" is braced in the title so sentence-casing styles keep its capitals.
% The second editor's name carries its carons as brace-wrapped accent commands so BibTeX treats each as one letter.
@InProceedings{Yang2019,
  author    = {Yexin Yang and Hongji Wang and Heinrich Dinkel and Zhengyang Chen and Shuai Wang and Yanmin Qian and Kai Yu},
  booktitle = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  title     = {The {SJTU} Robust Anti-Spoofing System for the {ASVspoof} 2019 Challenge},
  year      = {2019},
  editor    = {Gernot Kubin and Zdravko Ka{\v{c}}i{\v{c}}},
  pages     = {1038--1042},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/YangWDCWQ019.bib},
  doi       = {10.21437/INTERSPEECH.2019-2170},
  timestamp = {Fri, 29 Jan 2021 17:41:10 +0100},
  url       = {https://doi.org/10.21437/Interspeech.2019-2170},
}

% Author and editor names carry their full diacritics; each accent is a brace-wrapped command so BibTeX treats it as one letter.
@InProceedings{Wang2019b,
  author    = {Shuai Wang and Johan Rohdin and Luk{\'{a}}{\v{s}} Burget and Old{\v{r}}ich Plchot and Yanmin Qian and Kai Yu and Jan {\v{C}}ernock{\'{y}}},
  booktitle = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  title     = {On the Usage of Phonetic Information for Text-Independent Speaker Embedding Extraction},
  year      = {2019},
  editor    = {Gernot Kubin and Zdravko Ka{\v{c}}i{\v{c}}},
  pages     = {1148--1152},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/WangRBPQ0C19.bib},
  doi       = {10.21437/INTERSPEECH.2019-3036},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  url       = {https://doi.org/10.21437/Interspeech.2019-3036},
}

% The second editor's name carries its carons as brace-wrapped accent commands so BibTeX treats each as one letter.
@InProceedings{Wu2019,
  author    = {Zhanghao Wu and Shuai Wang and Yanmin Qian and Kai Yu},
  booktitle = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  title     = {Data Augmentation Using Variational Autoencoder for Embedding Based Speaker Verification},
  year      = {2019},
  editor    = {Gernot Kubin and Zdravko Ka{\v{c}}i{\v{c}}},
  pages     = {1163--1167},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/WuWQ019.bib},
  doi       = {10.21437/INTERSPEECH.2019-2248},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
  url       = {https://doi.org/10.21437/Interspeech.2019-2248},
}

% The second editor's name carries its carons as brace-wrapped accent commands so BibTeX treats each as one letter.
@InProceedings{Guo2019,
  author    = {Jiaqi Guo and Yongbin You and Yanmin Qian and Kai Yu},
  booktitle = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  title     = {Joint Decoding of {CTC} Based Systems for Speech Recognition},
  year      = {2019},
  editor    = {Gernot Kubin and Zdravko Ka{\v{c}}i{\v{c}}},
  pages     = {2205--2209},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/GuoYQ019.bib},
  doi       = {10.21437/INTERSPEECH.2019-2026},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
  url       = {https://doi.org/10.21437/Interspeech.2019-2026},
}

% The second editor's name carries its carons as brace-wrapped accent commands so BibTeX treats each as one letter.
@InProceedings{Zhang2019a,
  author    = {Wangyou Zhang and Xuankai Chang and Yanmin Qian},
  booktitle = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  title     = {Knowledge Distillation for End-to-End Monaural Multi-Talker {ASR} System},
  year      = {2019},
  editor    = {Gernot Kubin and Zdravko Ka{\v{c}}i{\v{c}}},
  pages     = {2633--2637},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/ZhangCQ19.bib},
  doi       = {10.21437/INTERSPEECH.2019-3192},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
  url       = {https://doi.org/10.21437/Interspeech.2019-3192},
}

% The second editor's name carries its carons as brace-wrapped accent commands so BibTeX treats each as one letter.
@InProceedings{Zhang2019b,
  author    = {Wangyou Zhang and Ying Zhou and Yanmin Qian},
  booktitle = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  title     = {Robust {DOA} Estimation Based on Convolutional Neural Network and Time-Frequency Masking},
  year      = {2019},
  editor    = {Gernot Kubin and Zdravko Ka{\v{c}}i{\v{c}}},
  pages     = {2703--2707},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/ZhangZQ19.bib},
  doi       = {10.21437/INTERSPEECH.2019-3158},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
  url       = {https://doi.org/10.21437/Interspeech.2019-3158},
}

% The second editor's name carries its carons as brace-wrapped accent commands so BibTeX treats each as one letter.
@InProceedings{Wang2019c,
  author    = {Hongji Wang and Heinrich Dinkel and Shuai Wang and Yanmin Qian and Kai Yu},
  booktitle = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  title     = {Cross-Domain Replay Spoofing Attack Detection Using Domain Adversarial Training},
  year      = {2019},
  editor    = {Gernot Kubin and Zdravko Ka{\v{c}}i{\v{c}}},
  pages     = {2938--2942},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/WangD0Q019.bib},
  doi       = {10.21437/INTERSPEECH.2019-2120},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
  url       = {https://doi.org/10.21437/Interspeech.2019-2120},
}

% Editor surname restored with its carons (the export had flattened it to plain ASCII).
@InProceedings{Li2019,
  author    = {Chenda Li and Yanmin Qian},
  booktitle = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  title     = {Prosody Usage Optimization for Children Speech Recognition with Zero Resource Children Speech},
  year      = {2019},
  editor    = {Gernot Kubin and Zdravko Ka{\v{c}}i{\v{c}}},
  pages     = {3446--3450},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiQ19.bib},
  doi       = {10.21437/INTERSPEECH.2019-2659},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
  url       = {https://doi.org/10.21437/Interspeech.2019-2659},
}

@article{Qian2018,
  author    = {Qian, Yanmin and Weng, Chao and Chang, Xuankai and Wang, Shuai and Yu, Dong},
  title     = {Past review, current progress, and challenges ahead on the cocktail party problem},
  journal   = {Frontiers Inf. Technol. Electron. Eng.},
  volume    = {19},
  number    = {1},
  pages     = {40--63},
  year      = {2018},
  doi       = {10.1631/FITEE.1700814},
  url       = {https://doi.org/10.1631/FITEE.1700814},
  biburl    = {https://dblp.org/rec/journals/jzusc/QianWCWY18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 05 Mar 2020 00:00:00 +0100},
}

@article{Qian2018a,
  author    = {Qian, Yanmin and Weng, Chao and Chang, Xuankai and Wang, Shuai and Yu, Dong},
  title     = {Erratum to: Past review, current progress, and challenges ahead on the cocktail party problem},
  journal   = {Frontiers Inf. Technol. Electron. Eng.},
  volume    = {19},
  number    = {4},
  pages     = {582},
  year      = {2018},
  doi       = {10.1631/FITEE.17E0814},
  url       = {https://doi.org/10.1631/FITEE.17e0814},
  biburl    = {https://dblp.org/rec/journals/jzusc/QianWCWY18a.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 05 Mar 2020 00:00:00 +0100},
}

@article{Chen2018,
  author    = {Chen, Zhehuai and Qian, Yanmin and Yu, Kai},
  title     = {Sequence discriminative training for deep learning based acoustic keyword spotting},
  journal   = {Speech Commun.},
  volume    = {102},
  pages     = {100--111},
  year      = {2018},
  doi       = {10.1016/J.SPECOM.2018.08.001},
  url       = {https://doi.org/10.1016/j.specom.2018.08.001},
  biburl    = {https://dblp.org/rec/journals/speech/ChenQ018.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sat, 22 Feb 2020 00:00:00 +0100},
}

@article{Qian2018b,
  author    = {Qian, Yanmin and Chang, Xuankai and Yu, Dong},
  title     = {Single-channel multi-talker speech recognition with permutation invariant training},
  journal   = {Speech Commun.},
  volume    = {104},
  pages     = {1--11},
  year      = {2018},
  doi       = {10.1016/J.SPECOM.2018.09.003},
  url       = {https://doi.org/10.1016/j.specom.2018.09.003},
  biburl    = {https://dblp.org/rec/journals/speech/QianCY18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sat, 22 Feb 2020 00:00:00 +0100},
}

@article{Tan2018,
  author    = {Tan, Tian and Qian, Yanmin and Hu, Hu and Zhou, Ying and Ding, Wen and Yu, Kai},
  title     = {Adaptive Very Deep Convolutional Residual Network for Noise Robust Speech Recognition},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {26},
  number    = {8},
  pages     = {1393--1405},
  year      = {2018},
  doi       = {10.1109/TASLP.2018.2825432},
  url       = {https://doi.org/10.1109/TASLP.2018.2825432},
  biburl    = {https://dblp.org/rec/journals/taslp/TanQHZDY18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 13 Mar 2020 00:00:00 +0100},
}

@article{Dinkel2018,
  author    = {Dinkel, Heinrich and Qian, Yanmin and Yu, Kai},
  title     = {Investigating Raw Wave Deep Neural Networks for End-to-End Speaker Spoofing Detection},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {26},
  number    = {11},
  pages     = {2002--2014},
  year      = {2018},
  doi       = {10.1109/TASLP.2018.2851155},
  url       = {https://doi.org/10.1109/TASLP.2018.2851155},
  biburl    = {https://dblp.org/rec/journals/taslp/DinkelQY18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 13 Mar 2020 00:00:00 +0100},
}

@inproceedings{Zhou2018,
  author    = {Zhou, Ying and Qian, Yanmin},
  title     = {Robust Mask Estimation By Integrating Neural Network-Based and Clustering-Based Approaches for Adaptive Acoustic Beamforming},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {536--540},
  publisher = {{IEEE}},
  year      = {2018},
  doi       = {10.1109/ICASSP.2018.8462462},
  url       = {https://doi.org/10.1109/ICASSP.2018.8462462},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhouQ18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
}

% Start page corrected from 571 to 5714: the exported range 571--5718 is implausible for a
% five-page paper like its ICASSP 2018 siblings. Confirm against the DOI landing page.
@InProceedings{Tan2018a,
  author    = {Tian Tan and Yanmin Qian and Dong Yu},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  title     = {Knowledge Transfer in Permutation Invariant Training for Single-Channel Multi-Talker Speech Recognition},
  year      = {2018},
  pages     = {5714--5718},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/0002Q018.bib},
  doi       = {10.1109/ICASSP.2018.8461883},
  timestamp = {Tue, 18 Sep 2018 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICASSP.2018.8461883},
}

@inproceedings{Huang2018,
  author    = {Huang, Zili and Wang, Shuai and Qian, Yanmin},
  title     = {Joint I-Vector with End-to-End System for Short Duration Text-Independent Speaker Verification},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {4869--4873},
  publisher = {{IEEE}},
  year      = {2018},
  doi       = {10.1109/ICASSP.2018.8462508},
  url       = {https://doi.org/10.1109/ICASSP.2018.8462508},
  biburl    = {https://dblp.org/rec/conf/icassp/HuangWQ18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 11 Oct 2019 01:00:00 +0200},
}

@inproceedings{Hu2018,
  author    = {Hu, Hu and Tan, Tian and Qian, Yanmin},
  title     = {Generative Adversarial Networks Based Data Augmentation for Noise Robust Speech Recognition},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {5044--5048},
  publisher = {{IEEE}},
  year      = {2018},
  doi       = {10.1109/ICASSP.2018.8462624},
  url       = {https://doi.org/10.1109/ICASSP.2018.8462624},
  biburl    = {https://dblp.org/rec/conf/icassp/Hu0Q18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 18 Sep 2018 01:00:00 +0200},
}

% KL (Kullback-Leibler) is upper-cased and braced so sentence-casing styles keep the acronym intact.
@InProceedings{Wang2018,
  author    = {Shuai Wang and Yanmin Qian and Kai Yu},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  title     = {Focal {KL}-Divergence Based Dilated Convolutional Neural Networks for Co-Channel Speaker Identification},
  year      = {2018},
  pages     = {5339--5343},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/WangQ018.bib},
  doi       = {10.1109/ICASSP.2018.8462620},
  timestamp = {Fri, 11 Oct 2019 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICASSP.2018.8462620},
}

% Aurora4 is a corpus name; braced so sentence-casing styles do not lowercase it.
@InProceedings{Qian2018c,
  author    = {Yanmin Qian and Tian Tan and Hu Hu and Qi Liu},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  title     = {Noise Robust Speech Recognition on {Aurora4} by Humans and Machines},
  year      = {2018},
  pages     = {5604--5608},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/Qian0HL18.bib},
  doi       = {10.1109/ICASSP.2018.8462629},
  timestamp = {Wed, 18 May 2022 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICASSP.2018.8462629},
}

% Title: the run-together word "Deepmixture" from the export is split into "Deep Mixture".
% Confirm against the publisher page if in doubt.
@InProceedings{Ding2018,
  author    = {Wen Ding and Tian Tan and Yanmin Qian},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  title     = {Fast Adaptation on Deep Mixture Generative Network Based Acoustic Modeling},
  year      = {2018},
  pages     = {5944--5948},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/Ding0Q18.bib},
  doi       = {10.1109/ICASSP.2018.8462411},
  timestamp = {Tue, 18 Sep 2018 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICASSP.2018.8462411},
}

@inproceedings{Chang2018,
  author    = {Chang, Xuankai and Qian, Yanmin and Yu, Dong},
  title     = {Adaptive Permutation Invariant Training with Auxiliary Information for Monaural Multi-Talker Speech Recognition},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {5974--5978},
  publisher = {{IEEE}},
  year      = {2018},
  doi       = {10.1109/ICASSP.2018.8461570},
  url       = {https://doi.org/10.1109/ICASSP.2018.8461570},
  biburl    = {https://dblp.org/rec/conf/icassp/ChangQY18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 18 Sep 2018 01:00:00 +0200},
}

@inproceedings{Chen2018a,
  author    = {Chen, Lianwu and Yu, Meng and Qian, Yanmin and Su, Dan and Yu, Dong},
  title     = {Permutation Invariant Training of Generative Adversarial Network for Monaural Speech Separation},
  booktitle = {Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, India, 2-6 September 2018},
  editor    = {Yegnanarayana, B.},
  pages     = {302--306},
  publisher = {{ISCA}},
  year      = {2018},
  doi       = {10.21437/INTERSPEECH.2018-1603},
  url       = {https://doi.org/10.21437/Interspeech.2018-1603},
  biburl    = {https://dblp.org/rec/conf/interspeech/ChenYQSY18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 21 May 2021 08:16:43 +0200},
}

@inproceedings{Wang2018a,
  author    = {Wang, Jun and Chen, Jie and Su, Dan and Chen, Lianwu and Yu, Meng and Qian, Yanmin and Yu, Dong},
  title     = {Deep Extractor Network for Target Speaker Recovery from Single Channel Speech Mixtures},
  booktitle = {Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, India, 2-6 September 2018},
  editor    = {Yegnanarayana, B.},
  pages     = {307--311},
  publisher = {{ISCA}},
  year      = {2018},
  doi       = {10.21437/INTERSPEECH.2018-1205},
  url       = {https://doi.org/10.21437/Interspeech.2018-1205},
  biburl    = {https://dblp.org/rec/conf/interspeech/WangCSCYQY18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 22 Dec 2021 00:00:00 +0100},
}

@inproceedings{Chang2018a,
  author    = {Chang, Xuankai and Qian, Yanmin and Yu, Dong},
  title     = {Monaural Multi-Talker Speech Recognition with Attention Mechanism and Gated Convolutional Networks},
  booktitle = {Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, India, 2-6 September 2018},
  editor    = {Yegnanarayana, B.},
  pages     = {1586--1590},
  publisher = {{ISCA}},
  year      = {2018},
  doi       = {10.21437/INTERSPEECH.2018-1547},
  url       = {https://doi.org/10.21437/Interspeech.2018-1547},
  biburl    = {https://dblp.org/rec/conf/interspeech/ChangQ018.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
}

@inproceedings{Huang2018a,
  author    = {Huang, Mingkun and You, Yongbin and Chen, Zhehuai and Qian, Yanmin and Yu, Kai},
  title     = {Knowledge Distillation for Sequence Model},
  booktitle = {Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, India, 2-6 September 2018},
  editor    = {Yegnanarayana, B.},
  pages     = {3703--3707},
  publisher = {{ISCA}},
  year      = {2018},
  doi       = {10.21437/INTERSPEECH.2018-1589},
  url       = {https://doi.org/10.21437/Interspeech.2018-1589},
  biburl    = {https://dblp.org/rec/conf/interspeech/HuangYCQ018.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
}

% Venue acronym IScIDE is braced, like the other conference acronyms in this file, so its mixed case survives recasing.
@InProceedings{Wang2018b,
  author    = {Shuai Wang and Heinrich Dinkel and Yanmin Qian and Kai Yu},
  booktitle = {Intelligence Science and Big Data Engineering - 8th International Conference, {IScIDE} 2018, Lanzhou, China, August 18-19, 2018, Revised Selected Papers},
  title     = {Covariance Based Deep Feature for Text-Dependent Speaker Verification},
  year      = {2018},
  editor    = {Yuxin Peng and Kai Yu and Jiwen Lu and Xingpeng Jiang},
  pages     = {231--242},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {11266},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iscide/WangDQ018.bib},
  doi       = {10.1007/978-3-030-02698-1\_20},
  timestamp = {Fri, 11 Oct 2019 01:00:00 +0200},
  url       = {https://doi.org/10.1007/978-3-030-02698-1\_20},
}

@inproceedings{Sheng2018,
  author    = {Sheng, Peiyao and Yang, Zhuolin and Hu, Hu and Tan, Tian and Qian, Yanmin},
  title     = {Data Augmentation using Conditional Generative Adversarial Networks for Robust Speech Recognition},
  booktitle = {11th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2018, Taipei City, Taiwan, November 26-29, 2018},
  pages     = {121--125},
  publisher = {{IEEE}},
  year      = {2018},
  doi       = {10.1109/ISCSLP.2018.8706651},
  url       = {https://doi.org/10.1109/ISCSLP.2018.8706651},
  biburl    = {https://dblp.org/rec/conf/iscslp/ShengYH0Q18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 16 Oct 2019 14:14:48 +0200},
}

@inproceedings{Wang2018c,
  author    = {Wang, Shuai and Huang, Zili and Qian, Yanmin and Yu, Kai},
  title     = {Deep Discriminant Analysis for i-vector Based Robust Speaker Recognition},
  booktitle = {11th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2018, Taipei City, Taiwan, November 26-29, 2018},
  pages     = {195--199},
  publisher = {{IEEE}},
  year      = {2018},
  doi       = {10.1109/ISCSLP.2018.8706632},
  url       = {https://doi.org/10.1109/ISCSLP.2018.8706632},
  biburl    = {https://dblp.org/rec/conf/iscslp/WangHQ018.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 11 Oct 2019 01:00:00 +0200},
}

@inproceedings{Yang2018,
  author    = {Yang, Yexin and Wang, Shuai and Sun, Man and Qian, Yanmin and Yu, Kai},
  title     = {Generative Adversarial Networks based X-vector Augmentation for Robust Probabilistic Linear Discriminant Analysis in Speaker Verification},
  booktitle = {11th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2018, Taipei City, Taiwan, November 26-29, 2018},
  pages     = {205--209},
  publisher = {{IEEE}},
  year      = {2018},
  doi       = {10.1109/ISCSLP.2018.8706575},
  url       = {https://doi.org/10.1109/ISCSLP.2018.8706575},
  biburl    = {https://dblp.org/rec/conf/iscslp/YangWSQ018.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 11 Oct 2019 01:00:00 +0200},
}

@article{Chen2017,
  author    = {Chen, Zhehuai and Zhuang, Yimeng and Qian, Yanmin and Yu, Kai},
  title     = {Phone Synchronous Speech Recognition With {CTC} Lattices},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {25},
  number    = {1},
  pages     = {86--97},
  year      = {2017},
  doi       = {10.1109/TASLP.2016.2625459},
  url       = {https://doi.org/10.1109/TASLP.2016.2625459},
  biburl    = {https://dblp.org/rec/journals/taslp/ChenZQY17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 13 Mar 2020 00:00:00 +0100},
}

@article{Qian2017,
  author    = {Qian, Yanmin and Chen, Nanxin and Dinkel, Heinrich and Wu, Zhizheng},
  title     = {Deep Feature Engineering for Noise Robust Spoofing Detection},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {25},
  number    = {10},
  pages     = {1942--1955},
  year      = {2017},
  doi       = {10.1109/TASLP.2017.2732162},
  url       = {https://doi.org/10.1109/TASLP.2017.2732162},
  biburl    = {https://dblp.org/rec/journals/taslp/QianCDW17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sat, 18 Feb 2023 00:00:00 +0100},
}

@inproceedings{Jiang2017,
  author    = {Jiang, Xiaowei and Wang, Shuai and Xiang, Xu and Qian, Yanmin},
  title     = {Integrating online i-vector into {GMM-UBM} for text-dependent speaker verification},
  booktitle = {2017 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference, {APSIPA} {ASC} 2017, Kuala Lumpur, Malaysia, December 12-15, 2017},
  pages     = {1628--1632},
  publisher = {{IEEE}},
  year      = {2017},
  doi       = {10.1109/APSIPA.2017.8282293},
  url       = {https://doi.org/10.1109/APSIPA.2017.8282293},
  biburl    = {https://dblp.org/rec/conf/apsipa/JiangWXQ17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
}

@inproceedings{Liu2017,
  author    = {Liu, Qi and Qian, Yanmin and Yu, Kai},
  title     = {Future vector enhanced {LSTM} language model for {LVCSR}},
  booktitle = {2017 {IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2017, Okinawa, Japan, December 16-20, 2017},
  pages     = {104--110},
  publisher = {{IEEE}},
  year      = {2017},
  doi       = {10.1109/ASRU.2017.8268923},
  url       = {https://doi.org/10.1109/ASRU.2017.8268923},
  biburl    = {https://dblp.org/rec/conf/asru/LiuQ017.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 18 May 2022 01:00:00 +0200},
}

@inproceedings{Wu2017,
  author    = {Wu, Yue and He, Tianxing and Chen, Zhehuai and Qian, Yanmin and Yu, Kai},
  title     = {Multi-view {LSTM} Language Model with Word-Synchronized Auxiliary Feature for {LVCSR}},
  booktitle = {Chinese Computational Linguistics and Natural Language Processing Based on Naturally Annotated Big Data - 16th China National Conference, {CCL} 2017, - and - 5th International Symposium, {NLP-NABD} 2017, Nanjing, China, October 13-15, 2017, Proceedings},
  editor    = {Sun, Maosong and Wang, Xiaojie and Chang, Baobao and Xiong, Deyi},
  series    = {Lecture Notes in Computer Science},
  volume    = {10565},
  pages     = {398--410},
  publisher = {Springer},
  year      = {2017},
  doi       = {10.1007/978-3-319-69005-6\_33},
  url       = {https://doi.org/10.1007/978-3-319-69005-6\_33},
  biburl    = {https://dblp.org/rec/conf/cncl/WuHCQY17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 01 Sep 2023 13:50:17 +0200},
}

% Acronym is CLDNN with a lowercase plural s; the export had upper-cased the whole token.
@InProceedings{Dinkel2017,
  author    = {Heinrich Dinkel and Nanxin Chen and Yanmin Qian and Kai Yu},
  booktitle = {2017 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2017, New Orleans, LA, USA, March 5-9, 2017},
  title     = {End-to-end spoofing detection with raw waveform {CLDNNs}},
  year      = {2017},
  pages     = {4860--4864},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/DinkelCQY17.bib},
  doi       = {10.1109/ICASSP.2017.7953080},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  url       = {https://doi.org/10.1109/ICASSP.2017.7953080},
}

@inproceedings{Dinkel2017a,
  author    = {Dinkel, Heinrich and Qian, Yanmin and Yu, Kai},
  title     = {Small-footprint convolutional neural network for spoofing detection},
  booktitle = {2017 International Joint Conference on Neural Networks, {IJCNN} 2017, Anchorage, AK, USA, May 14-19, 2017},
  pages     = {3086--3091},
  publisher = {{IEEE}},
  year      = {2017},
  doi       = {10.1109/IJCNN.2017.7966240},
  url       = {https://doi.org/10.1109/IJCNN.2017.7966240},
  biburl    = {https://dblp.org/rec/conf/ijcnn/DinkelQY17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 16 Oct 2019 14:14:55 +0200},
}

@inproceedings{Xiang2017,
  author    = {Xiang, Xu and Qian, Yanmin and Yu, Kai},
  title     = {Binary Deep Neural Networks for Speech Recognition},
  booktitle = {Interspeech 2017, 18th Annual Conference of the International Speech Communication Association, Stockholm, Sweden, August 20-24, 2017},
  editor    = {Lacerda, Francisco},
  pages     = {533--537},
  publisher = {{ISCA}},
  year      = {2017},
  doi       = {10.21437/INTERSPEECH.2017-1343},
  url       = {https://doi.org/10.21437/Interspeech.2017-1343},
  biburl    = {https://dblp.org/rec/conf/interspeech/XiangQ017.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sun, 06 Aug 2023 01:00:00 +0200},
}

@inproceedings{Wang2017,
  author    = {Wang, Shuai and Qian, Yanmin and Yu, Kai},
  title     = {What Does the Speaker Embedding Encode?},
  booktitle = {Interspeech 2017, 18th Annual Conference of the International Speech Communication Association, Stockholm, Sweden, August 20-24, 2017},
  editor    = {Lacerda, Francisco},
  pages     = {1497--1501},
  publisher = {{ISCA}},
  year      = {2017},
  doi       = {10.21437/INTERSPEECH.2017-1125},
  url       = {https://doi.org/10.21437/Interspeech.2017-1125},
  biburl    = {https://dblp.org/rec/conf/interspeech/WangQ017.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 26 Jun 2023 01:00:00 +0200},
}

@inproceedings{Yu2017,
  author    = {Yu, Dong and Chang, Xuankai and Qian, Yanmin},
  title     = {Recognizing Multi-Talker Speech with Permutation Invariant Training},
  booktitle = {Interspeech 2017, 18th Annual Conference of the International Speech Communication Association, Stockholm, Sweden, August 20-24, 2017},
  editor    = {Lacerda, Francisco},
  pages     = {2456--2460},
  publisher = {{ISCA}},
  year      = {2017},
  doi       = {10.21437/INTERSPEECH.2017-305},
  url       = {https://doi.org/10.21437/Interspeech.2017-305},
  biburl    = {https://dblp.org/rec/conf/interspeech/YuCQ17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 26 Jun 2023 01:00:00 +0200},
}

% Venue acronym IScIDE is braced, like the other conference acronyms in this file, so its mixed case survives recasing.
@InProceedings{Chen2017a,
  author    = {Zhehuai Chen and Yanmin Qian and Kai Yu},
  booktitle = {Intelligence Science and Big Data Engineering - 7th International Conference, {IScIDE} 2017, Dalian, China, September 22-23, 2017, Proceedings},
  title     = {A Unified Confidence Measure Framework Using Auxiliary Normalization Graph},
  year      = {2017},
  editor    = {Yi Sun and Huchuan Lu and Lihe Zhang and Jian Yang and Hua Huang},
  pages     = {123--133},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {10559},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iscide/ChenQY17.bib},
  doi       = {10.1007/978-3-319-67777-4\_11},
  timestamp = {Tue, 14 May 2019 10:00:39 +0200},
  url       = {https://doi.org/10.1007/978-3-319-67777-4\_11},
}

@incollection{Sim2017,
  author    = {Sim, Khe Chai and Qian, Yanmin and Mantena, Gautam and Samarakoon, Lahiru and Kundu, Souvik and Tan, Tian},
  title     = {Adaptation of Deep Neural Network Acoustic Models for Robust Automatic Speech Recognition},
  booktitle = {New Era for Robust Speech Recognition, Exploiting Deep Learning},
  editor    = {Watanabe, Shinji and Delcroix, Marc and Metze, Florian and Hershey, John R.},
  pages     = {219--243},
  publisher = {Springer},
  year      = {2017},
  doi       = {10.1007/978-3-319-64680-0\_9},
  url       = {https://doi.org/10.1007/978-3-319-64680-0\_9},
  biburl    = {https://dblp.org/rec/books/sp/17/SimQMSKT17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 23 Jun 2021 01:00:00 +0200},
}

@article{Qian2016,
  author    = {Qian, Yanmin and Chen, Nanxin and Yu, Kai},
  title     = {Deep features for automatic spoofing detection},
  journal   = {Speech Commun.},
  volume    = {85},
  pages     = {43--52},
  year      = {2016},
  doi       = {10.1016/J.SPECOM.2016.10.007},
  url       = {https://doi.org/10.1016/j.specom.2016.10.007},
  biburl    = {https://dblp.org/rec/journals/speech/QianCY16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sat, 22 Feb 2020 00:00:00 +0100},
}

@article{Tan2016,
  author    = {Tan, Tian and Qian, Yanmin and Yu, Kai},
  title     = {Cluster Adaptive Training for Deep Neural Network Based Acoustic Model},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {24},
  number    = {3},
  pages     = {459--468},
  year      = {2016},
  doi       = {10.1109/TASLP.2015.2511922},
  url       = {https://doi.org/10.1109/TASLP.2015.2511922},
  biburl    = {https://dblp.org/rec/journals/taslp/TanQY16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 13 Mar 2020 00:00:00 +0100},
}

@article{Qian2016a,
  author    = {Qian, Yanmin and Tan, Tian and Yu, Dong},
  title     = {Neural Network Based Multi-Factor Aware Joint Training for Robust Speech Recognition},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {24},
  number    = {12},
  pages     = {2231--2240},
  year      = {2016},
  doi       = {10.1109/TASLP.2016.2598308},
  url       = {https://doi.org/10.1109/TASLP.2016.2598308},
  biburl    = {https://dblp.org/rec/journals/taslp/QianTY16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 13 Mar 2020 00:00:00 +0100},
}

@article{Qian2016b,
  author    = {Qian, Yanmin and Bi, Mengxiao and Tan, Tian and Yu, Kai},
  title     = {Very Deep Convolutional Neural Networks for Noise Robust Speech Recognition},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {24},
  number    = {12},
  pages     = {2263--2276},
  year      = {2016},
  doi       = {10.1109/TASLP.2016.2602884},
  url       = {https://doi.org/10.1109/TASLP.2016.2602884},
  biburl    = {https://dblp.org/rec/journals/taslp/QianBTY16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 13 Mar 2020 00:00:00 +0100},
}

@inproceedings{Korshunov2016,
  author    = {Pavel Korshunov and S{\'{e}}bastien Marcel and Hannah Muckenhirn and Andr{\'{e}} R. Gon{\c{c}}alves and A. G. Souza Mello and Ricardo Paranhos Velloso Violato and Fl{\'{a}}vio Olmos Sim{\~{o}}es and M. U. Neto and Marcus de Assis Angeloni and Jos{\'{e}} Augusto Stuchi and Heinrich Dinkel and Nanxin Chen and Yanmin Qian and Dipjyoti Paul and Goutam Saha and Md. Sahidullah},
  title     = {Overview of {BTAS} 2016 speaker anti-spoofing competition},
  booktitle = {8th {IEEE} International Conference on Biometrics Theory, Applications and Systems, {BTAS} 2016, Niagara Falls, NY, USA, September 6-9, 2016},
  pages     = {1--6},
  publisher = {{IEEE}},
  year      = {2016},
  doi       = {10.1109/BTAS.2016.7791200},
  url       = {https://doi.org/10.1109/BTAS.2016.7791200},
  biburl    = {https://dblp.org/rec/conf/btas/KorshunovMMGMVS16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 22 Nov 2022 00:00:00 +0100},
}

@inproceedings{Kundu2016,
  author    = {Kundu, Souvik and Mantena, Gautam and Qian, Yanmin and Tan, Tian and Delcroix, Marc and Sim, Khe Chai},
  title     = {Joint acoustic factor learning for robust deep neural network based automatic speech recognition},
  booktitle = {2016 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2016, Shanghai, China, March 20-25, 2016},
  pages     = {5025--5029},
  publisher = {{IEEE}},
  year      = {2016},
  doi       = {10.1109/ICASSP.2016.7472634},
  url       = {https://doi.org/10.1109/ICASSP.2016.7472634},
  biburl    = {https://dblp.org/rec/conf/icassp/KunduMQTDS16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 23 Jun 2021 01:00:00 +0200},
}

% Acronym is LSTM-RNN with a lowercase plural s; the export had upper-cased the whole token.
@InProceedings{Tan2016a,
  author    = {Tian Tan and Yanmin Qian and Dong Yu and Souvik Kundu and Liang Lu and Khe Chai Sim and Xiong Xiao and Yu Zhang},
  booktitle = {2016 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2016, Shanghai, China, March 20-25, 2016},
  title     = {Speaker-aware training of {LSTM-RNNs} for acoustic modelling},
  year      = {2016},
  pages     = {5280--5284},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/TanQYKLSXZ16.bib},
  doi       = {10.1109/ICASSP.2016.7472685},
  timestamp = {Sat, 31 Jul 2021 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICASSP.2016.7472685},
}

% DNN is braced so sentence-casing styles keep the acronym uppercase.
@InProceedings{Wang2016,
  author    = {Linlin Wang and Chao Zhang and Philip C. Woodland and Mark J. F. Gales and Panagiota Karanasou and Pierre Lanchantin and Xunying Liu and Yanmin Qian},
  booktitle = {2016 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2016, Shanghai, China, March 20-25, 2016},
  title     = {Improved {DNN}-based segmentation for multi-genre broadcast audio},
  year      = {2016},
  pages     = {5700--5704},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/WangZWGKLLQ16.bib},
  doi       = {10.1109/ICASSP.2016.7472769},
  timestamp = {Tue, 23 Jun 2020 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICASSP.2016.7472769},
}

@inproceedings{Qian2016c,
  author    = {Qian, Yanmin and Tan, Tian and Yu, Dong},
  title     = {An investigation into using parallel data for far-field speech recognition},
  booktitle = {2016 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2016, Shanghai, China, March 20-25, 2016},
  pages     = {5725--5729},
  publisher = {{IEEE}},
  year      = {2016},
  doi       = {10.1109/ICASSP.2016.7472774},
  url       = {https://doi.org/10.1109/ICASSP.2016.7472774},
  biburl    = {https://dblp.org/rec/conf/icassp/QianTY16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 16 Jul 2018 01:00:00 +0200},
}

@inproceedings{Qian2016d,
  author    = {Qian, Yanmin and Tan, Tian and Yu, Dong and Zhang, Yu},
  title     = {Integrated adaptation with multi-factor joint-learning for far-field speech recognition},
  booktitle = {2016 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2016, Shanghai, China, March 20-25, 2016},
  pages     = {5770--5774},
  publisher = {{IEEE}},
  year      = {2016},
  doi       = {10.1109/ICASSP.2016.7472783},
  url       = {https://doi.org/10.1109/ICASSP.2016.7472783},
  biburl    = {https://dblp.org/rec/conf/icassp/QianTYZ16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 16 Jul 2018 01:00:00 +0200},
}

@InProceedings{Zhuang2016,
  author    = {Zhuang, Yimeng and Chang, Xuankai and Qian, Yanmin and Yu, Kai},
  title     = {Unrestricted Vocabulary Keyword Spotting Using {LSTM-CTC}},
  booktitle = {Interspeech 2016, 17th Annual Conference of the International Speech Communication Association, San Francisco, CA, USA, September 8-12, 2016},
  editor    = {Morgan, Nelson},
  pages     = {938--942},
  publisher = {{ISCA}},
  year      = {2016},
  doi       = {10.21437/INTERSPEECH.2016-753},
  url       = {https://doi.org/10.21437/Interspeech.2016-753},
  biburl    = {https://dblp.org/rec/conf/interspeech/ZhuangCQY16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 26 Jun 2023 16:43:56 +0200},
}

@InProceedings{Zhuang2016a,
  author    = {Zhuang, Yimeng and Tong, Sibo and Yin, Maofan and Qian, Yanmin and Yu, Kai},
  title     = {Multi-task joint-learning for robust voice activity detection},
  booktitle = {10th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2016, Tianjin, China, October 17-20, 2016},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2016},
  doi       = {10.1109/ISCSLP.2016.7918383},
  url       = {https://doi.org/10.1109/ISCSLP.2016.7918383},
  biburl    = {https://dblp.org/rec/conf/iscslp/ZhuangTYQY16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 16 Oct 2019 14:14:48 +0200},
}

@InProceedings{Qian2016e,
  author    = {Qian, Yanmin and Woodland, Philip C.},
  title     = {Very deep convolutional neural networks for robust speech recognition},
  booktitle = {2016 {IEEE} Spoken Language Technology Workshop, {SLT} 2016, San Diego, CA, USA, December 13-16, 2016},
  pages     = {481--488},
  publisher = {{IEEE}},
  year      = {2016},
  doi       = {10.1109/SLT.2016.7846307},
  url       = {https://doi.org/10.1109/SLT.2016.7846307},
  biburl    = {https://dblp.org/rec/conf/slt/QianW16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 16 Oct 2019 14:14:53 +0200},
}

@Article{Liu2015,
  author    = {Liu, Yuan and Qian, Yanmin and Chen, Nanxin and Fu, Tianfan and Zhang, Ya and Yu, Kai},
  title     = {Deep feature for text-dependent speaker verification},
  journal   = {Speech Commun.},
  volume    = {73},
  pages     = {1--13},
  year      = {2015},
  doi       = {10.1016/J.SPECOM.2015.07.003},
  url       = {https://doi.org/10.1016/j.specom.2015.07.003},
  biburl    = {https://dblp.org/rec/journals/speech/LiuQCFZY15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 28 Apr 2023 01:00:00 +0200},
}

@InProceedings{Qian2015,
  author    = {Qian, Yanmin and Yin, Maofan and You, Yongbin and Yu, Kai},
  title     = {Multi-task joint-learning of deep neural networks for robust speech recognition},
  booktitle = {2015 {IEEE} Workshop on Automatic Speech Recognition and Understanding, {ASRU} 2015, Scottsdale, AZ, USA, December 13-17, 2015},
  pages     = {310--316},
  publisher = {{IEEE}},
  year      = {2015},
  doi       = {10.1109/ASRU.2015.7404810},
  url       = {https://doi.org/10.1109/ASRU.2015.7404810},
  biburl    = {https://dblp.org/rec/conf/asru/QianYYY15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 16 Oct 2019 14:14:51 +0200},
}

@InProceedings{Woodland2015,
  author    = {Philip C. Woodland and Xunying Liu and Yanmin Qian and Chao Zhang and Mark J. F. Gales and Penny Karanasou and Pierre Lanchantin and Linlin Wang},
  booktitle = {2015 {IEEE} Workshop on Automatic Speech Recognition and Understanding, {ASRU} 2015, Scottsdale, AZ, USA, December 13-17, 2015},
  title     = {{Cambridge University} transcription systems for the multi-genre broadcast challenge},
  year      = {2015},
  pages     = {639--646},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/asru/WoodlandLQZGKLW15.bib},
  doi       = {10.1109/ASRU.2015.7404856},
  timestamp = {Tue, 23 Jun 2020 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ASRU.2015.7404856},
}

@InProceedings{Lanchantin2015,
  author    = {Pierre Lanchantin and Mark J. F. Gales and Penny Karanasou and Xunying Liu and Yanmin Qian and Linlin Wang and Philip C. Woodland and Chao Zhang},
  booktitle = {2015 {IEEE} Workshop on Automatic Speech Recognition and Understanding, {ASRU} 2015, Scottsdale, AZ, USA, December 13-17, 2015},
  title     = {The development of the {Cambridge University} alignment systems for the multi-genre broadcast challenge},
  year      = {2015},
  pages     = {647--653},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/asru/LanchantinGKLQW15.bib},
  doi       = {10.1109/ASRU.2015.7404857},
  timestamp = {Tue, 23 Jun 2020 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ASRU.2015.7404857},
}

@InProceedings{Karanasou2015,
  author    = {Karanasou, Penny and Gales, Mark J. F. and Lanchantin, Pierre and Liu, Xunying and Qian, Yanmin and Wang, Linlin and Woodland, Philip C. and Zhang, Chao},
  title     = {Speaker diarisation and longitudinal linking in multi-genre broadcast data},
  booktitle = {2015 {IEEE} Workshop on Automatic Speech Recognition and Understanding, {ASRU} 2015, Scottsdale, AZ, USA, December 13-17, 2015},
  pages     = {660--666},
  publisher = {{IEEE}},
  year      = {2015},
  doi       = {10.1109/ASRU.2015.7404859},
  url       = {https://doi.org/10.1109/ASRU.2015.7404859},
  biburl    = {https://dblp.org/rec/conf/asru/KaranasouGLLQWW15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 23 Jun 2020 01:00:00 +0200},
}

@InProceedings{You2015,
  author    = {You, Yongbin and Qian, Yanmin and Yu, Kai},
  title     = {Local trajectory based speech enhancement for robust speech recognition with deep neural network},
  booktitle = {{IEEE} China Summit and International Conference on Signal and Information Processing, ChinaSIP 2015, Chengdu, China, July 12-15, 2015},
  pages     = {5--9},
  publisher = {{IEEE}},
  year      = {2015},
  doi       = {10.1109/CHINASIP.2015.7230351},
  url       = {https://doi.org/10.1109/ChinaSIP.2015.7230351},
  biburl    = {https://dblp.org/rec/conf/chinasip/YouQY15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 16 Oct 2019 14:14:51 +0200},
}

@InProceedings{You2015a,
  author    = {Yongbin You and Yanmin Qian and Tianxing He and Kai Yu},
  booktitle = {{IEEE} China Summit and International Conference on Signal and Information Processing, ChinaSIP 2015, Chengdu, China, July 12-15, 2015},
  title     = {An investigation on {DNN}-derived bottleneck features for {GMM-HMM} based robust speech recognition},
  year      = {2015},
  pages     = {30--34},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/chinasip/YouQHY15.bib},
  doi       = {10.1109/CHINASIP.2015.7230356},
  timestamp = {Tue, 17 Oct 2017 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ChinaSIP.2015.7230356},
}

@InProceedings{Tan2015,
  author    = {Tan, Tian and Qian, Yanmin and Yin, Maofan and Zhuang, Yimeng and Yu, Kai},
  title     = {Cluster adaptive training for deep neural network},
  booktitle = {2015 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2015, South Brisbane, Queensland, Australia, April 19-24, 2015},
  pages     = {4325--4329},
  publisher = {{IEEE}},
  year      = {2015},
  doi       = {10.1109/ICASSP.2015.7178787},
  url       = {https://doi.org/10.1109/ICASSP.2015.7178787},
  biburl    = {https://dblp.org/rec/conf/icassp/TanQYZY15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
}

@InProceedings{Bu2015,
  author    = {Bu, Suliang and Zhao, Yunxin and Qian, Yanmin and Yu, Kai},
  title     = {A novel static parameter calculation method for model compensation},
  booktitle = {2015 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2015, South Brisbane, Queensland, Australia, April 19-24, 2015},
  pages     = {4510--4514},
  publisher = {{IEEE}},
  year      = {2015},
  doi       = {10.1109/ICASSP.2015.7178824},
  url       = {https://doi.org/10.1109/ICASSP.2015.7178824},
  biburl    = {https://dblp.org/rec/conf/icassp/BuZQY15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 17 Oct 2017 01:00:00 +0200},
}

@InProceedings{He2015,
  author    = {He, Tianxing and Xiang, Xu and Qian, Yanmin and Yu, Kai},
  title     = {Recurrent neural network language model with structured word embeddings for speech recognition},
  booktitle = {2015 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2015, South Brisbane, Queensland, Australia, April 19-24, 2015},
  pages     = {5396--5400},
  publisher = {{IEEE}},
  year      = {2015},
  doi       = {10.1109/ICASSP.2015.7179002},
  url       = {https://doi.org/10.1109/ICASSP.2015.7179002},
  biburl    = {https://dblp.org/rec/conf/icassp/HeXQY15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
}

@InProceedings{Qian2015a,
  author    = {Qian, Yanmin and He, Tianxing and Deng, Wei and Yu, Kai},
  title     = {Automatic model redundancy reduction for fast back-propagation for deep neural networks in speech recognition},
  booktitle = {2015 International Joint Conference on Neural Networks, {IJCNN} 2015, Killarney, Ireland, July 12-17, 2015},
  pages     = {1--6},
  publisher = {{IEEE}},
  year      = {2015},
  doi       = {10.1109/IJCNN.2015.7280335},
  url       = {https://doi.org/10.1109/IJCNN.2015.7280335},
  biburl    = {https://dblp.org/rec/conf/ijcnn/QianHDY15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 16 Oct 2019 14:14:55 +0200},
}

@InProceedings{Chen2015,
  author    = {Chen, Nanxin and Qian, Yanmin and Yu, Kai},
  title     = {Multi-task learning for text-dependent speaker verification},
  booktitle = {{INTERSPEECH} 2015, 16th Annual Conference of the International Speech Communication Association, Dresden, Germany, September 6-10, 2015},
  pages     = {185--189},
  publisher = {{ISCA}},
  year      = {2015},
  doi       = {10.21437/INTERSPEECH.2015-81},
  url       = {https://doi.org/10.21437/Interspeech.2015-81},
  biburl    = {https://dblp.org/rec/conf/interspeech/ChenQY15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
}

@InProceedings{Chen2015a,
  author    = {Nanxin Chen and Yanmin Qian and Heinrich Dinkel and Bo Chen and Kai Yu},
  booktitle = {{INTERSPEECH} 2015, 16th Annual Conference of the International Speech Communication Association, Dresden, Germany, September 6-10, 2015},
  title     = {Robust deep feature for spoofing detection - the {SJTU} system for {ASVspoof} 2015 challenge},
  year      = {2015},
  pages     = {2097--2101},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/ChenQDCY15.bib},
  doi       = {10.21437/INTERSPEECH.2015-474},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
  url       = {https://doi.org/10.21437/Interspeech.2015-474},
}

@InProceedings{Bi2015,
  author    = {Bi, Mengxiao and Qian, Yanmin and Yu, Kai},
  title     = {Very deep convolutional neural networks for {LVCSR}},
  booktitle = {{INTERSPEECH} 2015, 16th Annual Conference of the International Speech Communication Association, Dresden, Germany, September 6-10, 2015},
  pages     = {3259--3263},
  publisher = {{ISCA}},
  year      = {2015},
  doi       = {10.21437/INTERSPEECH.2015-656},
  url       = {https://doi.org/10.21437/Interspeech.2015-656},
  biburl    = {https://dblp.org/rec/conf/interspeech/BiQY15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
}

@InProceedings{Jin2015,
  author    = {Jin, Wengong and He, Tianxing and Qian, Yanmin and Yu, Kai},
  title     = {Paragraph vector based topic model for language model adaptation},
  booktitle = {{INTERSPEECH} 2015, 16th Annual Conference of the International Speech Communication Association, Dresden, Germany, September 6-10, 2015},
  pages     = {3516--3520},
  publisher = {{ISCA}},
  year      = {2015},
  doi       = {10.21437/INTERSPEECH.2015-697},
  url       = {https://doi.org/10.21437/Interspeech.2015-697},
  biburl    = {https://dblp.org/rec/conf/interspeech/JinHQY15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
}

@InProceedings{Deng2014,
  author    = {Deng, Wei and Qian, Yanmin and Fan, Yuchen and Fu, Tianfan and Yu, Kai},
  title     = {Stochastic data sweeping for fast {DNN} training},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2014, Florence, Italy, May 4-9, 2014},
  pages     = {240--244},
  publisher = {{IEEE}},
  year      = {2014},
  doi       = {10.1109/ICASSP.2014.6853594},
  url       = {https://doi.org/10.1109/ICASSP.2014.6853594},
  biburl    = {https://dblp.org/rec/conf/icassp/DengQFFY14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
}

@InProceedings{He2014,
  author    = {He, Tianxing and Fan, Yuchen and Qian, Yanmin and Tan, Tian and Yu, Kai},
  title     = {Reshaping deep neural network for fast decoding by node-pruning},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2014, Florence, Italy, May 4-9, 2014},
  pages     = {245--249},
  publisher = {{IEEE}},
  year      = {2014},
  doi       = {10.1109/ICASSP.2014.6853595},
  url       = {https://doi.org/10.1109/ICASSP.2014.6853595},
  biburl    = {https://dblp.org/rec/conf/icassp/HeFQTY14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sat, 31 Mar 2018 01:00:00 +0200},
}

@InProceedings{Bu2014,
  author    = {Suliang Bu and Yanmin Qian and Khe Chai Sim and Yongbin You and Kai Yu},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2014, Florence, Italy, May 4-9, 2014},
  title     = {Second order vector {Taylor} series based robust speech recognition},
  year      = {2014},
  pages     = {1769--1773},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/BuQSYY14.bib},
  doi       = {10.1109/ICASSP.2014.6853902},
  timestamp = {Tue, 17 Oct 2017 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICASSP.2014.6853902},
}

@InProceedings{Liu2014,
  author    = {Liu, Yuan and Fu, Tianfan and Fan, Yuchen and Qian, Yanmin and Yu, Kai},
  title     = {Speaker verification with deep features},
  booktitle = {2014 International Joint Conference on Neural Networks, {IJCNN} 2014, Beijing, China, July 6-11, 2014},
  pages     = {747--753},
  publisher = {{IEEE}},
  year      = {2014},
  doi       = {10.1109/IJCNN.2014.6889708},
  url       = {https://doi.org/10.1109/IJCNN.2014.6889708},
  biburl    = {https://dblp.org/rec/conf/ijcnn/LiuFFQY14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 16 Oct 2019 14:14:55 +0200},
}

@InProceedings{Fu2014,
  author    = {Fu, Tianfan and Qian, Yanmin and Liu, Yuan and Yu, Kai},
  title     = {Tandem deep features for text-dependent speaker verification},
  booktitle = {{INTERSPEECH} 2014, 15th Annual Conference of the International Speech Communication Association, Singapore, September 14-18, 2014},
  editor    = {Li, Haizhou and Meng, Helen M. and Ma, Bin and Chng, Engsiong and Xie, Lei},
  pages     = {1327--1331},
  publisher = {{ISCA}},
  year      = {2014},
  doi       = {10.21437/INTERSPEECH.2014-329},
  url       = {https://doi.org/10.21437/Interspeech.2014-329},
  biburl    = {https://dblp.org/rec/conf/interspeech/FuQLY14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
}

@InProceedings{Bu2014a,
  author    = {Bu, Suliang and Qian, Yanmin and Yu, Kai},
  title     = {A novel dynamic parameters calculation approach for model compensation},
  booktitle = {{INTERSPEECH} 2014, 15th Annual Conference of the International Speech Communication Association, Singapore, September 14-18, 2014},
  editor    = {Li, Haizhou and Meng, Helen M. and Ma, Bin and Chng, Engsiong and Xie, Lei},
  pages     = {2744--2748},
  publisher = {{ISCA}},
  year      = {2014},
  doi       = {10.21437/INTERSPEECH.2014-159},
  url       = {https://doi.org/10.21437/Interspeech.2014-159},
  biburl    = {https://dblp.org/rec/conf/interspeech/BuQY14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
}

@InProceedings{Niu2014,
  author    = {Niu, Jianwei and Qian, Yanmin and Yu, Kai},
  title     = {Acoustic emotion recognition using deep neural network},
  booktitle = {The 9th International Symposium on Chinese Spoken Language Processing, Singapore, September 12-14, 2014},
  editor    = {Dong, Minghui and Tao, Jianhua and Li, Haizhou and Zheng, Thomas Fang and Lu, Yanfeng},
  pages     = {128--132},
  publisher = {{IEEE}},
  year      = {2014},
  doi       = {10.1109/ISCSLP.2014.6936657},
  url       = {https://doi.org/10.1109/ISCSLP.2014.6936657},
  biburl    = {https://dblp.org/rec/conf/iscslp/NiuQY14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 18 Mar 2024 17:29:23 +0100},
}

@InProceedings{Qian2013,
  author    = {Qian, Yanmin and Yu, Kai and Liu, Jia},
  title     = {Combination of data borrowing strategies for low-resource {LVCSR}},
  booktitle = {2013 {IEEE} Workshop on Automatic Speech Recognition and Understanding, Olomouc, Czech Republic, December 8-12, 2013},
  pages     = {404--409},
  publisher = {{IEEE}},
  year      = {2013},
  doi       = {10.1109/ASRU.2013.6707764},
  url       = {https://doi.org/10.1109/ASRU.2013.6707764},
  biburl    = {https://dblp.org/rec/conf/asru/QianYL13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 20 Apr 2020 01:00:00 +0200},
}

@InProceedings{Qian2013a,
  author    = {Qian, Yanmin and Liu, Jia},
  title     = {{MLP-HMM} two-stage unsupervised training for low-resource languages on conversational telephone speech recognition},
  booktitle = {{INTERSPEECH} 2013, 14th Annual Conference of the International Speech Communication Association, Lyon, France, August 25-29, 2013},
  editor    = {Bimbot, Fr{\'{e}}d{\'{e}}ric and Cerisara, Christophe and Fougeron, C{\'{e}}cile and Gravier, Guillaume and Lamel, Lori and Pellegrino, Fran{\c{c}}ois and Perrier, Pascal},
  pages     = {1816--1820},
  publisher = {{ISCA}},
  year      = {2013},
  doi       = {10.21437/INTERSPEECH.2013-448},
  url       = {https://doi.org/10.21437/Interspeech.2013-448},
  biburl    = {https://dblp.org/rec/conf/interspeech/QianL13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
}

@Article{Gong2024,
  author    = {Gong, Xun and Wu, Yu and Li, Jinyu and Liu, Shujie and Zhao, Rui and Chen, Xie and Qian, Yanmin},
  title     = {Advanced Long-Content Speech Recognition With Factorized Neural Transducer},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {32},
  pages     = {1803--1815},
  year      = {2024},
  doi       = {10.1109/TASLP.2024.3350893},
  url       = {https://doi.org/10.1109/TASLP.2024.3350893},
  biburl    = {https://dblp.org/rec/journals/taslp/GongWLLZCQ24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 21 Mar 2024 00:00:00 +0100},
}

@Article{Chen2024,
  author        = {Chen, Wenxi and Liang, Yuzhe and Ma, Ziyang and Zheng, Zhisheng and Chen, Xie},
  title         = {{EAT:} Self-Supervised Pre-Training with Efficient Audio Transformer},
  journal       = {CoRR},
  volume        = {abs/2401.03497},
  year          = {2024},
  eprint        = {2401.03497},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2401.03497},
  url           = {https://doi.org/10.48550/arXiv.2401.03497},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2401-03497.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Fri, 26 Jan 2024 00:00:00 +0100},
}

@Article{Song2024,
  author        = {Song, Yakun and Chen, Zhuo and Wang, Xiaofei and Ma, Ziyang and Chen, Xie},
  title         = {{ELLA-V:} Stable Neural Codec Language Modeling with Alignment-guided Sequence Reordering},
  journal       = {CoRR},
  volume        = {abs/2401.07333},
  year          = {2024},
  eprint        = {2401.07333},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2401.07333},
  url           = {https://doi.org/10.48550/arXiv.2401.07333},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2401-07333.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Thu, 01 Feb 2024 00:00:00 +0100},
}

@Article{Zheng2024,
  author        = {Zheng, Zhisheng and Peng, Puyuan and Ma, Ziyang and Chen, Xie and Choi, Eunsol and Harwath, David},
  title         = {{BAT:} Learning to Reason about Spatial Sounds with Large Language Models},
  journal       = {CoRR},
  volume        = {abs/2402.01591},
  year          = {2024},
  eprint        = {2402.01591},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2402.01591},
  url           = {https://doi.org/10.48550/arXiv.2402.01591},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2402-01591.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Wed, 14 Feb 2024 00:00:00 +0100},
}

@Article{Ma2024,
  author        = {Ma, Ziyang and Yang, Guanrou and Yang, Yifan and Gao, Zhifu and Wang, Jiaming and Du, Zhihao and Yu, Fan and Chen, Qian and Zheng, Siqi and Zhang, Shiliang and Chen, Xie},
  title         = {An Embarrassingly Simple Approach for {LLM} with Strong {ASR} Capacity},
  journal       = {CoRR},
  volume        = {abs/2402.08846},
  year          = {2024},
  eprint        = {2402.08846},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2402.08846},
  url           = {https://doi.org/10.48550/arXiv.2402.08846},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2402-08846.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Mon, 19 Feb 2024 00:00:00 +0100},
}

@Article{Du2023,
  author    = {Du, Chenpeng and Guo, Yiwei and Chen, Xie and Yu, Kai},
  title     = {Speaker Adaptive Text-to-Speech With Timbre-Normalized Vector-Quantized Feature},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {31},
  pages     = {3446--3456},
  year      = {2023},
  doi       = {10.1109/TASLP.2023.3308374},
  url       = {https://doi.org/10.1109/TASLP.2023.3308374},
  biburl    = {https://dblp.org/rec/journals/taslp/DuGCY23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 09 Nov 2023 00:00:00 +0100},
}

@InProceedings{Yang2023a,
  author    = {Guanrou Yang and Ziyang Ma and Zhisheng Zheng and Yakun Song and Zhikang Niu and Xie Chen},
  booktitle = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2023, Taipei, Taiwan, December 16-20, 2023},
  title     = {{Fast-HuBERT}: An Efficient Training Framework for Self-Supervised Speech Representation Learning},
  year      = {2023},
  pages     = {1--7},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/asru/YangMZSNC23.bib},
  doi       = {10.1109/ASRU57964.2023.10389778},
  timestamp = {Tue, 13 Feb 2024 21:21:14 +0100},
  url       = {https://doi.org/10.1109/ASRU57964.2023.10389778},
}

@InProceedings{Chen2023b,
  author    = {Chen, Qi and Ma, Ziyang and Liu, Tao and Tan, Xu and Lu, Qu and Yu, Kai and Chen, Xie},
  title     = {Improving Few-Shot Learning for Talking Face System with {TTS} Data Augmentation},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  doi       = {10.1109/ICASSP49357.2023.10094777},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10094777},
  biburl    = {https://dblp.org/rec/conf/icassp/ChenMLTLYC23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sun, 05 Nov 2023 16:51:21 +0100},
}

@InProceedings{Chen2023c,
  author    = {Chen, Xie and Ma, Ziyang and Tang, Changli and Wang, Yujin and Zheng, Zhisheng},
  title     = {Front-End Adapter: Adapting Front-End Input of Speech Based Self-Supervised Learning for Speech Recognition},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  doi       = {10.1109/ICASSP49357.2023.10095130},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10095130},
  biburl    = {https://dblp.org/rec/conf/icassp/ChenMTWZ23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}

@InProceedings{Guo2023,
  author    = {Yiwei Guo and Chenpeng Du and Xie Chen and Kai Yu},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  title     = {{EmoDiff}: Intensity Controllable Emotional Text-to-Speech with Soft-Label Guidance},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/GuoDCY23.bib},
  doi       = {10.1109/ICASSP49357.2023.10095621},
  timestamp = {Fri, 10 Nov 2023 00:00:00 +0100},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10095621},
}

@InProceedings{Du2023a,
  author    = {Chenpeng Du and Qi Chen and Tianyu He and Xu Tan and Xie Chen and Kai Yu and Sheng Zhao and Jiang Bian},
  booktitle = {Proceedings of the 31st {ACM} International Conference on Multimedia, {MM} 2023, Ottawa, ON, Canada, 29 October 2023 - 3 November 2023},
  title     = {{DAE-Talker}: High Fidelity Speech-Driven Talking Face Generation with Diffusion Autoencoder},
  year      = {2023},
  editor    = {Abdulmotaleb El{-}Saddik and Tao Mei and Rita Cucchiara and Marco Bertini and Diana Patricia Tobon Vallejo and Pradeep K. Atrey and M. Shamim Hossain},
  pages     = {4281--4289},
  publisher = {{ACM}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/mm/DuCHT00Z023.bib},
  doi       = {10.1145/3581783.3613753},
  timestamp = {Mon, 27 Nov 2023 00:00:00 +0100},
  url       = {https://doi.org/10.1145/3581783.3613753},
}

@Article{Yang2023b,
  author        = {Yang, Yifan and Yang, Xiaoyu and Guo, Liyong and Yao, Zengwei and Kang, Wei and Kuang, Fangjun and Lin, Long and Chen, Xie and Povey, Daniel},
  title         = {Blank-regularized {CTC} for Frame Skipping in Neural Transducer},
  journal       = {CoRR},
  volume        = {abs/2305.11558},
  year          = {2023},
  eprint        = {2305.11558},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2305.11558},
  url           = {https://doi.org/10.48550/arXiv.2305.11558},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2305-11558.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Wed, 20 Sep 2023 01:00:00 +0200},
}

@Article{Du2023c,
  author        = {Chenpeng Du and Yiwei Guo and Feiyu Shen and Zhijun Liu and Zheng Liang and Xie Chen and Shuai Wang and Hui Zhang and Kai Yu},
  journal       = {CoRR},
  title         = {{UniCATS}: {A} Unified Context-Aware Text-to-Speech Framework with Contextual {VQ}-Diffusion and Vocoding},
  year          = {2023},
  volume        = {abs/2306.07547},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2306-07547.bib},
  doi           = {10.48550/ARXIV.2306.07547},
  eprint        = {2306.07547},
  timestamp     = {Sat, 17 Jun 2023 01:00:00 +0200},
  url           = {https://doi.org/10.48550/arXiv.2306.07547},
}

@Article{Liang2023a,
  author        = {Liang, Zheng and Song, Zheshu and Ma, Ziyang and Du, Chenpeng and Yu, Kai and Chen, Xie},
  title         = {Improving Code-Switching and Named Entity Recognition in {ASR} with Speech Editing based Data Augmentation},
  journal       = {CoRR},
  volume        = {abs/2306.08588},
  year          = {2023},
  eprint        = {2306.08588},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2306.08588},
  url           = {https://doi.org/10.48550/arXiv.2306.08588},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2306-08588.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Sun, 18 Jun 2023 01:00:00 +0200},
}

@Article{Ma2023,
  author        = {Ma, Ziyang and Zheng, Zhisheng and Yang, Guanrou and Wang, Yu and Zhang, Chao and Chen, Xie},
  title         = {Pushing the Limits of Unsupervised Unit Discovery for {SSL} Speech Representation},
  journal       = {CoRR},
  volume        = {abs/2306.08920},
  year          = {2023},
  eprint        = {2306.08920},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2306.08920},
  url           = {https://doi.org/10.48550/arXiv.2306.08920},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2306-08920.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Sun, 18 Jun 2023 01:00:00 +0200},
}

@Article{Cui2023,
  author        = {Cui, Mingyu and Kang, Jiawen and Deng, Jiajun and Yin, Xi and Xie, Yutao and Chen, Xie and Liu, Xunying},
  title         = {Towards Effective and Compact Contextual Representation for Conformer Transducer Speech Recognition Systems},
  journal       = {CoRR},
  volume        = {abs/2306.13307},
  year          = {2023},
  eprint        = {2306.13307},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2306.13307},
  url           = {https://doi.org/10.48550/arXiv.2306.13307},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2306-13307.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Tue, 04 Jul 2023 01:00:00 +0200},
}

@Article{Liu2023a,
  author        = {Liu, Sen and Guo, Yiwei and Du, Chenpeng and Chen, Xie and Yu, Kai},
  title         = {{DSE-TTS:} Dual Speaker Embedding for Cross-Lingual Text-to-Speech},
  journal       = {CoRR},
  volume        = {abs/2306.14145},
  year          = {2023},
  eprint        = {2306.14145},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2306.14145},
  url           = {https://doi.org/10.48550/arXiv.2306.14145},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2306-14145.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Tue, 27 Jun 2023 01:00:00 +0200},
}

@Article{Zheng2023,
  author        = {Zheng, Zhisheng and Ma, Ziyang and Wang, Yu and Chen, Xie},
  title         = {Unsupervised Active Learning: Optimizing Labeling Cost-Effectiveness for Automatic Speech Recognition},
  journal       = {CoRR},
  volume        = {abs/2308.14814},
  year          = {2023},
  eprint        = {2308.14814},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2308.14814},
  url           = {https://doi.org/10.48550/arXiv.2308.14814},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2308-14814.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Fri, 01 Sep 2023 01:00:00 +0200},
}

@Article{Guo2023a,
  author        = {Yiwei Guo and Chenpeng Du and Ziyang Ma and Xie Chen and Kai Yu},
  journal       = {CoRR},
  title         = {{VoiceFlow:} Efficient Text-to-Speech with Rectified Flow Matching},
  year          = {2023},
  volume        = {abs/2309.05027},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-05027.bib},
  doi           = {10.48550/ARXIV.2309.05027},
  eprint        = {2309.05027},
  timestamp     = {Wed, 20 Sep 2023 01:00:00 +0200},
  url           = {https://doi.org/10.48550/arXiv.2309.05027},
}

@Article{Yang2023c,
  author        = {Yang, Yifan and Shen, Feiyu and Du, Chenpeng and Ma, Ziyang and Yu, Kai and Povey, Daniel and Chen, Xie},
  title         = {Towards Universal Speech Discrete Tokens: {A} Case Study for {ASR} and {TTS}},
  journal       = {CoRR},
  volume        = {abs/2309.07377},
  year          = {2023},
  eprint        = {2309.07377},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2309.07377},
  url           = {https://doi.org/10.48550/arXiv.2309.07377},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-07377.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Wed, 20 Sep 2023 01:00:00 +0200},
}

@Article{Wang2023d,
  author        = {Wang, Peng and Yang, Yifan and Liang, Zheng and Tan, Tian and Zhang, Shiliang and Chen, Xie},
  title         = {Incorporating Class-based Language Model for Named Entity Recognition in Factorized Neural Transducer},
  journal       = {CoRR},
  volume        = {abs/2309.07648},
  year          = {2023},
  eprint        = {2309.07648},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2309.07648},
  url           = {https://doi.org/10.48550/arXiv.2309.07648},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-07648.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Thu, 18 Jan 2024 00:00:00 +0100},
}

@Article{Liu2023b,
  author        = {Liu, Junzhe and Yu, Jianwei and Chen, Xie},
  title         = {Improved Factorized Neural Transducer Model For text-only Domain Adaptation},
  journal       = {CoRR},
  volume        = {abs/2309.09524},
  year          = {2023},
  eprint        = {2309.09524},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2309.09524},
  url           = {https://doi.org/10.48550/arXiv.2309.09524},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-09524.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Fri, 22 Sep 2023 01:00:00 +0200},
}

@Article{Ma2023a,
  author        = {Ziyang Ma and Wen Wu and Zhisheng Zheng and Yiwei Guo and Qian Chen and Shiliang Zhang and Xie Chen},
  journal       = {CoRR},
  title         = {Leveraging Speech {PTM}, Text {LLM}, and Emotional {TTS} for Speech Emotion Recognition},
  year          = {2023},
  volume        = {abs/2309.10294},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-10294.bib},
  doi           = {10.48550/ARXIV.2309.10294},
  eprint        = {2309.10294},
  timestamp     = {Wed, 27 Sep 2023 01:00:00 +0200},
  url           = {https://doi.org/10.48550/arXiv.2309.10294},
}

@Article{Shen2023,
  author        = {Shen, Feiyu and Guo, Yiwei and Du, Chenpeng and Chen, Xie and Yu, Kai},
  title         = {Acoustic {BPE} for Speech Generation with Discrete Tokens},
  journal       = {CoRR},
  volume        = {abs/2310.14580},
  year          = {2023},
  eprint        = {2310.14580},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2310.14580},
  url           = {https://doi.org/10.48550/arXiv.2310.14580},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2310-14580.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Mon, 30 Oct 2023 00:00:00 +0100},
}

@Article{Zhang2023d,
  author        = {Zhang, Hanglei and Guo, Yiwei and Liu, Sen and Chen, Xie and Yu, Kai},
  title         = {Expressive {TTS} Driven by Natural Language Prompts Using Few Human Annotations},
  journal       = {CoRR},
  volume        = {abs/2311.01260},
  year          = {2023},
  eprint        = {2311.01260},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2311.01260},
  url           = {https://doi.org/10.48550/arXiv.2311.01260},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2311-01260.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Wed, 08 Nov 2023 00:00:00 +0100},
}

@Article{Ma2023b,
  author        = {Ziyang Ma and Zhisheng Zheng and Jiaxin Ye and Jinchao Li and Zhifu Gao and Shiliang Zhang and Xie Chen},
  journal       = {CoRR},
  title         = {{emotion2vec:} Self-Supervised Pre-Training for Speech Emotion Representation},
  year          = {2023},
  volume        = {abs/2312.15185},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2312-15185.bib},
  doi           = {10.48550/ARXIV.2312.15185},
  eprint        = {2312.15185},
  timestamp     = {Thu, 18 Jan 2024 00:00:00 +0100},
  url           = {https://doi.org/10.48550/arXiv.2312.15185},
}

@InProceedings{Chen2022h,
  author    = {Chen, Xie and Meng, Zhong and Parthasarathy, Sarangarajan and Li, Jinyu},
  title     = {Factorized Neural Transducer for Efficient Language Model Adaptation},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {8132--8136},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9746908},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746908},
  biburl    = {https://dblp.org/rec/conf/icassp/ChenMPL22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 21 Mar 2023 00:00:00 +0100},
}

@InProceedings{Du2022,
  author    = {Du, Chenpeng and Guo, Yiwei and Chen, Xie and Yu, Kai},
  title     = {{VQTTS:} High-Fidelity Text-to-Speech Synthesis with Self-Supervised {VQ} Acoustic Feature},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Ko, Hanseok and Hansen, John H. L.},
  pages     = {1596--1600},
  publisher = {{ISCA}},
  year      = {2022},
  doi       = {10.21437/INTERSPEECH.2022-489},
  url       = {https://doi.org/10.21437/Interspeech.2022-489},
  biburl    = {https://dblp.org/rec/conf/interspeech/DuGC022.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 26 Jun 2023 01:00:00 +0200},
}

@InProceedings{Meng2022,
  author    = {Meng, Zhong and Gaur, Yashesh and Kanda, Naoyuki and Li, Jinyu and Chen, Xie and Wu, Yu and Gong, Yifan},
  title     = {Internal Language Model Adaptation with Text-Only Data for End-to-End Speech Recognition},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Ko, Hanseok and Hansen, John H. L.},
  pages     = {2608--2612},
  publisher = {{ISCA}},
  year      = {2022},
  doi       = {10.21437/INTERSPEECH.2022-13},
  url       = {https://doi.org/10.21437/Interspeech.2022-13},
  biburl    = {https://dblp.org/rec/conf/interspeech/MengGK0CW022.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}

@Article{Wang2022d,
  author        = {Wang, Yujin and Tang, Changli and Ma, Ziyang and Zheng, Zhisheng and Chen, Xie and Zhang, Wei{-}Qiang},
  title         = {Exploring Effective Distillation of Self-Supervised Speech Models for Automatic Speech Recognition},
  journal       = {CoRR},
  volume        = {abs/2210.15631},
  year          = {2022},
  eprint        = {2210.15631},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2210.15631},
  url           = {https://doi.org/10.48550/arXiv.2210.15631},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2210-15631.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Tue, 24 Jan 2023 00:00:00 +0100},
}

@Article{Ma2022,
  author        = {Ma, Ziyang and Zheng, Zhisheng and Tang, Changli and Wang, Yujin and Chen, Xie},
  title         = {{MT4SSL:} Boosting Self-Supervised Speech Representation Learning by Integrating Multiple Targets},
  journal       = {CoRR},
  volume        = {abs/2211.07321},
  year          = {2022},
  eprint        = {2211.07321},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2211.07321},
  url           = {https://doi.org/10.48550/arXiv.2211.07321},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2211-07321.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Tue, 24 Jan 2023 00:00:00 +0100},
}

@Article{Guo2022,
  author        = {Yiwei Guo and Chenpeng Du and Xie Chen and Kai Yu},
  journal       = {CoRR},
  title         = {{EmoDiff:} Intensity Controllable Emotional Text-to-Speech with Soft-Label Guidance},
  year          = {2022},
  volume        = {abs/2211.09496},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2211-09496.bib},
  doi           = {10.48550/ARXIV.2211.09496},
  eprint        = {2211.09496},
  timestamp     = {Tue, 24 Jan 2023 00:00:00 +0100},
  url           = {https://doi.org/10.48550/arXiv.2211.09496},
}

@Article{Tang2022,
  author        = {Tang, Changli and Wang, Yujin and Chen, Xie and Zhang, Wei{-}Qiang},
  title         = {Exploring Effective Fusion Algorithms for Speech Based Self-Supervised Learning Models},
  journal       = {CoRR},
  volume        = {abs/2212.10092},
  year          = {2022},
  eprint        = {2212.10092},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2212.10092},
  url           = {https://doi.org/10.48550/arXiv.2212.10092},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2212-10092.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Tue, 24 Jan 2023 00:00:00 +0100},
}

@Article{Xu2024,
  author    = {Xu, Xuenan and Xie, Zeyu and Wu, Mengyue and Yu, Kai},
  title     = {Beyond the Status Quo: {A} Contemporary Survey of Advances and Challenges in Audio Captioning},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {32},
  pages     = {95--112},
  year      = {2024},
  doi       = {10.1109/TASLP.2023.3321968},
  url       = {https://doi.org/10.1109/TASLP.2023.3321968},
  biburl    = {https://dblp.org/rec/journals/taslp/XuXWY24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 09 Nov 2023 00:00:00 +0100},
}

@Article{Xu2024a,
  author        = {Xu, Xuenan and Ma, Ziyang and Wu, Mengyue and Yu, Kai},
  title         = {Towards Weakly Supervised Text-to-Audio Grounding},
  journal       = {CoRR},
  volume        = {abs/2401.02584},
  year          = {2024},
  eprint        = {2401.02584},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2401.02584},
  url           = {https://doi.org/10.48550/arXiv.2401.02584},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2401-02584.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Tue, 23 Jan 2024 00:00:00 +0100},
}

@Article{Chen2023f,
  author    = {Chen, Zhi and Liu, Yuncong and Chen, Lu and Zhu, Su and Wu, Mengyue and Yu, Kai},
  title     = {{OPAL:} Ontology-Aware Pretrained Language Model for End-to-End Task-Oriented Dialogue},
  journal   = {Trans. Assoc. Comput. Linguistics},
  volume    = {11},
  pages     = {68--84},
  year      = {2023},
  url       = {https://transacl.org/ojs/index.php/tacl/article/view/4067},
  biburl    = {https://dblp.org/rec/journals/tacl/ChenLCZWY23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 27 Feb 2023 00:00:00 +0100},
}

@InProceedings{Huang2023,
  author    = {Jieyi Huang and Chunhao Zhang and Mengyue Wu and Kenny Q. Zhu},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2023, Toronto, Canada, July 9-14, 2023},
  title     = {Transcribing Vocal Communications of Domestic {Shiba} {Inu} Dogs},
  year      = {2023},
  editor    = {Anna Rogers and Jordan L. Boyd{-}Graber and Naoaki Okazaki},
  pages     = {13819--13832},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/acl/HuangZWZ23.bib},
  doi       = {10.18653/V1/2023.FINDINGS-ACL.869},
  timestamp = {Mon, 11 Dec 2023 00:00:00 +0100},
  url       = {https://doi.org/10.18653/v1/2023.findings-acl.869},
}

@InProceedings{Chen2023g,
  author    = {Siyuan Chen and Zhiling Zhang and Mengyue Wu and Kenny Q. Zhu},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2023, Singapore, December 6-10, 2023},
  title     = {Detection of Multiple Mental Disorders from Social Media with Two-Stream Psychiatric Experts},
  year      = {2023},
  editor    = {Houda Bouamor and Juan Pino and Kalika Bali},
  pages     = {9071--9084},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/emnlp/ChenZWZ23.bib},
  doi       = {10.18653/V1/2023.EMNLP-MAIN.562},
  timestamp = {Wed, 13 Dec 2023 17:20:20 +0100},
  url       = {https://aclanthology.org/2023.emnlp-main.562},
}

@InProceedings{Zhang2023e,
  author    = {Zhiling Zhang and Mengyue Wu and Kenny Q. Zhu},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2023, Singapore, December 6-10, 2023},
  title     = {Semantic Space Grounded Weighted Decoding for Multi-Attribute Controllable Dialogue Generation},
  year      = {2023},
  editor    = {Houda Bouamor and Juan Pino and Kalika Bali},
  pages     = {13230--13243},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/emnlp/ZhangWZ23.bib},
  doi       = {10.18653/V1/2023.EMNLP-MAIN.817},
  timestamp = {Tue, 19 Dec 2023 00:00:00 +0100},
  url       = {https://aclanthology.org/2023.emnlp-main.817},
}

@InProceedings{Li2023e,
  author    = {Guangwei Li and Xuenan Xu and Lingfeng Dai and Mengyue Wu and Kai Yu},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  title     = {Diverse and Vivid Sound Generation from Text Descriptions},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/LiXDWY23.bib},
  doi       = {10.1109/ICASSP49357.2023.10096643},
  timestamp = {Sun, 05 Nov 2023 16:51:21 +0100},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096643},
}

@InProceedings{Xu2023,
  author    = {Xu, Xuenan and Wu, Mengyue and Yu, Kai},
  title     = {Investigating Pooling Strategies and Loss Functions for Weakly-Supervised Text-to-Audio Grounding via Contrastive Learning},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2023 - Workshops, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  doi       = {10.1109/ICASSPW59220.2023.10192960},
  url       = {https://doi.org/10.1109/ICASSPW59220.2023.10192960},
  biburl    = {https://dblp.org/rec/conf/icassp/XuWY23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 07 Aug 2023 15:56:26 +0200},
}

@InProceedings{Xu2023a,
  author    = {Xuenan Xu and Zhiling Zhang and Zelin Zhou and Pingyue Zhang and Zeyu Xie and Mengyue Wu and Kenny Q. Zhu},
  booktitle = {Proceedings of the 31st {ACM} International Conference on Multimedia, {MM} 2023, Ottawa, ON, Canada, 29 October 2023 - 3 November 2023},
  title     = {{BLAT:} Bootstrapping Language-Audio Pre-training based on {AudioSet} Tag-guided Synthetic Data},
  year      = {2023},
  editor    = {Abdulmotaleb El{-}Saddik and Tao Mei and Rita Cucchiara and Marco Bertini and Diana Patricia Tobon Vallejo and Pradeep K. Atrey and M. Shamim Hossain},
  pages     = {2756--2764},
  publisher = {{ACM}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/mm/XuZZZXWZ23.bib},
  doi       = {10.1145/3581783.3613820},
  timestamp = {Thu, 09 Nov 2023 00:00:00 +0100},
  url       = {https://doi.org/10.1145/3581783.3613820},
}

@Article{Chen2023h,
  author        = {Siyuan Chen and Mengyue Wu and Kenny Q. Zhu and Kunyao Lan and Zhiling Zhang and Lyuchun Cui},
  journal       = {CoRR},
  title         = {{LLM}-empowered Chatbots for Psychiatrist and Patient Simulation: Application and Evaluation},
  year          = {2023},
  volume        = {abs/2305.13614},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2305-13614.bib},
  doi           = {10.48550/ARXIV.2305.13614},
  eprint        = {2305.13614},
  timestamp     = {Mon, 05 Jun 2023 01:00:00 +0200},
  url           = {https://doi.org/10.48550/arXiv.2305.13614},
}

@Article{Xie2023,
  author        = {Xie, Zeyu and Xu, Xuenan and Wu, Mengyue and Yu, Kai},
  title         = {Enhance Temporal Relations in Audio Captioning with Sound Event Detection},
  journal       = {CoRR},
  volume        = {abs/2306.01533},
  year          = {2023},
  eprint        = {2306.01533},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2306.01533},
  url           = {https://doi.org/10.48550/arXiv.2306.01533},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2306-01533.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Mon, 12 Jun 2023 01:00:00 +0200},
}

@Article{Zhang2023g,
  author        = {Zhang, Hanxue and Xie, Zeyu and Xu, Xuenan and Wu, Mengyue and Yu, Kai},
  title         = {Improving Audio Caption Fluency with Automatic Error Correction},
  journal       = {CoRR},
  volume        = {abs/2306.10090},
  year          = {2023},
  eprint        = {2306.10090},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2306.10090},
  url           = {https://doi.org/10.48550/arXiv.2306.10090},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2306-10090.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Fri, 23 Jun 2023 01:00:00 +0200},
}

@Article{Sun2023,
  author        = {Sun, Luoyi and Xu, Xuenan and Wu, Mengyue and Xie, Weidi},
  title         = {A Large-scale Dataset for Audio-Language Representation Learning},
  journal       = {CoRR},
  volume        = {abs/2309.11500},
  year          = {2023},
  eprint        = {2309.11500},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2309.11500},
  url           = {https://doi.org/10.48550/arXiv.2309.11500},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-11500.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Mon, 25 Sep 2023 01:00:00 +0200},
}

@Article{Huang2023a,
  author        = {Jieyi Huang and Chunhao Zhang and Yufei Wang and Mengyue Wu and Kenny Q. Zhu},
  journal       = {CoRR},
  title         = {Does My Dog {``Speak''} Like Me? The Acoustic Correlation between Pet Dogs and Their Human Owners},
  year          = {2023},
  volume        = {abs/2309.13085},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-13085.bib},
  doi           = {10.48550/ARXIV.2309.13085},
  eprint        = {2309.13085},
  timestamp     = {Mon, 11 Dec 2023 00:00:00 +0100},
  url           = {https://doi.org/10.48550/arXiv.2309.13085},
}

@Article{Wang2023e,
  author        = {Wang, Yufei and Zhang, Chunhao and Huang, Jieyi and Wu, Mengyue and Zhu, Kenny Q.},
  title         = {Towards Lexical Analysis of Dog Vocalizations via Online Videos},
  journal       = {CoRR},
  volume        = {abs/2309.13086},
  year          = {2023},
  eprint        = {2309.13086},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2309.13086},
  url           = {https://doi.org/10.48550/arXiv.2309.13086},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-13086.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Mon, 11 Dec 2023 00:00:00 +0100},
}

@Article{Jin2023,
  author        = {Haoan Jin and Siyuan Chen and Mengyue Wu and Kenny Q. Zhu},
  journal       = {CoRR},
  title         = {{PsyEval:} {A} Comprehensive Large Language Model Evaluation Benchmark for Mental Health},
  year          = {2023},
  volume        = {abs/2311.09189},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2311-09189.bib},
  doi           = {10.48550/ARXIV.2311.09189},
  eprint        = {2311.09189},
  timestamp     = {Tue, 21 Nov 2023 00:00:00 +0100},
  url           = {https://doi.org/10.48550/arXiv.2311.09189},
}

@InProceedings{Yao2022,
  author    = {Binwei Yao and Chao Shi and Likai Zou and Lingfeng Dai and Mengyue Wu and Lu Chen and Zhen Wang and Kai Yu},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2022, Abu Dhabi, United Arab Emirates, December 7-11, 2022},
  title     = {{D4:} a {Chinese} Dialogue Dataset for Depression-Diagnosis-Oriented Chat},
  year      = {2022},
  editor    = {Yoav Goldberg and Zornitsa Kozareva and Yue Zhang},
  pages     = {2438--2459},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/emnlp/YaoSZDWCWY22.bib},
  doi       = {10.18653/V1/2022.EMNLP-MAIN.156},
  timestamp = {Thu, 10 Aug 2023 01:00:00 +0200},
  url       = {https://doi.org/10.18653/v1/2022.emnlp-main.156},
}

@InProceedings{Zhang2022c,
  author    = {Zhang, Zhiling and Chen, Siyuan and Wu, Mengyue and Zhu, Kenny Q.},
  title     = {Symptom Identification for Interpretable Detection of Multiple Mental Disorders on Social Media},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2022, Abu Dhabi, United Arab Emirates, December 7-11, 2022},
  editor    = {Goldberg, Yoav and Kozareva, Zornitsa and Zhang, Yue},
  pages     = {9970--9985},
  publisher = {Association for Computational Linguistics},
  year      = {2022},
  doi       = {10.18653/V1/2022.EMNLP-MAIN.677},
  url       = {https://doi.org/10.18653/v1/2022.emnlp-main.677},
  biburl    = {https://dblp.org/rec/conf/emnlp/ZhangCWZ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 10 Aug 2023 01:00:00 +0200},
}

@InProceedings{Li2022c,
  author    = {Li, Guangwei and Xu, Xuenan and Dinkel, Heinrich and Wu, Mengyue and Yu, Kai},
  title     = {Category-Adapted Sound Event Enhancement with Weakly Labeled Data},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {851--855},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9747722},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9747722},
  biburl    = {https://dblp.org/rec/conf/icassp/LiXDWY22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 07 Jun 2022 17:34:47 +0200},
}

@InProceedings{Xu2022,
  author    = {Xu, Xuenan and Wu, Mengyue and Yu, Kai},
  title     = {Diversity-Controllable and Accurate Audio Captioning Based on Neural Condition},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {971--975},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9746834},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746834},
  biburl    = {https://dblp.org/rec/conf/icassp/XuWY22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}

@InProceedings{Zhou2022c,
  author    = {Zhou, Zelin and Zhang, Zhiling and Xu, Xuenan and Xie, Zeyu and Wu, Mengyue and Zhu, Kenny Q.},
  title     = {Can Audio Captions Be Evaluated With Image Caption Metrics?},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {981--985},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9746427},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746427},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhouZXXWZ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}

@InProceedings{Li2022d,
  author    = {Li, Guangwei and Xu, Xuenan and Wu, Mengyue and Yu, Kai},
  title     = {Navigating Audio-Visual Event Detection Across Mismatched Modalities},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {1975--1979},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9747579},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9747579},
  biburl    = {https://dblp.org/rec/conf/icassp/LiXWY22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}

@InProceedings{Lou2022,
  author    = {Lou, Siyu and Xu, Xuenan and Wu, Mengyue and Yu, Kai},
  title     = {Audio-Text Retrieval in Context},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {4793--4797},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9746786},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746786},
  biburl    = {https://dblp.org/rec/conf/icassp/LouXWY22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
}

@InProceedings{Wu2022a,
  author    = {Wu, Wen and Wu, Mengyue and Yu, Kai},
  title     = {Climate and Weather: Inspecting Depression Detection via Emotion Recognition},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {6262--6266},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9746634},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746634},
  biburl    = {https://dblp.org/rec/conf/icassp/WuWY22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
}

@InProceedings{Zhang2022d,
  author    = {Zhiling Zhang and Siyuan Chen and Mengyue Wu and Kenny Q. Zhu},
  booktitle = {Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence, {IJCAI} 2022, Vienna, Austria, 23-29 July 2022},
  title     = {Psychiatric Scale Guided Risky Post Screening for Early Detection of Depression},
  year      = {2022},
  editor    = {De Raedt, Luc},
  pages     = {5220--5226},
  publisher = {ijcai.org},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/ijcai/ZhangCWZ22.bib},
  doi       = {10.24963/IJCAI.2022/725},
  timestamp = {Wed, 27 Jul 2022 16:43:00 +0200},
  url       = {https://doi.org/10.24963/ijcai.2022/725},
}

@Article{Xu2022a,
  author        = {Xu, Xuenan and Wu, Mengyue and Yu, Kai},
  title         = {A Comprehensive Survey of Automated Audio Captioning},
  journal       = {CoRR},
  volume        = {abs/2205.05357},
  year          = {2022},
  eprint        = {2205.05357},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2205.05357},
  url           = {https://doi.org/10.48550/arXiv.2205.05357},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2205-05357.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Tue, 17 May 2022 01:00:00 +0200},
}

@Article{Chen2022i,
  author        = {Zhi Chen and Jijia Bao and Lu Chen and Yuncong Liu and Da Ma and Bei Chen and Mengyue Wu and Su Zhu and Jian{-}Guang Lou and Kai Yu},
  journal       = {CoRR},
  title         = {{DialogZoo:} Large-Scale Dialog-Oriented Task Learning},
  year          = {2022},
  volume        = {abs/2205.12662},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2205-12662.bib},
  doi           = {10.48550/ARXIV.2205.12662},
  eprint        = {2205.12662},
  timestamp     = {Mon, 30 May 2022 01:00:00 +0200},
  url           = {https://doi.org/10.48550/arXiv.2205.12662},
}

@Article{Dinkel2021,
  author    = {Dinkel, Heinrich and Wu, Mengyue and Yu, Kai},
  title     = {Towards Duration Robust Weakly Supervised Sound Event Detection},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {29},
  pages     = {887--900},
  year      = {2021},
  doi       = {10.1109/TASLP.2021.3054313},
  url       = {https://doi.org/10.1109/TASLP.2021.3054313},
  biburl    = {https://dblp.org/rec/journals/taslp/DinkelW021.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 23 Mar 2021 00:00:00 +0100},
}

@Article{Dinkel2021a,
  author    = {Dinkel, Heinrich and Wang, Shuai and Xu, Xuenan and Wu, Mengyue and Yu, Kai},
  title     = {Voice Activity Detection in the Wild: {A} Data-Driven Approach Using Teacher-Student Training},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {29},
  pages     = {1542--1555},
  year      = {2021},
  doi       = {10.1109/TASLP.2021.3073596},
  url       = {https://doi.org/10.1109/TASLP.2021.3073596},
  biburl    = {https://dblp.org/rec/journals/taslp/DinkelWXWY21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 15 Jun 2021 01:00:00 +0200},
}

@InProceedings{Zhang2021f,
  author    = {Zhang, Die and Zhang, Hao and Zhou, Huilin and Bao, Xiaoyi and Huo, Da and Chen, Ruizhao and Cheng, Xu and Wu, Mengyue and Zhang, Quanshi},
  title     = {Building Interpretable Interaction Trees for Deep {NLP} Models},
  booktitle = {Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI} 2021, Thirty-Third Conference on Innovative Applications of Artificial Intelligence, {IAAI} 2021, The Eleventh Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2021, Virtual Event, February 2-9, 2021},
  pages     = {14328--14337},
  publisher = {{AAAI} Press},
  year      = {2021},
  doi       = {10.1609/AAAI.V35I16.17685},
  url       = {https://doi.org/10.1609/aaai.v35i16.17685},
  biburl    = {https://dblp.org/rec/conf/aaai/ZhangZZBHCCWZ21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sat, 21 Oct 2023 01:00:00 +0200},
}

@InProceedings{Chen2021d,
  author    = {Zhi Chen and Lu Chen and Hanqi Li and Ruisheng Cao and Da Ma and Mengyue Wu and Kai Yu},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL/IJCNLP} 2021, Online Event, August 1-6, 2021},
  title     = {Decoupled Dialogue Modeling and Semantic Parsing for Multi-Turn Text-to-{SQL}},
  year      = {2021},
  editor    = {Chengqing Zong and Fei Xia and Wenjie Li and Roberto Navigli},
  pages     = {3063--3074},
  publisher = {Association for Computational Linguistics},
  series    = {Findings of {ACL}},
  volume    = {{ACL/IJCNLP} 2021},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/acl/ChenCLCMWY21.bib},
  doi       = {10.18653/V1/2021.FINDINGS-ACL.270},
  timestamp = {Thu, 23 Jun 2022 01:00:00 +0200},
  url       = {https://doi.org/10.18653/v1/2021.findings-acl.270},
}

@InProceedings{Zhang2021g,
  author    = {Zhiling Zhang and Zelin Zhou and Haifeng Tang and Guangwei Li and Mengyue Wu and Kenny Q. Zhu},
  title     = {Enriching Ontology with Temporal Commonsense for Low-Resource Audio Tagging},
  booktitle = {{CIKM} '21: The 30th {ACM} International Conference on Information and Knowledge Management, Virtual Event, Queensland, Australia, November 1 - 5, 2021},
  year      = {2021},
  editor    = {Gianluca Demartini and Guido Zuccon and J. Shane Culpepper and Zi Huang and Hanghang Tong},
  pages     = {3652--3656},
  publisher = {{ACM}},
  doi       = {10.1145/3459637.3482097},
  url       = {https://doi.org/10.1145/3459637.3482097},
  biburl    = {https://dblp.org/rec/conf/cikm/ZhangZTLWZ21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 16 Aug 2022 23:04:38 +0200},
}

@InProceedings{Xu2021,
  author    = {Xuenan Xu and Heinrich Dinkel and Mengyue Wu and Kai Yu},
  title     = {Text-to-Audio Grounding: Building Correspondence Between Captions and Sound Events},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  year      = {2021},
  pages     = {606--610},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP39728.2021.9414834},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9414834},
  biburl    = {https://dblp.org/rec/conf/icassp/XuDW021.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 08 Jul 2021 17:12:48 +0200},
}

@InProceedings{Xu2021a,
  author    = {Xuenan Xu and Heinrich Dinkel and Mengyue Wu and Zeyu Xie and Kai Yu},
  title     = {Investigating Local and Global Information for Automated Audio Captioning with Transfer Learning},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  year      = {2021},
  pages     = {905--909},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP39728.2021.9413982},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9413982},
  biburl    = {https://dblp.org/rec/conf/icassp/XuDWX021.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 08 Jul 2021 01:00:00 +0200},
}

@InProceedings{Xu2021b,
  author    = {Xuenan Xu and Heinrich Dinkel and Mengyue Wu and Kai Yu},
  booktitle = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  title     = {A Lightweight Framework for Online Voice Activity Detection in the Wild},
  year      = {2021},
  editor    = {Hynek Hermansky and Honza {\v{C}}ernock{\'{y}} and Luk{\'{a}}{\v{s}} Burget and Lori Lamel and Odette Scharenborg and Petr Motl{\'{\i}}{\v{c}}ek},
  pages     = {371--375},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/XuDW021.bib},
  doi       = {10.21437/INTERSPEECH.2021-1977},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
  url       = {https://doi.org/10.21437/Interspeech.2021-1977},
}

@InProceedings{Xu2021c,
  author    = {Xuenan Xu and Heinrich Dinkel and Mengyue Wu and Kai Yu},
  title     = {Audio Caption in a Car Setting with a Sentence-Level Loss},
  booktitle = {12th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2021, Hong Kong, January 24-27, 2021},
  year      = {2021},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/ISCSLP49672.2021.9362117},
  url       = {https://doi.org/10.1109/ISCSLP49672.2021.9362117},
  biburl    = {https://dblp.org/rec/conf/iscslp/XuDW021.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 22 Mar 2021 16:16:13 +0100},
}

@InProceedings{Zhang2021h,
  author    = {Pingyue Zhang and Mengyue Wu and Heinrich Dinkel and Kai Yu},
  title     = {{DEPA:} Self-Supervised Audio Embedding for Depression Detection},
  booktitle = {{MM} '21: {ACM} Multimedia Conference, Virtual Event, China, October 20 - 24, 2021},
  year      = {2021},
  editor    = {Heng Tao Shen and Yueting Zhuang and John R. Smith and Yang Yang and Pablo C{\'{e}}sar and Florian Metze and Balakrishnan Prabhakaran},
  pages     = {135--143},
  publisher = {{ACM}},
  doi       = {10.1145/3474085.3479236},
  url       = {https://doi.org/10.1145/3474085.3479236},
  biburl    = {https://dblp.org/rec/conf/mm/ZhangWD021.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 02 Dec 2022 08:27:05 +0100},
}

@InProceedings{Xu2020,
  author    = {Xuenan Xu and Heinrich Dinkel and Mengyue Wu and Kai Yu},
  booktitle = {Proceedings of the 5th Workshop on Detection and Classification of Acoustic Scenes and Events 2020 ({DCASE} 2020), Tokyo, Japan (full virtual), November 2-4, 2020},
  title     = {A {CRNN-GRU} Based Reinforcement Learning Approach to Audio Captioning},
  year      = {2020},
  editor    = {Nobutaka Ono and Noboru Harada and Yohei Kawaguchi and Annamaria Mesaros and Keisuke Imoto and Yuma Koizumi and Tatsuya Komatsu},
  pages     = {225--229},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/dcase/XuDWY20.bib},
  timestamp = {Tue, 21 Dec 2021 00:00:00 +0100},
  url       = {http://dcase.community/documents/workshop2020/proceedings/DCASE2020Workshop\_Xu\_83.pdf},
}

@InProceedings{Qian2020,
  author    = {Rui Qian and Di Hu and Heinrich Dinkel and Mengyue Wu and Ning Xu and Weiyao Lin},
  title     = {Multiple Sound Sources Localization from Coarse to Fine},
  booktitle = {Computer Vision - {ECCV} 2020 - 16th European Conference, Glasgow, UK, August 23-28, 2020, Proceedings, Part {XX}},
  year      = {2020},
  editor    = {Andrea Vedaldi and Horst Bischof and Thomas Brox and Jan{-}Michael Frahm},
  series    = {Lecture Notes in Computer Science},
  volume    = {12365},
  pages     = {292--308},
  publisher = {Springer},
  doi       = {10.1007/978-3-030-58565-5\_18},
  url       = {https://doi.org/10.1007/978-3-030-58565-5\_18},
  biburl    = {https://dblp.org/rec/conf/eccv/QianHDWXL20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sat, 09 Apr 2022 01:00:00 +0200},
}

@InProceedings{Chen2020d,
  author    = {Yefei Chen and Heinrich Dinkel and Mengyue Wu and Kai Yu},
  title     = {Voice Activity Detection in the Wild via Weakly Supervised Sound Event Detection},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  year      = {2020},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  pages     = {3665--3669},
  publisher = {{ISCA}},
  doi       = {10.21437/INTERSPEECH.2020-995},
  url       = {https://doi.org/10.21437/Interspeech.2020-995},
  biburl    = {https://dblp.org/rec/conf/interspeech/ChenDW020.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 27 Jun 2023 01:00:00 +0200},
}

@Article{Dinkel2020a,
  author        = {Heinrich Dinkel and Yefei Chen and Mengyue Wu and Kai Yu},
  title         = {{GPVAD:} Towards noise robust voice activity detection via weakly supervised sound event detection},
  journal       = {CoRR},
  year          = {2020},
  volume        = {abs/2003.12222},
  url           = {https://arxiv.org/abs/2003.12222},
  eprint        = {2003.12222},
  archiveprefix = {arXiv},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2003-12222.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Wed, 01 Apr 2020 01:00:00 +0200},
}

@Article{Zhang2020d,
  author        = {Die Zhang and Huilin Zhou and Xiaoyi Bao and Da Huo and Ruizhao Chen and Xu Cheng and Hao Zhang and Mengyue Wu and Quanshi Zhang},
  journal       = {CoRR},
  title         = {Interpreting Hierarchical Linguistic Interactions in {DNNs}},
  year          = {2020},
  volume        = {abs/2007.04298},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2007-04298.bib},
  eprint        = {2007.04298},
  timestamp     = {Fri, 07 Jan 2022 00:00:00 +0100},
  url           = {https://arxiv.org/abs/2007.04298},
}

@InProceedings{Wu2019a,
  author    = {Mengyue Wu and Heinrich Dinkel and Kai Yu},
  title     = {Audio Caption: Listen and Tell},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  year      = {2019},
  pages     = {830--834},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP.2019.8682377},
  url       = {https://doi.org/10.1109/ICASSP.2019.8682377},
  biburl    = {https://dblp.org/rec/conf/icassp/WuD019.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
}

@Article{Dinkel2019,
  author        = {Heinrich Dinkel and Mengyue Wu and Kai Yu},
  journal       = {CoRR},
  title         = {Text-based Depression Detection: What Triggers An Alert},
  year          = {2019},
  volume        = {abs/1904.05154},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1904-05154.bib},
  eprint        = {1904.05154},
  timestamp     = {Thu, 25 Apr 2019 01:00:00 +0200},
  url           = {https://arxiv.org/abs/1904.05154},
}

@Article{Xu2019,
  author        = {Xuenan Xu and Heinrich Dinkel and Mengyue Wu and Kai Yu},
  journal       = {CoRR},
  title         = {What does a {Car-ssette} tape tell?},
  year          = {2019},
  volume        = {abs/1905.13448},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1905-13448.bib},
  eprint        = {1905.13448},
  timestamp     = {Mon, 03 Jun 2019 01:00:00 +0200},
  url           = {https://arxiv.org/abs/1905.13448},
}

@Article{Du2024,
  author        = {Chenpeng Du and Yiwei Guo and Hankun Wang and Yifan Yang and Zhikang Niu and Shuai Wang and Hui Zhang and Xie Chen and Kai Yu},
  title         = {{VALL-T:} Decoder-Only Generative Transducer for Robust and Decoding-Controllable Text-to-Speech},
  journal       = {CoRR},
  year          = {2024},
  volume        = {abs/2401.14321},
  doi           = {10.48550/ARXIV.2401.14321},
  url           = {https://doi.org/10.48550/arXiv.2401.14321},
  eprint        = {2401.14321},
  archiveprefix = {arXiv},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2401-14321.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Wed, 07 Feb 2024 00:00:00 +0100},
}

@Article{Zhao2024,
  author        = {Zihan Zhao and Da Ma and Lu Chen and Liangtai Sun and Zihao Li and Hongshen Xu and Zichen Zhu and Su Zhu and Shuai Fan and Guodong Shen and Xin Chen and Kai Yu},
  journal       = {CoRR},
  title         = {{ChemDFM:} Dialogue Foundation Model for Chemistry},
  year          = {2024},
  volume        = {abs/2401.14818},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2401-14818.bib},
  doi           = {10.48550/ARXIV.2401.14818},
  eprint        = {2401.14818},
  timestamp     = {Tue, 06 Feb 2024 00:00:00 +0100},
  url           = {https://doi.org/10.48550/arXiv.2401.14818},
}

@Article{Zhu2024,
  author        = {Zichen Zhu and Yang Xu and Lu Chen and Jingkai Yang and Yichuan Ma and Yiming Sun and Hailin Wen and Jiaqi Liu and Jinyu Cai and Yingzi Ma and Situo Zhang and Zihan Zhao and Liangtai Sun and Kai Yu},
  title         = {{MULTI:} Multimodal Understanding Leaderboard with Text and Images},
  journal       = {CoRR},
  year          = {2024},
  volume        = {abs/2402.03173},
  doi           = {10.48550/ARXIV.2402.03173},
  url           = {https://doi.org/10.48550/arXiv.2402.03173},
  eprint        = {2402.03173},
  archiveprefix = {arXiv},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2402-03173.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Mon, 26 Feb 2024 00:00:00 +0100},
}

@Article{Cao2023,
  author    = {Ruisheng Cao and Lu Chen and Jieyu Li and Hanchong Zhang and Hongshen Xu and Wangyou Zhang and Kai Yu},
  journal   = {{IEEE} Trans. Pattern Anal. Mach. Intell.},
  title     = {A Heterogeneous Graph to Abstract Syntax Tree Framework for Text-to-{SQL}},
  year      = {2023},
  number    = {11},
  pages     = {13796--13813},
  volume    = {45},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/pami/CaoCLZXZY23.bib},
  doi       = {10.1109/TPAMI.2023.3298895},
  timestamp = {Sat, 14 Oct 2023 01:00:00 +0200},
  url       = {https://doi.org/10.1109/TPAMI.2023.3298895},
}

@Article{Jiang2023,
  author    = {Wenbin Jiang and Kai Yu},
  title     = {Speech Enhancement With Integration of Neural Homomorphic Synthesis and Spectral Masking},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  year      = {2023},
  volume    = {31},
  pages     = {1758--1770},
  doi       = {10.1109/TASLP.2023.3271151},
  url       = {https://doi.org/10.1109/TASLP.2023.3271151},
  biburl    = {https://dblp.org/rec/journals/taslp/JiangY23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sun, 22 Oct 2023 01:00:00 +0200},
}

@InProceedings{Jiang2023a,
  author    = {Sheng Jiang and Su Zhu and Ruisheng Cao and Qingliang Miao and Kai Yu},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics: Industry Track, {ACL} 2023, Toronto, Canada, July 9-14, 2023},
  title     = {{SPM:} {A} Split-Parsing Method for Joint Multi-Intent Detection and Slot Filling},
  year      = {2023},
  editor    = {Sunayana Sitaram and Beata Beigman Klebanov and Jason D. Williams},
  pages     = {668--675},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/acl/JiangZCM023.bib},
  doi       = {10.18653/V1/2023.ACL-INDUSTRY.64},
  timestamp = {Thu, 10 Aug 2023 01:00:00 +0200},
  url       = {https://doi.org/10.18653/v1/2023.acl-industry.64},
}

@InProceedings{Li2023g,
  author    = {Jieyu Li and Lu Chen and Ruisheng Cao and Su Zhu and Hongshen Xu and Zhi Chen and Hanchong Zhang and Kai Yu},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2023, Toronto, Canada, July 9-14, 2023},
  title     = {Exploring Schema Generalizability of Text-to-{SQL}},
  year      = {2023},
  editor    = {Anna Rogers and Jordan L. Boyd{-}Graber and Naoaki Okazaki},
  pages     = {1344--1360},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/acl/LiCCZX0Z023.bib},
  doi       = {10.18653/V1/2023.FINDINGS-ACL.87},
  timestamp = {Mon, 05 Feb 2024 00:00:00 +0100},
  url       = {https://doi.org/10.18653/v1/2023.findings-acl.87},
}

@InProceedings{Ai2023,
  author    = {Yiming Ai and Zhiwei He and Kai Yu and Rui Wang},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), {ACL} 2023, Toronto, Canada, July 9-14, 2023},
  title     = {{TeCS:} {A} Dataset and Benchmark for Tense Consistency of Machine Translation},
  year      = {2023},
  editor    = {Anna Rogers and Jordan L. Boyd{-}Graber and Naoaki Okazaki},
  pages     = {1930--1941},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/acl/Ai00W23.bib},
  doi       = {10.18653/V1/2023.ACL-SHORT.164},
  timestamp = {Thu, 07 Dec 2023 00:00:00 +0100},
  url       = {https://doi.org/10.18653/v1/2023.acl-short.164},
}

@InProceedings{Zhang2023h,
  author    = {Hanchong Zhang and Jieyu Li and Lu Chen and Ruisheng Cao and Yunyan Zhang and Yu Huang and Yefeng Zheng and Kai Yu},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2023, Toronto, Canada, July 9-14, 2023},
  title     = {{CSS:} {A} Large-scale Cross-schema {Chinese} Text-to-{SQL} Medical Dataset},
  year      = {2023},
  editor    = {Anna Rogers and Jordan L. Boyd{-}Graber and Naoaki Okazaki},
  pages     = {6970--6983},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/acl/ZhangLCCZH0023.bib},
  doi       = {10.18653/V1/2023.FINDINGS-ACL.435},
  timestamp = {Mon, 05 Feb 2024 00:00:00 +0100},
  url       = {https://doi.org/10.18653/v1/2023.findings-acl.435},
}

@InProceedings{Zhang2023i,
  author    = {Hanchong Zhang and Ruisheng Cao and Lu Chen and Hongshen Xu and Kai Yu},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2023, Singapore, December 6-10, 2023},
  title     = {{ACT-SQL:} In-Context Learning for Text-to-{SQL} with Automatically-Generated Chain-of-Thought},
  year      = {2023},
  editor    = {Houda Bouamor and Juan Pino and Kalika Bali},
  pages     = {3501--3532},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/emnlp/ZhangCCX023.bib},
  timestamp = {Wed, 13 Dec 2023 17:20:20 +0100},
  url       = {https://aclanthology.org/2023.findings-emnlp.227},
}

@InProceedings{Du2023d,
  author    = {Chenpeng Du and Yiwei Guo and Feiyu Shen and Kai Yu},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  title     = {Multi-Speaker Multi-Lingual {VQTTS} System for {LIMMITS} 2023 Challenge},
  year      = {2023},
  pages     = {1--2},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/DuGSY23.bib},
  doi       = {10.1109/ICASSP49357.2023.10096415},
  timestamp = {Fri, 10 Nov 2023 00:00:00 +0100},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096415},
}

@InProceedings{Liu2023c,
  author    = {Zhijun Liu and Yiwei Guo and Kai Yu},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  title     = {{DiffVoice:} Text-to-Speech with Latent Diffusion},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/LiuGY23.bib},
  doi       = {10.1109/ICASSP49357.2023.10095100},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10095100},
}

@InProceedings{Zhang2023j,
  author    = {Danyang Zhang and Lu Chen and Situo Zhang and Hongshen Xu and Zihan Zhao and Kai Yu},
  booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, {NeurIPS} 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  title     = {Large Language Models Are Semi-Parametric Reinforcement Learning Agents},
  year      = {2023},
  editor    = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/nips/ZhangCZXZ023.bib},
  timestamp = {Fri, 01 Mar 2024 00:00:00 +0100},
  url       = {http://papers.nips.cc/paper\_files/paper/2023/hash/f6b22ac37beb5da61efd4882082c9ecd-Abstract-Conference.html},
}

@Article{Zhang2023k,
  author        = {Danyang Zhang and Lu Chen and Kai Yu},
  journal       = {CoRR},
  title         = {{Mobile-Env:} {A} Universal Platform for Training and Evaluation of Mobile Interaction},
  year          = {2023},
  volume        = {abs/2305.08144},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2305-08144.bib},
  doi           = {10.48550/ARXIV.2305.08144},
  eprint        = {2305.08144},
  timestamp     = {Wed, 17 May 2023 01:00:00 +0200},
  url           = {https://doi.org/10.48550/arXiv.2305.08144},
}

@Article{Sun2023a,
  author        = {Liangtai Sun and Yang Han and Zihan Zhao and Da Ma and Zhennan Shen and Baocai Chen and Lu Chen and Kai Yu},
  journal       = {CoRR},
  title         = {{SciEval:} {A} Multi-Level Large Language Model Evaluation Benchmark for Scientific Research},
  year          = {2023},
  volume        = {abs/2308.13149},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2308-13149.bib},
  doi           = {10.48550/ARXIV.2308.13149},
  eprint        = {2308.13149},
  timestamp     = {Fri, 01 Sep 2023 01:00:00 +0200},
  url           = {https://doi.org/10.48550/arXiv.2308.13149},
}

@Article{Cao2023a,
  author        = {Ruisheng Cao and Hanchong Zhang and Hongshen Xu and Jieyu Li and Da Ma and Lu Chen and Kai Yu},
  journal       = {CoRR},
  title         = {{ASTormer:} An {AST} Structure-aware Transformer Decoder for Text-to-{SQL}},
  year          = {2023},
  volume        = {abs/2310.18662},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2310-18662.bib},
  doi           = {10.48550/ARXIV.2310.18662},
  eprint        = {2310.18662},
  timestamp     = {Mon, 05 Feb 2024 00:00:00 +0100},
  url           = {https://doi.org/10.48550/arXiv.2310.18662},
}

@Article{Liu2023e,
  author        = {Tao Liu and Chenpeng Du and Shuai Fan and Feilong Chen and Kai Yu},
  journal       = {CoRR},
  title         = {{DiffDub:} Person-generic Visual Dubbing Using Inpainting Renderer with Diffusion Auto-encoder},
  year          = {2023},
  volume        = {abs/2311.01811},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2311-01811.bib},
  doi           = {10.48550/ARXIV.2311.01811},
  eprint        = {2311.01811},
  timestamp     = {Tue, 07 Nov 2023 00:00:00 +0100},
  url           = {https://doi.org/10.48550/arXiv.2311.01811},
}

@Article{Li2023h,
  author        = {Junjie Li and Yiwei Guo and Xie Chen and Kai Yu},
  title         = {{SEF-VC:} Speaker Embedding Free Zero-Shot Voice Conversion with Cross Attention},
  journal       = {CoRR},
  year          = {2023},
  volume        = {abs/2312.08676},
  doi           = {10.48550/ARXIV.2312.08676},
  url           = {https://doi.org/10.48550/arXiv.2312.08676},
  eprint        = {2312.08676},
  archiveprefix = {arXiv},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2312-08676.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  timestamp     = {Mon, 08 Jan 2024 00:00:00 +0100},
}

@Article{Chen2022k,
  author    = {Bo Chen and Zhihang Xu and Kai Yu},
  title     = {Data augmentation based non-parallel voice conversion with frame-level speaker disentangler},
  journal   = {Speech Commun.},
  year      = {2022},
  volume    = {136},
  pages     = {14--22},
  doi       = {10.1016/J.SPECOM.2021.10.001},
  url       = {https://doi.org/10.1016/j.specom.2021.10.001},
  biburl    = {https://dblp.org/rec/journals/speech/ChenXY22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 21 Jan 2022 00:00:00 +0100},
}

@Article{Du2022b,
  author    = {Chenpeng Du and Kai Yu},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  title     = {Phone-Level Prosody Modelling With {GMM}-Based {MDN} for Diverse and Controllable Speech Synthesis},
  year      = {2022},
  pages     = {190--201},
  volume    = {30},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/DuY22.bib},
  doi       = {10.1109/TASLP.2021.3133205},
  timestamp = {Tue, 08 Feb 2022 00:00:00 +0100},
  url       = {https://doi.org/10.1109/TASLP.2021.3133205},
}

@Article{Chen2022l,
  author    = {Bo Chen and Chenpeng Du and Kai Yu},
  title     = {Neural Fusion for Voice Cloning},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  year      = {2022},
  volume    = {30},
  pages     = {1993--2001},
  doi       = {10.1109/TASLP.2022.3171971},
  url       = {https://doi.org/10.1109/TASLP.2022.3171971},
  biburl    = {https://dblp.org/rec/journals/taslp/ChenDY22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 28 Jun 2022 01:00:00 +0200},
}

@InProceedings{Sun2022,
  author    = {Liangtai Sun and Xingyu Chen and Lu Chen and Tianle Dai and Zichen Zhu and Kai Yu},
  title     = {{META-GUI:} Towards Multi-modal Conversational Agents on Mobile {GUI}},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2022, Abu Dhabi, United Arab Emirates, December 7-11, 2022},
  year      = {2022},
  editor    = {Yoav Goldberg and Zornitsa Kozareva and Yue Zhang},
  pages     = {6699--6712},
  publisher = {Association for Computational Linguistics},
  doi       = {10.18653/V1/2022.EMNLP-MAIN.449},
  url       = {https://doi.org/10.18653/v1/2022.emnlp-main.449},
  biburl    = {https://dblp.org/rec/conf/emnlp/SunCCDZY22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 10 Aug 2023 01:00:00 +0200},
}

@InProceedings{Chen2022m,
  author    = {Zhi Chen and Bei Chen and Lu Chen and Kai Yu and Jian{-}Guang Lou},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2022, Abu Dhabi, United Arab Emirates, December 7-11, 2022},
  title     = {{AdapterShare:} Task Correlation Modeling with Adapter Differentiation},
  year      = {2022},
  editor    = {Yoav Goldberg and Zornitsa Kozareva and Yue Zhang},
  pages     = {10645--10651},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/emnlp/ChenC00L22.bib},
  doi       = {10.18653/V1/2022.EMNLP-MAIN.728},
  timestamp = {Thu, 10 Aug 2023 01:00:00 +0200},
  url       = {https://doi.org/10.18653/v1/2022.emnlp-main.728},
}

@InProceedings{Dai2022,
  author    = {Lingfeng Dai and Lu Chen and Zhikai Zhou and Kai Yu},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  title     = {{LatticeBART:} Lattice-to-Lattice Pre-Training for Speech Recognition},
  year      = {2022},
  pages     = {6112--6116},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/DaiCZY22.bib},
  doi       = {10.1109/ICASSP43922.2022.9746796},
  timestamp = {Sat, 29 Apr 2023 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746796},
}

@InProceedings{Xi2022,
  author    = {Yu Xi and Tian Tan and Wangyou Zhang and Baochen Yang and Kai Yu},
  title     = {Text Adaptive Detection for Customizable Keyword Spotting},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  year      = {2022},
  pages     = {6652--6656},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP43922.2022.9746647},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746647},
  biburl    = {https://dblp.org/rec/conf/icassp/XiTZYY22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}

@InProceedings{Guo2022a,
  author    = {Yiwei Guo and Chenpeng Du and Kai Yu},
  title     = {Unsupervised Word-Level Prosody Tagging for Controllable Speech Synthesis},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  year      = {2022},
  pages     = {7597--7601},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP43922.2022.9746323},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746323},
  biburl    = {https://dblp.org/rec/conf/icassp/GuoDY22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 23 Jun 2022 01:00:00 +0200},
}

@InProceedings{Zhu2022,
  author    = {Qinpei Zhu and Renshou Wu and Guangfeng Liu and Xinyu Zhu and Xingyu Chen and Yang Zhou and Qingliang Miao and Rui Wang and Kai Yu},
  booktitle = {Proceedings of the 19th International Conference on Spoken Language Translation, {IWSLT@ACL} 2022, Dublin, Ireland (in-person and online), May 26-27, 2022},
  title     = {The {AISP-SJTU} Simultaneous Translation System for {IWSLT} 2022},
  year      = {2022},
  editor    = {Elizabeth Salesky and Marcello Federico and Marta Ruiz Costa{-}juss{\`{a}}},
  pages     = {208--215},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iwslt/ZhuWLZCZMWY22.bib},
  doi       = {10.18653/V1/2022.IWSLT-1.16},
  timestamp = {Wed, 12 Oct 2022 08:58:55 +0200},
  url       = {https://doi.org/10.18653/v1/2022.iwslt-1.16},
}

@InProceedings{Zhao2022,
  author    = {Zihan Zhao and Lu Chen and Ruisheng Cao and Hongshen Xu and Xingyu Chen and Kai Yu},
  title     = {{TIE:} Topological Information Enhanced Structural Reading Comprehension on Web Pages},
  booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, {NAACL} 2022, Seattle, WA, United States, July 10-15, 2022},
  year      = {2022},
  editor    = {Marine Carpuat and Marie{-}Catherine de Marneffe and Iv{\'{a}}n Vladimir Meza Ru{\'{\i}}z},
  pages     = {1808--1821},
  publisher = {Association for Computational Linguistics},
  doi       = {10.18653/V1/2022.NAACL-MAIN.132},
  url       = {https://doi.org/10.18653/v1/2022.naacl-main.132},
  biburl    = {https://dblp.org/rec/conf/naacl/ZhaoCCXC022.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sat, 30 Sep 2023 01:00:00 +0200},
}

@InProceedings{Chen2022n,
  author    = {Zhi Chen and Lu Chen and Bei Chen and Libo Qin and Yuncong Liu and Su Zhu and Jian{-}Guang Lou and Kai Yu},
  booktitle = {Proceedings of the 23rd Annual Meeting of the Special Interest Group on Discourse and Dialogue, {SIGDIAL} 2022, Edinburgh, UK, 07-09 September 2022},
  title     = {{UniDU}: Towards {A} Unified Generative Dialogue Understanding Framework},
  year      = {2022},
  editor    = {Oliver Lemon and Dilek Hakkani{-}T{\"{u}}r and Junyi Jessy Li and Arash Ashrafzadeh and Daniel Hern{\'{a}}ndez Garc{\'{\i}}a and Malihe Alikhani and David Vandyke and Ondrej Dusek},
  pages     = {442--455},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/sigdial/ChenCCQLZL022.bib},
  timestamp = {Thu, 19 Jan 2023 00:00:00 +0100},
  url       = {https://aclanthology.org/2022.sigdial-1.43},
}

@InProceedings{Liu2022f,
  author    = {Guangfeng Liu and Qinpei Zhu and Xingyu Chen and Renjie Feng and Jianxin Ren and Renshou Wu and Qingliang Miao and Rui Wang and Kai Yu},
  booktitle = {Proceedings of the Seventh Conference on Machine Translation, {WMT} 2022, Abu Dhabi, United Arab Emirates (Hybrid), December 7-8, 2022},
  title     = {The {AISP-SJTU} Translation System for {WMT} 2022},
  year      = {2022},
  editor    = {Philipp Koehn and Lo{\"{\i}}c Barrault and Ondrej Bojar and Fethi Bougares and Rajen Chatterjee and Marta R. Costa{-}juss{\`{a}} and Christian Federmann and Mark Fishel and Alexander Fraser and Markus Freitag and Yvette Graham and Roman Grundkiewicz and Paco Guzman and Barry Haddow and Matthias Huck and Antonio Jimeno{-}Yepes and Tom Kocmi and Andr{\'{e}} Martins and Makoto Morishita and Christof Monz and Masaaki Nagata and Toshiaki Nakazawa and Matteo Negri and Aur{\'{e}}lie N{\'{e}}v{\'{e}}ol and Mariana Neves and Martin Popel and Marco Turchi and Marcos Zampieri},
  pages     = {310--317},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/wmt/LiuZCFRWMWY22.bib},
  timestamp = {Fri, 17 Mar 2023 17:00:14 +0100},
  url       = {https://aclanthology.org/2022.wmt-1.24},
}

@InProceedings{Lyu2021,
  author    = {Boer Lyu and Lu Chen and Su Zhu and Kai Yu},
  booktitle = {Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI} 2021, Thirty-Third Conference on Innovative Applications of Artificial Intelligence, {IAAI} 2021, The Eleventh Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2021, Virtual Event, February 2-9, 2021},
  title     = {{LET:} Linguistic Knowledge Enhanced Graph Transformer for {Chinese} Short Text Matching},
  year      = {2021},
  pages     = {13498--13506},
  publisher = {{AAAI} Press},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/aaai/LyuCZ021.bib},
  doi       = {10.1609/AAAI.V35I15.17592},
  timestamp = {Mon, 04 Sep 2023 12:29:24 +0200},
  url       = {https://doi.org/10.1609/aaai.v35i15.17592},
}

@InProceedings{Cao2021,
  author    = {Ruisheng Cao and Lu Chen and Zhi Chen and Yanbin Zhao and Su Zhu and Kai Yu},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing, {ACL/IJCNLP} 2021, (Volume 1: Long Papers), Virtual Event, August 1-6, 2021},
  title     = {{LGESQL:} Line Graph Enhanced Text-to-{SQL} Model with Mixed Local and Non-Local Relations},
  year      = {2021},
  editor    = {Chengqing Zong and Fei Xia and Wenjie Li and Roberto Navigli},
  pages     = {2541--2555},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/acl/CaoC0ZZ020.bib},
  doi       = {10.18653/V1/2021.ACL-LONG.198},
  timestamp = {Thu, 23 Jun 2022 01:00:00 +0200},
  url       = {https://doi.org/10.18653/v1/2021.acl-long.198},
}

@InProceedings{Chen2021f,
  author    = {Xingyu Chen and Zihan Zhao and Lu Chen and Jiabao Ji and Danyang Zhang and Ao Luo and Yuxuan Xiong and Kai Yu},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2021, Virtual Event / Punta Cana, Dominican Republic, 7-11 November, 2021},
  title     = {{WebSRC}: {A} Dataset for Web-Based Structural Reading Comprehension},
  year      = {2021},
  editor    = {Marie{-}Francine Moens and Xuanjing Huang and Lucia Specia and Scott Wen{-}tau Yih},
  pages     = {4173--4185},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/emnlp/ChenZCJZLX021.bib},
  doi       = {10.18653/V1/2021.EMNLP-MAIN.343},
  timestamp = {Fri, 16 Feb 2024 08:27:36 +0100},
  url       = {https://doi.org/10.18653/v1/2021.emnlp-main.343},
}

@InProceedings{Lyu2021a,
  author    = {Boer Lyu and Lu Chen and Kai Yu},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2021, Virtual Event / Punta Cana, Dominican Republic, 16-20 November, 2021},
  title     = {Glyph Enhanced {Chinese} Character Pre-Training for Lexical Sememe Prediction},
  year      = {2021},
  editor    = {Marie{-}Francine Moens and Xuanjing Huang and Lucia Specia and Scott Wen{-}tau Yih},
  pages     = {4549--4555},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/emnlp/LyuC021.bib},
  doi       = {10.18653/V1/2021.FINDINGS-EMNLP.386},
  timestamp = {Fri, 16 Feb 2024 08:27:36 +0100},
  url       = {https://doi.org/10.18653/v1/2021.findings-emnlp.386},
}

@InProceedings{Dai2021,
  author    = {Lingfeng Dai and Qi Liu and Kai Yu},
  booktitle = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  title     = {Class-Based Neural Network Language Model for Second-Pass Rescoring in {ASR}},
  year      = {2021},
  editor    = {Hynek Hermansky and Honza Cernock{\'{y}} and Luk{\'{a}}s Burget and Lori Lamel and Odette Scharenborg and Petr Motl{\'{\i}}cek},
  pages     = {2022--2026},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/DaiLY21.bib},
  doi       = {10.21437/INTERSPEECH.2021-1080},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
  url       = {https://doi.org/10.21437/Interspeech.2021-1080},
}

@InProceedings{Du2021b,
  author    = {Chenpeng Du and Kai Yu},
  booktitle = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  title     = {Rich Prosody Diversity Modelling with Phone-Level Mixture Density Network},
  year      = {2021},
  editor    = {Hynek Hermansky and Honza Cernock{\'{y}} and Luk{\'{a}}s Burget and Lori Lamel and Odette Scharenborg and Petr Motl{\'{\i}}cek},
  pages     = {3136--3140},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/Du021.bib},
  doi       = {10.21437/INTERSPEECH.2021-802},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
  url       = {https://doi.org/10.21437/Interspeech.2021-802},
}

@InProceedings{Chen2021g,
  author    = {Zhi Chen and Lu Chen and Yanbin Zhao and Ruisheng Cao and Zihan Xu and Su Zhu and Kai Yu},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, {NAACL-HLT} 2021, Online, June 6-11, 2021},
  title     = {{ShadowGNN}: Graph Projection Neural Network for Text-to-{SQL} Parser},
  year      = {2021},
  editor    = {Kristina Toutanova and Anna Rumshisky and Luke Zettlemoyer and Dilek Hakkani{-}T{\"{u}}r and Iz Beltagy and Steven Bethard and Ryan Cotterell and Tanmoy Chakraborty and Yichao Zhou},
  pages     = {5567--5577},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/naacl/ChenCZCXZY21.bib},
  doi       = {10.18653/V1/2021.NAACL-MAIN.441},
  timestamp = {Thu, 23 Jun 2022 01:00:00 +0200},
  url       = {https://doi.org/10.18653/v1/2021.naacl-main.441},
}

@InProceedings{Zhu2021,
  author    = {Su Zhu and Lu Chen and Ruisheng Cao and Zhi Chen and Qingliang Miao and Kai Yu},
  booktitle = {Natural Language Processing and Chinese Computing - 10th {CCF} International Conference, {NLPCC} 2021, Qingdao, China, October 13-17, 2021, Proceedings, Part {I}},
  title     = {Few-Shot {NLU} with Vector Projection Distance and Abstract Triangular {CRF}},
  year      = {2021},
  editor    = {Lu Wang and Yansong Feng and Yu Hong and Ruifang He},
  pages     = {505--516},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {13028},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/nlpcc/ZhuCCCMY21.bib},
  doi       = {10.1007/978-3-030-88480-2\_40},
  timestamp = {Wed, 28 Dec 2022 17:53:03 +0100},
  url       = {https://doi.org/10.1007/978-3-030-88480-2\_40},
}

@InProceedings{Zhao2021,
  author    = {Yao Zhao and Lu Chen and Kai Yu},
  booktitle = {Natural Language Processing and Chinese Computing - 10th {CCF} International Conference, {NLPCC} 2021, Qingdao, China, October 13-17, 2021, Proceedings, Part {I}},
  title     = {Relation-Aware Multi-hop Reasoning for Visual Dialog},
  year      = {2021},
  editor    = {Lu Wang and Yansong Feng and Yu Hong and Ruifang He},
  pages     = {810--821},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {13028},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/nlpcc/ZhaoCY21.bib},
  doi       = {10.1007/978-3-030-88480-2\_65},
  timestamp = {Wed, 13 Apr 2022 01:00:00 +0200},
  url       = {https://doi.org/10.1007/978-3-030-88480-2\_65},
}

@Article{Du2021c,
  author        = {Chenpeng Du and Kai Yu},
  journal       = {CoRR},
  title         = {Mixture Density Network for Phone-Level Prosody Modelling in Speech Synthesis},
  year          = {2021},
  volume        = {abs/2102.00851},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2102-00851.bib},
  eprint        = {2102.00851},
  timestamp     = {Fri, 09 Jul 2021 01:00:00 +0200},
  url           = {https://arxiv.org/abs/2102.00851},
}

@Article{Du2021d,
  author        = {Chenpeng Du and Kai Yu},
  journal       = {CoRR},
  title         = {Diverse and Controllable Speech Synthesis with {GMM}-Based Phone-Level Prosody Modelling},
  year          = {2021},
  volume        = {abs/2105.13086},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2105-13086.bib},
  eprint        = {2105.13086},
  timestamp     = {Fri, 09 Jul 2021 01:00:00 +0200},
  url           = {https://arxiv.org/abs/2105.13086},
}

@Article{Wu2020,
  author    = {Fei Wu and Cewu Lu and Mingjie Zhu and Hao Chen and Jun Zhu and Kai Yu and Lei Li and Ming Li and Qianfeng Chen and Xi Li and Xudong Cao and Zhongyuan Wang and Zhengjun Zha and Yueting Zhuang and Yunhe Pan},
  journal   = {Nat. Mach. Intell.},
  title     = {Towards a new generation of artificial intelligence in {China}},
  year      = {2020},
  number    = {6},
  pages     = {312--316},
  volume    = {2},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/natmi/WuLZCZYLLCLCWZZ20.bib},
  doi       = {10.1038/S42256-020-0183-4},
  timestamp = {Tue, 14 Mar 2023 00:00:00 +0100},
  url       = {https://doi.org/10.1038/s42256-020-0183-4},
}

@Article{Zhu2020,
  author    = {Su Zhu and Zijian Zhao and Rao Ma and Kai Yu},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  title     = {Prior Knowledge Driven Label Embedding for Slot Filling in Natural Language Understanding},
  year      = {2020},
  pages     = {1440--1451},
  volume    = {28},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/ZhuZMY20.bib},
  doi       = {10.1109/TASLP.2020.2980152},
  timestamp = {Wed, 26 Aug 2020 01:00:00 +0200},
  url       = {https://doi.org/10.1109/TASLP.2020.2980152},
}

@Article{Zhu2020a,
  author    = {Su Zhu and Ruisheng Cao and Kai Yu},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  title     = {Dual Learning for Semi-Supervised Natural Language Understanding},
  year      = {2020},
  pages     = {1936--1947},
  volume    = {28},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/ZhuCY20.bib},
  doi       = {10.1109/TASLP.2020.3001684},
  timestamp = {Thu, 06 Aug 2020 01:00:00 +0200},
  url       = {https://doi.org/10.1109/TASLP.2020.3001684},
}

@Article{Liu2020a,
  author    = {Qi Liu and Zhehuai Chen and Hao Li and Mingkun Huang and Yizhou Lu and Kai Yu},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  title     = {Modular End-to-End Automatic Speech Recognition Framework for Acoustic-to-Word Model},
  year      = {2020},
  pages     = {2174--2183},
  volume    = {28},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/LiuCLHLY20.bib},
  doi       = {10.1109/TASLP.2020.3009477},
  timestamp = {Wed, 18 May 2022 01:00:00 +0200},
  url       = {https://doi.org/10.1109/TASLP.2020.3009477},
}

@Article{Chen2020e,
  author    = {Zhi Chen and Lu Chen and Xiaoyuan Liu and Kai Yu},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  title     = {Distributed Structured Actor-Critic Reinforcement Learning for Universal Dialogue Management},
  year      = {2020},
  pages     = {2400--2411},
  volume    = {28},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/ChenCLY20.bib},
  doi       = {10.1109/TASLP.2020.3013392},
  timestamp = {Tue, 06 Oct 2020 01:00:00 +0200},
  url       = {https://doi.org/10.1109/TASLP.2020.3013392},
}

@Article{Yu2020,
  author    = {Kai Yu and Rao Ma and Kaiyu Shi and Qi Liu},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  title     = {Neural Network Language Model Compression With Product Quantization and Soft Binarization},
  year      = {2020},
  pages     = {2438--2449},
  volume    = {28},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/YuMSL20.bib},
  doi       = {10.1109/TASLP.2020.3015659},
  timestamp = {Tue, 16 Aug 2022 01:00:00 +0200},
  url       = {https://doi.org/10.1109/TASLP.2020.3015659},
}

@InProceedings{Chen2020f,
  author    = {Lu Chen and Boer Lv and Chi Wang and Su Zhu and Bowen Tan and Kai Yu},
  booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  title     = {Schema-Guided Multi-Domain Dialogue State Tracking with Graph Attention Neural Networks},
  year      = {2020},
  pages     = {7521--7528},
  publisher = {{AAAI} Press},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/aaai/0002LWZT020.bib},
  doi       = {10.1609/AAAI.V34I05.6250},
  timestamp = {Mon, 04 Sep 2023 12:29:24 +0200},
  url       = {https://doi.org/10.1609/aaai.v34i05.6250},
}

@InProceedings{Zhao2020,
  author    = {Yanbin Zhao and Lu Chen and Zhi Chen and Kai Yu},
  booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  title     = {Semi-Supervised Text Simplification with Back-Translation and Asymmetric Denoising Autoencoders},
  year      = {2020},
  pages     = {9668--9675},
  publisher = {{AAAI} Press},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/aaai/ZhaoCCY20.bib},
  doi       = {10.1609/AAAI.V34I05.6515},
  timestamp = {Mon, 04 Sep 2023 01:00:00 +0200},
  url       = {https://doi.org/10.1609/aaai.v34i05.6515},
}

@InProceedings{Zhao2020a,
  author    = {Yanbin Zhao and Lu Chen and Zhi Chen and Ruisheng Cao and Su Zhu and Kai Yu},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, {ACL} 2020, Online, July 5-10, 2020},
  title     = {Line Graph Enhanced {AMR}-to-Text Generation with Mix-Order Graph Attention Networks},
  year      = {2020},
  editor    = {Dan Jurafsky and Joyce Chai and Natalie Schluter and Joel R. Tetreault},
  pages     = {732--741},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/acl/ZhaoCCCZY20.bib},
  doi       = {10.18653/V1/2020.ACL-MAIN.67},
  timestamp = {Thu, 23 Jun 2022 01:00:00 +0200},
  url       = {https://doi.org/10.18653/v1/2020.acl-main.67},
}

@InProceedings{Chen2020g,
  author    = {Lu Chen and Yanbin Zhao and Boer Lyu and Lesheng Jin and Zhi Chen and Su Zhu and Kai Yu},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, {ACL} 2020, Online, July 5-10, 2020},
  title     = {Neural Graph Matching Networks for {Chinese} Short Text Matching},
  year      = {2020},
  editor    = {Dan Jurafsky and Joyce Chai and Natalie Schluter and Joel R. Tetreault},
  pages     = {6152--6158},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/acl/ChenZLJCZY20.bib},
  doi       = {10.18653/V1/2020.ACL-MAIN.547},
  timestamp = {Fri, 06 Aug 2021 01:00:00 +0200},
  url       = {https://doi.org/10.18653/v1/2020.acl-main.547},
}

@InProceedings{Cao2020,
  author    = {Ruisheng Cao and Su Zhu and Chenyu Yang and Chen Liu and Rao Ma and Yanbin Zhao and Lu Chen and Kai Yu},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, {ACL} 2020, Online, July 5-10, 2020},
  title     = {Unsupervised Dual Paraphrasing for Two-stage Semantic Parsing},
  year      = {2020},
  editor    = {Dan Jurafsky and Joyce Chai and Natalie Schluter and Joel R. Tetreault},
  pages     = {6806--6817},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/acl/CaoZYLMZCY20.bib},
  doi       = {10.18653/V1/2020.ACL-MAIN.608},
  timestamp = {Thu, 23 Jun 2022 01:00:00 +0200},
  url       = {https://doi.org/10.18653/v1/2020.acl-main.608},
}

@InProceedings{Zhu2020b,
  author    = {Su Zhu and Jieyu Li and Lu Chen and Kai Yu},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2020, Online Event, 16-20 November 2020},
  title     = {Efficient Context and Schema Fusion Networks for Multi-Domain Dialogue State Tracking},
  year      = {2020},
  editor    = {Trevor Cohn and Yulan He and Yang Liu},
  pages     = {766--781},
  publisher = {Association for Computational Linguistics},
  series    = {Findings of {ACL}},
  volume    = {{EMNLP} 2020},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/emnlp/ZhuL0020.bib},
  doi       = {10.18653/V1/2020.FINDINGS-EMNLP.68},
  timestamp = {Wed, 23 Mar 2022 10:11:55 +0100},
  url       = {https://doi.org/10.18653/v1/2020.findings-emnlp.68},
}

@InProceedings{Dinkel2020b,
  author    = {Heinrich Dinkel and Kai Yu},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  title     = {Duration Robust Weakly Supervised Sound Event Detection},
  year      = {2020},
  pages     = {311--315},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/Dinkel020.bib},
  doi       = {10.1109/ICASSP40776.2020.9053459},
  timestamp = {Thu, 23 Jul 2020 16:19:28 +0200},
  url       = {https://doi.org/10.1109/ICASSP40776.2020.9053459},
}

@InProceedings{Wang2020b,
  author    = {Shuai Wang and Johan Rohdin and Oldrich Plchot and Luk{\'{a}}s Burget and Kai Yu and Jan Cernock{\'{y}}},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  title     = {Investigation of {SpecAugment} for Deep Speaker Embedding Learning},
  year      = {2020},
  pages     = {7139--7143},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/0016RPBYC20.bib},
  doi       = {10.1109/ICASSP40776.2020.9053481},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICASSP40776.2020.9053481},
}

@InProceedings{Du2020a,
  author    = {Chenpeng Du and Kai Yu},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  title     = {Speaker Augmentation for Low Resource Speech Recognition},
  year      = {2020},
  pages     = {7719--7723},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/DuY20.bib},
  doi       = {10.1109/ICASSP40776.2020.9053139},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICASSP40776.2020.9053139},
}

@InProceedings{Ma2020,
  author    = {Rao Ma and Hao Li and Qi Liu and Lu Chen and Kai Yu},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  title     = {Neural Lattice Search for Speech Recognition},
  year      = {2020},
  pages     = {7794--7798},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/MaLLCY20.bib},
  doi       = {10.1109/ICASSP40776.2020.9054109},
  timestamp = {Wed, 18 May 2022 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICASSP40776.2020.9054109},
}

@InProceedings{Li2020b,
  author    = {Jieyu Li and Su Zhu and Kai Yu},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  title     = {A Hierarchical Tracker for Multi-Domain Dialogue State Tracking},
  year      = {2020},
  pages     = {8014--8018},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/LiZY20.bib},
  doi       = {10.1109/ICASSP40776.2020.9053248},
  timestamp = {Tue, 04 Aug 2020 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICASSP40776.2020.9053248},
}

@InProceedings{Ma2020a,
  author    = {Rao Ma and Lesheng Jin and Qi Liu and Lu Chen and Kai Yu},
  booktitle = {2020 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  title     = {Addressing the Polysemy Problem in Language Modeling with Attentional Multi-Sense Embeddings},
  year      = {2020},
  pages     = {8129--8133},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/MaJLCY20.bib},
  doi       = {10.1109/ICASSP40776.2020.9053503},
  timestamp = {Wed, 18 May 2022 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICASSP40776.2020.9053503},
}

@InProceedings{Zhao2020b,
  author    = {Han Zhao and Weihao Cui and Quan Chen and Jingwen Leng and Kai Yu and Deze Zeng and Chao Li and Minyi Guo},
  booktitle = {40th {IEEE} International Conference on Distributed Computing Systems, {ICDCS} 2020, Singapore, November 29 - December 1, 2020},
  title     = {{CODA:} Improving Resource Utilization by Slimming and Co-locating {DNN} and {CPU} Jobs},
  year      = {2020},
  pages     = {853--863},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icdcs/0005CCLYZ0G20.bib},
  doi       = {10.1109/ICDCS47774.2020.00069},
  timestamp = {Sat, 18 Jun 2022 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICDCS47774.2020.00069},
}

@InProceedings{Liu2020b,
  author    = {Chen Liu and Su Zhu and Zijian Zhao and Ruisheng Cao and Lu Chen and Kai Yu},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  title     = {Jointly Encoding Word Confusion Network and Dialogue Context with {BERT} for Spoken Language Understanding},
  year      = {2020},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  pages     = {871--875},
  publisher = {{ISCA}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiuZZC0020.bib},
  doi       = {10.21437/INTERSPEECH.2020-1632},
  timestamp = {Thu, 23 Jun 2022 01:00:00 +0200},
  url       = {https://doi.org/10.21437/Interspeech.2020-1632},
}

@InProceedings{Xu2020a,
  author    = {Zihan Xu and Zhi Chen and Lu Chen and Su Zhu and Kai Yu},
  booktitle = {Natural Language Processing and Chinese Computing - 9th {CCF} International Conference, {NLPCC} 2020, Zhengzhou, China, October 14-18, 2020, Proceedings, Part {I}},
  title     = {Memory Attention Neural Network for Multi-domain Dialogue State Tracking},
  year      = {2020},
  editor    = {Xiaodan Zhu and Min Zhang and Yu Hong and Ruifang He},
  pages     = {41--52},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {12430},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/nlpcc/XuCCZY20.bib},
  doi       = {10.1007/978-3-030-60450-9\_4},
  timestamp = {Wed, 20 Dec 2023 17:00:52 +0100},
  url       = {https://doi.org/10.1007/978-3-030-60450-9\_4},
}

@InProceedings{Liu2020c,
  author    = {Chen Liu and Su Zhu and Lu Chen and Kai Yu},
  booktitle = {Natural Language Processing and Chinese Computing - 9th {CCF} International Conference, {NLPCC} 2020, Zhengzhou, China, October 14-18, 2020, Proceedings, Part {I}},
  title     = {Robust Spoken Language Understanding with {RL}-Based Value Error Recovery},
  year      = {2020},
  editor    = {Xiaodan Zhu and Min Zhang and Yu Hong and Ruifang He},
  pages     = {78--90},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {12430},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/nlpcc/LiuZCY20.bib},
  doi       = {10.1007/978-3-030-60450-9\_7},
  timestamp = {Wed, 05 May 2021 01:00:00 +0200},
  url       = {https://doi.org/10.1007/978-3-030-60450-9\_7},
}

@InProceedings{Zhao2020c,
  author    = {Zihan Zhao and Yuncong Liu and Lu Chen and Qi Liu and Rao Ma and Kai Yu},
  booktitle = {Natural Language Processing and Chinese Computing - 9th {CCF} International Conference, {NLPCC} 2020, Zhengzhou, China, October 14-18, 2020, Proceedings, Part {I}},
  title     = {An Investigation on Different Underlying Quantization Schemes for Pre-trained Language Models},
  year      = {2020},
  editor    = {Xiaodan Zhu and Min Zhang and Yu Hong and Ruifang He},
  pages     = {359--371},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {12430},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/nlpcc/ZhaoLCLMY20.bib},
  doi       = {10.1007/978-3-030-60450-9\_29},
  timestamp = {Wed, 18 May 2022 01:00:00 +0200},
  url       = {https://doi.org/10.1007/978-3-030-60450-9\_29},
}

@Article{Liu2020f,
  author        = {Qi Liu and Tian Tan and Kai Yu},
  journal       = {CoRR},
  title         = {An Investigation on Deep Learning with Beta Stabilizer},
  year          = {2020},
  volume        = {abs/2008.01173},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2008-01173.bib},
  eprint        = {2008.01173},
  timestamp     = {Wed, 18 May 2022 01:00:00 +0200},
  url           = {https://arxiv.org/abs/2008.01173},
}

@Article{Zhu2020f,
  author        = {Su Zhu and Ruisheng Cao and Lu Chen and Kai Yu},
  journal       = {CoRR},
  title         = {Vector Projection Network for Few-shot Slot Tagging in Natural Language Understanding},
  year          = {2020},
  volume        = {abs/2009.09568},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2009-09568.bib},
  eprint        = {2009.09568},
  timestamp     = {Thu, 24 Sep 2020 01:00:00 +0200},
  url           = {https://arxiv.org/abs/2009.09568},
}

@Article{Chen2020h,
  author        = {Zhi Chen and Lu Chen and Xiang Zhou and Kai Yu},
  journal       = {CoRR},
  title         = {Deep Reinforcement Learning for On-line Dialogue State Tracking},
  year          = {2020},
  volume        = {abs/2009.10321},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2009-10321.bib},
  eprint        = {2009.10321},
  timestamp     = {Wed, 07 Oct 2020 01:00:00 +0200},
  url           = {https://arxiv.org/abs/2009.10321},
}

@Article{Chen2020j,
  author        = {Zhi Chen and Xiaoyuan Liu and Lu Chen and Kai Yu},
  journal       = {CoRR},
  title         = {Structured Hierarchical Dialogue Policy with Graph Neural Networks},
  year          = {2020},
  volume        = {abs/2009.10355},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2009-10355.bib},
  eprint        = {2009.10355},
  timestamp     = {Wed, 07 Oct 2020 01:00:00 +0200},
  url           = {https://arxiv.org/abs/2009.10355},
}

@Article{Chen2020k,
  author        = {Zhi Chen and Lu Chen and Yanbin Zhao and Su Zhu and Kai Yu},
  journal       = {CoRR},
  title         = {Dual Learning for Dialogue State Tracking},
  year          = {2020},
  volume        = {abs/2009.10430},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2009-10430.bib},
  eprint        = {2009.10430},
  timestamp     = {Wed, 07 Oct 2020 01:00:00 +0200},
  url           = {https://arxiv.org/abs/2009.10430},
}

% Title: the colon sits outside the braces so BibTeX sees the subtitle boundary and keeps the capital of "Coarse-to-Fine".
@Article{Chen2020l,
  author        = {Zhi Chen and Lu Chen and Zihan Xu and Yanbin Zhao and Su Zhu and Kai Yu},
  journal       = {CoRR},
  title         = {{CREDIT}: Coarse-to-Fine Sequence Generation for Dialogue State Tracking},
  year          = {2020},
  volume        = {abs/2009.10435},
  archiveprefix = {arXiv},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2009-10435.bib},
  eprint        = {2009.10435},
  timestamp     = {Wed, 07 Oct 2020 01:00:00 +0200},
  url           = {https://arxiv.org/abs/2009.10435},
}

% Title: the camel-case system name is braced so sentence-casing styles do not flatten it to "Agentgraph".
@Article{Chen2019,
  author    = {Lu Chen and Zhi Chen and Bowen Tan and Sishan Long and Milica Gasic and Kai Yu},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  title     = {{AgentGraph}: Toward Universal Dialogue Management With Structured Deep Reinforcement Learning},
  year      = {2019},
  number    = {9},
  pages     = {1378--1391},
  volume    = {27},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/ChenCTLGY19.bib},
  doi       = {10.1109/TASLP.2019.2919872},
  timestamp = {Fri, 13 Mar 2020 00:00:00 +0100},
  url       = {https://doi.org/10.1109/TASLP.2019.2919872},
}

% Booktitle: date range spacing normalised to "July 28 - August 2".
@InProceedings{Cao2019,
  author    = {Ruisheng Cao and Su Zhu and Chen Liu and Jieyu Li and Kai Yu},
  booktitle = {Proceedings of the 57th Conference of the Association for Computational Linguistics, {ACL} 2019, Florence, Italy, July 28 - August 2, 2019, Volume 1: Long Papers},
  title     = {Semantic Parsing with Dual Learning},
  year      = {2019},
  editor    = {Anna Korhonen and David R. Traum and Llu{\'{\i}}s M{\`{a}}rquez},
  pages     = {51--64},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/acl/CaoZLLY19.bib},
  doi       = {10.18653/V1/P19-1007},
  timestamp = {Thu, 23 Jun 2022 01:00:00 +0200},
  url       = {https://doi.org/10.18653/v1/p19-1007},
}

@InProceedings{Ma2019,
  author    = {Rao Ma and Qi Liu and Kai Yu},
  title     = {Highly Efficient Neural Network Language Model Compression Using Soft Binarization Training},
  booktitle = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2019, Singapore, December 14-18, 2019},
  pages     = {62--69},
  publisher = {{IEEE}},
  year      = {2019},
  doi       = {10.1109/ASRU46091.2019.9003744},
  url       = {https://doi.org/10.1109/ASRU46091.2019.9003744},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/asru/MaLY19.bib},
  timestamp = {Wed, 18 May 2022 01:00:00 +0200},
}

@InProceedings{Zhao2019,
  author    = {Zijian Zhao and Su Zhu and Kai Yu},
  title     = {Data Augmentation with Atomic Templates for Spoken Language Understanding},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing, {EMNLP-IJCNLP} 2019, Hong Kong, China, November 3-7, 2019},
  editor    = {Kentaro Inui and Jing Jiang and Vincent Ng and Xiaojun Wan},
  pages     = {3635--3641},
  publisher = {Association for Computational Linguistics},
  year      = {2019},
  doi       = {10.18653/V1/D19-1375},
  url       = {https://doi.org/10.18653/v1/D19-1375},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/emnlp/ZhaoZY19.bib},
  timestamp = {Thu, 07 Apr 2022 09:14:07 +0200},
}

@InProceedings{Zhao2019a,
  author    = {Zijian Zhao and Su Zhu and Kai Yu},
  title     = {A Hierarchical Decoding Model for Spoken Language Understanding from Unaligned Data},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  pages     = {7305--7309},
  publisher = {{IEEE}},
  year      = {2019},
  doi       = {10.1109/ICASSP.2019.8682463},
  url       = {https://doi.org/10.1109/ICASSP.2019.8682463},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhaoZY19.bib},
  timestamp = {Thu, 29 Aug 2019 01:00:00 +0200},
}

% Title: colon kept outside the acronym braces so the subtitle's first letter survives recasing; "Chinese" is braced as a proper noun.
@InProceedings{Zhu2019,
  author    = {Su Zhu and Zijian Zhao and Tiejun Zhao and Chengqing Zong and Kai Yu},
  booktitle = {International Conference on Multimodal Interaction, {ICMI} 2019, Suzhou, China, October 14-18, 2019},
  title     = {{CATSLU}: The 1st {Chinese} Audio-Textual Spoken Language Understanding Challenge},
  year      = {2019},
  editor    = {Wen Gao and Helen Mei{-}Ling Meng and Matthew A. Turk and Susan R. Fussell and Bj{\"{o}}rn W. Schuller and Yale Song and Kai Yu},
  pages     = {521--525},
  publisher = {{ACM}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icmi/ZhuZZZ019.bib},
  doi       = {10.1145/3340555.3356098},
  timestamp = {Fri, 03 Jul 2020 08:57:26 +0200},
  url       = {https://doi.org/10.1145/3340555.3356098},
}

@InProceedings{Li2019a,
  author    = {Hao Li and Chen Liu and Su Zhu and Kai Yu},
  title     = {Robust Spoken Language Understanding with Acoustic and Domain Knowledge},
  booktitle = {International Conference on Multimodal Interaction, {ICMI} 2019, Suzhou, China, October 14-18, 2019},
  editor    = {Wen Gao and Helen Mei{-}Ling Meng and Matthew A. Turk and Susan R. Fussell and Bj{\"{o}}rn W. Schuller and Yale Song and Kai Yu},
  pages     = {531--535},
  publisher = {{ACM}},
  year      = {2019},
  doi       = {10.1145/3340555.3356100},
  url       = {https://doi.org/10.1145/3340555.3356100},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icmi/LiLZY19.bib},
  timestamp = {Wed, 05 May 2021 01:00:00 +0200},
}

% doi/url hold the raw identifier (plain underscore, no LaTeX escape) so the link resolves when the field is used verbatim.
@InProceedings{Cao2019a,
  author    = {Juncheng Cao and Hai Zhao and Kai Yu},
  booktitle = {Natural Language Processing and Chinese Computing - 8th {CCF} International Conference, {NLPCC} 2019, Dunhuang, China, October 9-14, 2019, Proceedings, Part {I}},
  title     = {Cross Aggregation of Multi-head Attention for Neural Machine Translation},
  year      = {2019},
  editor    = {Jie Tang and Min{-}Yen Kan and Dongyan Zhao and Sujian Li and Hongying Zan},
  pages     = {380--392},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {11838},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/nlpcc/CaoZY19.bib},
  doi       = {10.1007/978-3-030-32233-5_30},
  timestamp = {Sat, 18 Jun 2022 01:00:00 +0200},
  url       = {https://doi.org/10.1007/978-3-030-32233-5_30},
}

@Proceedings{Gao2019,
  editor    = {Wen Gao and Helen Mei{-}Ling Meng and Matthew A. Turk and Susan R. Fussell and Bj{\"{o}}rn W. Schuller and Yale Song and Kai Yu},
  title     = {International Conference on Multimodal Interaction, {ICMI} 2019, Suzhou, China, October 14-18, 2019},
  publisher = {{ACM}},
  year      = {2019},
  isbn      = {978-1-4503-6860-5},
  doi       = {10.1145/3340555},
  url       = {https://doi.org/10.1145/3340555},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icmi/2019.bib},
  timestamp = {Fri, 03 Jul 2020 01:00:00 +0200},
}

@Article{Yu2018,
  author    = {Kai Yu and Zijian Zhao and Xueyang Wu and Hongtao Lin and Xuan Liu},
  title     = {Rich Short Text Conversation Using Semantic-Key-Controlled Sequence Generation},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {26},
  number    = {8},
  pages     = {1359--1368},
  year      = {2018},
  doi       = {10.1109/TASLP.2018.2819941},
  url       = {https://doi.org/10.1109/TASLP.2018.2819941},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/YuZWLL18.bib},
  timestamp = {Fri, 13 Mar 2020 00:00:00 +0100},
}

@InProceedings{Chen2018c,
  author    = {Lu Chen and Bowen Tan and Sishan Long and Kai Yu},
  title     = {Structured Dialogue Policy with Graph Neural Networks},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics, {COLING} 2018, Santa Fe, New Mexico, USA, August 20-26, 2018},
  editor    = {Emily M. Bender and Leon Derczynski and Pierre Isabelle},
  pages     = {1257--1268},
  publisher = {Association for Computational Linguistics},
  year      = {2018},
  url       = {https://aclanthology.org/C18-1107/},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/coling/ChenTLY18.bib},
  timestamp = {Fri, 06 Aug 2021 01:00:00 +0200},
}

@InProceedings{Ren2018,
  author    = {Liliang Ren and Kaige Xie and Lu Chen and Kai Yu},
  title     = {Towards Universal Dialogue State Tracking},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
  editor    = {Ellen Riloff and David Chiang and Julia Hockenmaier and Jun'ichi Tsujii},
  pages     = {2780--2786},
  publisher = {Association for Computational Linguistics},
  year      = {2018},
  doi       = {10.18653/V1/D18-1299},
  url       = {https://doi.org/10.18653/v1/d18-1299},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/emnlp/RenXCY18.bib},
  timestamp = {Fri, 06 Aug 2021 01:00:00 +0200},
}

@InProceedings{Chen2018d,
  author    = {Zhehuai Chen and Qi Liu and Hao Li and Kai Yu},
  title     = {On Modular Training of Neural Acoustics-to-Word Model for {LVCSR}},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {4754--4758},
  publisher = {{IEEE}},
  year      = {2018},
  doi       = {10.1109/ICASSP.2018.8461361},
  url       = {https://doi.org/10.1109/ICASSP.2018.8461361},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/ChenLLY18.bib},
  timestamp = {Wed, 18 May 2022 01:00:00 +0200},
}

@InProceedings{Lan2018,
  author    = {Ouyu Lan and Su Zhu and Kai Yu},
  title     = {Semi-Supervised Training Using Adversarial Multi-Task Learning for Spoken Language Understanding},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {6049--6053},
  publisher = {{IEEE}},
  year      = {2018},
  doi       = {10.1109/ICASSP.2018.8462669},
  url       = {https://doi.org/10.1109/ICASSP.2018.8462669},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/LanZY18.bib},
  timestamp = {Fri, 18 Oct 2019 01:00:00 +0200},
}

@InProceedings{Chen2018e,
  author    = {Lu Chen and Cheng Chang and Zhi Chen and Bowen Tan and Milica Gasic and Kai Yu},
  title     = {Policy Adaptation for Deep Reinforcement Learning-Based Dialogue Management},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {6074--6078},
  publisher = {{IEEE}},
  year      = {2018},
  doi       = {10.1109/ICASSP.2018.8462272},
  url       = {https://doi.org/10.1109/ICASSP.2018.8462272},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/ChenCCTGY18.bib},
  timestamp = {Fri, 27 Sep 2019 01:00:00 +0200},
}

% Title: the acronym is braced so sentence-casing styles do not lowercase it.
@InProceedings{Zhu2018,
  author    = {Su Zhu and Ouyu Lan and Kai Yu},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  title     = {Robust Spoken Language Understanding with Unsupervised {ASR}-Error Adaptation},
  year      = {2018},
  pages     = {6179--6183},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhuLY18.bib},
  doi       = {10.1109/ICASSP.2018.8461831},
  timestamp = {Fri, 18 Oct 2019 01:00:00 +0200},
  url       = {https://doi.org/10.1109/ICASSP.2018.8461831},
}

% Title: the colon sits outside the acronym braces so BibTeX sees the subtitle boundary and keeps the capital of "Moment".
@InProceedings{Huang2018b,
  author    = {Bo Huang and Ya Zhang and Kai Yu},
  booktitle = {Proceedings of the 2nd International Conference on Video and Image Processing, {ICVIP} 2018, Hong Kong, China, December 29-31, 2018},
  title     = {{MLN}: Moment localization Network and Samples Selection for Moment Retrieval},
  year      = {2018},
  pages     = {165--170},
  publisher = {{ACM}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icvip/HuangZ018.bib},
  doi       = {10.1145/3301506.3301538},
  timestamp = {Fri, 28 Apr 2023 01:00:00 +0200},
  url       = {https://doi.org/10.1145/3301506.3301538},
}

@InProceedings{Huang2018c,
  author    = {Zili Huang and Shuai Wang and Kai Yu},
  title     = {Angular Softmax for Short-Duration Text-independent Speaker Verification},
  booktitle = {Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, India, 2-6 September 2018},
  editor    = {B. Yegnanarayana},
  pages     = {3623--3627},
  publisher = {{ISCA}},
  year      = {2018},
  doi       = {10.21437/INTERSPEECH.2018-1545},
  url       = {https://doi.org/10.21437/Interspeech.2018-1545},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/HuangW018.bib},
  timestamp = {Fri, 21 May 2021 08:16:43 +0200},
}

% doi/url hold the raw identifier (plain underscore, no LaTeX escape); the mixed-case venue acronym is braced.
@InProceedings{Zhang2018,
  author    = {Huifeng Zhang and Su Zhu and Shuai Fan and Kai Yu},
  booktitle = {Intelligence Science and Big Data Engineering - 8th International Conference, {IScIDE} 2018, Lanzhou, China, August 18-19, 2018, Revised Selected Papers},
  title     = {Joint Spoken Language Understanding and Domain Adaptive Language Modeling},
  year      = {2018},
  editor    = {Yuxin Peng and Kai Yu and Jiwen Lu and Xingpeng Jiang},
  pages     = {311--324},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {11266},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iscide/ZhangZF018.bib},
  doi       = {10.1007/978-3-030-02698-1_27},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
  url       = {https://doi.org/10.1007/978-3-030-02698-1_27},
}

@InProceedings{Liu2018,
  author    = {Xuan Liu and Di Cao and Kai Yu},
  title     = {Binarized {LSTM} Language Model},
  booktitle = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, {NAACL-HLT} 2018, New Orleans, Louisiana, USA, June 1-6, 2018, Volume 1 (Long Papers)},
  editor    = {Marilyn A. Walker and Heng Ji and Amanda Stent},
  pages     = {2113--2121},
  publisher = {Association for Computational Linguistics},
  year      = {2018},
  doi       = {10.18653/V1/N18-1192},
  url       = {https://doi.org/10.18653/v1/n18-1192},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/naacl/LiuCY18.bib},
  timestamp = {Wed, 01 Jun 2022 01:00:00 +0200},
}

@InProceedings{Xie2018,
  author    = {Kaige Xie and Cheng Chang and Liliang Ren and Lu Chen and Kai Yu},
  title     = {Cost-Sensitive Active Learning for Dialogue State Tracking},
  booktitle = {Proceedings of the 19th Annual SIGdial Meeting on Discourse and Dialogue, Melbourne, Australia, July 12-14, 2018},
  editor    = {Kazunori Komatani and Diane J. Litman and Kai Yu and Lawrence Cavedon and Mikio Nakano and Alex Papangelis},
  pages     = {209--213},
  publisher = {Association for Computational Linguistics},
  year      = {2018},
  doi       = {10.18653/V1/W18-5022},
  url       = {https://doi.org/10.18653/v1/w18-5022},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/sigdial/XieCRCY18.bib},
  timestamp = {Fri, 06 Aug 2021 01:00:00 +0200},
}

@InProceedings{Zhu2018a,
  author    = {Su Zhu and Kai Yu},
  title     = {Concept Transfer Learning for Adaptive Language Understanding},
  booktitle = {Proceedings of the 19th Annual SIGdial Meeting on Discourse and Dialogue, Melbourne, Australia, July 12-14, 2018},
  editor    = {Kazunori Komatani and Diane J. Litman and Kai Yu and Lawrence Cavedon and Mikio Nakano and Alex Papangelis},
  pages     = {391--399},
  publisher = {Association for Computational Linguistics},
  year      = {2018},
  doi       = {10.18653/V1/W18-5047},
  url       = {https://doi.org/10.18653/v1/w18-5047},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/sigdial/ZhuY18.bib},
  timestamp = {Fri, 06 Aug 2021 01:00:00 +0200},
}

% Title: the mixed-case venue acronym is braced so recasing styles leave it intact.
@Proceedings{Peng2018,
  title     = {Intelligence Science and Big Data Engineering - 8th International Conference, {IScIDE} 2018, Lanzhou, China, August 18-19, 2018, Revised Selected Papers},
  year      = {2018},
  editor    = {Yuxin Peng and Kai Yu and Jiwen Lu and Xingpeng Jiang},
  isbn      = {978-3-030-02697-4},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {11266},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iscide/2018.bib},
  doi       = {10.1007/978-3-030-02698-1},
  timestamp = {Tue, 14 May 2019 01:00:00 +0200},
  url       = {https://doi.org/10.1007/978-3-030-02698-1},
}

@InProceedings{Chen2017b,
  author    = {Lu Chen and Runzhe Yang and Cheng Chang and Zihao Ye and Xiang Zhou and Kai Yu},
  title     = {On-line Dialogue Policy Learning with Companion Teaching},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics, {EACL} 2017, Valencia, Spain, April 3-7, 2017, Volume 2: Short Papers},
  editor    = {Mirella Lapata and Phil Blunsom and Alexander Koller},
  pages     = {198--204},
  publisher = {Association for Computational Linguistics},
  year      = {2017},
  doi       = {10.18653/V1/E17-2032},
  url       = {https://doi.org/10.18653/v1/e17-2032},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/eacl/ChenYCYZY17.bib},
  timestamp = {Fri, 06 Aug 2021 01:00:00 +0200},
}

@InProceedings{Chang2017,
  author    = {Cheng Chang and Runzhe Yang and Lu Chen and Xiang Zhou and Kai Yu},
  title     = {Affordable On-line Dialogue Policy Learning},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2017, Copenhagen, Denmark, September 9-11, 2017},
  editor    = {Martha Palmer and Rebecca Hwa and Sebastian Riedel},
  pages     = {2200--2209},
  publisher = {Association for Computational Linguistics},
  year      = {2017},
  doi       = {10.18653/V1/D17-1234},
  url       = {https://doi.org/10.18653/v1/d17-1234},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/emnlp/ChangYCZY17.bib},
  timestamp = {Fri, 06 Aug 2021 01:00:00 +0200},
}

@InProceedings{Chen2017c,
  author    = {Lu Chen and Xiang Zhou and Cheng Chang and Runzhe Yang and Kai Yu},
  title     = {Agent-Aware Dropout {DQN} for Safe and Efficient On-line Dialogue Policy Learning},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2017, Copenhagen, Denmark, September 9-11, 2017},
  editor    = {Martha Palmer and Rebecca Hwa and Sebastian Riedel},
  pages     = {2454--2464},
  publisher = {Association for Computational Linguistics},
  year      = {2017},
  doi       = {10.18653/V1/D17-1260},
  url       = {https://doi.org/10.18653/v1/d17-1260},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/emnlp/ChenZCYY17.bib},
  timestamp = {Fri, 06 Aug 2021 01:00:00 +0200},
}

% Title: the acronym is braced so sentence-casing styles do not lowercase it.
@InProceedings{Chen2017d,
  author    = {Zhehuai Chen and Yimeng Zhuang and Kai Yu},
  booktitle = {2017 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2017, New Orleans, LA, USA, March 5-9, 2017},
  title     = {Confidence measures for {CTC}-based phone synchronous decoding},
  year      = {2017},
  pages     = {4850--4854},
  publisher = {{IEEE}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/ChenZY17.bib},
  doi       = {10.1109/ICASSP.2017.7953078},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  url       = {https://doi.org/10.1109/ICASSP.2017.7953078},
}

@InProceedings{Zhu2017,
  author    = {Su Zhu and Kai Yu},
  title     = {Encoder-decoder with focus-mechanism for sequence labelling based spoken language understanding},
  booktitle = {2017 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2017, New Orleans, LA, USA, March 5-9, 2017},
  pages     = {5675--5679},
  publisher = {{IEEE}},
  year      = {2017},
  doi       = {10.1109/ICASSP.2017.7953243},
  url       = {https://doi.org/10.1109/ICASSP.2017.7953243},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhuY17.bib},
  timestamp = {Fri, 18 Oct 2019 01:00:00 +0200},
}

@InProceedings{Chen2017e,
  author    = {Bo Chen and Tianling Bian and Kai Yu},
  title     = {Discrete Duration Model for Speech Synthesis},
  booktitle = {Interspeech 2017, 18th Annual Conference of the International Speech Communication Association, Stockholm, Sweden, August 20-24, 2017},
  editor    = {Francisco Lacerda},
  pages     = {789--793},
  publisher = {{ISCA}},
  year      = {2017},
  doi       = {10.21437/INTERSPEECH.2017-1144},
  url       = {https://doi.org/10.21437/Interspeech.2017-1144},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/ChenB017.bib},
  timestamp = {Mon, 26 Jun 2023 01:00:00 +0200},
}

% doi/url hold the raw identifier (plain underscore, no LaTeX escape); the mixed-case venue acronym is braced.
@InProceedings{Cao2017,
  author    = {Di Cao and Kai Yu},
  booktitle = {Intelligence Science and Big Data Engineering - 7th International Conference, {IScIDE} 2017, Dalian, China, September 22-23, 2017, Proceedings},
  title     = {Deep Attentive Structured Language Model Based on {LSTM}},
  year      = {2017},
  editor    = {Yi Sun and Huchuan Lu and Lihe Zhang and Jian Yang and Hua Huang},
  pages     = {169--180},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {10559},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iscide/CaoY17.bib},
  doi       = {10.1007/978-3-319-67777-4_15},
  timestamp = {Wed, 01 Jun 2022 01:00:00 +0200},
  url       = {https://doi.org/10.1007/978-3-319-67777-4_15},
}

@InProceedings{Liu2017a,
  author    = {Xuan Liu and Xueyang Wu and Ruinian Chen and Zijian Zhao and Hongtao Lin and Kai Yu},
  title     = {splab at the {NTCIR-13} {STC-2} Task},
  booktitle = {The 13th {NTCIR} Conference, Evaluation of Information Access Technologies, National Center of Sciences, Tokyo, Japan, December 5-8, 2017},
  publisher = {National Institute of Informatics {(NII)}},
  year      = {2017},
  url       = {https://research.nii.ac.jp/ntcir/workshop/OnlineProceedings13/pdf/ntcir/21-NTCIR13-STC-LiuX.pdf},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/ntcir/Liu0CZL017.bib},
  timestamp = {Wed, 01 Jun 2022 17:01:01 +0200},
}

@Article{Yu2016,
  author    = {Kai Yu and Lu Chen and Kai Sun and Qizhe Xie and Su Zhu},
  title     = {Evolvable dialogue state tracking for statistical dialogue management},
  journal   = {Frontiers Comput. Sci.},
  volume    = {10},
  number    = {2},
  pages     = {201--215},
  year      = {2016},
  doi       = {10.1007/S11704-015-5209-4},
  url       = {https://doi.org/10.1007/s11704-015-5209-4},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/fcsc/YuCSXZ16.bib},
  timestamp = {Thu, 24 Sep 2020 01:00:00 +0200},
}

@InProceedings{Yin2016,
  author    = {Maofan Yin and Sunil Sivadas and Kai Yu and Bin Ma},
  title     = {Discriminatively trained joint speaker and environment representations for adaptation of deep neural network acoustic models},
  booktitle = {2016 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2016, Shanghai, China, March 20-25, 2016},
  pages     = {5065--5069},
  publisher = {{IEEE}},
  year      = {2016},
  doi       = {10.1109/ICASSP.2016.7472642},
  url       = {https://doi.org/10.1109/ICASSP.2016.7472642},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/YinSYM16.bib},
  timestamp = {Wed, 15 Mar 2023 00:00:00 +0100},
}

@InProceedings{Tong2016,
  author    = {Sibo Tong and Hao Gu and Kai Yu},
  title     = {A comparative study of robustness of deep learning approaches for {VAD}},
  booktitle = {2016 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2016, Shanghai, China, March 20-25, 2016},
  pages     = {5695--5699},
  publisher = {{IEEE}},
  year      = {2016},
  doi       = {10.1109/ICASSP.2016.7472768},
  url       = {https://doi.org/10.1109/ICASSP.2016.7472768},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/TongGY16.bib},
  timestamp = {Wed, 18 Oct 2017 01:00:00 +0200},
}

@InProceedings{Chen2016,
  author    = {Zhehuai Chen and Wei Deng and Tao Xu and Kai Yu},
  title     = {Phone Synchronous Decoding with {CTC} Lattice},
  booktitle = {Interspeech 2016, 17th Annual Conference of the International Speech Communication Association, San Francisco, CA, USA, September 8-12, 2016},
  editor    = {Nelson Morgan},
  pages     = {1923--1927},
  publisher = {{ISCA}},
  year      = {2016},
  doi       = {10.21437/INTERSPEECH.2016-831},
  url       = {https://doi.org/10.21437/Interspeech.2016-831},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/ChenDXY16.bib},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
}

@InProceedings{Sun2016,
  author    = {Kai Sun and Su Zhu and Lu Chen and Siqiu Yao and Xueyang Wu and Kai Yu},
  title     = {Hybrid Dialogue State Tracking for Real World Human-to-Human Dialogues},
  booktitle = {Interspeech 2016, 17th Annual Conference of the International Speech Communication Association, San Francisco, CA, USA, September 8-12, 2016},
  editor    = {Nelson Morgan},
  pages     = {2060--2064},
  publisher = {{ISCA}},
  year      = {2016},
  doi       = {10.21437/INTERSPEECH.2016-949},
  url       = {https://doi.org/10.21437/Interspeech.2016-949},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/SunZCYWY16.bib},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
}

@InProceedings{He2016,
  author    = {Tianxing He and Yu Zhang and Jasha Droppo and Kai Yu},
  title     = {On training bi-directional neural network language model with noise contrastive estimation},
  booktitle = {10th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2016, Tianjin, China, October 17-20, 2016},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2016},
  doi       = {10.1109/ISCSLP.2016.7918423},
  url       = {https://doi.org/10.1109/ISCSLP.2016.7918423},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iscslp/HeZDY16.bib},
  timestamp = {Mon, 18 Jul 2022 01:00:00 +0200},
}

@InProceedings{Wu2016,
  author    = {Xueyang Wu and Su Zhu and Yue Wu and Kai Yu},
  title     = {Rich punctuations prediction using large-scale deep learning},
  booktitle = {10th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2016, Tianjin, China, October 17-20, 2016},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2016},
  doi       = {10.1109/ISCSLP.2016.7918418},
  url       = {https://doi.org/10.1109/ISCSLP.2016.7918418},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iscslp/WuZWY16.bib},
  timestamp = {Wed, 15 Jan 2020 00:00:00 +0100},
}

@InProceedings{Zheng2016,
  author    = {Da Zheng and Zhehuai Chen and Yue Wu and Kai Yu},
  title     = {Directed automatic speech transcription error correction using bidirectional {LSTM}},
  booktitle = {10th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2016, Tianjin, China, October 17-20, 2016},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2016},
  doi       = {10.1109/ISCSLP.2016.7918446},
  url       = {https://doi.org/10.1109/ISCSLP.2016.7918446},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iscslp/ZhengCWY16.bib},
  timestamp = {Wed, 18 Oct 2017 01:00:00 +0200},
}

% url uses https, matching the scheme of the NTCIR-13 entry (Liu2017a) for the same host; NOTE(review): confirm the link still resolves.
@InProceedings{Wu2016a,
  author    = {Ke Wu and Xuan Liu and Kai Yu},
  booktitle = {Proceedings of the 12th {NTCIR} Conference on Evaluation of Information Access Technologies, National Center of Sciences, Tokyo, Japan, June 7-10, 2016},
  title     = {The splab at the {NTCIR-12} Short Text Conversation Task},
  year      = {2016},
  editor    = {Noriko Kando and Tetsuya Sakai and Mark Sanderson},
  publisher = {National Institute of Informatics {(NII)}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/ntcir/WuLY16.bib},
  timestamp = {Wed, 01 Jun 2022 17:01:01 +0200},
  url       = {https://research.nii.ac.jp/ntcir/workshop/OnlineProceedings12/pdf/ntcir/STC/04-NTCIR12-STC-WuK.pdf},
}

% Title: the eponymous terms are braced so sentence-casing styles keep their capitals.
@Article{Yu2015,
  author    = {Kai Yu and Kai Sun and Lu Chen and Su Zhu},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  title     = {Constrained {Markov} {Bayesian} Polynomial for Efficient Dialogue State Tracking},
  year      = {2015},
  number    = {12},
  pages     = {2177--2188},
  volume    = {23},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/YuSCZ15.bib},
  doi       = {10.1109/TASLP.2015.2470597},
  timestamp = {Thu, 24 Sep 2020 01:00:00 +0200},
  url       = {https://doi.org/10.1109/TASLP.2015.2470597},
}

@InProceedings{Chen2015b,
  author    = {Bo Chen and Zhehuai Chen and Jiachen Xu and Kai Yu},
  title     = {An investigation of context clustering for statistical speech synthesis with deep neural network},
  booktitle = {{INTERSPEECH} 2015, 16th Annual Conference of the International Speech Communication Association, Dresden, Germany, September 6-10, 2015},
  pages     = {2212--2216},
  publisher = {{ISCA}},
  year      = {2015},
  doi       = {10.21437/INTERSPEECH.2015-127},
  url       = {https://doi.org/10.21437/Interspeech.2015-127},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/ChenCXY15.bib},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
}

% publisher: corrected to the organisation's actual name (the export read "Computer Linguistics").
@InProceedings{Xie2015,
  author    = {Qizhe Xie and Kai Sun and Su Zhu and Lu Chen and Kai Yu},
  booktitle = {Proceedings of the {SIGDIAL} 2015 Conference, The 16th Annual Meeting of the Special Interest Group on Discourse and Dialogue, 2-4 September 2015, Prague, Czech Republic},
  title     = {Recurrent Polynomial Network for Dialogue State Tracking with Mismatched Semantic Parsers},
  year      = {2015},
  pages     = {295--304},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/sigdial/XieSZCY15.bib},
  doi       = {10.18653/V1/W15-4641},
  timestamp = {Fri, 06 Aug 2021 01:00:00 +0200},
  url       = {https://doi.org/10.18653/v1/w15-4641},
}

% SIGDIAL 2014 conference paper; record sourced from dblp (see biburl).
% The challenge name in the title is braced so sentence-casing styles keep its capitals.
% Note: dblp exports the publisher as "The Association for Computer Linguistics";
% the organisation's actual name is the Association for Computational Linguistics.
@InProceedings{Sun2014,
  author    = {Kai Sun and Lu Chen and Su Zhu and Kai Yu},
  booktitle = {Proceedings of the {SIGDIAL} 2014 Conference, The 15th Annual Meeting of the Special Interest Group on Discourse and Dialogue, 18-20 June 2014, Philadelphia, PA, {USA}},
  title     = {The {SJTU} System for {Dialog State Tracking Challenge} 2},
  year      = {2014},
  pages     = {318--326},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/sigdial/SunCZY14.bib},
  doi       = {10.3115/V1/W14-4343},
  timestamp = {Fri, 06 Aug 2021 01:00:00 +0200},
  url       = {https://doi.org/10.3115/v1/w14-4343},
}

% IEEE SLT 2014 workshop paper; record sourced from dblp (see biburl).
@InProceedings{Sun2014a,
  title     = {A generalized rule based tracker for dialogue state tracking},
  author    = {Kai Sun and Lu Chen and Su Zhu and Kai Yu},
  booktitle = {2014 {IEEE} Spoken Language Technology Workshop, {SLT} 2014, South Lake Tahoe, NV, USA, December 7-10, 2014},
  pages     = {330--335},
  publisher = {{IEEE}},
  year      = {2014},
  doi       = {10.1109/SLT.2014.7078596},
  url       = {https://doi.org/10.1109/SLT.2014.7078596},
  biburl    = {https://dblp.org/rec/conf/slt/SunCZY14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 24 Sep 2020 01:00:00 +0200},
}

% IEEE SLT 2014 workshop paper; record sourced from dblp (see biburl).
@InProceedings{Zhu2014,
  title     = {Semantic parser enhancement for dialogue domain extension with little data},
  author    = {Su Zhu and Lu Chen and Kai Sun and Da Zheng and Kai Yu},
  booktitle = {2014 {IEEE} Spoken Language Technology Workshop, {SLT} 2014, South Lake Tahoe, NV, USA, December 7-10, 2014},
  pages     = {336--341},
  publisher = {{IEEE}},
  year      = {2014},
  doi       = {10.1109/SLT.2014.7078597},
  url       = {https://doi.org/10.1109/SLT.2014.7078597},
  biburl    = {https://dblp.org/rec/conf/slt/ZhuCSZY14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 24 Sep 2020 01:00:00 +0200},
}

% CCL 2013 / NLP-NABD 2013 paper in Springer LNCS vol. 8202; record sourced from dblp (see biburl).
% The doi and url fields hold the bare identifier (plain underscore, no LaTeX escape):
% these fields are typeset verbatim, so an escaped underscore would put a backslash in the link.
@InProceedings{Wang2013,
  author    = {Peilu Wang and Ruihua Sun and Hai Zhao and Kai Yu},
  booktitle = {Chinese Computational Linguistics and Natural Language Processing Based on Naturally Annotated Big Data - 12th China National Conference, {CCL} 2013 and First International Symposium, {NLP-NABD} 2013, Suzhou, China, October 10-12, 2013. Proceedings},
  title     = {A New Word Language Model Evaluation Metric for Character Based Languages},
  year      = {2013},
  editor    = {Maosong Sun and Min Zhang and Dekang Lin and Haifeng Wang},
  pages     = {315--324},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {8202},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/cncl/WangSZY13.bib},
  doi       = {10.1007/978-3-642-41491-6_29},
  timestamp = {Fri, 01 Sep 2023 13:50:17 +0200},
  url       = {https://doi.org/10.1007/978-3-642-41491-6_29},
}

% Journal article (guest editorial, vol. 6 no. 8, 2012); record sourced from dblp (see biburl).
@Article{Williams2012,
  title     = {Introduction to the Issue on Advances in Spoken Dialogue Systems and Mobile Interface},
  author    = {Jason D. Williams and Kai Yu and Brahim Chaib{-}draa and Oliver Lemon and Roberto Pieraccini and Olivier Pietquin and Pascal Poupart and Steve J. Young},
  journal   = {{IEEE} J. Sel. Top. Signal Process.},
  volume    = {6},
  number    = {8},
  pages     = {889--890},
  year      = {2012},
  doi       = {10.1109/JSTSP.2012.2234401},
  url       = {https://doi.org/10.1109/JSTSP.2012.2234401},
  biburl    = {https://dblp.org/rec/journals/jstsp/WilliamsYCLPPPY12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 07 Dec 2022 00:00:00 +0100},
}

% ICMI '12 conference paper published by ACM; record sourced from dblp (see biburl).
% The acronym ICMI in the title is braced so sentence-casing styles do not lowercase it.
@InProceedings{Sim2012,
  author    = {Khe Chai Sim and Shengdong Zhao and Kai Yu and Hank Liao},
  booktitle = {International Conference on Multimodal Interaction, {ICMI} '12, Santa Monica, CA, USA, October 22-26, 2012},
  title     = {{ICMI}'12 grand challenge: haptic voice recognition},
  year      = {2012},
  editor    = {Louis{-}Philippe Morency and Dan Bohus and Hamid K. Aghajan and Justine Cassell and Anton Nijholt and Julien Epps},
  pages     = {363--370},
  publisher = {{ACM}},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icmi/SimZYL12.bib},
  doi       = {10.1145/2388676.2388759},
  timestamp = {Tue, 06 Nov 2018 00:00:00 +0100},
  url       = {https://doi.org/10.1145/2388676.2388759},
}

% ICMI '12 conference paper published by ACM; record sourced from dblp (see biburl).
@InProceedings{Xu2012,
  title     = {Development of the 2012 {SJTU} {HVR} system},
  author    = {Hainan Xu and Yuchen Fan and Kai Yu},
  booktitle = {International Conference on Multimodal Interaction, {ICMI} '12, Santa Monica, CA, USA, October 22-26, 2012},
  editor    = {Louis{-}Philippe Morency and Dan Bohus and Hamid K. Aghajan and Justine Cassell and Anton Nijholt and Julien Epps},
  pages     = {539--544},
  publisher = {{ACM}},
  year      = {2012},
  doi       = {10.1145/2388676.2388790},
  url       = {https://doi.org/10.1145/2388676.2388790},
  biburl    = {https://dblp.org/rec/conf/icmi/XuFY12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 06 Nov 2018 00:00:00 +0100},
}

% SIGDIAL 2015 conference paper; record sourced from dblp (see biburl).
% "Gaussian" is braced in the title so sentence-casing styles keep the proper adjective capitalised.
% Note: dblp exports the publisher as "The Association for Computer Linguistics";
% the organisation's actual name is the Association for Computational Linguistics.
@InProceedings{Chen2015c,
  author    = {Lu Chen and Pei{-}Hao Su and Milica Gasic},
  booktitle = {Proceedings of the {SIGDIAL} 2015 Conference, The 16th Annual Meeting of the Special Interest Group on Discourse and Dialogue, 2-4 September 2015, Prague, Czech Republic},
  title     = {Hyper-parameter Optimisation of {Gaussian} Process Reinforcement Learning for Statistical Dialogue Management},
  year      = {2015},
  pages     = {407--411},
  publisher = {Association for Computational Linguistics},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/sigdial/ChenSG15.bib},
  doi       = {10.18653/V1/W15-4653},
  timestamp = {Fri, 06 Aug 2021 01:00:00 +0200},
  url       = {https://doi.org/10.18653/v1/w15-4653},
}