forked from george-gca/multi-language-al-folio
-
Notifications
You must be signed in to change notification settings - Fork 0
/
papers.bib
5815 lines (5416 loc) · 287 KB
/
papers.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@Article{You2023,
  author    = {You, Jiawei and Huang, Ganyu and Han, Tianyuan and Yang, Haoze and Shen, Liping},
  title     = {A Unified Framework From Face Image Restoration to Data Augmentation Using Generative Prior},
  journal   = {{IEEE} Access},
  year      = {2023},
  volume    = {11},
  pages     = {2907--2919},
  doi       = {10.1109/ACCESS.2022.3233868},
  url       = {https://doi.org/10.1109/ACCESS.2022.3233868},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/access/YouHHYS23.bib},
  timestamp = {Tue, 31 Jan 2023 00:00:00 +0100},
}
@InProceedings{Han2023,
  author    = {Han, Tianyuan and Huang, Ganyu and Li, Chunhui and Shen, Liping},
  title     = {Human Pose Estimation with Combined Feature Maps and Joint Embeddings},
  booktitle = {Proceedings of the 2023 International Conference on Advances in Artificial Intelligence and Applications, {AAIA} 2023, Wuhan, China, November 18-20, 2023},
  year      = {2023},
  pages     = {101--106},
  publisher = {{ACM}},
  doi       = {10.1145/3603273.3636495},
  url       = {https://doi.org/10.1145/3603273.3636495},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/aaia/HanHLS23.bib},
  timestamp = {Sat, 13 Jan 2024 00:00:00 +0100},
}
@InProceedings{He2023,
  author    = {He, Chaofan and Li, Chunhui and Han, Tianyuan and Shen, Liping},
  title     = {Assessing and Enhancing {LLMs}: {A} Physics and History Dataset and One-More-Check Pipeline Method},
  booktitle = {Neural Information Processing - 30th International Conference, {ICONIP} 2023, Changsha, China, November 20-23, 2023, Proceedings, Part {XIII}},
  year      = {2023},
  volume    = {1967},
  pages     = {504--517},
  editor    = {Luo, Biao and Cheng, Long and Wu, Zheng{-}Guang and Li, Hongyi and Li, Chaojie},
  series    = {Communications in Computer and Information Science},
  publisher = {Springer},
  doi       = {10.1007/978-981-99-8178-6_38},
  url       = {https://doi.org/10.1007/978-981-99-8178-6_38},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iconip/HeLHS23.bib},
  timestamp = {Mon, 18 Dec 2023 09:47:49 +0100},
}
@InProceedings{Xiong2023,
  author    = {Xiong, Yuhan and You, Jiawei and Shen, Liping},
  title     = {{GAN} Latent Space Manipulation Based Augmentation for Unbalanced Emotion Datasets},
  booktitle = {International Joint Conference on Neural Networks, {IJCNN} 2023, Gold Coast, Australia, June 18-23, 2023},
  year      = {2023},
  pages     = {1--8},
  publisher = {{IEEE}},
  doi       = {10.1109/IJCNN54540.2023.10191807},
  url       = {https://doi.org/10.1109/IJCNN54540.2023.10191807},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/ijcnn/XiongYS23.bib},
  timestamp = {Wed, 09 Aug 2023 16:25:09 +0200},
}
@InProceedings{Chen2022,
  author    = {Chen, Jisen and Shen, Jian and Long, Ting and Shen, Liping and Zhang, Weinan and Yu, Yong},
  title     = {Heterogeneous Graph Representation for Knowledge Tracing},
  booktitle = {Neural Information Processing - 29th International Conference, {ICONIP} 2022, Virtual Event, November 22-26, 2022, Proceedings, Part {I}},
  year      = {2022},
  volume    = {13623},
  pages     = {224--235},
  editor    = {Tanveer, Mohammad and Agarwal, Sonali and Ozawa, Seiichi and Ekbal, Asif and Jatowt, Adam},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  doi       = {10.1007/978-3-031-30105-6_19},
  url       = {https://doi.org/10.1007/978-3-031-30105-6_19},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iconip/ChenSLS0022.bib},
  timestamp = {Thu, 20 Apr 2023 15:23:54 +0200},
}
@InProceedings{Yang2022,
  author    = {Yang, Haoze and Lan, Kunyao and You, Jiawei and Shen, Liping},
  title     = {A simple but practical method: How to improve the usage of entities in the {Chinese} question generation},
  booktitle = {International Joint Conference on Neural Networks, {IJCNN} 2022, Padua, Italy, July 18-23, 2022},
  year      = {2022},
  pages     = {1--8},
  publisher = {{IEEE}},
  doi       = {10.1109/IJCNN55064.2022.9891960},
  url       = {https://doi.org/10.1109/IJCNN55064.2022.9891960},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/ijcnn/YangLYS22.bib},
  timestamp = {Mon, 10 Oct 2022 17:40:09 +0200},
}
@InProceedings{You2022,
  author    = {You, Jiawei and Han, Tianyuan and Shen, Liping},
  title     = {From Uniform Models To Generic Representations: Stock Return Prediction With Pre-training},
  booktitle = {International Joint Conference on Neural Networks, {IJCNN} 2022, Padua, Italy, July 18-23, 2022},
  year      = {2022},
  pages     = {1--8},
  publisher = {{IEEE}},
  doi       = {10.1109/IJCNN55064.2022.9892697},
  url       = {https://doi.org/10.1109/IJCNN55064.2022.9892697},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/ijcnn/YouHS22.bib},
  timestamp = {Mon, 10 Oct 2022 01:00:00 +0200},
}
@InProceedings{Gong2023,
  author    = {Gong, Xun and Wu, Yu and Li, Jinyu and Liu, Shujie and Zhao, Rui and Chen, Xie and Qian, Yanmin},
  title     = {{LongFNT}: Long-Form Speech Recognition with Factorized Neural Transducer},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP49357.2023.10096900},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096900},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/GongWLLZCQ23.bib},
  timestamp = {Sun, 05 Nov 2023 16:51:21 +0100},
}
@InProceedings{Gong2023a,
  author    = {Gong, Xun and Wang, Wei and Shao, Hang and Chen, Xie and Qian, Yanmin},
  title     = {Factorized {AED}: Factorized Attention-Based Encoder-Decoder for Text-Only Domain Adaptive {ASR}},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP49357.2023.10095937},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10095937},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/GongWSCQ23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}
@InProceedings{Han2023a,
  author    = {Han, Bing and Chen, Zhengyang and Qian, Yanmin},
  title     = {Exploring Binary Classification Loss for Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP49357.2023.10094954},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10094954},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/HanCQ23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}
@InProceedings{Han2023b,
  author    = {Han, Bing and Huang, Wen and Chen, Zhengyang and Qian, Yanmin},
  title     = {Improving {DINO}-Based Self-Supervised Speaker Verification with Progressive Cluster-Aware Training},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2023 - Workshops, Rhodes Island, Greece, June 4-10, 2023},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSPW59220.2023.10192957},
  url       = {https://doi.org/10.1109/ICASSPW59220.2023.10192957},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/HanHCQ23.bib},
  timestamp = {Mon, 07 Aug 2023 15:56:26 +0200},
}
@InProceedings{Li2023,
  author    = {Li, Jiahong and Li, Chenda and Wu, Yifei and Qian, Yanmin},
  title     = {Robust Audio-Visual {ASR} with Unified Cross-Modal Attention},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP49357.2023.10096893},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096893},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/LiLWQ23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}
@InProceedings{Li2023a,
  author    = {Li, Chenda and Qian, Yao and Chen, Zhuo and Wang, Dongmei and Yoshioka, Takuya and Liu, Shujie and Qian, Yanmin and Zeng, Michael},
  title     = {Target Sound Extraction with Variable Cross-Modality Clues},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP49357.2023.10095266},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10095266},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/LiQCWYLQZ23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}
@InProceedings{Li2023b,
  author    = {Li, Chenda and Wu, Yifei and Qian, Yanmin},
  title     = {Predictive {Skim}: Contrastive Predictive Coding for Low-Latency Online Speech Separation},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP49357.2023.10097107},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10097107},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/LiWQ23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}
@InProceedings{Liu2023,
  author    = {Liu, Tao and Chen, Zhengyang and Qian, Yanmin and Yu, Kai},
  title     = {Multi-Speaker End-to-End Multi-Modal Speaker Diarization System for the {MISP} 2022 Challenge},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  year      = {2023},
  pages     = {1--2},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP49357.2023.10096327},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096327},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/LiuCQY23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}
@InProceedings{Shao2023,
  author    = {Shao, Hang and Tan, Tian and Wang, Wei and Gong, Xun and Qian, Yanmin},
  title     = {Joint Discriminator and Transfer Based Fast Domain Adaptation For End-To-End Speech Recognition},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP49357.2023.10095910},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10095910},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/ShaoTWGQ23.bib},
  timestamp = {Mon, 20 Nov 2023 00:00:00 +0100},
}
@InProceedings{Wang2023,
  author    = {Wang, Haoyu and Liu, Bei and Wu, Yifei and Chen, Zhengyang and Qian, Yanmin},
  title     = {Lowbit Neural Network Quantization for Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2023 - Workshops, Rhodes Island, Greece, June 4-10, 2023},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSPW59220.2023.10193337},
  url       = {https://doi.org/10.1109/ICASSPW59220.2023.10193337},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/WangLWCQ23.bib},
  timestamp = {Mon, 07 Aug 2023 01:00:00 +0200},
}
@InProceedings{Wang2023a,
  author    = {Wang, Hongji and Liang, Chengdong and Wang, Shuai and Chen, Zhengyang and Zhang, Binbin and Xiang, Xu and Deng, Yanlei and Qian, Yanmin},
  title     = {Wespeaker: {A} Research and Production Oriented Speaker Embedding Learning Toolkit},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP49357.2023.10096626},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096626},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/WangLWCZXDQ23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}
@InProceedings{Wang2023b,
  author    = {Wang, Wei and Qian, Yanmin},
  title     = {{HuBERT-AGG}: Aggregated Representation Distillation of Hidden-Unit {BERT} for Robust Speech Recognition},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP49357.2023.10096308},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096308},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/WangQ23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}
@InProceedings{Wu2023,
  author    = {Wu, Yifei and Li, Chenda and Qian, Yanmin},
  title     = {Light-Weight {Visualvoice}: Neural Network Quantization On Audio Visual Speech Separation},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2023 - Workshops, Rhodes Island, Greece, June 4-10, 2023},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSPW59220.2023.10193263},
  url       = {https://doi.org/10.1109/ICASSPW59220.2023.10193263},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/WuLQ23.bib},
  timestamp = {Mon, 07 Aug 2023 01:00:00 +0200},
}
@InProceedings{Yu2023,
  author    = {Yu, Haibin and Hu, Yuxuan and Qian, Yao and Jin, Ma and Liu, Linquan and Liu, Shujie and Shi, Yu and Qian, Yanmin and Lin, Edward and Zeng, Michael},
  title     = {Code-Switching Text Generation and Injection in {Mandarin-English} {ASR}},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP49357.2023.10096317},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096317},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/YuHQJLLSQLZ23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}
@InProceedings{Zhang2023,
  author    = {Zhang, Leying and Chen, Zhengyang and Qian, Yanmin},
  title     = {Adaptive Large Margin Fine-Tuning For Robust Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/ICASSP49357.2023.10094744},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10094744},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhangCQ23.bib},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
}
@InProceedings{Le2023,
  author    = {Le, Chenyang and Qian, Yao and Zhou, Long and Liu, Shujie and Qian, Yanmin and Zeng, Michael and Huang, Xuedong},
  title     = {{ComSL}: {A} Composite Speech-Language Model for End-to-End Speech-to-Text Translation},
  booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, {NeurIPS} 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  year      = {2023},
  editor    = {Oh, Alice and Naumann, Tristan and Globerson, Amir and Saenko, Kate and Hardt, Moritz and Levine, Sergey},
  url       = {http://papers.nips.cc/paper_files/paper/2023/hash/b6262f7a34e5d641cdb3d33dc9ad1a5a-Abstract-Conference.html},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/nips/LeQZLQ0023.bib},
  timestamp = {Fri, 01 Mar 2024 00:00:00 +0100},
}
@InProceedings{Masuyama2023,
  author    = {Masuyama, Yoshiki and Chang, Xuankai and Zhang, Wangyou and Cornell, Samuele and Wang, Zhong{-}Qiu and Ono, Nobutaka and Qian, Yanmin and Watanabe, Shinji},
  title     = {Exploring the Integration of Speech Separation and Recognition with Self-Supervised Learning Representation},
  booktitle = {{IEEE} Workshop on Applications of Signal Processing to Audio and Acoustics, {WASPAA} 2023, New Paltz, NY, USA, October 22-25, 2023},
  year      = {2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  doi       = {10.1109/WASPAA58266.2023.10248096},
  url       = {https://doi.org/10.1109/WASPAA58266.2023.10248096},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/waspaa/MasuyamaCZCWOQW23.bib},
  timestamp = {Sat, 23 Sep 2023 11:31:10 +0200},
}
@Misc{Lu2023,
  author       = {Lu, Yen{-}Ju and Chang, Xuankai and Li, Chenda and Zhang, Wangyou and Cornell, Samuele and Ni, Zhaoheng and Masuyama, Yoshiki and Yan, Brian and Scheibler, Robin and Wang, Zhong{-}Qiu and Tsao, Yu and Qian, Yanmin and Watanabe, Shinji},
  title        = {Software Design and User Interface of {ESPnet-SE++}: Speech Enhancement for Robust Speech Processing (espnet-v.202310) (Version 1)},
  year         = {2023},
  month        = oct,
  publisher    = {Zenodo},
  howpublished = {\url{https://doi.org/10.5281/zenodo.10048174}},
  doi          = {10.5281/ZENODO.10048174},
  url          = {https://doi.org/10.5281/zenodo.10048174},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
  biburl       = {https://dblp.org/rec/data/10/LuCLZCNMYSWTQW23.bib},
  timestamp    = {Tue, 21 Nov 2023 00:00:00 +0100},
}
@Article{Han2023c,
  author        = {Han, Bing and Chen, Zhengyang and Qian, Yanmin},
  title         = {Self-Supervised Learning with {Cluster-Aware-DINO} for High-Performance Robust Speaker Verification},
  journal       = {CoRR},
  year          = {2023},
  volume        = {abs/2304.05754},
  doi           = {10.48550/ARXIV.2304.05754},
  url           = {https://doi.org/10.48550/arXiv.2304.05754},
  archiveprefix = {arXiv},
  eprint        = {2304.05754},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2304-05754.bib},
  timestamp     = {Wed, 19 Apr 2023 01:00:00 +0200},
}
@Article{Chen2023,
  author        = {Chen, Zhengyang and Han, Bing and Wang, Shuai and Qian, Yanmin},
  title         = {Attention-based Encoder-Decoder Network for End-to-End Neural Speaker Diarization with Target Speaker Attractor},
  journal       = {CoRR},
  year          = {2023},
  volume        = {abs/2305.10704},
  doi           = {10.48550/ARXIV.2305.10704},
  url           = {https://doi.org/10.48550/arXiv.2305.10704},
  archiveprefix = {arXiv},
  eprint        = {2305.10704},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2305-10704.bib},
  timestamp     = {Thu, 25 May 2023 01:00:00 +0200},
}
@Article{Shao2023a,
  author        = {Shao, Hang and Wang, Wei and Liu, Bei and Gong, Xun and Wang, Haoyu and Qian, Yanmin},
  title         = {{Whisper-KDQ}: {A} Lightweight {Whisper} via Guided Knowledge Distillation and Quantization for Efficient {ASR}},
  journal       = {CoRR},
  year          = {2023},
  volume        = {abs/2305.10788},
  doi           = {10.48550/ARXIV.2305.10788},
  url           = {https://doi.org/10.48550/arXiv.2305.10788},
  archiveprefix = {arXiv},
  eprint        = {2305.10788},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2305-10788.bib},
  timestamp     = {Thu, 25 May 2023 01:00:00 +0200},
}
@Article{Zhang2023a,
  author        = {Zhang, Wangyou and Qian, Yanmin},
  title         = {Weakly-Supervised Speech Pre-training: {A} Case Study on Target Speech Recognition},
  journal       = {CoRR},
  year          = {2023},
  volume        = {abs/2305.16286},
  doi           = {10.48550/ARXIV.2305.16286},
  url           = {https://doi.org/10.48550/arXiv.2305.16286},
  archiveprefix = {arXiv},
  eprint        = {2305.16286},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2305-16286.bib},
  timestamp     = {Tue, 06 Jun 2023 01:00:00 +0200},
}
@Article{Li2023d,
  author        = {Li, Chenda and Qian, Yao and Chen, Zhuo and Kanda, Naoyuki and Wang, Dongmei and Yoshioka, Takuya and Qian, Yanmin and Zeng, Michael},
  title         = {Adapting Multi-Lingual {ASR} Models for Handling Multiple Talkers},
  journal       = {CoRR},
  year          = {2023},
  volume        = {abs/2305.18747},
  doi           = {10.48550/ARXIV.2305.18747},
  url           = {https://doi.org/10.48550/arXiv.2305.18747},
  archiveprefix = {arXiv},
  eprint        = {2305.18747},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2305-18747.bib},
  timestamp     = {Sat, 04 Nov 2023 00:00:00 +0100},
}
@Article{Han2023e,
  author        = {Han, Bing and Dai, Junyu and Song, Xuchen and Hao, Weituo and He, Xinyan and Guo, Dong and Chen, Jitong and Wang, Yuxuan and Qian, Yanmin},
  title         = {{InstructME}: An Instruction Guided Music Edit And Remix Framework with Latent Diffusion Models},
  journal       = {CoRR},
  year          = {2023},
  volume        = {abs/2308.14360},
  doi           = {10.48550/ARXIV.2308.14360},
  url           = {https://doi.org/10.48550/arXiv.2308.14360},
  archiveprefix = {arXiv},
  eprint        = {2308.14360},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2308-14360.bib},
  timestamp     = {Fri, 01 Sep 2023 01:00:00 +0200},
}
@Article{Chen2023a,
  author        = {Chen, Zhengyang and Han, Bing and Wang, Shuai and Qian, Yanmin},
  title         = {Attention-based Encoder-Decoder End-to-End Neural Diarization with Embedding Enhancer},
  journal       = {CoRR},
  year          = {2023},
  volume        = {abs/2309.06672},
  doi           = {10.48550/ARXIV.2309.06672},
  url           = {https://doi.org/10.48550/arXiv.2309.06672},
  archiveprefix = {arXiv},
  eprint        = {2309.06672},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-06672.bib},
  timestamp     = {Tue, 19 Sep 2023 01:00:00 +0200},
}
@Article{Ao2023,
  author        = {Ao, Junyi and Yildirim, Mehmet Sinan and Ge, Meng and Wang, Shuai and Tao, Ruijie and Qian, Yanmin and Deng, Liqun and Xiao, Longshuai and Li, Haizhou},
  title         = {{USED}: Universal Speaker Extraction and Diarization},
  journal       = {CoRR},
  year          = {2023},
  volume        = {abs/2309.10674},
  doi           = {10.48550/ARXIV.2309.10674},
  url           = {https://doi.org/10.48550/arXiv.2309.10674},
  archiveprefix = {arXiv},
  eprint        = {2309.10674},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-10674.bib},
  timestamp     = {Mon, 25 Sep 2023 01:00:00 +0200},
}
@Article{Wang2023c,
  author        = {Wang, Shuai and Bai, Qibing and Liu, Qi and Yu, Jianwei and Chen, Zhengyang and Han, Bing and Qian, Yanmin and Li, Haizhou},
  title         = {Leveraging In-the-Wild Data for Effective Self-Supervised Pretraining in Speaker Recognition},
  journal       = {CoRR},
  year          = {2023},
  volume        = {abs/2309.11730},
  doi           = {10.48550/ARXIV.2309.11730},
  url           = {https://doi.org/10.48550/arXiv.2309.11730},
  archiveprefix = {arXiv},
  eprint        = {2309.11730},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-11730.bib},
  timestamp     = {Tue, 26 Sep 2023 01:00:00 +0200},
}
@Article{Liang2023,
  author        = {Liang, Yuhao and Shi, Mohan and Yu, Fan and Li, Yangze and Zhang, Shiliang and Du, Zhihao and Chen, Qian and Xie, Lei and Qian, Yanmin and Wu, Jian and Chen, Zhuo and Lee, Kong Aik and Yan, Zhijie and Bu, Hui},
  title         = {The second multi-channel multi-party meeting transcription challenge ({M2MeT} 2.0): {A} benchmark for speaker-attributed {ASR}},
  journal       = {CoRR},
  year          = {2023},
  volume        = {abs/2309.13573},
  doi           = {10.48550/ARXIV.2309.13573},
  url           = {https://doi.org/10.48550/arXiv.2309.13573},
  archiveprefix = {arXiv},
  eprint        = {2309.13573},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-13573.bib},
  timestamp     = {Wed, 27 Sep 2023 01:00:00 +0200},
}
@Article{Zhang2023b,
  author        = {Zhang, Leying and Qian, Yao and Yu, Linfeng and Wang, Heming and Wang, Xinkai and Yang, Hemin and Zhou, Long and Liu, Shujie and Qian, Yanmin and Zeng, Michael},
  title         = {Diffusion Conditional Expectation Model for Efficient and Robust Target Speech Extraction},
  journal       = {CoRR},
  year          = {2023},
  volume        = {abs/2309.13874},
  doi           = {10.48550/ARXIV.2309.13874},
  url           = {https://doi.org/10.48550/arXiv.2309.13874},
  archiveprefix = {arXiv},
  eprint        = {2309.13874},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-13874.bib},
  timestamp     = {Sat, 21 Oct 2023 01:00:00 +0200},
}
@Article{Zhang2023c,
  author        = {Zhang, Wangyou and Saijo, Kohei and Wang, Zhong{-}Qiu and Watanabe, Shinji and Qian, Yanmin},
  title         = {Toward Universal Speech Enhancement for Diverse Input Conditions},
  journal       = {CoRR},
  year          = {2023},
  volume        = {abs/2309.17384},
  doi           = {10.48550/ARXIV.2309.17384},
  url           = {https://doi.org/10.48550/arXiv.2309.17384},
  archiveprefix = {arXiv},
  eprint        = {2309.17384},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2309-17384.bib},
  timestamp     = {Tue, 17 Oct 2023 01:00:00 +0200},
}
@Article{Shao2023b,
  author        = {Shao, Hang and Liu, Bei and Qian, Yanmin},
  title         = {One-Shot Sensitivity-Aware Mixed Sparsity Pruning for Large Language Models},
  journal       = {CoRR},
  year          = {2023},
  volume        = {abs/2310.09499},
  doi           = {10.48550/ARXIV.2310.09499},
  url           = {https://doi.org/10.48550/arXiv.2310.09499},
  archiveprefix = {arXiv},
  eprint        = {2310.09499},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2310-09499.bib},
  timestamp     = {Wed, 25 Oct 2023 01:00:00 +0200},
}
@Article{Yang2023,
  author        = {Yang, Dongning and Wang, Wei and Qian, Yanmin},
  title         = {{FAT-HuBERT}: Front-end Adaptive Training of Hidden-unit {BERT} for Distortion-Invariant Robust Speech Recognition},
  journal       = {CoRR},
  year          = {2023},
  volume        = {abs/2311.17790},
  doi           = {10.48550/ARXIV.2311.17790},
  url           = {https://doi.org/10.48550/arXiv.2311.17790},
  archiveprefix = {arXiv},
  eprint        = {2311.17790},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2311-17790.bib},
  timestamp     = {Tue, 05 Dec 2023 00:00:00 +0100},
}
@Article{Chen2022a,
  author    = {Chen, Sanyuan and Wang, Chengyi and Chen, Zhengyang and Wu, Yu and Liu, Shujie and Chen, Zhuo and Li, Jinyu and Kanda, Naoyuki and Yoshioka, Takuya and Xiao, Xiong and Wu, Jian and Zhou, Long and Ren, Shuo and Qian, Yanmin and Qian, Yao and Wu, Jian and Zeng, Michael and Yu, Xiangzhan and Wei, Furu},
  title     = {{WavLM}: Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing},
  journal   = {{IEEE} J. Sel. Top. Signal Process.},
  year      = {2022},
  volume    = {16},
  number    = {6},
  pages     = {1505--1518},
  doi       = {10.1109/JSTSP.2022.3188113},
  url       = {https://doi.org/10.1109/JSTSP.2022.3188113},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/jstsp/ChenWCWLCLKYXWZ22.bib},
  timestamp = {Sun, 13 Nov 2022 00:00:00 +0100},
}
@Article{Qian2022,
  author    = {Qian, Yanmin and Zhou, Zhikai},
  title     = {Optimizing Data Usage for Low-Resource Speech Recognition},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  year      = {2022},
  volume    = {30},
  pages     = {394--403},
  doi       = {10.1109/TASLP.2022.3140552},
  url       = {https://doi.org/10.1109/TASLP.2022.3140552},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/QianZ22.bib},
  timestamp = {Tue, 08 Feb 2022 00:00:00 +0100},
}
@Article{Li2022,
  author    = {Li, Chenda and Chen, Zhuo and Qian, Yanmin},
  title     = {Dual-Path Modeling With Memory Embedding Model for Continuous Speech Separation},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  year      = {2022},
  volume    = {30},
  pages     = {1508--1520},
  doi       = {10.1109/TASLP.2022.3165712},
  url       = {https://doi.org/10.1109/TASLP.2022.3165712},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/LiCQ22.bib},
  timestamp = {Wed, 18 May 2022 01:00:00 +0200},
}
@Article{Qian2022a,
  author    = {Qian, Yanmin and Gong, Xun and Huang, Houjun},
  title     = {Layer-Wise Fast Adaptation for End-to-End Multi-Accent Speech Recognition},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  year      = {2022},
  volume    = {30},
  pages     = {2842--2853},
  doi       = {10.1109/TASLP.2022.3198546},
  url       = {https://doi.org/10.1109/TASLP.2022.3198546},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/QianGH22.bib},
  timestamp = {Fri, 23 Sep 2022 01:00:00 +0200},
}
@Article{Zhang2022,
  author    = {Wangyou Zhang and Xuankai Chang and Christoph B{\"{o}}ddeker and Tomohiro Nakatani and Shinji Watanabe and Yanmin Qian},
  title     = {End-to-End Dereverberation, Beamforming, and Speech Recognition in a Cocktail Party},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {30},
  pages     = {3173--3188},
  year      = {2022},
  doi       = {10.1109/TASLP.2022.3209942},
  url       = {https://doi.org/10.1109/TASLP.2022.3209942},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/journals/taslp/ZhangCBNWQ22.bib},
  timestamp = {Mon, 05 Dec 2022 00:00:00 +0100},
}
@InProceedings{Wu2022,
  author    = {Yifei Wu and Chenda Li and Jinfeng Bai and Zhongqin Wu and Yanmin Qian},
  title     = {Time-Domain Audio-Visual Speech Separation on Low Quality Videos},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {256--260},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9746866},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746866},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/WuLBWQ22.bib},
  timestamp = {Tue, 07 Jun 2022 17:34:47 +0200},
}
% Title: the model name SkiM and the acronym LSTM are braced so that
% case-changing bibliography styles keep their capitalization.
@InProceedings{Li2022a,
  author    = {Chenda Li and Lei Yang and Weiqin Wang and Yanmin Qian},
  title     = {{SkiM}: Skipping Memory {LSTM} for Low-Latency Real-Time Continuous Speech Separation},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {681--685},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9746372},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746372},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/LiYWQ22.bib},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}
@InProceedings{Chen2022b,
  author    = {Zhengyang Chen and Sanyuan Chen and Yu Wu and Yao Qian and Chengyi Wang and Shujie Liu and Yanmin Qian and Michael Zeng},
  title     = {Large-Scale Self-Supervised Speech Representation Learning for Automatic Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {6147--6151},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9747814},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9747814},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/ChenCWQWLQZ22.bib},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}
@InProceedings{Han2022,
  author    = {Bing Han and Zhengyang Chen and Yanmin Qian},
  title     = {Local Information Modeling with Self-Attention for Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {6727--6731},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9746050},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746050},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/HanCQ22.bib},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}
@InProceedings{Zhou2022,
  author    = {Zhikai Zhou and Tian Tan and Yanmin Qian},
  title     = {Punctuation Prediction for Streaming On-Device Speech Recognition},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {7277--7281},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9746366},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746366},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhouTQ22.bib},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}
@InProceedings{Han2022a,
  author    = {Bing Han and Zhengyang Chen and Bei Liu and Yanmin Qian},
  title     = {{MLP-SVNET:} {A} Multi-Layer Perceptrons Based Network for Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {7522--7526},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9747172},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9747172},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/HanCLQ22.bib},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}
@InProceedings{Liu2022,
  author    = {Bei Liu and Haoyu Wang and Zhengyang Chen and Shuai Wang and Yanmin Qian},
  title     = {Self-Knowledge Distillation via Feature Enhancement for Speaker Verification},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {7542--7546},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9746529},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746529},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/LiuWCWQ22.bib},
  timestamp = {Fri, 10 Mar 2023 00:00:00 +0100},
}
@InProceedings{Wang2022,
  author    = {Wei Wang and Shuo Ren and Yao Qian and Shujie Liu and Yu Shi and Yanmin Qian and Michael Zeng},
  title     = {Optimizing Alignment of Speech and Language Latent Spaces for End-To-End Speech Recognition and Understanding},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {7802--7806},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9747760},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9747760},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/WangRQLSQZ22.bib},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}
@InProceedings{Zhou2022a,
  author    = {Zhikai Zhou and Wei Wang and Wangyou Zhang and Yanmin Qian},
  title     = {Exploring Effective Data Utilization for Low-Resource Speech Recognition},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {8192--8196},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9747543},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9747543},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhouWZQ22.bib},
  timestamp = {Tue, 07 Jun 2022 01:00:00 +0200},
}
@InProceedings{Yu2022,
  author    = {Fan Yu and Shiliang Zhang and Pengcheng Guo and Yihui Fu and Zhihao Du and Siqi Zheng and Weilong Huang and Lei Xie and Zheng{-}Hua Tan and DeLiang Wang and Yanmin Qian and Kong Aik Lee and Zhijie Yan and Bin Ma and Xin Xu and Hui Bu},
  title     = {Summary on the {ICASSP} 2022 Multi-Channel Multi-Party Meeting Transcription Grand Challenge},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {9156--9160},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9746270},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746270},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/YuZGFDZHXTWQLYM22.bib},
  timestamp = {Mon, 13 Mar 2023 00:00:00 +0100},
}
% Title: the acronym SJTU is restored to upper case and braced so that
% case-changing bibliography styles keep it; the preposition "for" is lower-cased.
@InProceedings{Wang2022a,
  author    = {Wei Wang and Xun Gong and Yifei Wu and Zhikai Zhou and Chenda Li and Wangyou Zhang and Bing Han and Yanmin Qian},
  title     = {The {SJTU} System for Multimodal Information Based Speech Processing Challenge 2021},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages     = {9261--9265},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9746874},
  url       = {https://doi.org/10.1109/ICASSP43922.2022.9746874},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/icassp/WangGWZLZHQ22.bib},
  timestamp = {Fri, 23 Sep 2022 01:00:00 +0200},
}
@InProceedings{Liu2022a,
  author    = {Bei Liu and Zhengyang Chen and Yanmin Qian},
  title     = {Attentive Feature Fusion for Robust Speaker Verification},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Hanseok Ko and John H. L. Hansen},
  pages     = {286--290},
  publisher = {{ISCA}},
  year      = {2022},
  doi       = {10.21437/INTERSPEECH.2022-478},
  url       = {https://doi.org/10.21437/Interspeech.2022-478},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiuCQ22.bib},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}
@InProceedings{Liu2022b,
  author    = {Bei Liu and Zhengyang Chen and Yanmin Qian},
  title     = {Dual Path Embedding Learning for Speaker Verification with Triplet Attention},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Hanseok Ko and John H. L. Hansen},
  pages     = {291--295},
  publisher = {{ISCA}},
  year      = {2022},
  doi       = {10.21437/INTERSPEECH.2022-481},
  url       = {https://doi.org/10.21437/Interspeech.2022-481},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiuCQ22a.bib},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}
% Title: the model name DF-ResNet is braced so that case-changing
% bibliography styles do not lower-case it.
@InProceedings{Liu2022c,
  author    = {Bei Liu and Zhengyang Chen and Shuai Wang and Haoyu Wang and Bing Han and Yanmin Qian},
  title     = {{DF-ResNet}: Boosting Speaker Verification Performance with Depth-First Design},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Hanseok Ko and John H. L. Hansen},
  pages     = {296--300},
  publisher = {{ISCA}},
  year      = {2022},
  doi       = {10.21437/INTERSPEECH.2022-484},
  url       = {https://doi.org/10.21437/Interspeech.2022-484},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiuCWWHQ22.bib},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}
@InProceedings{Zhang2022a,
  author    = {Leying Zhang and Zhengyang Chen and Yanmin Qian},
  title     = {Enroll-Aware Attentive Statistics Pooling for Target Speaker Verification},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Hanseok Ko and John H. L. Hansen},
  pages     = {311--315},
  publisher = {{ISCA}},
  year      = {2022},
  doi       = {10.21437/INTERSPEECH.2022-645},
  url       = {https://doi.org/10.21437/Interspeech.2022-645},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/ZhangCQ22.bib},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}
% Title: the dataset name MSDWild is braced so that case-changing
% bibliography styles do not lower-case it.
@InProceedings{Liu2022d,
  author    = {Tao Liu and Shuai Fan and Xu Xiang and Hongbo Song and Shaoxiong Lin and Jiaqi Sun and Tianyuan Han and Siyuan Chen and Binwei Yao and Sen Liu and Yifei Wu and Yanmin Qian and Kai Yu},
  title     = {{MSDWild}: Multi-modal Speaker Diarization Dataset in the Wild},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Hanseok Ko and John H. L. Hansen},
  pages     = {1476--1480},
  publisher = {{ISCA}},
  year      = {2022},
  doi       = {10.21437/INTERSPEECH.2022-10466},
  url       = {https://doi.org/10.21437/Interspeech.2022-10466},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/Liu0XSLSHCYLWQ022.bib},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}
% Title: spelling of Non-Autoregressive corrected
% (the stored record read Non-Autoregessive).
@InProceedings{Gong2022,
  author    = {Xun Gong and Zhikai Zhou and Yanmin Qian},
  title     = {Knowledge Transfer and Distillation from Autoregressive to Non-Autoregressive Speech Recognition},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Hanseok Ko and John H. L. Hansen},
  pages     = {2618--2622},
  publisher = {{ISCA}},
  year      = {2022},
  doi       = {10.21437/INTERSPEECH.2022-632},
  url       = {https://doi.org/10.21437/Interspeech.2022-632},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/0005ZQ22.bib},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}
@InProceedings{Han2022b,
  author    = {Bing Han and Zhengyang Chen and Yanmin Qian},
  title     = {Self-Supervised Speaker Verification Using Dynamic Loss-Gate and Label Correction},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Hanseok Ko and John H. L. Hansen},
  pages     = {4780--4784},
  publisher = {{ISCA}},
  year      = {2022},
  doi       = {10.21437/INTERSPEECH.2022-742},
  url       = {https://doi.org/10.21437/Interspeech.2022-742},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/HanCQ22.bib},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}
@InProceedings{Zhang2022b,
  author    = {Wangyou Zhang and Zhuo Chen and Naoyuki Kanda and Shujie Liu and Jinyu Li and Sefik Emre Eskimez and Takuya Yoshioka and Xiong Xiao and Zhong Meng and Yanmin Qian and Furu Wei},
  title     = {Separating Long-Form Speech with Group-wise Permutation Invariant Training},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Hanseok Ko and John H. L. Hansen},
  pages     = {5383--5387},
  publisher = {{ISCA}},
  year      = {2022},
  doi       = {10.21437/INTERSPEECH.2022-10362},
  url       = {https://doi.org/10.21437/Interspeech.2022-10362},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/Zhang0K00EYXMQW22.bib},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}
% Title: the toolkit name ESPnet-SE++ is braced so that case-changing
% bibliography styles do not lower-case it.
@InProceedings{Lu2022,
  author    = {Yen{-}Ju Lu and Xuankai Chang and Chenda Li and Wangyou Zhang and Samuele Cornell and Zhaoheng Ni and Yoshiki Masuyama and Brian Yan and Robin Scheibler and Zhong{-}Qiu Wang and Yu Tsao and Yanmin Qian and Shinji Watanabe},
  title     = {{ESPnet-SE++}: Speech Enhancement for Robust Speech Recognition, Translation, and Understanding},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Hanseok Ko and John H. L. Hansen},
  pages     = {5458--5462},
  publisher = {{ISCA}},
  year      = {2022},
  doi       = {10.21437/INTERSPEECH.2022-10727},
  url       = {https://doi.org/10.21437/Interspeech.2022-10727},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/interspeech/LuCLZCNMYSW0Q022.bib},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
}
@InProceedings{Qu2022,
  author    = {Bowen Qu and Chenda Li and Jinfeng Bai and Yanmin Qian},
  title     = {Improving Speech Separation with Knowledge Distilled from Self-supervised Pre-trained Models},
  booktitle = {13th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2022, Singapore, December 11-14, 2022},
  editor    = {Kong Aik Lee and Hung{-}yi Lee and Yanfeng Lu and Minghui Dong},
  pages     = {329--333},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ISCSLP57327.2022.10038203},
  url       = {https://doi.org/10.1109/ISCSLP57327.2022.10038203},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iscslp/QuLBQ22.bib},
  timestamp = {Fri, 17 Feb 2023 18:27:20 +0100},
}
@InProceedings{Wang2022b,
  author    = {Wei Wang and Wangyou Zhang and Shaoxiong Lin and Yanmin Qian},
  title     = {Text-Informed Knowledge Distillation for Robust Speech Enhancement and Recognition},
  booktitle = {13th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2022, Singapore, December 11-14, 2022},
  editor    = {Kong Aik Lee and Hung{-}yi Lee and Yanfeng Lu and Minghui Dong},
  pages     = {334--338},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ISCSLP57327.2022.10037802},
  url       = {https://doi.org/10.1109/ISCSLP57327.2022.10037802},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iscslp/WangZLQ22.bib},
  timestamp = {Fri, 17 Feb 2023 00:00:00 +0100},
}
@InProceedings{Zhou2022b,
  author    = {Zhikai Zhou and Shuang Cao and Zhengyang Chen and Bei Liu and Ming Xia and Hong Jiang and Yanmin Qian},
  title     = {Medical Difficult Airway Detection using Speech Technology},
  booktitle = {13th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2022, Singapore, December 11-14, 2022},
  editor    = {Kong Aik Lee and Hung{-}yi Lee and Yanfeng Lu and Minghui Dong},
  pages     = {349--353},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ISCSLP57327.2022.10037911},
  url       = {https://doi.org/10.1109/ISCSLP57327.2022.10037911},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iscslp/ZhouCCLXJQ22.bib},
  timestamp = {Fri, 17 Feb 2023 00:00:00 +0100},
}
% Title entered in Title Case; a bibliography style can down-case a title
% but cannot restore capitals that were never stored.
@InProceedings{Huang2022,
  author    = {Houjun Huang and Yanmin Qian},
  title     = {Speaking Style Compensation on Synthetic Audio for Robust Keyword Spotting},
  booktitle = {13th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2022, Singapore, December 11-14, 2022},
  editor    = {Kong Aik Lee and Hung{-}yi Lee and Yanfeng Lu and Minghui Dong},
  pages     = {448--452},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ISCSLP57327.2022.10038031},
  url       = {https://doi.org/10.1109/ISCSLP57327.2022.10038031},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iscslp/HuangQ22.bib},
  timestamp = {Mon, 13 Mar 2023 00:00:00 +0100},
}
@InProceedings{Cheng2022,
  author    = {Gaofeng Cheng and Yifan Chen and Runyan Yang and Qingxuan Li and Zehui Yang and Lingxuan Ye and Pengyuan Zhang and Qingqing Zhang and Lei Xie and Yanmin Qian and Kong Aik Lee and Yonghong Yan},
  title     = {The Conversational Short-phrase Speaker Diarization {(CSSD)} Task: Dataset, Evaluation Metric and Baselines},
  booktitle = {13th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2022, Singapore, December 11-14, 2022},
  editor    = {Kong Aik Lee and Hung{-}yi Lee and Yanfeng Lu and Minghui Dong},
  pages     = {488--492},
  publisher = {{IEEE}},
  year      = {2022},
  doi       = {10.1109/ISCSLP57327.2022.10038258},
  url       = {https://doi.org/10.1109/ISCSLP57327.2022.10038258},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iscslp/ChengCYLYYZZXQLY22.bib},
  timestamp = {Tue, 14 Mar 2023 00:00:00 +0100},
}