You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Summary:
This PR adds the `ray_tracing()` helper for computing a room impulse response (RIR), as part of #2624. The implementation is heavily based on `pyroomacoustics`.
Pull Request resolved: #2850
Differential Revision: D41764237
Pulled By: nateanl
fbshipit-source-id: f54d8e5f39d5b26d806dd9a1fba1f30adab0f40e
title={Common Voice: A Massively-Multilingual Speech Corpus},
83
+
title={Common Voice: A Massively-Multilingual Speech Corpus},
84
84
author={Rosana Ardila and Megan Branson and Kelly Davis and Michael Henretty and Michael Kohler and Josh Meyer and Reuben Morais and Lindsay Saunders and Francis M. Tyers and Gregor Weber},
85
85
year={2020},
86
86
eprint={1912.06670},
@@ -99,16 +99,16 @@ @article{Pratap_2020
99
99
}
100
100
@INPROCEEDINGS{librilight,
101
101
author={J. {Kahn} and M. {Rivière} and W. {Zheng} and E. {Kharitonov} and Q. {Xu} and P. E. {Mazaré} and J. {Karadayi} and V. {Liptchinsky} and R. {Collobert} and C. {Fuegen} and T. {Likhomanenko} and G. {Synnaeve} and A. {Joulin} and A. {Mohamed} and E. {Dupoux}},
102
-
booktitle={ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
103
-
title={Libri-Light: A Benchmark for ASR with Limited or No Supervision},
102
+
booktitle={ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
103
+
title={Libri-Light: A Benchmark for ASR with Limited or No Supervision},
title={wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations},
125
+
title={wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations},
126
126
author={Alexei Baevski and Henry Zhou and Abdelrahman Mohamed and Michael Auli},
127
127
year={2020},
128
128
eprint={2006.11477},
129
129
archivePrefix={arXiv},
130
130
primaryClass={cs.CL}
131
131
}
132
132
@misc{hsu2021hubert,
133
-
title={HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units},
133
+
title={HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units},
134
134
author={Wei-Ning Hsu and Benjamin Bolte and Yao-Hung Hubert Tsai and Kushal Lakhotia and Ruslan Salakhutdinov and Abdelrahman Mohamed},
135
135
year={2021},
136
136
eprint={2106.07447},
137
137
archivePrefix={arXiv},
138
138
primaryClass={cs.CL}
139
139
}
140
140
@misc{hannun2014deep,
141
-
title={Deep Speech: Scaling up end-to-end speech recognition},
141
+
title={Deep Speech: Scaling up end-to-end speech recognition},
142
142
author={Awni Hannun and Carl Case and Jared Casper and Bryan Catanzaro and Greg Diamos and Erich Elsen and Ryan Prenger and Sanjeev Satheesh and Shubho Sengupta and Adam Coates and Andrew Y. Ng},
143
143
year={2014},
144
144
eprint={1412.5567},
145
145
archivePrefix={arXiv},
146
146
primaryClass={cs.CL}
147
147
}
148
148
@misc{graves2012sequence,
149
-
title={Sequence Transduction with Recurrent Neural Networks},
149
+
title={Sequence Transduction with Recurrent Neural Networks},
150
150
author={Alex Graves},
151
151
year={2012},
152
152
eprint={1211.3711},
153
153
archivePrefix={arXiv},
154
154
primaryClass={cs.NE}
155
155
}
156
156
@misc{collobert2016wav2letter,
157
-
title={Wav2Letter: an End-to-End ConvNet-based Speech Recognition System},
157
+
title={Wav2Letter: an End-to-End ConvNet-based Speech Recognition System},
158
158
author={Ronan Collobert and Christian Puhrsch and Gabriel Synnaeve},
159
159
year={2016},
160
160
eprint={1609.03193},
161
161
archivePrefix={arXiv},
162
162
primaryClass={cs.LG}
163
163
}
164
164
@misc{kalchbrenner2018efficient,
165
-
title={Efficient Neural Audio Synthesis},
165
+
title={Efficient Neural Audio Synthesis},
166
166
author={Nal Kalchbrenner and Erich Elsen and Karen Simonyan and Seb Noury and Norman Casagrande and Edward Lockhart and Florian Stimberg and Aaron van den Oord and Sander Dieleman and Koray Kavukcuoglu},
title={Emformer: Efficient Memory Transformer Based Acoustic Model for Low Latency Streaming Speech Recognition},
257
+
title={Emformer: Efficient Memory Transformer Based Acoustic Model for Low Latency Streaming Speech Recognition},
258
258
author={Shi, Yangyang and Wang, Yongqiang and Wu, Chunyang and Yeh, Ching-Feng and Chan, Julian and Zhang, Frank and Le, Duc and Seltzer, Mike},
259
-
booktitle={ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
259
+
booktitle={ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
260
260
pages={6783-6787},
261
261
year={2021}
262
262
}
263
263
@inproceedings{9747706,
264
264
author={Shi, Yangyang and Wu, Chunyang and Wang, Dilin and Xiao, Alex and Mahadeokar, Jay and Zhang, Xiaohui and Liu, Chunxi and Li, Ke and Shangguan, Yuan and Nagaraja, Varun and Kalinli, Ozlem and Seltzer, Mike},
265
-
booktitle={ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
266
-
title={Streaming Transformer Transducer based Speech Recognition Using Non-Causal Convolution},
265
+
booktitle={ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
266
+
title={Streaming Transformer Transducer based Speech Recognition Using Non-Causal Convolution},
267
267
year={2022},
268
268
volume={},
269
269
number={},
@@ -441,8 +441,8 @@ @article{coucke2018snips
441
441
}
442
442
@INPROCEEDINGS{9746490,
443
443
author={Srivastava, Sangeeta and Wang, Yun and Tjandra, Andros and Kumar, Anurag and Liu, Chunxi and Singh, Kritika and Saraf, Yatharth},
444
-
booktitle={ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
445
-
title={Conformer-Based Self-Supervised Learning For Non-Speech Audio Tasks},
444
+
booktitle={ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
445
+
title={Conformer-Based Self-Supervised Learning For Non-Speech Audio Tasks},
0 commit comments