Skip to content

Commit e7907d8

Browse files
NicolasHug and facebook-github-bot
authored and committed
Add Ray tracing method for RIR (#2850)
Summary: This PR adds the `ray_tracing()` helper to compute a RIR (part of #2624). The implementation is heavily based on `pyroomacoustics`.
Pull Request resolved: #2850
Differential Revision: D41764237
Pulled By: nateanl
fbshipit-source-id: f54d8e5f39d5b26d806dd9a1fba1f30adab0f40e
1 parent a403624 commit e7907d8

File tree

8 files changed

+1074
-48
lines changed

8 files changed

+1074
-48
lines changed

docs/source/prototype.functional.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,4 +30,5 @@ Room Impulse Response Simulation
3030
:toctree: generated
3131
:nosignatures:
3232

33+
ray_tracing
3334
simulate_rir_ism

docs/source/refs.bib

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ @misc{ljspeech17
6666
year = {2017}
6767
}
6868
@misc{conneau2020unsupervised,
69-
title={Unsupervised Cross-lingual Representation Learning for Speech Recognition},
69+
title={Unsupervised Cross-lingual Representation Learning for Speech Recognition},
7070
author={Alexis Conneau and Alexei Baevski and Ronan Collobert and Abdelrahman Mohamed and Michael Auli},
7171
year={2020},
7272
eprint={2006.13979},
@@ -80,7 +80,7 @@ @inproceedings{Gales2014SpeechRA
8080
year={2014}
8181
}
8282
@misc{ardila2020common,
83-
title={Common Voice: A Massively-Multilingual Speech Corpus},
83+
title={Common Voice: A Massively-Multilingual Speech Corpus},
8484
author={Rosana Ardila and Megan Branson and Kelly Davis and Michael Henretty and Michael Kohler and Josh Meyer and Reuben Morais and Lindsay Saunders and Francis M. Tyers and Gregor Weber},
8585
year={2020},
8686
eprint={1912.06670},
@@ -99,16 +99,16 @@ @article{Pratap_2020
9999
}
100100
@INPROCEEDINGS{librilight,
101101
author={J. {Kahn} and M. {Rivière} and W. {Zheng} and E. {Kharitonov} and Q. {Xu} and P. E. {Mazaré} and J. {Karadayi} and V. {Liptchinsky} and R. {Collobert} and C. {Fuegen} and T. {Likhomanenko} and G. {Synnaeve} and A. {Joulin} and A. {Mohamed} and E. {Dupoux}},
102-
booktitle={ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
103-
title={Libri-Light: A Benchmark for ASR with Limited or No Supervision},
102+
booktitle={ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
103+
title={Libri-Light: A Benchmark for ASR with Limited or No Supervision},
104104
year={2020},
105105
pages={7669-7673},
106106
note = {\url{https://github.com/facebookresearch/libri-light}},
107107
}
108108
@INPROCEEDINGS{7178964,
109109
author={Panayotov, Vassil and Chen, Guoguo and Povey, Daniel and Khudanpur, Sanjeev},
110-
booktitle={2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
111-
title={Librispeech: An ASR corpus based on public domain audio books},
110+
booktitle={2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
111+
title={Librispeech: An ASR corpus based on public domain audio books},
112112
year={2015},
113113
volume={},
114114
number={},
@@ -122,47 +122,47 @@ @inproceedings{ott2019fairseq
122122
year = {2019},
123123
}
124124
@misc{baevski2020wav2vec,
125-
title={wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations},
125+
title={wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations},
126126
author={Alexei Baevski and Henry Zhou and Abdelrahman Mohamed and Michael Auli},
127127
year={2020},
128128
eprint={2006.11477},
129129
archivePrefix={arXiv},
130130
primaryClass={cs.CL}
131131
}
132132
@misc{hsu2021hubert,
133-
title={HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units},
133+
title={HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units},
134134
author={Wei-Ning Hsu and Benjamin Bolte and Yao-Hung Hubert Tsai and Kushal Lakhotia and Ruslan Salakhutdinov and Abdelrahman Mohamed},
135135
year={2021},
136136
eprint={2106.07447},
137137
archivePrefix={arXiv},
138138
primaryClass={cs.CL}
139139
}
140140
@misc{hannun2014deep,
141-
title={Deep Speech: Scaling up end-to-end speech recognition},
141+
title={Deep Speech: Scaling up end-to-end speech recognition},
142142
author={Awni Hannun and Carl Case and Jared Casper and Bryan Catanzaro and Greg Diamos and Erich Elsen and Ryan Prenger and Sanjeev Satheesh and Shubho Sengupta and Adam Coates and Andrew Y. Ng},
143143
year={2014},
144144
eprint={1412.5567},
145145
archivePrefix={arXiv},
146146
primaryClass={cs.CL}
147147
}
148148
@misc{graves2012sequence,
149-
title={Sequence Transduction with Recurrent Neural Networks},
149+
title={Sequence Transduction with Recurrent Neural Networks},
150150
author={Alex Graves},
151151
year={2012},
152152
eprint={1211.3711},
153153
archivePrefix={arXiv},
154154
primaryClass={cs.NE}
155155
}
156156
@misc{collobert2016wav2letter,
157-
title={Wav2Letter: an End-to-End ConvNet-based Speech Recognition System},
157+
title={Wav2Letter: an End-to-End ConvNet-based Speech Recognition System},
158158
author={Ronan Collobert and Christian Puhrsch and Gabriel Synnaeve},
159159
year={2016},
160160
eprint={1609.03193},
161161
archivePrefix={arXiv},
162162
primaryClass={cs.LG}
163163
}
164164
@misc{kalchbrenner2018efficient,
165-
title={Efficient Neural Audio Synthesis},
165+
title={Efficient Neural Audio Synthesis},
166166
author={Nal Kalchbrenner and Erich Elsen and Karen Simonyan and Seb Noury and Norman Casagrande and Edward Lockhart and Florian Stimberg and Aaron van den Oord and Sander Dieleman and Koray Kavukcuoglu},
167167
year={2018},
168168
eprint={1802.08435},
@@ -202,26 +202,26 @@ @InProceedings{ brian_mcfee-proc-scipy-2015
202202
}
203203
@INPROCEEDINGS{6701851,
204204
author={Perraudin, Nathanaël and Balazs, Peter and Søndergaard, Peter L.},
205-
booktitle={2013 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics},
206-
title={A fast Griffin-Lim algorithm},
205+
booktitle={2013 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics},
206+
title={A fast Griffin-Lim algorithm},
207207
year={2013},
208208
volume={},
209209
number={},
210210
pages={1-4},
211211
doi={10.1109/WASPAA.2013.6701851}}
212212
@INPROCEEDINGS{1172092,
213213
author={Griffin, D. and Jae Lim},
214-
booktitle={ICASSP '83. IEEE International Conference on Acoustics, Speech, and Signal Processing},
215-
title={Signal estimation from modified short-time Fourier transform},
214+
booktitle={ICASSP '83. IEEE International Conference on Acoustics, Speech, and Signal Processing},
215+
title={Signal estimation from modified short-time Fourier transform},
216216
year={1983},
217217
volume={8},
218218
number={},
219219
pages={804-807},
220220
doi={10.1109/ICASSP.1983.1172092}}
221221
@INPROCEEDINGS{6854049,
222222
author={Ghahremani, Pegah and BabaAli, Bagher and Povey, Daniel and Riedhammer, Korbinian and Trmal, Jan and Khudanpur, Sanjeev},
223-
booktitle={2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
224-
title={A pitch extraction algorithm tuned for automatic speech recognition},
223+
booktitle={2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
224+
title={A pitch extraction algorithm tuned for automatic speech recognition},
225225
year={2014},
226226
volume={},
227227
number={},
@@ -254,16 +254,16 @@ @inproceedings{higuchi2016robust
254254
organization={IEEE}
255255
}
256256
@inproceedings{shi2021emformer,
257-
title={Emformer: Efficient Memory Transformer Based Acoustic Model for Low Latency Streaming Speech Recognition},
257+
title={Emformer: Efficient Memory Transformer Based Acoustic Model for Low Latency Streaming Speech Recognition},
258258
author={Shi, Yangyang and Wang, Yongqiang and Wu, Chunyang and Yeh, Ching-Feng and Chan, Julian and Zhang, Frank and Le, Duc and Seltzer, Mike},
259-
booktitle={ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
259+
booktitle={ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
260260
pages={6783-6787},
261261
year={2021}
262262
}
263263
@inproceedings{9747706,
264264
author={Shi, Yangyang and Wu, Chunyang and Wang, Dilin and Xiao, Alex and Mahadeokar, Jay and Zhang, Xiaohui and Liu, Chunxi and Li, Ke and Shangguan, Yuan and Nagaraja, Varun and Kalinli, Ozlem and Seltzer, Mike},
265-
booktitle={ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
266-
title={Streaming Transformer Transducer based Speech Recognition Using Non-Causal Convolution},
265+
booktitle={ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
266+
title={Streaming Transformer Transducer based Speech Recognition Using Non-Causal Convolution},
267267
year={2022},
268268
volume={},
269269
number={},
@@ -441,8 +441,8 @@ @article{coucke2018snips
441441
}
442442
@INPROCEEDINGS{9746490,
443443
author={Srivastava, Sangeeta and Wang, Yun and Tjandra, Andros and Kumar, Anurag and Liu, Chunxi and Singh, Kritika and Saraf, Yatharth},
444-
booktitle={ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
445-
title={Conformer-Based Self-Supervised Learning For Non-Speech Audio Tasks},
444+
booktitle={ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
445+
title={Conformer-Based Self-Supervised Learning For Non-Speech Audio Tasks},
446446
year={2022},
447447
volume={},
448448
number={},

0 commit comments

Comments
 (0)