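Refactor simulate_rir_ism: add pyroomacoustics to CI, document the function, parameterize its tests, and dispatch make_filter on dtype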

nateanl · nateanl · commit 0ec7746b0753 · 2022-08-29T12:31:47.000-04:00
diff --git a/.circleci/unittest/linux/scripts/install.sh b/.circleci/unittest/linux/scripts/install.sh
@@ -80,7 +80,7 @@ fi
 (
     set -x
     conda install -y -c conda-forge ${NUMBA_DEV_CHANNEL} 'librosa>=0.8.0' parameterized 'requests>=2.20'
-    pip install kaldi-io SoundFile coverage pytest pytest-cov 'scipy==1.7.3' transformers expecttest unidecode inflect Pillow sentencepiece pytorch-lightning 'protobuf<4.21.0' demucs tinytag
+    pip install kaldi-io SoundFile coverage pytest pytest-cov 'scipy==1.7.3' transformers expecttest unidecode inflect Pillow sentencepiece pytorch-lightning 'protobuf<4.21.0' demucs tinytag pyroomacoustics
 )
 # Install fairseq
 git clone https://github.com/pytorch/fairseq
diff --git a/.circleci/unittest/windows/scripts/install.sh b/.circleci/unittest/windows/scripts/install.sh
@@ -90,7 +90,8 @@ esac
         unidecode \
         'protobuf<4.21.0' \
         demucs \
-        tinytag
+        tinytag \
+        pyroomacoustics
 )
 # Install fairseq
 git clone https://github.com/pytorch/fairseq
diff --git a/docs/source/prototype.functional.rst b/docs/source/prototype.functional.rst
@@ -18,3 +18,8 @@ fftconvolve
 ~~~~~~~~~~~
 
 .. autofunction:: fftconvolve
+
+simulate_rir_ism
+~~~~~~~~~~~~~~~~
+
+.. autofunction:: simulate_rir_ism
diff --git a/test/torchaudio_unittest/prototype/functional/autograd_test_impl.py b/test/torchaudio_unittest/prototype/functional/autograd_test_impl.py
@@ -32,11 +32,21 @@ def test_add_noise(self):
         self.assertTrue(gradcheck(F.add_noise, (waveform, noise, lengths, snr)))
         self.assertTrue(gradgradcheck(F.add_noise, (waveform, noise, lengths, snr)))
 
-    def test_simulate_rir_ism(self):
-        room = torch.tensor([9.0, 7.0, 3.0], dtype=self.dtype, device=self.device, requires_grad=True)
-        mic_array = torch.tensor([0.1, 3.5, 1.5], dtype=self.dtype, device=self.device, requires_grad=True).reshape(1, -1).repeat(6,1)
-        source = torch.tensor([8.8,3.5,1.5],dtype=self.dtype, device=self.device, requires_grad=True)
-        max_order= 3
-        e_absorption= torch.rand(7, 6, dtype=self.dtype, device=self.device, requires_grad=True)
-        self.assertTrue(gradcheck(F.simulate_rir_ism, (room, source, mic_array, max_order, e_absorption), eps=1e-2, atol=1e-2))
-        self.assertTrue(gradgradcheck(F.simulate_rir_ism, (room, source, mic_array, max_order, e_absorption), eps=1e-2, atol=1e-2))
+    @parameterized.expand([(2, 1), (3, 4)])
+    def test_simulate_rir_ism(self, D, channel):
+        room = torch.rand(D, dtype=self.dtype, device=self.device, requires_grad=True)
+        mic_array = torch.rand(channel, D, dtype=self.dtype, device=self.device, requires_grad=True)
+        source = torch.rand(D, dtype=self.dtype, device=self.device, requires_grad=True)
+        max_order = 2
+        e_absorption = 0.5
+        output_length = 1000
+        self.assertTrue(
+            gradcheck(
+                F.simulate_rir_ism, (room, source, mic_array, max_order, e_absorption, output_length), atol=1e-3, rtol=1
+            )
+        )
+        self.assertTrue(
+            gradgradcheck(
+                F.simulate_rir_ism, (room, source, mic_array, max_order, e_absorption, output_length), atol=1e-3, rtol=1
+            )
+        )
diff --git a/test/torchaudio_unittest/prototype/functional/functional_test_impl.py b/test/torchaudio_unittest/prototype/functional/functional_test_impl.py
@@ -4,7 +4,7 @@
 import torchaudio.prototype.functional as F
 from parameterized import parameterized
 from scipy import signal
-from torchaudio_unittest.common_utils import nested_params, TestBaseMixin
+from torchaudio_unittest.common_utils import nested_params, skipIfNoModule, TestBaseMixin
 
 
 class FunctionalTestImpl(TestBaseMixin):
@@ -109,36 +109,76 @@ def test_add_noise_length_check(self):
         with self.assertRaisesRegex(ValueError, "Length dimensions"):
             F.add_noise(waveform, noise, lengths, snr)
 
-    def test_simulate_rir_ism(self):
-        room_dim = torch.tensor([9.0, 9.0, 9.0], dtype=self.dtype, device=self.device, requires_grad=True)
-        mic_array = torch.tensor([1, 1, 1], dtype=self.dtype, device=self.device, requires_grad=True).reshape(1, -1).repeat(6,1)
-        source = torch.tensor([7,7,7],dtype=self.dtype, device=self.device, requires_grad=True)
-        max_order= 3
-        e_absorption= torch.rand(7, 6, dtype=self.dtype, device=self.device, requires_grad=True)
-        walls = ["west", "east", "south", "north", "floor", "ceiling"]
-        room2= pra.ShoeBox(
+    @skipIfNoModule("pyroomacoustics")
+    @parameterized.expand([(2, 1), (3, 4)])
+    def test_simulate_rir_ism_single_band(self, D, channel):
+        """Test simulate_rir_ism when absorption coefficients are identical for all walls."""
+        room_dim = torch.rand(D, dtype=self.dtype, device=self.device) + 10
+        mic_array = torch.rand(channel, D, dtype=self.dtype, device=self.device)
+        source = torch.rand(D, dtype=self.dtype, device=self.device)
+        max_order = 3
+        e_absorption = 0.5
+        room = pra.ShoeBox(
+            room_dim.detach().numpy(),
+            fs=16000,
+            materials=pra.Material(e_absorption),
+            max_order=max_order,
+            ray_tracing=False,
+            air_absorption=False,
+        )
+        mic_locs = np.asarray([mic_array[i].tolist() for i in range(channel)]).swapaxes(0, 1)
+        room.add_microphone_array(mic_locs)
+        room.add_source(source.tolist())
+        room.compute_rir()
+        max_len = max([room.rir[i][0].shape[0] for i in range(channel)])
+        actual = torch.zeros(channel, max_len, dtype=self.dtype, device=self.device)
+        for i in range(channel):
+            actual[i, 0 : room.rir[i][0].shape[0]] = torch.from_numpy(room.rir[i][0])
+        expected = F.simulate_rir_ism(room_dim, source, mic_array, max_order, e_absorption)
+        self.assertEqual(expected, actual, atol=4e-4, rtol=2)
+
+    @skipIfNoModule("pyroomacoustics")
+    @parameterized.expand([(2, 1), (3, 4)])
+    def test_simulate_rir_ism_multi_band(self, D, channel):
+        """Test simulate_rir_ism when absorption coefficients are different for all walls."""
+        room_dim = torch.rand(D, dtype=self.dtype, device=self.device) + 10
+        mic_array = torch.rand(channel, D, dtype=self.dtype, device=self.device)
+        source = torch.rand(D, dtype=self.dtype, device=self.device)
+        max_order = 3
+        if D == 2:
+            e_absorption = torch.rand(7, 4, dtype=self.dtype, device=self.device)
+            walls = ["west", "east", "south", "north"]
+        else:
+            e_absorption = torch.rand(7, 6, dtype=self.dtype, device=self.device)
+            walls = ["west", "east", "south", "north", "floor", "ceiling"]
+        room = pra.ShoeBox(
             room_dim.detach().numpy(),
             fs=16000,
             materials={
-                walls[i] : pra.Material(
+                walls[i]: pra.Material(
                     {
-                        "coeffs": e_absorption[:, i].reshape(-1,).detach().numpy(),
+                        "coeffs": e_absorption[:, i]
+                        .reshape(
+                            -1,
+                        )
+                        .detach()
+                        .numpy(),
                         "center_freqs": [125.0, 250.0, 500.0, 1000.0, 2000.0, 4000.0, 8000.0],
                     }
-                ) for i in range(len(walls))
+                )
+                for i in range(len(walls))
             },
             max_order=max_order,
             ray_tracing=False,
             air_absorption=False,
         )
-        mic_locs = np.asarray(
-            [[1.0,1.0,1.0]for _ in range(6)]  # mic 1
-        ).swapaxes(0,1)
-        room2.add_microphone_array(mic_locs)
-        room2.add_source([7.0,7.0,7.0])
-        room2.compute_rir()
-        actual = torch.concat([torch.tensor(room2.rir[0]) for i in range(6)]).to(self.dtype)
+        mic_locs = np.asarray([mic_array[i].tolist() for i in range(channel)]).swapaxes(0, 1)
+        room.add_microphone_array(mic_locs)
+        room.add_source(source.tolist())
+        room.compute_rir()
+        max_len = max([room.rir[i][0].shape[0] for i in range(channel)])
+        actual = torch.zeros(channel, max_len, dtype=self.dtype, device=self.device)
+        for i in range(channel):
+            actual[i, 0 : room.rir[i][0].shape[0]] = torch.from_numpy(room.rir[i][0])
         expected = F.simulate_rir_ism(room_dim, source, mic_array, max_order, e_absorption)
-        self.assertEqual(expected, actual)
-
-
+        self.assertEqual(expected, actual, atol=4e-4, rtol=2)
diff --git a/test/torchaudio_unittest/prototype/functional/torchscript_consistency_test_impl.py b/test/torchaudio_unittest/prototype/functional/torchscript_consistency_test_impl.py
@@ -48,3 +48,27 @@ def test_add_noise(self):
         snr = torch.rand(*leading_dims, dtype=self.dtype, device=self.device, requires_grad=True) * 10
 
         self._assert_consistency(F.add_noise, (waveform, noise, lengths, snr))
+
+    def test_simulate_rir_ism_single_band(self):
+        room_dim = torch.tensor([9.0, 9.0, 9.0], dtype=self.dtype, device=self.device)
+        mic_array = torch.tensor([1, 1, 1], dtype=self.dtype, device=self.device).reshape(1, -1).repeat(6, 1)
+        source = torch.tensor([7, 7, 7], dtype=self.dtype, device=self.device)
+        max_order = 3
+        e_absorption = 0.5
+        center_frequency = torch.tensor([125, 250, 500, 1000, 2000, 4000, 8000], dtype=self.dtype, device=self.device)
+        self._assert_consistency(
+            F.simulate_rir_ism,
+            (room_dim, source, mic_array, max_order, e_absorption, 1000, 81, center_frequency, 343.0, 16000.0),
+        )
+
+    def test_simulate_rir_ism_multi_band(self):
+        room_dim = torch.tensor([9.0, 9.0, 9.0], dtype=self.dtype, device=self.device)
+        mic_array = torch.tensor([1, 1, 1], dtype=self.dtype, device=self.device).reshape(1, -1).repeat(6, 1)
+        source = torch.tensor([7, 7, 7], dtype=self.dtype, device=self.device)
+        max_order = 3
+        e_absorption = torch.rand(7, 6, dtype=self.dtype, device=self.device)
+        center_frequency = torch.tensor([125, 250, 500, 1000, 2000, 4000, 8000], dtype=self.dtype, device=self.device)
+        self._assert_consistency(
+            F.simulate_rir_ism,
+            (room_dim, source, mic_array, max_order, e_absorption, 1000, 81, center_frequency, 343.0, 16000.0),
+        )
diff --git a/torchaudio/csrc/build_rir.cpp b/torchaudio/csrc/build_rir.cpp
@@ -54,15 +54,17 @@ torch::Tensor build_rir(
   return rirs;
 }
 
-torch::Tensor make_filter(
-    torch::Tensor centers,
+template <typename scalar_t>
+void make_filter_impl(
+    torch::Tensor& centers,
     double sample_rate,
-    int64_t n_fft) {
+    int64_t n_fft,
+    torch::Tensor& filters) {
   int64_t n = centers.size(0);
-  torch::Tensor new_bands = torch::zeros({n, 2});
+  torch::Tensor new_bands = torch::zeros({n, 2}, centers.dtype());
   new_bands.requires_grad_(true);
-  float* newband_data = new_bands.data_ptr<float>();
-  const float* centers_data = centers.data_ptr<float>();
+  scalar_t* newband_data = new_bands.data_ptr<scalar_t>();
+  const scalar_t* centers_data = centers.data_ptr<scalar_t>();
   at::parallel_for(0, n, 0, [&](int64_t start, int64_t end) {
     for (int64_t i = start; i < end; i++) {
       if (i == 0) {
@@ -78,10 +80,11 @@ torch::Tensor make_filter(
     }
   });
   auto n_freq = n_fft / 2 + 1;
-  torch::Tensor freq_resp = torch::zeros({n_freq, n});
-  torch::Tensor freq = torch::arange(n_freq) / n_fft * sample_rate;
-  const float* freq_data = freq.data_ptr<float>();
-  float* freqreq_data = freq_resp.data_ptr<float>();
+  torch::Tensor freq_resp = torch::zeros({n_freq, n}, centers.dtype());
+  torch::Tensor freq =
+      torch::arange(n_freq, centers.dtype()) / n_fft * sample_rate;
+  const scalar_t* freq_data = freq.data_ptr<scalar_t>();
+  scalar_t* freqreq_data = freq_resp.data_ptr<scalar_t>();
 
   at::parallel_for(0, n, 0, [&](int64_t start, int64_t end) {
     at::parallel_for(0, n_freq, 0, [&](int64_t start2, int64_t end2) {
@@ -104,9 +107,20 @@ torch::Tensor make_filter(
       }
     });
   });
-  torch::Tensor filters =
-      torch::fft::fftshift(torch::fft::irfft(freq_resp, n_fft, 0), 0);
-  return filters.index({Slice(1)}).transpose(0, 1);
+  filters = torch::fft::fftshift(torch::fft::irfft(freq_resp, n_fft, 0), 0);
+  filters = filters.index({Slice(1)}).transpose(0, 1);
+}
+
+torch::Tensor make_filter(
+    torch::Tensor centers,
+    double sample_rate,
+    int64_t n_fft) {
+  torch::Tensor filters;
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      centers.scalar_type(), "make_filter", [&] {
+        make_filter_impl<scalar_t>(centers, sample_rate, n_fft, filters);
+      });
+  return filters;
 }
 
 TORCH_LIBRARY(rir, m) {
diff --git a/torchaudio/prototype/functional/rir.py b/torchaudio/prototype/functional/rir.py

Original file line number	Diff line number	Diff line change
`@@ -80,7 +80,7 @@ fi`
`80`	`80`	`(`
`81`	`81`	`set -x`
`82`	`82`	`conda install -y -c conda-forge ${NUMBA_DEV_CHANNEL} 'librosa>=0.8.0' parameterized 'requests>=2.20'`
`83`		`- pip install kaldi-io SoundFile coverage pytest pytest-cov 'scipy==1.7.3' transformers expecttest unidecode inflect Pillow sentencepiece pytorch-lightning 'protobuf<4.21.0' demucs tinytag`
	`83`	`+ pip install kaldi-io SoundFile coverage pytest pytest-cov 'scipy==1.7.3' transformers expecttest unidecode inflect Pillow sentencepiece pytorch-lightning 'protobuf<4.21.0' demucs tinytag pyroomacoustics`
`84`	`84`	`)`
`85`	`85`	`# Install fairseq`
`86`	`86`	`git clone https://github.com/pytorch/fairseq`