pytorch
diff --git a/.circleci/unittest/linux/scripts/run_style_checks.sh b/.circleci/unittest/linux/scripts/run_style_checks.sh
Lines changed: 1 addition & 1 deletion
diff --git a/.gitmodules b/.gitmodules
Lines changed: 4 additions & 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 1 addition & 0 deletions
diff --git a/build_tools/setup_helpers/extension.py b/build_tools/setup_helpers/extension.py
Lines changed: 1 addition & 0 deletions
diff --git a/test/torchaudio_unittest/assets/kaldi_test_pitch_args.json b/test/torchaudio_unittest/assets/kaldi_test_pitch_args.json
Lines changed: 5 additions & 0 deletions
diff --git a/test/torchaudio_unittest/common_utils/kaldi_utils.py b/test/torchaudio_unittest/common_utils/kaldi_utils.py
Lines changed: 39 additions & 0 deletions
diff --git a/test/torchaudio_unittest/functional/batch_consistency_test.py b/test/torchaudio_unittest/functional/batch_consistency_test.py
Lines changed: 6 additions & 0 deletions
diff --git a/test/torchaudio_unittest/functional/kaldi_compatibility_cpu_test.py b/test/torchaudio_unittest/functional/kaldi_compatibility_cpu_test.py
Lines changed: 9 additions & 0 deletions
diff --git a/test/torchaudio_unittest/functional/kaldi_compatibility_test_impl.py b/test/torchaudio_unittest/functional/kaldi_compatibility_test_impl.py
Lines changed: 37 additions & 0 deletions
diff --git a/test/torchaudio_unittest/functional/torchscript_consistency_impl.py b/test/torchaudio_unittest/functional/torchscript_consistency_impl.py
Lines changed: 12 additions & 0 deletions
@@ -38,7 +38,7 @@ fi
 
 printf "\x1b[34mRunning clang-format:\x1b[0m\n"
 "${this_dir}"/run_clang_format.py \
-  -r torchaudio/csrc \
+  -r torchaudio/csrc third_party/kaldi/src \
   --clang-format-executable "${clangformat_path}" \
     && git diff --exit-code
 status=$?
 
@@ -2,3 +2,7 @@
 	path = third_party/transducer/submodule
 	url = https://github.com/HawkAaron/warp-transducer
 	ignore = dirty
+[submodule "kaldi"]
+	path = third_party/kaldi/submodule
+	url = https://github.com/kaldi-asr/kaldi
+	ignore = dirty
@@ -47,6 +47,7 @@ endif()
 
 # Options
 option(BUILD_SOX "Build libsox statically" OFF)
+option(BUILD_KALDI "Build kaldi statically" ON)
 option(BUILD_TRANSDUCER "Enable transducer" OFF)
 option(BUILD_LIBTORCHAUDIO "Build C++ Library" ON)
 option(BUILD_TORCHAUDIO_PYTHON_EXTENSION "Build Python extension" OFF)
 
@@ -68,6 +68,7 @@ def build_extension(self, ext):
             '-DCMAKE_VERBOSE_MAKEFILE=ON',
             f"-DPython_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}",
             f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}",
+            "-DBUILD_KALDI:BOOL=ON",
             f"-DBUILD_TRANSDUCER:BOOL={'ON' if _BUILD_TRANSDUCER else 'OFF'}",
             "-DBUILD_TORCHAUDIO_PYTHON_EXTENSION:BOOL=ON",
             "-DBUILD_LIBTORCHAUDIO:BOOL=OFF",
 
@@ -0,0 +1,5 @@
+{"sample_rate": 8000}
+{"sample_rate": 8000, "frames_per_chunk": 200}
+{"sample_rate": 8000, "frames_per_chunk": 200, "simulate_first_pass_online": true}
+{"sample_rate": 16000}
+{"sample_rate": 44100}
@@ -0,0 +1,39 @@
+import subprocess
+
+import torch
+
+
+def convert_args(**kwargs):
+    """Turn keyword options into ``--name=value`` command-line arguments.
+
+    ``sample_rate`` is emitted as ``--sample-frequency``; underscores in every
+    name become hyphens, and values equal to ``True``/``False`` are rendered in
+    lower case (``true``/``false``).
+
+    Returns:
+        list of str: one ``--name=value`` entry per keyword, in the order given.
+    """
+    renames = {'sample_rate': 'sample_frequency'}
+    flags = []
+    for name, raw in kwargs.items():
+        option = renames.get(name, name).replace('_', '-')
+        text = str(raw)
+        if raw in (True, False):
+            text = text.lower()
+        flags.append(f'--{option}={text}')
+    return flags
+
+
+def run_kaldi(command, input_type, input_value):
+    """Run provided Kaldi command, pass a tensor and get the resulting tensor
+
+    The input is written to the command's stdin under a fixed key, and the
+    matrix stored under that same key is read back from the command's stdout.
+
+    Args:
+        command: list of str
+            Command line handed to ``subprocess.Popen``
+        input_type: str
+            'ark' or 'scp'
+        input_value:
+            Tensor for 'ark'
+            string for 'scp' (path to an audio file)
+
+    Returns:
+        Tensor: the matrix the command produced for the input
+
+    Raises:
+        NotImplementedError: if ``input_type`` is neither 'ark' nor 'scp'
+    """
+    import kaldi_io
+
+    # Reject bad input before spawning anything, so no child process is left behind.
+    if input_type not in ('ark', 'scp'):
+        raise NotImplementedError('Unexpected type')
+
+    key = 'foo'
+    # The context manager closes the pipes and waits for the child on exit,
+    # so the process is reaped even if writing or reading fails.
+    with subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE) as process:
+        if input_type == 'ark':
+            kaldi_io.write_mat(process.stdin, input_value.cpu().numpy(), key=key)
+        else:
+            process.stdin.write(f'{key} {input_value}'.encode('utf8'))
+        # Close stdin so the command sees end-of-input before its output is read.
+        process.stdin.close()
+        result = dict(kaldi_io.read_mat_ark(process.stdout))[key]
+    return torch.from_numpy(result.copy())  # copy suppresses some torch warning
@@ -184,3 +184,9 @@ def test_vad(self):
         waveform, sample_rate = torchaudio.load(filepath)
         self.assert_batch_consistencies(
             F.vad, waveform, sample_rate=sample_rate)
+
+    @common_utils.skipIfNoExtension
+    def test_compute_kaldi_pitch(self):
+        """Run F.compute_kaldi_pitch through the batch-consistency check on white noise."""
+        rate_kwargs = {'sample_rate': 44100}
+        noise = common_utils.get_whitenoise(**rate_kwargs)
+        self.assert_batch_consistencies(F.compute_kaldi_pitch, noise, **rate_kwargs)
@@ -0,0 +1,9 @@
+import torch
+
+from torchaudio_unittest.common_utils import PytorchTestCase
+from .kaldi_compatibility_test_impl import KaldiCPUOnly
+
+
+class TestKaldiCPUOnly(KaldiCPUOnly, PytorchTestCase):
+    """Concrete test case running the ``KaldiCPUOnly`` tests with float32 tensors on CPU."""
+
+    # Read by the inherited tests as ``self.device`` / ``self.dtype``.
+    device = torch.device('cpu')
+    dtype = torch.float32
@@ -0,0 +1,37 @@
+from parameterized import parameterized
+import torchaudio.functional as F
+
+from torchaudio_unittest.common_utils import (
+    get_sinusoid,
+    load_params,
+    save_wav,
+    skipIfNoExec,
+    TempDirMixin,
+    TestBaseMixin,
+)
+from torchaudio_unittest.common_utils.kaldi_utils import (
+    convert_args,
+    run_kaldi,
+)
+
+
+class KaldiCPUOnly(TempDirMixin, TestBaseMixin):
+    """Tests comparing torchaudio functionals against Kaldi command-line tools.
+
+    The methods read ``self.dtype`` and ``self.device``, which are not set here.
+    """
+
+    def assert_equal(self, output, *, expected, rtol=None, atol=None):
+        """Assert ``output`` equals ``expected`` once ``expected`` is cast to the test dtype and device."""
+        reference = expected.to(dtype=self.dtype, device=self.device)
+        self.assertEqual(output, reference, rtol=rtol, atol=atol)
+
+    @parameterized.expand(load_params('kaldi_test_pitch_args.json'))
+    @skipIfNoExec('compute-kaldi-pitch-feats')
+    def test_pitch_feats(self, kwargs):
+        """compute_kaldi_pitch produces numerically compatible result with compute-kaldi-pitch-feats"""
+        rate = kwargs['sample_rate']
+
+        # torchaudio side: float32 sinusoid, element 0 of the generated waveform.
+        float_wave = get_sinusoid(dtype='float32', sample_rate=rate)
+        result = F.compute_kaldi_pitch(float_wave[0], **kwargs)
+
+        # Kaldi side: the int16 sinusoid is saved to a wav file that the CLI reads via scp.
+        wave_file = self.get_temp_path('test.wav')
+        save_wav(wave_file, get_sinusoid(dtype='int16', sample_rate=rate), rate)
+        command = ['compute-kaldi-pitch-feats', *convert_args(**kwargs), 'scp:-', 'ark:-']
+        kaldi_result = run_kaldi(command, 'scp', wave_file)
+
+        self.assert_equal(result, expected=kaldi_result)
@@ -547,3 +547,15 @@ def func(tensor):
 
         tensor = common_utils.get_whitenoise(sample_rate=44100)
         self._assert_consistency(func, tensor)
+
+    @common_utils.skipIfNoExtension
+    def test_compute_kaldi_pitch(self):
+        # Checks F.compute_kaldi_pitch with ``_assert_consistency`` on white noise.
+        # Any dtype/device combination other than float32 on CPU is skipped.
+        if self.dtype != torch.float32 or self.device != torch.device('cpu'):
+            raise unittest.SkipTest("Only float32, cpu is supported.")
+
+        # NOTE(review): ``func`` is presumably compiled by ``_assert_consistency``
+        # (not visible here), which would explain the in-body constant with an
+        # explicit ``float`` annotation - confirm before restructuring.
+        def func(tensor):
+            sample_rate: float = 44100.
+            return F.compute_kaldi_pitch(tensor, sample_rate)
+
+        # The noise is generated at the same 44100 rate that ``func`` passes on.
+        tensor = common_utils.get_whitenoise(sample_rate=44100)
+        self._assert_consistency(func, tensor)