Add Tacotron2 loss function

yangarbiter · yangarbiter · commit f65959d64128 · 2021-07-12T15:20:38.000Z
diff --git a/examples/pipeline_tacotron2/loss_function.py b/examples/pipeline_tacotron2/loss_function.py
@@ -0,0 +1,73 @@
+# *****************************************************************************
+#  Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions are met:
+#      * Redistributions of source code must retain the above copyright
+#        notice, this list of conditions and the following disclaimer.
+#      * Redistributions in binary form must reproduce the above copyright
+#        notice, this list of conditions and the following disclaimer in the
+#        documentation and/or other materials provided with the distribution.
+#      * Neither the name of the NVIDIA CORPORATION nor the
+#        names of its contributors may be used to endorse or promote products
+#        derived from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+#  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+#  DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
+#  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+#  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+#  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+#  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *****************************************************************************
+
+from typing import Tuple
+
+from torch import nn, Tensor
+
+
+class Tacotron2Loss(nn.Module):
+    """Tacotron2 loss function adapted from:
+    https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/SpeechSynthesis/Tacotron2/tacotron2/loss_function.py
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self,
+                model_outputs: Tuple[Tensor, Tensor, Tensor],
+                targets: Tuple[Tensor, Tensor]) -> Tuple[Tensor, Tensor, Tensor]:
+        r"""Pass the input through the Tacotron2 loss.
+
+        Args:
+            model_outputs (tuple of three Tensors): The outputs of the
+                Tacotron2. These outputs should include three items:
+                (1) the predicted mel spectrogram before the postnet (mel_specgram) with shape (n_batch, n_mel, n_time),
+                (2) predicted mel spectrogram after the postnet (mel_specgram_postnet)
+                    with shape (n_batch, n_mel, n_time), and
+                (3) the stop token prediction logits (gate_out), e.g. shape (n_batch); flattened internally.
+            targets (tuple of two Tensors): The ground truth mel spectrogram (n_batch, n_mel, n_time) and
+                stop token with as many elements as gate_out. Targets are detached, i.e. treated as constants.
+
+        Returns:
+            mel_loss (Tensor): Scalar (0-dim) mean MSE between mel_specgram and the ground truth mel spectrogram.
+            mel_postnet_loss (Tensor): Scalar (0-dim) mean MSE between mel_specgram_postnet and
+                the ground truth mel spectrogram.
+            gate_loss (Tensor): Scalar (0-dim) mean binary cross entropy (with logits) of
+                the prediction on the stop token.
+        """
+        mel_target, gate_target = targets[0], targets[1]
+        # detach() (not `requires_grad = False`): safe for non-leaf targets, no mutation of caller tensors.
+        mel_target = mel_target.detach()
+        gate_target = gate_target.detach().view(-1, 1)
+
+        mel_specgram, mel_specgram_postnet, gate_out = model_outputs
+        gate_out = gate_out.view(-1, 1)
+        mel_loss = nn.MSELoss(reduction="mean")(mel_specgram, mel_target)
+        mel_postnet_loss = nn.MSELoss(reduction="mean")(mel_specgram_postnet, mel_target)
+        gate_loss = nn.BCEWithLogitsLoss(reduction="mean")(gate_out, gate_target)
+        return mel_loss, mel_postnet_loss, gate_loss
diff --git a/examples/pipeline_tacotron2/test_tacotron2_loss.py b/examples/pipeline_tacotron2/test_tacotron2_loss.py
@@ -0,0 +1,94 @@
+import os
+import unittest
+import tempfile
+
+import torch
+from torch.autograd import gradcheck, gradgradcheck
+
+from loss_function import Tacotron2Loss
+
+
+class TempDirMixin:
+    """Test-case mixin handing out per-test scratch paths under one shared base directory."""
+    temp_dir_ = None
+
+    @classmethod
+    def get_base_temp_dir(cls):
+        """Return the base scratch directory, creating the shared TemporaryDirectory on first use."""
+        # TORCHAUDIO_TEST_TEMP_DIR, when set, overrides the temporary directory (handy for debugging).
+        override = os.environ.get('TORCHAUDIO_TEST_TEMP_DIR')
+        if override is not None:
+            return override
+        if cls.temp_dir_ is None:
+            cls.temp_dir_ = tempfile.TemporaryDirectory()
+        return cls.temp_dir_.name
+
+    @classmethod
+    def tearDownClass(cls):
+        super().tearDownClass()
+        shared = cls.temp_dir_
+        if shared is not None:
+            shared.cleanup()
+            cls.temp_dir_ = None
+
+    def get_temp_path(self, *paths):
+        target = os.path.join(self.get_base_temp_dir(), self.id(), *paths)
+        os.makedirs(os.path.dirname(target), exist_ok=True)
+        return target
+
+
+class Tacotron2LossTest(TempDirMixin, unittest.TestCase):
+    """TorchScript-consistency and gradient checks for Tacotron2Loss on small random inputs."""
+    dtype = torch.float64
+    device = "cpu"
+
+    def _assert_torchscript_consistency(self, fn, tensors):
+        """Script ``fn``, round-trip it through save/load, and compare its output with eager ``fn``."""
+        path = self.get_temp_path('func.zip')
+        torch.jit.script(fn).save(path)
+        ts_func = torch.jit.load(path)
+        # Re-seed before each call so both runs start from the same RNG state.
+        torch.random.manual_seed(40)
+        output = fn(*tensors)
+        torch.random.manual_seed(40)
+        ts_output = ts_func(*tensors)
+
+        self.assertEqual(ts_output, output)
+
+    def _get_inputs(self):
+        """Return random (mel_specgram, mel_specgram_postnet, gate_out, truth_mel_specgram, truth_gate_out)."""
+        n_mel, n_batch, max_mel_specgram_length = 10, 8, 20
+        mel_specgram = torch.rand(n_batch, n_mel, max_mel_specgram_length, dtype=self.dtype, device=self.device)
+        mel_specgram_postnet = torch.rand(n_batch, n_mel, max_mel_specgram_length, dtype=self.dtype, device=self.device)
+        gate_out = torch.rand(n_batch, dtype=self.dtype, device=self.device)
+        truth_mel_specgram = torch.rand(n_batch, n_mel, max_mel_specgram_length, dtype=self.dtype, device=self.device)
+        truth_gate_out = torch.rand(n_batch, dtype=self.dtype, device=self.device)
+        return mel_specgram, mel_specgram_postnet, gate_out, truth_mel_specgram, truth_gate_out
+
+    def test_torchscript_consistency(self):
+        """Validate the torchscript consistency of Tacotron2Loss."""
+
+        def _fn(mel_specgram, mel_specgram_postnet, gate_out, truth_mel_specgram, truth_gate_out):
+            loss_fn = Tacotron2Loss()
+            return loss_fn((mel_specgram, mel_specgram_postnet, gate_out), (truth_mel_specgram, truth_gate_out))
+
+        self._assert_torchscript_consistency(_fn, self._get_inputs())
+
+    def test_gradcheck(self):
+        """Performing gradient check on Tacotron2Loss."""
+
+        mel_specgram, mel_specgram_postnet, gate_out, truth_mel_specgram, truth_gate_out = self._get_inputs()
+
+        mel_specgram.requires_grad_(True)
+        mel_specgram_postnet.requires_grad_(True)
+        gate_out.requires_grad_(True)
+
+        def _fn(mel_specgram, mel_specgram_postnet, gate_out, truth_mel_specgram, truth_gate_out):
+            loss_fn = Tacotron2Loss()
+            return loss_fn((mel_specgram, mel_specgram_postnet, gate_out), (truth_mel_specgram, truth_gate_out))
+
+        gradcheck(_fn, (mel_specgram, mel_specgram_postnet, gate_out, truth_mel_specgram, truth_gate_out))
+        gradgradcheck(_fn, (mel_specgram, mel_specgram_postnet, gate_out, truth_mel_specgram, truth_gate_out))
+
+if __name__ == "__main__":  # Run the test suite when this file is executed directly as a script.
+    unittest.main()