Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions examples/tts/conf/zh/fastpitch_align_22050.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This config contains the default values for training FastPitch model with aligner using 22KHz sampling
# rate. If you want to train model on other dataset, you can change config values according to your dataset.
# This config contains the default values for training a FastPitch model with an aligner using a 22 kHz sampling
# rate. If you want to train the model on another dataset, change the config values to match your dataset.
# Most dataset-specific arguments are in the head of the config file, see below.

name: FastPitch
Expand Down Expand Up @@ -28,7 +28,13 @@ lowfreq: 0
highfreq: null
window: hann

phoneme_dict_path: "scripts/tts_dataset_files/zh/pinyin_dict_nv_22.10.txt"
# Four candidate values for `phoneme_dict_path` are provided for Chinese, as listed below:
# 1) 24-final Pinyin: "scripts/tts_dataset_files/zh/24finals/pinyin_dict_nv_22.10.txt",
# 2) IPA converted from 24-final Pinyin: "scripts/tts_dataset_files/zh/24finals/ipa_dict_nv23.05.txt",
# 3) 36-final Pinyin: "scripts/tts_dataset_files/zh/36finals/pinyin_dict_nv23.05.txt",
# 4) (default) IPA converted from 36-final Pinyin: "scripts/tts_dataset_files/zh/36finals/ipa_dict_nv23.05.txt"
# We recommend the IPA symbol set converted from 36-final Pinyin because it was observed to give better audio quality.
phoneme_dict_path: "scripts/tts_dataset_files/zh/36finals/ipa_dict_nv23.05.txt"

model:
learn_alignment: true
Expand Down Expand Up @@ -73,6 +79,11 @@ model:
_target_: nemo.collections.tts.g2p.models.zh_cn_pinyin.ChineseG2p
phoneme_dict: ${phoneme_dict_path}
word_segmenter: jieba # Only jieba is supported now.
phoneme_prefix: ""
phoneme_case: lower
tone_prefix: "#"
ascii_letter_prefix: ""
ascii_letter_case: upper

train_ds:
dataset:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,10 @@ def __init__(
pad_with_space=False,
text_preprocessing_func=chinese_text_preprocessing,
):
"""Chinese phoneme-based tokenizer.
"""
Chinese phoneme-based tokenizer.
Note: This tokenizer for now covers Chinese phonemes/tones and English letters because our dataset contains
both Chinese and English graphemes.
Args:
g2p: Grapheme to phoneme module.
punct: Whether to reserve grapheme for basic punctuation or not.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: "ds_for_fastpitch_align"
manifest_filepath: "train_manifest.json"
sup_data_path: "sup_data"
sup_data_types: [ "align_prior_matrix", "pitch" ]
phoneme_dict_path: "scripts/tts_dataset_files/zh/pinyin_dict_nv_22.10.txt"
phoneme_dict_path: "scripts/tts_dataset_files/zh/24finals/pinyin_dict_nv_22.10.txt"

dataset:
_target_: nemo.collections.tts.data.dataset.TTSDataset
Expand Down
Loading