|
1 |
| -# This config contains the default values for training FastPitch model with aligner using 22KHz sampling |
2 |
| -# rate. If you want to train model on other dataset, you can change config values according to your dataset. |
| 1 | +# This config contains the default values for training a FastPitch model with an aligner using a 22 kHz sampling |
| 2 | +# rate. If you want to train the model on another dataset, you can change the config values according to your dataset. |
3 | 3 | # Most dataset-specific arguments are in the head of the config file, see below.
|
4 | 4 |
|
5 | 5 | name: FastPitch
|
@@ -28,7 +28,13 @@ lowfreq: 0
|
28 | 28 | highfreq: null
|
29 | 29 | window: hann
|
30 | 30 |
|
31 |
| -phoneme_dict_path: "scripts/tts_dataset_files/zh/pinyin_dict_nv_22.10.txt" |
| 31 | +# Four candidate values for `phoneme_dict_path` are provided for Chinese, as listed below: |
| 32 | +# 1) 24-final Pinyin: "scripts/tts_dataset_files/zh/24finals/pinyin_dict_nv_22.10.txt", |
| 33 | +# 2) IPA converted from 24-final Pinyin: "scripts/tts_dataset_files/zh/24finals/ipa_dict_nv23.05.txt", |
| 34 | +# 3) 36-final Pinyin: "scripts/tts_dataset_files/zh/36finals/pinyin_dict_nv23.05.txt", |
| 35 | +# 4) (default) IPA converted from 36-final Pinyin: "scripts/tts_dataset_files/zh/36finals/ipa_dict_nv23.05.txt" |
| 36 | +# We suggest choosing the IPA symbol set converted from 36-final Pinyin because better audio quality was observed. |
| 37 | +phoneme_dict_path: "scripts/tts_dataset_files/zh/36finals/ipa_dict_nv23.05.txt" |
32 | 38 |
|
33 | 39 | model:
|
34 | 40 | learn_alignment: true
|
@@ -73,6 +79,11 @@ model:
|
73 | 79 | _target_: nemo.collections.tts.g2p.models.zh_cn_pinyin.ChineseG2p
|
74 | 80 | phoneme_dict: ${phoneme_dict_path}
|
75 | 81 | word_segmenter: jieba # Only jieba is supported now.
|
| 82 | + phoneme_prefix: "" |
| 83 | + phoneme_case: lower |
| 84 | + tone_prefix: "#" |
| 85 | + ascii_letter_prefix: "" |
| 86 | + ascii_letter_case: upper |
76 | 87 |
|
77 | 88 | train_ds:
|
78 | 89 | dataset:
|
|
0 commit comments