Skip to content

Commit 3c2f4bc

Browse files
authored
Merge pull request #1280 from oxygen-dioxide/diffsinger-phonemizer
Diffsinger phonemizers: G2p results add langcode by default; Check if phoneme is supported by duration model
2 parents 9671c4f + 1ee2be0 commit 3c2f4bc

File tree

3 files changed

+30
-25
lines changed

3 files changed

+30
-25
lines changed

OpenUtau.Core/Api/G2pDictionary.cs

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,15 @@ void BuildTrie(TrieNode node, string grapheme, int index, IEnumerable<string> sy
120120

121121
public Builder Load(string input) {
122122
var data = Core.Yaml.DefaultDeserializer.Deserialize<G2pDictionaryData>(input);
123+
return Load(data);
124+
}
125+
126+
public Builder Load(TextReader textReader) {
127+
var data = Core.Yaml.DefaultDeserializer.Deserialize<G2pDictionaryData>(textReader);
128+
return Load(data);
129+
}
130+
131+
public Builder Load(G2pDictionaryData data){
123132
if (data.symbols != null) {
124133
foreach (var symbolData in data.symbols) {
125134
AddSymbol(symbolData.symbol, symbolData.type);
@@ -133,17 +142,6 @@ public Builder Load(string input) {
133142
return this;
134143
}
135144

136-
public Builder Load(TextReader textReader) {
137-
var data = Core.Yaml.DefaultDeserializer.Deserialize<G2pDictionaryData>(textReader);
138-
foreach (var symbolData in data.symbols) {
139-
AddSymbol(symbolData.symbol, symbolData.type);
140-
}
141-
foreach (var entry in data.entries) {
142-
AddEntry(entry.grapheme, entry.phonemes);
143-
}
144-
return this;
145-
}
146-
147145
public G2pDictionary Build() {
148146
return new G2pDictionary(root, phonemeSymbols, glideSymbols);
149147
}

OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ private bool _executeSetSinger(USinger singer) {
8383
}
8484
this.frameMs = dsConfig.frameMs();
8585
//Load g2p
86-
g2p = LoadG2p(rootPath);
86+
g2p = LoadG2p(rootPath, dsConfig.use_lang_id);
8787
//Load phonemes list
8888
string phonemesPath = Path.Combine(rootPath, dsConfig.phonemes);
8989
phonemeTokens = DiffSingerUtils.LoadPhonemes(phonemesPath);
@@ -109,7 +109,7 @@ private bool _executeSetSinger(USinger singer) {
109109
return true;
110110
}
111111

112-
protected virtual IG2p LoadG2p(string rootPath) {
112+
protected virtual IG2p LoadG2p(string rootPath, bool useLangId = false) {
113113
//Each phonemizer has a dedicated dictionary name, such as dsdict-en.yaml, dsdict-ru.yaml.
114114
//If this dictionary exists, load it.
115115
//If not, load dsdict.yaml.
@@ -138,13 +138,13 @@ protected virtual IG2p LoadG2p(string rootPath) {
138138
//Check if the phoneme is supported. If unsupported, return an empty string.
139139
//And apply language prefix to phoneme
140140
string ValidatePhoneme(string phoneme){
141-
if(g2p.IsValidSymbol(phoneme)){
141+
if(g2p.IsValidSymbol(phoneme) && phonemeTokens.ContainsKey(phoneme)){
142142
return phoneme;
143143
}
144144
var langCode = GetLangCode();
145145
if(langCode != String.Empty){
146146
var phonemeWithLanguage = langCode + "/" + phoneme;
147-
if(g2p.IsValidSymbol(phonemeWithLanguage)){
147+
if(g2p.IsValidSymbol(phonemeWithLanguage) && phonemeTokens.ContainsKey(phonemeWithLanguage)){
148148
return phonemeWithLanguage;
149149
}
150150
}
@@ -306,7 +306,7 @@ protected override void ProcessPart(Note[][] phrase) {
306306
var wordFound = new bool[phrase.Length];
307307
foreach (int wordIndex in Enumerable.Range(0, phrase.Length)) {
308308
Note[] word = phrase[wordIndex];
309-
var symbols = GetSymbols(word[0]);
309+
var symbols = GetSymbols(word[0]).Where(s => phonemeTokens.ContainsKey(s)).ToArray();
310310
if (symbols == null || symbols.Length == 0) {
311311
symbols = new string[] { defaultPause };
312312
wordFound[wordIndex] = false;

OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerG2pPhonemizer.cs

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,20 @@
22
using System;
33
using System.Collections.Generic;
44
using System.IO;
5+
using System.Linq;
56

67
using OpenUtau.Api;
78

89
namespace OpenUtau.Core.DiffSinger
910
{
10-
public class G2pReplacementsData{
11+
class DiffSingerG2pDictionaryData : G2pDictionaryData{
1112
public struct Replacement{
1213
public string from;
1314
public string to;
1415
}
1516
public Replacement[]? replacements;
16-
17-
public static G2pReplacementsData Load(string text){
18-
return OpenUtau.Core.Yaml.DefaultDeserializer.Deserialize<G2pReplacementsData>(text);
19-
}
2017

21-
public Dictionary<string, string> toDict(){
18+
public Dictionary<string, string> replacementsDict(){
2219
var dict = new Dictionary<string, string>();
2320
if(replacements!=null){
2421
foreach(var r in replacements){
@@ -39,7 +36,7 @@ public abstract class DiffSingerG2pPhonemizer : DiffSingerBasePhonemizer
3936
protected virtual string[] GetBaseG2pVowels()=>new string[]{};
4037
protected virtual string[] GetBaseG2pConsonants()=>new string[]{};
4138

42-
protected override IG2p LoadG2p(string rootPath) {
39+
protected override IG2p LoadG2p(string rootPath, bool useLangId = false) {
4340
//Each phonemizer has a dedicated dictionary name, such as dsdict-en.yaml, dsdict-ru.yaml.
4441
//If this dictionary exists, load it.
4542
//If not, load dsdict.yaml.
@@ -54,8 +51,9 @@ protected override IG2p LoadG2p(string rootPath) {
5451
if (File.Exists(dictionaryPath)) {
5552
try {
5653
string dictText = File.ReadAllText(dictionaryPath);
57-
replacements = G2pReplacementsData.Load(dictText).toDict();
58-
g2pBuilder.Load(dictText);
54+
var dictData = Yaml.DefaultDeserializer.Deserialize<DiffSingerG2pDictionaryData>(dictText);
55+
g2pBuilder.Load(dictData);
56+
replacements = dictData.replacementsDict();
5957
} catch (Exception e) {
6058
Log.Error(e, $"Failed to load {dictionaryPath}");
6159
}
@@ -79,6 +77,15 @@ protected override IG2p LoadG2p(string rootPath) {
7977
foreach(var c in GetBaseG2pConsonants()){
8078
phonemeSymbols[c]=false;
8179
}
80+
if(useLangId){
81+
//For DiffSinger multi-dict voicebanks, each base g2p phoneme without an explicit replacement defaults to <langcode>/<phoneme>
82+
var langCode = GetLangCode();
83+
foreach(var ph in GetBaseG2pVowels().Concat(GetBaseG2pConsonants())){
84+
if(!replacements.ContainsKey(ph)){
85+
replacements[ph]=langCode + "/" + ph;
86+
}
87+
}
88+
}
8289
foreach(var from in replacements.Keys){
8390
var to = replacements[from];
8491
if(baseG2p.IsValidSymbol(to)){

0 commit comments

Comments
 (0)