Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 72 additions & 68 deletions OpenUtau.Plugin.Builtin/ChineseCVVPhonemizer.cs
Original file line number Diff line number Diff line change
@@ -1,16 +1,66 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using OpenUtau.Api;
using OpenUtau.Core;
using OpenUtau.Core.Ustx;
using Serilog;

namespace OpenUtau.Plugin.Builtin {
namespace OpenUtau.Plugin.Builtin
{
/// <summary>
/// Chinese 十月式整音扩张 CVV Phonemizer.
/// <para>It works by splitting "duang" into "duang" + "_ang", to produce the proper tail sound.</para>
/// </summary>
[Phonemizer("Chinese CVV (十月式整音扩张) Phonemizer", "ZH CVV", language: "ZH")]
public class ChineseCVVPhonemizer : BaseChinesePhonemizer {
public class ChineseCVVMonophonePhonemizer : MonophonePhonemizer
{
static readonly string pinyins = "a,ai,an,ang,ao,ba,bai,ban,bang,bao,bei,ben,beng,bi,bian,biao,bie,bin,bing,bo,bu,ca,cai,can,cang,cao,ce,cei,cen,ceng,cha,chai,chan,chang,chao,che,chen,cheng,chi,chong,chou,chu,chua,chuai,chuan,chuang,chui,chun,chuo,ci,cong,cou,cu,cuan,cui,cun,cuo,da,dai,dan,dang,dao,de,dei,den,deng,di,dia,dian,diao,die,ding,diu,dong,dou,du,duan,dui,dun,duo,e,ei,en,eng,er,fa,fan,fang,fei,fen,feng,fo,fou,fu,ga,gai,gan,gang,gao,ge,gei,gen,geng,gong,gou,gu,gua,guai,guan,guang,gui,gun,guo,ha,hai,han,hang,hao,he,hei,hen,heng,hong,hou,hu,hua,huai,huan,huang,hui,hun,huo,ji,jia,jian,jiang,jiao,jie,jin,jing,jiong,jiu,ju,jv,juan,jvan,jue,jve,jun,jvn,ka,kai,kan,kang,kao,ke,kei,ken,keng,kong,kou,ku,kua,kuai,kuan,kuang,kui,kun,kuo,la,lai,lan,lang,lao,le,lei,leng,li,lia,lian,liang,liao,lie,lin,ling,liu,lo,long,lou,lu,luan,lun,luo,lv,lve,ma,mai,man,mang,mao,me,mei,men,meng,mi,mian,miao,mie,min,ming,miu,mo,mou,mu,na,nai,nan,nang,nao,ne,nei,nen,neng,ni,nian,niang,niao,nie,nin,ning,niu,nong,nou,nu,nuan,nun,nuo,nv,nve,o,ou,pa,pai,pan,pang,pao,pei,pen,peng,pi,pian,piao,pie,pin,ping,po,pou,pu,qi,qia,qian,qiang,qiao,qie,qin,qing,qiong,qiu,qu,qv,quan,qvan,que,qve,qun,qvn,ran,rang,rao,re,ren,reng,ri,rong,rou,ru,rua,ruan,rui,run,ruo,sa,sai,san,sang,sao,se,sen,seng,sha,shai,shan,shang,shao,she,shei,shen,sheng,shi,shou,shu,shua,shuai,shuan,shuang,shui,shun,shuo,si,song,sou,su,suan,sui,sun,suo,ta,tai,tan,tang,tao,te,tei,teng,ti,tian,tiao,tie,ting,tong,tou,tu,tuan,tui,tun,tuo,wa,wai,wan,wang,wei,wen,weng,wo,wu,xi,xia,xian,xiang,xiao,xie,xin,xing,xiong,xiu,xu,xv,xuan,xvan,xue,xve,xun,xvn,ya,yan,yang,yao,ye,yi,yin,ying,yo,yong,you,yu,yv,yuan,yvan,yue,yve,yun,yvn,za,zai,zan,zang,zao,ze,zei,zen,zeng,zha,zhai,zhan,zhang,zhao,zhe,zhei,zhen,zheng,zhi,zhong,zhou,zhu,zhua,zhuai,zhuan,zhuang,zhui,zhun,zhuo,zi,zong,zou,zu,zuan,zui,zun";
static readonly string tails = "_vn,_ing,_ong,_an,_ou,_er,_ao,_eng,_ang,_en,_en2,_ai,_iong,_in,_ei";

static readonly string[] pinyinList = pinyins.Split(',');
static readonly string[] tailList = tails.Split(',');

/// <summary>
/// Creates the phonemizer and sets <c>ConsonantLength</c> to 120.
/// </summary>
public ChineseCVVMonophonePhonemizer() => ConsonantLength = 120;

/// <summary>
/// Builds the G2P used by this phonemizer.
/// <para>
/// Up to three sources are collected, in this order: a <c>zhcvv.yaml</c> dictionary in the
/// plugin folder, a <c>zhcvv.yaml</c> dictionary in the singer folder, and the built-in
/// <see cref="ChineseCVVG2p"/>. They are wrapped in a <see cref="G2pFallbacks"/>.
/// </para>
/// </summary>
/// <returns>The combined G2P. It always contains at least the built-in <see cref="ChineseCVVG2p"/>.</returns>
protected override IG2p LoadG2p() {
    var g2ps = new List<IG2p>();

    // Load dictionary from plugin folder.
    // A broken user dictionary must not prevent the built-in G2P from being registered,
    // so failures are logged and skipped, exactly like the singer dictionary below.
    string path = Path.Combine(PluginDir, "zhcvv.yaml");
    if (File.Exists(path)) {
        try {
            g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(path)).Build());
        } catch (Exception e) {
            Log.Error(e, $"Failed to load {path}");
        }
    }

    // Load dictionary from singer folder.
    if (singer != null && singer.Found && singer.Loaded) {
        string file = Path.Combine(singer.Location, "zhcvv.yaml");
        if (File.Exists(file)) {
            try {
                g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(file)).Build());
            } catch (Exception e) {
                Log.Error(e, $"Failed to load {file}");
            }
        }
    }

    // The built-in rules go last.
    // NOTE(review): presumably G2pFallbacks consults entries in list order, so the
    // dictionaries above take precedence — confirm against G2pFallbacks.
    g2ps.Add(new ChineseCVVG2p());
    return new G2pFallbacks(g2ps.ToArray());
}

/// <summary>
/// Parses the fallback rule string into a lookup table.
/// <para>
/// Rules are separated by ';'. Each rule has the form <c>key=value1,value2,...</c>.
/// The only rule defined here maps "_un" to "_en".
/// </para>
/// </summary>
/// <returns>A dictionary from each rule's key to its array of fallback values.</returns>
protected override Dictionary<string, string[]> LoadVowelFallbacks() {
    var fallbacks = new Dictionary<string, string[]>();
    foreach (string rule in "_un=_en".Split(';')) {
        string[] sides = rule.Split('=');
        // Add (not the indexer) so a duplicate key still throws.
        fallbacks.Add(sides[0], sides[1].Split(','));
    }
    return fallbacks;
}

/// <summary>
/// Hands the note groups to <c>BaseChinesePhonemizer.RomanizeNotes</c> before phonemization.
/// NOTE(review): presumably this converts Chinese-character lyrics to pinyin — confirm against RomanizeNotes.
/// </summary>
/// <param name="groups">The note groups passed through unchanged to <c>RomanizeNotes</c>.</param>
public override void SetUp(Note[][] groups) => BaseChinesePhonemizer.RomanizeNotes(groups);
}

class ChineseCVVG2p : IG2p{
/// <summary>
/// The consonant table.
/// </summary>
Expand All @@ -22,26 +72,22 @@ public class ChineseCVVPhonemizer : BaseChinesePhonemizer {

static HashSet<string> cSet;
static Dictionary<string, string> vDict;

static ChineseCVVPhonemizer() {
/// <summary>
/// Parses the comma-separated <c>consonants</c> and <c>vowels</c> tables once per type.
/// Each entry of <c>vowels</c> has the form <c>key=value</c>.
/// </summary>
static ChineseCVVG2p() {
    cSet = new HashSet<string>(consonants.Split(','));

    var table = new Dictionary<string, string>();
    foreach (string entry in vowels.Split(',')) {
        string[] pair = entry.Split('=');
        // Add (not the indexer) so a duplicate key still throws.
        table.Add(pair[0], pair[1]);
    }
    vDict = table;
}

private USinger singer;

// Simply stores the singer in a field.
public override void SetSinger(USinger singer) => this.singer = singer;
/// <summary>
/// Tells whether a phoneme is treated as a vowel.
/// Every phoneme that does not start with an underscore counts as a vowel.
/// </summary>
/// <param name="phoneme">The phoneme symbol to classify. Must not be null.</param>
/// <returns>False when <paramref name="phoneme"/> starts with "_", true otherwise.</returns>
public bool IsVowel(string phoneme) {
    // Phoneme symbols are identifiers, not linguistic text: compare ordinally so the
    // result does not depend on the current culture.
    return !phoneme.StartsWith("_", StringComparison.Ordinal);
}

public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevNeighbours) {
public string[] Query(string lyric){
// The overall logic is:
// 1. Remove consonant: "duang" -> "uang".
// 2. Lookup the trailing sound in vowel table: "uang" -> "_ang".
// 3. Split the total duration and return "duang" and "_ang".
var lyric = notes[0].lyric;
var note = notes[0];
string consonant = string.Empty;
string vowel = string.Empty;
if (lyric.Length > 2 && cSet.Contains(lyric.Substring(0, 2))) {
Expand All @@ -63,62 +109,20 @@ public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevN
if ((vowel == "an") && (consonant == "y")) {
vowel = "ian";
}
string phoneme0 = lyric;
// Get color
string color = string.Empty;
int toneShift = 0;
if (note.phonemeAttributes != null) {
var attr = note.phonemeAttributes.FirstOrDefault(attr => attr.index == 0);
color = attr.voiceColor;
toneShift = attr.toneShift;
if(vDict.TryGetValue(vowel, out var tail)){
return new string[] { lyric, tail };
}else{
return new string[] { lyric };
}
// We will need to split the total duration for phonemes, so we compute it here.
int totalDuration = notes.Sum(n => n.duration);
// Lookup the vowel split table. For example, "uang" will match "_ang".
if (vDict.TryGetValue(vowel, out var phoneme1)) {
// Now phoneme0="duang" and phoneme1="_ang",
// try to give "_ang" 120 ticks, but no more than half of the total duration.
int length1 = 120;
if (length1 > totalDuration / 2) {
length1 = totalDuration / 2;
}
if (singer.TryGetMappedOto(phoneme0, note.tone + toneShift, color, out var oto0)) {
phoneme0 = oto0.Alias;
}

if (singer.TryGetMappedOto(phoneme1, note.tone + toneShift, color, out var oto1)) {
phoneme1 = oto1.Alias;
}

if (phoneme1.Contains("_un") && !singer.TryGetMappedOto(phoneme1, note.tone + toneShift, color, out var oto2)) {
phoneme1 = "_en";
} else if (phoneme1.Contains("_un") && singer.TryGetMappedOto(phoneme1, note.tone + toneShift, color, out var oto3)) {
phoneme1 = oto3.Alias;
}

}
/// <summary>
/// Accepts every symbol: this G2P performs no symbol validation.
/// </summary>
/// <param name="symbol">The symbol to check (ignored).</param>
/// <returns>Always true.</returns>
public bool IsValidSymbol(string symbol) => true;

return new Result {
phonemes = new Phoneme[] {
new Phoneme() {
phoneme = phoneme0,
},
new Phoneme() {
phoneme = phoneme1,
position = totalDuration - length1,
}
},
};
}
if (singer.TryGetMappedOto(phoneme0, note.tone + toneShift, color, out var oto)) {
phoneme0 = oto.Alias;
}
// No splitting is needed. Return as is.
return new Result {
phonemes = new Phoneme[] {
new Phoneme() {
phoneme = phoneme0,
}
},
};
/// <summary>
/// Splits a phonetic hint into its symbols.
/// </summary>
/// <param name="hint">The hint text. Must not be null.</param>
/// <param name="separator">The character separating symbols; a space by default.</param>
/// <returns>The substrings of <paramref name="hint"/> delimited by <paramref name="separator"/>.</returns>
public string[] UnpackHint(string hint, char separator = ' ') {
    // Split already returns a fresh array, so no extra copy is needed.
    return hint.Split(separator);
}
}
}
}
203 changes: 2 additions & 201 deletions OpenUtau.Plugin.Builtin/LatinDiphonePhonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,172 +10,8 @@ namespace OpenUtau.Plugin.Builtin {
/// <summary>
/// A base diphone phonemizer for latin languages.
/// </summary>
public abstract class LatinDiphonePhonemizer : Phonemizer {
protected Dictionary<string, string[]> vowelFallback;
protected USinger singer;
protected IG2p g2p;
protected bool isDictionaryLoading;

//[(index of phoneme, tick position from the lyrical note in notes[], is manual)]
protected readonly List<Tuple<int, int, bool>> alignments = new List<Tuple<int, int, bool>>();

/// <summary>
/// This property will later be exposed in UI for user adjustment.
/// </summary>
public int ConsonantLength { get; set; } = 60;

/// <summary>
/// Creates the phonemizer and runs <see cref="Initialize"/>.
/// A failure during initialization is logged and swallowed, so construction itself does not throw.
/// </summary>
public LatinDiphonePhonemizer() {
    try {
        Initialize();
    } catch (Exception ex) {
        Log.Error(ex, "Failed to initialize.");
    }
}

protected abstract IG2p LoadG2p();

protected abstract Dictionary<string, string[]> LoadVowelFallbacks();

/// <summary>
/// Loads the G2P and the vowel fallback table by calling <see cref="LoadG2p"/> and
/// <see cref="LoadVowelFallbacks"/>, storing the results in the <c>g2p</c> and
/// <c>vowelFallback</c> fields.
/// </summary>
protected void Initialize() {
g2p = LoadG2p();
vowelFallback = LoadVowelFallbacks();
}

/// <summary>
/// Stores the singer and rebuilds the G2P via <see cref="LoadG2p"/>.
/// </summary>
/// <param name="singer">The singer to use for subsequent processing.</param>
public override void SetSinger(USinger singer) {
this.singer = singer;
// Reload only after the singer field has been updated.
// NOTE(review): presumably LoadG2p implementations read singer-specific data — confirm.
g2p = LoadG2p();
}

/// <summary>
/// Converts one lyrical note, together with the notes that extend it, into positioned phonemes.
/// <para>
/// The lyric (or phonetic hint) is turned into symbols through the G2P, the symbols are
/// distributed in time between alignment points, and each symbol is finally mapped to an
/// alias through <c>GetPhonemeOrFallback</c>.
/// </para>
/// </summary>
/// <param name="notes">The lyrical note at index 0 followed by its extender notes.</param>
/// <param name="prev">Not used by this implementation.</param>
/// <param name="next">Not used by this implementation.</param>
/// <param name="prevNeighbour">The preceding note, or null; its last symbol becomes the left context.</param>
/// <param name="nextNeighbour">The following note, or null; when null a tail "-" symbol is appended.</param>
/// <param name="prevNeighbours">When non-empty, the tone of its last note is used for the first phoneme.</param>
/// <returns>The phonemes for this note group.</returns>
public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevNeighbours) {
    if (isDictionaryLoading) {
        return MakeSimpleResult("");
    }
    var note = notes[0];

    // Force alias using ? prefix
    if (!string.IsNullOrEmpty(note.lyric) && note.lyric[0] == '?') {
        return MakeSimpleResult(note.lyric.Substring(1));
    }

    // Get the symbols of previous note.
    var prevSymbols = prevNeighbour == null ? null : GetSymbols(prevNeighbour.Value);
    // An empty symbol list carries no usable left context; treat it like "no symbols"
    // so the Last() calls below cannot throw on an empty sequence.
    if (prevSymbols != null && prevSymbols.Length == 0) {
        prevSymbols = null;
    }
    // The user is using a tail "-" note to produce a "<something> -" sound.
    if (note.lyric == "-" && prevSymbols != null) {
        var attr = note.phonemeAttributes?.FirstOrDefault() ?? default;
        string color = attr.voiceColor;
        string alias = $"{prevSymbols.Last()} -";
        if (singer.TryGetMappedOto(alias, note.tone, color, out var oto)) {
            return MakeSimpleResult(oto.Alias);
        }
        return MakeSimpleResult(alias);
    }
    // Get the symbols of current note.
    string[] symbols = GetSymbols(note);
    // This check must run before the tail is appended: appending to a null array throws,
    // and after appending the array can never be empty.
    if (symbols == null || symbols.Length == 0) {
        // No symbol is found for current note.
        // Otherwise assumes the user put in an alias.
        return MakeSimpleResult(note.lyric);
    }
    if (nextNeighbour == null) {
        // Auto add tail "-".
        symbols = symbols.Append("-").ToArray();
    }
    // Find phone types of symbols.
    var isVowel = symbols.Select(s => g2p.IsVowel(s)).ToArray();
    // Arpasing aligns the first vowel at 0 and shifts leading consonants to negative positions,
    // so we need to find the first vowel.
    var phonemes = new Phoneme[symbols.Length];

    // Alignments
    // - Tries to align every note to one syllable.
    // - "+n" manually aligns to n-th phoneme.
    alignments.Clear();
    // Notes except those whose lyrics start with "+*" or "+~".
    var nonExtensionNotes = notes.Where(n => !IsSyllableVowelExtensionNote(n)).ToArray();
    for (int i = 0; i < symbols.Length; i++) {
        if (isVowel[i] && alignments.Count < nonExtensionNotes.Length) {
            alignments.Add(Tuple.Create(i, nonExtensionNotes[alignments.Count].position - notes[0].position, false));
        }
    }
    int position = notes[0].duration;
    for (int i = 1; i < notes.Length; ++i) {
        // "+n" is 1-based. Values below 1 (e.g. "+0", "+-2") would produce a negative
        // phoneme index and break the distribution below, so they are ignored.
        if (int.TryParse(notes[i].lyric.Substring(1), out var idx) && idx >= 1) {
            alignments.Add(Tuple.Create(idx - 1, position, true));
        }
        position += notes[i].duration;
    }
    // Closing alignment: everything ends at the end of the last note.
    alignments.Add(Tuple.Create(phonemes.Length, position, true));
    alignments.Sort((a, b) => a.Item1.CompareTo(b.Item1));
    // Manual alignments win: drop neighbours that conflict with them in tick order
    // or that target the same phoneme index.
    for (int i = 0; i < alignments.Count; ++i) {
        if (alignments[i].Item3) {
            while (i > 0 && (alignments[i - 1].Item2 >= alignments[i].Item2 ||
                alignments[i - 1].Item1 == alignments[i].Item1)) {
                alignments.RemoveAt(i - 1);
                i--;
            }
            while (i < alignments.Count - 1 && (alignments[i + 1].Item2 <= alignments[i].Item2 ||
                alignments[i + 1].Item1 == alignments[i].Item1)) {
                alignments.RemoveAt(i + 1);
            }
        }
    }

    int startIndex = 0;
    // NOTE(review): when no symbol is a vowel, IndexOf returns -1 and startTick becomes
    // +ConsonantLength rather than 0 — confirm whether that is intended.
    int firstVowel = Array.IndexOf(isVowel, true);
    int startTick = -ConsonantLength * firstVowel;
    foreach (var alignment in alignments) {
        // Distributes phonemes between two alignment points.
        DistributeDuration(isVowel, phonemes, startIndex, alignment.Item1, startTick, alignment.Item2);
        startIndex = alignment.Item1;
        startTick = alignment.Item2;
    }
    alignments.Clear();

    // Select aliases.
    int noteIndex = 0;
    string prevSymbol = prevSymbols == null ? "-" : prevSymbols.Last();
    for (int i = 0; i < symbols.Length; i++) {
        var attr = note.phonemeAttributes?.FirstOrDefault(a => a.index == i) ?? default;
        string alt = attr.alternate?.ToString() ?? string.Empty;
        string color = attr.voiceColor;
        int toneShift = attr.toneShift;
        var phoneme = phonemes[i];
        // Advance to the note this phoneme falls on so it picks up that note's tone.
        while (noteIndex < notes.Length - 1 && notes[noteIndex].position - note.position < phoneme.position) {
            noteIndex++;
        }
        int tone = (i == 0 && prevNeighbours != null && prevNeighbours.Length > 0)
            ? prevNeighbours.Last().tone : notes[noteIndex].tone;
        phoneme.phoneme = GetPhonemeOrFallback(prevSymbol, symbols[i], tone + toneShift, color, alt);
        phonemes[i] = phoneme;
        prevSymbol = symbols[i];
    }

    return new Result {
        phonemes = phonemes,
    };
}

/// <summary>
/// Does this note extend the previous syllable?
/// </summary>
/// <param name="note">The note whose lyric is inspected.</param>
/// <returns>True when the lyric starts with "+~" or "+*"; false otherwise.</returns>
protected bool IsSyllableVowelExtensionNote(Note note) {
    // The prefixes are control markers, not linguistic text, so compare ordinally
    // to keep the result independent of the current culture.
    return note.lyric.StartsWith("+~", StringComparison.Ordinal)
        || note.lyric.StartsWith("+*", StringComparison.Ordinal);
}

/// <summary>
/// Returns the phonetic symbols for a note.
/// A non-empty phonetic hint takes priority; otherwise the lowercased lyric is looked up in the G2P.
/// </summary>
/// <param name="note">The note to resolve.</param>
/// <returns>
/// The hint's whitespace-separated symbols that the G2P accepts as valid, or, when there is
/// no hint, whatever the G2P query returns for the lyric.
/// </returns>
string[] GetSymbols(Note note) {
    string hint = note.phoneticHint;
    if (!string.IsNullOrEmpty(hint)) {
        // Keep only the hint tokens the G2P recognises; invalid ones are skipped.
        var accepted = new List<string>();
        foreach (string token in hint.Split()) {
            if (g2p.IsValidSymbol(token)) {
                accepted.Add(token);
            }
        }
        return accepted.ToArray();
    }
    // User has not provided a hint, so query the G2P with the lyric.
    return g2p.Query(note.lyric.ToLowerInvariant());
}

string GetPhonemeOrFallback(string prevSymbol, string symbol, int tone, string color, string alt) {
public abstract class LatinDiphonePhonemizer : PhonemeBasedPhonemizer {
protected override string GetPhonemeOrFallback(string prevSymbol, string symbol, int tone, string color, string alt) {
if (!string.IsNullOrEmpty(alt) && singer.TryGetMappedOto($"{prevSymbol} {symbol}{alt}", tone, color, out var oto)) {
return oto.Alias;
}
Expand All @@ -194,40 +30,5 @@ string GetPhonemeOrFallback(string prevSymbol, string symbol, int tone, string c
}
return $"{prevSymbol} {symbol}{alt}";
}

/// <summary>
/// Assigns positions to <c>phonemes[startIndex .. endIndex - 1]</c> so that they fill the
/// span from <paramref name="startTick"/> to <paramref name="endTick"/>.
/// <para>
/// When the range contains vowels, each consonant gets <c>ConsonantLength</c> (all consonants
/// together at most half the span) and the vowels share the remainder equally. When it
/// contains only consonants, they share the whole span equally.
/// </para>
/// </summary>
/// <param name="isVowel">Per-phoneme flag, true for vowels.</param>
/// <param name="phonemes">The phonemes whose <c>position</c> is written.</param>
/// <param name="startIndex">Index of the first phoneme to place (inclusive).</param>
/// <param name="endIndex">Index after the last phoneme to place (exclusive).</param>
/// <param name="startTick">Position given to the first phoneme in the range.</param>
/// <param name="endTick">Position at which the range ends.</param>
void DistributeDuration(bool[] isVowel, Phoneme[] phonemes, int startIndex, int endIndex, int startTick, int endTick) {
    // Nothing to place for an empty or inverted range. Using >= (not ==) matters: with
    // startIndex > endIndex both counts below stay 0 and "duration / consonants" would
    // divide by zero.
    if (startIndex >= endIndex) {
        return;
    }
    // First count number of vowels and consonants.
    int consonants = 0;
    int vowels = 0;
    int duration = endTick - startTick;
    for (int i = startIndex; i < endIndex; i++) {
        if (isVowel[i]) {
            vowels++;
        } else {
            consonants++;
        }
    }
    // If vowels exist, consonants are given fixed length, but combined no more than half duration.
    // However, if no vowel exists, consonants are evenly distributed within the total duration.
    int consonantDuration = vowels > 0
        ? (consonants > 0 ? Math.Min(ConsonantLength, duration / 2 / consonants) : 0)
        : duration / consonants;
    // Vowels are evenly distributed within (total duration - total consonant duration).
    int vowelDuration = vowels > 0 ? (duration - consonantDuration * consonants) / vowels : 0;
    int position = startTick;
    // Compute positions using previously computed durations.
    for (int i = startIndex; i < endIndex; i++) {
        phonemes[i].position = position;
        position += isVowel[i] ? vowelDuration : consonantDuration;
    }
}
}
}
Loading