Skip to content

Commit 8538207

Browse files
authored
Merge pull request #1075 from rokujyushi/AddVoicevox
VOICEVOX support
2 parents 2917119 + 65292fd commit 8538207

File tree

13 files changed

+1022
-6
lines changed

13 files changed

+1022
-6
lines changed

OpenUtau.Core/Classic/ClassicSingerLoader.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ static USinger AdjustSingerType(Voicebank v) {
1111
return new Core.Enunu.EnunuSinger(v) as USinger;
1212
case USingerType.DiffSinger:
1313
return new Core.DiffSinger.DiffSingerSinger(v) as USinger;
14+
case USingerType.Voicevox:
15+
return new Core.Voicevox.VoicevoxSinger(v) as USinger;
1416
default:
1517
return new ClassicSinger(v) as USinger;
1618
}

OpenUtau.Core/Render/RenderPhrase.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ public class RenderPhone {
6767
public readonly bool direct;
6868
public readonly Vector2[] envelope;
6969

70+
// voicevox args
71+
public readonly int toneShift;
72+
7073
public readonly UOto oto;
7174
public readonly ulong hash;
7275

@@ -118,6 +121,7 @@ internal RenderPhone(UProject project, UTrack track, UVoicePart part, UNote note
118121
leadingMs = phoneme.preutter;
119122
envelope = phoneme.envelope.data.ToArray();
120123
direct = phoneme.GetExpression(project, track, Format.Ustx.DIR).Item1 == 1;
124+
toneShift = (int)phoneme.GetExpression(project, track, Format.Ustx.SHFT).Item1;
121125

122126
oto = phoneme.oto;
123127
hash = Hash();

OpenUtau.Core/Render/Renderers.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,13 @@ public static class Renderers {
1313
public const string ENUNU = "ENUNU";
1414
public const string VOGEN = "VOGEN";
1515
public const string DIFFSINGER = "DIFFSINGER";
16+
public const string VOICEVOX = "VOICEVOX";
1617

1718
static readonly string[] classicRenderers = new[] { WORLDLINER, CLASSIC };
1819
static readonly string[] enunuRenderers = new[] { ENUNU };
1920
static readonly string[] vogenRenderers = new[] { VOGEN };
2021
static readonly string[] diffSingerRenderers = new[] { DIFFSINGER };
22+
static readonly string[] voicevoxRenderers = new[] { VOICEVOX };
2123
static readonly string[] noRenderers = new string[0];
2224

2325
public static string[] GetSupportedRenderers(USingerType singerType) {
@@ -30,6 +32,8 @@ public static string[] GetSupportedRenderers(USingerType singerType) {
3032
return vogenRenderers;
3133
case USingerType.DiffSinger:
3234
return diffSingerRenderers;
35+
case USingerType.Voicevox:
36+
return voicevoxRenderers;
3337
default:
3438
return noRenderers;
3539
}
@@ -61,6 +65,8 @@ public static IRenderer CreateRenderer(string renderer) {
6165
return new Vogen.VogenRenderer();
6266
} else if (renderer == DIFFSINGER) {
6367
return new DiffSinger.DiffSingerRenderer();
68+
} else if (renderer == VOICEVOX) {
69+
return new Voicevox.VoicevoxRenderer();
6470
}
6571
return null;
6672
}

OpenUtau.Core/Ustx/USinger.cs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,19 +185,21 @@ private static void AddToneRange(string range, SortedSet<int> set) {
185185
}
186186
}
187187

188-
[Flags] public enum USingerType { Classic = 0x1, Enunu = 0x2, Vogen = 0x4, DiffSinger=0x5 }
188+
// NOTE(review): this enum is marked [Flags], but DiffSinger (0x5) and Voicevox (0x6) are not single-bit values:
// 0x5 == Classic | Vogen and 0x6 == Enunu | Vogen, so bitwise flag tests cannot tell these members apart from
// those combinations. The values are left unchanged here — TODO confirm whether they are persisted or tested as
// flags anywhere before renumbering them or removing [Flags].
[Flags] public enum USingerType { Classic = 0x1, Enunu = 0x2, Vogen = 0x4, DiffSinger=0x5, Voicevox=0x6 }
189189

190190
/// <summary>
/// Lookup tables that translate between <see cref="USingerType"/> values and their lowercase string names.
/// </summary>
/// <remarks>
/// <see cref="USingerType.Vogen"/> has no entry in either table.
/// </remarks>
public static class SingerTypeUtils {
    /// <summary>Maps a singer type to its string name.</summary>
    public static Dictionary<USingerType?, string> SingerTypeNames = new Dictionary<USingerType?, string> {
        [USingerType.Classic] = "utau",
        [USingerType.Enunu] = "enunu",
        [USingerType.DiffSinger] = "diffsinger",
        [USingerType.Voicevox] = "voicevox",
    };

    /// <summary>Maps a string name back to its singer type; the inverse of <see cref="SingerTypeNames"/>.</summary>
    public static Dictionary<string, USingerType> SingerTypeFromName = new Dictionary<string, USingerType> {
        ["utau"] = USingerType.Classic,
        ["enunu"] = USingerType.Enunu,
        ["diffsinger"] = USingerType.DiffSinger,
        ["voicevox"] = USingerType.Voicevox,
    };
}

OpenUtau.Core/Util/Base64.cs

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
using System.Collections.Generic;
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
24
using System.Text;
5+
using Serilog;
36

4-
namespace OpenUtau.Core.Util
5-
{
7+
namespace OpenUtau.Core.Util {
68
public static class Base64
79
{
810
public static string Base64EncodeInt12(int[] data)
@@ -59,5 +61,17 @@ private static string Base64EncodeInt12(int data)
5961
base64[1] = intToBase64[data & 0x003F];
6062
return new string(base64);
6163
}
64+
65+
/// <summary>
/// Decodes a Base64 string and writes the resulting bytes to a file.
/// </summary>
/// <param name="base64str">The Base64-encoded payload.</param>
/// <param name="filePath">Path of the file to write.</param>
/// <remarks>
/// Best-effort: any failure (for example malformed Base64 or an I/O error) is logged and swallowed,
/// so callers get no signal that the file was not written.
/// </remarks>
public static void Base64ToFile(string base64str, string filePath) {
    try {
        byte[] bytes = Convert.FromBase64String(base64str);
        File.WriteAllBytes(filePath, bytes);
    } catch (Exception ex) {
        // Hand the exception object to the logger. The previous call passed only the template "{ex}"
        // with no argument bound to it, so the actual error never reached the log.
        Log.Error(ex, "Failed to write Base64 data to {FilePath}", filePath);
    }
}
6276
}
6377
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
using System.Linq;
2+
using OpenUtau.Api;
3+
using OpenUtau.Core.Ustx;
4+
using OpenUtau.Core.Voicevox;
5+
6+
namespace Voicevox {
7+
/// <summary>
/// Phonemizer that emits exactly one phoneme per note: the lyric itself when
/// <c>VoicevoxUtils.IsHiraKana</c> accepts it, "R" when <c>VoicevoxUtils.IsPau</c> accepts it,
/// and the literal "error" otherwise.
/// </summary>
[Phonemizer("Simple Voicevox Japanese Phonemizer", "S-VOICEVOX JA", language: "JA")]
public class SimpleVoicevoxPhonemizer : Phonemizer {

    /// <summary>
    /// The singer passed to <see cref="SetSinger"/>, or null when that singer is not a <see cref="VoicevoxSinger"/>.
    /// </summary>
    protected VoicevoxSinger singer;

    /// <summary>
    /// Stores the singer (if it is a <see cref="VoicevoxSinger"/>) and copies this phonemizer's tag
    /// into the singer's voicevox configuration.
    /// </summary>
    /// <param name="singer">The singer selected for the track.</param>
    public override void SetSinger(USinger singer) {
        this.singer = singer as VoicevoxSinger;
        if (this.singer != null) {
            this.singer.voicevoxConfig.Tag = this.Tag;
        }
    }

    /// <summary>
    /// Produces the single phoneme for the first note of <paramref name="notes"/>.
    /// </summary>
    /// <remarks>
    /// When the lyric contains spaces, the second space-separated token is used as the lyric.
    /// The input notes are not modified.
    /// </remarks>
    public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevNeighbours) {
        // SetSinger leaves `singer` null for non-VOICEVOX singers; guard so that case no longer
        // throws a NullReferenceException here.
        if (singer != null) {
            // NOTE(review): the loaded dictionary is not consulted below, and it is reloaded for every
            // note. The call is kept in case Loaddic has side effects — TODO confirm and hoist or remove.
            Dictionary_list dic = new Dictionary_list();
            dic.Loaddic(singer.Location);
        }

        // Work on a local copy instead of writing the chosen token back into the caller's notes array.
        string lyric = notes[0].lyric;
        var lyricList = lyric.Split(" ");
        if (lyricList.Length > 1) {
            lyric = lyricList[1];
        }

        string phoneme;
        if (VoicevoxUtils.IsHiraKana(lyric)) {
            phoneme = lyric;
        } else if (VoicevoxUtils.IsPau(lyric)) {
            phoneme = "R";
        } else {
            phoneme = "error";
        }

        return new Result {
            phonemes = new Phoneme[] {
                new Phoneme {
                    phoneme = phoneme,
                }
            },
        };
    }
}
68+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Net.Http;
4+
using System.Net.Http.Headers;
5+
using System.Text;
6+
using Serilog;
7+
8+
namespace OpenUtau.Core.Voicevox {
9+
/// <summary>
/// Synchronous HTTP client for the VOICEVOX engine endpoint described by a <see cref="VoicevoxURL"/>.
/// </summary>
class VoicevoxClient : Util.SingletonBase<VoicevoxClient> {
    // A single shared HttpClient: creating and disposing one per request can exhaust sockets under load.
    private static readonly HttpClient httpClient = new HttpClient();

    /// <summary>
    /// Sends the request described by <paramref name="voicevoxURL"/> and blocks until the response body is read.
    /// </summary>
    /// <param name="voicevoxURL">Method, URL parts, query, body and Accept header of the request.</param>
    /// <returns>
    /// Item1: the response body as text, wrapped as <c>{ "json": ... }</c> when it does not already start
    /// with '{' and end with '}'. Item2: the raw response bytes.
    /// On any failure the error is logged and an empty string with an empty byte array is returned.
    /// </returns>
    internal Tuple<string, byte[]> SendRequest(VoicevoxURL voicevoxURL) {
        try {
            // Invariant casing so the HTTP verb does not depend on the current culture.
            var method = new HttpMethod(voicevoxURL.method.ToUpperInvariant());
            using (var request = new HttpRequestMessage(method, this.RequestURL(voicevoxURL))) {
                request.Headers.TryAddWithoutValidation("accept", voicevoxURL.accept);

                request.Content = new StringContent(voicevoxURL.body);
                request.Content.Headers.ContentType = MediaTypeHeaderValue.Parse("application/json");

                Log.Information($"VoicevoxProcess sending {request}");
                // GetAwaiter().GetResult() surfaces the original exception instead of an AggregateException.
                using (var response = httpClient.SendAsync(request).GetAwaiter().GetResult()) {
                    // Logged only after the response has actually arrived.
                    Log.Information("VoicevoxProcess received");
                    byte[] bytes = response.Content.ReadAsByteArrayAsync().GetAwaiter().GetResult();
                    string str = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();
                    // The body may not be a JSON object (e.g. a bare array or binary audio); wrap it so
                    // the returned text always has an object shape.
                    if (!str.StartsWith("{", StringComparison.Ordinal) || !str.EndsWith("}", StringComparison.Ordinal)) {
                        str = "{ \"json\":" + str + "}";
                    }
                    return new Tuple<string, byte[]>(str, bytes);
                }
            }
        } catch (Exception ex) {
            // Pass the exception itself; the previous template "{ex}" had no argument and logged nothing useful.
            Log.Error(ex, "VoicevoxProcess request failed");
        }
        return new Tuple<string, byte[]>("", new byte[0]);
    }

    /// <summary>
    /// Builds the request URL as protocol + host + path + "?" + "key=value" pairs joined by "&amp;".
    /// </summary>
    /// <param name="voicevoxURL">Source of the URL parts and query parameters.</param>
    /// <returns>The assembled URL; the "?" is present even when there are no query parameters.</returns>
    public string RequestURL(VoicevoxURL voicevoxURL) {
        var url = new StringBuilder();
        url.Append(voicevoxURL.protocol)
           .Append(voicevoxURL.host)
           .Append(voicevoxURL.path)
           .Append('?');

        // NOTE(review): keys and values are appended verbatim, without URL-encoding — confirm whether
        // callers pre-encode them before adding Uri.EscapeDataString here.
        bool first = true;
        foreach (var parameter in voicevoxURL.query) {
            if (!first) {
                url.Append('&');
            }
            url.Append(parameter.Key).Append('=').Append(parameter.Value);
            first = false;
        }
        return url.ToString();
    }
}
49+
/// <summary>
/// Describes a single HTTP request to a VOICEVOX engine: method, URL parts, query parameters,
/// request body and Accept header.
/// </summary>
public class VoicevoxURL {
    /// <summary>HTTP method name; empty by default.</summary>
    public string method = "";

    /// <summary>URL scheme prefix, including the "://" separator.</summary>
    public string protocol = "http://";

    /// <summary>Host and port of the engine. Currently fixed to local port 50021.</summary>
    public string host = "127.0.0.1:50021";

    /// <summary>Request path; empty by default.</summary>
    public string path = "";

    /// <summary>Query-string parameters as key/value pairs; empty by default.</summary>
    public Dictionary<string, string> query = new Dictionary<string, string>();

    /// <summary>Request body text; empty by default.</summary>
    public string body = "";

    /// <summary>Value sent in the request's "accept" header.</summary>
    public string accept = "application/json";
}
59+
}

0 commit comments

Comments
 (0)