Skip to content

Commit 2917119

Browse files
authored
Merge pull request #1068 from maiko3tattun/0317_ModPlus
Modulation plus
2 parents ee5fe4f + dfb5113 commit 2917119

File tree

7 files changed

+192
-28
lines changed

7 files changed

+192
-28
lines changed

OpenUtau.Core/Classic/ClassicRenderer.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ public class ClassicRenderer : IRenderer {
2525
Ustx.ATK,
2626
Ustx.DEC,
2727
Ustx.MOD,
28+
Ustx.MODP,
2829
Ustx.ALT,
2930
};
3031

OpenUtau.Core/Classic/Frq.cs

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using System.IO;
33
using System.Linq;
44
using System.Text;
5+
using OpenUtau.Core;
56

67
namespace OpenUtau.Classic {
78
public class Frq {
@@ -12,22 +13,40 @@ public class Frq {
1213
public double[] f0 = new double[0];
1314
public double[] amp = new double[0];
1415

15-
public void Load(Stream stream) {
16-
using (var reader = new BinaryReader(stream)) {
17-
string header = new string(reader.ReadChars(8));
18-
if (header != "FREQ0003") {
19-
throw new FormatException("FREQ0003 header not found.");
20-
}
21-
hopSize = reader.ReadInt32();
22-
averageF0 = reader.ReadDouble();
23-
_ = reader.ReadBytes(16); // blank
24-
int length = reader.ReadInt32();
25-
f0 = new double[length];
26-
amp = new double[length];
27-
for (int i = 0; i < length; i++) {
28-
f0[i] = reader.ReadDouble();
29-
amp[i] = reader.ReadDouble();
16+
/// <summary>
17+
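/// Loads the frq file resolved from the wav path. Returns false if the wav path is null or empty (machine learning voicebank), the frq file does not exist, or reading it fails.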
18+
/// </summary>
19+
public bool Load(string wavPath) {
20+
if (string.IsNullOrEmpty(wavPath)) {
21+
return false;
22+
}
23+
string frqFile = VoicebankFiles.GetFrqFile(wavPath);
24+
if (!File.Exists(frqFile)) {
25+
return false;
26+
}
27+
try {
28+
using (var fileStream = File.OpenRead(frqFile)) {
29+
using (var reader = new BinaryReader(fileStream)) {
30+
string header = new string(reader.ReadChars(8));
31+
if (header != "FREQ0003") {
32+
throw new FormatException("FREQ0003 header not found.");
33+
}
34+
hopSize = reader.ReadInt32();
35+
averageF0 = reader.ReadDouble();
36+
_ = reader.ReadBytes(16); // blank
37+
int length = reader.ReadInt32();
38+
f0 = new double[length];
39+
amp = new double[length];
40+
for (int i = 0; i < length; i++) {
41+
f0[i] = reader.ReadDouble();
42+
amp[i] = reader.ReadDouble();
43+
}
44+
}
3045
}
46+
return true;
47+
} catch (Exception e) {
48+
DocManager.Inst.ExecuteCmd(new ErrorMessageNotification("failed to load frq file", e));
49+
return false;
3150
}
3251
}
3352

OpenUtau.Core/Classic/WorldlineRenderer.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ public class WorldlineRenderer : IRenderer {
2525
Ustx.VEL,
2626
Ustx.VOL,
2727
Ustx.MOD,
28+
Ustx.MODP,
2829
Ustx.ALT,
2930
Ustx.GENC,
3031
Ustx.BREC,

OpenUtau.Core/Format/USTx.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ public class Ustx {
2626
public const string LPF = "lpf";
2727
public const string NORM = "norm";
2828
public const string MOD = "mod";
29+
public const string MODP = "mod+";
2930
public const string ALT = "alt";
3031
public const string DIR = "dir";
3132
public const string SHFT = "shft";
@@ -51,6 +52,7 @@ public static void AddDefaultExpressions(UProject project) {
5152
project.RegisterExpression(new UExpressionDescriptor("lowpass", LPF, 0, 100, 0, "H"));
5253
project.RegisterExpression(new UExpressionDescriptor("normalize", NORM, 0, 100, 86, "P"));
5354
project.RegisterExpression(new UExpressionDescriptor("modulation", MOD, 0, 100, 0));
55+
project.RegisterExpression(new UExpressionDescriptor("modulation plus", MODP, 0, 100, 0));
5456
project.RegisterExpression(new UExpressionDescriptor("alternate", ALT, 0, 16, 0));
5557
project.RegisterExpression(new UExpressionDescriptor("direct", DIR, false, new string[] { "off", "on" }));
5658
project.RegisterExpression(new UExpressionDescriptor("tone shift", SHFT, -36, 36, 0));

OpenUtau.Core/Render/RenderPhrase.cs

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using System.Numerics;
66
using K4os.Hash.xxHash;
77
using OpenUtau.Core.Ustx;
8+
using Serilog;
89

910
namespace OpenUtau.Core.Render {
1011
public class RenderNote {
@@ -185,8 +186,7 @@ public class RenderPhrase {
185186
public readonly string wavtool;
186187

187188
internal RenderPhrase(UProject project, UTrack track, UVoicePart part, IEnumerable<UPhoneme> phonemes) {
188-
var uNotes = new List<UNote>();
189-
uNotes.Add(phonemes.First().Parent);
189+
var uNotes = new List<UNote> { phonemes.First().Parent };
190190
var endNote = phonemes.Last().Parent;
191191
while (endNote.Next != null && endNote.Next.Extends != null) {
192192
endNote = endNote.Next;
@@ -228,6 +228,7 @@ internal RenderPhrase(UProject project, UTrack track, UVoicePart part, IEnumerab
228228
int pitchStart = position - part.position - leading;
229229
pitches = new float[(end - part.position - pitchStart) / pitchInterval + 1];
230230
int index = 0;
231+
// Create flat pitches
231232
foreach (var note in uNotes) {
232233
while (pitchStart + index * pitchInterval < note.End && index < pitches.Length) {
233234
pitches[index] = note.tone * 100;
@@ -239,6 +240,7 @@ internal RenderPhrase(UProject project, UTrack track, UVoicePart part, IEnumerab
239240
pitches[index] = pitches[index - 1];
240241
index++;
241242
}
243+
// Vibrato
242244
foreach (var note in uNotes) {
243245
if (note.vibrato.length <= 0) {
244246
continue;
@@ -253,6 +255,7 @@ internal RenderPhrase(UProject project, UTrack track, UVoicePart part, IEnumerab
253255
pitches[i] = point.Y * 100;
254256
}
255257
}
258+
// Pitch points
256259
foreach (var note in uNotes) {
257260
var pitchPoints = note.pitch.data
258261
.Select(point => {
@@ -291,7 +294,78 @@ internal RenderPhrase(UProject project, UTrack track, UVoicePart part, IEnumerab
291294
lastPoint = point;
292295
}
293296
}
297+
// Mod plus
298+
if (track.TryGetExpDescriptor(project, Format.Ustx.MODP, out var modp) && renderer.SupportsExpression(modp)) {
299+
foreach (var phoneme in phonemes) {
300+
var mod = phoneme.GetExpression(project, track, Format.Ustx.MODP).Item1;
301+
if (mod == 0) {
302+
continue;
303+
}
304+
305+
try {
306+
if (phoneme.TryGetFrq(out var frqFix, out var frqStretch, out double average, out int hopSize)) {
307+
UTempo[] noteTempos = project.timeAxis.TemposBetweenTicks(part.position + phoneme.position, part.position + phoneme.End);
308+
var tempo = noteTempos[0].bpm; // compromise 妥協!
309+
var frqIntervalTick = MusicMath.TempoMsToTick(tempo, (double)1 * 1000 / 44100 * hopSize);
310+
double consonantStretch = Math.Pow(2f, 1.0f - phoneme.GetExpression(project, track, Format.Ustx.VEL).Item1 / 100f);
311+
312+
var preutter = MusicMath.TempoMsToTick(tempo, Math.Min(phoneme.preutter, phoneme.oto.Preutter * consonantStretch));
313+
int startIndex = Math.Max(0, (int)Math.Floor((phoneme.position - pitchStart - preutter) / pitchInterval));
314+
int position = (int)Math.Round((double)((phoneme.position - pitchStart) / pitchInterval));
315+
int startStretch = position + (int)Math.Round(MusicMath.TempoMsToTick(tempo, (phoneme.oto.Consonant - phoneme.oto.Preutter) * consonantStretch) / pitchInterval);
316+
int endIndex = Math.Min(pitches.Length, (int)Math.Ceiling(phoneme.End - pitchStart - MusicMath.TempoMsToTick(tempo, phoneme.tailIntrude - phoneme.tailOverlap)) / pitchInterval);
317+
318+
frqFix = frqFix.Select(f => f - average).ToArray();
319+
frqStretch = frqStretch.Select(f => f - average).ToArray();
320+
double stretch = 1;
321+
if (frqStretch.Length * frqIntervalTick < ((double)endIndex - startStretch) * pitchInterval) {
322+
stretch = ((double)endIndex - startStretch) * pitchInterval / (frqStretch.Length * frqIntervalTick);
323+
}
324+
var env0 = new Vector2(0, 0);
325+
var env1 = new Vector2((phoneme.envelope.data[1].X - phoneme.envelope.data[0].X) / (phoneme.envelope.data[4].X - phoneme.envelope.data[0].X), 100);
326+
var env3 = new Vector2((phoneme.envelope.data[3].X - phoneme.envelope.data[0].X) / (phoneme.envelope.data[4].X - phoneme.envelope.data[0].X), 100);
327+
var env4 = new Vector2(1, 0);
328+
329+
for (int i = 0; startStretch + i <= endIndex; i++) {
330+
var pit = startStretch + i;
331+
if (pit >= pitches.Length) break;
332+
var frq = i * (pitchInterval / frqIntervalTick) / stretch;
333+
var frqMin = Math.Clamp((int)Math.Floor(frq), 0, frqStretch.Length - 1);
334+
var frqMax = Math.Clamp((int)Math.Ceiling(frq), 0, frqStretch.Length - 1);
335+
var diff = MusicMath.Linear(frqMin, frqMax, frqStretch[frqMin], frqStretch[frqMax], frq);
336+
diff = diff * mod / 100;
337+
diff = Fade(diff, pit);
338+
pitches[pit] = pitches[pit] + (float)(diff * 100);
339+
}
340+
for (int i = 0; startStretch + i - 1 >= startIndex; i--) {
341+
var pit = startStretch + i - 1;
342+
if (pit > endIndex || pit >= pitches.Length) continue;
343+
var frq = frqFix.Length + (i * (pitchInterval / frqIntervalTick) / consonantStretch);
344+
var frqMin = Math.Clamp((int)Math.Floor(frq), 0, frqFix.Length - 1);
345+
var frqMax = Math.Clamp((int)Math.Ceiling(frq), 0, frqFix.Length - 1);
346+
var diff = MusicMath.Linear(frqMin, frqMax, frqFix[frqMin], frqFix[frqMax], frq);
347+
diff = diff * mod / 100;
348+
diff = Fade(diff, pit);
349+
pitches[pit] = pitches[pit] + (float)(diff * 100);
350+
}
351+
double Fade(double diff, int pit) {
352+
var percentage = (double)(pit - startIndex) / (endIndex - startIndex);
353+
if (phoneme.Next != null && phoneme.End == phoneme.Next.position && percentage > env3.X) {
354+
diff = diff * Math.Clamp(MusicMath.Linear(env3.X, env4.X, env3.Y, env4.Y, percentage), 0, 100) / 100;
355+
}
356+
if (phoneme.Prev != null && phoneme.Prev.End == phoneme.position && percentage < env1.X) {
357+
diff = diff * Math.Clamp(MusicMath.Linear(env0.X, env1.X, env0.Y, env1.Y, percentage), 0, 100) / 100;
358+
}
359+
return diff;
360+
}
361+
}
362+
} catch(Exception e) {
363+
Log.Error(e, "Failed to compute mod plus.");
364+
}
365+
}
366+
}
294367

368+
// PITD
295369
pitchesBeforeDeviation = pitches.ToArray();
296370
var pitchCurve = part.curves.FirstOrDefault(c => c.abbr == Format.Ustx.PITD);
297371
if (pitchCurve != null && !pitchCurve.IsEmpty) {

OpenUtau.Core/Ustx/UPhoneme.cs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
using System.Collections.Generic;
33
using System.Linq;
44
using System.Numerics;
5+
using NAudio.Wave;
6+
using OpenUtau.Classic;
7+
using SharpCompress;
58
using YamlDotNet.Serialization;
69

710
namespace OpenUtau.Core.Ustx {
@@ -234,6 +237,77 @@ public string GetVoiceColor(UProject project, UTrack track) {
234237
}
235238
return track.VoiceColorExp.options[index];
236239
}
240+
241+
public bool TryGetFrq(out double[] frqFix, out double[] frqStretch, out double average, out int hopSize) {
242+
frqFix = new double[0];
243+
frqStretch = new double[0];
244+
average = 0;
245+
hopSize = 0;
246+
247+
var frq = new Frq();
248+
if (frq.Load(oto.File)) {
249+
average = MusicMath.FreqToTone(frq.averageF0); // 1 = 1tone
250+
hopSize = frq.hopSize;
251+
252+
int wavLength;
253+
using (var waveStream = Format.Wave.OpenFile(oto.File)) {
254+
var sampleProvider = waveStream.ToSampleProvider();
255+
if (sampleProvider.WaveFormat.SampleRate != 44100) {
256+
return false;
257+
}
258+
wavLength = Format.Wave.GetSamples(sampleProvider).Length;
259+
}
260+
261+
int offset = (int)Math.Floor(oto.Offset * 44100 / 1000 / frq.hopSize); // frq samples
262+
int consonant = (int)Math.Floor((oto.Offset + oto.Consonant) * 44100 / 1000 / frq.hopSize);
263+
int cutoff = oto.Cutoff < 0 ?
264+
(int)Math.Floor((oto.Offset - oto.Cutoff) * 44100 / 1000 / frq.hopSize)
265+
: wavLength - (int)Math.Floor(oto.Cutoff * 44100 / 1000 / frq.hopSize);
266+
var avr = average;
267+
var f0 = Completion(frq.f0);
268+
frqFix = f0.Skip(offset).Take(consonant - offset).Select(f => MusicMath.FreqToTone(f)).ToArray();
269+
frqStretch = f0.Skip(consonant).Take(cutoff - consonant).Select(f => MusicMath.FreqToTone(f)).ToArray();
270+
271+
double[] Completion(double[] frqs) {
272+
var list = new List<double>();
273+
for (int i = 0; i < frqs.Length; i++) {
274+
if (frqs[i] <= 0) {
275+
int min = i - 1;
276+
double minFrq = 0;
277+
while (min >= 0) {
278+
if (frqs[min] > 0) {
279+
minFrq = frqs[min];
280+
break;
281+
}
282+
min--;
283+
}
284+
int max = i + 1;
285+
double maxFrq = 0;
286+
while (max < frqs.Length) {
287+
if (frqs[max] > 0) {
288+
maxFrq = frqs[max];
289+
break;
290+
}
291+
max++;
292+
}
293+
if(minFrq <= 0) {
294+
list.Add(maxFrq);
295+
} else if (maxFrq <= 0) {
296+
list.Add(minFrq);
297+
} else {
298+
list.Add(MusicMath.Linear(min, max, minFrq, maxFrq, i));
299+
}
300+
} else {
301+
list.Add(frqs[i]);
302+
}
303+
}
304+
return list.ToArray();
305+
}
306+
return true;
307+
} else {
308+
return false;
309+
}
310+
}
237311
}
238312

239313
public class UEnvelope {

OpenUtau/Views/SingersDialog.axaml.cs

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -363,19 +363,12 @@ void DrawOto(UOto? oto) {
363363
}
364364

365365
Tuple<int, double[]>? LoadF0(string wavPath) {
366-
if(String.IsNullOrEmpty(wavPath)){
367-
//If the wav path is null (machine learning voicebank), return null.
368-
return null;
369-
}
370-
string frqFile = Classic.VoicebankFiles.GetFrqFile(wavPath);
371-
if (!File.Exists(frqFile)) {
372-
return null;
373-
}
374366
var frq = new Classic.Frq();
375-
using (var fileStream = File.OpenRead(frqFile)) {
376-
frq.Load(fileStream);
367+
if (frq.Load(wavPath)) {
368+
return Tuple.Create(frq.hopSize, frq.f0);
369+
} else {
370+
return null;
377371
}
378-
return Tuple.Create(frq.hopSize, frq.f0);
379372
}
380373

381374
void OnKeyDown(object sender, KeyEventArgs args) {

0 commit comments

Comments
 (0)