Skip to content

Commit 8bfd3a6

Browse files
authored
Merge pull request #5 from YuzukiTsuru/Shine
Shine
2 parents c7dc28d + 7acadd2 commit 8bfd3a6

26 files changed

+412
-180
lines changed

.github/workflows/cmake-macos.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ name: CMake macOS
22

33
on:
44
push:
5-
branches: [ master ]
5+
branches: "*"
66
pull_request:
7-
branches: [ master ]
7+
branches: "*"
88

99
env:
1010
# Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)

.github/workflows/cmake-ubuntu.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ name: CMake Ubuntu
22

33
on:
44
push:
5-
branches: [ master ]
5+
branches: "*"
66
pull_request:
7-
branches: [ master ]
7+
branches: "*"
88

99
env:
1010
# Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)

.github/workflows/cmake-windows.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ name: CMake Windows
22

33
on:
44
push:
5-
branches: [ master ]
5+
branches: "*"
66
pull_request:
7-
branches: [ master ]
7+
branches: "*"
88

99
env:
1010
# Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)

src/AudioModel/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Library Audio Model
22

33
add_subdirectory(WorldModule)
4+
add_subdirectory(Synthesis)
45

56
file(GLOB audio_model_source *.cpp *.h)
67
add_library(AudioModel ${audio_model_source})

src/AudioModel/Synthesis/Synthesis.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,51 @@
1717
//
1818

1919
#include "Synthesis.h"
20+
21+
#include <world/synthesis.h>
22+
23+
#include "Utils/LOG.h"
24+
#include "world/synthesisrealtime.h"
25+
26+
// Build a synthesizer for an analysed audio model.
//
// audioModel: analysis data to synthesize from. It is copied into the member
//             of the same name, which is what SynthesisWav() reads.
// x_length:   number of output samples to allocate for the waveform buffer.
//
// Members are initialised in declaration order (x_length, then audioModel).
Synthesis::Synthesis(lessAudioModel audioModel, int x_length) : x_length(x_length), audioModel(audioModel) {
    YALL_DEBUG_ << "Allocate Memory for output wav, length: " + std::to_string(x_length);
    AllocateMemory();
}
31+
32+
// (Re)allocate the output sample buffer `x` so it holds `x_length` samples.
//
// The new buffer is value-initialised, so samples the synthesizer never
// writes are 0.0 rather than indeterminate. Any buffer from an earlier call
// is released only after the new allocation succeeded, so `x` never dangles
// if `new` throws.
void Synthesis::AllocateMemory() {
    auto *fresh = new double[x_length]();
    delete[] x;
    x = fresh;
}
35+
36+
void Synthesis::SynthesisWav() const {
37+
WorldSynthesizer synthesizer = {0};
38+
int buffer_size = 64;
39+
InitializeSynthesizer(audioModel.fs, audioModel.frame_period,
40+
audioModel.fft_size, buffer_size, 100, &synthesizer);
41+
42+
int offset = 0;
43+
int index = 0;
44+
for (int i = 0; i < audioModel.f0_length;) {
45+
// Add one frame ('i' shows the frame index that should be added)
46+
if (AddParameters(&audioModel.f0[i], 1, &audioModel.spectrogram[i], &audioModel.aperiodicity[i], &synthesizer) == 1) {
47+
++i;
48+
}
49+
50+
// Synthesize speech with length of buffer_size sample.
51+
// It is repeated until the function returns 0
52+
// (it suggests that the synthesizer cannot generate speech).
53+
while (Synthesis2(&synthesizer) != 0) {
54+
index = offset * buffer_size;
55+
for (int j = 0; j < buffer_size; ++j)
56+
x[j + index] = synthesizer.buffer[j];
57+
offset++;
58+
}
59+
60+
// Check the "Lock" (Please see synthesisrealtime.h)
61+
if (IsLocked(&synthesizer) == 1) {
62+
YALL_WARN_ << "Synthesis Buffer Locked";
63+
break;
64+
}
65+
}
66+
DestroySynthesizer(&synthesizer);
67+
}

src/AudioModel/Synthesis/Synthesis.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,33 @@
1919
#ifndef LESSAMPLER_SYNTHESIS_H
2020
#define LESSAMPLER_SYNTHESIS_H
2121

22+
#include <iostream>
23+
24+
#include "AudioModel/lessAudioModel.h"
25+
26+
// Plain parameter bundle for synthesis: a sample rate, a frame count and the
// per-frame f0 / spectrogram / aperiodicity arrays.
//
// Every member has a default so a default-constructed instance is in a
// well-defined empty state (zero counts, null pointers) instead of holding
// indeterminate values. The pointers are stored as-is; nothing in this class
// allocates or frees them.
class SynthesisPara {
public:
    int fs = 0;
    int f0_length = 0;
    double *f0 = nullptr;
    double **spectrogram = nullptr;
    double **aperiodicity = nullptr;
};
2234

2335
class Synthesis {
36+
public:
37+
explicit Synthesis(lessAudioModel audioModel, int x_length);
38+
39+
private:
40+
double *x = nullptr;
41+
int x_length = 0;
42+
43+
lessAudioModel audioModel{};
44+
45+
private:
46+
void AllocateMemory();
2447

48+
void SynthesisWav() const;
2549
};
2650

2751

src/AudioModel/lessAudioModel.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,4 @@ class lessAudioModel {
3838
int fft_size = 0;
3939
};
4040

41-
class TransAudioModel {
42-
public:
43-
int t_f0_length;
44-
double *t_f0;
45-
double **t_spectrogram;
46-
double **t_aperiodicity;
47-
};
48-
4941
#endif //LESSAMPLER_LESSAUDIOMODEL_H

src/AudioProcess/AduioProcess.cpp

Lines changed: 38 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -18,80 +18,45 @@
1818
//
1919
#include <cmath>
2020
#include <utility>
21-
#include <cstring>
2221

2322
#include "Utils/exception.h"
2423
#include "Utils/LOG.h"
2524
#include "AudioProcess.h"
26-
#include "libUTAU/PitchBendDecoder.h"
2725

28-
AduioProcess::AduioProcess(lessAudioModel audioModel, UTAUPara utauPara, UTAUFlags flags) : audioModel(audioModel), utauPara(std::move(utauPara)),
29-
flags(flags) {
26+
AudioProcess::AudioProcess(lessAudioModel audioModel, ShinePara shine) : audioModel(audioModel), shine(std::move(shine)) {
3027
YALL_DEBUG_ << "Equalizing Picth...";
3128
PicthEqualizing();
32-
YALL_DEBUG_ << "Decode Pitch Bend...";
33-
DecodePitchBend();
3429
YALL_DEBUG_ << "Time Stretch...";
3530
TimeStretch();
3631
}
3732

38-
TransAudioModel AduioProcess::GetTransAudioModel() {
33+
lessAudioModel AudioProcess::GetTransAudioModel() {
3934
return transAudioModel;
4035
}
4136

42-
void AduioProcess::PicthEqualizing() {
37+
void AudioProcess::PicthEqualizing() {
4338
auto freq_avg = GetAvgFreq();
4439
YALL_DEBUG_ << "The average frequency is " + std::to_string(freq_avg);
4540
if (freq_avg == 0.0) {
4641
for (int i = 0; i < audioModel.f0_length; ++i) {
4742
if (audioModel.f0[i] != 0.0) {
48-
audioModel.f0[i] = utauPara.scaleNum;
43+
audioModel.f0[i] = shine.scale_num;
4944
} else {
5045
audioModel.f0[i] = 0;
5146
}
5247
}
5348
} else {
5449
for (int i = 0; i < audioModel.f0_length; ++i) {
5550
if (audioModel.f0[i] != 0.0) {
56-
audioModel.f0[i] = ((audioModel.f0[i] - freq_avg) * utauPara.modulation / 100.0 + freq_avg) * (utauPara.scaleNum / freq_avg);
51+
audioModel.f0[i] = ((audioModel.f0[i] - freq_avg) * shine.modulation / 100.0 + freq_avg) * (shine.scale_num / freq_avg);
5752
} else {
5853
audioModel.f0[i] = 0;
5954
}
6055
}
6156
}
6257
}
6358

64-
void AduioProcess::DecodePitchBend() {
65-
if (utauPara.tempoNum == 0)
66-
utauPara.tempoNum = 120;
67-
68-
if (utauPara.isCustomPitch) {
69-
pitch_step = static_cast<int>(lround(60.0 / 96.0 / utauPara.tempoNum * audioModel.fs));
70-
pitch_length = utauPara.output_samples / pitch_step + 1;
71-
72-
YALL_DEBUG_ << "The Pitch Length is: " + std::to_string(pitch_length);
73-
74-
PitchBendDecoder pitchBendDecoder(utauPara.pitch, pitch_length);
75-
76-
utauPara.pitch_bend = new int[pitch_length + 1];
77-
for (int i = 0; i < pitch_length + 1; ++i) {
78-
utauPara.pitch_bend[i] = 0;
79-
}
80-
81-
std::memcpy(utauPara.pitch_bend, pitchBendDecoder.getPitchBend(), sizeof(int) * pitch_length);
82-
} else {
83-
utauPara.pitch_bend = new int[pitch_length + 1];
84-
for (int i = 0; i < pitch_length + 1; ++i) {
85-
utauPara.pitch_bend[i] = 0;
86-
}
87-
}
88-
89-
required_frame = static_cast<int>(1000.0 * utauPara.output_samples / audioModel.fs / audioModel.frame_period) + 1;
90-
YALL_DEBUG_ << "The required frame is: " + std::to_string(required_frame);
91-
transAudioModel.t_f0_length = required_frame;
92-
}
93-
94-
double AduioProcess::GetAvgFreq() const {
59+
double AudioProcess::GetAvgFreq() const {
9560
double freq_avg = 0.0, timePercent, r, p[6], q, base_timePercent = 0;
9661
for (int i = 0; i < audioModel.f0_length; ++i) {
9762
timePercent = audioModel.f0[i];
@@ -114,25 +79,27 @@ double AduioProcess::GetAvgFreq() const {
11479
return freq_avg;
11580
}
11681

117-
void AduioProcess::TimeStretch() {
82+
void AudioProcess::TimeStretch() {
11883
YALL_DEBUG_ << "Allocate memory for target audio f0, sp, ap";
11984

120-
if (transAudioModel.t_f0_length == 0)
85+
if (shine.required_frame == 0)
12186
throw parameter_error("The target audio frame length is 0");
12287

123-
transAudioModel.t_f0 = new double[transAudioModel.t_f0_length];
124-
for (int i = 0; i < transAudioModel.t_f0_length; ++i) {
125-
transAudioModel.t_f0[i] = 0.0;
88+
transAudioModel.f0_length = shine.required_frame;
89+
90+
transAudioModel.f0 = new double[transAudioModel.f0_length];
91+
for (int i = 0; i < transAudioModel.f0_length; ++i) {
92+
transAudioModel.f0[i] = 0.0;
12693
}
12794

128-
transAudioModel.t_spectrogram = new double *[transAudioModel.t_f0_length];
129-
transAudioModel.t_aperiodicity = new double *[transAudioModel.t_f0_length];
130-
for (int i = 0; i < transAudioModel.t_f0_length; ++i) {
131-
transAudioModel.t_spectrogram[i] = new double[audioModel.w_length];
132-
transAudioModel.t_aperiodicity[i] = new double[audioModel.w_length];
95+
transAudioModel.spectrogram = new double *[transAudioModel.f0_length];
96+
transAudioModel.aperiodicity = new double *[transAudioModel.f0_length];
97+
for (int i = 0; i < transAudioModel.f0_length; ++i) {
98+
transAudioModel.spectrogram[i] = new double[audioModel.w_length];
99+
transAudioModel.aperiodicity[i] = new double[audioModel.w_length];
133100
for (int j = 0; j < audioModel.w_length; ++j) {
134-
transAudioModel.t_spectrogram[i][j] = 0.0;
135-
transAudioModel.t_aperiodicity[i][j] = 0.0;
101+
transAudioModel.spectrogram[i][j] = 0.0;
102+
transAudioModel.aperiodicity[i][j] = 0.0;
136103
}
137104
}
138105

@@ -142,12 +109,12 @@ void AduioProcess::TimeStretch() {
142109
double _sample_sp_trans_index, _sample_ap_trans_index, _out_sample_index, _in_sample_index;
143110
int _sp_trans_index, _ap_trans_index;
144111

145-
for (int i = 0; i < transAudioModel.t_f0_length; ++i) {
112+
for (int i = 0; i < transAudioModel.f0_length; ++i) {
146113
_out_sample_index = audioModel.frame_period * i;
147-
if (_out_sample_index < utauPara.base_length) {
148-
_in_sample_index = utauPara.offset + _out_sample_index * utauPara.velocity;
114+
if (_out_sample_index < shine.base_length) {
115+
_in_sample_index = shine.offset + _out_sample_index * shine.velocity;
149116
} else {
150-
_in_sample_index = utauPara.offset + utauPara.firstHalfFixedPart + (_out_sample_index - utauPara.base_length) * utauPara.stretch_length;
117+
_in_sample_index = shine.offset + shine.first_half_fixed_part + (_out_sample_index - shine.base_length) * shine.stretch_length;
151118
}
152119
YALL_DEBUG_ << "_in_sample_index -> " + std::to_string(_in_sample_index);
153120
YALL_DEBUG_ << "_out_sample_index -> " + std::to_string(_out_sample_index);
@@ -173,34 +140,34 @@ void AduioProcess::TimeStretch() {
173140
}
174141
}
175142

176-
_sample_ap_trans_index = _out_sample_index * 0.001 * audioModel.fs / pitch_step;
143+
_sample_ap_trans_index = _out_sample_index * 0.001 * audioModel.fs / shine.pitch_step;
177144
_ap_trans_index = static_cast<int>(floor(_sample_ap_trans_index));
178145
_sample_ap_trans_index -= _ap_trans_index;
179146

180-
if (_ap_trans_index >= pitch_length) {
181-
_ap_trans_index = pitch_length - 1;
147+
if (_ap_trans_index >= shine.pitch_length) {
148+
_ap_trans_index = shine.pitch_length - 1;
182149
_sample_sp_trans_index = 0.0;
183150
}
184151

185152
YALL_DEBUG_ << "_ap_trans_index -> " + std::to_string(_ap_trans_index);
186153
YALL_DEBUG_ << "_sample_ap_trans_index -> " + std::to_string(_ap_trans_index + _sample_ap_trans_index);
187154

188155
YALL_DEBUG_ << "Apply Pitch Shift With Pitch Bend";
189-
auto pitch_base = utauPara.scaleNum * pow(2, (utauPara.pitch_bend[_ap_trans_index] * (1.0 - _sample_ap_trans_index) +
190-
utauPara.pitch_bend[_ap_trans_index + 1] * _sample_ap_trans_index) / 1200.0);
156+
auto pitch_base = shine.scale_num * pow(2, (shine.pitch_bend[_ap_trans_index] * (1.0 - _sample_ap_trans_index) +
157+
shine.pitch_bend[_ap_trans_index + 1] * _sample_ap_trans_index) / 1200.0);
191158

192-
YALL_DEBUG_ << "Trans F0 " + std::to_string(transAudioModel.t_f0[i]) + " Add " + std::to_string(pitch_base);
193-
transAudioModel.t_f0[i] = pitch_base;
159+
YALL_DEBUG_ << "Trans F0 " + std::to_string(transAudioModel.f0[i]) + " Add " + std::to_string(pitch_base);
160+
transAudioModel.f0[i] = pitch_base;
194161

195-
transAudioModel.t_f0[i] = transAudioModel.t_f0[i] * pow(temp_f0 / avg_freq, utauPara.modulation * 0.01);
162+
transAudioModel.f0[i] = transAudioModel.f0[i] * pow(temp_f0 / avg_freq, shine.modulation * 0.01);
196163

197164
YALL_DEBUG_ << "Trans SP ";
198165
for (int j = 0; j < audioModel.w_length; ++j) {
199166
if (_sp_trans_index < audioModel.f0_length - 1) {
200-
transAudioModel.t_spectrogram[i][j] = audioModel.spectrogram[_sp_trans_index][j] * (1.0 - _sample_sp_trans_index) +
167+
transAudioModel.spectrogram[i][j] = audioModel.spectrogram[_sp_trans_index][j] * (1.0 - _sample_sp_trans_index) +
201168
audioModel.spectrogram[_sp_trans_index + 1][j] * _sample_sp_trans_index;
202169
} else {
203-
transAudioModel.t_spectrogram[i][j] = audioModel.spectrogram[audioModel.f0_length - 1][j];
170+
transAudioModel.spectrogram[i][j] = audioModel.spectrogram[audioModel.f0_length - 1][j];
204171
}
205172
}
206173

@@ -212,15 +179,15 @@ void AduioProcess::TimeStretch() {
212179

213180
for (int j = 0; j < audioModel.w_length; ++j) {
214181
if (_ap_trans_index < audioModel.f0_length) {
215-
transAudioModel.t_aperiodicity[i][j] = audioModel.aperiodicity[_ap_trans_index][j];
182+
transAudioModel.aperiodicity[i][j] = audioModel.aperiodicity[_ap_trans_index][j];
216183
} else {
217-
transAudioModel.t_aperiodicity[i][j] = audioModel.aperiodicity[audioModel.f0_length - 1][j];
184+
transAudioModel.aperiodicity[i][j] = audioModel.aperiodicity[audioModel.f0_length - 1][j];
218185
}
219186
}
220187
}
221188
}
222189

223-
void AduioProcess::interp1(const double *x, const double *y, int x_length, const double *xi, int xi_length, double *yi) {
190+
void AudioProcess::interp1(const double *x, const double *y, int x_length, const double *xi, int xi_length, double *yi) {
224191
auto *h = new double[x_length - 1];
225192
int *k = new int[xi_length];
226193

@@ -243,7 +210,7 @@ void AduioProcess::interp1(const double *x, const double *y, int x_length, const
243210
delete[] h;
244211
}
245212

246-
void AduioProcess::histc(const double *x, int x_length, const double *edges, int edges_length, int *index) {
213+
void AudioProcess::histc(const double *x, int x_length, const double *edges, int edges_length, int *index) {
247214
int count = 1;
248215

249216
int i = 0;

src/AudioProcess/AudioProcess.h

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,29 +21,22 @@
2121
#define LESSAMPLER_AUDIOPROCESS_H
2222

2323
#include "AudioModel/lessAudioModel.h"
24-
#include "libUTAU/libUTAU.h"
24+
#include "Shine/ShinePara.h"
2525

26-
class AduioProcess {
26+
class AudioProcess {
2727
public:
28-
AduioProcess(lessAudioModel audioModel, UTAUPara utauPara, UTAUFlags flags);
28+
AudioProcess(lessAudioModel audioModel, ShinePara shine);
2929

30-
TransAudioModel GetTransAudioModel();
30+
lessAudioModel GetTransAudioModel();
3131

3232
private:
3333
lessAudioModel audioModel{};
34-
TransAudioModel transAudioModel{};
35-
UTAUPara utauPara{};
36-
UTAUFlags flags;
37-
38-
int pitch_length = 0;
39-
int pitch_step = 256;
40-
int required_frame = 0;
34+
lessAudioModel transAudioModel{};
35+
ShinePara shine;
4136

4237
private:
4338
void PicthEqualizing();
4439

45-
void DecodePitchBend();
46-
4740
[[nodiscard]] double GetAvgFreq() const;
4841

4942
void TimeStretch();

0 commit comments

Comments
 (0)