Skip to content

Commit 05d1366

Browse files
authored
Merge pull request #9 from YuzukiTsuru/AudioModelV2
Introduce AudioModel V2 as AudioModel interface, use STL operation
2 parents 19afdce + 647213d commit 05d1366

28 files changed

+526
-645
lines changed

.clang-format

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Generated from CLion C/C++ Code Style settings
2+
BasedOnStyle: LLVM
3+
AccessModifierOffset: -4
4+
AlignAfterOpenBracket: Align
5+
AlignConsecutiveAssignments: None
6+
AlignOperands: Align
7+
AllowAllArgumentsOnNextLine: false
8+
AllowAllConstructorInitializersOnNextLine: false
9+
AllowAllParametersOfDeclarationOnNextLine: false
10+
AllowShortBlocksOnASingleLine: Always
11+
AllowShortCaseLabelsOnASingleLine: false
12+
AllowShortFunctionsOnASingleLine: All
13+
AllowShortIfStatementsOnASingleLine: Always
14+
AllowShortLambdasOnASingleLine: All
15+
AllowShortLoopsOnASingleLine: true
16+
AlwaysBreakAfterReturnType: None
17+
AlwaysBreakTemplateDeclarations: Yes
18+
BreakBeforeBraces: Custom
19+
BraceWrapping:
20+
AfterCaseLabel: false
21+
AfterClass: false
22+
AfterControlStatement: Never
23+
AfterEnum: false
24+
AfterFunction: false
25+
AfterNamespace: false
26+
AfterUnion: false
27+
BeforeCatch: false
28+
BeforeElse: false
29+
IndentBraces: false
30+
SplitEmptyFunction: false
31+
SplitEmptyRecord: true
32+
BreakBeforeBinaryOperators: None
33+
BreakBeforeTernaryOperators: true
34+
BreakConstructorInitializers: BeforeColon
35+
BreakInheritanceList: BeforeColon
36+
ColumnLimit: 0
37+
CompactNamespaces: false
38+
ContinuationIndentWidth: 8
39+
IndentCaseLabels: true
40+
IndentPPDirectives: None
41+
IndentWidth: 4
42+
KeepEmptyLinesAtTheStartOfBlocks: true
43+
MaxEmptyLinesToKeep: 2
44+
NamespaceIndentation: All
45+
ObjCSpaceAfterProperty: false
46+
ObjCSpaceBeforeProtocolList: true
47+
PointerAlignment: Right
48+
ReflowComments: false
49+
SpaceAfterCStyleCast: true
50+
SpaceAfterLogicalNot: false
51+
SpaceAfterTemplateKeyword: false
52+
SpaceBeforeAssignmentOperators: true
53+
SpaceBeforeCpp11BracedList: false
54+
SpaceBeforeCtorInitializerColon: true
55+
SpaceBeforeInheritanceColon: true
56+
SpaceBeforeParens: ControlStatements
57+
SpaceBeforeRangeBasedForLoopColon: false
58+
SpaceInEmptyParentheses: false
59+
SpacesBeforeTrailingComments: 0
60+
SpacesInAngles: false
61+
SpacesInCStyleCastParentheses: false
62+
SpacesInContainerLiterals: false
63+
SpacesInParentheses: false
64+
SpacesInSquareBrackets: false
65+
TabWidth: 4
66+
UseTab: Never

.gitmodules

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,3 @@
1919
[submodule "lib/sndfile"]
2020
path = lib/sndfile
2121
url = https://github.com/libsndfile/libsndfile
22-
[submodule "lib/lz4"]
23-
path = lib/lz4
24-
url = https://github.com/lz4/lz4

CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,6 @@ add_subdirectory(lib/dialog EXCLUDE_FROM_ALL)
8787
add_subdirectory(lib/inicpp EXCLUDE_FROM_ALL)
8888
add_subdirectory(lib/ftxui EXCLUDE_FROM_ALL)
8989
add_subdirectory(lib/rapidjson EXCLUDE_FROM_ALL)
90-
add_subdirectory(lib/lz4/build/cmake EXCLUDE_FROM_ALL)
9190
add_subdirectory(lib/sndfile EXCLUDE_FROM_ALL)
9291

9392
# include header files
@@ -101,7 +100,6 @@ include_directories(
101100
lib/inicpp/include
102101
lib/rapidjson/include
103102
lib/sndfile/include
104-
lib/lz4/lib
105103
${PROJECT_BINARY_DIR}
106104
${PROJECT_BINARY_DIR}/lib/sndfile/include
107105
)

docs/README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,28 @@ lessampler is a Singing Voice Synthesizer. It provides complete pitch shifting,
3434
***Currently lessampler is still very unstable and there are many bugs that need to be fixed, but you are very welcome to participate in testing.***
3535

3636
You can find the alpha version at [Release](https://github.com/YuzukiTsuru/lessampler/releases)
37+
38+
# Contributing to lessampler
39+
40+
This section contains articles that are relevant to anyone who wants to contribute to lessampler and help improve the project.
41+
42+
It provides an introduction to contributing, along with a few ideas for how you can help.
43+
44+
## lessaudio file format
45+
46+
lessaudio is the audio source model file format used by lessampler. A lessaudio file contains the audio's f0, spectrogram, and aperiodicity, together with the basic parameters that correspond to them. The following table lists the entries in index order, along with their function.
47+
48+
> ! Note: the current version of lessaudio only saves the data of lessaudiomodel. If the format is modified in the future, the changes will be explained here.
49+
50+
| index | data | data size |
51+
| ----- | ---- | --------- |
52+
| 1 | | |
53+
| 2 | | |
54+
| 3 | | |
55+
| 4 | | |
56+
| 5 | | |
57+
| 6 | | |
58+
| 7 | | |
59+
| 8 | | |
60+
| 9 | | |
61+

docs/shine/shine.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Shine
2+
Shine is a data class used to describe the parameters of an individual audio transform. It contains the input and output audio files, audio interception and transformation parameters, audio pitch adjustment parameters, and elongation parameters. Shine also provides a convenient interface for describing timbre transformations, etc.
3+
4+
## Basic definition
5+
6+
```c++
7+
class ShinePara {
8+
public:
9+
// Basic Transformation Parameters
10+
std::string input_file_name = {};
11+
std::string output_file_name = {};
12+
int time_percent = 0;
13+
double velocity = 0.0;
14+
double offset = 0.0;
15+
double required_length = 0.0;
16+
int required_frame = 0;
17+
double first_half_fixed_part = 0.0;
18+
double last_unused_part = 0.0;
19+
double volumes = 0;
20+
int modulation = 0;
21+
double wave_length = 0.0;
22+
double pre_cross_length = 0.0;
23+
double base_length = 0.0;
24+
double cross_length = 0.0;
25+
double stretch_length = 0.0;
26+
int output_samples = 0;
27+
double scale_num = 0.0;
28+
int tempo_num = 0;
29+
30+
public:
31+
// Pitch sections
32+
int *pitch_bend = nullptr;
33+
int pitch_length = 0;
34+
int pitch_step = 256;
35+
36+
public:
37+
// Options
38+
bool is_custom_pitch = false;
39+
bool is_gender = false;
40+
}
41+
```

lib/lz4

Lines changed: 0 additions & 1 deletion
This file was deleted.

src/AudioModel/AudioModel.cpp

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,21 +11,18 @@
1111
//
1212
// Created by gloom on 2022/5/2.
1313
//
14-
#include <memory>
15-
16-
#include "FileIO/AudioModelIO.h"
17-
#include "FileIO/JSONFileIO.h"
18-
1914
#include "AudioModel.h"
2015
#include "WorldModule/WorldModule.h"
2116

22-
AudioModel::AudioModel(double *x, size_t x_length, int fs, const lessConfigure &configure) : configure(configure) {
23-
_lessAudioModel.x = x;
24-
_lessAudioModel.x_length = static_cast<int>(x_length);
17+
AudioModel::AudioModel(double *x, int x_length, int fs, const lessConfigure &configure) : configure(configure) {
18+
_lessAudioModel.x.reserve(x_length);
19+
_lessAudioModel.x.insert(_lessAudioModel.x.end(), x, x + x_length);
20+
21+
_lessAudioModel.x_length = x_length;
2522
_lessAudioModel.fs = fs;
2623

2724
// initialize the audio model from x, x_length, fs using World Vocoder
28-
WorldModule model(_lessAudioModel.x, _lessAudioModel.x_length, _lessAudioModel.fs, configure);
25+
WorldModule model(x, x_length, _lessAudioModel.fs, configure);
2926
worldPara = model.GetModule();
3027
InitAudioModel();
3128
}
@@ -39,23 +36,21 @@ void AudioModel::InitAudioModel() {
3936
_lessAudioModel.frame_period = worldPara.frame_period;
4037
_lessAudioModel.f0_length = worldPara.f0_length;
4138

42-
_lessAudioModel.f0 = new double[worldPara.f0_length];
43-
std::memcpy(_lessAudioModel.f0, worldPara.f0, sizeof(double) * worldPara.f0_length);
39+
_lessAudioModel.f0.reserve(worldPara.f0_length);
40+
_lessAudioModel.f0.insert(_lessAudioModel.f0.end(), worldPara.f0, worldPara.f0 + worldPara.f0_length);
4441

45-
_lessAudioModel.time_axis = new double[worldPara.f0_length];
46-
std::memcpy(_lessAudioModel.time_axis, worldPara.time_axis, sizeof(double) * worldPara.f0_length);
42+
_lessAudioModel.time_axis.reserve(worldPara.f0_length);
43+
_lessAudioModel.time_axis.insert(_lessAudioModel.time_axis.end(), worldPara.time_axis, worldPara.time_axis + worldPara.f0_length);
4744

4845
_lessAudioModel.w_length = worldPara.fft_size / 2 + 1;
4946

50-
_lessAudioModel.spectrogram = new double *[worldPara.f0_length];
47+
_lessAudioModel.spectrogram.resize(worldPara.f0_length, std::vector<double>(_lessAudioModel.w_length));
5148
for (int i = 0; i < worldPara.f0_length; ++i) {
52-
_lessAudioModel.spectrogram[i] = new double[_lessAudioModel.w_length];
53-
std::memcpy(_lessAudioModel.spectrogram[i], worldPara.spectrogram[i], sizeof(double) * _lessAudioModel.w_length);
49+
_lessAudioModel.spectrogram[i].assign(&(worldPara.spectrogram[i][0]), &(worldPara.spectrogram[i][_lessAudioModel.w_length]));
5450
}
5551

56-
_lessAudioModel.aperiodicity = new double *[worldPara.f0_length];
52+
_lessAudioModel.aperiodicity.resize(worldPara.f0_length, std::vector<double>(_lessAudioModel.w_length));
5753
for (int i = 0; i < worldPara.f0_length; ++i) {
58-
_lessAudioModel.aperiodicity[i] = new double[_lessAudioModel.w_length];
59-
std::memcpy(_lessAudioModel.aperiodicity[i], worldPara.aperiodicity[i], sizeof(double) * _lessAudioModel.w_length);
54+
_lessAudioModel.aperiodicity[i].assign(&(worldPara.aperiodicity[i][0]), &(worldPara.aperiodicity[i][_lessAudioModel.w_length]));
6055
}
6156
}

src/AudioModel/AudioModel.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
class AudioModel {
2424
public:
25-
AudioModel(double *x, size_t x_length, int fs, const lessConfigure &configure);
25+
AudioModel(double *x, int x_length, int fs, const lessConfigure &configure);
2626

2727
lessAudioModel GetAudioModel();
2828

src/AudioModel/Synthesis/Synthesis.cpp

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,22 @@
1414

1515
#include "Synthesis.h"
1616

17-
#include <world/synthesis.h>
17+
#include <utility>
1818

1919
#include "Utils/LOG.h"
20-
#include "world/synthesisrealtime.h"
20+
#include "Utils/Timer.h"
2121

22-
Synthesis::Synthesis(lessAudioModel audioModel, int x_length) : audioModel(audioModel), x_length(x_length) {
22+
#include <world/synthesisrealtime.h>
23+
24+
Synthesis::Synthesis(lessAudioModel audioModel, int x_length) : audioModel(std::move(audioModel)), x_length(x_length) {
2325
YALL_DEBUG_ << "Allocate Out Memory, Length: " + std::to_string(x_length);
2426
AllocateMemory();
25-
YALL_DEBUG_ << "Synthesis Wav...";
27+
YALL_DEBUG_ << "Synthesis Audio...";
28+
uint64_t tmsStart = get_perf_count();
2629
SynthesisWav();
30+
uint64_t tmsEnd = get_perf_count();
31+
uint64_t usVal = (tmsEnd - tmsStart) / 10000;
32+
YALL_INFO_ << "Synthesis Audio: " + std::to_string(usVal) + " ms";
2733
}
2834

2935
Synthesis::~Synthesis() {
@@ -44,11 +50,22 @@ void Synthesis::SynthesisWav() const {
4450
InitializeSynthesizer(audioModel.fs, audioModel.frame_period,
4551
audioModel.fft_size, buffer_size, 100, &synthesizer);
4652

53+
auto f0 = new double[audioModel.f0.size()];
54+
std::copy(audioModel.f0.begin(), audioModel.f0.end(), f0);
55+
auto spectrogram = new double *[audioModel.f0_length];
56+
auto aperiodicity = new double *[audioModel.f0_length];
57+
for (int i = 0; i < audioModel.f0_length; ++i) {
58+
spectrogram[i] = new double[audioModel.w_length];
59+
aperiodicity[i] = new double[audioModel.w_length];
60+
std::copy(audioModel.spectrogram[i].begin(), audioModel.spectrogram[i].end(), spectrogram[i]);
61+
std::copy(audioModel.aperiodicity[i].begin(), audioModel.aperiodicity[i].end(), aperiodicity[i]);
62+
}
63+
4764
int offset = 0;
48-
int index = 0;
65+
int index;
4966
for (int i = 0; i < audioModel.f0_length;) {
5067
// Add one frame ('i' shows the frame index that should be added)
51-
if (AddParameters(&audioModel.f0[i], 1, &audioModel.spectrogram[i], &audioModel.aperiodicity[i], &synthesizer) == 1) {
68+
if (AddParameters(&f0[i], 1, &spectrogram[i], &aperiodicity[i], &synthesizer) == 1) {
5269
++i;
5370
}
5471

src/AudioModel/WorldModule/WorldModule.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,20 @@
2222
#include <world/harvest.h>
2323
#include <world/d4c.h>
2424

25+
#include <utility>
26+
2527
WorldModule::WorldModule(double *x, int x_length, int fs, const lessConfigure &configure) : x(x), x_length(x_length), configure(configure) {
28+
// Set the para
2629
this->worldPara.fs = fs;
2730
this->worldPara.frame_period = configure.audio_model_frame_period;
31+
2832
YALL_DEBUG_ << "Generate F0 from PCM file.";
2933
if (configure.f0_mode == lessConfigure::F0_MODE::F0_MODE_DIO) {
3034
F0EstimationDio();
3135
} else if (configure.f0_mode == lessConfigure::F0_MODE::F0_MODE_HARVEST) {
3236
F0EstimationHarvest();
3337
} else {
34-
YALL_ERROR_ << "F0 Estimation Mode Error.";
38+
throw std::runtime_error("F0 Estimation Mode Error.");
3539
}
3640
YALL_DEBUG_ << "Generate Envelope from PCM file and F0.";
3741
SpectralEnvelopeEstimation();

0 commit comments

Comments
 (0)