Skip to content

Commit 28a54e7

Browse files
authored
Merge pull request #983 from oxygen-dioxide/free-memory
DiffSinger: Free memory for singers no longer in use
2 parents 61ec78b + 63903b6 commit 28a54e7

File tree

8 files changed

+152
-18
lines changed

8 files changed

+152
-18
lines changed

OpenUtau.Core/DiffSinger/DiffSingerPitch.cs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
namespace OpenUtau.Core.DiffSinger
1616
{
17-
public class DsPitch
17+
public class DsPitch : IDisposable
1818
{
1919
string rootPath;
2020
DsConfig dsConfig;
@@ -73,6 +73,7 @@ void SetRange<T>(T[] list, T value, int startIndex, int endIndex){
7373
list[i] = value;
7474
}
7575
}
76+
7677
public RenderPitchResult Process(RenderPhrase phrase){
7778
var startMs = Math.Min(phrase.notes[0].positionMs, phrase.phones[0].positionMs) - headMs;
7879
var endMs = phrase.notes[^1].endMs + tailMs;
@@ -280,5 +281,22 @@ public RenderPitchResult Process(RenderPhrase phrase){
280281
};
281282
}
282283
}
284+
285+
// Set once Dispose(bool) has completed so that later calls do nothing.
private bool disposedValue;

/// <summary>
/// Releases the models held by this pitch predictor. Safe to call more than once.
/// </summary>
/// <param name="disposing">
/// When true, linguisticModel and pitchModel are disposed (if non-null).
/// When false, only the disposed flag is set.
/// </param>
protected virtual void Dispose(bool disposing) {
    if (disposedValue) {
        return;
    }
    if (disposing) {
        linguisticModel?.Dispose();
        pitchModel?.Dispose();
    }
    disposedValue = true;
}

/// <summary>
/// Disposes this instance and tells the GC that finalization is not needed.
/// </summary>
public void Dispose() {
    Dispose(true);
    GC.SuppressFinalize(this);
}
283301
}
284302
}

OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,12 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, int tra
101101
}
102102
}
103103
if (result.samples == null) {
104-
result.samples = InvokeDiffsinger(phrase, depth, speedup);
105-
var source = new WaveSource(0, 0, 0, 1);
106-
source.SetSamples(result.samples);
107-
WaveFileWriter.CreateWaveFile16(wavPath, new ExportAdapter(source).ToMono(1, 0));
104+
result.samples = InvokeDiffsinger(phrase, depth, speedup, cancellation);
105+
if (result.samples != null) {
106+
var source = new WaveSource(0, 0, 0, 1);
107+
source.SetSamples(result.samples);
108+
WaveFileWriter.CreateWaveFile16(wavPath, new ExportAdapter(source).ToMono(1, 0));
109+
}
108110
}
109111
if (result.samples != null) {
110112
Renderers.ApplyDynamics(phrase, result);
@@ -120,7 +122,7 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, int tra
120122
leadingMs、positionMs、estimatedLengthMs: timeaxis layout in Ms, double
121123
*/
122124

123-
float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup) {
125+
float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup, CancellationTokenSource cancellation) {
124126
var singer = phrase.singer as DiffSingerSinger;
125127
//Check if dsconfig.yaml is correct
126128
if(String.IsNullOrEmpty(singer.dsConfig.vocoder) ||
@@ -130,6 +132,7 @@ float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup) {
130132
}
131133

132134
var vocoder = singer.getVocoder();
135+
var acousticModel = singer.getAcousticSession();
133136
var frameMs = vocoder.frameMs();
134137
var frameSec = frameMs / 1000;
135138
int headFrames = (int)Math.Round(headMs / frameMs);
@@ -218,8 +221,16 @@ float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup) {
218221
}
219222

220223
//Variance: Energy and Breathiness
224+
221225
if(singer.dsConfig.useBreathinessEmbed || singer.dsConfig.useEnergyEmbed){
222-
var varianceResult = singer.getVariancePredictor().Process(phrase);
226+
var variancePredictor = singer.getVariancePredictor();
227+
VarianceResult varianceResult;
228+
lock(variancePredictor){
229+
if(cancellation.IsCancellationRequested) {
230+
return null;
231+
}
232+
varianceResult = singer.getVariancePredictor().Process(phrase);
233+
}
223234
//TODO: let user edit variance curves
224235
if(singer.dsConfig.useEnergyEmbed){
225236
var energyCurve = phrase.curves.FirstOrDefault(curve => curve.Item1 == ENE);
@@ -246,26 +257,36 @@ float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup) {
246257
.Reshape(new int[] { 1, breathiness.Length })));
247258
}
248259
}
249-
250-
var acousticModel = singer.getAcousticSession();
251-
Onnx.VerifyInputNames(acousticModel, acousticInputs);
252260
Tensor<float> mel;
253-
var acousticOutputs = acousticModel.Run(acousticInputs);
254-
mel = acousticOutputs.First().AsTensor<float>().Clone();
255-
261+
lock(acousticModel){
262+
if(cancellation.IsCancellationRequested) {
263+
return null;
264+
}
265+
Onnx.VerifyInputNames(acousticModel, acousticInputs);
266+
var acousticOutputs = acousticModel.Run(acousticInputs);
267+
mel = acousticOutputs.First().AsTensor<float>().Clone();
268+
}
256269
//vocoder
257270
//waveform = session.run(['waveform'], {'mel': mel, 'f0': f0})[0]
258271
var vocoderInputs = new List<NamedOnnxValue>();
259272
vocoderInputs.Add(NamedOnnxValue.CreateFromTensor("mel", mel));
260273
vocoderInputs.Add(NamedOnnxValue.CreateFromTensor("f0",f0tensor));
261274
float[] samples;
262-
var vocoderOutputs = vocoder.session.Run(vocoderInputs);
263-
samples = vocoderOutputs.First().AsTensor<float>().ToArray();
275+
lock(vocoder){
276+
if(cancellation.IsCancellationRequested) {
277+
return null;
278+
}
279+
var vocoderOutputs = vocoder.session.Run(vocoderInputs);
280+
samples = vocoderOutputs.First().AsTensor<float>().ToArray();
281+
}
264282
return samples;
265283
}
266284

267285
/// <summary>
/// Runs the singer's pitch predictor on the given phrase while holding a lock
/// on the predictor object.
/// </summary>
/// <param name="phrase">Phrase to predict pitch for; its singer is treated as a DiffSingerSinger.</param>
/// <returns>The result of the predictor's Process call.</returns>
public RenderPitchResult LoadRenderedPitch(RenderPhrase phrase) {
    var diffSinger = phrase.singer as DiffSingerSinger;
    var predictor = diffSinger.getPitchPredictor();
    RenderPitchResult pitch;
    // NOTE(review): presumably the same object DiffSingerSinger.FreeMemory locks
    // before disposing - confirm against getPitchPredictor.
    lock (predictor) {
        pitch = predictor.Process(phrase);
    }
    return pitch;
}
270291

271292
public UExpressionDescriptor[] GetSuggestedExpressions(USinger singer, URenderSettings renderSettings) {

OpenUtau.Core/DiffSinger/DiffSingerSinger.cs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,5 +168,33 @@ public DsVariance getVariancePredictor(){
168168
}
169169
return variancePredictor;
170170
}
171+
172+
/// <summary>
/// Disposes the acoustic session, vocoder, pitch predictor and variance predictor
/// held by this singer and resets the corresponding fields to null.
/// </summary>
/// <remarks>
/// Each field is read exactly once into a local before it is locked and disposed,
/// so a concurrent change to the field cannot turn the lock target into null
/// between the null check and the lock statement.
/// NOTE(review): the field is cleared before the old object is disposed; this
/// assumes the get* accessors recreate the resource when the field is null - confirm.
/// </remarks>
public override void FreeMemory(){
    Log.Information($"Freeing memory for singer {Id}");

    var staleAcousticSession = acousticSession;
    acousticSession = null;
    DisposeUnderLock(staleAcousticSession);

    var staleVocoder = vocoder;
    vocoder = null;
    DisposeUnderLock(staleVocoder);

    var stalePitchPredictor = pitchPredictor;
    pitchPredictor = null;
    DisposeUnderLock(stalePitchPredictor);

    var staleVariancePredictor = variancePredictor;
    variancePredictor = null;
    DisposeUnderLock(staleVariancePredictor);
}

/// <summary>
/// Disposes <paramref name="resource"/> while holding a lock on it; does nothing for null.
/// The lock is taken on the resource object itself, matching the object the
/// renderer locks around inference calls.
/// </summary>
private static void DisposeUnderLock(System.IDisposable resource) {
    if (resource == null) {
        return;
    }
    lock (resource) {
        resource.Dispose();
    }
}
171199
}
172200
}

OpenUtau.Core/DiffSinger/DiffSingerVariance.cs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public struct VarianceResult{
1717
public float[] energy;
1818
public float[] breathiness;
1919
}
20-
public class DsVariance{
20+
public class DsVariance : IDisposable{
2121
string rootPath;
2222
DsConfig dsConfig;
2323
List<string> phonemes;
@@ -172,5 +172,22 @@ public VarianceResult Process(RenderPhrase phrase){
172172
breathiness = breathiness_pred.ToArray()
173173
};
174174
}
175+
176+
// Set once Dispose(bool) has completed so that later calls do nothing.
private bool disposedValue;

/// <summary>
/// Releases the models held by this variance predictor. Safe to call more than once.
/// </summary>
/// <param name="disposing">
/// When true, linguisticModel and varianceModel are disposed (if non-null).
/// When false, only the disposed flag is set.
/// </param>
protected virtual void Dispose(bool disposing) {
    if (disposedValue) {
        return;
    }
    if (disposing) {
        linguisticModel?.Dispose();
        varianceModel?.Dispose();
    }
    disposedValue = true;
}

/// <summary>
/// Disposes this instance and tells the GC that finalization is not needed.
/// </summary>
public void Dispose() {
    Dispose(true);
    GC.SuppressFinalize(this);
}
175192
}
176193
}

OpenUtau.Core/DiffSinger/DiffSingerVocoder.cs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
using Microsoft.ML.OnnxRuntime;
44

55
namespace OpenUtau.Core.DiffSinger {
6-
public class DsVocoder {
6+
public class DsVocoder : IDisposable {
77
public string Location;
88
public DsVocoderConfig config;
99
public InferenceSession session;
@@ -27,6 +27,23 @@ public DsVocoder(string name) {
2727
/// <summary>
/// Length of one vocoder frame in milliseconds, computed as
/// 1000 * hop_size / sample_rate from the vocoder config.
/// </summary>
public float frameMs() => 1000f * config.hop_size / config.sample_rate;
30+
31+
// Set once Dispose(bool) has completed so that later calls do nothing.
private bool disposedValue;

/// <summary>
/// Releases the inference session held by this vocoder. Safe to call more than once.
/// </summary>
/// <param name="disposing">
/// When true, session is disposed (if non-null).
/// When false, only the disposed flag is set.
/// </param>
protected virtual void Dispose(bool disposing) {
    if (disposedValue) {
        return;
    }
    if (disposing) {
        session?.Dispose();
    }
    disposedValue = true;
}

/// <summary>
/// Disposes this instance and tells the GC that finalization is not needed.
/// </summary>
public void Dispose() {
    Dispose(true);
    GC.SuppressFinalize(this);
}
46+
3047
}
3148

3249
[Serializable]

OpenUtau.Core/Render/RenderEngine.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ public void PreRenderProject(ref CancellationTokenSource cancellation) {
169169

170170
private RenderPartRequest[] PrepareRequests() {
171171
RenderPartRequest[] requests;
172+
SingerManager.Inst.ReleaseSingersNotInUse(project);
172173
lock (project) {
173174
requests = project.parts
174175
.Where(part => part is UVoicePart)

OpenUtau.Core/SingerManager.cs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ public class SingerManager : SingletonBase<SingerManager> {
1919

2020
private readonly ConcurrentQueue<USinger> reloadQueue = new ConcurrentQueue<USinger>();
2121
private CancellationTokenSource reloadCancellation;
22+
23+
private HashSet<USinger> singersUsed = new HashSet<USinger>();
2224

2325
public void Initialize() {
2426
InitializationTask = Task.Run(() => {
@@ -109,5 +111,26 @@ private void Refresh() {
109111
}).Start(DocManager.Inst.MainScheduler);
110112
}
111113
}
114+
115+
//Check which singers are in use and free memory for those that are not
116+
/// <summary>
/// Frees memory for every previously tracked singer that is no longer assigned
/// to any track of <paramref name="project"/>, then records the singers that
/// are currently assigned so they can be released by a later call.
/// </summary>
/// <param name="project">Project whose tracks determine which singers are in use.</param>
public void ReleaseSingersNotInUse(UProject project) {
    // Collect the singers referenced by the project's tracks.
    var singersInUse = new HashSet<USinger>();
    foreach (var track in project.tracks) {
        var singer = track.Singer;
        if (singer != null) {
            singersInUse.Add(singer);
        }
    }

    // Find stale singers first: removing from singersUsed while enumerating it
    // invalidates the enumerator per the HashSet<T> contract.
    var staleSingers = new List<USinger>();
    foreach (var singer in singersUsed) {
        if (!singersInUse.Contains(singer)) {
            staleSingers.Add(singer);
        }
    }

    // Free before removing, so a singer whose FreeMemory throws stays tracked.
    foreach (var singer in staleSingers) {
        singer.FreeMemory();
        singersUsed.Remove(singer);
    }

    // Remember the singers that are in use now.
    singersUsed.UnionWith(singersInUse);
}
112135
}
113136
}

OpenUtau.Core/Ustx/USinger.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,5 +280,14 @@ public static USinger CreateMissing(string name) {
280280
/// <summary>
/// Raises PropertyChanged for <paramref name="propertyName"/> when there is
/// at least one subscriber.
/// </summary>
/// <param name="propertyName">Name passed to the event args; defaults to an empty string.</param>
private void NotifyPropertyChanged(string propertyName = "") {
    // Copy to a local so the null check and the invocation see the same delegate.
    var handler = PropertyChanged;
    if (handler == null) {
        return;
    }
    handler(this, new PropertyChangedEventArgs(propertyName));
}
283+
284+
/// <summary>
285+
/// Some types of singers store their data in memory when rendering.
286+
/// This method is called when the singer is no longer used.
287+
/// Note:
288+
/// - the voicebank may be used again even after this method is called.
289+
/// - this method may be called even when the singer has not been used
290+
/// </summary>
291+
public virtual void FreeMemory() {
    // Intentionally empty: subclasses that hold releasable resources override this.
}
283292
}
284293
}

0 commit comments

Comments
 (0)