@@ -101,10 +101,12 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, int tra
101101 }
102102 }
103103 if ( result . samples == null ) {
104- result . samples = InvokeDiffsinger ( phrase , depth , speedup ) ;
105- var source = new WaveSource ( 0 , 0 , 0 , 1 ) ;
106- source . SetSamples ( result . samples ) ;
107- WaveFileWriter . CreateWaveFile16 ( wavPath , new ExportAdapter ( source ) . ToMono ( 1 , 0 ) ) ;
104+ result . samples = InvokeDiffsinger ( phrase , depth , speedup , cancellation ) ;
105+ if ( result . samples != null ) {
106+ var source = new WaveSource ( 0 , 0 , 0 , 1 ) ;
107+ source . SetSamples ( result . samples ) ;
108+ WaveFileWriter . CreateWaveFile16 ( wavPath , new ExportAdapter ( source ) . ToMono ( 1 , 0 ) ) ;
109+ }
108110 }
109111 if ( result . samples != null ) {
110112 Renderers . ApplyDynamics ( phrase , result ) ;
@@ -120,7 +122,7 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, int tra
120122 leadingMs, positionMs, estimatedLengthMs: time-axis layout in ms, double
121123 */
122124
123- float [ ] InvokeDiffsinger ( RenderPhrase phrase , int depth , int speedup ) {
125+ float [ ] InvokeDiffsinger ( RenderPhrase phrase , int depth , int speedup , CancellationTokenSource cancellation ) {
124126 var singer = phrase . singer as DiffSingerSinger ;
125127 //Check if dsconfig.yaml is correct
126128 if ( String . IsNullOrEmpty ( singer . dsConfig . vocoder ) ||
@@ -130,6 +132,7 @@ float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup) {
130132 }
131133
132134 var vocoder = singer . getVocoder ( ) ;
135+ var acousticModel = singer . getAcousticSession ( ) ;
133136 var frameMs = vocoder . frameMs ( ) ;
134137 var frameSec = frameMs / 1000 ;
135138 int headFrames = ( int ) Math . Round ( headMs / frameMs ) ;
@@ -218,8 +221,16 @@ float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup) {
218221 }
219222
220223 //Variance: Energy and Breathiness
224+
221225 if ( singer . dsConfig . useBreathinessEmbed || singer . dsConfig . useEnergyEmbed ) {
222- var varianceResult = singer . getVariancePredictor ( ) . Process ( phrase ) ;
226+ var variancePredictor = singer . getVariancePredictor ( ) ;
227+ VarianceResult varianceResult ;
228+ lock ( variancePredictor ) {
229+ if ( cancellation . IsCancellationRequested ) {
230+ return null ;
231+ }
232+ varianceResult = singer . getVariancePredictor ( ) . Process ( phrase ) ;
233+ }
223234 //TODO: let user edit variance curves
224235 if ( singer . dsConfig . useEnergyEmbed ) {
225236 var energyCurve = phrase . curves . FirstOrDefault ( curve => curve . Item1 == ENE ) ;
@@ -246,26 +257,36 @@ float[] InvokeDiffsinger(RenderPhrase phrase, int depth, int speedup) {
246257 . Reshape ( new int [ ] { 1 , breathiness . Length } ) ) ) ;
247258 }
248259 }
249-
250- var acousticModel = singer . getAcousticSession ( ) ;
251- Onnx . VerifyInputNames ( acousticModel , acousticInputs ) ;
252260 Tensor < float > mel ;
253- var acousticOutputs = acousticModel . Run ( acousticInputs ) ;
254- mel = acousticOutputs . First ( ) . AsTensor < float > ( ) . Clone ( ) ;
255-
261+ lock ( acousticModel ) {
262+ if ( cancellation . IsCancellationRequested ) {
263+ return null ;
264+ }
265+ Onnx . VerifyInputNames ( acousticModel , acousticInputs ) ;
266+ var acousticOutputs = acousticModel . Run ( acousticInputs ) ;
267+ mel = acousticOutputs . First ( ) . AsTensor < float > ( ) . Clone ( ) ;
268+ }
256269 //vocoder
257270 //waveform = session.run(['waveform'], {'mel': mel, 'f0': f0})[0]
258271 var vocoderInputs = new List < NamedOnnxValue > ( ) ;
259272 vocoderInputs . Add ( NamedOnnxValue . CreateFromTensor ( "mel" , mel ) ) ;
260273 vocoderInputs . Add ( NamedOnnxValue . CreateFromTensor ( "f0" , f0tensor ) ) ;
261274 float [ ] samples ;
262- var vocoderOutputs = vocoder . session . Run ( vocoderInputs ) ;
263- samples = vocoderOutputs . First ( ) . AsTensor < float > ( ) . ToArray ( ) ;
275+ lock ( vocoder ) {
276+ if ( cancellation . IsCancellationRequested ) {
277+ return null ;
278+ }
279+ var vocoderOutputs = vocoder . session . Run ( vocoderInputs ) ;
280+ samples = vocoderOutputs . First ( ) . AsTensor < float > ( ) . ToArray ( ) ;
281+ }
264282 return samples ;
265283 }
266284
// Returns the result of running the phrase's singer's pitch predictor on the given phrase.
// This span is a diff: the "-" line is the previous one-line body; the "+" lines replace it,
// fetching the predictor once and holding a lock on that object while Process(phrase) runs.
// NOTE(review): presumably the lock serializes concurrent inference on a shared predictor — confirm.
// NOTE(review): `as` yields null when phrase.singer is not a DiffSingerSinger, in which case
// getPitchPredictor() would throw NullReferenceException — confirm callers guarantee the type.
267285 public RenderPitchResult LoadRenderedPitch ( RenderPhrase phrase ) {
268- return ( phrase . singer as DiffSingerSinger ) . getPitchPredictor ( ) . Process ( phrase ) ;
286+ var pitchPredictor = ( phrase . singer as DiffSingerSinger ) . getPitchPredictor ( ) ;
287+ lock ( pitchPredictor ) {
288+ return pitchPredictor . Process ( phrase ) ;
289+ }
269290 }
270291
271292 public UExpressionDescriptor [ ] GetSuggestedExpressions ( USinger singer , URenderSettings renderSettings ) {
0 commit comments