Skip to content

Commit 20e09b9

Browse files
committed
fix(audio): off vocal extraction
sometimes the subtraction failed because the two tensors didn't have the same length (???)
1 parent bf6bd26 commit 20e09b9

File tree

2 files changed

+21
-16
lines changed

2 files changed

+21
-16
lines changed

yohane-cli/yohane_cli/audio.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,9 @@ def save_separated_tracks(yohane: Yohane, song_path: Path):
6666
filename = song_path.with_suffix(".vocals.wav")
6767
logger.info(f"Saving vocals track to {filename}")
6868
torchaudio.save(filename.as_posix(), waveform.to("cpu"), sample_rate)
69-
if yohane.off_vocal is not None:
70-
waveform, sample_rate = yohane.off_vocal
69+
off_vocal = yohane.extract_off_vocal()
70+
if off_vocal is not None:
71+
waveform, sample_rate = off_vocal
7172
filename = song_path.with_suffix(".off_vocal.wav")
7273
logger.info(f"Saving off vocal track to {filename}")
7374
torchaudio.save(filename.as_posix(), waveform.to("cpu"), sample_rate)

yohane/pipeline.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,25 +24,29 @@ def __init__(self, separator: Separator | None):
2424
def forced_aligned_audio(self):
2525
return self.vocals if self.vocals is not None else self.song
2626

27-
@property
28-
def off_vocal(self):
29-
if self.song is not None and self.vocals is not None:
30-
song_waveform, song_sample_rate = self.song
31-
vocals_waveform, vocals_sample_rate = self.vocals
32-
vocals_waveform_resampled = torchaudio.functional.resample(
33-
vocals_waveform, vocals_sample_rate, song_sample_rate
34-
)
35-
return song_waveform - vocals_waveform_resampled, song_sample_rate
36-
3727
def load_song(self, song_file: Path):
3828
logger.info("Loading song")
3929
self.song = torchaudio.load(song_file.as_posix())
4030

4131
def extract_vocals(self):
42-
if self.separator is not None:
43-
logger.info(f"Extracting vocals with {self.separator=}")
44-
assert self.song
45-
self.vocals = self.separator(*self.song)
32+
if self.separator is None:
33+
return
34+
logger.info(f"Extracting vocals with {self.separator=}")
35+
assert self.song
36+
self.vocals = self.separator(*self.song)
37+
38+
def extract_off_vocal(self):
39+
if self.song is None or self.vocals is None:
40+
return
41+
song_waveform, song_sample_rate = self.song
42+
vocals_waveform, vocals_sample_rate = self.vocals
43+
vocals_waveform_resampled = torchaudio.functional.resample(
44+
vocals_waveform, vocals_sample_rate, song_sample_rate
45+
)
46+
min_length = min(song_waveform.size(1), vocals_waveform_resampled.size(1))
47+
song_waveform = song_waveform[:, :min_length]
48+
vocals_waveform_resampled = vocals_waveform_resampled[:, :min_length]
49+
return song_waveform - vocals_waveform_resampled, song_sample_rate
4650

4751
def load_lyrics(self, lyrics_str: str):
4852
logger.info("Loading lyrics")

0 commit comments

Comments
 (0)