Commit fd060c1
1 parent d4c9eb0

14 files changed (+878, -25 lines)

.github/build_windows_packages.ps1

Lines changed: 1 addition & 0 deletions
@@ -134,6 +134,7 @@ switch ($cuda) {
 }
 
 Write-Host "[INFO] Installing dependencies..."
+& ".\runtime\python.exe" -m pip install --pre torchcodec --index-url https://download.pytorch.org/whl/nightly/cpu
 & ".\runtime\python.exe" -m pip install -r extra-req.txt --no-deps --no-warn-script-location
 & ".\runtime\python.exe" -m pip install -r requirements.txt --no-warn-script-location
 
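
Since the build now pulls the torchcodec nightly wheel from the PyTorch CPU index, a quick post-install smoke test can catch a broken wheel before packaging continues. This is only a sketch, not part of the build script; it assumes it is run with the bundled .\runtime\python.exe so it sees the packaged site-packages.

    # smoke-test sketch (assumed usage): & ".\runtime\python.exe" check_torchcodec.py
    from importlib.metadata import version

    import torchcodec  # fails loudly if the nightly wheel did not install cleanly

    print("torchcodec", version("torchcodec"), "imported OK")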

GPT_SoVITS/TTS_infer_pack/TTS.py

Lines changed: 3 additions & 5 deletions
@@ -721,20 +721,18 @@ def _set_ref_spec(self, ref_audio_path):
         self.prompt_cache["refer_spec"][0] = spec_audio
 
     def _get_ref_spec(self, ref_audio_path):
-        audio_n, raw_sr = librosa.load(ref_audio_path, sr=self.configs.sampling_rate)
-        raw_audio = torch.from_numpy(audio_n).unsqueeze(0)
-        raw_audio = raw_audio.to(self.configs.device).float()
+        raw_audio, raw_sr = torchaudio.load_with_torchcodec(ref_audio_path)
         self.prompt_cache["raw_audio"] = raw_audio
         self.prompt_cache["raw_sr"] = raw_sr
 
         if raw_sr != self.configs.sampling_rate:
             audio = raw_audio.to(self.configs.device)
-            if audio.shape[0] == 2:
+            if audio.shape[0] > 1:
                 audio = audio.mean(0).unsqueeze(0)
             audio = resample(audio, raw_sr, self.configs.sampling_rate, self.configs.device)
         else:
             audio = raw_audio.to(self.configs.device)
-            if audio.shape[0] == 2:
+            if audio.shape[0] > 1:
                 audio = audio.mean(0).unsqueeze(0)
 
         maxx = audio.abs().max()
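
For context on the new loading path: torchaudio.load_with_torchcodec mirrors torchaudio.load, returning a float waveform shaped (channels, num_frames) plus the file's native sample rate, so the channel check is widened from exactly stereo (== 2) to any multi-channel input (> 1) before down-mixing to mono. Below is a standalone sketch of the same flow, assuming a torchaudio build that exposes load_with_torchcodec and using torchaudio.functional.resample in place of the repo's resample helper; the 32000 Hz target is only a placeholder.

    import torch
    import torchaudio

    def load_ref_mono(path: str, target_sr: int = 32000) -> torch.Tensor:
        # (channels, num_frames) float tensor plus the file's native sample rate
        waveform, native_sr = torchaudio.load_with_torchcodec(path)
        if waveform.shape[0] > 1:  # any multi-channel layout, not only stereo
            waveform = waveform.mean(0, keepdim=True)
        if native_sr != target_sr:
            waveform = torchaudio.functional.resample(waveform, native_sr, target_sr)
        return waveform  # shape: (1, num_frames)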

GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py

Lines changed: 1 addition & 0 deletions
@@ -206,6 +206,7 @@ def clean_text_inf(self, text: str, language: str, version: str = "v2"):
         phones = cleaned_text_to_sequence(phones, version)
         return phones, word2ph, norm_text
 
+    @torch.no_grad()
     def get_bert_inf(self, phones: list, word2ph: list, norm_text: str, language: str):
         language = language.replace("all_", "")
         if language == "zh":
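
The added @torch.no_grad() decorator turns off autograd for the whole BERT feature-extraction call, so inference builds no computation graph and uses less memory. A minimal illustration of the effect; the linear layer stands in for the repo's BERT model and is not part of this commit.

    import torch

    model = torch.nn.Linear(8, 4)  # placeholder for the BERT feature extractor

    @torch.no_grad()
    def extract_features(x: torch.Tensor) -> torch.Tensor:
        # everything in here runs with gradient tracking disabled
        return model(x)

    out = extract_features(torch.randn(2, 8))
    print(out.requires_grad)  # False: no graph to detach or free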
