Draft PR #2575: changes from all commits
20 changes: 13 additions & 7 deletions .github/build_windows_packages.ps1
@@ -31,8 +31,8 @@ $UVR5_URL = "$baseHF/uvr5_weights.zip"
$NLTK_URL = "$baseHF/nltk_data.zip"
$JTALK_URL = "$baseHF/open_jtalk_dic_utf_8-1.11.tar.gz"

-$PYTHON_VERSION = "3.11.12"
-$PY_RELEASE_VERSION = "20250409"
+$PYTHON_VERSION = "3.10.18"
+$PY_RELEASE_VERSION = "20250902"

Write-Host "[INFO] Cleaning .git..."
Remove-Item "$srcDir\.git" -Recurse -Force -ErrorAction SilentlyContinue
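The runtime Python is pinned back from 3.11.12 to 3.10.18, paired with a newer standalone-build release tag. Assuming these two values feed an astral-sh/python-build-standalone download (the variable names suggest it, but the URL is assembled elsewhere in the script), they would combine roughly like this sketch:

    # Hypothetical sketch: how the two pins would form a python-build-standalone
    # download URL; the exact naming scheme is an assumption, not from this script.
    PYTHON_VERSION = "3.10.18"
    PY_RELEASE_VERSION = "20250902"
    url = (
        "https://github.com/astral-sh/python-build-standalone/releases/download/"
        f"{PY_RELEASE_VERSION}/cpython-{PYTHON_VERSION}+{PY_RELEASE_VERSION}"
        "-x86_64-pc-windows-msvc-install_only.tar.gz"
    )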
@@ -115,12 +115,17 @@ Remove-Item $ffDir.FullName -Recurse -Force
Write-Host "[INFO] Installing PyTorch..."
& ".\runtime\python.exe" -m ensurepip
& ".\runtime\python.exe" -m pip install --upgrade pip --no-warn-script-location

switch ($cuda) {
"cu124" {
& ".\runtime\python.exe" -m pip install torch==2.6 torchaudio --index-url https://download.pytorch.org/whl/cu124 --no-warn-script-location
"cu126" {
& ".\runtime\python.exe" -m pip install psutil ninja packaging wheel "setuptools>=42" --no-warn-script-location
& ".\runtime\python.exe" -m pip install torch --index-url https://download.pytorch.org/whl/cu126 --no-warn-script-location
& ".\runtime\python.exe" -m pip install flash-attn -i https://xxxxrt666.github.io/PIP-Index/ --no-build-isolation
}
"cu128" {
& ".\runtime\python.exe" -m pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu128 --no-warn-script-location
& ".\runtime\python.exe" -m pip install psutil ninja packaging wheel "setuptools>=42" --no-warn-script-location
& ".\runtime\python.exe" -m pip install torch --index-url https://download.pytorch.org/whl/cu128 --no-warn-script-location
& ".\runtime\python.exe" -m pip install flash-attn -i https://xxxxrt666.github.io/PIP-Index/ --no-build-isolation
}
default {
Write-Error "Unsupported CUDA version: $cuda"
@@ -129,6 +134,7 @@ switch ($cuda) {
}

Write-Host "[INFO] Installing dependencies..."
& ".\runtime\python.exe" -m pip install --pre torchcodec --index-url https://download.pytorch.org/whl/nightly/cpu
& ".\runtime\python.exe" -m pip install -r extra-req.txt --no-deps --no-warn-script-location
& ".\runtime\python.exe" -m pip install -r requirements.txt --no-warn-script-location

@@ -162,7 +168,7 @@ Copy-Item -Path $curr -Destination $pkgName -Recurse
$7zPath = "$pkgName.7z"
$start = Get-Date
Write-Host "Compress Starting at $start"
& "C:\Program Files\7-Zip\7z.exe" a -t7z "$7zPath" "$pkgName" -m0=lzma2 -mx=9 -md=1g -ms=1g -mmc=500 -mfb=273 -mlc=0 -mlp=4 -mpb=4 -mc=8g -mmt=on -bsp1
& "C:\Program Files\7-Zip\7z.exe" a -t7z "$7zPath" "$pkgName" -m0=lzma2 -mx=9 -mmt=on -bsp1
$end = Get-Date
Write-Host "Elapsed time: $($end - $start)"
Get-ChildItem .
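The 7-Zip call drops the hand-tuned LZMA2 parameters (the 1 GiB dictionary and solid-block sizes plus the match-finder and literal-context overrides) and keeps only -mx=9 (maximum preset), -mmt=on (multithreaded compression), and -bsp1 (progress on stdout): a simpler, far less memory-hungry invocation at some cost in final ratio.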
@@ -189,6 +195,6 @@ if (-not $hfUser -or -not $hfToken) {
exit 1
}
$env:HF_HUB_ENABLE_HF_TRANSFER = "1"
-huggingface-cli upload "$hfUser/GPT-SoVITS-Packages" "$7zPath" "$7zPath" --repo-type model --token $hfToken
+hf upload "$hfUser/GPT-SoVITS-Packages" "$7zPath" "$7zPath" --repo-type model --token $hfToken

Write-Host "[SUCCESS] Uploaded: $7zPath to HuggingFace"
10 changes: 9 additions & 1 deletion .github/workflows/build_windows_packages.yaml
@@ -17,7 +17,7 @@ jobs:
runs-on: windows-latest
strategy:
matrix:
-torch_cuda: [cu124, cu128]
+torch_cuda: [cu126, cu128]
env:
TORCH_CUDA: ${{ matrix.torch_cuda }}
MODELSCOPE_USERNAME: ${{ secrets.MODELSCOPE_USERNAME }}
@@ -31,6 +31,14 @@ jobs:
- name: Checkout
uses: actions/checkout@v4

+- name: Install Windows CUDA 12.9
+  uses: Jimver/cuda-toolkit@v0.2.x
+  id: cuda-toolkit-win-129
+  with:
+    cuda: 12.9.0
+    method: "network"
+    sub-packages: '["nvcc", "cudart", "visual_studio_integration"]'
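This new step provisions a minimal CUDA 12.9 toolchain on the windows-latest runner through the Jimver/cuda-toolkit action (the exact action tag did not survive the page scrape; v0.2.x above is a placeholder). Installing only nvcc, cudart, and visual_studio_integration over the network is presumably the smallest set that lets pip compile CUDA extensions such as flash-attn during the packaging run.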

- name: Run Build and Upload Script
shell: pwsh
run: |
5 changes: 3 additions & 2 deletions .gitignore
@@ -16,8 +16,9 @@ ffprobe*
cfg.json
speakers.json
ref_audios
-tools/AP_BWE_main/24kto48k/*
-!tools/AP_BWE_main/24kto48k/readme.txt
+tools/AP_BWE/24kto48k/*
+!tools/AP_BWE/24kto48k/readme.txt
+onnx_export

# Byte-compiled / optimized / DLL files
__pycache__/
22 changes: 20 additions & 2 deletions Docker/miniconda_install.sh
@@ -23,8 +23,10 @@ fi

if [ "$TARGETPLATFORM" = "linux/amd64" ]; then
"${WGET_CMD[@]}" -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py311_25.3.1-1-Linux-x86_64.sh
SYSROOT_PKG="sysroot_linux-64>=2.28"
elif [ "$TARGETPLATFORM" = "linux/arm64" ]; then
"${WGET_CMD[@]}" -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py311_25.3.1-1-Linux-aarch64.sh
SYSROOT_PKG="sysroot_linux-aarch64>=2.28"
else
exit 1
fi
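Each platform branch now also selects a matching conda sysroot package (glibc >= 2.28 headers and libraries for x86_64 or aarch64). It is consumed by the toolchain install below, presumably so anything compiled inside the image targets a fixed minimum glibc instead of whatever the build host happens to provide.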
@@ -45,20 +47,36 @@ rm miniconda.sh

source "$HOME/miniconda3/etc/profile.d/conda.sh"

"$HOME/miniconda3/bin/conda" init bash

source "$HOME/.bashrc"

"$HOME/miniconda3/bin/conda" config --add channels conda-forge

"$HOME/miniconda3/bin/conda" update -q --all -y 1>/dev/null

"$HOME/miniconda3/bin/conda" install python=3.11 -q -y

"$HOME/miniconda3/bin/conda" install gcc=14 gxx ffmpeg cmake make unzip -q -y
"$HOME/miniconda3/bin/conda" install gcc=11 gxx ffmpeg cmake make unzip $SYSROOT_PKG "libstdcxx-ng>=11" -q -y

if [ "$CUDA_VERSION" = "12.8" ]; then
"$HOME/miniconda3/bin/pip" install torch torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/cu128
"$HOME/miniconda3/bin/conda" install cuda-nvcc=12.8 -c nvidia
elif [ "$CUDA_VERSION" = "12.6" ]; then
"$HOME/miniconda3/bin/pip" install torch==2.6 torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/cu126
"$HOME/miniconda3/bin/pip" install torch torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/cu126
"$HOME/miniconda3/bin/conda" install cuda-nvcc=12.6 -c nvidia
fi
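Both CUDA branches now add a cuda-nvcc from the nvidia channel that matches the torch wheel's CUDA version (12.8 or 12.6), and the cu126 branch stops pinning torch==2.6. Keeping nvcc on the same CUDA minor version as the installed torch runtime is the usual prerequisite for compiling extensions such as flash-attn against those wheels.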

+CUDA_PATH=$(echo "$HOME/miniconda3/targets/"*-linux | awk '{print $1}')

+export CUDA_HOME=$CUDA_PATH
+export PATH="$HOME/miniconda3/bin:$PATH"
+export PATH="$CUDA_HOME/bin:$PATH"
+export PATH="$CUDA_HOME/nvvm/bin:$PATH"

"$HOME/miniconda3/bin/pip" install psutil ninja packaging wheel "setuptools>=42"
"$HOME/miniconda3/bin/pip" install flash-attn -i https://xxxxrt666.github.io/PIP-Index/ --no-build-isolation

"$HOME/miniconda3/bin/pip" cache purge

rm $LOG_PATH
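The closing block derives CUDA_HOME from conda's per-target directory, puts nvcc on PATH, and only then builds flash-attn with --no-build-isolation so the build can import the torch installed above. A quick way to confirm the wiring before that step, relying on torch's own toolkit discovery (a sketch, not part of the script):

    # Sketch: check that an extension build would find the CUDA toolkit.
    from torch.utils.cpp_extension import CUDA_HOME  # reads $CUDA_HOME, else common defaults

    assert CUDA_HOME is not None, "no CUDA toolkit found; flash-attn build would fail"
    print("building against:", CUDA_HOME)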
Empty file removed GPT_SoVITS/AR/__init__.py
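Deleting this __init__.py (and AR/models/__init__.py below) does not break the GPT_SoVITS.AR imports introduced elsewhere in this PR: on Python 3.3+, a directory without __init__.py is still importable as an implicit namespace package (PEP 420), which appears to be what the refactor relies on.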
6 changes: 3 additions & 3 deletions GPT_SoVITS/AR/data/bucket_sampler.py
@@ -39,12 +39,12 @@ def __init__(
if num_replicas is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
-num_replicas = dist.get_world_size() if torch.cuda.is_available() else 1
+num_replicas = dist.get_world_size() if torch.cuda.device_count() > 1 else 1
if rank is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
-rank = dist.get_rank() if torch.cuda.is_available() else 0
-if torch.cuda.is_available():
+rank = dist.get_rank() if torch.cuda.device_count() > 1 else 0
+if torch.cuda.device_count() > 1:
torch.cuda.set_device(rank)
if rank >= num_replicas or rank < 0:
raise ValueError("Invalid rank {}, rank should be in the interval [0, {}]".format(rank, num_replicas - 1))
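The sampler now keys its distributed setup off having more than one GPU rather than mere CUDA availability, so a single-GPU run no longer requires an initialized process group. Condensed, the new guard amounts to this sketch of the logic above:

    # Sketch: distributed parameters only when more than one GPU is present.
    import torch
    import torch.distributed as dist

    multi_gpu = torch.cuda.device_count() > 1
    num_replicas = dist.get_world_size() if multi_gpu else 1
    rank = dist.get_rank() if multi_gpu else 0
    if multi_gpu:
        torch.cuda.set_device(rank)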
4 changes: 2 additions & 2 deletions GPT_SoVITS/AR/data/data_module.py
@@ -3,8 +3,8 @@
from pytorch_lightning import LightningDataModule
from torch.utils.data import DataLoader

-from AR.data.bucket_sampler import DistributedBucketSampler
-from AR.data.dataset import Text2SemanticDataset
+from GPT_SoVITS.AR.data.bucket_sampler import DistributedBucketSampler
+from GPT_SoVITS.AR.data.dataset import Text2SemanticDataset


class Text2SemanticDataModule(LightningDataModule):
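The flat AR.… imports resolved only when the GPT_SoVITS directory itself was on sys.path; qualifying them as GPT_SoVITS.AR.… makes the module importable from the repository root with no path manipulation, the same pattern applied across the files below.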
6 changes: 3 additions & 3 deletions GPT_SoVITS/AR/data/dataset.py
@@ -11,9 +11,9 @@
import torch
from torch.utils.data import DataLoader, Dataset

-version = os.environ.get("version", None)
+from GPT_SoVITS.text import cleaned_text_to_sequence

-from text import cleaned_text_to_sequence
+version = os.environ.get("version", None)

# from config import exp_dir

@@ -220,7 +220,7 @@ def __getitem__(self, idx: int) -> Dict:

flag = 0
path_bert = "%s/%s.pt" % (self.path3, item_name)
-if os.path.exists(path_bert) == True:
+if os.path.exists(path_bert) is True:
bert_feature = torch.load(path_bert, map_location="cpu")
else:
flag = 1
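Since os.path.exists() already returns a bool, the comparison is redundant in either spelling; a plain truth test is the equivalent idiomatic form, as in this sketch:

    # Sketch: equivalent truth test, no comparison against True needed.
    if os.path.exists(path_bert):
        bert_feature = torch.load(path_bert, map_location="cpu")
    else:
        flag = 1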
Empty file removed GPT_SoVITS/AR/models/__init__.py
12 changes: 4 additions & 8 deletions GPT_SoVITS/AR/models/t2s_lightning_module.py
@@ -1,18 +1,14 @@
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_lightning_module.py
# reference: https://github.com/lifeiteng/vall-e
-import os
-import sys

-now_dir = os.getcwd()
-sys.path.append(now_dir)
from typing import Dict

import torch
from pytorch_lightning import LightningModule

-from AR.models.t2s_model import Text2SemanticDecoder
-from AR.modules.lr_schedulers import WarmupCosineLRSchedule
-from AR.modules.optim import ScaledAdam
+from ..modules.lr_schedulers import WarmupCosineLRSchedule
+from ..modules.optim import ScaledAdam
+from .t2s_model import Text2SemanticDecoder


class Text2SemanticLightningModule(LightningModule):
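With the os.getcwd()/sys.path hack gone, dependencies are addressed relative to the module's own package. This works whenever the file is imported as part of the GPT_SoVITS.AR package (for example via the qualified imports above or python -m); it would no longer run as a loose script, which these training modules are not.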
@@ -42,7 +38,7 @@ def __init__(self, config, output_dir, is_train=True):
def training_step(self, batch: Dict, batch_idx: int):
opt = self.optimizers()
scheduler = self.lr_schedulers()
-forward = self.model.forward if self.config["train"].get("if_dpo", False) == True else self.model.forward_old
+forward = self.model.forward if self.config["train"].get("if_dpo", False) is True else self.model.forward_old
loss, acc = forward(
batch["phoneme_ids"],
batch["phoneme_ids_len"],
97 changes: 1 addition & 96 deletions GPT_SoVITS/AR/models/t2s_lightning_module_onnx.py
@@ -1,18 +1,10 @@
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_lightning_module.py
# reference: https://github.com/lifeiteng/vall-e
-import os
-import sys

-now_dir = os.getcwd()
-sys.path.append(now_dir)
from typing import Dict

import torch
from pytorch_lightning import LightningModule

-from AR.models.t2s_model_onnx import Text2SemanticDecoder
-from AR.modules.lr_schedulers import WarmupCosineLRSchedule
-from AR.modules.optim import ScaledAdam
+from .t2s_model_onnx import Text2SemanticDecoder


class Text2SemanticLightningModule(LightningModule):
@@ -21,90 +13,3 @@ def __init__(self, config, output_dir, is_train=True):
self.config = config
self.top_k = 3
self.model = Text2SemanticDecoder(config=config, top_k=self.top_k)
-pretrained_s1 = config.get("pretrained_s1")
-if pretrained_s1 and is_train:
-# print(self.load_state_dict(torch.load(pretrained_s1,map_location="cpu")["state_dict"]))
-print(
-self.load_state_dict(
-torch.load(
-pretrained_s1,
-map_location="cpu",
-)["weight"],
-),
-)
-if is_train:
-self.automatic_optimization = False
-self.save_hyperparameters()
-self.eval_dir = output_dir / "eval"
-self.eval_dir.mkdir(parents=True, exist_ok=True)
-
-def training_step(self, batch: Dict, batch_idx: int):
-opt = self.optimizers()
-scheduler = self.lr_schedulers()
-loss, acc = self.model.forward(
-batch["phoneme_ids"],
-batch["phoneme_ids_len"],
-batch["semantic_ids"],
-batch["semantic_ids_len"],
-batch["bert_feature"],
-)
-self.manual_backward(loss)
-if batch_idx > 0 and batch_idx % 4 == 0:
-opt.step()
-opt.zero_grad()
-scheduler.step()
-
-self.log(
-"total_loss",
-loss,
-on_step=True,
-on_epoch=True,
-prog_bar=True,
-sync_dist=True,
-)
-self.log(
-"lr",
-scheduler.get_last_lr()[0],
-on_epoch=True,
-prog_bar=True,
-sync_dist=True,
-)
-self.log(
-f"top_{self.top_k}_acc",
-acc,
-on_step=True,
-on_epoch=True,
-prog_bar=True,
-sync_dist=True,
-)
-
-def validation_step(self, batch: Dict, batch_idx: int):
-return
-
-def configure_optimizers(self):
-model_parameters = self.model.parameters()
-parameters_names = []
-parameters_names.append([name_param_pair[0] for name_param_pair in self.model.named_parameters()])
-lm_opt = ScaledAdam(
-model_parameters,
-lr=0.01,
-betas=(0.9, 0.95),
-clipping_scale=2.0,
-parameters_names=parameters_names,
-show_dominant_parameters=False,
-clipping_update_period=1000,
-)
-
-return {
-"optimizer": lm_opt,
-"lr_scheduler": {
-"scheduler": WarmupCosineLRSchedule(
-lm_opt,
-init_lr=self.config["optimizer"]["lr_init"],
-peak_lr=self.config["optimizer"]["lr"],
-end_lr=self.config["optimizer"]["lr_end"],
-warmup_steps=self.config["optimizer"]["warmup_steps"],
-total_steps=self.config["optimizer"]["decay_steps"],
-)
-},
-}
16 changes: 8 additions & 8 deletions GPT_SoVITS/AR/models/t2s_model.py
@@ -9,7 +9,7 @@
from torchmetrics.classification import MulticlassAccuracy
from tqdm import tqdm

-from AR.models.utils import (
+from GPT_SoVITS.AR.models.utils import (
dpo_loss,
get_batch_logps,
make_pad_mask,
@@ -18,8 +18,8 @@
sample,
topk_sampling,
)
-from AR.modules.embedding import SinePositionalEmbedding, TokenEmbedding
-from AR.modules.transformer import LayerNorm, TransformerEncoder, TransformerEncoderLayer
+from GPT_SoVITS.AR.modules.embedding import SinePositionalEmbedding, TokenEmbedding
+from GPT_SoVITS.AR.modules.transformer import LayerNorm, TransformerEncoder, TransformerEncoderLayer

default_config = {
"embedding_dim": 512,
@@ -420,7 +420,7 @@ def forward(self, x, x_lens, y, y_lens, bert_feature):
mask=xy_attn_mask,
)
x_len = x_lens.max()
-logits = self.ar_predict_layer(xy_dec[:, x_len-1:])
+logits = self.ar_predict_layer(xy_dec[:, x_len - 1 :])

###### DPO #############
reject_xy_pos, reject_xy_attn_mask, reject_targets = self.make_input_data(
@@ -432,7 +432,7 @@ def forward(self, x, x_lens, y, y_lens, bert_feature):
mask=reject_xy_attn_mask,
)
x_len = x_lens.max()
-reject_logits = self.ar_predict_layer(reject_xy_dec[:, x_len-1:])
+reject_logits = self.ar_predict_layer(reject_xy_dec[:, x_len - 1 :])

# loss
# from feiteng: 每次 duration 越多, 梯度更新也应该更多, 所以用 sum
@@ -502,7 +502,7 @@ def forward_old(self, x, x_lens, y, y_lens, bert_feature):
(xy_pos, None),
mask=xy_attn_mask,
)
-logits = self.ar_predict_layer(xy_dec[:, x_len-1:]).permute(0, 2, 1)
+logits = self.ar_predict_layer(xy_dec[:, x_len - 1 :]).permute(0, 2, 1)
# loss
# from feiteng: 每次 duration 越多, 梯度更新也应该更多, 所以用 sum
loss = F.cross_entropy(logits, targets, reduction="sum")
@@ -724,8 +724,8 @@ def infer_panel_batch_infer(
l1 = samples[:, 0] == self.EOS
l2 = tokens == self.EOS
l = l1.logical_or(l2)
-removed_idx_of_batch_for_y = torch.where(l == True)[0].tolist()
-reserved_idx_of_batch_for_y = torch.where(l == False)[0]
+removed_idx_of_batch_for_y = torch.where(l is True)[0].tolist()
+reserved_idx_of_batch_for_y = torch.where(l is False)[0]
# batch_indexs = torch.tensor(batch_idx_map, device=y.device)[removed_idx_of_batch_for_y]
for i in removed_idx_of_batch_for_y:
batch_index = batch_idx_map[i]
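One caveat worth noting for the EOS masking above: on a boolean tensor l, the elementwise forms are l == True and l == False (or simply l and ~l), whereas l is True and l is False are Python identity tests against the bool singletons and are always False for a tensor, leaving torch.where a plain Python False instead of a mask. The elementwise intent, as a sketch:

    # Sketch: elementwise selection on a boolean EOS mask.
    removed_idx_of_batch_for_y = torch.where(l)[0].tolist()  # positions that hit EOS
    reserved_idx_of_batch_for_y = torch.where(~l)[0]         # everything still running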