Draft PR #2575: changes from all commits
20 changes: 13 additions & 7 deletions .github/build_windows_packages.ps1
@@ -31,8 +31,8 @@ $UVR5_URL = "$baseHF/uvr5_weights.zip"
$NLTK_URL = "$baseHF/nltk_data.zip"
$JTALK_URL = "$baseHF/open_jtalk_dic_utf_8-1.11.tar.gz"

-$PYTHON_VERSION = "3.11.12"
-$PY_RELEASE_VERSION = "20250409"
+$PYTHON_VERSION = "3.10.18"
+$PY_RELEASE_VERSION = "20250902"

Write-Host "[INFO] Cleaning .git..."
Remove-Item "$srcDir\.git" -Recurse -Force -ErrorAction SilentlyContinue
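The runtime Python is pinned back from 3.11.12 to 3.10.18, paired with a newer standalone-build release tag. Assuming these two values feed an astral-sh/python-build-standalone download (the variable names suggest it, but the URL is assembled elsewhere in the script), they would combine roughly like this sketch:

    # Hypothetical sketch: how the two pins would form a python-build-standalone
    # download URL; the exact naming scheme is an assumption, not from this script.
    PYTHON_VERSION = "3.10.18"
    PY_RELEASE_VERSION = "20250902"
    url = (
        "https://github.com/astral-sh/python-build-standalone/releases/download/"
        f"{PY_RELEASE_VERSION}/cpython-{PYTHON_VERSION}+{PY_RELEASE_VERSION}"
        "-x86_64-pc-windows-msvc-install_only.tar.gz"
    )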
@@ -115,12 +115,17 @@ Remove-Item $ffDir.FullName -Recurse -Force
Write-Host "[INFO] Installing PyTorch..."
& ".\runtime\python.exe" -m ensurepip
& ".\runtime\python.exe" -m pip install --upgrade pip --no-warn-script-location

switch ($cuda) {
"cu124" {
& ".\runtime\python.exe" -m pip install torch==2.6 torchaudio --index-url https://download.pytorch.org/whl/cu124 --no-warn-script-location
"cu126" {
& ".\runtime\python.exe" -m pip install psutil ninja packaging wheel "setuptools>=42" --no-warn-script-location
& ".\runtime\python.exe" -m pip install torch --index-url https://download.pytorch.org/whl/cu126 --no-warn-script-location
& ".\runtime\python.exe" -m pip install flash-attn -i https://xxxxrt666.github.io/PIP-Index/ --no-build-isolation
}
"cu128" {
& ".\runtime\python.exe" -m pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu128 --no-warn-script-location
& ".\runtime\python.exe" -m pip install psutil ninja packaging wheel "setuptools>=42" --no-warn-script-location
& ".\runtime\python.exe" -m pip install torch --index-url https://download.pytorch.org/whl/cu128 --no-warn-script-location
& ".\runtime\python.exe" -m pip install flash-attn -i https://xxxxrt666.github.io/PIP-Index/ --no-build-isolation
}
default {
Write-Error "Unsupported CUDA version: $cuda"
@@ -129,6 +134,7 @@ switch ($cuda) {
}

Write-Host "[INFO] Installing dependencies..."
& ".\runtime\python.exe" -m pip install --pre torchcodec --index-url https://download.pytorch.org/whl/nightly/cpu
& ".\runtime\python.exe" -m pip install -r extra-req.txt --no-deps --no-warn-script-location
& ".\runtime\python.exe" -m pip install -r requirements.txt --no-warn-script-location

@@ -162,7 +168,7 @@ Copy-Item -Path $curr -Destination $pkgName -Recurse
$7zPath = "$pkgName.7z"
$start = Get-Date
Write-Host "Compress Starting at $start"
& "C:\Program Files\7-Zip\7z.exe" a -t7z "$7zPath" "$pkgName" -m0=lzma2 -mx=9 -md=1g -ms=1g -mmc=500 -mfb=273 -mlc=0 -mlp=4 -mpb=4 -mc=8g -mmt=on -bsp1
& "C:\Program Files\7-Zip\7z.exe" a -t7z "$7zPath" "$pkgName" -m0=lzma2 -mx=9 -mmt=on -bsp1
$end = Get-Date
Write-Host "Elapsed time: $($end - $start)"
Get-ChildItem .
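The 7-Zip call drops the hand-tuned LZMA2 parameters (the 1 GiB dictionary and solid-block sizes plus the match-finder and literal-context overrides) and keeps only -mx=9 (maximum preset), -mmt=on (multithreaded compression), and -bsp1 (progress on stdout): a simpler, far less memory-hungry invocation at some cost in final ratio.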
@@ -189,6 +195,6 @@ if (-not $hfUser -or -not $hfToken) {
exit 1
}
$env:HF_HUB_ENABLE_HF_TRANSFER = "1"
-huggingface-cli upload "$hfUser/GPT-SoVITS-Packages" "$7zPath" "$7zPath" --repo-type model --token $hfToken
+hf upload "$hfUser/GPT-SoVITS-Packages" "$7zPath" "$7zPath" --repo-type model --token $hfToken

Write-Host "[SUCCESS] Uploaded: $7zPath to HuggingFace"
10 changes: 9 additions & 1 deletion .github/workflows/build_windows_packages.yaml
@@ -17,7 +17,7 @@ jobs:
runs-on: windows-latest
strategy:
matrix:
-torch_cuda: [cu124, cu128]
+torch_cuda: [cu126, cu128]
env:
TORCH_CUDA: ${{ matrix.torch_cuda }}
MODELSCOPE_USERNAME: ${{ secrets.MODELSCOPE_USERNAME }}
@@ -31,6 +31,14 @@ jobs:
- name: Checkout
uses: actions/checkout@v4

+- name: Install Windows CUDA 12.9
+  uses: Jimver/cuda-toolkit@v0.2.x
+  id: cuda-toolkit-win-129
+  with:
+    cuda: 12.9.0
+    method: "network"
+    sub-packages: '["nvcc", "cudart", "visual_studio_integration"]'
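This new step provisions a minimal CUDA 12.9 toolchain on the windows-latest runner through the Jimver/cuda-toolkit action (the exact action tag did not survive the page scrape; v0.2.x above is a placeholder). Installing only nvcc, cudart, and visual_studio_integration over the network is presumably the smallest set that lets pip compile CUDA extensions such as flash-attn during the packaging run.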

- name: Run Build and Upload Script
shell: pwsh
run: |
5 changes: 3 additions & 2 deletions .gitignore
@@ -16,8 +16,9 @@ ffprobe*
cfg.json
speakers.json
ref_audios
-tools/AP_BWE_main/24kto48k/*
-!tools/AP_BWE_main/24kto48k/readme.txt
+tools/AP_BWE/24kto48k/*
+!tools/AP_BWE/24kto48k/readme.txt
+onnx_export

# Byte-compiled / optimized / DLL files
__pycache__/
22 changes: 20 additions & 2 deletions Docker/miniconda_install.sh
@@ -23,8 +23,10 @@ fi

if [ "$TARGETPLATFORM" = "linux/amd64" ]; then
"${WGET_CMD[@]}" -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py311_25.3.1-1-Linux-x86_64.sh
SYSROOT_PKG="sysroot_linux-64>=2.28"
elif [ "$TARGETPLATFORM" = "linux/arm64" ]; then
"${WGET_CMD[@]}" -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py311_25.3.1-1-Linux-aarch64.sh
SYSROOT_PKG="sysroot_linux-aarch64>=2.28"
else
exit 1
fi
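Each platform branch now also selects a matching conda sysroot package (glibc >= 2.28 headers and libraries for x86_64 or aarch64). It is consumed by the toolchain install below, presumably so anything compiled inside the image targets a fixed minimum glibc instead of whatever the build host happens to provide.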
@@ -45,20 +47,36 @@ rm miniconda.sh

source "$HOME/miniconda3/etc/profile.d/conda.sh"

"$HOME/miniconda3/bin/conda" init bash

source "$HOME/.bashrc"

"$HOME/miniconda3/bin/conda" config --add channels conda-forge

"$HOME/miniconda3/bin/conda" update -q --all -y 1>/dev/null

"$HOME/miniconda3/bin/conda" install python=3.11 -q -y

"$HOME/miniconda3/bin/conda" install gcc=14 gxx ffmpeg cmake make unzip -q -y
"$HOME/miniconda3/bin/conda" install gcc=11 gxx ffmpeg cmake make unzip $SYSROOT_PKG "libstdcxx-ng>=11" -q -y

if [ "$CUDA_VERSION" = "12.8" ]; then
"$HOME/miniconda3/bin/pip" install torch torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/cu128
"$HOME/miniconda3/bin/conda" install cuda-nvcc=12.8 -c nvidia
elif [ "$CUDA_VERSION" = "12.6" ]; then
"$HOME/miniconda3/bin/pip" install torch==2.6 torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/cu126
"$HOME/miniconda3/bin/pip" install torch torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/cu126
"$HOME/miniconda3/bin/conda" install cuda-nvcc=12.6 -c nvidia
fi
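Both CUDA branches now add a cuda-nvcc from the nvidia channel that matches the torch wheel's CUDA version (12.8 or 12.6), and the cu126 branch stops pinning torch==2.6. Keeping nvcc on the same CUDA minor version as the installed torch runtime is the usual prerequisite for compiling extensions such as flash-attn against those wheels.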

+CUDA_PATH=$(echo "$HOME/miniconda3/targets/"*-linux | awk '{print $1}')

+export CUDA_HOME=$CUDA_PATH
+export PATH="$HOME/miniconda3/bin:$PATH"
+export PATH="$CUDA_HOME/bin:$PATH"
+export PATH="$CUDA_HOME/nvvm/bin:$PATH"

"$HOME/miniconda3/bin/pip" install psutil ninja packaging wheel "setuptools>=42"
"$HOME/miniconda3/bin/pip" install flash-attn -i https://xxxxrt666.github.io/PIP-Index/ --no-build-isolation

"$HOME/miniconda3/bin/pip" cache purge

rm $LOG_PATH
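The closing block derives CUDA_HOME from conda's per-target directory, puts nvcc on PATH, and only then builds flash-attn with --no-build-isolation so the build can import the torch installed above. A quick way to confirm the wiring before that step, relying on torch's own toolkit discovery (a sketch, not part of the script):

    # Sketch: check that an extension build would find the CUDA toolkit.
    from torch.utils.cpp_extension import CUDA_HOME  # reads $CUDA_HOME, else common defaults

    assert CUDA_HOME is not None, "no CUDA toolkit found; flash-attn build would fail"
    print("building against:", CUDA_HOME)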
Empty file removed GPT_SoVITS/AR/__init__.py
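Deleting this __init__.py (and AR/models/__init__.py below) does not break the GPT_SoVITS.AR imports introduced elsewhere in this PR: on Python 3.3+, a directory without __init__.py is still importable as an implicit namespace package (PEP 420), which appears to be what the refactor relies on.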
6 changes: 3 additions & 3 deletions GPT_SoVITS/AR/data/bucket_sampler.py
@@ -39,12 +39,12 @@ def __init__(
if num_replicas is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
-num_replicas = dist.get_world_size() if torch.cuda.is_available() else 1
+num_replicas = dist.get_world_size() if torch.cuda.device_count() > 1 else 1
if rank is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
-rank = dist.get_rank() if torch.cuda.is_available() else 0
-if torch.cuda.is_available():
+rank = dist.get_rank() if torch.cuda.device_count() > 1 else 0
+if torch.cuda.device_count() > 1:
torch.cuda.set_device(rank)
if rank >= num_replicas or rank < 0:
raise ValueError("Invalid rank {}, rank should be in the interval [0, {}]".format(rank, num_replicas - 1))
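The sampler now keys its distributed setup off having more than one GPU rather than mere CUDA availability, so a single-GPU run no longer requires an initialized process group. Condensed, the new guard amounts to this sketch of the logic above:

    # Sketch: distributed parameters only when more than one GPU is present.
    import torch
    import torch.distributed as dist

    multi_gpu = torch.cuda.device_count() > 1
    num_replicas = dist.get_world_size() if multi_gpu else 1
    rank = dist.get_rank() if multi_gpu else 0
    if multi_gpu:
        torch.cuda.set_device(rank)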
4 changes: 2 additions & 2 deletions GPT_SoVITS/AR/data/data_module.py
@@ -3,8 +3,8 @@
from pytorch_lightning import LightningDataModule
from torch.utils.data import DataLoader

-from AR.data.bucket_sampler import DistributedBucketSampler
-from AR.data.dataset import Text2SemanticDataset
+from GPT_SoVITS.AR.data.bucket_sampler import DistributedBucketSampler
+from GPT_SoVITS.AR.data.dataset import Text2SemanticDataset


class Text2SemanticDataModule(LightningDataModule):
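The flat AR.… imports resolved only when the GPT_SoVITS directory itself was on sys.path; qualifying them as GPT_SoVITS.AR.… makes the module importable from the repository root with no path manipulation, the same pattern applied across the files below.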
6 changes: 3 additions & 3 deletions GPT_SoVITS/AR/data/dataset.py
@@ -11,9 +11,9 @@
import torch
from torch.utils.data import DataLoader, Dataset

-version = os.environ.get("version", None)
+from GPT_SoVITS.text import cleaned_text_to_sequence

-from text import cleaned_text_to_sequence
+version = os.environ.get("version", None)

# from config import exp_dir

@@ -220,7 +220,7 @@ def __getitem__(self, idx: int) -> Dict:

flag = 0
path_bert = "%s/%s.pt" % (self.path3, item_name)
-if os.path.exists(path_bert) == True:
+if os.path.exists(path_bert) is True:
bert_feature = torch.load(path_bert, map_location="cpu")
else:
flag = 1
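Since os.path.exists() already returns a bool, the comparison is redundant in either spelling; a plain truth test is the equivalent idiomatic form, as in this sketch:

    # Sketch: equivalent truth test, no comparison against True needed.
    if os.path.exists(path_bert):
        bert_feature = torch.load(path_bert, map_location="cpu")
    else:
        flag = 1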
Empty file removed GPT_SoVITS/AR/models/__init__.py
12 changes: 4 additions & 8 deletions GPT_SoVITS/AR/models/t2s_lightning_module.py
@@ -1,18 +1,14 @@
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_lightning_module.py
# reference: https://github.com/lifeiteng/vall-e
-import os
-import sys

-now_dir = os.getcwd()
-sys.path.append(now_dir)
from typing import Dict

import torch
from pytorch_lightning import LightningModule

-from AR.models.t2s_model import Text2SemanticDecoder
-from AR.modules.lr_schedulers import WarmupCosineLRSchedule
-from AR.modules.optim import ScaledAdam
+from ..modules.lr_schedulers import WarmupCosineLRSchedule
+from ..modules.optim import ScaledAdam
+from .t2s_model import Text2SemanticDecoder


class Text2SemanticLightningModule(LightningModule):
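With the os.getcwd()/sys.path hack gone, dependencies are addressed relative to the module's own package. This works whenever the file is imported as part of the GPT_SoVITS.AR package (for example via the qualified imports above or python -m); it would no longer run as a loose script, which these training modules are not.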
@@ -42,7 +38,7 @@ def __init__(self, config, output_dir, is_train=True):
def training_step(self, batch: Dict, batch_idx: int):
opt = self.optimizers()
scheduler = self.lr_schedulers()
-forward = self.model.forward if self.config["train"].get("if_dpo", False) == True else self.model.forward_old
+forward = self.model.forward if self.config["train"].get("if_dpo", False) is True else self.model.forward_old
loss, acc = forward(
batch["phoneme_ids"],
batch["phoneme_ids_len"],
97 changes: 1 addition & 96 deletions GPT_SoVITS/AR/models/t2s_lightning_module_onnx.py
@@ -1,18 +1,10 @@
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_lightning_module.py
# reference: https://github.com/lifeiteng/vall-e
-import os
-import sys

-now_dir = os.getcwd()
-sys.path.append(now_dir)
from typing import Dict

import torch
from pytorch_lightning import LightningModule

-from AR.models.t2s_model_onnx import Text2SemanticDecoder
-from AR.modules.lr_schedulers import WarmupCosineLRSchedule
-from AR.modules.optim import ScaledAdam
+from .t2s_model_onnx import Text2SemanticDecoder


class Text2SemanticLightningModule(LightningModule):
@@ -21,90 +13,3 @@ def __init__(self, config, output_dir, is_train=True):
self.config = config
self.top_k = 3
self.model = Text2SemanticDecoder(config=config, top_k=self.top_k)
-pretrained_s1 = config.get("pretrained_s1")
-if pretrained_s1 and is_train:
-# print(self.load_state_dict(torch.load(pretrained_s1,map_location="cpu")["state_dict"]))
-print(
-self.load_state_dict(
-torch.load(
-pretrained_s1,
-map_location="cpu",
-)["weight"],
-),
-)
-if is_train:
-self.automatic_optimization = False
-self.save_hyperparameters()
-self.eval_dir = output_dir / "eval"
-self.eval_dir.mkdir(parents=True, exist_ok=True)
-
-def training_step(self, batch: Dict, batch_idx: int):
-opt = self.optimizers()
-scheduler = self.lr_schedulers()
-loss, acc = self.model.forward(
-batch["phoneme_ids"],
-batch["phoneme_ids_len"],
-batch["semantic_ids"],
-batch["semantic_ids_len"],
-batch["bert_feature"],
-)
-self.manual_backward(loss)
-if batch_idx > 0 and batch_idx % 4 == 0:
-opt.step()
-opt.zero_grad()
-scheduler.step()
-
-self.log(
-"total_loss",
-loss,
-on_step=True,
-on_epoch=True,
-prog_bar=True,
-sync_dist=True,
-)
-self.log(
-"lr",
-scheduler.get_last_lr()[0],
-on_epoch=True,
-prog_bar=True,
-sync_dist=True,
-)
-self.log(
-f"top_{self.top_k}_acc",
-acc,
-on_step=True,
-on_epoch=True,
-prog_bar=True,
-sync_dist=True,
-)
-
-def validation_step(self, batch: Dict, batch_idx: int):
-return
-
-def configure_optimizers(self):
-model_parameters = self.model.parameters()
-parameters_names = []
-parameters_names.append([name_param_pair[0] for name_param_pair in self.model.named_parameters()])
-lm_opt = ScaledAdam(
-model_parameters,
-lr=0.01,
-betas=(0.9, 0.95),
-clipping_scale=2.0,
-parameters_names=parameters_names,
-show_dominant_parameters=False,
-clipping_update_period=1000,
-)
-
-return {
-"optimizer": lm_opt,
-"lr_scheduler": {
-"scheduler": WarmupCosineLRSchedule(
-lm_opt,
-init_lr=self.config["optimizer"]["lr_init"],
-peak_lr=self.config["optimizer"]["lr"],
-end_lr=self.config["optimizer"]["lr_end"],
-warmup_steps=self.config["optimizer"]["warmup_steps"],
-total_steps=self.config["optimizer"]["decay_steps"],
-)
-},
-}
16 changes: 8 additions & 8 deletions GPT_SoVITS/AR/models/t2s_model.py
@@ -9,7 +9,7 @@
from torchmetrics.classification import MulticlassAccuracy
from tqdm import tqdm

-from AR.models.utils import (
+from GPT_SoVITS.AR.models.utils import (
dpo_loss,
get_batch_logps,
make_pad_mask,
@@ -18,8 +18,8 @@
sample,
topk_sampling,
)
-from AR.modules.embedding import SinePositionalEmbedding, TokenEmbedding
-from AR.modules.transformer import LayerNorm, TransformerEncoder, TransformerEncoderLayer
+from GPT_SoVITS.AR.modules.embedding import SinePositionalEmbedding, TokenEmbedding
+from GPT_SoVITS.AR.modules.transformer import LayerNorm, TransformerEncoder, TransformerEncoderLayer

default_config = {
"embedding_dim": 512,
@@ -420,7 +420,7 @@ def forward(self, x, x_lens, y, y_lens, bert_feature):
mask=xy_attn_mask,
)
x_len = x_lens.max()
-logits = self.ar_predict_layer(xy_dec[:, x_len-1:])
+logits = self.ar_predict_layer(xy_dec[:, x_len - 1 :])

###### DPO #############
reject_xy_pos, reject_xy_attn_mask, reject_targets = self.make_input_data(
@@ -432,7 +432,7 @@ def forward(self, x, x_lens, y, y_lens, bert_feature):
mask=reject_xy_attn_mask,
)
x_len = x_lens.max()
-reject_logits = self.ar_predict_layer(reject_xy_dec[:, x_len-1:])
+reject_logits = self.ar_predict_layer(reject_xy_dec[:, x_len - 1 :])

# loss
# from feiteng: 每次 duration 越多, 梯度更新也应该更多, 所以用 sum
@@ -502,7 +502,7 @@ def forward_old(self, x, x_lens, y, y_lens, bert_feature):
(xy_pos, None),
mask=xy_attn_mask,
)
-logits = self.ar_predict_layer(xy_dec[:, x_len-1:]).permute(0, 2, 1)
+logits = self.ar_predict_layer(xy_dec[:, x_len - 1 :]).permute(0, 2, 1)
# loss
# from feiteng: 每次 duration 越多, 梯度更新也应该更多, 所以用 sum
loss = F.cross_entropy(logits, targets, reduction="sum")
@@ -724,8 +724,8 @@ def infer_panel_batch_infer(
l1 = samples[:, 0] == self.EOS
l2 = tokens == self.EOS
l = l1.logical_or(l2)
-removed_idx_of_batch_for_y = torch.where(l == True)[0].tolist()
-reserved_idx_of_batch_for_y = torch.where(l == False)[0]
+removed_idx_of_batch_for_y = torch.where(l is True)[0].tolist()
+reserved_idx_of_batch_for_y = torch.where(l is False)[0]
# batch_indexs = torch.tensor(batch_idx_map, device=y.device)[removed_idx_of_batch_for_y]
for i in removed_idx_of_batch_for_y:
batch_index = batch_idx_map[i]
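One caveat worth noting for the EOS masking above: on a boolean tensor l, the elementwise forms are l == True and l == False (or simply l and ~l), whereas l is True and l is False are Python identity tests against the bool singletons and are always False for a tensor, leaving torch.where a plain Python False instead of a mask. The elementwise intent, as a sketch:

    # Sketch: elementwise selection on a boolean EOS mask.
    removed_idx_of_batch_for_y = torch.where(l)[0].tolist()  # positions that hit EOS
    reserved_idx_of_batch_for_y = torch.where(~l)[0]         # everything still running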