Skip to content

Commit 9675b5e

Browse files
raa0121 and Hiroshiba authored
refactor: tts_engine から song_engine に分離 (#1592)
Co-authored-by: Hiroshiba <[email protected]>
1 parent 52672de commit 9675b5e

File tree

17 files changed

+585
-437
lines changed

17 files changed

+585
-437
lines changed

run.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
from voicevox_engine.preset.preset_manager import PresetManager
2222
from voicevox_engine.setting.model import CorsPolicyMode
2323
from voicevox_engine.setting.setting_manager import USER_SETTING_PATH, SettingHandler
24+
from voicevox_engine.tts_pipeline.song_engine import make_song_engines_from_cores
2425
from voicevox_engine.tts_pipeline.tts_engine import make_tts_engines_from_cores
2526
from voicevox_engine.user_dict.user_dict_manager import UserDictionary
2627
from voicevox_engine.utility.path_utility import (
@@ -326,7 +327,9 @@ def main() -> None:
326327
load_all_models=args.load_all_models,
327328
)
328329
tts_engines = make_tts_engines_from_cores(core_manager)
330+
song_engines = make_song_engines_from_cores(core_manager)
329331
assert len(tts_engines.versions()) != 0, "音声合成エンジンがありません。"
332+
assert len(song_engines.versions()) != 0, "音声合成エンジンがありません。"
330333

331334
cancellable_engine: CancellableEngine | None = None
332335
if args.enable_cancellable_synthesis:
@@ -389,6 +392,7 @@ def main() -> None:
389392
# ASGI に準拠した VOICEVOX ENGINE アプリケーションを生成する
390393
app = generate_app(
391394
tts_engines,
395+
song_engines,
392396
core_manager,
393397
setting_loader,
394398
preset_manager,

test/benchmark/engine_preparation.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
from voicevox_engine.library.library_manager import LibraryManager
1414
from voicevox_engine.preset.preset_manager import PresetManager
1515
from voicevox_engine.setting.setting_manager import SettingHandler
16+
from voicevox_engine.tts_pipeline.song_engine import make_song_engines_from_cores
1617
from voicevox_engine.tts_pipeline.tts_engine import make_tts_engines_from_cores
1718
from voicevox_engine.user_dict.user_dict_manager import UserDictionary
1819
from voicevox_engine.utility.path_utility import engine_manifest_path, get_save_dir
@@ -23,6 +24,7 @@ def _generate_engine_fake_server(root_dir: Path) -> TestClient:
2324
voicevox_dir=root_dir, use_gpu=False, enable_mock=False
2425
)
2526
tts_engines = make_tts_engines_from_cores(core_manager)
27+
song_engines = make_song_engines_from_cores(core_manager)
2628
setting_loader = SettingHandler(Path("./not_exist.yaml"))
2729
preset_manager = PresetManager(get_save_dir() / "presets.yaml")
2830
user_dict = UserDictionary()
@@ -36,6 +38,7 @@ def _generate_engine_fake_server(root_dir: Path) -> TestClient:
3638
)
3739
app = generate_app(
3840
tts_engines=tts_engines,
41+
song_engines=song_engines,
3942
core_manager=core_manager,
4043
setting_loader=setting_loader,
4144
preset_manager=preset_manager,

test/e2e/conftest.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from voicevox_engine.library.library_manager import LibraryManager
1515
from voicevox_engine.preset.preset_manager import PresetManager
1616
from voicevox_engine.setting.setting_manager import SettingHandler
17+
from voicevox_engine.tts_pipeline.song_engine import make_song_engines_from_cores
1718
from voicevox_engine.tts_pipeline.tts_engine import make_tts_engines_from_cores
1819
from voicevox_engine.user_dict.user_dict_manager import (
1920
DEFAULT_DICT_PATH,
@@ -33,6 +34,7 @@ def _copy_under_dir(file_path: Path, dir_path: Path) -> Path:
3334
def app_params(tmp_path: Path) -> dict[str, Any]:
3435
core_manager = initialize_cores(use_gpu=False, enable_mock=True)
3536
tts_engines = make_tts_engines_from_cores(core_manager)
37+
song_engines = make_song_engines_from_cores(core_manager)
3638
setting_loader = SettingHandler(tmp_path / "not_exist.yaml")
3739

3840
# テスト用に隔離されたプリセットを生成する
@@ -57,6 +59,7 @@ def app_params(tmp_path: Path) -> dict[str, Any]:
5759

5860
return {
5961
"tts_engines": tts_engines,
62+
"song_engines": song_engines,
6063
"core_manager": core_manager,
6164
"setting_loader": setting_loader,
6265
"preset_manager": preset_manager,

test/unit/tts_pipeline/test_tts_engine.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,9 @@
1717
Note,
1818
Score,
1919
)
20+
from voicevox_engine.tts_pipeline.song_engine import (
21+
SongEngine,
22+
)
2023
from voicevox_engine.tts_pipeline.tts_engine import (
2124
TTSEngine,
2225
_apply_interrogative_upspeak,
@@ -277,13 +280,13 @@ def test_mocked_create_sing_volume_from_phoneme_and_f0_output(
277280
NOTE: 入力生成の簡略化に別関数を呼び出すため、別関数が正しく動作しない場合テストが落ちる
278281
"""
279282
# Inputs
280-
tts_engine = TTSEngine(MockCoreWrapper())
283+
tts_engine = SongEngine(MockCoreWrapper())
281284
doremi_srore = _gen_doremi_score()
282-
phonemes, f0s, _ = tts_engine.create_sing_phoneme_and_f0_and_volume(
285+
phonemes, f0s, _ = tts_engine.create_phoneme_and_f0_and_volume(
283286
doremi_srore, StyleId(1)
284287
)
285288
# Outputs
286-
result = tts_engine.create_sing_volume_from_phoneme_and_f0(
289+
result = tts_engine.create_volume_from_phoneme_and_f0(
287290
doremi_srore, phonemes, f0s, StyleId(1)
288291
)
289292
# Tests
@@ -298,10 +301,10 @@ def test_mocked_synthesize_wave_from_score_output(
298301
`TTSEngine.frame_synthesize_wave()` の出力スナップショットが一定である
299302
"""
300303
# Inputs
301-
tts_engine = TTSEngine(MockCoreWrapper())
304+
tts_engine = SongEngine(MockCoreWrapper())
302305
doremi_srore = _gen_doremi_score()
303306
# Outputs
304-
result = tts_engine.create_sing_phoneme_and_f0_and_volume(doremi_srore, StyleId(1))
307+
result = tts_engine.create_phoneme_and_f0_and_volume(doremi_srore, StyleId(1))
305308
# Tests
306309
assert snapshot_json(name="query") == round_floats(
307310
pydantic_to_native_type(result), round_value=2

test/unit/tts_pipeline/test_tts_engines.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ def test_tts_engines_get_engine_existing() -> None:
4545
# Expects
4646
true_acquired_tts_engine = tts_engine2
4747
# Outputs
48-
acquired_tts_engine = tts_engines.get_engine("0.0.2")
48+
acquired_tts_engine = tts_engines.get_tts_engine("0.0.2")
4949

5050
# Test
5151
assert true_acquired_tts_engine == acquired_tts_engine
@@ -64,7 +64,7 @@ def test_tts_engines_get_engine_latest() -> None:
6464
# Expects
6565
true_acquired_tts_engine = tts_engine3
6666
# Outputs
67-
acquired_tts_engine = tts_engines.get_engine(LATEST_VERSION)
67+
acquired_tts_engine = tts_engines.get_tts_engine(LATEST_VERSION)
6868

6969
# Test
7070
assert true_acquired_tts_engine == acquired_tts_engine
@@ -80,4 +80,4 @@ def test_tts_engines_get_engine_missing() -> None:
8080
tts_engines.register_engine(tts_engine2, "0.0.2")
8181
# Test
8282
with pytest.raises(TTSEngineNotFound):
83-
tts_engines.get_engine("0.0.3")
83+
tts_engines.get_tts_engine("0.0.3")

test/unit/tts_pipeline/test_wave_synthesizer.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,18 +3,20 @@
33
import numpy as np
44

55
from voicevox_engine.model import AudioQuery
6+
from voicevox_engine.tts_pipeline.audio_postprocessing import (
7+
_apply_output_sampling_rate,
8+
_apply_output_stereo,
9+
_apply_volume_scale,
10+
raw_wave_to_output_wave,
11+
)
612
from voicevox_engine.tts_pipeline.model import AccentPhrase
713
from voicevox_engine.tts_pipeline.tts_engine import (
814
_apply_intonation_scale,
9-
_apply_output_sampling_rate,
10-
_apply_output_stereo,
1115
_apply_pitch_scale,
1216
_apply_prepost_silence,
1317
_apply_speed_scale,
14-
_apply_volume_scale,
1518
_count_frame_per_unit,
1619
_query_to_decoder_feature,
17-
raw_wave_to_output_wave,
1820
)
1921

2022
from .tts_utils import gen_mora, sec

tools/make_docs.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
from voicevox_engine.library.library_manager import LibraryManager
1111
from voicevox_engine.preset.preset_manager import PresetManager
1212
from voicevox_engine.setting.setting_manager import USER_SETTING_PATH, SettingHandler
13+
from voicevox_engine.tts_pipeline.song_engine import SongEngineManager
1314
from voicevox_engine.tts_pipeline.tts_engine import TTSEngineManager
1415
from voicevox_engine.user_dict.user_dict_manager import UserDictionary
1516
from voicevox_engine.utility.path_utility import engine_manifest_path, get_save_dir
@@ -39,6 +40,7 @@ def generate_api_docs_html(schema: str) -> str:
3940
core_manager = CoreManager()
4041
core_manager.register_core(CoreAdapter(MockCoreWrapper()), "mock")
4142
tts_engines = TTSEngineManager()
43+
song_engines = SongEngineManager()
4244
tts_engines.register_engine(MockTTSEngine(), "mock")
4345
preset_path = get_save_dir() / "presets.yaml"
4446
engine_manifest = load_manifest(engine_manifest_path())
@@ -53,6 +55,7 @@ def generate_api_docs_html(schema: str) -> str:
5355
# FastAPI の機能を用いて OpenAPI schema を生成する
5456
app = generate_app(
5557
tts_engines=tts_engines,
58+
song_engines=song_engines,
5659
core_manager=core_manager,
5760
setting_loader=SettingHandler(USER_SETTING_PATH),
5861
preset_manager=PresetManager(preset_path),

voicevox_engine/app/application.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
from voicevox_engine.resource_manager import ResourceManager
3232
from voicevox_engine.setting.model import CorsPolicyMode
3333
from voicevox_engine.setting.setting_manager import SettingHandler
34+
from voicevox_engine.tts_pipeline.song_engine import SongEngineManager
3435
from voicevox_engine.tts_pipeline.tts_engine import TTSEngineManager
3536
from voicevox_engine.user_dict.user_dict_manager import UserDictionary
3637
from voicevox_engine.utility.path_utility import engine_root
@@ -39,6 +40,7 @@
3940

4041
def generate_app(
4142
tts_engines: TTSEngineManager,
43+
song_engines: SongEngineManager,
4244
core_manager: CoreManager,
4345
setting_loader: SettingHandler,
4446
preset_manager: PresetManager,
@@ -85,7 +87,9 @@ def _get_core_characters(version: str | None) -> list[CoreCharacter]:
8587
)
8688

8789
app.include_router(
88-
generate_tts_pipeline_router(tts_engines, preset_manager, cancellable_engine)
90+
generate_tts_pipeline_router(
91+
tts_engines, song_engines, preset_manager, cancellable_engine
92+
)
8993
)
9094
app.include_router(generate_morphing_router(tts_engines, metas_store))
9195
app.include_router(

voicevox_engine/app/routers/morphing.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ def _synthesis_morphing(
8787
モーフィングの割合は`morph_rate`で指定でき、0.0でベースのスタイル、1.0でターゲットのスタイルに近づきます。
8888
"""
8989
version = core_version or LATEST_VERSION
90-
engine = tts_engines.get_engine(version)
90+
engine = tts_engines.get_tts_engine(version)
9191

9292
# モーフィングが許可されないキャラクターペアを拒否する
9393
characters = metas_store.characters(core_version)

0 commit comments

Comments
 (0)