32
32
--num_workers=1 \
33
33
--trim_config_path="<nemo_root_path>/examples/tts/conf/trim/energy.yaml" \
34
34
--output_sample_rate=22050 \
35
+ --output_format=flac \
35
36
--volume_level=0.95 \
36
37
--min_duration=0.5 \
37
38
--max_duration=20.0 \
@@ -97,6 +98,12 @@ def get_args():
97
98
parser .add_argument (
98
99
"--output_sample_rate" , default = 0 , type = int , help = "If provided, rate to resample the audio to."
99
100
)
101
+ parser .add_argument (
102
+ "--output_format" ,
103
+ default = "wav" ,
104
+ type = str ,
105
+ help = "Format the output audio will be saved as (default: wav). Pass an empty string to keep the original format." ,
106
+ )
100
107
parser .add_argument (
101
108
"--volume_level" , default = 0.0 , type = float , help = "If provided, peak volume to normalize audio to."
102
109
)
@@ -123,12 +130,18 @@ def _process_entry(
123
130
overwrite_audio : bool ,
124
131
audio_trimmer : AudioTrimmer ,
125
132
output_sample_rate : int ,
133
+ output_format : str ,
126
134
volume_level : float ,
127
135
) -> Tuple [dict , float , float ]:
128
136
audio_filepath = Path (entry ["audio_filepath" ])
129
137
130
138
audio_path , audio_path_rel = get_abs_rel_paths (input_path = audio_filepath , base_path = input_audio_dir )
139
+
140
+ if not output_format :
141
+ output_format = audio_path .suffix
142
+
131
143
output_path = output_audio_dir / audio_path_rel
144
+ output_path = output_path .with_suffix (output_format )
132
145
output_path .parent .mkdir (exist_ok = True , parents = True )
133
146
134
147
if output_path .exists () and not overwrite_audio :
@@ -159,6 +172,9 @@ def _process_entry(
159
172
160
173
if os .path .isabs (audio_filepath ):
161
174
entry ["audio_filepath" ] = str (output_path )
175
+ else :
176
+ output_filepath = audio_path_rel .with_suffix (output_format )
177
+ entry ["audio_filepath" ] = str (output_filepath )
162
178
163
179
return entry , original_duration , output_duration
164
180
@@ -175,6 +191,7 @@ def main():
175
191
num_workers = args .num_workers
176
192
max_entries = args .max_entries
177
193
output_sample_rate = args .output_sample_rate
194
+ output_format = args .output_format
178
195
volume_level = args .volume_level
179
196
min_duration = args .min_duration
180
197
max_duration = args .max_duration
@@ -192,6 +209,11 @@ def main():
192
209
else :
193
210
audio_trimmer = None
194
211
212
+ if output_format :
213
+ if output_format .upper () not in sf .available_formats ():
214
+ raise ValueError (f"Unsupported output audio format: { output_format } " )
215
+ output_format = f".{ output_format } "
216
+
195
217
output_audio_dir .mkdir (exist_ok = True , parents = True )
196
218
197
219
entries = read_manifest (input_manifest_path )
@@ -207,6 +229,7 @@ def main():
207
229
overwrite_audio = overwrite_audio ,
208
230
audio_trimmer = audio_trimmer ,
209
231
output_sample_rate = output_sample_rate ,
232
+ output_format = output_format ,
210
233
volume_level = volume_level ,
211
234
)
212
235
for entry in tqdm (entries )
0 commit comments