积极的屁孩 committed on
Commit
cba1c8b
·
1 Parent(s): 3b944a1

download before infer

Browse files
Files changed (1) hide show
  1. app.py +310 -94
app.py CHANGED
@@ -12,6 +12,17 @@ import subprocess
12
  import re
13
  import spaces
14
 
 
 
 
 
 
 
 
 
 
 
 
15
  def install_espeak():
16
  """Detect and install espeak-ng dependency"""
17
  try:
@@ -150,6 +161,10 @@ from models.vc.vevo.vevo_utils import VevoInferencePipeline, save_audio, load_wa
150
 
151
  # Download and setup config files
152
  def setup_configs():
 
 
 
 
153
  config_path = "models/vc/vevo/config"
154
  os.makedirs(config_path, exist_ok=True)
155
 
@@ -175,6 +190,8 @@ def setup_configs():
175
  subprocess.run(["cp", file_data, file_path])
176
  except Exception as e:
177
  print(f"Error downloading config file {file}: {e}")
 
 
178
 
179
  setup_configs()
180
 
@@ -192,54 +209,102 @@ def get_pipeline(pipeline_type):
192
  # Initialize pipeline based on the required pipeline type
193
  if pipeline_type == "style" or pipeline_type == "voice":
194
  # Download Content Tokenizer
195
- local_dir = snapshot_download(
196
- repo_id="amphion/Vevo",
197
- repo_type="model",
198
- cache_dir="./ckpts/Vevo",
199
- allow_patterns=["tokenizer/vq32/*"],
200
- )
201
- content_tokenizer_ckpt_path = os.path.join(
202
- local_dir, "tokenizer/vq32/hubert_large_l18_c32.pkl"
203
- )
 
 
 
 
 
 
 
 
 
204
 
205
  # Download Content-Style Tokenizer
206
- local_dir = snapshot_download(
207
- repo_id="amphion/Vevo",
208
- repo_type="model",
209
- cache_dir="./ckpts/Vevo",
210
- allow_patterns=["tokenizer/vq8192/*"],
211
- )
212
- content_style_tokenizer_ckpt_path = os.path.join(local_dir, "tokenizer/vq8192")
 
 
 
 
 
 
 
 
 
213
 
214
  # Download Autoregressive Transformer
215
- local_dir = snapshot_download(
216
- repo_id="amphion/Vevo",
217
- repo_type="model",
218
- cache_dir="./ckpts/Vevo",
219
- allow_patterns=["contentstyle_modeling/Vq32ToVq8192/*"],
220
- )
221
- ar_cfg_path = "./models/vc/vevo/config/Vq32ToVq8192.json"
222
- ar_ckpt_path = os.path.join(local_dir, "contentstyle_modeling/Vq32ToVq8192")
 
 
 
 
 
 
 
 
 
 
223
 
224
  # Download Flow Matching Transformer
225
- local_dir = snapshot_download(
226
- repo_id="amphion/Vevo",
227
- repo_type="model",
228
- cache_dir="./ckpts/Vevo",
229
- allow_patterns=["acoustic_modeling/Vq8192ToMels/*"],
230
- )
231
- fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
232
- fmt_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vq8192ToMels")
 
 
 
 
 
 
 
 
 
 
233
 
234
  # Download Vocoder
235
- local_dir = snapshot_download(
236
- repo_id="amphion/Vevo",
237
- repo_type="model",
238
- cache_dir="./ckpts/Vevo",
239
- allow_patterns=["acoustic_modeling/Vocoder/*"],
240
- )
241
- vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
242
- vocoder_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vocoder")
 
 
 
 
 
 
 
 
 
 
243
 
244
  # Initialize pipeline
245
  inference_pipeline = VevoInferencePipeline(
@@ -256,33 +321,62 @@ def get_pipeline(pipeline_type):
256
 
257
  elif pipeline_type == "timbre":
258
  # Download Content-Style Tokenizer (only needed for timbre)
259
- local_dir = snapshot_download(
260
- repo_id="amphion/Vevo",
261
- repo_type="model",
262
- cache_dir="./ckpts/Vevo",
263
- allow_patterns=["tokenizer/vq8192/*"],
264
- )
265
- content_style_tokenizer_ckpt_path = os.path.join(local_dir, "tokenizer/vq8192")
 
 
 
 
 
 
 
 
 
266
 
267
  # Download Flow Matching Transformer
268
- local_dir = snapshot_download(
269
- repo_id="amphion/Vevo",
270
- repo_type="model",
271
- cache_dir="./ckpts/Vevo",
272
- allow_patterns=["acoustic_modeling/Vq8192ToMels/*"],
273
- )
274
- fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
275
- fmt_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vq8192ToMels")
 
 
 
 
 
 
 
 
 
 
276
 
277
  # Download Vocoder
278
- local_dir = snapshot_download(
279
- repo_id="amphion/Vevo",
280
- repo_type="model",
281
- cache_dir="./ckpts/Vevo",
282
- allow_patterns=["acoustic_modeling/Vocoder/*"],
283
- )
284
- vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
285
- vocoder_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vocoder")
 
 
 
 
 
 
 
 
 
 
286
 
287
  # Initialize pipeline
288
  inference_pipeline = VevoInferencePipeline(
@@ -296,43 +390,82 @@ def get_pipeline(pipeline_type):
296
 
297
  elif pipeline_type == "tts":
298
  # Download Content-Style Tokenizer
299
- local_dir = snapshot_download(
300
- repo_id="amphion/Vevo",
301
- repo_type="model",
302
- cache_dir="./ckpts/Vevo",
303
- allow_patterns=["tokenizer/vq8192/*"],
304
- )
305
- content_style_tokenizer_ckpt_path = os.path.join(local_dir, "tokenizer/vq8192")
 
 
 
 
 
 
 
 
 
306
 
307
  # Download Autoregressive Transformer (TTS specific)
308
- local_dir = snapshot_download(
309
- repo_id="amphion/Vevo",
310
- repo_type="model",
311
- cache_dir="./ckpts/Vevo",
312
- allow_patterns=["contentstyle_modeling/PhoneToVq8192/*"],
313
- )
314
- ar_cfg_path = "./models/vc/vevo/config/PhoneToVq8192.json"
315
- ar_ckpt_path = os.path.join(local_dir, "contentstyle_modeling/PhoneToVq8192")
 
 
 
 
 
 
 
 
 
 
316
 
317
  # Download Flow Matching Transformer
318
- local_dir = snapshot_download(
319
- repo_id="amphion/Vevo",
320
- repo_type="model",
321
- cache_dir="./ckpts/Vevo",
322
- allow_patterns=["acoustic_modeling/Vq8192ToMels/*"],
323
- )
324
- fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
325
- fmt_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vq8192ToMels")
 
 
 
 
 
 
 
 
 
 
326
 
327
  # Download Vocoder
328
- local_dir = snapshot_download(
329
- repo_id="amphion/Vevo",
330
- repo_type="model",
331
- cache_dir="./ckpts/Vevo",
332
- allow_patterns=["acoustic_modeling/Vocoder/*"],
333
- )
334
- vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
335
- vocoder_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vocoder")
 
 
 
 
 
 
 
 
 
 
336
 
337
  # Initialize pipeline
338
  inference_pipeline = VevoInferencePipeline(
@@ -761,6 +894,89 @@ def vevo_tts(text, ref_wav, timbre_ref_wav=None, style_ref_text=None, src_langua
761
  traceback.print_exc()
762
  raise e
763
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
764
  # Create Gradio interface
765
  with gr.Blocks(title="Vevo: Controllable Zero-Shot Voice Imitation with Self-Supervised Disentanglement") as demo:
766
  gr.Markdown("# Vevo: Controllable Zero-Shot Voice Imitation with Self-Supervised Disentanglement")
 
12
  import re
13
  import spaces
14
 
15
+ # Module-level flags recording which resources have already been downloaded
16
+ downloaded_resources = {
17
+ "configs": False,
18
+ "tokenizer_vq32": False,
19
+ "tokenizer_vq8192": False,
20
+ "ar_Vq32ToVq8192": False,
21
+ "ar_PhoneToVq8192": False,
22
+ "fmt_Vq8192ToMels": False,
23
+ "vocoder": False
24
+ }
25
+
26
  def install_espeak():
27
  """Detect and install espeak-ng dependency"""
28
  try:
 
161
 
162
  # Download and setup config files
163
  def setup_configs():
164
+ if downloaded_resources["configs"]:
165
+ print("Config files already downloaded, skipping...")
166
+ return
167
+
168
  config_path = "models/vc/vevo/config"
169
  os.makedirs(config_path, exist_ok=True)
170
 
 
190
  subprocess.run(["cp", file_data, file_path])
191
  except Exception as e:
192
  print(f"Error downloading config file {file}: {e}")
193
+
194
+ downloaded_resources["configs"] = True
195
 
196
  setup_configs()
197
 
 
209
  # Initialize pipeline based on the required pipeline type
210
  if pipeline_type == "style" or pipeline_type == "voice":
211
  # Download Content Tokenizer
212
+ content_tokenizer_ckpt_path = ""
213
+ if not downloaded_resources["tokenizer_vq32"]:
214
+ local_dir = snapshot_download(
215
+ repo_id="amphion/Vevo",
216
+ repo_type="model",
217
+ cache_dir="./ckpts/Vevo",
218
+ allow_patterns=["tokenizer/vq32/*"],
219
+ )
220
+ content_tokenizer_ckpt_path = os.path.join(
221
+ local_dir, "tokenizer/vq32/hubert_large_l18_c32.pkl"
222
+ )
223
+ downloaded_resources["tokenizer_vq32"] = True
224
+ print("Downloaded Content Tokenizer (vq32)")
225
+ else:
226
+ print("Content Tokenizer (vq32) already downloaded, skipping...")
227
+ content_tokenizer_ckpt_path = os.path.join(
228
+ "./ckpts/Vevo/snapshots/amphion/Vevo", "tokenizer/vq32/hubert_large_l18_c32.pkl"
229
+ )
230
 
231
  # Download Content-Style Tokenizer
232
+ content_style_tokenizer_ckpt_path = ""
233
+ if not downloaded_resources["tokenizer_vq8192"]:
234
+ local_dir = snapshot_download(
235
+ repo_id="amphion/Vevo",
236
+ repo_type="model",
237
+ cache_dir="./ckpts/Vevo",
238
+ allow_patterns=["tokenizer/vq8192/*"],
239
+ )
240
+ content_style_tokenizer_ckpt_path = os.path.join(local_dir, "tokenizer/vq8192")
241
+ downloaded_resources["tokenizer_vq8192"] = True
242
+ print("Downloaded Content-Style Tokenizer (vq8192)")
243
+ else:
244
+ print("Content-Style Tokenizer (vq8192) already downloaded, skipping...")
245
+ content_style_tokenizer_ckpt_path = os.path.join(
246
+ "./ckpts/Vevo/snapshots/amphion/Vevo", "tokenizer/vq8192"
247
+ )
248
 
249
  # Download Autoregressive Transformer
250
+ ar_ckpt_path = ""
251
+ if not downloaded_resources["ar_Vq32ToVq8192"]:
252
+ local_dir = snapshot_download(
253
+ repo_id="amphion/Vevo",
254
+ repo_type="model",
255
+ cache_dir="./ckpts/Vevo",
256
+ allow_patterns=["contentstyle_modeling/Vq32ToVq8192/*"],
257
+ )
258
+ ar_cfg_path = "./models/vc/vevo/config/Vq32ToVq8192.json"
259
+ ar_ckpt_path = os.path.join(local_dir, "contentstyle_modeling/Vq32ToVq8192")
260
+ downloaded_resources["ar_Vq32ToVq8192"] = True
261
+ print("Downloaded Autoregressive Transformer (Vq32ToVq8192)")
262
+ else:
263
+ print("Autoregressive Transformer (Vq32ToVq8192) already downloaded, skipping...")
264
+ ar_cfg_path = "./models/vc/vevo/config/Vq32ToVq8192.json"
265
+ ar_ckpt_path = os.path.join(
266
+ "./ckpts/Vevo/snapshots/amphion/Vevo", "contentstyle_modeling/Vq32ToVq8192"
267
+ )
268
 
269
  # Download Flow Matching Transformer
270
+ fmt_ckpt_path = ""
271
+ if not downloaded_resources["fmt_Vq8192ToMels"]:
272
+ local_dir = snapshot_download(
273
+ repo_id="amphion/Vevo",
274
+ repo_type="model",
275
+ cache_dir="./ckpts/Vevo",
276
+ allow_patterns=["acoustic_modeling/Vq8192ToMels/*"],
277
+ )
278
+ fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
279
+ fmt_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vq8192ToMels")
280
+ downloaded_resources["fmt_Vq8192ToMels"] = True
281
+ print("Downloaded Flow Matching Transformer (Vq8192ToMels)")
282
+ else:
283
+ print("Flow Matching Transformer (Vq8192ToMels) already downloaded, skipping...")
284
+ fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
285
+ fmt_ckpt_path = os.path.join(
286
+ "./ckpts/Vevo/snapshots/amphion/Vevo", "acoustic_modeling/Vq8192ToMels"
287
+ )
288
 
289
  # Download Vocoder
290
+ vocoder_ckpt_path = ""
291
+ if not downloaded_resources["vocoder"]:
292
+ local_dir = snapshot_download(
293
+ repo_id="amphion/Vevo",
294
+ repo_type="model",
295
+ cache_dir="./ckpts/Vevo",
296
+ allow_patterns=["acoustic_modeling/Vocoder/*"],
297
+ )
298
+ vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
299
+ vocoder_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vocoder")
300
+ downloaded_resources["vocoder"] = True
301
+ print("Downloaded Vocoder")
302
+ else:
303
+ print("Vocoder already downloaded, skipping...")
304
+ vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
305
+ vocoder_ckpt_path = os.path.join(
306
+ "./ckpts/Vevo/snapshots/amphion/Vevo", "acoustic_modeling/Vocoder"
307
+ )
308
 
309
  # Initialize pipeline
310
  inference_pipeline = VevoInferencePipeline(
 
321
 
322
  elif pipeline_type == "timbre":
323
  # Download Content-Style Tokenizer (only needed for timbre)
324
+ content_style_tokenizer_ckpt_path = ""
325
+ if not downloaded_resources["tokenizer_vq8192"]:
326
+ local_dir = snapshot_download(
327
+ repo_id="amphion/Vevo",
328
+ repo_type="model",
329
+ cache_dir="./ckpts/Vevo",
330
+ allow_patterns=["tokenizer/vq8192/*"],
331
+ )
332
+ content_style_tokenizer_ckpt_path = os.path.join(local_dir, "tokenizer/vq8192")
333
+ downloaded_resources["tokenizer_vq8192"] = True
334
+ print("Downloaded Content-Style Tokenizer (vq8192)")
335
+ else:
336
+ print("Content-Style Tokenizer (vq8192) already downloaded, skipping...")
337
+ content_style_tokenizer_ckpt_path = os.path.join(
338
+ "./ckpts/Vevo/snapshots/amphion/Vevo", "tokenizer/vq8192"
339
+ )
340
 
341
  # Download Flow Matching Transformer
342
+ fmt_ckpt_path = ""
343
+ if not downloaded_resources["fmt_Vq8192ToMels"]:
344
+ local_dir = snapshot_download(
345
+ repo_id="amphion/Vevo",
346
+ repo_type="model",
347
+ cache_dir="./ckpts/Vevo",
348
+ allow_patterns=["acoustic_modeling/Vq8192ToMels/*"],
349
+ )
350
+ fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
351
+ fmt_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vq8192ToMels")
352
+ downloaded_resources["fmt_Vq8192ToMels"] = True
353
+ print("Downloaded Flow Matching Transformer (Vq8192ToMels)")
354
+ else:
355
+ print("Flow Matching Transformer (Vq8192ToMels) already downloaded, skipping...")
356
+ fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
357
+ fmt_ckpt_path = os.path.join(
358
+ "./ckpts/Vevo/snapshots/amphion/Vevo", "acoustic_modeling/Vq8192ToMels"
359
+ )
360
 
361
  # Download Vocoder
362
+ vocoder_ckpt_path = ""
363
+ if not downloaded_resources["vocoder"]:
364
+ local_dir = snapshot_download(
365
+ repo_id="amphion/Vevo",
366
+ repo_type="model",
367
+ cache_dir="./ckpts/Vevo",
368
+ allow_patterns=["acoustic_modeling/Vocoder/*"],
369
+ )
370
+ vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
371
+ vocoder_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vocoder")
372
+ downloaded_resources["vocoder"] = True
373
+ print("Downloaded Vocoder")
374
+ else:
375
+ print("Vocoder already downloaded, skipping...")
376
+ vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
377
+ vocoder_ckpt_path = os.path.join(
378
+ "./ckpts/Vevo/snapshots/amphion/Vevo", "acoustic_modeling/Vocoder"
379
+ )
380
 
381
  # Initialize pipeline
382
  inference_pipeline = VevoInferencePipeline(
 
390
 
391
  elif pipeline_type == "tts":
392
  # Download Content-Style Tokenizer
393
+ content_style_tokenizer_ckpt_path = ""
394
+ if not downloaded_resources["tokenizer_vq8192"]:
395
+ local_dir = snapshot_download(
396
+ repo_id="amphion/Vevo",
397
+ repo_type="model",
398
+ cache_dir="./ckpts/Vevo",
399
+ allow_patterns=["tokenizer/vq8192/*"],
400
+ )
401
+ content_style_tokenizer_ckpt_path = os.path.join(local_dir, "tokenizer/vq8192")
402
+ downloaded_resources["tokenizer_vq8192"] = True
403
+ print("Downloaded Content-Style Tokenizer (vq8192)")
404
+ else:
405
+ print("Content-Style Tokenizer (vq8192) already downloaded, skipping...")
406
+ content_style_tokenizer_ckpt_path = os.path.join(
407
+ "./ckpts/Vevo/snapshots/amphion/Vevo", "tokenizer/vq8192"
408
+ )
409
 
410
  # Download Autoregressive Transformer (TTS specific)
411
+ ar_ckpt_path = ""
412
+ if not downloaded_resources["ar_PhoneToVq8192"]:
413
+ local_dir = snapshot_download(
414
+ repo_id="amphion/Vevo",
415
+ repo_type="model",
416
+ cache_dir="./ckpts/Vevo",
417
+ allow_patterns=["contentstyle_modeling/PhoneToVq8192/*"],
418
+ )
419
+ ar_cfg_path = "./models/vc/vevo/config/PhoneToVq8192.json"
420
+ ar_ckpt_path = os.path.join(local_dir, "contentstyle_modeling/PhoneToVq8192")
421
+ downloaded_resources["ar_PhoneToVq8192"] = True
422
+ print("Downloaded Autoregressive Transformer (PhoneToVq8192)")
423
+ else:
424
+ print("Autoregressive Transformer (PhoneToVq8192) already downloaded, skipping...")
425
+ ar_cfg_path = "./models/vc/vevo/config/PhoneToVq8192.json"
426
+ ar_ckpt_path = os.path.join(
427
+ "./ckpts/Vevo/snapshots/amphion/Vevo", "contentstyle_modeling/PhoneToVq8192"
428
+ )
429
 
430
  # Download Flow Matching Transformer
431
+ fmt_ckpt_path = ""
432
+ if not downloaded_resources["fmt_Vq8192ToMels"]:
433
+ local_dir = snapshot_download(
434
+ repo_id="amphion/Vevo",
435
+ repo_type="model",
436
+ cache_dir="./ckpts/Vevo",
437
+ allow_patterns=["acoustic_modeling/Vq8192ToMels/*"],
438
+ )
439
+ fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
440
+ fmt_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vq8192ToMels")
441
+ downloaded_resources["fmt_Vq8192ToMels"] = True
442
+ print("Downloaded Flow Matching Transformer (Vq8192ToMels)")
443
+ else:
444
+ print("Flow Matching Transformer (Vq8192ToMels) already downloaded, skipping...")
445
+ fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
446
+ fmt_ckpt_path = os.path.join(
447
+ "./ckpts/Vevo/snapshots/amphion/Vevo", "acoustic_modeling/Vq8192ToMels"
448
+ )
449
 
450
  # Download Vocoder
451
+ vocoder_ckpt_path = ""
452
+ if not downloaded_resources["vocoder"]:
453
+ local_dir = snapshot_download(
454
+ repo_id="amphion/Vevo",
455
+ repo_type="model",
456
+ cache_dir="./ckpts/Vevo",
457
+ allow_patterns=["acoustic_modeling/Vocoder/*"],
458
+ )
459
+ vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
460
+ vocoder_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vocoder")
461
+ downloaded_resources["vocoder"] = True
462
+ print("Downloaded Vocoder")
463
+ else:
464
+ print("Vocoder already downloaded, skipping...")
465
+ vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
466
+ vocoder_ckpt_path = os.path.join(
467
+ "./ckpts/Vevo/snapshots/amphion/Vevo", "acoustic_modeling/Vocoder"
468
+ )
469
 
470
  # Initialize pipeline
471
  inference_pipeline = VevoInferencePipeline(
 
894
  traceback.print_exc()
895
  raise e
896
 
897
# Download every required model resource once, at program start-up.
def preload_all_resources():
    """Fetch the config files and every Vevo checkpoint before serving requests.

    Runs ``setup_configs()`` and then, for each component whose flag in
    ``downloaded_resources`` is still ``False``, calls ``snapshot_download``
    on the ``amphion/Vevo`` model repo restricted to that component's file
    pattern (cache directory ``./ckpts/Vevo``) and sets the flag to ``True``.
    Components whose flag is already ``True`` are skipped.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        Whatever ``setup_configs`` or ``snapshot_download`` raises; nothing is
        caught here, and the flag of a failed component stays ``False``.
    """
    # (flag key in downloaded_resources, label used in log lines, repo glob)
    resources = (
        ("tokenizer_vq32", "Content Tokenizer (vq32)", "tokenizer/vq32/*"),
        (
            "tokenizer_vq8192",
            "Content-Style Tokenizer (vq8192)",
            "tokenizer/vq8192/*",
        ),
        (
            "ar_Vq32ToVq8192",
            "Autoregressive Transformer (Vq32ToVq8192)",
            "contentstyle_modeling/Vq32ToVq8192/*",
        ),
        (
            "ar_PhoneToVq8192",
            "Autoregressive Transformer (PhoneToVq8192)",
            "contentstyle_modeling/PhoneToVq8192/*",
        ),
        (
            "fmt_Vq8192ToMels",
            "Flow Matching Transformer (Vq8192ToMels)",
            "acoustic_modeling/Vq8192ToMels/*",
        ),
        ("vocoder", "Vocoder", "acoustic_modeling/Vocoder/*"),
    )

    print("预加载所有模型资源...")
    # Config files first; setup_configs() returns early if already done.
    setup_configs()

    for flag, label, pattern in resources:
        if downloaded_resources[flag]:
            continue
        print(f"预下载 {label}...")
        # The returned snapshot directory is deliberately not kept: this
        # function only warms the cache and records the flag.
        # NOTE(review): once a flag is True, get_pipeline's "already
        # downloaded" branches build checkpoint paths under
        # ./ckpts/Vevo/snapshots/amphion/Vevo instead of using the directory
        # snapshot_download returns - confirm that path matches the layout
        # huggingface_hub actually writes under cache_dir.
        snapshot_download(
            repo_id="amphion/Vevo",
            repo_type="model",
            cache_dir="./ckpts/Vevo",
            allow_patterns=[pattern],
        )
        downloaded_resources[flag] = True
        print(f"{label} 下载完成")

    print("所有模型资源预加载完成!")
976
+
977
+ # Preload all resources before the Gradio interface is created
978
+ preload_all_resources()
979
+
980
  # Create Gradio interface
981
  with gr.Blocks(title="Vevo: Controllable Zero-Shot Voice Imitation with Self-Supervised Disentanglement") as demo:
982
  gr.Markdown("# Vevo: Controllable Zero-Shot Voice Imitation with Self-Supervised Disentanglement")