Spaces: Running on Zero
积极的屁孩 committed · Commit 9755f3f · Parent(s): 2fc31e9
fix path
app.py
CHANGED
@@ -203,15 +203,124 @@ print(f"Using device: {device}")
 # Initialize pipeline dictionary
 inference_pipelines = {}

+# Download all necessary model resources at startup
+def preload_all_resources():
+    print("Preloading all model resources...")
+    # Download configuration files
+    setup_configs()
+
+    # Store the downloaded model paths
+    global downloaded_content_tokenizer_path
+    global downloaded_content_style_tokenizer_path
+    global downloaded_ar_vq32_path
+    global downloaded_ar_phone_path
+    global downloaded_fmt_path
+    global downloaded_vocoder_path
+
+    # Download Content Tokenizer (vq32)
+    if not downloaded_resources["tokenizer_vq32"]:
+        print("Preloading Content Tokenizer (vq32)...")
+        local_dir = snapshot_download(
+            repo_id="amphion/Vevo",
+            repo_type="model",
+            cache_dir="./ckpts/Vevo",
+            allow_patterns=["tokenizer/vq32/*"],
+        )
+        downloaded_content_tokenizer_path = local_dir
+        downloaded_resources["tokenizer_vq32"] = True
+        print("Content Tokenizer (vq32) download completed")
+
+    # Download Content-Style Tokenizer (vq8192)
+    if not downloaded_resources["tokenizer_vq8192"]:
+        print("Preloading Content-Style Tokenizer (vq8192)...")
+        local_dir = snapshot_download(
+            repo_id="amphion/Vevo",
+            repo_type="model",
+            cache_dir="./ckpts/Vevo",
+            allow_patterns=["tokenizer/vq8192/*"],
+        )
+        downloaded_content_style_tokenizer_path = local_dir
+        downloaded_resources["tokenizer_vq8192"] = True
+        print("Content-Style Tokenizer (vq8192) download completed")
+
+    # Download Autoregressive Transformer (Vq32ToVq8192)
+    if not downloaded_resources["ar_Vq32ToVq8192"]:
+        print("Preloading Autoregressive Transformer (Vq32ToVq8192)...")
+        local_dir = snapshot_download(
+            repo_id="amphion/Vevo",
+            repo_type="model",
+            cache_dir="./ckpts/Vevo",
+            allow_patterns=["contentstyle_modeling/Vq32ToVq8192/*"],
+        )
+        downloaded_ar_vq32_path = local_dir
+        downloaded_resources["ar_Vq32ToVq8192"] = True
+        print("Autoregressive Transformer (Vq32ToVq8192) download completed")
+
+    # Download Autoregressive Transformer (PhoneToVq8192)
+    if not downloaded_resources["ar_PhoneToVq8192"]:
+        print("Preloading Autoregressive Transformer (PhoneToVq8192)...")
+        local_dir = snapshot_download(
+            repo_id="amphion/Vevo",
+            repo_type="model",
+            cache_dir="./ckpts/Vevo",
+            allow_patterns=["contentstyle_modeling/PhoneToVq8192/*"],
+        )
+        downloaded_ar_phone_path = local_dir
+        downloaded_resources["ar_PhoneToVq8192"] = True
+        print("Autoregressive Transformer (PhoneToVq8192) download completed")
+
+    # Download Flow Matching Transformer
+    if not downloaded_resources["fmt_Vq8192ToMels"]:
+        print("Preloading Flow Matching Transformer (Vq8192ToMels)...")
+        local_dir = snapshot_download(
+            repo_id="amphion/Vevo",
+            repo_type="model",
+            cache_dir="./ckpts/Vevo",
+            allow_patterns=["acoustic_modeling/Vq8192ToMels/*"],
+        )
+        downloaded_fmt_path = local_dir
+        downloaded_resources["fmt_Vq8192ToMels"] = True
+        print("Flow Matching Transformer (Vq8192ToMels) download completed")
+
+    # Download Vocoder
+    if not downloaded_resources["vocoder"]:
+        print("Preloading Vocoder...")
+        local_dir = snapshot_download(
+            repo_id="amphion/Vevo",
+            repo_type="model",
+            cache_dir="./ckpts/Vevo",
+            allow_patterns=["acoustic_modeling/Vocoder/*"],
+        )
+        downloaded_vocoder_path = local_dir
+        downloaded_resources["vocoder"] = True
+        print("Vocoder download completed")
+
+    print("All model resources preloading completed!")
+
+# Initialize path variables to store downloaded model paths
+downloaded_content_tokenizer_path = None
+downloaded_content_style_tokenizer_path = None
+downloaded_ar_vq32_path = None
+downloaded_ar_phone_path = None
+downloaded_fmt_path = None
+downloaded_vocoder_path = None
+
+# Preload all resources before creating the Gradio interface
+preload_all_resources()
+
 def get_pipeline(pipeline_type):
     if pipeline_type in inference_pipelines:
         return inference_pipelines[pipeline_type]

     # Initialize pipeline based on the required pipeline type
     if pipeline_type == "style" or pipeline_type == "voice":
-        # …
-        # …
-        if not downloaded_resources["tokenizer_vq32"]:
+        # Use already downloaded Content Tokenizer
+        if downloaded_resources["tokenizer_vq32"]:
+            content_tokenizer_ckpt_path = os.path.join(
+                downloaded_content_tokenizer_path, "tokenizer/vq32/hubert_large_l18_c32.pkl"
+            )
+        else:
+            # Fallback to direct download
             local_dir = snapshot_download(
                 repo_id="amphion/Vevo",
                 repo_type="model",
@@ -221,17 +330,14 @@ def get_pipeline(pipeline_type):
             content_tokenizer_ckpt_path = os.path.join(
                 local_dir, "tokenizer/vq32/hubert_large_l18_c32.pkl"
             )
-            downloaded_resources["tokenizer_vq32"] = True
-            print("Downloaded Content Tokenizer (vq32)")
-        else:
-            print("Content Tokenizer (vq32) already downloaded, skipping...")
-            content_tokenizer_ckpt_path = os.path.join(
-                "./ckpts/Vevo/snapshots/amphion/Vevo", "tokenizer/vq32/hubert_large_l18_c32.pkl"
-            )

-        # …
-        # …
-        if not downloaded_resources["tokenizer_vq8192"]:
+        # Use already downloaded Content-Style Tokenizer
+        if downloaded_resources["tokenizer_vq8192"]:
+            content_style_tokenizer_ckpt_path = os.path.join(
+                downloaded_content_style_tokenizer_path, "tokenizer/vq8192"
+            )
+        else:
+            # Fallback to direct download
             local_dir = snapshot_download(
                 repo_id="amphion/Vevo",
                 repo_type="model",
@@ -239,73 +345,54 @@ def get_pipeline(pipeline_type):
                 allow_patterns=["tokenizer/vq8192/*"],
             )
             content_style_tokenizer_ckpt_path = os.path.join(local_dir, "tokenizer/vq8192")
-            downloaded_resources["tokenizer_vq8192"] = True
-            print("Downloaded Content-Style Tokenizer (vq8192)")
-        else:
-            print("Content-Style Tokenizer (vq8192) already downloaded, skipping...")
-            content_style_tokenizer_ckpt_path = os.path.join(
-                "./ckpts/Vevo/snapshots/amphion/Vevo", "tokenizer/vq8192"
-            )

-        # …
-        # …
-        if not downloaded_resources["ar_Vq32ToVq8192"]:
+        # Use already downloaded Autoregressive Transformer
+        ar_cfg_path = "./models/vc/vevo/config/Vq32ToVq8192.json"
+        if downloaded_resources["ar_Vq32ToVq8192"]:
+            ar_ckpt_path = os.path.join(
+                downloaded_ar_vq32_path, "contentstyle_modeling/Vq32ToVq8192"
+            )
+        else:
+            # Fallback to direct download
             local_dir = snapshot_download(
                 repo_id="amphion/Vevo",
                 repo_type="model",
                 cache_dir="./ckpts/Vevo",
                 allow_patterns=["contentstyle_modeling/Vq32ToVq8192/*"],
             )
-            ar_cfg_path = "./models/vc/vevo/config/Vq32ToVq8192.json"
             ar_ckpt_path = os.path.join(local_dir, "contentstyle_modeling/Vq32ToVq8192")
-            downloaded_resources["ar_Vq32ToVq8192"] = True
-            print("Downloaded Autoregressive Transformer (Vq32ToVq8192)")
-        else:
-            print("Autoregressive Transformer (Vq32ToVq8192) already downloaded, skipping...")
-            ar_cfg_path = "./models/vc/vevo/config/Vq32ToVq8192.json"
-            ar_ckpt_path = os.path.join(
-                "./ckpts/Vevo/snapshots/amphion/Vevo", "contentstyle_modeling/Vq32ToVq8192"
-            )

-        # …
-        # …
-        if not downloaded_resources["fmt_Vq8192ToMels"]:
+        # Use already downloaded Flow Matching Transformer
+        fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
+        if downloaded_resources["fmt_Vq8192ToMels"]:
+            fmt_ckpt_path = os.path.join(
+                downloaded_fmt_path, "acoustic_modeling/Vq8192ToMels"
+            )
+        else:
+            # Fallback to direct download
             local_dir = snapshot_download(
                 repo_id="amphion/Vevo",
                 repo_type="model",
                 cache_dir="./ckpts/Vevo",
                 allow_patterns=["acoustic_modeling/Vq8192ToMels/*"],
             )
-            fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
             fmt_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vq8192ToMels")
-            downloaded_resources["fmt_Vq8192ToMels"] = True
-            print("Downloaded Flow Matching Transformer (Vq8192ToMels)")
-        else:
-            print("Flow Matching Transformer (Vq8192ToMels) already downloaded, skipping...")
-            fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
-            fmt_ckpt_path = os.path.join(
-                "./ckpts/Vevo/snapshots/amphion/Vevo", "acoustic_modeling/Vq8192ToMels"
-            )

-        # …
-        # …
-        if not downloaded_resources["vocoder"]:
+        # Use already downloaded Vocoder
+        vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
+        if downloaded_resources["vocoder"]:
+            vocoder_ckpt_path = os.path.join(
+                downloaded_vocoder_path, "acoustic_modeling/Vocoder"
+            )
+        else:
+            # Fallback to direct download
             local_dir = snapshot_download(
                 repo_id="amphion/Vevo",
                 repo_type="model",
                 cache_dir="./ckpts/Vevo",
                 allow_patterns=["acoustic_modeling/Vocoder/*"],
             )
-            vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
             vocoder_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vocoder")
-            downloaded_resources["vocoder"] = True
-            print("Downloaded Vocoder")
-        else:
-            print("Vocoder already downloaded, skipping...")
-            vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
-            vocoder_ckpt_path = os.path.join(
-                "./ckpts/Vevo/snapshots/amphion/Vevo", "acoustic_modeling/Vocoder"
-            )

         # Initialize pipeline
         inference_pipeline = VevoInferencePipeline(
@@ -321,9 +408,13 @@ def get_pipeline(pipeline_type):
         )

     elif pipeline_type == "timbre":
-        # …
-        # …
-        if not downloaded_resources["tokenizer_vq8192"]:
+        # Use already downloaded Content-Style Tokenizer
+        if downloaded_resources["tokenizer_vq8192"]:
+            content_style_tokenizer_ckpt_path = os.path.join(
+                downloaded_content_style_tokenizer_path, "tokenizer/vq8192"
+            )
+        else:
+            # Fallback to direct download
             local_dir = snapshot_download(
                 repo_id="amphion/Vevo",
                 repo_type="model",
@@ -331,53 +422,38 @@ def get_pipeline(pipeline_type):
                 allow_patterns=["tokenizer/vq8192/*"],
             )
             content_style_tokenizer_ckpt_path = os.path.join(local_dir, "tokenizer/vq8192")
-            downloaded_resources["tokenizer_vq8192"] = True
-            print("Downloaded Content-Style Tokenizer (vq8192)")
-        else:
-            print("Content-Style Tokenizer (vq8192) already downloaded, skipping...")
-            content_style_tokenizer_ckpt_path = os.path.join(
-                "./ckpts/Vevo/snapshots/amphion/Vevo", "tokenizer/vq8192"
-            )

-        # …
-        # …
-        if not downloaded_resources["fmt_Vq8192ToMels"]:
+        # Use already downloaded Flow Matching Transformer
+        fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
+        if downloaded_resources["fmt_Vq8192ToMels"]:
+            fmt_ckpt_path = os.path.join(
+                downloaded_fmt_path, "acoustic_modeling/Vq8192ToMels"
+            )
+        else:
+            # Fallback to direct download
             local_dir = snapshot_download(
                 repo_id="amphion/Vevo",
                 repo_type="model",
                 cache_dir="./ckpts/Vevo",
                 allow_patterns=["acoustic_modeling/Vq8192ToMels/*"],
             )
-            fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
             fmt_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vq8192ToMels")
-            downloaded_resources["fmt_Vq8192ToMels"] = True
-            print("Downloaded Flow Matching Transformer (Vq8192ToMels)")
-        else:
-            print("Flow Matching Transformer (Vq8192ToMels) already downloaded, skipping...")
-            fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
-            fmt_ckpt_path = os.path.join(
-                "./ckpts/Vevo/snapshots/amphion/Vevo", "acoustic_modeling/Vq8192ToMels"
-            )

-        # …
-        # …
-        if not downloaded_resources["vocoder"]:
+        # Use already downloaded Vocoder
+        vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
+        if downloaded_resources["vocoder"]:
+            vocoder_ckpt_path = os.path.join(
+                downloaded_vocoder_path, "acoustic_modeling/Vocoder"
+            )
+        else:
+            # Fallback to direct download
             local_dir = snapshot_download(
                 repo_id="amphion/Vevo",
                 repo_type="model",
                 cache_dir="./ckpts/Vevo",
                 allow_patterns=["acoustic_modeling/Vocoder/*"],
             )
-            vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
             vocoder_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vocoder")
-            downloaded_resources["vocoder"] = True
-            print("Downloaded Vocoder")
-        else:
-            print("Vocoder already downloaded, skipping...")
-            vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
-            vocoder_ckpt_path = os.path.join(
-                "./ckpts/Vevo/snapshots/amphion/Vevo", "acoustic_modeling/Vocoder"
-            )

         # Initialize pipeline
         inference_pipeline = VevoInferencePipeline(
@@ -390,9 +466,13 @@ def get_pipeline(pipeline_type):
         )

     elif pipeline_type == "tts":
-        # …
-        # …
-        if not downloaded_resources["tokenizer_vq8192"]:
+        # Use already downloaded Content-Style Tokenizer
+        if downloaded_resources["tokenizer_vq8192"]:
+            content_style_tokenizer_ckpt_path = os.path.join(
+                downloaded_content_style_tokenizer_path, "tokenizer/vq8192"
+            )
+        else:
+            # Fallback to direct download
             local_dir = snapshot_download(
                 repo_id="amphion/Vevo",
                 repo_type="model",
@@ -400,73 +480,54 @@ def get_pipeline(pipeline_type):
                 allow_patterns=["tokenizer/vq8192/*"],
             )
             content_style_tokenizer_ckpt_path = os.path.join(local_dir, "tokenizer/vq8192")
-            downloaded_resources["tokenizer_vq8192"] = True
-            print("Downloaded Content-Style Tokenizer (vq8192)")
-        else:
-            print("Content-Style Tokenizer (vq8192) already downloaded, skipping...")
-            content_style_tokenizer_ckpt_path = os.path.join(
-                "./ckpts/Vevo/snapshots/amphion/Vevo", "tokenizer/vq8192"
-            )

-        # …
-        # …
-        if not downloaded_resources["ar_PhoneToVq8192"]:
+        # Use already downloaded Autoregressive Transformer (TTS specific)
+        ar_cfg_path = "./models/vc/vevo/config/PhoneToVq8192.json"
+        if downloaded_resources["ar_PhoneToVq8192"]:
+            ar_ckpt_path = os.path.join(
+                downloaded_ar_phone_path, "contentstyle_modeling/PhoneToVq8192"
+            )
+        else:
+            # Fallback to direct download
             local_dir = snapshot_download(
                 repo_id="amphion/Vevo",
                 repo_type="model",
                 cache_dir="./ckpts/Vevo",
                 allow_patterns=["contentstyle_modeling/PhoneToVq8192/*"],
             )
-            ar_cfg_path = "./models/vc/vevo/config/PhoneToVq8192.json"
             ar_ckpt_path = os.path.join(local_dir, "contentstyle_modeling/PhoneToVq8192")
-            downloaded_resources["ar_PhoneToVq8192"] = True
-            print("Downloaded Autoregressive Transformer (PhoneToVq8192)")
-        else:
-            print("Autoregressive Transformer (PhoneToVq8192) already downloaded, skipping...")
-            ar_cfg_path = "./models/vc/vevo/config/PhoneToVq8192.json"
-            ar_ckpt_path = os.path.join(
-                "./ckpts/Vevo/snapshots/amphion/Vevo", "contentstyle_modeling/PhoneToVq8192"
-            )

-        # …
-        # …
-        if not downloaded_resources["fmt_Vq8192ToMels"]:
+        # Use already downloaded Flow Matching Transformer
+        fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
+        if downloaded_resources["fmt_Vq8192ToMels"]:
+            fmt_ckpt_path = os.path.join(
+                downloaded_fmt_path, "acoustic_modeling/Vq8192ToMels"
+            )
+        else:
+            # Fallback to direct download
             local_dir = snapshot_download(
                 repo_id="amphion/Vevo",
                 repo_type="model",
                 cache_dir="./ckpts/Vevo",
                 allow_patterns=["acoustic_modeling/Vq8192ToMels/*"],
             )
-            fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
             fmt_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vq8192ToMels")
-            downloaded_resources["fmt_Vq8192ToMels"] = True
-            print("Downloaded Flow Matching Transformer (Vq8192ToMels)")
-        else:
-            print("Flow Matching Transformer (Vq8192ToMels) already downloaded, skipping...")
-            fmt_cfg_path = "./models/vc/vevo/config/Vq8192ToMels.json"
-            fmt_ckpt_path = os.path.join(
-                "./ckpts/Vevo/snapshots/amphion/Vevo", "acoustic_modeling/Vq8192ToMels"
-            )

-        # …
-        # …
-        if not downloaded_resources["vocoder"]:
+        # Use already downloaded Vocoder
+        vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
+        if downloaded_resources["vocoder"]:
+            vocoder_ckpt_path = os.path.join(
+                downloaded_vocoder_path, "acoustic_modeling/Vocoder"
+            )
+        else:
+            # Fallback to direct download
             local_dir = snapshot_download(
                 repo_id="amphion/Vevo",
                 repo_type="model",
                 cache_dir="./ckpts/Vevo",
                 allow_patterns=["acoustic_modeling/Vocoder/*"],
             )
-            vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
             vocoder_ckpt_path = os.path.join(local_dir, "acoustic_modeling/Vocoder")
-            downloaded_resources["vocoder"] = True
-            print("Downloaded Vocoder")
-        else:
-            print("Vocoder already downloaded, skipping...")
-            vocoder_cfg_path = "./models/vc/vevo/config/Vocoder.json"
-            vocoder_ckpt_path = os.path.join(
-                "./ckpts/Vevo/snapshots/amphion/Vevo", "acoustic_modeling/Vocoder"
-            )

         # Initialize pipeline
         inference_pipeline = VevoInferencePipeline(
@@ -895,98 +956,6 @@ def vevo_tts(text, ref_wav, timbre_ref_wav=None, style_ref_text=None, src_langua
         traceback.print_exc()
         raise e

-# 在程序启动时下载所有需要的模型资源
-# Download all necessary model resources at startup
-def preload_all_resources():
-    print("Preloading all model resources...")
-    # 下载配置文件
-    # Download configuration files
-    setup_configs()
-
-    # 下载Content Tokenizer (vq32)
-    # Download Content Tokenizer (vq32)
-    if not downloaded_resources["tokenizer_vq32"]:
-        print("Preloading Content Tokenizer (vq32)...")
-        local_dir = snapshot_download(
-            repo_id="amphion/Vevo",
-            repo_type="model",
-            cache_dir="./ckpts/Vevo",
-            allow_patterns=["tokenizer/vq32/*"],
-        )
-        downloaded_resources["tokenizer_vq32"] = True
-        print("Content Tokenizer (vq32) download completed")
-
-    # 下载Content-Style Tokenizer (vq8192)
-    # Download Content-Style Tokenizer (vq8192)
-    if not downloaded_resources["tokenizer_vq8192"]:
-        print("Preloading Content-Style Tokenizer (vq8192)...")
-        local_dir = snapshot_download(
-            repo_id="amphion/Vevo",
-            repo_type="model",
-            cache_dir="./ckpts/Vevo",
-            allow_patterns=["tokenizer/vq8192/*"],
-        )
-        downloaded_resources["tokenizer_vq8192"] = True
-        print("Content-Style Tokenizer (vq8192) download completed")
-
-    # 下载Autoregressive Transformer (Vq32ToVq8192)
-    # Download Autoregressive Transformer (Vq32ToVq8192)
-    if not downloaded_resources["ar_Vq32ToVq8192"]:
-        print("Preloading Autoregressive Transformer (Vq32ToVq8192)...")
-        local_dir = snapshot_download(
-            repo_id="amphion/Vevo",
-            repo_type="model",
-            cache_dir="./ckpts/Vevo",
-            allow_patterns=["contentstyle_modeling/Vq32ToVq8192/*"],
-        )
-        downloaded_resources["ar_Vq32ToVq8192"] = True
-        print("Autoregressive Transformer (Vq32ToVq8192) download completed")
-
-    # 下载Autoregressive Transformer (PhoneToVq8192)
-    # Download Autoregressive Transformer (PhoneToVq8192)
-    if not downloaded_resources["ar_PhoneToVq8192"]:
-        print("Preloading Autoregressive Transformer (PhoneToVq8192)...")
-        local_dir = snapshot_download(
-            repo_id="amphion/Vevo",
-            repo_type="model",
-            cache_dir="./ckpts/Vevo",
-            allow_patterns=["contentstyle_modeling/PhoneToVq8192/*"],
-        )
-        downloaded_resources["ar_PhoneToVq8192"] = True
-        print("Autoregressive Transformer (PhoneToVq8192) download completed")
-
-    # 下载Flow Matching Transformer
-    # Download Flow Matching Transformer
-    if not downloaded_resources["fmt_Vq8192ToMels"]:
-        print("Preloading Flow Matching Transformer (Vq8192ToMels)...")
-        local_dir = snapshot_download(
-            repo_id="amphion/Vevo",
-            repo_type="model",
-            cache_dir="./ckpts/Vevo",
-            allow_patterns=["acoustic_modeling/Vq8192ToMels/*"],
-        )
-        downloaded_resources["fmt_Vq8192ToMels"] = True
-        print("Flow Matching Transformer (Vq8192ToMels) download completed")
-
-    # 下载Vocoder
-    # Download Vocoder
-    if not downloaded_resources["vocoder"]:
-        print("Preloading Vocoder...")
-        local_dir = snapshot_download(
-            repo_id="amphion/Vevo",
-            repo_type="model",
-            cache_dir="./ckpts/Vevo",
-            allow_patterns=["acoustic_modeling/Vocoder/*"],
-        )
-        downloaded_resources["vocoder"] = True
-        print("Vocoder download completed")
-
-    print("All model resources preloading completed!")
-
-# 在创建Gradio界面之前预加载所有资源
-# Preload all resources before creating the Gradio interface
-preload_all_resources()
-
 # Create Gradio interface
 with gr.Blocks(title="Vevo: Controllable Zero-Shot Voice Imitation with Self-Supervised Disentanglement") as demo:
     gr.Markdown("# Vevo: Controllable Zero-Shot Voice Imitation with Self-Supervised Disentanglement")
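In short, the commit downloads every model artifact once at startup, records the snapshot directory that snapshot_download returns, and has get_pipeline join checkpoint paths against that recorded directory instead of a hand-built "./ckpts/Vevo/snapshots/amphion/Vevo" string, which appears to be the path issue the "fix path" message refers to. Below is a minimal sketch of the same preload-then-reuse pattern; the repo id and file patterns mirror the diff above, but the helper names and the single-dictionary bookkeeping are illustrative, not the Space's actual code.

# Sketch only: assumes huggingface_hub is installed; helper names are hypothetical.
import os
from huggingface_hub import snapshot_download

_preloaded = {}  # resource key -> snapshot directory returned by snapshot_download

def preload(key, patterns):
    # Download the matching files once and remember the real snapshot path.
    if key not in _preloaded:
        _preloaded[key] = snapshot_download(
            repo_id="amphion/Vevo",
            repo_type="model",
            cache_dir="./ckpts/Vevo",
            allow_patterns=patterns,
        )
    return _preloaded[key]

def content_style_tokenizer_path():
    # Join against the recorded snapshot directory rather than guessing the
    # on-disk cache layout under ./ckpts/Vevo.
    local_dir = preload("tokenizer_vq8192", ["tokenizer/vq8192/*"])
    return os.path.join(local_dir, "tokenizer/vq8192")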
|