Spaces:

thomwolf
/

Mimi-playground

Running on Zero

thomwolf HF Staff committed 7 days ago

Commit

c39a9ba

1 Parent(s): 0c8c55f

update

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import gradio as gr
-import random
 import time
 from huggingface_hub import hf_hub_download
@@ -40,22 +39,10 @@ def mimi_streaming_test(input_wave, max_duration_sec=10.0):
     sample_pcm = sample_pcm[None].to(device=device)
     print("streaming encoding...")
-    start_time = time.time()
-    all_codes = []
-    def run_loop():
-        for start_idx in range(0, sample_pcm.shape[-1], pcm_chunk_size):
-            end_idx = min(sample_pcm.shape[-1], start_idx + pcm_chunk_size)
-            chunk = sample_pcm[..., start_idx:end_idx]
-            with torch.no_grad():
-                codes = mimi.encode(chunk)
-            if codes.shape[-1]:
-                print(start_idx, codes.shape, end="\r")
-                all_codes.append(codes)
-    run_loop()
-    all_codes_th = torch.cat(all_codes, dim=-1)
-    print(f"codes {all_codes_th.shape} generated in {time.time() - start_time:.2f}s")
     all_codes_list = [all_codes_th[:, :1, :],
                         all_codes_th[:, :2, :],
@@ -82,7 +69,7 @@ demo = gr.Interface(
             #  gr.Audio(type="numpy", label="With 8 codebooks"),
             #  gr.Audio(type="numpy", label="With 16 codebooks"),
              gr.Audio(type="numpy", label="With 32 codebooks")],
-    examples= [["hello.mp3"]],
     title="Mimi tokenizer playground",
     description="Explore the quality of compression when using various number of code books in the Mimi model."
     )

 import gradio as gr
 import time
 from huggingface_hub import hf_hub_download
     sample_pcm = sample_pcm[None].to(device=device)
     print("streaming encoding...")
+    with torch.no_grad():
+        all_codes_th = mimi.encode(sample_pcm)
+    print(f"codes {all_codes_th.shape}")
     all_codes_list = [all_codes_th[:, :1, :],
                         all_codes_th[:, :2, :],
             #  gr.Audio(type="numpy", label="With 8 codebooks"),
             #  gr.Audio(type="numpy", label="With 16 codebooks"),
              gr.Audio(type="numpy", label="With 32 codebooks")],
+    examples= [["./hello.mp3"]],
     title="Mimi tokenizer playground",
     description="Explore the quality of compression when using various number of code books in the Mimi model."
     )