Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -6,9 +6,9 @@ if hf_spaces:
|
|
6 |
except Exception as e:
|
7 |
print(e)
|
8 |
import os
|
|
|
9 |
import gradio as gr
|
10 |
|
11 |
-
from kokoro import KPipeline
|
12 |
from demo.infer import LiveCCDemoInfer
|
13 |
|
14 |
class GradioBackend:
|
@@ -20,7 +20,6 @@ class GradioBackend:
|
|
20 |
}
|
21 |
def __init__(self, model_path: str = 'chenjoya/LiveCC-7B-Instruct'):
|
22 |
self.infer = LiveCCDemoInfer(model_path)
|
23 |
-
self.audio_pipeline = KPipeline(lang_code='a')
|
24 |
|
25 |
def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
|
26 |
return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
|
@@ -33,7 +32,7 @@ with gr.Blocks() as demo:
|
|
33 |
gr.Markdown("1️⃣ Select Mode, Real-Time Commentary (LiveCC) or Conversation (Common QA)")
|
34 |
gr.Markdown("2️⃣🅰️ **Real-Time Commentary: Input a query (optional) -> Click or upload a video**.")
|
35 |
gr.Markdown("2️⃣🅱️ **Conversation: Click or upload a video -> Input a query**. But as the past_key_values support in ZeroGPU is not good, multi-turn conversation could be slower.")
|
36 |
-
gr.Markdown("*
|
37 |
gr_state = gr.State({}, render=False) # control all useful state, including kv cache
|
38 |
gr_video_state = gr.JSON({}, visible=False) # only record video state, belong to gr_state but lightweight
|
39 |
gr_static_trigger = gr.Number(value=0, visible=False) # control start streaming or stop
|
@@ -53,7 +52,10 @@ with gr.Blocks() as demo:
|
|
53 |
gr_examples = gr.Examples(
|
54 |
examples=[
|
55 |
'demo/sources/howto_fix_laptop_mute_1080p.mp4',
|
56 |
-
'demo/sources/writing_mute_1080p.mp4'
|
|
|
|
|
|
|
57 |
],
|
58 |
inputs=[gr_video],
|
59 |
)
|
@@ -76,22 +78,29 @@ with gr.Blocks() as demo:
|
|
76 |
response, state = gradio_backend(message=message, history=history, state=state, mode=mode, hf_spaces=hf_spaces)
|
77 |
yield response, state
|
78 |
|
79 |
-
def gr_chatinterface_chatbot_clear_fn():
|
80 |
-
return {}, {}, 0,
|
81 |
gr_chatinterface = gr.ChatInterface(
|
82 |
fn=gr_chatinterface_fn,
|
83 |
type="messages",
|
84 |
additional_inputs=[gr_state, gr_video, gr_radio_mode],
|
85 |
additional_outputs=[gr_state]
|
86 |
)
|
87 |
-
gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
88 |
-
gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
89 |
-
|
|
|
90 |
def gr_for_streaming(history: list[gr.ChatMessage], video_state: dict, state: dict, mode: str, static_trigger: int, dynamic_trigger: int):
|
91 |
if static_trigger == 0:
|
92 |
yield [], {}, dynamic_trigger
|
93 |
return
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
if not js_monitor:
|
96 |
video_state['video_timestamp'] = 19260817 # 👓
|
97 |
state.update(video_state)
|
@@ -141,6 +150,7 @@ with gr.Blocks() as demo:
|
|
141 |
inputs=[gr_radio_mode],
|
142 |
outputs=[gr_static_trigger, gr_dynamic_trigger]
|
143 |
)
|
|
|
144 |
gr_dynamic_trigger.change(
|
145 |
fn=gr_get_video_state,
|
146 |
inputs=[gr_video_state],
|
@@ -151,28 +161,6 @@ with gr.Blocks() as demo:
|
|
151 |
inputs=[gr_chatinterface.chatbot, gr_video_state, gr_state, gr_radio_mode, gr_static_trigger, gr_dynamic_trigger],
|
152 |
outputs=[gr_chatinterface.chatbot, gr_state, gr_dynamic_trigger],
|
153 |
)
|
154 |
-
|
155 |
demo.queue(max_size=5, default_concurrency_limit=5)
|
156 |
-
demo.launch(share=True)
|
157 |
-
|
158 |
-
|
159 |
-
# --- for streaming ---
|
160 |
-
|
161 |
-
# gr_tts = gr.Audio(visible=False, elem_id="gr_tts", streaming=True, autoplay=True)
|
162 |
-
# def tts():
|
163 |
-
# while True:
|
164 |
-
# contents = ''
|
165 |
-
# while not gradio_backend.contents.empty():
|
166 |
-
# content = gradio_backend.contents.get()
|
167 |
-
# contents += ' ' + content.rstrip(' ...')
|
168 |
-
# contents = contents.strip()
|
169 |
-
# if contents:
|
170 |
-
# generator = gradio_backend.audio_pipeline(contents, voice='af_heart', speed=1.2)
|
171 |
-
# for _, _, audio_torch in generator:
|
172 |
-
# audio_np = audio_torch.cpu().numpy()
|
173 |
-
# max_val = np.max(np.abs(audio_np))
|
174 |
-
# if max_val > 0:
|
175 |
-
# audio_np = audio_np / max_val
|
176 |
-
# audio_int16 = (audio_np * 32767).astype(np.int16)
|
177 |
-
# yield (24000, audio_int16)
|
178 |
-
# gr_video.change(fn=tts, outputs=[gr_tts])
|
|
|
6 |
except Exception as e:
|
7 |
print(e)
|
8 |
import os
|
9 |
+
import numpy as np
|
10 |
import gradio as gr
|
11 |
|
|
|
12 |
from demo.infer import LiveCCDemoInfer
|
13 |
|
14 |
class GradioBackend:
|
|
|
20 |
}
|
21 |
def __init__(self, model_path: str = 'chenjoya/LiveCC-7B-Instruct'):
|
22 |
self.infer = LiveCCDemoInfer(model_path)
|
|
|
23 |
|
24 |
def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
|
25 |
return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
|
|
|
32 |
gr.Markdown("1️⃣ Select Mode, Real-Time Commentary (LiveCC) or Conversation (Common QA)")
|
33 |
gr.Markdown("2️⃣🅰️ **Real-Time Commentary: Input a query (optional) -> Click or upload a video**.")
|
34 |
gr.Markdown("2️⃣🅱️ **Conversation: Click or upload a video -> Input a query**. But as the past_key_values support in ZeroGPU is not good, multi-turn conversation could be slower.")
|
35 |
+
gr.Markdown("*HF Space Gradio has unsolvable latency (10s~20s). If you want to enjoy the very real-time experience, please deploy locally https://github.com/showlab/livecc*")
|
36 |
gr_state = gr.State({}, render=False) # control all useful state, including kv cache
|
37 |
gr_video_state = gr.JSON({}, visible=False) # only record video state, belong to gr_state but lightweight
|
38 |
gr_static_trigger = gr.Number(value=0, visible=False) # control start streaming or stop
|
|
|
52 |
gr_examples = gr.Examples(
|
53 |
examples=[
|
54 |
'demo/sources/howto_fix_laptop_mute_1080p.mp4',
|
55 |
+
'demo/sources/writing_mute_1080p.mp4',
|
56 |
+
'demo/sources/spacex_falcon9_mute_1080p.mp4',
|
57 |
+
'demo/sources/warriors_vs_rockets_2025wcr1_mute_1080p.mp4',
|
58 |
+
'demo/sources/dota2_facelessvoid_mute_1080p.mp4'
|
59 |
],
|
60 |
inputs=[gr_video],
|
61 |
)
|
|
|
78 |
response, state = gradio_backend(message=message, history=history, state=state, mode=mode, hf_spaces=hf_spaces)
|
79 |
yield response, state
|
80 |
|
81 |
+
def gr_chatinterface_chatbot_clear_fn(gr_dynamic_trigger):
|
82 |
+
return {}, {}, 0, gr_dynamic_trigger
|
83 |
gr_chatinterface = gr.ChatInterface(
|
84 |
fn=gr_chatinterface_fn,
|
85 |
type="messages",
|
86 |
additional_inputs=[gr_state, gr_video, gr_radio_mode],
|
87 |
additional_outputs=[gr_state]
|
88 |
)
|
89 |
+
gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, inputs=[gr_dynamic_trigger], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
90 |
+
gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], inputs=[gr_dynamic_trigger], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
91 |
+
|
92 |
+
@spaces.GPU
|
93 |
def gr_for_streaming(history: list[gr.ChatMessage], video_state: dict, state: dict, mode: str, static_trigger: int, dynamic_trigger: int):
|
94 |
if static_trigger == 0:
|
95 |
yield [], {}, dynamic_trigger
|
96 |
return
|
97 |
+
global gradio_backend
|
98 |
+
if gradio_backend is None:
|
99 |
+
yield '(ZeroGPU needs to initialize model under @spaces.GPU, thanks for waiting...)', state
|
100 |
+
gradio_backend = GradioBackend()
|
101 |
+
yield '(finished initialization, responding...)', state
|
102 |
+
waiting_prompt = 'Loading video now... thanks for waiting...'
|
103 |
+
yield history + [gr.ChatMessage(role="assistant", content=waiting_prompt)], state, dynamic_trigger
|
104 |
if not js_monitor:
|
105 |
video_state['video_timestamp'] = 19260817 # 👓
|
106 |
state.update(video_state)
|
|
|
150 |
inputs=[gr_radio_mode],
|
151 |
outputs=[gr_static_trigger, gr_dynamic_trigger]
|
152 |
)
|
153 |
+
|
154 |
gr_dynamic_trigger.change(
|
155 |
fn=gr_get_video_state,
|
156 |
inputs=[gr_video_state],
|
|
|
161 |
inputs=[gr_chatinterface.chatbot, gr_video_state, gr_state, gr_radio_mode, gr_static_trigger, gr_dynamic_trigger],
|
162 |
outputs=[gr_chatinterface.chatbot, gr_state, gr_dynamic_trigger],
|
163 |
)
|
164 |
+
|
165 |
demo.queue(max_size=5, default_concurrency_limit=5)
|
166 |
+
demo.launch(share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|