chenjoya committed on
Commit
05b1e58
·
verified ·
1 Parent(s): 5207817

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -34
app.py CHANGED
@@ -6,9 +6,9 @@ if hf_spaces:
6
  except Exception as e:
7
  print(e)
8
  import os
 
9
  import gradio as gr
10
 
11
- from kokoro import KPipeline
12
  from demo.infer import LiveCCDemoInfer
13
 
14
  class GradioBackend:
@@ -20,7 +20,6 @@ class GradioBackend:
20
  }
21
  def __init__(self, model_path: str = 'chenjoya/LiveCC-7B-Instruct'):
22
  self.infer = LiveCCDemoInfer(model_path)
23
- self.audio_pipeline = KPipeline(lang_code='a')
24
 
25
  def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
26
  return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
@@ -33,7 +32,7 @@ with gr.Blocks() as demo:
33
  gr.Markdown("1️⃣ Select Mode, Real-Time Commentary (LiveCC) or Conversation (Common QA)")
34
  gr.Markdown("2️⃣🅰️ **Real-Time Commentary: Input a query (optional) -> Click or upload a video**.")
35
  gr.Markdown("2️⃣🅱️ **Conversation: Click or upload a video -> Input a query**. But as the past_key_values support in ZeroGPU is not good, multi-turn conversation could be slower.")
36
- gr.Markdown("*Web Gradio has unexpected latency (3s~5s). If you want to enjoy the very real-time experience, please deploy locally https://github.com/showlab/livecc*")
37
  gr_state = gr.State({}, render=False) # control all useful state, including kv cache
38
  gr_video_state = gr.JSON({}, visible=False) # only record video state, belong to gr_state but lightweight
39
  gr_static_trigger = gr.Number(value=0, visible=False) # control start streaming or stop
@@ -53,7 +52,10 @@ with gr.Blocks() as demo:
53
  gr_examples = gr.Examples(
54
  examples=[
55
  'demo/sources/howto_fix_laptop_mute_1080p.mp4',
56
- 'demo/sources/writing_mute_1080p.mp4'
 
 
 
57
  ],
58
  inputs=[gr_video],
59
  )
@@ -76,22 +78,29 @@ with gr.Blocks() as demo:
76
  response, state = gradio_backend(message=message, history=history, state=state, mode=mode, hf_spaces=hf_spaces)
77
  yield response, state
78
 
79
- def gr_chatinterface_chatbot_clear_fn():
80
- return {}, {}, 0, 0
81
  gr_chatinterface = gr.ChatInterface(
82
  fn=gr_chatinterface_fn,
83
  type="messages",
84
  additional_inputs=[gr_state, gr_video, gr_radio_mode],
85
  additional_outputs=[gr_state]
86
  )
87
- gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
88
- gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
89
-
 
90
  def gr_for_streaming(history: list[gr.ChatMessage], video_state: dict, state: dict, mode: str, static_trigger: int, dynamic_trigger: int):
91
  if static_trigger == 0:
92
  yield [], {}, dynamic_trigger
93
  return
94
- yield history + [gr.ChatMessage(role="assistant", content='Loading video... thanks for waiting...')], state, dynamic_trigger
 
 
 
 
 
 
95
  if not js_monitor:
96
  video_state['video_timestamp'] = 19260817 # 👓
97
  state.update(video_state)
@@ -141,6 +150,7 @@ with gr.Blocks() as demo:
141
  inputs=[gr_radio_mode],
142
  outputs=[gr_static_trigger, gr_dynamic_trigger]
143
  )
 
144
  gr_dynamic_trigger.change(
145
  fn=gr_get_video_state,
146
  inputs=[gr_video_state],
@@ -151,28 +161,6 @@ with gr.Blocks() as demo:
151
  inputs=[gr_chatinterface.chatbot, gr_video_state, gr_state, gr_radio_mode, gr_static_trigger, gr_dynamic_trigger],
152
  outputs=[gr_chatinterface.chatbot, gr_state, gr_dynamic_trigger],
153
  )
154
-
155
  demo.queue(max_size=5, default_concurrency_limit=5)
156
- demo.launch(share=True)
157
-
158
-
159
- # --- for streaming ---
160
-
161
- # gr_tts = gr.Audio(visible=False, elem_id="gr_tts", streaming=True, autoplay=True)
162
- # def tts():
163
- # while True:
164
- # contents = ''
165
- # while not gradio_backend.contents.empty():
166
- # content = gradio_backend.contents.get()
167
- # contents += ' ' + content.rstrip(' ...')
168
- # contents = contents.strip()
169
- # if contents:
170
- # generator = gradio_backend.audio_pipeline(contents, voice='af_heart', speed=1.2)
171
- # for _, _, audio_torch in generator:
172
- # audio_np = audio_torch.cpu().numpy()
173
- # max_val = np.max(np.abs(audio_np))
174
- # if max_val > 0:
175
- # audio_np = audio_np / max_val
176
- # audio_int16 = (audio_np * 32767).astype(np.int16)
177
- # yield (24000, audio_int16)
178
- # gr_video.change(fn=tts, outputs=[gr_tts])
 
6
  except Exception as e:
7
  print(e)
8
  import os
9
+ import numpy as np
10
  import gradio as gr
11
 
 
12
  from demo.infer import LiveCCDemoInfer
13
 
14
  class GradioBackend:
 
20
  }
21
  def __init__(self, model_path: str = 'chenjoya/LiveCC-7B-Instruct'):
22
  self.infer = LiveCCDemoInfer(model_path)
 
23
 
24
  def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
25
  return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
 
32
  gr.Markdown("1️⃣ Select Mode, Real-Time Commentary (LiveCC) or Conversation (Common QA)")
33
  gr.Markdown("2️⃣🅰️ **Real-Time Commentary: Input a query (optional) -> Click or upload a video**.")
34
  gr.Markdown("2️⃣🅱️ **Conversation: Click or upload a video -> Input a query**. But as the past_key_values support in ZeroGPU is not good, multi-turn conversation could be slower.")
35
+ gr.Markdown("*HF Space Gradio has unsolvable latency (10s~20s). If you want to enjoy the very real-time experience, please deploy locally https://github.com/showlab/livecc*")
36
  gr_state = gr.State({}, render=False) # control all useful state, including kv cache
37
  gr_video_state = gr.JSON({}, visible=False) # only record video state, belong to gr_state but lightweight
38
  gr_static_trigger = gr.Number(value=0, visible=False) # control start streaming or stop
 
52
  gr_examples = gr.Examples(
53
  examples=[
54
  'demo/sources/howto_fix_laptop_mute_1080p.mp4',
55
+ 'demo/sources/writing_mute_1080p.mp4',
56
+ 'demo/sources/spacex_falcon9_mute_1080p.mp4',
57
+ 'demo/sources/warriors_vs_rockets_2025wcr1_mute_1080p.mp4',
58
+ 'demo/sources/dota2_facelessvoid_mute_1080p.mp4'
59
  ],
60
  inputs=[gr_video],
61
  )
 
78
  response, state = gradio_backend(message=message, history=history, state=state, mode=mode, hf_spaces=hf_spaces)
79
  yield response, state
80
 
81
+ def gr_chatinterface_chatbot_clear_fn(gr_dynamic_trigger):
82
+ return {}, {}, 0, gr_dynamic_trigger
83
  gr_chatinterface = gr.ChatInterface(
84
  fn=gr_chatinterface_fn,
85
  type="messages",
86
  additional_inputs=[gr_state, gr_video, gr_radio_mode],
87
  additional_outputs=[gr_state]
88
  )
89
+ gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, inputs=[gr_dynamic_trigger], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
90
+ gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], inputs=[gr_dynamic_trigger], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
91
+
92
+ @spaces.GPU
93
  def gr_for_streaming(history: list[gr.ChatMessage], video_state: dict, state: dict, mode: str, static_trigger: int, dynamic_trigger: int):
94
  if static_trigger == 0:
95
  yield [], {}, dynamic_trigger
96
  return
97
+ global gradio_backend
98
+ if gradio_backend is None:
99
+ yield '(ZeroGPU needs to initialize model under @spaces.GPU, thanks for waiting...)', state
100
+ gradio_backend = GradioBackend()
101
+ yield '(finished initialization, responding...)', state
102
+ waiting_prompt = 'Loading video now... thanks for waiting...'
103
+ yield history + [gr.ChatMessage(role="assistant", content=waiting_prompt)], state, dynamic_trigger
104
  if not js_monitor:
105
  video_state['video_timestamp'] = 19260817 # 👓
106
  state.update(video_state)
 
150
  inputs=[gr_radio_mode],
151
  outputs=[gr_static_trigger, gr_dynamic_trigger]
152
  )
153
+
154
  gr_dynamic_trigger.change(
155
  fn=gr_get_video_state,
156
  inputs=[gr_video_state],
 
161
  inputs=[gr_chatinterface.chatbot, gr_video_state, gr_state, gr_radio_mode, gr_static_trigger, gr_dynamic_trigger],
162
  outputs=[gr_chatinterface.chatbot, gr_state, gr_dynamic_trigger],
163
  )
164
+
165
  demo.queue(max_size=5, default_concurrency_limit=5)
166
+ demo.launch(share=True)