Spaces:

Didier
/

Vision_Language_SmolVLM2

Running on Zero

Vision_Language_SmolVLM2 / module_vision.py

Update module_vision.py

9f10199 verified 25 days ago

1.01 kB

	"""
	File: module_vision.py
	Description: A module for chat using video/image + text with a multimodal interface.
	Author: Didier Guillevic
	Date: 2025-04-02
	"""

	import gradio as gr
	import vlm

	def process(message, history):
	    """Stream the model's reply for one chat turn.

	    The incoming turn and the prior conversation are handed to
	    ``vlm.build_messages``; the resulting message list is then passed to
	    ``vlm.stream_response`` and every item it produces is re-yielded
	    unchanged to the caller.

	    Args:
	        message: The latest user turn. Presumably a dict with "text" and
	            "files" keys, since the interface is multimodal -- confirm
	            against ``vlm.build_messages``.
	        history: The earlier turns of the conversation, as supplied by the
	            chat interface.

	    Yields:
	        Whatever ``vlm.stream_response`` yields for the built messages.
	    """
	    yield from vlm.stream_response(vlm.build_messages(message, history))

	# Sample prompts offered in the chat UI. Each example is a one-element list
	# holding a multimodal message: the prompt text plus the media file(s) it
	# refers to. File names are given without a directory.
	# NOTE(review): presumably resolved relative to the app's working
	# directory -- confirm the files ship alongside this module.
	examples = [
	    [{"text": "What is happening in the video?", "files": ["Usain_Bolt_floats_to_victory.mp4"]}],
	    [{"text": "Pourrais-tu décrire cette image?", "files": ["le_monde_2025-04-01.jpg"]}],
	    # Fixed typo in the user-facing prompt: "descrive" -> "describe".
	    [{"text": "Could you describe the video?", "files": ["threads_brittlestar_post_DIABZcnJ.mp4"]}],
	]

	#
	# User interface
	#
	with gr.Blocks() as demo:
	    # A single multimodal chat widget: `process` produces the streamed
	    # replies, and the sample prompts come from the module-level `examples`.
	    chat_interface = gr.ChatInterface(
	        # Behaviour
	        fn=process,
	        type="messages",
	        multimodal=True,
	        # Presentation
	        description="Chat with text / text+image / text+video.",
	        stop_btn="Stop Generation",
	        # Example prompts (caching of example outputs is turned off)
	        examples=examples,
	        cache_examples=False,
	    )