import os

import cv2
import gradio as gr
import torch
from PIL import Image

from auto_gptq import AutoGPTQForCausalLM
# QWenVLProcessor is assumed to come from a local processing_qwen_vl.py file
# shipped next to this script; it is not part of the transformers package.
from processing_qwen_vl import QWenVLProcessor

model_id = "Qwen/Qwen-VL-Chat-Int4"

processor = QWenVLProcessor.from_pretrained(model_id, trust_remote_code=True)

model = AutoGPTQForCausalLM.from_quantized(
    model_id,
    device="cuda" if torch.cuda.is_available() else "cpu",
    trust_remote_code=True,
).eval()
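
# Note: without a CUDA device the model falls back to CPU, where int4
# generation is very slow; a GPU is strongly recommended for this demo.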


def capture_photo(filename="sitting.jpg"):
    """Grab a single frame from the default webcam and save it to disk."""
    cap = cv2.VideoCapture(0)  # index 0 selects the first attached camera
    ret, frame = cap.read()
    cap.release()
    if ret:
        cv2.imwrite(filename, frame)
        return filename
    return None


def speak_text(text, lang="zh"):
    """Synthesize speech with the edge-tts CLI, then play the resulting MP3."""
    voice = "zh-CN-XiaoxiaoNeural" if lang == "zh" else "en-US-AriaNeural"
    os.system(f'edge-tts --text "{text}" --voice "{voice}" --write-media output.mp3')
    # "start" hands the file to the default player on Windows; afplay is macOS-only.
    os.system('start output.mp3' if os.name == 'nt' else 'afplay output.mp3')
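    # On Linux neither command exists; a CLI player such as mpg123 could be
    # substituted here, e.g. os.system("mpg123 output.mp3").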


def analyze_posture(image=None, auto_capture=False):
    if auto_capture:
        image_path = capture_photo()
        if image_path is None:
            return "❌ Could not start the webcam", None
        image = Image.open(image_path)
    elif image is None:
        return "❌ Please upload an image or use auto-capture", None

    # "Please judge whether this person has poor sitting posture (e.g. hunched
    # back, leaning forward, or tilted). Answer in both Chinese and English."
    question = "請判斷這個人是否坐姿不良（如駝背、前傾或歪斜），用中英文回答。"
    inputs = processor(text=question, images=image, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=512)
    answer = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()

    # Speak whichever halves of the bilingual answer are present: "請" marks
    # Chinese text, "please" marks English text.
    if "請" in answer:
        speak_text(answer, lang="zh")
    if "please" in answer.lower():
        speak_text(answer, lang="en")

    return answer, image


def run_auto_capture():
    return analyze_posture(auto_capture=True)


with gr.Blocks(title="Slouch Detection Assistant") as demo:
    gr.Markdown("## 🪑 Qwen-VL-Chat-Int4 Slouch Detection Demo")

    with gr.Row():
        with gr.Column():
            auto_btn = gr.Button("📷 Auto-capture from the webcam and analyze")
            image_input = gr.Image(type="pil", label="Or upload an image manually")
            submit_btn = gr.Button("📤 Upload and analyze")
        with gr.Column():
            output_text = gr.Textbox(label="🧠 Model verdict", lines=6)
            output_image = gr.Image(type="pil", label="Analyzed image")

    auto_btn.click(fn=run_auto_capture, outputs=[output_text, output_image])
    submit_btn.click(fn=analyze_posture, inputs=[image_input], outputs=[output_text, output_image])

demo.launch(share=True)
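
# With share=True Gradio prints a temporary public *.gradio.live URL in
# addition to the local one; drop the flag to keep the demo local-only.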