# TEST-02 / app.py
import os
import subprocess

import cv2
import gradio as gr
import torch
from auto_gptq import AutoGPTQForCausalLM
from PIL import Image
from transformers import AutoTokenizer

from processing_qwen_vl import QWenVLProcessor  # local module shipped alongside this app

model_id = "Qwen/Qwen-VL-Chat-Int4"

# Load the multimodal processor and the GPTQ-quantized model. trust_remote_code is
# required because the checkpoint ships custom modeling code.
processor = QWenVLProcessor.from_pretrained(model_id, trust_remote_code=True)
model = AutoGPTQForCausalLM.from_quantized(
    model_id,
    device="cuda" if torch.cuda.is_available() else "cpu",
    trust_remote_code=True,
).eval()
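
# Hedged alternative: the official Qwen-VL-Chat README drives the model through its
# chat() interface with a tokenizer rather than a processor. If QWenVLProcessor is
# unavailable, a sketch along these lines may work instead, assuming the AutoGPTQ
# wrapper forwards chat() to the underlying model and from_list_format() behaves as
# documented upstream (unverified here):
def chat_fallback(image_path, question):
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    query = tokenizer.from_list_format([
        {"image": image_path},  # local path or URL to the image
        {"text": question},
    ])
    response, _history = model.chat(tokenizer, query=query, history=None)
    return response
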
def capture_photo(filename="sitting.jpg"):
    """Grab one frame from the default webcam (device 0) and save it to disk."""
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    cap.release()
    if ret:
        cv2.imwrite(filename, frame)
        return filename
    return None
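
# Optional hardening (hypothetical helper, not wired into the UI below): device
# index 0 is not guaranteed on every host, so try a few indices before giving up.
def capture_photo_any(filename="sitting.jpg", max_index=3):
    for idx in range(max_index):
        cap = cv2.VideoCapture(idx)
        ret, frame = cap.read()
        cap.release()
        if ret:
            cv2.imwrite(filename, frame)
            return filename
    return None
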
def speak_text(text, lang="zh"):
    """Synthesize speech with the edge-tts CLI and play the resulting MP3."""
    voice = "zh-CN-XiaoxiaoNeural" if lang == "zh" else "en-US-AriaNeural"
    # Pass arguments as a list so quotes in the model's answer cannot break the command.
    subprocess.run(
        ["edge-tts", "--text", text, "--voice", voice, "--write-media", "output.mp3"],
        check=True,
    )
    # 'start' is a Windows shell builtin; 'afplay' exists only on macOS. On Linux,
    # swap in a player such as mpg123 or ffplay.
    os.system("start output.mp3" if os.name == "nt" else "afplay output.mp3")
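
# Hedged alternative: the edge_tts package that backs the edge-tts CLI also exposes
# a Python API, which avoids shelling out entirely. A minimal sketch, assuming
# edge_tts is installed; playback is left to the caller (e.g. a gr.Audio component
# could serve the saved file directly):
def speak_text_api(text, lang="zh", out_path="output.mp3"):
    import asyncio
    import edge_tts
    voice = "zh-CN-XiaoxiaoNeural" if lang == "zh" else "en-US-AriaNeural"
    asyncio.run(edge_tts.Communicate(text, voice).save(out_path))
    return out_path
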
def analyze_posture(image=None, auto_capture=False):
    """Run the VLM on an uploaded or freshly captured photo and judge the posture."""
    if auto_capture:
        image_path = capture_photo()
        if image_path is None:
            return "❌ Unable to open the webcam", None
        image = Image.open(image_path)
    elif image is None:
        return "❌ Please upload an image or enable auto-capture", None
    question = (
        "Please judge whether this person has poor sitting posture, such as a hunched "
        "back, leaning forward, or tilting. Answer in both Chinese and English."
    )
    inputs = processor(text=question, images=image, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=512)
    answer = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
    # Crude language check: speak the answer in whichever language(s) it contains.
    if "θ«‹" in answer:
        speak_text(answer, lang="zh")
    if "please" in answer.lower():
        speak_text(answer, lang="en")
    return answer, image

def run_auto_capture():
    return analyze_posture(auto_capture=True)

with gr.Blocks(title="Hunchback Detection Assistant") as demo:
    gr.Markdown("## πŸͺ‘ Qwen-VL-Chat-Int4 Hunchback Detection Demo")
    with gr.Row():
        with gr.Column():
            auto_btn = gr.Button("πŸ“· Auto-capture from webcam and analyze")
            image_input = gr.Image(type="pil", label="Or upload an image manually")
            submit_btn = gr.Button("πŸ“€ Upload and analyze")
        with gr.Column():
            output_text = gr.Textbox(label="🧠 Model verdict", lines=6)
            output_image = gr.Image(type="pil", label="Analyzed image")
    auto_btn.click(fn=run_auto_capture, outputs=[output_text, output_image])
    submit_btn.click(fn=analyze_posture, inputs=[image_input], outputs=[output_text, output_image])

if __name__ == "__main__":
    demo.launch(share=True)