Upload 3 files
- app.py +73 -0
- processing_qwen_vl.py +26 -0
- requirements.txt +15 -0
app.py
ADDED
@@ -0,0 +1,73 @@
import gradio as gr
import torch
import cv2
from PIL import Image
from auto_gptq import AutoGPTQForCausalLM
from processing_qwen_vl import QWenVLProcessor
import os

model_id = "Qwen/Qwen-VL-Chat-Int4"

processor = QWenVLProcessor.from_pretrained(model_id, trust_remote_code=True)

model = AutoGPTQForCausalLM.from_quantized(
    model_id,
    device="cuda" if torch.cuda.is_available() else "cpu",
    trust_remote_code=True,
).eval()

def capture_photo(filename="sitting.jpg"):
    """Grab one frame from the default webcam and save it to disk."""
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    cap.release()
    if ret:
        cv2.imwrite(filename, frame)
        return filename
    return None

def speak_text(text, lang="zh"):
    """Synthesize speech with the edge-tts CLI and play it back."""
    voice = "zh-CN-XiaoxiaoNeural" if lang == "zh" else "en-US-AriaNeural"
    os.system(f'edge-tts --text "{text}" --voice "{voice}" --write-media output.mp3')
    # "start" covers Windows, "afplay" covers macOS; Linux needs another player (e.g. mpg123).
    os.system('start output.mp3' if os.name == 'nt' else 'afplay output.mp3')

def analyze_posture(image=None, auto_capture=False):
    if auto_capture:
        image_path = capture_photo()
        if image_path is None:
            return "❌ Unable to start the camera", None
        image = Image.open(image_path)
    elif image is None:
        return "❌ Please upload an image or use auto-capture", None

    question = (
        "Please judge whether this person's sitting posture is poor "
        "(e.g. hunched back, leaning forward, or slouching sideways). "
        "Answer in both Chinese and English."
    )
    inputs = processor(text=question, images=image, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=512)
    answer = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()

    # Crude language check on the reply: speak the Chinese part if "請" appears,
    # and the English part if "please" appears.
    if "請" in answer:
        speak_text(answer, lang="zh")
    if "please" in answer.lower():
        speak_text(answer, lang="en")

    return answer, image

def run_auto_capture():
    return analyze_posture(auto_capture=True)

with gr.Blocks(title="Hunchback Detection Assistant") as demo:
    gr.Markdown("## Qwen-VL-Chat-Int4 Hunchback Detection Demo")

    with gr.Row():
        with gr.Column():
            auto_btn = gr.Button("📷 Auto-capture a photo and analyze")
            image_input = gr.Image(type="pil", label="Or upload an image manually")
            submit_btn = gr.Button("📤 Upload and analyze")
        with gr.Column():
            output_text = gr.Textbox(label="🧠 Model verdict", lines=6)
            output_image = gr.Image(type="pil", label="Analyzed image")

    auto_btn.click(fn=run_auto_capture, outputs=[output_text, output_image])
    submit_btn.click(fn=analyze_posture, inputs=[image_input], outputs=[output_text, output_image])

demo.launch(share=True)
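Note on inference: QWenVLProcessor (defined in processing_qwen_vl.py below) is a custom shim that pairs the Qwen tokenizer with a stock CLIP image processor, so the pixel_values it produces are not necessarily what Qwen-VL-Chat-Int4 expects in generate(). If that path misbehaves, a minimal, untested sketch of the chat-style interface described on the Qwen-VL-Chat model card looks roughly like the following; the "sitting.jpg" path is simply the file written by capture_photo().

# Sketch only: chat-style inference following the Qwen-VL-Chat model card; not wired into app.py.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat-Int4", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-VL-Chat-Int4", device_map="auto", trust_remote_code=True
).eval()

# Interleave the captured image with the posture question.
query = tokenizer.from_list_format([
    {"image": "sitting.jpg"},  # file written by capture_photo()
    {"text": "Is this person's sitting posture poor (hunched back, leaning forward)? "
             "Answer in both Chinese and English."},
])
response, history = model.chat(tokenizer, query=query, history=None)
print(response)

Switching analyze_posture() over to this interface would make the custom processor below unnecessary, at the cost of relying on the model's remote code.
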
processing_qwen_vl.py
ADDED
@@ -0,0 +1,26 @@
from transformers import AutoTokenizer, BatchFeature, CLIPImageProcessor

class QWenVLProcessor:
    """Minimal shim pairing the Qwen tokenizer with a stock CLIP image processor."""

    def __init__(self, tokenizer, image_processor):
        self.tokenizer = tokenizer
        self.image_processor = image_processor

    @classmethod
    def from_pretrained(cls, model_id, **kwargs):
        tokenizer = AutoTokenizer.from_pretrained(model_id, **kwargs)
        image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
        return cls(tokenizer=tokenizer, image_processor=image_processor)

    def __call__(self, text=None, images=None, return_tensors=None):
        if images is not None:
            image_inputs = self.image_processor(images, return_tensors=return_tensors)
        else:
            image_inputs = {}
        if text is not None:
            text_inputs = self.tokenizer(text, return_tensors=return_tensors, padding=True)
        else:
            text_inputs = {}
        # Wrap in a BatchFeature so callers can move the whole batch with .to(device).
        return BatchFeature(data={**text_inputs, **image_inputs})

    def batch_decode(self, *args, **kwargs):
        return self.tokenizer.batch_decode(*args, **kwargs)
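For a quick standalone check of this shim, a minimal usage sketch (the "sitting.jpg" filename is only an example, and the exact keys returned depend on the Qwen tokenizer):

# Hypothetical smoke test for QWenVLProcessor; "sitting.jpg" is an example path.
from PIL import Image
from processing_qwen_vl import QWenVLProcessor

processor = QWenVLProcessor.from_pretrained("Qwen/Qwen-VL-Chat-Int4", trust_remote_code=True)
image = Image.open("sitting.jpg")
batch = processor(text="Describe this person's posture.", images=image, return_tensors="pt")
print(list(batch.keys()))  # typically input_ids/attention_mask from the tokenizer, pixel_values from CLIP
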
requirements.txt
ADDED
@@ -0,0 +1,15 @@
gradio>=4.12.0
torch>=2.1.0
transformers>=4.37.0
accelerate
matplotlib
tiktoken
einops
transformers_stream_generator
torchvision
opencv-python
optimum
auto-gptq
edge-tts  # CLI used by speak_text() in app.py