taybeyond committed on
Commit
294e9b9
·
verified ·
1 Parent(s): 45cb7de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -21
app.py CHANGED
@@ -1,39 +1,29 @@
1
"""Gradio demo: visual question answering with Qwen-VL-Chat."""
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoImageProcessor
import torch
from PIL import Image
import os
from huggingface_hub import login

# Read the Hugging Face token from the environment (set it as a Space secret;
# never hard-code it). Only authenticate when a token is actually configured —
# login(token=None) raises instead of falling back to anonymous access.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

MODEL_ID = "Qwen/Qwen-VL-Chat"

# trust_remote_code=True is required because Qwen-VL ships custom modelling
# code. NOTE(review): this executes code downloaded from the Hub — only do
# this for a model you trust.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True, token=HF_TOKEN)
image_processor = AutoImageProcessor.from_pretrained(MODEL_ID, trust_remote_code=True, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True, token=HF_TOKEN).eval()


def ask(image, prompt):
    """Answer *prompt* about *image* with the loaded Qwen-VL model.

    Args:
        image: ``PIL.Image`` supplied by the Gradio Image component.
        prompt: Question text from the Gradio Textbox.

    Returns:
        The generated answer as a plain string (prompt tokens stripped).
    """
    image_tensor = image_processor(image, return_tensors="pt")["pixel_values"].to(model.device)
    text_input = tokenizer(prompt, return_tensors="pt").to(model.device)
    inputs = {
        "input_ids": text_input["input_ids"],
        "pixel_values": image_tensor,
    }
    # Inference only — skip autograd bookkeeping.
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=512)
    # generate() returns input ids followed by the continuation; drop the
    # prompt tokens so the UI shows only the model's answer.
    prompt_len = text_input["input_ids"].shape[1]
    response = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
    return response


demo = gr.Interface(
    fn=ask,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="請輸入問題")],
    outputs="text",
    title="🧠 Qwen-VL 圖文問答 Demo",
)
38
 
39
  if __name__ == "__main__":
 
1
"""Gradio demo: visual question answering with Qwen-VL-Chat-Int4."""
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoProcessor
from huggingface_hub import login
import os
import torch

# Token comes from the environment (Space secret). Only authenticate when a
# token is configured — login(token=None) raises rather than skipping auth.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

MODEL_ID = "Qwen/Qwen-VL-Chat-Int4"

# trust_remote_code=True executes modelling code downloaded from the Hub —
# only safe for a trusted model.
# NOTE(review): Qwen-VL-Chat historically exposes a custom tokenizer rather
# than a full AutoProcessor — confirm AutoProcessor resolves for this
# checkpoint.
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True, device_map="auto", token=HF_TOKEN)
model.eval()


def ask(image, prompt):
    """Answer *prompt* about *image* with the loaded Qwen-VL-Chat-Int4 model.

    Args:
        image: ``PIL.Image`` supplied by the Gradio Image component.
        prompt: Question text from the Gradio Textbox.

    Returns:
        The generated answer as a plain string (prompt tokens stripped).
    """
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)
    # Inference only — skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=512)
    # generate() echoes the input ids before the continuation; strip them so
    # only the newly generated answer is returned to the UI.
    prompt_len = inputs["input_ids"].shape[1]
    response = processor.batch_decode(outputs[:, prompt_len:], skip_special_tokens=True)[0]
    return response


demo = gr.Interface(
    fn=ask,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="請輸入問題")],
    outputs="text",
    # NOTE(review): title says "Qwen1.5-VL" but MODEL_ID is Qwen-VL-Chat-Int4
    # — confirm the intended label.
    title="🧠 Qwen1.5-VL 圖文問答 Demo",
)
28
 
29
  if __name__ == "__main__":