File size: 1,146 Bytes
b9a55fd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from transformers import AutoProcessor, AutoModel
import torch
import gradio as gr
from PIL import Image

# Load the processor and model once at module level; match_image_text below
# reads these globals on every call instead of re-creating them.
# Checkpoint identifier handed to both from_pretrained calls.
model_name = "google/siglip2-base-patch16-224"
processor = AutoProcessor.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Processing function used as the Gradio callback.
def match_image_text(image, text):
    """Score how well a caption matches an image using the loaded model.

    Args:
        image: Image to score, in a form the processor accepts (the UI in
            this file supplies a PIL image), or ``None`` when no image was
            provided.
        text: Caption to compare against the image. ``None``, empty, or
            whitespace-only captions are treated as missing.

    Returns:
        ``"Similarity score: <value>"`` with the cosine similarity of the
        image and text embeddings formatted to four decimals, or a short
        prompt message when either input is missing.
    """
    # Guard against an empty form submission: without this the processor is
    # handed None / "" and fails with an opaque traceback in the UI.
    if image is None or not text or not text.strip():
        return "Please provide both an image and a caption."

    # SigLIP-family text towers are trained on fixed-length, max_length-padded
    # sequences of 64 tokens; dynamic padding (padding=True) feeds the model
    # inputs unlike those it was trained on. Truncation keeps long captions
    # within that length.
    inputs = processor(
        text=text,
        images=image,
        return_tensors="pt",
        padding="max_length",
        max_length=64,
        truncation=True,
    )

    # Inference only: no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)

    image_embeds = outputs.image_embeds
    text_embeds = outputs.text_embeds

    # Compute cosine similarity; a single image/caption pair yields a
    # one-element tensor, which .item() converts to a Python float.
    similarity = torch.nn.functional.cosine_similarity(image_embeds, text_embeds).item()
    return f"Similarity score: {similarity:.4f}"

# Gradio UI: build the interface around match_image_text and start serving it
# immediately when this module is executed.
gr.Interface(
    fn=match_image_text,
    # Inputs map positionally to (image, text); type="pil" asks Gradio to hand
    # the uploaded image to the callback as a PIL image.
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Enter a caption")],
    # The callback returns a plain string, shown in a text output.
    outputs="text",
    title="SigLIP2 Image-Text Similarity",
    # User-facing Thai description; roughly: "Provide an image + a caption and
    # see how well the model thinks they match."
    description="ใส่รูป + คำบรรยาย แล้วดูว่าโมเดลคิดว่าแมตช์กันแค่ไหน"
).launch()