# Page-header residue from the hosting site: mjwong's picture · "Update app.py" · f74c03b verified
import logging
from functools import lru_cache
from typing import Dict, Union

import gradio as gr
import torch
from transformers import AutoTokenizer, pipeline
# Checkpoints that must be loaded with trust_remote_code=True.
CUSTOM_MODELS = ["mjwong/gte-multilingual-base-xnli-anli"]

# Every checkpoint offered for zero-shot classification; the custom ones come last.
AVAILABLE_MODELS = [
    "mjwong/multilingual-e5-large-instruct-xnli-anli",
    "mjwong/multilingual-e5-base-xnli-anli",
    "mjwong/multilingual-e5-large-xnli-anli",
    "mjwong/drama-base-xnli-anli",
    "mjwong/drama-large-xnli-anli",
    "mjwong/mcontriever-msmarco-xnli",
    "mjwong/mcontriever-xnli",
    *CUSTOM_MODELS,
]
_logger = logging.getLogger(__name__)


@lru_cache(maxsize=1)
def _load_classifier(model_name: str):
    """Build the zero-shot classification pipeline for ``model_name``.

    Only the most recently requested pipeline is cached (``maxsize=1``):
    repeated requests for the same model skip the reload, while switching
    models does not keep several of them cached at once.
    """
    # Device 0 if a GPU is available, else -1 for CPU.
    device = 0 if torch.cuda.is_available() else -1
    if model_name in CUSTOM_MODELS:
        # Custom models need their tokenizer passed explicitly and
        # trust_remote_code=True.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return pipeline(
            "zero-shot-classification",
            model=model_name,
            device=device,
            tokenizer=tokenizer,
            trust_remote_code=True,
        )
    return pipeline("zero-shot-classification", model=model_name, device=device)


def classify_text(
    model_name: str,
    text: str,
    labels: str,
    multi_label: bool = False,
) -> Union[Dict[str, float], str]:
    """
    Classify ``text`` against the given labels with a zero-shot classification model.

    Args:
        model_name: The name of the Hugging Face model to use.
        text: The input text to classify.
        labels: A comma-separated string of candidate labels. Surrounding
            whitespace is stripped and empty entries (e.g. from a trailing
            comma) are ignored.
        multi_label: Forwarded to the pipeline's ``multi_label`` argument.

    Returns:
        A dictionary mapping each label to its classification score, or an
        ``"Error: ..."`` message string when the input is empty or the
        classification fails.
    """
    if not text.strip():
        return "Error: Please enter some text to classify."

    # Drop blank entries so inputs like "a, b," or " , " never reach the model
    # as empty candidate labels.
    labels_list = [label.strip() for label in labels.split(",") if label.strip()]
    if not labels_list:
        return "Error: Please enter some labels to classify the text."

    try:
        classifier = _load_classifier(model_name)
        result = classifier(text, candidate_labels=labels_list, multi_label=multi_label)
        return dict(zip(result["labels"], result["scores"]))
    except Exception:
        # Keep the user-facing message generic, but record the traceback so
        # the failure can be diagnosed from the logs.
        _logger.exception("Zero-shot classification failed for model %r", model_name)
        return "Error: An unexpected error occurred. Please try again later."
# Sample inputs for the demo: one (text, comma-separated labels) pair each in
# English, Chinese, Thai, French and Hindi.
examples = [
    [sample_text, sample_labels]
    for sample_text, sample_labels in (
        (
            "The government announced a new economic policy today aimed at reducing inflation and stabilizing the currency market.",
            "economy, politics, finance, policy, inflation, government, currency",
        ),
        (
            "中国的科技公司在人工智能领域取得了重大突破,这可能会影响全球市场。",
            "科技, 经济, 创新, 市场, 人工智能, 全球",
        ),
        (
            "นักวิจัยค้นพบวิธีใหม่ในการรักษาโรคมะเร็ง ซึ่งอาจช่วยชีวิตผู้ป่วยหลายล้านคนทั่วโลก",
            "การแพทย์, วิทยาศาสตร์, นวัตกรรม, สุขภาพ, โรคมะเร็ง, การรักษา",
        ),
        (
            "La conférence des Nations Unies sur le climat a abouti à un nouvel accord pour réduire les émissions de carbone d'ici 2030.",
            "environnement, climat, politique, énergie, carbone, écologie, ONU",
        ),
        (
            "सरकार ने आज एक नई आर्थिक नीति की घोषणा की, जिसका उद्देश्य मुद्रास्फीति को कम करना और मुद्रा बाजार को स्थिर करना है।",
            "अर्थव्यवस्था, राजनीति, वित्त, नीति, मुद्रास्फीति, सरकार, मुद्रा",
        ),
    )
]
# Custom CSS handed to gr.Blocks: hides the footer and `.output-markdown`
# elements, restyles `.gr-button-primary`, and gives `.classify-button`
# a yellow-to-orange gradient background.
css = """
footer {display:none !important}
.output-markdown{display:none !important}
.gr-button-primary {
z-index: 14;
height: 43px;
width: 130px;
left: 0px;
top: 0px;
padding: 0px;
cursor: pointer !important;
background: none rgb(17, 20, 45) !important;
border: none !important;
text-align: center !important;
font-family: Poppins !important;
font-size: 14px !important;
font-weight: 500 !important;
color: rgb(255, 255, 255) !important;
line-height: 1 !important;
border-radius: 12px !important;
transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
box-shadow: none !important;
}
.classify-button {
background: linear-gradient(90deg, yellow, orange) !important;
}
"""
# Build the Gradio interface: model/option row, text/label row, score output,
# submit button and clickable examples.
with gr.Blocks(css=css) as iface:
    gr.Markdown("# Zero-Shot Text Classifier")
    gr.Markdown("Select a model, enter text, and a set of labels to classify the text using a zero-shot classification model.")
    gr.Markdown("More than 10 languages are officially supported, including: English, Arabic, Bulgarian, German, Greek, Spanish, French, Hindi, Russian, Swahili, Thai, Turkish, Urdu, Vietnamese and Chinese.")

    with gr.Row():
        # Dropdown to select a model. The first model is pre-selected so that
        # clicking "Classify" never submits an empty model choice.
        model_dropdown = gr.Dropdown(
            AVAILABLE_MODELS,
            value=AVAILABLE_MODELS[0],
            label="Choose Model",
        )
        # Checkbox for multi-label classification
        multi_label = gr.Checkbox(
            label="Multi-label",
            value=False,
            info="Check for multi-label classification, uncheck for single-label (multi-class).",
        )

    # Input fields for text and labels
    with gr.Row():
        text_input = gr.Textbox(label="Enter Text", placeholder="Type or paste text here...")
        label_input = gr.Textbox(label="Enter Labels (comma-separated)", placeholder="e.g., sports, politics, technology")

    # Output display
    output_label = gr.Label(label="Classification Scores")

    # Classification button; its click runs classify_text on the four inputs
    # and shows the result in the label output.
    submit_button = gr.Button("Classify", elem_classes=["classify-button"])
    submit_button.click(
        fn=classify_text,
        inputs=[model_dropdown, text_input, label_input, multi_label],
        outputs=output_label,
    )

    # Clickable examples that fill the text and label boxes
    gr.Examples(examples, inputs=[text_input, label_input])

# Launch the app only when the file is run as a script
if __name__ == "__main__":
    iface.launch()