File size: 2,928 Bytes
1b442a6
 
0307fac
 
1b442a6
0307fac
 
 
 
 
 
 
 
1b442a6
0307fac
1b442a6
 
 
0307fac
 
 
 
 
 
 
 
 
 
31b27a0
0307fac
31b27a0
0307fac
31b27a0
1b442a6
0307fac
 
 
 
 
 
 
 
 
 
 
 
1b442a6
 
 
 
0307fac
 
 
 
 
1b442a6
 
 
 
 
 
 
 
 
0307fac
1b442a6
0307fac
 
1b442a6
0307fac
1b442a6
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np  # Import numpy


# Select the compute backend: CUDA when torch reports it as available,
# otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(f"Using device: {device}")

# Load the sequence-classification checkpoint and its tokenizer by hub name.
# The model is moved to the selected device; the tokenizer needs no device.
model_name = "explorewithai/PersianSwear-Detector"
loaded_model = AutoModelForSequenceClassification.from_pretrained(model_name)
loaded_model = loaded_model.to(device)
loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)



def predict_sentiment(text):
    """Classify a piece of text and report per-class probabilities.

    The text is tokenized with ``loaded_tokenizer``, run through
    ``loaded_model`` without gradient tracking, and the logits are turned
    into probabilities with a softmax over the last dimension.

    Args:
        text: The text to classify. ``None`` is treated as an empty string.

    Returns:
        A ``(prob_dict, sentiment)`` tuple:

        * ``prob_dict`` maps ``"Bad Word"``, ``"Good Word"`` and
          ``"Neutral Word"`` to the probability of the corresponding class
          (``0.0`` if the model has no output at that class index).
        * ``sentiment`` is ``"Bad sentence"``, ``"Good sentence"`` or
          ``"Neutral sentence"`` for the highest-scoring class, or
          ``"Unknown"`` when the winning class index is none of the three
          reported ones.
    """
    # Single source of truth for the class mapping:
    # (probability key, predicted-label text, model class index).
    # NOTE(review): the indices 4 / 0 / 3 are hard-coded; presumably they match
    # the checkpoint's label ids -- confirm against the model config.
    class_table = (
        ("Bad Word", "Bad sentence", 4),
        ("Good Word", "Good sentence", 0),
        ("Neutral Word", "Neutral sentence", 3),
    )

    # NOTE(review): the UI layer may hand over None instead of a string for an
    # empty field; the tokenizer rejects None, so normalise it to "".
    if text is None:
        text = ""

    inputs = loaded_tokenizer(
        text, return_tensors="pt", padding=True, truncation=True
    ).to(device)  # tensors must live on the same device as the model
    with torch.no_grad():  # inference only, no gradients needed
        logits = loaded_model(**inputs).logits

    probabilities = torch.nn.functional.softmax(logits, dim=-1)
    prediction = torch.argmax(logits, dim=-1).item()

    num_classes = probabilities.shape[1]
    prob_dict = {}
    sentiment = "Unknown"  # kept when the winning class is not in class_table
    for prob_key, sentence_label, class_index in class_table:
        # Guard the lookup so a model with fewer outputs yields 0.0, not an error.
        if class_index < num_classes:
            prob_dict[prob_key] = float(probabilities[0][class_index])
        else:
            prob_dict[prob_key] = 0.0
        if prediction == class_index:
            sentiment = sentence_label

    return prob_dict, sentiment


# Sample inputs offered in the UI. Each sentence is wrapped in its own
# one-element list, giving one row per example.
examples = [
    [sentence]
    for sentence in (
        "چه کت و شلوار زیبایی",  # Good word example
        "این فیلم خیلی زیبا بود",  # Good word example
        "میز",  # Neutral word example
        "کثافت",  # Bad word example
        "هوا خوب است.",  # Neutral example
    )
]


# Build the Gradio UI: one multi-line text input and two outputs that line up
# with the (probabilities, label) pair returned by predict_sentiment.
_text_input = gr.Textbox(
    label="Enter Persian Text",
    lines=5,
    placeholder="Type your text here...",
)
_result_views = [
    gr.Label(label="Sentiment Probabilities"),  # per-class probabilities
    gr.Textbox(label="Predicted Sentiment"),  # predicted label string
]

iface = gr.Interface(
    fn=predict_sentiment,
    inputs=_text_input,
    outputs=_result_views,
    title="Persian Swear Word Detection",
    description="Enter a Persian sentence and get its sentiment (Good Word, Bad Word, or Neutral Word).",
    examples=examples,
    live=False,  # switch to True to re-run the prediction while typing
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()