import gradio as gr
import torch
import librosa
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification
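# Dependencies (assumed installed from PyPI): gradio, torch, librosa, transformers;
# librosa additionally relies on soundfile or audioread to decode audio files.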

MODEL_NAME = "ameliabb0913/emotion-classifier1"
processor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_NAME)
model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_NAME, trust_remote_code=True)
model.eval()  # inference mode: disables dropout so predictions are deterministic


# Emotion labels (based on the dataset used to train the model)
id2label = {
    0: "Neutral",
    1: "Happy",
    2: "Sad",
    3: "Angry",
    4: "Fearful",
    5: "Disgusted",
    6: "Surprised"
}
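# Note: if the checkpoint publishes its own label mapping, model.config.id2label
# could be used here instead of a hard-coded table.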

emotion_emojis = {
    "Neutral": "😐",
    "Happy": "😊",
    "Sad": "😒",
    "Angry": "😠",
    "Fearful": "😨",
    "Disgusted": "🤢",
    "Surprised": "😲"
}


# Function to classify emotions from audio
def classify_emotion(audio_file):
    # Load the audio and resample to 16 kHz, the rate Wav2Vec2 models expect
    speech, sr = librosa.load(audio_file, sr=16000)
    inputs = processor(
        speech,
        sampling_rate=16000,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=48000,  # 3 seconds at 16 kHz; adjust for longer clips
    )

    # Get predictions
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class_id = torch.argmax(logits, dim=-1).item()
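    # If per-class confidences are wanted, torch.softmax(logits, dim=-1)
    # would turn the logits into probabilities to return alongside the label.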
    
    # Convert class ID to emotion label
    predicted_emotion = id2label.get(predicted_class_id, "Unknown")
    emoji = emotion_emojis.get(predicted_emotion, "❓")

    return f"Predicted Emotion: {predicted_emotion} {emoji}"
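
# Optional local sanity check before wiring up the UI ("sample.wav" is a
# hypothetical file path, not something the app ships with):
#
#     print(classify_emotion("sample.wav"))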


# Gradio Interface
interface = gr.Interface(
    fn=classify_emotion,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="🎧 Speak Your Emotion | AI Emotion Detector",
    description=(
        "🎤 Upload a voice clip or speak into the mic, and this AI will "
        "identify the **emotion** in your voice!\n\n"
        "**Supported Emotions (7)**: Neutral, Happy, Sad, Angry, Fearful, "
        "Disgusted, Surprised."
    ),
)


# Launch the app
if __name__ == "__main__":
    interface.launch()
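    # On Hugging Face Spaces, launch() needs no arguments; when running
    # locally, launch(share=True) would also create a temporary public link.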