import gradio as gr
import torch
import librosa
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification
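
# Load the fine-tuned Wav2Vec2 emotion classifier and its feature extractor from the Hugging Face Hub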
MODEL_NAME = "ameliabb0913/emotion-classifier1"
processor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_NAME)
model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_NAME, trust_remote_code=True)
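# Put the model in evaluation mode (disables dropout and other training-only behavior)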
model.eval()
# Emotion labels (based on the dataset used to train the model)
id2label = {
0: "Neutral",
1: "Happy",
2: "Sad",
3: "Angry",
4: "Fearful",
5: "Disgusted",
6: "Surprised"
}
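
# Emoji shown next to each predicted emotion in the app's output text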
emotion_emojis = {
"Neutral": "😐",
"Happy": "😊",
"Sad": "😒",
"Angry": "😠",
"Fearful": "😨",
"Disgusted": "🀒",
"Surprised": "😲"
}
# Function to classify emotions from audio
def classify_emotion(audio_file):
    # Load the audio and resample it to 16 kHz, the rate the model expects
    speech, sr = librosa.load(audio_file, sr=16000)
    inputs = processor(
        speech,
        sampling_rate=16000,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=48000  # 3 seconds at 16 kHz; adjust this for longer clips
    )

    # Get predictions
    with torch.no_grad():
        logits = model(**inputs).logits
        predicted_class_id = torch.argmax(logits, dim=-1).item()

    # Convert class ID to emotion label and emoji
    predicted_emotion = id2label.get(predicted_class_id, "Unknown")
    emoji = emotion_emojis.get(predicted_emotion, "❓")
    return f"Predicted Emotion: {predicted_emotion} {emoji}"
# Gradio Interface
interface = gr.Interface(
    fn=classify_emotion,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="🎧 Speak Your Emotion | AI Emotion Detector",
    description=(
        "🎤 Upload a voice clip or speak into the mic, and this AI will identify the **emotion** in your voice!\n\n"
        "**Supported emotions (7)**: Neutral, Happy, Sad, Angry, Fearful, Disgusted, Surprised."
    ),
)
# Launch the app
if __name__ == "__main__":
    interface.launch()
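
# When running outside Hugging Face Spaces, a temporary public link can be created
# by launching with interface.launch(share=True) instead.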