# Speech-emotion classification demo: Wav2Vec2-based classifier served through a Gradio UI.
import gradio as gr
import torch
import librosa
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification
# Hugging Face Hub id of the pretrained speech-emotion classifier.
MODEL_NAME = "ameliabb0913/emotion-classifier1"
# Feature extractor: converts raw 16 kHz waveforms into padded model input tensors.
processor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_NAME)
# NOTE(review): trust_remote_code=True executes Python shipped inside the model repo —
# confirm the repo is trusted before deploying.
model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_NAME, trust_remote_code=True)
# Inference mode: disables dropout / batch-norm updates.
model.eval()
# Emotion labels (based on the dataset used to train the model);
# list position matches the classifier's output class index.
id2label = dict(enumerate([
    "Neutral",
    "Happy",
    "Sad",
    "Angry",
    "Fearful",
    "Disgusted",
    "Surprised",
]))
# Emoji shown next to each predicted label.
# Fixed: the original values were mojibake — the UTF-8 emoji bytes had been
# mis-decoded as ISO-8859-7 (e.g. "π¨" was 😨, "π²" was 😲).
emotion_emojis = {
    "Neutral": "😐",
    "Happy": "😊",
    "Sad": "😢",
    "Angry": "😠",
    "Fearful": "😨",
    "Disgusted": "🤢",
    "Surprised": "😲",
}
# Function to classify emotions from audio
def classify_emotion(audio_file):
    """Predict the emotion expressed in an audio clip.

    Args:
        audio_file: Path to an audio file in any format librosa can decode.

    Returns:
        A display string naming the predicted emotion plus a matching emoji.
    """
    # Resample to 16 kHz, the rate the Wav2Vec2 feature extractor expects.
    speech, _ = librosa.load(audio_file, sr=16000)
    inputs = processor(
        speech,
        sampling_rate=16000,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=48000,  # 3 s at 16 kHz; adjust for longer expected clips
    )
    # Inference only — skip gradient bookkeeping.
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class_id = torch.argmax(logits, dim=-1).item()
    # Map the class index back to a label; fall back gracefully on unexpected ids.
    predicted_emotion = id2label.get(predicted_class_id, "Unknown")
    # Fixed: the fallback emoji was mojibake ("β"), restored to ❓.
    emoji = emotion_emojis.get(predicted_emotion, "❓")
    return f"Predicted Emotion: {predicted_emotion} {emoji}"
# Gradio Interface wiring the classifier to a mic/file audio input.
# Fixed: mojibake emojis in title/description (were "π§", "π€", "β")
# and the emotion count ("8" while only 7 emotions are listed).
interface = gr.Interface(
    fn=classify_emotion,
    inputs=gr.Audio(type="filepath"),  # pass a temp-file path to classify_emotion
    outputs="text",
    title="🎧 Speak Your Emotion | AI Emotion Detector",
    description=(
        "🎤 Upload a voice clip or speak into the mic — this AI will identify the **emotion** in your voice!\n\n"
        "**Supported 7 Emotions**: Neutral, Happy, Sad, Angry, Fearful, Disgusted, Surprised."
    ),
)

# Launch the app only when run as a script (not on import).
if __name__ == "__main__":
    interface.launch()