# Speech-Analyser / emotion_test.py
# Arjunadhithya's picture
# Upload 7 files
# 1f75494 verified
import torch
import torchaudio
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
# Smoke test: load the audio-classification checkpoint stored at MODEL_PATH,
# run it on a single WAV file, and print the predicted emotion class.
MODEL_PATH = "D:/SER MiniProj/wav2vec2_model/"
TARGET_SAMPLE_RATE = 16000  # Model requires 16kHz audio

# Load feature extractor and model
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_PATH)
model = AutoModelForAudioClassification.from_pretrained(MODEL_PATH)
model.eval()  # make inference mode explicit (no dropout during prediction)
print("Feature extractor and model loaded successfully!")

# Load an audio file
audio_file = "D:/SER MiniProj/temp_audio.wav"
waveform, sample_rate = torchaudio.load(audio_file)

# Convert to mono if needed: average across dim 0, which this script treats
# as the channel axis, keeping that axis so the tensor stays 2-D.
if waveform.shape[0] > 1:
    waveform = torch.mean(waveform, dim=0, keepdim=True)

# Resample if the sample rate is not 16kHz
if sample_rate != TARGET_SAMPLE_RATE:
    resampler = torchaudio.transforms.Resample(
        orig_freq=sample_rate, new_freq=TARGET_SAMPLE_RATE
    )
    waveform = resampler(waveform)
    sample_rate = TARGET_SAMPLE_RATE  # Update sample rate

# Process the audio for the model. The leading (channel) axis is dropped and
# the samples are handed over as a NumPy array, which is the input type the
# feature extractor documents; a raw torch tensor is outside that contract.
inputs = feature_extractor(
    waveform.squeeze(0).numpy(),
    sampling_rate=sample_rate,
    return_tensors="pt",
)

# Perform inference (no gradients are needed for prediction)
with torch.no_grad():
    logits = model(**inputs).logits

# Get the predicted emotion: index of the highest-scoring logit
predicted_label = torch.argmax(logits, dim=-1).item()

# Print the output
print(f"Predicted Emotion Class: {predicted_label}")

# Also report the human-readable label when the checkpoint's config maps
# class indices to names; the numeric line above is kept unchanged.
id2label = getattr(model.config, "id2label", None) or {}
if predicted_label in id2label:
    print(f"Predicted Emotion Label: {id2label[predicted_label]}")