import gradio as gr
from fastapi import FastAPI
import librosa
import openai
from transformers import pipeline
import requests
import os
from pydantic import BaseModel
import numpy as np

# Initialize FastAPI
app = FastAPI()
# Initialize emotion classifier
text_emotion_classifier = pipeline("text-classification",
                                   model="bhadresh-savani/distilbert-base-uncased-emotion",
                                   device=-1)
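# Note: the model card for distilbert-base-uncased-emotion lists six labels:
# sadness, joy, love, anger, fear, surprise.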
# Environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ELEVEN_LABS_API_KEY = os.getenv("ELEVEN_LABS_API_KEY")
VOICE_ID = os.getenv("VOICE_ID", "9BWtsMINqrJLrRacOk9x")
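# On Hugging Face Spaces, set these as repository secrets so they are exposed to the
# app as environment variables; the default VOICE_ID above is assumed to be one of
# ElevenLabs' prebuilt voices and can be overridden the same way.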
def analyze_text_emotion(text):
    try:
        emotion_result = text_emotion_classifier(text)
        emotion_data = emotion_result[0]
        return f"Emotion: {emotion_data['label']}\nConfidence: {emotion_data['score']:.2f}"
    except Exception as e:
        return f"Error: {str(e)}"
def analyze_voice_emotion(audio):
    try:
        if audio is None:
            return "Please upload an audio file"

        # Gradio's numpy audio component returns a (sample_rate, data) tuple
        sr, y = audio

        # Downmix stereo to mono; librosa expects a 1-D signal
        if y.ndim > 1:
            y = y.mean(axis=1)

        # Convert integer PCM to float in [-1, 1] so the intensity thresholds below are meaningful
        if np.issubdtype(y.dtype, np.integer):
            y = y.astype(np.float32) / np.iinfo(y.dtype).max
        else:
            y = y.astype(np.float32)

        # Calculate features and convert numpy values to Python scalars
        pitch = float(librosa.feature.spectral_centroid(y=y, sr=sr).mean())
        intensity = float(librosa.feature.rms(y=y).mean())
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        # Convert tempo to a Python float to avoid numpy formatting issues
        tempo = float(tempo)

        # Rough heuristic mapping from acoustic features to an emotion label
        if pitch < 150 and intensity < 0.02:
            emotion = "sadness"
        elif pitch > 200 and intensity > 0.05:
            emotion = "anger"
        elif pitch > 150 and intensity < 0.03:
            emotion = "joy"
        else:
            emotion = "anxiety"

        # Format the output using Python floats instead of numpy values
        return "Emotion: {}\nPitch: {:.2f}\nIntensity: {:.2f}\nTempo: {:.2f}".format(
            emotion, pitch, intensity, tempo
        )
    except Exception as e:
        return f"Error analyzing audio: {str(e)}"
def chat_and_tts(message):
    try:
        if not OPENAI_API_KEY or not ELEVEN_LABS_API_KEY:
            return "API keys not configured", None

        # Uses the legacy (pre-1.0) openai SDK interface; pin openai<1.0 in
        # requirements.txt or migrate this call to the 1.x client API.
        openai.api_key = OPENAI_API_KEY
        chat_response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": message},
            ]
        )
        response_text = chat_response['choices'][0]['message']['content'].strip()

        # ElevenLabs text-to-speech request
        url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
        headers = {
            "xi-api-key": ELEVEN_LABS_API_KEY,
            "Content-Type": "application/json"
        }
        data = {
            "text": response_text,
            "voice_settings": {
                "stability": 0.75,
                "similarity_boost": 0.75
            }
        }
        response = requests.post(url, json=data, headers=headers)

        # If TTS fails, still return the text reply without audio
        if response.status_code != 200:
            return response_text, None

        audio_path = "response.mp3"
        with open(audio_path, "wb") as f:
            f.write(response.content)
        return response_text, audio_path
    except Exception as e:
        return f"Error: {str(e)}", None
# Create Gradio interface
demo = gr.Blocks(title="AI Therapist")

with demo:
    gr.Markdown("# AI Virtual Therapist")

    with gr.Tab("Text Emotion Analysis"):
        text_input = gr.Textbox(label="Enter text")
        text_button = gr.Button("Analyze Text Emotion")
        text_output = gr.Textbox(label="Emotion Analysis Result")
        text_button.click(analyze_text_emotion, inputs=text_input, outputs=text_output)

    with gr.Tab("Voice Emotion Analysis"):
        audio_input = gr.Audio(label="Upload Audio", type="numpy")
        audio_button = gr.Button("Analyze Voice Emotion")
        audio_output = gr.Textbox(label="Voice Analysis Result")
        audio_button.click(analyze_voice_emotion, inputs=audio_input, outputs=audio_output)

    with gr.Tab("Chat with TTS"):
        chat_input = gr.Textbox(label="Enter your message")
        chat_button = gr.Button("Send Message")
        chat_output = gr.Textbox(label="Assistant Response")
        # Distinct name so this component does not shadow audio_output from the voice tab
        tts_audio_output = gr.Audio(label="Voice Response")
        chat_button.click(chat_and_tts, inputs=chat_input, outputs=[chat_output, tts_audio_output])
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
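
# The Space also needs a requirements.txt roughly along these lines to build
# (package list taken from the imports above; pins are assumptions):
#   gradio
#   fastapi
#   librosa
#   openai<1.0
#   transformers
#   torch
#   requests
#   numpy
#   pydantic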