import os

import gradio as gr
import librosa
import numpy as np
import openai
import requests
from transformers import pipeline
# Initialize the text emotion classifier (device=-1 runs inference on CPU)
text_emotion_classifier = pipeline(
    "text-classification",
    model="bhadresh-savani/distilbert-base-uncased-emotion",
    device=-1,
)
# Environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ELEVEN_LABS_API_KEY = os.getenv("ELEVEN_LABS_API_KEY")
VOICE_ID = os.getenv("VOICE_ID", "9BWtsMINqrJLrRacOk9x")
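# In a Hugging Face Space these are normally set as repository secrets; for a
# local run, a hedged sketch (placeholder values, not real keys):
#   export OPENAI_API_KEY="sk-..."
#   export ELEVEN_LABS_API_KEY="..."
#   export VOICE_ID="9BWtsMINqrJLrRacOk9x"  # optional; falls back to this default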
def analyze_text_emotion(text):
    try:
        emotion_result = text_emotion_classifier(text)
        emotion_data = emotion_result[0]
        return f"Emotion: {emotion_data['label']}\nConfidence: {emotion_data['score']:.2f}"
    except Exception as e:
        return f"Error: {str(e)}"
def analyze_voice_emotion(audio):
    try:
        if audio is None:
            return "Please upload an audio file"
        # Gradio's numpy audio type yields a (sample_rate, samples) tuple
        sr, y = audio
        # Integer PCM (e.g. int16 from Gradio) must be scaled to [-1, 1],
        # otherwise the RMS intensity thresholds below are meaningless
        if np.issubdtype(y.dtype, np.integer):
            y = y.astype(np.float32) / np.iinfo(y.dtype).max
        else:
            y = y.astype(np.float32)
        # Downmix stereo to mono; librosa expects a 1-D signal
        if y.ndim > 1:
            y = y.mean(axis=1)
        # Calculate features and convert numpy values to Python scalars.
        # The spectral centroid is a brightness measure, used here only as a
        # rough stand-in for pitch, not a true pitch estimate.
        pitch = float(librosa.feature.spectral_centroid(y=y, sr=sr).mean())
        intensity = float(librosa.feature.rms(y=y).mean())
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        # beat_track may return a 1-element array; reduce it to a Python float
        # to avoid numpy formatting issues
        tempo = float(np.atleast_1d(tempo)[0])
        # Heuristic mapping from features to an emotion label (a rule of
        # thumb, not a trained model)
        if pitch < 150 and intensity < 0.02:
            emotion = "sadness"
        elif pitch > 200 and intensity > 0.05:
            emotion = "anger"
        elif pitch > 150 and intensity < 0.03:
            emotion = "joy"
        else:
            emotion = "anxiety"
        return "Emotion: {}\nPitch: {:.2f}\nIntensity: {:.2f}\nTempo: {:.2f}".format(
            emotion, pitch, intensity, tempo
        )
    except Exception as e:
        return f"Error analyzing audio: {str(e)}"
def chat_and_tts(message):
    try:
        if not OPENAI_API_KEY or not ELEVEN_LABS_API_KEY:
            return "API keys not configured", None
        # Uses the legacy openai<1.0 SDK; openai.ChatCompletion was removed
        # in v1 of the library
        openai.api_key = OPENAI_API_KEY
        chat_response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": message},
            ],
        )
        response_text = chat_response['choices'][0]['message']['content'].strip()
        # Send the reply to ElevenLabs for speech synthesis
        url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
        headers = {
            "xi-api-key": ELEVEN_LABS_API_KEY,
            "Content-Type": "application/json"
        }
        data = {
            "text": response_text,
            "voice_settings": {
                "stability": 0.75,
                "similarity_boost": 0.75
            }
        }
        response = requests.post(url, json=data, headers=headers, timeout=60)
        if response.status_code != 200:
            # Fall back to text-only output if TTS fails
            return response_text, None
        audio_path = "response.mp3"
        with open(audio_path, "wb") as f:
            f.write(response.content)
        return response_text, audio_path
    except Exception as e:
        return f"Error: {str(e)}", None
# Create Gradio interface
with gr.Blocks(title="AI Therapist") as demo:
    gr.Markdown("# AI Virtual Therapist")
    with gr.Tab("Text Emotion Analysis"):
        text_input = gr.Textbox(label="Enter text")
        text_button = gr.Button("Analyze Text Emotion")
        text_output = gr.Textbox(label="Emotion Analysis Result")
        text_button.click(analyze_text_emotion, inputs=text_input, outputs=text_output)
    with gr.Tab("Voice Emotion Analysis"):
        audio_input = gr.Audio(label="Upload Audio", type="numpy")
        audio_button = gr.Button("Analyze Voice Emotion")
        audio_output = gr.Textbox(label="Voice Analysis Result")
        audio_button.click(analyze_voice_emotion, inputs=audio_input, outputs=audio_output)
    with gr.Tab("Chat with TTS"):
        chat_input = gr.Textbox(label="Enter your message")
        chat_button = gr.Button("Send Message")
        chat_output = gr.Textbox(label="Assistant Response")
        # Distinct name so this component does not shadow the voice-analysis textbox
        tts_audio_output = gr.Audio(label="Voice Response")
        chat_button.click(chat_and_tts, inputs=chat_input, outputs=[chat_output, tts_audio_output])

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
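# Assumed dependencies for this Space (a sketch, not a pinned manifest):
#   gradio, transformers, torch, librosa, numpy, requests, openai<1.0
# openai is kept below 1.0 because openai.ChatCompletion was removed in v1.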