# AI_Virtual / app.py
import os

import gradio as gr
import librosa
import numpy as np
import openai
import requests
from transformers import pipeline
# Initialize text emotion classifier (runs on CPU: device=-1)
text_emotion_classifier = pipeline(
    "text-classification",
    model="bhadresh-savani/distilbert-base-uncased-emotion",
    device=-1,
)
# Environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ELEVEN_LABS_API_KEY = os.getenv("ELEVEN_LABS_API_KEY")
VOICE_ID = os.getenv("VOICE_ID", "9BWtsMINqrJLrRacOk9x")
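# These are read from the host / Space environment, e.g. (illustrative shell values):
#   export OPENAI_API_KEY="sk-..."
#   export ELEVEN_LABS_API_KEY="..."
#   export VOICE_ID="..."  # optional; falls back to the default above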
def analyze_text_emotion(text):
try:
emotion_result = text_emotion_classifier(text)
emotion_data = emotion_result[0]
return f"Emotion: {emotion_data['label']}\nConfidence: {emotion_data['score']:.2f}"
except Exception as e:
return f"Error: {str(e)}"
def analyze_voice_emotion(audio):
try:
if audio is None:
return "Please upload an audio file"
        # Gradio's numpy audio component passes a (sample_rate, samples) tuple
        sr, y = audio
        y = np.asarray(y)
        # Scale integer PCM (Gradio typically delivers int16) to floats in [-1, 1]
        # so the intensity thresholds below behave as intended
        if np.issubdtype(y.dtype, np.integer):
            y = y.astype(np.float32) / np.iinfo(y.dtype).max
        else:
            y = y.astype(np.float32)
        # Mix stereo down to mono for librosa
        if y.ndim > 1:
            y = y.mean(axis=1)
        # Extract features; the spectral centroid serves as a rough pitch proxy.
        # Convert numpy values to Python floats for clean string formatting.
        pitch = float(librosa.feature.spectral_centroid(y=y, sr=sr).mean())
        intensity = float(librosa.feature.rms(y=y).mean())
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        tempo = float(np.atleast_1d(tempo)[0])
# Determine emotion based on features
if pitch < 150 and intensity < 0.02:
emotion = "sadness"
elif pitch > 200 and intensity > 0.05:
emotion = "anger"
elif pitch > 150 and intensity < 0.03:
emotion = "joy"
else:
emotion = "anxiety"
# Format the output using Python floats instead of numpy values
return "Emotion: {}\nPitch: {:.2f}\nIntensity: {:.2f}\nTempo: {:.2f}".format(
emotion, pitch, intensity, tempo
)
except Exception as e:
return f"Error analyzing audio: {str(e)}"
def chat_and_tts(message):
try:
if not OPENAI_API_KEY or not ELEVEN_LABS_API_KEY:
return "API keys not configured", None
        # Legacy openai<1.0 SDK call; openai>=1.0 replaces this with the
        # client-based API (see the sketch after this function)
        openai.api_key = OPENAI_API_KEY
        chat_response = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": message},
]
)
response_text = chat_response['choices'][0]['message']['content'].strip()
url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
headers = {
"xi-api-key": ELEVEN_LABS_API_KEY,
"Content-Type": "application/json"
}
data = {
"text": response_text,
"voice_settings": {
"stability": 0.75,
"similarity_boost": 0.75
}
}
        response = requests.post(url, json=data, headers=headers, timeout=60)
if response.status_code != 200:
return response_text, None
audio_path = "response.mp3"
with open(audio_path, "wb") as f:
f.write(response.content)
return response_text, audio_path
except Exception as e:
return f"Error: {str(e)}", None
# Create Gradio interface
demo = gr.Blocks(title="AI Therapist")
with demo:
gr.Markdown("# AI Virtual Therapist")
with gr.Tab("Text Emotion Analysis"):
text_input = gr.Textbox(label="Enter text")
text_button = gr.Button("Analyze Text Emotion")
text_output = gr.Textbox(label="Emotion Analysis Result")
text_button.click(analyze_text_emotion, inputs=text_input, outputs=text_output)
with gr.Tab("Voice Emotion Analysis"):
audio_input = gr.Audio(label="Upload Audio", type="numpy")
audio_button = gr.Button("Analyze Voice Emotion")
audio_output = gr.Textbox(label="Voice Analysis Result")
audio_button.click(analyze_voice_emotion, inputs=audio_input, outputs=audio_output)
with gr.Tab("Chat with TTS"):
chat_input = gr.Textbox(label="Enter your message")
chat_button = gr.Button("Send Message")
chat_output = gr.Textbox(label="Assistant Response")
        tts_audio_output = gr.Audio(label="Voice Response")
        chat_button.click(chat_and_tts, inputs=chat_input, outputs=[chat_output, tts_audio_output])
if __name__ == "__main__":
demo.launch(server_name="0.0.0.0", server_port=7860)