##########################################
# Step 0: Import required libraries
##########################################
import streamlit as st # For building the web application
from transformers import (
    pipeline,
    SpeechT5Processor,
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    AutoModelForCausalLM,
    AutoTokenizer
) # For emotion analysis, text-to-speech, and text generation
from datasets import load_dataset # For loading datasets (e.g., speaker embeddings)
import torch # For tensor operations
import soundfile as sf # For saving audio as .wav files
##########################################
# Streamlit application title and input
##########################################
st.title("Comment Reply for You") # Application title
st.write("Generate automatic replies for user comments") # Application description
text = st.text_area("Enter your comment", "") # Text input for user to enter comments
##########################################
# Step 1: Sentiment Analysis Function
##########################################
def analyze_dominant_emotion(user_review):
"""
Analyze the dominant emotion in the user's review using a text classification model.
"""
emotion_classifier = pipeline(
"text-classification",
model="Thea231/jhartmann_emotion_finetuning",
return_all_scores=True
) # Load pre-trained emotion classification model
emotion_results = emotion_classifier(user_review)[0] # Get emotion scores for the review
dominant_emotion = max(emotion_results, key=lambda x: x['score']) # Find the emotion with the highest confidence
return dominant_emotion
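# A minimal usage sketch (illustrative values only; the exact labels and scores
# depend on the fine-tuned classifier above):
#
#     analyze_dominant_emotion("The package arrived broken and support ignored me.")
#     # -> {'label': 'anger', 'score': 0.97}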
##########################################
# Step 2: Response Generation Function
##########################################
def response_gen(user_review):
"""
Generate a response based on the sentiment of the user's review.
"""
# Use Llama-based model to create a response based on a generated prompt
dominant_emotion = analyze_dominant_emotion(user_review) # Get the dominant emotion
emotion_label = dominant_emotion['label'].lower() # Extract emotion label
# Define response templates for each emotion
emotion_prompts = {
"anger": (
"Customer complaint: '{review}'\n\n"
"As a customer service representative, write a response that:\n"
"- Sincerely apologizes for the issue\n"
"- Explains how the issue will be resolved\n"
"- Offers compensation where appropriate\n\n"
"Response:"
),
"joy": (
"Customer review: '{review}'\n\n"
"As a customer service representative, write a positive response that:\n"
"- Thanks the customer for their feedback\n"
"- Acknowledges both positive and constructive comments\n"
"- Invites them to explore loyalty programs\n\n"
"Response:"
),
# Add other emotions as needed...
}
    # Format the prompt with the user's review; fall back to a simple neutral
    # template so the review is still included for emotions without a dedicated prompt
    default_prompt = "Customer comment: '{review}'\n\nAs a customer service representative, write a polite, helpful response.\n\nResponse:"
    prompt = emotion_prompts.get(emotion_label, default_prompt).format(review=user_review)
    # Load a pre-trained text generation model (replace 'meta-llama/Llama-3.2-1B' with an available model)
    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
    model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")
    inputs = tokenizer(prompt, return_tensors="pt")  # Tokenize the prompt
    outputs = model.generate(**inputs, max_new_tokens=100)  # Generate a response
    input_length = inputs.input_ids.shape[1]  # Number of prompt tokens
    response = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)  # Decode only the newly generated tokens
    return response
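# A minimal usage sketch (the generated wording varies from run to run and
# assumes access to the meta-llama/Llama-3.2-1B checkpoint):
#
#     reply = response_gen("The delivery was two weeks late.")
#     print(reply)  # e.g. an apology that explains how the delay will be addressed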
##########################################
# Step 3: Text-to-Speech Conversion Function
##########################################
def sound_gen(response):
"""
Convert the generated response to speech and save as a .wav file.
"""
# Load the pre-trained TTS models
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
# Load speaker embeddings (e.g., neutral female voice)
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
# Process the input text and generate a spectrogram
inputs = processor(text=response, return_tensors="pt")
spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
# Use the vocoder to generate a waveform
with torch.no_grad():
speech = vocoder(spectrogram)
# Save the generated speech as a .wav file
sf.write("customer_service_response.wav", speech.numpy(), samplerate=16000)
st.audio("customer_service_response.wav") # Play the audio in Streamlit
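# Optional optimization, shown only as a sketch and not wired into the functions
# above: Streamlit reruns the whole script on every interaction, so each call
# currently reloads its models. st.cache_resource caches heavyweight objects
# across reruns; the hypothetical loader below shows the pattern for the TTS
# models (the same idea applies to the emotion classifier and the Llama model).
@st.cache_resource
def load_tts_models():
    """Load and cache the SpeechT5 processor, model, and vocoder once per session."""
    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
    return processor, model, vocoder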
##########################################
# Main Function
##########################################
def main():
"""
Main function to orchestrate the workflow of sentiment analysis, response generation, and text-to-speech.
"""
if text: # Check if the user entered a comment
response = response_gen(text) # Generate a response
st.write(f"Generated response: {response}") # Display the generated response
sound_gen(response) # Convert the response to speech and play it
# Run the main function
if __name__ == "__main__":
    main()
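# Usage note (assuming this file is saved as app.py): launch the app locally with
#   streamlit run app.py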