File size: 5,618 Bytes
152d61c
 
 
e4cf4e2
c39c802
152d61c
 
 
 
 
 
e4cf4e2
c39c802
152d61c
 
c39c802
152d61c
 
 
e4cf4e2
 
 
152d61c
 
f26186a
152d61c
 
 
e4cf4e2
152d61c
 
c39c802
 
152d61c
e4cf4e2
c39c802
e4cf4e2
 
 
 
152d61c
c39c802
152d61c
 
b70e6a4
e4cf4e2
b70e6a4
e4cf4e2
 
 
 
 
c39c802
 
e4cf4e2
c39c802
 
 
 
 
 
 
e4cf4e2
c39c802
 
 
 
 
 
e4cf4e2
c39c802
e4cf4e2
 
 
 
 
 
 
 
 
 
 
 
 
152d61c
 
 
 
 
 
e4cf4e2
152d61c
e4cf4e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152d61c
 
 
 
 
 
e4cf4e2
152d61c
e4cf4e2
 
 
 
c39c802
e4cf4e2
152d61c
c39c802
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
##########################################
# Step 0: Import required libraries
##########################################
import streamlit as st  # For building the web application
from transformers import (
    pipeline,
    SpeechT5Processor,
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    AutoModelForCausalLM,
    AutoTokenizer
)  # For emotion analysis, text-to-speech, and text generation
from datasets import load_dataset  # For loading datasets (e.g., speaker embeddings)
import torch  # For tensor operations
import soundfile as sf  # For saving audio as .wav files

##########################################
# Streamlit application title and input
##########################################
# Page heading
st.title("Comment Reply for You")
# One-line description shown to the user
st.write("Generate automatic replies for user comments")
# Comment input box; its current contents are read later by main()
text = st.text_area(label="Enter your comment", value="")

##########################################
# Step 1: Sentiment Analysis Function
##########################################
def analyze_dominant_emotion(user_review):
    """
    Classify the emotion of a review and return the top-scoring prediction.

    The classifier is asked for the scores of every label; the entry whose
    'score' is largest is returned as-is (callers read its 'label' key).
    """
    def _score_of(prediction):
        # Sort key: the confidence value attached to one label prediction
        return prediction['score']

    # NOTE: the pipeline (and therefore the model) is constructed on every call.
    classifier = pipeline(
        task="text-classification",
        model="Thea231/jhartmann_emotion_finetuning",
        return_all_scores=True,
    )

    # The pipeline output is indexed at [0] to get the per-label predictions for this single review
    per_label_predictions = classifier(user_review)[0]
    return max(per_label_predictions, key=_score_of)

##########################################
# Step 2: Response Generation Function
##########################################
def response_gen(user_review):
    """
    Generate a customer-service reply tailored to the dominant emotion of the review.

    Args:
        user_review: The customer's comment text.

    Returns:
        The generated reply as a string. Only the newly generated tokens are
        decoded; the prompt tokens are sliced off first.
    """
    dominant_emotion = analyze_dominant_emotion(user_review)  # Get the dominant emotion
    emotion_label = dominant_emotion['label'].lower()  # Extract emotion label

    # Define response templates for each emotion
    emotion_prompts = {
        "anger": (
            "Customer complaint: '{review}'\n\n"
            "As a customer service representative, write a response that:\n"
            "- Sincerely apologizes for the issue\n"
            "- Explains how the issue will be resolved\n"
            "- Offers compensation where appropriate\n\n"
            "Response:"
        ),
        "joy": (
            "Customer review: '{review}'\n\n"
            "As a customer service representative, write a positive response that:\n"
            "- Thanks the customer for their feedback\n"
            "- Acknowledges both positive and constructive comments\n"
            "- Invites them to explore loyalty programs\n\n"
            "Response:"
        ),
        # Add other emotions as needed...
    }

    # Fallback for any emotion without a dedicated template. The fallback used
    # to be the bare string "Neutral", which sent the model a prompt containing
    # neither the review nor any instructions; this template keeps both.
    default_prompt = (
        "Customer comment: '{review}'\n\n"
        "As a customer service representative, write a polite and helpful response that:\n"
        "- Acknowledges the customer's comment\n"
        "- Addresses any question or concern raised\n"
        "- Offers further assistance if needed\n\n"
        "Response:"
    )

    # Format the prompt with the user's review
    prompt = emotion_prompts.get(emotion_label, default_prompt).format(review=user_review)

    # Use a Llama-based causal language model to continue the prompt
    # (replace 'meta-llama/Llama-3.2-1B' with an available model)
    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
    model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")
    inputs = tokenizer(prompt, return_tensors="pt")  # Tokenize the prompt
    outputs = model.generate(**inputs, max_new_tokens=100)  # Generate at most 100 new tokens

    # The output sequence starts with the prompt tokens; skip them so that
    # only the model's continuation is returned.
    input_length = inputs.input_ids.shape[1]
    response = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
    return response

##########################################
# Step 3: Text-to-Speech Conversion Function
##########################################
def sound_gen(response):
    """
    Synthesize speech for the reply text, write it to a .wav file, and play it in Streamlit.
    """
    wav_path = "customer_service_response.wav"
    sample_rate_hz = 16000

    # SpeechT5 text-to-speech components: text processor, acoustic model, vocoder
    tts_processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
    tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
    hifigan = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

    # Speaker embedding: x-vector at index 7306 of the validation split,
    # with a leading dimension added via unsqueeze(0)
    xvector_rows = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    voice_embedding = torch.tensor(xvector_rows[7306]["xvector"]).unsqueeze(0)

    # Tokenize the reply and run the acoustic model to obtain a spectrogram
    token_ids = tts_processor(text=response, return_tensors="pt")["input_ids"]
    mel_spectrogram = tts_model.generate_speech(token_ids, voice_embedding)

    # Turn the spectrogram into a waveform; no gradients are needed
    with torch.no_grad():
        waveform = hifigan(mel_spectrogram)

    # Persist the audio, then hand the file to Streamlit for playback
    sf.write(wav_path, waveform.numpy(), samplerate=sample_rate_hz)
    st.audio(wav_path)

##########################################
# Main Function
##########################################
def main():
    """
    Run the full workflow for the entered comment: generate a reply, show it, and speak it.
    """
    # Nothing to do until the user has typed a comment
    if not text:
        return

    reply = response_gen(text)  # Emotion-aware reply text
    st.write(f"Generated response: {reply}")  # Show the reply on the page
    sound_gen(reply)  # Synthesize and play the reply as audio

# Entry point: call main() only when this file is executed as the main module
if __name__ == "__main__":
    main()