joey1101 committed on
Commit a735e8f · verified · 1 Parent(s): 9e4badf

Update app.py

Files changed (1)
  1. app.py +28 -26
app.py CHANGED
@@ -2,7 +2,6 @@
 # Step 0: Import required libraries
 ##########################################
 import streamlit as st # For building the web application interface
-import soundfile as sf # For saving audio as .wav files
 from transformers import (
     pipeline,
     SpeechT5Processor,
@@ -12,6 +11,9 @@ from transformers import (
     AutoTokenizer
 ) # For sentiment analysis, text-to-speech, and text generation
 from datasets import load_dataset # For loading datasets (e.g., speaker embeddings)
+import torch # For tensor operations
+import soundfile as sf # For saving audio as .wav files
+import sentencepiece # Required by SpeechT5Processor for tokenization
 
 
 ##########################################
@@ -27,7 +29,7 @@ st.markdown(
 st.markdown(
     "<h3 style='text-align: center; color: #5D6D7E; font-style: italic;'>I'm listening to you, my friend</h3>",
     unsafe_allow_html=True
-) # Use HTML to add a friendly and soft-styled subtitle
+) # Use HTML for a friendly and soft-styled subtitle
 
 # Add a well-designed text area for user input
 text = st.text_area(
@@ -48,17 +50,17 @@ def analyze_dominant_emotion(user_review):
         "text-classification",
         model="Thea231/jhartmann_emotion_finetuning",
         return_all_scores=True
-    ) # Load the fine-tuned text classification model from Hugging Face
+    ) # Load the fine-tuned text classification model
 
     emotion_results = emotion_classifier(user_review)[0] # Perform sentiment analysis on the input text
-    dominant_emotion = max(emotion_results, key=lambda x: x['score']) # Identify the emotion with the highest confidence score
+    dominant_emotion = max(emotion_results, key=lambda x: x['score']) # Identify the emotion with the highest confidence
     return dominant_emotion # Return the dominant emotion (label and score)
 
-
 ##########################################
 # Step 2: Response Generation Function
 ##########################################
 
+
 def response_gen(user_review):
     """
     Generate a concise and logical response based on the sentiment of the user's comment.
@@ -132,28 +134,28 @@ def response_gen(user_review):
         )
     }
 
-    # Select the appropriate prompt based on the user's emotion or default to neutral
+    # Select the appropriate prompt based on the user's emotion
    prompt = emotion_prompts.get(
         emotion_label,
         f"Neutral feedback: '{user_review}'\n\nWrite a professional and concise response (50-200 words max).\n\nResponse:"
     )
 
     # Load the tokenizer and language model for response generation
-    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B") # Load tokenizer for processing text inputs
+    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B") # Load tokenizer for text processing
     model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B") # Load language model for text generation
 
     inputs = tokenizer(prompt, return_tensors="pt") # Tokenize the input prompt
     outputs = model.generate(
         **inputs,
-        max_new_tokens=300, # Set an upper limit on token generation to ensure concise output
-        min_length=75, # Set a minimum length to ensure the response is complete
+        max_new_tokens=300, # Limit generated tokens to ensure concise responses
+        min_length=75, # Ensure the generated response is logical and complete
         no_repeat_ngram_size=2, # Avoid repetitive phrases
-        temperature=0.7 # Add randomness for more natural responses
+        temperature=0.7 # Add randomness for natural-sounding responses
     )
 
-    # Decode the generated response back into readable text
+    # Decode the generated response back into text
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print(f"{response}") # Print the response for debugging
+    print(f"Generated response: {response}") # Debugging: print the response
     return response # Return the generated response
 
 ##########################################
@@ -163,43 +165,43 @@ def sound_gen(response):
     """
     Convert the generated response to speech and save it as a .wav file.
     """
-    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts") # Pre-trained processor for TTS
-    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts") # Pre-trained TTS model
-    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan") # Vocoder for generating waveforms
+    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts") # Load processor for TTS
+    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts") # Load pre-trained TTS model
+    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan") # Load vocoder for waveform generation
 
-    # Load a neutral female voice embedding from a pre-trained dataset
+    # Load neutral female voice embedding from dataset
     embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation") # Load speaker embeddings
     speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0) # Use a default speaker embedding
 
-    # Process the input text and create a speech spectrogram
+    # Process the input text and generate a spectrogram
     inputs = processor(text=response, return_tensors="pt")
     spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
 
-    # Convert the spectrogram into an audio waveform using the vocoder
+    # Use vocoder to convert the spectrogram into a waveform
     with torch.no_grad():
         speech = vocoder(spectrogram)
 
-    # Save the audio as a .wav file
+    # Save the audio file as .wav
     sf.write("customer_service_response.wav", speech.numpy(), samplerate=16000)
 
-    # Embed an auto-playing audio player in the web app
-    st.audio("customer_service_response.wav", start_time=0) # Allow audio playback with autoplay feature
+    # Create an auto-playing audio player in Streamlit
+    st.audio("customer_service_response.wav", start_time=0) # Enable audio playback with autoplay
 
 ##########################################
 # Main Function
 ##########################################
 def main():
     """
-    Main function to orchestrate the workflow of sentiment analysis, response generation, and text-to-speech.
+    Main function to handle sentiment analysis, response generation, and text-to-speech functionalities.
     """
     if text: # Check if the user has entered a comment
-        response = response_gen(text) # Generate a logical and concise response
+        response = response_gen(text) # Generate a concise and logical response
         st.markdown(
             f"<p style='color:#2ECC71; font-size:20px;'>{response}</p>",
             unsafe_allow_html=True
-        ) # Display the generated response in a cute, styled font
-        sound_gen(response) # Convert the response to speech and make it available for playback
+        ) # Display the response in a styled font
+        sound_gen(response) # Convert the response to speech and play it
 
-# Run the main function when the script is executed
+# Execute the main function
 if __name__ == "__main__":
     main()
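
With the new torch, soundfile, and sentencepiece imports in place, the changed path can be exercised outside Streamlit. The sketch below is a reviewer-side smoke test, not part of the commit: the model IDs, speaker-embedding index 7306, and generation calls are taken from the diff above, while the sample review text and the smoke_test.wav output name are made up for illustration.

import torch
import soundfile as sf
from datasets import load_dataset
from transformers import (
    pipeline,
    SpeechT5Processor,
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
)

# 1) Emotion classification, mirroring analyze_dominant_emotion()
classifier = pipeline(
    "text-classification",
    model="Thea231/jhartmann_emotion_finetuning",
    return_all_scores=True,
)
scores = classifier("The delivery was late and the box arrived damaged.")[0]  # sample input, not from the app
dominant = max(scores, key=lambda x: x["score"])
print(dominant["label"], round(dominant["score"], 3))

# 2) Text-to-speech, mirroring sound_gen()
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

embeddings = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker = torch.tensor(embeddings[7306]["xvector"]).unsqueeze(0)  # same default speaker as the app

inputs = processor(text="Thank you for your feedback, we are sorry about the delay.", return_tensors="pt")
spectrogram = tts_model.generate_speech(inputs["input_ids"], speaker)
with torch.no_grad():
    speech = vocoder(spectrogram)
sf.write("smoke_test.wav", speech.numpy(), samplerate=16000)  # hypothetical output path for this sketch

If this runs end to end, the sentencepiece dependency resolves correctly for SpeechT5Processor and the .wav write works at the 16 kHz sample rate used in the app.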