joey1101 committed on
Commit
464a6d4
·
verified ·
1 Parent(s): 45f3db1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -41
app.py CHANGED
@@ -15,7 +15,6 @@ import torch # For tensor operations
15
  import soundfile as sf # For saving audio as .wav files
16
  import sentencepiece # Required by SpeechT5Processor for tokenization
17
 
18
-
19
  ##########################################
20
  # Streamlit application title and input
21
  ##########################################
@@ -23,19 +22,19 @@ import sentencepiece # Required by SpeechT5Processor for tokenization
23
  st.markdown(
24
  "<h1 style='text-align: center; color: #FF5720; font-size: 50px;'>Just Comment</h1>",
25
  unsafe_allow_html=True
26
- ) # Use HTML and CSS to set a custom title design
27
 
28
- # Display a smaller, gentle and warm subtitle below the title
29
  st.markdown(
30
- "<h3 style='text-align: center; color: #5D6D7E; font-style: italic;'>I'm listening to you, my friend~</h3>",
31
  unsafe_allow_html=True
32
  ) # Use HTML for a friendly and soft-styled subtitle
33
 
34
  # Add a well-designed text area for user input
35
  text = st.text_area(
36
- "Enter your comment",
37
- placeholder="Type something here...",
38
- height=280,
39
  help="Write a comment you would like us to analyze and respond to!" # Provide a helpful tooltip
40
  )
41
 
@@ -47,10 +46,10 @@ def analyze_dominant_emotion(user_review):
47
  Analyze the dominant emotion in the user's comment using a fine-tuned text classification model.
48
  """
49
  emotion_classifier = pipeline(
50
- "text-classification",
51
- model="Thea231/jhartmann_emotion_finetuning",
52
  return_all_scores=True
53
- ) # Load the fine-tuned text classification model
54
 
55
  emotion_results = emotion_classifier(user_review)[0] # Perform sentiment analysis on the input text
56
  dominant_emotion = max(emotion_results, key=lambda x: x['score']) # Identify the emotion with the highest confidence
@@ -134,29 +133,25 @@ def response_gen(user_review):
134
  )
135
  }
136
 
137
- # Select the appropriate prompt based on the user's emotion
138
  prompt = emotion_prompts.get(
139
- emotion_label,
140
  f"Neutral feedback: '{user_review}'\n\nWrite a professional and concise response (50-200 words max).\n\nResponse:"
141
- )
142
 
143
  # Load the tokenizer and language model for response generation
144
- tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B") # Load tokenizer for text processing
145
- model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B") # Load language model for text generation
146
 
147
  inputs = tokenizer(prompt, return_tensors="pt") # Tokenize the input prompt
148
  outputs = model.generate(
149
  **inputs,
150
- max_new_tokens=300, # Limit generated tokens to ensure concise responses
151
- min_length=75, # Ensure the generated response is logical and complete
152
  no_repeat_ngram_size=2, # Avoid repetitive phrases
153
- temperature=0.7 # Add randomness for natural-sounding responses
154
  )
155
-
156
- # Decode the generated response back into text
157
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
158
- print(f"Generated response: {response}") # Debugging: print the response
159
- return response # Return the generated response
160
 
161
  ##########################################
162
  # Step 3: Text-to-Speech Conversion Function
@@ -165,27 +160,23 @@ def sound_gen(response):
165
  """
166
  Convert the generated response to speech and save it as a .wav file.
167
  """
168
- processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts") # Load processor for TTS
169
- model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts") # Load pre-trained TTS model
170
- vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan") # Load vocoder for waveform generation
171
 
172
- # Load neutral female voice embedding from dataset
173
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation") # Load speaker embeddings
174
- speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0) # Use a default speaker embedding
175
 
176
- # Process the input text and generate a spectrogram
177
- inputs = processor(text=response, return_tensors="pt")
178
- spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
179
 
180
- # Use vocoder to convert the spectrogram into a waveform
181
  with torch.no_grad():
182
- speech = vocoder(spectrogram)
183
-
184
- # Save the audio file as .wav
185
- sf.write("customer_service_response.wav", speech.numpy(), samplerate=16000)
186
 
187
- # Create an auto-playing audio player in Streamlit
188
- st.audio("customer_service_response.wav", start_time=0) # Enable audio playback with autoplay
189
 
190
  ##########################################
191
  # Main Function
@@ -195,11 +186,11 @@ def main():
195
  Main function to handle sentiment analysis, response generation, and text-to-speech functionalities.
196
  """
197
  if text: # Check if the user has entered a comment
198
- response = response_gen(text) # Generate a concise and logical response
199
  st.markdown(
200
- f"<p style='color:#2ECC71; font-size:20px;'>{response}</p>",
201
  unsafe_allow_html=True
202
- ) # Display the response in a styled font
203
  sound_gen(response) # Convert the response to speech and play it
204
 
205
  # Execute the main function
 
15
  import soundfile as sf # For saving audio as .wav files
16
  import sentencepiece # Required by SpeechT5Processor for tokenization
17
 
 
18
  ##########################################
19
  # Streamlit application title and input
20
  ##########################################
 
22
  st.markdown(
23
  "<h1 style='text-align: center; color: #FF5720; font-size: 50px;'>Just Comment</h1>",
24
  unsafe_allow_html=True
25
+ ) # Use HTML and CSS for a custom title design
26
 
27
+ # Display a smaller, gentle subtitle below the title
28
  st.markdown(
29
+ "<h3 style='text-align: center; color: #5D6D7E; font-style: italic;'>I'm listening to you, my friend</h3>",
30
  unsafe_allow_html=True
31
  ) # Use HTML for a friendly and soft-styled subtitle
32
 
33
  # Add a well-designed text area for user input
34
  text = st.text_area(
35
+ "Enter your comment",
36
+ placeholder="Type something here...",
37
+ height=150,
38
  help="Write a comment you would like us to analyze and respond to!" # Provide a helpful tooltip
39
  )
40
 
 
46
  Analyze the dominant emotion in the user's comment using a fine-tuned text classification model.
47
  """
48
  emotion_classifier = pipeline(
49
+ "text-classification",
50
+ model="Thea231/jhartmann_emotion_finetuning",
51
  return_all_scores=True
52
+ ) # Load the fine-tuned text classification model from Hugging Face
53
 
54
  emotion_results = emotion_classifier(user_review)[0] # Perform sentiment analysis on the input text
55
  dominant_emotion = max(emotion_results, key=lambda x: x['score']) # Identify the emotion with the highest confidence
 
133
  )
134
  }
135
 
 
136
  prompt = emotion_prompts.get(
137
+ emotion_label,
138
  f"Neutral feedback: '{user_review}'\n\nWrite a professional and concise response (50-200 words max).\n\nResponse:"
139
+ ) # Default to neutral if emotion is not found
140
 
141
  # Load the tokenizer and language model for response generation
142
+ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B") # Load tokenizer for processing text inputs
143
+ model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B") # Load language model for response generation
144
 
145
  inputs = tokenizer(prompt, return_tensors="pt") # Tokenize the input prompt
146
  outputs = model.generate(
147
  **inputs,
148
+ max_new_tokens=300,
149
+ min_length=75, # Ensure concise and complete responses
150
  no_repeat_ngram_size=2, # Avoid repetitive phrases
151
+ temperature=0.7 # Add randomness for more natural responses
152
  )
153
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True) # Decode the generated response
154
+ return response # Return the response
 
 
 
155
 
156
  ##########################################
157
  # Step 3: Text-to-Speech Conversion Function
 
160
  """
161
  Convert the generated response to speech and save it as a .wav file.
162
  """
163
+ processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts") # Pre-trained processor for TTS
164
+ model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts") # Pre-trained TTS model
165
+ vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan") # Vocoder for generating waveforms
166
 
167
+ # Create speaker embedding to match text input
168
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation") # Load speaker embeddings
169
+ speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0) # Use a default embedding
170
 
171
+ inputs = processor(text=response, return_tensors="pt") # Process text for spectrogram generation
172
+ inputs["input_ids"] = inputs["input_ids"].to(torch.int32) # Match tensor format (fix runtime error)
173
+ spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings) # Generate the spectrogram
174
 
 
175
  with torch.no_grad():
176
+ speech = vocoder(spectrogram) # Convert spectrogram to waveform
 
 
 
177
 
178
+ sf.write("customer_service_response.wav", speech.numpy(), samplerate=16000) # Save as .wav file
179
+ st.audio("customer_service_response.wav", start_time=0) # Embed an audio player starting at 0 s (autoplay is not requested by this call)
180
 
181
  ##########################################
182
  # Main Function
 
186
  Main function to handle sentiment analysis, response generation, and text-to-speech functionalities.
187
  """
188
  if text: # Check if the user has entered a comment
189
+ response = response_gen(text) # Generate the response
190
  st.markdown(
191
+ f"<p style='color:#3498DB; font-size:20px;'>{response}</p>",
192
  unsafe_allow_html=True
193
+ ) # Display the response with styled formatting
194
  sound_gen(response) # Convert the response to speech and play it
195
 
196
  # Execute the main function