Zasha1 committed on
Commit 53f1073 · verified · 1 Parent(s): 326936f

Update sentiment_analysis.py

Files changed (1)
  1. sentiment_analysis.py +78 -80
sentiment_analysis.py CHANGED
@@ -1,14 +1,16 @@
  import os
  import json
  import time
- from speech_recognition import Recognizer, Microphone, AudioData, UnknownValueError, RequestError
+ import numpy as np
+ import sounddevice as sd
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
  from huggingface_hub import login
  from product_recommender import ProductRecommender
- from objection_handler import load_objections, check_objections  # Ensure check_objections is imported
+ from objection_handler import load_objections, check_objections
  from objection_handler import ObjectionHandler
  from env_setup import config
  from sentence_transformers import SentenceTransformer
+ from scipy.io.wavfile import write
  from dotenv import load_dotenv

  # Load environment variables
@@ -24,9 +26,6 @@ model = AutoModelForSequenceClassification.from_pretrained(model_name)
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  sentiment_analyzer = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

- # Speech Recognition Setup
- recognizer = Recognizer()
-
  # Function to analyze sentiment
  def preprocess_text(text):
      """Preprocess text for better sentiment analysis."""
@@ -37,12 +36,12 @@ def analyze_sentiment(text):
      try:
          if not text.strip():
              return "NEUTRAL", 0.0
-
+
          processed_text = preprocess_text(text)
          result = sentiment_analyzer(processed_text)[0]
-
+
          print(f"Sentiment Analysis Result: {result}")
-
+
          # Map raw labels to sentiments
          sentiment_map = {
              'Very Negative': "NEGATIVE",
@@ -51,14 +50,35 @@ def analyze_sentiment(text):
              'Positive': "POSITIVE",
              'Very Positive': "POSITIVE"
          }
-
+
          sentiment = sentiment_map.get(result['label'], "NEUTRAL")
          return sentiment, result['score']
-
+
      except Exception as e:
          print(f"Error in sentiment analysis: {e}")
          return "NEUTRAL", 0.5

+ def record_audio(duration=5, sample_rate=44100):
+     """Record audio for a specified duration."""
+     print("Recording audio...")
+     audio = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, dtype='float32')
+     sd.wait()  # Wait for the recording to finish
+     print("Recording completed.")
+     return np.squeeze(audio)
+
+ def transcribe_audio(audio, sample_rate=44100):
+     """Transcribe recorded audio using a speech-to-text API."""
+     try:
+         # Save audio to a temporary file for transcription
+         audio_file = "temp_audio.wav"
+         write(audio_file, sample_rate, audio)
+         # Call an external transcription service (e.g., Whisper, AssemblyAI, or Google)
+         transcription = "Example transcription text from audio."  # Placeholder
+         return transcription
+     except Exception as e:
+         print(f"Error in audio transcription: {e}")
+         return ""
+
  def transcribe_with_chunks(objections_dict):
      """Perform real-time transcription with sentiment analysis."""
      print("Say 'start listening' to begin transcription. Say 'stop listening' to stop.")
@@ -68,86 +88,64 @@ def transcribe_with_chunks(objections_dict):
      chunk_start_time = time.time()

      # Initialize handlers with semantic search capabilities
-     objection_handler = ObjectionHandler(r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet3.csv")
-     product_recommender = ProductRecommender(r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet2.csv")
+     objection_handler = ObjectionHandler("path_to_objections.csv")
+     product_recommender = ProductRecommender("path_to_recommendations.csv")

      # Load the embeddings model once
      model = SentenceTransformer('all-MiniLM-L6-v2')

      try:
-         with Microphone() as source:
-             recognizer.adjust_for_ambient_noise(source)
-             print("Microphone calibrated. Please speak.")
-
-             while True:
-                 print("Listening for speech...")
-                 try:
-                     audio_data = recognizer.listen(source, timeout=5)
-                     text = recognizer.recognize_google(audio_data)
-
-                     if "start listening" in text.lower():
-                         is_listening = True
-                         print("Listening started. Speak into the microphone.")
-                         continue
-                     elif "stop listening" in text.lower():
-                         is_listening = False
-                         print("Listening stopped.")
-                         if current_chunk:
-                             chunk_text = " ".join(current_chunk)
-                             sentiment, score = analyze_sentiment(chunk_text)
-                             chunks.append((chunk_text, sentiment, score))
-                             current_chunk = []
-                         continue
-
-                     if is_listening and text.strip():
-                         print(f"Transcription: {text}")
-                         current_chunk.append(text)
-
-                         if time.time() - chunk_start_time > 3:
-                             if current_chunk:
-                                 chunk_text = " ".join(current_chunk)
-
-                                 # Always process sentiment
-                                 sentiment, score = analyze_sentiment(chunk_text)
-                                 chunks.append((chunk_text, sentiment, score))
-
-                                 # Get objection responses and check similarity score
-                                 query_embedding = model.encode([chunk_text])
-                                 distances, indices = objection_handler.index.search(query_embedding, 1)
-
-                                 # If similarity is high enough, show objection response
-                                 if distances[0][0] < 1.5:  # Threshold for similarity
-                                     responses = objection_handler.handle_objection(chunk_text)
-                                     if responses:
-                                         print("\nSuggested Response:")
-                                         for response in responses:
-                                             print(f"→ {response}")
-
-                                 # Get product recommendations and check similarity score
-                                 distances, indices = product_recommender.index.search(query_embedding, 1)
-
-                                 # If similarity is high enough, show recommendations
-                                 if distances[0][0] < 1.5:  # Threshold for similarity
-                                     recommendations = product_recommender.get_recommendations(chunk_text)
-                                     if recommendations:
-                                         print(f"\nRecommendations for this response:")
-                                         for idx, rec in enumerate(recommendations, 1):
-                                             print(f"{idx}. {rec}")
-
-                                 print("\n")
-                                 current_chunk = []
-                                 chunk_start_time = time.time()
-                 except UnknownValueError:
-                     print("Could not understand the audio.")
-                 except RequestError as e:
-                     print(f"Could not request results from Google Speech Recognition service; {e}")
+         while True:
+             if not is_listening:
+                 command = input("Enter 'start' to begin listening or 'stop' to quit: ").lower()
+                 if command == "start":
+                     is_listening = True
+                     print("Listening started. Speak into the microphone.")
+                     continue
+                 elif command == "stop":
+                     break
+
+             # Record and process audio in chunks
+             audio_data = record_audio(duration=5)
+             text = transcribe_audio(audio_data)
+             if text.strip():
+                 print(f"Transcription: {text}")
+                 current_chunk.append(text)
+
+                 if time.time() - chunk_start_time > 3:
+                     if current_chunk:
+                         chunk_text = " ".join(current_chunk)
+
+                         # Process sentiment
+                         sentiment, score = analyze_sentiment(chunk_text)
+                         chunks.append((chunk_text, sentiment, score))
+
+                         # Handle objections and recommendations
+                         query_embedding = model.encode([chunk_text])
+                         responses = objection_handler.handle_objection(chunk_text)
+                         recommendations = product_recommender.get_recommendations(chunk_text)
+
+                         # Print results
+                         if responses:
+                             print("\nSuggested Response:")
+                             for response in responses:
+                                 print(f"→ {response}")
+                         if recommendations:
+                             print("\nRecommendations for this response:")
+                             for idx, rec in enumerate(recommendations, 1):
+                                 print(f"{idx}. {rec}")
+
+                         print("\n")
+                         current_chunk = []
+                         chunk_start_time = time.time()

      except KeyboardInterrupt:
          print("\nExiting...")
-     return chunks
+
+     return chunks

  if __name__ == "__main__":
-     objections_file_path = r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet3.csv"
+     objections_file_path = "path_to_objections.csv"
      objections_dict = load_objections(objections_file_path)
      transcribed_chunks = transcribe_with_chunks(objections_dict)
      print("Final transcriptions and sentiments:", transcribed_chunks)
 