File size: 8,037 Bytes
bdb2fb9 47434c8 bdb2fb9 47434c8 bdb2fb9 0c09011 bdb2fb9 0c09011 bdb2fb9 0c09011 bdb2fb9 0c09011 bdb2fb9 623d82f 7101094 bdb2fb9 0c09011 3ea6377 e17ad0e bdb2fb9 3ea6377 bdb2fb9 c655bb6 811fd5e bdb2fb9 606d176 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
# https://huggingface.co/tasks/token-classification
# https://huggingface.co/spacy/en_core_web_sm
# pip install https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
import gradio as gr
import os
import time
import openai
import numpy as np
import pandas as pd
import pickle5 as pickle
import spacy
import en_core_web_sm
import plotly.express as px
# OpenAI credentials come from the environment; an unset variable yields None.
openai.organization = os.getenv('ORGANIZATION')
openai.api_key = os.getenv('API_KEY')

# Pickled DataFrame of reference phrases. quantified_classification reads its
# "embedding", "example", "category" and "label" columns.
df_phrases_path = './df_phrases.pkl'
df_phrases = pd.read_pickle(df_phrases_path)

# spaCy pipeline used by transcript_to_sentences for sentence splitting.
nlp = spacy.load("en_core_web_sm")
def transcript_to_sentences(transcript):
    """Return the text of every sentence the module-level spaCy pipeline finds in *transcript*."""
    parsed = nlp(transcript)
    return [span.text for span in parsed.sents]
def calculate_embeddings_with_gpt3(text, engine="text-similarity-davinci-001", interval = 1.5, verbose=True):
    """Request an embedding for *text* from the OpenAI Embedding endpoint.

    Args:
        text: Input passed as ``input`` to ``openai.Embedding.create``.
        engine: Engine name forwarded to the API call.
        interval: Seconds to sleep before issuing the request.
        verbose: When true, print a progress line containing *text* first.

    Returns:
        The ``'embedding'`` entry of the first item in the response's ``'data'`` list.
    """
    if verbose:
        print(f'Calculating embedding for {text}...')
    # Pause before every request (the delay is applied unconditionally).
    time.sleep(interval)
    api_reply = openai.Embedding.create(input=text, engine=engine)
    first_item = api_reply['data'][0]
    return first_item['embedding']
def quantified_classification(transcript, threshold):
    """Score a transcript against the reference phrases and summarise per tag.

    Every sentence of *transcript* is embedded with
    ``calculate_embeddings_with_gpt3`` and compared, by cosine similarity,
    with each phrase embedding stored in the module-level ``df_phrases``.

    Args:
        transcript: Free text to analyse; it is split into sentences with
            ``transcript_to_sentences``.
        threshold: Similarity cut-off given as a percentage (0-100). It is
            divided by 100 before being compared with the cosine values.

    Returns:
        A 3-tuple ``(res, fig, details)``:

        * ``res`` - dict mapping each tag to the highest similarity found.
        * ``fig`` - Plotly bar chart of those maxima, coloured by whether
          they exceed the threshold, with a dotted line at the threshold.
        * ``details`` - DataFrame holding, per tag, the three
          (sentence, phrase) pairs with the highest similarity.
    """
    # Reference data is read positionally so it always lines up with the rows
    # of `targets`, whatever index df_phrases carries.
    targets = np.array([np.asarray(vector) for vector in df_phrases["embedding"]])
    # Loop-invariant: the phrase norms do not depend on the sentence.
    target_norms = np.linalg.norm(targets, axis=1)
    examples = df_phrases["example"].tolist()
    categories = df_phrases["category"].tolist()
    labels = df_phrases["label"].tolist()

    # Collect plain dicts and build the frame once: DataFrame.append copied the
    # whole frame on every call and no longer exists in pandas >= 2.0.
    rows = []
    for line, sentence in enumerate(transcript_to_sentences(transcript), start=1):
        source = np.array(calculate_embeddings_with_gpt3(sentence))
        cosine = np.dot(targets, source) / (target_norms * np.linalg.norm(source))
        # One row per (sentence, phrase) pair; covers every phrase in
        # df_phrases instead of a hard-coded count of 64.
        for position, similarity in enumerate(cosine.flatten()):
            rows.append({
                'line': line,
                'sentence': sentence,
                'phrase': examples[position],
                'category': categories[position],
                'tag': labels[position],
                'similarity': float(similarity),
            })
    df_results = pd.DataFrame(
        rows,
        columns=['line', 'sentence', 'phrase', 'category', 'tag', 'similarity'],
    )

    # The UI slider delivers a percentage; cosines live on a 0-1 scale.
    threshold = threshold / 100

    # Best similarity per tag, flagged by whether it clears the threshold.
    df_summary = df_results.groupby(['tag'])['similarity'].agg('max').to_frame()
    df_summary['ok'] = df_summary['similarity'] > threshold

    fig = px.bar(
        df_summary,
        y='similarity',
        color='ok',
        color_discrete_map={ True: px.colors.qualitative.Plotly[2], False: px.colors.qualitative.Set2[7] },
        text='similarity',
        text_auto='.3f',
        labels={'tag': 'Category', 'similarity': 'Similarity'},
        title = f"{transcript[:200]}..."
    )
    fig.add_shape( # add a horizontal "target" line
        type="line", line_color="salmon", line_width=3, opacity=1, line_dash="dot",
        x0=0, x1=1, xref="paper", y0=threshold, y1=threshold, yref="y"
    )
    fig.update_traces(textfont_size=24, textangle=0, textposition="inside", cliponaxis=False)
    fig.update_yaxes(range=[0, 1])

    # Top three matches per tag, best first, with a fresh 0..n-1 index.
    details = (
        df_results.drop(labels='line', axis=1)
        .sort_values(['tag', 'similarity'], ascending=[True, False])
        .groupby('tag')
        .head(3)
        .reset_index(drop=True)
    )
    res = df_summary['similarity'].to_dict()
    return res, fig, details
# Gradio UI
# Builds the Blocks page (transcript box, threshold slider, button and the
# three outputs of quantified_classification) and then launches it.
# NOTE(review): the nesting below is reconstructed - the original indentation
# was not recoverable, so confirm which components share a gr.Row().
with gr.Blocks(css=".gradio-container { background-color: white; background-image: url('file=./qc-logo.png'); background-size: 75px 75px; background-repeat: no-repeat; background-position: 0px 0px; }") as demo:
    # NOTE(review): the title is padded with eight characters, presumably to
    # clear the 75px logo set in the CSS above - confirm the padding character,
    # since ordinary leading spaces are usually collapsed in a Markdown heading.
    gr.Markdown(f"# {' ' * 8}Transcript classifier with GPT-3")
    with gr.Row():
        transcript = gr.Textbox(lines=3, label="Transcript", placeholder="Transcript Here...")
    with gr.Row():
        # Slider runs 0-100 with 80 as its initial value; quantified_classification
        # divides the value by 100 before comparing it with similarities.
        threshold = gr.Slider(0, 100, 80)
        btn = gr.Button(value="Analyze!", variant="primary")
    with gr.Row():
        label = gr.Label()
        plot = gr.Plot()
    with gr.Row():
        grid = gr.Dataframe(wrap=True)
    # The three return values (res, fig, details) map onto label, plot and grid in order.
    btn.click(fn=quantified_classification, inputs=[transcript,threshold], outputs=[label,plot,grid])
    # Each example is [transcript text, threshold], matching the inputs list below.
    gr.Examples(
        [
            [ "Oh, so the quantified platform is one of the most advanced communication intelligence in AI powered coaching systems. And what does that really mean? So, um, communication coaching is something that is typically delivered one on one between a communication coach who has a, uh, a doctorate or a, um, background and experience in teaching people how to be better communicators and how to express themselves effectively. Um, those coaches would work one-on-one with individuals, um, maybe put their information in front of audiences and see how well they respond. And that can be a very costly process as well as a time consuming. And, um, not always backed by the science of what really drives great communication. Thank you very much.", 80 ],
            [ "So you can go from where you are today to being exceptional in the way that you communicate and speak. Who's helped most by quantified. Well, everybody communicates all day as part of their jobs. We actually study that 80% of your time at work is spent communicating. So who's helped most anyone that talks to customers, anyone that talks to other team members, anyone that talks to people for a living is going to be helped the most, really the more that you communicate as a critical component of your job, the more you're gonna be helped. Finally, how can quantified have the greatest impact on my organization?", 80 ],
            [ "It's hard to find time for managers to coach, and it is hard for us to give feedback. It's hard for people to feel empowered, to work on something that is personal and private in a safe space. Uh, how can it have the greatest impact, uh, customer experience communicating with the customer, spending time spending a lot of time, communicating with other members of your team, internal communication and external communication. Uh, we wanna make you remarkably better. We want to make you extraordinary at that behavior.", 80 ],
            [ "So you'll have a good understanding of how you come across a relay back to exactly how you're doing, how you can get better. Um, and coach you using that artificial intelligence. It's able to give you that objective feedback. It's gonna be exactly as if you have the world's best communications coach was sitting, um, there with you in every one of your conversations, telling you how to get better and telling you how to optimize your behavior, who does it help? So literally anyone that has conversations, if you're an entry level person, um, or if you're the most senior executive, you will benefit from our platform, the most powerful group that we can help our customer facing teams, that's sales teams, customer service teams, customer support, and customer success team.", 80 ]
        ],
        [transcript, threshold],
        fn=quantified_classification
    )
# Start the app with debug mode enabled.
demo.launch(debug=True)
|