Commit
·
0c09011
1
Parent(s):
03ceb87
Update app.py
Browse files
app.py
CHANGED
@@ -39,8 +39,8 @@ def calculate_embeddings_with_gpt3(text, engine="text-similarity-davinci-001", i
|
|
39 |
)
|
40 |
embedding = response['data'][0]['embedding']
|
41 |
return embedding
|
42 |
-
|
43 |
-
|
44 |
|
45 |
df_sentences = pd.DataFrame(columns=['line', 'sentence', 'embedding'])
|
46 |
for idx, sentence in enumerate(transcript_to_sentences(transcript)):
|
@@ -60,14 +60,14 @@ def calculate_embeddings_with_gpt3(text, engine="text-similarity-davinci-001", i
|
|
60 |
df_cosines = pd.DataFrame(columns=['line'])
|
61 |
|
62 |
for i, row in df_sentences.iterrows():
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
|
72 |
df_cosines['line'] = df_cosines['line'].astype('int')
|
73 |
# print(df_cosines.shape)
|
@@ -115,13 +115,11 @@ def calculate_embeddings_with_gpt3(text, engine="text-similarity-davinci-001", i
|
|
115 |
title = f"{transcript[:200]}..."
|
116 |
)
|
117 |
fig.add_shape( # add a horizontal "target" line
|
118 |
-
|
119 |
-
|
120 |
)
|
121 |
fig.update_traces(textfont_size=24, textangle=0, textposition="inside", cliponaxis=False)
|
122 |
-
fig.update_yaxes(
|
123 |
-
range=[0, 1]
|
124 |
-
)
|
125 |
# fig.show()
|
126 |
|
127 |
details = df_results.drop(labels='line',axis=1).sort_values(['tag','similarity'],ascending=[True,False]).groupby('tag').head(3).reset_index().drop(labels='index',axis=1)
|
@@ -130,6 +128,8 @@ def calculate_embeddings_with_gpt3(text, engine="text-similarity-davinci-001", i
|
|
130 |
|
131 |
return res, fig, details
|
132 |
|
|
|
|
|
133 |
with gr.Blocks(css=".gradio-container { background-color: white; background-image: url('file=,/qc-logo.png'); background-size: 75px 75px; background-repeat: no-repeat; background-position: 0px 0px; }") as demo:
|
134 |
gr.Markdown("# Transcript classifier")
|
135 |
with gr.Row():
|
|
|
39 |
)
|
40 |
embedding = response['data'][0]['embedding']
|
41 |
return embedding
|
42 |
+
|
43 |
+
def quantified_classification(transcript, threshold):
|
44 |
|
45 |
df_sentences = pd.DataFrame(columns=['line', 'sentence', 'embedding'])
|
46 |
for idx, sentence in enumerate(transcript_to_sentences(transcript)):
|
|
|
60 |
df_cosines = pd.DataFrame(columns=['line'])
|
61 |
|
62 |
for i, row in df_sentences.iterrows():
|
63 |
+
line = f'{row["line"]:03}'
|
64 |
+
# print(f'Calculating cosines for [ {line} ] {row["sentence"][:50]}...')
|
65 |
+
source = np.array(row["embedding"])
|
66 |
+
cosine = np.dot(targets,source)/(np.linalg.norm(targets, axis=1)*np.linalg.norm(source))
|
67 |
+
# Create new row
|
68 |
+
new_row = dict([(f"Cosine{f'{key:02}'}", value) for key, value in enumerate(cosine.flatten(), 1)])
|
69 |
+
new_row["line"] = row["line"]
|
70 |
+
df_cosines = df_cosines.append(new_row, ignore_index=True)
|
71 |
|
72 |
df_cosines['line'] = df_cosines['line'].astype('int')
|
73 |
# print(df_cosines.shape)
|
|
|
115 |
title = f"{transcript[:200]}..."
|
116 |
)
|
117 |
fig.add_shape( # add a horizontal "target" line
|
118 |
+
type="line", line_color="salmon", line_width=3, opacity=1, line_dash="dot",
|
119 |
+
x0=0, x1=1, xref="paper", y0=threshold, y1=threshold, yref="y"
|
120 |
)
|
121 |
fig.update_traces(textfont_size=24, textangle=0, textposition="inside", cliponaxis=False)
|
122 |
+
fig.update_yaxes(range=[0, 1])
|
|
|
|
|
123 |
# fig.show()
|
124 |
|
125 |
details = df_results.drop(labels='line',axis=1).sort_values(['tag','similarity'],ascending=[True,False]).groupby('tag').head(3).reset_index().drop(labels='index',axis=1)
|
|
|
128 |
|
129 |
return res, fig, details
|
130 |
|
131 |
+
# Gradio UI
|
132 |
+
|
133 |
with gr.Blocks(css=".gradio-container { background-color: white; background-image: url('file=,/qc-logo.png'); background-size: 75px 75px; background-repeat: no-repeat; background-position: 0px 0px; }") as demo:
|
134 |
gr.Markdown("# Transcript classifier")
|
135 |
with gr.Row():
|