Create functions for each visualization

#18
app.py CHANGED
@@ -30,15 +30,20 @@ def example_display(input):
30
  display = False
31
  return [gr.update(visible=display),gr.update(visible=display)]
32
 
33
- css= ".file_marker .large{min-height:50px !important;} .example_btn{max-width:300px;}"
34
 
35
  with gr.Blocks(css=css, delete_cache=(3600,3600)) as demo:
36
  title = gr.HTML("<h1 style='text-align:center;'>Virtual Data Analyst</h1>")
37
- description = gr.HTML("""<p style='text-align:center;'>Upload a data file and chat with our virtual data analyst
38
- to get insights on your data set. Currently accepts CSV, TSV, TXT, XLS, XLSX, XML, and JSON files.
39
- Can now generate charts and graphs! Can run linear regressions!
40
  Try a sample file to get started!</p>
41
- <p style='text-align:center;'>This tool is under active development. If you experience bugs with use,
 
 
 
 
 
 
42
  open a discussion in the community tab and I will respond.</p>""")
43
  example_file_1 = gr.File(visible=False, value="samples/bank_marketing_campaign.csv")
44
  example_file_2 = gr.File(visible=False, value="samples/online_retail_data.csv")
 
30
  display = False
31
  return [gr.update(visible=display),gr.update(visible=display)]
32
 
33
+ css= ".file_marker .large{min-height:50px !important;} .example_btn{max-width:300px;} .padding{padding:0;}"
34
 
35
  with gr.Blocks(css=css, delete_cache=(3600,3600)) as demo:
36
  title = gr.HTML("<h1 style='text-align:center;'>Virtual Data Analyst</h1>")
37
+ description = gr.HTML("""<p style='text-align:center;'>A helpful tool for data analysis, visualizations, regressions, and more.
38
+ Upload a data file and chat with our virtual data analyst to get insights on your data set.
 
39
  Try a sample file to get started!</p>
40
+ <div style="margin:auto;max-width: 500px;">
41
+ <p style="margin:0;font-style:italic;">Currently accepts CSV, TSV, TXT, XLS, XLSX, XML, and JSON files.</p>
42
+ <p style="margin:0;font-style:italic;">Can run SQL queries, linear regressions, and analyze the results.</p>
43
+ <p style="margin:0;font-style:italic;">Can generate scatter plots, line charts, pie charts, bar graphs, histograms, time series, and more.
44
+ New visualizations types added regularly.</p>
45
+ </div>
46
+ <p style='text-align:center;'>This application is under active development. If you experience bugs with use,
47
  open a discussion in the community tab and I will respond.</p>""")
48
  example_file_1 = gr.File(visible=False, value="samples/bank_marketing_campaign.csv")
49
  example_file_2 = gr.File(visible=False, value="samples/online_retail_data.csv")
functions/__init__.py CHANGED
@@ -1,6 +1,9 @@
1
  from .sqlite_functions import SQLiteQuery, sqlite_query_func
2
- from .chart_functions import chart_generation_func, table_generation_func
 
3
  from .chat_functions import example_question_generator, chatbot_with_fc
4
  from .stat_functions import regression_func
5
 
6
- __all__ = ["SQLiteQuery","sqlite_query_func","chart_generation_func","table_generation_func","regression_func","example_question_generator","chatbot_with_fc"]
 
 
 
1
  from .sqlite_functions import SQLiteQuery, sqlite_query_func
2
+ from .chart_functions import table_generation_func, scatter_chart_generation_func, \
3
+ line_chart_generation_func, bar_chart_generation_func, pie_chart_generation_func, histogram_generation_func, scatter_chart_fig
4
  from .chat_functions import example_question_generator, chatbot_with_fc
5
  from .stat_functions import regression_func
6
 
7
+ __all__ = ["SQLiteQuery","sqlite_query_func","table_generation_func","scatter_chart_generation_func",
8
+ "line_chart_generation_func","bar_chart_generation_func","regression_func", "pie_chart_generation_func", "histogram_generation_func",
9
+ "scatter_chart_fig","example_question_generator","chatbot_with_fc"]
functions/chart_functions.py CHANGED
@@ -12,92 +12,325 @@ load_dotenv()
12
 
13
  root_url = os.getenv("ROOT_URL")
14
 
15
- def chart_generation_func(data: List[dict], x_column: str, y_column: str, graph_type: str, session_hash: str, layout: List[dict]=[{}], category: str=""):
16
- print("CHART GENERATION")
17
- print(data)
18
- print(graph_type)
19
- print(x_column)
20
- print(y_column)
21
- print(category)
22
- print(layout)
23
- try:
24
- dir_path = TEMP_DIR / str(session_hash)
25
- chart_path = f'{dir_path}/chart.html'
26
- csv_query_path = f'{dir_path}/query.csv'
27
 
28
- df = pd.read_csv(csv_query_path)
 
 
 
29
 
30
- #setting up the plotly express objects
31
- if graph_type == "bar":
32
- initial_graph = px.bar(df, x=x_column, y=y_column, barmode="group")
33
- elif graph_type == "scatter":
34
- if category in df.columns:
35
- initial_graph = px.scatter(df, x=x_column, y=y_column, color=category)
36
- else:
37
- initial_graph = px.scatter(df, x=x_column, y=y_column)
38
- elif graph_type == "line":
39
- if category in df.columns:
40
- initial_graph = px.line(df, x=x_column, y=y_column, color=category)
41
- else:
42
- initial_graph = px.line(df, x=x_column, y=y_column)
43
- elif graph_type == "pie":
44
- initial_graph = px.pie(df, x=x_column, y=y_column)
45
-
46
- fig = initial_graph.to_dict()
47
-
48
- #Processing data to account for variation from LLM
49
- data_list = []
50
- layout_dict = {}
51
-
52
- if isinstance(data, list):
53
- data_list = data
54
- else:
55
- data_list.append(data)
56
-
57
- data_dict = {}
58
- for data_obj in data_list:
59
- if isinstance(data_obj, str):
60
- data_obj = data_obj.replace("\n", "")
61
- if not data_obj.startswith('{') or not data_obj.endswith('}'):
62
- data_obj = "{" + data_obj + "}"
63
- data_dict = ast.literal_eval(data_obj)
64
- else:
65
- data_dict = data_obj
66
-
67
- if layout and isinstance(layout, list):
68
- layout_obj = layout[0]
69
- else:
70
- layout_obj = layout
71
-
72
- if layout_obj and isinstance(layout_obj, str):
73
- layout_dict = ast.literal_eval(layout_obj)
74
- else:
75
- layout_dict = layout_obj
76
-
77
- #Applying stylings and settings generated from LLM
78
- if layout_dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  fig["layout"] = layout_dict
80
 
81
- for key, value in data_dict.items():
82
- if key not in ["x","y","type"]:
83
  for data_item in fig["data"]:
84
  data_item[key] = value
85
-
86
- pio.write_html(fig, chart_path, full_html=False)
87
 
88
- chart_url = f'{root_url}/gradio_api/file/temp/{session_hash}/chart.html'
89
 
90
- iframe = '<div style=overflow:auto;><iframe\n scrolling="yes"\n width="1000px"\n height="500px"\n src="' + chart_url + '"\n frameborder="0"\n allowfullscreen\n></iframe>\n</div>'
91
 
92
- return {"reply": iframe}
93
-
94
- except Exception as e:
95
- print("CHART ERROR")
96
  print(e)
97
- reply = f"""There was an error generating the Plotly Chart from {x_column}, {y_column}, {graph_type}, and {layout}
98
- The error is {e},
99
- You should probably try again.
100
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  return {"reply": reply}
102
 
103
  def table_generation_func(session_hash):
 
12
 
13
  root_url = os.getenv("ROOT_URL")
14
 
15
+ def llm_chart_data_scrub(data, layout):
16
+ #Processing data to account for variation from LLM
17
+ data_list = []
18
+ layout_dict = {}
 
 
 
 
 
 
 
 
19
 
20
+ if isinstance(data, list):
21
+ data_list = data
22
+ else:
23
+ data_list.append(data)
24
 
25
+ data_dict = {}
26
+ for data_obj in data_list:
27
+ if isinstance(data_obj, str):
28
+ data_obj = data_obj.replace("\n", "")
29
+ if not data_obj.startswith('{') or not data_obj.endswith('}'):
30
+ data_obj = "{" + data_obj + "}"
31
+ data_dict = ast.literal_eval(data_obj)
32
+ else:
33
+ data_dict = data_obj
34
+
35
+ if layout and isinstance(layout, list):
36
+ layout_obj = layout[0]
37
+ else:
38
+ layout_obj = layout
39
+
40
+ if layout_obj and isinstance(layout_obj, str):
41
+ layout_dict = ast.literal_eval(layout_obj)
42
+ else:
43
+ layout_dict = layout_obj
44
+
45
+ return data_dict, layout_dict
46
+
47
+ def scatter_chart_fig(df, x_column: List[str], y_column: str, category: str="", trendline: str="",
48
+ trendline_options: List[dict]=[{}], marginal_x: str="", marginal_y: str=""):
49
+
50
+ function_args = {"data_frame":df, "x":x_column, "y":y_column}
51
+
52
+ if category:
53
+ function_args["color"] = category
54
+ if trendline:
55
+ function_args["trendline"] = trendline
56
+ if marginal_x:
57
+ function_args["marginal_x"] = marginal_x
58
+ if marginal_y:
59
+ function_args["marginal_y"] = marginal_y
60
+ if trendline_options:
61
+ trendline_options_dict = {}
62
+ if trendline_options and isinstance(trendline_options, list):
63
+ trendline_options_obj = trendline_options[0]
64
+ else:
65
+ trendline_options_obj = trendline_options
66
+
67
+ if trendline_options_obj and isinstance(trendline_options_obj, str):
68
+ trendline_options_dict = ast.literal_eval(trendline_options_obj)
69
+ else:
70
+ trendline_options_dict = trendline_options_obj
71
+ function_args["trendline_options"] = trendline_options_dict
72
+
73
+ fig = px.scatter(**function_args)
74
+
75
+ return fig
76
+
77
+ def scatter_chart_generation_func(x_column: List[str], y_column: str, session_hash, data: List[dict]=[{}], layout: List[dict]=[{}],
78
+ category: str="", trendline: str="", trendline_options: List[dict]=[{}], marginal_x: str="", marginal_y: str=""):
79
+ try:
80
+ dir_path = TEMP_DIR / str(session_hash)
81
+ chart_path = f'{dir_path}/chart.html'
82
+ csv_query_path = f'{dir_path}/query.csv'
83
+
84
+ df = pd.read_csv(csv_query_path)
85
+
86
+ initial_graph = scatter_chart_fig(df, x_column=x_column, y_column=y_column,
87
+ category=category, trendline=trendline, trendline_options=trendline_options,
88
+ marginal_x=marginal_x, marginal_y=marginal_y)
89
+
90
+ fig = initial_graph.to_dict()
91
+
92
+ data_dict,layout_dict = llm_chart_data_scrub(data,layout)
93
+
94
+ print(data_dict)
95
+ print(layout_dict)
96
+
97
+ #Applying stylings and settings generated from LLM
98
+ if layout_dict:
99
  fig["layout"] = layout_dict
100
 
101
+ for key, value in data_dict.items():
102
+ if key not in ["x","y","type"]:
103
  for data_item in fig["data"]:
104
  data_item[key] = value
105
+
106
+ pio.write_html(fig, chart_path, full_html=False)
107
 
108
+ chart_url = f'{root_url}/gradio_api/file/temp/{session_hash}/chart.html'
109
 
110
+ iframe = '<div style=overflow:auto;><iframe\n scrolling="yes"\n width="1000px"\n height="500px"\n src="' + chart_url + '"\n frameborder="0"\n allowfullscreen\n></iframe>\n</div>'
111
 
112
+ return {"reply": iframe}
113
+
114
+ except Exception as e:
115
+ print("SCATTER PLOT ERROR")
116
  print(e)
117
+ reply = f"""There was an error generating the Plotly Scatter Plot from {x_column}, {y_column}, {data}, and {layout}
118
+ The error is {e},
119
+ You should probably try again.
120
+ """
121
+ return {"reply": reply}
122
+
123
+ def line_chart_generation_func(data: List[dict], x_column: str, y_column: str, session_hash, layout: List[dict]=[{}],
124
+ category: str=""):
125
+ try:
126
+ dir_path = TEMP_DIR / str(session_hash)
127
+ chart_path = f'{dir_path}/chart.html'
128
+ csv_query_path = f'{dir_path}/query.csv'
129
+
130
+ df = pd.read_csv(csv_query_path)
131
+
132
+ function_args = {"data_frame":df, "x":x_column, "y":y_column}
133
+
134
+ if category:
135
+ function_args["color"] = category
136
+
137
+ initial_graph = px.line(**function_args)
138
+
139
+ fig = initial_graph.to_dict()
140
+
141
+ data_dict,layout_dict = llm_chart_data_scrub(data,layout)
142
+
143
+ print(data_dict)
144
+ print(layout_dict)
145
+
146
+ #Applying stylings and settings generated from LLM
147
+ if layout_dict:
148
+ fig["layout"] = layout_dict
149
+
150
+ for key, value in data_dict.items():
151
+ if key not in ["x","y","type"]:
152
+ for data_item in fig["data"]:
153
+ data_item[key] = value
154
+
155
+ print(fig)
156
+
157
+ pio.write_html(fig, chart_path, full_html=False)
158
+
159
+ chart_url = f'{root_url}/gradio_api/file/temp/{session_hash}/chart.html'
160
+
161
+ iframe = '<div style=overflow:auto;><iframe\n scrolling="yes"\n width="1000px"\n height="500px"\n src="' + chart_url + '"\n frameborder="0"\n allowfullscreen\n></iframe>\n</div>'
162
+
163
+ return {"reply": iframe}
164
+
165
+ except Exception as e:
166
+ print("LINE CHART ERROR")
167
+ print(e)
168
+ reply = f"""There was an error generating the Plotly Line Chart from {x_column}, {y_column}, {data}, and {layout}
169
+ The error is {e},
170
+ You should probably try again.
171
+ """
172
+ return {"reply": reply}
173
+
174
+ def bar_chart_generation_func(data: List[dict], x_column: str, y_column: str, session_hash, layout: List[dict]=[{}],
175
+ category: str="", facet_row: str="", facet_col: str=""):
176
+ try:
177
+ dir_path = TEMP_DIR / str(session_hash)
178
+ chart_path = f'{dir_path}/chart.html'
179
+ csv_query_path = f'{dir_path}/query.csv'
180
+
181
+ df = pd.read_csv(csv_query_path)
182
+
183
+ function_args = {"data_frame":df, "x":x_column, "y":y_column}
184
+
185
+ if category:
186
+ function_args["color"] = category
187
+ if facet_row:
188
+ function_args["facet_row"] = facet_row
189
+ if facet_col:
190
+ function_args["facet_col"] = facet_col
191
+
192
+ initial_graph = px.bar(**function_args)
193
+
194
+ fig = initial_graph.to_dict()
195
+
196
+ data_dict,layout_dict = llm_chart_data_scrub(data,layout)
197
+
198
+ print(data_dict)
199
+ print(layout_dict)
200
+
201
+ #Applying stylings and settings generated from LLM
202
+ if layout_dict:
203
+ fig["layout"] = layout_dict
204
+
205
+ for key, value in data_dict.items():
206
+ if key not in ["x","y","type"]:
207
+ for data_item in fig["data"]:
208
+ data_item[key] = value
209
+
210
+ print(fig)
211
+
212
+ pio.write_html(fig, chart_path, full_html=False)
213
+
214
+ chart_url = f'{root_url}/gradio_api/file/temp/{session_hash}/chart.html'
215
+
216
+ iframe = '<div style=overflow:auto;><iframe\n scrolling="yes"\n width="1000px"\n height="500px"\n src="' + chart_url + '"\n frameborder="0"\n allowfullscreen\n></iframe>\n</div>'
217
+
218
+ return {"reply": iframe}
219
+
220
+ except Exception as e:
221
+ print("BAR CHART ERROR")
222
+ print(e)
223
+ reply = f"""There was an error generating the Plotly Bar Chart from {x_column}, {y_column}, {data}, and {layout}
224
+ The error is {e},
225
+ You should probably try again.
226
+ """
227
+ return {"reply": reply}
228
+
229
+ def pie_chart_generation_func(data: List[dict], values: str, names: str, session_hash, layout: List[dict]=[{}]):
230
+ try:
231
+ dir_path = TEMP_DIR / str(session_hash)
232
+ chart_path = f'{dir_path}/chart.html'
233
+ csv_query_path = f'{dir_path}/query.csv'
234
+
235
+ df = pd.read_csv(csv_query_path)
236
+
237
+ function_args = {"data_frame":df, "values":values, "names":names}
238
+
239
+ initial_graph = px.pie(**function_args)
240
+
241
+ fig = initial_graph.to_dict()
242
+
243
+ data_dict,layout_dict = llm_chart_data_scrub(data,layout)
244
+
245
+ print(data_dict)
246
+ print(layout_dict)
247
+
248
+ #Applying stylings and settings generated from LLM
249
+ if layout_dict:
250
+ fig["layout"] = layout_dict
251
+
252
+ for key, value in data_dict.items():
253
+ if key not in ["x","y","type"]:
254
+ for data_item in fig["data"]:
255
+ data_item[key] = value
256
+
257
+ print(fig)
258
+
259
+ pio.write_html(fig, chart_path, full_html=False)
260
+
261
+ chart_url = f'{root_url}/gradio_api/file/temp/{session_hash}/chart.html'
262
+
263
+ iframe = '<div style=overflow:auto;><iframe\n scrolling="yes"\n width="1000px"\n height="500px"\n src="' + chart_url + '"\n frameborder="0"\n allowfullscreen\n></iframe>\n</div>'
264
+
265
+ return {"reply": iframe}
266
+
267
+ except Exception as e:
268
+ print("PIE CHART ERROR")
269
+ print(e)
270
+ reply = f"""There was an error generating the Plotly Pie Chart from {values}, {names}, {data}, and {layout}
271
+ The error is {e},
272
+ You should probably try again.
273
+ """
274
+ return {"reply": reply}
275
+
276
+ def histogram_generation_func(x_column: str, session_hash, y_column: str="", data: List[dict]=[{}], layout: List[dict]=[{}], histnorm: str="", category: str="",
277
+ histfunc: str=""):
278
+ try:
279
+ dir_path = TEMP_DIR / str(session_hash)
280
+ chart_path = f'{dir_path}/chart.html'
281
+ csv_query_path = f'{dir_path}/query.csv'
282
+
283
+ df = pd.read_csv(csv_query_path)
284
+
285
+ print(df)
286
+ print(x_column)
287
+
288
+ function_args = {"data_frame":df, "x":x_column}
289
+
290
+ if y_column:
291
+ function_args["y"] = y_column
292
+ if histnorm:
293
+ function_args["histnorm"] = histnorm
294
+ if category:
295
+ function_args["color"] = category
296
+ if histfunc:
297
+ function_args["histfunc"] = histfunc
298
+
299
+ initial_graph = px.histogram(**function_args)
300
+
301
+ fig = initial_graph.to_dict()
302
+
303
+ data_dict,layout_dict = llm_chart_data_scrub(data,layout)
304
+
305
+ print(data_dict)
306
+ print(layout_dict)
307
+
308
+ #Applying stylings and settings generated from LLM
309
+ if layout_dict:
310
+ fig["layout"] = layout_dict
311
+
312
+ for key, value in data_dict.items():
313
+ if key not in ["x","y","type"]:
314
+ for data_item in fig["data"]:
315
+ data_item[key] = value
316
+
317
+ print(fig)
318
+
319
+ pio.write_html(fig, chart_path, full_html=False)
320
+
321
+ chart_url = f'{root_url}/gradio_api/file/temp/{session_hash}/chart.html'
322
+
323
+ iframe = '<div style=overflow:auto;><iframe\n scrolling="yes"\n width="1000px"\n height="500px"\n src="' + chart_url + '"\n frameborder="0"\n allowfullscreen\n></iframe>\n</div>'
324
+
325
+ return {"reply": iframe}
326
+
327
+ except Exception as e:
328
+ print("HISTOGRAM ERROR")
329
+ print(e)
330
+ reply = f"""There was an error generating the Plotly Histogram from {x_column}.
331
+ The error is {e},
332
+ You should probably try again.
333
+ """
334
  return {"reply": reply}
335
 
336
  def table_generation_func(session_hash):
functions/chat_functions.py CHANGED
@@ -36,10 +36,15 @@ def example_question_generator(session_hash):
36
  return example_response["replies"][0].text
37
 
38
  def chatbot_with_fc(message, history, session_hash):
39
- from functions import sqlite_query_func, chart_generation_func, table_generation_func, regression_func
 
40
  import tools
41
 
42
- available_functions = {"sql_query_func": sqlite_query_func, "chart_generation_func": chart_generation_func, "table_generation_func":table_generation_func, "regression_func":regression_func }
 
 
 
 
43
 
44
  if message_dict[session_hash] != None:
45
  message_dict[session_hash].append(ChatMessage.from_user(message))
@@ -47,8 +52,12 @@ def chatbot_with_fc(message, history, session_hash):
47
  messages = [
48
  ChatMessage.from_system(
49
  """You are a helpful and knowledgeable agent who has access to an SQLite database which has a table called 'data_source'.
50
- You also have access to a chart function that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate charts and graphs and returns an iframe that we can display in our chat window.
51
  You also have access to a function, called table_generation_func, that builds table formatted html.
 
 
 
 
 
52
  You also have access to a linear regression function, called regression_func, that can take a query.csv file generated from our sql query and a list of column names for our independent and dependent variables and return a regression data string and a regression chart which is returned as an iframe."""
53
  )
54
  ]
 
36
  return example_response["replies"][0].text
37
 
38
  def chatbot_with_fc(message, history, session_hash):
39
+ from functions import sqlite_query_func, table_generation_func, regression_func, scatter_chart_generation_func, \
40
+ line_chart_generation_func,bar_chart_generation_func,pie_chart_generation_func,histogram_generation_func
41
  import tools
42
 
43
+ available_functions = {"sql_query_func": sqlite_query_func,"table_generation_func":table_generation_func,
44
+ "line_chart_generation_func":line_chart_generation_func,"bar_chart_generation_func":bar_chart_generation_func,
45
+ "scatter_chart_generation_func":scatter_chart_generation_func, "pie_chart_generation_func":pie_chart_generation_func,
46
+ "histogram_generation_func":histogram_generation_func,
47
+ "regression_func":regression_func }
48
 
49
  if message_dict[session_hash] != None:
50
  message_dict[session_hash].append(ChatMessage.from_user(message))
 
52
  messages = [
53
  ChatMessage.from_system(
54
  """You are a helpful and knowledgeable agent who has access to an SQLite database which has a table called 'data_source'.
 
55
  You also have access to a function, called table_generation_func, that builds table formatted html.
56
+ You also have access to a scatter plot function, called scatter_chart_generation_func, that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate a scatter plot and returns an iframe that we can display in our chat window.
57
+ You also have access to a line chart function, called line_chart_generation_func, that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate a line chart and returns an iframe that we can display in our chat window.
58
+ You also have access to a bar graph function, called line_chart_generation_func, that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate a bar graph and returns an iframe that we can display in our chat window.
59
+ You also have access to a pie chart function, called pie_chart_generation_func, that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate a pie chart and returns an iframe that we can display in our chat window.
60
+ You also have access to a histogram function, called histogram_generation_func, that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate a histogram and returns an iframe that we can display in our chat window.
61
  You also have access to a linear regression function, called regression_func, that can take a query.csv file generated from our sql query and a list of column names for our independent and dependent variables and return a regression data string and a regression chart which is returned as an iframe."""
62
  )
63
  ]
functions/stat_functions.py CHANGED
@@ -5,16 +5,14 @@ from utils import TEMP_DIR
5
  import plotly.express as px
6
  import plotly.io as pio
7
  import os
 
8
  from dotenv import load_dotenv
9
 
10
  load_dotenv()
11
 
12
  root_url = os.getenv("ROOT_URL")
13
 
14
- def basic_stats_function(data_list: List[str]):
15
- return
16
-
17
- def regression_func(independent_variables: List[str], dependent_variable: str, session_hash, category: str='', regression_type: str="ols"):
18
  print("LINEAR REGRESSION CALCULATION")
19
  print(independent_variables)
20
  print(dependent_variable)
@@ -26,9 +24,11 @@ def regression_func(independent_variables: List[str], dependent_variable: str, s
26
  df = pd.read_csv(csv_query_path)
27
 
28
  if category in df.columns:
29
- fig = px.scatter(df, x=independent_variables, y=dependent_variable, color=category, trendline="ols")
 
30
  else:
31
- fig = px.scatter(df, x=independent_variables, y=dependent_variable, trendline="ols")
 
32
 
33
  pio.write_html(fig, chart_path, full_html=False)
34
 
 
5
  import plotly.express as px
6
  import plotly.io as pio
7
  import os
8
+ from functions import scatter_chart_fig
9
  from dotenv import load_dotenv
10
 
11
  load_dotenv()
12
 
13
  root_url = os.getenv("ROOT_URL")
14
 
15
+ def regression_func(independent_variables: List[str], dependent_variable: str, session_hash, category: str=''):
 
 
 
16
  print("LINEAR REGRESSION CALCULATION")
17
  print(independent_variables)
18
  print(dependent_variable)
 
24
  df = pd.read_csv(csv_query_path)
25
 
26
  if category in df.columns:
27
+ fig = scatter_chart_fig(df=df, x_column=independent_variables,y_column=dependent_variable,
28
+ category=category,trendline="ols")
29
  else:
30
+ fig = scatter_chart_fig(df=df,x_column=independent_variables,y_column=dependent_variable,
31
+ trendline="ols")
32
 
33
  pio.write_html(fig, chart_path, full_html=False)
34
 
tools.py CHANGED
@@ -12,14 +12,16 @@ def tools_call(session_hash):
12
  cur.close()
13
  connection.close()
14
 
 
 
15
  return [
16
  {
17
  "type": "function",
18
  "function": {
19
  "name": "sql_query_func",
20
- "description": f"""This a tool useful to query a SQLite table called 'data_source' with the following Columns: {columns}.
21
- This function also saves the results of the query to csv file called query.csv. This is useful when query results are too large to process
22
- or need to be used in an another function.""",
23
  "parameters": {
24
  "type": "object",
25
  "properties": {
@@ -38,8 +40,8 @@ def tools_call(session_hash):
38
  {
39
  "type": "function",
40
  "function": {
41
- "name": "chart_generation_func",
42
- "description": f"""This a chart generation tool useful to generate charts and graphs from queried data from our SQL table called 'data_source.
43
  The data values will come from the columns of our query.csv (the 'x' and 'y' values of each graph) file but the layout section of the plotly dictionary objects will be generated by you.
44
  Returns an iframe string which will be displayed inline in our chat window. Do not edit the string returned
45
  from the chart_generation_func function in any way and display it fully to the user in the chat window. You can add your own text supplementary
@@ -58,8 +60,9 @@ def tools_call(session_hash):
58
  }
59
  },
60
  "x_column": {
61
- "type": "string",
62
- "description": f"""The column in our query.csv file that contain the x values of the graph.""",
 
63
  "items": {
64
  "type": "string",
65
  }
@@ -78,16 +81,45 @@ def tools_call(session_hash):
78
  "type": "string",
79
  }
80
  },
81
- "graph_type": {
82
  "type": "string",
83
- "description": f"""The type of plotly graph we wish to generate.
84
- This graph_type value can be one of ['bar','scatter','line','pie'].
85
  Do not send any values outside of this array as the function will fail.
86
  Infer this from the user's message.""",
87
  "items": {
88
  "type": "string",
89
  }
90
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  "layout": {
92
  "type": "array",
93
  "description": """An array containing a dictionary that contains the 'layout' portion of the plotly chart generation.
@@ -97,11 +129,263 @@ def tools_call(session_hash):
97
  }
98
  }
99
  },
100
- "required": ["graph_type","x_column","y_column","layout"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  },
102
  },
103
  },
104
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  "type": "function",
106
  "function": {
107
  "name": "table_generation_func",
@@ -150,14 +434,7 @@ def tools_call(session_hash):
150
  "items": {
151
  "type": "string",
152
  }
153
- },
154
- "regression_type": {
155
- "type": "string",
156
- "description": f"""A parameter that specifies the type of regression being used from the trendline options that plotly offers. Defaults to 'ols'.""",
157
- "items": {
158
- "type": "string",
159
- }
160
- },
161
  },
162
  "required": ["independent_variables","dependent_variable"],
163
  },
 
12
  cur.close()
13
  connection.close()
14
 
15
+ column_string = (columns[:625] + '..') if len(columns) > 625 else columns
16
+
17
  return [
18
  {
19
  "type": "function",
20
  "function": {
21
  "name": "sql_query_func",
22
+ "description": f"""This is a tool useful to query a SQLite table called 'data_source' with the following Columns: {column_string}.
23
+ There may also be more columns in the table if the number of columns is too large to process.
24
+ This function also saves the results of the query to csv file called query.csv.""",
25
  "parameters": {
26
  "type": "object",
27
  "properties": {
 
40
  {
41
  "type": "function",
42
  "function": {
43
+ "name": "scatter_chart_generation_func",
44
+ "description": f"""This is a scatter plot generation tool useful to generate scatter plots from queried data from our SQL table called 'data_source'.
45
  The data values will come from the columns of our query.csv (the 'x' and 'y' values of each graph) file but the layout section of the plotly dictionary objects will be generated by you.
46
  Returns an iframe string which will be displayed inline in our chat window. Do not edit the string returned
47
  from the chart_generation_func function in any way and display it fully to the user in the chat window. You can add your own text supplementary
 
60
  }
61
  },
62
  "x_column": {
63
+ "type": "array",
64
+ "description": f"""An array of strings that correspond to the the columns in our query.csv file that contain the x values of the graph. There can be more than one column
65
+ that can each be plotted against the y_column, if needed.""",
66
  "items": {
67
  "type": "string",
68
  }
 
81
  "type": "string",
82
  }
83
  },
84
+ "trendline": {
85
  "type": "string",
86
+ "description": f"""An optional field to specify the type of plotly trendline we wish to use in the scatter plot.
87
+ This trendline value can be one of ['ols','lowess','rolling','ewm','expanding'].
88
  Do not send any values outside of this array as the function will fail.
89
  Infer this from the user's message.""",
90
  "items": {
91
  "type": "string",
92
  }
93
  },
94
+ "trendline_options": {
95
+ "type": "array",
96
+ "description": """An array containing a dictionary that contains the 'trendline_options' portion of the plotly chart generation.
97
+ The 'lowess', 'rolling', and 'ewm' options require trendline_options to be included.
98
+ The array must contain a dictionary, any other format will not work.""",
99
+ "items": {
100
+ "type": "string",
101
+ }
102
+ },
103
+ "marginal_x": {
104
+ "type": "string",
105
+ "description": f"""The type of marginal distribution plot we'd like to specify for the plotly scatter plot for the x axis.
106
+ This marginal_x value can be one of ['histogram','rug','box','violin'].
107
+ Do not send any values outside of this array as the function will fail.
108
+ Infer this from the user's message.""",
109
+ "items": {
110
+ "type": "string",
111
+ }
112
+ },
113
+ "marginal_y": {
114
+ "type": "string",
115
+ "description": f"""The type of marginal distribution plot we'd like to specify for the plotly scatter plot for the y axis.
116
+ This marginal_y value can be one of ['histogram','rug','box','violin'].
117
+ Do not send any values outside of this array as the function will fail.
118
+ Infer this from the user's message.""",
119
+ "items": {
120
+ "type": "string",
121
+ }
122
+ },
123
  "layout": {
124
  "type": "array",
125
  "description": """An array containing a dictionary that contains the 'layout' portion of the plotly chart generation.
 
129
  }
130
  }
131
  },
132
+ "required": ["x_column","y_column"],
133
+ },
134
+ },
135
+ },
136
+ {
137
+ "type": "function",
138
+ "function": {
139
+ "name": "line_chart_generation_func",
140
+ "description": f"""This is a line chart generation tool useful to generate line charts from queried data from our SQL table called 'data_source'.
141
+ The data values will come from the columns of our query.csv (the 'x' and 'y' values of each graph) file but the layout section of the plotly dictionary objects will be generated by you.
142
+ Returns an iframe string which will be displayed inline in our chat window. Do not edit the string returned
143
+ from the chart_generation_func function in any way and display it fully to the user in the chat window. You can add your own text supplementary
144
+ to it for context if desired.""",
145
+ "parameters": {
146
+ "type": "object",
147
+ "properties": {
148
+ "data": {
149
+ "type": "array",
150
+ "description": """The array containing a dictionary that contains the 'data' portion of the plotly chart generation and will include the options requested by the user.
151
+ The array must contain a dictionary, any other format will not work.
152
+ Do not include the 'x' or 'y' portions of the object as this will come from the query.csv file generated by our SQLite query.
153
+ Infer this from the user's message.""",
154
+ "items": {
155
+ "type": "string",
156
+ }
157
+ },
158
+ "x_column": {
159
+ "type": "string",
160
+ "description": f"""The column in our query.csv file that contain the x values of the graph.""",
161
+ "items": {
162
+ "type": "string",
163
+ }
164
+ },
165
+ "y_column": {
166
+ "type": "string",
167
+ "description": f"""The column in our query.csv file that contain the y values of the graph.""",
168
+ "items": {
169
+ "type": "string",
170
+ }
171
+ },
172
+ "category": {
173
+ "type": "string",
174
+ "description": f"""An optional column in our query.csv file that contain a parameter that will define the category for the data.""",
175
+ "items": {
176
+ "type": "string",
177
+ }
178
+ },
179
+ "layout": {
180
+ "type": "array",
181
+ "description": """An array containing a dictionary that contains the 'layout' portion of the plotly chart generation.
182
+ The array must contain a dictionary, any other format will not work.""",
183
+ "items": {
184
+ "type": "string",
185
+ }
186
+ }
187
+ },
188
+ "required": ["x_column","y_column","layout"],
189
+ },
190
+ },
191
+ },
192
+ {
193
+ "type": "function",
194
+ "function": {
195
+ "name": "bar_chart_generation_func",
196
+ "description": f"""This is a bar chart generation tool useful to generate line charts from queried data from our SQL table called 'data_source'.
197
+ The data values will come from the columns of our query.csv (the 'x' and 'y' values of each graph) file but the layout section of the plotly dictionary objects will be generated by you.
198
+ Returns an iframe string which will be displayed inline in our chat window. Do not edit the string returned
199
+ from the chart_generation_func function in any way and display it fully to the user in the chat window. You can add your own text supplementary
200
+ to it for context if desired.""",
201
+ "parameters": {
202
+ "type": "object",
203
+ "properties": {
204
+ "data": {
205
+ "type": "array",
206
+ "description": """The array containing a dictionary that contains the 'data' portion of the plotly chart generation and will include the options requested by the user.
207
+ The array must contain a dictionary, any other format will not work.
208
+ Do not include the 'x' or 'y' portions of the object as this will come from the query.csv file generated by our SQLite query.
209
+ Infer this from the user's message.""",
210
+ "items": {
211
+ "type": "string",
212
+ }
213
+ },
214
+ "x_column": {
215
+ "type": "string",
216
+ "description": f"""The column in our query.csv file that contains the x values of the graph.""",
217
+ "items": {
218
+ "type": "string",
219
+ }
220
+ },
221
+ "y_column": {
222
+ "type": "string",
223
+ "description": f"""The column in our query.csv file that contains the y values of the graph.""",
224
+ "items": {
225
+ "type": "string",
226
+ }
227
+ },
228
+ "category": {
229
+ "type": "string",
230
+ "description": f"""An optional column in our query.csv file that contains a parameter that will define the category for the data.""",
231
+ "items": {
232
+ "type": "string",
233
+ }
234
+ },
235
+ "facet_row": {
236
+ "type": "string",
237
+ "description": f"""An optional column in our query.csv file that contains a parameter that will define a faceted subplot, where different rows
238
+ correspond to different values of the query specified in this parameter.""",
239
+ "items": {
240
+ "type": "string",
241
+ }
242
+ },
243
+ "facet_col": {
244
+ "type": "string",
245
+ "description": f"""An optional column in our query.csv file that contain a parameter that will define the faceted column, corresponding to
246
+ different values of our query specified in this parameter.""",
247
+ "items": {
248
+ "type": "string",
249
+ }
250
+ },
251
+ "layout": {
252
+ "type": "array",
253
+ "description": """An array containing a dictionary that contains the 'layout' portion of the plotly chart generation.
254
+ The array must contain a dictionary, any other format will not work.""",
255
+ "items": {
256
+ "type": "string",
257
+ }
258
+ }
259
+ },
260
+ "required": ["x_column","y_column","layout"],
261
+ },
262
+ },
263
+ },
264
+ {
265
+ "type": "function",
266
+ "function": {
267
+ "name": "pie_chart_generation_func",
268
+ "description": f"""This is a pie chart generation tool useful to generate pie charts from queried data from our SQL table called 'data_source'.
269
+ The data values will come from the columns of our query.csv (the 'values' and 'names' values of each graph) file but the layout section of the plotly dictionary objects will be generated by you.
270
+ Returns an iframe string which will be displayed inline in our chat window. Do not edit the string returned
271
+ from the chart_generation_func function in any way and display it fully to the user in the chat window. You can add your own text supplementary
272
+ to it for context if desired.""",
273
+ "parameters": {
274
+ "type": "object",
275
+ "properties": {
276
+ "data": {
277
+ "type": "array",
278
+ "description": """The array containing a dictionary that contains the 'data' portion of the plotly chart generation and will include the options requested by the user.
279
+ The array must contain a dictionary, any other format will not work.
280
+ Do not include the 'x' or 'y' portions of the object as this will come from the query.csv file generated by our SQLite query.
281
+ Infer this from the user's message.""",
282
+ "items": {
283
+ "type": "string",
284
+ }
285
+ },
286
+ "values": {
287
+ "type": "string",
288
+ "description": f"""The column in our query.csv file that contain the values of the pie chart.""",
289
+ "items": {
290
+ "type": "string",
291
+ }
292
+ },
293
+ "names": {
294
+ "type": "string",
295
+ "description": f"""The column in our query.csv file that contain the label or section of each piece of the pie graph and allow us to know what each piece of the pie chart represents.""",
296
+ "items": {
297
+ "type": "string",
298
+ }
299
+ },
300
+ "layout": {
301
+ "type": "array",
302
+ "description": """An array containing a dictionary that contains the 'layout' portion of the plotly chart generation.
303
+ The array must contain a dictionary, any other format will not work.""",
304
+ "items": {
305
+ "type": "string",
306
+ }
307
+ }
308
+ },
309
+ "required": ["values","names","layout"],
310
  },
311
  },
312
  },
313
  {
314
+ "type": "function",
315
+ "function": {
316
+ "name": "histogram_generation_func",
317
+ "description": f"""This is a histogram generation tool useful to generate histograms from queried data from our SQL table called 'data_source'.
318
+ The data values will come from the columns of our query.csv (the 'values' and 'names' values of each graph) file but the layout section of the plotly dictionary objects will be generated by you.
319
+ Returns an iframe string which will be displayed inline in our chat window. Do not edit the string returned
320
+ from the chart_generation_func function in any way and display it fully to the user in the chat window. You can add your own text supplementary
321
+ to it for context if desired.""",
322
+ "parameters": {
323
+ "type": "object",
324
+ "properties": {
325
+ "data": {
326
+ "type": "array",
327
+ "description": """The array containing a dictionary that contains the 'data' portion of the plotly chart generation and will include the options requested by the user.
328
+ The array must contain a dictionary, any other format will not work.
329
+ Do not include the 'x' or 'y' portions of the object as this will come from the query.csv file generated by our SQLite query.
330
+ Infer this from the user's message.""",
331
+ "items": {
332
+ "type": "string",
333
+ }
334
+ },
335
+ "x_column": {
336
+ "type": "string",
337
+ "description": f"""The column in our query.csv file that contains the x values of the histogram.
338
+ This would correspond to the counts that would be distributed in the histogram.""",
339
+ "items": {
340
+ "type": "string",
341
+ }
342
+ },
343
+ "y_column": {
344
+ "type": "string",
345
+ "description": f"""An optional column in our query.csv file that contains the y values of the histogram.""",
346
+ "items": {
347
+ "type": "string",
348
+ }
349
+ },
350
+ "histnorm": {
351
+ "type": "string",
352
+ "description": f"""An optional argument to specify the type of normalization if the default isn't used.
353
+ This histnorm value can be one of ['percent','probability','density','probability density'].
354
+ Do not send any values outside of this array as the function will fail.""",
355
+ "items": {
356
+ "type": "string",
357
+ }
358
+ },
359
+ "category": {
360
+ "type": "string",
361
+ "description": f"""An optional column in our query.csv file that contains a parameter that will define the category for the data.""",
362
+ "items": {
363
+ "type": "string",
364
+ }
365
+ },
366
+ "histfunc": {
367
+ "type": "string",
368
+ "description": f"""An optional value that represents the function of data to compute the function which is used on the optional y column.
369
+ This histfunc value can be one of ['avg','sum','count'].
370
+ Do not send any values outside of this array as the function will fail.""",
371
+ "items": {
372
+ "type": "string",
373
+ }
374
+ },
375
+ "layout": {
376
+ "type": "array",
377
+ "description": """An array containing a dictionary that contains the 'layout' portion of the plotly chart generation.
378
+ The array must contain a dictionary, any other format will not work.""",
379
+ "items": {
380
+ "type": "string",
381
+ }
382
+ }
383
+ },
384
+ "required": ["x_column"],
385
+ },
386
+ },
387
+ },
388
+ {
389
  "type": "function",
390
  "function": {
391
  "name": "table_generation_func",
 
434
  "items": {
435
  "type": "string",
436
  }
437
+ }
 
 
 
 
 
 
 
438
  },
439
  "required": ["independent_variables","dependent_variable"],
440
  },