nolanzandi's picture
Add bubble charts
3b9f8b1 verified
raw
history blame
28.6 kB
import sqlite3
from utils import TEMP_DIR
# Upper bound on how many column names are spelled out in the tool description.
_MAX_LISTED_COLUMNS = 625

# Description shared by the 'data' parameter of every chart tool.
_DATA_DESCRIPTION = (
    "The array containing a dictionary that contains the 'data' portion of the plotly chart generation and will include the options requested by the user.\n"
    "The array must contain a json formatted dictionary with outer brackets included, any other format will not work.\n"
    "Do not include the 'x' or 'y' portions of the object as this will come from the query.csv file generated by our SQLite query.\n"
    "Infer this from the user's message."
)

# Description shared by the 'layout' parameter of every chart tool.
_LAYOUT_DESCRIPTION = (
    "An array containing a dictionary that contains the 'layout' portion of the plotly chart generation.\n"
    "The array must contain a json formatted dictionary with outer brackets included, any other format will not work."
)


def _string_param(description: str) -> dict:
    """Return the schema of a string-typed tool parameter."""
    # NOTE(review): 'items' is only meaningful for array schemas in JSON Schema;
    # it is kept so the emitted schema stays unchanged for existing consumers.
    return {
        "type": "string",
        "description": description,
        "items": {
            "type": "string",
        },
    }


def _string_list_param(description: str) -> dict:
    """Return the schema of a tool parameter that is an array of strings."""
    return {
        "type": "array",
        "description": description,
        "items": {
            "type": "string",
        },
    }


def _function_tool(name: str, description: str, properties: dict, required: list) -> dict:
    """Wrap a name, description and parameter schema into a function-tool entry."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": required,
            },
        },
    }


def _chart_description(tool_label: str, output_label: str, func_name: str,
                       value_fields: str = "'x' and 'y'") -> str:
    """Return the boilerplate description shared by all chart generation tools."""
    return (
        f"This is a {tool_label} generation tool useful to generate {output_label} from queried data from our SQL table called 'data_source'.\n"
        f"The data values will come from the columns of our query.csv (the {value_fields} values of each graph) file but the layout section of the plotly dictionary objects will be generated by you.\n"
        "Returns an iframe string which will be displayed inline in our chat window. Do not edit the iframe string returned\n"
        f"from the {func_name} function in any way and always display the iframe fully to the user in the chat window. You can add your own text supplementary\n"
        "to it for context if desired."
    )


def _read_column_names(db_path: str) -> list:
    """Return the column names of the 'data_source' table stored at db_path."""
    connection = sqlite3.connect(db_path)
    try:
        print("Querying Database in Tools.py")
        cursor = connection.execute('select * from data_source')
        try:
            # The first element of each description entry is the column name.
            return [entry[0] for entry in cursor.description]
        finally:
            cursor.close()
    finally:
        # Always release the handle, even when the query itself fails.
        connection.close()


def _summarize_columns(columns: list, limit: int = _MAX_LISTED_COLUMNS) -> str:
    """Render the column list for the prompt, keeping at most limit names."""
    if len(columns) > limit:
        # The original concatenated a list with '..', which raised TypeError.
        return f"{columns[:limit]}.."
    return str(columns)


def _sql_query_tool(column_string: str) -> dict:
    """Build the tool entry for running SQL queries against 'data_source'."""
    return _function_tool(
        "sql_query_func",
        f"This is a tool useful to query a SQLite table called 'data_source' with the following Columns: {column_string}.\n"
        "There may also be more columns in the table if the number of columns is too large to process.\n"
        "This function also saves the results of the query to csv file called query.csv.",
        {
            "queries": _string_list_param(
                "The query to use in the search. Infer this from the user's message. It should be a question or a statement"
            ),
        },
        ["queries"],
    )


def _scatter_chart_tool() -> dict:
    """Build the tool entry for scatter (and bubble) chart generation."""
    return _function_tool(
        "scatter_chart_generation_func",
        _chart_description("scatter plot", "scatter plots", "scatter_chart_generation_func"),
        {
            "data": _string_list_param(_DATA_DESCRIPTION),
            "x_column": _string_list_param(
                "An array of strings that correspond to the the columns in our query.csv file that contain the x values of the graph. There can be more than one column\n"
                "that can each be plotted against the y_column, if needed."
            ),
            "y_column": _string_param(
                "The column in our query.csv file that contain the y values of the graph."
            ),
            "category": _string_param(
                "An optional column in our query.csv file that contain a parameter that will define the category for the data."
            ),
            "trendline": _string_param(
                "An optional field to specify the type of plotly trendline we wish to use in the scatter plot.\n"
                "This trendline value can be one of ['ols','lowess','rolling','ewm','expanding'].\n"
                "Do not send any values outside of this array as the function will fail.\n"
                "Infer this from the user's message."
            ),
            "trendline_options": _string_list_param(
                "An array containing a dictionary that contains the 'trendline_options' portion of the plotly chart generation.\n"
                "The 'lowess', 'rolling', and 'ewm' options require trendline_options to be included.\n"
                "The array must contain a json formatted dictionary with outer brackets included, any other format will not work."
            ),
            "marginal_x": _string_param(
                "The type of marginal distribution plot we'd like to specify for the plotly scatter plot for the x axis.\n"
                "This marginal_x value can be one of ['histogram','rug','box','violin'].\n"
                "Do not send any values outside of this array as the function will fail.\n"
                "Infer this from the user's message."
            ),
            "marginal_y": _string_param(
                "The type of marginal distribution plot we'd like to specify for the plotly scatter plot for the y axis.\n"
                "This marginal_y value can be one of ['histogram','rug','box','violin'].\n"
                "Do not send any values outside of this array as the function will fail.\n"
                "Infer this from the user's message."
            ),
            "layout": _string_list_param(_LAYOUT_DESCRIPTION),
            "size": _string_param(
                "An optional column in our query.csv file that contain a parameter that will define the size of each plot point.\n"
                "This is useful for a bubble chart where another value in our query can be represented by the size of the plotted point.\n"
                "Values must be greater than or equal to 0 and so in our query, all values less than 0 should be set equal to zero."
            ),
        },
        ["x_column", "y_column"],
    )


def _line_chart_tool() -> dict:
    """Build the tool entry for line chart generation."""
    return _function_tool(
        "line_chart_generation_func",
        _chart_description("line chart", "line charts", "line_chart_generation_func"),
        {
            "data": _string_list_param(_DATA_DESCRIPTION),
            "x_column": _string_param(
                "The column in our query.csv file that contain the x values of the graph."
            ),
            "y_column": _string_param(
                "The column in our query.csv file that contain the y values of the graph."
            ),
            "category": _string_param(
                "An optional column in our query.csv file that contain a parameter that will define the category for the data."
            ),
            "layout": _string_list_param(_LAYOUT_DESCRIPTION),
        },
        ["x_column", "y_column", "layout"],
    )


def _bar_chart_tool() -> dict:
    """Build the tool entry for bar chart generation."""
    return _function_tool(
        "bar_chart_generation_func",
        # The original text said this tool generates "line charts" (copy-paste slip).
        _chart_description("bar chart", "bar charts", "bar_chart_generation_func"),
        {
            "data": _string_list_param(_DATA_DESCRIPTION),
            "x_column": _string_param(
                "The column in our query.csv file that contains the x values of the graph."
            ),
            "y_column": _string_param(
                "The column in our query.csv file that contains the y values of the graph."
            ),
            "category": _string_param(
                "An optional column in our query.csv file that contains a parameter that will define the category for the data."
            ),
            "facet_row": _string_param(
                "An optional column in our query.csv file that contains a parameter that will define a faceted subplot, where different rows\n"
                "correspond to different values of the query specified in this parameter."
            ),
            "facet_col": _string_param(
                "An optional column in our query.csv file that contain a parameter that will define the faceted column, corresponding to\n"
                "different values of our query specified in this parameter."
            ),
            "layout": _string_list_param(_LAYOUT_DESCRIPTION),
        },
        ["x_column", "y_column", "layout"],
    )


def _pie_chart_tool() -> dict:
    """Build the tool entry for pie chart generation."""
    return _function_tool(
        "pie_chart_generation_func",
        _chart_description(
            "pie chart", "pie charts", "pie_chart_generation_func",
            value_fields="'values' and 'names'",
        ),
        {
            "data": _string_list_param(_DATA_DESCRIPTION),
            "values": _string_param(
                "The column in our query.csv file that contain the values of the pie chart."
            ),
            "names": _string_param(
                "The column in our query.csv file that contain the label or section of each piece of the pie graph and allow us to know what each piece of the pie chart represents."
            ),
            "layout": _string_list_param(_LAYOUT_DESCRIPTION),
        },
        ["values", "names", "layout"],
    )


def _histogram_tool() -> dict:
    """Build the tool entry for histogram generation."""
    return _function_tool(
        "histogram_generation_func",
        # The original text referred to the pie chart's 'values' and 'names'
        # fields; this tool takes x_column / y_column instead.
        _chart_description("histogram", "histograms", "histogram_generation_func"),
        {
            "data": _string_list_param(_DATA_DESCRIPTION),
            "x_column": _string_param(
                "The column in our query.csv file that contains the x values of the histogram.\n"
                "This would correspond to the counts that would be distributed in the histogram."
            ),
            "y_column": _string_param(
                "An optional column in our query.csv file that contains the y values of the histogram."
            ),
            "histnorm": _string_param(
                "An optional argument to specify the type of normalization if the default isn't used.\n"
                "This histnorm value can be one of ['percent','probability','density','probability density'].\n"
                "Do not send any values outside of this array as the function will fail."
            ),
            "category": _string_param(
                "An optional column in our query.csv file that contains a parameter that will define the category for the data."
            ),
            "histfunc": _string_param(
                "An optional value that represents the function of data to compute the function which is used on the optional y column.\n"
                "This histfunc value can be one of ['avg','sum','count'].\n"
                "Do not send any values outside of this array as the function will fail."
            ),
            "layout": _string_list_param(_LAYOUT_DESCRIPTION),
        },
        ["x_column"],
    )


def _table_tool() -> dict:
    """Build the tool entry for table generation (it takes no parameters)."""
    return {
        "type": "function",
        "function": {
            "name": "table_generation_func",
            "description": (
                "This an table generation tool useful to format data as a table from queried data from our SQL table called 'data_source'.\n"
                "Takes no parameters as it uses data queried in our query.csv file to build the table.\n"
                "Call this function after running our SQLite query and generating query.csv.\n"
                "Returns an html string generated from the pandas library and pandas.to_html()\n"
                "function which will be displayed inline in our chat window.\n"
                "Do not edit the string returned by the function in any way when displaying to the user."
            ),
            "parameters": {},
        },
    }


def _regression_tool() -> dict:
    """Build the tool entry for regression calculation."""
    return _function_tool(
        "regression_func",
        "This a tool to calculate regressions on our SQLite table called 'data_source'.\n"
        "We can run queries with our 'sql_query_func' function and they will be available to use in this function via the query.csv file that is generated.\n"
        "Returns a dictionary of values that includes a regression_summary and a regression chart (which is an iframe displaying the\n"
        "linear regression in chart form and should be shown to the user).",
        {
            "independent_variables": _string_list_param(
                "An array of strings that states the independent variables in our data set which should be column names in our query.csv file that is generated\n"
                "in the 'sql_query_func' function. This will allow us to identify the data to use for our independent variables.\n"
                "Infer this from the user's message."
            ),
            "dependent_variable": _string_param(
                "A string that states the dependent variables in our data set which should be a column name in our query.csv file that is generated\n"
                "in the 'sql_query_func' function. This will allow us to identify the data to use for our dependent variables.\n"
                "Infer this from the user's message."
            ),
            "category": _string_param(
                "An optional column in our query.csv file that contain a parameter that will define the category for the data.\n"
                "Do not send value if no category is needed or specified. This category must be present in our query.csv file to be valid."
            ),
        },
        ["independent_variables", "dependent_variable"],
    )


def tools_call(session_hash):
    """Build the function-calling tool definitions for one session's database.

    Opens ``TEMP_DIR/<session_hash>/data_source.db``, reads the column names of
    its ``data_source`` table and embeds them in the description of the SQL
    query tool. The remaining tool entries are static.

    Args:
        session_hash: Identifier of the session; its string form is used as
            the name of the sub-directory of ``TEMP_DIR`` holding the database.

    Returns:
        A freshly built list of tool dictionaries, in this order: SQL query,
        scatter chart, line chart, bar chart, pie chart, histogram, table and
        regression.

    Raises:
        sqlite3.Error: Propagated unchanged if the database cannot be opened
            or the ``data_source`` table cannot be queried.
    """
    dir_path = TEMP_DIR / str(session_hash)
    columns = _read_column_names(f'{dir_path}/data_source.db')

    print("COLUMNS 2")
    print(columns)

    # Only the first _MAX_LISTED_COLUMNS names are listed so that very wide
    # tables do not produce an oversized tool description.
    column_string = _summarize_columns(columns)

    return [
        _sql_query_tool(column_string),
        _scatter_chart_tool(),
        _line_chart_tool(),
        _bar_chart_tool(),
        _pie_chart_tool(),
        _histogram_tool(),
        _table_tool(),
        _regression_tool(),
    ]