Display upload errors to user, better prompts for chart layouts
Browse files
- app.py +45 -43
- data_sources/upload_file.py +35 -28
- functions/chart_functions.py +3 -3
- tools.py +6 -4
app.py
CHANGED
@@ -56,55 +56,57 @@ with gr.Blocks(css=css, delete_cache=(3600,3600)) as demo:
|
|
56 |
print(filename)
|
57 |
message_dict[request.session_hash] = None
|
58 |
if filename:
|
59 |
-
process_upload(filename, request.session_hash)
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
["What levels of education have the highest and lowest average balance?"],
|
64 |
-
["What job is most and least common for a yes response from the individuals, not counting 'unknown'?"],
|
65 |
-
["Can you generate a bar chart of education vs. average balance?"],
|
66 |
-
["Can you generate a table of levels of education versus average balance, percent married, percent with a loan, and percent in default?"],
|
67 |
-
["Can we predict the relationship between the number of contacts performed before this campaign and the average balance?"],
|
68 |
-
]
|
69 |
-
elif "online_retail_data" in filename:
|
70 |
-
example_questions = [
|
71 |
-
["Describe the dataset"],
|
72 |
-
["What month had the highest revenue?"],
|
73 |
-
["Is revenue higher in the morning or afternoon?"],
|
74 |
-
["Can you generate a line graph of revenue per month?"],
|
75 |
-
["Can you generate a table of revenue per month?"],
|
76 |
-
["Can we predict how time of day affects revenue in this data set?"],
|
77 |
-
]
|
78 |
-
else:
|
79 |
-
try:
|
80 |
-
generated_examples = ast.literal_eval(example_question_generator(request.session_hash))
|
81 |
example_questions = [
|
82 |
-
["Describe the dataset"]
|
|
|
|
|
|
|
|
|
|
|
83 |
]
|
84 |
-
|
85 |
-
example_questions.append([example])
|
86 |
-
except:
|
87 |
example_questions = [
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
|
104 |
def process_upload(upload_value, session_hash):
|
105 |
if upload_value:
|
106 |
-
process_data_upload(upload_value, session_hash)
|
107 |
-
return
|
108 |
|
109 |
demo.unload(delete_db)
|
110 |
|
|
|
56 |
print(filename)
|
57 |
message_dict[request.session_hash] = None
|
58 |
if filename:
|
59 |
+
process_message = process_upload(filename, request.session_hash)
|
60 |
+
gr.HTML(value=process_message[1], padding=False)
|
61 |
+
if process_message[0] == "success":
|
62 |
+
if "bank_marketing_campaign" in filename:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
example_questions = [
|
64 |
+
["Describe the dataset"],
|
65 |
+
["What levels of education have the highest and lowest average balance?"],
|
66 |
+
["What job is most and least common for a yes response from the individuals, not counting 'unknown'?"],
|
67 |
+
["Can you generate a bar chart of education vs. average balance?"],
|
68 |
+
["Can you generate a table of levels of education versus average balance, percent married, percent with a loan, and percent in default?"],
|
69 |
+
["Can we predict the relationship between the number of contacts performed before this campaign and the average balance?"],
|
70 |
]
|
71 |
+
elif "online_retail_data" in filename:
|
|
|
|
|
72 |
example_questions = [
|
73 |
+
["Describe the dataset"],
|
74 |
+
["What month had the highest revenue?"],
|
75 |
+
["Is revenue higher in the morning or afternoon?"],
|
76 |
+
["Can you generate a line graph of revenue per month?"],
|
77 |
+
["Can you generate a table of revenue per month?"],
|
78 |
+
["Can we predict how time of day affects revenue in this data set?"],
|
79 |
+
]
|
80 |
+
else:
|
81 |
+
try:
|
82 |
+
generated_examples = ast.literal_eval(example_question_generator(request.session_hash))
|
83 |
+
example_questions = [
|
84 |
+
["Describe the dataset"]
|
85 |
+
]
|
86 |
+
for example in generated_examples:
|
87 |
+
example_questions.append([example])
|
88 |
+
except:
|
89 |
+
example_questions = [
|
90 |
+
["Describe the dataset"],
|
91 |
+
["List the columns in the dataset"],
|
92 |
+
["What could this data be used for?"],
|
93 |
+
]
|
94 |
+
parameters = gr.Textbox(visible=False, value=request.session_hash)
|
95 |
+
bot = gr.Chatbot(type='messages', label="CSV Chat Window", render_markdown=True, sanitize_html=False, show_label=True, render=False, visible=True, elem_classes="chatbot")
|
96 |
+
chat = gr.ChatInterface(
|
97 |
+
fn=chatbot_with_fc,
|
98 |
+
type='messages',
|
99 |
+
chatbot=bot,
|
100 |
+
title="Chat with your data file",
|
101 |
+
concurrency_limit=None,
|
102 |
+
examples=example_questions,
|
103 |
+
additional_inputs=parameters
|
104 |
+
)
|
105 |
|
106 |
def process_upload(upload_value, session_hash):
    """Process an uploaded file for the given session and report the outcome.

    Args:
        upload_value: The uploaded file reference handed over by the UI.
        session_hash: Identifier of the session the upload belongs to.

    Returns:
        A two-item list ``[status, html_message]``. For a truthy
        ``upload_value`` this is whatever ``process_data_upload`` returns.
        For a falsy ``upload_value`` an ``"error"`` pair is returned instead
        of an implicit ``None``, because the caller indexes the result
        (``process_message[0]`` / ``process_message[1]``).
    """
    if not upload_value:
        return [
            "error",
            "<p style='color:red;text-align:center;font-size:18px;font-weight:bold;'>ERROR: No file was uploaded</p>",
        ]
    return process_data_upload(upload_value, session_hash)
|
110 |
|
111 |
demo.unload(delete_db)
|
112 |
|
data_sources/upload_file.py
CHANGED
@@ -45,36 +45,43 @@ def read_file(file):
|
|
45 |
return df
|
46 |
|
47 |
def process_data_upload(data_file, session_hash):
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
56 |
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
|
70 |
-
|
71 |
-
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
|
|
|
45 |
return df
|
46 |
|
47 |
def process_data_upload(data_file, session_hash):
    """Load an uploaded data file into the session's SQLite database.

    The file is read into a DataFrame via ``read_file``, its column names are
    normalised (spaces and slashes become underscores), columns whose name
    contains "date" or "time" are converted with ``pd.to_datetime``, and the
    result is written to the ``data_source`` table of
    ``TEMP_DIR/<session_hash>/data_source.db``, replacing any existing table.

    Args:
        data_file: Reference to the uploaded file; passed unchanged to
            ``is_file_done_saving`` and ``read_file``.
        session_hash: Session identifier used as the per-session directory
            name under ``TEMP_DIR``.

    Returns:
        A two-item list ``[status, html_message]`` where ``status`` is
        ``"success"`` or ``"error"``. Any exception raised while processing
        is caught, printed, and reported in the ``"error"`` message rather
        than propagated.
    """
    import html  # local import: only needed to escape the error message

    try:
        # Poll until the upload appears complete; after roughly 10 seconds
        # stop waiting and try to read whatever is available.
        total_time = 0
        while not is_file_done_saving(data_file):
            total_time += .5
            time.sleep(.5)
            if total_time > 10:
                break

        df = read_file(data_file)

        # Make column names easier to reference in SQL.
        df.columns = df.columns.str.replace(' ', '_')
        df.columns = df.columns.str.replace('/', '_')

        for column in df.columns:
            if "date" in column.lower() or "time" in column.lower():
                df[column] = pd.to_datetime(df[column])
            # Guard against header-only files: iloc[0] raises on an empty column.
            if (
                len(df[column]) > 0
                and df[column].dtype == 'object'
                and isinstance(df[column].iloc[0], list)
            ):
                df[column] = df[column].explode()

        dir_path = TEMP_DIR / str(session_hash)
        os.makedirs(dir_path, exist_ok=True)

        connection = sqlite3.connect(f'{dir_path}/data_source.db')
        try:
            print("Opened database successfully")
            print(df.columns)

            df.to_sql('data_source', connection, if_exists='replace', index=False)

            connection.commit()
        finally:
            # Always release the database handle, even when the write fails.
            connection.close()

        return ["success","<p style='color:green;text-align:center;font-size:18px;'>Data upload successful</p>"]
    except Exception as e:
        print("UPLOAD ERROR")
        print(e)
        # The message is rendered as HTML in the UI, so escape the exception
        # text (it can echo file contents or column names).
        return ["error",f"<p style='color:red;text-align:center;font-size:18px;font-weight:bold;'>ERROR: {html.escape(str(e))}</p>"]
|
functions/chart_functions.py
CHANGED
@@ -12,7 +12,7 @@ load_dotenv()
|
|
12 |
|
13 |
root_url = os.getenv("ROOT_URL")
|
14 |
|
15 |
-
def chart_generation_func(data: List[
|
16 |
print("CHART GENERATION")
|
17 |
print(data)
|
18 |
print(graph_type)
|
@@ -69,13 +69,13 @@ def chart_generation_func(data: List[str], x_column: str, y_column: str, graph_t
|
|
69 |
else:
|
70 |
layout_obj = layout
|
71 |
|
72 |
-
if
|
73 |
layout_dict = ast.literal_eval(layout_obj)
|
74 |
else:
|
75 |
layout_dict = layout_obj
|
76 |
|
77 |
#Applying stylings and settings generated from LLM
|
78 |
-
if
|
79 |
fig["layout"] = layout_dict
|
80 |
|
81 |
for key, value in data_dict.items():
|
|
|
12 |
|
13 |
root_url = os.getenv("ROOT_URL")
|
14 |
|
15 |
+
def chart_generation_func(data: List[dict], x_column: str, y_column: str, graph_type: str, session_hash: str, layout: List[dict]=[{}], category: str=""):
|
16 |
print("CHART GENERATION")
|
17 |
print(data)
|
18 |
print(graph_type)
|
|
|
69 |
else:
|
70 |
layout_obj = layout
|
71 |
|
72 |
+
if layout_obj and isinstance(layout_obj, str):
|
73 |
layout_dict = ast.literal_eval(layout_obj)
|
74 |
else:
|
75 |
layout_dict = layout_obj
|
76 |
|
77 |
#Applying stylings and settings generated from LLM
|
78 |
+
if layout_dict:
|
79 |
fig["layout"] = layout_dict
|
80 |
|
81 |
for key, value in data_dict.items():
|
tools.py
CHANGED
@@ -49,7 +49,8 @@ def tools_call(session_hash):
|
|
49 |
"properties": {
|
50 |
"data": {
|
51 |
"type": "array",
|
52 |
-
"description": """The
|
|
|
53 |
Do not include the 'x' or 'y' portions of the object as this will come from the query.csv file generated by our SQLite query.
|
54 |
Infer this from the user's message.""",
|
55 |
"items": {
|
@@ -81,7 +82,7 @@ def tools_call(session_hash):
|
|
81 |
"type": "string",
|
82 |
"description": f"""The type of plotly graph we wish to generate.
|
83 |
This graph_type value can be one of ['bar','scatter','line','pie'].
|
84 |
-
Do not send any values outside of this
|
85 |
Infer this from the user's message.""",
|
86 |
"items": {
|
87 |
"type": "string",
|
@@ -89,7 +90,8 @@ def tools_call(session_hash):
|
|
89 |
},
|
90 |
"layout": {
|
91 |
"type": "array",
|
92 |
-
"description": """
|
|
|
93 |
"items": {
|
94 |
"type": "string",
|
95 |
}
|
@@ -125,7 +127,7 @@ def tools_call(session_hash):
|
|
125 |
"properties": {
|
126 |
"independent_variables": {
|
127 |
"type": "array",
|
128 |
-
"description": f"""
|
129 |
in the 'sql_query_func' function. This will allow us to identify the data to use for our independent variables.
|
130 |
Infer this from the user's message.""",
|
131 |
"items": {
|
|
|
49 |
"properties": {
|
50 |
"data": {
|
51 |
"type": "array",
|
52 |
+
"description": """The array containing a dictionary that contains the 'data' portion of the plotly chart generation and will include the options requested by the user.
|
53 |
+
The array must contain a dictionary, any other format will not work.
|
54 |
Do not include the 'x' or 'y' portions of the object as this will come from the query.csv file generated by our SQLite query.
|
55 |
Infer this from the user's message.""",
|
56 |
"items": {
|
|
|
82 |
"type": "string",
|
83 |
"description": f"""The type of plotly graph we wish to generate.
|
84 |
This graph_type value can be one of ['bar','scatter','line','pie'].
|
85 |
+
Do not send any values outside of this array as the function will fail.
|
86 |
Infer this from the user's message.""",
|
87 |
"items": {
|
88 |
"type": "string",
|
|
|
90 |
},
|
91 |
"layout": {
|
92 |
"type": "array",
|
93 |
+
"description": """An array containing a dictionary that contains the 'layout' portion of the plotly chart generation.
|
94 |
+
The array must contain a dictionary, any other format will not work.""",
|
95 |
"items": {
|
96 |
"type": "string",
|
97 |
}
|
|
|
127 |
"properties": {
|
128 |
"independent_variables": {
|
129 |
"type": "array",
|
130 |
+
"description": f"""An array of strings that states the independent variables in our data set which should be column names in our query.csv file that is generated
|
131 |
in the 'sql_query_func' function. This will allow us to identify the data to use for our independent variables.
|
132 |
Infer this from the user's message.""",
|
133 |
"items": {
|