nolanzandi committed on
Commit
bed69c5
·
verified ·
1 Parent(s): 17c6c25

Display upload errors to user, better prompts for chart layouts

Browse files
Files changed (4) hide show
  1. app.py +45 -43
  2. data_sources/upload_file.py +35 -28
  3. functions/chart_functions.py +3 -3
  4. tools.py +6 -4
app.py CHANGED
@@ -56,55 +56,57 @@ with gr.Blocks(css=css, delete_cache=(3600,3600)) as demo:
56
  print(filename)
57
  message_dict[request.session_hash] = None
58
  if filename:
59
- process_upload(filename, request.session_hash)
60
- if "bank_marketing_campaign" in filename:
61
- example_questions = [
62
- ["Describe the dataset"],
63
- ["What levels of education have the highest and lowest average balance?"],
64
- ["What job is most and least common for a yes response from the individuals, not counting 'unknown'?"],
65
- ["Can you generate a bar chart of education vs. average balance?"],
66
- ["Can you generate a table of levels of education versus average balance, percent married, percent with a loan, and percent in default?"],
67
- ["Can we predict the relationship between the number of contacts performed before this campaign and the average balance?"],
68
- ]
69
- elif "online_retail_data" in filename:
70
- example_questions = [
71
- ["Describe the dataset"],
72
- ["What month had the highest revenue?"],
73
- ["Is revenue higher in the morning or afternoon?"],
74
- ["Can you generate a line graph of revenue per month?"],
75
- ["Can you generate a table of revenue per month?"],
76
- ["Can we predict how time of day affects revenue in this data set?"],
77
- ]
78
- else:
79
- try:
80
- generated_examples = ast.literal_eval(example_question_generator(request.session_hash))
81
  example_questions = [
82
- ["Describe the dataset"]
 
 
 
 
 
83
  ]
84
- for example in generated_examples:
85
- example_questions.append([example])
86
- except:
87
  example_questions = [
88
- ["Describe the dataset"],
89
- ["List the columns in the dataset"],
90
- ["What could this data be used for?"],
91
- ]
92
- parameters = gr.Textbox(visible=False, value=request.session_hash)
93
- bot = gr.Chatbot(type='messages', label="CSV Chat Window", render_markdown=True, sanitize_html=False, show_label=True, render=False, visible=True, elem_classes="chatbot")
94
- chat = gr.ChatInterface(
95
- fn=chatbot_with_fc,
96
- type='messages',
97
- chatbot=bot,
98
- title="Chat with your data file",
99
- concurrency_limit=None,
100
- examples=example_questions,
101
- additional_inputs=parameters
102
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  def process_upload(upload_value, session_hash):
105
  if upload_value:
106
- process_data_upload(upload_value, session_hash)
107
- return [], []
108
 
109
  demo.unload(delete_db)
110
 
 
56
  print(filename)
57
  message_dict[request.session_hash] = None
58
  if filename:
59
+ process_message = process_upload(filename, request.session_hash)
60
+ gr.HTML(value=process_message[1], padding=False)
61
+ if process_message[0] == "success":
62
+ if "bank_marketing_campaign" in filename:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  example_questions = [
64
+ ["Describe the dataset"],
65
+ ["What levels of education have the highest and lowest average balance?"],
66
+ ["What job is most and least common for a yes response from the individuals, not counting 'unknown'?"],
67
+ ["Can you generate a bar chart of education vs. average balance?"],
68
+ ["Can you generate a table of levels of education versus average balance, percent married, percent with a loan, and percent in default?"],
69
+ ["Can we predict the relationship between the number of contacts performed before this campaign and the average balance?"],
70
  ]
71
+ elif "online_retail_data" in filename:
 
 
72
  example_questions = [
73
+ ["Describe the dataset"],
74
+ ["What month had the highest revenue?"],
75
+ ["Is revenue higher in the morning or afternoon?"],
76
+ ["Can you generate a line graph of revenue per month?"],
77
+ ["Can you generate a table of revenue per month?"],
78
+ ["Can we predict how time of day affects revenue in this data set?"],
79
+ ]
80
+ else:
81
+ try:
82
+ generated_examples = ast.literal_eval(example_question_generator(request.session_hash))
83
+ example_questions = [
84
+ ["Describe the dataset"]
85
+ ]
86
+ for example in generated_examples:
87
+ example_questions.append([example])
88
+ except:
89
+ example_questions = [
90
+ ["Describe the dataset"],
91
+ ["List the columns in the dataset"],
92
+ ["What could this data be used for?"],
93
+ ]
94
+ parameters = gr.Textbox(visible=False, value=request.session_hash)
95
+ bot = gr.Chatbot(type='messages', label="CSV Chat Window", render_markdown=True, sanitize_html=False, show_label=True, render=False, visible=True, elem_classes="chatbot")
96
+ chat = gr.ChatInterface(
97
+ fn=chatbot_with_fc,
98
+ type='messages',
99
+ chatbot=bot,
100
+ title="Chat with your data file",
101
+ concurrency_limit=None,
102
+ examples=example_questions,
103
+ additional_inputs=parameters
104
+ )
105
 
106
  def process_upload(upload_value, session_hash):
107
  if upload_value:
108
+ process_message = process_data_upload(upload_value, session_hash)
109
+ return process_message
110
 
111
  demo.unload(delete_db)
112
 
data_sources/upload_file.py CHANGED
@@ -45,36 +45,43 @@ def read_file(file):
45
  return df
46
 
47
  def process_data_upload(data_file, session_hash):
48
- total_time = 0
49
- while not is_file_done_saving(data_file):
50
- total_time += .5
51
- time.sleep(.5)
52
- if total_time > 10:
53
- break
54
-
55
- df = read_file(data_file)
 
56
 
57
- # Read each sheet and store data in a DataFrame
58
- #data = df.parse(sheet_name)
59
- # Process the data as needed
60
- # ...
61
- df.columns = df.columns.str.replace(' ', '_')
62
- df.columns = df.columns.str.replace('/', '_')
63
 
64
- for column in df.columns:
65
- if "date" in column.lower() or "time" in column.lower():
66
- df[column] = pd.to_datetime(df[column])
67
- if df[column].dtype == 'object' and isinstance(df[column].iloc[0], list):
68
- df[column] = df[column].explode()
69
 
70
- dir_path = TEMP_DIR / str(session_hash)
71
- os.makedirs(dir_path, exist_ok=True)
72
 
73
- connection = sqlite3.connect(f'{dir_path}/data_source.db')
74
- print("Opened database successfully");
75
- print(df.columns)
 
 
 
 
 
76
 
77
- df.to_sql('data_source', connection, if_exists='replace', index = False)
78
-
79
- connection.commit()
80
- connection.close()
 
 
45
  return df
46
 
47
  def process_data_upload(data_file, session_hash):
48
+ try:
49
+ total_time = 0
50
+ while not is_file_done_saving(data_file):
51
+ total_time += .5
52
+ time.sleep(.5)
53
+ if total_time > 10:
54
+ break
55
+
56
+ df = read_file(data_file)
57
 
58
+ # Read each sheet and store data in a DataFrame
59
+ #data = df.parse(sheet_name)
60
+ # Process the data as needed
61
+ # ...
62
+ df.columns = df.columns.str.replace(' ', '_')
63
+ df.columns = df.columns.str.replace('/', '_')
64
 
65
+ for column in df.columns:
66
+ if "date" in column.lower() or "time" in column.lower():
67
+ df[column] = pd.to_datetime(df[column])
68
+ if df[column].dtype == 'object' and isinstance(df[column].iloc[0], list):
69
+ df[column] = df[column].explode()
70
 
71
+ dir_path = TEMP_DIR / str(session_hash)
72
+ os.makedirs(dir_path, exist_ok=True)
73
 
74
+ connection = sqlite3.connect(f'{dir_path}/data_source.db')
75
+ print("Opened database successfully");
76
+ print(df.columns)
77
+
78
+ df.to_sql('data_source', connection, if_exists='replace', index = False)
79
+
80
+ connection.commit()
81
+ connection.close()
82
 
83
+ return ["success","<p style='color:green;text-align:center;font-size:18px;'>Data upload successful</p>"]
84
+ except Exception as e:
85
+ print("UPLOAD ERROR")
86
+ print(e)
87
+ return ["error",f"<p style='color:red;text-align:center;font-size:18px;font-weight:bold;'>ERROR: {e}</p>"]
functions/chart_functions.py CHANGED
@@ -12,7 +12,7 @@ load_dotenv()
12
 
13
  root_url = os.getenv("ROOT_URL")
14
 
15
- def chart_generation_func(data: List[str], x_column: str, y_column: str, graph_type: str, session_hash: str, layout: Dict[str,str]={}, category: str=""):
16
  print("CHART GENERATION")
17
  print(data)
18
  print(graph_type)
@@ -69,13 +69,13 @@ def chart_generation_func(data: List[str], x_column: str, y_column: str, graph_t
69
  else:
70
  layout_obj = layout
71
 
72
- if layout and isinstance(layout_obj, str):
73
  layout_dict = ast.literal_eval(layout_obj)
74
  else:
75
  layout_dict = layout_obj
76
 
77
  #Applying stylings and settings generated from LLM
78
- if layout:
79
  fig["layout"] = layout_dict
80
 
81
  for key, value in data_dict.items():
 
12
 
13
  root_url = os.getenv("ROOT_URL")
14
 
15
+ def chart_generation_func(data: List[dict], x_column: str, y_column: str, graph_type: str, session_hash: str, layout: List[dict]=[{}], category: str=""):
16
  print("CHART GENERATION")
17
  print(data)
18
  print(graph_type)
 
69
  else:
70
  layout_obj = layout
71
 
72
+ if layout_obj and isinstance(layout_obj, str):
73
  layout_dict = ast.literal_eval(layout_obj)
74
  else:
75
  layout_dict = layout_obj
76
 
77
  #Applying stylings and settings generated from LLM
78
+ if layout_dict:
79
  fig["layout"] = layout_dict
80
 
81
  for key, value in data_dict.items():
tools.py CHANGED
@@ -49,7 +49,8 @@ def tools_call(session_hash):
49
  "properties": {
50
  "data": {
51
  "type": "array",
52
- "description": """The list containing a dictionary that contains the 'data' portion of the plotly chart generation and will include the options requested by the user.
 
53
  Do not include the 'x' or 'y' portions of the object as this will come from the query.csv file generated by our SQLite query.
54
  Infer this from the user's message.""",
55
  "items": {
@@ -81,7 +82,7 @@ def tools_call(session_hash):
81
  "type": "string",
82
  "description": f"""The type of plotly graph we wish to generate.
83
  This graph_type value can be one of ['bar','scatter','line','pie'].
84
- Do not send any values outside of this list as the function will fail.
85
  Infer this from the user's message.""",
86
  "items": {
87
  "type": "string",
@@ -89,7 +90,8 @@ def tools_call(session_hash):
89
  },
90
  "layout": {
91
  "type": "array",
92
- "description": """The dictionary that contains the 'layout' portion of the plotly chart generation""",
 
93
  "items": {
94
  "type": "string",
95
  }
@@ -125,7 +127,7 @@ def tools_call(session_hash):
125
  "properties": {
126
  "independent_variables": {
127
  "type": "array",
128
- "description": f"""A list of strings that states the independent variables in our data set which should be column names in our query.csv file that is generated
129
  in the 'sql_query_func' function. This will allow us to identify the data to use for our independent variables.
130
  Infer this from the user's message.""",
131
  "items": {
 
49
  "properties": {
50
  "data": {
51
  "type": "array",
52
+ "description": """The array containing a dictionary that contains the 'data' portion of the plotly chart generation and will include the options requested by the user.
53
+ The array must contain a dictionary, any other format will not work.
54
  Do not include the 'x' or 'y' portions of the object as this will come from the query.csv file generated by our SQLite query.
55
  Infer this from the user's message.""",
56
  "items": {
 
82
  "type": "string",
83
  "description": f"""The type of plotly graph we wish to generate.
84
  This graph_type value can be one of ['bar','scatter','line','pie'].
85
+ Do not send any values outside of this array as the function will fail.
86
  Infer this from the user's message.""",
87
  "items": {
88
  "type": "string",
 
90
  },
91
  "layout": {
92
  "type": "array",
93
+ "description": """An array containing a dictionary that contains the 'layout' portion of the plotly chart generation.
94
+ The array must contain a dictionary, any other format will not work.""",
95
  "items": {
96
  "type": "string",
97
  }
 
127
  "properties": {
128
  "independent_variables": {
129
  "type": "array",
130
+ "description": f"""An array of strings that states the independent variables in our data set which should be column names in our query.csv file that is generated
131
  in the 'sql_query_func' function. This will allow us to identify the data to use for our independent variables.
132
  Infer this from the user's message.""",
133
  "items": {