Spaces:

nolanzandi
/

virtual-data-analyst

Running

App Files Community

nolanzandi committed on Feb 22

Commit

95c52e2

verified ·

1 Parent(s): 63c3a67

Create example questions when file is uploaded (#11)

Browse files

- Create example questions when file is uploaded (cfbedd80b99e0cbb428c01c8b685d2e7158e40c6)

Files changed (1) hide show

functions/chat_functions.py +58 -13

functions/chat_functions.py CHANGED Viewed

@@ -6,6 +6,7 @@ from haystack.dataclasses import ChatMessage
 from haystack.components.generators.chat import OpenAIChatGenerator
 import os
 from getpass import getpass
 from dotenv import load_dotenv
@@ -16,11 +17,35 @@ if "OPENAI_API_KEY" not in os.environ:
 chat_generator = OpenAIChatGenerator(model="gpt-4o")
 response = None
-messages = [
-    ChatMessage.from_system(
-        "You are a helpful and knowledgeable agent who has access to an SQLite database which has a table called 'data_source'. You also have access to a chart API that uses chart.js dictionaries formatted as a string to generate charts and graphs."
-    )
-]
 def chatbot_with_fc(message, history, session_hash):
     from functions import sqlite_query_func, chart_generation_func
@@ -29,15 +54,25 @@ def chatbot_with_fc(message, history, session_hash):
     available_functions = {"sql_query_func": sqlite_query_func, "rag_pipeline_func": rag_pipeline_func, "chart_generation_func": chart_generation_func}
-    messages.append(ChatMessage.from_user(message))
-    response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools.tools_call(session_hash)})
     while True:
         # if OpenAI response is a tool call
         if response and response["replies"][0].meta["finish_reason"] == "tool_calls" or response["replies"][0].tool_calls:
             function_calls = response["replies"][0].tool_calls
             for function_call in function_calls:
-                messages.append(ChatMessage.from_assistant(tool_calls=[function_call]))
                 ## Parse function calling information
                 function_name = function_call.tool_name
                 function_args = function_call.arguments
@@ -47,12 +82,12 @@ def chatbot_with_fc(message, history, session_hash):
                 function_response = function_to_call(**function_args, session_hash=session_hash)
                 print(function_name)
                 ## Append function response to the messages list using `ChatMessage.from_tool`
-                messages.append(ChatMessage.from_tool(tool_result=function_response['reply'], origin=function_call))
-                response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools.tools_call(session_hash)})
         # Regular Conversation
         else:
-            messages.append(response["replies"][0])
             break
     return response["replies"][0].text
@@ -60,6 +95,7 @@ def delete_db(req: gr.Request):
     db_file_path = f'data_source_{req.session_hash}.db'
     if os.path.exists(db_file_path):
         os.remove(db_file_path)
 def run_example(input):
     """Return the given value unchanged."""
     return input
@@ -95,7 +131,9 @@ with gr.Blocks(css=css, delete_cache=(3600,3600)) as demo:
     @gr.render(inputs=file_output)
     def data_options(filename, request: gr.Request):
         print(filename)
         if filename:
             if "bank_marketing_campaign" in filename:
                 example_questions = [
                                         ["Describe the dataset"],
@@ -111,7 +149,15 @@ with gr.Blocks(css=css, delete_cache=(3600,3600)) as demo:
                                         ["Can you generate a line graph of revenue per month?"]
                                     ]
             else:
-                example_questions = [
                                         ["Describe the dataset"],
                                         ["List the columns in the dataset"],
                                         ["What could this data be used for?"],
@@ -127,7 +173,6 @@ with gr.Blocks(css=css, delete_cache=(3600,3600)) as demo:
                                 examples=example_questions,
                                 additional_inputs=parameters
                                 )
-            process_upload(filename, request.session_hash)
     def process_upload(upload_value, session_hash):
         if upload_value:

 from haystack.components.generators.chat import OpenAIChatGenerator
 import os
+import ast
 from getpass import getpass
 from dotenv import load_dotenv
 chat_generator = OpenAIChatGenerator(model="gpt-4o")
 response = None
+message_dict = {}
+def example_question_generator(session_hash):
+    import sqlite3
+    example_response = None
+    example_messages = [
+        ChatMessage.from_system(
+            "You are a helpful and knowledgeable agent who has access to an SQLite database which has a table called 'data_source'."
+        )
+    ]
+    connection = sqlite3.connect(f'data_source_{session_hash}.db')
+    print("Querying questions");
+    cur=connection.execute('select * from data_source')
+    columns = [i[0] for i in cur.description]
+    print("QUESTION COLUMNS")
+    print(columns)
+    cur.close()
+    connection.close()
+    example_messages.append(ChatMessage.from_user(text=f"""We have a SQLite database with the following {columns}.
+                                                  We also have an AI agent with access to the same database that will be performing data analysis.
+                                                  Please return an array of seven strings, each one being a question for our data analysis agent
+                                                  that we can suggest that you believe will be insightful or helpful to a data analysis looking for
+                                                  data insights. Return nothing more than the array of questions because I need that specific data structure
+                                                  to process your response. No other response type or data structure will work."""))
+    example_response = chat_generator.run(messages=example_messages)
+    return example_response["replies"][0].text
 def chatbot_with_fc(message, history, session_hash):
     from functions import sqlite_query_func, chart_generation_func
     available_functions = {"sql_query_func": sqlite_query_func, "rag_pipeline_func": rag_pipeline_func, "chart_generation_func": chart_generation_func}
+    if message_dict[session_hash] != None:
+        message_dict[session_hash].append(ChatMessage.from_user(message))
+    else:
+        messages = [
+            ChatMessage.from_system(
+                "You are a helpful and knowledgeable agent who has access to an SQLite database which has a table called 'data_source'. You also have access to a chart API that uses chart.js dictionaries formatted as a string to generate charts and graphs."
+            )
+        ]
+        messages.append(ChatMessage.from_user(message))
+        message_dict[session_hash] = messages
+    response = chat_generator.run(messages=message_dict[session_hash], generation_kwargs={"tools": tools.tools_call(session_hash)})
     while True:
         # if OpenAI response is a tool call
         if response and response["replies"][0].meta["finish_reason"] == "tool_calls" or response["replies"][0].tool_calls:
             function_calls = response["replies"][0].tool_calls
             for function_call in function_calls:
+                message_dict[session_hash].append(ChatMessage.from_assistant(tool_calls=[function_call]))
                 ## Parse function calling information
                 function_name = function_call.tool_name
                 function_args = function_call.arguments
                 function_response = function_to_call(**function_args, session_hash=session_hash)
                 print(function_name)
                 ## Append function response to the messages list using `ChatMessage.from_tool`
+                message_dict[session_hash].append(ChatMessage.from_tool(tool_result=function_response['reply'], origin=function_call))
+                response = chat_generator.run(messages=message_dict[session_hash], generation_kwargs={"tools": tools.tools_call(session_hash)})
         # Regular Conversation
         else:
+            message_dict[session_hash].append(response["replies"][0])
             break
     return response["replies"][0].text
     db_file_path = f'data_source_{req.session_hash}.db'
     if os.path.exists(db_file_path):
         os.remove(db_file_path)
+        message_dict[req.session_hash] = None
 def run_example(input):
     """Return the given value unchanged."""
     return input
     @gr.render(inputs=file_output)
     def data_options(filename, request: gr.Request):
         print(filename)
+        message_dict[request.session_hash] = None
         if filename:
+            process_upload(filename, request.session_hash)
             if "bank_marketing_campaign" in filename:
                 example_questions = [
                                         ["Describe the dataset"],
                                         ["Can you generate a line graph of revenue per month?"]
                                     ]
             else:
+                try:
+                    generated_examples = ast.literal_eval(example_question_generator(request.session_hash))
+                    example_questions = [
+                                            ["Describe the dataset"]
+                                        ]
+                    for example in generated_examples:
+                        example_questions.append([example])
+                except:
+                    example_questions = [
                                         ["Describe the dataset"],
                                         ["List the columns in the dataset"],
                                         ["What could this data be used for?"],
                                 examples=example_questions,
                                 additional_inputs=parameters
                                 )
     def process_upload(upload_value, session_hash):
         if upload_value: