Spaces:

nolanzandi
/

virtual-data-analyst

Running

App Files Files Community

Nolan Zandi committed on Feb 14

Commit

fb65c41

1 Parent(s): 2946020

fix missing init file

Browse files

Files changed (8) hide show

__init__.py +0 -3
data_sources/upload_file.py +2 -3
functions/chat_functions.py +19 -13
functions/sqlite_functions.py +3 -3
pipelines/__init__.py +2 -2
pipelines/pipelines.py +51 -51
requirements.txt +3 -3
tools.py +45 -43

__init__.py DELETED Viewed

@@ -1,3 +0,0 @@
-from .app import data_url
-__all__ = ["data_url"]

data_sources/upload_file.py CHANGED Viewed

@@ -1,8 +1,7 @@
 import pandas as pd
 import sqlite3
-def process_data_upload(data_file):
     df = pd.read_csv(data_file, sep=";")
     # Read each sheet and store data in a DataFrame
@@ -12,7 +11,7 @@ def process_data_upload(data_file):
     df.columns = df.columns.str.replace(' ', '_')
     df.columns = df.columns.str.replace('/', '_')
-    connection = sqlite3.connect('data_source.db')
     print("Opened database successfully");
     print(df.columns)

 import pandas as pd
 import sqlite3
+def process_data_upload(data_file, session_hash):
     df = pd.read_csv(data_file, sep=";")
     # Read each sheet and store data in a DataFrame
     df.columns = df.columns.str.replace(' ', '_')
     df.columns = df.columns.str.replace('/', '_')
+    connection = sqlite3.connect(f'data_source_{session_hash}.db')
     print("Opened database successfully");
     print(df.columns)

functions/chat_functions.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from data_sources import process_data_upload
 import gradio as gr
-import json
 from haystack.dataclasses import ChatMessage
 from haystack.components.generators.chat import OpenAIChatGenerator
@@ -23,20 +22,18 @@ messages = [
     )
 ]
-def chatbot_with_fc(message, history):
     from functions import sqlite_query_func
     from pipelines import rag_pipeline_func
     import tools
-    import importlib
-    importlib.reload(tools)
     available_functions = {"sql_query_func": sqlite_query_func, "rag_pipeline_func": rag_pipeline_func}
     messages.append(ChatMessage.from_user(message))
-    response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools.tools})
     while True:
         # if OpenAI response is a tool call
-        if response and response["replies"][0].meta["finish_reason"] == "tool_calls":
             function_calls = response["replies"][0].tool_calls
             for function_call in function_calls:
                 messages.append(ChatMessage.from_assistant(tool_calls=[function_call]))
@@ -46,10 +43,10 @@ def chatbot_with_fc(message, history):
                 ## Find the corresponding function and call it with the given arguments
                 function_to_call = available_functions[function_name]
-                function_response = function_to_call(**function_args)
                 ## Append function response to the messages list using `ChatMessage.from_tool`
                 messages.append(ChatMessage.from_tool(tool_result=function_response['reply'], origin=function_call))
-                response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools.tools})
         # Regular Conversation
         else:
@@ -57,6 +54,11 @@ def chatbot_with_fc(message, history):
             break
     return response["replies"][0].text
 css= ".file_marker .large{min-height:50px !important;}"
 with gr.Blocks(css=css) as demo:
@@ -65,27 +67,31 @@ with gr.Blocks(css=css) as demo:
     file_output = gr.File(label="CSV File", show_label=True, elem_classes="file_marker", file_types=['.csv'])
     @gr.render(inputs=file_output)
-    def data_options(filename):
         print(filename)
         if filename:
             bot = gr.Chatbot(type='messages', label="CSV Chat Window", show_label=True, render=False, visible=True, elem_classes="chatbot")
             chat = gr.ChatInterface(
                                 fn=chatbot_with_fc,
                                 type='messages',
                                 chatbot=bot,
                                 title="Chat with your data file",
                                 examples=[
                                     ["Describe the dataset"],
                                     ["List the columns in the dataset"],
                                     ["What could this data be used for?"],
                                 ],
                                 )
-            process_upload(filename)
-    def process_upload(upload_value):
         if upload_value:
-            process_data_upload(upload_value)
         return [], []

 from data_sources import process_data_upload
 import gradio as gr
 from haystack.dataclasses import ChatMessage
 from haystack.components.generators.chat import OpenAIChatGenerator
     )
 ]
+def chatbot_with_fc(message, history, session_hash):
     from functions import sqlite_query_func
     from pipelines import rag_pipeline_func
     import tools
     available_functions = {"sql_query_func": sqlite_query_func, "rag_pipeline_func": rag_pipeline_func}
     messages.append(ChatMessage.from_user(message))
+    response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools.tools_call(session_hash)})
     while True:
         # if OpenAI response is a tool call
+        if response and response["replies"][0].meta["finish_reason"] == "tool_calls" or response["replies"][0].tool_calls:
             function_calls = response["replies"][0].tool_calls
             for function_call in function_calls:
                 messages.append(ChatMessage.from_assistant(tool_calls=[function_call]))
                 ## Find the corresponding function and call it with the given arguments
                 function_to_call = available_functions[function_name]
+                function_response = function_to_call(**function_args, session_hash=session_hash)
                 ## Append function response to the messages list using `ChatMessage.from_tool`
                 messages.append(ChatMessage.from_tool(tool_result=function_response['reply'], origin=function_call))
+                response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools.tools_call(session_hash)})
         # Regular Conversation
         else:
             break
     return response["replies"][0].text
+def delete_db(req: gr.Request):
+    db_file_path = f'data_source_{req.session_hash}.db'
+    if os.path.exists(db_file_path):
+        os.remove(db_file_path)
 css= ".file_marker .large{min-height:50px !important;}"
 with gr.Blocks(css=css) as demo:
     file_output = gr.File(label="CSV File", show_label=True, elem_classes="file_marker", file_types=['.csv'])
     @gr.render(inputs=file_output)
+    def data_options(filename, request: gr.Request):
         print(filename)
         if filename:
+            parameters = gr.Textbox(visible=False, value=request.session_hash)
             bot = gr.Chatbot(type='messages', label="CSV Chat Window", show_label=True, render=False, visible=True, elem_classes="chatbot")
             chat = gr.ChatInterface(
                                 fn=chatbot_with_fc,
                                 type='messages',
                                 chatbot=bot,
                                 title="Chat with your data file",
+                                concurrency_limit=None,
                                 examples=[
                                     ["Describe the dataset"],
                                     ["List the columns in the dataset"],
                                     ["What could this data be used for?"],
                                 ],
+                                additional_inputs=parameters
                                 )
+            process_upload(filename, request.session_hash)
+    def process_upload(upload_value, session_hash):
         if upload_value:
+            process_data_upload(upload_value, session_hash)
         return [], []
+    demo.unload(delete_db)

functions/sqlite_functions.py CHANGED Viewed

@@ -16,13 +16,13 @@ class SQLiteQuery:
         for query in queries:
           result = pd.read_sql(query, self.connection)
           results.append(f"{result}")
-        "self.connection.close()"
         return {"results": results, "queries": queries}
-sql_query = SQLiteQuery('data_source.db')
-def sqlite_query_func(queries: List[str]):
     try:
       result = sql_query.run(queries)
       return {"reply": result["results"][0]}

         for query in queries:
           result = pd.read_sql(query, self.connection)
           results.append(f"{result}")
+        self.connection.close()
         return {"results": results, "queries": queries}
+def sqlite_query_func(queries: List[str], session_hash):
+    sql_query = SQLiteQuery(f'data_source_{session_hash}.db')
     try:
       result = sql_query.run(queries)
       return {"reply": result["results"][0]}

pipelines/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
-from .pipelines import conditional_sql_pipeline, rag_pipeline_func
-__all__ = ["conditional_sql_pipeline", "rag_pipeline_func"]


1	+ from .pipelines import rag_pipeline_func
2
3	+ __all__ = ["rag_pipeline_func"]

pipelines/pipelines.py CHANGED Viewed

@@ -21,57 +21,57 @@ from haystack.components.builders import PromptBuilder
 from haystack.components.generators import OpenAIGenerator
 llm = OpenAIGenerator(model="gpt-4o")
-sql_query = SQLiteQuery('data_source.db')
-connection = sqlite3.connect('data_source.db')
-cur=connection.execute('select * from data_source')
-columns = [i[0] for i in cur.description]
-cur.close()
-#Rag Pipeline
-prompt = PromptBuilder(template="""Please generate an SQL query. The query should answer the following Question: {{question}};
-            If the question cannot be answered given the provided table and columns, return 'no_answer'
-            The query is to be answered for the table is called 'data_source' with the following
-            Columns: {{columns}};
-            Answer:""")
-routes = [
-     {
-        "condition": "{{'no_answer' not in replies[0]}}",
-        "output": "{{replies}}",
-        "output_name": "sql",
-        "output_type": List[str],
-    },
-    {
-        "condition": "{{'no_answer' in replies[0]}}",
-        "output": "{{question}}",
-        "output_name": "go_to_fallback",
-        "output_type": str,
-    },
-]
-router = ConditionalRouter(routes)
-fallback_prompt = PromptBuilder(template="""User entered a query that cannot be answered with the given table.
-                                            The query was: {{question}} and the table had columns: {{columns}}.
-                                            Let the user know why the question cannot be answered""")
-fallback_llm = OpenAIGenerator(model="gpt-4")
-conditional_sql_pipeline = Pipeline()
-conditional_sql_pipeline.add_component("prompt", prompt)
-conditional_sql_pipeline.add_component("llm", llm)
-conditional_sql_pipeline.add_component("router", router)
-conditional_sql_pipeline.add_component("fallback_prompt", fallback_prompt)
-conditional_sql_pipeline.add_component("fallback_llm", fallback_llm)
-conditional_sql_pipeline.add_component("sql_querier", sql_query)
-conditional_sql_pipeline.connect("prompt", "llm")
-conditional_sql_pipeline.connect("llm.replies", "router.replies")
-conditional_sql_pipeline.connect("router.sql", "sql_querier.queries")
-conditional_sql_pipeline.connect("router.go_to_fallback", "fallback_prompt.question")
-conditional_sql_pipeline.connect("fallback_prompt", "fallback_llm")
-def rag_pipeline_func(queries: str, columns: str):
    print("RAG PIPELINE FUNCTION")
    result = conditional_sql_pipeline.run({"prompt": {"question": queries,
                                                   "columns": columns},

 from haystack.components.generators import OpenAIGenerator
 llm = OpenAIGenerator(model="gpt-4o")
+def rag_pipeline_func(queries: str, columns: str, session_hash):
+   sql_query = SQLiteQuery(f'data_source_{session_hash}.db')
+   connection = sqlite3.connect(f'data_source_{session_hash}.db')
+   cur=connection.execute('select * from data_source')
+   columns = [i[0] for i in cur.description]
+   cur.close()
+   #Rag Pipeline
+   prompt = PromptBuilder(template="""Please generate an SQL query. The query should answer the following Question: {{question}};
+               If the question cannot be answered given the provided table and columns, return 'no_answer'
+               The query is to be answered for the table is called 'data_source' with the following
+               Columns: {{columns}};
+               Answer:""")
+   routes = [
+      {
+         "condition": "{{'no_answer' not in replies[0]}}",
+         "output": "{{replies}}",
+         "output_name": "sql",
+         "output_type": List[str],
+      },
+      {
+         "condition": "{{'no_answer' in replies[0]}}",
+         "output": "{{question}}",
+         "output_name": "go_to_fallback",
+         "output_type": str,
+      },
+   ]
+   router = ConditionalRouter(routes)
+   fallback_prompt = PromptBuilder(template="""User entered a query that cannot be answered with the given table.
+                                             The query was: {{question}} and the table had columns: {{columns}}.
+                                             Let the user know why the question cannot be answered""")
+   fallback_llm = OpenAIGenerator(model="gpt-4")
+   conditional_sql_pipeline = Pipeline()
+   conditional_sql_pipeline.add_component("prompt", prompt)
+   conditional_sql_pipeline.add_component("llm", llm)
+   conditional_sql_pipeline.add_component("router", router)
+   conditional_sql_pipeline.add_component("fallback_prompt", fallback_prompt)
+   conditional_sql_pipeline.add_component("fallback_llm", fallback_llm)
+   conditional_sql_pipeline.add_component("sql_querier", sql_query)
+   conditional_sql_pipeline.connect("prompt", "llm")
+   conditional_sql_pipeline.connect("llm.replies", "router.replies")
+   conditional_sql_pipeline.connect("router.sql", "sql_querier.queries")
+   conditional_sql_pipeline.connect("router.go_to_fallback", "fallback_prompt.question")
+   conditional_sql_pipeline.connect("fallback_prompt", "fallback_llm")
    print("RAG PIPELINE FUNCTION")
    result = conditional_sql_pipeline.run({"prompt": {"question": queries,
                                                   "columns": columns},

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-haystack-ai
-python-dotenv
-gradio
 pandas

+haystack-ai
+python-dotenv
+gradio
 pandas

tools.py CHANGED Viewed

@@ -1,52 +1,54 @@
 import sqlite3
-connection = sqlite3.connect('data_source.db')
-print("Querying Database in Tools.py");
-cur=connection.execute('select * from data_source')
-columns = [i[0] for i in cur.description]
-print("COLUMNS 2")
-print(columns)
-cur.close()
-tools = [
-    {
-        "type": "function",
-        "function": {
-            "name": "sql_query_func",
-            "description": f"This a tool useful to query a SQL table called 'data_source' with the following Columns: {columns}",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "queries": {
-                        "type": "array",
-                        "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
-                        "items": {
-                            "type": "string",
                         }
-                    }
                 },
-                "required": ["question"],
             },
         },
-    },
-    {
-       "type": "function",
-        "function": {
-            "name": "rag_pipeline_func",
-            "description": f"This a tool useful to query a SQL table called 'data_source' with the following Columns: {columns}",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "queries": {
-                        "type": "array",
-                        "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
-                        "items": {
-                            "type": "string",
                         }
-                    }
                 },
-                "required": ["question"],
-            },
-        },
-    }
-]

 import sqlite3
+def tools_call(session_hash):
+    connection = sqlite3.connect(f'data_source_{session_hash}.db')
+    print("Querying Database in Tools.py");
+    cur=connection.execute('select * from data_source')
+    columns = [i[0] for i in cur.description]
+    print("COLUMNS 2")
+    print(columns)
+    cur.close()
+    connection.close()
+    return [
+        {
+            "type": "function",
+            "function": {
+                "name": "sql_query_func",
+                "description": f"This a tool useful to query a SQL table called 'data_source' with the following Columns: {columns}",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "queries": {
+                            "type": "array",
+                            "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
+                            "items": {
+                                "type": "string",
+                            }
                         }
+                    },
+                    "required": ["question"],
                 },
             },
         },
+        {
+        "type": "function",
+            "function": {
+                "name": "rag_pipeline_func",
+                "description": f"This a tool useful to query a SQL table called 'data_source' with the following Columns: {columns}",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "queries": {
+                            "type": "array",
+                            "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
+                            "items": {
+                                "type": "string",
+                            }
                         }
+                    },
+                    "required": ["question"],
                 },
+            },
+        }
+    ]