Spaces:

nolanzandi
/

virtual-data-analyst

Running

App Files Files Community

nolanzandi committed on Feb 1

Commit

24371db

verified ·

1 Parent(s): 85079bb

Upload 11 files

Browse files

initial demo files

Files changed (11) hide show

__init__.py +3 -0
data_sources/__init__.py +3 -0
data_sources/upload_file.py +22 -0
functions/__init__.py +4 -0
functions/chat_functions.py +93 -0
functions/sqlite_functions.py +34 -0
main.py +13 -0
pipelines/__init__.py +3 -0
pipelines/pipelines.py +122 -0
requirements.txt +9 -0
tools.py +52 -0

__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .main import data_url
2	+
3	+ __all__ = ["data_url"]

data_sources/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .upload_file import process_data_upload
2	+
3	+ __all__ = ["process_data_upload"]

data_sources/upload_file.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import pandas as pd
+import sqlite3
+def process_data_upload(data_file):
+    df = pd.read_csv(data_file, sep=";")
+    # Read each sheet and store data in a DataFrame
+    #data = df.parse(sheet_name)
+    # Process the data as needed
+    # ...
+    df.columns = df.columns.str.replace(' ', '_')
+    df.columns = df.columns.str.replace('/', '_')
+    connection = sqlite3.connect('data_source.db')
+    print("Opened database successfully");
+    print(df.columns)
+    df.to_sql('data_source', connection, if_exists='replace', index = False)
+    connection.commit()
+    connection.close()

functions/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from .sqlite_functions import SQLiteQuery, sqlite_query_func
+from .chat_functions import demo
+__all__ = ["SQLiteQuery","sqlite_query_func","demo"]

functions/chat_functions.py ADDED Viewed

	@@ -0,0 +1,93 @@

+from data_sources import process_data_upload
+import gradio as gr
+import json
+from haystack.dataclasses import ChatMessage
+from haystack.components.generators.chat import OpenAIChatGenerator
+import os
+from getpass import getpass
+from dotenv import load_dotenv
+load_dotenv()
+if "OPENAI_API_KEY" not in os.environ:
+    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")
+chat_generator = OpenAIChatGenerator(model="gpt-4o")
+response = None
+messages = [
+    ChatMessage.from_system(
+        "You are a helpful and knowledgeable agent who has access to an SQL database which has a table called 'data_source'"
+    )
+]
+def chatbot_with_fc(message, history):
+    """Answer one user message, letting the model call the registered tools.
+
+    The user message is appended to the module-level ``messages`` list, the
+    chat generator is run with the tool definitions from ``tools.tools``, and
+    any requested tool calls are executed and fed back until the model
+    finishes with a regular reply.
+
+    Args:
+        message: Text of the user's message.
+        history: Not used by this function; conversation state is kept in the
+            module-level ``messages`` list instead.
+
+    Returns:
+        The ``content`` of the model's final reply.
+    """
+    print("CHATBOT FUNCTIONS")
+    # Imported inside the function rather than at module level.
+    # NOTE(review): presumably to defer these imports until a file has been uploaded - confirm.
+    from functions import sqlite_query_func
+    from pipelines import rag_pipeline_func
+    import tools
+    import importlib
+    # Re-execute tools.py on every call.
+    # NOTE(review): presumably so the tool descriptions reflect the current table columns - confirm.
+    importlib.reload(tools)
+    # Maps the tool names the model may request to the callables that implement them.
+    available_functions = {"sql_query_func": sqlite_query_func, "rag_pipeline_func": rag_pipeline_func}
+    messages.append(ChatMessage.from_user(message))
+    response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools.tools})
+    while True:
+        # If the OpenAI response is a tool call
+        if response and response["replies"][0].meta["finish_reason"] == "tool_calls":
+            # The reply content is a JSON list of requested function calls.
+            function_calls = json.loads(response["replies"][0].content)
+            for function_call in function_calls:
+                ## Parse function calling information
+                function_name = function_call["function"]["name"]
+                function_args = json.loads(function_call["function"]["arguments"])
+                ## Find the corresponding function and call it with the given arguments
+                function_to_call = available_functions[function_name]
+                function_response = function_to_call(**function_args)
+                ## Append function response to the messages list using `ChatMessage.from_function`
+                messages.append(ChatMessage.from_function(content=function_response['reply'], name=function_name))
+                # The generator is re-run after each individual tool result, inside the loop.
+                response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools.tools})
+        # Regular conversation: store the reply in the history and stop looping
+        else:
+            messages.append(response["replies"][0])
+            break
+    return response["replies"][0].content
+# Custom CSS: sets a minimum height on the upload widget tagged with the "file_marker" class.
+css= ".file_marker .large{min-height:50px !important;}"
+# Top-level Gradio app: a heading, a CSV upload widget, and a chat window that is rendered once a file is present.
+with gr.Blocks(css=css) as demo:
+    title = gr.HTML("<h1 style='text-align:center;'>Virtual Data Analyst</h1>")
+    description = gr.HTML("<p style='text-align:center;'>Upload a CSV file and chat with our virtual data analyst to get insights on your data set</p>")
+    file_output = gr.File(label="CSV File", show_label=True, elem_classes="file_marker", file_types=['.csv'])
+    # Registered with gr.render using the upload widget as its input.
+    @gr.render(inputs=file_output)
+    def data_options(filename):
+        """Build the chat interface and load the upload when a file is present."""
+        print(filename)
+        if filename:
+            # The Chatbot is created with render=False and handed to ChatInterface.
+            bot = gr.Chatbot(type='messages', label="CSV Chat Window", show_label=True, render=False, visible=True, elem_classes="chatbot")
+            chat = gr.ChatInterface(
+                                fn=chatbot_with_fc,
+                                type='messages',
+                                chatbot=bot,
+                                title="Chat with your data file",
+                                examples=[
+                                    ["Describe the dataset"],
+                                    ["List the columns in the dataset"],
+                                    ["What could this data be used for?"],
+                                ],
+                                )
+            # Load the uploaded file into SQLite; the return value is ignored here.
+            process_upload(filename)
+    def process_upload(upload_value):
+        """Pass a non-empty upload to process_data_upload; always return two empty lists."""
+        if upload_value:
+            print("UPLOAD VALUE")
+            print(upload_value)
+            process_data_upload(upload_value)
+        return [], []

functions/sqlite_functions.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from typing import List
+from haystack import component
+import pandas as pd
+import sqlite3
+@component
+class SQLiteQuery:
+    def __init__(self, sql_database: str):
+      self.connection = sqlite3.connect(sql_database, check_same_thread=False)
+    @component.output_types(results=List[str], queries=List[str])
+    def run(self, queries: List[str]):
+        results = []
+        for query in queries:
+          result = pd.read_sql(query, self.connection)
+          results.append(f"{result}")
+        self.connection.close()
+        return {"results": results, "queries": queries}
+sql_query = SQLiteQuery('data_source.db')
+def sqlite_query_func(queries: List[str]):
+    try:
+      result = sql_query.run(queries)
+      return {"reply": result["results"][0]}
+    except Exception as e:
+      reply = f"""There was an error running the SQL Query = {queries}
+              The error is {e},
+              You should probably try again.
+              """
+      return {"reply": reply}

main.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from functions import demo
+import os
+from getpass import getpass
+from dotenv import load_dotenv
+load_dotenv()
+if "OPENAI_API_KEY" not in os.environ:
+    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")
+## Uncomment the line below to launch the chat app with UI
+demo.launch(debug=True, share=True)

pipelines/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .pipelines import conditional_sql_pipeline, rag_pipeline_func
2	+
3	+ __all__ = ["conditional_sql_pipeline", "rag_pipeline_func"]

pipelines/pipelines.py ADDED Viewed

	@@ -0,0 +1,122 @@

+from haystack import Pipeline
+from haystack.components.builders import PromptBuilder
+from haystack.components.generators.openai import OpenAIGenerator
+from haystack.components.routers import ConditionalRouter
+from functions import SQLiteQuery
+from typing import List
+import sqlite3
+import os
+from getpass import getpass
+from dotenv import load_dotenv
+# Load variables from a .env file, then prompt for the OpenAI key if it is still missing.
+load_dotenv()
+if "OPENAI_API_KEY" not in os.environ:
+    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")
+# The triple-quoted string below is not assigned to anything, so none of the
+# code inside it runs: it is an earlier, unconditional SQL pipeline left
+# disabled.
+'''
+prompt = PromptBuilder(template="""Please generate an SQL query. The query should answer the following Question: {{question}};
+            The query is to be answered for the table is called 'data_source' with the following
+            Columns: {{columns}};
+            Answer:""")
+sql_query = SQLQuery('data_source.db')
+llm = OpenAIGenerator(model="gpt-4")
+sql_pipeline = Pipeline()
+sql_pipeline.add_component("prompt", prompt)
+sql_pipeline.add_component("llm", llm)
+sql_pipeline.add_component("sql_querier", sql_query)
+sql_pipeline.connect("prompt", "llm")
+sql_pipeline.connect("llm.replies", "sql_querier.queries")
+# If you want to draw the pipeline, uncomment below 👇
+sql_pipeline.show()
+print("PIPELINE RUNNING")
+result = sql_pipeline.run({"prompt": {"question": "On which days of the week are average sales highest?",
+                            "columns": columns}})
+print(result["sql_querier"]["results"][0])
+'''
+from haystack.components.builders import PromptBuilder
+from haystack.components.generators import OpenAIGenerator
+# Generator that turns the prompt into an SQL query (or 'no_answer').
+llm = OpenAIGenerator(model="gpt-4o")
+# Component that executes the generated SQL against data_source.db.
+sql_query = SQLiteQuery('data_source.db')
+# Read the column names of the 'data_source' table once, at import time.
+# NOTE(review): only the cursor is closed below; `connection` itself is never closed in this module.
+connection = sqlite3.connect('data_source.db')
+cur=connection.execute('select * from data_source')
+columns = [i[0] for i in cur.description]
+print("COLUMNS 2")
+print(columns)
+cur.close()
+#Rag Pipeline
+# Prompt asking the model for an SQL query, or the literal 'no_answer' when the table cannot answer the question.
+prompt = PromptBuilder(template="""Please generate an SQL query. The query should answer the following Question: {{question}};
+            If the question cannot be answered given the provided table and columns, return 'no_answer'
+            The query is to be answered for the table is called 'data_source' with the following
+            Columns: {{columns}};
+            Answer:""")
+# Two mutually exclusive routes keyed on whether the first reply contains 'no_answer':
+# the replies are emitted as "sql", or the question is emitted as "go_to_fallback".
+routes = [
+     {
+        "condition": "{{'no_answer' not in replies[0]}}",
+        "output": "{{replies}}",
+        "output_name": "sql",
+        "output_type": List[str],
+    },
+    {
+        "condition": "{{'no_answer' in replies[0]}}",
+        "output": "{{question}}",
+        "output_name": "go_to_fallback",
+        "output_type": str,
+    },
+]
+router = ConditionalRouter(routes)
+# Prompt used on the fallback route to explain why the question cannot be answered.
+fallback_prompt = PromptBuilder(template="""User entered a query that cannot be answered with the given table.
+                                            The query was: {{question}} and the table had columns: {{columns}}.
+                                            Let the user know why the question cannot be answered""")
+# NOTE(review): the fallback generator uses "gpt-4" while the main generator uses "gpt-4o" - confirm this is intended.
+fallback_llm = OpenAIGenerator(model="gpt-4")
+# Assemble the pipeline: prompt -> llm -> router -> (sql_querier | fallback_prompt -> fallback_llm).
+conditional_sql_pipeline = Pipeline()
+conditional_sql_pipeline.add_component("prompt", prompt)
+conditional_sql_pipeline.add_component("llm", llm)
+conditional_sql_pipeline.add_component("router", router)
+conditional_sql_pipeline.add_component("fallback_prompt", fallback_prompt)
+conditional_sql_pipeline.add_component("fallback_llm", fallback_llm)
+conditional_sql_pipeline.add_component("sql_querier", sql_query)
+conditional_sql_pipeline.connect("prompt", "llm")
+conditional_sql_pipeline.connect("llm.replies", "router.replies")
+conditional_sql_pipeline.connect("router.sql", "sql_querier.queries")
+conditional_sql_pipeline.connect("router.go_to_fallback", "fallback_prompt.question")
+conditional_sql_pipeline.connect("fallback_prompt", "fallback_llm")
+# NOTE(review): the pipeline is executed here, at import time, with a sample question;
+# `result` is not read by any code visible in this module - confirm whether this run is still wanted.
+question = "When is my birthday?"
+result = conditional_sql_pipeline.run({"prompt": {"question": question,
+                                                  "columns": columns},
+                                       "router": {"question": question},
+                                       "fallback_prompt": {"columns": columns}})
+def rag_pipeline_func(question: str, columns: str):
+   result = conditional_sql_pipeline.run({"prompt": {"question": question,
+                                                  "columns": columns},
+                                       "router": {"question": question},
+                                       "fallback_prompt": {"columns": columns}})
+   if 'sql_querier' in result:
+      reply = result['sql_querier']['results'][0]
+   elif 'fallback_llm' in result:
+      reply = result['fallback_llm']['replies'][0]
+   else:
+      reply = result["llm"]["replies"][0]
+   print("reply content")
+   print(reply.content)
+   return {"reply": reply.content}

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+haystack-ai
+hayhooks
+sentence-transformers>=3.0.0
+python-dotenv
+gradio
+pandas
+openpyxl
+snowflake-haystack
+psutil

tools.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import sqlite3
+connection = sqlite3.connect('data_source.db')
+print("Querying Database in Tools.py");
+cur=connection.execute('select * from data_source')
+columns = [i[0] for i in cur.description]
+print("COLUMNS 2")
+print(columns)
+cur.close()
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "sql_query_func",
+            "description": f"This a tool useful to query a SQL table called 'data_source' with the following Columns: {columns}",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "queries": {
+                        "type": "array",
+                        "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
+                        "items": {
+                            "type": "string",
+                        }
+                    }
+                },
+                "required": ["question"],
+            },
+        },
+    },
+    {
+       "type": "function",
+        "function": {
+            "name": "rag_pipeline_func",
+            "description": f"This a tool useful to query a SQL table called 'data_source' with the following Columns: {columns}",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "array",
+                        "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
+                        "items": {
+                            "type": "string",
+                        }
+                    }
+                },
+                "required": ["query"],
+            },
+        },
+    }
+]