virtual-data-analyst / functions /chat_functions.py
nolanzandi's picture
Upload 2 files (#4)
73a1633 verified
raw
history blame
6.46 kB
from data_sources import process_data_upload
import gradio as gr
from haystack.dataclasses import ChatMessage
from haystack.components.generators.chat import OpenAIChatGenerator
import os
from getpass import getpass
from dotenv import load_dotenv
# Read variables from a local .env file (if any) before checking for the key.
load_dotenv()

# Only prompt interactively when no API key has been configured.
if os.environ.get("OPENAI_API_KEY") is None:
    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")

# Chat generator used for every model call made from this module.
chat_generator = OpenAIChatGenerator(model="gpt-4o")
response = None

# Message list seeded with the system prompt that tells the model which
# table it can query.
messages = []
messages.append(
    ChatMessage.from_system(
        "You are a helpful and knowledgeable agent who has access to an SQL database which has a table called 'data_source'"
    )
)
# Conversation history for each Gradio session, keyed by session hash.
_session_messages = {}


def _messages_for_session(session_hash):
    """Return the history list for ``session_hash``, creating it on first use."""
    if session_hash not in _session_messages:
        # Seed a fresh history with the shared system prompt only, i.e. the
        # first entry of the module-level ``messages`` list.
        _session_messages[session_hash] = messages[:1]
    return _session_messages[session_hash]


def chatbot_with_fc(message, history, session_hash):
    """Answer a user message, executing any tool calls the model requests.

    Each session keeps its own message history so that concurrent users do
    not see or corrupt each other's conversations.

    Args:
        message: Text of the user's new message.
        history: Chat history supplied by the caller; not used here because
            the history sent to the model is tracked in this module.
        session_hash: Identifier of the calling session. It selects the
            conversation history and is forwarded to the tool functions and
            to ``tools.tools_call``.

    Returns:
        The text of the model's final (non tool-call) reply.
    """
    from functions import sqlite_query_func
    from pipelines import rag_pipeline_func
    import tools

    available_functions = {"sql_query_func": sqlite_query_func, "rag_pipeline_func": rag_pipeline_func}

    session_messages = _messages_for_session(session_hash)
    session_messages.append(ChatMessage.from_user(message))
    response = chat_generator.run(
        messages=session_messages,
        generation_kwargs={"tools": tools.tools_call(session_hash)},
    )

    while True:
        reply = response["replies"][0]

        # Regular conversation: no tool calls left to execute, so this reply
        # is the answer. A reply without tool calls is always treated as
        # final, which also prevents looping on an empty tool-call list.
        if not reply.tool_calls:
            session_messages.append(reply)
            return reply.text

        for function_call in reply.tool_calls:
            # Record each tool call as its own assistant message, directly
            # followed by the matching tool result.
            session_messages.append(ChatMessage.from_assistant(tool_calls=[function_call]))

            function_name = function_call.tool_name
            function_to_call = available_functions.get(function_name)
            if function_to_call is None:
                # Report an unknown tool back to the model instead of
                # crashing the chat with a KeyError.
                tool_result = f"Unknown tool '{function_name}'."
            else:
                function_response = function_to_call(**function_call.arguments, session_hash=session_hash)
                tool_result = function_response['reply']

            session_messages.append(ChatMessage.from_tool(tool_result=tool_result, origin=function_call))

        # Ask the model to continue once every requested tool call has a result.
        response = chat_generator.run(
            messages=session_messages,
            generation_kwargs={"tools": tools.tools_call(session_hash)},
        )
def delete_db(req: gr.Request):
    """Delete the database file that belongs to the given request's session.

    Args:
        req: Gradio request; its ``session_hash`` is part of the file name.
    """
    db_file_path = f'data_source_{req.session_hash}.db'
    try:
        os.remove(db_file_path)
    except FileNotFoundError:
        # Nothing to clean up: the file was never created or is already gone.
        # Removing directly avoids the race between an existence check and
        # the removal itself.
        pass
def run_example(input):
    """Pass the selected example file through unchanged.

    Used as an event handler that copies the value of a hidden example file
    component into the visible file component.
    """
    selected_file = input
    return selected_file
def example_display(input):
    """Show the example buttons only while no file is selected.

    Args:
        input: Current value of the file component, or ``None`` when empty.

    Returns:
        Two ``gr.update`` objects, one per example button, with ``visible``
        set to ``True`` when ``input`` is ``None`` and ``False`` otherwise.
    """
    display = input is None
    return [gr.update(visible=display), gr.update(visible=display)]
css = ".file_marker .large{min-height:50px !important;} .example_btn{max-width:300px;}"

with gr.Blocks(css=css) as demo:
    # Page header.
    title = gr.HTML("<h1 style='text-align:center;'>Virtual Data Analyst</h1>")
    description = gr.HTML("<p style='text-align:center;'>Upload a data file and chat with our virtual data analyst to get insights on your data set. Currently accepts CSV, TSV, TXT, XLS, XLSX, XML, and JSON files. Try a sample file to get started!</p>")

    # Hidden file components holding the bundled sample data sets.
    example_file_1 = gr.File(visible=False, value="samples/bank_marketing_campaign.csv")
    example_file_2 = gr.File(visible=False, value="samples/online_retail_data.csv")

    with gr.Row():
        example_btn_1 = gr.Button(
            value="Try Me: bank_marketing_campaign.csv",
            elem_classes="example_btn",
            size="md",
            variant="primary",
        )
        example_btn_2 = gr.Button(
            value="Try Me: online_retail_data.csv",
            elem_classes="example_btn",
            size="md",
            variant="primary",
        )

    file_output = gr.File(
        label="Data File (CSV, TSV, TXT, XLS, XLSX, XML, JSON)",
        show_label=True,
        elem_classes="file_marker",
        file_types=['.csv','.xlsx','.txt','.json','.xml','.xls','.tsv'],
    )

    # Each example button copies its hidden sample file into the upload slot.
    for _button, _sample in ((example_btn_1, example_file_1), (example_btn_2, example_file_2)):
        _button.click(fn=run_example, inputs=_sample, outputs=file_output)

    # Toggle the example buttons whenever the selected file changes.
    file_output.change(fn=example_display, inputs=file_output, outputs=[example_btn_1, example_btn_2])

    @gr.render(inputs=file_output)
    def data_options(filename, request: gr.Request):
        """Build the chat interface for the currently selected file."""
        print(filename)
        if not filename:
            return

        # Suggested questions for the bundled samples, matched by a substring
        # of the file name and checked in this order.
        sample_questions = {
            "bank_marketing_campaign": [
                ["Describe the dataset"],
                ["What levels of education have the highest and lowest average balance?"],
                ["What job is most and least common for a yes response from the individuals, not counting 'unknown'?"],
            ],
            "online_retail_data": [
                ["Describe the dataset"],
                ["What month had the highest revenue?"],
                ["Is revenue higher in the morning or afternoon?"],
            ],
        }
        # Fallback suggestions for any other file.
        generic_questions = [
            ["Describe the dataset"],
            ["List the columns in the dataset"],
            ["What could this data be used for?"],
        ]
        example_questions = next(
            (questions for marker, questions in sample_questions.items() if marker in filename),
            generic_questions,
        )

        # Hidden textbox that passes the session hash to the chat function.
        session_field = gr.Textbox(visible=False, value=request.session_hash)
        chat_window = gr.Chatbot(
            type='messages',
            label="CSV Chat Window",
            show_label=True,
            render=False,
            visible=True,
            elem_classes="chatbot",
        )
        gr.ChatInterface(
            fn=chatbot_with_fc,
            type='messages',
            chatbot=chat_window,
            title="Chat with your data file",
            concurrency_limit=None,
            examples=example_questions,
            additional_inputs=session_field,
        )

        process_upload(filename, request.session_hash)
def process_upload(upload_value, session_hash):
    """Hand an uploaded file to ``process_data_upload`` for the given session.

    Nothing is processed when ``upload_value`` is falsy. A pair of empty
    lists is returned in every case.
    """
    if not upload_value:
        return [], []
    process_data_upload(upload_value, session_hash)
    return [], []
# Register delete_db as the app's unload handler so the session's
# data_source_<session_hash>.db file is removed when the handler fires.
demo.unload(delete_db)