File size: 9,782 Bytes
32f5b77
 
 
 
 
 
 
 
cfbedd8
32f5b77
 
 
 
 
 
 
 
 
 
cfbedd8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32f5b77
fb65c41
60f68c4
32f5b77
 
 
60f68c4
 
cfbedd8
 
 
 
 
 
 
 
 
 
 
 
32f5b77
 
 
fb65c41
32f5b77
 
cfbedd8
32f5b77
 
 
 
60f68c4
32f5b77
fb65c41
60f68c4
32f5b77
cfbedd8
 
32f5b77
 
 
cfbedd8
32f5b77
 
 
fb65c41
 
 
 
cfbedd8
fb65c41
fedee8b
 
 
 
 
 
 
 
00dae37
fedee8b
 
32f5b77
63c3a67
32f5b77
60f68c4
 
 
 
 
 
00dae37
 
 
 
 
 
63c3a67
00dae37
 
 
32f5b77
 
fb65c41
32f5b77
cfbedd8
32f5b77
cfbedd8
8282cb1
 
 
 
 
60f68c4
8282cb1
 
 
 
 
 
60f68c4
8282cb1
 
cfbedd8
 
 
 
 
 
 
 
 
8282cb1
 
 
 
fb65c41
60f68c4
32f5b77
 
 
 
 
fb65c41
8282cb1
fb65c41
32f5b77
 
fb65c41
32f5b77
fb65c41
32f5b77
fb65c41
 
32f5b77
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# Local helper that ingests an uploaded data file into a per-session SQLite DB.
from data_sources import process_data_upload

import gradio as gr

# Haystack chat primitives: message dataclass + OpenAI-backed chat generator.
from haystack.dataclasses import ChatMessage
from haystack.components.generators.chat import OpenAIChatGenerator

import os
import ast
from getpass import getpass
from dotenv import load_dotenv

# Pull OPENAI_API_KEY (and friends) from a local .env file, if present.
load_dotenv()

# Fall back to an interactive prompt when no key is found in the environment.
if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")

# One shared LLM client for all sessions; reads OPENAI_API_KEY from the env.
chat_generator = OpenAIChatGenerator(model="gpt-4o")
response = None  # NOTE(review): module-level placeholder; appears unused below — confirm before removing.
message_dict = {}  # Per-session chat histories, keyed by Gradio session_hash.

def example_question_generator(session_hash):
    """Ask the LLM for example analysis questions based on the session table's columns.

    Connects to the per-session SQLite database, reads the column names of the
    'data_source' table, and prompts the chat model for an array of seven
    suggested questions (the caller parses the reply with ast.literal_eval).

    Args:
        session_hash: Gradio session identifier; selects the per-session DB file.

    Returns:
        str: The raw model reply, expected to look like a Python list of strings.
    """
    import sqlite3
    from contextlib import closing

    example_messages = [
        ChatMessage.from_system(
            "You are a helpful and knowledgeable agent who has access to an SQLite database which has a table called 'data_source'."
        )
    ]
    # Read the table's column names; closing() guarantees the cursor and the
    # connection are released even if the query raises.
    with closing(sqlite3.connect(f'data_source_{session_hash}.db')) as connection:
        print("Querying questions")
        with closing(connection.execute('select * from data_source')) as cur:
            columns = [i[0] for i in cur.description]
    print("QUESTION COLUMNS")
    print(columns)

    example_messages.append(ChatMessage.from_user(text=f"""We have a SQLite database with the following {columns}. 

                                                  We also have an AI agent with access to the same database that will be performing data analysis.

                                                  Please return an array of seven strings, each one being a question for our data analysis agent

                                                  that we can suggest that you believe will be insightful or helpful to a data analysis looking for

                                                  data insights. Return nothing more than the array of questions because I need that specific data structure

                                                  to process your response. No other response type or data structure will work."""))

    example_response = chat_generator.run(messages=example_messages)

    return example_response["replies"][0].text

def chatbot_with_fc(message, history, session_hash):
    """Chat handler with OpenAI function calling for gr.ChatInterface.

    Appends the user message to the per-session history, then loops: while the
    model requests tool calls, execute each one locally, feed the results back,
    and re-query; otherwise return the model's final text reply.

    Args:
        message: The user's message text.
        history: Gradio-managed chat history (unused; we keep our own).
        session_hash: Session key for message_dict and the per-session DB.

    Returns:
        str: The assistant's final reply text.
    """
    from functions import sqlite_query_func, chart_generation_func
    from pipelines import rag_pipeline_func
    import tools

    # Maps the tool names the model emits to the local implementations.
    available_functions = {"sql_query_func": sqlite_query_func, "rag_pipeline_func": rag_pipeline_func, "chart_generation_func": chart_generation_func}

    # Continue the session's conversation if one exists (guarding against a
    # missing key, not just a None placeholder), else start a new one.
    if message_dict.get(session_hash) is not None:
        message_dict[session_hash].append(ChatMessage.from_user(message))
    else:
        message_dict[session_hash] = [
            ChatMessage.from_system(
                "You are a helpful and knowledgeable agent who has access to an SQLite database which has a table called 'data_source'. You also have access to a chart API that uses chart.js dictionaries formatted as a string to generate charts and graphs."
            ),
            ChatMessage.from_user(message),
        ]

    response = chat_generator.run(messages=message_dict[session_hash], generation_kwargs={"tools": tools.tools_call(session_hash)})

    while True:
        reply = response["replies"][0]
        function_calls = reply.tool_calls
        # Regular conversation turn: nothing to execute. (Also prevents an
        # infinite loop when finish_reason says "tool_calls" but the reply
        # carries no actual tool calls — the old compound condition both
        # looped forever on that case and had an and/or precedence bug.)
        if not function_calls:
            message_dict[session_hash].append(reply)
            return reply.text

        for function_call in function_calls:
            message_dict[session_hash].append(ChatMessage.from_assistant(tool_calls=[function_call]))
            ## Parse function calling information
            function_name = function_call.tool_name
            function_args = function_call.arguments

            ## Find the corresponding function and call it with the given arguments
            function_to_call = available_functions[function_name]
            function_response = function_to_call(**function_args, session_hash=session_hash)
            print(function_name)
            ## Append function response to the messages list using `ChatMessage.from_tool`
            message_dict[session_hash].append(ChatMessage.from_tool(tool_result=function_response['reply'], origin=function_call))

        # One follow-up request per round of tool calls (the old code
        # re-queried the model once per individual tool call).
        response = chat_generator.run(messages=message_dict[session_hash], generation_kwargs={"tools": tools.tools_call(session_hash)})

def delete_db(req: gr.Request):
    """Session-unload hook: remove the per-session SQLite file and chat history.

    Args:
        req: Gradio request; req.session_hash identifies the session to clean up.
    """
    db_file_path = f'data_source_{req.session_hash}.db'
    if os.path.exists(db_file_path):
        os.remove(db_file_path)
    # Drop the conversation even when no DB file was ever created, and pop the
    # key so stale sessions don't accumulate None placeholders in message_dict.
    message_dict.pop(req.session_hash, None)

def run_example(file_value):
    """Identity pass-through: copy a sample-file value into the upload slot.

    Wired to the sample buttons' click events; renamed the parameter so it no
    longer shadows the builtin `input` (Gradio passes it positionally).
    """
    return file_value

def example_display(file_value):
    """Show the two sample buttons only while no file has been uploaded.

    Returns:
        list: Two gr.update objects (one per example button) toggling visibility.
    """
    display = file_value is None  # `is None`, not `== None`
    return [gr.update(visible=display), gr.update(visible=display)]

# Minimal CSS: taller drop zone on the file widget, capped sample-button width.
css= ".file_marker .large{min-height:50px !important;} .example_btn{max-width:300px;}"

# Top-level UI. NOTE(review): delete_cache=(3600,3600) presumably means
# (cleanup frequency, max age) in seconds — confirm against Gradio docs.
with gr.Blocks(css=css, delete_cache=(3600,3600)) as demo:
    title = gr.HTML("<h1 style='text-align:center;'>Virtual Data Analyst</h1>")
    description = gr.HTML("""<p style='text-align:center;'>Upload a data file and chat with our virtual data analyst 

                          to get insights on your data set. Currently accepts CSV, TSV, TXT, XLS, XLSX, XML, and JSON files. 

                          Can now generate charts and graphs!

                          Try a sample file to get started!</p>

                          <p style='text-align:center;'>This tool is under active development. If you experience bugs with use, 

                          open a discussion in the community tab and I will respond.</p>""")
    # Hidden File components holding the two bundled sample datasets.
    example_file_1 = gr.File(visible=False, value="samples/bank_marketing_campaign.csv")
    example_file_2 = gr.File(visible=False, value="samples/online_retail_data.csv")
    with gr.Row():
        example_btn_1 = gr.Button(value="Try Me: bank_marketing_campaign.csv", elem_classes="example_btn", size="md", variant="primary")
        example_btn_2 = gr.Button(value="Try Me: online_retail_data.csv", elem_classes="example_btn", size="md", variant="primary")

    file_output = gr.File(label="Data File (CSV, TSV, TXT, XLS, XLSX, XML, JSON)", show_label=True, elem_classes="file_marker", file_types=['.csv','.xlsx','.txt','.json','.ndjson','.xml','.xls','.tsv'])
    # Clicking a sample button copies its hidden file into the upload slot...
    example_btn_1.click(fn=run_example, inputs=example_file_1, outputs=file_output)
    example_btn_2.click(fn=run_example, inputs=example_file_2, outputs=file_output)
    # ...and any change to the upload slot toggles the sample buttons' visibility.
    file_output.change(fn=example_display, inputs=file_output, outputs=[example_btn_1, example_btn_2])

    @gr.render(inputs=file_output)
    def data_options(filename, request: gr.Request):
        """Re-render the data-chat UI whenever the uploaded file changes.

        Ingests the file into the session's SQLite DB, chooses example
        questions (canned lists for the two bundled samples, LLM-generated
        otherwise), and mounts a ChatInterface backed by chatbot_with_fc.
        """
        print(filename)
        # A new/removed file invalidates any prior conversation for this session.
        message_dict[request.session_hash] = None
        if filename:
            process_upload(filename, request.session_hash)
            if "bank_marketing_campaign" in filename:
                example_questions = [
                                        ["Describe the dataset"],
                                        ["What levels of education have the highest and lowest average balance?"],
                                        ["What job is most and least common for a yes response from the individuals, not counting 'unknown'?"],
                                        ["Can you generate a bar chart of education vs. average balance?"]
                                    ]
            elif "online_retail_data" in filename:
                example_questions = [
                                        ["Describe the dataset"],
                                        ["What month had the highest revenue?"],
                                        ["Is revenue higher in the morning or afternoon?"],
                                        ["Can you generate a line graph of revenue per month?"]
                                    ]
            else:
                # Ask the LLM for tailored questions; the reply must parse as a
                # Python list. Fall back to generic questions on any failure
                # (unparseable reply, API error) instead of crashing the render.
                try:
                    generated_examples = ast.literal_eval(example_question_generator(request.session_hash))
                    example_questions = [["Describe the dataset"]]
                    for example in generated_examples:
                        example_questions.append([example])
                except Exception:  # was a bare except:, which also swallowed SystemExit/KeyboardInterrupt
                    example_questions = [
                                        ["Describe the dataset"],
                                        ["List the columns in the dataset"],
                                        ["What could this data be used for?"],
                                    ]
            # Hidden textbox smuggles the session hash into chatbot_with_fc as
            # an additional input.
            parameters = gr.Textbox(visible=False, value=request.session_hash)
            bot = gr.Chatbot(type='messages', label="CSV Chat Window", render_markdown=True, sanitize_html=False, show_label=True, render=False, visible=True, elem_classes="chatbot")
            chat = gr.ChatInterface(
                                fn=chatbot_with_fc,
                                type='messages',
                                chatbot=bot,
                                title="Chat with your data file",
                                concurrency_limit=None,
                                examples=example_questions,
                                additional_inputs=parameters
                                ) 
    
    def process_upload(upload_value, session_hash):
        """Load an uploaded data file into this session's database.

        A falsy value (cleared upload) is a no-op. Always returns a pair of
        empty lists, matching the outputs Gradio expects from this callback.
        """
        if not upload_value:
            return [], []
        process_data_upload(upload_value, session_hash)
        return [], []

    # Clean up the per-session DB and chat history when the browser tab closes.
    demo.unload(delete_db)