Nolan Zandi committed on
Commit
f5b1bff
·
1 Parent(s): 1647e02

visualization updates and refactor for future release

Browse files
app.py CHANGED
@@ -1,18 +1,13 @@
1
- from data_sources import process_data_upload
2
- from functions import example_question_generator, chatbot_with_fc
3
  from utils import TEMP_DIR, message_dict
4
  import gradio as gr
 
5
 
6
- import ast
7
  import os
8
  from getpass import getpass
9
  from dotenv import load_dotenv
10
 
11
  load_dotenv()
12
 
13
- if "OPENAI_API_KEY" not in os.environ:
14
- os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")
15
-
16
  def delete_db(req: gr.Request):
17
  import shutil
18
  dir_path = TEMP_DIR / str(req.session_hash)
@@ -20,102 +15,72 @@ def delete_db(req: gr.Request):
20
  shutil.rmtree(dir_path)
21
  message_dict[req.session_hash] = None
22
 
23
- def run_example(input):
24
- return input
25
-
26
- def example_display(input):
27
- if input == None:
28
- display = True
29
- else:
30
- display = False
31
- return [gr.update(visible=display),gr.update(visible=display)]
32
 
33
  css= ".file_marker .large{min-height:50px !important;} .example_btn{max-width:300px;} .padding{padding:0;}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- with gr.Blocks(css=css, delete_cache=(3600,3600)) as demo:
36
- title = gr.HTML("<h1 style='text-align:center;'>Virtual Data Analyst</h1>")
37
- description = gr.HTML("""<p style='text-align:center;'>A helpful tool for data analysis, visualizations, regressions, and more.
38
- Upload a data file and chat with our virtual data analyst to get insights on your data set.
39
- Try a sample file to get started!</p>
40
- <ul style="margin:auto;max-width: 500px;">
41
- <li style="margin:0;line-height:1;">Currently accepts CSV, TSV, TXT, XLS, XLSX, XML, and JSON files.</li>
42
- <li style="margin:0;line-height:1;">Can run SQL queries, linear regressions, and analyze the results.</li>
43
- <li style="margin:0;line-height:1;">Can generate scatter plots, line charts, pie charts, bar graphs, histograms, time series, and more.
44
- New visualizations types added regularly.</li>
45
- </ul>
46
- <p style='text-align:center;'>This application is under active development. If you experience bugs with use,
47
- open a discussion in the community tab and I will respond.</p>""")
48
- example_file_1 = gr.File(visible=False, value="samples/bank_marketing_campaign.csv")
49
- example_file_2 = gr.File(visible=False, value="samples/online_retail_data.csv")
50
- with gr.Row():
51
- example_btn_1 = gr.Button(value="Try Me: bank_marketing_campaign.csv", elem_classes="example_btn", size="md", variant="primary")
52
- example_btn_2 = gr.Button(value="Try Me: online_retail_data.csv", elem_classes="example_btn", size="md", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- file_output = gr.File(label="Data File (CSV, TSV, TXT, XLS, XLSX, XML, JSON)", show_label=True, elem_classes="file_marker", file_types=['.csv','.xlsx','.txt','.json','.ndjson','.xml','.xls','.tsv'])
55
- example_btn_1.click(fn=run_example, inputs=example_file_1, outputs=file_output)
56
- example_btn_2.click(fn=run_example, inputs=example_file_2, outputs=file_output)
57
- file_output.change(fn=example_display, inputs=file_output, outputs=[example_btn_1, example_btn_2])
58
 
59
- @gr.render(inputs=file_output)
60
- def data_options(filename, request: gr.Request):
61
- print(filename)
62
- message_dict[request.session_hash] = None
63
- if filename:
64
- process_message = process_upload(filename, request.session_hash)
65
- gr.HTML(value=process_message[1], padding=False)
66
- if process_message[0] == "success":
67
- if "bank_marketing_campaign" in filename:
68
- example_questions = [
69
- ["Describe the dataset"],
70
- ["What levels of education have the highest and lowest average balance?"],
71
- ["What job is most and least common for a yes response from the individuals, not counting 'unknown'?"],
72
- ["Can you generate a bar chart of education vs. average balance?"],
73
- ["Can you generate a table of levels of education versus average balance, percent married, percent with a loan, and percent in default?"],
74
- ["Can we predict the relationship between the number of contacts performed before this campaign and the average balance?"],
75
- ["Can you plot the number of contacts performed before this campaign versus the duration and use balance as the size in a bubble chart?"]
76
- ]
77
- elif "online_retail_data" in filename:
78
- example_questions = [
79
- ["Describe the dataset"],
80
- ["What month had the highest revenue?"],
81
- ["Is revenue higher in the morning or afternoon?"],
82
- ["Can you generate a line graph of revenue per month?"],
83
- ["Can you generate a table of revenue per month?"],
84
- ["Can we predict how time of day affects transaction value in this data set?"],
85
- ["Can you plot revenue per month with size being the number of units sold that month in a bubble chart?"]
86
- ]
87
- else:
88
- try:
89
- generated_examples = ast.literal_eval(example_question_generator(request.session_hash))
90
- example_questions = [
91
- ["Describe the dataset"]
92
- ]
93
- for example in generated_examples:
94
- example_questions.append([example])
95
- except:
96
- example_questions = [
97
- ["Describe the dataset"],
98
- ["List the columns in the dataset"],
99
- ["What could this data be used for?"],
100
- ]
101
- parameters = gr.Textbox(visible=False, value=request.session_hash)
102
- bot = gr.Chatbot(type='messages', label="CSV Chat Window", render_markdown=True, sanitize_html=False, show_label=True, render=False, visible=True, elem_classes="chatbot")
103
- chat = gr.ChatInterface(
104
- fn=chatbot_with_fc,
105
- type='messages',
106
- chatbot=bot,
107
- title="Chat with your data file",
108
- concurrency_limit=None,
109
- examples=example_questions,
110
- additional_inputs=parameters
111
- )
112
-
113
- def process_upload(upload_value, session_hash):
114
- if upload_value:
115
- process_message = process_data_upload(upload_value, session_hash)
116
- return process_message
117
-
118
  demo.unload(delete_db)
119
 
120
  ## Uncomment the line below to launch the chat app with UI
121
- demo.launch(debug=True, allowed_paths=["temp/"])
 
 
 
1
  from utils import TEMP_DIR, message_dict
2
  import gradio as gr
3
+ import data_file, sql_db
4
 
 
5
  import os
6
  from getpass import getpass
7
  from dotenv import load_dotenv
8
 
9
  load_dotenv()
10
 
 
 
 
11
  def delete_db(req: gr.Request):
12
  import shutil
13
  dir_path = TEMP_DIR / str(req.session_hash)
 
15
  shutil.rmtree(dir_path)
16
  message_dict[req.session_hash] = None
17
 
18
+ if "OPENAI_API_KEY" not in os.environ:
19
+ os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")
 
 
 
 
 
 
 
20
 
21
  css= ".file_marker .large{min-height:50px !important;} .example_btn{max-width:300px;} .padding{padding:0;}"
22
+ head = """<meta charset="UTF-8">
23
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
24
+ <title>Virtual Data Analyst</title>
25
+ <!-- Tailwind CSS -->
26
+ <script src="https://cdn.tailwindcss.com"></script>
27
+ <!-- Google Fonts -->
28
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
29
+ <!-- Font Awesome -->
30
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
31
+ <!-- Custom Styles -->
32
+ <link rel="stylesheet" href="/gradio_api/file=assets/styles.css">
33
+ """
34
+
35
+ theme = gr.themes.Base(primary_hue="sky", secondary_hue="slate",font=[gr.themes.GoogleFont("Inter"), "Inter", "sans-serif"]).set(
36
+ button_primary_background_fill="#3B82F6",
37
+ button_secondary_background_fill="#6B7280",
38
+ )
39
+
40
+ from pathlib import Path
41
+ gr.set_static_paths(paths=[Path.cwd().absolute()/"assets"])
42
 
43
+ with gr.Blocks(theme=theme, css=css, head=head, delete_cache=(3600,3600)) as demo:
44
+ header = gr.HTML("""
45
+ <!-- Header -->
46
+ <header class="max-w-4xl mx-auto mb-12 text-center">
47
+ <h1 class="text-4xl font-bold text-gray-900 mb-4">Virtual Data Analyst</h1>
48
+ <p class="text-lg text-gray-600 mb-6">
49
+ A powerful tool for data analysis, visualizations, and insights
50
+ </p>
51
+ </header>
52
+ <!-- Main Content -->
53
+ <main class="max-w-4xl mx-auto">
54
+ <!-- Features Preview -->
55
+ <div class="mt-12 grid md:grid-cols-3 gap-6" style="margin-bottom:3px !important;">
56
+ <div class="feature-card bg-white p-6 rounded-lg shadow-md">
57
+ <i class="feature-icon fas fa-chart-line text-primary text-2xl mb-4"></i>
58
+ <h3 class="font-semibold text-gray-800 mb-2">Advanced Analytics</h3>
59
+ <p class="text-gray-600 text-sm">Run SQL queries, perform regressions, and analyze results with ease</p>
60
+ </div>
61
+ <div class="feature-card bg-white p-6 rounded-lg shadow-md">
62
+ <i class="feature-icon fas fa-chart-pie text-primary text-2xl mb-4"></i>
63
+ <h3 class="font-semibold text-gray-800 mb-2">Rich Visualizations</h3>
64
+ <p class="text-gray-600 text-sm">Create scatter plots, line charts, pie charts, and more</p>
65
+ </div>
66
+ <div class="feature-card bg-white p-6 rounded-lg shadow-md">
67
+ <i class="feature-icon fas fa-magic text-primary text-2xl mb-4"></i>
68
+ <h3 class="font-semibold text-gray-800 mb-2">Automated Insights</h3>
69
+ <p class="text-gray-600 text-sm">Get instant insights and recommendations for your data</p>
70
+ </div>
71
+ </div>
72
+ </main>""")
73
+ #with gr.Tab("Data File"):
74
+ data_file.demo.render()
75
+ #with gr.Tab("SQL Database"):
76
+ # sql_db.demo.render()
77
 
78
+ footer = gr.HTML("""<!-- Footer -->
79
+ <footer class="max-w-4xl mx-auto mt-12 text-center text-gray-500 text-sm">
80
+ <p>This application is under active development. For bugs or feedback, please open a discussion in the community tab.</p>
81
+ </footer>""")
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  demo.unload(delete_db)
84
 
85
  ## Uncomment the line below to launch the chat app with UI
86
+ demo.launch(debug=True, allowed_paths=["temp/","assets/"])
assets/styles.css ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Loading Animation */
2
+ .loading-spinner {
3
+ display: none;
4
+ width: 50px;
5
+ height: 50px;
6
+ border: 5px solid #f3f3f3;
7
+ border-top: 5px solid #3B82F6;
8
+ border-radius: 50%;
9
+ animation: spin 1s linear infinite;
10
+ margin: 0 auto;
11
+ }
12
+
13
+ @keyframes spin {
14
+ 0% { transform: rotate(0deg); }
15
+ 100% { transform: rotate(360deg); }
16
+ }
17
+
18
+ /* File Upload Progress */
19
+ .progress-bar {
20
+ width: 100%;
21
+ height: 6px;
22
+ background-color: #e5e7eb;
23
+ border-radius: 3px;
24
+ overflow: hidden;
25
+ display: none;
26
+ margin: 1rem auto;
27
+ max-width: 300px;
28
+ }
29
+
30
+ .progress-bar-fill {
31
+ height: 100%;
32
+ background-color: #3B82F6;
33
+ width: 0%;
34
+ transition: width 0.3s ease;
35
+ }
36
+
37
+ /* Tooltip */
38
+ .tooltip {
39
+ position: relative;
40
+ display: inline-block;
41
+ }
42
+
43
+ .tooltip .tooltip-text {
44
+ visibility: hidden;
45
+ background-color: #1f2937;
46
+ color: white;
47
+ text-align: center;
48
+ padding: 8px 12px;
49
+ border-radius: 6px;
50
+ position: absolute;
51
+ z-index: 1;
52
+ bottom: 125%;
53
+ left: 50%;
54
+ transform: translateX(-50%);
55
+ opacity: 0;
56
+ transition: opacity 0.3s;
57
+ font-size: 0.875rem;
58
+ white-space: nowrap;
59
+ box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
60
+ }
61
+
62
+ .tooltip:hover .tooltip-text {
63
+ visibility: visible;
64
+ opacity: 1;
65
+ }
66
+
67
+ /* File Type Icons */
68
+ .file-type-icon {
69
+ font-size: 1.5rem;
70
+ margin-right: 0.5rem;
71
+ color: #3B82F6;
72
+ }
73
+
74
+ /* Success Animation */
75
+ @keyframes checkmark {
76
+ 0% { transform: scale(0); opacity: 0; }
77
+ 50% { transform: scale(1.2); opacity: 0.8; }
78
+ 100% { transform: scale(1); opacity: 1; }
79
+ }
80
+
81
+ .success-checkmark {
82
+ display: none;
83
+ color: #10B981;
84
+ animation: checkmark 0.5s ease-in-out forwards;
85
+ }
86
+
87
+ /* Sample Data Cards */
88
+ .sample-btn {
89
+ transition: all 0.3s ease;
90
+ position: relative;
91
+ overflow: hidden;
92
+ }
93
+
94
+ .sample-btn::after {
95
+ content: '';
96
+ position: absolute;
97
+ top: 0;
98
+ left: 0;
99
+ width: 100%;
100
+ height: 100%;
101
+ background: linear-gradient(rgba(255,255,255,0.1), rgba(255,255,255,0));
102
+ transform: translateY(-100%);
103
+ transition: transform 0.3s ease;
104
+ }
105
+
106
+ .sample-btn:hover::after {
107
+ transform: translateY(0);
108
+ }
109
+
110
+ .sample-btn:hover {
111
+ transform: translateY(-2px);
112
+ box-shadow: 0 8px 15px rgba(0,0,0,0.1);
113
+ }
114
+
115
+ /* Drop Zone Enhancements */
116
+ .drop-zone {
117
+ transition: all 0.3s ease;
118
+ position: relative;
119
+ overflow: hidden;
120
+ }
121
+
122
+ .drop-zone::before {
123
+ content: '';
124
+ position: absolute;
125
+ top: 0;
126
+ left: 0;
127
+ right: 0;
128
+ bottom: 0;
129
+ border-radius: 8px;
130
+ border: 2px dashed #3B82F6;
131
+ opacity: 0;
132
+ transition: opacity 0.3s ease;
133
+ }
134
+
135
+ .drop-zone:hover::before {
136
+ opacity: 1;
137
+ }
138
+
139
+ /* File Info Card */
140
+ #fileInfo {
141
+ background: linear-gradient(to right, #f8fafc, #f1f5f9);
142
+ border: 1px solid #e2e8f0;
143
+ transition: all 0.3s ease;
144
+ }
145
+
146
+ #fileInfo:hover {
147
+ transform: translateY(-2px);
148
+ box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
149
+ }
150
+
151
+ /* Features Section */
152
+ .feature-card {
153
+ transition: all 0.3s ease;
154
+ }
155
+
156
+ .feature-card:hover {
157
+ transform: translateY(-2px);
158
+ box-shadow: 0 8px 15px rgba(0,0,0,0.1);
159
+ }
160
+
161
+ .feature-icon {
162
+ transition: all 0.3s ease;
163
+ }
164
+
165
+ .feature-card:hover .feature-icon {
166
+ transform: scale(1.1);
167
+ color: #2563eb;
168
+ }
data_file.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from functions import example_question_generator, chatbot_with_fc
3
+ from data_sources import process_data_upload
4
+ from utils import message_dict
5
+ import ast
6
+
7
+ def run_example(input):
8
+ return input
9
+
10
+ def example_display(input):
11
+ if input == None:
12
+ display = True
13
+ else:
14
+ display = False
15
+ return [gr.update(visible=display),gr.update(visible=display),gr.update(visible=display)]
16
+
17
+ with gr.Blocks() as demo:
18
+ description = gr.HTML("""
19
+ <!-- Header -->
20
+ <div class="max-w-4xl mx-auto mb-12 text-center">
21
+ <div class="bg-blue-50 border border-blue-200 rounded-lg max-w-2xl mx-auto">
22
+ <h2 class="font-semibold text-blue-800 ">
23
+ <i class="fas fa-info-circle mr-2"></i>Supported Files
24
+ </h2>
25
+ <div class="flex flex-wrap justify-center gap-3 pb-4 text-blue-700">
26
+ <span class="tooltip">
27
+ <i class="fas fa-file-csv mr-1"></i>CSV
28
+ <span class="tooltip-text">Comma-separated values</span>
29
+ </span>
30
+ <span class="tooltip">
31
+ <i class="fas fa-file-alt mr-1"></i>TSV
32
+ <span class="tooltip-text">Tab-separated values</span>
33
+ </span>
34
+ <span class="tooltip">
35
+ <i class="fas fa-file-alt mr-1"></i>TXT
36
+ <span class="tooltip-text">Text files</span>
37
+ </span>
38
+ <span class="tooltip">
39
+ <i class="fas fa-file-excel mr-1"></i>XLS/XLSX
40
+ <span class="tooltip-text">Excel spreadsheets</span>
41
+ </span>
42
+ <span class="tooltip">
43
+ <i class="fas fa-file-code mr-1"></i>XML
44
+ <span class="tooltip-text">XML documents</span>
45
+ </span>
46
+ <span class="tooltip">
47
+ <i class="fas fa-file-code mr-1"></i>JSON
48
+ <span class="tooltip-text">JSON data files</span>
49
+ </span>
50
+ </div>
51
+ </div>
52
+ </div>
53
+ """)
54
+ example_file_1 = gr.File(visible=False, value="samples/bank_marketing_campaign.csv")
55
+ example_file_2 = gr.File(visible=False, value="samples/online_retail_data.csv")
56
+ with gr.Row():
57
+ example_btn_1 = gr.Button(value="Try Me: bank_marketing_campaign.csv", elem_classes="example_btn sample-btn bg-gradient-to-r from-purple-500 to-indigo-600 text-white p-6 rounded-lg text-left hover:shadow-lg", size="md", variant="primary")
58
+ example_btn_2 = gr.Button(value="Try Me: online_retail_data.csv", elem_classes="example_btn sample-btn bg-gradient-to-r from-purple-500 to-indigo-600 text-white p-6 rounded-lg text-left hover:shadow-lg", size="md", variant="primary")
59
+
60
+ file_output = gr.File(label="Data File (CSV, TSV, TXT, XLS, XLSX, XML, JSON)", show_label=True, elem_classes="file_marker drop-zone border-2 border-dashed border-gray-300 rounded-lg hover:border-primary cursor-pointer bg-gray-50 hover:bg-blue-50 transition-colors duration-300", file_types=['.csv','.xlsx','.txt','.json','.ndjson','.xml','.xls','.tsv'])
61
+ example_btn_1.click(fn=run_example, inputs=example_file_1, outputs=file_output)
62
+ example_btn_2.click(fn=run_example, inputs=example_file_2, outputs=file_output)
63
+ file_output.change(fn=example_display, inputs=file_output, outputs=[example_btn_1, example_btn_2, description])
64
+
65
+ @gr.render(inputs=file_output)
66
+ def data_options(filename, request: gr.Request):
67
+ print(filename)
68
+ message_dict[request.session_hash] = None
69
+ if filename:
70
+ process_message = process_upload(filename, request.session_hash)
71
+ gr.HTML(value=process_message[1], padding=False)
72
+ if process_message[0] == "success":
73
+ if "bank_marketing_campaign" in filename:
74
+ example_questions = [
75
+ ["Describe the dataset"],
76
+ ["What levels of education have the highest and lowest average balance?"],
77
+ ["What job is most and least common for a yes response from the individuals, not counting 'unknown'?"],
78
+ ["Can you generate a bar chart of education vs. average balance?"],
79
+ ["Can you generate a table of levels of education versus average balance, percent married, percent with a loan, and percent in default?"],
80
+ ["Can we predict the relationship between the number of contacts performed before this campaign and the average balance?"],
81
+ ["Can you plot the number of contacts performed before this campaign versus the duration and use balance as the size in a bubble chart?"]
82
+ ]
83
+ elif "online_retail_data" in filename:
84
+ example_questions = [
85
+ ["Describe the dataset"],
86
+ ["What month had the highest revenue?"],
87
+ ["Is revenue higher in the morning or afternoon?"],
88
+ ["Can you generate a line graph of revenue per month?"],
89
+ ["Can you generate a table of revenue per month?"],
90
+ ["Can we predict how time of day affects transaction value in this data set?"],
91
+ ["Can you plot revenue per month with size being the number of units sold that month in a bubble chart?"]
92
+ ]
93
+ else:
94
+ try:
95
+ generated_examples = ast.literal_eval(example_question_generator(request.session_hash))
96
+ example_questions = [
97
+ ["Describe the dataset"]
98
+ ]
99
+ for example in generated_examples:
100
+ example_questions.append([example])
101
+ except:
102
+ example_questions = [
103
+ ["Describe the dataset"],
104
+ ["List the columns in the dataset"],
105
+ ["What could this data be used for?"],
106
+ ]
107
+ parameters = gr.Textbox(visible=False, value=request.session_hash)
108
+ bot = gr.Chatbot(type='messages', label="CSV Chat Window", render_markdown=True, sanitize_html=False, show_label=True, render=False, visible=True, elem_classes="chatbot")
109
+ chat = gr.ChatInterface(
110
+ fn=chatbot_with_fc,
111
+ type='messages',
112
+ chatbot=bot,
113
+ title="Chat with your data file",
114
+ concurrency_limit=None,
115
+ examples=example_questions,
116
+ additional_inputs=parameters
117
+ )
118
+
119
+ def process_upload(upload_value, session_hash):
120
+ if upload_value:
121
+ process_message = process_data_upload(upload_value, session_hash)
122
+ return process_message
123
+
124
+
125
+ if __name__ == "__main__":
126
+ demo.launch()
data_sources/upload_file.py CHANGED
@@ -68,7 +68,7 @@ def process_data_upload(data_file, session_hash):
68
  pattern = 'year|month|date|day|time'
69
  if re.search(pattern, column.lower()):
70
  try:
71
- df[column] = pd.to_datetime(df[column], infer_datetime_format=True)
72
  except:
73
  pass
74
  if df[column].dtype == 'object' and isinstance(df[column].iloc[0], list):
 
68
  pattern = 'year|month|date|day|time'
69
  if re.search(pattern, column.lower()):
70
  try:
71
+ df[column] = pd.to_datetime(df[column])
72
  except:
73
  pass
74
  if df[column].dtype == 'object' and isinstance(df[column].iloc[0], list):
functions/chat_functions.py CHANGED
@@ -38,7 +38,7 @@ def example_question_generator(session_hash):
38
  def chatbot_with_fc(message, history, session_hash):
39
  from functions import sqlite_query_func, table_generation_func, regression_func, scatter_chart_generation_func, \
40
  line_chart_generation_func,bar_chart_generation_func,pie_chart_generation_func,histogram_generation_func
41
- import tools
42
 
43
  available_functions = {"sql_query_func": sqlite_query_func,"table_generation_func":table_generation_func,
44
  "line_chart_generation_func":line_chart_generation_func,"bar_chart_generation_func":bar_chart_generation_func,
@@ -64,7 +64,7 @@ def chatbot_with_fc(message, history, session_hash):
64
  messages.append(ChatMessage.from_user(message))
65
  message_dict[session_hash] = messages
66
 
67
- response = chat_generator.run(messages=message_dict[session_hash], generation_kwargs={"tools": tools.tools_call(session_hash)})
68
 
69
  while True:
70
  # if OpenAI response is a tool call
@@ -82,12 +82,11 @@ def chatbot_with_fc(message, history, session_hash):
82
  print(function_name)
83
  ## Append function response to the messages list using `ChatMessage.from_tool`
84
  message_dict[session_hash].append(ChatMessage.from_tool(tool_result=function_response['reply'], origin=function_call))
85
- response = chat_generator.run(messages=message_dict[session_hash], generation_kwargs={"tools": tools.tools_call(session_hash)})
86
 
87
  # Regular Conversation
88
  else:
89
  message_dict[session_hash].append(response["replies"][0])
90
  break
91
- return response["replies"][0].text
92
-
93
-
 
38
  def chatbot_with_fc(message, history, session_hash):
39
  from functions import sqlite_query_func, table_generation_func, regression_func, scatter_chart_generation_func, \
40
  line_chart_generation_func,bar_chart_generation_func,pie_chart_generation_func,histogram_generation_func
41
+ import tools.tools as tools
42
 
43
  available_functions = {"sql_query_func": sqlite_query_func,"table_generation_func":table_generation_func,
44
  "line_chart_generation_func":line_chart_generation_func,"bar_chart_generation_func":bar_chart_generation_func,
 
64
  messages.append(ChatMessage.from_user(message))
65
  message_dict[session_hash] = messages
66
 
67
+ response = chat_generator.run(messages=message_dict[session_hash], generation_kwargs={"tools": tools.data_file_tools_call(session_hash)})
68
 
69
  while True:
70
  # if OpenAI response is a tool call
 
82
  print(function_name)
83
  ## Append function response to the messages list using `ChatMessage.from_tool`
84
  message_dict[session_hash].append(ChatMessage.from_tool(tool_result=function_response['reply'], origin=function_call))
85
+ response = chat_generator.run(messages=message_dict[session_hash], generation_kwargs={"tools": tools.data_file_tools_call(session_hash)})
86
 
87
  # Regular Conversation
88
  else:
89
  message_dict[session_hash].append(response["replies"][0])
90
  break
91
+
92
+ return response["replies"][0].text
 
tools.py → tools/chart_tools.py RENAMED
@@ -1,43 +1,5 @@
1
- import sqlite3
2
- from utils import TEMP_DIR
3
-
4
- def tools_call(session_hash):
5
- dir_path = TEMP_DIR / str(session_hash)
6
- connection = sqlite3.connect(f'{dir_path}/data_source.db')
7
- print("Querying Database in Tools.py");
8
- cur=connection.execute('select * from data_source')
9
- columns = [i[0] for i in cur.description]
10
- print("COLUMNS 2")
11
- print(columns)
12
- cur.close()
13
- connection.close()
14
-
15
- column_string = (columns[:625] + '..') if len(columns) > 625 else columns
16
-
17
- return [
18
- {
19
- "type": "function",
20
- "function": {
21
- "name": "sql_query_func",
22
- "description": f"""This is a tool useful to query a SQLite table called 'data_source' with the following Columns: {column_string}.
23
- There may also be more columns in the table if the number of columns is too large to process.
24
- This function also saves the results of the query to csv file called query.csv.""",
25
- "parameters": {
26
- "type": "object",
27
- "properties": {
28
- "queries": {
29
- "type": "array",
30
- "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
31
- "items": {
32
- "type": "string",
33
- }
34
- }
35
- },
36
- "required": ["queries"],
37
- },
38
- },
39
- },
40
- {
41
  "type": "function",
42
  "function": {
43
  "name": "scatter_chart_generation_func",
@@ -84,9 +46,9 @@ def tools_call(session_hash):
84
  "trendline": {
85
  "type": "string",
86
  "description": f"""An optional field to specify the type of plotly trendline we wish to use in the scatter plot.
87
- This trendline value can be one of ['ols','lowess','rolling','ewm','expanding'].
88
- Do not send any values outside of this array as the function will fail.
89
- Infer this from the user's message.""",
90
  "items": {
91
  "type": "string",
92
  }
@@ -103,9 +65,9 @@ def tools_call(session_hash):
103
  "marginal_x": {
104
  "type": "string",
105
  "description": f"""The type of marginal distribution plot we'd like to specify for the plotly scatter plot for the x axis.
106
- This marginal_x value can be one of ['histogram','rug','box','violin'].
107
- Do not send any values outside of this array as the function will fail.
108
- Infer this from the user's message.""",
109
  "items": {
110
  "type": "string",
111
  }
@@ -113,9 +75,9 @@ def tools_call(session_hash):
113
  "marginal_y": {
114
  "type": "string",
115
  "description": f"""The type of marginal distribution plot we'd like to specify for the plotly scatter plot for the y axis.
116
- This marginal_y value can be one of ['histogram','rug','box','violin'].
117
- Do not send any values outside of this array as the function will fail.
118
- Infer this from the user's message.""",
119
  "items": {
120
  "type": "string",
121
  }
@@ -376,7 +338,7 @@ def tools_call(session_hash):
376
  "type": "string",
377
  "description": f"""An optional value that represents the function of data to compute the function which is used on the optional y column.
378
  This histfunc value can be one of ['avg','sum','count'].
379
- Do not send any values outside of this array as the function will fail.""",
380
  "items": {
381
  "type": "string",
382
  }
@@ -405,47 +367,5 @@ def tools_call(session_hash):
405
  from the table_generation_func function in any way and always display the iframe fully to the user in the chat window.""",
406
  "parameters": {},
407
  },
408
- },
409
- {
410
- "type": "function",
411
- "function": {
412
- "name": "regression_func",
413
- "description": f"""This a tool to calculate regressions on our SQLite table called 'data_source'.
414
- We can run queries with our 'sql_query_func' function and they will be available to use in this function via the query.csv file that is generated.
415
- Returns a dictionary of values that includes a regression_summary and a regression chart (which is an iframe displaying the
416
- linear regression in chart form and should be shown to the user).""",
417
- "parameters": {
418
- "type": "object",
419
- "properties": {
420
- "independent_variables": {
421
- "type": "array",
422
- "description": f"""An array of strings that states the independent variables in our data set which should be column names in our query.csv file that is generated
423
- in the 'sql_query_func' function. This will allow us to identify the data to use for our independent variables.
424
- Infer this from the user's message.""",
425
- "items": {
426
- "type": "string",
427
- }
428
- },
429
- "dependent_variable": {
430
- "type": "string",
431
- "description": f"""A string that states the dependent variables in our data set which should be a column name in our query.csv file that is generated
432
- in the 'sql_query_func' function. This will allow us to identify the data to use for our dependent variables.
433
- Infer this from the user's message.""",
434
- "items": {
435
- "type": "string",
436
- }
437
- },
438
- "category": {
439
- "type": "string",
440
- "description": f"""An optional column in our query.csv file that contain a parameter that will define the category for the data.
441
- Do not send value if no category is needed or specified. This category must be present in our query.csv file to be valid.""",
442
- "items": {
443
- "type": "string",
444
- }
445
- }
446
- },
447
- "required": ["independent_variables","dependent_variable"],
448
- },
449
- },
450
  }
451
- ]
 
1
+ chart_tools = [
2
+ {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "type": "function",
4
  "function": {
5
  "name": "scatter_chart_generation_func",
 
46
  "trendline": {
47
  "type": "string",
48
  "description": f"""An optional field to specify the type of plotly trendline we wish to use in the scatter plot.
49
+ This trendline value can be one of ['ols','lowess','rolling','ewm','expanding'].
50
+ Do not send any values outside of this array as the function will fail.
51
+ Infer this from the user's message.""",
52
  "items": {
53
  "type": "string",
54
  }
 
65
  "marginal_x": {
66
  "type": "string",
67
  "description": f"""The type of marginal distribution plot we'd like to specify for the plotly scatter plot for the x axis.
68
+ This marginal_x value can be one of ['histogram','rug','box','violin'].
69
+ Do not send any values outside of this array as the function will fail.
70
+ Infer this from the user's message.""",
71
  "items": {
72
  "type": "string",
73
  }
 
75
  "marginal_y": {
76
  "type": "string",
77
  "description": f"""The type of marginal distribution plot we'd like to specify for the plotly scatter plot for the y axis.
78
+ This marginal_y value can be one of ['histogram','rug','box','violin'].
79
+ Do not send any values outside of this array as the function will fail.
80
+ Infer this from the user's message.""",
81
  "items": {
82
  "type": "string",
83
  }
 
338
  "type": "string",
339
  "description": f"""An optional value that represents the function of data to compute the function which is used on the optional y column.
340
  This histfunc value can be one of ['avg','sum','count'].
341
+ Do not send any values outside of this array as the function will fail.""",
342
  "items": {
343
  "type": "string",
344
  }
 
367
  from the table_generation_func function in any way and always display the iframe fully to the user in the chat window.""",
368
  "parameters": {},
369
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  }
371
+ ]
tools/stats_tools.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Tool (function-calling) schemas for the statistics helpers exposed to the
# chat model. Each entry follows the {"type": "function", "function": {...}}
# layout, with the arguments described as a JSON-Schema object.
#
# Only array-typed parameters carry an "items" sub-schema; scalar string
# parameters must not, because "items" only applies to arrays and contradicts
# a declared "type": "string".
stats_tools = [
    {
        "type": "function",
        "function": {
            "name": "regression_func",
            "description": """This a tool to calculate regressions on our SQLite table called 'data_source'.
            We can run queries with our 'sql_query_func' function and they will be available to use in this function via the query.csv file that is generated.
            Returns a dictionary of values that includes a regression_summary and a regression chart (which is an iframe displaying the
            linear regression in chart form and should be shown to the user).""",
            "parameters": {
                "type": "object",
                "properties": {
                    # One or more column names -> array of strings.
                    "independent_variables": {
                        "type": "array",
                        "description": """An array of strings that states the independent variables in our data set which should be column names in our query.csv file that is generated
                        in the 'sql_query_func' function. This will allow us to identify the data to use for our independent variables.
                        Infer this from the user's message.""",
                        "items": {
                            "type": "string",
                        },
                    },
                    # Exactly one column name -> plain string.
                    "dependent_variable": {
                        "type": "string",
                        "description": """A string that states the dependent variables in our data set which should be a column name in our query.csv file that is generated
                        in the 'sql_query_func' function. This will allow us to identify the data to use for our dependent variables.
                        Infer this from the user's message.""",
                    },
                    # Optional grouping column -> plain string, not required.
                    "category": {
                        "type": "string",
                        "description": """An optional column in our query.csv file that contain a parameter that will define the category for the data.
                        Do not send value if no category is needed or specified. This category must be present in our query.csv file to be valid.""",
                    },
                },
                "required": ["independent_variables", "dependent_variable"],
            },
        },
    }
]
tools/tools.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3
2
+ from .stats_tools import stats_tools
3
+ from .chart_tools import chart_tools
4
+ from utils import TEMP_DIR
5
+
6
def data_file_tools_call(session_hash):
    """Build the tool schemas offered to the model for an uploaded data file.

    Opens the session's SQLite database at
    ``TEMP_DIR/<session_hash>/data_source.db``, reads the column names of the
    ``data_source`` table and embeds them in the description of the
    ``sql_query_func`` tool.

    Args:
        session_hash: Identifier of the session; converted with ``str`` and
            used as the directory name under ``TEMP_DIR``.

    Returns:
        A list of tool schema dicts: the ``sql_query_func`` schema followed by
        every entry of ``chart_tools`` and ``stats_tools``.
    """
    dir_path = TEMP_DIR / str(session_hash)
    connection = sqlite3.connect(f'{dir_path}/data_source.db')
    # try/finally so the cursor and connection are released even when the
    # query fails (for example when the table does not exist yet).
    try:
        print("Querying Database in Tools.py")
        cur = connection.execute('select * from data_source')
        try:
            # cursor.description holds one 7-tuple per column; item 0 is the name.
            columns = [i[0] for i in cur.description]
        finally:
            cur.close()
    finally:
        connection.close()
    print("COLUMNS 2")
    print(columns)

    # Cap the number of column names placed in the prompt. `columns` is a
    # list, so the truncation marker must be appended as a list element;
    # concatenating the bare string '..' would raise TypeError.
    column_string = (columns[:625] + ['..']) if len(columns) > 625 else columns

    tools_calls = [
        {
            "type": "function",
            "function": {
                "name": "sql_query_func",
                "description": f"""This is a tool useful to query a SQLite table called 'data_source' with the following Columns: {column_string}.
                There may also be more columns in the table if the number of columns is too large to process.
                This function also saves the results of the query to csv file called query.csv.""",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "queries": {
                            "type": "array",
                            "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
                            "items": {
                                "type": "string",
                            }
                        }
                    },
                    "required": ["queries"],
                },
            },
        },
    ]

    # Flatten the shared chart and statistics schemas into the same list.
    tools_calls.extend(chart_tools)
    tools_calls.extend(stats_tools)

    return tools_calls
48
+
49
def graphql_tools_call(sessions_hash):
    """Build the tool schemas offered to the model for a GraphQL data source.

    Args:
        sessions_hash: Identifier of the session. Currently unused; kept so
            the signature parallels ``data_file_tools_call``.

    Returns:
        A list of tool schema dicts: the ``graphql_query_func`` schema followed
        by every entry of ``chart_tools`` and ``stats_tools``.
    """
    # NOTE(review): the description previously interpolated `column_string`,
    # which is not defined in this function and raised NameError on every
    # call. No column/field source is available here, so that sentence is
    # omitted until schema introspection is wired in.
    tools_calls = [
        {
            "type": "function",
            "function": {
                "name": "graphql_query_func",
                "description": """This is a tool useful to query a GraphQL endpoint.
                This function also saves the results of the query to csv file called query.csv.""",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "queries": {
                            "type": "array",
                            "description": "The graphQL query to use in the search. Infer this from the user's message. It should be a question or a statement",
                            "items": {
                                "type": "string",
                            }
                        }
                    },
                    "required": ["queries"],
                },
            },
        },
    ]

    # extend (not append): the shared schemas must be flattened into the list
    # rather than nested as two list-valued elements.
    tools_calls.extend(chart_tools)
    tools_calls.extend(stats_tools)

    # Return the assembled list; a bare `return` would hand callers None.
    return tools_calls