Add time series sales data as an example dataset
#2
by
nolanzandi
- opened
- .gitattributes +1 -0
- data_sources/upload_file.py +13 -1
- functions/chat_functions.py +10 -5
- samples/online_retail_data.csv +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
samples/online_retail_data.csv filter=lfs diff=lfs merge=lfs -text
|
data_sources/upload_file.py
CHANGED
@@ -1,8 +1,16 @@
|
|
1 |
import pandas as pd
|
2 |
import sqlite3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
def process_data_upload(data_file, session_hash):
|
5 |
-
|
|
|
6 |
|
7 |
# Read each sheet and store data in a DataFrame
|
8 |
#data = df.parse(sheet_name)
|
@@ -10,6 +18,10 @@ def process_data_upload(data_file, session_hash):
|
|
10 |
# ...
|
11 |
df.columns = df.columns.str.replace(' ', '_')
|
12 |
df.columns = df.columns.str.replace('/', '_')
|
|
|
|
|
|
|
|
|
13 |
|
14 |
connection = sqlite3.connect(f'data_source_{session_hash}.db')
|
15 |
print("Opened database successfully");
|
|
|
1 |
import pandas as pd
|
2 |
import sqlite3
|
3 |
+
import csv
|
4 |
+
|
5 |
+
def get_delimiter(file_path, bytes = 4096):
    """Guess the field delimiter of a delimited text file.

    Reads a leading sample of the file and asks ``csv.Sniffer`` which
    character separates the fields.

    Args:
        file_path: Path of the file to inspect.
        bytes: Maximum number of characters to read for the sample. The
            name shadows the builtin but is kept so existing keyword
            callers keep working.

    Returns:
        The detected delimiter, or ``","`` when the sniffer cannot decide
        (for example on an empty file).
    """
    # The context manager closes the handle as soon as the sample is read;
    # a bare open(...).read(...) leaves that to the garbage collector.
    with open(file_path, "r") as sample_file:
        sample = sample_file.read(bytes)

    try:
        return csv.Sniffer().sniff(sample).delimiter
    except csv.Error:
        # Fall back to the comma, which is also the default separator of
        # pandas.read_csv, so the caller reports the real parsing problem.
        return ","
|
10 |
|
11 |
def process_data_upload(data_file, session_hash):
|
12 |
+
delimiter = get_delimiter(data_file)
|
13 |
+
df = pd.read_csv(data_file, sep=delimiter)
|
14 |
|
15 |
# Read each sheet and store data in a DataFrame
|
16 |
#data = df.parse(sheet_name)
|
|
|
18 |
# ...
|
19 |
df.columns = df.columns.str.replace(' ', '_')
|
20 |
df.columns = df.columns.str.replace('/', '_')
|
21 |
+
|
22 |
+
for column in df.columns:
|
23 |
+
if "date" in column.lower() or "time" in column.lower():
|
24 |
+
df[column] = pd.to_datetime(df[column])
|
25 |
|
26 |
connection = sqlite3.connect(f'data_source_{session_hash}.db')
|
27 |
print("Opened database successfully");
|
functions/chat_functions.py
CHANGED
@@ -67,18 +67,23 @@ def example_display(input):
|
|
67 |
display = True
|
68 |
else:
|
69 |
display = False
|
70 |
-
return gr.update(visible=display)
|
71 |
|
72 |
css= ".file_marker .large{min-height:50px !important;} .example_btn{max-width:300px;}"
|
73 |
|
74 |
with gr.Blocks(css=css) as demo:
|
75 |
title = gr.HTML("<h1 style='text-align:center;'>Virtual Data Analyst</h1>")
|
76 |
description = gr.HTML("<p style='text-align:center;'>Upload a CSV file and chat with our virtual data analyst to get insights on your data set</p>")
|
77 |
-
|
78 |
-
|
|
|
|
|
|
|
|
|
79 |
file_output = gr.File(label="CSV File", show_label=True, elem_classes="file_marker", file_types=['.csv'])
|
80 |
-
|
81 |
-
|
|
|
82 |
|
83 |
@gr.render(inputs=file_output)
|
84 |
def data_options(filename, request: gr.Request):
|
|
|
67 |
display = True
|
68 |
else:
|
69 |
display = False
|
70 |
+
return [gr.update(visible=display),gr.update(visible=display)]
|
71 |
|
72 |
css= ".file_marker .large{min-height:50px !important;} .example_btn{max-width:300px;}"
|
73 |
|
74 |
with gr.Blocks(css=css) as demo:
|
75 |
title = gr.HTML("<h1 style='text-align:center;'>Virtual Data Analyst</h1>")
|
76 |
description = gr.HTML("<p style='text-align:center;'>Upload a CSV file and chat with our virtual data analyst to get insights on your data set</p>")
|
77 |
+
example_file_1 = gr.File(visible=False, value="samples/bank_marketing_campaign.csv")
|
78 |
+
example_file_2 = gr.File(visible=False, value="samples/online_retail_data.csv")
|
79 |
+
with gr.Row():
|
80 |
+
example_btn_1 = gr.Button(value="Try Me: bank_marketing_campaign.csv", elem_classes="example_btn", size="md", variant="primary")
|
81 |
+
example_btn_2 = gr.Button(value="Try Me: online_retail_data.csv", elem_classes="example_btn", size="md", variant="primary")
|
82 |
+
|
83 |
file_output = gr.File(label="CSV File", show_label=True, elem_classes="file_marker", file_types=['.csv'])
|
84 |
+
example_btn_1.click(fn=run_example, inputs=example_file_1, outputs=file_output)
|
85 |
+
example_btn_2.click(fn=run_example, inputs=example_file_2, outputs=file_output)
|
86 |
+
file_output.change(fn=example_display, inputs=file_output, outputs=[example_btn_1, example_btn_2])
|
87 |
|
88 |
@gr.render(inputs=file_output)
|
89 |
def data_options(filename, request: gr.Request):
|
samples/online_retail_data.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0d4e4f72b6bc62ce831e8243bc02e491fda0085c1d94f3075063411539b1f04
|
3 |
+
size 45580673
|