Spaces:

nolanzandi
/

virtual-data-analyst

Running

App Files Community

nolanzandi committed on Mar 5

Commit

bed69c5

verified ·

1 Parent(s): 17c6c25

Display upload errors to user, better prompts for chart layouts

Browse files

Files changed (4) hide show

app.py +45 -43
data_sources/upload_file.py +35 -28
functions/chart_functions.py +3 -3
tools.py +6 -4

app.py CHANGED Viewed

@@ -56,55 +56,57 @@ with gr.Blocks(css=css, delete_cache=(3600,3600)) as demo:
         print(filename)
         message_dict[request.session_hash] = None
         if filename:
-            process_upload(filename, request.session_hash)
-            if "bank_marketing_campaign" in filename:
-                example_questions = [
-                                        ["Describe the dataset"],
-                                        ["What levels of education have the highest and lowest average balance?"],
-                                        ["What job is most and least common for a yes response from the individuals, not counting 'unknown'?"],
-                                        ["Can you generate a bar chart of education vs. average balance?"],
-                                        ["Can you generate a table of levels of education versus average balance, percent married, percent with a loan, and percent in default?"],
-                                        ["Can we predict the relationship between the number of contacts performed before this campaign and the average balance?"],
-                                    ]
-            elif "online_retail_data" in filename:
-                example_questions = [
-                                        ["Describe the dataset"],
-                                        ["What month had the highest revenue?"],
-                                        ["Is revenue higher in the morning or afternoon?"],
-                                        ["Can you generate a line graph of revenue per month?"],
-                                        ["Can you generate a table of revenue per month?"],
-                                        ["Can we predict how time of day affects revenue in this data set?"],
-                                    ]
-            else:
-                try:
-                    generated_examples = ast.literal_eval(example_question_generator(request.session_hash))
                     example_questions = [
-                                            ["Describe the dataset"]
                                         ]
-                    for example in generated_examples:
-                        example_questions.append([example])
-                except:
                     example_questions = [
-                                        ["Describe the dataset"],
-                                        ["List the columns in the dataset"],
-                                        ["What could this data be used for?"],
-                                    ]
-            parameters = gr.Textbox(visible=False, value=request.session_hash)
-            bot = gr.Chatbot(type='messages', label="CSV Chat Window", render_markdown=True, sanitize_html=False, show_label=True, render=False, visible=True, elem_classes="chatbot")
-            chat = gr.ChatInterface(
-                                fn=chatbot_with_fc,
-                                type='messages',
-                                chatbot=bot,
-                                title="Chat with your data file",
-                                concurrency_limit=None,
-                                examples=example_questions,
-                                additional_inputs=parameters
-                                )
     def process_upload(upload_value, session_hash):
         if upload_value:
-            process_data_upload(upload_value, session_hash)
-        return [], []
     demo.unload(delete_db)

         print(filename)
         message_dict[request.session_hash] = None
         if filename:
+            process_message = process_upload(filename, request.session_hash)
+            gr.HTML(value=process_message[1], padding=False)
+            if process_message[0] == "success":
+                if "bank_marketing_campaign" in filename:
                     example_questions = [
+                                            ["Describe the dataset"],
+                                            ["What levels of education have the highest and lowest average balance?"],
+                                            ["What job is most and least common for a yes response from the individuals, not counting 'unknown'?"],
+                                            ["Can you generate a bar chart of education vs. average balance?"],
+                                            ["Can you generate a table of levels of education versus average balance, percent married, percent with a loan, and percent in default?"],
+                                            ["Can we predict the relationship between the number of contacts performed before this campaign and the average balance?"],
                                         ]
+                elif "online_retail_data" in filename:
                     example_questions = [
+                                            ["Describe the dataset"],
+                                            ["What month had the highest revenue?"],
+                                            ["Is revenue higher in the morning or afternoon?"],
+                                            ["Can you generate a line graph of revenue per month?"],
+                                            ["Can you generate a table of revenue per month?"],
+                                            ["Can we predict how time of day affects revenue in this data set?"],
+                                        ]
+                else:
+                    try:
+                        generated_examples = ast.literal_eval(example_question_generator(request.session_hash))
+                        example_questions = [
+                                                ["Describe the dataset"]
+                                            ]
+                        for example in generated_examples:
+                            example_questions.append([example])
+                    except:
+                        example_questions = [
+                                            ["Describe the dataset"],
+                                            ["List the columns in the dataset"],
+                                            ["What could this data be used for?"],
+                                        ]
+                parameters = gr.Textbox(visible=False, value=request.session_hash)
+                bot = gr.Chatbot(type='messages', label="CSV Chat Window", render_markdown=True, sanitize_html=False, show_label=True, render=False, visible=True, elem_classes="chatbot")
+                chat = gr.ChatInterface(
+                                    fn=chatbot_with_fc,
+                                    type='messages',
+                                    chatbot=bot,
+                                    title="Chat with your data file",
+                                    concurrency_limit=None,
+                                    examples=example_questions,
+                                    additional_inputs=parameters
+                                    )
     def process_upload(upload_value, session_hash):
         """Process an uploaded file and report the outcome.

         Args:
             upload_value: The uploaded file reference; forwarded unchanged to
                 ``process_data_upload`` when truthy.
             session_hash: Session identifier forwarded to
                 ``process_data_upload``.

         Returns:
             A two-item list ``[status, html]``. For a truthy ``upload_value``
             this is whatever ``process_data_upload`` returns. For a falsy
             value it is an ``"error"`` entry, so callers can always index
             ``[0]`` and ``[1]`` on the result.
         """
         if not upload_value:
             # Previously this path fell through to ``return process_message``
             # with the name never assigned, raising UnboundLocalError.
             return ["error", "<p style='color:red;text-align:center;font-size:18px;font-weight:bold;'>ERROR: No file was provided</p>"]
         return process_data_upload(upload_value, session_hash)
     demo.unload(delete_db)

data_sources/upload_file.py CHANGED Viewed

@@ -45,36 +45,43 @@ def read_file(file):
     return df
 def process_data_upload(data_file, session_hash):
-    total_time = 0
-    while not is_file_done_saving(data_file):
-        total_time += .5
-        time.sleep(.5)
-        if total_time > 10:
-            break
-    df = read_file(data_file)
-    # Read each sheet and store data in a DataFrame
-    #data = df.parse(sheet_name)
-    # Process the data as needed
-    # ...
-    df.columns = df.columns.str.replace(' ', '_')
-    df.columns = df.columns.str.replace('/', '_')
-    for column in df.columns:
-        if "date" in column.lower() or "time" in column.lower():
-            df[column] = pd.to_datetime(df[column])
-        if df[column].dtype == 'object' and isinstance(df[column].iloc[0], list):
-            df[column] = df[column].explode()
-    dir_path = TEMP_DIR / str(session_hash)
-    os.makedirs(dir_path, exist_ok=True)
-    connection = sqlite3.connect(f'{dir_path}/data_source.db')
-    print("Opened database successfully");
-    print(df.columns)
-    df.to_sql('data_source', connection, if_exists='replace', index = False)
-    connection.commit()
-    connection.close()

     return df
 def process_data_upload(data_file, session_hash):
     """Load an uploaded data file into a per-session SQLite database.

     Waits for the upload to finish saving (polling ``is_file_done_saving``),
     reads it with ``read_file``, tidies the column names and writes the
     result to the ``data_source`` table of
     ``TEMP_DIR/<session_hash>/data_source.db``.

     Args:
         data_file: The uploaded file reference, passed as-is to
             ``is_file_done_saving`` and ``read_file``.
         session_hash: Session identifier used to name the per-session
             directory under ``TEMP_DIR``.

     Returns:
         A two-item list ``[status, html]`` where ``status`` is ``"success"``
         or ``"error"`` and ``html`` is a ``<p>`` snippet meant for display.
         Exceptions are not propagated; any failure is reported through the
         ``"error"`` result.
     """
     # Local import: only needed to escape the error text, and keeps this
     # block independent of the module's top-level import list.
     import html

     try:
         total_time = 0
         # Poll every half second until the file is done saving; give up
         # waiting after roughly ten seconds and attempt the read anyway.
         while not is_file_done_saving(data_file):
             total_time += .5
             time.sleep(.5)
             if total_time > 10:
                 break
         df = read_file(data_file)
         # Replace spaces and slashes in column names with underscores.
         df.columns = df.columns.str.replace(' ', '_')
         df.columns = df.columns.str.replace('/', '_')
         for column in df.columns:
             if "date" in column.lower() or "time" in column.lower():
                 df[column] = pd.to_datetime(df[column])
             if df[column].dtype == 'object' and isinstance(df[column].iloc[0], list):
                 df[column] = df[column].explode()
         dir_path = TEMP_DIR / str(session_hash)
         os.makedirs(dir_path, exist_ok=True)
         connection = sqlite3.connect(f'{dir_path}/data_source.db')
         try:
             print("Opened database successfully")
             print(df.columns)
             df.to_sql('data_source', connection, if_exists='replace', index = False)
             connection.commit()
         finally:
             # Close the handle even when the write fails, so a bad upload
             # does not leave the session database open.
             connection.close()
         return ["success","<p style='color:green;text-align:center;font-size:18px;'>Data upload successful</p>"]
     except Exception as e:
         print("UPLOAD ERROR")
         print(e)
         # The exception text can echo content from the uploaded file (for
         # example column names), so escape it before embedding it in HTML.
         safe_message = html.escape(str(e))
         return ["error",f"<p style='color:red;text-align:center;font-size:18px;font-weight:bold;'>ERROR: {safe_message}</p>"]

functions/chart_functions.py CHANGED Viewed

@@ -12,7 +12,7 @@ load_dotenv()
 root_url = os.getenv("ROOT_URL")
-def chart_generation_func(data: List[str], x_column: str, y_column: str, graph_type: str, session_hash: str, layout: Dict[str,str]={}, category: str=""):
     print("CHART GENERATION")
     print(data)
     print(graph_type)
@@ -69,13 +69,13 @@ def chart_generation_func(data: List[str], x_column: str, y_column: str, graph_t
         else:
            layout_obj = layout
-        if layout and isinstance(layout_obj, str):
            layout_dict = ast.literal_eval(layout_obj)
         else:
            layout_dict = layout_obj
         #Applying stylings and settings generated from LLM
-        if layout:
          fig["layout"] = layout_dict
         for key, value in data_dict.items():

 root_url = os.getenv("ROOT_URL")
+def chart_generation_func(data: List[dict], x_column: str, y_column: str, graph_type: str, session_hash: str, layout: List[dict]=[{}], category: str=""):
     print("CHART GENERATION")
     print(data)
     print(graph_type)
         else:
            layout_obj = layout
+        if layout_obj and isinstance(layout_obj, str):
            layout_dict = ast.literal_eval(layout_obj)
         else:
            layout_dict = layout_obj
         #Applying stylings and settings generated from LLM
+        if layout_dict:
          fig["layout"] = layout_dict
         for key, value in data_dict.items():

tools.py CHANGED Viewed

@@ -49,7 +49,8 @@ def tools_call(session_hash):
                     "properties": {
                         "data": {
                             "type": "array",
-                            "description": """The list containing a dictionary that contains the 'data' portion of the plotly chart generation and will include the options requested by the user.
                             Do not include the 'x' or 'y' portions of the object as this will come from the query.csv file generated by our SQLite query.
                             Infer this from the user's message.""",
                             "items": {
@@ -81,7 +82,7 @@ def tools_call(session_hash):
                             "type": "string",
                             "description": f"""The type of plotly graph we wish to generate.
                              This graph_type value can be one of ['bar','scatter','line','pie'].
-                             Do not send any values outside of this list as the function will fail.
                              Infer this from the user's message.""",
                             "items": {
                                 "type": "string",
@@ -89,7 +90,8 @@ def tools_call(session_hash):
                         },
                         "layout": {
                             "type": "array",
-                            "description": """The dictionary that contains the 'layout' portion of the plotly chart generation""",
                             "items": {
                                 "type": "string",
                             }
@@ -125,7 +127,7 @@ def tools_call(session_hash):
                     "properties": {
                         "independent_variables": {
                             "type": "array",
-                            "description": f"""A list of strings that states the independent variables in our data set which should be column names in our query.csv file that is generated
                             in the 'sql_query_func' function. This will allow us to identify the data to use for our independent variables.
                             Infer this from the user's message.""",
                             "items": {

                     "properties": {
                         "data": {
                             "type": "array",
+                            "description": """The array containing a dictionary that contains the 'data' portion of the plotly chart generation and will include the options requested by the user.
+                            The array must contain a dictionary, any other format will not work.
                             Do not include the 'x' or 'y' portions of the object as this will come from the query.csv file generated by our SQLite query.
                             Infer this from the user's message.""",
                             "items": {
                             "type": "string",
                             "description": f"""The type of plotly graph we wish to generate.
                              This graph_type value can be one of ['bar','scatter','line','pie'].
+                             Do not send any values outside of this array as the function will fail.
                              Infer this from the user's message.""",
                             "items": {
                                 "type": "string",
                         },
                         "layout": {
                             "type": "array",
+                            "description": """An array containing a dictionary that contains the 'layout' portion of the plotly chart generation.
+                            The array must contain a dictionary, any other format will not work.""",
                             "items": {
                                 "type": "string",
                             }
                     "properties": {
                         "independent_variables": {
                             "type": "array",
+                            "description": f"""An array of strings that states the independent variables in our data set which should be column names in our query.csv file that is generated
                             in the 'sql_query_func' function. This will allow us to identify the data to use for our independent variables.
                             Infer this from the user's message.""",
                             "items": {