File size: 4,656 Bytes
1c61d0a
80a54c1
1c61d0a
 
 
 
 
de3d152
1c61d0a
 
 
9fbad84
1c61d0a
fdd0e93
1c61d0a
 
 
 
e89ef0e
1c61d0a
 
 
69b18c8
d6042ff
 
 
e89ef0e
 
fdd0e93
e89ef0e
 
d6042ff
1c61d0a
 
 
d6042ff
1c61d0a
 
fdd0e93
1c61d0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6042ff
1c61d0a
d6042ff
1c61d0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6042ff
 
1c61d0a
 
e89ef0e
 
 
fdd0e93
 
e89ef0e
 
 
 
 
 
fdd0e93
e89ef0e
fdd0e93
3c7ef99
 
 
 
 
e89ef0e
 
 
 
fdd0e93
 
 
 
 
 
 
 
 
 
e89ef0e
1c61d0a
 
 
 
962b9f1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import os
import shutil
import gradio as gr
from transformers import ReactCodeAgent, HfEngine, Tool
import pandas as pd

from gradio import Chatbot
from streaming import stream_to_gradio
from huggingface_hub import login
from gradio.data_classes import FileData

# Authenticate with the Hugging Face Hub using the token read from the
# HUGGINGFACEHUB_API_TOKEN environment variable.
# NOTE(review): os.getenv returns None when the variable is unset, so login()
# is then called with None — confirm that is acceptable where this app runs.
login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))

# LLM backend handed to the agent below.
llm_engine = HfEngine("meta-llama/Llama-3.3-70B-Instruct")

# Shared agent instance used by interact_with_agent for every request.
# It is given no extra tools. NOTE(review): additional_authorized_imports is
# presumably the list of modules the agent's generated code may import, and
# max_iterations presumably caps its reasoning steps — confirm against the
# transformers agents documentation.
agent = ReactCodeAgent(
    tools=[],
    llm_engine=llm_engine,
    additional_authorized_imports=["numpy", "pandas", "matplotlib", "seaborn","scipy","sklearn"],
    max_iterations=10,
)

# Prompt template sent to the agent. The {structure_notes} placeholder is
# filled with str.format() in interact_with_agent, and the user's question is
# appended after the trailing "Question/Problem:" line.
base_prompt = """You are an expert full stack data analyst.
You are given a data file and the data structure below.
The data file is passed to you as the variable data_file, it is a pandas dataframe, you can use it directly.
DO NOT try to load data_file, it is already a dataframe pre-loaded in your python interpreter!
When plotting using matplotlib/seaborn save the figures to the (already existing) folder'./figures/': take care to clear 
each figure with plt.clf() before doing another plot.
When plotting make the plots as visually appealing as possible. Same with tables, charts, or anything else.

Use the data file to answer the question or perform a task below.

Structure of the data:
{structure_notes}

Question/Problem:
"""

# Example question offered in the UI's gr.Examples row.
example_notes="""What is the survival rate by class?"""

def get_images_in_directory(directory):
    """Recursively collect the paths of image files found under ``directory``.

    A file counts as an image when its extension, compared
    case-insensitively, is one of .png, .jpg, .jpeg, .gif, .bmp or .tiff.

    Args:
        directory: Root folder to walk.

    Returns:
        A list of paths (each joined with the folder it was found in), in
        the order ``os.walk`` visits them. Empty when nothing matches.
    """
    recognised_suffixes = frozenset(
        ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')
    )
    return [
        os.path.join(folder, filename)
        for folder, _subfolders, filenames in os.walk(directory)
        for filename in filenames
        if os.path.splitext(filename)[1].lower() in recognised_suffixes
    ]

def interact_with_agent(file_input, additional_notes):
    """Run the analyst agent on an uploaded CSV and stream the chat transcript.

    This is a generator: every ``yield`` is the complete list of chat
    messages to display at that moment.

    Args:
        file_input: The uploaded CSV, handed to ``pandas.read_csv``.
            ``None`` when nothing was uploaded.
        additional_notes: The user's question or task. It is appended to the
            prompt after the data-structure summary. May be empty or ``None``.

    Yields:
        list: The messages so far (user question, agent steps and any newly
        created figures). Every yield except the last also ends with a
        temporary progress placeholder.
    """
    import mimetypes  # local import: only needed to label generated figures

    if file_input is None:
        # Nothing to analyse: tell the user instead of failing in read_csv.
        yield [
            gr.ChatMessage(
                role="assistant",
                content="Please upload a `.csv` file before running the agent.",
            )
        ]
        return

    # Start each run from an empty figures folder so only plots created by
    # this run are reported. The folder may not exist yet (e.g. first run),
    # so neither the removal nor the creation may fail on that.
    shutil.rmtree("./figures", ignore_errors=True)
    os.makedirs("./figures", exist_ok=True)

    data_file = pd.read_csv(file_input)
    data_structure_notes = f"""- Description (output of .describe()):
    {data_file.describe()}
    - Columns with dtypes:
    {data_file.dtypes}"""

    prompt = base_prompt.format(structure_notes=data_structure_notes)

    if additional_notes:
        prompt += additional_notes

    messages = [gr.ChatMessage(role="user", content=additional_notes or "")]
    yield messages + [
        gr.ChatMessage(role="assistant", content="⏳ _Starting task..._")
    ]

    # Paths of figures already added to the transcript.
    shown_image_paths = set()
    for msg in stream_to_gradio(agent, prompt, data_file=data_file):
        messages.append(msg)
        # After every agent step, attach any figure that appeared on disk.
        for image_path in get_images_in_directory("./figures"):
            if image_path in shown_image_paths:
                continue
            shown_image_paths.add(image_path)
            # Label the file by its real type (jpg, gif, ...) and fall back
            # to PNG when the type cannot be guessed.
            mime_type = mimetypes.guess_type(image_path)[0] or "image/png"
            messages.append(
                gr.ChatMessage(
                    role="assistant",
                    content=FileData(path=image_path, mime_type=mime_type),
                )
            )
        yield messages + [
            gr.ChatMessage(role="assistant", content="⏳ _Still processing..._")
        ]
    # Final transcript without the progress placeholder.
    yield messages


# Build the Gradio UI: an intro text, a CSV upload, a question box, a Run
# button, one clickable example and the chat panel that shows the agent's
# streamed messages.
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue=gr.themes.colors.blue,
        secondary_hue=gr.themes.colors.yellow,
    )
) as demo:
    gr.Markdown("""# Data Analyst (ReAct Code Agent) 📊🤔

**Who am I?**
I'm your personal Data Analyst built on top of Llama-3.3-70B-Instruct model and the ReAct (Reasoning and Acting) framework.
I break down the task step-by-step until I reach an answer/solution.
Along the way I share my thoughts, actions (Python code blobs), and observations.
I come packed with pandas, numpy, sklearn, matplotlib, seaborn, and more!

**Instructions**
1. Drop or upload a `.csv` file below.
2. Ask a question or give it a task.
3. Watch the AI Agent think, act, and observe until final answer.

**For an example, click on the example at the bottom of page to auto populate.**""")

    file_input = gr.File(label="Drop/upload a .csv file to analyze")
    text_input = gr.Textbox(
        label="Ask a question or give it a task."
    )
    submit = gr.Button("Run", variant="primary")
    # Clicking the example fills both inputs; nothing is pre-computed.
    gr.Examples(
        examples=[["./example/titanic.csv", example_notes]],
        inputs=[file_input, text_input],
        cache_examples=False,
        label='Click on an example below.'
    )
    chatbot = gr.Chatbot(
        label="Data Analyst Agent",
        type="messages",
        avatar_images=(
            None,
            "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png",
        ),
        height=1000,
    )

    # interact_with_agent is a generator, so each value it yields replaces
    # the chatbot's contents.
    submit.click(interact_with_agent, [file_input, text_input], [chatbot])

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()