Nolan Zandi committed on
Commit
fb65c41
·
1 Parent(s): 2946020

fix missing init file

Browse files
__init__.py DELETED
@@ -1,3 +0,0 @@
1
- from .app import data_url
2
-
3
- __all__ = ["data_url"]
 
 
 
 
data_sources/upload_file.py CHANGED
@@ -1,8 +1,7 @@
1
  import pandas as pd
2
  import sqlite3
3
 
4
- def process_data_upload(data_file):
5
-
6
  df = pd.read_csv(data_file, sep=";")
7
 
8
  # Read each sheet and store data in a DataFrame
@@ -12,7 +11,7 @@ def process_data_upload(data_file):
12
  df.columns = df.columns.str.replace(' ', '_')
13
  df.columns = df.columns.str.replace('/', '_')
14
 
15
- connection = sqlite3.connect('data_source.db')
16
  print("Opened database successfully");
17
  print(df.columns)
18
 
 
1
  import pandas as pd
2
  import sqlite3
3
 
4
+ def process_data_upload(data_file, session_hash):
 
5
  df = pd.read_csv(data_file, sep=";")
6
 
7
  # Read each sheet and store data in a DataFrame
 
11
  df.columns = df.columns.str.replace(' ', '_')
12
  df.columns = df.columns.str.replace('/', '_')
13
 
14
+ connection = sqlite3.connect(f'data_source_{session_hash}.db')
15
  print("Opened database successfully");
16
  print(df.columns)
17
 
functions/chat_functions.py CHANGED
@@ -1,7 +1,6 @@
1
  from data_sources import process_data_upload
2
 
3
  import gradio as gr
4
- import json
5
 
6
  from haystack.dataclasses import ChatMessage
7
  from haystack.components.generators.chat import OpenAIChatGenerator
@@ -23,20 +22,18 @@ messages = [
23
  )
24
  ]
25
 
26
- def chatbot_with_fc(message, history):
27
  from functions import sqlite_query_func
28
  from pipelines import rag_pipeline_func
29
  import tools
30
- import importlib
31
- importlib.reload(tools)
32
 
33
  available_functions = {"sql_query_func": sqlite_query_func, "rag_pipeline_func": rag_pipeline_func}
34
  messages.append(ChatMessage.from_user(message))
35
- response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools.tools})
36
 
37
  while True:
38
  # if OpenAI response is a tool call
39
- if response and response["replies"][0].meta["finish_reason"] == "tool_calls":
40
  function_calls = response["replies"][0].tool_calls
41
  for function_call in function_calls:
42
  messages.append(ChatMessage.from_assistant(tool_calls=[function_call]))
@@ -46,10 +43,10 @@ def chatbot_with_fc(message, history):
46
 
47
  ## Find the corresponding function and call it with the given arguments
48
  function_to_call = available_functions[function_name]
49
- function_response = function_to_call(**function_args)
50
  ## Append function response to the messages list using `ChatMessage.from_tool`
51
  messages.append(ChatMessage.from_tool(tool_result=function_response['reply'], origin=function_call))
52
- response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools.tools})
53
 
54
  # Regular Conversation
55
  else:
@@ -57,6 +54,11 @@ def chatbot_with_fc(message, history):
57
  break
58
  return response["replies"][0].text
59
 
 
 
 
 
 
60
  css= ".file_marker .large{min-height:50px !important;}"
61
 
62
  with gr.Blocks(css=css) as demo:
@@ -65,27 +67,31 @@ with gr.Blocks(css=css) as demo:
65
  file_output = gr.File(label="CSV File", show_label=True, elem_classes="file_marker", file_types=['.csv'])
66
 
67
  @gr.render(inputs=file_output)
68
- def data_options(filename):
69
  print(filename)
70
  if filename:
 
71
  bot = gr.Chatbot(type='messages', label="CSV Chat Window", show_label=True, render=False, visible=True, elem_classes="chatbot")
72
  chat = gr.ChatInterface(
73
  fn=chatbot_with_fc,
74
  type='messages',
75
  chatbot=bot,
76
  title="Chat with your data file",
 
77
  examples=[
78
  ["Describe the dataset"],
79
  ["List the columns in the dataset"],
80
  ["What could this data be used for?"],
81
  ],
 
82
  )
83
-
84
- process_upload(filename)
85
 
86
- def process_upload(upload_value):
87
  if upload_value:
88
- process_data_upload(upload_value)
89
  return [], []
 
 
90
 
91
 
 
1
  from data_sources import process_data_upload
2
 
3
  import gradio as gr
 
4
 
5
  from haystack.dataclasses import ChatMessage
6
  from haystack.components.generators.chat import OpenAIChatGenerator
 
22
  )
23
  ]
24
 
25
+ def chatbot_with_fc(message, history, session_hash):
26
  from functions import sqlite_query_func
27
  from pipelines import rag_pipeline_func
28
  import tools
 
 
29
 
30
  available_functions = {"sql_query_func": sqlite_query_func, "rag_pipeline_func": rag_pipeline_func}
31
  messages.append(ChatMessage.from_user(message))
32
+ response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools.tools_call(session_hash)})
33
 
34
  while True:
35
  # if OpenAI response is a tool call
36
+ if response and response["replies"][0].meta["finish_reason"] == "tool_calls" or response["replies"][0].tool_calls:
37
  function_calls = response["replies"][0].tool_calls
38
  for function_call in function_calls:
39
  messages.append(ChatMessage.from_assistant(tool_calls=[function_call]))
 
43
 
44
  ## Find the corresponding function and call it with the given arguments
45
  function_to_call = available_functions[function_name]
46
+ function_response = function_to_call(**function_args, session_hash=session_hash)
47
  ## Append function response to the messages list using `ChatMessage.from_tool`
48
  messages.append(ChatMessage.from_tool(tool_result=function_response['reply'], origin=function_call))
49
+ response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools.tools_call(session_hash)})
50
 
51
  # Regular Conversation
52
  else:
 
54
  break
55
  return response["replies"][0].text
56
 
57
+ def delete_db(req: gr.Request):
58
+ db_file_path = f'data_source_{req.session_hash}.db'
59
+ if os.path.exists(db_file_path):
60
+ os.remove(db_file_path)
61
+
62
  css= ".file_marker .large{min-height:50px !important;}"
63
 
64
  with gr.Blocks(css=css) as demo:
 
67
  file_output = gr.File(label="CSV File", show_label=True, elem_classes="file_marker", file_types=['.csv'])
68
 
69
  @gr.render(inputs=file_output)
70
+ def data_options(filename, request: gr.Request):
71
  print(filename)
72
  if filename:
73
+ parameters = gr.Textbox(visible=False, value=request.session_hash)
74
  bot = gr.Chatbot(type='messages', label="CSV Chat Window", show_label=True, render=False, visible=True, elem_classes="chatbot")
75
  chat = gr.ChatInterface(
76
  fn=chatbot_with_fc,
77
  type='messages',
78
  chatbot=bot,
79
  title="Chat with your data file",
80
+ concurrency_limit=None,
81
  examples=[
82
  ["Describe the dataset"],
83
  ["List the columns in the dataset"],
84
  ["What could this data be used for?"],
85
  ],
86
+ additional_inputs=parameters
87
  )
88
+ process_upload(filename, request.session_hash)
 
89
 
90
+ def process_upload(upload_value, session_hash):
91
  if upload_value:
92
+ process_data_upload(upload_value, session_hash)
93
  return [], []
94
+
95
+ demo.unload(delete_db)
96
 
97
 
functions/sqlite_functions.py CHANGED
@@ -16,13 +16,13 @@ class SQLiteQuery:
16
  for query in queries:
17
  result = pd.read_sql(query, self.connection)
18
  results.append(f"{result}")
19
- "self.connection.close()"
20
  return {"results": results, "queries": queries}
21
 
22
 
23
- sql_query = SQLiteQuery('data_source.db')
24
 
25
- def sqlite_query_func(queries: List[str]):
 
26
  try:
27
  result = sql_query.run(queries)
28
  return {"reply": result["results"][0]}
 
16
  for query in queries:
17
  result = pd.read_sql(query, self.connection)
18
  results.append(f"{result}")
19
+ self.connection.close()
20
  return {"results": results, "queries": queries}
21
 
22
 
 
23
 
24
+ def sqlite_query_func(queries: List[str], session_hash):
25
+ sql_query = SQLiteQuery(f'data_source_{session_hash}.db')
26
  try:
27
  result = sql_query.run(queries)
28
  return {"reply": result["results"][0]}
pipelines/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
- from .pipelines import conditional_sql_pipeline, rag_pipeline_func
2
 
3
- __all__ = ["conditional_sql_pipeline", "rag_pipeline_func"]
 
1
+ from .pipelines import rag_pipeline_func
2
 
3
+ __all__ = ["rag_pipeline_func"]
pipelines/pipelines.py CHANGED
@@ -21,57 +21,57 @@ from haystack.components.builders import PromptBuilder
21
  from haystack.components.generators import OpenAIGenerator
22
 
23
  llm = OpenAIGenerator(model="gpt-4o")
24
- sql_query = SQLiteQuery('data_source.db')
25
-
26
- connection = sqlite3.connect('data_source.db')
27
- cur=connection.execute('select * from data_source')
28
- columns = [i[0] for i in cur.description]
29
- cur.close()
30
-
31
- #Rag Pipeline
32
- prompt = PromptBuilder(template="""Please generate an SQL query. The query should answer the following Question: {{question}};
33
- If the question cannot be answered given the provided table and columns, return 'no_answer'
34
- The query is to be answered for the table is called 'data_source' with the following
35
- Columns: {{columns}};
36
- Answer:""")
37
-
38
- routes = [
39
- {
40
- "condition": "{{'no_answer' not in replies[0]}}",
41
- "output": "{{replies}}",
42
- "output_name": "sql",
43
- "output_type": List[str],
44
- },
45
- {
46
- "condition": "{{'no_answer' in replies[0]}}",
47
- "output": "{{question}}",
48
- "output_name": "go_to_fallback",
49
- "output_type": str,
50
- },
51
- ]
52
-
53
- router = ConditionalRouter(routes)
54
-
55
- fallback_prompt = PromptBuilder(template="""User entered a query that cannot be answered with the given table.
56
- The query was: {{question}} and the table had columns: {{columns}}.
57
- Let the user know why the question cannot be answered""")
58
- fallback_llm = OpenAIGenerator(model="gpt-4")
59
-
60
- conditional_sql_pipeline = Pipeline()
61
- conditional_sql_pipeline.add_component("prompt", prompt)
62
- conditional_sql_pipeline.add_component("llm", llm)
63
- conditional_sql_pipeline.add_component("router", router)
64
- conditional_sql_pipeline.add_component("fallback_prompt", fallback_prompt)
65
- conditional_sql_pipeline.add_component("fallback_llm", fallback_llm)
66
- conditional_sql_pipeline.add_component("sql_querier", sql_query)
67
-
68
- conditional_sql_pipeline.connect("prompt", "llm")
69
- conditional_sql_pipeline.connect("llm.replies", "router.replies")
70
- conditional_sql_pipeline.connect("router.sql", "sql_querier.queries")
71
- conditional_sql_pipeline.connect("router.go_to_fallback", "fallback_prompt.question")
72
- conditional_sql_pipeline.connect("fallback_prompt", "fallback_llm")
73
-
74
- def rag_pipeline_func(queries: str, columns: str):
75
  print("RAG PIPELINE FUNCTION")
76
  result = conditional_sql_pipeline.run({"prompt": {"question": queries,
77
  "columns": columns},
 
21
  from haystack.components.generators import OpenAIGenerator
22
 
23
  llm = OpenAIGenerator(model="gpt-4o")
24
+ def rag_pipeline_func(queries: str, columns: str, session_hash):
25
+ sql_query = SQLiteQuery(f'data_source_{session_hash}.db')
26
+
27
+ connection = sqlite3.connect(f'data_source_{session_hash}.db')
28
+ cur=connection.execute('select * from data_source')
29
+ columns = [i[0] for i in cur.description]
30
+ cur.close()
31
+
32
+ #Rag Pipeline
33
+ prompt = PromptBuilder(template="""Please generate an SQL query. The query should answer the following Question: {{question}};
34
+ If the question cannot be answered given the provided table and columns, return 'no_answer'
35
+ The query is to be answered for the table is called 'data_source' with the following
36
+ Columns: {{columns}};
37
+ Answer:""")
38
+
39
+ routes = [
40
+ {
41
+ "condition": "{{'no_answer' not in replies[0]}}",
42
+ "output": "{{replies}}",
43
+ "output_name": "sql",
44
+ "output_type": List[str],
45
+ },
46
+ {
47
+ "condition": "{{'no_answer' in replies[0]}}",
48
+ "output": "{{question}}",
49
+ "output_name": "go_to_fallback",
50
+ "output_type": str,
51
+ },
52
+ ]
53
+
54
+ router = ConditionalRouter(routes)
55
+
56
+ fallback_prompt = PromptBuilder(template="""User entered a query that cannot be answered with the given table.
57
+ The query was: {{question}} and the table had columns: {{columns}}.
58
+ Let the user know why the question cannot be answered""")
59
+ fallback_llm = OpenAIGenerator(model="gpt-4")
60
+
61
+ conditional_sql_pipeline = Pipeline()
62
+ conditional_sql_pipeline.add_component("prompt", prompt)
63
+ conditional_sql_pipeline.add_component("llm", llm)
64
+ conditional_sql_pipeline.add_component("router", router)
65
+ conditional_sql_pipeline.add_component("fallback_prompt", fallback_prompt)
66
+ conditional_sql_pipeline.add_component("fallback_llm", fallback_llm)
67
+ conditional_sql_pipeline.add_component("sql_querier", sql_query)
68
+
69
+ conditional_sql_pipeline.connect("prompt", "llm")
70
+ conditional_sql_pipeline.connect("llm.replies", "router.replies")
71
+ conditional_sql_pipeline.connect("router.sql", "sql_querier.queries")
72
+ conditional_sql_pipeline.connect("router.go_to_fallback", "fallback_prompt.question")
73
+ conditional_sql_pipeline.connect("fallback_prompt", "fallback_llm")
74
+
75
  print("RAG PIPELINE FUNCTION")
76
  result = conditional_sql_pipeline.run({"prompt": {"question": queries,
77
  "columns": columns},
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- haystack-ai
2
- python-dotenv
3
- gradio
4
  pandas
 
1
+ haystack-ai
2
+ python-dotenv
3
+ gradio
4
  pandas
tools.py CHANGED
@@ -1,52 +1,54 @@
1
  import sqlite3
2
 
3
- connection = sqlite3.connect('data_source.db')
4
- print("Querying Database in Tools.py");
5
- cur=connection.execute('select * from data_source')
6
- columns = [i[0] for i in cur.description]
7
- print("COLUMNS 2")
8
- print(columns)
9
- cur.close()
 
 
10
 
11
- tools = [
12
- {
13
- "type": "function",
14
- "function": {
15
- "name": "sql_query_func",
16
- "description": f"This a tool useful to query a SQL table called 'data_source' with the following Columns: {columns}",
17
- "parameters": {
18
- "type": "object",
19
- "properties": {
20
- "queries": {
21
- "type": "array",
22
- "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
23
- "items": {
24
- "type": "string",
 
25
  }
26
- }
 
27
  },
28
- "required": ["question"],
29
  },
30
  },
31
- },
32
- {
33
- "type": "function",
34
- "function": {
35
- "name": "rag_pipeline_func",
36
- "description": f"This a tool useful to query a SQL table called 'data_source' with the following Columns: {columns}",
37
- "parameters": {
38
- "type": "object",
39
- "properties": {
40
- "queries": {
41
- "type": "array",
42
- "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
43
- "items": {
44
- "type": "string",
45
  }
46
- }
 
47
  },
48
- "required": ["question"],
49
- },
50
- },
51
- }
52
- ]
 
1
  import sqlite3
2
 
3
+ def tools_call(session_hash):
4
+ connection = sqlite3.connect(f'data_source_{session_hash}.db')
5
+ print("Querying Database in Tools.py");
6
+ cur=connection.execute('select * from data_source')
7
+ columns = [i[0] for i in cur.description]
8
+ print("COLUMNS 2")
9
+ print(columns)
10
+ cur.close()
11
+ connection.close()
12
 
13
+ return [
14
+ {
15
+ "type": "function",
16
+ "function": {
17
+ "name": "sql_query_func",
18
+ "description": f"This a tool useful to query a SQL table called 'data_source' with the following Columns: {columns}",
19
+ "parameters": {
20
+ "type": "object",
21
+ "properties": {
22
+ "queries": {
23
+ "type": "array",
24
+ "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
25
+ "items": {
26
+ "type": "string",
27
+ }
28
  }
29
+ },
30
+ "required": ["question"],
31
  },
 
32
  },
33
  },
34
+ {
35
+ "type": "function",
36
+ "function": {
37
+ "name": "rag_pipeline_func",
38
+ "description": f"This a tool useful to query a SQL table called 'data_source' with the following Columns: {columns}",
39
+ "parameters": {
40
+ "type": "object",
41
+ "properties": {
42
+ "queries": {
43
+ "type": "array",
44
+ "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
45
+ "items": {
46
+ "type": "string",
47
+ }
48
  }
49
+ },
50
+ "required": ["question"],
51
  },
52
+ },
53
+ }
54
+ ]