tosin2013 committed (verified) · Commit ebdc0a4 · 1 Parent(s): 55c7367

Update app.py

Files changed (1)
  1. app.py (+118, -161)
app.py CHANGED
@@ -4,112 +4,81 @@ from langchain_huggingface import HuggingFaceEmbeddings
  from datasets import load_dataset, Dataset
  from sklearn.neighbors import NearestNeighbors
  import numpy as np
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, TextStreamer
- import torch
- from typing import List
- from langchain_core.output_parsers import StrOutputParser
- from langchain_core.prompts import ChatPromptTemplate
  import gradio as gr
  import spaces
- from huggingface_hub import InferenceClient
- import time  # Added for timing logs

  # Configuration
-
- # Sample questions:
- # 1. What are the key features of AutoGen v0.4 that I should utilize when converting user requests into agent code?
- # 2. How can I leverage asynchronous messaging in AutoGen v0.4 to enhance my agents performance?
- # 3. What are best practices for writing modular and extensible agent code using AutoGen v0.4?
- # 4. Can you convert this user request into AutoGen v0.4 agent code: "Create an agent that classifies customer feedback into positive, negative, or neutral sentiments."
-
  DEFAULT_QUESTION = "Ask me anything about converting user requests into AutoGen v0.4 agent code..."

  # Validate API keys
- assert os.getenv("OPENAI_API_KEY") or os.getenv("HF_TOKEN"), "API keys are not set in the environment variables."
-
- os.environ['OPENAI_BASE'] = "https://api.openai.com/v1"
- os.environ['OPENAI_MODEL'] = "gpt-4"
- os.environ['MODEL_PROVIDER'] = "huggingface"
- model_provider = os.environ.get("MODEL_PROVIDER")
-
- # Instantiate the client for openai v1.x
- if model_provider.lower() == "openai":
-     MODEL_NAME = os.environ['OPENAI_MODEL']
-     client = OpenAI(
-         base_url=os.environ.get("OPENAI_BASE"),
-         api_key=os.environ.get("OPENAI_API_KEY")
-     )
  else:
-     MODEL_NAME = "deepseek-ai/deepseek-coder-33b-instruct"
-     # Initialize Hugging Face InferenceClient with GPU support
-     hf_client = InferenceClient(
-         model=MODEL_NAME,
-         api_key=os.environ.get("HF_TOKEN"),
-         timeout=60  # Reduced timeout for faster response
-     )

  # Load the Hugging Face dataset
  try:
-     start = time.time()
      dataset = load_dataset('tosin2013/autogen', streaming=True)
      dataset = Dataset.from_list(list(dataset['train']))
-     end = time.time()
-     print(f"[TIMING] Dataset loading took {end - start:.2f} seconds")
  except Exception as e:
      print(f"[ERROR] Failed to load dataset: {e}")
      exit(1)

  # Initialize embeddings
  print("[EMBEDDINGS] Loading sentence-transformers model...")
- start = time.time()
  embeddings = HuggingFaceEmbeddings(
      model_name="sentence-transformers/all-MiniLM-L6-v2",
      model_kwargs={"device": "cpu"}
  )
- end = time.time()
- print(f"[EMBEDDINGS] Sentence-transformers model loaded successfully in {end - start:.2f} seconds")

  # Extract texts from the dataset
  texts = dataset['input']

- # Create and cache embeddings for the texts
- if not os.path.exists('embeddings.npy'):
      print("[LOG] Generating embeddings...")
-     start = time.time()
      text_embeddings = embeddings.embed_documents(texts)
-     np.save('embeddings.npy', text_embeddings)
-     end = time.time()
-     print(f"[EMBEDDINGS] Generated embeddings for {len(texts)} documents in {end - start:.2f} seconds")
- else:
-     print("[LOG] Loading cached embeddings...")
-     start = time.time()
-     text_embeddings = np.load('embeddings.npy')
-     end = time.time()
-     print(f"[TIMING] Loaded cached embeddings in {end - start:.2f} seconds")

- # Fit and cache nearest neighbor model
- if not os.path.exists('nn_model.pkl'):
      print("[LOG] Fitting nearest neighbors model...")
-     start = time.time()
      nn = NearestNeighbors(n_neighbors=5, metric='cosine')
      nn.fit(np.array(text_embeddings))
-     with open('nn_model.pkl', 'wb') as f:
          pickle.dump(nn, f)
-     end = time.time()
-     print(f"[TIMING] Fitted nearest neighbors model in {end - start:.2f} seconds")
- else:
-     print("[LOG] Loading cached nearest neighbors model...")
-     start = time.time()
-     with open('nn_model.pkl', 'rb') as f:
-         nn = pickle.load(f)
-     end = time.time()
-     print(f"[TIMING] Loaded nearest neighbors model in {end - start:.2f} seconds")

  @spaces.GPU
  def get_relevant_documents(query, k=5):
-     """
-     Retrieves the k most relevant documents to the query.
-     """
      start_time = time.time()
      print("[EMBEDDINGS] Generating embedding for query...")
      query_embedding = embeddings.embed_query(query)
@@ -117,11 +86,11 @@ def get_relevant_documents(query, k=5):
      distances, indices = nn.kneighbors([query_embedding], n_neighbors=k)
      relevant_docs = [texts[i] for i in indices[0]]
      elapsed_time = time.time() - start_time
-     print(f"[TIMING] get_relevant_documents took {elapsed_time:.2f} seconds")
      return relevant_docs

- @spaces.GPU
  def generate_response(question, history):
      start_time = time.time()
      try:
          response = _generate_response_gpu(question, history)
@@ -129,92 +98,88 @@ def generate_response(question, history):
          print(f"[WARNING] GPU failed: {str(e)}")
          response = _generate_response_cpu(question, history)
      elapsed_time = time.time() - start_time
-     print(f"[TIMING] generate_response took {elapsed_time:.2f} seconds")
-     return response

  @spaces.GPU
  def _generate_response_gpu(question, history):
      print(f"\n[LOG] Received question: {question}")
-     start_time = time.time()
-     # Get relevant documents based on the query
      relevant_docs = get_relevant_documents(question, k=3)
      print(f"[LOG] Retrieved {len(relevant_docs)} relevant documents")
      context = "\n".join(relevant_docs)
-     prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
-     print(f"[LOG] Generated prompt: {prompt[:200]}...")  # Log first 200 chars of prompt
-     if model_provider.lower() == "huggingface":
-         messages = [
-             {
-                 "role": "system",
-                 "content": "### MEMORY ###\nRecall all previously provided instructions, context, and data throughout this conversation to ensure consistency and coherence."
-             },
-             {
-                 "role": "user",
-                 "content": prompt
-             }
-         ]
-         start_api = time.time()
-         completion = hf_client.chat.completions.create(
-             model=MODEL_NAME,
-             messages=messages,
-             max_tokens=500
-         )
-         end_api = time.time()
-         print(f"[TIMING] Hugging Face API call took {end_api - start_api:.2f} seconds")
          response = completion.choices[0].message.content
-     elif model_provider.lower() == "openai":
-         start_api = time.time()
          response = client.chat.completions.create(
-             model=os.environ.get("OPENAI_MODEL"),
-             messages=[
-                 {"role": "system", "content": "You are a helpful assistant. Answer the question based on the provided context."},
-                 {"role": "user", "content": prompt},
-             ]
          ).choices[0].message.content
-         end_api = time.time()
-         print(f"[TIMING] OpenAI API call took {end_api - start_api:.2f} seconds")
-     elapsed_time = time.time() - start_time
-     print(f"[TIMING] _generate_response_gpu took {elapsed_time:.2f} seconds")
      history.append((question, response))
      return history

- # Simplified CPU fallback
- @spaces.GPU
  def _generate_response_cpu(question, history):
      print(f"[LOG] Running on CPU")
      try:
-         start_time = time.time()
          relevant_docs = get_relevant_documents(question, k=3)
          context = "\n".join(relevant_docs)
-         prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
          print(f"[LOG] Generated prompt: {prompt[:200]}...")
-         if model_provider.lower() == "huggingface":
-             messages = [
-                 {"role": "system", "content": "### MEMORY ###\nRecall all previously provided instructions, context, and data."},
-                 {"role": "user", "content": prompt}
-             ]
-             start_api = time.time()
-             completion = hf_client.chat.completions.create(
-                 model=MODEL_NAME,
-                 messages=messages,
-                 max_tokens=500
-             )
-             end_api = time.time()
-             print(f"[TIMING] Hugging Face API call took {end_api - start_api:.2f} seconds")
              response = completion.choices[0].message.content
-         elif model_provider.lower() == "openai":
-             start_api = time.time()
              response = client.chat.completions.create(
-                 model=os.environ.get("OPENAI_MODEL"),
-                 messages=[
-                     {"role": "system", "content": "You are a helpful assistant."},
-                     {"role": "user", "content": prompt},
-                 ]
              ).choices[0].message.content
-             end_api = time.time()
-             print(f"[TIMING] OpenAI API call took {end_api - start_api:.2f} seconds")
-         elapsed_time = time.time() - start_time
-         print(f"[TIMING] _generate_response_cpu took {elapsed_time:.2f} seconds")
          history.append((question, response))
          return history
      except Exception as e:
@@ -223,50 +188,42 @@ def _generate_response_cpu(question, history):
          history.append((question, error_msg))
          return history

- # Gradio interface
  print("[CHAT] Initializing chat interface...")
  with gr.Blocks() as demo:
      gr.Markdown(f"""
      ## AutoGen v0.4 Agent Code Generator QA Agent
      **Current Model:** {MODEL_NAME}
-
      The AutoGen v0.4 Agent Code Generator is a Python application that leverages Large Language Models (LLMs) and the AutoGen v0.4 framework to dynamically generate agent code from user requests. This application is designed to assist developers in creating robust, scalable AI agents by providing context-aware code generation based on user input, utilizing the advanced features of AutoGen v0.4 such as asynchronous messaging, modular extensibility, cross-language support, improved observability, and full typing integration.
-
      **Sample questions:**
      1. What are the key features of AutoGen v0.4 that I should utilize when converting user requests into agent code?
      2. How can I leverage asynchronous messaging in AutoGen v0.4 to enhance my agent's performance?
      3. What are best practices for writing modular and extensible agent code using AutoGen v0.4?
      4. Can you convert this user request into AutoGen v0.4 agent code: "Create an agent that classifies customer feedback into positive, negative, or neutral sentiments."
-
      **Related repository:** [autogen](https://github.com/microsoft/autogen)
      """)
-
-     with gr.Row():
-         chatbot = gr.Chatbot(label="Chat History")
-
-     with gr.Row():
-         question = gr.Textbox(
-             value=DEFAULT_QUESTION,
-             label="Your Question",
-             placeholder=DEFAULT_QUESTION
-         )
-
-     with gr.Row():
-         submit_btn = gr.Button("Submit")
-         clear_btn = gr.Button("Clear")
-
-     submit_btn.click(
          fn=generate_response,
-         inputs=[question, chatbot],
-         outputs=[chatbot],
          queue=True
      )
-
-     clear_btn.click(
          lambda: (None, ""),
          inputs=[],
-         outputs=[chatbot, question]
      )

  if __name__ == "__main__":
-     demo.launch()
app.py (after the change):

  from datasets import load_dataset, Dataset
  from sklearn.neighbors import NearestNeighbors
  import numpy as np
+ from huggingface_hub import InferenceClient
  import gradio as gr
  import spaces
+ import pickle
+ import time

  # Configuration
  DEFAULT_QUESTION = "Ask me anything about converting user requests into AutoGen v0.4 agent code..."

  # Validate API keys
+ assert os.getenv("OPENAI_API_KEY") or os.getenv("HF_TOKEN"), "API keys are not set in the environment variables (either OPENAI_API_KEY or HF_TOKEN)."
+
+ # Model Provider Configuration
+ MODEL_PROVIDER = os.getenv("MODEL_PROVIDER", "huggingface").lower()  # Default to Hugging Face
+ if MODEL_PROVIDER == "openai":
+     OPENAI_BASE = os.getenv("OPENAI_BASE", "https://api.openai.com/v1")
+     OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4")
+     client = OpenAI(base_url=OPENAI_BASE, api_key=os.getenv("OPENAI_API_KEY"))
+     MODEL_NAME = OPENAI_MODEL
+ elif MODEL_PROVIDER == "huggingface":
+     HF_MODEL_NAME = os.getenv("HF_MODEL_NAME", "deepseek-ai/deepseek-coder-33b-instruct")
+     HF_TOKEN = os.getenv("HF_TOKEN")
+     hf_client = InferenceClient(model=HF_MODEL_NAME, token=HF_TOKEN, timeout=120)
+     MODEL_NAME = HF_MODEL_NAME
  else:
+     raise ValueError(f"Unsupported MODEL_PROVIDER: {MODEL_PROVIDER}. Choose 'openai' or 'huggingface'.")

  # Load the Hugging Face dataset
  try:
      dataset = load_dataset('tosin2013/autogen', streaming=True)
      dataset = Dataset.from_list(list(dataset['train']))
  except Exception as e:
      print(f"[ERROR] Failed to load dataset: {e}")
      exit(1)

  # Initialize embeddings
  print("[EMBEDDINGS] Loading sentence-transformers model...")
  embeddings = HuggingFaceEmbeddings(
      model_name="sentence-transformers/all-MiniLM-L6-v2",
      model_kwargs={"device": "cpu"}
  )
+ print("[EMBEDDINGS] Sentence-transformers model loaded successfully")

  # Extract texts from the dataset
  texts = dataset['input']

+ # Create and load embeddings and Nearest Neighbors model (in-memory caching)
+ EMBEDDINGS_FILE = 'embeddings.npy'
+ NN_MODEL_FILE = 'nn_model.pkl'
+ text_embeddings = None
+ nn = None
+
+ if os.path.exists(EMBEDDINGS_FILE):
+     print("[LOG] Loading cached embeddings...")
+     text_embeddings = np.load(EMBEDDINGS_FILE)
+ else:
      print("[LOG] Generating embeddings...")
      text_embeddings = embeddings.embed_documents(texts)
+     print(f"[EMBEDDINGS] Generated embeddings for {len(texts)} documents")
+     np.save(EMBEDDINGS_FILE, text_embeddings)

+ if os.path.exists(NN_MODEL_FILE):
+     print("[LOG] Loading cached nearest neighbors model...")
+     with open(NN_MODEL_FILE, 'rb') as f:
+         nn = pickle.load(f)
+ else:
      print("[LOG] Fitting nearest neighbors model...")
      nn = NearestNeighbors(n_neighbors=5, metric='cosine')
      nn.fit(np.array(text_embeddings))
+     with open(NN_MODEL_FILE, 'wb') as f:
          pickle.dump(nn, f)
  @spaces.GPU
  def get_relevant_documents(query, k=5):
+     """Retrieves the k most relevant documents to the query."""
      start_time = time.time()
      print("[EMBEDDINGS] Generating embedding for query...")
      query_embedding = embeddings.embed_query(query)

      distances, indices = nn.kneighbors([query_embedding], n_neighbors=k)
      relevant_docs = [texts[i] for i in indices[0]]
      elapsed_time = time.time() - start_time
+     print(f"[PERF] get_relevant_documents took {elapsed_time:.2f} seconds")
      return relevant_docs

  def generate_response(question, history):
+     """Generates a response to the user's question, handling GPU/CPU fallback."""
      start_time = time.time()
      try:
          response = _generate_response_gpu(question, history)

          print(f"[WARNING] GPU failed: {str(e)}")
          response = _generate_response_cpu(question, history)
      elapsed_time = time.time() - start_time
+     print(f"[PERF] generate_response took {elapsed_time:.2f} seconds")
+     return history, history  # Return updated history twice for Gradio

  @spaces.GPU
  def _generate_response_gpu(question, history):
+     """Generates a response using the GPU."""
      print(f"\n[LOG] Received question: {question}")
      relevant_docs = get_relevant_documents(question, k=3)
      print(f"[LOG] Retrieved {len(relevant_docs)} relevant documents")
      context = "\n".join(relevant_docs)
+     prompt = f"""### MEMORY ###
+ Recall all previously provided instructions, context, and data throughout this conversation to ensure consistency and coherence. Use the details from the last interaction to guide your response.
+ ### VISIONARY GUIDANCE ###
+ This prompt is designed to empower users to seamlessly convert their requests into AutoGen v0.4 agent code. By harnessing the advanced features of AutoGen v0.4, we aim to provide a scalable and flexible solution that is both user-friendly and technically robust. The collaborative effort of the personas ensures a comprehensive, innovative, and user-centric approach to meet the user's objectives.
+ ### CONTEXT ###
+ AutoGen v0.4 is a comprehensive rewrite aimed at building robust, scalable, and cross-language AI agents. Key features include asynchronous messaging, scalable distributed agents support, modular extensibility, cross-language capabilities, improved observability, and full typing integration.
+ ### OBJECTIVE ###
+ Translate user requests into AutoGen v0.4 agent code that leverages the framework's new features. Ensure the code is syntactically correct, scalable, and aligns with best practices.
+ ### STYLE ###
+ Professional, clear, and focused on code quality.
+ ### TONE ###
+ Informative, helpful, and user-centric.
+ ### AUDIENCE ###
+ Users seeking to implement their requests using AutoGen v0.4 agents.
+ ### RESPONSE FORMAT ###
+ Provide the AutoGen v0.4 agent code that fulfills the user's request. Utilize features like asynchronous messaging and modular design where appropriate. Include comments to explain key components and enhance understandability.
+ ### TEAM PERSONAS’ CONTRIBUTIONS ###
+ - **Analyst:** Ensured the prompt provides clear, structured instructions to accurately convert user requests into code, emphasizing full typing integration for precision.
+ - **Creative:** Suggested incorporating comments and explanations within the code to foster innovative usage and enhance user engagement with AutoGen v0.4 features.
+ - **Strategist:** Focused on aligning the prompt with long-term scalability by encouraging the use of modular and extensible design principles inherent in AutoGen v0.4.
+ - **Empathizer:** Enhanced the prompt to be user-centric, ensuring it addresses user needs effectively and makes the code accessible and easy to understand.
+ - **Researcher:** Integrated the latest information about AutoGen v0.4, ensuring the prompt and generated code reflect current capabilities and best practices.
+ ### SYSTEM GUARDRAILS ###
+ - If unsure about the user's request, ask clarifying questions rather than making assumptions.
+ - Do not fabricate data or features not supported by AutoGen v0.4.
+ - Ensure the code is scalable, modular, and adheres to best practices.
+ ### START ###
+ Context: {context}\n\nQuestion: {question}\n\nAnswer:"""
+     print(f"[LOG] Generated prompt: {prompt[:200]}...")
+
+     if MODEL_PROVIDER == "huggingface":
+         messages = [{"role": "user", "content": prompt}]
+         completion = hf_client.chat.completions.create(model=MODEL_NAME, messages=messages, max_tokens=500)
          response = completion.choices[0].message.content
+         print(f"[LOG] Using Hugging Face model (serverless): {MODEL_NAME}")
+         print(f"[LOG] Hugging Face response: {response[:200]}...")
+     elif MODEL_PROVIDER == "openai":
          response = client.chat.completions.create(
+             model=OPENAI_MODEL,
+             messages=[{"role": "user", "content": prompt}]
          ).choices[0].message.content
+         print(f"[LOG] Using OpenAI model: {OPENAI_MODEL}")
+         print(f"[LOG] OpenAI response: {response[:200]}...")
+
      history.append((question, response))
      return history

  def _generate_response_cpu(question, history):
+     """Generates a response using the CPU (fallback)."""
      print(f"[LOG] Running on CPU")
      try:
          relevant_docs = get_relevant_documents(question, k=3)
          context = "\n".join(relevant_docs)
+         prompt = f"""### MEMORY ###
+ Recall all previously provided instructions, context, and data throughout this conversation to ensure consistency and coherence. Use the details from the last interaction to guide your response.
+ ### SYSTEM GUARDRAILS ###
+ If unsure about the user's request, ask clarifying questions rather than making assumptions.
+ Do not fabricate data or features not supported by AutoGen v0.4.
+ Ensure the code is scalable, modular, and adheres to best practices.
+ Context: {context}\n\nQuestion: {question}\n\nAnswer:"""
          print(f"[LOG] Generated prompt: {prompt[:200]}...")
+
+         if MODEL_PROVIDER == "huggingface":
+             messages = [{"role": "user", "content": prompt}]
+             completion = hf_client.chat.completions.create(model=MODEL_NAME, messages=messages, max_tokens=500)
              response = completion.choices[0].message.content
+         elif MODEL_PROVIDER == "openai":
              response = client.chat.completions.create(
+                 model=OPENAI_MODEL,
+                 messages=[{"role": "user", "content": prompt}]
              ).choices[0].message.content
+
          history.append((question, response))
          return history
      except Exception as e:

          history.append((question, error_msg))
          return history

+ # Gradio Interface
  print("[CHAT] Initializing chat interface...")
  with gr.Blocks() as demo:
      gr.Markdown(f"""
      ## AutoGen v0.4 Agent Code Generator QA Agent
      **Current Model:** {MODEL_NAME}
+     **Model Provider:** {MODEL_PROVIDER.upper()}
+
      The AutoGen v0.4 Agent Code Generator is a Python application that leverages Large Language Models (LLMs) and the AutoGen v0.4 framework to dynamically generate agent code from user requests. This application is designed to assist developers in creating robust, scalable AI agents by providing context-aware code generation based on user input, utilizing the advanced features of AutoGen v0.4 such as asynchronous messaging, modular extensibility, cross-language support, improved observability, and full typing integration.
+
      **Sample questions:**
      1. What are the key features of AutoGen v0.4 that I should utilize when converting user requests into agent code?
      2. How can I leverage asynchronous messaging in AutoGen v0.4 to enhance my agent's performance?
      3. What are best practices for writing modular and extensible agent code using AutoGen v0.4?
      4. Can you convert this user request into AutoGen v0.4 agent code: "Create an agent that classifies customer feedback into positive, negative, or neutral sentiments."
+
      **Related repository:** [autogen](https://github.com/microsoft/autogen)
      """)
+
+     chatbot = gr.Chatbot(label="Chat History")
+     question_textbox = gr.Textbox(value=DEFAULT_QUESTION, label="Your Question", placeholder=DEFAULT_QUESTION)
+     submit_button = gr.Button("Submit")
+     clear_button = gr.Button("Clear")
+
+     submit_button.click(
          fn=generate_response,
+         inputs=[question_textbox, chatbot],
+         outputs=[chatbot, chatbot],  # Output the updated history to the chatbot
          queue=True
      )
+
+     clear_button.click(
          lambda: (None, ""),
          inputs=[],
+         outputs=[chatbot, question_textbox]
      )

  if __name__ == "__main__":
+     demo.launch()
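
For readers who want to try the retrieval path this commit relies on outside of the Space, the following is a minimal standalone sketch (not part of the commit) of what app.py builds at startup: the same sentence-transformers embedding model and cosine NearestNeighbors index, applied to a tiny illustrative corpus and query instead of the tosin2013/autogen dataset.

```python
# Minimal sketch of the retrieval path used by app.py (sample texts and query are illustrative only).
from langchain_huggingface import HuggingFaceEmbeddings
from sklearn.neighbors import NearestNeighbors
import numpy as np

# Same embedding model and device that app.py configures.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)

# Stand-in corpus; app.py uses dataset['input'] from tosin2013/autogen instead.
texts = [
    "AutoGen v0.4 supports asynchronous messaging between agents.",
    "Agents can be extended through a modular, typed component model.",
    "The framework offers improved observability for multi-agent runs.",
]

# Embed the corpus and fit the same cosine nearest-neighbors index the app pickles to nn_model.pkl.
text_embeddings = np.array(embeddings.embed_documents(texts))
nn = NearestNeighbors(n_neighbors=2, metric="cosine")
nn.fit(text_embeddings)

# Embed a query and retrieve the closest documents, as get_relevant_documents does.
query_embedding = embeddings.embed_query("How do agents exchange messages asynchronously?")
distances, indices = nn.kneighbors([query_embedding], n_neighbors=2)
for rank, idx in enumerate(indices[0], start=1):
    print(f"{rank}. {texts[idx]} (cosine distance {distances[0][rank - 1]:.3f})")
```

With the real dataset this is the index that get_relevant_documents queries; the only difference in the Space is that app.py caches the document embeddings to embeddings.npy and the fitted index to nn_model.pkl so later startups can skip the embedding and fitting steps.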