Files changed (1)
  1. app.py +18 -353
app.py CHANGED
@@ -1,359 +1,24 @@
- import os
- from typing import List, Optional
- from pydantic import BaseModel, Field
- import gradio as gr
- from datasets import load_dataset
- from huggingface_hub import InferenceClient
- import black
-
- # Initialize the inference client
- HF_TOKEN = os.getenv("HF_TOKEN")
- HF_API_URL = os.getenv("HF_API_URL", "Qwen/Qwen2.5-Coder-32B-Instruct")
- client = InferenceClient(model=HF_API_URL, token=HF_TOKEN)
-
- # Load questions from Hugging Face dataset
- EXAM_MAX_QUESTIONS = int(os.getenv("EXAM_MAX_QUESTIONS", 1))
- EXAM_DATASET_ID = "agents-course/smolagents-quiz-data"
-
- # prep the dataset for the quiz
- ds = load_dataset(EXAM_DATASET_ID, split="train", download_mode="force_redownload")
- quiz_data = list(ds)
- if EXAM_MAX_QUESTIONS:
-     quiz_data = quiz_data[:EXAM_MAX_QUESTIONS]
-
- # Check if dataset has image feature
- HAS_IMAGE_FEATURE = "image" in ds.features
-
-
- class CriterionFeedback(BaseModel):
-     """Feedback for a single assessment criterion"""
-
-     criterion: str = Field(..., description="The assessment criterion being evaluated")
-     met: bool = Field(..., description="Whether the criterion was met")
-     explanation: str = Field(
-         ..., description="Detailed explanation of how well the criterion was met"
-     )
-     improvement_tips: Optional[str] = Field(
-         None, description="Specific tips for improvement if needed"
-     )
-
-
- class CodeFeedback(BaseModel):
-     """Structured feedback for code submission"""
-
-     overall_feedback: str = Field(
-         ..., description="Overall assessment of the code solution"
-     )
-     criteria_feedback: List[CriterionFeedback] = Field(
-         ..., description="Detailed feedback for each assessment criterion"
-     )
-
-
- def format_python_code(code: str) -> str:
-     """Format Python code using black."""
-     try:
-         return black.format_str(code, mode=black.Mode())
-     except Exception as e:
-         gr.Warning(f"Code formatting failed: {str(e)}")
-         return code
-
-
- EVALUATION_TEMPLATE = """Evaluate this Python code solution:
-
- Challenge:
- {challenge}
-
- Reference Solution:
- ```python
-
- {solution}
-
- ```
-
- Student's Solution:
-
- ```python
-
- {student_code}
-
- ```
-
- Assessment Criteria:
- {criteria}
-
- Approach:
- Be highly tolerant of differences in approach, as long as they meet the Assessment Criteria.
-
- Provide detailed feedback on how well each criterion was met."""
-
-
- def check_code(
-     user_code: str, solution: str, challenge: str, assessment_criteria: List[str]
- ) -> dict:
-     """
-     Use LLM to evaluate the user's code solution and provide structured feedback.
-     """
-     # Format both user code and solution
-     formatted_user_code = format_python_code(user_code)
-     formatted_solution = format_python_code(solution)
-
-     # Format criteria as bullet points
-     criteria_text = "\n".join(f"- {c}" for c in assessment_criteria)
-
-     # Fill the template
-     prompt = EVALUATION_TEMPLATE.format(
-         challenge=challenge,
-         solution=formatted_solution,
-         student_code=formatted_user_code,
-         criteria=criteria_text,
-     )
-
-     try:
-         # Get structured feedback using response_format with schema from Pydantic model
-         response = client.text_generation(
-             prompt=prompt,
-             grammar={
-                 "type": "json_object",
-                 "value": CodeFeedback.model_json_schema(),
-             },
-         )
-
-         # Parse response into Pydantic model
-         feedback = CodeFeedback.model_validate_json(response)
-
-         # Format the feedback for display
-         formatted_feedback = [
-             f"### Overall Assessment\n{feedback.overall_feedback}\n\n"
-         ]
-
-         for cf in feedback.criteria_feedback:
-             tip = cf.improvement_tips or ""
-             tip_text = f"\n💡 Tip: {tip}" if tip else ""
-
-             formatted_feedback.append(
-                 f"### {cf.criterion}\n"
-                 f"{'✅' if cf.met else '❌'} {cf.explanation}"
-                 f"{tip_text}\n"
-             )
-
-         return {"feedback": "\n".join(formatted_feedback)}
-
-     except Exception as e:
-         gr.Warning(f"Error generating feedback: {str(e)}")
-         return {"feedback": "Unable to generate detailed feedback due to an error."}
-
-
- def on_user_logged_in(token: gr.OAuthToken | None):
-     """
-     Handle user login state.
-     On a valid token, hide the login button and reveal the Start button while keeping Next hidden.
-     Also, clear the question text, code input, status, and image.
-     """
-     if token is not None:
-         return (
-             gr.update(visible=False),  # login_btn hidden
-             gr.update(visible=True),  # start_btn shown
-             gr.update(visible=False),  # next_btn hidden
-             "",  # Clear question_text
-             gr.update(value="", visible=False),  # Clear code_input
-             "",  # Clear status_text
-             gr.update(value="", visible=False),  # Clear question_image
-         )
-     else:
-         return (
-             gr.update(visible=True),  # login_btn visible
-             gr.update(visible=False),  # start_btn hidden
-             gr.update(visible=False),  # next_btn hidden
-             "",
-             gr.update(value="", visible=False),
-             "",
-             gr.update(value="", visible=False),
-         )
-
-
- def handle_quiz(question_idx, user_answers, submitted_code, is_start):
-     """Handle quiz state and progression"""
-     if is_start:
-         question_idx = 0
-     else:
-         # If not the first question and there's a submission, store it
-         if question_idx < len(quiz_data) and submitted_code.strip():
-             current_q = quiz_data[question_idx]
-             # Format the submitted code before checking
-             formatted_code = format_python_code(submitted_code)
-             feedback_dict = check_code(
-                 formatted_code,
-                 current_q["solution"],
-                 current_q["challenge"],
-                 current_q["assessment_criteria"],
-             )
-             user_answers.append(
-                 {
-                     "challenge": current_q["challenge"],
-                     "submitted_code": formatted_code,
-                     "correct_solution": current_q["solution"],
-                     "assessment_criteria": current_q["assessment_criteria"],
-                     "feedback": feedback_dict["feedback"],
-                 }
-             )
-         question_idx += 1
-
-     # If we've reached the end, show final results
-     if question_idx >= len(quiz_data):
-         results_text = """## Code Review Complete! 📚
- This feedback should help you improve your skills.
-
- ⛔️ The feedback uses Qwen/Qwen2.5-Coder-32B-Instruct to compare your response to a gold
- standard solution. As we know, LLMs are not perfect. You should compare your work against
- the assessment criteria if you doubt the feedback.
-
- Here's your detailed feedback:"""
-
-         for idx, answer in enumerate(user_answers):
-             # Format assessment criteria as bullet points
-             criteria_bullets = "\n".join(
-                 f"- {c}" for c in answer["assessment_criteria"]
-             )
-
-             # Build the results text piece by piece
-             results_text += (
-                 f"### Question {idx + 1}: {answer['challenge']}\n\n"
-                 "#### Your Solution:\n```python\n"
-                 f"{answer['submitted_code']}\n```\n\n"
-                 "#### Reference Solution:\n```python\n"
-                 f"{answer['correct_solution']}\n```\n\n"
-                 "#### Assessment Criteria:\n"
-                 f"{criteria_bullets}\n\n"
-                 "#### Feedback:\n"
-                 f"{answer['feedback']}\n\n"
-                 "---\n\n"
-             )
-
-         return (
-             "",  # question_text cleared
-             gr.update(value="", visible=False),  # hide code_input
-             "Review your feedback below to improve your coding skills!",
-             question_idx,  # updated question index
-             user_answers,  # accumulated answers
-             gr.update(visible=False),  # start_btn hidden
-             gr.update(visible=False),  # next_btn hidden
-             gr.update(value=results_text, visible=True),  # final_markdown
-             gr.update(visible=False),  # question_image hidden
-         )
-     else:
-         # Show the next question
-         q = quiz_data[question_idx]
-         # Format assessment criteria as bullet points
-         criteria_bullets = "\n".join(f"- {c}" for c in q["assessment_criteria"])
-         challenge_text = (
-             f"## Question {question_idx + 1}\n\n"
-             f"### Challenge:\n{q['challenge']}\n\n"
-             "### Assessment Criteria:\n"
-             f"{criteria_bullets}"
-         )
-
-         # Only show image if the feature exists and question has an image
-         show_image = HAS_IMAGE_FEATURE and q.get("image") is not None
-         image_update = gr.update(
-             value=q.get("image") if show_image else None, visible=show_image
-         )
-
-         return (
-             challenge_text,  # question_text
-             gr.update(value=q["placeholder"], visible=True),  # code_input
-             "Submit your solution and click 'Next' to continue.",
-             question_idx,  # updated question_idx
-             user_answers,  # user_answers
-             gr.update(visible=False),  # start_btn hidden
-             gr.update(visible=True),  # next_btn visible
-             gr.update(visible=False),  # final_markdown hidden
-             image_update,  # question_image
-         )
-
-
- with gr.Blocks() as demo:
-     demo.title = f"Coding Quiz: {EXAM_DATASET_ID}"
-     # State variables
-     question_idx = gr.State(value=0)
-     user_answers = gr.State(value=[])
-
-     with gr.Row(variant="compact"):
-         intro_text = """
-         ## Welcome to the smolagents code reviewer
-
-         This application will review your smolagents code and provide feedback on your solutions. This exercise is not reviewed or certified! It's about trying out smolagents for the first time.
-
-         ℹ️ Log in first, then click 'Start' to begin. Complete each coding challenge and click 'Next' to proceed. You'll get feedback on your solutions at the end."""
-         intro_text = gr.Markdown(intro_text)
-     with gr.Row(variant="panel"):
-         with gr.Column():
-             question_text = gr.Markdown("")
-             question_image = gr.Image(
-                 label="Question Image",
-                 visible=True if HAS_IMAGE_FEATURE else False,
-                 type="pil",
-             )  # Add image component
-         with gr.Column():
-             code_input = gr.Code(
-                 language="python", label="Your Solution", visible=False
-             )
-
-     with gr.Row(variant="compact"):
-         status_text = gr.Markdown("")
-
-     with gr.Row(variant="compact"):
-         login_btn = gr.LoginButton()
-         start_btn = gr.Button("Start")
-         next_btn = gr.Button("Next ⏭️", visible=False)
-
-     with gr.Row(variant="compact"):
-         final_markdown = gr.Markdown("", visible=False)
-
-     login_btn.click(
-         fn=on_user_logged_in,
-         inputs=None,
-         outputs=[
-             login_btn,
-             start_btn,
-             next_btn,
-             question_text,
-             code_input,
-             status_text,
-             question_image,
-         ],
-     )
-
-     start_btn.click(
-         fn=handle_quiz,
-         inputs=[question_idx, user_answers, code_input, gr.State(True)],
-         outputs=[
-             question_text,  # Markdown with question text
-             code_input,  # Code input field
-             status_text,  # Status text (instructions/status messages)
-             question_idx,  # Updated question index (state)
-             user_answers,  # Updated user answers (state)
-             start_btn,  # Update for start button (will be hidden)
-             next_btn,  # Update for next button (shown for in-progress quiz)
-             final_markdown,  # Final results markdown (hidden until quiz ends)
-             question_image,  # Image update for the quiz question
-         ],
-     )
-
-     next_btn.click(
-         fn=handle_quiz,
-         inputs=[question_idx, user_answers, code_input, gr.State(False)],
-         outputs=[
-             question_text,
-             code_input,
-             status_text,
-             question_idx,
-             user_answers,
-             start_btn,
-             next_btn,
-             final_markdown,
-             question_image,
-         ],
-     )
-
- if __name__ == "__main__":
-     demo.launch()
+ import os
+ from smolagents import DuckDuckGoSearchTool, LiteLLMModel, ToolCallingAgent
+ from e2b import Sandbox
+
+ # Read API keys from the environment rather than hardcoding secrets in source
+ model = LiteLLMModel(
+     model_id="groq/meta-llama/llama-4-maverick-17b-128e-instruct",
+     api_key=os.getenv("GROQ_API_KEY"),
+ )
+
+ sandbox = Sandbox(api_key=os.getenv("E2B_API_KEY"))  # You can specify CPU, memory, timeout, etc.
+
+ # Create the web search agent
+ web_agent = ToolCallingAgent(
+     tools=[DuckDuckGoSearchTool()],  # Tools the agent is allowed to call
+     model=model,  # LLM that drives the agent
+     max_steps=5,  # Cap on reasoning/tool-calling steps
+     name="web_search_agent",  # Name must be a valid Python identifier
+     description="Searches the web with DuckDuckGo and reports the results",
+ )
+
+ with sandbox:  # Note: the agent itself still runs on the host, not inside the sandbox
+     result = web_agent.run("What are some recent breakthroughs in AI?")
+     print(result)
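
As the inline note says, opening the E2B sandbox as a context manager does not by itself move execution into it: `web_agent.run(...)` still executes on the host. If the intent is to run agent-generated code inside E2B, recent smolagents releases support this directly on `CodeAgent`. A minimal sketch under those assumptions (a smolagents version with the `executor_type` parameter, and `GROQ_API_KEY`/`E2B_API_KEY` exported in the environment):

```python
import os

from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel

# Assumes GROQ_API_KEY and E2B_API_KEY are set in the environment;
# the E2B executor picks up E2B_API_KEY on its own.
model = LiteLLMModel(
    model_id="groq/meta-llama/llama-4-maverick-17b-128e-instruct",
    api_key=os.getenv("GROQ_API_KEY"),
)

# executor_type="e2b" makes the agent execute its generated Python inside
# an E2B sandbox instead of the local process.
agent = CodeAgent(
    tools=[DuckDuckGoSearchTool()],
    model=model,
    max_steps=5,
    executor_type="e2b",
)

print(agent.run("What are some recent breakthroughs in AI?"))
```

With this approach the separate `e2b.Sandbox` handle is no longer needed, since the executor creates and tears down its own sandbox.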