Update app.py #17
by vesjanamimini · opened

app.py CHANGED
@@ -1,359 +1,24 @@
-import os
-from typing import List, Optional
-from pydantic import BaseModel, Field
-import gradio as gr
-from datasets import load_dataset
-from huggingface_hub import InferenceClient
-import black
-
-
-# Inference client used to grade submissions
-client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
-
-
-# Load questions from Hugging Face dataset
-EXAM_MAX_QUESTIONS = int(os.getenv("EXAM_MAX_QUESTIONS", 1))
-EXAM_DATASET_ID = "agents-course/smolagents-quiz-data"
-
-#
-ds = load_dataset(EXAM_DATASET_ID, split="train", download_mode="force_redownload")
-quiz_data = list(ds)
-if EXAM_MAX_QUESTIONS:
-    quiz_data = quiz_data[:EXAM_MAX_QUESTIONS]
-
-# Check if dataset has image feature
-HAS_IMAGE_FEATURE = "image" in ds.features
-
-
-class CriterionFeedback(BaseModel):
-    """Structured feedback for a single assessment criterion"""
-
-    criterion: str = Field(..., description="The assessment criterion being evaluated")
-    met: bool = Field(..., description="Whether the criterion was met")
-    explanation: str = Field(
-        ..., description="Detailed explanation of how well the criterion was met"
-    )
-    improvement_tips: Optional[str] = Field(
-        None, description="Specific tips for improvement if needed"
-    )
-
-
-class CodeFeedback(BaseModel):
-    """Structured feedback for code submission"""
-
-    overall_feedback: str = Field(
-        ..., description="Overall assessment of the code solution"
-    )
-    criteria_feedback: List[CriterionFeedback] = Field(
-        ..., description="Detailed feedback for each assessment criterion"
-    )
-
-
-def format_python_code(code: str) -> str:
-    """Format Python code using black."""
-    try:
-        return black.format_str(code, mode=black.Mode())
-    except Exception as e:
-        gr.Warning(f"Code formatting failed: {str(e)}")
-        return code
-
-
-EVALUATION_TEMPLATE = """Evaluate this Python code solution:
-
-Challenge:
-{challenge}
-
-Reference Solution:
-```python
-{solution}
-```
-
-Student's Solution:
-```python
-{student_code}
-```
-
-Assessment Criteria:
-{criteria}
-
-Approach:
-Be highly tolerant of differences in approach, as long as they meet the Assessment Criteria.
-
-Provide detailed feedback on how well each criterion was met."""
-
-
-def check_code(
-    user_code: str, solution: str, challenge: str, assessment_criteria: List[str]
-) -> dict:
-    """
-    Use an LLM to evaluate the user's code solution and provide structured feedback.
-    """
-    # Format both user code and solution
-    formatted_user_code = format_python_code(user_code)
-    formatted_solution = format_python_code(solution)
-
-    # Format criteria as bullet points
-    criteria_text = "\n".join(f"- {c}" for c in assessment_criteria)
-
-    # Fill the template
-    prompt = EVALUATION_TEMPLATE.format(
-        challenge=challenge,
-        solution=formatted_solution,
-        student_code=formatted_user_code,
-        criteria=criteria_text,
-    )
-
-    try:
-        # Get structured feedback using a grammar built from the Pydantic model's schema
-        response = client.text_generation(
-            prompt=prompt,
-            grammar={
-                "type": "json_object",
-                "value": CodeFeedback.model_json_schema(),
-            },
-        )
-
-        # Parse response into Pydantic model
-        feedback = CodeFeedback.model_validate_json(response)
-
-        # Format the feedback for display
-        formatted_feedback = [
-            f"### Overall Assessment\n{feedback.overall_feedback}\n\n"
-        ]
-
-        for cf in feedback.criteria_feedback:
-            tip = cf.improvement_tips or ""
-            tip_text = f"\n💡 Tip: {tip}" if tip else ""
-
-            formatted_feedback.append(
-                f"### {cf.criterion}\n"
-                f"{'✅' if cf.met else '❌'} {cf.explanation}"
-                f"{tip_text}\n"
-            )
-
-        return {"feedback": "\n".join(formatted_feedback)}
-
-    except Exception as e:
-        gr.Warning(f"Error generating feedback: {str(e)}")
-        return {"feedback": "Unable to generate detailed feedback due to an error."}
-
-
-def on_user_logged_in(token: gr.OAuthToken | None):
-    """
-    Handle user login state.
-    On a valid token, hide the login button and reveal the Start button while keeping Next hidden.
-    Also clear the question text, code input, status, and image.
-    """
-    if token is not None:
-        return (
-            gr.update(visible=False),  # login_btn hidden
-            gr.update(visible=True),  # start_btn shown
-            gr.update(visible=False),  # next_btn hidden
-            "",  # Clear question_text
-            gr.update(value="", visible=False),  # Clear code_input
-            "",  # Clear status_text
-            gr.update(value="", visible=False),  # Clear question_image
-        )
-    else:
-        return (
-            gr.update(visible=True),  # login_btn visible
-            gr.update(visible=False),  # start_btn hidden
-            gr.update(visible=False),  # next_btn hidden
-            "",
-            gr.update(value="", visible=False),
-            "",
-            gr.update(value="", visible=False),
-        )
-
-
-def handle_quiz(question_idx, user_answers, submitted_code, is_start):
-    """Handle quiz state and progression"""
-    if is_start:
-        question_idx = 0
-    else:
-        # If not the first question and there's a submission, store it
-        if question_idx < len(quiz_data) and submitted_code.strip():
-            current_q = quiz_data[question_idx]
-            # Format the submitted code before checking
-            formatted_code = format_python_code(submitted_code)
-            feedback_dict = check_code(
-                formatted_code,
-                current_q["solution"],
-                current_q["challenge"],
-                current_q["assessment_criteria"],
-            )
-            user_answers.append(
-                {
-                    "challenge": current_q["challenge"],
-                    "submitted_code": formatted_code,
-                    "correct_solution": current_q["solution"],
-                    "assessment_criteria": current_q["assessment_criteria"],
-                    "feedback": feedback_dict["feedback"],
-                }
-            )
-        question_idx += 1
-
-    # If we've reached the end, show final results
-    if question_idx >= len(quiz_data):
-        results_text = """## Code Review Complete! 📚
-This feedback should help you improve your skills.
-
-⛔️ The feedback uses Qwen/Qwen2.5-Coder-32B-Instruct to compare your response to a gold
-standard solution. As we know, LLMs are not perfect. You should compare your work against
-the assessment criteria if you doubt the feedback.
-
-Here's your detailed feedback:"""
-
-        for idx, answer in enumerate(user_answers):
-            # Format assessment criteria as bullet points
-            criteria_bullets = "\n".join(
-                f"- {c}" for c in answer["assessment_criteria"]
-            )
-
-            # Build the results text piece by piece
-            results_text += (
-                f"### Question {idx + 1}: {answer['challenge']}\n\n"
-                "#### Your Solution:\n```python\n"
-                f"{answer['submitted_code']}\n```\n\n"
-                "#### Reference Solution:\n```python\n"
-                f"{answer['correct_solution']}\n```\n\n"
-                "#### Assessment Criteria:\n"
-                f"{criteria_bullets}\n\n"
-                "#### Feedback:\n"
-                f"{answer['feedback']}\n\n"
-                "---\n\n"
-            )
-
-        return (
-            "",  # question_text cleared
-            gr.update(value="", visible=False),  # hide code_input
-            "Review your feedback below to improve your coding skills!",
-            question_idx,  # updated question index
-            user_answers,  # accumulated answers
-            gr.update(visible=False),  # start_btn hidden
-            gr.update(visible=False),  # next_btn hidden
-            gr.update(value=results_text, visible=True),  # final_markdown
-            gr.update(visible=False),  # question_image hidden
-        )
-    else:
-        # Show the next question
-        q = quiz_data[question_idx]
-        # Format assessment criteria as bullet points
-        criteria_bullets = "\n".join(f"- {c}" for c in q["assessment_criteria"])
-        challenge_text = (
-            f"## Question {question_idx + 1}\n\n"
-            f"### Challenge:\n{q['challenge']}\n\n"
-            "### Assessment Criteria:\n"
-            f"{criteria_bullets}"
-        )
-
-        # Only show image if the feature exists and the question has an image
-        show_image = HAS_IMAGE_FEATURE and q.get("image") is not None
-        image_update = gr.update(
-            value=q.get("image") if show_image else None, visible=show_image
-        )
-
-        return (
-            challenge_text,  # question_text
-            gr.update(value=q["placeholder"], visible=True),  # code_input
-            "Submit your solution and click 'Next' to continue.",
-            question_idx,  # updated question_idx
-            user_answers,  # user_answers
-            gr.update(visible=False),  # start_btn hidden
-            gr.update(visible=True),  # next_btn visible
-            gr.update(visible=False),  # final_markdown hidden
-            image_update,  # question_image
-        )
-
-
-with gr.Blocks() as demo:
-    demo.title = f"Coding Quiz: {EXAM_DATASET_ID}"
-    # State variables
-    question_idx = gr.State(value=0)
-    user_answers = gr.State(value=[])
-
-    with gr.Row(variant="compact"):
-        intro_text = """
-## Welcome to the smolagents code reviewer
-
-This application will review your smolagents code and provide feedback on your solutions. This exercise is not reviewed or certified! It's about trying out smolagents for the first time.
-
-ℹ️ Log in first, then click 'Start' to begin. Complete each coding challenge and click 'Next' to proceed. You'll get feedback on your solutions at the end."""
-        intro_text = gr.Markdown(intro_text)
-    with gr.Row(variant="panel"):
-        with gr.Column():
-            question_text = gr.Markdown("")
-            question_image = gr.Image(
-                label="Question Image",
-                visible=True if HAS_IMAGE_FEATURE else False,
-                type="pil",
-            )  # Add image component
-        with gr.Column():
-            code_input = gr.Code(
-                language="python", label="Your Solution", visible=False
-            )
-
-    with gr.Row(variant="compact"):
-        status_text = gr.Markdown("")
-
-    with gr.Row(variant="compact"):
-        login_btn = gr.LoginButton()
-        start_btn = gr.Button("Start")
-        next_btn = gr.Button("Next ⏭️", visible=False)
-
-    with gr.Row(variant="compact"):
-        final_markdown = gr.Markdown("", visible=False)
-
-    login_btn.click(
-        fn=on_user_logged_in,
-        inputs=None,
-        outputs=[
-            login_btn,
-            start_btn,
-            next_btn,
-            question_text,
-            code_input,
-            status_text,
-            question_image,
-        ],
-    )
-
-    start_btn.click(
-        fn=handle_quiz,
-        inputs=[question_idx, user_answers, code_input, gr.State(True)],
-        outputs=[
-            question_text,  # Markdown with question text
-            code_input,  # Code input field
-            status_text,  # Status text (instructions/status messages)
-            question_idx,  # Updated question index (state)
-            user_answers,  # Updated user answers (state)
-            start_btn,  # Update for start button (will be hidden)
-            next_btn,  # Update for next button (shown for in-progress quiz)
-            final_markdown,  # Final results markdown (hidden until quiz ends)
-            question_image,  # Image update for the quiz question
-        ],
-    )
-
-    next_btn.click(
-        fn=handle_quiz,
-        inputs=[question_idx, user_answers, code_input, gr.State(False)],
-        outputs=[
-            question_text,
-            code_input,
-            status_text,
-            question_idx,
-            user_answers,
-            start_btn,
-            next_btn,
-            final_markdown,
-            question_image,
-        ],
-    )
-
-if __name__ == "__main__":
-    demo.launch()
+import os
+
+from smolagents import DuckDuckGoSearchTool, LiteLLMModel, ToolCallingAgent
+from e2b import Sandbox
+
+model = LiteLLMModel(
+    model_id="groq/meta-llama/llama-4-maverick-17b-128e-instruct",
+    api_key=os.environ["GROQ_API_KEY"],  # read from the environment; never commit API keys
+)
+
+# Create a sandbox; CPU, memory, timeout, etc. are configurable
+sandbox = Sandbox(api_key=os.environ["E2B_API_KEY"])
+
+# Create the web search agent
+web_agent = ToolCallingAgent(
+    tools=[DuckDuckGoSearchTool()],  # web search tool
+    model=model,
+    max_steps=5,  # cap the number of reasoning/tool-call steps
+    name="web_browser",  # identifier-style name, as expected for managed agents
+    description="Searches the web and reports what it finds",
+)
+
+with sandbox:
+    result = web_agent.run("What are some recent breakthroughs in AI?")
+    print(result)
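
One thing worth flagging in the added script: the E2B sandbox is opened but nothing ever runs inside it. `web_agent.run(...)` executes on the host, so the `with sandbox:` block only starts and then tears down an idle sandbox. If the intent is to sandbox the agent's execution, smolagents can drive E2B through its code executor instead. The sketch below is a minimal, unverified variant under two assumptions: that smolagents' `executor_type="e2b"` option is available (it applies to `CodeAgent`, which executes generated Python, not to `ToolCallingAgent`), and that `GROQ_API_KEY` and `E2B_API_KEY` are exported in the environment.

```python
import os

from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel

# Same Groq-hosted model as in the PR, with the key read from the environment.
model = LiteLLMModel(
    model_id="groq/meta-llama/llama-4-maverick-17b-128e-instruct",
    api_key=os.environ["GROQ_API_KEY"],
)

# CodeAgent writes and executes Python to solve the task; with
# executor_type="e2b", that generated code runs in an E2B sandbox
# (the executor picks up E2B_API_KEY from the environment) rather
# than on the host machine.
agent = CodeAgent(
    tools=[DuckDuckGoSearchTool()],
    model=model,
    executor_type="e2b",
)

print(agent.run("What are some recent breakthroughs in AI?"))
```

With this arrangement the script no longer manages a `Sandbox` object at all; the executor owns the sandbox lifecycle, so there is no idle sandbox to create or tear down by hand.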