Upload 6 files

- app.py +555 -0
- color_mapper.py +270 -0
- detection_model.py +164 -0
- evaluation_metrics.py +323 -0
- requirements.txt +8 -0
- visualization_helper.py +147 -0
app.py
ADDED
@@ -0,0 +1,555 @@
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import gradio as gr
from PIL import Image
import spaces

from detection_model import DetectionModel
from color_mapper import ColorMapper
from visualization_helper import VisualizationHelper
from evaluation_metrics import EvaluationMetrics


color_mapper = ColorMapper()
model_instances = {}

@spaces.GPU
def process_image(image, model_instance, confidence_threshold, filter_classes=None):
    """
    Process an image for object detection

    Args:
        image: Input image (numpy array or PIL Image)
        model_instance: DetectionModel instance to use
        confidence_threshold: Confidence threshold for detection
        filter_classes: Optional list of classes to filter results

    Returns:
        Tuple of (result_image, result_text, stats_data)
    """
    # initialize key variables
    result = None
    stats = {}
    temp_path = None

    try:
        # update confidence threshold
        model_instance.confidence = confidence_threshold

        # process input image
        if isinstance(image, np.ndarray):
            # Convert BGR to RGB if needed
            if image.shape[2] == 3:
                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            else:
                image_rgb = image
            pil_image = Image.fromarray(image_rgb)
        elif image is None:
            return None, "No image provided. Please upload an image.", {}
        else:
            pil_image = image

        # store a temp file
        import uuid
        import tempfile

        temp_dir = tempfile.gettempdir()  # use the system temp directory
        temp_filename = f"temp_{uuid.uuid4().hex}.jpg"
        temp_path = os.path.join(temp_dir, temp_filename)
        pil_image.save(temp_path)

        # object detection
        result = model_instance.detect(temp_path)

        if result is None:
            return None, "Detection failed. Please try again with a different image.", {}

        # calculate stats
        stats = EvaluationMetrics.calculate_basic_stats(result)

        # add spatial metrics
        spatial_metrics = EvaluationMetrics.calculate_distance_metrics(result)
        stats["spatial_metrics"] = spatial_metrics

        if filter_classes and len(filter_classes) > 0:
            # get classes and confidences
            classes = result.boxes.cls.cpu().numpy().astype(int)
            confs = result.boxes.conf.cpu().numpy()

            mask = np.zeros_like(classes, dtype=bool)
            for cls_id in filter_classes:
                mask = np.logical_or(mask, classes == cls_id)

            filtered_stats = {
                "total_objects": int(np.sum(mask)),
                "class_statistics": {},
                "average_confidence": float(np.mean(confs[mask])) if np.any(mask) else 0,
                "spatial_metrics": stats["spatial_metrics"]
            }

            # accumulate per-class counts and confidence totals
            names = result.names
            for cls, conf in zip(classes[mask], confs[mask]):
                cls_name = names[int(cls)]
                if cls_name not in filtered_stats["class_statistics"]:
                    filtered_stats["class_statistics"][cls_name] = {
                        "count": 0,
                        "total_confidence": 0.0
                    }

                filtered_stats["class_statistics"][cls_name]["count"] += 1
                filtered_stats["class_statistics"][cls_name]["total_confidence"] += float(conf)

            # convert confidence totals into per-class averages
            for cls_stats in filtered_stats["class_statistics"].values():
                cls_stats["average_confidence"] = cls_stats["total_confidence"] / cls_stats["count"]
                cls_stats.pop("total_confidence")

            stats = filtered_stats

        viz_data = EvaluationMetrics.generate_visualization_data(
            result,
            color_mapper.get_all_colors()
        )

        result_image = VisualizationHelper.visualize_detection(
            temp_path, result, color_mapper=color_mapper, figsize=(12, 12), return_pil=True
        )

        result_text = EvaluationMetrics.format_detection_summary(viz_data)

        return result_image, result_text, stats

    except Exception as e:
        error_message = f"Error occurred: {str(e)}"
        import traceback
        traceback.print_exc()
        print(error_message)
        return None, error_message, {}

    finally:
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except Exception as e:
                print(f"Could not delete temp file {temp_path}: {str(e)}")

def format_result_text(stats):
    """Format detection statistics into readable text"""
    if not stats or "total_objects" not in stats:
        return "No objects detected."

    lines = [
        f"Detected {stats['total_objects']} objects.",
        f"Average confidence: {stats.get('average_confidence', 0):.2f}",
        "",
        "Objects by class:",
    ]

    if "class_statistics" in stats and stats["class_statistics"]:
        # Sort classes by count
        sorted_classes = sorted(
            stats["class_statistics"].items(),
            key=lambda x: x[1]["count"],
            reverse=True
        )

        for cls_name, cls_stats in sorted_classes:
            lines.append(f"• {cls_name}: {cls_stats['count']} (avg conf: {cls_stats.get('average_confidence', 0):.2f})")
    else:
        lines.append("No class information available.")

    return "\n".join(lines)

def get_all_classes():
    """Get all available COCO classes"""
    try:
        # reuse the class names from any already-loaded model instance
        for model_instance in model_instances.values():
            if model_instance.is_model_loaded:
                return [(idx, name) for idx, name in model_instance.class_names.items()]
    except Exception:
        pass

    # Fallback to the standard COCO classes
    return [
        (0, 'person'), (1, 'bicycle'), (2, 'car'), (3, 'motorcycle'), (4, 'airplane'),
        (5, 'bus'), (6, 'train'), (7, 'truck'), (8, 'boat'), (9, 'traffic light'),
        (10, 'fire hydrant'), (11, 'stop sign'), (12, 'parking meter'), (13, 'bench'),
        (14, 'bird'), (15, 'cat'), (16, 'dog'), (17, 'horse'), (18, 'sheep'), (19, 'cow'),
        (20, 'elephant'), (21, 'bear'), (22, 'zebra'), (23, 'giraffe'), (24, 'backpack'),
        (25, 'umbrella'), (26, 'handbag'), (27, 'tie'), (28, 'suitcase'), (29, 'frisbee'),
        (30, 'skis'), (31, 'snowboard'), (32, 'sports ball'), (33, 'kite'), (34, 'baseball bat'),
        (35, 'baseball glove'), (36, 'skateboard'), (37, 'surfboard'), (38, 'tennis racket'),
        (39, 'bottle'), (40, 'wine glass'), (41, 'cup'), (42, 'fork'), (43, 'knife'),
        (44, 'spoon'), (45, 'bowl'), (46, 'banana'), (47, 'apple'), (48, 'sandwich'),
        (49, 'orange'), (50, 'broccoli'), (51, 'carrot'), (52, 'hot dog'), (53, 'pizza'),
        (54, 'donut'), (55, 'cake'), (56, 'chair'), (57, 'couch'), (58, 'potted plant'),
        (59, 'bed'), (60, 'dining table'), (61, 'toilet'), (62, 'tv'), (63, 'laptop'),
        (64, 'mouse'), (65, 'remote'), (66, 'keyboard'), (67, 'cell phone'), (68, 'microwave'),
        (69, 'oven'), (70, 'toaster'), (71, 'sink'), (72, 'refrigerator'), (73, 'book'),
        (74, 'clock'), (75, 'vase'), (76, 'scissors'), (77, 'teddy bear'), (78, 'hair drier'),
        (79, 'toothbrush')
    ]

def create_interface():
    """Create the Gradio interface"""
    # Get CSS styles
    css = """
    body {
        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
        background: linear-gradient(120deg, #e0f7fa, #b2ebf2);
        margin: 0;
        padding: 0;
    }

    .gradio-container {
        max-width: 1200px !important;
    }

    .app-header {
        text-align: center;
        margin-bottom: 2rem;
        background: rgba(255, 255, 255, 0.8);
        padding: 1.5rem;
        border-radius: 10px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }

    .app-title {
        color: #2D3748;
        font-size: 2.5rem;
        margin-bottom: 0.5rem;
        background: linear-gradient(90deg, #4299e1, #48bb78);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
    }

    .app-subtitle {
        color: #4A5568;
        font-size: 1.2rem;
        font-weight: normal;
        margin-top: 0.25rem;
    }

    .app-divider {
        width: 50px;
        height: 3px;
        background: linear-gradient(90deg, #4299e1, #48bb78);
        margin: 1rem auto;
    }

    .input-panel, .output-panel {
        background: white;
        border-radius: 10px;
        padding: 1rem;
        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
    }

    .detect-btn {
        background: linear-gradient(90deg, #4299e1, #48bb78) !important;
        color: white !important;
        border: none !important;
        transition: transform 0.3s, box-shadow 0.3s !important;
    }

    .detect-btn:hover {
        transform: translateY(-2px) !important;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2) !important;
    }

    .detect-btn:active {
        transform: translateY(1px) !important;
        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2) !important;
    }

    .footer {
        text-align: center;
        margin-top: 2rem;
        font-size: 0.9rem;
        color: #4A5568;
    }

    /* Responsive adjustments */
    @media (max-width: 768px) {
        .app-title {
            font-size: 2rem;
        }

        .app-subtitle {
            font-size: 1rem;
        }
    }
    """

    # get model info
    available_models = DetectionModel.get_available_models()
    model_choices = [model["model_file"] for model in available_models]
    model_labels = [f"{model['name']} - {model['inference_speed']}" for model in available_models]

    # Available classes for filtering
    available_classes = get_all_classes()
    class_choices = [f"{cls_id}: {cls_name}" for cls_id, cls_name in available_classes]

    # Create the Gradio Blocks interface
    with gr.Blocks(css=css) as demo:
        # Header
        with gr.Group(elem_classes="app-header"):
            gr.HTML("""
                <h1 class="app-title">VisionScout</h1>
                <h2 class="app-subtitle">Detect and identify objects in your images</h2>
                <div class="app-divider"></div>
            """)

        current_model = gr.State("yolov8m.pt")  # use the medium model as the default

        # Input and Output panels
        with gr.Row():
            # Left column - Input controls
            with gr.Column(scale=4, elem_classes="input-panel"):
                with gr.Group():
                    gr.Markdown("### Upload Image")
                    image_input = gr.Image(type="pil", label="Upload an image")

                    with gr.Accordion("Advanced Settings", open=False):
                        with gr.Row():
                            model_dropdown = gr.Dropdown(
                                choices=model_choices,
                                value="yolov8m.pt",
                                label="Select Model",
                                info="Choose different models based on your needs for speed vs. accuracy"
                            )

                        # display model info
                        model_info = gr.Markdown(DetectionModel.get_model_description("yolov8m.pt"))

                        confidence = gr.Slider(
                            minimum=0.1,
                            maximum=0.9,
                            value=0.25,
                            step=0.05,
                            label="Confidence Threshold",
                            info="Higher values show fewer but more confident detections"
                        )

                        with gr.Accordion("Filter Classes", open=False):
                            # Common object categories
                            with gr.Row():
                                people_btn = gr.Button("People")
                                vehicles_btn = gr.Button("Vehicles")
                                animals_btn = gr.Button("Animals")
                                objects_btn = gr.Button("Common Objects")

                            # Class selection
                            class_filter = gr.Dropdown(
                                choices=class_choices,
                                multiselect=True,
                                label="Select Classes to Display",
                                info="Leave empty to show all detected objects"
                            )

                    detect_btn = gr.Button("Detect Objects", variant="primary", elem_classes="detect-btn")

                with gr.Group():
                    gr.Markdown("### How to Use")
                    gr.Markdown("""
                    1. Upload an image or use the camera
                    2. Adjust the confidence threshold if needed
                    3. Optionally filter to specific object classes
                    4. Click the "Detect Objects" button

                    The model will identify objects in your image and display them with bounding boxes.

                    **Note:** Detection quality depends on image clarity and object visibility. The model can detect up to 80 different types of common objects.
                    """)

            # Right column - Results display
            with gr.Column(scale=6, elem_classes="output-panel"):
                with gr.Tab("Detection Result"):
                    result_image = gr.Image(type="pil", label="Detection Result")
                    result_text = gr.Textbox(label="Detection Details", lines=10)

                with gr.Tab("Statistics"):
                    with gr.Row():
                        with gr.Column(scale=1):
                            stats_json = gr.Json(label="Full Statistics")

                        with gr.Column(scale=1):
                            gr.Markdown("### Object Distribution")
                            plot_output = gr.Plot(label="Object Distribution")

        # update the state and description when the model selection changes
        model_dropdown.change(
            fn=lambda model: (model, DetectionModel.get_model_description(model)),
            inputs=[model_dropdown],
            outputs=[current_model, model_info]
        )

        # run detection when the button is clicked
        detect_btn.click(
            fn=process_and_plot,
            inputs=[image_input, current_model, confidence, class_filter],
            outputs=[result_image, result_text, stats_json, plot_output]
        )

        # Quick filter buttons
        people_classes = [0]  # Person
        vehicles_classes = [1, 2, 3, 4, 5, 6, 7, 8]  # Various vehicles
        animals_classes = list(range(14, 24))  # Animals in COCO
        common_objects = [41, 42, 43, 44, 45, 67, 73, 74, 76]  # Common household items

        people_btn.click(
            lambda: [f"{cls_id}: {cls_name}" for cls_id, cls_name in available_classes if cls_id in people_classes],
            outputs=class_filter
        )

        vehicles_btn.click(
            lambda: [f"{cls_id}: {cls_name}" for cls_id, cls_name in available_classes if cls_id in vehicles_classes],
            outputs=class_filter
        )

        animals_btn.click(
            lambda: [f"{cls_id}: {cls_name}" for cls_id, cls_name in available_classes if cls_id in animals_classes],
            outputs=class_filter
        )

        objects_btn.click(
            lambda: [f"{cls_id}: {cls_name}" for cls_id, cls_name in available_classes if cls_id in common_objects],
            outputs=class_filter
        )

        # Set up example images
        example_images = [
            "room_01.jpg",
            "street_01.jpg",
            "street_02.jpg",
            "street_03.jpg"
        ]

        gr.Examples(
            examples=example_images,
            inputs=image_input,
            outputs=None,
            fn=None,
            cache_examples=False,
        )

        # Footer
        gr.HTML("""
            <div class="footer">
                <p>Powered by YOLOv8 and Ultralytics • Created with Gradio</p>
                <p>Model can detect 80 different classes of objects</p>
            </div>
        """)

    return demo

@spaces.GPU
def process_and_plot(image, model_name, confidence_threshold, filter_classes=None):
    """
    Process an image and create plots for its statistics

    Args:
        image: Input image
        model_name: Name of the model to use
        confidence_threshold: Confidence threshold for detection
        filter_classes: Optional list of classes to filter results

    Returns:
        Tuple of (result_image, result_text, stats_json, plot_figure)
    """
    global model_instances

    if model_name not in model_instances:
        print(f"Creating new model instance for {model_name}")
        model_instances[model_name] = DetectionModel(model_name=model_name, confidence=confidence_threshold, iou=0.45)
    else:
        print(f"Using existing model instance for {model_name}")
        model_instances[model_name].confidence = confidence_threshold

    class_ids = None
    if filter_classes:
        class_ids = []
        for class_str in filter_classes:
            try:
                # Extract the ID from the "id: name" format
                class_id = int(class_str.split(":")[0].strip())
                class_ids.append(class_id)
            except (ValueError, IndexError):
                continue

    # execute detection
    result_image, result_text, stats = process_image(
        image,
        model_instances[model_name],
        confidence_threshold,
        class_ids
    )

    # create the stats plot
    plot_figure = create_stats_plot(stats)

    return result_image, result_text, stats, plot_figure

def create_stats_plot(stats):
    """
    Create a visualization of statistics data

    Args:
        stats: Dictionary containing detection statistics

    Returns:
        Matplotlib figure with the visualization
    """
    if not stats or "class_statistics" not in stats or not stats["class_statistics"]:
        # Create an empty plot if there is no data
        fig, ax = plt.subplots(figsize=(8, 6))
        ax.text(0.5, 0.5, "No detection data available",
                ha='center', va='center', fontsize=12)
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')
        return fig

    # prepare visualization data
    viz_data = {
        "total_objects": stats.get("total_objects", 0),
        "average_confidence": stats.get("average_confidence", 0),
        "class_data": []
    }

    # get the classes known to the current model via get_all_classes
    available_classes = dict(get_all_classes())

    # process class data
    for cls_name, cls_stats in stats.get("class_statistics", {}).items():
        # look up the class ID by name
        class_id = -1
        for cls_id, name in available_classes.items():
            if name == cls_name:
                class_id = cls_id
                break

        cls_data = {
            "name": cls_name,
            "class_id": class_id,
            "count": cls_stats.get("count", 0),
            "average_confidence": cls_stats.get("average_confidence", 0),
            "color": color_mapper.get_color(class_id if class_id >= 0 else cls_name)
        }

        viz_data["class_data"].append(cls_data)

    # Sort by count in descending order
    viz_data["class_data"].sort(key=lambda x: x["count"], reverse=True)

    return EvaluationMetrics.create_stats_plot(viz_data)


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
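
For quick local testing it can help to drive process_image without launching the UI. A minimal sketch, assuming a local test image at sample.jpg (hypothetical path), that the spaces package is installed (its GPU decorator is a pass-through off-Spaces), and that ultralytics can download the weights on first use:

import PIL.Image

from app import process_image
from detection_model import DetectionModel

# one model instance, as process_and_plot would create
model = DetectionModel(model_name="yolov8n.pt", confidence=0.25, iou=0.45)

# run detection directly; filter to persons only (COCO class ID 0)
img = PIL.Image.open("sample.jpg")  # hypothetical test image
result_image, result_text, stats = process_image(img, model, 0.25, filter_classes=[0])
print(result_text)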
color_mapper.py
ADDED
@@ -0,0 +1,270 @@
import numpy as np
from typing import Dict, List, Tuple, Union, Any

class ColorMapper:
    """
    A class for consistent color mapping of object detection classes
    Provides color schemes for visualization in both RGB and hex formats
    """

    # Class categories for better organization
    CATEGORIES = {
        "person": [0],
        "vehicles": [1, 2, 3, 4, 5, 6, 7, 8],
        "traffic": [9, 10, 11, 12],
        "animals": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23],
        "outdoor": [13, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33],
        "sports": [34, 35, 36, 37, 38],
        "kitchen": [39, 40, 41, 42, 43, 44, 45],
        "food": [46, 47, 48, 49, 50, 51, 52, 53, 54, 55],
        "furniture": [56, 57, 58, 59, 60, 61],
        "electronics": [62, 63, 64, 65, 66, 67, 68, 69, 70],
        "household": [71, 72, 73, 74, 75, 76, 77, 78, 79]
    }

    # Base colors for each category (in HSV for easier variation)
    CATEGORY_COLORS = {
        "person": (0, 0.8, 0.9),        # Red
        "vehicles": (210, 0.8, 0.9),    # Blue
        "traffic": (45, 0.8, 0.9),      # Orange
        "animals": (120, 0.7, 0.8),     # Green
        "outdoor": (180, 0.7, 0.9),     # Cyan
        "sports": (270, 0.7, 0.8),      # Purple
        "kitchen": (30, 0.7, 0.9),      # Light Orange
        "food": (330, 0.7, 0.85),       # Pink
        "furniture": (150, 0.5, 0.85),  # Light Green
        "electronics": (240, 0.6, 0.9), # Light Blue
        "household": (60, 0.6, 0.9)     # Yellow
    }

    def __init__(self):
        """Initialize the ColorMapper with COCO class mappings"""
        self.class_names = self._get_coco_classes()
        self.color_map = self._generate_color_map()

    def _get_coco_classes(self) -> Dict[int, str]:
        """Get the standard COCO class names with their IDs"""
        return {
            0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane',
            5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light',
            10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench',
            14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow',
            20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack',
            25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee',
            30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat',
            35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket',
            39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife',
            44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich',
            49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza',
            54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant',
            59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop',
            64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave',
            69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book',
            74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier',
            79: 'toothbrush'
        }

    def _hsv_to_rgb(self, h: float, s: float, v: float) -> Tuple[int, int, int]:
        """
        Convert HSV color to RGB

        Args:
            h: Hue (0-360)
            s: Saturation (0-1)
            v: Value (0-1)

        Returns:
            Tuple of (R, G, B) values (0-255)
        """
        h = h / 60
        i = int(h)
        f = h - i
        p = v * (1 - s)
        q = v * (1 - s * f)
        t = v * (1 - s * (1 - f))

        if i == 0:
            r, g, b = v, t, p
        elif i == 1:
            r, g, b = q, v, p
        elif i == 2:
            r, g, b = p, v, t
        elif i == 3:
            r, g, b = p, q, v
        elif i == 4:
            r, g, b = t, p, v
        else:
            r, g, b = v, p, q

        return (int(r * 255), int(g * 255), int(b * 255))

    def _rgb_to_hex(self, rgb: Tuple[int, int, int]) -> str:
        """
        Convert RGB color to hex color code

        Args:
            rgb: Tuple of (R, G, B) values (0-255)

        Returns:
            Hex color code (e.g. '#FF0000')
        """
        return f'#{rgb[0]:02x}{rgb[1]:02x}{rgb[2]:02x}'

    def _find_category(self, class_id: int) -> str:
        """
        Find the category for a given class ID

        Args:
            class_id: Class ID (0-79)

        Returns:
            Category name
        """
        for category, ids in self.CATEGORIES.items():
            if class_id in ids:
                return category
        return "other"  # Fallback

    def _generate_color_map(self) -> Dict:
        """
        Generate a color map for all 80 COCO classes

        Returns:
            Dictionary mapping class IDs and names to color values
        """
        color_map = {
            'by_id': {},      # Map class ID to RGB and hex
            'by_name': {},    # Map class name to RGB and hex
            'categories': {}  # Map category to base color
        }

        # Generate colors for categories
        for category, hsv in self.CATEGORY_COLORS.items():
            rgb = self._hsv_to_rgb(hsv[0], hsv[1], hsv[2])
            hex_color = self._rgb_to_hex(rgb)
            color_map['categories'][category] = {
                'rgb': rgb,
                'hex': hex_color
            }

        # Generate variations for each class within a category
        for class_id, class_name in self.class_names.items():
            category = self._find_category(class_id)
            base_hsv = self.CATEGORY_COLORS.get(category, (0, 0, 0.8))  # Default gray

            # Slightly vary the hue and saturation within the category
            ids_in_category = self.CATEGORIES.get(category, [])
            if ids_in_category:
                position = ids_in_category.index(class_id) if class_id in ids_in_category else 0
                variation = position / max(1, len(ids_in_category) - 1)  # 0 to 1

                # Vary hue slightly (±15°) and saturation
                h_offset = 30 * variation - 15  # -15 to +15
                s_offset = 0.2 * variation  # 0 to 0.2

                h = (base_hsv[0] + h_offset) % 360
                s = min(1.0, base_hsv[1] + s_offset)
                v = base_hsv[2]
            else:
                h, s, v = base_hsv

            rgb = self._hsv_to_rgb(h, s, v)
            hex_color = self._rgb_to_hex(rgb)

            # Store in both mappings
            color_map['by_id'][class_id] = {
                'rgb': rgb,
                'hex': hex_color,
                'category': category
            }

            color_map['by_name'][class_name] = {
                'rgb': rgb,
                'hex': hex_color,
                'category': category
            }

        return color_map

    def get_color(self, class_identifier: Union[int, str], format: str = 'hex') -> Any:
        """
        Get the color for a specific class

        Args:
            class_identifier: Class ID (int) or name (str)
            format: Color format ('hex', 'rgb', or 'bgr')

        Returns:
            Color in the requested format
        """
        # Determine if the identifier is an ID or a name
        if isinstance(class_identifier, int):
            color_info = self.color_map['by_id'].get(class_identifier)
        else:
            color_info = self.color_map['by_name'].get(class_identifier)

        if not color_info:
            # Fallback color if not found
            return '#CCCCCC' if format == 'hex' else (204, 204, 204)

        if format == 'hex':
            return color_info['hex']
        elif format == 'rgb':
            return color_info['rgb']
        elif format == 'bgr':
            # Convert RGB to BGR for OpenCV
            r, g, b = color_info['rgb']
            return (b, g, r)
        else:
            return color_info['rgb']

    def get_all_colors(self, format: str = 'hex') -> Dict:
        """
        Get all colors in the specified format

        Args:
            format: Color format ('hex', 'rgb', or 'bgr')

        Returns:
            Dictionary mapping class names to colors
        """
        result = {}
        for class_id, class_name in self.class_names.items():
            result[class_name] = self.get_color(class_id, format)
        return result

    def get_category_colors(self, format: str = 'hex') -> Dict:
        """
        Get the base colors for each category

        Args:
            format: Color format ('hex', 'rgb', or 'bgr')

        Returns:
            Dictionary mapping categories to colors
        """
        result = {}
        for category, color_info in self.color_map['categories'].items():
            if format == 'hex':
                result[category] = color_info['hex']
            elif format == 'bgr':
                r, g, b = color_info['rgb']
                result[category] = (b, g, r)
            else:
                result[category] = color_info['rgb']
        return result

    def get_category_for_class(self, class_identifier: Union[int, str]) -> str:
        """
        Get the category for a specific class

        Args:
            class_identifier: Class ID (int) or name (str)

        Returns:
            Category name
        """
        if isinstance(class_identifier, int):
            return self.color_map['by_id'].get(class_identifier, {}).get('category', 'other')
        else:
            return self.color_map['by_name'].get(class_identifier, {}).get('category', 'other')
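
A short usage sketch of the mapper; the exact hex values depend on the HSV variation above, so the printed colors are illustrative only:

from color_mapper import ColorMapper

mapper = ColorMapper()

print(mapper.get_color(0))                     # hex for 'person' (red family)
print(mapper.get_color("dog", format="rgb"))   # RGB tuple, looked up by class name
print(mapper.get_color(2, format="bgr"))       # BGR tuple, ready for cv2 drawing calls
print(mapper.get_category_for_class("pizza"))  # -> 'food'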
detection_model.py
ADDED
@@ -0,0 +1,164 @@
from ultralytics import YOLO
from typing import Any, List, Dict, Optional
import torch

class DetectionModel:
    """Core detection model class for object detection using YOLOv8"""

    # Model information dictionary
    MODEL_INFO = {
        "yolov8n.pt": {
            "name": "YOLOv8n (Nano)",
            "description": "Fastest model with smallest size (3.2M parameters). Best for speed-critical applications.",
            "size_mb": 6,
            "inference_speed": "Very Fast"
        },
        "yolov8m.pt": {
            "name": "YOLOv8m (Medium)",
            "description": "Balanced model with good accuracy-speed tradeoff (25.9M parameters). Recommended for general use.",
            "size_mb": 25,
            "inference_speed": "Medium"
        },
        "yolov8x.pt": {
            "name": "YOLOv8x (XLarge)",
            "description": "Most accurate but slower model (68.2M parameters). Best for accuracy-critical applications.",
            "size_mb": 68,
            "inference_speed": "Slower"
        }
    }

    def __init__(self, model_name: str = 'yolov8m.pt', confidence: float = 0.25, iou: float = 0.45):
        """
        Initialize the detection model

        Args:
            model_name: Model name or path, default is yolov8m.pt
            confidence: Confidence threshold, default is 0.25
            iou: IoU threshold for non-maximum suppression, default is 0.45
        """
        self.model_name = model_name
        self.confidence = confidence
        self.iou = iou
        self.model = None
        self.class_names = {}
        self.is_model_loaded = False

        # Load the model on initialization
        self._load_model()

    def _load_model(self):
        """Load the YOLO model"""
        try:
            print(f"Loading model: {self.model_name}")
            self.model = YOLO(self.model_name)
            self.class_names = self.model.names
            self.is_model_loaded = True
            print(f"Successfully loaded model: {self.model_name}")
            print(f"Number of classes the model can recognize: {len(self.class_names)}")
        except Exception as e:
            print(f"Error occurred when loading the model: {e}")
            self.is_model_loaded = False

    def change_model(self, new_model_name: str) -> bool:
        """
        Change the currently loaded model

        Args:
            new_model_name: Name of the new model to load

        Returns:
            bool: True if the model changed successfully, False otherwise
        """
        if self.model_name == new_model_name and self.is_model_loaded:
            print(f"Model {new_model_name} is already loaded")
            return True

        print(f"Changing model from {self.model_name} to {new_model_name}")

        # Unload the current model to free memory
        if self.model is not None:
            del self.model
            self.model = None

        # Clean GPU memory if available
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        # Update the model name and load the new model
        self.model_name = new_model_name
        self._load_model()

        return self.is_model_loaded

    def reload_model(self):
        """Reload the model (useful for changing models or after an error)"""
        if self.model is not None:
            del self.model
            self.model = None

        # Clean GPU memory if available
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        self._load_model()

    def detect(self, image_input: Any) -> Optional[Any]:
        """
        Perform object detection on a single image

        Args:
            image_input: Image path (str), PIL Image, or numpy array

        Returns:
            Detection result object, or None if an error occurred
        """
        if self.model is None or not self.is_model_loaded:
            print("Model not found or not loaded. Attempting to reload...")
            self._load_model()
            if self.model is None or not self.is_model_loaded:
                print("Failed to load model. Cannot perform detection.")
                return None

        try:
            results = self.model(image_input, conf=self.confidence, iou=self.iou)
            return results[0]
        except Exception as e:
            print(f"Error occurred during detection: {e}")
            return None

    def get_class_names(self, class_id: int) -> str:
        """Get the class name for a given class ID"""
        return self.class_names.get(class_id, "Unknown Class")

    def get_supported_classes(self) -> Dict[int, str]:
        """Get all supported classes as a dictionary of {id: class_name}"""
        return self.class_names

    @classmethod
    def get_available_models(cls) -> List[Dict]:
        """
        Get the list of available models with their information

        Returns:
            List of dictionaries containing model information
        """
        models = []
        for model_file, info in cls.MODEL_INFO.items():
            models.append({
                "model_file": model_file,
                "name": info["name"],
                "description": info["description"],
                "size_mb": info["size_mb"],
                "inference_speed": info["inference_speed"]
            })
        return models

    @classmethod
    def get_model_description(cls, model_name: str) -> str:
        """Get the description for a specific model"""
        if model_name in cls.MODEL_INFO:
            info = cls.MODEL_INFO[model_name]
            return f"{info['name']}: {info['description']} (Size: ~{info['size_mb']}MB, Speed: {info['inference_speed']})"
        return "Model information not available"
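
A minimal usage sketch, assuming sample.jpg exists locally (hypothetical path) and that ultralytics can fetch the weights on first use:

from detection_model import DetectionModel

model = DetectionModel(model_name="yolov8n.pt", confidence=0.3)
result = model.detect("sample.jpg")  # also accepts a PIL Image or numpy array

if result is not None:
    # iterate detections: box coordinates, class ID, confidence
    for box, cls, conf in zip(result.boxes.xyxy, result.boxes.cls, result.boxes.conf):
        print(model.get_class_names(int(cls)), float(conf), box.tolist())

# swap weights at runtime; GPU memory is freed before the new load
model.change_model("yolov8x.pt")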
evaluation_metrics.py
ADDED
@@ -0,0 +1,323 @@
import numpy as np
import matplotlib.pyplot as plt
from typing import Dict, List, Any, Optional, Tuple

class EvaluationMetrics:
    """Class for computing detection metrics, generating statistics and visualization data"""

    @staticmethod
    def calculate_basic_stats(result: Any) -> Dict:
        """
        Calculate basic statistics for a single detection result

        Args:
            result: Detection result object

        Returns:
            Dictionary with basic statistics
        """
        if result is None:
            return {"error": "No detection result provided"}

        # Get classes and confidences
        classes = result.boxes.cls.cpu().numpy().astype(int)
        confidences = result.boxes.conf.cpu().numpy()
        names = result.names

        # Count by class
        class_counts = {}
        for cls, conf in zip(classes, confidences):
            cls_name = names[int(cls)]
            if cls_name not in class_counts:
                class_counts[cls_name] = {"count": 0, "total_confidence": 0, "confidences": []}

            class_counts[cls_name]["count"] += 1
            class_counts[cls_name]["total_confidence"] += float(conf)
            class_counts[cls_name]["confidences"].append(float(conf))

        # Calculate average confidence
        for cls_name, stats in class_counts.items():
            if stats["count"] > 0:
                stats["average_confidence"] = stats["total_confidence"] / stats["count"]
                stats["confidence_std"] = float(np.std(stats["confidences"])) if len(stats["confidences"]) > 1 else 0
                stats.pop("total_confidence")  # Remove the intermediate calculation

        # Prepare the summary
        stats = {
            "total_objects": len(classes),
            "class_statistics": class_counts,
            "average_confidence": float(np.mean(confidences)) if len(confidences) > 0 else 0
        }

        return stats

    @staticmethod
    def generate_visualization_data(result: Any, class_colors: Dict = None) -> Dict:
        """
        Generate structured data suitable for visualization

        Args:
            result: Detection result object
            class_colors: Dictionary mapping class names to color codes (optional)

        Returns:
            Dictionary with visualization-ready data
        """
        if result is None:
            return {"error": "No detection result provided"}

        # Get basic stats first
        stats = EvaluationMetrics.calculate_basic_stats(result)

        # Create a visualization-specific data structure
        viz_data = {
            "total_objects": stats["total_objects"],
            "average_confidence": stats["average_confidence"],
            "class_data": []
        }

        # Sort classes by count (descending)
        sorted_classes = sorted(
            stats["class_statistics"].items(),
            key=lambda x: x[1]["count"],
            reverse=True
        )

        # Create class-specific visualization data
        for cls_name, cls_stats in sorted_classes:
            class_id = -1
            # Find the class ID based on the name
            for idx, name in result.names.items():
                if name == cls_name:
                    class_id = idx
                    break

            cls_data = {
                "name": cls_name,
                "class_id": class_id,
                "count": cls_stats["count"],
                "average_confidence": cls_stats.get("average_confidence", 0),
                "confidence_std": cls_stats.get("confidence_std", 0),
                "color": class_colors.get(cls_name, "#CCCCCC") if class_colors else "#CCCCCC"
            }

            viz_data["class_data"].append(cls_data)

        return viz_data

    @staticmethod
    def create_stats_plot(viz_data: Dict, figsize: Tuple[int, int] = (10, 7),
                          max_classes: int = 30) -> plt.Figure:
        """
        Create a horizontal bar chart showing detection statistics

        Args:
            viz_data: Visualization data generated by generate_visualization_data
            figsize: Figure size (width, height) in inches
            max_classes: Maximum number of classes to display

        Returns:
            Matplotlib figure object
        """
        if "error" in viz_data:
            # Create an empty plot on error
            fig, ax = plt.subplots(figsize=figsize)
            ax.text(0.5, 0.5, viz_data["error"],
                    ha='center', va='center', fontsize=12)
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)
            ax.axis('off')
            return fig

        if "class_data" not in viz_data or not viz_data["class_data"]:
            # Create an empty plot if there is no data
            fig, ax = plt.subplots(figsize=figsize)
            ax.text(0.5, 0.5, "No detection data available",
                    ha='center', va='center', fontsize=12)
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)
            ax.axis('off')
            return fig

        # Limit to max_classes
        class_data = viz_data["class_data"][:max_classes]

        # Extract data for plotting
        class_names = [item["name"] for item in class_data]
        counts = [item["count"] for item in class_data]
        colors = [item["color"] for item in class_data]

        # Create the figure and horizontal bar chart
        fig, ax = plt.subplots(figsize=figsize)
        y_pos = np.arange(len(class_names))

        # Create horizontal bars with class-specific colors
        bars = ax.barh(y_pos, counts, color=colors, alpha=0.8)

        # Add count values at the end of each bar
        for i, bar in enumerate(bars):
            width = bar.get_width()
            conf = class_data[i]["average_confidence"]
            ax.text(width + 0.3, bar.get_y() + bar.get_height()/2,
                    f"{width:.0f} (conf: {conf:.2f})",
                    va='center', fontsize=9)

        # Customize axes and labels
        ax.set_yticks(y_pos)
        ax.set_yticklabels(class_names)
        ax.invert_yaxis()  # Labels read top-to-bottom
        ax.set_xlabel('Count')
        ax.set_title(f'Objects Detected: {viz_data["total_objects"]} Total')

        # Add a grid for better readability
        ax.set_axisbelow(True)
        ax.grid(axis='x', linestyle='--', alpha=0.7)

        # Add the detection summary as a text box
        summary_text = (
            f"Total Objects: {viz_data['total_objects']}\n"
            f"Average Confidence: {viz_data['average_confidence']:.2f}\n"
            f"Unique Classes: {len(viz_data['class_data'])}"
        )
        plt.figtext(0.02, 0.02, summary_text, fontsize=9,
                    bbox=dict(facecolor='white', alpha=0.8, boxstyle='round'))

        plt.tight_layout()
        return fig

    @staticmethod
    def format_detection_summary(viz_data: Dict) -> str:
        """
        Format detection results as a readable text summary
        """
        if "error" in viz_data:
            return viz_data["error"]

        if "total_objects" not in viz_data:
            return "No detection data available."

        # timing information is intentionally not displayed
        total_objects = viz_data["total_objects"]
        avg_confidence = viz_data["average_confidence"]

        # build the header lines
        lines = [
            f"Detected {total_objects} objects.",
            f"Average confidence: {avg_confidence:.2f}",
            "",
            "Objects by class:",
        ]

        # add per-class details
        if "class_data" in viz_data and viz_data["class_data"]:
            for item in viz_data["class_data"]:
                lines.append(
                    f"• {item['name']}: {item['count']} (avg conf: {item['average_confidence']:.2f})"
                )
        else:
            lines.append("No class information available.")

        return "\n".join(lines)

    @staticmethod
    def calculate_distance_metrics(result: Any) -> Dict:
        """
        Calculate distance-related metrics for detected objects

        Args:
            result: Detection result object

        Returns:
            Dictionary with distance metrics
        """
        if result is None:
            return {"error": "No detection result provided"}

        boxes = result.boxes.xyxy.cpu().numpy()
        classes = result.boxes.cls.cpu().numpy().astype(int)
        names = result.names

        # Initialize metrics
        metrics = {
            "proximity": {},             # Classes that appear close to each other
            "spatial_distribution": {},  # Distribution across the image
            "size_distribution": {}      # Size distribution of objects
        }

        # Calculate image dimensions (assume normalized coordinates, or extract from the result)
        img_width, img_height = 1, 1
        if hasattr(result, "orig_shape"):
            img_height, img_width = result.orig_shape[:2]

        # Calculate bounding box areas and centers
        areas = []
        centers = []
        class_names = []

        for box, cls in zip(boxes, classes):
            x1, y1, x2, y2 = box
            width, height = x2 - x1, y2 - y1
            area = width * height
            center_x, center_y = (x1 + x2) / 2, (y1 + y2) / 2

            areas.append(area)
            centers.append((center_x, center_y))
            class_names.append(names[int(cls)])

        # Calculate the spatial distribution
        if centers:
            x_coords = [c[0] for c in centers]
            y_coords = [c[1] for c in centers]

            metrics["spatial_distribution"] = {
                "x_mean": float(np.mean(x_coords)) / img_width,
                "y_mean": float(np.mean(y_coords)) / img_height,
                "x_std": float(np.std(x_coords)) / img_width,
                "y_std": float(np.std(y_coords)) / img_height
            }

        # Calculate the size distribution
        if areas:
            metrics["size_distribution"] = {
                "mean_area": float(np.mean(areas)) / (img_width * img_height),
                "std_area": float(np.std(areas)) / (img_width * img_height),
                "min_area": float(np.min(areas)) / (img_width * img_height),
                "max_area": float(np.max(areas)) / (img_width * img_height)
            }

        # Calculate proximity between different classes
        class_centers = {}
        for cls_name, center in zip(class_names, centers):
            if cls_name not in class_centers:
                class_centers[cls_name] = []
            class_centers[cls_name].append(center)

        # Find classes that appear close to each other
        proximity_pairs = []
        for i, cls1 in enumerate(class_centers.keys()):
            for j, cls2 in enumerate(class_centers.keys()):
                if i >= j:  # Avoid duplicate pairs and self-comparison
                    continue

                # Calculate the minimum distance between any two objects of these classes
                min_distance = float('inf')
                for center1 in class_centers[cls1]:
                    for center2 in class_centers[cls2]:
                        dist = np.sqrt((center1[0] - center2[0])**2 + (center1[1] - center2[1])**2)
                        min_distance = min(min_distance, dist)

                # Normalize by the image diagonal
                img_diagonal = np.sqrt(img_width**2 + img_height**2)
                norm_distance = min_distance / img_diagonal

                proximity_pairs.append({
                    "class1": cls1,
                    "class2": cls2,
                    "distance": float(norm_distance)
                })

        # Sort by distance and keep the closest pairs
        proximity_pairs.sort(key=lambda x: x["distance"])
        metrics["proximity"] = proximity_pairs[:5]  # Keep the 5 closest pairs

        return metrics
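
The metrics are designed to chain off a raw detection result. A sketch of the typical flow, under the same sample.jpg assumption as above (the output filename is hypothetical):

from detection_model import DetectionModel
from color_mapper import ColorMapper
from evaluation_metrics import EvaluationMetrics

model = DetectionModel("yolov8n.pt")
result = model.detect("sample.jpg")

if result is not None:
    stats = EvaluationMetrics.calculate_basic_stats(result)
    viz_data = EvaluationMetrics.generate_visualization_data(result, ColorMapper().get_all_colors())

    print(EvaluationMetrics.format_detection_summary(viz_data))

    fig = EvaluationMetrics.create_stats_plot(viz_data)
    fig.savefig("detection_stats.png")  # hypothetical output file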
requirements.txt
ADDED
@@ -0,0 +1,8 @@
torch>=2.0.0
torchvision>=0.15.0
ultralytics>=8.0.0
opencv-python>=4.7.0
pillow>=9.4.0
numpy>=1.23.5
matplotlib>=3.7.0
gradio>=3.32.0
visualization_helper.py
ADDED
@@ -0,0 +1,147 @@
import cv2
import numpy as np
import matplotlib.pyplot as plt
from typing import Any, List, Dict, Tuple, Optional
import io
from PIL import Image

class VisualizationHelper:
    """Helper class for visualizing detection results"""

    @staticmethod
    def visualize_detection(image: Any, result: Any, color_mapper: Optional[Any] = None,
                            figsize: Tuple[int, int] = (12, 12),
                            return_pil: bool = False) -> Optional[Image.Image]:
        """
        Visualize detection results on a single image

        Args:
            image: Image path or numpy array
            result: Detection result object
            color_mapper: ColorMapper instance for consistent colors
            figsize: Figure size
            return_pil: If True, returns a PIL Image object

        Returns:
            PIL Image if return_pil is True, otherwise displays the plot
        """
        if result is None:
            print('No data for visualization')
            return None

        # Read the image if a path is provided
        if isinstance(image, str):
            img = cv2.imread(image)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        else:
            img = image
            if len(img.shape) == 3 and img.shape[2] == 3:
                # Check for BGR format (OpenCV) and convert to RGB if needed
                if isinstance(img, np.ndarray):
                    # Assuming BGR format from OpenCV
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Create the figure
        fig, ax = plt.subplots(figsize=figsize)
        ax.imshow(img)

        # Get bounding boxes, classes and confidences
        boxes = result.boxes.xyxy.cpu().numpy()
        classes = result.boxes.cls.cpu().numpy()
        confs = result.boxes.conf.cpu().numpy()

        # Get class names
        names = result.names

        # Create a default color mapper if none is provided
        if color_mapper is None:
            # For backward compatibility, fall back to a simple color function
            from matplotlib import colormaps
            cmap = colormaps['tab10']
            def get_color(class_id):
                return cmap(class_id % 10)
        else:
            # Use the provided color mapper
            def get_color(class_id):
                hex_color = color_mapper.get_color(class_id)
                # Convert hex to RGB float values for matplotlib
                hex_color = hex_color.lstrip('#')
                return tuple(int(hex_color[i:i+2], 16) / 255 for i in (0, 2, 4)) + (1.0,)

        # Draw detection results
        for box, cls, conf in zip(boxes, classes, confs):
            x1, y1, x2, y2 = box
            cls_id = int(cls)
            cls_name = names[cls_id]

            # Get the color for this class
            box_color = get_color(cls_id)

            # Add a text label with a colored background
            ax.text(x1, y1 - 5, f'{cls_name}: {conf:.2f}',
                    color='white', fontsize=10,
                    bbox=dict(facecolor=box_color[:3], alpha=0.7))

            # Add the bounding box
            ax.add_patch(plt.Rectangle((x1, y1), x2-x1, y2-y1,
                                       fill=False, edgecolor=box_color[:3], linewidth=2))

        ax.axis('off')
        plt.tight_layout()

        if return_pil:
            # Convert the plot to a PIL Image
            buf = io.BytesIO()
            fig.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
            buf.seek(0)
            pil_img = Image.open(buf)
            plt.close(fig)
            return pil_img
        else:
            plt.show()
            return None

    @staticmethod
    def create_summary(result: Any) -> Dict:
        """
        Create a summary of detection results

        Args:
            result: Detection result object

        Returns:
            Dictionary with detection summary statistics
        """
        if result is None:
            return {"error": "No detection result provided"}

        # Get classes and confidences
        classes = result.boxes.cls.cpu().numpy().astype(int)
        confidences = result.boxes.conf.cpu().numpy()
        names = result.names

        # Count detections by class
        class_counts = {}
        for cls, conf in zip(classes, confidences):
            cls_name = names[int(cls)]
            if cls_name not in class_counts:
                class_counts[cls_name] = {"count": 0, "confidences": []}

            class_counts[cls_name]["count"] += 1
            class_counts[cls_name]["confidences"].append(float(conf))

        # Calculate the average confidence for each class
        for cls_name, stats in class_counts.items():
            if stats["confidences"]:
                stats["average_confidence"] = float(np.mean(stats["confidences"]))
            stats.pop("confidences")  # Remove the detailed confidence list to keep the summary concise

        # Prepare the summary
        summary = {
            "total_objects": len(classes),
            "class_counts": class_counts,
            "unique_classes": len(class_counts)
        }

        return summary
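
As a usage note, return_pil=True is what app.py relies on to feed the annotated figure into gr.Image. A standalone sketch under the same sample.jpg assumption as above (the output filename is hypothetical):

from detection_model import DetectionModel
from color_mapper import ColorMapper
from visualization_helper import VisualizationHelper

model = DetectionModel("yolov8n.pt")
result = model.detect("sample.jpg")

# return_pil=True yields a PIL image instead of calling plt.show()
annotated = VisualizationHelper.visualize_detection(
    "sample.jpg", result, color_mapper=ColorMapper(), return_pil=True
)
if annotated is not None:
    annotated.save("annotated.jpg")  # hypothetical output file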