Spaces:

syedfaisalabrar
/

License_Classification

Sleeping

App Files Files Community

syedfaisalabrar committed on Feb 25

Commit

f9de43f

verified ·

1 Parent(s): b9c0709

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -67

app.py CHANGED Viewed

@@ -4,16 +4,17 @@ import cv2
 import numpy as np
 from PIL import Image, ImageEnhance
 from ultralytics import YOLO
-import json
 model_path = "best.pt"
 model = YOLO(model_path)
-def preprocess_image(image):
-    """Preprocesses the image: enhances sharpness, contrast, brightness, and resizes it."""
-    if isinstance(image, np.ndarray):  # Ensure it's a PIL image
-        image = Image.fromarray(image)
     image = ImageEnhance.Sharpness(image).enhance(2.0)  # Increase sharpness
     image = ImageEnhance.Contrast(image).enhance(1.5)   # Increase contrast
     image = ImageEnhance.Brightness(image).enhance(0.8) # Reduce brightness
@@ -26,90 +27,94 @@ def preprocess_image(image):
     return image
-def vision_ai_api(image, label):
-    """Dummy function simulating an API call. Returns dummy JSON response."""
-    return {
-        "label": label,
-        "extracted_data": {
-            "name": "-------",
-            "dob": "-------",
-            "id_number": "-------"
-        }
-    }
-def predict(image):
-    """Runs YOLO object detection on the input image and processes detected regions."""
-    # Ensure image is PIL format before preprocessing
-    if isinstance(image, np.ndarray):
-        image = Image.fromarray(image)
-    image = preprocess_image(image)  # Apply preprocessing
-    # Convert image to NumPy array for YOLO model
-    image_np = np.array(image)
-    # Run YOLO prediction
-    results = model(image_np, conf=0.80)
-    detected_classes = set()
     labels = []
-    cropped_images = {}
-    # Ensure results contain boxes
     for result in results:
-        if result.boxes is None or len(result.boxes) == 0:
-            print("No objects detected.")
-            continue
         for box in result.boxes:
             x1, y1, x2, y2 = map(int, box.xyxy[0])
             conf = box.conf[0]
             cls = int(box.cls[0])
             class_name = model.names[cls]
-            print(f"Detected: {class_name} ({conf:.2f}) at [{x1}, {y1}, {x2}, {y2}]")
             detected_classes.add(class_name)
-            labels.append(f"{class_name} {conf:.2f}")
-            # Ensure bounding boxes are within the image dimensions
-            height, width = image_np.shape[:2]
-            x1, y1, x2, y2 = max(0, x1), max(0, y1), min(width, x2), min(height, y2)
-            if x1 >= x2 or y1 >= y2:
-                print("Invalid bounding box, skipping.")
-                continue
-            # Crop the detected region
-            cropped = image_np[y1:y2, x1:x2]
-            cropped_pil = Image.fromarray(cropped)  # Convert to PIL for API
-            # Call API
-            api_response = vision_ai_api(cropped_pil, class_name)
-            cropped_images[class_name] = {
-                "image": cropped_pil,
-                "api_response": json.dumps(api_response, indent=4)
-            }
-    # Ensure outputs exist even if no detections were made
-    if not cropped_images:
-        return None, "No front detected", None, "No back detected", ["No valid detections"]
-    return (
-        cropped_images.get("front", {}).get("image", None),
-        cropped_images.get("front", {}).get("api_response", "{}"),
-        cropped_images.get("back", {}).get("image", None),
-        cropped_images.get("back", {}).get("api_response", "{}"),
-        labels
-    )
 iface = gr.Interface(
     fn=predict,
-    inputs=gr.Image(type="pil"),  # Ensure input is PIL image
-    outputs=["image", "text"],
-    title="License Field Detection (Front & Back Card)",
-    description="Detect front & back of a license card, crop the images, and call Vision AI API separately for each."
 )
 iface.launch()

 import numpy as np
 from PIL import Image, ImageEnhance
 from ultralytics import YOLO
+# Load YOLOv11 Model
+# The model object is built once at module level from the weights file named
+# below, and detect_document() reuses it for every prediction.
 model_path = "best.pt"
 model = YOLO(model_path)
+# ---------------- Preprocessing Function ---------------- #
+def preprocessing(image):
+    """Apply three enhancement filters, including brightness reduction, and resize."""
+    image = Image.fromarray(np.array(image))
+    # Apply enhancements
     image = ImageEnhance.Sharpness(image).enhance(2.0)  # Increase sharpness
     image = ImageEnhance.Contrast(image).enhance(1.5)   # Increase contrast
     image = ImageEnhance.Brightness(image).enhance(0.8) # Reduce brightness
     return image
+# ---------------- Dummy Image Rotation Function ---------------- #
+def imageRotation(image):
+    """Placeholder rotation step: hand the input back untouched.
+
+    No rotation is performed; the very same object that was passed in
+    is returned.
+    """
+    unrotated = image
+    return unrotated
+# ---------------- Document Detection Function ---------------- #
+def detect_document(image):
+    """Detect the front and back of the document using YOLO.
+
+    Args:
+        image: Anything ``np.array`` can turn into an image array.
+
+    Returns:
+        A ``(annotated, labels, bounding_boxes)`` tuple. ``annotated`` is a
+        PIL image with every detection drawn on it. ``labels`` holds one
+        ``"<class> <confidence>"`` string per detection, followed by a
+        ``"Missing: ..."`` entry when "front" or "back" was not detected.
+        ``bounding_boxes`` holds one
+        ``(x1, y1, x2, y2, class_name, conf)`` tuple per detection.
+    """
+    # np.array() copies, so the rectangles drawn below never touch the
+    # caller's image.
+    image = np.array(image)
+    results = model(image, conf=0.85)
+    detected_classes = set()
     labels = []
+    bounding_boxes = []
     for result in results:
+        # A result can carry no box container at all; iterating None
+        # would raise, so skip it.
+        if result.boxes is None:
+            continue
         for box in result.boxes:
             x1, y1, x2, y2 = map(int, box.xyxy[0])
             conf = box.conf[0]
             cls = int(box.cls[0])
             class_name = model.names[cls]
             detected_classes.add(class_name)
+            label = f"{class_name} {conf:.2f}"
+            labels.append(label)
+            bounding_boxes.append((x1, y1, x2, y2, class_name, conf))  # Store bounding box with class and confidence
+            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
+            # putText anchors the text at its bottom-left corner; keep the
+            # baseline inside the image when the box touches the top edge.
+            label_y = max(y1 - 10, 15)
+            cv2.putText(image, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+    possible_classes = {"front", "back"}
+    missing_classes = possible_classes - detected_classes
+    if missing_classes:
+        # Sets have no stable order; sort so the message is the same on
+        # every run.
+        labels.append(f"Missing: {', '.join(sorted(missing_classes))}")
+    return Image.fromarray(image), labels, bounding_boxes
+# ---------------- Cropping Function ---------------- #
+def crop_image(image, bounding_boxes):
+    """Crop the detected regions out of the image, one crop per class.
+
+    Args:
+        image: Source image; converted with ``np.array`` before slicing.
+        bounding_boxes: Iterable of ``(x1, y1, x2, y2, class_name, conf)``
+            tuples in pixel coordinates.
+
+    Returns:
+        Dict mapping each class name to its cropped PIL image. When a class
+        has several boxes, the one with the highest confidence is kept.
+        Boxes that are empty after clamping to the image are skipped.
+    """
+    image = np.array(image)
+    height, width = image.shape[:2]
+    cropped_images = {}
+    best_conf = {}
+    for (x1, y1, x2, y2, class_name, conf) in bounding_boxes:
+        # Clamp to the image bounds: a negative start index would make
+        # NumPy count from the far edge and crop the wrong region.
+        x1, y1 = max(0, x1), max(0, y1)
+        x2, y2 = min(width, x2), min(height, y2)
+        if x1 >= x2 or y1 >= y2:
+            continue
+        # Keep the strongest detection per class instead of letting a
+        # later box overwrite it unconditionally.
+        score = float(conf)
+        if class_name in best_conf and score <= best_conf[class_name]:
+            continue
+        best_conf[class_name] = score
+        cropped_images[class_name] = Image.fromarray(image[y1:y2, x1:x2])
+    return cropped_images
+# ---------------- Vision AI API Call (Dummy) ---------------- #
+def vision_ai_api(image, doc_type):
+    """Stand-in for the Vision AI call; build a canned response.
+
+    The image argument is accepted but never inspected. ``doc_type`` is
+    echoed back and appended to the placeholder OCR text.
+    """
+    ocr_text = "Dummy OCR result for " + doc_type
+    response = dict(document_type=doc_type, extracted_text=ocr_text)
+    response["confidence"] = 0.99
+    return response
+# ---------------- Prediction Function ---------------- #
+def predict(image):
+    """Pipeline: Preprocess -> Detect -> Crop -> Vision AI API.
+
+    Args:
+        image: Input image, or None when nothing was submitted.
+
+    Returns:
+        ``(detected_image, labels, api_results)``: the annotated image, the
+        list of label strings, and a dict with "front" and "back" keys
+        holding the Vision AI response for each side (None for a side that
+        was not cropped). With no input image the result is
+        ``(None, ["No image provided"], {"front": None, "back": None})``.
+    """
+    if image is None:
+        # Nothing to process; report it instead of failing inside the
+        # image conversion.
+        return None, ["No image provided"], {"front": None, "back": None}
+    processed_image = preprocessing(image)
+    rotated_image = imageRotation(processed_image)  # Placeholder for rotation
+    detected_image, labels, bounding_boxes = detect_document(rotated_image)
+    # Crop from the un-annotated image so the drawn boxes and labels stay
+    # out of the crops.
+    cropped_images = crop_image(rotated_image, bounding_boxes)
+    # Call Vision AI separately for front and back if detected
+    api_results = {
+        side: vision_ai_api(cropped_images[side], side) if side in cropped_images else None
+        for side in ("front", "back")
+    }
+    return detected_image, labels, api_results
+# ---------------- Gradio Interface ---------------- #
+# Expose predict() through a Gradio interface: one image input, and three
+# outputs rendered as an image, a text field and a JSON view.
 iface = gr.Interface(
     fn=predict,
+    inputs="image",
+    outputs=["image", "text", "json"],
+    title="License Field Detection (Front & Back Card)"
 )
+# Launched unconditionally whenever this module is executed.
 iface.launch()