Spaces:

syedfaisalabrar
/

License_Classification

Sleeping

App Files Community

syedfaisalabrar committed on Feb 25

Commit

d144786

verified ·

1 Parent(s): 6246e83

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -72

app.py CHANGED Viewed

@@ -2,43 +2,54 @@ import gradio as gr
 import torch
 import cv2
 import numpy as np
-from PIL import Image, ImageEnhance
 from ultralytics import YOLO
 model_path = "best.pt"
 model = YOLO(model_path)
-def preprocessing(image):
-    image = Image.fromarray(np.array(image))
-    image = ImageEnhance.Sharpness(image).enhance(2.0)
-    image = ImageEnhance.Contrast(image).enhance(1.5)
-    image = ImageEnhance.Brightness(image).enhance(0.8)
-    width = 800
-    aspect_ratio = image.height / image.width
-    height = int(width * aspect_ratio)
-    image = image.resize((width, height))
     return image
 def imageRotation(image):
-    """Dummy function for image rotation."""
     return image
-def detect_document(image):
-    """Detects front and back of the document using YOLO."""
-    image = np.array(image)
-    results = model(image, conf=0.85)
-    detected_classes = set()
     labels = []
-    bounding_boxes = []
     for result in results:
         for box in result.boxes:
@@ -48,71 +59,43 @@ def detect_document(image):
             class_name = model.names[cls]
             detected_classes.add(class_name)
-            label = f"{class_name} {conf:.2f}"
-            labels.append(label)
-            bounding_boxes.append((x1, y1, x2, y2, class_name, conf))  # Store bounding box with class and confidence
-            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
-            cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
     possible_classes = {"front", "back"}
     missing_classes = possible_classes - detected_classes
     if missing_classes:
         labels.append(f"Missing: {', '.join(missing_classes)}")
-    return Image.fromarray(image), labels, bounding_boxes
-def crop_image(image, bounding_boxes):
-    """Crops detected bounding boxes from the image."""
-    cropped_images = {}
-    image = np.array(image)
-    for (x1, y1, x2, y2, class_name, conf) in bounding_boxes:
-        cropped = image[y1:y2, x1:x2]
-        cropped_images[class_name] = Image.fromarray(cropped)
-    return cropped_images
-def vision_ai_api(image, doc_type):
-    """Dummy API call for Vision AI, returns a fake JSON response."""
-    return {
-        "document_type": doc_type,
-        "extracted_text": "Dummy OCR result for " + doc_type,
-        "confidence": 0.99
-    }
-# ---------------- Prediction Function ---------------- #
-def predict(image):
-    """Pipeline: Preprocess -> Detect -> Crop -> Vision AI API."""
-    processed_image = preprocessing(image)
-    rotated_image = imageRotation(processed_image)
-    detected_image, labels, bounding_boxes = detect_document(rotated_image)
-    cropped_images = crop_image(rotated_image, bounding_boxes)
-    # Call Vision AI separately for front and back if detected
-    front_result, back_result = None, None
-    if "front" in cropped_images:
-        front_result = vision_ai_api(cropped_images["front"], "front")
-    if "back" in cropped_images:
-        back_result = vision_ai_api(cropped_images["back"], "back")
-    api_results = {
-        "front": front_result,
-        "back": back_result
-    }
-    return detected_image, labels, api_results
 iface = gr.Interface(
     fn=predict,
     inputs="image",
-    outputs=["image", "text", "json"],
-    title="License Field Detection (Front & Back Card)"
 )
 iface.launch()

 import torch
 import cv2
 import numpy as np
+from PIL import Image
 from ultralytics import YOLO
+import json
 model_path = "best.pt"
 model = YOLO(model_path)
+def preprocess_image(image):
+    """Apply enhancement filters and resize image before detection."""
+    image = np.array(image)
+    image = cv2.convertScaleAbs(image, alpha=0.8, beta=0)  # Brightness reduction
+    image = cv2.GaussianBlur(image, (3, 3), 0)  # Denoising
+    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])  # Sharpening
+    image = cv2.filter2D(image, -1, kernel)
+    height, width = image.shape[:2]
+    new_width = 800
+    new_height = int((new_width / width) * height)
+    image = cv2.resize(image, (new_width, new_height))
     return image
 def imageRotation(image):
+    """Dummy function for now."""
     return image
+def vision_ai_api(image, label):
+    """Dummy function simulating API call. Returns dummy JSON response."""
+    return {
+        "label": label,
+        "extracted_data": {
+            "name": "John Doe",
+            "dob": "01-01-1990",
+            "id_number": "1234567890"
+        }
+    }
+def predict(image):
+    image = preprocess_image(image)  # Apply preprocessing
+    results = model(image, conf=0.85)
+    detected_classes = set()
     labels = []
+    cropped_images = {}
     for result in results:
         for box in result.boxes:
             class_name = model.names[cls]
             detected_classes.add(class_name)
+            labels.append(f"{class_name} {conf:.2f}")
+            # Crop detected region
+            cropped = image[y1:y2, x1:x2]
+            cropped_pil = Image.fromarray(cropped)
+            # Call Vision AI API separately for front & back
+            api_response = vision_ai_api(cropped_pil, class_name)
+            # Store cropped images & API response
+            cropped_images[class_name] = {
+                "image": cropped_pil,
+                "api_response": json.dumps(api_response, indent=4)
+            }
+    # Identify missing classes
     possible_classes = {"front", "back"}
     missing_classes = possible_classes - detected_classes
     if missing_classes:
         labels.append(f"Missing: {', '.join(missing_classes)}")
+    # Prepare Gradio outputs (separate front & back images and responses)
+    front_image = cropped_images.get("front", {}).get("image", None)
+    back_image = cropped_images.get("back", {}).get("image", None)
+    front_response = cropped_images.get("front", {}).get("api_response", "{}")
+    back_response = cropped_images.get("back", {}).get("api_response", "{}")
+    return front_image, front_response, back_image, back_response, labels
+# Gradio Interface
 iface = gr.Interface(
     fn=predict,
     inputs="image",
+    outputs=["image", "text", "image", "text", "text"],
+    title="License Field Detection (Front & Back Card)",
+    description="Detect front & back of a license card, crop the images, and call Vision AI API separately for each."
 )
 iface.launch()