syedfaisalabrar committed on
Commit
f9de43f
·
verified ·
1 Parent(s): b9c0709

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -67
app.py CHANGED
@@ -4,16 +4,17 @@ import cv2
4
  import numpy as np
5
  from PIL import Image, ImageEnhance
6
  from ultralytics import YOLO
7
- import json
8
 
 
9
  model_path = "best.pt"
10
  model = YOLO(model_path)
11
 
12
- def preprocess_image(image):
13
- """Preprocesses the image: enhances sharpness, contrast, brightness, and resizes it."""
14
- if isinstance(image, np.ndarray): # Ensure it's a PIL image
15
- image = Image.fromarray(image)
16
 
 
17
  image = ImageEnhance.Sharpness(image).enhance(2.0) # Increase sharpness
18
  image = ImageEnhance.Contrast(image).enhance(1.5) # Increase contrast
19
  image = ImageEnhance.Brightness(image).enhance(0.8) # Reduce brightness
@@ -26,90 +27,94 @@ def preprocess_image(image):
26
 
27
  return image
28
 
29
def vision_ai_api(image, label):
    """Stand-in for the Vision AI call; returns a placeholder response dict.

    The image is accepted but never inspected. The response echoes ``label``
    and carries dashed placeholders for every extracted field.
    """
    placeholder = "-------"
    extracted = {field: placeholder for field in ("name", "dob", "id_number")}
    return {"label": label, "extracted_data": extracted}
39
-
40
def predict(image):
    """Detect card sides with YOLO, crop each one and attach a Vision AI response.

    Returns a 5-tuple: front crop, front API response (JSON text), back crop,
    back API response (JSON text), and the list of detection labels. When no
    valid crop was produced, fixed "not detected" placeholders are returned.
    """
    # Preprocessing works on PIL images, so wrap raw arrays first.
    pil_input = Image.fromarray(image) if isinstance(image, np.ndarray) else image

    # The YOLO model and the cropping below both work on a NumPy array.
    image_np = np.array(preprocess_image(pil_input))
    height, width = image_np.shape[:2]

    results = model(image_np, conf=0.80)

    detected_classes = set()
    labels = []
    cropped_images = {}

    for result in results:
        boxes = result.boxes
        if boxes is None or len(boxes) == 0:
            print("No objects detected.")
            continue

        for box in boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = box.conf[0]
            class_name = model.names[int(box.cls[0])]

            print(f"Detected: {class_name} ({conf:.2f}) at [{x1}, {y1}, {x2}, {y2}]")

            detected_classes.add(class_name)
            labels.append(f"{class_name} {conf:.2f}")

            # Keep the crop window inside the image.
            left, top = max(0, x1), max(0, y1)
            right, bottom = min(width, x2), min(height, y2)
            if not (left < right and top < bottom):
                print("Invalid bounding box, skipping.")
                continue

            cropped_pil = Image.fromarray(image_np[top:bottom, left:right])
            cropped_images[class_name] = {
                "image": cropped_pil,
                "api_response": json.dumps(
                    vision_ai_api(cropped_pil, class_name), indent=4
                ),
            }

    if not cropped_images:
        return None, "No front detected", None, "No back detected", ["No valid detections"]

    front = cropped_images.get("front", {})
    back = cropped_images.get("back", {})
    return (
        front.get("image", None),
        front.get("api_response", "{}"),
        back.get("image", None),
        back.get("api_response", "{}"),
        labels,
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
 
106
 
 
107
# predict() returns five values (front crop, front API response, back crop,
# back API response, labels), so the interface needs one output component for
# each of them; declaring only two leaves the extra return values unmapped.
iface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),  # Ensure input is PIL image
    outputs=["image", "text", "image", "text", "text"],
    title="License Field Detection (Front & Back Card)",
    description="Detect front & back of a license card, crop the images, and call Vision AI API separately for each.",
)

iface.launch()
 
4
  import numpy as np
5
  from PIL import Image, ImageEnhance
6
  from ultralytics import YOLO
 
7
 
8
+ # Load YOLOv11 Model
9
  model_path = "best.pt"
10
  model = YOLO(model_path)
11
 
12
+ # ---------------- Preprocessing Function ---------------- #
13
+ def preprocessing(image):
14
+ """Apply three enhancement filters, including brightness reduction, and resize."""
15
+ image = Image.fromarray(np.array(image))
16
 
17
+ # Apply enhancements
18
  image = ImageEnhance.Sharpness(image).enhance(2.0) # Increase sharpness
19
  image = ImageEnhance.Contrast(image).enhance(1.5) # Increase contrast
20
  image = ImageEnhance.Brightness(image).enhance(0.8) # Reduce brightness
 
27
 
28
  return image
29
 
30
+ # ---------------- Dummy Image Rotation Function ---------------- #
31
def imageRotation(image):
    """Placeholder rotation step: hands the image back untouched."""
    return image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
+ # ---------------- Document Detection Function ---------------- #
36
def detect_document(image):
    """Detect the front and back of a document with YOLO and annotate them.

    Args:
        image: Input picture; anything ``np.array`` can convert.

    Returns:
        A 3-tuple ``(annotated, labels, bounding_boxes)``:
        - annotated: PIL image with a green rectangle and caption drawn for
          every detection.
        - labels: ``"<class> <confidence>"`` strings, followed by a
          ``"Missing: ..."`` entry when "front" or "back" was not detected.
        - bounding_boxes: ``(x1, y1, x2, y2, class_name, conf)`` tuples with
          integer corners and a plain-float confidence.
    """
    # np.array() makes a copy, so the drawing below never touches the caller's image.
    canvas = np.array(image)
    results = model(canvas, conf=0.85)

    detected_classes = set()
    labels = []
    bounding_boxes = []

    for result in results:
        # NOTE(review): ultralytics documents `boxes` as optional; skip results
        # that carry none instead of failing on iteration.
        if result.boxes is None:
            continue

        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            # Store a plain float so callers get an ordinary number rather
            # than a tensor element.
            conf = float(box.conf[0])
            class_name = model.names[int(box.cls[0])]

            detected_classes.add(class_name)
            label = f"{class_name} {conf:.2f}"
            labels.append(label)
            bounding_boxes.append((x1, y1, x2, y2, class_name, conf))

            cv2.rectangle(canvas, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(
                canvas, label, (x1, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2,
            )

    missing_classes = {"front", "back"} - detected_classes
    if missing_classes:
        # Sorted so the message is stable; set iteration order is not.
        labels.append(f"Missing: {', '.join(sorted(missing_classes))}")

    return Image.fromarray(canvas), labels, bounding_boxes
 
 
66
 
67
+ # ---------------- Cropping Function ---------------- #
68
def crop_image(image, bounding_boxes):
    """Crop each detected bounding box out of the image.

    Args:
        image: Source picture; anything ``np.array`` can convert.
        bounding_boxes: Iterable of ``(x1, y1, x2, y2, class_name, conf)``
            tuples with integer pixel corners.

    Returns:
        Dict mapping class name to the cropped PIL image. If a class appears
        more than once, the later box replaces the earlier one. Boxes that
        fall entirely outside the image, or have no area, are skipped.
    """
    cropped_images = {}
    pixels = np.array(image)
    height, width = pixels.shape[:2]

    for x1, y1, x2, y2, class_name, _conf in bounding_boxes:
        # Clamp to the image: a negative corner would otherwise be read by
        # NumPy as an index from the far edge and crop the wrong region.
        left, top = max(0, x1), max(0, y1)
        right, bottom = min(width, x2), min(height, y2)

        # An empty slice cannot be turned into an image; skip it.
        if left >= right or top >= bottom:
            continue

        cropped_images[class_name] = Image.fromarray(pixels[top:bottom, left:right])

    return cropped_images
 
 
78
 
79
+ # ---------------- Vision AI API Call (Dummy) ---------------- #
80
def vision_ai_api(image, doc_type):
    """Stand-in for the Vision AI service; fabricates a fixed-shape response.

    The image is accepted but never inspected. The response echoes
    ``doc_type`` and carries a canned OCR string and confidence.
    """
    response = {"document_type": doc_type}
    response["extracted_text"] = "Dummy OCR result for " + doc_type
    response["confidence"] = 0.99
    return response
87
+
88
+ # ---------------- Prediction Function ---------------- #
89
def predict(image):
    """Run the full pipeline: preprocess, rotate, detect, crop, Vision AI.

    Returns the annotated detection image, the list of detection labels, and
    a dict with the Vision AI response for "front" and "back" (``None`` for a
    side that was not cropped).
    """
    # Rotation is currently a placeholder step.
    prepared = imageRotation(preprocessing(image))
    detected_image, labels, bounding_boxes = detect_document(prepared)

    # Crops come from the prepared image, not the annotated one.
    crops = crop_image(prepared, bounding_boxes)

    # One Vision AI call per side that was actually found.
    api_results = {
        side: vision_ai_api(crops[side], side) if side in crops else None
        for side in ("front", "back")
    }

    return detected_image, labels, api_results
111
 
112
+ # ---------------- Gradio Interface ---------------- #
113
# Wire predict() to the UI: one image in; annotated image, label text and
# the combined Vision AI JSON out.
iface = gr.Interface(
    predict,
    "image",
    ["image", "text", "json"],
    title="License Field Detection (Front & Back Card)",
)

iface.launch()