syedfaisalabrar committed on
Commit d144786 · verified · 1 Parent(s): 6246e83

Update app.py

Files changed (1)
  1. app.py +55 -72
app.py CHANGED
@@ -2,43 +2,54 @@ import gradio as gr
 import torch
 import cv2
 import numpy as np
-from PIL import Image, ImageEnhance
+from PIL import Image
 from ultralytics import YOLO
+import json


 model_path = "best.pt"
 model = YOLO(model_path)

-def preprocessing(image):
-
-    image = Image.fromarray(np.array(image))
-
-    image = ImageEnhance.Sharpness(image).enhance(2.0)
-    image = ImageEnhance.Contrast(image).enhance(1.5)
-    image = ImageEnhance.Brightness(image).enhance(0.8)
-
-    width = 800
-    aspect_ratio = image.height / image.width
-    height = int(width * aspect_ratio)
-    image = image.resize((width, height))
+def preprocess_image(image):
+    """Apply enhancement filters and resize image before detection."""
+    image = np.array(image)
+
+    image = cv2.convertScaleAbs(image, alpha=0.8, beta=0)  # Brightness reduction
+    image = cv2.GaussianBlur(image, (3, 3), 0)  # Denoising
+    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])  # Sharpening
+    image = cv2.filter2D(image, -1, kernel)
+
+    height, width = image.shape[:2]
+    new_width = 800
+    new_height = int((new_width / width) * height)
+    image = cv2.resize(image, (new_width, new_height))

     return image

-
 def imageRotation(image):
-    """Dummy function for image rotation."""
+    """Dummy function for now."""
     return image

-
-def detect_document(image):
-    """Detects front and back of the document using YOLO."""
-    image = np.array(image)
-    results = model(image, conf=0.85)
-
-    detected_classes = set()
+def vision_ai_api(image, label):
+    """Dummy function simulating API call. Returns dummy JSON response."""
+    return {
+        "label": label,
+        "extracted_data": {
+            "name": "John Doe",
+            "dob": "01-01-1990",
+            "id_number": "1234567890"
+        }
+    }
+
+def predict(image):
+    image = preprocess_image(image)  # Apply preprocessing
+
+    results = model(image, conf=0.85)
+    detected_classes = set()
     labels = []
-    bounding_boxes = []
+    cropped_images = {}

     for result in results:
         for box in result.boxes:
@@ -48,71 +59,43 @@ def detect_document(image):
             class_name = model.names[cls]

             detected_classes.add(class_name)
-            label = f"{class_name} {conf:.2f}"
-            labels.append(label)
-            bounding_boxes.append((x1, y1, x2, y2, class_name, conf))  # Store bounding box with class and confidence
+            labels.append(f"{class_name} {conf:.2f}")

-            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
-            cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+            # Crop detected region
+            cropped = image[y1:y2, x1:x2]
+            cropped_pil = Image.fromarray(cropped)
+
+            # Call Vision AI API separately for front & back
+            api_response = vision_ai_api(cropped_pil, class_name)
+
+            # Store cropped images & API response
+            cropped_images[class_name] = {
+                "image": cropped_pil,
+                "api_response": json.dumps(api_response, indent=4)
+            }

+    # Identify missing classes
     possible_classes = {"front", "back"}
     missing_classes = possible_classes - detected_classes
     if missing_classes:
         labels.append(f"Missing: {', '.join(missing_classes)}")

-    return Image.fromarray(image), labels, bounding_boxes
-
-
-def crop_image(image, bounding_boxes):
-    """Crops detected bounding boxes from the image."""
-    cropped_images = {}
-    image = np.array(image)
-
-    for (x1, y1, x2, y2, class_name, conf) in bounding_boxes:
-        cropped = image[y1:y2, x1:x2]
-        cropped_images[class_name] = Image.fromarray(cropped)
-
-    return cropped_images
-
-
-def vision_ai_api(image, doc_type):
-    """Dummy API call for Vision AI, returns a fake JSON response."""
-    return {
-        "document_type": doc_type,
-        "extracted_text": "Dummy OCR result for " + doc_type,
-        "confidence": 0.99
-    }
-
-# ---------------- Prediction Function ---------------- #
-def predict(image):
-    """Pipeline: Preprocess -> Detect -> Crop -> Vision AI API."""
-    processed_image = preprocessing(image)
-    rotated_image = imageRotation(processed_image)
-    detected_image, labels, bounding_boxes = detect_document(rotated_image)
-
-    cropped_images = crop_image(rotated_image, bounding_boxes)
-
-    # Call Vision AI separately for front and back if detected
-    front_result, back_result = None, None
-    if "front" in cropped_images:
-        front_result = vision_ai_api(cropped_images["front"], "front")
-    if "back" in cropped_images:
-        back_result = vision_ai_api(cropped_images["back"], "back")
-
-    api_results = {
-        "front": front_result,
-        "back": back_result
-    }
-
-    return detected_image, labels, api_results
+    # Prepare Gradio outputs (separate front & back images and responses)
+    front_image = cropped_images.get("front", {}).get("image", None)
+    back_image = cropped_images.get("back", {}).get("image", None)
+
+    front_response = cropped_images.get("front", {}).get("api_response", "{}")
+    back_response = cropped_images.get("back", {}).get("api_response", "{}")
+
+    return front_image, front_response, back_image, back_response, labels

+# Gradio Interface
 iface = gr.Interface(
     fn=predict,
     inputs="image",
-    outputs=["image", "text", "json"],
-    title="License Field Detection (Front & Back Card)"
+    outputs=["image", "text", "image", "text", "text"],
+    title="License Field Detection (Front & Back Card)",
+    description="Detect front & back of a license card, crop the images, and call Vision AI API separately for each."
 )

 iface.launch()
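For reviewers who want to sanity-check the rewritten preprocessing without best.pt or the Gradio UI, the sketch below replays the new OpenCV chain from the diff on a synthetic image (the gray 1000x600 array is a placeholder, not a repo asset):

import cv2
import numpy as np

# Replay of preprocess_image() from this commit on a synthetic input.
image = np.full((600, 1000, 3), 128, dtype=np.uint8)  # placeholder image

image = cv2.convertScaleAbs(image, alpha=0.8, beta=0)     # brightness down (~0.8x)
image = cv2.GaussianBlur(image, (3, 3), 0)                # light denoising
kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])  # sharpening kernel
image = cv2.filter2D(image, -1, kernel)

height, width = image.shape[:2]
new_width = 800
new_height = int((new_width / width) * height)
image = cv2.resize(image, (new_width, new_height))

print(image.shape)  # (480, 800, 3): width normalized to 800, aspect ratio kept

Note that predict() now returns five values (front crop, front JSON string, back crop, back JSON string, labels), matching the five entries in outputs=["image", "text", "image", "text", "text"]; when a side is not detected, its image slot is None and its response string falls back to "{}".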