satyam007 committed on
Commit
3a6ab9e
·
verified ·
1 Parent(s): e72b437

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -0
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
3
+ from qwen_vl_utils import process_vision_info
4
+ import torch
5
+ import pandas as pd
6
+ import pytesseract
7
+ import cv2
8
+
9
# Point pytesseract at the Tesseract binary (only works if Tesseract is already
# installed on the hosting server). The setting lives on the inner
# ``pytesseract.pytesseract`` module as ``tesseract_cmd``; assigning to
# ``pytesseract.pytesseract_cmd`` only creates an unused attribute on the package.
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'
11
+
12
# Load the Qwen2-VL checkpoint once at import time and place it on the CPU,
# then load the processor that belongs to the same checkpoint.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct-AWQ", torch_dtype="auto"
).to("cpu")

processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct-AWQ")
20
+
21
+
22
+ # Preprocessing image for OCR
23
def preprocess_image(image_path):
    """Load the image at *image_path* and binarise it for OCR.

    The image is read with OpenCV, converted from BGR to grayscale and then
    thresholded at a fixed level of 150 with ``cv2.THRESH_BINARY`` and a
    maximum value of 255.

    Returns the thresholded image produced by ``cv2.threshold``.
    """
    grayscale = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    binarised = cv2.threshold(grayscale, 150, 255, cv2.THRESH_BINARY)[1]
    return binarised
28
+
29
+
30
+ # OCR-based text extraction
31
def ocr_extract_text(image_path):
    """Return the text Tesseract recognises in the image at *image_path*.

    The image is first run through ``preprocess_image`` and the result is
    handed to ``pytesseract.image_to_string``.
    """
    return pytesseract.image_to_string(preprocess_image(image_path))
34
+
35
+
36
+ # Model-based image processing
37
def process_image(image_path):
    """Extract invoice fields from an image, falling back to OCR on failure.

    The image is sent to the vision-language model together with a prompt
    listing the wanted fields; the model's answer is passed to
    ``parse_details``. If anything in the model path raises, the error is
    printed and the text obtained from ``ocr_extract_text`` is parsed instead.

    Args:
        image_path: Path of the invoice image to analyse.

    Returns:
        The dictionary produced by ``parse_details``.
    """
    try:
        messages = [{
            "role": "user",
            "content": [
                {"type": "image", "image": image_path},
                {"type": "text", "text": (
                    "Extract the following details from the invoice:\n"
                    "- 'invoice_number'\n"
                    "- 'date'\n"
                    "- 'place'\n"
                    "- 'amount' (monetary value in the relevant currency)\n"
                    "- 'category' (based on the invoice type)"
                )}
            ]
        }]

        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = processor(text=[text], images=image_inputs, videos=video_inputs, padding=True, return_tensors="pt")
        inputs = inputs.to(model.device)

        generated_ids = model.generate(**inputs, max_new_tokens=128)
        # The generated sequences start with the prompt tokens. Drop them so
        # that only the model's answer is decoded; otherwise the prompt's own
        # "invoice"/"date"/"place"/"amount" bullet lines reach parse_details
        # and are mistaken for extracted values.
        answer_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(inputs.input_ids, generated_ids)
        ]
        output_text = processor.batch_decode(answer_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)

        return parse_details(output_text[0])

    except Exception as e:
        # Deliberately broad: any failure in the model path triggers the
        # best-effort OCR fallback instead of aborting the request.
        print(f"Model failed, falling back to OCR: {e}")
        ocr_text = ocr_extract_text(image_path)
        return parse_details(ocr_text)
68
+
69
+
70
+ # Parsing details from text
71
def parse_details(details):
    """Parse ``label: value`` style text into a dictionary of invoice fields.

    Each line is split at its FIRST colon into a label and a value, so values
    that themselves contain colons (for example timestamps) are kept intact.
    Keywords are looked up in the label only, which stops a value that merely
    mentions e.g. "invoice" from being routed to the wrong field. A line
    without a colon is treated as both label and value, i.e. the whole
    stripped line is stored. When several lines map to the same field, the
    last one wins.

    Args:
        details: Multi-line text from the model or from OCR.

    Returns:
        A dict with the keys "Invoice Number", "Date", "Place", "Amount" and
        "Category". Fields that were not found are ``None``. "Category" holds
        the value of a category line when one exists; otherwise it is
        "General" if at least one unrecognised line was seen.
    """
    parsed_data = {
        "Invoice Number": None,
        "Date": None,
        "Place": None,
        "Amount": None,
        "Category": None
    }

    for line in details.split("\n"):
        label, separator, value = line.partition(":")
        # No colon: fall back to the whole line as the value.
        value = value.strip() if separator else line.strip()
        lower_label = label.lower()

        if "category" in lower_label:
            parsed_data["Category"] = value
        elif "invoice" in lower_label:
            parsed_data["Invoice Number"] = value
        elif "date" in lower_label:
            parsed_data["Date"] = value
        elif "place" in lower_label:
            parsed_data["Place"] = value
        elif any(keyword in lower_label for keyword in ("total", "amount", "cost")):
            parsed_data["Amount"] = value
        elif parsed_data["Category"] is None:
            # Unrecognised line: default the category, but never overwrite a
            # category that was explicitly parsed from the text.
            parsed_data["Category"] = "General"

    return parsed_data
95
+
96
+
97
+ # Gradio Interface
98
def gradio_interface(image_files):
    """Run invoice extraction on the uploaded files and tabulate the results.

    Args:
        image_files: What the upload component delivers: ``None`` when
            nothing was uploaded, a single file, or a list of files. Each
            file may be a path string or an object exposing the path as
            ``.name``.

    Returns:
        A ``pandas.DataFrame`` with one row per processed file (empty when
        there is nothing to process).
    """
    if image_files is None:
        image_files = []
    elif not isinstance(image_files, (list, tuple)):
        # A single upload arrives as one object; iterating it directly would
        # walk the characters of a path string instead of a list of files.
        image_files = [image_files]

    results = []
    for image_file in image_files:
        # Use the file object's ``.name`` when present, else the value itself
        # (a plain path string).
        image_path = getattr(image_file, "name", image_file)
        results.append(process_image(image_path))

    return pd.DataFrame(results)
106
+
107
+
108
# Build and launch the Gradio app
grpc_interface = gr.Interface(
    fn=gradio_interface,
    # file_count="multiple" makes the component deliver a list of files,
    # which is what the callback's loop over its argument expects.
    inputs=gr.File(
        label="Upload Invoice Images",
        file_types=["image"],
        file_count="multiple",
    ),
    outputs=gr.Dataframe(interactive=True),
    title="Invoice Extraction System"
)

if __name__ == "__main__":
    grpc_interface.launch(share=True)