Spaces:

winamnd
/

ocr-llm-test

Running

App Files Community

winamnd committed on Feb 16

Commit

4639dba

verified ·

1 Parent(s): dad8a00

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -38

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
 import torch
 import json
 import os
 import cv2
 import numpy as np
@@ -9,7 +10,7 @@ import keras_ocr
 from paddleocr import PaddleOCR
 from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
 import torch.nn.functional as F
-from save_results import save_results_to_repo
 # Paths
 MODEL_PATH = "./distilbert_spam_model"
@@ -26,10 +27,10 @@ else:
     model = DistilBertForSequenceClassification.from_pretrained(MODEL_PATH)
     tokenizer = DistilBertTokenizer.from_pretrained(MODEL_PATH)
-# Set model to evaluation mode
 model.eval()
-# OCR Methods
 def ocr_with_paddle(img):
     ocr = PaddleOCR(lang='en', use_angle_cls=True)
     result = ocr.ocr(img)
@@ -47,10 +48,10 @@ def ocr_with_easy(img):
     results = reader.readtext(gray_image, detail=0)
     return ' '.join(results)
-# OCR Extraction Function
-def extract_text(method, img):
     if img is None:
-        return "Error: Please upload an image!", ""
     # Convert PIL Image to OpenCV format
     img = np.array(img)
@@ -63,52 +64,42 @@ def extract_text(method, img):
     else:  # KerasOCR
         text_output = ocr_with_keras(img)
-    # Clean extracted text
     text_output = text_output.strip()
     if len(text_output) == 0:
-        return "No text detected!", ""
-    return text_output, ""
-# Classification Function
-def classify_text(text_output):
-    if text_output.strip() in ["No text detected!", "Error: Please upload an image!"]:
-        return text_output, "Cannot classify"
-    # Tokenize text
     inputs = tokenizer(text_output, return_tensors="pt", truncation=True, padding=True, max_length=512)
-    # Model inference
     with torch.no_grad():
         outputs = model(**inputs)
-        probs = F.softmax(outputs.logits, dim=1)
-        prediction = torch.argmax(probs, dim=1).item()
-    label_map = {0: "Not Spam", 1: "Spam"}
-    label = label_map[prediction]
-    # Save results automatically
     save_results_to_repo(text_output, label)
     return text_output, label
 # Gradio Interface
-with gr.Blocks() as demo:
-    gr.Markdown("## OCR Spam Classifier")
-    method_input = gr.Radio(["PaddleOCR", "EasyOCR", "KerasOCR"], value="PaddleOCR", label="Choose OCR Method")
-    image_input = gr.Image(label="Upload Image")
-    extract_button = gr.Button("Submit")
-    classify_button = gr.Button("Classify")
-    output_text = gr.Textbox(label="Extracted Text", interactive=True)
-    output_label = gr.Textbox(label="Spam Classification", interactive=False)
-    # Button Click Bindings
-    extract_button.click(fn=extract_text, inputs=[method_input, image_input], outputs=[output_text, output_label])
-    classify_button.click(fn=classify_text, inputs=[output_text], outputs=[output_text, output_label])
 # Launch App
 if __name__ == "__main__":

 import gradio as gr
 import torch
 import json
+import csv
 import os
 import cv2
 import numpy as np
 from paddleocr import PaddleOCR
 from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
 import torch.nn.functional as F
+from save_results import save_results_to_repo  # Import the save function
 # Paths
 MODEL_PATH = "./distilbert_spam_model"
     model = DistilBertForSequenceClassification.from_pretrained(MODEL_PATH)
     tokenizer = DistilBertTokenizer.from_pretrained(MODEL_PATH)
+# 🔹 Ensure model is in evaluation mode
 model.eval()
+# OCR Functions (No changes here)
 def ocr_with_paddle(img):
     ocr = PaddleOCR(lang='en', use_angle_cls=True)
     result = ocr.ocr(img)
     results = reader.readtext(gray_image, detail=0)
     return ' '.join(results)
+# OCR & Classification Function
+def generate_ocr(method, img):
     if img is None:
+        raise gr.Error("Please upload an image!")
     # Convert PIL Image to OpenCV format
     img = np.array(img)
     else:  # KerasOCR
         text_output = ocr_with_keras(img)
+    # 🔹 Preprocess text properly
     text_output = text_output.strip()
     if len(text_output) == 0:
+        return "No text detected!", "Cannot classify"
+    # 🔹 Tokenize text
     inputs = tokenizer(text_output, return_tensors="pt", truncation=True, padding=True, max_length=512)
+    # 🔹 Perform inference
     with torch.no_grad():
         outputs = model(**inputs)
+        probs = F.softmax(outputs.logits, dim=1)  # Convert logits to probabilities
+        spam_prob = probs[0][1].item()  # Probability of Spam
+    # 🔹 Adjust classification based on threshold (better than argmax)
+    label = "Spam" if spam_prob > 0.5 else "Not Spam"
+    # 🔹 Save results using external function
     save_results_to_repo(text_output, label)
     return text_output, label
 # Gradio Interface
+image_input = gr.Image()
+method_input = gr.Radio(["PaddleOCR", "EasyOCR", "KerasOCR"], value="PaddleOCR")
+output_text = gr.Textbox(label="Extracted Text")
+output_label = gr.Textbox(label="Spam Classification")
+demo = gr.Interface(
+    generate_ocr,
+    inputs=[method_input, image_input],
+    outputs=[output_text, output_label],
+    title="OCR Spam Classifier",
+    description="Upload an image, extract text, and classify it as Spam or Not Spam.",
+    theme="compact",
+)
 # Launch App
 if __name__ == "__main__":