winamnd committed on
Commit
19736cf
·
verified ·
1 Parent(s): 8a8eafe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -126
app.py CHANGED
@@ -1,144 +1,120 @@
 
 
 
 
 
1
  import os
 
2
  import numpy as np
3
- import json
4
- import shutil
5
- import requests
6
- import re as r
7
- from urllib.request import urlopen
8
  from datetime import datetime
9
- from datasets import Image
 
 
 
10
  from PIL import Image
11
- from huggingface_hub import Repository, upload_file
12
-
13
# Configuration for the Hugging Face dataset repository that stores OCR logs.
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_NAME = "OCR-img-to-text"
DATASET_REPO_URL = "https://huggingface.co/datasets/pragnakalp/OCR-img-to-text"
DATA_FILENAME = "ocr_data.csv"
DATA_FILE = os.path.join("ocr_data", DATA_FILENAME)
DATASET_REPO_ID = "pragnakalp/OCR-img-to-text"
print("is none?", HF_TOKEN is None)
REPOSITORY_DIR = "data"
LOCAL_DIR = 'data_local'
os.makedirs(LOCAL_DIR, exist_ok=True)

# NOTE(review): neither `hf_hub_download` nor `DATA_DIRNAME` is imported or
# defined in the visible source, so this call appears to fail with NameError
# on every start-up. The failure is kept non-fatal (best effort), but it is
# now reported instead of being hidden behind a bare `except:`, which also
# swallowed KeyboardInterrupt/SystemExit.
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=DATA_FILENAME,
        cache_dir=DATA_DIRNAME,
        force_filename=DATA_FILENAME,
    )
except Exception as e:
    print("file not found")
    print("Error occurred during dataset download:", e)

# Bind `repo` up front so later code sees None (not an unbound name) when the
# clone itself fails.
repo = None
try:
    repo = Repository(local_dir="ocr_data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
    repo.git_pull()
except Exception as e:
    print("Error occurred during git pull:", e)
40
 
41
- # repo = Repository(local_dir="ocr_data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
42
- # repo.git_pull()
 
 
 
 
 
 
 
 
 
 
43
 
44
def getIP():
    """Return the public IPv4 address reported by checkip.dyndns.com.

    Returns:
        The dotted-quad address as a string, or '' when the lookup fails or
        the response does not contain an address.
    """
    ip_address = ''
    try:
        # The context manager closes the HTTP response; the timeout keeps a
        # stalled lookup from blocking the caller indefinitely.
        with urlopen('http://checkip.dyndns.com/', timeout=10) as response:
            body = response.read().decode('utf-8', errors='replace')

        match = r.search(r'Address: (\d+\.\d+\.\d+\.\d+)', body)
        if match:
            ip_address = match.group(1)
        else:
            print("Error while getting IP address -->", "no address found in response")
    except Exception as e:
        # Best effort: logging must not fail just because the IP is unknown.
        print("Error while getting IP address -->", e)
    return ip_address
54
 
55
def get_location(ip_addr):
    """Look up location details for an IP address via the pragnakalp demo API.

    Args:
        ip_addr: IP address string sent to the service as the "ip" field.

    Returns:
        The decoded JSON response, or an empty dict when the request or the
        JSON decoding fails.
    """
    location = {}
    try:
        req_data = {
            "ip": ip_addr,
            # NOTE(review): credential is hard-coded in source; consider
            # reading it from the environment like HF_TOKEN.
            "token": "pkml123",
        }
        url = "https://demos.pragnakalp.com/get-ip-location"
        headers = {'Content-Type': 'application/json'}

        # Timeout added so an unresponsive service cannot hang the request.
        response = requests.post(url, headers=headers, data=json.dumps(req_data), timeout=10)
        location = response.json()
        print("response======>>", location)
    except Exception as e:
        # Best effort: location is optional metadata.
        print("Error while getting location -->", e)
    return location
77
 
78
  """
79
- Save generated details
80
  """
81
def dump_json(thing, file):
    """Serialize `thing` as JSON and write it to the path `file` (UTF-8).

    Any existing content of the file is replaced.
    """
    serialized = json.dumps(thing)
    with open(file, 'w', encoding="utf8") as handle:
        handle.write(serialized)
84
 
85
def flag(Method, text_output, input_image):
    """Persist one OCR request locally, upload it to the dataset repo, and notify by HTTP.

    Args:
        Method: Name of the OCR engine that produced `text_output`.
        text_output: Text recognised by the OCR engine.
        input_image: Image as an array accepted by `Image.fromarray`; it is
            also sent as a nested list via `.tolist()`.

    Returns:
        A fixed confirmation string once every step has completed.

    Raises:
        Exception: If the image cannot be written to disk (the original
            error is chained as the cause). Errors from the uploads, the git
            pull, or the notification request propagate unchanged.
    """
    print("saving data------------------------")

    ip_address = getIP()
    print("ip_address :", ip_address)
    location = get_location(ip_address)
    print("location :", location)

    # One directory per request, named by a second-resolution timestamp.
    # NOTE(review): two requests within the same second share a directory.
    metadata_name = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    SAVE_FILE_DIR = os.path.join(LOCAL_DIR, metadata_name)
    os.makedirs(SAVE_FILE_DIR, exist_ok=True)
    image_output_filename = os.path.join(SAVE_FILE_DIR, 'image.png')
    print("image_output_filename :", image_output_filename)
    print(input_image)
    try:
        Image.fromarray(input_image).save(image_output_filename)
    except Exception as err:
        # Chain the cause so the real failure is visible in the traceback.
        raise Exception("Had issues saving np array image to file") from err

    # Write the metadata file next to the image.
    json_file_path = os.path.join(SAVE_FILE_DIR, 'metadata.jsonl')
    metadata = {
        'id': metadata_name,
        'method': Method,
        'file_name': 'image.png',
        'generated_text': text_output,
        'ip': ip_address,
        'location': location,
    }
    dump_json(metadata, json_file_path)

    # Paths inside the repo always use '/', independent of the host OS.
    repo_image_path = "/".join((REPOSITORY_DIR, metadata_name, 'image.png'))
    upload_file(
        path_or_fileobj=image_output_filename,
        path_in_repo=repo_image_path,
        repo_id=DATASET_REPO_ID,
        repo_type='dataset',
        token=HF_TOKEN,
    )

    repo_json_path = "/".join((REPOSITORY_DIR, metadata_name, 'metadata.jsonl'))
    upload_file(
        path_or_fileobj=json_file_path,
        path_in_repo=repo_json_path,
        repo_id=DATASET_REPO_ID,
        repo_type='dataset',
        token=HF_TOKEN,
    )
    repo.git_pull()

    # Notify the external service; the payload includes the full image.
    url = 'http://pragnakalpdev35.pythonanywhere.com/HF_space_image_to_text'
    myobj = {
        'Method': Method,
        'text_output': text_output,
        'img': input_image.tolist(),
        'ip_address': ip_address,
        'loc': location,
    }
    # Timeout added so an unresponsive endpoint cannot block forever.
    x = requests.post(url, json=myobj, timeout=30)
    print("mail status code", x.status_code)

    return "*****Logs save successfully!!!!"
 
1
+ import gradio as gr
2
+ import tensorflow as tf
3
+ import keras_ocr
4
+ import requests
5
+ import cv2
6
  import os
7
+ import csv
8
  import numpy as np
9
+ import pandas as pd
10
+ import huggingface_hub
11
+ from huggingface_hub import Repository
 
 
12
  from datetime import datetime
13
+ import scipy.ndimage.interpolation as inter
14
+ import easyocr
15
+ import datasets
16
+ from datasets import load_dataset, Image
17
  from PIL import Image
18
+ from paddleocr import PaddleOCR
19
+ from save_data import flag
20
+
21
+ """
22
+ Paddle OCR
23
+ """
24
def ocr_with_paddle(img):
    """Run PaddleOCR (English, with angle classification) on an image.

    Args:
        img: Image input passed straight to `PaddleOCR.ocr`.

    Returns:
        The recognised text fragments, each preceded by a single space, or
        '' when nothing is recognised.
    """
    ocr = PaddleOCR(lang='en', use_angle_cls=True)
    result = ocr.ocr(img)

    # Guard against an empty result or a None first entry, which would make
    # the iteration below raise TypeError.
    # NOTE(review): PaddleOCR appears to report "no text" as [None] — confirm.
    if not result or not result[0]:
        return ''

    # Each entry is indexed as entry[1][0] to obtain the recognised text.
    return ''.join(' ' + entry[1][0] for entry in result[0])
 
34
 
35
+ """
36
+ Keras OCR
37
+ """
38
def ocr_with_keras(img):
    """Recognise text in an image with the keras-ocr pipeline.

    Args:
        img: Image input passed to `keras_ocr.tools.read`.

    Returns:
        The recognised words, each preceded by a single space ('' if none).
    """
    recognizer = keras_ocr.pipeline.Pipeline()
    batch = [keras_ocr.tools.read(img)]
    # Only one image is submitted, so only the first prediction list matters.
    words = recognizer.recognize(batch)[0]
    return ''.join(' ' + word for word, _box in words)
47
 
48
+ """
49
+ easy OCR
50
+ """
51
+ # gray scale image
52
def get_grayscale(image):
    """Convert a colour image to single-channel grayscale with OpenCV."""
    # NOTE(review): the conversion assumes BGR channel order; if callers pass
    # RGB arrays the red/blue weights are swapped — confirm the input order.
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


# Thresholding or Binarization
def thresholding(src):
    """Apply a to-zero threshold at 127 and return the thresholded image."""
    return cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)[1]


def ocr_with_easy(img):
    """Recognise Thai/English text in an image with EasyOCR.

    The image is converted to grayscale and thresholded before recognition.

    Args:
        img: Colour image array accepted by `cv2.cvtColor`.

    Returns:
        The recognised text blocks concatenated without a separator.
    """
    gray_scale_image = get_grayscale(img)
    # The thresholded image was previously computed and then discarded;
    # it is now the image actually given to the reader.
    processed = thresholding(gray_scale_image)

    reader = easyocr.Reader(['th', 'en'])
    # The array is passed directly instead of via a shared 'image.png' file,
    # so concurrent requests cannot overwrite each other's input.
    # paragraph=True keeps the effective behaviour: the original passed the
    # string "False", which is truthy.
    bounds = reader.readtext(processed, paragraph=True, detail=0)
    return ''.join(bounds)
66
+
67
+ """
68
+ Generate OCR
69
+ """
70
def generate_ocr(Method, img):
    """Run the selected OCR engine on an image and log the request.

    Args:
        Method: One of 'EasyOCR', 'KerasOCR' or 'PaddleOCR'. Any other value
            leaves the output empty.
        img: Image array, or None when no image was supplied.

    Returns:
        The recognised text ('' if `Method` matches no engine).

    Raises:
        gr.Error: If `img` is None or contains only zero values.
    """
    # Check for None first: calling .any() on None raised AttributeError, so
    # the intended error message was never shown for a missing image.
    if img is None or not img.any():
        raise gr.Error("Please upload an image!!!!")

    text_output = ''
    print("Method___________________", Method)
    if Method == 'EasyOCR':
        text_output = ocr_with_easy(img)
    elif Method == 'KerasOCR':
        text_output = ocr_with_keras(img)
    elif Method == 'PaddleOCR':
        text_output = ocr_with_paddle(img)

    # Logging is best effort: a failure here must not hide the OCR result.
    try:
        flag(Method, text_output, img)
    except Exception as e:
        print(e)
    return text_output
91
 
92
+ # except Exception as e:
93
+ # print("Error in ocr generation ==>",e)
94
+ # text_output = "Something went wrong"
95
+ # return text_output
96
 
 
 
 
 
 
 
 
97
 
98
  """
99
+ Create user interface for OCR demo
100
  """
 
 
 
101
 
102
+ # image = gr.Image(shape=(300, 300))
103
# Input and output widgets for the demo.
image = gr.Image()
method = gr.Radio(["PaddleOCR", "EasyOCR", "KerasOCR"], value="PaddleOCR")
output = gr.Textbox(label="Output")

# NOTE(review): the contact address in `article` reads "[email protected]",
# which looks like an obfuscation placeholder rather than a real address —
# confirm and restore the intended e-mail.
demo = gr.Interface(
    fn=generate_ocr,
    inputs=[method, image],
    outputs=output,
    title="Optical Character Recognition",
    css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
    # The first paragraph is now closed before the second one opens.
    article="""<p style='text-align: center;'>Feel free to give us your thoughts on this demo and please contact us at
    <a href="mailto:[email protected]" target="_blank">[email protected]</a></p>
    <p style='text-align: center;'>Developed by: <a href="https://www.pragnakalp.com" target="_blank">Pragnakalp Techlabs</a></p>""",
)
demo.launch()