omkar56 committed on
Commit
47b56d3
·
1 Parent(s): b6eb6b5

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +95 -68
main.py CHANGED
@@ -1,78 +1,105 @@
1
- import os
2
- # os.system("apt-get install tesseract-ocr")
3
- from fastapi import FastAPI, File, Request, UploadFile, Body, Depends, HTTPException
4
- from fastapi.security.api_key import APIKeyHeader
5
- from typing import Optional, Annotated
6
- from fastapi.encoders import jsonable_encoder
7
- from PIL import Image
8
- import io
9
- import cv2
10
- import numpy as np
11
- import pytesseract
12
- from nltk.tokenize import sent_tokenize
13
- from transformers import MarianMTModel, MarianTokenizer
14
 
15
- API_KEY = os.environ.get("API_KEY")
16
 
17
- app = FastAPI()
18
- api_key_header = APIKeyHeader(name="api_key", auto_error=False)
 
 
 
 
 
19
 
20
- def get_api_key(api_key: Optional[str] = Depends(api_key_header)):
21
- if api_key is None or api_key != API_KEY:
22
- raise HTTPException(status_code=401, detail="Unauthorized access")
23
- return api_key
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- @app.post("/api/ocr", response_model=dict)
26
- async def ocr(
27
- api_key: str = Depends(get_api_key),
28
- image: UploadFile = File(...),
29
- # languages: list = Body(["eng"])
30
- ):
31
- try:
32
- print("[1]",os.popen(f'cat /etc/debian_version').read())
33
- print("[2]",os.popen(f'cat /etc/issue').read())
34
- print("[3]",os.popen(f'apt search tesseract').read())
35
- # content = await image.read()
36
- # image = Image.open(BytesIO(content))
37
- image_stream = io.BytesIO(image)
38
- image_stream.seek(0)
39
- file_bytes = np.asarray(bytearray(image_stream.read()), dtype=np.uint8)
40
- frame = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
41
- # label = read_img(frame)
42
- print("[image]",frame)
43
- if hasattr(pytesseract, "image_to_string"):
44
- print("Image to string function is available")
45
- else:
46
- print("Image to string function is not available")
47
- # text = pytesseract.image_to_string(image, lang="+".join(languages))
48
- # text = pytesseract.image_to_string(image, lang = 'eng')
49
- except Exception as e:
50
- return {"error": str(e)}, 500
51
 
52
- # return jsonable_encoder({"text": text})
53
- return {"ImageText": "text"}
 
 
 
 
 
 
 
54
 
55
- @app.post("/api/translate", response_model=dict)
56
- async def translate(
57
- api_key: str = Depends(get_api_key),
58
- text: str = Body(...),
59
- src: str = "en",
60
- trg: str = "zh",
61
- ):
62
- if api_key != API_KEY:
63
- return {"error": "Invalid API key"}, 401
64
 
65
- tokenizer, model = get_model(src, trg)
 
 
 
66
 
67
- translated_text = ""
68
- for sentence in sent_tokenize(text):
69
- translated_sub = model.generate(**tokenizer(sentence, return_tensors="pt"))[0]
70
- translated_text += tokenizer.decode(translated_sub, skip_special_tokens=True) + "\n"
71
 
72
- return jsonable_encoder({"translated_text": translated_text})
 
 
 
 
73
 
74
- def get_model(src: str, trg: str):
75
- model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
76
- tokenizer = MarianTokenizer.from_pretrained(model_name)
77
- model = MarianMTModel.from_pretrained(model_name)
78
- return tokenizer, model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import os
2
+ # from fastapi import FastAPI, File, Request, UploadFile, Body, Depends, HTTPException
3
+ # from fastapi.security.api_key import APIKeyHeader
4
+ # from typing import Optional, Annotated
5
+ # from fastapi.encoders import jsonable_encoder
6
+ # from PIL import Image
7
+ # import io
8
+ # import cv2
9
+ # import numpy as np
10
+ # import pytesseract
11
+ # from nltk.tokenize import sent_tokenize
12
+ # from transformers import MarianMTModel, MarianTokenizer
 
13
 
14
+ # API_KEY = os.environ.get("API_KEY")
15
 
16
+ # app = FastAPI()
17
+ # api_key_header = APIKeyHeader(name="api_key", auto_error=False)
18
+
19
+ # def get_api_key(api_key: Optional[str] = Depends(api_key_header)):
20
+ # if api_key is None or api_key != API_KEY:
21
+ # raise HTTPException(status_code=401, detail="Unauthorized access")
22
+ # return api_key
23
 
24
+ # @app.post("/api/ocr", response_model=dict)
25
+ # async def ocr(
26
+ # api_key: str = Depends(get_api_key),
27
+ # image: UploadFile = File(...),
28
+ # # languages: list = Body(["eng"])
29
+ # ):
30
+ # try:
31
+ # print("[1]",os.popen(f'cat /etc/debian_version').read())
32
+ # print("[2]",os.popen(f'cat /etc/issue').read())
33
+ # print("[3]",os.popen(f'apt search tesseract').read())
34
+ # # content = await image.read()
35
+ # # image = Image.open(BytesIO(content))
36
+ # image_stream = io.BytesIO(image)
37
+ # image_stream.seek(0)
38
+ # file_bytes = np.asarray(bytearray(image_stream.read()), dtype=np.uint8)
39
+ # frame = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
40
+ # # label = read_img(frame)
41
+ # print("[image]",frame)
42
+ # if hasattr(pytesseract, "image_to_string"):
43
+ # print("Image to string function is available")
44
+ # else:
45
+ # print("Image to string function is not available")
46
+ # # text = pytesseract.image_to_string(image, lang="+".join(languages))
47
+ # # text = pytesseract.image_to_string(image, lang = 'eng')
48
+ # except Exception as e:
49
+ # return {"error": str(e)}, 500
50
 
51
+ # # return jsonable_encoder({"text": text})
52
+ # return {"ImageText": "text"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
+ # @app.post("/api/translate", response_model=dict)
55
+ # async def translate(
56
+ # api_key: str = Depends(get_api_key),
57
+ # text: str = Body(...),
58
+ # src: str = "en",
59
+ # trg: str = "zh",
60
+ # ):
61
+ # if api_key != API_KEY:
62
+ # return {"error": "Invalid API key"}, 401
63
 
64
+ # tokenizer, model = get_model(src, trg)
 
 
 
 
 
 
 
 
65
 
66
+ # translated_text = ""
67
+ # for sentence in sent_tokenize(text):
68
+ # translated_sub = model.generate(**tokenizer(sentence, return_tensors="pt"))[0]
69
+ # translated_text += tokenizer.decode(translated_sub, skip_special_tokens=True) + "\n"
70
 
71
+ # return jsonable_encoder({"translated_text": translated_text})
 
 
 
72
 
73
+ # def get_model(src: str, trg: str):
74
+ # model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
75
+ # tokenizer = MarianTokenizer.from_pretrained(model_name)
76
+ # model = MarianMTModel.from_pretrained(model_name)
77
+ # return tokenizer, model
78
 
79
+
80
+ import numpy as np
81
+ import sys, os
82
+ from fastapi import FastAPI, UploadFile, File
83
+ from starlette.requests import Request
84
+ import io
85
+ import cv2
86
+ import pytesseract
87
+ from pydantic import BaseModel
88
def read_img(img):
    """Return the text that Tesseract recognises in ``img``.

    Args:
        img: The image to run OCR on. It is handed unchanged to
            ``pytesseract.image_to_string``; the visible caller passes the
            array produced by ``cv2.imdecode``.

    Returns:
        The string returned by ``pytesseract.image_to_string`` (called with
        its default settings).
    """
    return pytesseract.image_to_string(img)
91
+
92
+ app = FastAPI()
93
class ImageType(BaseModel):
    """Pydantic model with a single string field, ``url``.

    NOTE(review): nothing in the visible part of this file references this
    model - confirm whether it is still needed.
    """

    # Declared as a required string; presumably the location of an image -
    # TODO confirm against whatever consumes this model.
    url: str
95
@app.post("/api/ocr")
def ocr(request: Request,
        file: bytes = File(...)):
    """Run OCR on an uploaded image and return the recognised text.

    Args:
        request: The incoming request; only its HTTP method is inspected.
        file: Raw bytes of the uploaded image (required form field ``file``).

    Returns:
        The text returned by ``read_img`` for the decoded image, or the
        string ``"No post request found"`` when the request method is not
        POST.

    Raises:
        HTTPException: With status 400 when the upload is empty or cannot be
            decoded as an image.
    """
    # Imported locally because the file-level fastapi import only brings in
    # FastAPI, UploadFile and File.
    from fastapi import HTTPException

    # The route is registered for POST only, so this guard is purely
    # defensive; it is kept so direct callers see the original behaviour.
    if request.method != "POST":
        return "No post request found"

    # cv2.imdecode raises on an empty buffer, so reject that case up front
    # with a client error instead of an opaque server error.
    if not file:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    # View the upload as a flat uint8 buffer, which is the input form
    # cv2.imdecode expects, without copying it through BytesIO/bytearray.
    file_bytes = np.frombuffer(file, dtype=np.uint8)
    frame = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)

    # imdecode signals undecodable data by returning None rather than raising.
    if frame is None:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file could not be decoded as an image",
        )

    return read_img(frame)