omkar56 committed on
Commit
eebad8b
·
1 Parent(s): 2d14bdf

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +28 -35
main.py CHANGED
@@ -1,17 +1,13 @@
1
  import os
2
-
3
- # os.system("sudo apt-get install xclip")
4
-
5
- import nltk
6
- import pyclip
7
- import pytesseract
8
- from nltk.tokenize import sent_tokenize
9
- from transformers import MarianMTModel, MarianTokenizer
10
- # Newly added below
11
  from fastapi import FastAPI, File, UploadFile, Body, Depends, HTTPException
12
  from fastapi.security.api_key import APIKeyHeader
13
  from typing import Optional
14
  from fastapi.encoders import jsonable_encoder
 
 
 
 
 
15
 
16
  API_KEY = os.environ.get("API_KEY")
17
 
@@ -29,39 +25,36 @@ async def ocr(
29
  image: UploadFile = File(...),
30
  languages: list = Body(["eng"])
31
  ):
32
- # if api_key != API_KEY:
33
- # return {"error": "Invalid API key"}, 401
34
-
35
  try:
36
- text = image_to_string(await image.read(), lang="+".join(languages))
 
 
37
  except Exception as e:
38
  return {"error": str(e)}, 500
39
 
40
  return jsonable_encoder({"text": text})
41
 
 
 
 
 
 
 
 
 
 
42
 
43
- #@app.post("/api/translate", response_model=dict)
44
- #async def translate(
45
- #api_key: str = Depends(get_api_key),
46
- #text: str = Body(...),
47
- #src: str = "en",
48
- #trg: str = "zh",
49
- #):
50
- # if api_key != API_KEY:
51
- # return {"error": "Invalid API key"}, 401
52
-
53
- # tokenizer, model = get_model(src, trg)
54
-
55
- # translated_text = ""
56
- # for sentence in sent_tokenize(text):
57
- # translated_sub = model.generate(**tokenizer(sentence, return_tensors="pt"))[0]
58
- # translated_text += tokenizer.decode(translated_sub, skip_special_tokens=True) + "\n"
59
 
60
- # return jsonable_encoder({"translated_text": translated_text})
 
 
 
61
 
 
62
 
63
- #def get_model(src: str, trg: str):
64
- # model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
65
- #tokenizer = MarianTokenizer.from_pretrained(model_name)
66
- #model = MarianMTModel.from_pretrained(model_name)
67
- #return tokenizer, model
 
1
  import os
 
 
 
 
 
 
 
 
 
2
  from fastapi import FastAPI, File, UploadFile, Body, Depends, HTTPException
3
  from fastapi.security.api_key import APIKeyHeader
4
  from typing import Optional
5
  from fastapi.encoders import jsonable_encoder
6
+ from PIL import Image
7
+ from io import BytesIO
8
+ import pytesseract
9
+ from nltk.tokenize import sent_tokenize
10
+ from transformers import MarianMTModel, MarianTokenizer
11
 
12
  API_KEY = os.environ.get("API_KEY")
13
 
 
25
  image: UploadFile = File(...),
26
  languages: list = Body(["eng"])
27
  ):
 
 
 
28
  try:
29
+ content = await image.read()
30
+ image = Image.open(BytesIO(content))
31
+ text = pytesseract.image_to_string(image, lang="+".join(languages))
32
  except Exception as e:
33
  return {"error": str(e)}, 500
34
 
35
  return jsonable_encoder({"text": text})
36
 
37
@app.post("/api/translate", response_model=dict)
async def translate(
    api_key: str = Depends(get_api_key),
    text: str = Body(...),
    src: str = "en",
    trg: str = "zh",
):
    """Translate ``text`` from language ``src`` to language ``trg``.

    The text is split into sentences with ``sent_tokenize`` and each
    sentence is translated separately with the tokenizer/model pair
    returned by ``get_model(src, trg)``.

    Args:
        api_key: Key supplied by the ``get_api_key`` dependency.
        text: Text to translate, taken from the request body.
        src: Source language code used to select the model.
        trg: Target language code used to select the model.

    Returns:
        A JSON-compatible dict ``{"translated_text": ...}`` in which every
        translated sentence is followed by a newline.

    Raises:
        HTTPException: With status 401 when ``api_key`` does not match
            ``API_KEY``.
    """
    if api_key != API_KEY:
        # Returning a ``(dict, 401)`` tuple does not set the HTTP status in
        # FastAPI; raising HTTPException is what produces a real 401.
        raise HTTPException(status_code=401, detail="Invalid API key")

    tokenizer, model = get_model(src, trg)

    # NOTE(review): model.generate is a synchronous call inside an async
    # handler; consider moving it to a worker thread if latency matters.
    translated_sentences = []
    for sentence in sent_tokenize(text):
        encoded = tokenizer(sentence, return_tensors="pt")
        translated_sub = model.generate(**encoded)[0]
        translated_sentences.append(
            tokenizer.decode(translated_sub, skip_special_tokens=True)
        )

    # Same output as the original "+= ... + '\n'" loop: one trailing
    # newline after every translated sentence.
    translated_text = "".join(f"{line}\n" for line in translated_sentences)

    return jsonable_encoder({"translated_text": translated_text})
55
 
56
# Loaded (tokenizer, model) pairs keyed by (src, trg). Kept small because
# the language codes come from request parameters.
_MODEL_CACHE = {}
_MAX_CACHED_MODELS = 4


def get_model(src: str, trg: str):
    """Return the Marian tokenizer and model for the ``src`` -> ``trg`` pair.

    The pair is loaded from the ``Helsinki-NLP/opus-mt-{src}-{trg}``
    checkpoint on first use and then reused, so repeated requests for the
    same language pair do not call ``from_pretrained`` again.

    Args:
        src: Source language code.
        trg: Target language code.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    key = (src, trg)
    cached = _MODEL_CACHE.get(key)
    if cached is not None:
        return cached

    model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)

    # Only successful loads reach this point, so failed lookups are never
    # cached. Dicts keep insertion order, so the first key is the oldest.
    if len(_MODEL_CACHE) >= _MAX_CACHED_MODELS:
        _MODEL_CACHE.pop(next(iter(_MODEL_CACHE)))
    _MODEL_CACHE[key] = (tokenizer, model)
    return tokenizer, model