ikraamkb committed on
Commit
4ab997d
·
verified ·
1 Parent(s): 2e00ca7

Upload 4 files

Browse files
qtAnswering/app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### ✅ app.py — Document QA Backend (Cleaned)
2
+ from fastapi import FastAPI
3
+ from fastapi.responses import FileResponse, JSONResponse
4
+ import fitz # PyMuPDF
5
+ import easyocr
6
+ import openpyxl
7
+ import pptx
8
+ import docx
9
+ from transformers import pipeline
10
+ from gtts import gTTS
11
+ import tempfile
12
+ import os
13
+
14
# ASGI application object for this module.
app = FastAPI()

# Both objects below are built once, when the module is imported.
# Extractive question-answering pipeline used by answer_question_from_doc.
qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
# OCR reader configured for English and French.
# NOTE(review): no function visible in this file references `reader` — confirm it is still needed.
reader = easyocr.Reader(['en', 'fr'])
18
+
19
def extract_text_from_pdf(pdf_file):
    """Return the plain text of every page of a PDF, joined by newlines.

    Any failure while opening or reading the file is reported as an
    ``"Error reading PDF: ..."`` string instead of being raised.
    """
    try:
        with fitz.open(pdf_file) as document:
            page_texts = [page.get_text("text") for page in document]
            return "\n".join(page_texts)
    except Exception as read_error:
        return f"Error reading PDF: {read_error}"
25
+
26
def extract_text_from_docx(docx_file):
    """Return the non-blank paragraphs of a DOCX file, joined by newlines.

    Args:
        docx_file: Path or file-like object passed to ``docx.Document``.

    Returns:
        The paragraph text, or an ``"Error reading DOCX: ..."`` string when
        the document cannot be opened or parsed.
    """
    # Report failures as text, the same way the PDF, PPTX and XLSX extractors
    # in this module do, so one unreadable upload does not raise past the
    # caller.
    try:
        doc = docx.Document(docx_file)
        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
    except Exception as e:
        return f"Error reading DOCX: {e}"
29
+
30
def extract_text_from_pptx(pptx_file):
    """Return the text of every text-bearing shape in a PPTX, one per line.

    Slides are visited in order and shapes without a ``text`` attribute are
    skipped. Any failure is reported as an ``"Error reading PPTX: ..."``
    string instead of being raised.
    """
    try:
        presentation = pptx.Presentation(pptx_file)
        shape_texts = []
        for slide in presentation.slides:
            for shape in slide.shapes:
                if hasattr(shape, "text"):
                    shape_texts.append(shape.text)
        return "\n".join(shape_texts)
    except Exception as read_error:
        return f"Error reading PPTX: {read_error}"
36
+
37
def extract_text_from_xlsx(xlsx_file):
    """Return the cell values of every worksheet as newline-separated rows.

    Args:
        xlsx_file: Path or file-like object passed to
            ``openpyxl.load_workbook``.

    Returns:
        One line per worksheet row, with the row's non-empty cell values
        separated by single spaces, or an ``"Error reading XLSX: ..."``
        string when the workbook cannot be read.
    """
    try:
        wb = openpyxl.load_workbook(xlsx_file)
        lines = []
        for sheet in wb.sheetnames:
            for row in wb[sheet].iter_rows(values_only=True):
                # Skip only genuinely empty cells. A bare truthiness test
                # would also drop legitimate values such as 0 and False.
                values = [str(cell) for cell in row if cell is not None and cell != ""]
                lines.append(" ".join(values))
        return "\n".join(lines)
    except Exception as e:
        return f"Error reading XLSX: {e}"
43
+
44
def answer_question_from_doc(file, question):
    """Answer a question about an uploaded document and voice the answer.

    Args:
        file: Object exposing ``filename`` (str) and ``read()`` returning the
            document bytes.
        question: Question to ask about the document's text.

    Returns:
        ``(answer, audio_path)``. ``audio_path`` is the path of an MP3 file
        holding the spoken answer, or ``None`` whenever ``answer`` is a
        status or error message rather than a model answer.
    """
    # The upload name is client-controlled: keep only its final component and
    # use it solely to pick the extractor, never to build a path.
    safe_name = os.path.basename(str(file.filename or "").replace("\\", "/"))
    ext = safe_name.rsplit(".", 1)[-1].lower() if "." in safe_name else ""
    if ext not in ("pdf", "docx", "pptx", "xlsx"):
        # Reject before anything is written to disk.
        return "Unsupported file format.", None

    # A unique temporary file avoids collisions between uploads that share a
    # name; it is always removed once the text has been extracted.
    upload = tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}")
    file_path = upload.name
    try:
        with upload:
            upload.write(file.read())

        if ext == "pdf":
            context = extract_text_from_pdf(file_path)
        elif ext == "docx":
            context = extract_text_from_docx(file_path)
        elif ext == "pptx":
            context = extract_text_from_pptx(file_path)
        else:
            context = extract_text_from_xlsx(file_path)
    finally:
        os.remove(file_path)

    # The extractors report failures as "Error reading <EXT>: ..." text.
    # Surface that message instead of asking the model to answer from it.
    if context.startswith(f"Error reading {ext.upper()}: "):
        return context, None

    if not context.strip():
        return "No text found in the document.", None

    try:
        result = qa_model({"question": question, "context": context})
        answer = result["answer"]
        if not answer.strip():
            # gTTS refuses empty text; report the missing answer directly.
            return "No answer found in the document.", None
        tts = gTTS(answer)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            audio_path = tmp.name
        # Save after the handle is closed so the file is not held open twice.
        tts.save(audio_path)
        return answer, audio_path
    except Exception as e:
        return f"Error generating answer: {e}", None
qtAnswering/appImage.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from fastapi import FastAPI
3
+ from fastapi.responses import RedirectResponse, JSONResponse, FileResponse
4
+ import os
5
+ from PIL import Image
6
+ from transformers import ViltProcessor, ViltForQuestionAnswering, pipeline
7
+ from gtts import gTTS
8
+ import easyocr
9
+ import torch
10
+ import tempfile
11
+ import numpy as np
12
+ from io import BytesIO
13
+
14
# ASGI application object for this module.
app = FastAPI()

# Every object below is built once, when the module is imported.
# ViLT processor and model used for the visual question answering branch.
vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
# Image-to-text pipeline used for the captioning branch.
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
# OCR reader configured for English and French, used for the text-reading branch.
reader = easyocr.Reader(['en', 'fr'])
20
+
21
def classify_question(question: str):
    """Pick the answering strategy for a question about an image.

    Args:
        question: The user's question; ``None`` is treated as empty.

    Returns:
        ``"ocr"`` when the question asks about text in the image,
        ``"caption"`` when it asks for a description, otherwise ``"vqa"``.
    """
    q = (question or "").lower()
    # Match OCR keywords as whole words. A substring test sends unrelated
    # questions to OCR ("say" in "essay", "read" in "bread", "text" in
    # "texture"). Common inflections are listed so they keep matching.
    words = set("".join(ch if ch.isalnum() else " " for ch in q).split())
    ocr_words = {"text", "texts", "say", "says", "written", "read", "reads", "reading"}
    if words & ocr_words:
        return "ocr"
    if any(w in q for w in ["caption", "describe", "what is in the image"]):
        return "caption"
    return "vqa"
28
+
29
def answer_question_from_image(image, question):
    """Answer a question about an image and voice the answer.

    The question is routed by ``classify_question`` to OCR, captioning, or
    visual question answering.

    Args:
        image: Image to inspect; it is passed to ``np.array``, the captioner
            or the ViLT processor depending on the route.
        question: The user's question.

    Returns:
        ``(answer, audio_path)``. ``audio_path`` is the path of an MP3 file
        holding the spoken answer, or ``None`` when the input is missing or
        an error occurred.
    """
    # Treat a missing question like an empty one instead of raising on
    # ``None.strip()`` before the error handling below is reached.
    if image is None or not (question or "").strip():
        return "Please upload an image and ask a question.", None

    mode = classify_question(question)

    try:
        if mode == "ocr":
            result = reader.readtext(np.array(image))
            # Each result entry carries its text at index 1.
            answer = " ".join(entry[1] for entry in result) or "No readable text found."

        elif mode == "caption":
            answer = captioner(image)[0]['generated_text']

        else:
            inputs = vqa_processor(image, question, return_tensors="pt")
            with torch.no_grad():
                outputs = vqa_model(**inputs)
            predicted_id = outputs.logits.argmax(-1).item()
            answer = vqa_model.config.id2label[predicted_id]

        tts = gTTS(text=answer)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            audio_path = tmp.name
        # Save after the handle is closed so the file is not held open twice.
        tts.save(audio_path)
        return answer, audio_path

    except Exception as e:
        return f"Error: {e}", None
57
+
58
@app.get("/")
def home():
    """Redirect requests for the root path to ``/templates/home.html``."""
    target_url = "/templates/home.html"
    return RedirectResponse(url=target_url)
qtAnswering/main.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, Form, Request
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.responses import HTMLResponse, JSONResponse, FileResponse
4
+ from fastapi.staticfiles import StaticFiles
5
+ from fastapi.templating import Jinja2Templates
6
+ import shutil, os
7
+ from tempfile import gettempdir
8
+
9
app = FastAPI()

# ✅ CORS to allow frontend access from any origin.
# Credentials stay disabled: wildcard origins must not be combined with
# allow_credentials=True, because that would let any website make
# credentialed requests to this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ✅ Static assets
app.mount("/resources", StaticFiles(directory="resources"), name="resources")
app.mount("/static", StaticFiles(directory="static"), name="static")

# ✅ Jinja2 Templates
templates = Jinja2Templates(directory="templates")
26
+
27
+ # ✅ Serve Homepage
28
@app.get("/", response_class=HTMLResponse)
async def serve_home(request: Request):
    """Render the ``home.html`` template for the root path."""
    template_context = {"request": request}
    return templates.TemplateResponse("home.html", template_context)
31
+
32
+ # ✅ Predict endpoint (handles image + document)
33
@app.post("/predict")
async def predict(question: str = Form(...), file: UploadFile = Form(...)):
    """Answer a question about an uploaded image or document.

    Uploads whose content type starts with ``image/`` are handled by
    ``appImage.answer_question_from_image``; everything else is handled by
    ``app.answer_question_from_doc``.

    Args:
        question: The user's question (form field).
        file: The uploaded image or document (form field).

    Returns:
        A JSON response with ``answer`` and, when speech was generated, an
        ``audio`` URL under ``/audio/``. Any exception becomes a 500
        response with an ``error`` message.
    """
    from tempfile import NamedTemporaryFile

    temp_path = None
    try:
        # The upload name is client-controlled: keep only its final
        # component, and store the bytes under a unique temporary name so
        # concurrent uploads with the same name cannot overwrite each other.
        safe_name = os.path.basename((file.filename or "").replace("\\", "/")) or "upload"
        suffix = os.path.splitext(safe_name)[1]
        with NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            temp_path = tmp.name
            shutil.copyfileobj(file.file, tmp)

        # content_type may be missing; treat that as "not an image".
        is_image = (file.content_type or "").startswith("image/")

        if is_image:
            # Imported lazily, as in the original code.
            from appImage import answer_question_from_image
            from PIL import Image
            with Image.open(temp_path) as img:
                image = img.convert("RGB")
            answer, audio_path = answer_question_from_image(image, question)

        else:
            from app import answer_question_from_doc

            class NamedFile:
                """Stand-in exposing the ``filename``/``read()`` pair the document handler uses."""

                def __init__(self, name, path):
                    self.filename = name
                    self._path = path

                def read(self):
                    with open(self._path, "rb") as fh:
                        return fh.read()

            answer, audio_path = answer_question_from_doc(NamedFile(safe_name, temp_path), question)

        if audio_path and os.path.exists(audio_path):
            return JSONResponse({
                "answer": answer,
                "audio": f"/audio/{os.path.basename(audio_path)}"
            })
        return JSONResponse({"answer": answer})

    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)

    finally:
        # Remove the stored upload on every path, including failures.
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
67
+
68
+ # ✅ Serve audio
69
@app.get("/audio/{filename}")
async def get_audio(filename: str):
    """Serve a generated MP3 answer from the system temporary directory.

    Args:
        filename: Bare file name of the MP3, as returned in the ``audio``
            URL of the ``/predict`` response.

    Returns:
        The MP3 as ``audio/mpeg``, or a 404 JSON response when the name is
        not a bare ``*.mp3`` file name or the file does not exist.
    """
    not_found = JSONResponse({"error": "Audio file not found."}, status_code=404)

    # The temporary directory also holds unrelated files, so only bare MP3
    # names are served and anything containing a path component is refused.
    if os.path.basename(filename) != filename or not filename.lower().endswith(".mp3"):
        return not_found

    filepath = os.path.join(gettempdir(), filename)
    if not os.path.isfile(filepath):
        return not_found
    return FileResponse(filepath, media_type="audio/mpeg")
qtAnswering/requirements.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ gradio==3.50.2
4
+ pandas
5
+ matplotlib
6
+ seaborn
7
+ transformers
8
+ torch
9
+ pdfplumber
10
+ python-docx
11
+ pydantic<2.0
12
+ tools
13
+ openpyxl
14
+ pytesseract
15
+ deep-translator
16
+ frontend
17
+ pillow
18
+ easyocr
19
+ python-pptx
20
+ pymupdf
21
+ tika
22
+ hf_xet
23
+ gTTS