File size: 1,203 Bytes
0545b56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eff9d98
 
 
 
 
 
 
 
 
 
0545b56
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from pathlib import Path
from typing import Union

from pypdf import PdfReader
from transformers import pipeline
import gradio as gr


# Build the question-answering pipeline once at import time; answer_doc_question
# reuses this single instance for every call.
question_answerer = pipeline(
    task="question-answering",
    model="deepset/tinyroberta-squad2",
)


def get_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Read the PDF at the given path and return its entire text content.

    Args:
        pdf_file: Path to the PDF document; passed unchanged to ``PdfReader``.

    Returns:
        The text extracted from every page, concatenated in page order with
        no separator inserted between pages.
    """
    reader = PdfReader(pdf_file)

    # Join once instead of growing a string with ``+=`` page by page, which
    # can re-copy the accumulated text on every iteration for long documents.
    return "".join(page.extract_text() for page in reader.pages)


def answer_doc_question(pdf_file, question):
    """Answer a question about a PDF document.

    The document's text is extracted with ``get_text_from_pdf`` and handed,
    together with the question, to the module-level ``question_answerer``
    pipeline.

    Args:
        pdf_file: The PDF to read; forwarded unchanged to ``get_text_from_pdf``.
        question: The question to ask about the document.

    Returns:
        The value stored under the ``"answer"`` key of the pipeline's result.
    """
    return question_answerer(question, get_text_from_pdf(pdf_file))["answer"]


# Pre-fill a default file and question so the app is easy to try out.
pdf_input = gr.File(
    label="Upload a PDF document and ask a question about it.",
    file_types=[".pdf"],
    value="https://ris.uni-paderborn.de/download/30236/30237/author_version.pdf",
)
question = gr.Textbox(
    label="Type a question regarding the uploaded document here.",
    value="What is mobile-env?",
)

# Wire both inputs to the answering function and start the app.
gr.Interface(
    fn=answer_doc_question,
    inputs=[pdf_input, question],
    outputs="text",
).launch()