File size: 626 Bytes
07de8c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import tempfile
from pathlib import Path

import streamlit as st
from langchain.schema import Document
from langchain_community.document_loaders import PyMuPDFLoader

# Function to format document content
def format_doc(doc: Document) -> str:
    """Render a document as a three-line text block.

    The block lists the ``title`` and ``page`` entries of ``doc.metadata``
    (each falling back to ``'Unknown'`` when the key is absent), followed
    by ``doc.page_content``.

    Args:
        doc: Document whose ``metadata`` mapping and ``page_content`` are read.

    Returns:
        The title, page and content lines joined with newlines.
    """
    meta = doc.metadata
    title = meta.get('title', 'Unknown')
    page = meta.get('page', 'Unknown')
    lines = (
        f"Document_Title: {title}",
        f"Page: {page}",
        f"Content: {doc.page_content}",
    )
    return "\n".join(lines)

# Function to load and process document
def load_document(uploaded_file):
    """Save an uploaded file to a private temp directory and parse it.

    The upload is written to disk because ``PyMuPDFLoader`` is constructed
    from a file path. The file lives in a directory created for this call
    only, and that directory is removed once loading has finished.

    Args:
        uploaded_file: Upload object exposing ``name`` and ``getbuffer()``
            (presumably a Streamlit ``UploadedFile`` -- confirm with caller).

    Returns:
        Whatever ``PyMuPDFLoader.load()`` returns for the saved file.
    """
    # The file name comes from the client: keep only its final component so
    # it cannot address a location outside the temporary directory.
    safe_name = Path(uploaded_file.name).name
    if safe_name in ("", ".", ".."):
        safe_name = "upload.pdf"

    # A fresh directory per call prevents two uploads with the same name from
    # overwriting each other, and guarantees the copy is deleted afterwards.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_path = Path(tmp_dir) / safe_name
        file_path.write_bytes(uploaded_file.getbuffer())

        # Load while the file still exists; the directory is removed when
        # the ``with`` block exits.
        return PyMuPDFLoader(str(file_path)).load()