import os
import shutil
import tempfile
from typing import List

import streamlit as st
from langchain.schema import Document
from langchain_community.document_loaders import PyMuPDFLoader


def format_doc(doc: Document) -> str:
    """Render a document as a three-line text block: title, page and content.

    The title and page are read from ``doc.metadata``; either one falls back
    to the literal ``'Unknown'`` when the key is absent.

    Args:
        doc: The document to render.

    Returns:
        A string of the form
        ``Document_Title: <title>\\nPage: <page>\\nContent: <page_content>``.
    """
    return f"Document_Title: {doc.metadata.get('title', 'Unknown')}\nPage: {doc.metadata.get('page', 'Unknown')}\nContent: {doc.page_content}"


def load_document(uploaded_file) -> List[Document]:
    """Write an uploaded PDF to a private scratch directory and load it.

    The upload is written to disk because ``PyMuPDFLoader`` is constructed
    from a file path. The file lives in a freshly created temporary directory
    that is removed again before this function returns, so uploads neither
    collide with each other nor accumulate on disk.

    Args:
        uploaded_file: An object exposing ``.name`` (the client-supplied file
            name) and ``.getbuffer()`` (the raw bytes) — presumably a
            Streamlit ``UploadedFile``; confirm against the caller.

    Returns:
        The list of documents produced by ``PyMuPDFLoader(...).load()``.

    Note:
        Any path that the loader records in document metadata will point into
        the (already removed) scratch directory rather than ``/tmp/<name>``.
    """
    # The file name comes from the client and must not be trusted: strip any
    # directory components (both separator styles) so that a name such as
    # "../../etc/cron.d/job" cannot escape the scratch directory.
    raw_name = uploaded_file.name or ""
    safe_name = os.path.basename(raw_name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        safe_name = "upload.pdf"

    # A per-call directory (created with owner-only permissions) avoids two
    # sessions overwriting each other's upload when the names are equal.
    work_dir = tempfile.mkdtemp(prefix="upload_")
    try:
        file_path = os.path.join(work_dir, safe_name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # NOTE(review): relies on load() having fully read the file by the
        # time it returns, since the scratch directory is deleted right after.
        loader = PyMuPDFLoader(file_path)
        return loader.load()
    finally:
        # Best-effort cleanup; a failure to delete must not mask the result
        # or the original exception.
        shutil.rmtree(work_dir, ignore_errors=True)