# ✅ Dependencies: pymupdf, requests, gradio
# 📚 Imports
import fitz  # PyMuPDF
import requests
import gradio as gr
import os
# 🔑 Read the OpenRouter API key from the environment; never hardcode secrets
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
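# On Hugging Face Spaces this can be set as a repository secret named
# OPENROUTER_API_KEY; locally, export it in your shell before launching.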
# Global variable to store the extracted text
pdf_text = ""
# 📄 Extract text from PDF
def extract_text_from_pdf(file_obj):
global pdf_text
if file_obj is None:
return "Please upload a PDF file first."
try:
        # Gradio may pass either a tempfile-like object (whose .name attribute
        # holds the path) or a plain path string, depending on the Gradio version
        file_path = file_obj.name if hasattr(file_obj, "name") else file_obj
        # Open the file with PyMuPDF
        doc = fitz.open(file_path)
text = ""
for page in doc:
text += page.get_text()
doc.close()
# Store the text for later use
pdf_text = text
# Return preview of the extracted text
preview = text[:500] + "..." if len(text) > 500 else text
return f"βœ… PDF uploaded and processed successfully. Preview:\n\n{preview}"
except Exception as e:
return f"❌ Error processing PDF: {str(e)}"
# 💬 Ask an open-source LLM via OpenRouter (default: Nemotron Nano 8B, free tier)
def ask_open_source_llm(question, model_choice="nvidia/llama-3.1-nemotron-nano-8b-v1:free"):
global pdf_text
if not pdf_text:
return "⚠️ Please upload a PDF document first."
# Limit text to prevent token overflow
limited_text = pdf_text[:3000] # First 3000 characters
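    # Note: this is a character limit, not a token limit; ~3000 characters of
    # English text is very roughly 700-800 tokens, well within these models' context windows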
# Create prompt based on question
if not question:
prompt = f"Summarize the following document:\n\n{limited_text}"
else:
prompt = f"The document says:\n\n{limited_text}\n\nNow answer this: {question}"
# Call the API
url = "https://openrouter.ai/api/v1/chat/completions"
headers = {
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
"Content-Type": "application/json"
}
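    # OpenRouter also accepts optional "HTTP-Referer" and "X-Title" headers
    # to attribute requests to your app; they are not required for the call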
data = {
"model": model_choice,
"messages": [{"role": "user", "content": prompt}]
}
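    # The payload follows the OpenAI-compatible chat schema; optional sampling
    # fields such as "temperature" or "max_tokens" could be added here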
try:
        response = requests.post(url, headers=headers, json=data, timeout=60)
if response.status_code == 200:
return response.json()["choices"][0]["message"]["content"]
else:
return f"❌ Error: {response.text}"
except Exception as e:
return f"❌ An error occurred: {str(e)}"
# Gradio app function
def process_query(pdf_file, question, model_choice):
# First extract text if a PDF is uploaded
if pdf_file is not None:
result = extract_text_from_pdf(pdf_file)
        if result.startswith("❌ Error"):
return result
# Then process the question
if question:
return ask_open_source_llm(question, model_choice)
    else:
        # Pass an empty question so ask_open_source_llm builds its summary prompt
        return ask_open_source_llm("", model_choice)
# Create Gradio interface
with gr.Blocks(title="PDF Document Analysis") as app:
gr.Markdown("# πŸ“š PDF Document Analysis with LLM")
gr.Markdown("Upload a PDF document and ask questions about its content.")
with gr.Row():
with gr.Column(scale=1):
pdf_input = gr.File(label="Upload PDF Document", file_types=[".pdf"])
model_choice = gr.Dropdown(
choices=[
"nvidia/llama-3.1-nemotron-nano-8b-v1:free",
"mistralai/mistral-7b-instruct-v0.1:free",
"meta-llama/llama-2-13b-chat:free"
],
label="LLM Model",
value="nvidia/llama-3.1-nemotron-nano-8b-v1:free"
)
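            # ⚠️ Free-tier model availability on OpenRouter changes over time;
            # check https://openrouter.ai/models for currently valid IDs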
question_input = gr.Textbox(label="Ask a question (or leave empty for summary)", lines=2)
submit_btn = gr.Button("Process", variant="primary")
with gr.Column(scale=2):
output = gr.Textbox(label="Response", lines=15)
# Set up event handlers
submit_btn.click(
fn=process_query,
inputs=[pdf_input, question_input, model_choice],
outputs=output
)
gr.Markdown("### πŸ“ Notes")
gr.Markdown("- For large documents, only the first 3000 characters are analyzed")
gr.Markdown("- You can change the LLM model from the dropdown menu")
gr.Markdown("- Leave the question field empty to get a general summary")
# Launch the app (share=True is typically ignored when running on Hugging Face Spaces)
app.launch(debug=True, share=True)