Spaces:

Omarrran
/

Analyse_Proposal

Sleeping

App Files Files Community

Omarrran committed 23 days ago

Commit

e657d8c

verified ·

1 Parent(s): 6f14fd9

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -65

app.py CHANGED Viewed

@@ -8,7 +8,6 @@ import gradio as gr
 import google.generativeai as genai
 from PyPDF2 import PdfReader
 from tika import parser
-from unstructured.partition.pdf import partition_pdf
 # Configure logging
 tmp_log = "pdf_processor_log.txt"
@@ -22,6 +21,14 @@ logging.basicConfig(
 )
 logger = logging.getLogger("pdf_processor")
 # Load API key from environment
 API_KEY = os.getenv("GOOGLE_API_KEY", None)
 if not API_KEY:
@@ -116,70 +123,13 @@ def process_pdf(pdf_file, progress=gr.Progress()):
     tmp = tempfile.gettempdir()
     path = os.path.join(tmp, pdf_file.name)
     with open(path, 'wb') as f: f.write(pdf_file.read())
-    methods = [("unstructured", extract_text_with_unstructured),
-               ("pypdf", extract_text_with_pypdf),
-               ("tika", extract_text_with_tika)]
-    for name, fn in methods:
-        try:
-            secs = fn(path)
-            if secs:
-                EXTRACTION_METHOD = name
-                PDF_SECTIONS = secs
-                break
-        except:
-            continue
-    if not PDF_SECTIONS:
-        return None, None, "❌ Extraction failed.", ""
-    combined, struct = "", ""
-    for i,sec in enumerate(PDF_SECTIONS,1):
-        struct += f"{i}. {sec['title']}\n"
-        block = f"## {sec['title']}\n{sec['content']}\n\n"
-        combined += block if len(combined+block)<30000 else f"## {sec['title']}\n[Truncated]\n\n"
-    EXTRACTED_TEXT = combined
-    summary, err = generate_greg_brockman_summary(combined)
-    if err:
-        return None, struct, f"❌ {err}", combined
-    return summary, struct, "✅ Done", f"Used {EXTRACTION_METHOD}, {len(PDF_SECTIONS)} sections"
-def ask_question(question):
-    if not API_KEY: return "❌ Set GOOGLE_API_KEY."
-    if not EXTRACTED_TEXT: return "❌ Process a PDF first."
-    if not question.strip(): return "❌ Enter a question."
-    ans, err = answer_question_about_pdf(EXTRACTED_TEXT, question)
-    return ans if not err else f"❌ {err}"
-def view_log():
-    try:
-        return open(tmp_log).read()
-    except:
-        return "Error reading log."
-def save_summary(summary):
-    if not summary: return "❌ No summary."
-    fn = f"summary_{datetime.now():%Y%m%d_%H%M%S}.txt"
-    open(fn, 'w', encoding='utf-8').write(summary)
-    return f"✅ Saved to {fn}"
-def save_qa(question, answer):
-    if not question or not answer: return "❌ Incomplete Q&A."
-    fn = f"qa_{datetime.now():%Y%m%d_%H%M%S}.txt"
-    with open(fn,'w',encoding='utf-8') as f:
-        f.write(f"Q: {question}\n\nA: {answer}")
-    return f"✅ Saved to {fn}"
-# --- Gradio UI ---
-with gr.Blocks(title="PDF Analyzer with Gemini API") as app:
-    gr.Markdown("# 📄 PDF Analyzer with Gemini API")
-    gr.Markdown("Upload a PDF, get a summary, ask questions.")
-    with gr.Tab("PDF Processing"):
-        pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"], type="binary")
-        process_btn = gr.Button("Process PDF")
-        summary_out = gr.Textbox(label="Summary", lines=15)
-        struct_out = gr.Textbox(label="Structure", lines=8)
-        status = gr.Markdown("")
-        log_out = gr.Textbox(label="Log", lines=8)
-        process_btn.click(process_pdf, inputs=[pdf_file],
-                          outputs=[summary_out, struct_out, status, log_out])
     with gr.Tab("Ask Questions"):
         question = gr.Textbox(label="Question", lines=2)
         ask_btn = gr.Button("Ask")

 import google.generativeai as genai
 from PyPDF2 import PdfReader
 from tika import parser
 # Configure logging
 tmp_log = "pdf_processor_log.txt"
 )
 logger = logging.getLogger("pdf_processor")
+# Attempt to import Unstructured.io partitioning
+try:
+    from unstructured.partition.pdf import partition_pdf
+    UNSTRUCTURED_AVAILABLE = True
+except ImportError:
+    UNSTRUCTURED_AVAILABLE = False
+    logger.warning("unstructured.partition.pdf not available; skipping that extraction method")
 # Load API key from environment
 API_KEY = os.getenv("GOOGLE_API_KEY", None)
 if not API_KEY:
     tmp = tempfile.gettempdir()
     path = os.path.join(tmp, pdf_file.name)
     with open(path, 'wb') as f: f.write(pdf_file.read())
+    methods = []
+if UNSTRUCTURED_AVAILABLE:
+    methods.append(("unstructured", extract_text_with_unstructured))
+methods.extend([
+    ("pypdf", extract_text_with_pypdf),
+    ("tika", extract_text_with_tika)
+])
     with gr.Tab("Ask Questions"):
         question = gr.Textbox(label="Question", lines=2)
         ask_btn = gr.Button("Ask")