Spaces:

Manishkumaryadav
/

smart-document-explorer

Runtime error

App Files Files Community

Manishkumaryadav committed on Feb 22

Commit

90462dd

verified ·

1 Parent(s): ab28335

Update app.py

Browse files

Files changed (1) hide show

app.py +119 -19

app.py CHANGED Viewed

@@ -1,57 +1,157 @@
 import gradio as gr
 import pdfplumber
 import pytesseract
 from PIL import Image
 from transformers import pipeline
 from sentence_transformers import SentenceTransformer, util
-# Load Hugging Face models
 qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
-summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-# Function to extract text from PDF
 def extract_text_from_pdf(pdf_file):
     text = ""
     with pdfplumber.open(pdf_file) as pdf:
         for page in pdf.pages:
             text += page.extract_text() + "\n"
-    return text
-# Function to extract text from image using OCR
 def extract_text_from_image(image_file):
     image = Image.open(image_file)
-    return pytesseract.image_to_string(image)
-# Function to process document and answer questions
 def document_processor(uploaded_file, query):
     text = ""
     if uploaded_file.name.endswith(".pdf"):
         text = extract_text_from_pdf(uploaded_file.name)
     elif uploaded_file.name.endswith((".png", ".jpg", ".jpeg")):
         text = extract_text_from_image(uploaded_file.name)
     else:
-        text = uploaded_file.read().decode("utf-8")
     if query.lower() == "summarize":
-        summary = summarizer(text, max_length=150, min_length=30, do_sample=False)
-        return summary[0]["summary_text"]
-    # Find the best-matching answer
-    answer = qa_pipeline(question=query, context=text)
-    return answer["answer"]
 # Gradio UI
 with gr.Blocks() as app:
-    gr.Markdown("# 📄 Smart Document Explorer")
     with gr.Row():
-        uploaded_file = gr.File(label="Upload Document (PDF, Image, or Text)")
-        query = gr.Textbox(label="Ask a question (or type 'summarize')", placeholder="What is this document about?")
-    output_text = gr.Textbox(label="AI Response")
-    submit_btn = gr.Button("Process Document")
-    submit_btn.click(document_processor, inputs=[uploaded_file, query], outputs=output_text)
 app.launch()

 import gradio as gr
 import pdfplumber
 import pytesseract
+import faiss
+import nltk
+import spacy
+import re
+import numpy as np
+import os
+import speech_recognition as sr
+from gtts import gTTS
+from nltk.corpus import stopwords
 from PIL import Image
 from transformers import pipeline
 from sentence_transformers import SentenceTransformer, util
+# Download stopwords and load NLP tools
+# NOTE: this is a module-level call, so the download is attempted every time the module is executed.
+nltk.download("stopwords")
+# NOTE(review): `nlp` is not referenced anywhere else in the code shown here - confirm it is needed.
+nlp = spacy.load("en_core_web_sm")
+# A set gives O(1) membership tests when filtering words in preprocess_text.
+stop_words = set(stopwords.words("english"))
+# Load AI models from Hugging Face
 qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
+summarizer = pipeline("summarization", model="t5-small")
 embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+# FAISS index for fast search
+# NOTE(review): `index` is created here but never populated or queried in the code shown here.
+dimension = 384  # Embedding size
+index = faiss.IndexFlatL2(dimension)
+# Dummy database of documents (for recommendations)
+# Maps a document title to a short description; recommend_documents embeds the descriptions and returns the titles.
+document_database = {
+    "Machine Learning Basics": "Introduction to ML, Supervised vs Unsupervised, Algorithms",
+    "Deep Learning Advanced": "Neural Networks, CNN, RNN, Transformers",
+    "Data Science Fundamentals": "Data Preprocessing, Feature Engineering, Statistics",
+    "AI in Healthcare": "Medical Image Analysis, AI in Diagnosis, Predictive Analytics",
+    "Blockchain Technology": "Decentralized Networks, Smart Contracts, Cryptography"
+}
+# Function to recommend relevant documents
+def recommend_documents(query):
+    """Return the titles of the three database entries most similar to *query*.
+
+    The query and every description in ``document_database`` are embedded
+    with ``embedder`` and compared by cosine similarity. Titles are returned
+    best match first.
+    """
+    titles = list(document_database.keys())
+    descriptions = list(document_database.values())
+    query_vector = embedder.encode(query, convert_to_tensor=True)
+    description_vectors = embedder.encode(descriptions, convert_to_tensor=True)
+    similarity = util.pytorch_cos_sim(query_vector, description_vectors).cpu().numpy()
+    # argsort is ascending: keep the last three positions, then flip them.
+    best_first = np.argsort(similarity[0])[-3:][::-1]
+    return [titles[position] for position in best_first]
+# Function to preprocess text
+def preprocess_text(text):
+    """Normalise *text* for downstream models.
+
+    Lowercases the input, deletes every character that is not a letter,
+    digit or whitespace (so punctuation, including sentence-ending periods,
+    is removed), drops words found in ``stop_words`` and joins the remaining
+    words with single spaces.
+    """
+    lowered = text.lower()
+    alphanumeric_only = re.sub(r"[^a-zA-Z0-9\s]", "", lowered)
+    kept_words = filter(lambda token: token not in stop_words, alphanumeric_only.split())
+    return " ".join(kept_words)
+# Extract text from PDF
 def extract_text_from_pdf(pdf_file):
+    """Extract the text of every page of a PDF and return it preprocessed.
+
+    Args:
+        pdf_file: Path (or file object) accepted by ``pdfplumber.open``.
+
+    Returns:
+        The page texts, each followed by a newline, concatenated and passed
+        through ``preprocess_text``.
+    """
+    page_texts = []
     with pdfplumber.open(pdf_file) as pdf:
         for page in pdf.pages:
+            # extract_text() may return None for a page without a text layer
+            # (e.g. a scanned image); treat that as an empty page instead of
+            # failing on None + "\n".
+            page_texts.append((page.extract_text() or "") + "\n")
+    return preprocess_text("".join(page_texts))
+# Extract text from image using OCR
 def extract_text_from_image(image_file):
+    """Run OCR on an image file and return the preprocessed text.
+
+    Args:
+        image_file: Path (or file object) accepted by ``PIL.Image.open``.
+
+    Returns:
+        The text recognised by ``pytesseract.image_to_string``, passed
+        through ``preprocess_text``.
+    """
+    # The context manager closes the underlying file once OCR has finished,
+    # so repeated uploads do not leak open file handles.
+    with Image.open(image_file) as image:
+        return preprocess_text(pytesseract.image_to_string(image))
+# Convert speech to text
+def voice_to_text(audio_file):
+    """Transcribe an audio file to text with the Google Web Speech recogniser.
+
+    Args:
+        audio_file: Path of the recording, or a falsy value when there is none.
+
+    Returns:
+        The transcription, or a human-readable message when there is no
+        audio, the speech could not be understood, or the recognition
+        service could not be reached.
+    """
+    # Nothing recorded or uploaded: report it the same way as the other
+    # failure modes instead of handing an empty value to sr.AudioFile.
+    if not audio_file:
+        return "No audio provided."
+    recognizer = sr.Recognizer()
+    with sr.AudioFile(audio_file) as source:
+        audio = recognizer.record(source)
+    try:
+        return recognizer.recognize_google(audio)
+    except sr.UnknownValueError:
+        return "Could not understand the audio."
+    except sr.RequestError:
+        return "Speech recognition service unavailable."
+# Convert text to speech
+def text_to_speech(answer_text):
+    """Synthesise *answer_text* as English speech and return the MP3 path.
+
+    Args:
+        answer_text: Text to speak.
+
+    Returns:
+        ``"response.mp3"`` after the audio has been written, or ``None`` when
+        there is nothing to speak.
+    """
+    # gTTS refuses empty input, so skip synthesis when there is no text.
+    if not answer_text or not answer_text.strip():
+        return None
+    tts = gTTS(text=answer_text, lang="en")
+    # NOTE(review): the fixed file name is overwritten by every call, so
+    # concurrent requests share one file - confirm this is acceptable.
+    tts.save("response.mp3")
+    return "response.mp3"
+# Process document and answer questions
 def document_processor(uploaded_file, query):
+    """Answer questions about, or summarise, an uploaded document.
+
+    Args:
+        uploaded_file: Uploaded file object; its ``name`` attribute is used
+            as the path and to pick the extraction method (PDF, image, or
+            UTF-8 text for anything else).
+        query: Either ``"summarize"`` or one or more questions separated
+            by ``;``.
+
+    Returns:
+        A 3-tuple ``(response, audio_path, recommended_titles)``.
+        ``response`` is ``{"summary": ...}`` for a summary request, otherwise
+        a dict mapping each question to its answer. ``audio_path`` is
+        ``None`` when there is no answer text to speak.
+
+    Raises:
+        gr.Error: If no file was uploaded, the query is empty, or no text
+            could be extracted from the document.
+    """
+    if uploaded_file is None:
+        raise gr.Error("Please upload a document first.")
+    if not query or not query.strip():
+        raise gr.Error("Please enter a question or type 'summarize'.")
+
+    # File type handling (case-insensitive, so "REPORT.PDF" is not decoded as plain text)
+    file_name = uploaded_file.name.lower()
+    if file_name.endswith(".pdf"):
         text = extract_text_from_pdf(uploaded_file.name)
+    elif file_name.endswith((".png", ".jpg", ".jpeg")):
         text = extract_text_from_image(uploaded_file.name)
     else:
+        text = preprocess_text(uploaded_file.read().decode("utf-8"))
+    # The models below cannot work with an empty context (e.g. a scanned PDF without a text layer).
+    if not text.strip():
+        raise gr.Error("No text could be extracted from the document.")
+
+    # If user asks for a summary
+    if query.strip().lower() == "summarize":
+        # truncation=True keeps over-long documents within the model's input limit.
+        summary = summarizer(text, max_length=200, min_length=50, do_sample=False, truncation=True)
+        summary_text = summary[0]["summary_text"]
+        # The first output is shown in a JSON component, which tries to parse a bare string as JSON; wrap it in a dict.
+        return {"summary": summary_text}, text_to_speech(summary_text), recommend_documents(summary_text)
+
+    # Multi-question processing; skip blanks left by a stray or trailing ';'
+    queries = [q.strip() for q in query.split(";") if q.strip()]
+    # Sentence embeddings depend only on the document, so compute them once for all questions.
+    sentences = text.split(". ")
+    sentence_embeddings = embedder.encode(sentences, convert_to_tensor=True)
+    responses = {}
+    for q in queries:
+        query_embedding = embedder.encode(q, convert_to_tensor=True)
+        # Find most relevant sentence
+        scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)
+        best_sentence = sentences[np.argmax(scores.cpu().numpy())]
+        # Generate answer
+        answer = qa_pipeline(question=q, context=best_sentence)
+        responses[q] = answer["answer"]
+    # Convert answer to speech (skipped when every answer came back empty)
+    combined_answers = " ".join(responses.values())
+    speech_output = text_to_speech(combined_answers) if combined_answers.strip() else None
+    return responses, speech_output, recommend_documents(query)
 # Gradio UI
 with gr.Blocks() as app:
+    # Page title.
+    gr.Markdown("# 📄 Smart Document Explorer 🚀")
+    # Input: the document to analyse.
+    with gr.Row():
+        uploaded_file = gr.File(label="📂 Upload Document (PDF, Image, or Text)")
+    # Input: typed query; also receives the transcription produced by voice_btn.
     with gr.Row():
+        query = gr.Textbox(label="💬 Ask Questions (Separate with ';') or Type 'summarize'", placeholder="e.g. What is the topic?; Who wrote it?")
+    # Input: spoken query, handed to voice_to_text as a file path.
+    with gr.Row():
+        voice_input = gr.Audio(label="🎤 Speak Your Query", type="filepath")
+        voice_btn = gr.Button("🎙️ Convert Speech to Text")
+    # Outputs: the three values returned by document_processor, in order.
+    with gr.Row():
+        output_text = gr.JSON(label="🧠 AI Response")
+        output_audio = gr.Audio(label="🔊 AI Voice Answer", type="filepath")
+    with gr.Row():
+        recommendations = gr.JSON(label="📌 Recommended Topics")
+    submit_btn = gr.Button("🚀 Process Document")
+    # Button Actions
+    # Transcribe the recording and write the result into the query textbox.
+    voice_btn.click(voice_to_text, inputs=voice_input, outputs=query)
+    # Run the document pipeline and fill the response, audio and recommendation widgets.
+    submit_btn.click(document_processor, inputs=[uploaded_file, query], outputs=[output_text, output_audio, recommendations])
+# Start the Gradio server for the interface defined above.
 app.launch()