Manishkumaryadav committed on
Commit
90462dd
·
verified ·
1 Parent(s): ab28335

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -19
app.py CHANGED
@@ -1,57 +1,157 @@
1
  import gradio as gr
2
  import pdfplumber
3
  import pytesseract
 
 
 
 
 
 
 
 
 
4
  from PIL import Image
5
  from transformers import pipeline
6
  from sentence_transformers import SentenceTransformer, util
7
 
8
- # Load Hugging Face models
 
 
 
 
 
9
  qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
10
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
11
  embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
12
 
13
- # Function to extract text from PDF
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def extract_text_from_pdf(pdf_file):
15
  text = ""
16
  with pdfplumber.open(pdf_file) as pdf:
17
  for page in pdf.pages:
18
  text += page.extract_text() + "\n"
19
- return text
20
 
21
- # Function to extract text from image using OCR
22
  def extract_text_from_image(image_file):
23
  image = Image.open(image_file)
24
- return pytesseract.image_to_string(image)
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- # Function to process document and answer questions
 
 
 
 
 
 
27
  def document_processor(uploaded_file, query):
28
  text = ""
 
 
29
  if uploaded_file.name.endswith(".pdf"):
30
  text = extract_text_from_pdf(uploaded_file.name)
31
  elif uploaded_file.name.endswith((".png", ".jpg", ".jpeg")):
32
  text = extract_text_from_image(uploaded_file.name)
33
  else:
34
- text = uploaded_file.read().decode("utf-8")
35
 
 
36
  if query.lower() == "summarize":
37
- summary = summarizer(text, max_length=150, min_length=30, do_sample=False)
38
- return summary[0]["summary_text"]
 
 
 
 
39
 
40
- # Find the best-matching answer
41
- answer = qa_pipeline(question=query, context=text)
42
- return answer["answer"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  # Gradio UI
45
  with gr.Blocks() as app:
46
- gr.Markdown("# 📄 Smart Document Explorer")
 
 
 
47
 
48
  with gr.Row():
49
- uploaded_file = gr.File(label="Upload Document (PDF, Image, or Text)")
50
- query = gr.Textbox(label="Ask a question (or type 'summarize')", placeholder="What is this document about?")
51
 
52
- output_text = gr.Textbox(label="AI Response")
 
 
 
 
 
 
53
 
54
- submit_btn = gr.Button("Process Document")
55
- submit_btn.click(document_processor, inputs=[uploaded_file, query], outputs=output_text)
 
 
 
 
 
 
56
 
57
  app.launch()
 
1
  import gradio as gr
2
  import pdfplumber
3
  import pytesseract
4
+ import faiss
5
+ import nltk
6
+ import spacy
7
+ import re
8
+ import numpy as np
9
+ import os
10
+ import speech_recognition as sr
11
+ from gtts import gTTS
12
+ from nltk.corpus import stopwords
13
  from PIL import Image
14
  from transformers import pipeline
15
  from sentence_transformers import SentenceTransformer, util
16
 
17
+ # Download stopwords and load NLP tools
18
+ nltk.download("stopwords")
19
+ nlp = spacy.load("en_core_web_sm")
20
+ stop_words = set(stopwords.words("english"))
21
+
22
+ # Load AI models from Hugging Face
23
  qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
24
+ summarizer = pipeline("summarization", model="t5-small")
25
  embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
26
 
27
+ # FAISS index for fast search
28
+ dimension = 384 # Embedding size
29
+ index = faiss.IndexFlatL2(dimension)
30
+
31
+ # Dummy database of documents (for recommendations)
32
+ document_database = {
33
+ "Machine Learning Basics": "Introduction to ML, Supervised vs Unsupervised, Algorithms",
34
+ "Deep Learning Advanced": "Neural Networks, CNN, RNN, Transformers",
35
+ "Data Science Fundamentals": "Data Preprocessing, Feature Engineering, Statistics",
36
+ "AI in Healthcare": "Medical Image Analysis, AI in Diagnosis, Predictive Analytics",
37
+ "Blockchain Technology": "Decentralized Networks, Smart Contracts, Cryptography"
38
+ }
39
+
40
+ # Function to recommend relevant documents
41
+ def recommend_documents(query):
42
+ query_embedding = embedder.encode(query, convert_to_tensor=True)
43
+ doc_embeddings = embedder.encode(list(document_database.values()), convert_to_tensor=True)
44
+
45
+ scores = util.pytorch_cos_sim(query_embedding, doc_embeddings).cpu().numpy()
46
+ top_indices = np.argsort(scores[0])[-3:][::-1] # Top 3 recommendations
47
+
48
+ recommended_docs = [list(document_database.keys())[i] for i in top_indices]
49
+ return recommended_docs
50
+
51
+ # Function to preprocess text
52
+ def preprocess_text(text):
53
+ text = text.lower()
54
+ text = re.sub(r"[^a-zA-Z0-9\s]", "", text) # Remove special characters
55
+ text = " ".join([word for word in text.split() if word not in stop_words]) # Remove stopwords
56
+ return text
57
+
58
+ # Extract text from PDF
59
  def extract_text_from_pdf(pdf_file):
60
  text = ""
61
  with pdfplumber.open(pdf_file) as pdf:
62
  for page in pdf.pages:
63
  text += page.extract_text() + "\n"
64
+ return preprocess_text(text)
65
 
66
+ # Extract text from image using OCR
67
  def extract_text_from_image(image_file):
68
  image = Image.open(image_file)
69
+ return preprocess_text(pytesseract.image_to_string(image))
70
+
71
+ # Convert speech to text
72
+ def voice_to_text(audio_file):
73
+ recognizer = sr.Recognizer()
74
+ with sr.AudioFile(audio_file) as source:
75
+ audio = recognizer.record(source)
76
+ try:
77
+ return recognizer.recognize_google(audio)
78
+ except sr.UnknownValueError:
79
+ return "Could not understand the audio."
80
+ except sr.RequestError:
81
+ return "Speech recognition service unavailable."
82
 
83
+ # Convert text to speech
84
+ def text_to_speech(answer_text):
85
+ tts = gTTS(text=answer_text, lang="en")
86
+ tts.save("response.mp3")
87
+ return "response.mp3"
88
+
89
+ # Process document and answer questions
90
  def document_processor(uploaded_file, query):
91
  text = ""
92
+
93
+ # File type handling
94
  if uploaded_file.name.endswith(".pdf"):
95
  text = extract_text_from_pdf(uploaded_file.name)
96
  elif uploaded_file.name.endswith((".png", ".jpg", ".jpeg")):
97
  text = extract_text_from_image(uploaded_file.name)
98
  else:
99
+ text = preprocess_text(uploaded_file.read().decode("utf-8"))
100
 
101
+ # If user asks for a summary
102
  if query.lower() == "summarize":
103
+ summary = summarizer(text, max_length=200, min_length=50, do_sample=False)
104
+ return summary[0]["summary_text"], text_to_speech(summary[0]["summary_text"]), recommend_documents(summary[0]["summary_text"])
105
+
106
+ # Multi-question processing
107
+ queries = [q.strip() for q in query.split(";")]
108
+ responses = {}
109
 
110
+ for q in queries:
111
+ # Sentence embeddings for better accuracy
112
+ sentences = text.split(". ")
113
+ sentence_embeddings = embedder.encode(sentences, convert_to_tensor=True)
114
+ query_embedding = embedder.encode(q, convert_to_tensor=True)
115
+
116
+ # Find most relevant sentence
117
+ scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)
118
+ best_sentence = sentences[np.argmax(scores.cpu().numpy())]
119
+
120
+ # Generate answer
121
+ answer = qa_pipeline(question=q, context=best_sentence)
122
+ responses[q] = answer["answer"]
123
+
124
+ # Convert answer to speech
125
+ combined_answers = " ".join(responses.values())
126
+ speech_output = text_to_speech(combined_answers)
127
+
128
+ return responses, speech_output, recommend_documents(query)
129
 
130
  # Gradio UI
131
  with gr.Blocks() as app:
132
+ gr.Markdown("# 📄 Smart Document Explorer 🚀")
133
+
134
+ with gr.Row():
135
+ uploaded_file = gr.File(label="📂 Upload Document (PDF, Image, or Text)")
136
 
137
  with gr.Row():
138
+ query = gr.Textbox(label="💬 Ask Questions (Separate with ';') or Type 'summarize'", placeholder="e.g. What is the topic?; Who wrote it?")
 
139
 
140
+ with gr.Row():
141
+ voice_input = gr.Audio(label="🎤 Speak Your Query", type="filepath")
142
+ voice_btn = gr.Button("🎙️ Convert Speech to Text")
143
+
144
+ with gr.Row():
145
+ output_text = gr.JSON(label="🧠 AI Response")
146
+ output_audio = gr.Audio(label="🔊 AI Voice Answer", type="filepath")
147
 
148
+ with gr.Row():
149
+ recommendations = gr.JSON(label="📌 Recommended Topics")
150
+
151
+ submit_btn = gr.Button("🚀 Process Document")
152
+
153
+ # Button Actions
154
+ voice_btn.click(voice_to_text, inputs=voice_input, outputs=query)
155
+ submit_btn.click(document_processor, inputs=[uploaded_file, query], outputs=[output_text, output_audio, recommendations])
156
 
157
  app.launch()