Spaces:

abubasith86
/

Chatbot

Sleeping

App Files Files Community

abubasith86 committed 9 days ago

Commit

5426913

1 Parent(s): 9e78176

Test

Browse files

Files changed (1) hide show

app.py +89 -119

app.py CHANGED Viewed

@@ -1,153 +1,123 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-import pymupdf
 from duckduckgo_search import DDGS
 from serpapi import GoogleSearch
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
 client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
-# PDF Parsing
-def extract_text_from_pdf(pdf_file):
-    doc = pymupdf.open(pdf_file)
-    text = " ".join([page.get_textpage().extractTEXT() for page in doc])
-    return text
-# Web search fallback
-def search_web(query):
-    with DDGS() as ddgs:
-        results = ddgs.text(query)
-        if results:
-            return results[0]["body"]
-    return "No relevant results found on the web."
-def google_search(query):
-    params = {
-        "q": query,
-        "api_key": "b11d4a3660600e7e7f481b3288f107fbf993389a20125b0a97ebe7ab207854a5",  # Replace this with your real key
-        "engine": "google",
-    }
-    search = GoogleSearch(params)
-    results = search.get_dict()
-    if "organic_results" in results:
-        # Combine top 3 results
-        summaries = []
-        for res in results["organic_results"][:3]:
-            title = res.get("title", "")
-            snippet = res.get("snippet", "")
-            summaries.append(f"{title}: {snippet}")
-        return "\n".join(summaries)
-    return None
 SYSTEM_PROMPT = """
-You are an intelligent and friendly AI assistant.
 Your goals:
-- Answer user questions clearly and concisely.
-- If a PDF document is provided, use its content to give informed answers.
-- For questions about recent or live topics (e.g., news, prices, events), you may perform a web search and summarize the result.
-- If no document or web context is available, still try to help using general knowledge.
-- Be honest if you don’t know something.
-- Always be polite, helpful, and respectful.
 """
 def respond(
-    message,
     history: list[tuple[str, str]],
-    max_tokens=2048,
-    temperature=0.4,
-    top_p=0.1,
 ):
-    recent_keywords = [
-        "latest",
-        "today",
-        "current",
-        "now",
-        "recent",
-        "news",
-        "update",
-        "price",
-        "who won",
-        "what happened",
-        "trending",
-        "breaking",
-        "just in",
-        "new release",
-        "live",
-        "score",
-        "results",
-        "weather",
-        "forecast",
-        "report",
-        "market",
-        "stocks",
-        "crypto",
-        "rate",
-        "exchange",
-        "gold price",
-        "happening",
-        "event",
-        "updates",
-        "hot",
-        "viral",
-        "announcement",
-        "today's",
-        "this week",
-        "schedule",
-        "calendar",
-        "launch",
-        "drop",
-        "release date",
-        "opening",
-        "closing",
-        "deadline",
-    ]
     message_lower = message.lower()
-    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    if any(kw in message_lower for kw in recent_keywords):
-        web_context = google_search(message)
         if web_context:
-            # Inject web context as part of the user's query
-            message = f"{message}\n\n[Relevant web search results to help you answer]:\n{web_context}"
     messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
         messages,
-        max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(respond)
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 from huggingface_hub import InferenceClient
+import fitz  # pymupdf
 from duckduckgo_search import DDGS
 from serpapi import GoogleSearch
+import tempfile
+import os
 client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
 SYSTEM_PROMPT = """
+You are an intelligent and friendly AI assistant.
 Your goals:
+- Use provided documents to answer questions accurately.
+- When the query is recent or about current events, leverage web search results.
+- If nothing is provided, rely on your general knowledge.
+- Always be honest, polite, and helpful.
 """
# Words and phrases that suggest the user is asking about live or recent
# information; respond() uses them to decide whether to run a web search.
# Stored as a frozenset so no request handler can alter the shared list.
RECENT_KEYWORDS = frozenset({
    "latest", "today", "current", "now", "recent", "news", "update", "price",
    "who won", "what happened", "trending", "breaking", "just in", "live",
    "score", "results", "weather", "forecast", "report", "market", "stocks",
    "crypto", "rate", "exchange", "gold price", "happening", "event", "updates",
    "hot", "viral", "announcement", "today's", "this week", "schedule", "calendar",
    "launch", "drop", "release date", "opening", "closing", "deadline",
})
def extract_text_from_pdf(pdf_file) -> str:
    """Return the text of every page of a PDF, joined by single spaces.

    Args:
        pdf_file: A filesystem path (``str`` or ``os.PathLike``), or an
            upload wrapper that exposes the path as its ``.name`` attribute.
            ``None`` means "no document".

    Returns:
        The concatenated page text; ``""`` when ``pdf_file`` is ``None``; or
        a message starting with ``"Failed to read PDF: "`` when the file
        cannot be opened or parsed. This function never raises.
    """
    if pdf_file is None:
        return ""

    # Paths are used as-is. Only non-path objects are asked for ``.name``,
    # because ``pathlib.Path.name`` is just the final path component.
    if isinstance(pdf_file, (str, os.PathLike)):
        path = pdf_file
    else:
        path = getattr(pdf_file, "name", pdf_file)

    try:
        with fitz.open(path) as doc:
            return " ".join(page.get_text() for page in doc)
    except Exception as e:
        # Deliberately broad: a bad upload must not crash the chat handler,
        # so any failure is reported back as text instead.
        return f"Failed to read PDF: {e}"
def search_web(query: str) -> str:
    """Look up *query* on the web and return a short text summary.

    SerpAPI's Google engine is tried first, but only when the
    ``SERPAPI_KEY`` environment variable is set; the top three organic
    results are returned as ``"title: snippet"`` lines. If that yields
    nothing or fails, the body of the first DuckDuckGo text result is used.

    Args:
        query: The search phrase.

    Returns:
        The summary text, or ``"No relevant web results found."`` when the
        query is blank or neither backend produced a usable result. Backend
        errors are logged as warnings and never raised.
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)
    no_results = "No relevant web results found."

    # A blank query cannot return anything useful; skip both network calls.
    if not query or not query.strip():
        return no_results

    api_key = os.getenv("SERPAPI_KEY", "")
    if api_key:
        try:
            results = GoogleSearch(
                {"q": query, "api_key": api_key, "engine": "google"}
            ).get_dict()
            organic = results.get("organic_results") or []
            summary = "\n".join(
                f"{r.get('title', '')}: {r.get('snippet', '')}"
                for r in organic[:3]
            )
            if summary:
                return summary
            # Empty result list: fall through to DuckDuckGo.
        except Exception:
            log.warning(
                "SerpAPI search failed; falling back to DuckDuckGo",
                exc_info=True,
            )

    try:
        with DDGS() as ddgs:
            # next(iter(...)) works whether text() returns a list or an iterator.
            first = next(iter(ddgs.text(query) or ()), None)
        if first and first.get("body"):
            return first["body"]
    except Exception:
        log.warning("DuckDuckGo search failed", exc_info=True)

    return no_results
 def respond(
+    message: str,
     history: list[tuple[str, str]],
+    pdf: object = None,
+    temperature: float = 0.4,
+    top_p: float = 0.1,
+    max_tokens: int = 2048,
 ):
+    context = ""
     message_lower = message.lower()
+    # 1. Use PDF content if available
+    if pdf is not None:
+        context = extract_text_from_pdf(pdf)
+        message += f"\n\n[Document context provided below for reference:]\n{context}"
+    # 2. Use web search if query looks recent
+    if any(keyword in message_lower for keyword in RECENT_KEYWORDS):
+        web_context = search_web(message)
         if web_context:
+            message += f"\n\n[Relevant web search results to help you answer]:\n{web_context}"
+    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+    for user, assistant in history:
+        messages.append({"role": "user", "content": user})
+        messages.append({"role": "assistant", "content": assistant})
     messages.append({"role": "user", "content": message})
+    # Stream LLM response
+    full_response = ""
+    for chunk in client.chat_completion(
         messages,
         stream=True,
         temperature=temperature,
         top_p=top_p,
+        max_tokens=max_tokens,
     ):
+        token = chunk.choices[0].delta.content or ""
+        full_response += token
+        yield full_response
# Page layout: a title, one row of optional controls, then the chat panel.
with gr.Blocks() as demo:
    gr.Markdown("## 💬 Smart Assistant with Web & Document Context")

    with gr.Row():
        pdf_input = gr.File(file_types=[".pdf"], label="📄 Upload PDF (optional)")
        # NOTE(review): some text-generation backends reject temperature == 0
        # or top_p == 1.0 — confirm these bounds against the serving backend.
        temperature = gr.Slider(
            minimum=0.0, maximum=1.0, value=0.4, step=0.05, label="Temperature"
        )
        top_p = gr.Slider(
            minimum=0.1, maximum=1.0, value=0.1, step=0.05, label="Top-p"
        )

    # The extra inputs are listed in the same order as respond()'s
    # pdf, temperature and top_p parameters.
    chat = gr.ChatInterface(
        additional_inputs=[pdf_input, temperature, top_p],
        fn=respond,
    )

if __name__ == "__main__":
    demo.launch()