Update app.py
app.py CHANGED
@@ -1,7 +1,7 @@
 # 0. Install custom transformers and imports
 import os
 os.system("pip install git+https://github.com/shumingma/transformers.git")
-os.system("pip install
+os.system("pip install python-docx")
 
 import threading
 import torch
@@ -13,13 +13,9 @@ from transformers import (
     AutoTokenizer,
     TextIteratorStreamer,
 )
-from sentence_transformers import SentenceTransformer
 import gradio as gr
 import spaces
-import
-
-from pathlib import Path
-from PyPDF2 import PdfReader
+from docx import Document
 
 # 1. System prompt
 SYSTEM_PROMPT = """
@@ -46,55 +42,28 @@ model = AutoModelForCausalLM.from_pretrained(
 
 print(f"Model loaded on device: {model.device}")
 
-# 3. Load
-def
-
-
-    for
-
-
-
-        if text:
-            lines = text.split("\n")
-            for line in lines:
-                line = line.strip()
-                if not line:
-                    continue
-
-                if line.isupper() and len(line.split()) <= 6:
-                    if current_section:
-                        docs.append(current_section)
-                    current_section = line
-                else:
-                    if current_section:
-                        current_section += f" | {line}"
-                    else:
-                        current_section = line
-
-    if current_section:
-        docs.append(current_section)
-        current_section = None
-
-    return docs
+# 3. Load Menu Text from Word document
+def load_menu_text(docx_path):
+    doc = Document(docx_path)
+    full_text = []
+    for para in doc.paragraphs:
+        if para.text.strip():
+            full_text.append(para.text.strip())
+    return "\n".join(full_text)
 
+MENU_TEXT = load_menu_text("menu.docx")
+print(f"Loaded menu text from Word document.")
 
-
-print(f"Loaded {len(document_chunks)} text chunks from PDFs.")
-
-# 4. Create embeddings
-embedder = SentenceTransformer("all-MiniLM-L6-v2")  # Fast small model
-doc_embeddings = embedder.encode(document_chunks, normalize_embeddings=True)
-
-# 5. Retrieval function with float32 fix
+# 4. Simple retrieval function (search inside MENU_TEXT)
 def retrieve_context(question, top_k=3):
-
-
-
-
-
-    return "\n\n".join([
-
-#
+    question = question.lower()
+    sentences = MENU_TEXT.split("\n")
+    matches = [s for s in sentences if any(word in s.lower() for word in question.split())]
+    if not matches:
+        return "Sorry, I couldn't find relevant menu information."
+    return "\n\n".join(matches[:top_k])
+
+# 5. Chat respond function
 @spaces.GPU
 def respond(
     message: str,
@@ -138,11 +107,11 @@ def respond(
         response += new_text
         yield response
 
-#
+# 6. Gradio ChatInterface
 demo = gr.ChatInterface(
     fn=respond,
     title="Café Eleven Assistant",
-    description="Friendly café assistant
+    description="Friendly café assistant based on real menu loaded from Word document!",
     examples=[
         [
             "What kinds of burgers do you have?",
@@ -152,7 +121,7 @@ demo = gr.ChatInterface(
             0.95,
         ],
        [
-            "Do you have
+            "Do you have gluten-free pastries?",
             SYSTEM_PROMPT.strip(),
             512,
             0.7,
@@ -188,6 +157,6 @@ demo = gr.ChatInterface(
     ],
 )
 
-#
+# 7. Launch
 if __name__ == "__main__":
     demo.launch(share=True)
|