Spaces:

SyedHutter
/

prompting_hutter_test1

Sleeping

App Files Community

SyedHutter committed on Mar 9

Commit

32d7156

verified ·

1 Parent(s): 5e3fd94

app.py Beta 2

Browse files

Files changed (1) hide show

app.py +138 -148

app.py CHANGED Viewed

@@ -1,148 +1,138 @@
-from fastapi import FastAPI
-from pydantic import BaseModel
-from typing import List, Dict, Any
-from pymongo import MongoClient
-from transformers import pipeline
-import spacy
-import subprocess
-import sys
-# FastAPI app setup
-app = FastAPI()
-# ==========================
-# MongoDB Connection Setup
-# ==========================
-connection_string = "mongodb+srv://clician:[email protected]/?retryWrites=true&w=majority&appName=Hutterdev"
-client = MongoClient(connection_string)
-db = client["test"]  # Replace with your database name
-products_collection = db["products"]  # Replace with your collection name
-# ==========================
-# Transformers Pipeline Setup
-# ==========================
-# Load the Question-Answering pipeline
-qa_pipeline = pipeline("question-answering", model="bert-large-uncased-whole-word-masking-finetuned-squad")
-# ==========================
-# Static Context Message
-# ==========================
-context_msg = (
-    "Hutter Products GmbH provides a wide array of services to help businesses create high-quality, sustainable products. "
-    "Their offerings include comprehensive product design, ensuring items are both visually appealing and functional, and product consulting, "
-    "which provides expert advice on features, materials, and design elements. They also offer sustainability consulting to integrate eco-friendly practices, "
-    "such as using recycled materials and Ocean Bound Plastic. Additionally, they manage customized production to ensure products meet the highest standards "
-    "and offer product animation services, creating realistic rendered images and animations to enhance online engagement. These services collectively enable "
-    "businesses to develop products that are sustainable, market-responsive, and aligned with their brand identity."
-)
-# ==========================
-# spaCy NER Setup
-# ==========================
-# ==========================
-# spaCy NER Setup
-# ==========================
-from spacy.util import is_package
-# Ensure 'en_core_web_sm' is available; otherwise, download it
-try:
-    spacy_model_path = "/home/user/app/en_core_web_sm-3.8.0"
-    nlp = spacy.load(spacy_model_path)
-except OSError:
-    # print("Downloading 'en_core_web_sm' model...")
-    # subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
-    nlp = spacy.load(spacy_model_path)
-# ==========================
-# Pydantic Models
-# ==========================
-class PromptRequest(BaseModel):
-    input_text: str
-class CombinedResponse(BaseModel):
-    ner: Dict[str, Any]
-    qa: Dict[str, Any]
-    products_matched: List[Dict[str, Any]]
-# ==========================
-# Helper Functions
-# ==========================
-def extract_keywords(text: str) -> List[str]:
-    """
-    Extract keywords (nouns and proper nouns) using spaCy.
-    """
-    doc = nlp(text)
-    keywords = [token.text for token in doc if token.pos_ in ["NOUN", "PROPN"]]
-    return list(set(keywords))
-def search_products_by_keywords(keywords: List[str]) -> List[Dict[str, Any]]:
-    """
-    Search MongoDB for products that match any of the extracted keywords.
-    """
-    regex_patterns = [{"name": {"$regex": keyword, "$options": "i"}} for keyword in keywords]
-    query = {"$or": regex_patterns}
-    matched_products = []
-    cursor = products_collection.find(query)
-    for product in cursor:
-        matched_products.append({
-            "id": str(product.get("_id", "")),
-            "name": product.get("name", ""),
-            "description": product.get("description", ""),
-            "skuNumber": product.get("skuNumber", ""),
-            "baseModel": product.get("baseModel", ""),
-        })
-    return matched_products
-def get_combined_context(products: List[Dict]) -> str:
-    """
-    Combine the static context with product descriptions fetched from MongoDB.
-    """
-    product_descriptions = " ".join([p["description"] for p in products if "description" in p and p["description"]])
-    combined_context = f"{product_descriptions} {context_msg}"
-    return combined_context
-# ==========================
-# FastAPI Endpoints
-# ==========================
-@app.get("/")
-async def root():
-    return {"message": "Welcome to the NER and QA API!"}
-@app.post("/process/", response_model=CombinedResponse)
-async def process_prompt(request: PromptRequest):
-    input_text = request.input_text
-    # Step 1: Extract keywords using spaCy NER
-    keywords = extract_keywords(input_text)
-    ner_response = {"extracted_keywords": keywords}
-    # Step 2: Search MongoDB for matching products
-    products = search_products_by_keywords(keywords)
-    # Step 3: Generate Combined Context
-    combined_context = get_combined_context(products)
-    # Step 4: Use Q&A Model
-    if combined_context.strip():  # Ensure the combined context is not empty
-        qa_input = {"question": input_text, "context": combined_context}
-        qa_output = qa_pipeline(qa_input)
-        qa_response = {
-            "question": input_text,
-            "answer": qa_output["answer"],
-            "score": qa_output["score"]
-        }
-    else:
-        qa_response = {
-            "question": input_text,
-            "answer": "No relevant context available.",
-            "score": 0.0
-        }
-    # Step 5: Return Combined Response
-    return {
-        "ner": ner_response,
-        "qa": qa_response,
-        "products_matched": products
-    }

+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from typing import List, Dict, Any
+from pymongo import MongoClient
+from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration
+import spacy
+import os
+import logging
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+app = FastAPI()
+# MongoDB Setup
+connection_string = os.getenv("MONGO_URI", "mongodb+srv://clician:[email protected]/?retryWrites=true&w=majority&appName=Hutterdev")
+client = MongoClient(connection_string)
+db = client["test"]
+products_collection = db["products"]
+# BlenderBot Setup
+model_name = "SyedHutter/blenderbot_model/blenderbot_model"  # Points to subdirectory
+model_dir = "/home/user/app/blenderbot_model"
+if not os.path.exists(model_dir):
+    logger.info(f"Downloading {model_name} to {model_dir}...")
+    tokenizer = BlenderbotTokenizer.from_pretrained(model_name)
+    model = BlenderbotForConditionalGeneration.from_pretrained(model_name)
+    os.makedirs(model_dir, exist_ok=True)
+    tokenizer.save_pretrained(model_dir)
+    model.save_pretrained(model_dir)
+    logger.info("Model download complete.")
+else:
+    logger.info(f"Loading pre-existing model from {model_dir}.")
+tokenizer = BlenderbotTokenizer.from_pretrained(model_dir)
+model = BlenderbotForConditionalGeneration.from_pretrained(model_dir)
+# Static Context
+context_msg = "Hutter Products GmbH provides sustainable products like shirts and shorts..."
+# spaCy Setup
+spacy_model_path = "/home/user/app/en_core_web_sm-3.8.0"
+nlp = spacy.load(spacy_model_path)
+# Pydantic Models
+class PromptRequest(BaseModel):
+    input_text: str
+    conversation_history: List[str] = []
+class CombinedResponse(BaseModel):
+    ner: Dict[str, Any]
+    qa: Dict[str, Any]
+    products_matched: List[Dict[str, Any]]
+# Helper Functions
+def extract_keywords(text: str) -> List[str]:
+    doc = nlp(text)
+    keywords = [token.text for token in doc if token.pos_ in ["NOUN", "PROPN"]]
+    return list(set(keywords))
+def detect_intent(text: str) -> str:
+    doc = nlp(text.lower())
+    if any(token.text in ["shirt", "shirts"] for token in doc):
+        return "recommend_shirt"
+    elif any(token.text in ["short", "shorts"] for token in doc):
+        return "recommend_shorts"
+    elif any(token.text in ["what", "who", "company", "do", "products"] for token in doc):
+        return "company_info"
+    return "unknown"
+def search_products_by_keywords(keywords: List[str]) -> List[Dict[str, Any]]:
+    query = {"$or": [{"name": {"$regex": keyword, "$options": "i"}} for keyword in keywords]}
+    matched_products = [dict(p, id=str(p["_id"])) for p in products_collection.find(query)]
+    return matched_products
+def get_product_context(products: List[Dict]) -> str:
+    if not products:
+        return ""
+    product_str = "Here are some products: "
+    product_str += ", ".join([f"'{p['name']}' (SKU: {p['skuNumber']}) - {p['description']}" for p in products[:2]])
+    return product_str
+def format_response(response: str, products: List[Dict], intent: str) -> str:
+    if intent in ["recommend_shirt", "recommend_shorts"] and products:
+        product = products[0]
+        return f"{response} For example, check out our '{product['name']}' (SKU: {product['skuNumber']})—it’s {product['description'].lower()}!"
+    elif intent == "company_info":
+        return f"{response} At Hutter Products GmbH, we specialize in sustainable product design and production!"
+    return response
+# Endpoints
+@app.get("/")
+async def root():
+    return {"message": "Welcome to the NER and Chat API!"}
+@app.post("/process/", response_model=CombinedResponse)
+async def process_prompt(request: PromptRequest):
+    try:
+        input_text = request.input_text
+        history = request.conversation_history[-3:] if request.conversation_history else []
+        intent = detect_intent(input_text)
+        keywords = extract_keywords(input_text)
+        ner_response = {"extracted_keywords": keywords}
+        products = search_products_by_keywords(keywords)
+        product_context = get_product_context(products)
+        history_str = " || ".join(history)
+        full_input = f"{history_str} || {product_context} {context_msg} || {input_text}" if history else f"{product_context} {context_msg} || {input_text}"
+        inputs = tokenizer(full_input, return_tensors="pt", truncation=True, max_length=512)
+        outputs = model.generate(**inputs, max_length=150, num_beams=5, no_repeat_ngram_size=2)
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        enhanced_response = format_response(response, products, intent)
+        qa_response = {
+            "question": input_text,
+            "answer": enhanced_response,
+            "score": 1.0
+        }
+        return {
+            "ner": ner_response,
+            "qa": qa_response,
+            "products_matched": products
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Oops, something went wrong: {str(e)}. Try again!")
+@app.on_event("startup")
+async def startup_event():
+    logger.info("API is running with BlenderBot-400M-distill, connected to MongoDB.")
+@app.on_event("shutdown")
+def shutdown_event():
+    client.close()