SyedHutter's picture
Upload 28 files
37203c0 verified
raw
history blame
5.56 kB
import os
import re
import subprocess
import sys
from typing import List, Dict, Any

import spacy
from fastapi import FastAPI
from pydantic import BaseModel
from pymongo import MongoClient
from transformers import pipeline
# FastAPI app setup: the application object that the route decorators
# further down in this module (`@app.get`, `@app.post`) register against.
app = FastAPI()
# ==========================
# MongoDB Connection Setup
# ==========================
# The connection URI can be supplied through the MONGODB_URI environment
# variable so that credentials do not have to live in source control. The
# original literal is kept as the default for backward compatibility.
# NOTE(review): the default literal looks mangled ("[email protected]" is not a
# valid host section) -- confirm the real URI and prefer the env variable.
connection_string = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://clician:[email protected]/?retryWrites=true&w=majority&appName=Hutterdev",
)
client = MongoClient(connection_string)
db = client["test"]  # Replace with your database name
products_collection = db["products"]  # Replace with your collection name
# ==========================
# Transformers Pipeline Setup
# ==========================
# Build the extractive question-answering pipeline once at import time so
# every request reuses the same loaded model.
qa_pipeline = pipeline(
    task="question-answering",
    model="bert-large-uncased-whole-word-masking-finetuned-squad",
)
# ==========================
# Static Context Message
# ==========================
# Static company description appended to every QA context. It is written as
# one entry per sentence and joined with single spaces.
context_msg = " ".join(
    (
        "Hutter Products GmbH provides a wide array of services to help "
        "businesses create high-quality, sustainable products.",
        "Their offerings include comprehensive product design, ensuring items "
        "are both visually appealing and functional, and product consulting, "
        "which provides expert advice on features, materials, and design "
        "elements.",
        "They also offer sustainability consulting to integrate eco-friendly "
        "practices, such as using recycled materials and Ocean Bound Plastic.",
        "Additionally, they manage customized production to ensure products "
        "meet the highest standards and offer product animation services, "
        "creating realistic rendered images and animations to enhance online "
        "engagement.",
        "These services collectively enable businesses to develop products "
        "that are sustainable, market-responsive, and aligned with their "
        "brand identity.",
    )
)
# ==========================
# spaCy NER Setup
# ==========================
from spacy.util import is_package

# Prefer the model directory bundled with the deployment. If spaCy cannot
# load it (OSError), fall back to the pip-installed 'en_core_web_sm' package
# rather than retrying the same path, which would only fail again.
# The model is deliberately not downloaded at import time; install it ahead
# of time with `python -m spacy download en_core_web_sm`.
spacy_model_path = "/home/user/app/en_core_web_sm-3.8.0"
try:
    nlp = spacy.load(spacy_model_path)
except OSError:
    nlp = spacy.load("en_core_web_sm")
# ==========================
# Pydantic Models
# ==========================
class PromptRequest(BaseModel):
    """Request body accepted by the ``/process/`` endpoint."""

    # Free-text prompt; used both as the keyword-extraction input and as the
    # question handed to the QA pipeline.
    input_text: str
class CombinedResponse(BaseModel):
    """Response body returned by the ``/process/`` endpoint."""

    # Keyword-extraction result; the endpoint fills it as
    # {"extracted_keywords": [...]}.
    ner: Dict[str, Any]
    # QA result; the endpoint fills it with "question", "answer" and "score".
    qa: Dict[str, Any]
    # Products whose name matched a keyword; each entry carries "id", "name",
    # "description", "skuNumber" and "baseModel".
    products_matched: List[Dict[str, Any]]
# ==========================
# Helper Functions
# ==========================
def extract_keywords(text: str) -> List[str]:
    """
    Extract keywords (nouns and proper nouns) using spaCy.

    Args:
        text: Raw text to analyse.

    Returns:
        The distinct token texts tagged ``NOUN`` or ``PROPN``, in order of
        first appearance.
    """
    doc = nlp(text)
    noun_texts = (
        token.text for token in doc if token.pos_ in ("NOUN", "PROPN")
    )
    # dict.fromkeys de-duplicates while keeping first-occurrence order, so the
    # result is stable across runs (a plain set() ordering depends on string
    # hash randomisation).
    return list(dict.fromkeys(noun_texts))
def search_products_by_keywords(keywords: List[str]) -> List[Dict[str, Any]]:
    """
    Search MongoDB for products that match any of the extracted keywords.

    A product matches when its ``name`` contains a keyword, compared
    case-insensitively and literally (regex metacharacters in a keyword are
    escaped).

    Args:
        keywords: Keywords to look for; empty strings are ignored.

    Returns:
        One dict per matching product with the keys ``id``, ``name``,
        ``description``, ``skuNumber`` and ``baseModel``. Missing fields
        default to an empty string. Returns an empty list, without querying
        the database, when there are no usable keywords.
    """
    name_filters = [
        # Escape the keyword so text such as "C++" or "a.b" is matched
        # literally instead of being interpreted as a regular expression.
        {"name": {"$regex": re.escape(keyword), "$options": "i"}}
        for keyword in keywords
        if keyword  # an empty pattern would match every product
    ]
    if not name_filters:
        # MongoDB rejects {"$or": []}, so there is nothing to query.
        return []

    cursor = products_collection.find({"$or": name_filters})
    return [
        {
            "id": str(product.get("_id", "")),
            "name": product.get("name", ""),
            "description": product.get("description", ""),
            "skuNumber": product.get("skuNumber", ""),
            "baseModel": product.get("baseModel", ""),
        }
        for product in cursor
    ]
def get_combined_context(products: List[Dict]) -> str:
    """
    Build the QA context: product descriptions followed by the static
    company description.

    Products without a ``description`` key, or with a falsy description, are
    skipped. The descriptions are space-joined and placed before
    ``context_msg``, separated from it by one space.
    """
    usable_descriptions = []
    for product in products:
        if "description" not in product:
            continue
        description = product["description"]
        if description:
            usable_descriptions.append(description)

    joined_descriptions = " ".join(usable_descriptions)
    return f"{joined_descriptions} {context_msg}"
# ==========================
# FastAPI Endpoints
# ==========================
@app.get("/")
async def root():
    """Return the static welcome payload served at the API root."""
    welcome_payload = {"message": "Welcome to the NER and QA API!"}
    return welcome_payload
@app.post("/process/", response_model=CombinedResponse)
async def process_prompt(request: PromptRequest):
    """
    Run keyword extraction, product lookup and question answering on a prompt.

    Args:
        request: Request body carrying the user's ``input_text``.

    Returns:
        A dict with ``ner`` (the extracted keywords), ``qa`` (question, answer
        and score) and ``products_matched`` (products whose name matched a
        keyword).
    """
    # NOTE(review): the spaCy, MongoDB and model calls below are synchronous,
    # so they block the event loop while this coroutine runs. Consider
    # declaring the endpoint with plain `def` if concurrent requests matter.
    input_text = request.input_text

    # Step 1: Extract keywords using spaCy
    keywords = extract_keywords(input_text)
    ner_response = {"extracted_keywords": keywords}

    # Step 2: Search MongoDB for matching products. Skip the query when no
    # keywords were found: an empty "$or" list is rejected by MongoDB, which
    # would turn a prompt without nouns into a server error.
    products = search_products_by_keywords(keywords) if keywords else []

    # Step 3: Generate Combined Context
    combined_context = get_combined_context(products)

    # Step 4: Use Q&A Model
    if not input_text.strip():
        # The QA pipeline refuses an empty question, so answer without it.
        answer, score = "No question provided.", 0.0
    elif combined_context.strip():  # Ensure the combined context is not empty
        qa_input = {"question": input_text, "context": combined_context}
        qa_output = qa_pipeline(qa_input)
        answer, score = qa_output["answer"], qa_output["score"]
    else:
        answer, score = "No relevant context available.", 0.0

    qa_response = {
        "question": input_text,
        "answer": answer,
        "score": score,
    }

    # Step 5: Return Combined Response
    return {
        "ner": ner_response,
        "qa": qa_response,
        "products_matched": products,
    }