# Spaces: Sleeping (page-capture header; not part of the program)
import os
import re
import subprocess
import sys
from typing import List, Dict, Any

import spacy
from fastapi import FastAPI
from pydantic import BaseModel
from pymongo import MongoClient
from transformers import pipeline
# FastAPI app setup
# Single application instance; the endpoint handlers in this module attach to it.
app = FastAPI()
# ==========================
# MongoDB Connection Setup
# ==========================
# The connection URI embeds a username and password, so it is read from the
# environment instead of being committed to source control.
connection_string = os.environ.get("MONGODB_URI")
if not connection_string:
    raise RuntimeError(
        "MONGODB_URI environment variable is not set; "
        "it must hold the MongoDB connection string."
    )
client = MongoClient(connection_string)
db = client["test"]  # Replace with your database name
products_collection = db["products"]  # Replace with your collection name
# ==========================
# Transformers Pipeline Setup
# ==========================
# Build the question-answering pipeline once at import time so that every
# request reuses the same loaded model.
qa_pipeline = pipeline(
    "question-answering",
    model="bert-large-uncased-whole-word-masking-finetuned-squad",
)
# ==========================
# Static Context Message
# ==========================
# Fixed company description that is always appended to the QA context, so the
# model has something to answer from even when no product matches.
context_msg = (
    "Hutter Products GmbH provides a wide array of services to help businesses create high-quality, sustainable products. "
    "Their offerings include comprehensive product design, ensuring items are both visually appealing and functional, and product consulting, "
    "which provides expert advice on features, materials, and design elements. They also offer sustainability consulting to integrate eco-friendly practices, "
    "such as using recycled materials and Ocean Bound Plastic. Additionally, they manage customized production to ensure products meet the highest standards "
    "and offer product animation services, creating realistic rendered images and animations to enhance online engagement. These services collectively enable "
    "businesses to develop products that are sustainable, market-responsive, and aligned with their brand identity."
)
# ==========================
# spaCy NER Setup
# ==========================
from spacy.util import is_package

# Prefer the model directory shipped alongside the app.
spacy_model_path = "/home/user/app/en_core_web_sm-3.8.0"
try:
    nlp = spacy.load(spacy_model_path)
except OSError as path_error:
    # Retrying the same path can only fail the same way, so fall back to the
    # pip-installed 'en_core_web_sm' package when it is present. No download is
    # attempted at import time.
    if not is_package("en_core_web_sm"):
        raise OSError(
            f"spaCy model not found at {spacy_model_path!r} and the "
            "'en_core_web_sm' package is not installed."
        ) from path_error
    nlp = spacy.load("en_core_web_sm")
# ==========================
# Pydantic Models
# ==========================
class PromptRequest(BaseModel):
    """Request body carrying the user's free-text prompt."""

    # Text that is both mined for keywords and used as the QA question.
    input_text: str
class CombinedResponse(BaseModel):
    """Response schema bundling keyword extraction, QA output and matched products."""

    # Keyword-extraction result, e.g. {"extracted_keywords": [...]}.
    ner: Dict[str, Any]
    # Question-answering result with "question", "answer" and "score" keys.
    qa: Dict[str, Any]
    # Product records whose name matched at least one extracted keyword.
    products_matched: List[Dict[str, Any]]
# ==========================
# Helper Functions
# ==========================
def extract_keywords(text: str) -> List[str]:
    """
    Extract keywords (nouns and proper nouns) using spaCy.

    Args:
        text: Raw input text to analyse.

    Returns:
        The distinct token texts tagged NOUN or PROPN, in order of first
        appearance, so the result is stable across runs.
    """
    doc = nlp(text)
    noun_texts = (token.text for token in doc if token.pos_ in ("NOUN", "PROPN"))
    # dict.fromkeys de-duplicates while keeping insertion order; a plain set
    # would return the keywords in a hash-dependent order.
    return list(dict.fromkeys(noun_texts))
def search_products_by_keywords(keywords: List[str]) -> List[Dict[str, Any]]:
    """
    Search MongoDB for products whose name contains any of the given keywords.

    Matching is a case-insensitive substring match on the "name" field.

    Args:
        keywords: Keywords to look for; empty strings are ignored.

    Returns:
        One dict per matching product with the keys "id", "name",
        "description", "skuNumber" and "baseModel". Returns an empty list
        when there are no usable keywords.
    """
    # Keywords come from user text, so escape them: a raw "C++" would be an
    # invalid pattern and "." would match any character. Empty strings are
    # dropped because an empty pattern matches every product.
    name_filters = [
        {"name": {"$regex": re.escape(keyword), "$options": "i"}}
        for keyword in keywords
        if keyword
    ]
    # MongoDB rejects an empty "$or" array, so skip the query entirely.
    if not name_filters:
        return []

    cursor = products_collection.find({"$or": name_filters})
    return [
        {
            "id": str(product.get("_id", "")),
            "name": product.get("name", ""),
            "description": product.get("description", ""),
            "skuNumber": product.get("skuNumber", ""),
            "baseModel": product.get("baseModel", ""),
        }
        for product in cursor
    ]
def get_combined_context(products: List[Dict]) -> str:
    """
    Combine the static context with product descriptions fetched from MongoDB.

    Args:
        products: Product dicts; entries with a missing or empty
            "description" are skipped.

    Returns:
        The non-empty descriptions joined by single spaces, then a space,
        then the static company context.
    """
    descriptions = [p["description"] for p in products if p.get("description")]
    product_descriptions = " ".join(descriptions)
    return f"{product_descriptions} {context_msg}"
# ==========================
# FastAPI Endpoints
# ==========================
@app.get("/")
async def root():
    """Return a static welcome message."""
    return {"message": "Welcome to the NER and QA API!"}
# NOTE(review): no route decorator is visible in the source, so this handler
# was never registered; the "/process/" path is an assumption -- confirm it
# against the clients of this API.
@app.post("/process/", response_model=CombinedResponse)
async def process_prompt(request: PromptRequest):
    """
    Run keyword extraction, product lookup and question answering on a prompt.

    Args:
        request: Request body whose ``input_text`` is used both as the text
            to mine for keywords and as the question put to the QA model.

    Returns:
        A dict with "ner" (the extracted keywords), "qa" (question, answer
        and score) and "products_matched" (the matching product records).
    """
    # NOTE(review): the spaCy, MongoDB and QA calls below are synchronous and
    # are invoked directly from this coroutine.
    input_text = request.input_text

    # Step 1: Extract keywords using spaCy
    keywords = extract_keywords(input_text)
    ner_response = {"extracted_keywords": keywords}

    # Step 2: Search MongoDB for matching products
    products = search_products_by_keywords(keywords)

    # Step 3: Generate combined context
    combined_context = get_combined_context(products)

    # Step 4: Use the Q&A model, but only when there is context to read from
    if combined_context.strip():
        qa_input = {"question": input_text, "context": combined_context}
        qa_output = qa_pipeline(qa_input)
        qa_response = {
            "question": input_text,
            "answer": qa_output["answer"],
            "score": qa_output["score"],
        }
    else:
        qa_response = {
            "question": input_text,
            "answer": "No relevant context available.",
            "score": 0.0,
        }

    # Step 5: Return combined response
    return {
        "ner": ner_response,
        "qa": qa_response,
        "products_matched": products,
    }