File size: 5,563 Bytes
37203c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import os
import re
import subprocess
import sys
from typing import List, Dict, Any

import spacy
from fastapi import FastAPI
from pydantic import BaseModel
from pymongo import MongoClient
from transformers import pipeline

# FastAPI app setup
# Single application instance for this module; the @app.get / @app.post
# decorators further down register their routes on it.
app = FastAPI()

# ==========================
# MongoDB Connection Setup
# ==========================
# Credentials do not belong in source control. The connection URI is read from
# the MONGODB_URI environment variable; the literal below is kept only as a
# backward-compatible default so existing deployments keep working unchanged.
connection_string = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://clician:[email protected]/?retryWrites=true&w=majority&appName=Hutterdev",
)
client = MongoClient(connection_string)

# Database and collection names can be overridden per environment; the
# defaults are the values this module has always used.
db = client[os.environ.get("MONGODB_DB", "test")]
products_collection = db[os.environ.get("MONGODB_COLLECTION", "products")]

# ==========================
# Transformers Pipeline Setup
# ==========================
# Build the extractive question-answering pipeline once at import time so
# every request reuses the same loaded model.
qa_pipeline = pipeline(
    task="question-answering",
    model="bert-large-uncased-whole-word-masking-finetuned-squad",
)

# ==========================
# Static Context Message
# ==========================
# Static company description that is always appended to the QA context.
# Written one sentence per tuple entry; the sentences are joined with a
# single space to form one paragraph.
context_msg = " ".join(
    (
        "Hutter Products GmbH provides a wide array of services to help "
        "businesses create high-quality, sustainable products.",
        "Their offerings include comprehensive product design, ensuring items "
        "are both visually appealing and functional, and product consulting, "
        "which provides expert advice on features, materials, and design elements.",
        "They also offer sustainability consulting to integrate eco-friendly "
        "practices, such as using recycled materials and Ocean Bound Plastic.",
        "Additionally, they manage customized production to ensure products "
        "meet the highest standards and offer product animation services, "
        "creating realistic rendered images and animations to enhance online engagement.",
        "These services collectively enable businesses to develop products "
        "that are sustainable, market-responsive, and aligned with their brand identity.",
    )
)

# ==========================
# spaCy Model Setup
# ==========================
from spacy.util import is_package

# Prefer the model directory bundled with the deployment. If that directory
# cannot be loaded, fall back to an `en_core_web_sm` package installed in the
# current environment. The previous fallback retried the very same path, so
# it could only fail a second time with the same error.
# No download is attempted at import time; if neither source is available the
# OSError raised by spacy.load propagates and the app fails fast on startup.
spacy_model_path = "/home/user/app/en_core_web_sm-3.8.0"
try:
    nlp = spacy.load(spacy_model_path)
except OSError:
    nlp = spacy.load("en_core_web_sm")

# ==========================
# Pydantic Models
# ==========================
class PromptRequest(BaseModel):
    # Request body accepted by POST /process/.
    # (Plain comments are used instead of a docstring so the generated
    # JSON schema for this model is left untouched.)

    # Free-text prompt; it is both mined for keywords and passed to the QA
    # model as the question.
    input_text: str

class CombinedResponse(BaseModel):
    # Response body returned by POST /process/.
    # (Plain comments are used instead of a docstring so the generated
    # JSON schema for this model is left untouched.)

    # Keyword extraction result; the endpoint fills it as
    # {"extracted_keywords": [...]}.
    ner: Dict[str, Any]
    # QA result; the endpoint fills it with "question", "answer" and "score".
    qa: Dict[str, Any]
    # Products returned by the keyword search, one dict per product with the
    # keys "id", "name", "description", "skuNumber" and "baseModel".
    products_matched: List[Dict[str, Any]]

# ==========================
# Helper Functions
# ==========================
def extract_keywords(text: str) -> List[str]:
    """
    Extract keywords (nouns and proper nouns) using spaCy.

    Tokens are selected by their part-of-speech tag (``NOUN`` or ``PROPN``).
    Duplicates are removed while keeping the order of first appearance, so
    the same input always yields the same list. The previous ``list(set(...))``
    produced an order that could differ from one process to the next.

    Args:
        text: The text to analyse.

    Returns:
        The distinct noun / proper-noun token texts, in order of first
        appearance. Matching is case-sensitive, so "Bottle" and "bottle"
        are kept as separate entries.
    """
    doc = nlp(text)
    noun_texts = (token.text for token in doc if token.pos_ in ("NOUN", "PROPN"))
    # dict keys are unique and preserve insertion order.
    return list(dict.fromkeys(noun_texts))

def search_products_by_keywords(keywords: List[str]) -> List[Dict[str, Any]]:
    """
    Search MongoDB for products whose name contains any of the keywords.

    Each keyword is matched as a case-insensitive literal substring of the
    product ``name`` field.

    Args:
        keywords: Keywords to look for. Empty strings are ignored.

    Returns:
        One dict per matching product with the keys ``id`` (stringified
        ``_id``), ``name``, ``description``, ``skuNumber`` and ``baseModel``.
        Fields missing from a document default to ``""``. Returns an empty
        list, without querying the database, when there is nothing to
        search for.
    """
    # An empty pattern would match every document, so drop blank keywords.
    terms = [keyword for keyword in keywords if keyword]
    if not terms:
        # MongoDB rejects {"$or": []}; there is nothing to match anyway.
        return []

    query = {
        "$or": [
            # Escape the keyword so characters such as ".", "+" or "(" are
            # matched literally instead of being interpreted as regex syntax
            # (or producing an invalid pattern).
            {"name": {"$regex": re.escape(term), "$options": "i"}}
            for term in terms
        ]
    }

    return [
        {
            "id": str(product.get("_id", "")),
            "name": product.get("name", ""),
            "description": product.get("description", ""),
            "skuNumber": product.get("skuNumber", ""),
            "baseModel": product.get("baseModel", ""),
        }
        for product in products_collection.find(query)
    ]

def get_combined_context(products: List[Dict]) -> str:
    """
    Build the QA context from product descriptions plus the static text.

    The non-empty descriptions of the given products are joined with single
    spaces, followed by one space and the module-level ``context_msg``.
    """
    descriptions = []
    for item in products:
        # Skip products whose description is missing or empty.
        if "description" in item and item["description"]:
            descriptions.append(item["description"])

    joined_descriptions = " ".join(descriptions)
    return f"{joined_descriptions} {context_msg}"

# ==========================
# FastAPI Endpoints
# ==========================
@app.get("/")
async def root():
    # Landing route: responds with a fixed welcome payload.
    welcome = {"message": "Welcome to the NER and QA API!"}
    return welcome

@app.post("/process/", response_model=CombinedResponse)
async def process_prompt(request: PromptRequest):
    # Full pipeline for one prompt: keyword extraction -> product lookup ->
    # context assembly -> question answering.
    #
    # Parameters:
    #   request: body carrying `input_text`, used both as the keyword source
    #            and as the question for the QA model.
    # Returns:
    #   dict with "ner" (extracted keywords), "qa" (question / answer / score)
    #   and "products_matched" (products found for the keywords).
    #
    # NOTE(review): the spaCy, MongoDB and transformers calls below are
    # synchronous and run inside this coroutine — confirm that blocking the
    # event loop for their duration is acceptable for the expected load.
    input_text = request.input_text

    # Step 1: Extract keywords (nouns / proper nouns) with spaCy
    keywords = extract_keywords(input_text)
    ner_response = {"extracted_keywords": keywords}

    # Step 2: Search MongoDB for matching products.
    # Skip the lookup when no keywords were found: an empty keyword list would
    # produce an empty "$or" clause, which MongoDB rejects with an error.
    products = search_products_by_keywords(keywords) if keywords else []

    # Step 3: Generate Combined Context
    combined_context = get_combined_context(products)

    # Step 4: Use Q&A Model
    # The QA pipeline raises on an empty question, so a blank prompt is routed
    # to the fallback answer instead of surfacing as a server error.
    if input_text.strip() and combined_context.strip():
        qa_input = {"question": input_text, "context": combined_context}
        qa_output = qa_pipeline(qa_input)
        qa_response = {
            "question": input_text,
            "answer": qa_output["answer"],
            "score": qa_output["score"],
        }
    else:
        qa_response = {
            "question": input_text,
            "answer": "No relevant context available.",
            "score": 0.0,
        }

    # Step 5: Return Combined Response
    return {
        "ner": ner_response,
        "qa": qa_response,
        "products_matched": products,
    }