Spaces:
Sleeping
Sleeping
Updated with context message, product recommendation, and history information (3)
Browse files
app.py
CHANGED
@@ -7,8 +7,9 @@ import spacy
|
|
7 |
import os
|
8 |
import logging
|
9 |
import re
|
|
|
10 |
|
11 |
-
# Set up logging
|
12 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
13 |
logger = logging.getLogger(__name__)
|
14 |
|
@@ -25,6 +26,10 @@ model_repo = "SyedHutter/blenderbot_model"
|
|
25 |
model_subfolder = "blenderbot_model"
|
26 |
model_dir = "/home/user/app/blenderbot_model"
|
27 |
|
|
|
|
|
|
|
|
|
28 |
if not os.path.exists(model_dir):
|
29 |
logger.info(f"Downloading {model_repo}/{model_subfolder} to {model_dir}...")
|
30 |
tokenizer = BlenderbotTokenizer.from_pretrained(model_repo, subfolder=model_subfolder)
|
@@ -35,12 +40,12 @@ if not os.path.exists(model_dir):
|
|
35 |
logger.info("Model download complete.")
|
36 |
else:
|
37 |
logger.info(f"Loading pre-existing model from {model_dir}.")
|
38 |
-
|
39 |
tokenizer = BlenderbotTokenizer.from_pretrained(model_dir)
|
40 |
-
model = BlenderbotForConditionalGeneration.from_pretrained(model_dir)
|
|
|
41 |
|
42 |
-
# Static Context
|
43 |
-
context_msg = "I am Hutter, your shopping guide for Hutter Products GmbH
|
44 |
|
45 |
# spaCy Setup
|
46 |
spacy_model_path = "/home/user/app/en_core_web_sm-3.8.0"
|
@@ -65,7 +70,6 @@ def extract_keywords(text: str) -> List[str]:
|
|
65 |
def detect_intent(text: str) -> str:
|
66 |
doc = nlp(text.lower())
|
67 |
text_lower = text.lower()
|
68 |
-
# General product-related intent based on shopping context
|
69 |
if any(token.text in ["buy", "shop", "find", "recommend", "product", "products", "item", "store", "catalog"] for token in doc) or "what" in text_lower.split()[:2]:
|
70 |
return "recommend_product"
|
71 |
elif any(token.text in ["company", "who", "do"] for token in doc):
|
@@ -74,11 +78,10 @@ def detect_intent(text: str) -> str:
|
|
74 |
return "ask_name"
|
75 |
elif re.search(r"\d+\s*[\+\-\*/]\s*\d+", text_lower):
|
76 |
return "math_query"
|
77 |
-
return "recommend_product"
|
78 |
|
79 |
def search_products_by_keywords(keywords: List[str]) -> List[Dict[str, Any]]:
|
80 |
if not keywords:
|
81 |
-
logger.info("No keywords provided, returning empty product list.")
|
82 |
return []
|
83 |
query = {"$or": [{"name": {"$regex": f"\\b{keyword}\\b", "$options": "i"}} for keyword in keywords]}
|
84 |
matched_products = [
|
@@ -95,39 +98,32 @@ def search_products_by_keywords(keywords: List[str]) -> List[Dict[str, Any]]:
|
|
95 |
def get_product_context(products: List[Dict]) -> str:
|
96 |
if not products:
|
97 |
return ""
|
98 |
-
product_str = "
|
99 |
-
product_str += ", ".join([f"'{p['name']}' - {p['description']}" for p in products[:2]])
|
100 |
return product_str
|
101 |
|
102 |
def format_response(response: str, products: List[Dict], intent: str, input_text: str) -> str:
|
103 |
-
# Handle product recommendation intent
|
104 |
if intent == "recommend_product":
|
105 |
if not products:
|
106 |
-
return "I’d love to recommend something
|
107 |
product = products[0]
|
108 |
-
return f"Check out our '{product['name']}'—it’s {product['description'].lower()}. Want
|
109 |
elif intent == "company_info":
|
110 |
-
return "Hutter Products GmbH
|
111 |
elif intent == "ask_name":
|
112 |
-
return "I’m Hutter, your shopping guide for Hutter Products GmbH. How can I
|
113 |
elif intent == "math_query":
|
114 |
match = re.search(r"(\d+)\s*([\+\-\*/])\s*(\d+)", input_text.lower())
|
115 |
if match:
|
116 |
num1, op, num2 = int(match.group(1)), match.group(2), int(match.group(3))
|
117 |
-
if op == "+":
|
118 |
-
|
119 |
-
elif op == "
|
120 |
-
|
121 |
-
|
122 |
-
return f"{num1} times {num2} is {num1 * num2}. Want to explore our products?"
|
123 |
-
elif op == "/":
|
124 |
-
return f"{num1} divided by {num2} is {num1 / num2}." if num2 != 0 else "Can’t divide by zero! How about some sustainable products instead?"
|
125 |
-
return "I can do simple math—try '2 + 2'. What else can I help you with?"
|
126 |
-
# Fallback with product nudge if available
|
127 |
if products:
|
128 |
product = products[0]
|
129 |
-
return f"{response}
|
130 |
-
return response if response else "How can I assist
|
131 |
|
132 |
# Endpoints
|
133 |
@app.get("/")
|
@@ -139,7 +135,7 @@ async def process_prompt(request: PromptRequest):
|
|
139 |
try:
|
140 |
logger.info(f"Processing request: {request.input_text}")
|
141 |
input_text = request.input_text
|
142 |
-
history = request.conversation_history[-
|
143 |
|
144 |
intent = detect_intent(input_text)
|
145 |
keywords = extract_keywords(input_text)
|
@@ -150,15 +146,23 @@ async def process_prompt(request: PromptRequest):
|
|
150 |
logger.info(f"Products matched: {len(products)}")
|
151 |
|
152 |
history_str = " || ".join(history)
|
153 |
-
full_input = f"{context_msg} || {
|
154 |
logger.info(f"Full input to model: {full_input}")
|
155 |
|
156 |
logger.info("Tokenizing input...")
|
157 |
-
inputs = tokenizer(full_input, return_tensors="pt", truncation=True, max_length=
|
158 |
logger.info("Input tokenized successfully.")
|
159 |
|
160 |
logger.info("Generating model response...")
|
161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
logger.info("Model generation complete.")
|
163 |
|
164 |
logger.info("Decoding model output...")
|
@@ -180,7 +184,7 @@ async def process_prompt(request: PromptRequest):
|
|
180 |
}
|
181 |
except Exception as e:
|
182 |
logger.error(f"Error processing request: {str(e)}", exc_info=True)
|
183 |
-
raise HTTPException(status_code=500, detail=f"Oops, something went wrong: {str(e)}
|
184 |
|
185 |
@app.on_event("startup")
|
186 |
async def startup_event():
|
|
|
7 |
import os
|
8 |
import logging
|
9 |
import re
|
10 |
+
import torch
|
11 |
|
12 |
+
# Set up logging
|
13 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
14 |
logger = logging.getLogger(__name__)
|
15 |
|
|
|
26 |
model_subfolder = "blenderbot_model"
|
27 |
model_dir = "/home/user/app/blenderbot_model"
|
28 |
|
29 |
+
# Device setup
|
30 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
31 |
+
logger.info(f"Using device: {device}")
|
32 |
+
|
33 |
if not os.path.exists(model_dir):
|
34 |
logger.info(f"Downloading {model_repo}/{model_subfolder} to {model_dir}...")
|
35 |
tokenizer = BlenderbotTokenizer.from_pretrained(model_repo, subfolder=model_subfolder)
|
|
|
40 |
logger.info("Model download complete.")
|
41 |
else:
|
42 |
logger.info(f"Loading pre-existing model from {model_dir}.")
|
|
|
43 |
tokenizer = BlenderbotTokenizer.from_pretrained(model_dir)
|
44 |
+
model = BlenderbotForConditionalGeneration.from_pretrained(model_dir).to(device)
|
45 |
+
model.eval() # Set to evaluation mode (disables dropout) for deterministic inference
|
46 |
|
47 |
+
# Static Context (shortened for efficiency)
|
48 |
+
context_msg = "I am Hutter, your shopping guide for Hutter Products GmbH, here to help you find sustainable products."
|
49 |
|
50 |
# spaCy Setup
|
51 |
spacy_model_path = "/home/user/app/en_core_web_sm-3.8.0"
|
|
|
70 |
def detect_intent(text: str) -> str:
|
71 |
doc = nlp(text.lower())
|
72 |
text_lower = text.lower()
|
|
|
73 |
if any(token.text in ["buy", "shop", "find", "recommend", "product", "products", "item", "store", "catalog"] for token in doc) or "what" in text_lower.split()[:2]:
|
74 |
return "recommend_product"
|
75 |
elif any(token.text in ["company", "who", "do"] for token in doc):
|
|
|
78 |
return "ask_name"
|
79 |
elif re.search(r"\d+\s*[\+\-\*/]\s*\d+", text_lower):
|
80 |
return "math_query"
|
81 |
+
return "recommend_product"
|
82 |
|
83 |
def search_products_by_keywords(keywords: List[str]) -> List[Dict[str, Any]]:
|
84 |
if not keywords:
|
|
|
85 |
return []
|
86 |
query = {"$or": [{"name": {"$regex": f"\\b{keyword}\\b", "$options": "i"}} for keyword in keywords]}
|
87 |
matched_products = [
|
|
|
98 |
def get_product_context(products: List[Dict]) -> str:
|
99 |
if not products:
|
100 |
return ""
|
101 |
+
product_str = "Products: " + ", ".join([f"'{p['name']}' - {p['description']}" for p in products[:2]])
|
|
|
102 |
return product_str
|
103 |
|
104 |
def format_response(response: str, products: List[Dict], intent: str, input_text: str) -> str:
|
|
|
105 |
if intent == "recommend_product":
|
106 |
if not products:
|
107 |
+
return "I’d love to recommend something! What are you looking for in our sustainable catalog?"
|
108 |
product = products[0]
|
109 |
+
return f"Check out our '{product['name']}'—it’s {product['description'].lower()}. Want more options?"
|
110 |
elif intent == "company_info":
|
111 |
+
return "Hutter Products GmbH offers sustainable products like recycled textiles and ocean plastic goods."
|
112 |
elif intent == "ask_name":
|
113 |
+
return "I’m Hutter, your shopping guide for Hutter Products GmbH. How can I help?"
|
114 |
elif intent == "math_query":
|
115 |
match = re.search(r"(\d+)\s*([\+\-\*/])\s*(\d+)", input_text.lower())
|
116 |
if match:
|
117 |
num1, op, num2 = int(match.group(1)), match.group(2), int(match.group(3))
|
118 |
+
if op == "+": return f"{num1} + {num2} = {num1 + num2}. Need shopping help?"
|
119 |
+
elif op == "-": return f"{num1} - {num2} = {num1 - num2}. Anything else?"
|
120 |
+
elif op == "*": return f"{num1} * {num2} = {num1 * num2}. Explore our products?"
|
121 |
+
elif op == "/": return f"{num1} / {num2} = {num1 / num2}." if num2 != 0 else "Can’t divide by zero! Try our products?"
|
122 |
+
return "I can do math—try '2 + 2'. What else can I help with?"
|
|
|
|
|
|
|
|
|
|
|
123 |
if products:
|
124 |
product = products[0]
|
125 |
+
return f"{response} Also, check out '{product['name']}'—it’s {product['description'].lower()}."
|
126 |
+
return response if response else "How can I assist with our sustainable products?"
|
127 |
|
128 |
# Endpoints
|
129 |
@app.get("/")
|
|
|
135 |
try:
|
136 |
logger.info(f"Processing request: {request.input_text}")
|
137 |
input_text = request.input_text
|
138 |
+
history = request.conversation_history[-1:] if request.conversation_history else [] # Limit to last message
|
139 |
|
140 |
intent = detect_intent(input_text)
|
141 |
keywords = extract_keywords(input_text)
|
|
|
146 |
logger.info(f"Products matched: {len(products)}")
|
147 |
|
148 |
history_str = " || ".join(history)
|
149 |
+
full_input = f"{context_msg} || {product_context} || {input_text}" if product_context else f"{context_msg} || {input_text}"
|
150 |
logger.info(f"Full input to model: {full_input}")
|
151 |
|
152 |
logger.info("Tokenizing input...")
|
153 |
+
inputs = tokenizer(full_input, return_tensors="pt", truncation=True, max_length=64).to(device) # Reduced max_length
|
154 |
logger.info("Input tokenized successfully.")
|
155 |
|
156 |
logger.info("Generating model response...")
|
157 |
+
with torch.no_grad(): # Disable gradient computation
|
158 |
+
outputs = model.generate(
|
159 |
+
**inputs,
|
160 |
+
max_new_tokens=30, # Limit new tokens for speed
|
161 |
+
do_sample=True, # Faster sampling over beam search
|
162 |
+
top_p=0.9, # Nucleus sampling
|
163 |
+
temperature=0.7, # Controlled randomness
|
164 |
+
no_repeat_ngram_size=2
|
165 |
+
)
|
166 |
logger.info("Model generation complete.")
|
167 |
|
168 |
logger.info("Decoding model output...")
|
|
|
184 |
}
|
185 |
except Exception as e:
|
186 |
logger.error(f"Error processing request: {str(e)}", exc_info=True)
|
187 |
+
raise HTTPException(status_code=500, detail=f"Oops, something went wrong: {str(e)}")
|
188 |
|
189 |
@app.on_event("startup")
|
190 |
async def startup_event():
|