Update app.py
app.py
CHANGED
@@ -210,22 +210,25 @@ HTML_CONTENT = '''
 </body>
 </html>
 '''
 def download_model():
     model_name = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
-    model_file = "deepseek-coder-v2-lite-instruct.Q6_K.gguf" #
-    return hf_hub_download(model_name, filename=model_file)
+    model_file = "deepseek-coder-v2-lite-instruct.Q6_K.gguf" # or another quantized version
+    return hf_hub_download(repo_id=model_name, filename=model_file)

-def initialize_model(
+def initialize_model():
+    try:
+        model_path = download_model()
+        return Llama(
+            model_path=model_path,
+            n_ctx=4096,
+            n_threads=4,
+            n_gpu_layers=-1 # Use GPU if available
+        )
+    except Exception as e:
+        print(f"Error initializing model: {e}")
+        return None

-llm = initialize_model(model_path)
+llm = initialize_model()

 system_prompt = (
     "You are a helpful AI coding assistant. Your mission is to help people with programming "
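The new download/initialization path above can be exercised on its own before wiring it into Flask. Below is a minimal standalone sketch, not part of app.py, assuming llama-cpp-python and huggingface_hub are installed and that the Q6_K GGUF filename used by the commit actually exists in that repo; it mirrors the parameters shown in the hunk.

# Standalone sketch (not in the commit): download the GGUF and load it with llama.cpp.
# Assumes: pip install llama-cpp-python huggingface_hub
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

model_path = hf_hub_download(
    repo_id="deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
    filename="deepseek-coder-v2-lite-instruct.Q6_K.gguf",  # assumed to exist in the repo
)

llm = Llama(
    model_path=model_path,
    n_ctx=4096,       # context window, matching the commit
    n_threads=4,
    n_gpu_layers=-1,  # offload all layers to the GPU if one is available
)

# Quick non-streaming smoke test of the chat API used later in the diff.
result = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Write a Python one-liner that reverses a string."}],
    max_tokens=64,
)
print(result["choices"][0]["message"]["content"])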
@@ -236,24 +239,35 @@ chat_history = [{"role": "system", "content": system_prompt}]

 @app.route('/')
 def index():
+    return render_template('index.html') # You should move your HTML to a templates folder

 @app.route('/chat')
 def chat():
     global chat_history
     user_message = request.args.get('message', '')
+    if not llm:
+        return Response("data: Model not loaded\n\ndata: [DONE]\n\n", content_type='text/event-stream')
+
     chat_history.append({"role": "user", "content": user_message})

-    full_prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in chat_history])
-    full_prompt += "\nAssistant:"
     def generate():
         ai_response = ""
+        # Format messages for the model
+        messages = [{"role": msg["role"], "content": msg["content"]} for msg in chat_history]
+
+        stream = llm.create_chat_completion(
+            messages=messages,
+            max_tokens=1000,
+            stop=["User:"],
+            stream=True
+        )
+
+        for output in stream:
+            chunk = output['choices'][0]['delta'].get('content', '')
             if chunk:
                 ai_response += chunk
                 yield f"data: {chunk}\n\n"
+
         chat_history.append({"role": "assistant", "content": ai_response.strip()})
         if len(chat_history) > 10: # Limit history to last 10 messages
             chat_history = chat_history[-10:]
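The /chat route streams its reply as server-sent events, one "data: " line per token chunk. A hypothetical client sketch (not in the commit) that reads the stream with requests is shown below; it assumes the Flask app is reachable at http://localhost:5000 (the app.run() call is outside the shown hunks) and that "[DONE]" only appears as the sentinel used by the model-not-loaded path above.

# Hypothetical client sketch: consume the /chat SSE stream.
import requests

with requests.get(
    "http://localhost:5000/chat",           # assumed host/port, not shown in the diff
    params={"message": "Explain list comprehensions"},
    stream=True,
) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue
        chunk = line[len("data: "):]
        if chunk == "[DONE]":                # sentinel emitted by the error path
            break
        print(chunk, end="", flush=True)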
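One detail of the trimming logic worth noting: chat_history[-10:] keeps only the last 10 entries, so once the conversation grows the system prompt at index 0 is eventually dropped. A hypothetical variant (not what the commit does) that pins the system message while trimming the rest could look like this:

# Hypothetical alternative to the plain chat_history[-10:] trim.
def trim_history(history, max_messages=10):
    """Keep the system prompt plus the most recent user/assistant turns."""
    system = [m for m in history if m["role"] == "system"]
    rest = [m for m in history if m["role"] != "system"]
    keep = max(max_messages - len(system), 0)
    return system + (rest[-keep:] if keep else [])

# Usage inside generate() would be: chat_history = trim_history(chat_history)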