soiz1 committed
Commit 413d245 · verified · 1 Parent(s): d6af3fb

Update app.py

Files changed (1)
  1. app.py +32 -18
app.py CHANGED
@@ -210,22 +210,25 @@ HTML_CONTENT = '''
 </body>
 </html>
 '''
-
 def download_model():
     model_name = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
-    model_file = "deepseek-coder-v2-lite-instruct.Q6_K.gguf"  # change to the appropriate GGUF file name
-    return hf_hub_download(model_name, filename=model_file)
+    model_file = "deepseek-coder-v2-lite-instruct.Q6_K.gguf"  # or another quantized version
+    return hf_hub_download(repo_id=model_name, filename=model_file)
 
-def initialize_model(model_path):
-    return Llama(
-        model_path=model_path,
-        n_ctx=4096,
-        n_threads=4,
-        n_gpu_layers=-1  # Use GPU if available
-    )
+def initialize_model():
+    try:
+        model_path = download_model()
+        return Llama(
+            model_path=model_path,
+            n_ctx=4096,
+            n_threads=4,
+            n_gpu_layers=-1  # Use GPU if available
+        )
+    except Exception as e:
+        print(f"Error initializing model: {e}")
+        return None
 
-model_path = "/app/pytorch_model.bin"
-llm = initialize_model(model_path)
+llm = initialize_model()
 
 system_prompt = (
     "You are a helpful AI coding assistant. Your mission is to help people with programming "
@@ -236,24 +239,35 @@ chat_history = [{"role": "system", "content": system_prompt}]
 
 @app.route('/')
 def index():
-    return HTML_CONTENT
+    return render_template('index.html')  # You should move your HTML to a templates folder
 
 @app.route('/chat')
 def chat():
     global chat_history
     user_message = request.args.get('message', '')
+    if not llm:
+        return Response("data: Model not loaded\n\ndata: [DONE]\n\n", content_type='text/event-stream')
+
     chat_history.append({"role": "user", "content": user_message})
 
-    full_prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in chat_history])
-    full_prompt += "\nAssistant:"
-
     def generate():
         ai_response = ""
-        for token in llm(full_prompt, max_tokens=1000, stop=["User:"], stream=True):
-            chunk = token['choices'][0]['text']
+        # Format messages for the model
+        messages = [{"role": msg["role"], "content": msg["content"]} for msg in chat_history]
+
+        stream = llm.create_chat_completion(
+            messages=messages,
+            max_tokens=1000,
+            stop=["User:"],
+            stream=True
+        )
+
+        for output in stream:
+            chunk = output['choices'][0]['delta'].get('content', '')
             if chunk:
                 ai_response += chunk
                 yield f"data: {chunk}\n\n"
+
         chat_history.append({"role": "assistant", "content": ai_response.strip()})
         if len(chat_history) > 10:  # Limit history to last 10 messages
             chat_history = chat_history[-10:]
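
The new index() route calls render_template('index.html'), and the in-diff comment notes that the page markup should move into a templates folder. Flask looks for templates in a ./templates directory by default, so the inline HTML_CONTENT string has to end up there for the route to work. A minimal sketch of one way to bridge the two, writing the string out at startup, is below; this is an assumption about how the app might be wired up, not something this commit does.

import os

# Hypothetical glue code: persist the inline HTML_CONTENT string (defined
# earlier in app.py) to templates/index.html so render_template() can find it.
os.makedirs("templates", exist_ok=True)
with open("templates/index.html", "w", encoding="utf-8") as f:
    f.write(HTML_CONTENT)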
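
For reference, the reworked /chat endpoint takes the user message as a query parameter and streams the reply as server-sent events, one "data:" line per generated chunk. A minimal client sketch that consumes the stream is shown below; the base URL and port are assumptions (7860 is a common Spaces default), not part of the commit, and the [DONE] marker only appears on the model-not-loaded error path.

import requests

BASE_URL = "http://localhost:7860"  # assumed host/port; adjust to the real deployment

def stream_chat(message: str) -> str:
    """Send one message to /chat and print chunks as they stream back."""
    reply_parts = []
    with requests.get(f"{BASE_URL}/chat", params={"message": message}, stream=True) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines(decode_unicode=True):
            if not line or not line.startswith("data: "):
                continue  # skip the blank separators between SSE events
            chunk = line[len("data: "):]
            if chunk == "[DONE]":  # emitted only by the model-not-loaded error response
                break
            print(chunk, end="", flush=True)
            reply_parts.append(chunk)
    return "".join(reply_parts)

if __name__ == "__main__":
    stream_chat("Write a Python function that reverses a string.")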