Tanifh committed
Commit 42d2bef · verified · 1 parent: 9e36cc1

Update app.py

Files changed (1)
  1. app.py +6 -9
app.py CHANGED
@@ -24,14 +24,14 @@ if not os.path.exists(MODEL_PATH):
         st.error(f"🚨 Model download failed: {e}")
         st.stop()
 
-# ✅ Load optimized model
+# ✅ Load optimized model with reduced context length
 try:
     if "model" not in st.session_state:
         st.session_state["model"] = Llama(
             model_path=MODEL_PATH,
-            n_ctx=1024,   # Reduce context window for faster inference
-            n_threads=2,  # Match available CPU cores (2 vCPUs)
-            numa=True     # Enable NUMA optimization
+            n_ctx=512,    # ✅ Lower memory usage, speeds up responses
+            n_threads=2,  # Matches available vCPUs
+            numa=True     # Optimize CPU memory access
         )
     st.write("✅ Model loaded successfully!")
 except Exception as e:
@@ -61,11 +61,8 @@ if st.button("Send") and user_input:
     st.session_state["messages"].append(("user", user_input))
     st.chat_message("user").write(user_input)
 
-    # ✅ Format messages using Phi-3 chat template
-    formatted_messages = [
-        {"role": "system", "content": "You are an AI assistant. Provide clear and concise answers."},
-        {"role": "user", "content": user_input}
-    ]
+    # ✅ Use a minimal prompt format (no system message)
+    formatted_messages = [{"role": "user", "content": user_input}]
 
     # ✅ Streamed response for faster user experience
     response_data = st.session_state["model"].create_chat_completion(
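Taken together, the load-time changes amount to the pattern sketched below, shown as a self-contained snippet using the llama-cpp-python package the app already imports. The MODEL_PATH value and the n_ctx() readback are illustrative, not part of the commit. Dropping n_ctx from 1024 to 512 halves the KV cache, which lowers memory use and speeds up prompt processing at the price of truncating longer conversations sooner; caching the Llama object in st.session_state avoids reloading the weights on every Streamlit rerun.

import streamlit as st
from llama_cpp import Llama

MODEL_PATH = "phi-3-mini-4k-instruct-q4.gguf"  # hypothetical local file, for illustration

if "model" not in st.session_state:
    st.session_state["model"] = Llama(
        model_path=MODEL_PATH,
        n_ctx=512,    # smaller KV cache: less RAM, faster prompt evaluation
        n_threads=2,  # match the host's available vCPUs
        numa=True,    # let llama.cpp optimize memory placement on NUMA hosts
    )

st.write(f"Context window: {st.session_state['model'].n_ctx()} tokens")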
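The minimal prompt format feeds directly into the streamed completion. Below is a sketch of how such a stream is typically consumed, assuming create_chat_completion(..., stream=True) as in the surrounding code; the max_tokens cap and the st.empty() placeholder are illustrative additions, not taken from app.py. With stream=True, llama-cpp-python yields OpenAI-style chunks whose choices[0]["delta"] carries the incremental text.

import streamlit as st

user_input = st.text_input("Your message")
if st.button("Send") and user_input:
    # ✅ Minimal single-turn prompt, mirroring the change above
    formatted_messages = [{"role": "user", "content": user_input}]

    # Assumes st.session_state["model"] was created as in the previous sketch
    stream = st.session_state["model"].create_chat_completion(
        messages=formatted_messages,
        max_tokens=256,  # illustrative cap, not from the commit
        stream=True,     # yield incremental chunks instead of one response
    )

    placeholder = st.empty()
    reply = ""
    for chunk in stream:
        delta = chunk["choices"][0]["delta"]  # OpenAI-style streaming delta
        if "content" in delta:                # the first chunk carries only the role
            reply += delta["content"]
            placeholder.write(reply)          # progressively rerender the reply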