Tanifh committed on
Commit f555c72 · verified · 1 Parent(s): 42d2bef

Update app.py

Files changed (1)
  app.py  +19 -15
app.py CHANGED
@@ -31,7 +31,8 @@ try:
         model_path=MODEL_PATH,
         n_ctx=512,  # ✅ Lower memory usage, speeds up responses
         n_threads=2,  # Matches available vCPUs
-        numa=True  # Optimize CPU memory access
+        numa=True,
+        n_batch=32  # ✅ Faster token processing
     )
     st.write("✅ Model loaded successfully!")
 except Exception as e:
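Note: for context, a minimal sketch of how the loading block plausibly reads once this change is applied. The llama_cpp import, the session-state caching, and the MODEL_PATH value are assumptions not shown in this diff; model_path, n_ctx, n_threads, numa, and n_batch are all real keyword arguments of llama-cpp-python's Llama constructor.

import streamlit as st
from llama_cpp import Llama

MODEL_PATH = "model.gguf"  # hypothetical; the real path is defined earlier in app.py

try:
    # Assumption: cache the model in session state so Streamlit reruns don't reload it
    if "model" not in st.session_state:
        st.session_state["model"] = Llama(
            model_path=MODEL_PATH,
            n_ctx=512,    # smaller context window lowers memory use
            n_threads=2,  # matches the available vCPUs
            numa=True,    # NUMA-aware memory allocation
            n_batch=32    # batch size for prompt processing
        )
    st.write("✅ Model loaded successfully!")
except Exception as e:
    st.error(f"⚠️ Model failed to load: {e}")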
@@ -64,24 +65,27 @@ if st.button("Send") and user_input:
     # ✅ Use a minimal prompt format (no system message)
     formatted_messages = [{"role": "user", "content": user_input}]
 
-    # ✅ Streamed response for faster user experience
+    # ✅ Disable streaming for debugging
     response_data = st.session_state["model"].create_chat_completion(
         messages=formatted_messages,
-        max_tokens=256, temperature=0.7, top_p=0.9,
-        stream=True  # ✅ Enables real-time streaming
+        max_tokens=128, temperature=0.7, top_p=0.9,
+        stream=False  # ❌ Disabled streaming for debugging
     )
 
-    response_text = ""
-    response_container = st.empty()  # Placeholder for live updates
-
-    for chunk in response_data:
-        if "choices" in chunk and len(chunk["choices"]) > 0:
-            choice = chunk["choices"][0]
-            if "message" in choice:
-                response_text += choice["message"]["content"]
-                response_container.markdown(f"**AI:** {response_text}")
-            if choice.get("finish_reason") == "stop":
-                break
+    # ✅ Debugging output
+    st.write("🔍 Debug: Raw Model Response:", response_data)
+
+    if "choices" in response_data and len(response_data["choices"]) > 0:
+        choice = response_data["choices"][0]
+        if "message" in choice and "content" in choice["message"]:
+            response_text = choice["message"]["content"].strip()
+            st.session_state["messages"].append(("assistant", response_text))
+            st.chat_message("assistant").write(response_text)
+        else:
+            st.error("⚠️ No valid response content found.")
+    else:
+        st.error("⚠️ Model did not return any choices.")
+
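Note: worth keeping in mind if streaming is re-enabled later. The removed loop looked for a "message" key in each chunk, but in llama-cpp-python's OpenAI-compatible streaming format partial text arrives under a "delta" key, so that check would never match and the placeholder would stay empty, which is plausibly what prompted this debugging change. A sketch of a streaming handler under that assumption:

# Sketch only: assumes stream=True above and OpenAI-style streamed chunks,
# where incremental text arrives under "delta" rather than "message".
response_text = ""
response_container = st.empty()  # placeholder updated in place as tokens arrive

for chunk in response_data:
    choice = chunk["choices"][0]
    delta = choice.get("delta", {})
    if delta.get("content"):
        response_text += delta["content"]
        response_container.markdown(f"**AI:** {response_text}")
    if choice.get("finish_reason") == "stop":
        break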
 
 
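Note: with stream=False, create_chat_completion returns a single OpenAI-style dict, which is what the debug st.write surfaces and what the new parsing code walks ("choices", then "message", then "content"). An illustrative shape, with all values as placeholders:

{
    "id": "chatcmpl-...",
    "object": "chat.completion",
    "created": 1700000000,
    "model": "<model path>",
    "choices": [
        {
            "index": 0,
            "message": {"role": "assistant", "content": "..."},
            "finish_reason": "stop"
        }
    ],
    "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
}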