import os
import requests
import streamlit as st
from llama_cpp import Llama
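# Dependencies: streamlit, requests, llama-cpp-python (pip install names).
# Run with `streamlit run app.py`, assuming this file is saved as app.py.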
# ✅ Streamlit Page Config (Must be first)
st.set_page_config(page_title="Phi-3 Mini Chatbot", layout="centered")
# ✅ Define model path
MODEL_PATH = "./Phi-3-mini-4k-instruct-q4.gguf"
MODEL_URL = "https://huggingface.co./microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf"
# ✅ Check if model exists, otherwise download
if not os.path.exists(MODEL_PATH):
    st.info("Downloading the model file. Please wait...")
    try:
        with requests.get(MODEL_URL, stream=True) as response:
            response.raise_for_status()  # Raise on HTTP error statuses (4xx/5xx)
            # Stream to disk in chunks so the multi-GB file is never held in memory
            with open(MODEL_PATH, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # Skip keep-alive chunks
                        f.write(chunk)
        st.success("Model downloaded successfully!")
    except requests.exceptions.RequestException:
        # Also covers connection errors and timeouts, not just HTTP errors;
        # drop any partially written file so the next run retries the download
        if os.path.exists(MODEL_PATH):
            os.remove(MODEL_PATH)
        st.error("🚨 Model download failed. Please try again later.")
        st.stop()
# ✅ Load optimized model with reduced context length
try:
    if "model" not in st.session_state:
        st.session_state["model"] = Llama(
            model_path=MODEL_PATH,
            n_ctx=256,    # ✅ Lower memory usage, speeds up responses
            n_threads=2,  # Matches available vCPUs
            numa=True,
            n_batch=64,   # ✅ Faster token processing
        )
except Exception:
    st.error("🚨 Error loading model. Please restart the application.")
    st.stop()
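# Note: storing the Llama object in st.session_state keeps one model copy per
# browser session. A common alternative (assuming Streamlit >= 1.18) is
# @st.cache_resource on a loader function, which shares a single instance
# across all sessions, e.g.:
#
#   @st.cache_resource
#   def load_model():
#       return Llama(model_path=MODEL_PATH, n_ctx=256, n_threads=2, n_batch=64)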
# 💬 User-Friendly Chat Interface
st.title("🤖 Phi-3 Mini Chatbot")
st.markdown("### Ask me anything and I'll provide helpful responses!")
# Chat history
if "messages" not in st.session_state:
st.session_state["messages"] = []
# Display chat history
for role, text in st.session_state["messages"]:
    with st.chat_message(role):
        st.write(text)
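# Note: Streamlit reruns this script top to bottom on every interaction, so
# the loop above re-renders the full conversation from session state each time.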
# Input field for user message
user_input = st.text_input("Your Message:", "", key="user_input")
if st.button("Send") and user_input:
# Add user input to chat history
st.session_state["messages"].append(("user", user_input))
with st.chat_message("user"):
st.write(user_input)
    # ✅ Use a minimal prompt format (no system message)
    formatted_messages = [{"role": "user", "content": user_input}]
    # ✅ Speed improvements: increase max_tokens for fuller responses & ensure proper stopping
    response_data = st.session_state["model"].create_chat_completion(
        messages=formatted_messages,
        max_tokens=110, temperature=0.5, top_p=0.8,
        stop=["\n", "<|endoftext|>"],  # ✅ Ensures responses end properly (stops at the first newline)
        stream=False
    )
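    # create_chat_completion returns an OpenAI-style completion dict, roughly
    # (fields abridged):
    #   {"choices": [{"message": {"role": "assistant", "content": "..."},
    #                 "finish_reason": "stop"}],
    #    "usage": {...}}
    # which is why the extraction below walks choices -> message -> content.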
    # ✅ Extract and display response
    if "choices" in response_data and len(response_data["choices"]) > 0:
        choice = response_data["choices"][0]
        if "message" in choice and "content" in choice["message"]:
            response_text = choice["message"]["content"].strip()
            st.session_state["messages"].append(("assistant", response_text))
            with st.chat_message("assistant"):
                st.write(response_text)
        else:
            st.error("⚠️ Unable to generate a response. Please try again.")
    else:
        st.error("⚠️ No response received. Please ask again.")