import os
import requests
import streamlit as st
from llama_cpp import Llama

# ✅ Streamlit page config (must be the first Streamlit call)
st.set_page_config(page_title="Phi-3 Mini Chatbot", layout="centered")

# ✅ Define model path and download URL
MODEL_PATH = "./Phi-3-mini-4k-instruct-q4.gguf"
MODEL_URL = "https://huggingface.co./microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf"
# ✅ Check if the model exists locally, otherwise download it
if not os.path.exists(MODEL_PATH):
    st.info("Downloading the model file. Please wait...")
    try:
        with requests.get(MODEL_URL, stream=True) as response:
            response.raise_for_status()  # Raise on HTTP errors so we don't save an error page as the model
            with open(MODEL_PATH, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        st.success("Model downloaded successfully!")
    except requests.exceptions.RequestException:
        # Remove any partially written file so the next run retries the download
        if os.path.exists(MODEL_PATH):
            os.remove(MODEL_PATH)
        st.error("🚨 Model download failed. Please try again later.")
        st.stop()
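# Hedged alternative (a sketch, not wired into the app above): the
# `huggingface_hub` client handles cached, resumable downloads from the Hub.
# This assumes the `huggingface_hub` package is installed; repo_id/filename
# mirror the URL used above.
#
#   from huggingface_hub import hf_hub_download
#   MODEL_PATH = hf_hub_download(
#       repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
#       filename="Phi-3-mini-4k-instruct-q4.gguf",
#   )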
# ✅ Load the optimized model with a reduced context length
try:
    if "model" not in st.session_state:
        st.session_state["model"] = Llama(
            model_path=MODEL_PATH,
            n_ctx=256,    # ✅ Lower memory usage, speeds up responses
            n_threads=2,  # Matches available vCPUs
            numa=True,
            n_batch=64    # ✅ Faster token processing
        )
except Exception:
    st.error("🚨 Error loading model. Please restart the application.")
    st.stop()
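# Hedged sketch (an assumption, not part of the app above): Streamlit's
# @st.cache_resource decorator is another common way to keep a single Llama
# instance alive across reruns, instead of storing it in st.session_state:
#
#   @st.cache_resource
#   def load_model(path: str) -> Llama:
#       # Same constructor arguments as above; cached once per process.
#       return Llama(model_path=path, n_ctx=256, n_threads=2, n_batch=64)
#
#   # model = load_model(MODEL_PATH)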
# User-friendly chat interface
st.title("🤖 Phi-3 Mini Chatbot")
st.markdown("### Ask me anything and I'll provide helpful responses!")

# Chat history
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Display chat history
for role, text in st.session_state["messages"]:
    with st.chat_message(role):
        st.write(text)
# Input field for the user's message
user_input = st.text_input("Your Message:", "", key="user_input")
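# Note (sketch only, not wired in): on newer Streamlit versions, st.chat_input()
# is the idiomatic chat widget and clears itself after each submission:
#
#   user_input = st.chat_input("Your Message:")
#   if user_input:
#       ...  # same handling as the Send-button branch below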
if st.button("Send") and user_input:
    # Add the user input to the chat history
    st.session_state["messages"].append(("user", user_input))
    with st.chat_message("user"):
        st.write(user_input)

    # ✅ Use a minimal prompt format (no system message)
    formatted_messages = [{"role": "user", "content": user_input}]
    # ✅ Generate a response (non-streaming); max_tokens caps the reply length
    response_data = st.session_state["model"].create_chat_completion(
        messages=formatted_messages,
        max_tokens=110, temperature=0.5, top_p=0.8,
        stop=["\n", "<|endoftext|>"],  # Stops at the first newline or end-of-text token
        stream=False
    )
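    # Hedged streaming sketch (an assumption, not enabled above): llama-cpp-python
    # can yield OpenAI-style chunks when stream=True, and recent Streamlit
    # versions can render them incrementally with st.write_stream:
    #
    #   def token_stream():
    #       for chunk in st.session_state["model"].create_chat_completion(
    #           messages=formatted_messages,
    #           max_tokens=110, temperature=0.5, top_p=0.8,
    #           stop=["\n", "<|endoftext|>"],
    #           stream=True,
    #       ):
    #           delta = chunk["choices"][0]["delta"]
    #           if "content" in delta:
    #               yield delta["content"]
    #
    #   # with st.chat_message("assistant"):
    #   #     response_text = st.write_stream(token_stream())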
    # ✅ Extract and display the response
    if "choices" in response_data and len(response_data["choices"]) > 0:
        choice = response_data["choices"][0]
        if "message" in choice and "content" in choice["message"]:
            response_text = choice["message"]["content"].strip()
            st.session_state["messages"].append(("assistant", response_text))
            with st.chat_message("assistant"):
                st.write(response_text)
        else:
            st.error("⚠️ Unable to generate a response. Please try again.")
    else:
        st.error("⚠️ No response received. Please ask again.")