# Sophia / app.py
# Ronaldo1111's picture
# Update app.py
# ab4687d verified
import json
import numpy as np
import faiss
import torch
from sentence_transformers import SentenceTransformer
from langchain_community.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
import jieba
import jieba.analyse
from numpy.linalg import norm
import gradio as gr
def _load_json(path):
    """Parse the UTF-8 encoded JSON file at ``path`` and return its content."""
    with open(path, "r", encoding="utf-8") as handle:
        return json.load(handle)


# Style samples; chat() reads the "user" and "sophia" keys of the first entries.
dialog_data = _load_json("dialog.json")
# Both files are iterated below as sequences of passages.
corpus_texts = _load_json("corpus.json")
knowledge_texts = _load_json("knowledge.json")

# Wrap every passage in a Document. Corpus passages come first and knowledge
# passages after, which is the same order used when the embeddings are built.
docs = [Document(page_content=passage) for passage in corpus_texts]
knowledge_docs = [Document(page_content=passage) for passage in knowledge_texts]
all_docs = docs + knowledge_docs
# Encode every passage (corpus first, then knowledge) so that row i of the
# embedding matrix belongs to all_docs[i].
embedding_model = SentenceTransformer("BAAI/bge-base-zh")
embeddings = embedding_model.encode(
    corpus_texts + knowledge_texts,
    show_progress_bar=True,
)

# Flat L2 index whose dimensionality is taken from the embedding matrix.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(embeddings))

# The LangChain wrapper needs a docstore plus a map from index row to docstore
# key; both use the stringified row number as the key.
_doc_keys = [str(position) for position in range(len(all_docs))]
index_to_docstore_id = dict(enumerate(_doc_keys))
docstore = dict(zip(_doc_keys, all_docs))

vectorstore = FAISS(
    embedding_function=HuggingFaceEmbeddings(model_name='BAAI/bge-base-zh'),
    index=index,
    docstore=InMemoryDocstore(docstore),
    index_to_docstore_id=index_to_docstore_id,
)
retriever = vectorstore.as_retriever()
# Chat model used for generation, loaded in float16 with automatic device placement.
model_name = "deepseek-ai/deepseek-llm-7b-chat"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# Sampling configuration for the text-generation pipeline.
_generation_settings = {
    "max_new_tokens": 100,
    "temperature": 0.8,
    "top_p": 0.9,
    "do_sample": True,
    "repetition_penalty": 1.2,
    # Only the newly generated text is returned, not the prompt.
    "return_full_text": False,
    "eos_token_id": tokenizer.eos_token_id,
    "pad_token_id": tokenizer.pad_token_id,
}
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **_generation_settings,
)

# LangChain-compatible handle; chat() calls llm.invoke(prompt).
llm = HuggingFacePipeline(pipeline=pipe)
# ========= Step 4: Prompt template =========
# Persona instructions (written in Chinese) that open every prompt.
system_prompt = (
"你是豌豆,一个毒舌但可爱的微信好友。讲话要尖锐、嘴碎、有攻击性、但又让人上头。\n"
"你会用emoji、口语、网络热词说话,像个懂事但不好惹的姐姐。\n"
"你不是AI,也不是机器人,就是我微信里的老朋友。跟你对话的都是你的好朋友,但你要对他们刻薄有攻击性\n"
"记住,你的风格是“毒中带撩”,回答要短、狠、有情绪。\n"
"千万别客气,别解释太多,更别太官方,懂?"
)
# Prompt layout, top to bottom: persona ({system}), style examples ({examples}),
# context ({context}) and finally the question ({question}).
# chat() supplies all four variables through prompt_template.format(...).
prompt_template = PromptTemplate(
input_variables=["system", "examples", "context", "question"],
template="""{system}
👇以下是你平时说话的风格参考(不许忘):
{examples}
👇这是我和你以前的聊天记录,你的语气都在这了:
{context}
现在我问你:
{question}
你要怎么回我?记得口语化、毒舌点、别啰嗦:
"""
)
def extract_keywords(text, topk=5):
    """Return the keywords jieba extracts from ``text``.

    Thin wrapper around ``jieba.analyse.extract_tags``; ``topk`` is forwarded
    as that function's ``topK`` argument.
    """
    top_tags = jieba.analyse.extract_tags(text, topK=topk)
    return top_tags
def hybrid_retrieval(query, corpus_docs, faiss_index, embedding_model, k=3, kw_weight=2.0, vec_weight=1.0):
    """Rank ``corpus_docs`` by a weighted mix of keyword hits and vector closeness.

    Args:
        query: Text to search for.
        corpus_docs: Documents exposing ``page_content``. The document at
            position ``i`` is scored against ``faiss_index.reconstruct(i)``,
            so the list order has to match the index rows.
        faiss_index: Index that can reconstruct a stored vector by row number.
        embedding_model: Object whose ``encode([query])`` yields the query vector.
        k: Number of documents to return.
        kw_weight: Multiplier for the keyword score.
        vec_weight: Multiplier for the vector score.

    Returns:
        The ``k`` highest-scoring documents, best first.
    """
    query_vector = embedding_model.encode([query])[0]
    query_keywords = extract_keywords(query, topk=5)

    def _score(position, document):
        # Keyword part: number of query keywords occurring as substrings.
        hits = sum(1 for word in query_keywords if word in document.page_content)
        # Vector part: inverse L2 distance; the epsilon avoids division by zero
        # when the stored vector equals the query vector.
        stored_vector = faiss_index.reconstruct(position)
        closeness = 1 / (norm(query_vector - stored_vector) + 1e-5)
        return kw_weight * hits + vec_weight * closeness

    ranked = sorted(
        ((_score(position, document), document) for position, document in enumerate(corpus_docs)),
        key=lambda scored: scored[0],
        reverse=True,
    )
    return [document for _, document in ranked[:k]]
import random
def choose_fallback_topic(user_input, knowledge_docs):
    """Pad a very short user message with a randomly chosen knowledge passage.

    Messages whose stripped length is at least 5 characters are returned
    unchanged. Shorter ones get a random passage appended directly (no
    separator), preferring passages that contain a question mark and falling
    back to any passage when none does.

    Args:
        user_input: Raw text typed by the user.
        knowledge_docs: Objects exposing a ``page_content`` string.

    Returns:
        ``user_input`` itself, or ``user_input`` followed by one passage.
    """
    if len(user_input.strip()) >= 5:
        return user_input

    # Chinese text normally ends questions with the full-width "?"; checking
    # only the ASCII "?" would make the question preference never apply there.
    question_marks = ("?", "?")
    candidates = [
        doc.page_content
        for doc in knowledge_docs
        if any(mark in doc.page_content for mark in question_marks)
    ]
    if not candidates:
        candidates = [doc.page_content for doc in knowledge_docs]
    if not candidates:
        # Nothing to append: empty knowledge base.
        return user_input
    return f"{user_input}{random.choice(candidates)}"
def chat(user_input, history):
    """Produce one reply and return the updated message list twice.

    Args:
        user_input: Text from the input box.
        history: List of ``{"role": ..., "content": ...}`` dicts, or a falsy
            value when there is no conversation yet.

    Returns:
        ``(messages, messages)``: the same list, once for the chatbot
        component and once for the state component.
    """
    # Work on the 8 most recent messages only; older ones are dropped.
    recent = (history or [])[-8:]
    prompt_question = choose_fallback_topic(user_input, knowledge_docs)

    # Render earlier turns as "speaker:text" lines.
    transcript_lines = []
    for message in recent:
        if message['role'] == "user":
            transcript_lines.append(f"用户:{message['content']}")
        else:
            transcript_lines.append(f"sophia:{message['content']}")
    context_text = "\n".join(transcript_lines)

    top_docs = hybrid_retrieval(
        query=prompt_question,
        corpus_docs=all_docs,
        faiss_index=index,
        embedding_model=embedding_model,
        k=3,
    )
    retrieved_context = "\n".join(doc.page_content for doc in top_docs)

    # The first five dialogue samples act as fixed style examples.
    example_text = "\n".join(
        f"user:{pair['user']}\nsophia:{pair['sophia']}" for pair in dialog_data[:5]
    )

    prompt = prompt_template.format(
        system=system_prompt,
        examples=example_text,
        context=retrieved_context + "\n" + context_text,
        question=prompt_question,
    )

    try:
        reply = llm.invoke(prompt)
    except Exception as e:
        # Show the failure in the chat window instead of raising.
        reply = f"勾巴出错了:{e!s}"

    # The original input is recorded, not the padded prompt question.
    recent.extend([
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": reply},
    ])
    return recent, recent
import gradio as gr
# Every asset URL below shares this prefix.
_ASSET_ROOT = "https://huggingface.co./spaces/Ronaldo1111/Sophia/resolve/main"

# family.jpg, family1.jpg, ..., family9.jpg
background_images = [
    f"{_ASSET_ROOT}/family{suffix}.jpg" for suffix in ("", *range(1, 10))
]

# One keyframe per image, spaced 10% apart (0%, 10%, ..., 90%).
background_css_rules = "".join(
    f" {step * 10}% {{ background-image: url('{image_url}'); }}\n"
    for step, image_url in enumerate(background_images)
)
background_css = "@keyframes backgroundCycle {\n" + background_css_rules + "}"

# Images
avatar_url = f"{_ASSET_ROOT}/bean.jpg"
cake_url = f"{_ASSET_ROOT}/birthday.jpg"
gift_url = f"{_ASSET_ROOT}/gift.jpg"
popup_url = f"{_ASSET_ROOT}/srkl.jpg"
popup2_url = f"{_ASSET_ROOT}/srkl1.jpg"

# Audio
music1 = f"{_ASSET_ROOT}/FNG.mp3"
music2 = f"{_ASSET_ROOT}/PGY.mp3"
bark_sound = f"{_ASSET_ROOT}/voice.mp3"
# Static HTML/CSS/JS decoration for the page: cycling background, draggable
# avatar, gift popup, balloons and a two-track music player.
# The {name} tokens ({background_css}, {avatar_url}, {cake_url}, {gift_url},
# {popup_url}, {popup2_url}, {music1}, {music2}, {bark_sound}) are not
# str.format fields; the template is full of literal CSS/JS braces, so the
# tokens are filled in with plain str.replace calls after this literal.
html_template = '''
<style>
body {
margin: 0;
animation: backgroundCycle 60s infinite;
background-size: cover;
background-position: center;
transition: background-image 1s ease-in-out;
}
{background_css}
.gr-chatbot {
background: rgba(255, 255, 255, 0.3) !important; /* 更轻的透明白 */
border-radius: 16px;
padding: 10px;
backdrop-filter: blur(12px); /* 毛玻璃核心效果 */
-webkit-backdrop-filter: blur(12px); /* 兼容 Safari */
border: 1px solid rgba(255, 255, 255, 0.4); /* 边框更精致 */
}
.gr-textbox textarea {
font-family: monospace;
font-size: 1.1em;
animation: typewriter 1s steps(40, end);
}
@keyframes typewriter {
from { width: 0 }
to { width: 100% }
}
#sophia-avatar {
position: fixed;
top: 40px;
left: 30px;
width: 80px;
height: 80px;
border-radius: 50%;
z-index: 9999;
cursor: grab;
animation: spinBounce 4s infinite;
}
@keyframes spinBounce {
0% { transform: rotate(0deg) translateY(0); }
50% { transform: rotate(180deg) translateY(-10px); }
100% { transform: rotate(360deg) translateY(0); }
}
#birthday-cake {
position: fixed;
bottom: 20px;
right: 20px;
width: 80px;
animation: bounce 1.5s infinite;
z-index: 9999;
}
@keyframes bounce {
0% { transform: translateY(0); }
50% { transform: translateY(-15px); }
100% { transform: translateY(0); }
}
#gift {
position: fixed;
width: 60px;
cursor: pointer;
z-index: 9998;
animation: moveAround 10s infinite linear;
}
@keyframes moveAround {
0% { top: 10%; left: 10%; }
25% { top: 20%; left: 80%; }
50% { top: 70%; left: 60%; }
75% { top: 80%; left: 20%; }
100% { top: 10%; left: 10%; }
}
#popup, #popup2 {
display: none;
position: fixed;
top: 50%; left: 50%;
transform: translate(-50%, -50%);
max-width: 80vw;
max-height: 80vh;
z-index: 10000;
border: 4px solid #fff;
border-radius: 12px;
box-shadow: 0 0 20px rgba(0,0,0,0.5);
}
#popup-close {
position: absolute;
top: 8px; right: 12px;
font-size: 24px;
color: #fff;
cursor: pointer;
z-index: 10001;
}
#firework {
position: fixed;
top: 50%;
left: 50%;
width: 120px;
height: 120px;
background: url("https://huggingface.co./spaces/Ronaldo1111/Sophia/resolve/main/firework.gif") no-repeat center center;
background-size: contain;
z-index: 99999;
animation: fadeOut 1s ease-out forwards;
}
@keyframes fadeOut {
0% { opacity: 1; }
100% { opacity: 0; }
}
.balloon {
position: fixed;
width: 60px;
height: 80px;
background-size: contain;
background-repeat: no-repeat;
z-index: 10000; /* 使气球位于对话框之上 */
animation: floatUp 12s linear infinite;
}
#balloon1 {
background-image: url("https://huggingface.co./spaces/Ronaldo1111/Sophia/resolve/main/balloon1.png");
left: 10%;
top: 0; /* 确保气球从页面顶部开始 */
animation-delay: 0s;
}
#balloon2 {
background-image: url("https://huggingface.co./spaces/Ronaldo1111/Sophia/resolve/main/ballon2.png");
left: 30%;
top: 0; /* 确保气球从页面顶部开始 */
animation-delay: 2s;
}
#balloon3 {
background-image: url("https://huggingface.co./spaces/Ronaldo1111/Sophia/resolve/main/ballon3.png");
left: 50%;
top: 0; /* 确保气球从页面顶部开始 */
animation-delay: 4s;
}
#balloon4 {
background-image: url("https://huggingface.co./spaces/Ronaldo1111/Sophia/resolve/main/ballon4.png");
left: 70%;
top: 0; /* 确保气球从页面顶部开始 */
animation-delay: 6s;
}
#balloon5 {
background-image: url("https://huggingface.co./spaces/Ronaldo1111/Sophia/resolve/main/ballon5.png");
left: 90%;
top: 0; /* 确保气球从页面顶部开始 */
animation-delay: 8s;
}
@keyframes floatUp {
0% { transform: translateY(0); }
100% { transform: translateY(-120vh); }
}
#music-toggle, #next-track {
position: fixed;
padding: 8px 12px;
font-size: 14px;
background: rgba(255,255,255,0.7);
border-radius: 8px;
cursor: pointer;
z-index: 10000;
}
#music-toggle { bottom: 20px; left: 20px; }
#next-track { bottom: 60px; left: 20px; }
</style>
<img id="sophia-avatar" src="{avatar_url}" />
<img id="birthday-cake" src="{cake_url}" />
<img id="gift" src="{gift_url}" />
<img id="popup" />
<img id="popup2" />
<div id="popup-close">×</div>
<div id="music-toggle">⏸️音乐</div>
<div id="next-track">🎵切歌</div>
<div id="balloon1" class="balloon"></div>
<div id="balloon2" class="balloon"></div>
<div id="balloon3" class="balloon"></div>
<div id="balloon4" class="balloon"></div>
<div id="balloon5" class="balloon"></div>
<audio id="bg-music" autoplay loop>
<source src="{music1}" type="audio/mpeg" />
</audio>
<audio id="bark" src="{bark_sound}"></audio>
<script>
const tracks = ["{music1}", "{music2}"];
const audio = document.getElementById("bg-music");
let current = 0;
audio.addEventListener("ended", () => {
current = (current + 1) % tracks.length;
audio.src = tracks[current];
audio.load();
audio.play();
});
const toggleBtn = document.getElementById("music-toggle");
toggleBtn.addEventListener("click", () => {
if (audio.paused) {
audio.play();
toggleBtn.textContent = "⏸️音乐";
} else {
audio.pause();
toggleBtn.textContent = "▶️音乐";
}
});
document.getElementById("next-track").addEventListener("click", () => {
current = (current + 1) % tracks.length;
audio.src = tracks[current];
audio.load();
audio.play();
});
const avatar = document.getElementById("sophia-avatar");
const bark = document.getElementById("bark");
avatar.onmousedown = function(e) {
const shiftX = e.clientX - avatar.getBoundingClientRect().left;
const shiftY = e.clientY - avatar.getBoundingClientRect().top;
function moveAt(e) {
avatar.style.left = e.pageX - shiftX + 'px';
avatar.style.top = e.pageY - shiftY + 'px';
}
document.addEventListener('mousemove', moveAt);
avatar.onmouseup = () => { document.removeEventListener('mousemove', moveAt); avatar.onmouseup = null; };
};
avatar.ondragstart = () => false;
avatar.addEventListener("click", () => {
bark.pause(); bark.currentTime = 0; bark.play();
const fw = document.createElement("div");
fw.id = "firework";
document.body.appendChild(fw);
setTimeout(() => fw.remove(), 1200);
});
const gift = document.getElementById("gift");
const popup = document.getElementById("popup");
const popup2 = document.getElementById("popup2");
const closeBtn = document.getElementById("popup-close");
gift.addEventListener("click", () => {
popup.src = "{popup_url}";
popup.style.display = "block";
closeBtn.style.display = "block";
setTimeout(() => {
popup2.src = "{popup2_url}";
popup2.style.display = "block";
}, 2000);
setTimeout(() => {
popup.style.display = "none";
popup2.style.display = "none";
closeBtn.style.display = "none";
}, 5000);
});
closeBtn.addEventListener("click", () => {
popup.style.display = "none";
popup2.style.display = "none";
closeBtn.style.display = "none";
});
</script>
'''
# Fill the template's {name} tokens by plain text substitution, one after the
# other in the order listed here.
_template_values = (
    ("{background_css}", background_css),
    ("{avatar_url}", avatar_url),
    ("{cake_url}", cake_url),
    ("{music1}", music1),
    ("{music2}", music2),
    ("{bark_sound}", bark_sound),
    ("{gift_url}", gift_url),
    ("{popup_url}", popup_url),
    ("{popup2_url}", popup2_url),
)
html_content = html_template
for _token, _value in _template_values:
    html_content = html_content.replace(_token, _value)
# Page layout: decorative HTML, title, chat window, input box, hidden
# conversation state and the send button, created in that order.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.HTML(html_content)
    gr.Markdown("## 🌸 Horse and 7 Agent:欢迎进入豌豆的世界 🌸")
    chatbot = gr.Chatbot(label="Pea", type="messages", show_copy_button=True)
    msg = gr.Textbox(label="想对豌豆说啥?", placeholder="小勾巴,你在干嘛?", lines=2)
    state = gr.State([])
    btn = gr.Button("投喂")

    # Clicking the button and submitting the textbox both run chat() with the
    # same inputs and outputs; the button is registered first.
    for _register in (btn.click, msg.submit):
        _register(chat, inputs=[msg, state], outputs=[chatbot, state])

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()