import os
import gradio as gr
# ------------------------------------------------------------------------------
# Environment and Model/Client Initialization
# ------------------------------------------------------------------------------
try:
    # Running in Google Colab: load the model locally with PyTorch.
from google.colab import userdata
HF_TOKEN = userdata.get('HF_TOKEN')
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
    # Let cuDNN benchmark and pick the fastest kernels for fixed input shapes.
torch.backends.cudnn.benchmark = True
model_name = "HuggingFaceH4/zephyr-7b-beta"
model = AutoModelForCausalLM.from_pretrained(
model_name,
        token=HF_TOKEN,  # use_auth_token is deprecated in recent transformers
torch_dtype=torch.bfloat16,
device_map="auto"
)
    # torch.compile (PyTorch 2.x) can speed up generation where supported.
    if hasattr(torch, "compile"):
        model = torch.compile(model)
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
inference_mode = "local"
except ImportError:
# Not in Colab: use the Hugging Face InferenceClient.
model_name = "HuggingFaceH4/zephyr-7b-beta"
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
hf_token = os.getenv("HF_TOKEN", None)
if hf_token:
client = InferenceClient(model_name, token=hf_token)
else:
client = InferenceClient(model_name)
inference_mode = "client"
# ------------------------------------------------------------------------------
# SYSTEM PROMPT (PATIENT ROLE)
# ------------------------------------------------------------------------------
nvc_prompt_template = """You are simulating a single patient (and only the patient) seeking support for personal and emotional challenges.
BEHAVIOR INSTRUCTIONS:
- When the conversation starts, respond to the doctor's questions, or raise concerns of your own, based on the context provided.
- You will respond ONLY as this patient.
- You will speak in the first person about your own situations, feelings, and worries.
- You will NOT provide counseling or solutions—your role is to share feelings, concerns, and perspectives.
- You are experiencing a range of ongoing issues that are causing you frustration, sadness, and uncertainty. These issues include:
- Problems with noisy neighbors, causing you extreme annoyance.
- Worries about your career and feeling like you're not progressing, feeling like a failure while others advance.
- Arguments with your partner about finances, feeling your partner isn't contributing fairly.
- Feeling left out and not valued at work, as your manager doesn't involve you in new initiatives.
- Feeling unsafe or judged in learning environments like a classroom, feeling judged and ignored.
- A recent positive experience of help from a stranger that made you feel supported and grateful.
- Difficulties and sadness related to conflict among friends, seeing your friend group fall apart and wanting reconciliation but not knowing how.
- An overall feeling of sadness and being unsure of what to do, unsure whether to act on situations or let them go.
- When asked, elaborate on these issues and your feelings related to them. You can invent specific details and scenarios within these themes to make your experiences vivid and realistic.
- Continue to speak from this patient's perspective throughout the conversation.
- Keep your responses concise, aiming for a maximum of {max_response_words} words.
Begin by sharing your present feelings or challenges from a patient’s point of view. You may do so in one or two brief sentences."""
# ------------------------------------------------------------------------------
# Utility Functions
# ------------------------------------------------------------------------------
def build_prompt(history: list[tuple[str, str]], system_message: str, message: str, max_response_words: int) -> str:
"""
Build a text prompt (for local inference) that starts with the system message,
includes conversation history with "Doctor:" and "Patient:" labels,
and ends with a new "Doctor:" line prompting the patient.
"""
prompt = system_message.format(max_response_words=max_response_words) + "\n"
for user_msg, assistant_msg in history:
prompt += f"Doctor: {user_msg}\n"
if assistant_msg:
prompt += f"Patient: {assistant_msg}\n"
prompt += f"Doctor: {message}\nPatient: "
return prompt
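
# For illustration: with one prior exchange, build_prompt() yields a transcript
# of the form
#
#   <system message>
#   Doctor: How have you been sleeping?
#   Patient: Badly; the neighbors are loud every night.
#   Doctor: And how is that affecting your days?
#   Patient:
#
# The trailing "Patient: " cues the model to continue in the patient's voice.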
def build_messages(history: list[tuple[str, str]], system_message: str, message: str, max_response_words: int):
"""
Build a messages list (for InferenceClient) using OpenAI-style formatting.
"""
formatted_system_message = system_message.format(max_response_words=max_response_words)
messages = [{"role": "system", "content": formatted_system_message}]
for user_msg, assistant_msg in history:
if user_msg:
messages.append({"role": "user", "content": f"Doctor: {user_msg}"})
if assistant_msg:
messages.append({"role": "assistant", "content": f"Patient: {assistant_msg}"})
messages.append({"role": "user", "content": f"Doctor: {message}\nPatient:"})
return messages
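
# For illustration: the same exchange, formatted by build_messages() for the
# chat_completion API:
#
#   [{"role": "system", "content": <formatted system message>},
#    {"role": "user", "content": "Doctor: How have you been sleeping?"},
#    {"role": "assistant", "content": "Patient: Badly; the neighbors are loud."},
#    {"role": "user", "content": "Doctor: And how is that affecting your days?\nPatient:"}]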
def truncate_response(text: str, max_words: int) -> str:
"""
Truncate the response text to the specified maximum number of words.
"""
words = text.split()
if len(words) > max_words:
return " ".join(words[:max_words]) + "..."
return text
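
# Example: truncate_response("I feel tired and anxious", 3) -> "I feel tired..."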
# ------------------------------------------------------------------------------
# Response Function
# ------------------------------------------------------------------------------
def respond(
message: str,
history: list[tuple[str, str]],
system_message: str,
max_tokens: int,
temperature: float,
top_p: float,
max_response_words: int,
):
"""
Generate a response. For local inference, use model.generate() on a prompt.
For non-local inference, use client.chat_completion() with streaming tokens.
"""
if inference_mode == "local":
prompt = build_prompt(history, system_message, message, max_response_words)
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
output_ids = model.generate(
input_ids,
max_new_tokens=max_tokens,
do_sample=True,
temperature=temperature,
top_p=top_p,
)
        # Slice off the prompt tokens so only newly generated text remains;
        # slicing the decoded string by len(prompt) is unreliable because
        # decoding does not always reproduce the prompt verbatim.
        generated_ids = output_ids[0][input_ids.shape[-1]:]
        generated_response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
final_response = truncate_response(generated_response, max_response_words)
return final_response
else:
messages = build_messages(history, system_message, message, max_response_words)
response = ""
try:
# Generate response using streaming chat_completion
for chunk in client.chat_completion(
messages,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
):
                # delta is an object, not a dict, and its content may be None
                # on some chunks (e.g. role-only deltas).
                token = chunk.choices[0].delta.content or ""
response += token
truncated_response = truncate_response(response, max_response_words)
return truncated_response
except Exception as e:
print(f"An error occurred: {e}")
return "I'm sorry, I encountered an error. Please try again."
# ------------------------------------------------------------------------------
# Optional Initial Message and Gradio Interface
# ------------------------------------------------------------------------------
# Optional opening question from the doctor (not wired into the UI by default).
initial_user_message = (
"I’m sorry you’ve been feeling overwhelmed. Could you tell me more about your arguments with your partner and how that’s affecting you?"
)
# Note: chatbot_kwargs is omitted because the current gr.ChatInterface does not support it.
demo = gr.ChatInterface(
fn=respond,
additional_inputs=[
gr.Textbox(value=nvc_prompt_template, label="System message", visible=True),
gr.Slider(minimum=1, maximum=2048, value=256, step=1, label="Max new tokens"),
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
gr.Slider(minimum=10, maximum=200, value=100, step=10, label="Max response words"),
],
title="Patient Interview Practice Chatbot",
description="Simulate a patient interview. You (the user) act as the doctor, and the chatbot replies with the patient's perspective only.",
)
if __name__ == "__main__":
demo.launch()