Spaces:

Ruurd
/

radiolm

Running on Zero

App Files Community

Ruurd committed 3 days ago

Commit

713dc22

1 Parent(s): 2ad2507

Change tokenizer selection

Browse files

Add system prompt
Add start message
Add incorporation of patient data

Files changed (1) hide show

app.py +61 -68

app.py CHANGED Viewed

@@ -9,51 +9,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStream
 import threading
 import queue
-class RichTextStreamer(TextIteratorStreamer):
-    def __init__(self, tokenizer, prompt_len=0, **kwargs):
-        super().__init__(tokenizer, **kwargs)
-        self.token_queue = queue.Queue()
-        self.prompt_len = prompt_len
-        self.count = 0
-    def put(self, value):
-        if isinstance(value, torch.Tensor):
-            token_ids = value.view(-1).tolist()
-        elif isinstance(value, list):
-            token_ids = value
-        else:
-            token_ids = [value]
-        for token_id in token_ids:
-            self.count += 1
-            if self.count <= self.prompt_len:
-                continue  # skip prompt tokens
-            token_str = self.tokenizer.decode([token_id], **self.decode_kwargs)
-            is_special = token_id in self.tokenizer.all_special_ids
-            self.token_queue.put({
-                "token_id": token_id,
-                "token": token_str,
-                "is_special": is_special
-            })
-    def __iter__(self):
-        while True:
-            try:
-                token_info = self.token_queue.get(timeout=self.timeout)
-                yield token_info
-            except queue.Empty:
-                if self.end_of_generation.is_set():
-                    break
-from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
-import threading
-from transformers import TextIteratorStreamer
-import threading
-from transformers import TextIteratorStreamer
-import queue
 class RichTextStreamer(TextIteratorStreamer):
     def __init__(self, tokenizer, prompt_len=0, **kwargs):
         super().__init__(tokenizer, **kwargs)
@@ -108,22 +63,66 @@ def chat_with_model(messages):
     max_new_tokens = 1024
     generated_tokens = 0
-    prompt = format_prompt(messages)
     device = torch.device("cuda")
     current_model.to(device).half()
-    # 1. Tokenize prompt
     inputs = current_tokenizer(prompt, return_tensors="pt").to(device)
     prompt_len = inputs["input_ids"].shape[-1]
-    # 2. Init streamer with prompt_len
     streamer = RichTextStreamer(
         tokenizer=current_tokenizer,
         prompt_len=prompt_len,
         skip_special_tokens=False
     )
-    # 3. Build generation kwargs
     generation_kwargs = dict(
         **inputs,
         max_new_tokens=max_new_tokens,
@@ -133,27 +132,20 @@ def chat_with_model(messages):
         pad_token_id=pad_id
     )
-    # 4. Launch generation in a thread
     thread = threading.Thread(target=current_model.generate, kwargs=generation_kwargs)
     thread.start()
     messages = messages.copy()
     messages.append({"role": "assistant", "content": ""})
-    print(f'Step 1: {messages}')
-    prompt_text = current_tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=False)
     for token_info in streamer:
         token_str = token_info["token"]
         token_id = token_info["token_id"]
         is_special = token_info["is_special"]
-        # Stop immediately at EOS
         if token_id == eos_id:
             break
-        # Detect reasoning block
         if "<think>" in token_str:
             in_think = True
             token_str = token_str.replace("<think>", "")
@@ -166,7 +158,6 @@ def chat_with_model(messages):
         else:
             output_text += token_str
-        # Early stopping if user reappears
         if "\nUser" in output_text:
             output_text = output_text.split("\nUser")[0].rstrip()
             messages[-1]["content"] = output_text
@@ -178,34 +169,35 @@ def chat_with_model(messages):
         messages[-1]["content"] = output_text
-        print(f'Step 2: {messages}')
         yield messages
     if in_think:
         output_text += "*"
         messages[-1]["content"] = output_text
-    # Wait for thread to finish
-    # current_model.to("cpu")
-    torch.cuda.empty_cache()
     messages[-1]["content"] = output_text
-    print(f'Step 3: {messages}')
     return messages
 # Globals
 current_model = None
 current_tokenizer = None
 def load_model_on_selection(model_name, progress=gr.Progress(track_tqdm=False)):
     global current_model, current_tokenizer
     token = os.getenv("HF_TOKEN")
-    progress(0, desc="Loading tokenizer...")
     current_tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
     progress(0.5, desc="Loading model...")
@@ -219,6 +211,7 @@ def load_model_on_selection(model_name, progress=gr.Progress(track_tqdm=False)):
     progress(1, desc="Model ready.")
     return f"{model_name} loaded and ready!"
 # Format conversation as plain text
 def format_prompt(messages):
     prompt = ""
@@ -239,7 +232,7 @@ model_choices = [
     "meta-llama/Llama-3.2-3B-Instruct",
     "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
     "google/gemma-7b",
-    "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
 ]
 # Example patient database
@@ -277,7 +270,7 @@ def autofill_patient(patient_key):
     return "", "", "", ""
 with gr.Blocks(css=".gradio-container {height: 100vh; overflow: hidden;}") as demo:
-    gr.Markdown("## Radiologist's Companion")
     default_model = gr.State(model_choices[0])

 import threading
 import queue
 class RichTextStreamer(TextIteratorStreamer):
     def __init__(self, tokenizer, prompt_len=0, **kwargs):
         super().__init__(tokenizer, **kwargs)
     max_new_tokens = 1024
     generated_tokens = 0
+    # ⮕ PREPARE SYSTEM + INITIAL MESSAGES
+    system_messages = [
+        {
+            "role": "system",
+            "content": (
+                "You are a radiologist's companion, here to answer questions about the patient and assist in the diagnosis if asked to do so. "
+                "You are able to call specialized tools. "
+                "At the moment, you have one tool available: an organ segmentation algorithm for abdominal CTs.\n\n"
+                "If the user requests an organ segmentation, output a JSON object in this structure:\n"
+                "{\n"
+                "  \"function\": \"segment_organ\",\n"
+                "  \"arguments\": {\n"
+                "    \"scan_path\": \"<path_to_ct_scan>\",\n"
+                "    \"organ\": \"<organ_name>\"\n"
+                "  }\n"
+                "}\n\n"
+                "Once you call the function, the app will execute it and return the result."
+            )
+        },
+        {
+            "role": "system",
+            "content": f"Patient Information:\nName: {patient_name.value}\nAge: {patient_age.value}\nID: {patient_id.value}\nNotes: {patient_notes.value}"
+        }
+    ]
+    # Optional: if you later add available_images, you could append another system message.
+    welcome_message = (
+        "**Welcome to the Radiologist's Companion!**\n\n"
+        "You can ask me about the patient's medical history or available imaging data.\n"
+        "- I can summarize key details from the EHR.\n"
+        "- I can tell you which medical images are available.\n"
+        "- If you'd like an organ segmentation (e.g. spleen, liver, kidney_left, colon, femur_right) on an abdominal CT scan, just ask!\n\n"
+        "**Example Requests:**\n"
+        "- \"What do we know about this patient?\"\n"
+        "- \"Which images are available for this patient?\"\n"
+        "- \"Can you segment the spleen from the CT scan?\"\n"
+    )
+    # If it's the first user message (i.e., no assistant yet), prepend welcome
+    if len(messages) == 1 and messages[0]['role'] == 'user':
+        messages = [{"role": "assistant", "content": welcome_message}] + messages
+    # Merge full conversation
+    full_messages = system_messages + messages
+    prompt = format_prompt(full_messages)
     device = torch.device("cuda")
     current_model.to(device).half()
     inputs = current_tokenizer(prompt, return_tensors="pt").to(device)
     prompt_len = inputs["input_ids"].shape[-1]
     streamer = RichTextStreamer(
         tokenizer=current_tokenizer,
         prompt_len=prompt_len,
         skip_special_tokens=False
     )
     generation_kwargs = dict(
         **inputs,
         max_new_tokens=max_new_tokens,
         pad_token_id=pad_id
     )
     thread = threading.Thread(target=current_model.generate, kwargs=generation_kwargs)
     thread.start()
     messages = messages.copy()
     messages.append({"role": "assistant", "content": ""})
     for token_info in streamer:
         token_str = token_info["token"]
         token_id = token_info["token_id"]
         is_special = token_info["is_special"]
         if token_id == eos_id:
             break
         if "<think>" in token_str:
             in_think = True
             token_str = token_str.replace("<think>", "")
         else:
             output_text += token_str
         if "\nUser" in output_text:
             output_text = output_text.split("\nUser")[0].rstrip()
             messages[-1]["content"] = output_text
         messages[-1]["content"] = output_text
         yield messages
     if in_think:
         output_text += "*"
         messages[-1]["content"] = output_text
+    torch.cuda.empty_cache()
     messages[-1]["content"] = output_text
     return messages
 # Globals
 current_model = None
 current_tokenizer = None
+from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, LlamaTokenizer
 def load_model_on_selection(model_name, progress=gr.Progress(track_tqdm=False)):
     global current_model, current_tokenizer
     token = os.getenv("HF_TOKEN")
+    progress(0, desc="Loading config...")
+    config = AutoConfig.from_pretrained(model_name, use_auth_token=token)
+    progress(0.2, desc="Loading tokenizer...")
+    # Default
     current_tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
     progress(0.5, desc="Loading model...")
     progress(1, desc="Model ready.")
     return f"{model_name} loaded and ready!"
 # Format conversation as plain text
 def format_prompt(messages):
     prompt = ""
     "meta-llama/Llama-3.2-3B-Instruct",
     "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
     "google/gemma-7b",
+    "mistralai/Mistral-Nemo-Instruct-FP8-2407"
 ]
 # Example patient database
     return "", "", "", ""
 with gr.Blocks(css=".gradio-container {height: 100vh; overflow: hidden;}") as demo:
+    gr.Markdown("<h2 style='text-align: center;'>Radiologist's Companion</h2>")
     default_model = gr.State(model_choices[0])