Update app.py
app.py
CHANGED
@@ -1,127 +1,173 @@
 import numpy as np
 import torch
-import
-
-
-
-
-
-
-import
 
-
-device =
-
-# ───────── 2) ADR classifier ─────────
-model_name = "paragon-analytics/ADRv1"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
-
-pred_pipeline = pipeline(
-    "text-classification",
-    model=model,
-    tokenizer=tokenizer,
-    return_all_scores=True,
-    device=0 if device.type == "cuda" else -1
-)
-
-def predict_proba(texts):
-    if isinstance(texts, str):
-        texts = [texts]
-    results = pred_pipeline(texts)
-    return np.array([[d["score"] for d in sample] for sample in results])
-
-def predict_proba_shap(inputs):
-    texts = [" ".join(x) if isinstance(x, list) else x for x in inputs]
-    return predict_proba(texts)
-
-# ───────── 3) SHAP explainer ─────────
-masker = shap.maskers.Text(tokenizer)
-_example = pred_pipeline(["test"])[0]
-class_labels = [d["label"] for d in _example]
-explainer = shap.Explainer(
-    predict_proba_shap,
-    masker=masker,
-    output_names=class_labels
-)
-
-# ───────── 4) Biomedical NER ─────────
-ner_name = "d4data/biomedical-ner-all"
-ner_tokenizer = AutoTokenizer.from_pretrained(ner_name)
-ner_model = AutoModelForTokenClassification.from_pretrained(ner_name).to(device)
-ner_pipe = pipeline(
-    "ner",
-    model=ner_model,
-    tokenizer=ner_tokenizer,
-    aggregation_strategy="simple",
-    device=0 if device.type == "cuda" else -1
-)
-
-ENTITY_COLORS = {
-    "Severity": "red",
-    "Sign_symptom": "green",
-    "Medication": "lightblue",
-    "Age": "yellow",
-    "Sex": "yellow",
-    "Diagnostic_procedure": "gray",
-    "Biological_structure": "silver"
-}
-
-# ───────── 5) Prediction + SHAP + NER ─────────
-def adr_predict(text: str):
-    # Probabilities
-    probs = predict_proba([text])[0]
-    prob_dict = {cls: float(probs[i]) for i, cls in enumerate(class_labels)}
-    # SHAP
-    shap_vals = explainer([text])
-    fig = shap.plots.text(shap_vals[0], display=False)
-    # NER highlight
-    ents = ner_pipe(text)
-    highlighted, last = "", 0
-    for ent in ents:
-        s, e = ent["start"], ent["end"]
-        w = ent["word"].replace("##", "")
-        color = ENTITY_COLORS.get(ent["entity_group"], "lightgray")
-        highlighted += text[last:s] + f"<mark style='background-color:{color};'>{w}</mark>"
-        last = e
-    highlighted += text[last:]
-    return prob_dict, fig, highlighted
-
-# ───────── 6) Gradio UI ─────────
-with gr.Blocks() as demo:
-    gr.Markdown("## Welcome to **ADR Detector** 💪")
-    gr.Markdown(
-        "Predicts how likely your text describes a **severe** vs. **non-severe** adverse reaction. \n"
-        "_(Not for medical or diagnostic use.)_"
-    )
 
-
-
-
-
-
 
-
-    out_prob = gr.Label(label="Predicted Probabilities")
-    out_shap = gr.Plot(label="SHAP Explanation")
-    out_ner = gr.HTML(label="Biomedical Entities Highlighted")
 
-
-
-
-
     )
 
     gr.Examples(
         examples=[
-            "A 35
-            "A 35
         ],
-        inputs=
-        outputs=[
-        fn=
-        cache_examples=
     )
 
-
-
+import streamlit as st
+import gradio as gr
+import shap
 import numpy as np
+import scipy as sp
 import torch
+import tensorflow as tf
+import transformers
+from transformers import pipeline
+from transformers import RobertaTokenizer, RobertaModel
+from transformers import AutoModelForSequenceClassification
+from transformers import TFAutoModelForSequenceClassification  # imported but not used in this file
+from transformers import AutoTokenizer, AutoModelForTokenClassification
+import matplotlib.pyplot as plt
+import sys
+import csv
 
+csv.field_size_limit(sys.maxsize)
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
+# Load models and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("paragon-analytics/ADRv1")
+model = AutoModelForSequenceClassification.from_pretrained("paragon-analytics/ADRv1").to(device)
+
+# Build a pipeline object for predictions.
+# Note: return_all_scores is deprecated; newer transformers releases expect
+# top_k=None instead, so this call may warn or fail there.
+pred = transformers.pipeline("text-classification", model=model,
+                             tokenizer=tokenizer, return_all_scores=True)
+
+# SHAP explainer; check the SHAP documentation if Explainer initialization fails.
+explainer = shap.Explainer(pred)
+
+# NER pipeline
+ner_tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
+ner_model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")
+ner_pipe = pipeline("ner", model=ner_model, tokenizer=ner_tokenizer, aggregation_strategy="simple")  # pass device=0 if using a GPU
+
+def adr_predict(x):
+    # Ensure the input is treated as a string
+    text_input = str(x).lower()
+
+    encoded_input = tokenizer(text_input, return_tensors='pt').to(device)  # move input to the model's device
+    output = model(**encoded_input)
+
+    # Use torch.softmax (not tf.nn.softmax) for consistency with the torch model:
+    # apply softmax to the logits, move to CPU, and convert to numpy.
+    scores = torch.softmax(output.logits, dim=-1)[0].detach().cpu().numpy()
+
+    # SHAP values: `explainer([text_input])` assumes the explainer can consume raw
+    # text and the pipeline's output structure directly. Depending on the SHAP
+    # version, you may instead need to wrap the pipeline in a plain function
+    # (e.g. `def f(text): return pred(text)`) and pass the wrapper to shap.Explainer.
+    try:
+        shap_values = explainer([text_input])
+        # With display=False, shap.plots.text should return a string representation
+        # of the plot that can be embedded in the gr.HTML output below.
+        local_plot = shap.plots.text(shap_values[0], display=False)
+    except Exception as e:
+        print(f"SHAP explanation failed: {e}")
+        local_plot = "<p>SHAP explanation not available.</p>"  # fallback
+
+    # NER processing
+    try:
+        res = ner_pipe(text_input)
+
+        entity_colors = {
+            'Severity': 'red',
+            'Sign_symptom': 'green',
+            'Medication': 'lightblue',
+            'Age': 'yellow',
+            'Sex': 'yellow',
+            'Diagnostic_procedure': 'gray',
+            'Biological_structure': 'silver'
+        }
+        htext = ""
+        prev_end = 0
+        # Sort entities by start position so the highlighted text is built in order
+        res = sorted(res, key=lambda x: x['start'])
+
+        for entity in res:
+            start = entity['start']
+            end = entity['end']
+            word = text_input[start:end]  # extract the original text segment
+            entity_type = entity['entity_group']
+            color = entity_colors.get(entity_type, 'lightgray')  # default for unlisted types
+
+            # Append the text before the entity, then the highlighted entity itself
+            htext += f"{text_input[prev_end:start]}"
+            htext += f"<mark style='background-color:{color};'>{word}</mark>"
+            prev_end = end
+        # Append any remaining text after the last entity
+        htext += text_input[prev_end:]
+    except Exception as e:
+        print(f"NER processing failed: {e}")
+        htext = "<p>NER processing not available.</p>"  # fallback
+
+    # Gradio's click handler expects one return value per output component
+    # (label, local_plot, htext), and gr.Label takes a dict of class probabilities.
+    label_output = {"Severe Reaction": float(scores[1]), "Non-severe Reaction": float(scores[0])}
+
+    return label_output, local_plot, htext
+
+def main(prob1):
+    # Forward to adr_predict; this matches the signature Gradio expects.
+    return adr_predict(prob1)
+
+title = "Welcome to **ADR Detector** 💪"
+description1 = """This app takes text (up to a few sentences) and predicts to what extent it describes a severe (or non-severe) adverse reaction to medications. Please do NOT use it for medical diagnosis."""
+
+# Use the 'with' syntax for Blocks
+with gr.Blocks(title=title) as demo:
+    gr.Markdown(f"## {title}")
+    gr.Markdown(description1)
+    gr.Markdown("""---""")
+
+    # Input component
+    prob1 = gr.Textbox(label="Enter Your Text Here:", lines=2, placeholder="Type it here ...")
+
+    # Output components matching the return values of main
+    label = gr.Label(label="Predicted Label")
+    local_plot = gr.HTML(label="SHAP Explanation")
+    htext = gr.HTML(label="Named Entity Recognition")
 
+    submit_btn = gr.Button("Analyze")
 
+    # Wire the button to the prediction function
+    submit_btn.click(
+        fn=main,                             # the function to call
+        inputs=[prob1],                      # input components
+        outputs=[label, local_plot, htext],  # output components
+        api_name="adr"                       # keep api_name to expose the endpoint
     )
 
+    # Examples section
+    gr.Markdown("### Click on any of the examples below to see how it works:")
+    # Gradio 4.0+ Examples usage: pass the input and output components directly;
+    # cache_examples=True precomputes the results so examples load faster.
     gr.Examples(
         examples=[
+            ["A 35 year-old male had severe headache after taking Aspirin. The lab results were normal."],
+            ["A 35 year-old female had minor pain in upper abdomen after taking Acetaminophen."]
         ],
+        inputs=[prob1],
+        outputs=[label, local_plot, htext],
+        fn=main,  # needed so cached examples can be computed
+        cache_examples=True,
+        # run_on_click=True  # may be needed on some Gradio versions, though caching already runs fn
     )
 
+# Launch the demo
+demo.launch()
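
Review note: the new file keeps `return_all_scores=True` even though its own comment flags the deprecation, and the SHAP comments suggest wrapping the pipeline in a plain prediction function. A minimal sketch of both fixes together, assuming recent transformers and shap releases; the `predict_proba` name is illustrative (the removed version of this file used the same idea):

import numpy as np

pred = transformers.pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    top_k=None,  # modern replacement for return_all_scores=True
)

def predict_proba(texts):
    # SHAP expects an (n_samples, n_classes) array with a stable column order,
    # so sort each sample's scores by label before stacking them.
    if isinstance(texts, str):
        texts = [texts]
    results = pred(list(texts))
    return np.array(
        [[d["score"] for d in sorted(r, key=lambda d: d["label"])] for r in results]
    )

explainer = shap.Explainer(predict_proba, masker=shap.maskers.Text(tokenizer))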
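Review note: the `# pass device=0 if using a GPU` comment is left unresolved, so `ner_model` stays on the CPU even when `device` is `cuda:0`. A device-aware construction, mirroring what the removed version of the file did:

ner_pipe = pipeline(
    "ner",
    model=ner_model,
    tokenizer=ner_tokenizer,
    aggregation_strategy="simple",
    device=0 if torch.cuda.is_available() else -1,  # GPU index 0, or -1 for CPU
)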
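Review note: on the open question about embedding the SHAP plot, `shap.plots.text` returns the rendered HTML as a string when called with `display=False` (at least in recent shap releases), so routing it to the `gr.HTML` output should work. A sketch with the same fallback behaviour as the app, under that assumption:

def shap_html(text: str) -> str:
    # display=False makes shap.plots.text return raw HTML instead of rendering it,
    # which gr.HTML can display as-is; fall back to a plain message on failure.
    try:
        sv = explainer([text])
        return shap.plots.text(sv[0], display=False)
    except Exception as e:
        return f"<p>SHAP explanation not available: {e}</p>"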