hackerbyhobby
committed on
changes
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ import requests
|
|
10 |
import json
|
11 |
import os
|
12 |
import numpy as np
|
|
|
13 |
|
14 |
# Translator instance
|
15 |
translator = GoogleTranslator(source="auto", target="es")
|
@@ -26,17 +27,8 @@ model_name = "joeddav/xlm-roberta-large-xnli"
|
|
26 |
classifier = pipeline("zero-shot-classification", model=model_name)
|
27 |
CANDIDATE_LABELS = ["SMiShing", "Other Scam", "Legitimate"]
|
28 |
|
29 |
-
# 3. SHAP
|
30 |
-
|
31 |
-
original_invariants = shap.maskers._text.Text.invariants
|
32 |
-
|
33 |
-
def patched_invariants(self, *args):
|
34 |
-
return np.zeros(len(self._tokenized_s), dtype=np.bool_) # Use np.bool_ instead
|
35 |
-
|
36 |
-
shap.maskers._text.Text.invariants = patched_invariants
|
37 |
-
|
38 |
-
# SHAP explainer setup
|
39 |
-
explainer = shap.Explainer(classifier)
|
40 |
|
41 |
# Retrieve the Google Safe Browsing API key from the environment
|
42 |
SAFE_BROWSING_API_KEY = os.getenv("SAFE_BROWSING_API_KEY")
|
@@ -147,6 +139,9 @@ def explain_classification(text):
|
|
147 |
"""
|
148 |
Generate SHAP explanations for the classification.
|
149 |
"""
|
|
|
|
|
|
|
150 |
shap_values = explainer([text])
|
151 |
shap.force_plot(
|
152 |
explainer.expected_value[0], shap_values[0].values[0], shap_values[0].data
|
@@ -218,23 +213,4 @@ demo = gr.Interface(
|
|
218 |
inputs=[
|
219 |
gr.Textbox(
|
220 |
lines=3,
|
221 |
-
label="Paste
|
222 |
-
placeholder="Type or paste the message here..."
|
223 |
-
),
|
224 |
-
gr.Image(
|
225 |
-
type="pil",
|
226 |
-
label="Or Upload a Screenshot (Optional)"
|
227 |
-
)
|
228 |
-
],
|
229 |
-
outputs="json",
|
230 |
-
title="SMiShing & Scam Detector with Safe Browsing",
|
231 |
-
description="""
|
232 |
-
This tool classifies messages as SMiShing, Other Scam, or Legitimate using a zero-shot model
|
233 |
-
(joeddav/xlm-roberta-large-xnli). It automatically detects if the text is Spanish or English.
|
234 |
-
It uses SHAP for explainability and checks URLs against Google's Safe Browsing API for enhanced analysis.
|
235 |
-
""",
|
236 |
-
flagging_mode="never"
|
237 |
-
)
|
238 |
-
|
239 |
-
if __name__ == "__main__":
|
240 |
-
demo.launch()
|
|
|
10 |
import json
|
11 |
import os
|
12 |
import numpy as np
|
13 |
+
from shap.maskers import Text
|
14 |
|
15 |
# Translator instance
|
16 |
translator = GoogleTranslator(source="auto", target="es")
|
|
|
27 |
classifier = pipeline("zero-shot-classification", model=model_name)
|
28 |
CANDIDATE_LABELS = ["SMiShing", "Other Scam", "Legitimate"]
|
29 |
|
30 |
+
# 3. SHAP Explainer Setup
|
31 |
+
explainer = shap.Explainer(classifier, masker=Text(tokenizer=classifier.tokenizer))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
# Retrieve the Google Safe Browsing API key from the environment
|
34 |
SAFE_BROWSING_API_KEY = os.getenv("SAFE_BROWSING_API_KEY")
|
|
|
139 |
"""
|
140 |
Generate SHAP explanations for the classification.
|
141 |
"""
|
142 |
+
if not text.strip():
|
143 |
+
raise ValueError("Cannot generate SHAP explanations for empty text.")
|
144 |
+
|
145 |
shap_values = explainer([text])
|
146 |
shap.force_plot(
|
147 |
explainer.expected_value[0], shap_values[0].values[0], shap_values[0].data
|
|
|
213 |
inputs=[
|
214 |
gr.Textbox(
|
215 |
lines=3,
|
216 |
+
label="Paste
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|