hackerbyhobby
committed on
Updated to improve Google Safe Browsing scoring
Browse files
app.py
CHANGED
@@ -159,12 +159,12 @@ def boost_probabilities(probabilities: dict, text: str):
|
|
159 |
else:
|
160 |
p_smishing, p_other_scam, p_legit = 0.0, 0.0, 1.0
|
161 |
|
162 |
-
# **Now** check Safe Browsing
|
|
|
163 |
if found_urls:
|
164 |
-
|
165 |
# If any malicious => set p_smishing=1.0
|
166 |
-
if any(
|
167 |
-
# Bump SMiShing to max
|
168 |
p_smishing = 1.0
|
169 |
p_other_scam = 0.0
|
170 |
p_legit = 0.0
|
@@ -173,7 +173,9 @@ def boost_probabilities(probabilities: dict, text: str):
|
|
173 |
"SMiShing": p_smishing,
|
174 |
"Other Scam": p_other_scam,
|
175 |
"Legitimate": p_legit,
|
176 |
-
"detected_lang": detected_lang
|
|
|
|
|
177 |
}
|
178 |
|
179 |
def query_llm_for_classification(raw_message: str) -> dict:
|
@@ -334,15 +336,19 @@ def smishing_detector(input_type, text, image):
|
|
334 |
)
|
335 |
original_probs = {k: float(v) for k, v in zip(local_result["labels"], local_result["scores"])}
|
336 |
|
337 |
-
# 2. Basic boosting from keywords & URLs
|
338 |
-
|
339 |
-
detected_lang =
|
340 |
|
341 |
-
|
342 |
-
|
|
|
|
|
|
|
|
|
343 |
|
344 |
-
local_label = max(
|
345 |
-
local_conf = round(
|
346 |
|
347 |
# 3. LLM Classification
|
348 |
llm_classification = query_llm_for_classification(combined_text)
|
@@ -350,19 +356,15 @@ def smishing_detector(input_type, text, image):
|
|
350 |
llm_reason = llm_classification.get("reason", "No reason provided")
|
351 |
|
352 |
# 4. Incorporate LLM’s label into final probabilities
|
353 |
-
|
354 |
|
355 |
-
final_label = max(
|
356 |
-
final_confidence = round(
|
357 |
|
358 |
-
# 5. Gather found keywords & URLs
|
359 |
lower_text = combined_text.lower()
|
360 |
smishing_keys, scam_keys, _ = get_keywords_by_language(combined_text)
|
361 |
|
362 |
-
found_urls = re.findall(
|
363 |
-
r"(https?://[^\s]+|\b[a-zA-Z0-9.-]+\.(?:com|net|org|edu|gov|mil|io|ai|co|info|biz|us|uk|de|fr|es|ru|jp|cn|in|au|ca|br|mx|it|nl|se|no|fi|ch|pl|kr|vn|id|tw|sg|hk)\b)",
|
364 |
-
lower_text
|
365 |
-
)
|
366 |
found_smishing = [kw for kw in smishing_keys if kw in lower_text]
|
367 |
found_other_scam = [kw for kw in scam_keys if kw in lower_text]
|
368 |
|
@@ -381,6 +383,7 @@ def smishing_detector(input_type, text, image):
|
|
381 |
detected_lang=detected_lang
|
382 |
)
|
383 |
|
|
|
384 |
return {
|
385 |
"detected_language": detected_lang,
|
386 |
"text_used_for_classification": combined_text,
|
@@ -388,12 +391,13 @@ def smishing_detector(input_type, text, image):
|
|
388 |
"boosted_probabilities_before_llm": {local_label: local_conf},
|
389 |
"llm_label": llm_label,
|
390 |
"llm_reason": llm_reason,
|
391 |
-
"boosted_probabilities_after_llm": {k: round(v, 3) for k, v in
|
392 |
"label": final_label,
|
393 |
"confidence": final_confidence,
|
394 |
"smishing_keywords_found": found_smishing,
|
395 |
"other_scam_keywords_found": found_other_scam,
|
396 |
"urls_found": found_urls,
|
|
|
397 |
"final_explanation": final_explanation,
|
398 |
}
|
399 |
|
@@ -405,11 +409,8 @@ def toggle_inputs(choice):
|
|
405 |
Return updates for (text_input, image_input) based on the radio selection.
|
406 |
"""
|
407 |
if choice == "Text":
|
408 |
-
# Show text input, hide image
|
409 |
return gr.update(visible=True), gr.update(visible=False)
|
410 |
else:
|
411 |
-
# choice == "Screenshot"
|
412 |
-
# Hide text input, show image
|
413 |
return gr.update(visible=False), gr.update(visible=True)
|
414 |
|
415 |
with gr.Blocks() as demo:
|
@@ -426,16 +427,15 @@ with gr.Blocks() as demo:
|
|
426 |
lines=3,
|
427 |
label="Paste Suspicious SMS Text",
|
428 |
placeholder="Type or paste the message here...",
|
429 |
-
visible=True
|
430 |
)
|
431 |
|
432 |
image_input = gr.Image(
|
433 |
type="pil",
|
434 |
label="Upload Screenshot",
|
435 |
-
visible=False
|
436 |
)
|
437 |
|
438 |
-
# Whenever input_type changes, toggle which input is visible
|
439 |
input_type.change(
|
440 |
fn=toggle_inputs,
|
441 |
inputs=input_type,
|
@@ -443,11 +443,9 @@ with gr.Blocks() as demo:
|
|
443 |
queue=False
|
444 |
)
|
445 |
|
446 |
-
# Button to run classification
|
447 |
analyze_btn = gr.Button("Classify")
|
448 |
output_json = gr.JSON(label="Result")
|
449 |
|
450 |
-
# On button click, call the smishing_detector
|
451 |
analyze_btn.click(
|
452 |
fn=smishing_detector,
|
453 |
inputs=[input_type, text_input, image_input],
|
|
|
159 |
else:
|
160 |
p_smishing, p_other_scam, p_legit = 0.0, 0.0, 1.0
|
161 |
|
162 |
+
# **Now** check Safe Browsing
|
163 |
+
sb_results = {}
|
164 |
if found_urls:
|
165 |
+
sb_results = check_urls_with_google_safebrowsing(found_urls)
|
166 |
# If any malicious => set p_smishing=1.0
|
167 |
+
if any(sb_results[u] for u in sb_results):
|
|
|
168 |
p_smishing = 1.0
|
169 |
p_other_scam = 0.0
|
170 |
p_legit = 0.0
|
|
|
173 |
"SMiShing": p_smishing,
|
174 |
"Other Scam": p_other_scam,
|
175 |
"Legitimate": p_legit,
|
176 |
+
"detected_lang": detected_lang,
|
177 |
+
"found_urls": found_urls,
|
178 |
+
"safe_browsing_results": sb_results # <--- show which are malicious
|
179 |
}
|
180 |
|
181 |
def query_llm_for_classification(raw_message: str) -> dict:
|
|
|
336 |
)
|
337 |
original_probs = {k: float(v) for k, v in zip(local_result["labels"], local_result["scores"])}
|
338 |
|
339 |
+
# 2. Basic boosting from keywords & URLs (plus Safe Browsing)
|
340 |
+
boosted_dict = boost_probabilities(original_probs, combined_text)
|
341 |
+
detected_lang = boosted_dict.pop("detected_lang", "en")
|
342 |
|
343 |
+
# Also retrieve the safe_browsing_results
|
344 |
+
sb_results = boosted_dict.pop("safe_browsing_results", {})
|
345 |
+
found_urls = boosted_dict.pop("found_urls", [])
|
346 |
+
|
347 |
+
for k in boosted_dict:
|
348 |
+
boosted_dict[k] = float(boosted_dict[k])
|
349 |
|
350 |
+
local_label = max(boosted_dict, key=boosted_dict.get)
|
351 |
+
local_conf = round(boosted_dict[local_label], 3)
|
352 |
|
353 |
# 3. LLM Classification
|
354 |
llm_classification = query_llm_for_classification(combined_text)
|
|
|
356 |
llm_reason = llm_classification.get("reason", "No reason provided")
|
357 |
|
358 |
# 4. Incorporate LLM’s label into final probabilities
|
359 |
+
boosted_dict = incorporate_llm_label(boosted_dict, llm_label)
|
360 |
|
361 |
+
final_label = max(boosted_dict, key=boosted_dict.get)
|
362 |
+
final_confidence = round(boosted_dict[final_label], 3)
|
363 |
|
364 |
+
# 5. Gather found keywords & URLs for the final explanation
|
365 |
lower_text = combined_text.lower()
|
366 |
smishing_keys, scam_keys, _ = get_keywords_by_language(combined_text)
|
367 |
|
|
|
|
|
|
|
|
|
368 |
found_smishing = [kw for kw in smishing_keys if kw in lower_text]
|
369 |
found_other_scam = [kw for kw in scam_keys if kw in lower_text]
|
370 |
|
|
|
383 |
detected_lang=detected_lang
|
384 |
)
|
385 |
|
386 |
+
# Combine results in final output
|
387 |
return {
|
388 |
"detected_language": detected_lang,
|
389 |
"text_used_for_classification": combined_text,
|
|
|
391 |
"boosted_probabilities_before_llm": {local_label: local_conf},
|
392 |
"llm_label": llm_label,
|
393 |
"llm_reason": llm_reason,
|
394 |
+
"boosted_probabilities_after_llm": {k: round(v, 3) for k, v in boosted_dict.items()},
|
395 |
"label": final_label,
|
396 |
"confidence": final_confidence,
|
397 |
"smishing_keywords_found": found_smishing,
|
398 |
"other_scam_keywords_found": found_other_scam,
|
399 |
"urls_found": found_urls,
|
400 |
+
"safe_browsing_results": sb_results, # <--- show if malicious
|
401 |
"final_explanation": final_explanation,
|
402 |
}
|
403 |
|
|
|
409 |
Return updates for (text_input, image_input) based on the radio selection.
|
410 |
"""
|
411 |
if choice == "Text":
|
|
|
412 |
return gr.update(visible=True), gr.update(visible=False)
|
413 |
else:
|
|
|
|
|
414 |
return gr.update(visible=False), gr.update(visible=True)
|
415 |
|
416 |
with gr.Blocks() as demo:
|
|
|
427 |
lines=3,
|
428 |
label="Paste Suspicious SMS Text",
|
429 |
placeholder="Type or paste the message here...",
|
430 |
+
visible=True
|
431 |
)
|
432 |
|
433 |
image_input = gr.Image(
|
434 |
type="pil",
|
435 |
label="Upload Screenshot",
|
436 |
+
visible=False
|
437 |
)
|
438 |
|
|
|
439 |
input_type.change(
|
440 |
fn=toggle_inputs,
|
441 |
inputs=input_type,
|
|
|
443 |
queue=False
|
444 |
)
|
445 |
|
|
|
446 |
analyze_btn = gr.Button("Classify")
|
447 |
output_json = gr.JSON(label="Result")
|
448 |
|
|
|
449 |
analyze_btn.click(
|
450 |
fn=smishing_detector,
|
451 |
inputs=[input_type, text_input, image_input],
|