hackerbyhobby committed on
Commit
535ff02
·
unverified ·
1 Parent(s): 07e1348

updated to improve Google Safe Browsing scoring

Browse files
Files changed (1) hide show
  1. app.py +27 -29
app.py CHANGED
@@ -159,12 +159,12 @@ def boost_probabilities(probabilities: dict, text: str):
159
  else:
160
  p_smishing, p_other_scam, p_legit = 0.0, 0.0, 1.0
161
 
162
- # **Now** check Safe Browsing. If any URL is malicious => p_smishing=1.0
 
163
  if found_urls:
164
- malicious_results = check_urls_with_google_safebrowsing(found_urls)
165
  # If any malicious => set p_smishing=1.0
166
- if any(malicious_results[u] for u in malicious_results):
167
- # Bump SMiShing to max
168
  p_smishing = 1.0
169
  p_other_scam = 0.0
170
  p_legit = 0.0
@@ -173,7 +173,9 @@ def boost_probabilities(probabilities: dict, text: str):
173
  "SMiShing": p_smishing,
174
  "Other Scam": p_other_scam,
175
  "Legitimate": p_legit,
176
- "detected_lang": detected_lang
 
 
177
  }
178
 
179
  def query_llm_for_classification(raw_message: str) -> dict:
@@ -334,15 +336,19 @@ def smishing_detector(input_type, text, image):
334
  )
335
  original_probs = {k: float(v) for k, v in zip(local_result["labels"], local_result["scores"])}
336
 
337
- # 2. Basic boosting from keywords & URLs
338
- boosted = boost_probabilities(original_probs, combined_text)
339
- detected_lang = boosted.pop("detected_lang", "en")
340
 
341
- for k in boosted:
342
- boosted[k] = float(boosted[k])
 
 
 
 
343
 
344
- local_label = max(boosted, key=boosted.get)
345
- local_conf = round(boosted[local_label], 3)
346
 
347
  # 3. LLM Classification
348
  llm_classification = query_llm_for_classification(combined_text)
@@ -350,19 +356,15 @@ def smishing_detector(input_type, text, image):
350
  llm_reason = llm_classification.get("reason", "No reason provided")
351
 
352
  # 4. Incorporate LLM’s label into final probabilities
353
- boosted = incorporate_llm_label(boosted, llm_label)
354
 
355
- final_label = max(boosted, key=boosted.get)
356
- final_confidence = round(boosted[final_label], 3)
357
 
358
- # 5. Gather found keywords & URLs
359
  lower_text = combined_text.lower()
360
  smishing_keys, scam_keys, _ = get_keywords_by_language(combined_text)
361
 
362
- found_urls = re.findall(
363
- r"(https?://[^\s]+|\b[a-zA-Z0-9.-]+\.(?:com|net|org|edu|gov|mil|io|ai|co|info|biz|us|uk|de|fr|es|ru|jp|cn|in|au|ca|br|mx|it|nl|se|no|fi|ch|pl|kr|vn|id|tw|sg|hk)\b)",
364
- lower_text
365
- )
366
  found_smishing = [kw for kw in smishing_keys if kw in lower_text]
367
  found_other_scam = [kw for kw in scam_keys if kw in lower_text]
368
 
@@ -381,6 +383,7 @@ def smishing_detector(input_type, text, image):
381
  detected_lang=detected_lang
382
  )
383
 
 
384
  return {
385
  "detected_language": detected_lang,
386
  "text_used_for_classification": combined_text,
@@ -388,12 +391,13 @@ def smishing_detector(input_type, text, image):
388
  "boosted_probabilities_before_llm": {local_label: local_conf},
389
  "llm_label": llm_label,
390
  "llm_reason": llm_reason,
391
- "boosted_probabilities_after_llm": {k: round(v, 3) for k, v in boosted.items()},
392
  "label": final_label,
393
  "confidence": final_confidence,
394
  "smishing_keywords_found": found_smishing,
395
  "other_scam_keywords_found": found_other_scam,
396
  "urls_found": found_urls,
 
397
  "final_explanation": final_explanation,
398
  }
399
 
@@ -405,11 +409,8 @@ def toggle_inputs(choice):
405
  Return updates for (text_input, image_input) based on the radio selection.
406
  """
407
  if choice == "Text":
408
- # Show text input, hide image
409
  return gr.update(visible=True), gr.update(visible=False)
410
  else:
411
- # choice == "Screenshot"
412
- # Hide text input, show image
413
  return gr.update(visible=False), gr.update(visible=True)
414
 
415
  with gr.Blocks() as demo:
@@ -426,16 +427,15 @@ with gr.Blocks() as demo:
426
  lines=3,
427
  label="Paste Suspicious SMS Text",
428
  placeholder="Type or paste the message here...",
429
- visible=True # default
430
  )
431
 
432
  image_input = gr.Image(
433
  type="pil",
434
  label="Upload Screenshot",
435
- visible=False # hidden by default
436
  )
437
 
438
- # Whenever input_type changes, toggle which input is visible
439
  input_type.change(
440
  fn=toggle_inputs,
441
  inputs=input_type,
@@ -443,11 +443,9 @@ with gr.Blocks() as demo:
443
  queue=False
444
  )
445
 
446
- # Button to run classification
447
  analyze_btn = gr.Button("Classify")
448
  output_json = gr.JSON(label="Result")
449
 
450
- # On button click, call the smishing_detector
451
  analyze_btn.click(
452
  fn=smishing_detector,
453
  inputs=[input_type, text_input, image_input],
 
159
  else:
160
  p_smishing, p_other_scam, p_legit = 0.0, 0.0, 1.0
161
 
162
+ # **Now** check Safe Browsing
163
+ sb_results = {}
164
  if found_urls:
165
+ sb_results = check_urls_with_google_safebrowsing(found_urls)
166
  # If any malicious => set p_smishing=1.0
167
+ if any(sb_results[u] for u in sb_results):
 
168
  p_smishing = 1.0
169
  p_other_scam = 0.0
170
  p_legit = 0.0
 
173
  "SMiShing": p_smishing,
174
  "Other Scam": p_other_scam,
175
  "Legitimate": p_legit,
176
+ "detected_lang": detected_lang,
177
+ "found_urls": found_urls,
178
+ "safe_browsing_results": sb_results # <--- show which are malicious
179
  }
180
 
181
  def query_llm_for_classification(raw_message: str) -> dict:
 
336
  )
337
  original_probs = {k: float(v) for k, v in zip(local_result["labels"], local_result["scores"])}
338
 
339
+ # 2. Basic boosting from keywords & URLs (plus Safe Browsing)
340
+ boosted_dict = boost_probabilities(original_probs, combined_text)
341
+ detected_lang = boosted_dict.pop("detected_lang", "en")
342
 
343
+ # Also retrieve the safe_browsing_results
344
+ sb_results = boosted_dict.pop("safe_browsing_results", {})
345
+ found_urls = boosted_dict.pop("found_urls", [])
346
+
347
+ for k in boosted_dict:
348
+ boosted_dict[k] = float(boosted_dict[k])
349
 
350
+ local_label = max(boosted_dict, key=boosted_dict.get)
351
+ local_conf = round(boosted_dict[local_label], 3)
352
 
353
  # 3. LLM Classification
354
  llm_classification = query_llm_for_classification(combined_text)
 
356
  llm_reason = llm_classification.get("reason", "No reason provided")
357
 
358
  # 4. Incorporate LLM’s label into final probabilities
359
+ boosted_dict = incorporate_llm_label(boosted_dict, llm_label)
360
 
361
+ final_label = max(boosted_dict, key=boosted_dict.get)
362
+ final_confidence = round(boosted_dict[final_label], 3)
363
 
364
+ # 5. Gather found keywords & URLs for the final explanation
365
  lower_text = combined_text.lower()
366
  smishing_keys, scam_keys, _ = get_keywords_by_language(combined_text)
367
 
 
 
 
 
368
  found_smishing = [kw for kw in smishing_keys if kw in lower_text]
369
  found_other_scam = [kw for kw in scam_keys if kw in lower_text]
370
 
 
383
  detected_lang=detected_lang
384
  )
385
 
386
+ # Combine results in final output
387
  return {
388
  "detected_language": detected_lang,
389
  "text_used_for_classification": combined_text,
 
391
  "boosted_probabilities_before_llm": {local_label: local_conf},
392
  "llm_label": llm_label,
393
  "llm_reason": llm_reason,
394
+ "boosted_probabilities_after_llm": {k: round(v, 3) for k, v in boosted_dict.items()},
395
  "label": final_label,
396
  "confidence": final_confidence,
397
  "smishing_keywords_found": found_smishing,
398
  "other_scam_keywords_found": found_other_scam,
399
  "urls_found": found_urls,
400
+ "safe_browsing_results": sb_results, # <--- show if malicious
401
  "final_explanation": final_explanation,
402
  }
403
 
 
409
  Return updates for (text_input, image_input) based on the radio selection.
410
  """
411
  if choice == "Text":
 
412
  return gr.update(visible=True), gr.update(visible=False)
413
  else:
 
 
414
  return gr.update(visible=False), gr.update(visible=True)
415
 
416
  with gr.Blocks() as demo:
 
427
  lines=3,
428
  label="Paste Suspicious SMS Text",
429
  placeholder="Type or paste the message here...",
430
+ visible=True
431
  )
432
 
433
  image_input = gr.Image(
434
  type="pil",
435
  label="Upload Screenshot",
436
+ visible=False
437
  )
438
 
 
439
  input_type.change(
440
  fn=toggle_inputs,
441
  inputs=input_type,
 
443
  queue=False
444
  )
445
 
 
446
  analyze_btn = gr.Button("Classify")
447
  output_json = gr.JSON(label="Result")
448
 
 
449
  analyze_btn.click(
450
  fn=smishing_detector,
451
  inputs=[input_type, text_input, image_input],