hackerbyhobby committed on
Commit
9cacd96
·
unverified ·
1 Parent(s): d56e828

updated requirements

Browse files
Files changed (1) hide show
  1. app.py +14 -16
app.py CHANGED
@@ -23,9 +23,7 @@ CANDIDATE_LABELS = ["SMiShing", "Other Scam", "Legitimate"]
23
 
24
  def get_keywords_by_language(text: str):
25
  """
26
- 1. Detect language using `langdetect`.
27
- 2. If Spanish ('es'), translate each English-based keyword to Spanish using `deep-translator`.
28
- 3. If English (or other languages), use the original English lists.
29
  """
30
  snippet = text[:200] # Use a snippet for detection
31
  try:
@@ -34,7 +32,6 @@ def get_keywords_by_language(text: str):
34
  detected_lang = "en" # Default to English if detection fails
35
 
36
  if detected_lang == "es":
37
- # Translate all SMiShing and Other Scam keywords to Spanish
38
  smishing_in_spanish = [
39
  translator.translate(kw).lower() for kw in SMISHING_KEYWORDS
40
  ]
@@ -43,7 +40,6 @@ def get_keywords_by_language(text: str):
43
  ]
44
  return smishing_in_spanish, other_scam_in_spanish, "es"
45
  else:
46
- # Default to English keywords
47
  return SMISHING_KEYWORDS, OTHER_SCAM_KEYWORDS, "en"
48
 
49
  def boost_probabilities(probabilities: dict, text: str):
@@ -63,20 +59,17 @@ def boost_probabilities(probabilities: dict, text: str):
63
  if found_urls:
64
  smishing_boost += 0.35
65
 
66
- p_smishing = probabilities["SMiShing"]
67
- p_other_scam = probabilities["Other Scam"]
68
- p_legit = probabilities["Legitimate"]
69
 
70
  p_smishing += smishing_boost
71
  p_other_scam += other_scam_boost
72
  p_legit -= (smishing_boost + other_scam_boost)
73
 
74
- if p_smishing < 0:
75
- p_smishing = 0.0
76
- if p_other_scam < 0:
77
- p_other_scam = 0.0
78
- if p_legit < 0:
79
- p_legit = 0.0
80
 
81
  total = p_smishing + p_other_scam + p_legit
82
  if total > 0:
@@ -94,6 +87,9 @@ def boost_probabilities(probabilities: dict, text: str):
94
  }
95
 
96
  def smishing_detector(text, image):
 
 
 
97
  combined_text = text or ""
98
  if image is not None:
99
  ocr_text = pytesseract.image_to_string(image, lang="spa+eng")
@@ -114,11 +110,13 @@ def smishing_detector(text, image):
114
  candidate_labels=CANDIDATE_LABELS,
115
  hypothesis_template="This message is {}."
116
  )
117
- original_probs = dict(zip(result["labels"], result["scores"]))
118
  boosted = boost_probabilities(original_probs, combined_text)
 
 
 
119
  final_label = max(boosted, key=boosted.get)
120
  final_confidence = round(boosted[final_label], 3)
121
- detected_lang = boosted.pop("detected_lang", "en")
122
 
123
  lower_text = combined_text.lower()
124
  smishing_keys, scam_keys, _ = get_keywords_by_language(combined_text)
 
23
 
24
  def get_keywords_by_language(text: str):
25
  """
26
+ Detect language using `langdetect` and translate keywords if needed.
 
 
27
  """
28
  snippet = text[:200] # Use a snippet for detection
29
  try:
 
32
  detected_lang = "en" # Default to English if detection fails
33
 
34
  if detected_lang == "es":
 
35
  smishing_in_spanish = [
36
  translator.translate(kw).lower() for kw in SMISHING_KEYWORDS
37
  ]
 
40
  ]
41
  return smishing_in_spanish, other_scam_in_spanish, "es"
42
  else:
 
43
  return SMISHING_KEYWORDS, OTHER_SCAM_KEYWORDS, "en"
44
 
45
  def boost_probabilities(probabilities: dict, text: str):
 
59
  if found_urls:
60
  smishing_boost += 0.35
61
 
62
+ p_smishing = probabilities.get("SMiShing", 0.0)
63
+ p_other_scam = probabilities.get("Other Scam", 0.0)
64
+ p_legit = probabilities.get("Legitimate", 1.0)
65
 
66
  p_smishing += smishing_boost
67
  p_other_scam += other_scam_boost
68
  p_legit -= (smishing_boost + other_scam_boost)
69
 
70
+ p_smishing = max(p_smishing, 0.0)
71
+ p_other_scam = max(p_other_scam, 0.0)
72
+ p_legit = max(p_legit, 0.0)
 
 
 
73
 
74
  total = p_smishing + p_other_scam + p_legit
75
  if total > 0:
 
87
  }
88
 
89
  def smishing_detector(text, image):
90
+ """
91
+ Main detection function combining text and OCR.
92
+ """
93
  combined_text = text or ""
94
  if image is not None:
95
  ocr_text = pytesseract.image_to_string(image, lang="spa+eng")
 
110
  candidate_labels=CANDIDATE_LABELS,
111
  hypothesis_template="This message is {}."
112
  )
113
+ original_probs = {k: float(v) for k, v in zip(result["labels"], result["scores"])}
114
  boosted = boost_probabilities(original_probs, combined_text)
115
+
116
+ boosted = {k: float(v) for k, v in boosted.items() if isinstance(v, (int, float))}
117
+ detected_lang = boosted.pop("detected_lang", "en")
118
  final_label = max(boosted, key=boosted.get)
119
  final_confidence = round(boosted[final_label], 3)
 
120
 
121
  lower_text = combined_text.lower()
122
  smishing_keys, scam_keys, _ = get_keywords_by_language(combined_text)