import gradio as gr
import pytesseract
from PIL import Image
from transformers import pipeline
import re
from langdetect import detect
from deep_translator import GoogleTranslator
import openai
import os
import tempfile
import requests
import json

# For text-to-speech
from gtts import gTTS

# Set your OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")

# Translator instance
translator = GoogleTranslator(source="auto", target="es")

# 1. Load separate keywords for SMiShing and Other Scam (assumed in English)
with open("smishing_keywords.txt", "r", encoding="utf-8") as f:
    SMISHING_KEYWORDS = [line.strip().lower() for line in f if line.strip()]

with open("other_scam_keywords.txt", "r", encoding="utf-8") as f:
    OTHER_SCAM_KEYWORDS = [line.strip().lower() for line in f if line.strip()]

# 2. Zero-Shot Classification Pipeline
model_name = "joeddav/xlm-roberta-large-xnli"
classifier = pipeline("zero-shot-classification", model=model_name)
CANDIDATE_LABELS = ["SMiShing", "Other Scam", "Legitimate"]

def tts_explanation(explanation: str, detected_lang: str):
    """
    Generate TTS audio from the final explanation text.
    We'll choose an English or Spanish voice in gTTS, but cannot guarantee
    a specific "female" voice; this is a best approximation.

    - If the text is Spanish: set lang="es"
    - If the text is English (or other): set lang="en"
    - We'll set tld="co.uk" for a British accent. Adjust if needed or switch
      to a more advanced TTS service.

    Returns the path to a temporary MP3 file (which gr.Audio can play),
    or None if TTS generation fails.
    """
    # Choose language for gTTS
    if detected_lang == "es":
        lang_code = "es"
        tld = "com"
    else:
        lang_code = "en"
        # gTTS doesn't let you pick male/female directly, but a different TLD
        # yields a different accent.
        tld = "co.uk"

    try:
        tts = gTTS(text=explanation, lang=lang_code, tld=tld, slow=False)
        # Write the MP3 to a temporary file; gr.Audio accepts a file path,
        # not a raw BytesIO buffer.
        tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
        tts.write_to_fp(tmp)
        tmp.close()
        return tmp.name
    except Exception as e:
        print("TTS generation error:", e)
        # If TTS fails, return None so the audio output stays empty
        return None

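# Detect the message language from a short snippet; for Spanish messages,
# translate the English keyword lists on the fly so matching works in-language.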
def get_keywords_by_language(text: str):
    snippet = text[:200]
    try:
        detected_lang = detect(snippet)
    except Exception:
        detected_lang = "en"

    if detected_lang == "es":
        smishing_in_spanish = [
            translator.translate(kw).lower() for kw in SMISHING_KEYWORDS
        ]
        other_scam_in_spanish = [
            translator.translate(kw).lower() for kw in OTHER_SCAM_KEYWORDS
        ]
        return smishing_in_spanish, other_scam_in_spanish, "es"
    else:
        return SMISHING_KEYWORDS, OTHER_SCAM_KEYWORDS, "en"

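# Nudge the zero-shot probabilities: +0.30 per matched keyword per category,
# +0.35 toward SMiShing if a URL is present, then clamp and renormalize.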
def boost_probabilities(probabilities: dict, text: str):
    lower_text = text.lower()
    smishing_keywords, other_scam_keywords, detected_lang = get_keywords_by_language(text)

    smishing_count = sum(1 for kw in smishing_keywords if kw in lower_text)
    other_scam_count = sum(1 for kw in other_scam_keywords if kw in lower_text)

    smishing_boost = 0.30 * smishing_count
    other_scam_boost = 0.30 * other_scam_count

    found_urls = re.findall(
        r"(https?://[^\s]+|\b[a-zA-Z0-9.-]+\.(?:com|net|org|edu|gov|mil|io|ai|co|info|biz|us|uk|de|fr|es|ru|jp|cn|in|au|ca|br|mx|it|nl|se|no|fi|ch|pl|kr|vn|id|tw|sg|hk)\b)",
        lower_text
    )
    if found_urls:
        smishing_boost += 0.35

    p_smishing = probabilities.get("SMiShing", 0.0)
    p_other_scam = probabilities.get("Other Scam", 0.0)
    p_legit = probabilities.get("Legitimate", 1.0)

    p_smishing += smishing_boost
    p_other_scam += other_scam_boost
    p_legit -= (smishing_boost + other_scam_boost)

    # Clamp
    p_smishing = max(p_smishing, 0.0)
    p_other_scam = max(p_other_scam, 0.0)
    p_legit = max(p_legit, 0.0)

    total = p_smishing + p_other_scam + p_legit
    if total > 0:
        p_smishing /= total
        p_other_scam /= total
        p_legit /= total
    else:
        p_smishing, p_other_scam, p_legit = 0.0, 0.0, 1.0

    return {
        "SMiShing": p_smishing,
        "Other Scam": p_other_scam,
        "Legitimate": p_legit,
        "detected_lang": detected_lang
    }

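# Ask the LLM for an independent classification (uses the legacy openai<1.0
# ChatCompletion interface), expecting a JSON reply with "label" and "reason";
# any failure falls back to the "Unknown" label.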
def query_llm_for_classification(raw_message: str) -> dict:
    if not raw_message.strip():
        return {"label": "Unknown", "reason": "No message provided to the LLM."}

    system_prompt = (
        "You are a cybersecurity expert. You will classify the user's message "
        "as one of: SMiShing, Other Scam, or Legitimate. Provide a short reason. "
        "Return only JSON with keys: label, reason."
    )
    user_prompt = f"Message: {raw_message}\nClassify it as SMiShing, Other Scam, or Legitimate."

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.2
        )
        raw_reply = response["choices"][0]["message"]["content"].strip()

        llm_result = json.loads(raw_reply)
        if "label" not in llm_result or "reason" not in llm_result:
            return {"label": "Unknown", "reason": f"Unexpected format: {raw_reply}"}

        return llm_result

    except Exception as e:
        return {"label": "Unknown", "reason": f"LLM error: {e}"}

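# Blend the LLM's label into the boosted probabilities as a flat +0.2 bump
# for the matching class, then renormalize so the scores sum to 1.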
def incorporate_llm_label(boosted: dict, llm_label: str) -> dict:
    if llm_label == "SMiShing":
        boosted["SMiShing"] += 0.2
    elif llm_label == "Other Scam":
        boosted["Other Scam"] += 0.2
    elif llm_label == "Legitimate":
        boosted["Legitimate"] += 0.2

    for k in boosted:
        if boosted[k] < 0:
            boosted[k] = 0.0

    total = sum(boosted.values())
    if total > 0:
        for k in boosted:
            boosted[k] /= total
    else:
        boosted["Legitimate"] = 1.0
        boosted["SMiShing"] = 0.0
        boosted["Other Scam"] = 0.0

    return boosted

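# Ask the LLM for a short user-facing explanation in the detected language,
# combining the local classification, the LLM classification, and the final label.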
def query_llm_for_explanation(
    text: str,
    final_label: str,
    final_conf: float,
    local_label: str,
    local_conf: float,
    llm_label: str,
    llm_reason: str,
    found_smishing: list,
    found_other_scam: list,
    found_urls: list,
    detected_lang: str
) -> str:
    if detected_lang == "es":
        system_prompt = (
            "Eres un experto en ciberseguridad. Proporciona una explicaci贸n final al usuario en espa帽ol. "
            "Combina la clasificaci贸n local, la clasificaci贸n LLM y la etiqueta final en una sola explicaci贸n breve. "
            "No reveles el c贸digo interno ni el JSON bruto; simplemente da una breve explicaci贸n f谩cil de entender. "
            "Termina con la etiqueta final."
        )
    else:
        system_prompt = (
            "You are a cybersecurity expert providing a final explanation to the user in English. "
            "Combine the local classification, the LLM classification, and the final label "
            "into one concise explanation. Do not reveal internal code or raw JSON. "
            "End with a final statement of the final label."
        )

    user_context = f"""
User Message:
{text}

Local Classification => Label: {local_label}, Confidence: {local_conf}
LLM Classification => Label: {llm_label}, Reason: {llm_reason}
Final Overall Label => {final_label} (confidence {final_conf})

Suspicious SMiShing Keywords => {found_smishing}
Suspicious Other Scam Keywords => {found_other_scam}
URLs => {found_urls}
"""

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_context}
            ],
            temperature=0.2
        )
        final_explanation = response["choices"][0]["message"]["content"].strip()
        return final_explanation
    except Exception as e:
        return f"Could not generate final explanation due to error: {e}"

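# Full pipeline: gather text (typed or OCR'd from a screenshot), run zero-shot
# classification, apply keyword/URL boosts, blend in the LLM label, and build
# a final explanation for the user.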
def smishing_detector(input_type, text, image):
    if input_type == "Text":
        combined_text = text.strip() if text else ""
    else:
        combined_text = ""
        if image is not None:
            combined_text = pytesseract.image_to_string(image, lang="spa+eng").strip()

    if not combined_text:
        return {
            "text_used_for_classification": "(none)",
            "label": "No text provided",
            "confidence": 0.0,
            "keywords_found": [],
            "urls_found": [],
            "llm_label": "Unknown",
            "llm_reason": "No text to analyze",
            "final_explanation": "No text provided"
        }

    local_result = classifier(
        sequences=combined_text,
        candidate_labels=CANDIDATE_LABELS,
        hypothesis_template="This message is {}."
    )
    original_probs = {k: float(v) for k, v in zip(local_result["labels"], local_result["scores"])}

    boosted = boost_probabilities(original_probs, combined_text)
    detected_lang = boosted.pop("detected_lang", "en")

    for k in boosted:
        boosted[k] = float(boosted[k])

    local_label = max(boosted, key=boosted.get)
    local_conf = round(boosted[local_label], 3)

    llm_classification = query_llm_for_classification(combined_text)
    llm_label = llm_classification.get("label", "Unknown")
    llm_reason = llm_classification.get("reason", "No reason provided")

    boosted = incorporate_llm_label(boosted, llm_label)

    final_label = max(boosted, key=boosted.get)
    final_confidence = round(boosted[final_label], 3)

    lower_text = combined_text.lower()
    smishing_keys, scam_keys, _ = get_keywords_by_language(combined_text)
    found_urls = re.findall(
        r"(https?://[^\s]+|\b[a-zA-Z0-9.-]+\.(?:com|net|org|edu|gov|mil|io|ai|co|info|biz|us|uk|de|fr|es|ru|jp|cn|in|au|ca|br|mx|it|nl|se|no|fi|ch|pl|kr|vn|id|tw|sg|hk)\b)",
        lower_text
    )
    found_smishing = [kw for kw in smishing_keys if kw in lower_text]
    found_other_scam = [kw for kw in scam_keys if kw in lower_text]

    final_explanation = query_llm_for_explanation(
        text=combined_text,
        final_label=final_label,
        final_conf=final_confidence,
        local_label=local_label,
        local_conf=local_conf,
        llm_label=llm_label,
        llm_reason=llm_reason,
        found_smishing=found_smishing,
        found_other_scam=found_other_scam,
        found_urls=found_urls,
        detected_lang=detected_lang
    )

    return {
        "detected_language": detected_lang,
        "text_used_for_classification": combined_text,
        "original_probabilities": {k: round(v, 3) for k, v in original_probs.items()},
        "boosted_probabilities_before_llm": {local_label: local_conf},
        "llm_label": llm_label,
        "llm_reason": llm_reason,
        "boosted_probabilities_after_llm": {k: round(v, 3) for k, v in boosted.items()},
        "label": final_label,
        "confidence": final_confidence,
        "smishing_keywords_found": found_smishing,
        "other_scam_keywords_found": found_other_scam,
        "urls_found": found_urls,
        "final_explanation": final_explanation,
    }

###
# Combined function to produce both text (JSON) and TTS audio
###
def classify_and_tts(input_type, text, image):
    """
    1. Perform the classification logic (smishing_detector).
    2. Generate TTS audio from the final explanation in a comforting female voice.
    3. Return both the JSON result & the audio bytes.
    """
    result = smishing_detector(input_type, text, image)
    final_explanation = result["final_explanation"]
    detected_lang = result.get("detected_language", "en")

    # Generate TTS from final_explanation
    audio_data = tts_explanation(final_explanation, detected_lang)
    # Return both
    return result, audio_data


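# Show the textbox or the image uploader depending on the selected input type.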
def toggle_inputs(choice):
    if choice == "Text":
        return gr.update(visible=True), gr.update(visible=False)
    else:
        return gr.update(visible=False), gr.update(visible=True)


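# Gradio UI: radio button to pick text vs. screenshot input, a Classify button,
# and JSON + audio outputs.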
with gr.Blocks() as demo:
    gr.Markdown("## SMiShing & Scam Detector with LLM-Enhanced Logic + TTS Explanation")

    with gr.Row():
        input_type = gr.Radio(
            choices=["Text", "Screenshot"], 
            value="Text", 
            label="Choose Input Type"
        )

    text_input = gr.Textbox(
        lines=3,
        label="Paste Suspicious SMS Text",
        placeholder="Type or paste the message here...",
        visible=True
    )
    image_input = gr.Image(
        type="pil",
        label="Upload Screenshot",
        visible=False
    )

    input_type.change(
        fn=toggle_inputs,
        inputs=input_type,
        outputs=[text_input, image_input],
        queue=False
    )

    analyze_btn = gr.Button("Classify")

    # We'll show the classification JSON + TTS audio
    output_json = gr.JSON(label="Classification Result")
    audio_output = gr.Audio(label="TTS Explanation")

    # We call classify_and_tts, which returns (dict_result, audio_path)
    analyze_btn.click(
        fn=classify_and_tts,
        inputs=[input_type, text_input, image_input],
        outputs=[output_json, audio_output]
    )

if __name__ == "__main__":
    if not openai.api_key:
        print("WARNING: OPENAI_API_KEY not set. LLM calls will fail or be skipped.")
    demo.launch()