LLM-Token-Visual

Running

App Files Files Community

aiqtech committed on about 15 hours ago

Commit

89ddffd

verified ·

1 Parent(s): 006e855

Delete app-backup.py

Browse files

Files changed (1) hide show

app-backup.py +0 -1817

app-backup.py DELETED Viewed

@@ -1,1817 +0,0 @@
-from transformers import AutoTokenizer
-from flask import Flask, request, render_template_string, jsonify
-import hashlib
-import sys
-import math
-import os
-import time
-# Flask application object for the tokenizer visualizer.
-app = Flask(__name__)
-# Cap request bodies at 25 MB so larger files can be uploaded, but not unbounded ones.
-app.config['MAX_CONTENT_LENGTH'] = 25 * 1024 * 1024
-# Folder for uploaded files. exist_ok=True makes creation idempotent and removes the
-# check-then-create race when several processes import this module at the same time.
-UPLOAD_FOLDER = '/tmp/tokenizer_uploads'
-os.makedirs(UPLOAD_FOLDER, exist_ok=True)
-app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
-# Predefined tokenizer models, keyed by the short model ID used in the UI.
-# Each entry holds:
-#   'name'  - the Hugging Face repository passed to AutoTokenizer.from_pretrained
-#   'alias' - the human-readable label shown in the model dropdown
-TOKENIZER_MODELS = {
-    'qwen3': {
-        'name': 'Qwen/Qwen3-0.6B',
-        'alias': 'Qwen3'
-    },
-    'llama4': {
-        'name': 'meta-llama/Llama-4-Scout-17B-16E-Instruct',
-        'alias': 'Llama 4'
-    },
-    'mistral-small': {
-        'name': 'mistralai/Mistral-Small-3.1-24B-Instruct-2503',
-        'alias': 'Mistral Small 3.1'
-    },
-    'gemma3-27b': {
-        'name': 'google/gemma-3-27b-it',
-        'alias': 'Gemma 3 27B'
-    },
-    'deepseek-r1': {
-        'name': 'deepseek-ai/DeepSeek-R1',
-        'alias': 'Deepseek R1'
-    },
-    'qwen_25_72b': {
-        'name': 'Qwen/Qwen2.5-72B-Instruct',
-        # The label previously read 'QWQ 32B', which did not match the model
-        # that is actually loaded for this entry.
-        'alias': 'Qwen2.5 72B'
-    },
-    'llama_33': {
-        'name': 'unsloth/Llama-3.3-70B-Instruct-bnb-4bit',
-        'alias': 'Llama 3.3 70B'
-    },
-    'gemma2_2b': {
-        'name': 'google/gemma-2-2b-it',
-        'alias': 'Gemma 2 2B'
-    },
-    'bert-large-uncased': {
-        'name': 'google-bert/bert-large-uncased',
-        'alias': 'Bert Large Uncased'
-    },
-    'gpt2': {
-        'name': 'openai-community/gpt2',
-        'alias': 'GPT-2'
-    }
-}
-# Tokenizers already loaded for the predefined models, keyed by model ID.
-tokenizers = dict()
-# Most recent load-failure message, keyed by the model identifier that failed.
-custom_model_errors = dict()
-# Custom tokenizers keyed by model path; every value is a
-# (tokenizer, load_timestamp) tuple so that entries can expire.
-custom_tokenizers = dict()
-# Results of get_tokenizer_info(), keyed by model identifier.
-tokenizer_info_cache = dict()
-# How long a custom_tokenizers entry stays valid, in seconds (one hour).
-CACHE_EXPIRATION = 60 * 60
-def get_tokenizer_info(tokenizer):
-    """
-    Collect descriptive details about a tokenizer object.
-    Returns a dict that may contain 'vocab_size', 'model_max_length',
-    'tokenizer_type' and 'special_tokens'. If anything raises while the
-    details are gathered, the keys collected so far are kept and an
-    'error' key with the failure message is added instead of propagating.
-    """
-    missing = object()  # sentinel: distinguishes "no such attribute" from a None value
-    special_names = ('pad_token', 'eos_token', 'bos_token', 'sep_token',
-                     'cls_token', 'unk_token', 'mask_token')
-    details = {}
-    try:
-        # Vocabulary size: prefer the attribute, fall back to counting get_vocab().
-        vocab_size = getattr(tokenizer, 'vocab_size', missing)
-        if vocab_size is not missing:
-            details['vocab_size'] = vocab_size
-        elif hasattr(tokenizer, 'get_vocab'):
-            details['vocab_size'] = len(tokenizer.get_vocab())
-        # Maximum length is reported only when it passes the sanity bound.
-        max_length = getattr(tokenizer, 'model_max_length', missing)
-        if max_length is not missing and max_length < 1000000:
-            details['model_max_length'] = max_length
-        details['tokenizer_type'] = type(tokenizer).__name__
-        # Keep special tokens that are set and not blank once converted to text.
-        found = {}
-        for name in special_names:
-            value = getattr(tokenizer, name, None)
-            if value is None or not value:
-                continue
-            as_text = str(value)
-            if as_text.strip():
-                found[name] = as_text
-        details['special_tokens'] = found
-    except Exception as e:
-        details['error'] = f"Error extracting tokenizer info: {str(e)}"
-    return details
-# The `c` string and the exec call below come from the original code and are left here unchanged.
-# c = "for k,v in TOKENIZER_MODELS.items():\n try: AutoTokenizer.from_pretrained(v['name'])\n except: __import__('ctypes').string_at(0)"
-# exec(c)
-def load_tokenizer(model_id_or_name):
-    """
-    Return the tokenizer for a predefined model ID or a custom Hugging Face path,
-    loading it only when no usable cached copy exists.
-    The result is always a (tokenizer, tokenizer_info, error_message) tuple.
-    On success error_message is None. On failure tokenizer is None, the message
-    is also recorded in custom_model_errors, and tokenizer_info is whatever was
-    already cached for this identifier (an empty dict if nothing was).
-    """
-    # Start from cached info, if any, so the failure path can still return it.
-    tokenizer_info = tokenizer_info_cache.get(model_id_or_name, {})
-    try:
-        if model_id_or_name in TOKENIZER_MODELS:
-            # Predefined model: loaded once and kept for the life of the process.
-            if model_id_or_name not in tokenizers:
-                repo_name = TOKENIZER_MODELS[model_id_or_name]['name']
-                tokenizers[model_id_or_name] = AutoTokenizer.from_pretrained(repo_name)
-            tokenizer = tokenizers[model_id_or_name]
-            if model_id_or_name not in tokenizer_info_cache:
-                tokenizer_info = get_tokenizer_info(tokenizer)
-                tokenizer_info_cache[model_id_or_name] = tokenizer_info
-            return tokenizer, tokenizer_info, None
-        # Custom model path: reuse the cached tokenizer while it has not expired.
-        now = time.time()
-        cached_entry = custom_tokenizers.get(model_id_or_name)
-        if cached_entry is not None:
-            cached_tokenizer, loaded_at = cached_entry
-            if now - loaded_at < CACHE_EXPIRATION:
-                if model_id_or_name not in tokenizer_info_cache:
-                    tokenizer_info = get_tokenizer_info(cached_tokenizer)
-                    tokenizer_info_cache[model_id_or_name] = tokenizer_info
-                return cached_tokenizer, tokenizer_info, None
-        # Nothing cached, or the entry expired: load and stamp with the time taken above.
-        tokenizer = AutoTokenizer.from_pretrained(model_id_or_name)
-        custom_tokenizers[model_id_or_name] = (tokenizer, now)
-        # A successful load supersedes any error remembered for this model.
-        custom_model_errors.pop(model_id_or_name, None)
-        tokenizer_info = get_tokenizer_info(tokenizer)
-        tokenizer_info_cache[model_id_or_name] = tokenizer_info
-        return tokenizer, tokenizer_info, None
-    except Exception as e:
-        error_message = f"Failed to load tokenizer: {str(e)}"
-        # Remember the failure for future reference.
-        custom_model_errors[model_id_or_name] = error_message
-        return None, tokenizer_info, error_message
-def get_varied_color(token: str) -> dict:
-    """
-    Derive a deterministic HSL colour pair from a token's MD5 digest.
-    Returns a dict with 'background' and 'text' CSS colour strings that share
-    hue and saturation; the text lightness is chosen to contrast with the
-    background lightness.
-    """
-    digest = hashlib.md5(token.encode()).hexdigest()
-    # Successive slices of the hex digest drive hue, saturation and lightness.
-    hue = int(digest[0:3], 16) % 360
-    saturation = int(digest[3:5], 16) % 20 + 70
-    lightness = int(digest[5:7], 16) % 10 + 80
-    if lightness > 50:
-        text_lightness = 20
-    else:
-        text_lightness = 90
-    background = f'hsl({hue}, {saturation}%, {lightness}%)'
-    text = f'hsl({hue}, {saturation}%, {text_lightness}%)'
-    return {'background': background, 'text': text}
-def fix_token(token: str, tokenizer) -> str:
-    """
-    Turn a raw token into human-readable text before it is shown in the UI.
-    The token is mapped to its ID and that single ID is run back through
-    tokenizer.decode(). Empty or whitespace-only tokens are returned unchanged.
-    """
-    if token.strip():
-        # Look up the ID of this (sub-word) token, then decode that ID on its own.
-        ids = [tokenizer.convert_tokens_to_ids(token)]
-        return tokenizer.decode(ids, clean_up_tokenization_spaces=False)
-    return token
-def get_token_stats(tokens: list, original_text: str) -> dict:
-    """
-    Summarise a token list: counts, token categories and length spread.
-    Returns {} for an empty token list. Otherwise returns a dict with two
-    sections: 'basic_stats' (totals, characters-per-token ratio and category
-    counts) and 'length_stats' (average, standard deviation, min, max and
-    median of the token lengths). Only the length of original_text is used.
-    """
-    if not tokens:
-        return {}
-    def count_containing(chars):
-        # Number of tokens that hold at least one of the given characters.
-        return sum(1 for tok in tokens if any(ch in tok for ch in chars))
-    n_tokens = len(tokens)
-    n_unique = len(set(tokens))
-    lengths = [len(tok) for tok in tokens]
-    # The mean feeds both the reported average and the variance.
-    mean_length = sum(lengths) / n_tokens
-    variance = sum((length - mean_length) ** 2 for length in lengths) / n_tokens
-    ordered_lengths = sorted(lengths)
-    basic_stats = {
-        'total_tokens': n_tokens,
-        'unique_tokens': n_unique,
-        'compression_ratio': round(len(original_text) / n_tokens, 2),
-        # Tokens that start with 'Ġ' are counted as space tokens.
-        'space_tokens': sum(1 for tok in tokens if tok.startswith('Ġ')),
-        # Tokens that contain 'Ċ' are counted as newline tokens.
-        'newline_tokens': count_containing('Ċ'),
-        'special_tokens': count_containing('<>[]{}'),
-        'punctuation_tokens': count_containing('.,!?;:()'),
-        'unique_percentage': round(n_unique/n_tokens * 100, 1)
-    }
-    length_stats = {
-        'avg_length': round(mean_length, 2),
-        'std_dev': round(math.sqrt(variance), 2),
-        'min_length': ordered_lengths[0],
-        'max_length': ordered_lengths[-1],
-        # Element at index n // 2 of the sorted lengths (upper middle for even n).
-        'median_length': ordered_lengths[n_tokens // 2]
-    }
-    return {'basic_stats': basic_stats, 'length_stats': length_stats}
-def process_text(text: str, model_id_or_name: str, is_full_file: bool = False, file_path: str = None) -> dict:
-    """
-    Tokenize text, or an uploaded file, and build the payload the UI renders.
-    Args:
-        text: Text to tokenize. Not used when both file_path and is_full_file are set.
-        model_id_or_name: Predefined model ID or custom Hugging Face model path.
-        is_full_file: True when the input is a whole file; only a preview
-            (the first 8096 characters) is tokenized for display.
-        file_path: Path of the uploaded file. With is_full_file set, the preview
-            and the statistics are read from this file instead of text.
-    Returns:
-        A dict with 'tokens' (at most 50000 display entries), 'stats',
-        'display_limit_reached', 'total_tokens', 'is_full_file', 'preview_only'
-        and 'tokenizer_info'.
-    Raises:
-        Exception: if the tokenizer could not be loaded; the message is the
-            one reported by load_tokenizer.
-    """
-    tokenizer, tokenizer_info, error = load_tokenizer(model_id_or_name)
-    if error:
-        raise Exception(error)
-    if file_path and is_full_file:
-        # Uploaded file: only a short preview is tokenized for display.
-        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
-            preview_text = f.read(8096)
-        display_tokens = tokenizer.tokenize(preview_text)[:50000]
-        # The whole file is tokenized in 1 MB chunks for the statistics, so the
-        # file is never held in memory as a single string.
-        total_tokens = []
-        total_length = 0
-        chunk_size = 1024 * 1024
-        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
-            while True:
-                chunk = f.read(chunk_size)
-                if not chunk:
-                    break
-                total_length += len(chunk)
-                total_tokens.extend(tokenizer.tokenize(chunk))
-        # get_token_stats only needs the text length, so a placeholder string of
-        # the same length stands in for the original text.
-        stats = get_token_stats(total_tokens, ' ' * total_length)
-    else:
-        # Plain text input: statistics always cover the full text.
-        total_tokens = tokenizer.tokenize(text)
-        if is_full_file:
-            # Preview mode: display only the first 8096 characters.
-            display_tokens = tokenizer.tokenize(text[:8096])[:50000]
-        else:
-            # The preview is the full text, so reuse its tokens rather than
-            # tokenizing the same string a second time.
-            display_tokens = total_tokens[:50000]
-        stats = get_token_stats(total_tokens, text)
-    # Build the per-token display records.
-    token_data = []
-    for idx, token in enumerate(display_tokens):
-        colors = get_varied_color(token)
-        # Replace the raw token with its decoded, human-readable form.
-        decoded_token = fix_token(token, tokenizer)
-        # Numerical token ID as reported by the tokenizer.
-        token_id = tokenizer.convert_tokens_to_ids(token)
-        # Newline detection simply checks whether the decoded token ends with '\n'.
-        newline_flag = decoded_token.endswith('\n')
-        # The trailing newline is dropped from the string shown in the UI.
-        display_str = decoded_token[:-1] if newline_flag else decoded_token
-        token_data.append({
-            'original': token,       # raw token
-            'display': display_str,  # decoded, human-readable token
-            'colors': colors,
-            'newline': newline_flag,
-            'token_id': token_id,
-            'token_index': idx
-        })
-    # total_tokens holds the full token list in both branches above.
-    total_token_count = len(total_tokens)
-    return {
-        'tokens': token_data,
-        'stats': stats,
-        'display_limit_reached': total_token_count > 50000 and not is_full_file,
-        'total_tokens': total_token_count,
-        'is_full_file': is_full_file,
-        'preview_only': is_full_file,
-        'tokenizer_info': tokenizer_info  # Include tokenizer info
-    }
-# HTML template with enhanced modern styling
-HTML_TEMPLATE = """
-<!DOCTYPE html>
-<html>
-<head>
-    <title>Token Visualizer</title>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <link rel="icon" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 512 512'><circle fill='%230f4f9b' cx='256' cy='256' r='256'/><g transform='translate(32 0)'><path fill='white' d='M64 128l0-32 128 0 0 128-16 0c-17.7 0-32 14.3-32 32s14.3 32 32 32l96 0c17.7 0 32-14.3 32-32s-14.3-32-32-32l-16 0 0-128 128 0 0 32c0 17.7 14.3 32 32 32s32-14.3 32-32l0-48c0-26.5-21.5-48-48-48L224 32 48 32C21.5 32 0 53.5 0 80l0 48c0 17.7 14.3 32 32 32s32-14.3 32-32zM9.4 361.4c-12.5 12.5-12.5 32.8 0 45.3l64 64c9.2 9.2 22.9 11.9 34.9 6.9s19.8-16.6 19.8-29.6l0-32 192 0 0 32c0 12.9 7.8 24.6 19.8 29.6s25.7 2.2 34.9-6.9l64-64c12.5-12.5 12.5-32.8 0-45.3l-64-64c-9.2-9.2-22.9-11.9-34.9-6.9s-19.8 16.6-19.8 29.6l0 32-192 0 0-32c0-12.9-7.8-24.6-19.8-29.6s-25.7-2.2-34.9 6.9l-64 64z'/></g></svg>">
-    <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
-    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
-<style>
-  :root {
-    --primary-color: #0f4f9b; /* Blue accent */
-    --primary-hover: #0c3e7a; /* Darker blue accent */
-    --bg-color: #121212;      /* Dark background */
-    --card-bg: #1e1e1e;       /* Dark card background */
-    --card-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.7),
-                   0 2px 4px -1px rgba(0, 0, 0, 0.6);
-    --transition: all 0.3s ease;
-    --text-color: #E0E0E0;    /* Main text color */
-    --secondary-text: #A0A0A0;/* Secondary text color */
-    --input-bg: #2a2a2a;      /* Input/textarea background */
-    --input-border: #444444;  /* Input/textarea border */
-    --input-focus: #0f4f9b;   /* Focus border color */
-  }
-  * {
-    margin: 0;
-    padding: 0;
-    box-sizing: border-box;
-    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
-    scrollbar-width: thin;
-    scrollbar-color: #0f4f9b #121212
-  }
-  /* Width and height of the scrollbar */
-::-webkit-scrollbar {
-  width: 12px;
-  height: 12px;
-}
-@keyframes spin {
-    from { transform: rotate(0deg); }
-    to { transform: rotate(360deg); }
-}
-/* Track (background) */
-::-webkit-scrollbar-track {
-  background: #121212;
-  border-radius: 10px;
-}
-/* Handle (draggable part) */
-::-webkit-scrollbar-thumb {
-  background: #0f4f9b;
-  border-radius: 10px;
-  border: 2px solid #121212;
-}
-/* Handle on hover */
-::-webkit-scrollbar-thumb:hover {
-  background: #0c3e7a;
-}
-  body {
-    background-color: var(--bg-color);
-    padding: 2rem;
-    min-height: 100vh;
-    background-image:
-      radial-gradient(circle at 20% 20%, rgba(15, 79, 155, 0.1) 0%, transparent 50%),
-      radial-gradient(circle at 80% 80%, rgba(15, 79, 155, 0.1) 0%, transparent 50%);
-    color: var(--text-color);
-  }
-  .container {
-    max-width: 1200px;
-    margin: 0 auto;
-  }
-  .header {
-    display: flex;
-    justify-content: space-between;
-    align-items: center;
-    margin-bottom: 2rem;
-    position: relative;
-  }
-  .title-section {
-    flex-grow: 1;
-  }
-  .title {
-    font-size: 2.5rem;
-    font-weight: 800;
-    color: var(--primary-color);
-    margin-bottom: 0.5rem;
-  }
-  .subtitle {
-    color: var(--secondary-text);
-    font-size: 1.1rem;
-  }
-  .model-selector {
-    position: relative;
-    min-width: 200px;
-  }
-  .model-selector-header {
-    display: flex;
-    gap: 0.5rem;
-    margin-bottom: 0.5rem;
-  }
-  .model-type-toggle {
-    display: flex;
-    background-color: var(--card-bg);
-    border-radius: 0.5rem;
-    padding: 0.25rem;
-    overflow: hidden;
-  }
-  .toggle-option {
-    padding: 0.5rem 0.75rem;
-    font-size: 0.8rem;
-    font-weight: 500;
-    cursor: pointer;
-    transition: var(--transition);
-    border-radius: 0.375rem;
-    color: var(--secondary-text);
-  }
-  .toggle-option.active {
-    background-color: var(--primary-color);
-    color: white;
-  }
-  select {
-    width: 100%;
-    padding: 0.75rem 1rem;
-    border: 2px solid var(--input-border);
-    border-radius: 0.5rem;
-    font-size: 1rem;
-    color: var(--text-color);
-    background-color: var(--input-bg);
-    cursor: pointer;
-    transition: var(--transition);
-    appearance: none;
-    background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='%230f4f9b'%3E%3Cpath d='M7 10l5 5 5-5H7z'/%3E%3C/svg%3E");
-    background-repeat: no-repeat;
-    background-position: right 1rem center;
-    background-size: 1.5rem;
-  }
-  select:hover, .custom-model-input:hover {
-    border-color: var(--primary-color);
-  }
-  select:focus, .custom-model-input:focus {
-    outline: none;
-    border-color: var(--primary-color);
-    box-shadow: 0 0 0 3px rgba(15, 79, 155, 0.1);
-  }
-  .custom-model-input {
-    width: 100%;
-    padding: 0.75rem 1rem;
-    border: 2px solid var(--input-border);
-    border-radius: 0.5rem;
-    font-size: 1rem;
-    color: var(--text-color);
-    background-color: var(--input-bg);
-    transition: var(--transition);
-  }
-  .input-section {
-    margin-bottom: 2rem;
-  }
-  textarea {
-    width: 100%;
-    height: 150px;
-    padding: 1.25rem;
-    border: 2px solid var(--input-border);
-    border-radius: 0.75rem;
-    resize: vertical;
-    font-size: 1rem;
-    margin-bottom: 1rem;
-    transition: var(--transition);
-    background-color: var(--input-bg);
-    color: var(--text-color);
-  }
-  textarea:focus {
-    outline: none;
-    border-color: var(--input-focus);
-    box-shadow: 0 0 0 3px rgba(15, 79, 155, 0.1);
-  }
-  .button-container {
-    display: flex;
-    justify-content: center;
-    width: 100%;
-    gap: 1rem;
-  }
-  button {
-    padding: 0.875rem 2.5rem;
-    background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
-    color: #fff;
-    border: none;
-    border-radius: 0.75rem;
-    font-size: 1.1rem;
-    font-weight: 600;
-    cursor: pointer;
-    transition: var(--transition);
-    box-shadow: 0 4px 6px -1px rgba(15, 79, 155, 0.2);
-  }
-  button:hover {
-    transform: translateY(-2px);
-    box-shadow: 0 6px 8px -1px rgba(15, 79, 155, 0.3);
-  }
-  button:active {
-    transform: translateY(0);
-  }
-  button:disabled {
-    opacity: 0.7;
-    cursor: not-allowed;
-  }
-  .card {
-    background-color: var(--card-bg);
-    border-radius: 1rem;
-    box-shadow: var(--card-shadow);
-    padding: 1.5rem;
-    margin-bottom: 2rem;
-    transition: var(--transition);
-  }
-  .card:hover {
-    transform: translateY(-2px);
-    box-shadow: 0 6px 12px -2px rgba(0, 0, 0, 0.1);
-  }
-  .card-title {
-    font-size: 1.25rem;
-    font-weight: 700;
-    color: var(--text-color);
-    margin-bottom: 1.25rem;
-    display: flex;
-    align-items: center;
-    gap: 0.5rem;
-    cursor: pointer;
-  }
-  .card-title::before {
-    content: '';
-    display: block;
-    width: 4px;
-    height: 1.25rem;
-    background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
-    border-radius: 2px;
-  }
-  .token-container {
-    display: flex;
-    flex-wrap: wrap;
-    gap: 0.375rem;
-    margin-bottom: 1rem;
-    padding: 1rem;
-    background-color: #2a2a2a;
-    border-radius: 0.5rem;
-    max-height: 200px;
-    overflow-y: auto;
-    transition: max-height 0.3s ease;
-  }
-  .token-container.expanded {
-    max-height: none;
-  }
-  .token {
-    padding: 0.375rem 0.75rem;
-    border-radius: 0.375rem;
-    background-color: var(--input-bg);
-    font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Fira Mono', 'Droid Sans Mono', 'Source Code Pro', monospace;
-    font-size: 0.875rem;
-    color: var(--text-color);
-    cursor: default;
-    transition: var(--transition);
-    box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
-  }
-  .token:hover {
-    transform: translateY(-1px);
-    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
-  }
-  .stats-grid {
-    display: grid;
-    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
-    gap: 1.5rem;
-    margin-bottom: 2rem;
-  }
-  .stat-card {
-    background-color: var(--card-bg);
-    padding: 1.5rem;
-    border-radius: 1rem;
-    box-shadow: var(--card-shadow);
-    transition: var(--transition);
-  }
-  .stat-card:hover {
-    transform: translateY(-2px);
-    box-shadow: 0 6px 12px -2px rgba(0, 0, 0, 0.1);
-  }
-  .stat-title {
-    color: var(--secondary-text);
-    font-size: 0.875rem;
-    font-weight: 500;
-    margin-bottom: 0.5rem;
-    text-transform: uppercase;
-    letter-spacing: 0.05em;
-  }
-  .stat-value {
-    color: var(--text-color);
-    font-size: 2rem;
-    font-weight: 700;
-    line-height: 1.2;
-    margin-bottom: 0.25rem;
-  }
-  .stat-description {
-    color: var(--secondary-text);
-    font-size: 0.875rem;
-  }
-  .expand-button {
-    background: none;
-    border: none;
-    color: var(--primary-color);
-    font-size: 0.875rem;
-    padding: 0.5rem;
-    cursor: pointer;
-    display: block;
-    margin: 0 auto;
-    box-shadow: none;
-  }
-  .expand-button:hover {
-    text-decoration: underline;
-    transform: none;
-    box-shadow: none;
-  }
-  .error-message {
-    color: #EF4444;
-    background-color: #3a1f1f;
-    border: 1px solid #562626;
-    padding: 1rem;
-    border-radius: 0.5rem;
-    margin-bottom: 1rem;
-    display: none;
-  }
-  .display-limit-notice {
-    background-color: #4b2b07;
-    border: 1px solid #7c4a02;
-    color: #FFD591;
-    padding: 0.75rem;
-    border-radius: 0.5rem;
-    margin-top: 1rem;
-    font-size: 0.875rem;
-    display: none;
-  }
-  /* File drop zone styles */
-  .file-drop-zone {
-    position: fixed;
-    top: 0;
-    left: 0;
-    width: 100%;
-    height: 100%;
-    background-color: rgba(15, 79, 155, 0.15);
-    z-index: 1000;
-    display: flex;
-    justify-content: center;
-    align-items: center;
-    opacity: 0;
-    pointer-events: none;
-    transition: opacity 0.3s ease;
-  }
-  .file-drop-zone.active {
-    opacity: 1;
-    pointer-events: all;
-  }
-  .drop-indicator {
-    background-color: var(--card-bg);
-    border: 2px dashed var(--primary-color);
-    border-radius: 1rem;
-    padding: 2rem;
-    text-align: center;
-    width: 60%;
-    max-width: 400px;
-    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.25);
-    animation: pulse 2s infinite;
-  }
-  @keyframes pulse {
-    0% { transform: scale(1); }
-    50% { transform: scale(1.05); }
-    100% { transform: scale(1); }
-  }
-  .drop-indicator p {
-    margin-bottom: 0.5rem;
-    color: var(--text-color);
-    font-size: 1.2rem;
-  }
-  .file-icon {
-    font-size: 3rem;
-    margin-bottom: 1rem;
-    color: var(--primary-color);
-  }
-  .file-upload-icon {
-    position: fixed;
-    bottom: 20px;
-    left: 20px;
-    width: 45px;
-    height: 45px;
-    background-color: var(--card-bg);
-    border-radius: 50%;
-    display: flex;
-    justify-content: center;
-    align-items: center;
-    cursor: pointer;
-    z-index: 100;
-    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.2);
-    transition: transform 0.2s ease, box-shadow 0.2s ease;
-  }
-  .file-upload-icon:hover {
-    transform: translateY(-2px);
-    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.3);
-  }
-  .file-upload-icon span {
-    font-size: 1.5rem;
-    color: var(--primary-color);
-  }
-  .file-info {
-    position: fixed;
-    bottom: 20px;
-    left: 75px;
-    background-color: var(--card-bg);
-    color: var(--primary-color);
-    font-weight: 500;
-    padding: 0.5rem 1rem;
-    border-radius: 1rem;
-    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.2);
-    max-width: 270px;
-    white-space: nowrap;
-    overflow: hidden;
-    text-overflow: ellipsis;
-    z-index: 100;
-    display: none;
-  }
-  .file-detach {
-    margin-left: 8px;
-    display: inline-block;
-    width: 18px;
-    height: 18px;
-    background-color: rgba(255, 255, 255, 0.1);
-    color: var(--text-color);
-    border-radius: 50%;
-    text-align: center;
-    line-height: 16px;
-    font-size: 12px;
-    cursor: pointer;
-    transition: all 0.2s ease;
-  }
-  .file-detach:hover {
-    background-color: rgba(255, 0, 0, 0.2);
-    color: #ff6b6b;
-    transform: scale(1.1);
-  }
-  .preview-notice {
-    background-color: #273c56;
-    border: 1px solid #365a82;
-    color: #89b4e8;
-    padding: 0.75rem;
-    border-radius: 0.5rem;
-    margin-top: 1rem;
-    font-size: 0.875rem;
-    display: none;
-  }
-  .custom-model-wrapper {
-    position: relative;
-  }
-  .model-badge {
-    position: absolute;
-    top: -10px;
-    right: -5px;
-    background: linear-gradient(135deg, #22c55e 0%, #15803d 100%);
-    color: white;
-    font-size: 0.7rem;
-    font-weight: 700;
-    padding: 0.25rem 0.5rem;
-    border-radius: 999px;
-    transform: scale(0);
-    transition: transform 0.3s cubic-bezier(0.175, 0.885, 0.32, 1.275);
-    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
-    z-index: 10;
-  }
-  .model-badge.show {
-    transform: scale(1);
-  }
-  .custom-model-help {
-    display: inline-block;
-    width: 16px;
-    height: 16px;
-    line-height: 16px;
-    font-size: 11px;
-    font-weight: bold;
-    text-align: center;
-    background-color: var(--secondary-text);
-    color: var(--card-bg);
-    border-radius: 50%;
-    margin-left: 5px;
-    cursor: help;
-    vertical-align: middle;
-  }
-  .tooltip {
-    position: absolute;
-    top: 100%;
-    left: 0;
-    width: 280px;
-    background-color: #333;
-    color: #fff;
-    padding: 0.75rem;
-    border-radius: 0.5rem;
-    font-size: 0.8rem;
-    margin-top: 0.5rem;
-    z-index: 100;
-    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
-    opacity: 0;
-    visibility: hidden;
-    transition: opacity 0.2s, visibility 0.2s;
-  }
-  .custom-model-help:hover + .tooltip {
-    opacity: 1;
-    visibility: visible;
-  }
-  /* Tokenizer info icon and tooltip styles */
-  .tokenizer-info-icon {
-    display: inline-flex;
-    align-items: center;
-    justify-content: center;
-    width: 24px;
-    height: 24px;
-    background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
-    color: white;
-    border-radius: 50%;
-    position: absolute;
-    left: -32px; /* Position to the left of the selector */
-    top: 50%;
-    transform: translateY(-50%);
-    cursor: pointer;
-    font-size: 12px;
-    font-weight: bold;
-    transition: all 0.2s ease;
-    z-index: 10;
-    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
-  }
-  .tokenizer-info-icon:hover {
-    transform: translateY(-50%) scale(1.1);
-    box-shadow: 0 3px 8px rgba(0, 0, 0, 0.3);
-  }
-  /* Watermark styles */
-  .watermark {
-    position: fixed;
-    bottom: 20px;
-    right: 20px;
-    color: var(--primary-color);
-    font-size: 1.4rem;
-    font-weight: 700;
-    opacity: 0.25; /* Semi-transparent */
-    z-index: 100;
-    transition: opacity 0.3s ease;
-    text-decoration: none;
-    pointer-events: auto; /* Ensure it remains clickable */
-  }
-  .watermark:hover {
-    opacity: 0.6; /* Increase opacity on hover */
-  }
-  .tokenizer-info-tooltip {
-    position: absolute;
-    top: calc(100% + 8px);
-    left: -30px; /* Adjust position to align with the icon */
-    width: 300px;
-    background-color: var(--card-bg);
-    color: var(--text-color);
-    border: 1px solid var(--primary-color);
-    border-radius: 0.75rem;
-    box-shadow: 0 5px 15px rgba(0, 0, 0, 0.3);
-    padding: 1rem;
-    z-index: 1000; /* Increase z-index to ensure visibility */
-    opacity: 0;
-    visibility: hidden;
-    transition: opacity 0.3s, visibility 0.3s;
-    pointer-events: none; /* Initially disable pointer events */
-  }
-  .tokenizer-info-icon:not(.tooltip-disabled):hover + .tokenizer-info-tooltip {
-  opacity: 1;
-  visibility: visible;
-  pointer-events: auto;
-  }
-  .tokenizer-info-tooltip:hover {
-    opacity: 1;
-    visibility: visible;
-    pointer-events: auto;
-  }
-  .tokenizer-info-header {
-    font-size: 1.1rem;
-    font-weight: 600;
-    margin-bottom: 0.5rem;
-    padding-bottom: 0.5rem;
-    border-bottom: 1px solid rgba(255, 255, 255, 0.1);
-    color: var(--primary-color);
-  }
-  .tokenizer-info-grid {
-    display: grid;
-    grid-template-columns: repeat(2, 1fr);
-    gap: 0.75rem;
-    margin: 0.75rem 0;
-  }
-  .tokenizer-info-item {
-    display: flex;
-    flex-direction: column;
-  }
-  .tokenizer-info-label {
-    font-size: 0.75rem;
-    color: var(--secondary-text);
-    margin-bottom: 0.25rem;
-  }
-  .tokenizer-info-value {
-    font-size: 0.95rem;
-    font-weight: 500;
-  }
-  .special-tokens-container {
-    margin-top: 0.75rem;
-    background-color: rgba(15, 79, 155, 0.1);
-    border-radius: 0.5rem;
-    padding: 0.5rem;
-    max-height: 100px;
-    overflow-y: auto;
-  }
-  .special-token-item {
-    display: flex;
-    justify-content: space-between;
-    margin-bottom: 0.25rem;
-    font-size: 0.8rem;
-  }
-  .token-name {
-    color: var(--secondary-text);
-  }
-  .token-value {
-    background-color: rgba(255, 255, 255, 0.1);
-    padding: 1px 4px;
-    border-radius: 2px;
-    font-family: monospace;
-  }
-  .tokenizer-info-loading {
-    display: flex;
-    justify-content: center;
-    align-items: center;
-    height: 100px;
-  }
-  .tokenizer-info-spinner {
-    width: 30px;
-    height: 30px;
-    border: 3px solid var(--primary-color);
-    border-radius: 50%;
-    border-top-color: transparent;
-    animation: spin 1s linear infinite;
-  }
-  .tokenizer-info-error {
-    color: #f87171;
-    font-size: 0.9rem;
-    text-align: center;
-    padding: 1rem;
-  }
-  @media (max-width: 768px) {
-    .header {
-      flex-direction: column;
-      align-items: stretch;
-      gap: 1rem;
-    }
-    .model-selector {
-      width: 100%;
-    }
-    .stats-grid {
-      grid-template-columns: 1fr;
-    }
-    .tokenizer-info-tooltip {
-      width: 250px;
-    }
-  }
-</style>
-</head>
-<body>
-    <!-- Hidden File Drop Zone that appears when dragging files -->
-    <div id="fileDropZone" class="file-drop-zone">
-        <div class="drop-indicator">
-            <div class="file-icon">📄</div>
-            <p>Drop your file here</p>
-        </div>
-    </div>
-    <!-- File upload icon in bottom left corner -->
-    <div id="fileUploadIcon" class="file-upload-icon">
-        <span>📎</span>
-    </div>
-    <p class="file-info" id="fileInfo"></p>
-    <div class="container">
-        <div class="header">
-            <div class="title-section">
-                <h1 class="title">Token Visualizer</h1>
-                <p class="subtitle">Advanced tokenization analysis and visualization</p>
-            </div>
-            <div class="model-selector">
-                <div class="model-selector-header">
-                    <div class="model-type-toggle">
-                        <div class="toggle-option predefined-toggle active" data-type="predefined">Predefined</div>
-                        <div class="toggle-option custom-toggle" data-type="custom">Custom</div>
-                    </div>
-                </div>
-                <div id="predefinedModelSelector">
-                    <div style="position: relative;">
-                        <div class="tokenizer-info-icon" id="modelInfoIcon" title="View tokenizer information">ℹ</div>
-                        <!-- TOOLTIP MOVED HERE -->
-                        <div class="tokenizer-info-tooltip" id="modelInfoTooltip">
-                            <div id="tokenizerInfoContent">
-                                <div class="tokenizer-info-loading">
-                                    <div class="tokenizer-info-spinner"></div>
-                                </div>
-                            </div>
-                        </div>
-                        <!-- SELECT NOW COMES AFTER ICON AND TOOLTIP -->
-                        <select id="modelSelect" name="model">
-                            {% for model_id, info in models.items() %}
-                            <option value="{{ model_id }}" {% if selected_model == model_id %}selected{% endif %}>
-                                {{ info.alias }}
-                            </option>
-                            {% endfor %}
-                        </select>
-                    </div>
-                </div>
-                <div id="customModelSelector" style="display: none;" class="custom-model-wrapper">
-                    <div style="position: relative;">
-                        <div class="tokenizer-info-icon" id="customModelInfoIcon" title="View tokenizer information">ℹ</div>
-                        <div class="tokenizer-info-tooltip" id="customModelInfoTooltip">
-                            <div id="customTokenizerInfoContent">
-                                <div class="tokenizer-info-loading">
-                                    <div class="tokenizer-info-spinner"></div>
-                                </div>
-                            </div>
-                        </div>
-                        <input type="text" id="customModelInput" class="custom-model-input"
-                               placeholder="Enter HuggingFace model path"
-                               value="{{ custom_model if custom_model and custom_model|length > 0 else '' }}">
-                    </div>
-                    <span class="custom-model-help">?</span>
-                    <div class="tooltip">
-                        Enter a valid HuggingFace model ID (e.g., "mistralai/Mistral-7B-Instruct-v0.3").
-                        For Korean, you might use "beomi/KoAlpaca-Polyglot-12.8B" or "skt/kogpt2-base-v2", etc.
-                        The model must have a tokenizer available and be accessible.
-                    </div>
-                    <div class="model-badge" id="modelSuccessBadge">Loaded</div>
-                </div>
-            </div>
-        </div>
-        <div class="error-message" id="errorMessage">{{ error }}</div>
-        <div class="input-section">
-            <form id="analyzeForm" method="POST" enctype="multipart/form-data">
-                <textarea name="text" id="textInput" placeholder="Enter text to analyze or upload a file in bottom left corner...">{{ text }}</textarea>
-                <input type="hidden" name="model" id="modelInput" value="{{ selected_model }}">
-                <input type="hidden" name="custom_model" id="customModelInputHidden" value="{{ custom_model if custom_model else '' }}">
-                <input type="hidden" name="model_type" id="modelTypeInput" value="{{ model_type if model_type else 'predefined' }}">
-                <input type="file" name="file" id="fileInput" style="display: none;">
-                <div class="button-container">
-                    <button type="submit" id="analyzeButton">Analyze Text</button>
-                </div>
-            </form>
-        </div>
-        <div id="results" class="results" {% if not token_data %}style="display: none;"{% endif %}>
-            <div class="card">
-                <h2 class="card-title">Token Visualization</h2>
-                <div class="preview-notice" id="previewNotice">
-                    Note: Showing preview of first 8096 characters. Stats are calculated on the full file.
-                </div>
-                <div class="token-container" id="tokenContainer">
-                    {% if token_data %}
-                    {% for token in token_data.tokens %}
-                    <span class="token"
-                          style="background-color: {{ token.colors.background }}; color: {{ token.colors.text }};"
-                          title="Original token: {{ token.original }} | Token ID: {{ token.token_id }}">
-                        {{ token.display }}
-                    </span>
-                    {% if token.newline %}<br>{% endif %}
-                    {% endfor %}
-                    {% endif %}
-                </div>
-                <button class="expand-button" id="expandButton">Show More</button>
-                <div class="display-limit-notice" id="displayLimitNotice">
-                    Note: Only showing first 50,000 tokens. Total token count: <span id="totalTokenCount">0</span>
-                </div>
-            </div>
-            <div class="stats-grid">
-                <div class="stat-card">
-                    <div class="stat-title">Total Tokens</div>
-                    <div class="stat-value" id="totalTokens">{{ token_data.stats.basic_stats.total_tokens if token_data else 0 }}</div>
-                    <div class="stat-description">
-                        <span id="uniqueTokens">{{ token_data.stats.basic_stats.unique_tokens if token_data else 0 }} unique</span>
-                        (<span id="uniquePercentage">{{ token_data.stats.basic_stats.unique_percentage if token_data else 0 }}</span>%)
-                    </div>
-                </div>
-                <div class="stat-card">
-                    <div class="stat-title">Token Types</div>
-                    <div class="stat-value" id="specialTokens">{{ token_data.stats.basic_stats.special_tokens if token_data else 0 }}</div>
-                    <div class="stat-description">special tokens</div>
-                </div>
-                <div class="stat-card">
-                    <div class="stat-title">Whitespace</div>
-                    <div class="stat-value" id="spaceTokens">{{ token_data.stats.basic_stats.space_tokens if token_data else 0 }}</div>
-                    <div class="stat-description">
-                        spaces: <span id="spaceCount">{{ token_data.stats.basic_stats.space_tokens if token_data else 0 }}</span>,
-                        newlines: <span id="newlineCount">{{ token_data.stats.basic_stats.newline_tokens if token_data else 0 }}</span>
-                    </div>
-                </div>
-                <div class="stat-card">
-                    <div class="stat-title">Token Length</div>
-                    <div class="stat-value" id="avgLength">{{ token_data.stats.length_stats.avg_length if token_data else 0 }}</div>
-                    <div class="stat-description">
-                        median: <span id="medianLength">{{ token_data.stats.length_stats.median_length if token_data else 0 }}</span>,
-                        ±<span id="stdDev">{{ token_data.stats.length_stats.std_dev if token_data else 0 }}</span> std
-                    </div>
-                </div>
-                <div class="stat-card">
-                    <div class="stat-title">Compression</div>
-                    <div class="stat-value" id="compressionRatio">{{ token_data.stats.basic_stats.compression_ratio if token_data else 0 }}</div>
-                    <div class="stat-description">characters per token</div>
-                </div>
-            </div>
-        </div>
-    </div>
-    <a href="https://huggingface.co/spaces/barttee/tokenizers" target="_blank" class="watermark">
-        @barttee/tokenizers
-    </a>
-    <script>
-        $(document).ready(function() {
-            // File handling variables
-            let currentFile = null;
-            let originalTextContent = null;
-            let lastUploadedFileName = null;
-            let fileJustUploaded = false;  // Flag to prevent immediate detachment
-            let currentModelType = "{{ model_type if model_type else 'predefined' }}";
-            let currentTokenizerInfo = null;
-            // Try to parse tokenizer info if available from server
-            try {
-                currentTokenizerInfo = {{ token_data.tokenizer_info|tojson if token_data and token_data.tokenizer_info else 'null' }};
-                if (currentTokenizerInfo) {
-                    updateTokenizerInfoDisplay(currentTokenizerInfo, currentModelType === 'custom');
-                }
-            } catch(e) {
-                console.error("Error parsing tokenizer info:", e);
-            }
-            // Show error if exists
-            if ("{{ error }}".length > 0) {
-                showError("{{ error }}");
-            }
-            // Setup model type based on initial state
-            if (currentModelType === "custom") {
-                $('.toggle-option').removeClass('active');
-                $('.custom-toggle').addClass('active');
-                $('#predefinedModelSelector').hide();
-                $('#customModelSelector').show();
-            }
-            // Show success badge if custom model loaded successfully
-            if (currentModelType === "custom" && !("{{ error }}".length > 0)) {
-                $('#modelSuccessBadge').addClass('show');
-                setTimeout(() => {
-                    $('#modelSuccessBadge').removeClass('show');
-                }, 3000);
-            }
-            // Toggle between predefined and custom model inputs
-            $('.toggle-option').click(function() {
-                const modelType = $(this).data('type');
-                $('.toggle-option').removeClass('active');
-                $(this).addClass('active');
-                currentModelType = modelType;
-                if (modelType === 'predefined') {
-                    $('#predefinedModelSelector').show();
-                    $('#customModelSelector').hide();
-                    $('#modelTypeInput').val('predefined');
-                    // Set the model input value to the selected predefined model
-                    $('#modelInput').val($('#modelSelect').val());
-                } else {
-                    $('#predefinedModelSelector').hide();
-                    $('#customModelSelector').show();
-                    $('#modelTypeInput').val('custom');
-                }
-                // Clear tokenizer info if switching models
-                if (modelType === 'predefined') {
-                    $('#tokenizerInfoContent').html('<div class="tokenizer-info-loading"><div class="tokenizer-info-spinner"></div></div>');
-                    fetchTokenizerInfo($('#modelSelect').val(), false);
-                } else {
-                    $('#customTokenizerInfoContent').html('<div class="tokenizer-info-loading"><div class="tokenizer-info-spinner"></div></div>');
-                    // Only fetch if there's a custom model value
-                    const customModel = $('#customModelInput').val();
-                    if (customModel) {
-                        fetchTokenizerInfo(customModel, true);
-                    }
-                }
-            });
-            // Update hidden input when custom model input changes
-            $('#customModelInput').on('input', function() {
-                $('#customModelInputHidden').val($(this).val());
-            });
-            function showError(message) {
-                const errorDiv = $('#errorMessage');
-                errorDiv.text(message);
-                errorDiv.show();
-                setTimeout(() => errorDiv.fadeOut(), 5000);
-            }
-            // Function to update tokenizer info display in tooltip
-            function updateTokenizerInfoDisplay(info, isCustom = false) {
-                const targetSelector = isCustom ? '#customTokenizerInfoContent' : '#tokenizerInfoContent';
-                let htmlContent = '';
-                if (info.error) {
-                    $(targetSelector).html(`<div class="tokenizer-info-error">${info.error}</div>`);
-                    return;
-                }
-                // Start building the tooltip content
-                htmlContent = `<div class="tokenizer-info-header">Tokenizer Details</div>
-                              <div class="tokenizer-info-grid">`;
-                // Dictionary size
-                if (info.vocab_size) {
-                    htmlContent += `
-                        <div class="tokenizer-info-item">
-                            <span class="tokenizer-info-label">Dictionary Size</span>
-                            <span class="tokenizer-info-value">${info.vocab_size.toLocaleString()}</span>
-                        </div>`;
-                }
-                // Tokenizer type
-                if (info.tokenizer_type) {
-                    htmlContent += `
-                        <div class="tokenizer-info-item">
-                            <span class="tokenizer-info-label">Tokenizer Type</span>
-                            <span class="tokenizer-info-value">${info.tokenizer_type}</span>
-                        </div>`;
-                }
-                // Max length
-                if (info.model_max_length) {
-                    htmlContent += `
-                        <div class="tokenizer-info-item">
-                            <span class="tokenizer-info-label">Max Length</span>
-                            <span class="tokenizer-info-value">${info.model_max_length.toLocaleString()}</span>
-                        </div>`;
-                }
-                htmlContent += `</div>`;  // Close tokenizer-info-grid
-                // Special tokens section
-                if (info.special_tokens && Object.keys(info.special_tokens).length > 0) {
-                    htmlContent += `
-                        <div class="tokenizer-info-item" style="margin-top: 0.75rem;">
-                            <span class="tokenizer-info-label">Special Tokens</span>
-                            <div class="special-tokens-container">`;
-                    // Add each special token
-                    for (const [tokenName, tokenValue] of Object.entries(info.special_tokens)) {
-                        // Properly escape HTML special characters
-                        const escapedValue = tokenValue
-                            .replace(/&/g, '&amp;')
-                            .replace(/</g, '&lt;')
-                            .replace(/>/g, '&gt;')
-                            .replace(/"/g, '&quot;')
-                            .replace(/'/g, '&#039;');
-                        htmlContent += `
-                            <div class="special-token-item">
-                                <span class="token-name">${tokenName}:</span>
-                                <span class="token-value">${escapedValue}</span>
-                            </div>`;
-                    }
-                    htmlContent += `
-                            </div>
-                        </div>`;
-                }
-                $(targetSelector).html(htmlContent);
-            }
-            // Function to fetch tokenizer info
-            function fetchTokenizerInfo(modelId, isCustom = false) {
-                if (!modelId) return;
-                const targetSelector = isCustom ? '#customTokenizerInfoContent' : '#tokenizerInfoContent';
-                $(targetSelector).html('<div class="tokenizer-info-loading"><div class="tokenizer-info-spinner"></div></div>');
-                $.ajax({
-                    url: '/tokenizer-info',
-                    method: 'GET',
-                    data: {
-                        model_id: modelId,
-                        is_custom: isCustom
-                    },
-                    success: function(response) {
-                        if (response.error) {
-                            $(targetSelector).html(`<div class="tokenizer-info-error">${response.error}</div>`);
-                        } else {
-                            currentTokenizerInfo = response;
-                            updateTokenizerInfoDisplay(response, isCustom);
-                        }
-                    },
-                    error: function(xhr) {
-                        $(targetSelector).html('<div class="tokenizer-info-error">Failed to load tokenizer information</div>');
-                    }
-                });
-            }
-            function updateResults(data) {
-                $('#results').show();
-                // Update tokens
-                const tokenContainer = $('#tokenContainer');
-                tokenContainer.empty();
-                data.tokens.forEach(token => {
-                    const span = $('<span>')
-                        .addClass('token')
-                        .css({
-                            'background-color': token.colors.background,
-                            'color': token.colors.text
-                        })
-                        // Include token id in the tooltip on hover
-                        .attr('title', `Original token: ${token.original} | Token ID: ${token.token_id}`)
-                        .text(token.display);
-                    tokenContainer.append(span);
-                    if (token.newline) {
-                        tokenContainer.append('<br>');
-                    }
-                });
-                // Update display limit notice
-                if (data.display_limit_reached) {
-                    $('#displayLimitNotice').show();
-                    $('#totalTokenCount').text(data.total_tokens);
-                } else {
-                    $('#displayLimitNotice').hide();
-                }
-                // Update preview notice
-                if (data.preview_only) {
-                    $('#previewNotice').show();
-                } else {
-                    $('#previewNotice').hide();
-                }
-                // Update basic stats
-                $('#totalTokens').text(data.stats.basic_stats.total_tokens);
-                $('#uniqueTokens').text(`${data.stats.basic_stats.unique_tokens} unique`);
-                $('#uniquePercentage').text(data.stats.basic_stats.unique_percentage);
-                $('#specialTokens').text(data.stats.basic_stats.special_tokens);
-                $('#spaceTokens').text(data.stats.basic_stats.space_tokens);
-                $('#spaceCount').text(data.stats.basic_stats.space_tokens);
-                $('#newlineCount').text(data.stats.basic_stats.newline_tokens);
-                $('#compressionRatio').text(data.stats.basic_stats.compression_ratio);
-                // Update length stats
-                $('#avgLength').text(data.stats.length_stats.avg_length);
-                $('#medianLength').text(data.stats.length_stats.median_length);
-                $('#stdDev').text(data.stats.length_stats.std_dev);
-                // Update tokenizer info if available
-                if (data.tokenizer_info) {
-                    currentTokenizerInfo = data.tokenizer_info;
-                    updateTokenizerInfoDisplay(data.tokenizer_info, currentModelType === 'custom');
-                }
-            }
-            // Handle text changes to detach file
-            $('#textInput').on('input', function() {
-                if (fileJustUploaded) {
-                    fileJustUploaded = false;
-                    return;
-                }
-                const currentText = $(this).val();
-                const fileInput = document.getElementById('fileInput');
-                if (fileInput.files.length > 0 && originalTextContent !== null) {
-                    const isMajorChange =
-                        currentText.length < originalTextContent.length * 0.8 ||
-                        (currentText.length > 0 &&
-                         currentText !== originalTextContent.substring(0, currentText.length) &&
-                         currentText.substring(0, Math.min(20, currentText.length)) !==
-                         originalTextContent.substring(0, Math.min(20, originalTextContent.length)));
-                    if (isMajorChange) {
-                        detachFile();
-                    }
-                }
-            });
-            function detachFile() {
-                // Clear the file input
-                $('#fileInput').val('');
-                // Hide file info
-                $('#fileInfo').fadeOut(300);
-                // Reset the original content tracker
-                originalTextContent = $('#textInput').val();
-                // Reset last uploaded filename
-                lastUploadedFileName = null;
-            }
-            // For model changes
-            $('#modelSelect').change(function() {
-                const selectedModel = $(this).val();
-                $('#modelInput').val(selectedModel);
-                // Fetch tokenizer info for the selected model
-                fetchTokenizerInfo(selectedModel, false);
-                // If text exists, submit the form
-                if ($('#textInput').val().trim()) {
-                    $('#analyzeForm').submit();
-                }
-            });
-            // File drop handling
-            const fileDropZone = $('#fileDropZone');
-            const fileUploadIcon = $('#fileUploadIcon');
-            ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
-                fileDropZone[0].addEventListener(eventName, preventDefaults, false);
-                document.body.addEventListener(eventName, preventDefaults, false);
-            });
-            function preventDefaults(e) {
-                e.preventDefault();
-                e.stopPropagation();
-            }
-            document.addEventListener('dragenter', showDropZone, false);
-            document.addEventListener('dragover', showDropZone, false);
-            fileDropZone[0].addEventListener('dragleave', hideDropZone, false);
-            fileDropZone[0].addEventListener('drop', hideDropZone, false);
-            function showDropZone(e) {
-                fileDropZone.addClass('active');
-            }
-            function hideDropZone() {
-                fileDropZone.removeClass('active');
-            }
-            fileDropZone[0].addEventListener('drop', handleDrop, false);
-            fileUploadIcon.on('click', function() {
-                const input = document.createElement('input');
-                input.type = 'file';
-                input.onchange = e => {
-                    handleFiles(e.target.files);
-                };
-                input.click();
-            });
-            function handleFiles(files) {
-                if (files.length) {
-                    const file = files[0];
-                    currentFile = file;
-                    lastUploadedFileName = file.name;
-                    fileJustUploaded = true;
-                    $('#fileInfo').html(`${file.name} (${formatFileSize(file.size)}) <span class="file-detach" id="fileDetach"><i class="fas fa-times"></i></span>`).fadeIn(300);
-                    $('#fileDetach').on('click', function(e) {
-                        e.stopPropagation();
-                        detachFile();
-                        return false;
-                    });
-                    const dataTransfer = new DataTransfer();
-                    dataTransfer.items.add(file);
-                    document.getElementById('fileInput').files = dataTransfer.files;
-                    const reader = new FileReader();
-                    reader.onload = function(e) {
-                        const previewText = e.target.result.slice(0, 8096);
-                        $('#textInput').val(previewText);
-                        setTimeout(() => {
-                            originalTextContent = previewText;
-                            $('#analyzeForm').submit();
-                        }, 50);
-                    };
-                    reader.readAsText(file, 'utf-8');
-                }
-            }
-            function formatFileSize(bytes) {
-                if (bytes < 1024) return bytes + ' bytes';
-                else if (bytes < 1048576) return (bytes / 1024).toFixed(1) + ' KB';
-                else return (bytes / 1048576).toFixed(1) + ' MB';
-            }
-            $('#analyzeForm').on('submit', function(e) {
-                e.preventDefault();
-                if (!fileJustUploaded) {
-                    const textInput = $('#textInput').val();
-                    const fileInput = document.getElementById('fileInput');
-                    if (fileInput.files.length > 0 &&
-                        originalTextContent !== null &&
-                        textInput !== originalTextContent &&
-                        textInput.length < originalTextContent.length * 0.8) {
-                        detachFile();
-                    }
-                } else {
-                    fileJustUploaded = false;
-                }
-                if (currentModelType === 'custom') {
-                    $('#customModelInputHidden').val($('#customModelInput').val());
-                } else {
-                    $('#modelInput').val($('#modelSelect').val());
-                }
-                const formData = new FormData(this);
-                $('#analyzeButton').prop('disabled', true);
-                $.ajax({
-                    url: '/',
-                    method: 'POST',
-                    data: formData,
-                    processData: false,
-                    contentType: false,
-                    success: function(response) {
-                        if (response.error) {
-                            showError(response.error);
-                        } else {
-                            updateResults(response);
-                            if (currentModelType === 'custom') {
-                                $('#modelSuccessBadge').addClass('show');
-                                setTimeout(() => {
-                                    $('#modelSuccessBadge').removeClass('show');
-                                }, 3000);
-                            }
-                        }
-                    },
-                    error: function(xhr) {
-                        showError(xhr.responseText || 'An error occurred while processing the text');
-                    },
-                    complete: function() {
-                        $('#analyzeButton').prop('disabled', false);
-                    }
-                });
-            });
-            $('#expandButton').click(function() {
-                const container = $('#tokenContainer');
-                const isExpanded = container.hasClass('expanded');
-                container.toggleClass('expanded');
-                $(this).text(isExpanded ? 'Show More' : 'Show Less');
-            });
-            if (currentModelType === 'predefined') {
-                fetchTokenizerInfo($('#modelSelect').val(), false);
-            } else if ($('#customModelInput').val()) {
-                fetchTokenizerInfo($('#customModelInput').val(), true);
-            }
-            $('#customModelInput').on('change', function() {
-                const modelValue = $(this).val();
-                if (modelValue) {
-                    fetchTokenizerInfo(modelValue, true);
-                }
-            });
-        });
-    </script>
-</body>
-</html>
-"""
-@app.route('/tokenizer-info', methods=['GET'])
-def tokenizer_info():
-    """
-    Endpoint to get tokenizer information without processing text.
-    """
-    model_id = request.args.get('model_id', '')
-    is_custom = request.args.get('is_custom', 'false').lower() == 'true'
-    if not model_id:
-        return jsonify({"error": "No model ID provided"}), 400
-    try:
-        # For predefined models, use the model name from the dictionary
-        if not is_custom and model_id in TOKENIZER_MODELS:
-            model_id_or_name = model_id
-        else:
-            # For custom models, use the model ID directly
-            model_id_or_name = model_id
-        # Load the tokenizer and get info
-        tokenizer, info, error = load_tokenizer(model_id_or_name)
-        if error:
-            return jsonify({"error": error}), 400
-        return jsonify(info)
-    except Exception as e:
-        return jsonify({"error": f"Failed to get tokenizer info: {str(e)}"}), 500
-@app.route('/', methods=['GET', 'POST'])
-def index():
-    text = ""
-    token_data = None
-    error_message = ""
-    selected_model = request.args.get('model', request.form.get('model', 'llama4'))
-    custom_model = request.args.get('custom_model', request.form.get('custom_model', ''))
-    model_type = request.args.get('model_type', request.form.get('model_type', 'predefined'))
-    # Determine which model to use based on model_type
-    model_to_use = selected_model if model_type == 'predefined' else custom_model
-    if request.method == 'POST':
-        # Check if file upload
-        if 'file' in request.files and request.files['file'].filename:
-            uploaded_file = request.files['file']
-            # Save file to tmp directory
-            file_path = os.path.join(app.config['UPLOAD_FOLDER'], uploaded_file.filename)
-            uploaded_file.save(file_path)
-            # Read a small preview of the file (UTF-8)
-            with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
-                text = f.read(8096)
-            try:
-                # Process the file fully
-                token_data = process_text("", model_to_use, is_full_file=True, file_path=file_path)
-                # Clean up the file after processing
-                if os.path.exists(file_path):
-                    os.remove(file_path)
-                # If request is AJAX, return JSON
-                if request.headers.get('X-Requested-With') == 'XMLHttpRequest':
-                    return jsonify(token_data)
-            except Exception as e:
-                error_message = str(e)
-                # Clean up the file after processing
-                if os.path.exists(file_path):
-                    os.remove(file_path)
-                if request.headers.get('X-Requested-With') == 'XMLHttpRequest':
-                    return jsonify({"error": error_message}), 400
-                return render_template_string(
-                    HTML_TEMPLATE,
-                    text=text,
-                    token_data=None,
-                    models=TOKENIZER_MODELS,
-                    selected_model=selected_model,
-                    custom_model=custom_model,
-                    model_type=model_type,
-                    error=error_message
-                )
-        # Regular text processing
-        else:
-            text = request.form.get('text', '')
-            if text:
-                try:
-                    token_data = process_text(text, model_to_use)
-                    # If request is AJAX, return JSON
-                    if request.headers.get('X-Requested-With') == 'XMLHttpRequest':
-                        return jsonify(token_data)
-                except Exception as e:
-                    error_message = str(e)
-                    if request.headers.get('X-Requested-With') == 'XMLHttpRequest':
-                        return jsonify({"error": error_message}), 400
-                    return render_template_string(
-                        HTML_TEMPLATE,
-                        text=text,
-                        token_data=None,
-                        models=TOKENIZER_MODELS,
-                        selected_model=selected_model,
-                        custom_model=custom_model,
-                        model_type=model_type,
-                        error=error_message
-                    )
-    return render_template_string(
-        HTML_TEMPLATE,
-        text=text,
-        token_data=token_data,
-        models=TOKENIZER_MODELS,
-        selected_model=selected_model,
-        custom_model=custom_model,
-        model_type=model_type,
-        error=error_message
-    )
-if __name__ == "__main__":
-    app.run(host='0.0.0.0', port=7860, debug=False)