"""Streamlit app that visualizes and compares common transformer models.

The app shows, for a model picked in the sidebar: a text architecture
diagram, a parameter-count comparison chart, an example attention heat map,
the output of the model's tokenizer, a hardware recommendation and a rough
FP32 memory estimate.
"""

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
import torch
from transformers import AutoConfig, AutoTokenizer

# Page configuration (must be the first Streamlit command in the script).
st.set_page_config(
    page_title="Transformer Visualizer",
    page_icon="🧠",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS styling.
# NOTE: the markup is currently whitespace only, so no styles are injected;
# this call is the hook where a <style> block would go.
st.markdown(""" """, unsafe_allow_html=True)

# Model database.
# "layers" is the depth of one stack (encoder and decoder each, for Seq2Seq),
# "params" is the parameter count in millions.
MODELS = {
    "BERT": {"model_name": "bert-base-uncased", "type": "Encoder", "layers": 12, "heads": 12, "params": 109.48},
    "GPT-2": {"model_name": "gpt2", "type": "Decoder", "layers": 12, "heads": 12, "params": 117},
    "T5-Small": {"model_name": "t5-small", "type": "Seq2Seq", "layers": 6, "heads": 8, "params": 60},
    "RoBERTa": {"model_name": "roberta-base", "type": "Encoder", "layers": 12, "heads": 12, "params": 125},
    "DistilBERT": {"model_name": "distilbert-base-uncased", "type": "Encoder", "layers": 6, "heads": 12, "params": 66},
    "ALBERT": {"model_name": "albert-base-v2", "type": "Encoder", "layers": 12, "heads": 12, "params": 11.8},
    # ELECTRA-small uses 4 attention heads (hidden size 256), not 12.
    "ELECTRA": {"model_name": "google/electra-small-discriminator", "type": "Encoder", "layers": 12, "heads": 4, "params": 13.5},
    "XLNet": {"model_name": "xlnet-base-cased", "type": "AutoRegressive", "layers": 12, "heads": 12, "params": 110},
    # BART-base uses 12 attention heads; 16 is the BART-large value.
    "BART": {"model_name": "facebook/bart-base", "type": "Seq2Seq", "layers": 6, "heads": 12, "params": 139},
    "DeBERTa": {"model_name": "microsoft/deberta-base", "type": "Encoder", "layers": 12, "heads": 12, "params": 139},
}

# Plot colours shared by all charts.
PLOT_BACKGROUND = "#2c2c2c"
HIGHLIGHT_COLOR = "#00ff00"

# Connector drawn between two consecutive elements of the architecture diagram.
_FLOW_ARROW = ["│", "▼"]

LEGEND_MD = "\n".join([
    "**Legend:**",
    "- **Multi-Head Attention**: Self-attention mechanism with multiple parallel heads",
    "- **Layer Normalization**: Normalization operation between layers",
    "- **Feed Forward Network**: Position-wise fully connected network",
    "- **Masked Attention**: Attention with future token masking",
])

# The last tier is ">= 200" in get_hardware_recommendation, hence "≥".
HARDWARE_CRITERIA_MD = "\n".join([
    "**Recommendation Criteria:**",
    "- <100M parameters: Suitable for CPU or entry-level GPUs",
    "- 100-200M parameters: Requires mid-range GPUs",
    "- ≥200M parameters: Needs high-end GPUs/TPUs",
])

MEMORY_NOTES_MD = "\n".join([
    "**Memory Notes:**",
    "- Based on 4 bytes per parameter (FP32 precision)",
    "- Actual usage varies with:",
    "  - Batch size",
    "  - Sequence length",
    "  - Precision (FP16/FP32)",
    "  - Optimizer states (training)",
])


def get_model_config(model_name):
    """Load the Hugging Face configuration for a model listed in ``MODELS``.

    Args:
        model_name: Display name of the model, i.e. a key of ``MODELS``.

    Returns:
        The object returned by ``AutoConfig.from_pretrained`` for the model's
        hub identifier.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``.
    """
    return AutoConfig.from_pretrained(MODELS[model_name]["model_name"])


def plot_model_comparison(selected_model):
    """Render a bar chart of parameter counts with one model highlighted.

    Args:
        selected_model: Key of ``MODELS`` whose bar is drawn in the
            highlight colour.

    Raises:
        ValueError: If ``selected_model`` is not a key of ``MODELS``.
    """
    model_names = list(MODELS)
    params = [info["params"] for info in MODELS.values()]

    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(model_names, params)
    bars[model_names.index(selected_model)].set_color(HIGHLIGHT_COLOR)

    ax.set_ylabel("Parameters (Millions)", color="white")
    ax.set_title("Model Size Comparison", color="white")
    ax.tick_params(axis="x", rotation=45, colors="white")
    ax.tick_params(axis="y", colors="white")
    ax.set_facecolor(PLOT_BACKGROUND)
    fig.patch.set_facecolor(PLOT_BACKGROUND)

    st.pyplot(fig)
    # Streamlit reruns the script on every interaction; without closing, each
    # rerun would leave another figure alive in pyplot's global registry.
    plt.close(fig)


def _layer_stack(label, sublayers, count):
    """Return diagram lines for ``count`` numbered layers named ``label``."""
    lines = []
    for number in range(1, count + 1):
        lines.append(f"{label} Layer {number}")
        lines.extend(sublayers)
        lines.extend(_FLOW_ARROW)
    return lines


def visualize_architecture(model_info):
    """Build a plain-text diagram of a model's layer structure.

    Args:
        model_info: One entry of ``MODELS``. Reads ``"type"``, ``"heads"`` and
            ``"layers"`` (12 is assumed when ``"layers"`` is missing).

    Returns:
        The diagram as a single newline-separated string.
    """
    model_type = model_info["type"]
    layers = model_info.get("layers", 12)
    heads = model_info["heads"]

    diagram = ["Input", *_FLOW_ARROW]

    if model_type == "Seq2Seq":
        diagram.append("Encoder Stack")
        diagram += _layer_stack(
            "Encoder",
            ["├─ Self-Attention", "└─ Feed Forward Network"],
            layers,
        )
        diagram.append("→→→ [Context] →→→")
        diagram.append("Decoder Stack")
        diagram += _layer_stack(
            "Decoder",
            [
                "├─ Masked Self-Attention",
                "├─ Encoder-Decoder Attention",
                "└─ Feed Forward Network",
            ],
            layers,
        )
    else:
        if model_type == "Encoder":
            label, attention = "Encoder", "Multi-Head Attention"
        elif model_type == "Decoder":
            label, attention = "Decoder", "Masked Multi-Head Attention"
        else:
            # Any other type (e.g. "AutoRegressive") gets a generic stack
            # instead of an empty diagram.
            label, attention = "Transformer", "Multi-Head Attention"
        diagram.append("[Embedding Layer]")
        diagram += _layer_stack(
            label,
            [
                f"├─ {attention}",
                f"│ └─ {heads} Heads",
                "├─ Layer Normalization",
                "└─ Feed Forward Network",
            ],
            layers,
        )

    diagram.append("[Output]")
    return "\n".join(diagram)


def visualize_attention_patterns():
    """Render an illustrative 5x5 heat map.

    The values are random samples from ``torch.randn``; they are placeholder
    data, not attention weights taken from the selected model.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    data = torch.randn(5, 5)
    ax.imshow(data.numpy(), cmap="viridis")
    ax.set_title("Attention Patterns Example", color="white")
    ax.set_facecolor(PLOT_BACKGROUND)
    fig.patch.set_facecolor(PLOT_BACKGROUND)

    st.pyplot(fig)
    plt.close(fig)  # see plot_model_comparison: avoid leaking figures on rerun


def get_hardware_recommendation(params):
    """Map a parameter count to a hardware tier description.

    Args:
        params: Parameter count in millions.

    Returns:
        A human-readable recommendation: below 100 -> CPU/entry-level GPU,
        100 up to (not including) 200 -> mid-range GPU, otherwise high-end.
    """
    if params < 100:
        return "CPU or Entry-level GPU (e.g., GTX 1060)"
    if params < 200:
        return "Mid-range GPU (e.g., RTX 2080, RTX 3060)"
    return "High-end GPU (e.g., RTX 3090, A100) or TPU"


def main():
    """Lay out the sidebar, summary metrics and the six content tabs."""
    st.title("🧠 Transformer Model Visualizer")

    selected_model = st.sidebar.selectbox("Select Model", list(MODELS))
    model_info = MODELS[selected_model]
    tokenizer = AutoTokenizer.from_pretrained(model_info["model_name"])

    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Model Type", model_info["type"])
    with col2:
        st.metric("Layers", model_info.get("layers", "N/A"))
    with col3:
        st.metric("Attention Heads", model_info["heads"])
    with col4:
        st.metric("Parameters", f"{model_info['params']}M")

    tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
        "Model Structure",
        "Comparison",
        "Model Attention",
        "Tokenization",
        "Hardware",
        "Memory",
    ])

    with tab1:
        st.subheader("Architecture Diagram")
        architecture = visualize_architecture(model_info)
        # <pre> keeps the line breaks and box-drawing alignment, which plain
        # markdown would collapse into a single paragraph.
        st.markdown(f"<pre>{architecture}</pre>", unsafe_allow_html=True)
        st.markdown(LEGEND_MD)

    with tab2:
        st.subheader("Model Size Comparison")
        plot_model_comparison(selected_model)

    with tab3:
        st.subheader("Model-specific Visualizations")
        visualize_attention_patterns()

    with tab4:
        st.subheader("📝 Tokenization Visualization")
        input_text = st.text_input("Enter Text:", "Hello, how are you?")

        tokens = tokenizer.tokenize(input_text)
        encoded_ids = tokenizer.encode(input_text)

        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Tokenized Output**")
            st.write(tokens)
        with col2:
            st.markdown("**Token IDs**")
            st.write(encoded_ids)

        st.markdown("**Token-ID Mapping**")
        # Convert the tokens directly instead of slicing special tokens off
        # `encoded_ids`: where and how many special tokens `encode` adds
        # differs per model (none for GPT-2, a trailing </s> for T5, two
        # trailing tokens for XLNet), so slicing gives mismatched lengths or
        # misaligned rows.
        token_data = pd.DataFrame({
            "Token": tokens,
            "ID": tokenizer.convert_tokens_to_ids(tokens),
        })
        st.dataframe(token_data, height=150, use_container_width=True)

        st.markdown("\n".join([
            "**Tokenizer Info:**",
            f"- Vocabulary size: `{tokenizer.vocab_size}`",
            f"- Special tokens: `{tokenizer.all_special_tokens}`",
            f"- Padding token: `{tokenizer.pad_token}`",
            f"- Max length: `{tokenizer.model_max_length}`",
        ]))

    with tab5:
        st.subheader("🖥️ Hardware Recommendation")
        recommendation = get_hardware_recommendation(model_info["params"])
        st.markdown(f"**Recommended hardware for {selected_model}:**")
        st.info(recommendation)
        st.markdown(HARDWARE_CRITERIA_MD)

    with tab6:
        st.subheader("💾 Memory Usage Estimation")
        memory_mb = model_info["params"] * 4  # 1M params ≈ 4MB in FP32
        memory_gb = memory_mb / 1024
        st.metric(
            "Estimated Memory (FP32)",
            f"{memory_mb:.1f} MB / {memory_gb:.2f} GB",
        )
        st.markdown(MEMORY_NOTES_MD)


if __name__ == "__main__":
    main()