import streamlit as st import matplotlib.pyplot as plt import pandas as pd import torch from transformers import AutoConfig, AutoTokenizer # Page configuration st.set_page_config( page_title="Transformer Visualizer", page_icon="🧠", layout="wide", initial_sidebar_state="expanded" ) # Custom CSS styling st.markdown(""" """, unsafe_allow_html=True) # Model database MODELS = { "BERT": {"model_name": "bert-base-uncased", "type": "Encoder", "layers": 12, "heads": 12, "params": 109.48}, "GPT-2": {"model_name": "gpt2", "type": "Decoder", "layers": 12, "heads": 12, "params": 117}, "T5-Small": {"model_name": "t5-small", "type": "Seq2Seq", "layers": 6, "heads": 8, "params": 60}, "RoBERTa": {"model_name": "roberta-base", "type": "Encoder", "layers": 12, "heads": 12, "params": 125}, "DistilBERT": {"model_name": "distilbert-base-uncased", "type": "Encoder", "layers": 6, "heads": 12, "params": 66}, "ALBERT": {"model_name": "albert-base-v2", "type": "Encoder", "layers": 12, "heads": 12, "params": 11.8}, "ELECTRA": {"model_name": "google/electra-small-discriminator", "type": "Encoder", "layers": 12, "heads": 12, "params": 13.5}, "XLNet": {"model_name": "xlnet-base-cased", "type": "AutoRegressive", "layers": 12, "heads": 12, "params": 110}, "BART": {"model_name": "facebook/bart-base", "type": "Seq2Seq", "layers": 6, "heads": 16, "params": 139}, "DeBERTa": {"model_name": "microsoft/deberta-base", "type": "Encoder", "layers": 12, "heads": 12, "params": 139} } def get_model_config(model_name): config = AutoConfig.from_pretrained(MODELS[model_name]["model_name"]) return config def plot_model_comparison(selected_model): model_names = list(MODELS.keys()) params = [m["params"] for m in MODELS.values()] fig, ax = plt.subplots(figsize=(10, 6)) bars = ax.bar(model_names, params) index = list(MODELS.keys()).index(selected_model) bars[index].set_color('#00ff00') ax.set_ylabel('Parameters (Millions)', color='white') ax.set_title('Model Size Comparison', color='white') ax.tick_params(axis='x', rotation=45, colors='white') ax.tick_params(axis='y', colors='white') ax.set_facecolor('#2c2c2c') fig.patch.set_facecolor('#2c2c2c') st.pyplot(fig) def visualize_architecture(model_info): architecture = [] model_type = model_info["type"] layers = model_info.get("layers", model_info.get("layers", 12)) # Handle key variations heads = model_info["heads"] architecture.append("Input") architecture.append("│") architecture.append("▼") if model_type == "Encoder": architecture.append("[Embedding Layer]") for i in range(layers): architecture.extend([ f"Encoder Layer {i+1}", "├─ Multi-Head Attention", f"│ └─ {heads} Heads", "├─ Layer Normalization", "└─ Feed Forward Network", "│", "▼" ]) architecture.append("[Output]") elif model_type == "Decoder": architecture.append("[Embedding Layer]") for i in range(layers): architecture.extend([ f"Decoder Layer {i+1}", "├─ Masked Multi-Head Attention", f"│ └─ {heads} Heads", "├─ Layer Normalization", "└─ Feed Forward Network", "│", "▼" ]) architecture.append("[Output]") elif model_type == "Seq2Seq": architecture.append("Encoder Stack") for i in range(layers): architecture.extend([ f"Encoder Layer {i+1}", "├─ Self-Attention", "└─ Feed Forward Network", "│", "▼" ]) architecture.append("→→→ [Context] →→→") architecture.append("Decoder Stack") for i in range(layers): architecture.extend([ f"Decoder Layer {i+1}", "├─ Masked Self-Attention", "├─ Encoder-Decoder Attention", "└─ Feed Forward Network", "│", "▼" ]) architecture.append("[Output]") return "\n".join(architecture) def visualize_attention_patterns(): fig, ax = plt.subplots(figsize=(8, 6)) data = torch.randn(5, 5) ax.imshow(data, cmap='viridis') ax.set_title('Attention Patterns Example', color='white') ax.set_facecolor('#2c2c2c') fig.patch.set_facecolor('#2c2c2c') st.pyplot(fig) def get_hardware_recommendation(params): if params < 100: return "CPU or Entry-level GPU (e.g., GTX 1060)" elif 100 <= params < 200: return "Mid-range GPU (e.g., RTX 2080, RTX 3060)" else: return "High-end GPU (e.g., RTX 3090, A100) or TPU" def main(): st.title("🧠 Transformer Model Visualizer") selected_model = st.sidebar.selectbox("Select Model", list(MODELS.keys())) model_info = MODELS[selected_model] config = get_model_config(selected_model) tokenizer = AutoTokenizer.from_pretrained(model_info["model_name"]) col1, col2, col3, col4 = st.columns(4) with col1: st.metric("Model Type", model_info["type"]) with col2: st.metric("Layers", model_info.get("layers", model_info.get("layers", "N/A"))) with col3: st.metric("Attention Heads", model_info["heads"]) with col4: st.metric("Parameters", f"{model_info['params']}M") tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([ "Model Structure", "Comparison", "Model Attention", "Tokenization", "Hardware", "Memory" ]) with tab1: st.subheader("Architecture Diagram") architecture = visualize_architecture(model_info) st.markdown(f"