import streamlit as st
import matplotlib.pyplot as plt
import pandas as pd
import torch
from transformers import AutoConfig, AutoTokenizer
# Page configuration
st.set_page_config(
    page_title="Transformer Visualizer",
    page_icon="🧠",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Custom CSS styling
st.markdown("""
    <style>
    .reportview-container {
        background: linear-gradient(45deg, #1a1a1a, #4a4a4a);
    }
    .sidebar .sidebar-content {
        background: #2c2c2c !important;
    }
    h1, h2, h3, h4, h5, h6 {
        color: #00ff00 !important;
    }
    .stMetric {
        background-color: #333333;
        border-radius: 10px;
        padding: 15px;
    }
    .architecture {
        font-family: monospace;
        color: #00ff00;
        white-space: pre-wrap;
        background-color: #1a1a1a;
        padding: 20px;
        border-radius: 10px;
        border: 1px solid #00ff00;
    }
    .token-table {
        margin-top: 20px;
        border: 1px solid #00ff00;
        border-radius: 5px;
    }
    </style>
""", unsafe_allow_html=True)
# Model database (parameter counts in millions, approximate)
MODELS = {
    "BERT": {"model_name": "bert-base-uncased", "type": "Encoder", "layers": 12, "heads": 12, "params": 109.48},
    "GPT-2": {"model_name": "gpt2", "type": "Decoder", "layers": 12, "heads": 12, "params": 117},
    "T5-Small": {"model_name": "t5-small", "type": "Seq2Seq", "layers": 6, "heads": 8, "params": 60},
    "RoBERTa": {"model_name": "roberta-base", "type": "Encoder", "layers": 12, "heads": 12, "params": 125},
    "DistilBERT": {"model_name": "distilbert-base-uncased", "type": "Encoder", "layers": 6, "heads": 12, "params": 66},
    "ALBERT": {"model_name": "albert-base-v2", "type": "Encoder", "layers": 12, "heads": 12, "params": 11.8},
    "ELECTRA": {"model_name": "google/electra-small-discriminator", "type": "Encoder", "layers": 12, "heads": 12, "params": 13.5},
    "XLNet": {"model_name": "xlnet-base-cased", "type": "AutoRegressive", "layers": 12, "heads": 12, "params": 110},
    "BART": {"model_name": "facebook/bart-base", "type": "Seq2Seq", "layers": 6, "heads": 16, "params": 139},
    "DeBERTa": {"model_name": "microsoft/deberta-base", "type": "Encoder", "layers": 12, "heads": 12, "params": 139}
}
def get_model_config(model_name):
    """Load the Hugging Face config for the selected model."""
    config = AutoConfig.from_pretrained(MODELS[model_name]["model_name"])
    return config
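
# The hard-coded layer/head counts in MODELS can drift from the checkpoint's
# actual config. A best-effort cross-check sketch (an assumption, not part of
# the original app: attribute names vary by architecture, e.g. BERT-style
# configs expose num_hidden_layers/num_attention_heads while GPT-2 uses
# n_layer/n_head):
def config_layers_and_heads(config):
    """Tolerant read of (layers, heads) from a transformers config object."""
    layers = getattr(config, "num_hidden_layers", getattr(config, "n_layer", None))
    heads = getattr(config, "num_attention_heads", getattr(config, "n_head", None))
    return layers, heads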
def plot_model_comparison(selected_model):
    """Bar chart of parameter counts across MODELS, highlighting the selection."""
    model_names = list(MODELS.keys())
    params = [m["params"] for m in MODELS.values()]
    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(model_names, params)
    index = model_names.index(selected_model)
    bars[index].set_color('#00ff00')
    ax.set_ylabel('Parameters (Millions)', color='white')
    ax.set_title('Model Size Comparison', color='white')
    ax.tick_params(axis='x', rotation=45, colors='white')
    ax.tick_params(axis='y', colors='white')
    ax.set_facecolor('#2c2c2c')
    fig.patch.set_facecolor('#2c2c2c')
    st.pyplot(fig)
def visualize_architecture(model_info):
    """Build an ASCII-art diagram of the model's layer stack."""
    architecture = []
    model_type = model_info["type"]
    layers = model_info["layers"]
    heads = model_info["heads"]
    architecture.append("Input")
    architecture.append("│")
    architecture.append("▼")
    if model_type == "Encoder":
        architecture.append("[Embedding Layer]")
        for i in range(layers):
            architecture.extend([
                f"Encoder Layer {i+1}",
                "├─ Multi-Head Attention",
                f"│   └─ {heads} Heads",
                "├─ Layer Normalization",
                "└─ Feed Forward Network",
                "│",
                "▼"
            ])
        architecture.append("[Output]")
    elif model_type == "Decoder":
        architecture.append("[Embedding Layer]")
        for i in range(layers):
            architecture.extend([
                f"Decoder Layer {i+1}",
                "├─ Masked Multi-Head Attention",
                f"│   └─ {heads} Heads",
                "├─ Layer Normalization",
                "└─ Feed Forward Network",
                "│",
                "▼"
            ])
        architecture.append("[Output]")
    elif model_type == "Seq2Seq":
        architecture.append("Encoder Stack")
        for i in range(layers):
            architecture.extend([
                f"Encoder Layer {i+1}",
                "├─ Self-Attention",
                "└─ Feed Forward Network",
                "│",
                "▼"
            ])
        architecture.append("─── [Context] ───")
        architecture.append("Decoder Stack")
        for i in range(layers):
            architecture.extend([
                f"Decoder Layer {i+1}",
                "├─ Masked Self-Attention",
                "├─ Encoder-Decoder Attention",
                "└─ Feed Forward Network",
                "│",
                "▼"
            ])
        architecture.append("[Output]")
    else:
        # Fallback for types without a dedicated diagram (e.g. "AutoRegressive"),
        # so the tab never renders an empty stack
        architecture.append(f"[{model_type}: {layers} layers, {heads} heads each]")
    return "\n".join(architecture)
def visualize_attention_patterns():
    """Show an illustrative attention heatmap (synthetic, not model output)."""
    fig, ax = plt.subplots(figsize=(8, 6))
    # Random scores pushed through softmax so each row sums to 1,
    # like real attention weights; this is a placeholder, not model output
    data = torch.softmax(torch.randn(5, 5), dim=-1)
    ax.imshow(data, cmap='viridis')
    ax.set_title('Attention Patterns Example', color='white')
    ax.set_facecolor('#2c2c2c')
    fig.patch.set_facecolor('#2c2c2c')
    st.pyplot(fig)
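
# The heatmap above is synthetic. A sketch of pulling real attention maps
# instead (an assumption, not part of the current app: it downloads the full
# model weights; works for encoder-style checkpoints like bert-base-uncased,
# while seq2seq models would also need decoder inputs; `layer` and `head`
# indices are illustrative):
def real_attention_weights(text, hf_name, layer=0, head=0):
    from transformers import AutoModel
    tok = AutoTokenizer.from_pretrained(hf_name)
    model = AutoModel.from_pretrained(hf_name, output_attentions=True)
    inputs = tok(text, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # outputs.attentions: tuple with one [batch, heads, seq, seq] tensor per layer
    return outputs.attentions[layer][0, head]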
def get_hardware_recommendation(params):
    """Map a parameter count (in millions) to a rough hardware tier."""
    if params < 100:
        return "CPU or Entry-level GPU (e.g., GTX 1060)"
    elif params < 200:
        return "Mid-range GPU (e.g., RTX 2080, RTX 3060)"
    else:
        return "High-end GPU (e.g., RTX 3090, A100) or TPU"
def main():
    st.title("🧠 Transformer Model Visualizer")
    selected_model = st.sidebar.selectbox("Select Model", list(MODELS.keys()))
    model_info = MODELS[selected_model]
    # Loaded to verify the checkpoint resolves; not otherwise used yet
    config = get_model_config(selected_model)
    tokenizer = AutoTokenizer.from_pretrained(model_info["model_name"])
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Model Type", model_info["type"])
    with col2:
        st.metric("Layers", model_info.get("layers", "N/A"))
    with col3:
        st.metric("Attention Heads", model_info["heads"])
    with col4:
        st.metric("Parameters", f"{model_info['params']}M")
    tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
        "Model Structure", "Comparison", "Model Attention",
        "Tokenization", "Hardware", "Memory"
    ])
    with tab1:
        st.subheader("Architecture Diagram")
        architecture = visualize_architecture(model_info)
        st.markdown(f"<div class='architecture'>{architecture}</div>", unsafe_allow_html=True)
        st.markdown("""
**Legend:**
- **Multi-Head Attention**: Self-attention mechanism with multiple parallel heads
- **Layer Normalization**: Normalization operation between layers
- **Feed Forward Network**: Position-wise fully connected network
- **Masked Attention**: Attention with future token masking
""")
    with tab2:
        st.subheader("Model Size Comparison")
        plot_model_comparison(selected_model)
    with tab3:
        st.subheader("Model-specific Visualizations")
        visualize_attention_patterns()
    with tab4:
        st.subheader("🔠 Tokenization Visualization")
        input_text = st.text_input("Enter Text:", "Hello, how are you?")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Tokenized Output**")
            tokens = tokenizer.tokenize(input_text)
            st.write(tokens)
        with col2:
            st.markdown("**Token IDs**")
            encoded_ids = tokenizer.encode(input_text)
            st.write(encoded_ids)
        st.markdown("**Token-ID Mapping**")
        # Map IDs directly from the tokens so both columns always align;
        # slicing special tokens off encoded_ids breaks for tokenizers that
        # add a different number of special tokens (e.g. none, or only EOS)
        token_data = pd.DataFrame({
            "Token": tokens,
            "ID": tokenizer.convert_tokens_to_ids(tokens)
        })
        st.dataframe(token_data, height=150, use_container_width=True)
st.markdown(f""" | |
**Tokenizer Info:** | |
- Vocabulary size: `{tokenizer.vocab_size}` | |
- Special tokens: `{tokenizer.all_special_tokens}` | |
- Padding token: `{tokenizer.pad_token}` | |
- Max length: `{tokenizer.model_max_length}` | |
""") | |
    with tab5:
        st.subheader("🖥️ Hardware Recommendation")
        params = model_info["params"]
        recommendation = get_hardware_recommendation(params)
        st.markdown(f"**Recommended hardware for {selected_model}:**")
        st.info(recommendation)
        st.markdown("""
**Recommendation Criteria:**
- <100M parameters: Suitable for CPU or entry-level GPUs
- 100-200M parameters: Requires mid-range GPUs
- >200M parameters: Needs high-end GPUs/TPUs
""")
    with tab6:
        st.subheader("💾 Memory Usage Estimation")
        params = model_info["params"]
        memory_mb = params * 4  # 1M params ≈ 4 MB in FP32 (4 bytes per parameter)
        memory_gb = memory_mb / 1024
        st.metric("Estimated Memory (FP32)",
                  f"{memory_mb:.1f} MB / {memory_gb:.2f} GB")
st.markdown(""" | |
**Memory Notes:** | |
- Based on 4 bytes per parameter (FP32 precision) | |
- Actual usage varies with: | |
- Batch size | |
- Sequence length | |
- Precision (FP16/FP32) | |
- Optimizer states (training) | |
""") | |
if __name__ == "__main__":
    main()