# NOTE: page-capture residue ("Spaces: Sleeping"), commented out so it is not parsed as code.
import streamlit as st | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import torch | |
import plotly.express as px | |
from sklearn.decomposition import PCA | |
from sklearn.manifold import TSNE | |
from transformers import AutoConfig, AutoTokenizer | |
# Page configuration: browser-tab title and icon, wide layout, sidebar open.
# The icon is the brain emoji (U+1F9E0); it must be stored as real UTF-8,
# otherwise Streamlit shows the mis-decoded characters verbatim.
st.set_page_config(
    page_title="Transformer Visualizer",
    page_icon="🧠",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Custom CSS styling. The stylesheet lives in a named constant so the
# injection call below stays short; the CSS text itself is unchanged.
# NOTE(review): `.reportview-container` and `.sidebar .sidebar-content` look
# like legacy Streamlit class names — confirm they still match the DOM of the
# Streamlit version this app is deployed on.
_CUSTOM_CSS = """
<style>
.reportview-container {
background: linear-gradient(45deg, #1a1a1a, #4a4a4a);
}
.sidebar .sidebar-content {
background: #2c2c2c !important;
}
h1, h2, h3, h4, h5, h6 {
color: #00ff00 !important;
}
.stMetric {
background-color: #333333;
border-radius: 10px;
padding: 15px;
}
.architecture {
font-family: monospace;
color: #00ff00;
white-space: pre-wrap;
background-color: #1a1a1a;
padding: 20px;
border-radius: 10px;
border: 1px solid #00ff00;
}
.token-table {
margin-top: 20px;
border: 1px solid #00ff00;
border-radius: 5px;
}
</style>
"""

# The raw <style> element is only emitted because unsafe_allow_html is set.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Enhanced model database, keyed by the display name shown in the sidebar.
#
# Fields of every entry:
#   model_name    Hugging Face Hub identifier passed to from_pretrained().
#   type          Architecture family; selects the diagram layout.
#   layers        Layer count (the diagram uses it for each stack of a
#                 "Seq2Seq" model).
#   heads         Attention heads per layer.
#   params        Parameter count in millions.
#   downloads     Display string only.
#   release_year  Display value only.
#   gpu_req / cpu_req / ram_req
#                 Display strings for the hardware tab.
#
# "heads" follows the published checkpoint configs: facebook/bart-base uses
# 12 heads (16 belongs to bart-large) and google/electra-small-discriminator
# uses 4.
MODELS = {
    "BERT": {
        "model_name": "bert-base-uncased", "type": "Encoder",
        "layers": 12, "heads": 12, "params": 109.48,
        "downloads": "10M+", "release_year": 2018,
        "gpu_req": "4GB+", "cpu_req": "4 cores+", "ram_req": "8GB+",
    },
    "GPT-2": {
        "model_name": "gpt2", "type": "Decoder",
        "layers": 12, "heads": 12, "params": 117,
        "downloads": "8M+", "release_year": 2019,
        "gpu_req": "6GB+", "cpu_req": "4 cores+", "ram_req": "12GB+",
    },
    "T5-Small": {
        "model_name": "t5-small", "type": "Seq2Seq",
        "layers": 6, "heads": 8, "params": 60,
        "downloads": "5M+", "release_year": 2019,
        "gpu_req": "3GB+", "cpu_req": "2 cores+", "ram_req": "6GB+",
    },
    "RoBERTa": {
        "model_name": "roberta-base", "type": "Encoder",
        "layers": 12, "heads": 12, "params": 125,
        "downloads": "7M+", "release_year": 2019,
        "gpu_req": "5GB+", "cpu_req": "4 cores+", "ram_req": "10GB+",
    },
    "DistilBERT": {
        "model_name": "distilbert-base-uncased", "type": "Encoder",
        "layers": 6, "heads": 12, "params": 66,
        "downloads": "9M+", "release_year": 2019,
        "gpu_req": "2GB+", "cpu_req": "2 cores+", "ram_req": "4GB+",
    },
    "ALBERT": {
        "model_name": "albert-base-v2", "type": "Encoder",
        "layers": 12, "heads": 12, "params": 11.8,
        "downloads": "3M+", "release_year": 2019,
        "gpu_req": "1GB+", "cpu_req": "1 core+", "ram_req": "2GB+",
    },
    "ELECTRA": {
        "model_name": "google/electra-small-discriminator", "type": "Encoder",
        "layers": 12, "heads": 4, "params": 13.5,
        "downloads": "2M+", "release_year": 2020,
        "gpu_req": "2GB+", "cpu_req": "2 cores+", "ram_req": "4GB+",
    },
    "XLNet": {
        "model_name": "xlnet-base-cased", "type": "AutoRegressive",
        "layers": 12, "heads": 12, "params": 110,
        "downloads": "4M+", "release_year": 2019,
        "gpu_req": "5GB+", "cpu_req": "4 cores+", "ram_req": "8GB+",
    },
    "BART": {
        "model_name": "facebook/bart-base", "type": "Seq2Seq",
        "layers": 6, "heads": 12, "params": 139,
        "downloads": "6M+", "release_year": 2020,
        "gpu_req": "6GB+", "cpu_req": "4 cores+", "ram_req": "12GB+",
    },
    "DeBERTa": {
        "model_name": "microsoft/deberta-base", "type": "Encoder",
        "layers": 12, "heads": 12, "params": 139,
        "downloads": "3M+", "release_year": 2021,
        "gpu_req": "8GB+", "cpu_req": "6 cores+", "ram_req": "16GB+",
    },
}
def get_model_config(model_name):
    """Fetch the Hugging Face configuration for an entry of ``MODELS``.

    Args:
        model_name: Display key into ``MODELS`` (for example ``"BERT"``).
            Despite the name this is not the Hub identifier; the identifier
            is read from the entry's ``"model_name"`` field.

    Returns:
        The object returned by ``AutoConfig.from_pretrained`` for that
        identifier.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``.
    """
    hub_id = MODELS[model_name]["model_name"]
    return AutoConfig.from_pretrained(hub_id)
def plot_model_comparison(selected_model):
    """Render a bar chart of parameter counts with one model highlighted.

    One bar is drawn per entry of ``MODELS`` (height = its ``"params"``
    value, in millions) and the chart is sent to the page with
    ``st.pyplot``.

    Args:
        selected_model: Key of ``MODELS`` whose bar is coloured green. A name
            that is not in ``MODELS`` leaves every bar in the default colour.
    """
    model_names = list(MODELS)
    params = [info["params"] for info in MODELS.values()]

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        bars = ax.bar(model_names, params)
        if selected_model in MODELS:
            bars[model_names.index(selected_model)].set_color('#00ff00')

        # White text on a dark canvas to match the app's dark styling.
        ax.set_ylabel('Parameters (Millions)', color='white')
        ax.set_title('Model Size Comparison', color='white')
        ax.tick_params(axis='x', rotation=45, colors='white')
        ax.tick_params(axis='y', colors='white')
        ax.set_facecolor('#2c2c2c')
        fig.patch.set_facecolor('#2c2c2c')
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # Streamlit reruns this function on each interaction, so without this
        # the figures would pile up in memory.
        plt.close(fig)
# Layer label and attention caption for the single-stack model types.
_SINGLE_STACK_STYLES = {
    "Encoder": ("Encoder", "Multi-Head Attention"),
    "Decoder": ("Decoder", "Masked Multi-Head Attention"),
}


def _layer_lines(label, index, sublayers):
    """Return the diagram lines for one layer plus its downward connector."""
    return [f"{label} Layer {index}", *sublayers, "│", "▼"]


def visualize_architecture(model_info):
    """Build a plain-text, top-to-bottom diagram of a model's layer stack.

    Args:
        model_info: Mapping with the keys ``"type"``, ``"layers"`` and
            ``"heads"``. ``"Seq2Seq"`` draws an encoder stack and a decoder
            stack of ``layers`` layers each; ``"Encoder"`` and ``"Decoder"``
            draw a single stack. Any other type (for example
            ``"AutoRegressive"``) is drawn as a generic single stack of
            "Transformer" layers, so the diagram is never left without
            layers or an output node.

    Returns:
        The diagram as one newline-joined string, beginning with ``"Input"``
        and ending with ``"[Output]"``.

    Raises:
        KeyError: If one of the three keys is missing from ``model_info``.
    """
    model_type = model_info["type"]
    layers = model_info["layers"]
    heads = model_info["heads"]

    diagram = ["Input", "│", "▼"]

    if model_type == "Seq2Seq":
        encoder_sublayers = ["├─ Self-Attention", "└─ Feed Forward Network"]
        decoder_sublayers = [
            "├─ Masked Self-Attention",
            "├─ Encoder-Decoder Attention",
            "└─ Feed Forward Network",
        ]
        diagram.append("Encoder Stack")
        for index in range(1, layers + 1):
            diagram.extend(_layer_lines("Encoder", index, encoder_sublayers))
        diagram.append("─── [Context] ───")
        diagram.append("Decoder Stack")
        for index in range(1, layers + 1):
            diagram.extend(_layer_lines("Decoder", index, decoder_sublayers))
    else:
        label, attention = _SINGLE_STACK_STYLES.get(
            model_type, ("Transformer", "Multi-Head Attention")
        )
        sublayers = [
            f"├─ {attention}",
            f"│ └─ {heads} Heads",
            "├─ Layer Normalization",
            "└─ Feed Forward Network",
        ]
        diagram.append("[Embedding Layer]")
        for index in range(1, layers + 1):
            diagram.extend(_layer_lines(label, index, sublayers))

    diagram.append("[Output]")
    return "\n".join(diagram)
def visualize_attention_patterns():
    """Render a small placeholder attention heat map.

    The 5x5 matrix is random demo data, not weights taken from any model.
    The scores are passed through a row-wise softmax so that, like real
    attention weights, every row is non-negative and sums to one.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        weights = torch.softmax(torch.randn(5, 5), dim=-1)
        ax.imshow(weights.numpy(), cmap='viridis')
        ax.set_title('Attention Patterns Example', color='white')
        # Tick labels would otherwise be dark text on the dark canvas.
        ax.tick_params(colors='white')
        ax.set_facecolor('#2c2c2c')
        fig.patch.set_facecolor('#2c2c2c')
        st.pyplot(fig)
    finally:
        # Release the figure; pyplot keeps it registered until it is closed
        # and this function runs again on every Streamlit rerun.
        plt.close(fig)
def embedding_projector():
    """Render an interactive 3-D scatter plot of placeholder word embeddings.

    Ten fixed sample words are given random 256-dimensional vectors (demo
    data, not embeddings from a model). The vectors are reduced to three
    dimensions with the method picked in a select box (PCA or t-SNE) and
    drawn with Plotly.
    """
    st.subheader("🔍 Embedding Projector")

    # Sample words for visualization
    words = ["king", "queen", "man", "woman", "computer", "algorithm",
             "neural", "network", "language", "processing"]

    # Dummy 256-dimensional embeddings, converted once so both reducers
    # receive the same NumPy array.
    embeddings = torch.randn(len(words), 256).numpy()

    # Dimensionality reduction down to the three plotted axes
    method = st.selectbox("Reduction Method", ["PCA", "t-SNE"])
    if method == "PCA":
        reducer = PCA(n_components=3)
    else:
        # scikit-learn requires perplexity < n_samples; its default of 30
        # exceeds the ten sample words, so cap it just below the sample count.
        perplexity = min(30.0, max(1.0, len(words) - 1.0))
        reducer = TSNE(n_components=3, perplexity=perplexity)
    reduced = reducer.fit_transform(embeddings)

    # Create interactive 3D plot
    fig = px.scatter_3d(
        x=reduced[:, 0], y=reduced[:, 1], z=reduced[:, 2],
        text=words,
        title=f"Word Embeddings ({method})",
    )
    fig.update_traces(marker=dict(size=5), textposition='top center')
    st.plotly_chart(fig, use_container_width=True)
def hardware_recommendations(model_info):
    """Show the model's GPU, CPU and RAM requirements plus cloud suggestions.

    Args:
        model_info: Mapping that may contain ``"gpu_req"``, ``"cpu_req"`` and
            ``"ram_req"`` display strings. A missing key falls back to
            ``"4GB+"``, ``"4 cores+"`` and ``"8GB+"`` respectively.
    """
    st.subheader("💻 Hardware Recommendations")

    # (metric label, model_info key, fallback when the key is absent)
    requirements = (
        ("Minimum GPU", "gpu_req", "4GB+"),
        ("CPU Recommendation", "cpu_req", "4 cores+"),
        ("RAM Requirement", "ram_req", "8GB+"),
    )
    for column, (label, key, fallback) in zip(st.columns(3), requirements):
        column.metric(label, model_info.get(key, fallback))

    # The cloud list is static text; it does not depend on model_info.
    st.markdown("""
    **Cloud Recommendations:**
    - AWS: g4dn.xlarge instance
    - GCP: n1-standard-4 with T4 GPU
    - Azure: Standard_NC4as_T4_v3
    """)
def model_zoo_statistics():
    """Show a summary table and a parameter-count bar chart for ``MODELS``.

    The table lists release year, downloads and parameter count (in
    millions) for every model; the Plotly bar chart below it plots the
    parameter counts against the model names.
    """
    st.subheader("📊 Model Zoo Statistics")

    # One row per model, indexed by display name.
    df = pd.DataFrame.from_dict(MODELS, orient='index')

    summary_columns = ["release_year", "downloads", "params"]
    column_labels = {
        "release_year": "Release Year",
        "downloads": "Downloads",
        "params": "Params (M)",
    }
    st.dataframe(
        df[summary_columns],
        column_config=column_labels,
        use_container_width=True,
        height=400,
    )

    fig = px.bar(df, x=df.index, y="params", title="Model Parameters Comparison")
    st.plotly_chart(fig, use_container_width=True)
# GPUs the estimator can recommend, smallest first: (display name, memory GB).
_GPU_TIERS = (
    ("RTX 3060", 12),
    ("RTX 3090", 24),
    ("A100 40GB", 40),
    ("A100 80GB", 80),
)


def _recommend_gpu(required_gb):
    """Return the first GPU in ``_GPU_TIERS`` whose memory covers ``required_gb``."""
    for name, capacity_gb in _GPU_TIERS:
        if required_gb <= capacity_gb:
            return name
    return "Multi-GPU setup"


def memory_usage_estimator(model_info):
    """Estimate VRAM for a chosen precision and batch size and suggest a GPU.

    The user picks a numeric precision and a batch size; the estimate is
    shown as a metric together with the smallest GPU from ``_GPU_TIERS`` that
    can hold it, and as a progress bar relative to a 40 GB card.

    Args:
        model_info: Mapping whose ``"params"`` value is the parameter count
            in millions.
    """
    st.subheader("🧮 Memory Usage Estimator")
    precision = st.selectbox("Precision", ["FP32", "FP16", "INT8"])
    batch_size = st.slider("Batch size", 1, 128, 8)

    # Bytes needed to store one parameter at each precision.
    bytes_map = {"FP32": 4, "FP16": 2, "INT8": 1}

    # NOTE(review): this multiplies weight memory by the batch size. Weights
    # are shared across a batch, so this reads as a coarse upper bound rather
    # than a weights-plus-activations model — confirm the intended formula.
    estimated_memory = (
        model_info["params"] * 1e6 * bytes_map[precision] * batch_size
    ) / (1024 ** 3)

    memory_col, gpu_col = st.columns(2)
    with memory_col:
        st.metric("Estimated VRAM", f"{estimated_memory:.1f} GB")
    with gpu_col:
        st.metric("Recommended GPU", _recommend_gpu(estimated_memory))

    # st.progress accepts at most 1.0, so clamp estimates above 40 GB.
    st.progress(min(estimated_memory / 40, 1.0), text="GPU Memory Utilization (of 40GB GPU)")
@st.cache_resource
def _load_tokenizer(model_id):
    """Load the tokenizer for ``model_id`` once and reuse it across reruns."""
    return AutoTokenizer.from_pretrained(model_id)


def _render_tokenization_tab(tokenizer):
    """Draw the tokenization demo: tokens, encoded IDs and their mapping."""
    st.subheader("📝 Tokenization Visualization")
    input_text = st.text_input("Enter Text:", "Hello, how are you?")

    tokens = tokenizer.tokenize(input_text)
    encoded_ids = tokenizer.encode(input_text)

    tokens_col, ids_col = st.columns(2)
    with tokens_col:
        st.markdown("**Tokenized Output**")
        st.write(tokens)
    with ids_col:
        st.markdown("**Token IDs**")
        st.write(encoded_ids)

    st.markdown("**Token-ID Mapping**")
    # Map each displayed token to its own ID. Slicing the encode() output
    # assumed one special token at each end, which misaligns the table or
    # raises a length mismatch for tokenizers that place specials elsewhere.
    token_data = pd.DataFrame({
        "Token": tokens,
        "ID": tokenizer.convert_tokens_to_ids(tokens),
    })
    st.dataframe(
        token_data,
        height=150,
        use_container_width=True,
        column_config={
            "Token": "Token",
            "ID": st.column_config.NumberColumn(
                "ID",
                help="Numerical representation of the token",
                format="%d",
            ),
        },
    )

    st.markdown(f"""
    **Tokenizer Info:**
    - Vocabulary size: `{tokenizer.vocab_size}`
    - Special tokens: `{tokenizer.all_special_tokens}`
    - Padding token: `{tokenizer.pad_token}`
    - Max length: `{tokenizer.model_max_length}`
    """)


def main():
    """Build the page: model picker, headline metrics and seven tabs.

    The model is chosen in the sidebar from ``MODELS``. Its tokenizer is
    loaded through a cached helper because Streamlit re-executes this
    function on every widget interaction.
    """
    st.title("🧠 Transformer Model Visualizer")

    selected_model = st.sidebar.selectbox("Select Model", list(MODELS.keys()))
    model_info = MODELS[selected_model]
    tokenizer = _load_tokenizer(model_info["model_name"])

    headline_metrics = (
        ("Model Type", model_info["type"]),
        ("Layers", model_info["layers"]),
        ("Attention Heads", model_info["heads"]),
        ("Parameters", f"{model_info['params']}M"),
    )
    for column, (label, value) in zip(st.columns(4), headline_metrics):
        column.metric(label, value)

    # Tabs for all 7 sections
    tab1, tab2, tab3, tab4, tab5, tab6, tab7 = st.tabs([
        "Model Structure", "Comparison", "Model Attention",
        "Tokenization", "Embeddings", "Hardware", "Stats & Memory"
    ])

    with tab1:
        st.subheader("Architecture Diagram")
        architecture = visualize_architecture(model_info)
        # The diagram is generated text; it is wrapped in a styled <div> so
        # the CSS class "architecture" can render it in monospace.
        st.markdown(f"<div class='architecture'>{architecture}</div>", unsafe_allow_html=True)
        st.markdown("""
        **Legend:**
        - **Multi-Head Attention**: Self-attention mechanism with multiple parallel heads
        - **Layer Normalization**: Normalization operation between layers
        - **Feed Forward Network**: Position-wise fully connected network
        - **Masked Attention**: Attention with future token masking
        """)

    with tab2:
        st.subheader("Model Size Comparison")
        plot_model_comparison(selected_model)

    with tab3:
        st.subheader("Model-specific Visualizations")
        visualize_attention_patterns()
        if selected_model == "BERT":
            st.write("BERT-specific visualization example")
        elif selected_model == "GPT-2":
            st.write("GPT-2 attention mask visualization")

    with tab4:
        _render_tokenization_tab(tokenizer)

    with tab5:
        embedding_projector()

    with tab6:
        hardware_recommendations(model_info)

    with tab7:
        model_zoo_statistics()
        memory_usage_estimator(model_info)


if __name__ == "__main__":
    main()