iisadia committed
Commit db0eecf · verified · 1 parent: cf92986

Update app.py

Files changed (1): app.py (+130 -14)
app.py CHANGED
@@ -2,6 +2,9 @@ import streamlit as st
  import matplotlib.pyplot as plt
  import pandas as pd
  import torch
+ import plotly.express as px
+ from sklearn.decomposition import PCA
+ from sklearn.manifold import TSNE
  from transformers import AutoConfig, AutoTokenizer
 
  # Page configuration
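
The three new imports pull in two packages beyond the existing stack: plotly and scikit-learn. The commit touches only app.py, so assuming the Space installs its dependencies from a requirements.txt (not shown in this diff), that file would need roughly:

    # hypothetical requirements.txt for this Space — not part of this commit
    streamlit
    torch
    transformers
    matplotlib
    pandas
    plotly
    scikit-learn
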
@@ -46,18 +49,38 @@ st.markdown("""
  </style>
  """, unsafe_allow_html=True)
 
- # Model database
+ # Enhanced Model database
  MODELS = {
-     "BERT": {"model_name": "bert-base-uncased", "type": "Encoder", "layers": 12, "heads": 12, "params": 109.48},
-     "GPT-2": {"model_name": "gpt2", "type": "Decoder", "layers": 12, "heads": 12, "params": 117},
-     "T5-Small": {"model_name": "t5-small", "type": "Seq2Seq", "layers": 6, "heads": 8, "params": 60},
-     "RoBERTa": {"model_name": "roberta-base", "type": "Encoder", "layers": 12, "heads": 12, "params": 125},
-     "DistilBERT": {"model_name": "distilbert-base-uncased", "type": "Encoder", "layers": 6, "heads": 12, "params": 66},
-     "ALBERT": {"model_name": "albert-base-v2", "type": "Encoder", "layers": 12, "heads": 12, "params": 11.8},
-     "ELECTRA": {"model_name": "google/electra-small-discriminator", "type": "Encoder", "layers": 12, "heads": 12, "params": 13.5},
-     "XLNet": {"model_name": "xlnet-base-cased", "type": "AutoRegressive", "layers": 12, "heads": 12, "params": 110},
-     "BART": {"model_name": "facebook/bart-base", "type": "Seq2Seq", "layers": 6, "heads": 16, "params": 139},
-     "DeBERTa": {"model_name": "microsoft/deberta-base", "type": "Encoder", "layers": 12, "heads": 12, "params": 139}
+     "BERT": {"model_name": "bert-base-uncased", "type": "Encoder", "layers": 12, "heads": 12,
+              "params": 109.48, "downloads": "10M+", "release_year": 2018, "gpu_req": "4GB+",
+              "cpu_req": "4 cores+", "ram_req": "8GB+"},
+     "GPT-2": {"model_name": "gpt2", "type": "Decoder", "layers": 12, "heads": 12,
+               "params": 117, "downloads": "8M+", "release_year": 2019, "gpu_req": "6GB+",
+               "cpu_req": "4 cores+", "ram_req": "12GB+"},
+     "T5-Small": {"model_name": "t5-small", "type": "Seq2Seq", "layers": 6, "heads": 8,
+                  "params": 60, "downloads": "5M+", "release_year": 2019, "gpu_req": "3GB+",
+                  "cpu_req": "2 cores+", "ram_req": "6GB+"},
+     "RoBERTa": {"model_name": "roberta-base", "type": "Encoder", "layers": 12, "heads": 12,
+                 "params": 125, "downloads": "7M+", "release_year": 2019, "gpu_req": "5GB+",
+                 "cpu_req": "4 cores+", "ram_req": "10GB+"},
+     "DistilBERT": {"model_name": "distilbert-base-uncased", "type": "Encoder", "layers": 6,
+                    "heads": 12, "params": 66, "downloads": "9M+", "release_year": 2019,
+                    "gpu_req": "2GB+", "cpu_req": "2 cores+", "ram_req": "4GB+"},
+     "ALBERT": {"model_name": "albert-base-v2", "type": "Encoder", "layers": 12, "heads": 12,
+                "params": 11.8, "downloads": "3M+", "release_year": 2019, "gpu_req": "1GB+",
+                "cpu_req": "1 core+", "ram_req": "2GB+"},
+     "ELECTRA": {"model_name": "google/electra-small-discriminator", "type": "Encoder",
+                 "layers": 12, "heads": 12, "params": 13.5, "downloads": "2M+",
+                 "release_year": 2020, "gpu_req": "2GB+", "cpu_req": "2 cores+", "ram_req": "4GB+"},
+     "XLNet": {"model_name": "xlnet-base-cased", "type": "AutoRegressive", "layers": 12,
+               "heads": 12, "params": 110, "downloads": "4M+", "release_year": 2019,
+               "gpu_req": "5GB+", "cpu_req": "4 cores+", "ram_req": "8GB+"},
+     "BART": {"model_name": "facebook/bart-base", "type": "Seq2Seq", "layers": 6, "heads": 16,
+              "params": 139, "downloads": "6M+", "release_year": 2020, "gpu_req": "6GB+",
+              "cpu_req": "4 cores+", "ram_req": "12GB+"},
+     "DeBERTa": {"model_name": "microsoft/deberta-base", "type": "Encoder", "layers": 12,
+                 "heads": 12, "params": 139, "downloads": "3M+", "release_year": 2021,
+                 "gpu_req": "8GB+", "cpu_req": "6 cores+", "ram_req": "16GB+"}
  }
 
  def get_model_config(model_name):
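
The params values in MODELS are hard-coded (in millions). They can be cross-checked against the actual checkpoints; a minimal sketch, independent of the app (it downloads weights, so it is illustrative rather than anything this commit runs):

    from transformers import AutoModel

    model = AutoModel.from_pretrained("distilbert-base-uncased")
    n_params = sum(p.numel() for p in model.parameters())
    print(f"{n_params / 1e6:.1f}M")  # ≈ 66M, matching the table entry
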
@@ -155,6 +178,87 @@ def visualize_attention_patterns():
      fig.patch.set_facecolor('#2c2c2c')
      st.pyplot(fig)
 
+ def embedding_projector():
+     st.subheader("🔍 Embedding Projector")
+ 
+     # Sample words for visualization
+     words = ["king", "queen", "man", "woman", "computer", "algorithm",
+              "neural", "network", "language", "processing"]
+ 
+     # Random placeholder embeddings (256-dim), reduced to 3-D below
+     embeddings = torch.randn(len(words), 256)
+ 
+     # Dimensionality reduction
+     method = st.selectbox("Reduction Method", ["PCA", "t-SNE"])
+ 
+     if method == "PCA":
+         reduced = PCA(n_components=3).fit_transform(embeddings.numpy())
+     else:
+         # t-SNE requires perplexity < n_samples (10 words here)
+         reduced = TSNE(n_components=3, perplexity=5).fit_transform(embeddings.numpy())
+ 
+     # Create interactive 3D plot
+     fig = px.scatter_3d(
+         x=reduced[:, 0], y=reduced[:, 1], z=reduced[:, 2],
+         text=words,
+         title=f"Word Embeddings ({method})"
+     )
+     fig.update_traces(marker=dict(size=5), textposition='top center')
+     st.plotly_chart(fig, use_container_width=True)
+ 
+ def hardware_recommendations(model_info):
+     st.subheader("💻 Hardware Recommendations")
+ 
+     col1, col2, col3 = st.columns(3)
+     with col1:
+         st.metric("Minimum GPU", model_info.get("gpu_req", "4GB+"))
+     with col2:
+         st.metric("CPU Recommendation", model_info.get("cpu_req", "4 cores+"))
+     with col3:
+         st.metric("RAM Requirement", model_info.get("ram_req", "8GB+"))
+ 
+     st.markdown("""
+     **Cloud Recommendations:**
+     - AWS: g4dn.xlarge instance
+     - GCP: n1-standard-4 with T4 GPU
+     - Azure: Standard_NC4as_T4_v3
+     """)
+ 
+ def model_zoo_statistics():
+     st.subheader("📊 Model Zoo Statistics")
+ 
+     df = pd.DataFrame.from_dict(MODELS, orient='index')
+     st.dataframe(
+         df[["release_year", "downloads", "params"]],
+         column_config={
+             "release_year": "Release Year",
+             "downloads": "Downloads",
+             "params": "Params (M)"
+         },
+         use_container_width=True,
+         height=400
+     )
+ 
+     fig = px.bar(df, x=df.index, y="params", title="Model Parameters Comparison")
+     st.plotly_chart(fig, use_container_width=True)
+ 
+ def memory_usage_estimator(model_info):
+     st.subheader("🧮 Memory Usage Estimator")
+ 
+     precision = st.selectbox("Precision", ["FP32", "FP16", "INT8"])
+     batch_size = st.slider("Batch size", 1, 128, 8)
+ 
+     # Rough memory calculation: weight bytes scaled linearly by batch size
+     # as a crude proxy for activation growth
+     bytes_map = {"FP32": 4, "FP16": 2, "INT8": 1}
+     estimated_memory = (model_info["params"] * 1e6 * bytes_map[precision] * batch_size) / (1024**3)
+ 
+     col1, col2 = st.columns(2)
+     with col1:
+         st.metric("Estimated VRAM", f"{estimated_memory:.1f} GB")
+     with col2:
+         st.metric("Recommended GPU", "RTX 3090" if estimated_memory > 24 else "RTX 3060")
+ 
+     st.progress(min(estimated_memory/40, 1.0), text="GPU Memory Utilization (of 40GB GPU)")
+ 
  def main():
      st.title("🧠 Transformer Model Visualizer")
 
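
A caveat on embedding_projector: it plots random vectors, so the 3-D layout carries no linguistic structure, and scikit-learn's t-SNE needs perplexity below the sample count, hence perplexity=5 for the ten sample words. To project real vectors instead, one option is to read rows out of the selected model's input-embedding matrix; a minimal sketch, assuming the chosen words exist as single tokens in the vocabulary (true for these in bert-base-uncased):

    from transformers import AutoModel, AutoTokenizer
    import torch

    tok = AutoTokenizer.from_pretrained("bert-base-uncased")
    model = AutoModel.from_pretrained("bert-base-uncased")

    words = ["king", "queen", "man", "woman"]
    ids = tok.convert_tokens_to_ids(words)
    with torch.no_grad():
        vectors = model.get_input_embeddings()(torch.tensor(ids))  # shape (4, 768)
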
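model_zoo_statistics builds its table with pd.DataFrame.from_dict(MODELS, orient='index'), which turns the model display names into the row index; the bar chart's x=df.index relies on that. A minimal standalone illustration:

    import pandas as pd

    MODELS = {"BERT": {"params": 109.48}, "GPT-2": {"params": 117}}
    df = pd.DataFrame.from_dict(MODELS, orient="index")
    print(df.index.tolist())         # ['BERT', 'GPT-2'] — names become the index
    print(df.loc["BERT", "params"])  # 109.48
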
@@ -173,7 +277,11 @@ def main():
      with col4:
          st.metric("Parameters", f"{model_info['params']}M")
 
-     tab1, tab2, tab3, tab4 = st.tabs(["Model Structure", "Comparison", "Model Attention", "Tokenization"])
+     # Updated tabs with all 7 sections
+     tab1, tab2, tab3, tab4, tab5, tab6, tab7 = st.tabs([
+         "Model Structure", "Comparison", "Model Attention",
+         "Tokenization", "Embeddings", "Hardware", "Stats & Memory"
+     ])
 
      with tab1:
          st.subheader("Architecture Diagram")
@@ -202,11 +310,9 @@ def main():
 
      with tab4:
          st.subheader("📝 Tokenization Visualization")
- 
          input_text = st.text_input("Enter Text:", "Hello, how are you?")
 
          col1, col2 = st.columns(2)
- 
          with col1:
              st.markdown("**Tokenized Output**")
              tokens = tokenizer.tokenize(input_text)
@@ -239,6 +345,16 @@ def main():
      - Padding token: `{tokenizer.pad_token}`
      - Max length: `{tokenizer.model_max_length}`
      """)
+ 
+     with tab5:
+         embedding_projector()
+ 
+     with tab6:
+         hardware_recommendations(model_info)
+ 
+     with tab7:
+         model_zoo_statistics()
+         memory_usage_estimator(model_info)
 
  if __name__ == "__main__":
      main()
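
A worked check of memory_usage_estimator's formula, using the GPT-2 row (117M params) at FP16 with batch size 8:

    params_m = 117                # GPT-2 entry in MODELS
    bytes_per_param = 2           # FP16
    batch_size = 8
    est_gb = params_m * 1e6 * bytes_per_param * batch_size / 1024**3
    print(f"{est_gb:.2f} GB")     # 1.74 GB

Note the simplification: the weights themselves occupy a fixed ~0.22 GB here regardless of batch size; scaling by batch is a crude stand-in for activation memory, so the output is a rough upper bound rather than a measured footprint.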