Upload folder using huggingface_hub

- README.md +1 -0
- generate_example.py +1 -0
- model.py +3 -8
README.md CHANGED

````diff
@@ -116,6 +116,7 @@ import urllib.request
 url = f"https://huggingface.co/rasbt/llama-3.2-from-scratch/resolve/main/{MODEL_FILE}"
 
 if not os.path.exists(MODEL_FILE):
+    print(f"Downloading {MODEL_FILE}...")
     urllib.request.urlretrieve(url, MODEL_FILE)
     print(f"Downloaded to {MODEL_FILE}")
 ```
````
generate_example.py CHANGED

```diff
@@ -40,6 +40,7 @@ TOP_K = 1
 url = f"https://huggingface.co/rasbt/llama-3.2-from-scratch/resolve/main/{MODEL_FILE}"
 
 if not os.path.exists(MODEL_FILE):
+    print(f"Downloading {MODEL_FILE}...")
     urllib.request.urlretrieve(url, MODEL_FILE)
     print(f"Downloaded to {MODEL_FILE}")
 
```
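Both changes above are the same one-line addition: a status message printed before the download begins, so the script no longer sits silently while fetching a large checkpoint. A minimal self-contained sketch of the resulting logic (the `MODEL_FILE` value here is a placeholder; the repo hosts several weight files):

```python
import os
import urllib.request

MODEL_FILE = "llama3.2-1B-instruct.pth"  # placeholder; any weight file from the repo
url = f"https://huggingface.co/rasbt/llama-3.2-from-scratch/resolve/main/{MODEL_FILE}"

if not os.path.exists(MODEL_FILE):
    print(f"Downloading {MODEL_FILE}...")  # added in this commit: feedback before the download
    urllib.request.urlretrieve(url, MODEL_FILE)
    print(f"Downloaded to {MODEL_FILE}")
```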
model.py CHANGED

```diff
@@ -97,11 +97,8 @@ class TransformerBlock(nn.Module):
         self.att = GroupedQueryAttention(
             d_in=cfg["emb_dim"],
             d_out=cfg["emb_dim"],
-            context_length=cfg["context_length"],
             num_heads=cfg["n_heads"],
             num_kv_groups=cfg["n_kv_groups"],
-            rope_base=cfg["rope_base"],
-            rope_config=cfg["rope_freq"],
             dtype=cfg["dtype"]
         )
         self.ff = FeedForward(cfg)
@@ -140,10 +137,8 @@ class FeedForward(nn.Module):
 
 class GroupedQueryAttention(nn.Module):
     def __init__(
-        self, d_in, d_out,
+        self, d_in, d_out, num_heads,
         num_kv_groups,
-        rope_base=10_000,
-        rope_config=None,
         dtype=None
     ):
         super().__init__()
```
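These two hunks drop the RoPE-related arguments (`context_length`, `rope_base`, `rope_config`) from `GroupedQueryAttention`. The diff does not show where that logic moved, but a common pattern for this kind of refactor, and a plausible reading here, is to precompute the RoPE cos/sin tables once at the model level and pass them into each block's forward call instead of having every attention module rebuild them. A sketch under that assumption (`compute_rope_params` and the wiring below are illustrative, not taken from the diff):

```python
import torch

def compute_rope_params(head_dim, theta_base=10_000, context_length=4096):
    # Precompute the RoPE angle tables once, outside the attention module.
    # Llama 3.2's frequency rescaling (the old rope_config/rope_freq) is
    # omitted in this sketch.
    inv_freq = 1.0 / (theta_base ** (torch.arange(0, head_dim, 2).float() / head_dim))
    positions = torch.arange(context_length, dtype=torch.float32)
    angles = positions[:, None] * inv_freq[None, :]  # (context_length, head_dim // 2)
    angles = torch.cat([angles, angles], dim=1)      # (context_length, head_dim)
    return torch.cos(angles), torch.sin(angles)

# Hypothetical wiring: the model creates the tables once and shares them,
# which is why the per-block constructor no longer needs the RoPE settings:
#   cos, sin = compute_rope_params(head_dim=cfg["emb_dim"] // cfg["n_heads"])
#   x = transformer_block(x, mask, cos, sin)
```

The remaining hunk in model.py touches the `generate` function: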
```diff
@@ -306,14 +301,14 @@ def generate(model, idx, max_new_tokens, context_size, temperature=0.0, top_k=None):
         logits = model(idx_cond)
         logits = logits[:, -1, :]
 
-        #
+        # Filter logits with top_k sampling
         if top_k is not None:
             # Keep only top_k values
             top_logits, _ = torch.topk(logits, top_k)
             min_val = top_logits[:, -1]
             logits = torch.where(logits < min_val, torch.tensor(float('-inf')).to(logits.device), logits)
 
-        #
+        # Apply temperature scaling
        if temperature > 0.0:
             logits = logits / temperature
 
```
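This last hunk only fills in two previously empty comments, but it annotates the decoding logic in `generate`: top-k filtering first, then temperature scaling, then sampling. As a standalone reference, a minimal sketch of that sampling step (the function name and greedy fallback are illustrative; `min_val` keeps its batch dimension here so the comparison broadcasts for any batch size):

```python
import torch

def sample_next_token(logits, temperature=0.0, top_k=None):
    # logits: (batch, vocab_size) scores for the last position only.

    # Filter logits with top_k sampling: keep the k largest scores per row,
    # set everything below the k-th largest to -inf.
    if top_k is not None:
        top_logits, _ = torch.topk(logits, top_k)
        min_val = top_logits[:, -1:]  # (batch, 1), broadcasts over the vocab dim
        logits = torch.where(logits < min_val,
                             torch.tensor(float("-inf"), device=logits.device),
                             logits)

    # Apply temperature scaling, then sample; temperature == 0.0 means greedy.
    if temperature > 0.0:
        probs = torch.softmax(logits / temperature, dim=-1)
        return torch.multinomial(probs, num_samples=1)  # (batch, 1)
    return torch.argmax(logits, dim=-1, keepdim=True)   # (batch, 1)
```

In a generation loop the result would be appended to the running sequence, e.g. `idx = torch.cat([idx, sample_next_token(logits, temperature=0.7, top_k=50)], dim=1)`.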