Spaces: Running on Zero
input_size?
llama_diffusion_model.py  +2 -3

llama_diffusion_model.py  CHANGED
@@ -213,12 +213,11 @@ class CustomTransformerModel(PreTrainedModel):
         self.llama = get_peft_model(self.llama, lora_config)
         self.llama.print_trainable_parameters()  # Print number of trainable parameters
         self.llama = self.llama.to(torch.float16)
-        self.input_size = 256


     def forward(self, input_ids, labels=None, **kwargs):
         batch_size, seq_length = input_ids.shape
-        assert seq_length ==
+        assert seq_length == 256, f"Expected input length input_size, got {seq_length}"

         with autocast("cuda", dtype=torch.float16):  # ✅ Correct future-proof usage

@@ -233,7 +232,7 @@ class CustomTransformerModel(PreTrainedModel):
         loss = None

         if labels is not None:
-            assert labels.shape == (batch_size,
+            assert labels.shape == (batch_size, 256), f"Labels shape mismatch: expected (batch, input_size), got {labels.shape}"

             # Compute loss
             loss_fct = torch.nn.CrossEntropyLoss()
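For context, the patch drops the removed self.input_size attribute and hard-codes the expected sequence length of 256 directly into the two assertions in forward. Below is a minimal, self-contained sketch of that assertion pattern in isolation; the names EXPECTED_SEQ_LEN and check_shapes are introduced here purely for illustration and are not part of the repository, and the surrounding model code (LLaMA backbone, LoRA wrapping, autocast, loss computation) is omitted.

# Minimal sketch of the shape checks added in this commit, shown outside the model.
# EXPECTED_SEQ_LEN mirrors the hard-coded 256 that replaced self.input_size;
# the real checks live inside CustomTransformerModel.forward.
from typing import Optional

import torch

EXPECTED_SEQ_LEN = 256  # hypothetical constant standing in for the hard-coded 256


def check_shapes(input_ids: torch.Tensor, labels: Optional[torch.Tensor] = None) -> None:
    """Reproduce the two assertions from the patched forward() in isolation."""
    batch_size, seq_length = input_ids.shape

    # Mirrors: assert seq_length == 256, ...
    assert seq_length == EXPECTED_SEQ_LEN, (
        f"Expected input length {EXPECTED_SEQ_LEN}, got {seq_length}"
    )

    if labels is not None:
        # Mirrors: assert labels.shape == (batch_size, 256), ...
        assert labels.shape == (batch_size, EXPECTED_SEQ_LEN), (
            f"Labels shape mismatch: expected ({batch_size}, {EXPECTED_SEQ_LEN}), "
            f"got {tuple(labels.shape)}"
        )


# Example usage: a batch of 4 sequences of length 256 passes; length 128 raises.
if __name__ == "__main__":
    ok = torch.randint(0, 32000, (4, 256))
    check_shapes(ok, labels=ok.clone())
    try:
        check_shapes(torch.randint(0, 32000, (4, 128)))
    except AssertionError as err:
        print("Caught expected failure:", err)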