Make attention mask float
llama_diffusion_model.py
CHANGED  +2 -0
@@ -133,6 +133,8 @@ class CustomTransformerModel(PreTrainedModel):
             raise ValueError(f"Unknown masking type: {self.config.masking_type}")
 
         attention_mask = base_mask.unsqueeze(0).unsqueeze(1).expand(batch_size, 1, seq_len, seq_len).clone()
+        attention_mask = attention_mask.to(dtype=torch.float32)  # required for SDPA and Flash attention
+
 
         with autocast("cuda", dtype=torch.float16):
             outputs = self.llama(
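For context on why the mask is cast to float: PyTorch's SDPA and flash-attention kernels treat a floating-point attn_mask as an additive bias on the attention scores, whereas a boolean mask is interpreted as "True = attend". The sketch below is not the repository's code; the helper name to_additive_float_mask, the tensor shapes, and the direct call to torch.nn.functional.scaled_dot_product_attention are illustrative assumptions showing how an expanded 0/1 keep-mask like the one in the diff can be turned into the float form these kernels expect.

import torch
import torch.nn.functional as F

def to_additive_float_mask(keep: torch.Tensor, dtype=torch.float32) -> torch.Tensor:
    """Turn a {0,1} keep-mask of shape (batch, 1, seq, seq) into an additive float mask."""
    mask = torch.zeros_like(keep, dtype=dtype)
    # Blocked positions get a large negative bias so softmax drives them to ~0.
    mask.masked_fill_(keep == 0, torch.finfo(dtype).min)
    return mask

batch_size, num_heads, seq_len, head_dim = 2, 4, 8, 16
q = torch.randn(batch_size, num_heads, seq_len, head_dim)
k = torch.randn(batch_size, num_heads, seq_len, head_dim)
v = torch.randn(batch_size, num_heads, seq_len, head_dim)

# Example: a causal keep-mask expanded to (batch, 1, seq, seq), mirroring the diff above.
base_mask = torch.tril(torch.ones(seq_len, seq_len))
keep = base_mask.unsqueeze(0).unsqueeze(1).expand(batch_size, 1, seq_len, seq_len).clone()

attn_mask = to_additive_float_mask(keep)  # float32, broadcasts over the head dimension
out = F.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask)
print(out.shape)  # torch.Size([2, 4, 8, 16])

The commit itself only casts the existing mask with .to(dtype=torch.float32); whether the values are 0/1 or already additive is determined by the masking_type branches earlier in the method.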