Spaces:

Ruurd
/

tini

Running on Zero

Ruurd committed 23 days ago

Commit

ae08b25

1 Parent(s): 6034d83

Clip weights further for confidence-based noising

Files changed (1) hide show

app.py CHANGED Viewed

@@ -82,13 +82,13 @@ def confidence_guided_noising(input_ids, answer_start, confidences, threshold, e
     if num_to_noise == 0:
         return noised
-    # Avoid zero-probability weights
     raw_weights = 1.0 - np.array(confidences[answer_start:])
-    raw_weights = np.clip(raw_weights, 1e-6, None)  # prevent exact 0s
     weights = raw_weights / raw_weights.sum()
     if num_to_noise > len(weights):
-        num_to_noise = len(weights)  # safety: can’t sample more than available
     indices = rng.choice(
         np.arange(answer_start, len(input_ids)),
@@ -97,8 +97,10 @@ def confidence_guided_noising(input_ids, answer_start, confidences, threshold, e
         p=weights
     )
     mixed_probs = token_probabilities.copy()
     mixed_probs[eot_token_id] *= eot_weight
     mixed_probs /= mixed_probs.sum()
     noise = rng.choice(np.arange(vocab_size), size=num_to_noise, p=mixed_probs)
@@ -109,6 +111,7 @@ def confidence_guided_noising(input_ids, answer_start, confidences, threshold, e
 @spaces.GPU
 def generate_diffusion_text(input_ids, answer_start):
     with torch.no_grad():

     if num_to_noise == 0:
         return noised
+    # Avoid zero-probability weights for selection
     raw_weights = 1.0 - np.array(confidences[answer_start:])
+    raw_weights = np.clip(raw_weights, 0.01, None)  # avoid 0s
     weights = raw_weights / raw_weights.sum()
     if num_to_noise > len(weights):
+        num_to_noise = len(weights)  # prevent oversampling
     indices = rng.choice(
         np.arange(answer_start, len(input_ids)),
         p=weights
     )
+    # Avoid zero-probability for token sampling
     mixed_probs = token_probabilities.copy()
     mixed_probs[eot_token_id] *= eot_weight
+    mixed_probs = np.clip(mixed_probs, 1e-5, None)  # fix for EOT weight near 0
     mixed_probs /= mixed_probs.sum()
     noise = rng.choice(np.arange(vocab_size), size=num_to_noise, p=mixed_probs)
 @spaces.GPU
 def generate_diffusion_text(input_ids, answer_start):
     with torch.no_grad():