Spaces:
Running on Zero

Ruurd committed on
Commit
ae08b25
·
1 Parent(s): 6034d83

Clip weights further for confidence-based noising

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -82,13 +82,13 @@ def confidence_guided_noising(input_ids, answer_start, confidences, threshold, e
82
  if num_to_noise == 0:
83
  return noised
84
 
85
- # Avoid zero-probability weights
86
  raw_weights = 1.0 - np.array(confidences[answer_start:])
87
- raw_weights = np.clip(raw_weights, 1e-6, None) # prevent exact 0s
88
  weights = raw_weights / raw_weights.sum()
89
 
90
  if num_to_noise > len(weights):
91
- num_to_noise = len(weights) # safety: can’t sample more than available
92
 
93
  indices = rng.choice(
94
  np.arange(answer_start, len(input_ids)),
@@ -97,8 +97,10 @@ def confidence_guided_noising(input_ids, answer_start, confidences, threshold, e
97
  p=weights
98
  )
99
 
 
100
  mixed_probs = token_probabilities.copy()
101
  mixed_probs[eot_token_id] *= eot_weight
 
102
  mixed_probs /= mixed_probs.sum()
103
 
104
  noise = rng.choice(np.arange(vocab_size), size=num_to_noise, p=mixed_probs)
@@ -109,6 +111,7 @@ def confidence_guided_noising(input_ids, answer_start, confidences, threshold, e
109
 
110
 
111
 
 
112
  @spaces.GPU
113
  def generate_diffusion_text(input_ids, answer_start):
114
  with torch.no_grad():
 
82
  if num_to_noise == 0:
83
  return noised
84
 
85
+ # Avoid zero-probability weights for selection
86
  raw_weights = 1.0 - np.array(confidences[answer_start:])
87
+ raw_weights = np.clip(raw_weights, 0.01, None) # avoid 0s
88
  weights = raw_weights / raw_weights.sum()
89
 
90
  if num_to_noise > len(weights):
91
+ num_to_noise = len(weights) # prevent oversampling
92
 
93
  indices = rng.choice(
94
  np.arange(answer_start, len(input_ids)),
 
97
  p=weights
98
  )
99
 
100
+ # Avoid zero-probability for token sampling
101
  mixed_probs = token_probabilities.copy()
102
  mixed_probs[eot_token_id] *= eot_weight
103
+ mixed_probs = np.clip(mixed_probs, 1e-5, None) # fix for EOT weight near 0
104
  mixed_probs /= mixed_probs.sum()
105
 
106
  noise = rng.choice(np.arange(vocab_size), size=num_to_noise, p=mixed_probs)
 
111
 
112
 
113
 
114
+
115
  @spaces.GPU
116
  def generate_diffusion_text(input_ids, answer_start):
117
  with torch.no_grad():