
Ruurd committed · Commit 02f6e21 · verified · 1 parent: 034cffe

Use updated settings for initial and clustered noise

Files changed (1):
  1. app.py +6 -6
app.py CHANGED
@@ -57,7 +57,7 @@ def get_noising_schedule(i, max_it, sharpness=5.0):
     x = i / max_it
     return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
 
-def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, clustering=0.5, noise_start = 0.5):
+def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, clustering=0.5, noise_start = 1.0):
     noised = input_ids.copy()
     answer_len = len(noised) - answer_start
     num_to_noise = int(threshold * answer_len * noise_start)
@@ -89,10 +89,10 @@ def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, clust
 
 
 # Add new noising function
-def confidence_guided_noising(input_ids, answer_start, confidences, threshold, eot_weight, noise_clipping):
+def confidence_guided_noising(input_ids, answer_start, confidences, noise_clipping, threshold=1.0, eot_weight = 1.0, noise_start = 1.0):
     noised = input_ids.copy()
     answer_len = len(input_ids) - answer_start
-    num_to_noise = int(threshold * answer_len)
+    num_to_noise = int(threshold * answer_len * noise_start)
 
     if num_to_noise == 0:
         return noised
@@ -164,8 +164,8 @@ def diffusion_chat(question, eot_weight, max_it, pause_length, sharpness, cluste
 
     ori_input_tokens = input_ids
     current_tokens, just_noised_indices = noisify_answer(
-        ori_input_tokens, answer_start, threshold=1.0, eot_weight=eot_weight, clustering=clustering
-    )
+        current_tokens, answer_start, threshold=1.0, eot_weight=eot_weight, clustering=clustering, noise_start = noise_start,
+    )
     last_tokens = []
     prev_decoded_tokens = []
 
@@ -209,7 +209,7 @@ def diffusion_chat(question, eot_weight, max_it, pause_length, sharpness, cluste
         threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
         if use_confidence_noising:
            noised_answer = confidence_guided_noising(
-                current_tokens, answer_start, confidences, threshold, eot_weight, noise_clipping
+                current_tokens, answer_start, confidences, noise_clipping, threshold=threshold, eot_weight=eot_weight, noise_start=noise_start
            )
            just_noised_indices = []
        else:
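
For context, the updated noisify_answer default (noise_start = 1.0 instead of 0.5) means that in diffusion_chat, where threshold is fixed at 1.0 for the initial pass, the noise_start setting directly controls the fraction of answer tokens that start out masked. A minimal sketch of that scaling follows; the MASK_TOKEN_ID placeholder and the uniform position sampling are assumptions for illustration, not the Space's actual weighted/clustered sampling.

# Minimal sketch of the updated noisify_answer behaviour, assuming token ids
# come in as a Python list and using a placeholder MASK_TOKEN_ID; the real
# app.py does weighted, clustered sampling (eot_weight, clustering), omitted here.
import numpy as np

MASK_TOKEN_ID = 0  # placeholder; the Space uses its tokenizer's noise/mask token

def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0,
                   clustering=0.5, noise_start=1.0):
    noised = input_ids.copy()
    answer_len = len(noised) - answer_start
    # noise_start scales how much of the answer is re-masked; with the new
    # default of 1.0, the initial pass masks threshold * answer_len tokens.
    num_to_noise = int(threshold * answer_len * noise_start)
    if num_to_noise == 0:
        return noised, []
    # Uniform sampling stands in for the weighted/clustered choice in app.py.
    indices = np.random.choice(
        np.arange(answer_start, len(noised)), size=num_to_noise, replace=False
    )
    for idx in indices:
        noised[idx] = MASK_TOKEN_ID
    return noised, list(indices)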
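Similarly, confidence_guided_noising now takes noise_clipping positionally, with threshold, eot_weight and noise_start as keyword defaults, and applies the same noise_start scaling to num_to_noise. The rough sketch below shows one way confidence-weighted re-noising with a clipping floor could look; the weighting scheme, the per-token confidences layout, and the unused eot_weight handling are assumptions, not the Space's exact code.

# Illustrative only: lower-confidence tokens are more likely to be re-noised,
# and noise_clipping floors the sampling weights so no position is excluded.
# confidences is assumed to hold one value per token of the full sequence;
# eot_weight is accepted but unused in this sketch.
import numpy as np

MASK_TOKEN_ID = 0  # placeholder noise/mask token id

def confidence_guided_noising(input_ids, answer_start, confidences,
                              noise_clipping, threshold=1.0, eot_weight=1.0,
                              noise_start=1.0):
    noised = input_ids.copy()
    answer_len = len(input_ids) - answer_start
    num_to_noise = int(threshold * answer_len * noise_start)
    if num_to_noise == 0:
        return noised
    # Turn confidences into sampling weights: low confidence -> high weight,
    # clipped from below by noise_clipping to keep every token selectable.
    weights = 1.0 - np.asarray(confidences[answer_start:], dtype=float)
    weights = np.clip(weights, noise_clipping, None)
    weights /= weights.sum()
    indices = np.random.choice(
        np.arange(answer_start, len(input_ids)),
        size=min(num_to_noise, answer_len), replace=False, p=weights,
    )
    for idx in indices:
        noised[idx] = MASK_TOKEN_ID
    return noised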