Dmitrii committed on
Commit
7e427fb
·
1 Parent(s): f0427f1

extra tokens option

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -71,6 +71,7 @@ with gr.Blocks() as demo:
71
  return layer_dropdown
72
 
73
  frequency = gr.Number(0, label="Total frequency (%)")
 
74
 
75
  # layer_dropdown.input(update_features, layer_dropdown, feature_dropdown)
76
  # histogram = gr.LinePlot(x="activation", y="freq")
@@ -85,7 +86,7 @@ with gr.Blocks() as demo:
85
  cm = gr.HighlightedText()
86
  frame = gr.Highlightedtext()
87
 
88
- def update(revision, layer, feature, tokenizer_name):
89
  tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
90
  table = pq.read_table(make_cache_name(layer, revision))
91
  table_feat = table.filter(pc.field("feature") == feature).to_pandas()
@@ -99,13 +100,13 @@ with gr.Blocks() as demo:
99
  table_feat = table_feat.sort_values("activation", ascending=False)
100
 
101
  texts = table_feat["token"].apply(
102
- lambda x: [tokenizer.decode(y).replace("\n", " ") for y in token_table[max(0, x - nearby + 1):x + nearby + 2]["tokens"].to_numpy()]
103
  ).tolist()
104
 
105
  # texts = [tokenizer.tokenize(text) for text in texts]
106
  activations = table_feat["nearby"].to_numpy()
107
 
108
- activations = [a for i, a in enumerate(activations) if len(texts[i]) > 0]
109
  texts = [text for text in texts if len(text) > 0]
110
 
111
  for t, a in zip(texts, activations):
@@ -144,7 +145,7 @@ with gr.Blocks() as demo:
144
 
145
 
146
  # feature_dropdown.change(update, [layer_dropdown, feature_dropdown, tokenizer_name], [frame, cm, frequency, autoi_expl, selfe_expl])
147
- feature_input.change(update, [revision_dropdown, layer_dropdown, feature_input, tokenizer_name], [frame, cm, frequency, autoi_expl, selfe_expl])
148
 
149
 
150
  if __name__ == "__main__":
 
71
  return layer_dropdown
72
 
73
  frequency = gr.Number(0, label="Total frequency (%)")
74
+ extra_tokens = gr.Number(0, label="Extra Max Act Tokens")
75
 
76
  # layer_dropdown.input(update_features, layer_dropdown, feature_dropdown)
77
  # histogram = gr.LinePlot(x="activation", y="freq")
 
86
  cm = gr.HighlightedText()
87
  frame = gr.Highlightedtext()
88
 
89
+ def update(revision, layer, feature, extra_tokens, tokenizer_name):
90
  tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
91
  table = pq.read_table(make_cache_name(layer, revision))
92
  table_feat = table.filter(pc.field("feature") == feature).to_pandas()
 
100
  table_feat = table_feat.sort_values("activation", ascending=False)
101
 
102
  texts = table_feat["token"].apply(
103
+ lambda x: [tokenizer.decode(y).replace("\n", " ") for y in token_table[max(0, x - nearby + 1 - extra_tokens):x + extra_tokens + nearby + 2]["tokens"].to_numpy()]
104
  ).tolist()
105
 
106
  # texts = [tokenizer.tokenize(text) for text in texts]
107
  activations = table_feat["nearby"].to_numpy()
108
 
109
+ activations = [[0] * extra_tokens + a.tolist() + [0] * extra_tokens for i, a in enumerate(activations) if len(texts[i]) > 0]
110
  texts = [text for text in texts if len(text) > 0]
111
 
112
  for t, a in zip(texts, activations):
 
145
 
146
 
147
  # feature_dropdown.change(update, [layer_dropdown, feature_dropdown, tokenizer_name], [frame, cm, frequency, autoi_expl, selfe_expl])
148
+ feature_input.change(update, [revision_dropdown, layer_dropdown, feature_input, extra_tokens, tokenizer_name], [frame, cm, frequency, autoi_expl, selfe_expl])
149
 
150
 
151
  if __name__ == "__main__":