Spaces:
Sleeping
Sleeping
Dmitrii
committed on
Commit
·
7e427fb
1
Parent(s):
f0427f1
extra tokens option
Browse files
app.py
CHANGED
@@ -71,6 +71,7 @@ with gr.Blocks() as demo:
|
|
71 |
return layer_dropdown
|
72 |
|
73 |
frequency = gr.Number(0, label="Total frequency (%)")
|
|
|
74 |
|
75 |
# layer_dropdown.input(update_features, layer_dropdown, feature_dropdown)
|
76 |
# histogram = gr.LinePlot(x="activation", y="freq")
|
@@ -85,7 +86,7 @@ with gr.Blocks() as demo:
|
|
85 |
cm = gr.HighlightedText()
|
86 |
frame = gr.Highlightedtext()
|
87 |
|
88 |
-
def update(revision, layer, feature, tokenizer_name):
|
89 |
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
|
90 |
table = pq.read_table(make_cache_name(layer, revision))
|
91 |
table_feat = table.filter(pc.field("feature") == feature).to_pandas()
|
@@ -99,13 +100,13 @@ with gr.Blocks() as demo:
|
|
99 |
table_feat = table_feat.sort_values("activation", ascending=False)
|
100 |
|
101 |
texts = table_feat["token"].apply(
|
102 |
-
lambda x: [tokenizer.decode(y).replace("\n", " ") for y in token_table[max(0, x - nearby + 1):x + nearby + 2]["tokens"].to_numpy()]
|
103 |
).tolist()
|
104 |
|
105 |
# texts = [tokenizer.tokenize(text) for text in texts]
|
106 |
activations = table_feat["nearby"].to_numpy()
|
107 |
|
108 |
-
activations = [a for i, a in enumerate(activations) if len(texts[i]) > 0]
|
109 |
texts = [text for text in texts if len(text) > 0]
|
110 |
|
111 |
for t, a in zip(texts, activations):
|
@@ -144,7 +145,7 @@ with gr.Blocks() as demo:
|
|
144 |
|
145 |
|
146 |
# feature_dropdown.change(update, [layer_dropdown, feature_dropdown, tokenizer_name], [frame, cm, frequency, autoi_expl, selfe_expl])
|
147 |
-
feature_input.change(update, [revision_dropdown, layer_dropdown, feature_input, tokenizer_name], [frame, cm, frequency, autoi_expl, selfe_expl])
|
148 |
|
149 |
|
150 |
if __name__ == "__main__":
|
|
|
71 |
return layer_dropdown
|
72 |
|
73 |
frequency = gr.Number(0, label="Total frequency (%)")
|
74 |
+
extra_tokens = gr.Number(0, label="Extra Max Act Tokens")
|
75 |
|
76 |
# layer_dropdown.input(update_features, layer_dropdown, feature_dropdown)
|
77 |
# histogram = gr.LinePlot(x="activation", y="freq")
|
|
|
86 |
cm = gr.HighlightedText()
|
87 |
frame = gr.Highlightedtext()
|
88 |
|
89 |
+
def update(revision, layer, feature, extra_tokens, tokenizer_name):
|
90 |
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
|
91 |
table = pq.read_table(make_cache_name(layer, revision))
|
92 |
table_feat = table.filter(pc.field("feature") == feature).to_pandas()
|
|
|
100 |
table_feat = table_feat.sort_values("activation", ascending=False)
|
101 |
|
102 |
texts = table_feat["token"].apply(
|
103 |
+
lambda x: [tokenizer.decode(y).replace("\n", " ") for y in token_table[max(0, x - nearby + 1 - extra_tokens):x + extra_tokens + nearby + 2]["tokens"].to_numpy()]
|
104 |
).tolist()
|
105 |
|
106 |
# texts = [tokenizer.tokenize(text) for text in texts]
|
107 |
activations = table_feat["nearby"].to_numpy()
|
108 |
|
109 |
+
activations = [[0] * extra_tokens + a.tolist() + [0] * extra_tokens for i, a in enumerate(activations) if len(texts[i]) > 0]
|
110 |
texts = [text for text in texts if len(text) > 0]
|
111 |
|
112 |
for t, a in zip(texts, activations):
|
|
|
145 |
|
146 |
|
147 |
# feature_dropdown.change(update, [layer_dropdown, feature_dropdown, tokenizer_name], [frame, cm, frequency, autoi_expl, selfe_expl])
|
148 |
+
feature_input.change(update, [revision_dropdown, layer_dropdown, feature_input, extra_tokens, tokenizer_name], [frame, cm, frequency, autoi_expl, selfe_expl])
|
149 |
|
150 |
|
151 |
if __name__ == "__main__":
|