Spaces:

kisate-team
/

self-explanation

Sleeping

App Files Files Community

kisate committed on Jun 27, 2024

Commit

73ab266

1 Parent(s): ae6c4fd

Test app

Browse files

Files changed (7) hide show

app.py +81 -0
requirements.txt +5 -0
weights/caches/phi-l12-r4-st0.25x128-activations.parquet +3 -0
weights/caches/phi-l14-r4-st0.25x128-activations.parquet +3 -0
weights/caches/phi-l16-r4-st0.25x128-activations.parquet +3 -0
weights/caches/phi-l18-r4-st0.25x128-activations.parquet +3 -0
weights/tokens.parquet +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import gradio as gr
+import pyarrow.parquet as pq
+import pyarrow.compute as pc
+from transformers import AutoTokenizer
+import os
+import numpy as np
+token_table = pq.read_table("weights/tokens.parquet")
+cache_path = "weights/caches"
+parquets = os.listdir(cache_path)
+TOKENIZER = "microsoft/Phi-3-mini-4k-instruct"
+nearby = 8
+stride = 0.25
+n_bins = 10
+with gr.Blocks() as demo:
+    feature_table = gr.State(None)
+    tokenizer_name = gr.Textbox(TOKENIZER)
+    dropdown = gr.Dropdown(parquets)
+    feature_input = gr.Number(0)
+    token_range = gr.Number(64)
+    frequency = gr.Number(0, label="Total frequency (%)")
+    histogram = gr.LinePlot(x="activation", y="freq")
+    cm = gr.HighlightedText()
+    frame = gr.Highlightedtext(
+        show_legend=True
+    )
+    def update(cache_name, feature, tokenizer_name, token_range):
+        if cache_name is None:
+            return
+        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+        table = pq.read_table(f"{cache_path}/{cache_name}")
+        table_feat = table.filter(pc.field("feature") == feature).to_pandas()
+        freq_t = table_feat[["activation", "freq"]]
+        total_freq = float(table_feat["freq"].sum()) * 100
+        table_feat = table_feat[table_feat["activation"] > 0]
+        table_feat = table_feat[table_feat["freq"] > 0]
+        table_feat = table_feat.sort_values("activation", ascending=False)
+        texts = table_feat["token"].apply(
+            lambda x: tokenizer.decode(token_table[max(0, x - nearby - 1):x + nearby + 1]["tokens"].to_numpy())
+        )
+        texts = [tokenizer.tokenize(text) for text in texts]
+        activations = table_feat["nearby"].to_numpy()
+        if len(activations) > 0:
+            activations = np.stack(activations) * stride
+            max_act = table_feat["activation"].max()
+            activations = activations / max_act
+            highlight_data = [
+                [(token, activation) for token, activation in zip(text, activation)] + [("\n", 0)]
+                for text, activation in zip(texts, activations)
+            ]
+            flat_data = [item for sublist in highlight_data for item in sublist]
+            color_map_data = [i / n_bins for i in range(n_bins + 1)]
+            color_map_data = [(f"{i*max_act:.2f}", i) for i in color_map_data]
+        else:
+            flat_data = []
+            color_map_data = []
+        return flat_data, color_map_data, freq_t, total_freq
+    dropdown.change(update, [dropdown, feature_input, tokenizer_name, token_range], [frame, cm, histogram, frequency])
+    feature_input.change(update, [dropdown, feature_input, tokenizer_name, token_range], [frame, cm, histogram, frequency])
+if __name__ == "__main__":
+    demo.launch(share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+pyarrow
+transformers[cpu]
+numpy
+pandas
+datasets

weights/caches/phi-l12-r4-st0.25x128-activations.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b36de99f765834429dede8b705b92fa9e0fd804bf3a35f323d3c964fae0158d0
+size 12256546

weights/caches/phi-l14-r4-st0.25x128-activations.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1286d1fceb351bc6d67df3491892738fa515e81e1a7543e7a92024b535c6954a
+size 15270782

weights/caches/phi-l16-r4-st0.25x128-activations.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bd1a604f72bf11d03f652f48d4a18d093a745312226c123baefabd77bad7e5e5
+size 12232213

weights/caches/phi-l18-r4-st0.25x128-activations.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c9331f82be59a4eaa9297fd12670462312d084839f471a4e5db109db54b8439
+size 13454437

weights/tokens.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:abc26a2910593929e66edd0549529b0768562a225efe26960c619b41495394a8
+size 1550772