# llm-token-counter/pages/huggingface_models.py
# Author: Timothy S. Phan
# Splits into different pages to handle different models
# Commit: 176f432
import streamlit as st
from huggingface_hub import login
from transformers import AutoTokenizer
import os
from utils import HF_MODEL_OPTIONS
def _rerun():
    """Restart the current script run on both new and old Streamlit releases."""
    # ``st.experimental_rerun`` was deprecated in favour of ``st.rerun`` and
    # later removed; prefer the new name and fall back only when it is absent.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()


def _render_login(hf_token):
    """Render the login/logout controls and keep ``st.session_state`` in sync.

    Args:
        hf_token: Token typed by the user or read from ``HF_TOKEN``; may be
            an empty string when neither is available.
    """
    # Login status tracker, created on the first run of a session.
    if "hf_logged_in" not in st.session_state:
        st.session_state.hf_logged_in = False

    if st.session_state.hf_logged_in:
        st.success("Logged in to Hugging Face")
        if st.button("Logout"):
            st.session_state.hf_logged_in = False
            st.session_state.hf_token = ""  # Clear the token from session state
            # NOTE(review): this only resets session state; confirm whether
            # huggingface_hub.logout() should also be called here.
            st.success("Successfully logged out from Hugging Face")
            _rerun()  # Rerun the script to refresh the UI
        return

    if not st.button("Login to Hugging Face"):
        return

    if not hf_token:
        st.error(
            "No Hugging Face token found. Please enter a token or set the HF_TOKEN environment variable."
        )
        return

    try:
        login(token=hf_token)
    except Exception as e:  # UI boundary: report any login failure to the user
        st.error(f"Login failed: {str(e)}")
    else:
        st.session_state.hf_logged_in = True
        st.session_state.hf_token = hf_token  # Store the token in session state
        st.success("Successfully logged in to Hugging Face")


def _render_token_counter():
    """Render the model picker, the message box and the token-count results."""
    # Predefined popular models
    selected_hf_model = st.selectbox("Select Hugging Face Model", HF_MODEL_OPTIONS)

    # Custom model input
    if selected_hf_model == "Other (specify)":
        custom_hf_model = st.text_input(
            "Enter model name (e.g., organization/model-name)"
        )
        # Strip so that a whitespace-only entry also falls back to gpt2
        # instead of being passed to the tokenizer loader as a model name.
        selected_hf_model = (custom_hf_model or "").strip() or "gpt2"

    # User message input for HF
    hf_user_message = st.text_area(
        "Enter your message here",
        placeholder="Hello, world!",
        height=200,
        key="hf_message",
    )

    # Button to count tokens for HF
    if not st.button("Count Tokens (Hugging Face)"):
        return

    if not hf_user_message:
        st.warning("Please enter a message to count tokens")
        return

    try:
        with st.spinner(f"Loading tokenizer for {selected_hf_model}..."):
            tokenizer = AutoTokenizer.from_pretrained(selected_hf_model)

        # Count tokens in two ways: tokenize() yields the token strings and
        # encode() yields the ids. The two counts can differ, presumably
        # because encode() may add special tokens for some tokenizers.
        tokens = tokenizer.tokenize(hf_user_message)
        token_ids = tokenizer.encode(hf_user_message)

        # Display results
        st.success(f"Token count: {len(tokens)}")
        st.success(f"Token IDs count: {len(token_ids)}")

        # Show the actual tokens
        with st.expander("View Token Details"):
            col1, col2 = st.columns(2)
            with col1:
                st.subheader("Tokens")
                st.json([f"{i}: {token}" for i, token in enumerate(tokens)])
            with col2:
                st.subheader("Token IDs")
                st.json(
                    [f"{i}: {token_id}" for i, token_id in enumerate(token_ids)]
                )
    except Exception as e:  # UI boundary: surface loader/tokenizer errors
        st.error(f"An error occurred: {str(e)}")


def display():
    """
    Display the Hugging Face models tab.

    Renders a token input, login/logout controls and, once a token is
    available (typed, read from the ``HF_TOKEN`` environment variable, or
    from a logged-in session), the token-counting UI.
    """
    st.header("Hugging Face Models")

    # HF Token input
    hf_token = st.text_input(
        "Enter your Hugging Face Token",
        type="password",
        help="⚠️ Never share your token. Leave empty to use HF_TOKEN environment variable.",
    )

    # If no token provided, try to get from environment
    if not hf_token:
        hf_token = os.environ.get("HF_TOKEN", "")

    _render_login(hf_token)

    # The counter is shown when logged in or when any token is available.
    if st.session_state.hf_logged_in or hf_token:
        _render_token_counter()