File size: 2,028 Bytes
176f432
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import streamlit as st

def set_page_config():
    """Configure the Streamlit page: tab title, icon, and centered layout.

    Must be called before any other Streamlit command in the script.
    """
    st.set_page_config(
        page_title="LLM Token Counter",
        page_icon="🤖",
        layout="centered",
    )

def display_about_token_counting():
    """Render a collapsible "About Token Counting" section.

    The expander explains what tokens are, why counting them matters,
    and gives practical tips for reducing token usage.
    """
    about_text = """
        ### What are tokens?

        Tokens are chunks of text that language models process. They can be parts of words, whole words,
        or even punctuation. Different models tokenize text differently.

        ### Why count tokens?

        - **Cost Management**: Understanding token usage helps manage API costs
        - **Model Limitations**: Different models have different token limits
        - **Performance Optimization**: Helps optimize prompts for better responses

        ### Token Counting Tips

        - Shorter messages use fewer tokens
        - Special formatting, code blocks, and unusual characters may use more tokens
        - For Claude models, the system message also counts toward your token usage
        - Hugging Face models may tokenize text differently than Anthropic models
        """
    with st.expander("About Token Counting"):
        st.markdown(about_text)

def display_footer():
    """Render the app footer: a horizontal rule followed by an attribution line."""
    footer_lines = (
        "---",
        "Created with Streamlit, Anthropic API, and Hugging Face Transformers",
    )
    for line in footer_lines:
        st.markdown(line)

# Constants for model options
# Maps human-readable Claude model names (shown in the UI) to the
# versioned Anthropic API model identifiers.
ANTHROPIC_MODEL_OPTIONS = {
    "Claude 3.7 Sonnet": "claude-3-7-sonnet-20250219",
    "Claude 3.5 Sonnet": "claude-3-5-sonnet-20240620",
    # Fixed: was "claude-3-5-haiku-20240307", which copied the Claude 3 Haiku
    # date suffix; the published Claude 3.5 Haiku model ID is dated 20241022.
    "Claude 3.5 Haiku": "claude-3-5-haiku-20241022",
    "Claude 3 Haiku": "claude-3-haiku-20240307",
    "Claude 3 Opus": "claude-3-opus-20240229",
}

# Hugging Face model repo IDs offered in the UI tokenizer dropdown.
# Order is preserved as the display order; the final sentinel entry
# "Other (specify)" lets the user type a custom repo ID instead.
HF_MODEL_OPTIONS = [
    "mistralai/Mistral-Small-24B-Instruct-2501",
    "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
    "google/codegemma-7b",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "microsoft/Phi-4-multimodal-instruct",
    "nvidia/Llama-3.3-70B-Instruct-FP4",
    "Other (specify)",
]