Tonic committed on
Commit
9aaad03
unverified
1 Parent(s): e463387

add bnb config

Browse files
Files changed (1) hide show
  1. app.py +17 -2
app.py CHANGED
@@ -14,9 +14,24 @@ Join us : 馃専TeamTonic馃専 is always making cool demos! Join our active builder
14
  """
15
 
16
  model_id = "Tonic/c4ai-command-a-03-2025-4bit_fp4"
17
- tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
18
- model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16, token=HF_TOKEN)
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  @spaces.GPU
22
  def generate_response(user_input, max_new_tokens, temperature):
 
14
  """
15
 
16
  model_id = "Tonic/c4ai-command-a-03-2025-4bit_fp4"
 
 
17
 
18
# Define the bitsandbytes 4-bit quantization settings.
# NOTE(review): the checkpoint id ends in "4bit_fp4", which suggests the weights
# may already be quantized — confirm that passing a quantization_config on top
# of a pre-quantized checkpoint is intended.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize linear-layer weights to 4 bits at load time
    bnb_4bit_quant_type="fp4",              # FP4 (4-bit float) quantization scheme
    bnb_4bit_use_double_quant=True,         # double quantization: re-quantizes the quantization
                                            # constants themselves to save memory (not a precision gain)
    llm_int8_enable_fp32_cpu_offload=True,  # allow fp32 modules to be offloaded to CPU RAM
)

# Load tokenizer and quantized model (downloads weights from the Hub on first use).
tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,  # apply the 4-bit config above
    device_map="auto",                        # spread layers across available devices
    torch_dtype=torch.bfloat16,               # compute dtype for non-quantized ops
    token=HF_TOKEN,
)
35
 
36
  @spaces.GPU
37
  def generate_response(user_input, max_new_tokens, temperature):