Tonic committed on
Commit
d559f10
unverified
1 Parent(s): 0ac061c

reduce position embeddings

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -30,7 +30,8 @@ model = AutoModelForCausalLM.from_pretrained(
30
  quantization_config=quantization_config, # Apply quantization
31
  # device_map="auto", # Automatically map to available devices
32
  torch_dtype=torch.bfloat16,
33
- token=HF_TOKEN
 
34
  )
35
 
36
  @spaces.GPU
 
30
  quantization_config=quantization_config, # Apply quantization
31
  # device_map="auto", # Automatically map to available devices
32
  torch_dtype=torch.bfloat16,
33
+ token=HF_TOKEN,
34
+ max_position_embeddings=8192 # Reduce context window to 8k tokens (from 128k)
35
  )
36
 
37
  @spaces.GPU