kishkath committed on
Commit
8da2345
·
verified ·
1 Parent(s): be11d10

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -4
app.py CHANGED
@@ -19,14 +19,24 @@ tokenizer = AutoTokenizer.from_pretrained(model_dir)
19
  # offload_folder=offload_dir # Directory to offload layers (for larger models).
20
  # )
21
 
 
 
 
 
 
 
 
 
 
 
22
  base_model = AutoModelForCausalLM.from_pretrained(
23
  model_dir,
24
- device_map="cpu", # Force CPU usage
25
- torch_dtype=torch.float32, # Use float32 for CPU
26
- trust_remote_code=True,
27
- offload_folder=offload_dir # Directory to offload layers (for larger models).
28
  )
29
 
 
30
  # Load the adapter (PEFT) weights.
31
  model = PeftModel.from_pretrained(base_model, model_dir)
32
 
 
19
  # offload_folder=offload_dir # Directory to offload layers (for larger models).
20
  # )
21
 
22
+ ## CPU
23
+ # base_model = AutoModelForCausalLM.from_pretrained(
24
+ # model_dir,
25
+ # device_map="cpu", # Force CPU usage
26
+ # torch_dtype=torch.float32, # Use float32 for CPU
27
+ # trust_remote_code=True,
28
+ # offload_folder=offload_dir # Directory to offload layers (for larger models).
29
+ # )
30
+
31
+ ## GPU
32
  base_model = AutoModelForCausalLM.from_pretrained(
33
  model_dir,
34
+ device_map="auto", # Automatically uses available GPU(s)
35
+ torch_dtype=torch.float16, # Use half precision for faster inference on GPU
36
+ trust_remote_code=True
 
37
  )
38
 
39
+
40
  # Load the adapter (PEFT) weights.
41
  model = PeftModel.from_pretrained(base_model, model_dir)
42