Steph254 committed
Commit 513c3f3 · verified · 1 parent: f8d604d

Update app.py

Files changed (1)
  1. app.py  +17 -46
app.py CHANGED
@@ -18,58 +18,29 @@ def load_llama_model(model_path, is_guard=False):
     print(f"Loading model: {model_path}")
 
     try:
-        # Check if token exists
+        # Get token from secrets
         token = os.getenv("HUGGINGFACE_TOKEN")
         if not token:
-            print("Warning: HUGGINGFACE_TOKEN not set, attempting to load without authentication")
-            token = None  # Set to None explicitly
+            print("Warning: HUGGINGFACE_TOKEN not found in environment variables")
+        else:
+            print("HUGGINGFACE_TOKEN found in environment")
+
+        # Use the parameter name 'use_auth_token' instead of 'token'
+        tokenizer = LlamaTokenizer.from_pretrained(
+            BASE_MODEL,
+            use_auth_token=token
+        )
+
+        model = AutoModelForCausalLM.from_pretrained(
+            model_path,
+            use_auth_token=token,
+            torch_dtype=torch.float16,
+            low_cpu_mem_usage=True
+        )
 
-        # First, try standard loading method with token handling
-        try:
-            tokenizer = LlamaTokenizer.from_pretrained(
-                BASE_MODEL,
-                use_auth_token=token  # Use this parameter instead of token=
-            )
-
-            model = AutoModelForCausalLM.from_pretrained(
-                model_path,
-                use_auth_token=token,
-                torch_dtype=torch.float16,
-                low_cpu_mem_usage=True
-            )
-
-        except Exception as e:
-            print(f"Standard loading failed: {e}, trying alternative method...")
-
-            # Fall back to alternative loading method
-            # Download files first to ensure they exist locally
-            from huggingface_hub import snapshot_download
-
-            cache_dir = snapshot_download(
-                BASE_MODEL,
-                use_auth_token=token,
-                local_dir="./model_cache"
-            )
-
-            # Load tokenizer from local files
-            tokenizer = LlamaTokenizer.from_pretrained(
-                cache_dir,
-                local_files_only=True
-            )
-
-            # Load model from local files
-            model = AutoModelForCausalLM.from_pretrained(
-                model_path,
-                use_auth_token=token,
-                torch_dtype=torch.float16,
-                low_cpu_mem_usage=True
-            )
-
     # Load QLoRA adapter if applicable
     if not is_guard and "QLORA" in model_path:
         print("Loading QLoRA adapter...")
-        from peft import PeftConfig, PeftModel
-
         model = PeftModel.from_pretrained(
             model,
             model_path,
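
For reference, below is a minimal, self-contained sketch of the same load-then-adapt pattern the new version settles on: base weights plus an optional PEFT adapter. Unlike the Space's code, it loads the base weights from the base repo and only the adapter from the QLoRA repo, which is the usual PEFT split. The repo ids are placeholder assumptions, not the Space's actual BASE_MODEL or model_path values.

import os

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, LlamaTokenizer

BASE_MODEL = "meta-llama/Llama-2-7b-hf"        # assumption: placeholder base repo id
MODEL_PATH = "Steph254/llama-2-QLORA-adapter"  # assumption: placeholder adapter repo id

# Read the Space secret; from_pretrained accepts token=None for public repos
token = os.getenv("HUGGINGFACE_TOKEN")

tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL, use_auth_token=token)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    use_auth_token=token,
    torch_dtype=torch.float16,   # half precision halves weight memory
    low_cpu_mem_usage=True,      # avoid holding a second full copy of the weights in RAM
)

# Attach the (Q)LoRA adapter on top of the frozen base weights
if "QLORA" in MODEL_PATH:
    model = PeftModel.from_pretrained(model, MODEL_PATH)

One caveat: newer transformers releases deprecate use_auth_token in favor of token=, so the hunk's comment ("use the parameter name 'use_auth_token' instead of 'token'") only applies to older pins of the library.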