Steph254 committed (verified) · Commit 61d529e · 1 Parent(s): 1b4aac1

Update app.py

Files changed (1): app.py (+38, -37)
app.py CHANGED
@@ -23,43 +23,44 @@ QLORA_ADAPTER = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8" # Ensure this
 LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4" # Ensure this is correct
 
 # Function to load Llama model
-def load_llama_model(model_path, is_guard=False):
-    print(f"Loading model: {model_path}")
-
-    try:
-        # Get token from secrets
-        token = os.getenv("HUGGINGFACE_TOKEN")
-        if not token:
-            print("Warning: HUGGINGFACE_TOKEN not found in environment variables")
-        else:
-            print("HUGGINGFACE_TOKEN found in environment")
-
-        # Use the parameter name 'use_auth_token' instead of 'token'
-        tokenizer = AutoTokenizer.from_pretrained(
-            BASE_MODEL,
-            use_auth_token=token
-        )
-
-        model = AutoModelForCausalLM.from_pretrained(
-            model_path,
-            use_auth_token=token,
-            torch_dtype=torch.float16,
-            low_cpu_mem_usage=True
-        )
-
-        # Load QLoRA adapter if applicable
-        if not is_guard and "QLORA" in model_path:
-            print("Loading QLoRA adapter...")
-            model = PeftModel.from_pretrained(
-                model,
-                model_path,
-                use_auth_token=token
-            )
-            print("Merging LoRA weights...")
-            model = model.merge_and_unload()
-
-        model.eval()
-        return tokenizer, model
+def load_llama_model():
+    print(f"🔄 Loading Base Model: {BASE_MODEL}")
+
+    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_auth_token=HUGGINGFACE_TOKEN)
+    model = AutoModelForCausalLM.from_pretrained(
+        BASE_MODEL,
+        use_auth_token=HUGGINGFACE_TOKEN,
+        torch_dtype=torch.float16,
+        low_cpu_mem_usage=True
+    )
+
+    print(f"✅ Base Model Loaded Successfully")
+
+    # Load QLoRA adapter if available
+    print(f"🔄 Loading QLoRA Adapter: {QLORA_ADAPTER}")
+    model = PeftModel.from_pretrained(model, QLORA_ADAPTER, use_auth_token=HUGGINGFACE_TOKEN)
+    print("🔄 Merging LoRA Weights...")
+    model = model.merge_and_unload()
+    print("✅ QLoRA Adapter Loaded Successfully")
+
+    model.eval()
+    return tokenizer, model
+
+# Function to load Llama Guard Model for content moderation
+def load_llama_guard():
+    print(f"🔄 Loading Llama Guard Model: {LLAMA_GUARD_NAME}")
+
+    tokenizer = AutoTokenizer.from_pretrained(LLAMA_GUARD_NAME, use_auth_token=HUGGINGFACE_TOKEN)
+    model = AutoModelForCausalLM.from_pretrained(
+        LLAMA_GUARD_NAME,
+        use_auth_token=HUGGINGFACE_TOKEN,
+        torch_dtype=torch.float16,
+        low_cpu_mem_usage=True
+    )
+
+    model.eval()
+    print("✅ Llama Guard Model Loaded Successfully")
+    return tokenizer, model
 
     except Exception as e:
         print(f"❌ Error loading model {model_path}: {e}")