Update app.py
app.py CHANGED
@@ -18,20 +18,18 @@ QUANTIZED_MODEL = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8" # Directly
 LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
 
 # Function to load Llama model (without LoRA)
-def load_llama_model(model_name
+def load_llama_model(model_name):
     print(f"🔄 Loading Model: {model_name}")
-
+
     tokenizer = LlamaTokenizer.from_pretrained(model_name, token=HUGGINGFACE_TOKEN)
-
-
-
-
-
-)
-
-
-    print("✅ Model Loaded Successfully")
-    return tokenizer, model
+
+    # Load the checkpoint manually
+    model_path = f"{model_name}/consolidated.00.pth"
+    state_dict = torch.load(model_path, map_location="cpu")  # Adjust for GPU if needed
+
+    print("✅ Model state dictionary loaded successfully!")
+
+    return tokenizer, state_dict
 
 # Load the quantized Llama model
 tokenizer, model = load_llama_model(QUANTIZED_MODEL)
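A note on the new loading path: `torch.load(f"{model_name}/consolidated.00.pth")` treats the Hugging Face repo id as a local directory, so it only succeeds if that file already exists on disk at that exact path. A minimal sketch of a more robust variant, assuming the repo actually ships a `consolidated.00.pth` file (an assumption, not verified here) and reusing the `HUGGINGFACE_TOKEN` defined earlier in app.py: resolve the checkpoint into the local cache with `hf_hub_download` first, then load the downloaded file.

# Sketch (not part of this commit): fetch the checkpoint before torch.load,
# so the repo id is not treated as a local directory. hf_hub_download is the
# standard huggingface_hub helper; the filename "consolidated.00.pth" comes
# from the diff above, and whether this repo provides it is an assumption.
import torch
from huggingface_hub import hf_hub_download

def load_state_dict_from_hub(repo_id, filename="consolidated.00.pth"):
    # Download (or reuse from cache) the checkpoint and get its local path.
    local_path = hf_hub_download(repo_id=repo_id, filename=filename,
                                 token=HUGGINGFACE_TOKEN)  # token defined earlier in app.py
    # Load tensors onto CPU; move them to GPU later if needed.
    return torch.load(local_path, map_location="cpu")

# Usage mirroring the diff:
# state_dict = load_state_dict_from_hub(QUANTIZED_MODEL)

Note also that the caller still unpacks the second return value into a variable named `model`, even though after this change the function returns a raw state dict rather than an instantiated model.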