Steph254 committed · verified
Commit 6d76df7 · 1 Parent(s): 47e05d5

Update app.py

Files changed (1): app.py (+26 -17)
app.py CHANGED
@@ -2,7 +2,7 @@ import os
 import gradio as gr
 import torch
 import json
-from transformers import LlamaTokenizer, AutoModelForCausalLM
+from transformers import LlamaTokenizer, LlamaForCausalLM
 from peft import PeftModel
 
 # Set Hugging Face Token for Authentication
@@ -14,27 +14,36 @@ if not HUGGINGFACE_TOKEN:
 print("✅ HUGGINGFACE_TOKEN is set.")
 
 # Model Paths
-QUANTIZED_MODEL = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"  # Directly using quantized model
+MODEL_PATH = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"  # Directly using quantized model
 LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
 
 # Function to load Llama model (without LoRA)
-def load_llama_model(model_name):
-    print(f"🔄 Loading Model: {model_name}")
+# Load Model Manually (for Quantized Models)
+def load_quantized_model(model_path):
+    print(f"🔄 Loading Quantized Model: {model_path}")
 
-    tokenizer = LlamaTokenizer.from_pretrained(model_name, token=HUGGINGFACE_TOKEN)
-
-    try:
-        model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            token=HUGGINGFACE_TOKEN,
-            trust_remote_code=True  # Allows loading non-standard model formats
-        )
-    except Exception as e:
-        print(f"❌ Error loading model: {e}")
-        raise ValueError(f"❌ Model {model_name} may not have valid weight files. Check the Hugging Face repository.")
+    # Load config file manually
+    from transformers import LlamaConfig
+    config = LlamaConfig.from_pretrained(model_path)
+
+    # Initialize model
+    model = LlamaForCausalLM(config)
+
+    # Load quantized state_dict
+    checkpoint_path = os.path.join(model_path, "consolidated.00.pth")
+    state_dict = torch.load(checkpoint_path, map_location="cpu")
+
+    # Load state dict into model
+    model.load_state_dict(state_dict, strict=False)
+
+    print("✅ Quantized model loaded successfully!")
+    return model
+
+# Load Tokenizer
+tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH, token=HUGGINGFACE_TOKEN)
 
-    print("✅ Model loaded successfully!")
-    return tokenizer, model
+# Load the model
+model = load_quantized_model(MODEL_PATH)
 
 # Load the quantized Llama model
 tokenizer, model = load_llama_model(QUANTIZED_MODEL)
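Note that the trailing context lines of the hunk are unchanged: app.py still ends by calling load_llama_model(QUANTIZED_MODEL), even though this commit removes that function and renames the constant to MODEL_PATH, so the Space would raise a NameError at startup. A hypothetical follow-up patch (not part of this commit) would drop the stale call, since tokenizer and model are already assigned above:

-# Load the quantized Llama model
-tokenizer, model = load_llama_model(QUANTIZED_MODEL)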
 
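One caveat with the new loading path: load_state_dict(state_dict, strict=False) silently skips any checkpoint keys that do not match the LlamaForCausalLM module, and a consolidated.00.pth from an INT4/QLoRA export may not line up with the float model's parameter names. A minimal diagnostic sketch (hypothetical, not in the commit), using the mismatch lists that PyTorch's Module.load_state_dict returns:

# Hypothetical diagnostic: with strict=False, key mismatches are silent,
# so capture and print them instead of discarding the return value.
result = model.load_state_dict(state_dict, strict=False)
print(f"⚠️ Missing keys: {len(result.missing_keys)}")
print(f"⚠️ Unexpected keys: {len(result.unexpected_keys)}")
if result.unexpected_keys:
    print("Sample unexpected keys:", result.unexpected_keys[:5])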
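For completeness, a minimal smoke test of the new loading path (hypothetical, not part of the commit; assumes tokenizer and model were built as in app.py above and that the state dict actually populated the weights):

# Hypothetical smoke test for the manually loaded model.
model.eval()
inputs = tokenizer("Hello, how are you?", return_tensors="pt")
with torch.no_grad():
    # Greedy-decode a short continuation as a basic sanity check.
    output_ids = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))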