Steph254 committed on
Commit
d43fa94
·
verified ·
1 Parent(s): aabf1c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -1,22 +1,28 @@
 
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import json
from datetime import datetime

# --- Llama 3.2 (QLoRA) chat model, pinned to the CPU ---
MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="cpu")  # force CPU usage

# --- Llama Guard content-moderation model, also on the CPU ---
LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
guard_tokenizer = AutoTokenizer.from_pretrained(LLAMA_GUARD_NAME)
guard_model = AutoModelForCausalLM.from_pretrained(LLAMA_GUARD_NAME, device_map="cpu")
 
 
import os
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import json
from datetime import datetime

# Hugging Face token for the gated meta-llama repositories.
# Falls back to HF_TOKEN, the variable name huggingface_hub reads by default,
# so either spelling works; behavior is unchanged when HUGGINGFACE_TOKEN is set.
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN") or os.getenv("HF_TOKEN")
if not HUGGINGFACE_TOKEN:
    # Don't crash here — but surface the likely 401/403 failure mode up front
    # instead of letting the gated download fail with a cryptic error later.
    print("WARNING: HUGGINGFACE_TOKEN is not set; gated model downloads may fail.")


def _load_causal_lm(model_name):
    """Return a (tokenizer, model) pair for *model_name*, authenticated and on CPU.

    Centralizes the token + device_map arguments that were previously
    duplicated across every from_pretrained call.
    """
    tok = AutoTokenizer.from_pretrained(model_name, token=HUGGINGFACE_TOKEN)
    mdl = AutoModelForCausalLM.from_pretrained(
        model_name,
        token=HUGGINGFACE_TOKEN,
        device_map="cpu",  # Force CPU usage
    )
    return tok, mdl


# Load Llama 3.2 (QLoRA) Model on CPU
# NOTE(review): the *-QLORA_INT4_EO8 repo ships pre-quantized weights; confirm
# transformers can load it directly without an extra quantization config.
MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"
tokenizer, model = _load_causal_lm(MODEL_NAME)

# Load Llama Guard for content moderation on CPU
LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
guard_tokenizer, guard_model = _load_causal_lm(LLAMA_GUARD_NAME)