drvikasgaur committed · verified · Commit 1d2493f · 1 Parent(s): bfd7db7

Update app.py

Files changed (1):
  1. app.py +14 -7

app.py CHANGED
@@ -5,15 +5,15 @@ import torch
 import os
 
 # ---- LOAD LLM ----
-model_name = "mistralai/Mistral-7B-Instruct-v0.3"
+model_name = "meta-llama/Llama-3.2-3B-Instruct"
 
 # Read token from environment variable (set in HF Space Secrets)
 hf_token = os.getenv("HF_TOKEN")
 
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
-    token=hf_token,
-    trust_remote_code=True
+    token=hf_token,         # Secure token
+    trust_remote_code=True  # Required for llama3 models
 )
 
 model = AutoModelForCausalLM.from_pretrained(
@@ -25,21 +25,28 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 
 # --- Define llm generation function ---
-def llm(prompt, max_new_tokens=500, temperature=0.3, do_sample=True):
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+def llm(prompt, max_new_tokens=1000, temperature=0.3, do_sample=True):
+    # Wrap the prompt into proper Llama 3 chat format
+    system_prompt = "[INST] " + prompt + " [/INST]"
+
+    inputs = tokenizer(system_prompt, return_tensors="pt").to(model.device)
+
     output = model.generate(
         **inputs,
         max_new_tokens=max_new_tokens,
         temperature=temperature,
-        do_sample=do_sample,  # <--- Use passed do_sample parameter
+        do_sample=do_sample,
         top_p=0.95,
         top_k=50,
         pad_token_id=tokenizer.eos_token_id
     )
+
     generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
-    return [{"generated_text": generated_text}]
+
+    # Remove prompt part (optional cleanup)
+    generated_text = generated_text.replace(system_prompt, "").strip()
+
+    return [{"generated_text": generated_text}]
 
 
 # Define all the screening questions
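
A caveat on the committed change above: the [INST] ... [/INST] wrapper is the Llama 2 / Mistral instruct format, not the Llama 3 chat format, and stock meta-llama checkpoints load with the native transformers Llama classes, so trust_remote_code=True is not actually required. Llama 3 tokenizers ship their own chat template, so a variant of llm() built on tokenizer.apply_chat_template would format the prompt the way the model was trained to see it. A minimal sketch, assuming the tokenizer and model objects loaded above (the message structure is illustrative, not taken from this commit); decoding only the newly generated tokens also makes the replace()-based cleanup unnecessary:

def llm(prompt, max_new_tokens=1000, temperature=0.3, do_sample=True):
    # Let the tokenizer's built-in chat template format the prompt,
    # instead of hand-written [INST] tags.
    messages = [{"role": "user", "content": prompt}]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # append the assistant header so the model replies
        return_tensors="pt",
    ).to(model.device)

    output = model.generate(
        inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=do_sample,
        top_p=0.95,
        top_k=50,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the tokens generated after the prompt, so no
    # replace()-based cleanup is needed.
    generated_text = tokenizer.decode(
        output[0][inputs.shape[-1]:], skip_special_tokens=True
    )
    return [{"generated_text": generated_text}]

This keeps the pipeline-style return shape, so callers can still use llm(question)[0]["generated_text"] unchanged (question being whatever string the caller passes in).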