Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -27,16 +27,26 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
27 |
).to(device)
|
28 |
|
29 |
# --- Define llm generation function ---
|
30 |
-
def llm(prompt, max_new_tokens=400, do_sample=False):
|
31 |
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
output = model.generate(
|
34 |
**inputs,
|
35 |
-
|
36 |
-
do_sample=do_sample,
|
37 |
-
top_p=0.95 if do_sample else None,
|
38 |
-
top_k=50 if do_sample else None,
|
39 |
-
pad_token_id=tokenizer.eos_token_id
|
40 |
)
|
41 |
|
42 |
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
|
@@ -45,6 +55,7 @@ def llm(prompt, max_new_tokens=400, do_sample=False):
|
|
45 |
|
46 |
|
47 |
|
|
|
48 |
# Define all the screening questions
|
49 |
questions = [
|
50 |
# Generalized Anxiety & Somatic Concerns
|
|
|
27 |
).to(device)
|
28 |
|
29 |
# --- Define llm generation function ---
|
30 |
+
def llm(prompt, max_new_tokens=400, temperature=0.3, do_sample=True):
|
31 |
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
32 |
|
33 |
+
generation_kwargs = {
|
34 |
+
"max_new_tokens": max_new_tokens,
|
35 |
+
"do_sample": do_sample,
|
36 |
+
"pad_token_id": tokenizer.eos_token_id,
|
37 |
+
}
|
38 |
+
|
39 |
+
# Only add temperature/top_p if sampling is enabled
|
40 |
+
if do_sample:
|
41 |
+
generation_kwargs.update({
|
42 |
+
"temperature": temperature,
|
43 |
+
"top_p": 0.95,
|
44 |
+
"top_k": 50
|
45 |
+
})
|
46 |
+
|
47 |
output = model.generate(
|
48 |
**inputs,
|
49 |
+
**generation_kwargs
|
|
|
|
|
|
|
|
|
50 |
)
|
51 |
|
52 |
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
|
|
|
55 |
|
56 |
|
57 |
|
58 |
+
|
59 |
# Define all the screening questions
|
60 |
questions = [
|
61 |
# Generalized Anxiety & Somatic Concerns
|