drvikasgaur committed
Commit 713b966 · verified · 1 Parent(s): d266723

Update app.py

Files changed (1)
  1. app.py +8 -16
app.py CHANGED
@@ -5,15 +5,15 @@ import torch
  import os
 
  # ---- LOAD LLM ----
- model_name = "meta-llama/Llama-3.2-3B-Instruct"
+ model_name = "Qwen/Qwen1.5-0.5B"
 
- # Read token from environment variable (set in HF Space Secrets)
+ # No need for token usually; Qwen is public, but keeping it flexible
  hf_token = os.getenv("HF_TOKEN")
 
  tokenizer = AutoTokenizer.from_pretrained(
      model_name,
-     token=hf_token,         # Secure token
-     trust_remote_code=True  # Required for llama3 models
+     token=hf_token,         # can be None if not set
+     trust_remote_code=True  # required for Qwen
  )
 
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -22,19 +22,14 @@ model = AutoModelForCausalLM.from_pretrained(
      model_name,
      token=hf_token,
      trust_remote_code=True,
-     torch_dtype=torch.float16 if device=="cuda" else torch.float32,
-     device_map="auto" if device=="cuda" else None
+     torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+     device_map="auto" if device == "cuda" else None
  ).to(device)
 
-
-
-
  # --- Define llm generation function ---
  def llm(prompt, max_new_tokens=1000, temperature=0.3, do_sample=True):
-     # Wrap the prompt into proper Llama 3 chat format
-     system_prompt = "[INST] " + prompt + " [/INST]"
-
-     inputs = tokenizer(system_prompt, return_tensors="pt").to(model.device)
+     # Qwen does not require special prompt wrapping like [INST] ... [/INST]
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
      output = model.generate(
          **inputs,
@@ -48,9 +43,6 @@ def llm(prompt, max_new_tokens=1000, temperature=0.3, do_sample=True):
 
      generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
 
-     # Remove prompt part (optional cleanup)
-     generated_text = generated_text.replace(system_prompt, "").strip()
-
      return [{"generated_text": generated_text}]
 
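
For quick verification after this change, a minimal usage sketch of the updated llm() helper follows. It assumes the rest of app.py (the transformers imports, the loaded tokenizer and model, and any Gradio wiring) stays as in the diff above; the prompt string and the smaller max_new_tokens value are illustrative only, not part of the commit.

# Illustrative call of llm() as defined after this commit (not part of app.py itself).
# Assumes torch, transformers, and the Qwen/Qwen1.5-0.5B weights are available.
result = llm(
    "Write a one-sentence greeting for the Space UI.",
    max_new_tokens=64,   # shorter than the 1000-token default, just for a smoke test
    temperature=0.3,
    do_sample=True,
)
print(result[0]["generated_text"])  # llm() returns [{"generated_text": ...}]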