Spaces: Running on Zero
Change spaces.gpu application
app.py CHANGED
@@ -26,19 +26,20 @@ with open("token_probabilities.json") as f:
     token_probs_dict = json.load(f)
 token_probabilities = np.array([token_probs_dict[str(i)] for i in range(len(token_probs_dict))], dtype=np.float32)
 
-
-def load_weights():
-    # OK: download & load weights to CPU
+def load_model():
     ckpt_path = hf_hub_download(
         repo_id="ruurd/tini_model",
         filename="diffusion-model.pth",
         token=os.getenv("HF_TOKEN")
     )
-    return torch.load(ckpt_path, map_location="cpu")  # ✅ returns only CPU tensors
 
-
-model.
-model
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model = torch.load(ckpt_path, map_location=device)
+    model = disable_dropout(model)
+    model.to(device)
+    model.eval()
+    return model
+
 
 rng = np.random.default_rng()
 
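The rewritten loader calls disable_dropout, which is defined elsewhere in app.py and not shown in this diff. A minimal sketch of what such a helper usually does, assuming it simply zeroes the drop probability of every dropout module (the body below is an assumption, not the Space's actual implementation):

import torch.nn as nn

def disable_dropout(model: nn.Module) -> nn.Module:
    # Assumed behavior: set p=0 on every dropout layer so inference stays
    # deterministic even if a caller forgets to switch to eval mode.
    for module in model.modules():
        if isinstance(module, (nn.Dropout, nn.Dropout1d, nn.Dropout2d, nn.Dropout3d)):
            module.p = 0.0
    return model

Note that torch.load(ckpt_path, map_location=device) here deserializes a fully pickled model object rather than a state_dict, so the model's class definition must be importable at unpickling time.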
@@ -82,6 +83,8 @@ def generate_diffusion_text(input_ids, answer_start):
     return input_ids[:answer_start] + sampled[answer_start:]
 
 # --- Inference Wrapper ---
+
+@spaces.GPU
 def diffusion_chat(question, eot_weight, max_it, sharpness):
     placeholder = "What do you know about the city of New York?"
     if question.strip() == "":
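@spaces.GPU is the ZeroGPU decorator from the spaces package: the Space itself runs on CPU hardware, and a GPU is attached only while a decorated function executes. A minimal usage sketch (the function body is illustrative, not part of this app):

import spaces
import torch

@spaces.GPU  # ZeroGPU attaches a GPU only for the duration of this call
def gpu_check() -> str:
    # Inside the decorated function, CUDA is available on ZeroGPU hardware.
    return torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu only"

For longer generations the decorator also accepts a duration hint, e.g. @spaces.GPU(duration=120).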
@@ -144,6 +147,10 @@ def diffusion_chat(question, eot_weight, max_it, sharpness):
 
 # --- Gradio Interface ---
 
+print("Loading model...")
+model = load_model()
+print("✅ Model loaded.")
+
 demo = gr.Interface(
     fn=diffusion_chat,
     inputs=[
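Loading the model at module level means the checkpoint is downloaded and deserialized once at startup, and diffusion_chat then reuses it as a global on every GPU call instead of reloading it. The hunk is truncated inside inputs=[, so the actual components are not visible; a plausible wiring given diffusion_chat's signature (the component types, ranges, defaults, and output component below are assumptions):

import gradio as gr

demo = gr.Interface(
    fn=diffusion_chat,
    inputs=[
        gr.Textbox(label="Question", placeholder="What do you know about the city of New York?"),
        gr.Slider(0.0, 1.0, value=0.4, label="eot_weight"),
        gr.Slider(1, 64, value=16, step=1, label="max_it"),
        gr.Slider(0.1, 10.0, value=1.0, label="sharpness"),
    ],
    outputs=gr.HTML(label="Diffusion output"),
)

if __name__ == "__main__":
    demo.launch()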