Spaces:

Steph254
/

demo_1

Runtime error

App Files Community

Steph254 committed on Mar 18

Commit

513c3f3

verified ·

1 Parent(s): f8d604d

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -46

app.py CHANGED Viewed

@@ -18,58 +18,29 @@ def load_llama_model(model_path, is_guard=False):
     print(f"Loading model: {model_path}")
     try:
-        # Check if token exists
         token = os.getenv("HUGGINGFACE_TOKEN")
         if not token:
-            print("Warning: HUGGINGFACE_TOKEN not set, attempting to load without authentication")
-            token = None  # Set to None explicitly
-        # First, try standard loading method with token handling
-        try:
-            tokenizer = LlamaTokenizer.from_pretrained(
-                BASE_MODEL,
-                use_auth_token=token  # Use this parameter instead of token=
-            )
-            model = AutoModelForCausalLM.from_pretrained(
-                model_path,
-                use_auth_token=token,
-                torch_dtype=torch.float16,
-                low_cpu_mem_usage=True
-            )
-        except Exception as e:
-            print(f"Standard loading failed: {e}, trying alternative method...")
-            # Fall back to alternative loading method
-            # Download files first to ensure they exist locally
-            from huggingface_hub import snapshot_download
-            cache_dir = snapshot_download(
-                BASE_MODEL,
-                use_auth_token=token,
-                local_dir="./model_cache"
-            )
-            # Load tokenizer from local files
-            tokenizer = LlamaTokenizer.from_pretrained(
-                cache_dir,
-                local_files_only=True
-            )
-            # Load model from local files
-            model = AutoModelForCausalLM.from_pretrained(
-                model_path,
-                use_auth_token=token,
-                torch_dtype=torch.float16,
-                low_cpu_mem_usage=True
-            )
         # Load QLoRA adapter if applicable
         if not is_guard and "QLORA" in model_path:
             print("Loading QLoRA adapter...")
-            from peft import PeftConfig, PeftModel
             model = PeftModel.from_pretrained(
                 model,
                 model_path,

     print(f"Loading model: {model_path}")
     try:
+        # Get token from secrets
         token = os.getenv("HUGGINGFACE_TOKEN")
         if not token:
+            print("Warning: HUGGINGFACE_TOKEN not found in environment variables")
+        else:
+            print("HUGGINGFACE_TOKEN found in environment")
+        # Use the parameter name 'use_auth_token' instead of 'token'
+        tokenizer = LlamaTokenizer.from_pretrained(
+            BASE_MODEL,
+            use_auth_token=token
+        )
+        model = AutoModelForCausalLM.from_pretrained(
+            model_path,
+            use_auth_token=token,
+            torch_dtype=torch.float16,
+            low_cpu_mem_usage=True
+        )
         # Load QLoRA adapter if applicable
         if not is_guard and "QLORA" in model_path:
             print("Loading QLoRA adapter...")
             model = PeftModel.from_pretrained(
                 model,
                 model_path,