PhoenixDecim committed
Commit 2100725 · 1 Parent(s): e1e8013

changed device to cpu

Files changed (1): app.py (+3 −3)
app.py CHANGED
@@ -39,8 +39,8 @@ os.makedirs("data", exist_ok=True)
 # SLM: Microsoft PHI-2 model is loaded
 # It does have higher memory and compute requirements compared to TinyLlama and Falcon
 # But it gives the best results among the three
-# DEVICE = "cpu" # or cuda
-DEVICE = "cuda" # or cuda
+DEVICE = "cpu" # or cuda
+# DEVICE = "cuda" # or cuda
 # MODEL_NAME = "TinyLlama/TinyLlama_v1.1"
 # MODEL_NAME = "tiiuae/falcon-rw-1b"
 MODEL_NAME = "microsoft/phi-2"
@@ -57,7 +57,7 @@ if tokenizer.pad_token is None:
 # Since the model is to be hosted on a cpu instance, we use float32
 # For GPU, we can use float16 or bfloat16
 model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME, torch_dtype=torch.bfloat16, trust_remote_code=True
+    MODEL_NAME, torch_dtype=torch.float32, trust_remote_code=True
 ).to(DEVICE)
 model.eval()
 # model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
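
For context, a minimal sketch of what the model-loading code looks like after this commit, with the device/dtype pairing the diff's comments describe made explicit. The automatic fallback via torch.cuda.is_available() is an illustrative addition, not part of app.py, which hard-codes DEVICE = "cpu":

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative fallback; app.py pins DEVICE = "cpu" after this commit
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# float32 on CPU; float16/bfloat16 only pay off on GPU
DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32

MODEL_NAME = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # phi-2 ships without a pad token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=DTYPE, trust_remote_code=True
).to(DEVICE)
model.eval()  # inference only; disables dropout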
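The commented-out last line of the hunk points at dynamic quantization as a further CPU optimization. A sketch of how that call is typically used, under the assumption that only the nn.Linear weights need to be quantized; whether it preserves phi-2's output quality is not tested in this commit:

import torch

# Dynamic quantization: nn.Linear weights are stored as int8 and dequantized
# on the fly; activations stay in float. CPU-only in eager-mode PyTorch.
quantized_model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)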