Commit 2100725
Parent(s): e1e8013
changed device to cpu
app.py CHANGED
@@ -39,8 +39,8 @@ os.makedirs("data", exist_ok=True)
 # SLM: Microsoft PHI-2 model is loaded
 # It does have higher memory and compute requirements compared to TinyLlama and Falcon
 # But it gives the best results among the three
-
-DEVICE = "cuda" # or cuda
+DEVICE = "cpu" # or cuda
+# DEVICE = "cuda" # or cuda
 # MODEL_NAME = "TinyLlama/TinyLlama_v1.1"
 # MODEL_NAME = "tiiuae/falcon-rw-1b"
 MODEL_NAME = "microsoft/phi-2"
@@ -57,7 +57,7 @@ if tokenizer.pad_token is None:
 # Since the model is to be hosted on a cpu instance, we use float32
 # For GPU, we can use float16 or bfloat16
 model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME, torch_dtype=torch.
+    MODEL_NAME, torch_dtype=torch.float32, trust_remote_code=True
 ).to(DEVICE)
 model.eval()
 # model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
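
The comment pair in the second hunk states the rationale: float32 is the safe default on a CPU instance, while float16 roughly halves weight memory on GPU. A minimal sketch of picking device and dtype together instead of hand-editing DEVICE on each deploy (the pick_device_and_dtype helper is my own illustration, not part of app.py):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "microsoft/phi-2"

def pick_device_and_dtype():
    # Hypothetical helper: float16 halves weight memory on GPU;
    # float32 stays the safe default on CPU, where half precision
    # is slow or unsupported for many ops.
    if torch.cuda.is_available():
        return "cuda", torch.float16
    return "cpu", torch.float32

DEVICE, DTYPE = pick_device_and_dtype()
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=DTYPE, trust_remote_code=True
).to(DEVICE)
model.eval()

With this, the cpu/cuda switch this commit makes by hand would happen automatically based on where the Space is running.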
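
The commented-out last line of the hunk points at a further CPU-side option: dynamic int8 quantization of the Linear layers. A hedged sketch of what enabling it would look like, assuming the float32 model loaded above (torch.quantization.quantize_dynamic is the long-standing eager-mode PyTorch API; any speed or memory gain for phi-2 here is an assumption, not a measured result):

import torch

# Dynamic quantization: nn.Linear weights are stored as int8 and
# dequantized per call; activations stay float, so no calibration
# pass or sample data is needed.
model = torch.quantization.quantize_dynamic(
    model,               # the float32 model loaded in app.py above
    {torch.nn.Linear},   # quantize only the Linear layers
    dtype=torch.qint8,
)
model.eval()

Int8 weights cut the Linear-layer footprint about 4x versus float32; whether phi-2's output quality holds up is model-dependent, which may be why the line is left commented out.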