PhoenixDecim committed
Commit 2100725 · 1 Parent(s): e1e8013

changed device to cpu

Files changed (1): app.py (+3 −3)
app.py CHANGED
@@ -39,8 +39,8 @@ os.makedirs("data", exist_ok=True)
 # SLM: Microsoft PHI-2 model is loaded
 # It does have higher memory and compute requirements compared to TinyLlama and Falcon
 # But it gives the best results among the three
-# DEVICE = "cpu" # or cuda
-DEVICE = "cuda" # or cuda
+DEVICE = "cpu" # or cuda
+# DEVICE = "cuda" # or cuda
 # MODEL_NAME = "TinyLlama/TinyLlama_v1.1"
 # MODEL_NAME = "tiiuae/falcon-rw-1b"
 MODEL_NAME = "microsoft/phi-2"
@@ -57,7 +57,7 @@ if tokenizer.pad_token is None:
 # Since the model is to be hosted on a cpu instance, we use float32
 # For GPU, we can use float16 or bfloat16
 model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME, torch_dtype=torch.bfloat16, trust_remote_code=True
+    MODEL_NAME, torch_dtype=torch.float32, trust_remote_code=True
 ).to(DEVICE)
 model.eval()
 # model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
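
For context, a minimal sketch of what the model-loading code looks like after this commit, with the device/dtype pairing the diff's comments describe made explicit. The automatic fallback via torch.cuda.is_available() is an illustrative addition, not part of app.py, which hard-codes DEVICE = "cpu":

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative fallback; app.py pins DEVICE = "cpu" after this commit
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# float32 on CPU; float16/bfloat16 only pay off on GPU
DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32

MODEL_NAME = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # phi-2 ships without a pad token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=DTYPE, trust_remote_code=True
).to(DEVICE)
model.eval()  # inference only; disables dropout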
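The commented-out last line of the hunk points at dynamic quantization as a further CPU optimization. A sketch of how that call is typically used, under the assumption that only the nn.Linear weights need to be quantized; whether it preserves phi-2's output quality is not tested in this commit:

import torch

# Dynamic quantization: nn.Linear weights are stored as int8 and dequantized
# on the fly; activations stay in float. CPU-only in eager-mode PyTorch.
quantized_model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)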