Zhiding committed on
Commit
d62b227
·
1 Parent(s): e49e746
Files changed (1) hide show
  1. eagle_vl/serve/inference.py +2 -1
eagle_vl/serve/inference.py CHANGED
@@ -44,6 +44,8 @@ def load_model_from_nv(model_path: str = "nvidia/Eagle-2.5-8B"):
44
  def load_model_from_eagle(model_path: str = "NVEagle/Eagle2.5-VL-8B-Preview"):
45
 
46
  token = os.environ.get("HF_TOKEN")
 
 
47
  # hotfix the model to use flash attention 2
48
  config = AutoConfig.from_pretrained(model_path, trust_remote_code=True, token=token)
49
  config._attn_implementation = "flash_attention_2"
@@ -51,7 +53,6 @@ def load_model_from_eagle(model_path: str = "NVEagle/Eagle2.5-VL-8B-Preview"):
51
  config.text_config._attn_implementation = "flash_attention_2"
52
  print("Successfully set the attn_implementation to flash_attention_2")
53
 
54
- logger.info(f"token = {token[:4]}***{token[-2:]}")
55
  model = AutoModel.from_pretrained(
56
  model_path,
57
  trust_remote_code=True,
 
44
  def load_model_from_eagle(model_path: str = "NVEagle/Eagle2.5-VL-8B-Preview"):
45
 
46
  token = os.environ.get("HF_TOKEN")
47
+ logger.info(f"token = {token[:4]}***{token[-2:]}")
48
+
49
  # hotfix the model to use flash attention 2
50
  config = AutoConfig.from_pretrained(model_path, trust_remote_code=True, token=token)
51
  config._attn_implementation = "flash_attention_2"
 
53
  config.text_config._attn_implementation = "flash_attention_2"
54
  print("Successfully set the attn_implementation to flash_attention_2")
55
 
 
56
  model = AutoModel.from_pretrained(
57
  model_path,
58
  trust_remote_code=True,