import math
import os
import os.path as osp
import warnings

import torch
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    PretrainedConfig,
    PreTrainedModel,
)
def has_tokenizer(path):
    # First check for tokenizer files in a local checkpoint directory.
    if (
        osp.exists(osp.join(path, "special_tokens_map.json"))
        and osp.exists(osp.join(path, "tokenizer_config.json"))
        and (osp.exists(osp.join(path, "tokenizer.model")) or osp.exists(osp.join(path, "tokenizer.json")))
    ):
        return True

    # Otherwise, treat `path` as a Hugging Face Hub repo id and check there.
    from huggingface_hub import HfApi, file_exists
    from huggingface_hub.utils import HFValidationError

    api = HfApi()
    try:
        valid_hf_repo = api.repo_exists(path)
    except HFValidationError:
        valid_hf_repo = False
    if (
        valid_hf_repo
        and file_exists(path, "special_tokens_map.json")
        and file_exists(path, "tokenizer_config.json")
        and (file_exists(path, "tokenizer.model") or file_exists(path, "tokenizer.json"))
    ):
        return True
    return False
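
# A minimal usage sketch for has_tokenizer (illustration only; the local path and
# repo id below are hypothetical and not part of this module):
#
#     has_tokenizer("./checkpoints/my-vlm/llm")  # True if the tokenizer files sit in that folder
#     has_tokenizer("org/some-llm-repo")         # otherwise falls back to a Hugging Face Hub lookup
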
def context_length_extension(config):
    # Extend the context window via linear RoPE scaling when the requested
    # model_max_length exceeds the model's native max_position_embeddings.
    orig_ctx_len = getattr(config, "max_position_embeddings", None)
    model_max_length = getattr(config, "model_max_length", None)
    if orig_ctx_len and model_max_length and model_max_length > orig_ctx_len:
        print(f"Scaling RoPE from {orig_ctx_len} to {model_max_length}")
        scaling_factor = float(math.ceil(model_max_length / orig_ctx_len))
        config.rope_scaling = {"type": "linear", "factor": scaling_factor}
    return config
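
# Worked example (illustration only): with a native context of 4096 positions and a
# requested model_max_length of 10000, the factor is ceil(10000 / 4096) = 3.0, so
# config.rope_scaling becomes {"type": "linear", "factor": 3.0}.
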
def build_llm_and_tokenizer(
    model_name_or_path: str,
    config: PretrainedConfig,
    attn_implementation=None,
    model_max_length=None,
    *args,
    **kwargs,
) -> PreTrainedModel:
    # Load the LLM config first so the attention implementation, context length,
    # and RoPE scaling can be adjusted before instantiating the model.
    llm_cfg = AutoConfig.from_pretrained(model_name_or_path)
    llm_cfg._attn_implementation = attn_implementation
    llm_cfg.model_max_length = model_max_length
    if model_max_length is not None:
        context_length_extension(llm_cfg)

    # config.model_dtype is expected to be a string such as "torch.float16".
    llm = AutoModelForCausalLM.from_pretrained(
        model_name_or_path, config=llm_cfg, torch_dtype=eval(config.model_dtype), *args, **kwargs
    )
    llm_path = model_name_or_path
    if not has_tokenizer(llm_path):
        warnings.warn("Tokenizer not found in the VLM root folder; falling back to the ./llm subfolder.")
        llm_path = osp.join(llm_path, "llm")

    # TODO(ligeng): use the LLM class (rather than the path name) to pick the branch below, for better compatibility.
    # Tokenizer settings differ slightly per base LLM family.
    if "mpt" in model_name_or_path:
        tokenizer = AutoTokenizer.from_pretrained(
            llm_path,
            model_max_length=llm_cfg.model_max_length,
            padding_side="right",
        )
    elif "yi" in model_name_or_path.lower():
        tokenizer = AutoTokenizer.from_pretrained(
            llm_path,
            model_max_length=llm_cfg.model_max_length,
            padding_side="right",
            use_fast=False,
        )
    else:
        tokenizer = AutoTokenizer.from_pretrained(
            llm_path,
            model_max_length=llm_cfg.model_max_length,
            padding_side="right",
            use_fast=False,
            legacy=False,
        )
    # TODO(ligeng): is this necessary for llava?
    config.hidden_size = llm.config.hidden_size
    return llm, tokenizer
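
# A minimal, hedged usage sketch (not part of the original module). The checkpoint id
# below is an arbitrary example; any causal-LM repo that contains the tokenizer files
# checked by has_tokenizer() should work. Per the convention above, config.model_dtype
# must be a string such as "torch.float16" because it is passed through eval().
if __name__ == "__main__":
    demo_cfg = PretrainedConfig()
    demo_cfg.model_dtype = "torch.float16"  # assumed string convention, evaluated to a torch dtype
    llm, tokenizer = build_llm_and_tokenizer(
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # example checkpoint; substitute your own
        demo_cfg,
        attn_implementation="eager",
        model_max_length=4096,  # may trigger linear RoPE scaling if it exceeds the base context
    )
    print(type(llm).__name__, tokenizer.model_max_length, demo_cfg.hidden_size)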