import math
import os
import os.path as osp
import warnings

import torch
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    PretrainedConfig,
    PreTrainedModel,
)
def has_tokenizer(path):
    # First check for tokenizer files in a local checkpoint directory.
    if (
        osp.exists(osp.join(path, "special_tokens_map.json"))
        and osp.exists(osp.join(path, "tokenizer_config.json"))
        and (osp.exists(osp.join(path, "tokenizer.model")) or osp.exists(osp.join(path, "tokenizer.json")))
    ):
        return True

    # Otherwise, treat `path` as a Hugging Face Hub repo id and check there.
    from huggingface_hub import HfApi, file_exists
    from huggingface_hub.utils import HFValidationError

    api = HfApi()
    try:
        valid_hf_repo = api.repo_exists(path)
    except HFValidationError:
        valid_hf_repo = False
    if (
        valid_hf_repo
        and file_exists(path, "special_tokens_map.json")
        and file_exists(path, "tokenizer_config.json")
        and (file_exists(path, "tokenizer.model") or file_exists(path, "tokenizer.json"))
    ):
        return True
    return False
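
# A minimal usage sketch for has_tokenizer (illustration only; the local path and
# repo id below are hypothetical and not part of this module):
#
#     has_tokenizer("./checkpoints/my-vlm/llm")  # True if the tokenizer files sit in that folder
#     has_tokenizer("org/some-llm-repo")         # otherwise falls back to a Hugging Face Hub lookup
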
def context_length_extension(config):
    # Extend the context window via linear RoPE scaling when the requested
    # model_max_length exceeds the model's native max_position_embeddings.
    orig_ctx_len = getattr(config, "max_position_embeddings", None)
    model_max_length = getattr(config, "model_max_length", None)
    if orig_ctx_len and model_max_length and model_max_length > orig_ctx_len:
        print(f"Scaling RoPE from {orig_ctx_len} to {model_max_length}")
        scaling_factor = float(math.ceil(model_max_length / orig_ctx_len))
        config.rope_scaling = {"type": "linear", "factor": scaling_factor}
    return config
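
# Worked example (illustration only): with a native context of 4096 positions and a
# requested model_max_length of 10000, the factor is ceil(10000 / 4096) = 3.0, so
# config.rope_scaling becomes {"type": "linear", "factor": 3.0}.
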
def build_llm_and_tokenizer(
    model_name_or_path: str,
    config: PretrainedConfig,
    attn_implementation=None,
    model_max_length=None,
    *args,
    **kwargs,
) -> PreTrainedModel:
    # Load the LLM config first so the attention implementation, context length,
    # and RoPE scaling can be adjusted before instantiating the model.
    llm_cfg = AutoConfig.from_pretrained(model_name_or_path)
    llm_cfg._attn_implementation = attn_implementation
    llm_cfg.model_max_length = model_max_length
    if model_max_length is not None:
        context_length_extension(llm_cfg)

    # config.model_dtype is expected to be a string such as "torch.float16".
    llm = AutoModelForCausalLM.from_pretrained(
        model_name_or_path, config=llm_cfg, torch_dtype=eval(config.model_dtype), *args, **kwargs
    )
    llm_path = model_name_or_path
    if not has_tokenizer(llm_path):
        warnings.warn("Tokenizer not found in the VLM root folder; falling back to the ./llm subfolder.")
        llm_path = osp.join(llm_path, "llm")

    # TODO(ligeng): use the LLM class (rather than the path name) to pick the branch below, for better compatibility.
    # Tokenizer settings differ slightly per base LLM family.
    if "mpt" in model_name_or_path:
        tokenizer = AutoTokenizer.from_pretrained(
            llm_path,
            model_max_length=llm_cfg.model_max_length,
            padding_side="right",
        )
    elif "yi" in model_name_or_path.lower():
        tokenizer = AutoTokenizer.from_pretrained(
            llm_path,
            model_max_length=llm_cfg.model_max_length,
            padding_side="right",
            use_fast=False,
        )
    else:
        tokenizer = AutoTokenizer.from_pretrained(
            llm_path,
            model_max_length=llm_cfg.model_max_length,
            padding_side="right",
            use_fast=False,
            legacy=False,
        )
    # TODO(ligeng): is this necessary for llava?
    config.hidden_size = llm.config.hidden_size
    return llm, tokenizer
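
# A minimal, hedged usage sketch (not part of the original module). The checkpoint id
# below is an arbitrary example; any causal-LM repo that contains the tokenizer files
# checked by has_tokenizer() should work. Per the convention above, config.model_dtype
# must be a string such as "torch.float16" because it is passed through eval().
if __name__ == "__main__":
    demo_cfg = PretrainedConfig()
    demo_cfg.model_dtype = "torch.float16"  # assumed string convention, evaluated to a torch dtype
    llm, tokenizer = build_llm_and_tokenizer(
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # example checkpoint; substitute your own
        demo_cfg,
        attn_implementation="eager",
        model_max_length=4096,  # may trigger linear RoPE scaling if it exceeds the base context
    )
    print(type(llm).__name__, tokenizer.model_max_length, demo_cfg.hidden_size)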