Spaces:
Runtime error
Runtime error
from ..vision.siglip_config import SigLipConfig
from ..language.language_config import LanguageModelConfig
class MultiModalConfig:
    """Top-level configuration combining a vision config and a language-model config.

    The constructor builds a ``SigLipConfig`` from ``vision_config`` and a
    ``LanguageModelConfig`` from ``text_config``, then cross-wires a few
    derived values between them (see ``__init__``).
    """

    def __init__(
        self,
        vision_config=None,
        text_config=None,
        ignore_index=-100,
        image_token_index=256000,
        vocab_size=257152,
        projection_dim=2048,
        hidden_size=2048,
        pad_token_id=None,
        **kwargs,
    ):
        """Build the nested vision/text configs and derive shared fields.

        Args:
            vision_config: Mapping of keyword arguments for ``SigLipConfig``.
                ``None`` is treated as an empty mapping, so ``SigLipConfig``
                is built from its own defaults.
            text_config: Mapping of keyword arguments for
                ``LanguageModelConfig``. ``None`` is treated as an empty
                mapping.
            ignore_index: Stored unchanged on the instance.
            image_token_index: Stored unchanged on the instance.
            vocab_size: Accepted for interface compatibility; the stored
                ``vocab_size`` is always taken from the constructed text
                config.
            projection_dim: Stored on the instance and copied onto the
                vision config.
            hidden_size: Stored unchanged on the instance.
            pad_token_id: Stored on the instance and forwarded to
                ``LanguageModelConfig``. When it is ``None`` and
                ``text_config`` already carries a ``pad_token_id`` entry,
                the entry from ``text_config`` is forwarded instead.
            **kwargs: Accepted and ignored, so extra keys in a serialized
                config do not break construction.
        """
        super().__init__()

        self.ignore_index = ignore_index
        self.image_token_index = image_token_index
        self.projection_dim = projection_dim
        self.hidden_size = hidden_size
        self.is_encoder_decoder = False
        self.pad_token_id = pad_token_id

        # Unpacking ``None`` with ``**`` raises TypeError, so fall back to an
        # empty mapping when a sub-config is omitted.
        vision_kwargs = dict(vision_config) if vision_config is not None else {}
        self.vision_config = SigLipConfig(**vision_kwargs)

        # Work on a copy so the caller's mapping is never mutated, and avoid
        # passing ``pad_token_id`` twice when the mapping already contains it.
        text_kwargs = dict(text_config) if text_config is not None else {}
        if pad_token_id is not None or "pad_token_id" not in text_kwargs:
            text_kwargs["pad_token_id"] = pad_token_id
        self.text_config = LanguageModelConfig(**text_kwargs)

        # The text config is the source of truth for the vocabulary size.
        self.vocab_size = self.text_config.vocab_size

        # One image token per patch: (image_size // patch_size) patches per
        # side, squared.
        patches_per_side = (
            self.vision_config.image_size // self.vision_config.patch_size
        )
        self.text_config.num_image_tokens = patches_per_side ** 2
        self.vision_config.projection_dim = projection_dim