|
from PIL import Image |
|
import requests |
|
from transformers import AutoProcessor, AutoModel, AutoTokenizer |
|
import torch |
|
|
|
from transformers import TextIteratorStreamer |
|
import threading |
|
|
|
|
|
# Path to the local Eagle2-2B checkpoint. Factored into one constant: the
# original repeated this path verbatim in three from_pretrained calls, which
# invites drift when the checkpoint moves.
MODEL_PATH = "/home/zhidingy/workspace/eagle-next/internvl_chat/work_dirs/release/test/Eagle2-2B"

# trust_remote_code=True is required because the checkpoint ships its own
# modeling/processing code.
# NOTE(review): attn_implementation='flash_attention_2' needs flash-attn
# installed and a CUDA device — confirm the target environment supports it.
model = AutoModel.from_pretrained(
    MODEL_PATH,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True, use_fast=True)

processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True, use_fast=True)

# Left padding so that, in batched generation, the prompt ends right where
# new tokens begin.
processor.tokenizer.padding_side = "left"
|
|
|
# Single-turn chat request in the HF chat-template message format: one image
# (referenced by URL; the processor fetches it) followed by a text instruction.
_image_part = {
    "type": "image",
    "image": "https://www.ilankelman.org/stopsigns/australia.jpg",
}
_text_part = {"type": "text", "text": "Describe this image."}
messages = [{"role": "user", "content": [_image_part, _text_part]}]
|
|
|
# Render the chat messages into a single prompt string; add_generation_prompt
# appends the assistant turn header so the model continues as the assistant.
text_list = [
    processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
]

# The processor's helper loads the image/video inputs referenced in `messages`
# (here: fetches the image URL).
image_inputs, video_inputs = processor.process_vision_info(messages)

inputs = processor(
    text=text_list,
    images=image_inputs,
    videos=video_inputs,
    return_tensors="pt",
    padding=True,
)

# Fix: the original unconditionally did .to("cuda"), which raises on hosts
# without a CUDA device. Fall back to CPU when CUDA is unavailable.
device = "cuda" if torch.cuda.is_available() else "cpu"
inputs = inputs.to(device)
model = model.to(device)
|
|
|
|
|
|
|
|
|
|
|
|
|
# Stream decoded text as it is generated: skip the prompt echo and special
# tokens so only the assistant's reply reaches stdout.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

generation_kwargs = dict(
    **inputs,
    streamer=streamer,
    max_new_tokens=1024,
    do_sample=True,
    top_p=0.95,
    temperature=0.8,
)

# generate() blocks until completion, so run it on a worker thread and
# consume the streamer incrementally on the main thread.
thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

for new_text in streamer:
    print(new_text, end="", flush=True)

# Fix: the original never joined the worker thread, so the script could reach
# interpreter teardown while generate() was still finalizing. Join it, and
# terminate the streamed output with a newline.
thread.join()
print()
|
|