"""Caption an image with the INT4 AutoRound-quantized JoyCaption LLaVA model.

Downloads a COCO image, builds a chat prompt via the processor's chat
template, and greedily decodes a 50-token caption.
"""
from auto_round import AutoRoundConfig  # noqa: F401  # must import to register the auto-round quantization format
import requests
import torch
from PIL import Image
from transformers import AutoProcessor, LlavaForConditionalGeneration

quantized_model_path = "OPEA/llama-joycaption-alpha-two-hf-llava-int4-sym-inc"

# Load the INT4 JoyCaption model and its processor.
processor = AutoProcessor.from_pretrained(quantized_model_path)
model = LlavaForConditionalGeneration.from_pretrained(
    quantized_model_path,
    device_map="auto",
    revision="bc917a8",  # AutoGPTQ-format revision of the checkpoint
)
model.eval()

image_url = "http://images.cocodataset.org/train2017/000000116003.jpg"
content = "Write a descriptive caption for this image in a formal tone."

# Preparation for inference
with torch.no_grad():
    # timeout keeps the script from hanging indefinitely on an unreachable host
    image = Image.open(requests.get(image_url, stream=True, timeout=30).raw)

    messages = [
        {
            "role": "system",
            "content": "You are a helpful image captioner.",
        },
        {
            "role": "user",
            "content": content,
        },
    ]
    prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    assert isinstance(prompt, str)

    inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(
        model.device
    )
    # Cast the vision input to the (possibly half-precision) model dtype.
    inputs["pixel_values"] = inputs["pixel_values"].to(model.dtype)

    # Generate the caption with greedy decoding. The original call also passed
    # temperature=0.6, top_p=0.9, top_k=None, suppress_tokens=None, but with
    # do_sample=False transformers ignores all sampling knobs (and warns), so
    # they are omitted here — decoding behavior is unchanged.
    generate_ids = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=False,
        use_cache=True,
    )[0]

    # Trim off the prompt tokens, keeping only the newly generated caption.
    generate_ids = generate_ids[inputs["input_ids"].shape[1]:]

    # Decode the caption
    caption = processor.tokenizer.decode(
        generate_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    caption = caption.strip()
    print(caption)

# NOTE(review): removed a large dead triple-quoted string literal that
# contained unrelated pasted qwen_agent WebUI code; it was never executed
# and belongs to a different project.