"""Caption an image with the INT4 AutoRound-quantized JoyCaption LLaVA model.

Downloads a COCO image, builds a chat prompt via the processor's chat
template, and greedily decodes a 50-token caption.
"""
from auto_round import AutoRoundConfig  # noqa: F401  # must import to register the auto-round quantization format
import requests
import torch
from PIL import Image
from transformers import AutoProcessor, LlavaForConditionalGeneration

quantized_model_path = "OPEA/llama-joycaption-alpha-two-hf-llava-int4-sym-inc"

# Load the INT4 JoyCaption model and its processor.
processor = AutoProcessor.from_pretrained(quantized_model_path)
model = LlavaForConditionalGeneration.from_pretrained(
    quantized_model_path,
    device_map="auto",
    revision="bc917a8",  # AutoGPTQ-format revision of the checkpoint
)
model.eval()

image_url = "http://images.cocodataset.org/train2017/000000116003.jpg"
content = "Write a descriptive caption for this image in a formal tone."

# Preparation for inference
with torch.no_grad():
    # timeout keeps the script from hanging indefinitely on an unreachable host
    image = Image.open(requests.get(image_url, stream=True, timeout=30).raw)

    messages = [
        {
            "role": "system",
            "content": "You are a helpful image captioner.",
        },
        {
            "role": "user",
            "content": content,
        },
    ]
    prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    assert isinstance(prompt, str)

    inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(
        model.device
    )
    # Cast the vision input to the (possibly half-precision) model dtype.
    inputs["pixel_values"] = inputs["pixel_values"].to(model.dtype)

    # Generate the caption with greedy decoding. The original call also passed
    # temperature=0.6, top_p=0.9, top_k=None, suppress_tokens=None, but with
    # do_sample=False transformers ignores all sampling knobs (and warns), so
    # they are omitted here — decoding behavior is unchanged.
    generate_ids = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=False,
        use_cache=True,
    )[0]

    # Trim off the prompt tokens, keeping only the newly generated caption.
    generate_ids = generate_ids[inputs["input_ids"].shape[1]:]

    # Decode the caption
    caption = processor.tokenizer.decode(
        generate_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    caption = caption.strip()
    print(caption)

# NOTE(review): removed a large dead triple-quoted string literal that
# contained unrelated pasted qwen_agent WebUI code; it was never executed
# and belongs to a different project.