Lohia, Aditya committed
Commit 4b91514 · Parent(s): 1ccd3bb

all files
- .gitattributes +1 -0
- app.py +364 -0
- assets/sample-images/01.png +3 -0
- assets/sample-images/02.png +3 -0
- assets/sample-images/03.png +3 -0
- assets/sample-images/04.png +3 -0
- assets/sample-images/06-1.png +3 -0
- assets/sample-images/08.png +3 -0
- assets/sample-images/1.png +3 -0
- assets/sample-images/10.png +3 -0
- assets/sample-images/2.png +3 -0
- assets/sample-images/3.png +3 -0
- assets/sample-images/4.png +3 -0
- assets/sample-images/barchart.png +3 -0
- gateway.py +105 -0
- requirements.txt +4 -0
- style.css +10 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
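(This rule is what `git lfs track "*.png"` would append to .gitattributes; it is needed because this commit adds the PNG sample images below.)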
app.py
ADDED
@@ -0,0 +1,364 @@
import os
import logging
import base64
import io

import gradio as gr
from PIL import Image
from typing import Iterator

from gateway import request_generation

# Setup logging
logging.basicConfig(level=logging.INFO)

# CONSTANTS
# Get max new tokens from the environment; default to 2048 if unset.
# os.getenv returns a string, so cast explicitly.
MAX_NEW_TOKENS: int = int(os.getenv("MAX_NEW_TOKENS", 2048))

# Get max number of images allowed in a single prompt
MAX_NUM_IMAGES_ENV = os.getenv("MAX_NUM_IMAGES")
if not MAX_NUM_IMAGES_ENV:
    raise EnvironmentError("MAX_NUM_IMAGES is not set. Please set it to 1 or more.")
MAX_NUM_IMAGES: int = int(MAX_NUM_IMAGES_ENV)

# Validate environment variables
CLOUD_GATEWAY_API = os.getenv("API_ENDPOINT")
if not CLOUD_GATEWAY_API:
    raise EnvironmentError("API_ENDPOINT is not set.")

MODEL_NAME: str = os.getenv("MODEL_NAME")
if not MODEL_NAME:
    raise EnvironmentError("MODEL_NAME is not set.")

# Get API key
API_KEY = os.getenv("API_KEY")
if not API_KEY:  # simple check that an API key was provided
    raise EnvironmentError("API_KEY is not set.")

# Create the auth header once instead of rebuilding it per request
HEADER = {"x-api-key": API_KEY}

def validate_media(message: dict, chat_history: list = None) -> bool:
    """Validate the image files attached to the new message.

    Args:
        message (dict): multimodal message from the user, with "text" and "files" keys
        chat_history (list): entire chat history of the session (unused)

    Returns:
        bool: True if all files are images and there are at most MAX_NUM_IMAGES of them,
            False otherwise
    """
    image_count = len(message["files"])

    if image_count > MAX_NUM_IMAGES:
        gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images at a time.")
        return False

    # If there are files, check that they are all images
    if not all(
        file.lower().endswith((".png", ".jpg", ".jpeg")) for file in message["files"]
    ):
        gr.Warning("Only images are allowed. Formats available: PNG, JPG, JPEG")
        return False

    return True

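For reference, Gradio's MultimodalTextbox hands run() a dict of this shape, so a quick check of the validator could look like this (a sketch; the file path is illustrative and the environment above is assumed to be configured):

sample = {"text": "Describe this chart.", "files": ["assets/sample-images/barchart.png"]}
assert validate_media(sample)  # one PNG attachment passes both checks above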
def encode_pil_to_base64(pil_image: Image.Image, format: str) -> str:
    """Encode a PIL image as a base64 data URL.

    Args:
        pil_image (Image.Image): PIL image object
        format (str): format to save the image in ("JPEG" or "PNG")

    Returns:
        str: base64-encoded data URL of the image, or None on failure
    """
    buffered = io.BytesIO()

    # JPEG has no alpha channel, so flatten transparent modes to RGB first
    if format == "JPEG" and pil_image.mode in ("RGBA", "LA", "P"):
        pil_image = pil_image.convert("RGB")

    # Define save arguments, including quality for JPEG
    save_kwargs = {"format": format}
    if format == "JPEG":
        save_kwargs["quality"] = 85  # adjust quality as needed (0-100)

    try:
        pil_image.save(buffered, **save_kwargs)
    except Exception as e:
        logging.error(f"Error saving image to buffer with format {format}: {e}")
        return None

    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

    # Determine the MIME type based on the format
    mime_format_part = format.lower()
    if mime_format_part == "jpeg":
        mime_type = "image/jpeg"
    elif mime_format_part == "png":
        mime_type = "image/png"
    else:
        gr.Error(f"Unsupported image format: {format}")
        return None

    return f"data:{mime_type};base64,{img_str}"

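A minimal round-trip check of the encoder (a sketch, not part of the app):

# Encode a tiny image, then decode the data URL back into a PIL image.
img = Image.new("RGB", (4, 4), color="red")
data_url = encode_pil_to_base64(img, format="PNG")
prefix, b64_payload = data_url.split(",", 1)
assert prefix == "data:image/png;base64"
decoded = Image.open(io.BytesIO(base64.b64decode(b64_payload)))
assert decoded.size == (4, 4)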
def process_images(message: list) -> list[dict]:
    """Convert the image files in a message into OpenAI-style image_url content parts.

    Args:
        message (list): list of file paths attached to the message

    Returns:
        list[dict]: list of dictionaries containing image content
    """
    content = []

    # Iterate through the files in the message
    for path in message:
        pil_image = Image.open(path)
        # Get the image format
        image_format = pil_image.format.upper()
        if image_format == "JPG":
            image_format = "JPEG"

        if image_format in ["JPEG", "PNG"]:
            # Convert the image to a base64 data URL; skip it if encoding failed
            base64_image_data = encode_pil_to_base64(pil_image, format=image_format)
            if base64_image_data:
                content.append(
                    {"type": "image_url", "image_url": {"url": base64_image_data}}
                )

    return content

def process_new_user_message(message: dict) -> list[dict]:
    """Process the new user message into a list of text and image content parts.

    Args:
        message (dict): message dictionary containing text and files

    Returns:
        list[dict]: list of dictionaries containing text and image content
    """
    # Create the content list for the message
    messages = []

    if message["text"]:
        # Append the text part to the content list
        messages.append({"type": "text", "text": message["text"]})

        if not message["files"]:
            # If there are no files, return the text part only
            return messages

        # If there are files, process the images and append them
        image_content = process_images(message["files"])
        messages.extend(image_content)
        return messages

    # If there is no text part, warn the user to insert a prompt and return nothing
    gr.Warning("Please insert a prompt.")
    return []

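The returned list follows the OpenAI-style multimodal content schema; for a message with one text part and one image, it looks roughly like this (the data URL is truncated for illustration):

# [
#     {"type": "text", "text": "Read the text in the image."},
#     {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0..."}},
# ]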
def run(
    message: dict,
    chat_history: list,
    system_prompt: str,
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    frequency_penalty: float = 0.0,
    presence_penalty: float = 0.0,
) -> Iterator[str]:
    """Send a request to the backend, fetch the streaming responses and emit them to the UI.

    Args:
        message (dict): multimodal input message from the user
        chat_history (list): entire chat history of the session (managed by Gradio)
        system_prompt (str): system prompt
        max_new_tokens (int, optional): maximum number of tokens to generate, ignoring the
            number of tokens in the prompt. Defaults to 1024.
        temperature (float, optional): the value used to modulate the next-token probabilities.
            Defaults to 0.6.
        frequency_penalty (float, optional): penalizes tokens proportionally to how often they
            have already appeared. 0.0 means no penalty. Defaults to 0.0.
        presence_penalty (float, optional): penalizes tokens that have appeared at least once.
            0.0 means no penalty. Defaults to 0.0.

    Yields:
        Iterator[str]: streaming responses to the UI
    """
    if not validate_media(message):
        # If the attachments are not valid, emit an empty string and stop
        yield ""
        return

    messages = []
    if system_prompt:
        messages.append(
            {"role": "system", "content": [{"type": "text", "text": system_prompt}]}
        )

    # Append the new user message if processing returned any content
    content = process_new_user_message(message)
    if content:
        messages.append({"role": "user", "content": content})
    else:
        # If the content is empty, emit an empty string and stop
        yield ""
        return

    # Accumulate streamed chunks and yield the text generated so far
    outputs = []
    for text in request_generation(
        header=HEADER,
        messages=messages,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        presence_penalty=presence_penalty,
        frequency_penalty=frequency_penalty,
        cloud_gateway_api=CLOUD_GATEWAY_API,
        model_name=MODEL_NAME,
    ):
        outputs.append(text)
        yield "".join(outputs)

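Note the accumulate-then-yield pattern above: Gradio's ChatInterface treats each yielded value as the full response so far, not a delta, so every chunk is appended and the joined string is re-emitted.

# chunk stream: "Hel", "lo", "!"  ->  successive yields: "Hel", "Hello", "Hello!"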
examples = [
    ["Plan a three-day trip to Washington DC for the Cherry Blossom Festival."],
    ["How many hours does it take a man to eat a Helicopter?"],
    [
        {
            "text": "Write the matplotlib code to generate the same bar chart.",
            "files": ["assets/sample-images/barchart.png"],
        }
    ],
    [
        {
            "text": "Describe the atmosphere of the scene.",
            "files": ["assets/sample-images/06-1.png"],
        }
    ],
    [
        {
            "text": "Write a short story about what might have happened in this house.",
            "files": ["assets/sample-images/08.png"],
        }
    ],
    [
        {
            "text": "Describe the creatures that would live in this world.",
            "files": ["assets/sample-images/10.png"],
        }
    ],
    [
        {
            "text": "Read the text in the image.",
            "files": ["assets/sample-images/1.png"],
        }
    ],
    [
        {
            "text": "When is this ticket dated and how much did it cost?",
            "files": ["assets/sample-images/2.png"],
        }
    ],
    [
        {
            "text": "Transcribe the text in the image into markdown.",
            "files": ["assets/sample-images/3.png"],
        }
    ],
    [
        {
            "text": "Evaluate this integral.",
            "files": ["assets/sample-images/4.png"],
        }
    ],
    [
        {
            "text": "Caption this image.",
            "files": ["assets/sample-images/01.png"],
        }
    ],
    [
        {
            "text": "What does the sign say?",
            "files": ["assets/sample-images/02.png"],
        }
    ],
    [
        {
            "text": "Compare and contrast the two images.",
            "files": ["assets/sample-images/03.png"],
        }
    ],
    [
        {
            "text": "List all the objects in the image and their colors.",
            "files": ["assets/sample-images/04.png"],
        }
    ],
]

demo = gr.ChatInterface(
    fn=run,
    type="messages",
    chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
    textbox=gr.MultimodalTextbox(
        file_types=["image"],
        file_count="single" if MAX_NUM_IMAGES == 1 else "multiple",
        autofocus=True,
    ),
    multimodal=True,
    additional_inputs=[
        gr.Textbox(
            label="System prompt",
            # value="You are a highly capable AI assistant. Provide accurate, concise, and fact-based responses that are directly relevant to the user's query. Avoid speculation, ensure logical consistency, and maintain clarity in longer outputs.",
            value="",
            lines=3,
        ),
        gr.Slider(
            label="Max New Tokens",
            minimum=1,
            maximum=MAX_NEW_TOKENS,
            step=1,
            value=MAX_NEW_TOKENS,  # keep the default within the configured maximum
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.3,
        ),
        gr.Slider(
            label="Frequency penalty",
            minimum=-2.0,
            maximum=2.0,
            step=0.1,
            value=0.0,
        ),
        gr.Slider(
            label="Presence penalty",
            minimum=-2.0,
            maximum=2.0,
            step=0.1,
            value=0.0,
        ),
    ],
    stop_btn=False,
    title="Llama-4 Scout Instruct",
    description="This Space is an Alpha release that demonstrates the [Llama-4-Scout](https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E) model running on AMD MI300 infrastructure. Use of this Space is governed by the Meta Llama 4 [License](https://www.llama.com/llama4/license/). Feel free to play with it!",
    fill_height=True,
    run_examples_on_click=False,
    examples=examples,
    cache_examples=False,
)

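ChatInterface calls fn(message, history, *additional_inputs), so the widget order above must match run()'s signature: system_prompt, max_new_tokens, temperature, frequency_penalty, presence_penalty.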
if __name__ == "__main__":
    # QUEUE and CONCURRENCY_LIMIT must be set in the environment, like the variables above
    demo.queue(
        max_size=int(os.getenv("QUEUE")),
        default_concurrency_limit=int(os.getenv("CONCURRENCY_LIMIT")),
    ).launch()
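For local testing, an environment along these lines would be needed before launching (all values are placeholders, not from this repo):

# export API_ENDPOINT="https://gateway.example.com/api/"   # placeholder endpoint
# export MODEL_NAME="llama-4-scout-17b-16e"                # placeholder model id
# export API_KEY="..."                                     # left elided on purpose
# export MAX_NUM_IMAGES=5 QUEUE=20 CONCURRENCY_LIMIT=4     # illustrative limits
# python app.py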
assets/sample-images/01.png ADDED (Git LFS)
assets/sample-images/02.png ADDED (Git LFS)
assets/sample-images/03.png ADDED (Git LFS)
assets/sample-images/04.png ADDED (Git LFS)
assets/sample-images/06-1.png ADDED (Git LFS)
assets/sample-images/08.png ADDED (Git LFS)
assets/sample-images/1.png ADDED (Git LFS)
assets/sample-images/10.png ADDED (Git LFS)
assets/sample-images/2.png ADDED (Git LFS)
assets/sample-images/3.png ADDED (Git LFS)
assets/sample-images/4.png ADDED (Git LFS)
assets/sample-images/barchart.png ADDED (Git LFS)
gateway.py
ADDED
@@ -0,0 +1,105 @@
import json
import logging

import requests
import urllib3

# The gateway is reached over HTTPS without certificate verification, so silence the warning
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Setup logging
logging.basicConfig(level=logging.INFO)


def request_generation(
    header: dict,
    messages: list,
    cloud_gateway_api: str,
    model_name: str,
    max_new_tokens: int = 1024,
    temperature: float = 0.3,
    frequency_penalty: float = 0.0,
    presence_penalty: float = 0.0,
):
    """
    Request streaming generation from the cloud gateway API. Uses the requests module with
    stream=True to surface token-by-token generation from the LLM.

    Args:
        header: authorization header for the API.
        messages: list of chat messages (system and user turns) to send.
        cloud_gateway_api (str): API endpoint to send the request to.
        model_name (str): name of the model to serve the request.
        max_new_tokens: maximum number of tokens to generate, ignoring the number of tokens in
            the prompt.
        temperature: the value used to modulate the next-token probabilities.
        frequency_penalty: penalizes tokens proportionally to how often they have already
            appeared. 0.0 means no penalty.
        presence_penalty: penalizes tokens that have appeared at least once. 0.0 means no
            penalty.

    Yields:
        str: content chunks of the streamed model response.
    """

    payload = {
        "model": model_name,
        "messages": messages,
        "max_tokens": max_new_tokens,
        "temperature": temperature,
        "frequency_penalty": frequency_penalty,
        "presence_penalty": presence_penalty,
        "stream": True,  # Enable streaming
        "serving_runtime": "vllm",
    }

    try:
        # Create the conversation first; the gateway returns a conversation ID
        response = requests.post(
            cloud_gateway_api + "chat/conversation",
            headers=header,
            json=payload,
            verify=False,
        )
        logging.debug("Conversation response: %s", response.text)
        response.raise_for_status()

        # Append the conversation ID with the key X-Conversation-ID to the header
        header["X-Conversation-ID"] = response.json()["conversationId"]

        # Then stream the generated tokens for that conversation
        with requests.get(
            cloud_gateway_api + "conversation/stream",
            headers=header,
            verify=False,
            stream=True,
        ) as response:
            for chunk in response.iter_lines():
                if chunk:
                    # Convert the chunk from bytes to a string
                    chunk_str = chunk.decode("utf-8")

                    # Remove the `data: ` prefix (sometimes doubled) from the chunk
                    for _ in range(2):
                        if chunk_str.startswith("data: "):
                            chunk_str = chunk_str[len("data: "):]

                    # Stop at the end-of-stream sentinel
                    if chunk_str.strip() == "[DONE]":
                        break

                    # Parse the chunk as JSON and extract the delta content
                    try:
                        chunk_json = json.loads(chunk_str)

                        if "choices" in chunk_json and chunk_json["choices"]:
                            content = chunk_json["choices"][0]["delta"].get("content", "")
                        else:
                            content = ""

                        # Yield the generated content as it streams in
                        if content:
                            yield content
                    except json.JSONDecodeError:
                        # Skip chunks that fail to decode
                        continue
    except requests.RequestException as e:
        logging.error(f"Failed to generate response: {e}")
        yield "Server not responding. Please try again later."
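A minimal standalone consumer of this generator might look like this (endpoint, key, and model name are placeholders):

if __name__ == "__main__":
    for piece in request_generation(
        header={"x-api-key": "YOUR_KEY"},  # placeholder key
        messages=[{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}],
        cloud_gateway_api="https://gateway.example.com/api/",  # placeholder endpoint
        model_name="llama-4-scout-17b-16e",  # placeholder model id
    ):
        print(piece, end="", flush=True)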
requirements.txt
ADDED
@@ -0,0 +1,4 @@
numpy
pillow
fastapi
websockets
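Note: gradio and requests are imported by app.py and gateway.py but are not pinned here; presumably the Hugging Face Spaces Gradio SDK image provides them.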
style.css
ADDED
@@ -0,0 +1,10 @@
h1 {
  text-align: center;
  display: block;
}

.contain {
  max-width: 900px;
  margin: auto;
  padding-top: 1.5rem;
}
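As committed, app.py never references style.css; for these rules to take effect, the stylesheet would presumably need to be passed to the interface, e.g. gr.ChatInterface(..., css=open("style.css").read()).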
|