cstr committed on
Commit 9dba8e1 · verified · 1 Parent(s): e0621f5

Update app.py

Files changed (1)
  1. app.py +564 -150
app.py CHANGED
@@ -1,94 +1,142 @@
  import os
- import gradio as gr
- import requests
  import json
  import base64
- from PIL import Image
- import io
- import logging
- import PyPDF2
- import markdown
 
  # Configure logging
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  logger = logging.getLogger(__name__)
 
  # API key
  OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
 
- # Model list with context sizes - organized by category
  MODELS = [
  # Vision Models
- {"category": "Vision", "models": [
  ("Meta: Llama 3.2 11B Vision Instruct", "meta-llama/llama-3.2-11b-vision-instruct:free", 131072),
- ("Qwen2.5 VL 72B Instruct", "qwen/qwen2.5-vl-72b-instruct:free", 131072),
- ("Qwen2.5 VL 32B Instruct", "qwen/qwen2.5-vl-32b-instruct:free", 8192),
- ("Qwen2.5 VL 7B Instruct", "qwen/qwen-2.5-vl-7b-instruct:free", 64000),
- ("Qwen2.5 VL 3B Instruct", "qwen/qwen2.5-vl-3b-instruct:free", 64000),
  ]},
 
- # Gemini Models
- {"category": "Gemini", "models": [
- ("Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000),
- ("Gemini Pro 2.5 Experimental", "google/gemini-2.5-pro-exp-03-25:free", 1000000),
- ("Gemini 2.0 Flash Thinking Experimental", "google/gemini-2.0-flash-thinking-exp:free", 1048576),
- ("Gemini Flash 2.0 Experimental", "google/gemini-2.0-flash-exp:free", 1048576),
- ("Gemini Flash 1.5 8B Experimental", "google/gemini-flash-1.5-8b-exp", 1000000),
- ("LearnLM 1.5 Pro Experimental", "google/learnlm-1.5-pro-experimental:free", 40960),
  ]},
 
- # Llama Models
- {"category": "Llama", "models": [
- ("Llama 3.3 70B Instruct", "meta-llama/llama-3.3-70b-instruct:free", 8000),
- ("Llama 3.2 3B Instruct", "meta-llama/llama-3.2-3b-instruct:free", 20000),
- ("Llama 3.2 1B Instruct", "meta-llama/llama-3.2-1b-instruct:free", 131072),
- ("Llama 3.1 8B Instruct", "meta-llama/llama-3.1-8b-instruct:free", 131072),
- ("Llama 3 8B Instruct", "meta-llama/llama-3-8b-instruct:free", 8192),
- ("Llama 3.1 Nemotron 70B Instruct", "nvidia/llama-3.1-nemotron-70b-instruct:free", 131072),
  ]},
 
- # DeepSeek Models
- {"category": "DeepSeek", "models": [
- ("DeepSeek R1 Zero", "deepseek/deepseek-r1-zero:free", 163840),
- ("DeepSeek R1", "deepseek/deepseek-r1:free", 163840),
- ("DeepSeek V3 Base", "deepseek/deepseek-v3-base:free", 131072),
- ("DeepSeek V3 0324", "deepseek/deepseek-v3-0324:free", 131072),
- ("DeepSeek V3", "deepseek/deepseek-chat:free", 131072),
- ("DeepSeek R1 Distill Qwen 14B", "deepseek/deepseek-r1-distill-qwen-14b:free", 64000),
- ("DeepSeek R1 Distill Qwen 32B", "deepseek/deepseek-r1-distill-qwen-32b:free", 16000),
- ("DeepSeek R1 Distill Llama 70B", "deepseek/deepseek-r1-distill-llama-70b:free", 8192),
  ]},
 
- # Other Popular Models
- {"category": "Other Popular Models", "models": [
- ("Mistral Nemo", "mistralai/mistral-nemo:free", 128000),
- ("Mistral Small 3.1 24B", "mistralai/mistral-small-3.1-24b-instruct:free", 96000),
- ("Gemma 3 27B", "google/gemma-3-27b-it:free", 96000),
- ("Gemma 3 12B", "google/gemma-3-12b-it:free", 131072),
- ("Gemma 3 4B", "google/gemma-3-4b-it:free", 131072),
- ("DeepHermes 3 Llama 3 8B Preview", "nousresearch/deephermes-3-llama-3-8b-preview:free", 131072),
- ("Qwen2.5 72B Instruct", "qwen/qwen-2.5-72b-instruct:free", 32768),
  ]},
 
- # Smaller Models (<50B params)
- {"category": "Smaller Models", "models": [
- ("Gemma 3 1B", "google/gemma-3-1b-it:free", 32768),
- ("Gemma 2 9B", "google/gemma-2-9b-it:free", 8192),
- ("Mistral 7B Instruct", "mistralai/mistral-7b-instruct:free", 8192),
- ("Qwen 2 7B Instruct", "qwen/qwen-2-7b-instruct:free", 8192),
- ("Phi-3 Mini 128K Instruct", "microsoft/phi-3-mini-128k-instruct:free", 8192),
- ("Phi-3 Medium 128K Instruct", "microsoft/phi-3-medium-128k-instruct:free", 8192),
- ("OpenChat 3.5 7B", "openchat/openchat-7b:free", 8192),
- ("Zephyr 7B", "huggingfaceh4/zephyr-7b-beta:free", 4096),
- ("MythoMax 13B", "gryphe/mythomax-l2-13b:free", 4096),
  ]},
  ]
 
  # Flatten model list for easy searching
  ALL_MODELS = []
  for category in MODELS:
- for model in category["models"]:
- ALL_MODELS.append(model)
 
  def format_to_message_dict(history):
  """Convert history to proper message format"""
@@ -103,44 +151,72 @@ def format_to_message_dict(history):
  return messages
 
  def encode_image_to_base64(image_path):
- """Encode an image file to base64 string"""
  try:
  if isinstance(image_path, str): # File path as string
  with open(image_path, "rb") as image_file:
  encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
  file_extension = image_path.split('.')[-1].lower()
  mime_type = f"image/{file_extension}"
- if file_extension == "jpg" or file_extension == "jpeg":
  mime_type = "image/jpeg"
  return f"data:{mime_type};base64,{encoded_string}"
- else: # Pillow Image or file-like object
- buffered = io.BytesIO()
- image_path.save(buffered, format="PNG")
  encoded_string = base64.b64encode(buffered.getvalue()).decode('utf-8')
  return f"data:image/png;base64,{encoded_string}"
  except Exception as e:
  logger.error(f"Error encoding image: {str(e)}")
  return None
 
  def extract_text_from_file(file_path):
- """Extract text from various file types"""
  try:
  file_extension = file_path.split('.')[-1].lower()
 
  if file_extension == 'pdf':
- text = ""
- with open(file_path, 'rb') as file:
- pdf_reader = PyPDF2.PdfReader(file)
- for page_num in range(len(pdf_reader.pages)):
- page = pdf_reader.pages[page_num]
- text += page.extract_text() + "\n\n"
- return text
 
  elif file_extension == 'md':
- with open(file_path, 'r', encoding='utf-8') as file:
- md_text = file.read()
- # You can convert markdown to plain text if needed
- return md_text
 
  elif file_extension == 'txt':
  with open(file_path, 'r', encoding='utf-8') as file:
@@ -184,7 +260,7 @@ def prepare_message_with_media(text, images=None, documents=None):
  return text
 
  # If we have images, create a multimodal content array
- content = [{"type": "text", "text": text}]
 
  # Add images if any
  if images:
@@ -203,10 +279,14 @@ def prepare_message_with_media(text, images=None, documents=None):
 
  def ask_ai(message, chatbot, model_choice, temperature, max_tokens, top_p, frequency_penalty,
  presence_penalty, images, documents, reasoning_effort):
- """Enhanced AI query function with comprehensive options"""
  if not message.strip() and not images and not documents:
  return chatbot, ""
 
  # Get model ID and context size
  model_id = None
  context_size = 0
@@ -239,11 +319,18 @@ def ask_ai(message, chatbot, model_choice, temperature, max_tokens, top_p, frequ
  "messages": messages,
  "temperature": temperature,
  "max_tokens": max_tokens,
- "top_p": top_p,
- "frequency_penalty": frequency_penalty,
- "presence_penalty": presence_penalty
  }
 
  # Add reasoning if selected
  if reasoning_effort != "none":
  payload["reasoning"] = {
@@ -266,7 +353,7 @@ def ask_ai(message, chatbot, model_choice, temperature, max_tokens, top_p, frequ
  logger.info(f"Response status: {response.status_code}")
 
  response_text = response.text
- logger.info(f"Response body: {response_text}")
 
  if response.status_code == 200:
  result = response.json()
@@ -288,6 +375,29 @@ def ask_ai(message, chatbot, model_choice, temperature, max_tokens, top_p, frequ
  def clear_chat():
  return [], "", [], [], 0.7, 1000, 0.8, 0.0, 0.0, "none"
 
  def filter_models(search_term):
  """Filter models based on search term"""
  if not search_term:
@@ -316,6 +426,19 @@ def update_context_display(model_name):
  return f"{context_formatted} tokens"
  return "Unknown"
 
  # Create enhanced interface
  with gr.Blocks(css="""
  .context-size {
@@ -335,9 +458,9 @@ with gr.Blocks(css="""
  }
  """) as demo:
  gr.Markdown("""
- # Enhanced AI Chat
 
- Chat with various AI models from OpenRouter with support for images and documents.
  """)
 
  with gr.Row():
@@ -365,7 +488,7 @@ with gr.Blocks(css="""
 
  with gr.Row():
  # Image upload
- with gr.Accordion("Upload Images (for vision models)", open=False):
  images = gr.Gallery(
  label="Uploaded Images",
  show_label=True,
@@ -424,6 +547,15 @@ with gr.Blocks(css="""
  [model[0] for model in MODELS[0]["models"]],
  label="Models in Category"
  )
 
  with gr.Accordion("Generation Parameters", open=False):
  with gr.Group(elem_classes="parameter-grid"):
@@ -472,81 +604,363 @@ with gr.Blocks(css="""
  value="none",
  label="Reasoning Effort"
  )
 
- # Connect model search to dropdown filter
- model_search.change(
- fn=filter_models,
- inputs=[model_search],
- outputs=[model_choice]
- )
 
- # Update context display when model changes
- model_choice.change(
- fn=update_context_display,
- inputs=[model_choice],
- outputs=[context_display]
- )
 
- # Update model list when category changes
- def update_category_models(category):
- for cat in MODELS:
- if cat["category"] == category:
- return gr.Radio.update(choices=[model[0] for model in cat["models"]], value=cat["models"][0][0])
- return gr.Radio.update(choices=[], value=None)
-
- model_categories.change(
- fn=update_category_models,
- inputs=[model_categories],
- outputs=[category_models]
- )
 
- # Update main model choice when category model is selected
- category_models.change(
- fn=lambda x: x,
- inputs=[category_models],
- outputs=[model_choice]
- )
 
- # Process uploaded images
- def process_uploaded_images(files):
- return [file.name for file in files]
 
- image_upload_btn.upload(
- fn=process_uploaded_images,
- inputs=[image_upload_btn],
- outputs=[images]
- )
 
- # Set up events
- submit_btn.click(
- fn=ask_ai,
- inputs=[
- message, chatbot, model_choice, temperature, max_tokens,
- top_p, frequency_penalty, presence_penalty, images,
- documents, reasoning_effort
- ],
- outputs=[chatbot, message]
- )
 
- message.submit(
- fn=ask_ai,
- inputs=[
- message, chatbot, model_choice, temperature, max_tokens,
- top_p, frequency_penalty, presence_penalty, images,
- documents, reasoning_effort
- ],
- outputs=[chatbot, message]
  )
 
- clear_btn.click(
- fn=clear_chat,
- inputs=[],
- outputs=[
- chatbot, message, images, documents, temperature,
- max_tokens, top_p, frequency_penalty, presence_penalty, reasoning_effort
- ]
  )
 
  # Launch directly with Gradio's built-in server
  if __name__ == "__main__":
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
  import os
+ import logging
  import json
  import base64
+ from io import BytesIO
 
  # Configure logging
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  logger = logging.getLogger(__name__)
 
+ # Graceful imports with fallbacks
+ try:
+ import gradio as gr
+ except ImportError:
+ logger.error("Gradio not found. Please install with 'pip install gradio'")
+ raise
+
+ try:
+ import requests
+ except ImportError:
+ logger.error("Requests not found. Please install with 'pip install requests'")
+ raise
+
+ # Optional libraries with fallbacks
+ try:
+ from PIL import Image
+ PIL_AVAILABLE = True
+ except ImportError:
+ logger.warning("PIL not found. Image processing functionality will be limited.")
+ PIL_AVAILABLE = False
+
+ # PDF processing
+ PDF_AVAILABLE = False
+ try:
+ import PyPDF2
+ PDF_AVAILABLE = True
+ except ImportError:
+ logger.warning("PyPDF2 not found. Attempting to use pdfminer.six as fallback...")
+ try:
+ from pdfminer.high_level import extract_text as pdf_extract_text
+ PDF_AVAILABLE = True
+
+ # Create a wrapper to mimic PyPDF2 functionality
+ def extract_text_from_pdf(file_path):
+ return pdf_extract_text(file_path)
+ except ImportError:
+ logger.warning("No PDF processing libraries found. PDF support will be disabled.")
+
+ # Markdown processing
+ MD_AVAILABLE = False
+ try:
+ import markdown
+ MD_AVAILABLE = True
+ except ImportError:
+ logger.warning("Markdown not found. Attempting to use markdownify as fallback...")
+ try:
+ from markdownify import markdownify as md
+ MD_AVAILABLE = True
+
+ # Create a wrapper for markdown
+ def convert_markdown(text):
+ return md(text)
+ except ImportError:
+ logger.warning("No Markdown processing libraries found. Markdown support will be limited.")
+
  # API key
  OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
 
+ # Model list with context sizes - organized by capability
  MODELS = [
  # Vision Models
+ {"category": "Vision Models", "models": [
+ ("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000),
+ ("Google: Gemini Pro 2.5 Experimental", "google/gemini-2.5-pro-exp-03-25:free", 1000000),
+ ("Google: Gemini 2.0 Flash Thinking Experimental", "google/gemini-2.0-flash-thinking-exp:free", 1048576),
+ ("Google: Gemini Flash 2.0 Experimental", "google/gemini-2.0-flash-exp:free", 1048576),
+ ("Google: Gemini Flash 1.5 8B Experimental", "google/gemini-flash-1.5-8b-exp", 1000000),
+ ("Google: Gemini 2.0 Flash Thinking Experimental", "google/gemini-2.0-flash-thinking-exp-1219:free", 40000),
  ("Meta: Llama 3.2 11B Vision Instruct", "meta-llama/llama-3.2-11b-vision-instruct:free", 131072),
+ ("Qwen: Qwen2.5 VL 72B Instruct", "qwen/qwen2.5-vl-72b-instruct:free", 131072),
+ ("Qwen: Qwen2.5 VL 32B Instruct", "qwen/qwen2.5-vl-32b-instruct:free", 8192),
+ ("Qwen: Qwen2.5 VL 7B Instruct", "qwen/qwen-2.5-vl-7b-instruct:free", 64000),
+ ("Qwen: Qwen2.5 VL 3B Instruct", "qwen/qwen2.5-vl-3b-instruct:free", 64000),
+ ("Bytedance: UI-TARS 72B", "bytedance-research/ui-tars-72b:free", 32768),
  ]},
 
+ # Largest Context Models
+ {"category": "Largest Context (500K+)", "models": [
+ ("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000),
+ ("Google: Gemini 2.0 Flash Thinking Experimental", "google/gemini-2.0-flash-thinking-exp:free", 1048576),
+ ("Google: Gemini Flash 2.0 Experimental", "google/gemini-2.0-flash-exp:free", 1048576),
+ ("Google: Gemini Pro 2.5 Experimental", "google/gemini-2.5-pro-exp-03-25:free", 1000000),
+ ("Google: Gemini Flash 1.5 8B Experimental", "google/gemini-flash-1.5-8b-exp", 1000000),
  ]},
 
+ # High-performance Models
+ {"category": "High Performance", "models": [
+ ("Google: Gemini Pro 2.0 Experimental", "google/gemini-2.0-pro-exp-02-05:free", 2000000),
+ ("Google: Gemini Pro 2.5 Experimental", "google/gemini-2.5-pro-exp-03-25:free", 1000000),
+ ("Google: Gemma 3 27B", "google/gemma-3-27b-it:free", 96000),
+ ("Mistral: Mistral Small 3.1 24B", "mistralai/mistral-small-3.1-24b-instruct:free", 96000),
+ ("Qwen: Qwen2.5 VL 72B Instruct", "qwen/qwen2.5-vl-72b-instruct:free", 131072),
  ]},
 
+ # Mid-size Models
+ {"category": "Mid-size Models", "models": [
+ ("Google: Gemma 3 12B", "google/gemma-3-12b-it:free", 131072),
+ ("Google: Gemma 3 4B", "google/gemma-3-4b-it:free", 131072),
+ ("Google: LearnLM 1.5 Pro Experimental", "google/learnlm-1.5-pro-experimental:free", 40960),
+ ("Meta: Llama 3.1 8B Instruct", "meta-llama/llama-3.1-8b-instruct:free", 131072),
  ]},
 
+ # Smaller Models
+ {"category": "Smaller Models", "models": [
+ ("Google: Gemma 3 1B", "google/gemma-3-1b-it:free", 32768),
+ ("Qwen: Qwen2.5 VL 3B Instruct", "qwen/qwen2.5-vl-3b-instruct:free", 64000),
+ ("AllenAI: Molmo 7B D", "allenai/molmo-7b-d:free", 4096),
  ]},
 
+ # Sorting Options
+ {"category": "Sort By", "models": [
+ ("Context: High to Low", "sort_context_desc", 0),
+ ("Context: Low to High", "sort_context_asc", 0),
+ ("Newest", "sort_newest", 0),
+ ("Throughput: High to Low", "sort_throughput", 0),
+ ("Latency: Low to High", "sort_latency", 0),
  ]},
  ]
 
  # Flatten model list for easy searching
  ALL_MODELS = []
  for category in MODELS:
+ if category["category"] != "Sort By": # Skip the sorting options
+ for model in category["models"]:
+ if model not in ALL_MODELS:
+ ALL_MODELS.append(model)
+
+ # Sort models by context size (descending) by default
+ ALL_MODELS.sort(key=lambda x: x[2], reverse=True)
 
  def format_to_message_dict(history):
  """Convert history to proper message format"""
 
  return messages
 
  def encode_image_to_base64(image_path):
+ """Encode an image file to base64 string with fallback methods"""
  try:
  if isinstance(image_path, str): # File path as string
  with open(image_path, "rb") as image_file:
  encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
  file_extension = image_path.split('.')[-1].lower()
  mime_type = f"image/{file_extension}"
+ if file_extension in ["jpg", "jpeg"]:
  mime_type = "image/jpeg"
+ elif file_extension == "png":
+ mime_type = "image/png"
+ elif file_extension in ["webp", "gif"]:
+ mime_type = f"image/{file_extension}"
+ else:
+ mime_type = "image/jpeg" # Default fallback
  return f"data:{mime_type};base64,{encoded_string}"
+ elif PIL_AVAILABLE: # Pillow Image object
+ buffered = BytesIO()
+ # Handle if it's a PIL Image or file-like object
+ try:
+ image_path.save(buffered, format="PNG")
+ except AttributeError:
+ if hasattr(image_path, 'read'):
+ # It's a file-like object but not a PIL Image
+ buffered.write(image_path.read())
+ else:
+ raise
  encoded_string = base64.b64encode(buffered.getvalue()).decode('utf-8')
  return f"data:image/png;base64,{encoded_string}"
+ else:
+ logger.error("Cannot process image: PIL not available and input is not a file path")
+ return None
  except Exception as e:
  logger.error(f"Error encoding image: {str(e)}")
  return None
 
  def extract_text_from_file(file_path):
+ """Extract text from various file types with fallbacks"""
  try:
  file_extension = file_path.split('.')[-1].lower()
 
  if file_extension == 'pdf':
+ if PDF_AVAILABLE:
+ if 'PyPDF2' in globals():
+ text = ""
+ with open(file_path, 'rb') as file:
+ pdf_reader = PyPDF2.PdfReader(file)
+ for page_num in range(len(pdf_reader.pages)):
+ page = pdf_reader.pages[page_num]
+ text += page.extract_text() + "\n\n"
+ return text
+ else:
+ # Use pdfminer fallback
+ return extract_text_from_pdf(file_path)
+ else:
+ return "PDF support not available. Please install PyPDF2 or pdfminer.six."
 
  elif file_extension == 'md':
+ if MD_AVAILABLE:
+ with open(file_path, 'r', encoding='utf-8') as file:
+ md_text = file.read()
+ return md_text
+ else:
+ # Simple fallback - just read the file
+ with open(file_path, 'r', encoding='utf-8') as file:
+ return file.read()
 
  elif file_extension == 'txt':
  with open(file_path, 'r', encoding='utf-8') as file:
 
  return text
 
  # If we have images, create a multimodal content array
+ content = [{"type": "text", "text": text or "Please analyze these images:"}]
 
  # Add images if any
  if images:
 
  def ask_ai(message, chatbot, model_choice, temperature, max_tokens, top_p, frequency_penalty,
  presence_penalty, images, documents, reasoning_effort):
+ """Enhanced AI query function with comprehensive options and fallbacks"""
  if not message.strip() and not images and not documents:
  return chatbot, ""
 
+ # Check if this is a sorting option
+ if model_choice.startswith("Sort By"):
+ return chatbot + [[message, "Please select a model to chat with first."]], ""
+
  # Get model ID and context size
  model_id = None
  context_size = 0
 
  "messages": messages,
  "temperature": temperature,
  "max_tokens": max_tokens,
  }
 
+ # Add optional parameters if they have non-default values
+ if top_p < 1.0:
+ payload["top_p"] = top_p
+
+ if frequency_penalty != 0:
+ payload["frequency_penalty"] = frequency_penalty
+
+ if presence_penalty != 0:
+ payload["presence_penalty"] = presence_penalty
+
  # Add reasoning if selected
  if reasoning_effort != "none":
  payload["reasoning"] = {
 
  logger.info(f"Response status: {response.status_code}")
 
  response_text = response.text
+ logger.debug(f"Response body: {response_text}")
 
  if response.status_code == 200:
  result = response.json()
 
  def clear_chat():
  return [], "", [], [], 0.7, 1000, 0.8, 0.0, 0.0, "none"
 
+ def apply_sort(sort_option):
+ """Apply sorting option to models list"""
+ if sort_option == "sort_context_desc":
+ # Sort by context size (high to low)
+ sorted_models = sorted(ALL_MODELS, key=lambda x: x[2], reverse=True)
+ elif sort_option == "sort_context_asc":
+ # Sort by context size (low to high)
+ sorted_models = sorted(ALL_MODELS, key=lambda x: x[2])
+ elif sort_option == "sort_newest":
+ # This would need a proper timestamp, using a rough approximation
+ # Models with "Experimental" in the name come first as they're likely newer
+ sorted_models = sorted(ALL_MODELS, key=lambda x: "Experimental" not in x[0])
+ elif sort_option == "sort_throughput" or sort_option == "sort_latency":
+ # These would need actual performance metrics
+ # For now, use model size as a rough proxy (smaller models generally have higher throughput and lower latency)
+ # Rough heuristic: models with smaller numbers in their names might be smaller
+ sorted_models = sorted(ALL_MODELS, key=lambda x: sum(int(s) for s in x[0] if s.isdigit()))
+ else:
+ # Default to context size sorting
+ sorted_models = sorted(ALL_MODELS, key=lambda x: x[2], reverse=True)
+
+ return sorted_models
+
  def filter_models(search_term):
  """Filter models based on search term"""
  if not search_term:
 
  return f"{context_formatted} tokens"
  return "Unknown"
 
+ def update_models_from_sort(sort_option):
+ """Update models list based on sorting option"""
+ for category in MODELS:
+ if category["category"] == "Sort By":
+ for option in category["models"]:
+ if option[0] == sort_option:
+ sort_key = option[1]
+ sorted_models = apply_sort(sort_key)
+ return gr.Dropdown.update(choices=[model[0] for model in sorted_models], value=sorted_models[0][0])
+
+ # Default sorting if option not found
+ return gr.Dropdown.update(choices=[model[0] for model in ALL_MODELS], value=ALL_MODELS[0][0])
+
  # Create enhanced interface
  with gr.Blocks(css="""
  .context-size {
 
  }
  """) as demo:
  gr.Markdown("""
+ # Vision AI Chat
 
+ Chat with various AI vision models from OpenRouter with support for images and documents.
  """)
 
  with gr.Row():
 
  with gr.Row():
  # Image upload
+ with gr.Accordion("Upload Images", open=False):
  images = gr.Gallery(
  label="Uploaded Images",
  show_label=True,
 
  [model[0] for model in MODELS[0]["models"]],
  label="Models in Category"
  )
+
+ # Sort options
+ with gr.Accordion("Sort Models", open=False):
+ sort_options = gr.Radio(
+ ["Context: High to Low", "Context: Low to High", "Newest",
+ "Throughput: High to Low", "Latency: Low to High"],
+ label="Sort By",
+ value="Context: High to Low"
+ )
 
  with gr.Accordion("Generation Parameters", open=False):
  with gr.Group(elem_classes="parameter-grid"):
 
  value="none",
  label="Reasoning Effort"
  )
+
+ with gr.Accordion("Advanced Options", open=False):
+ with gr.Row():
+ with gr.Column():
+ repetition_penalty = gr.Slider(
+ minimum=0.1,
+ maximum=2.0,
+ value=1.0,
+ step=0.1,
+ label="Repetition Penalty"
+ )
+
+ top_k = gr.Slider(
+ minimum=1,
+ maximum=100,
+ value=40,
+ step=1,
+ label="Top K"
+ )
+
+ min_p = gr.Slider(
+ minimum=0.0,
+ maximum=1.0,
+ value=0.1,
+ step=0.05,
+ label="Min P"
+ )
+
+ with gr.Column():
+ seed = gr.Number(
+ value=0,
+ label="Seed (0 for random)",
+ precision=0
+ )
+
+ top_a = gr.Slider(
+ minimum=0.0,
+ maximum=1.0,
+ value=0.0,
+ step=0.05,
+ label="Top A"
+ )
+
+ stream_output = gr.Checkbox(
+ label="Stream Output",
+ value=False
+ )
+
+ with gr.Row():
+ response_format = gr.Radio(
+ ["default", "json_object"],
+ value="default",
+ label="Response Format"
+ )
+
+ gr.Markdown("""
+ * **json_object**: Forces the model to respond with valid JSON only.
+ * Only available on certain models - check model support on OpenRouter.
+ """)
+
+ # Custom instructing options
+ with gr.Accordion("Custom Instructions", open=False):
+ system_message = gr.Textbox(
+ placeholder="Enter a system message to guide the model's behavior...",
+ label="System Message",
+ lines=3
+ )
+
+ transforms = gr.CheckboxGroup(
+ ["prompt_optimize", "prompt_distill", "prompt_compress"],
+ label="Prompt Transforms (OpenRouter specific)"
+ )
+
+ gr.Markdown("""
+ * **prompt_optimize**: Improve prompt for better responses.
+ * **prompt_distill**: Compress prompt to use fewer tokens without changing meaning.
+ * **prompt_compress**: Aggressively compress prompt to fit larger contexts.
+ """)
+
+ # Connect model search to dropdown filter
+ model_search.change(
+ fn=filter_models,
+ inputs=[model_search],
+ outputs=[model_choice]
+ )
+
+ # Update context display when model changes
+ model_choice.change(
+ fn=update_context_display,
+ inputs=[model_choice],
+ outputs=[context_display]
+ )
+
+ # Update model list when category changes
+ def update_category_models(category):
+ for cat in MODELS:
+ if cat["category"] == category:
+ return gr.Radio.update(choices=[model[0] for model in cat["models"]], value=cat["models"][0][0])
+ return gr.Radio.update(choices=[], value=None)
+
+ model_categories.change(
+ fn=update_category_models,
+ inputs=[model_categories],
+ outputs=[category_models]
+ )
+
+ # Update main model choice when category model is selected
+ category_models.change(
+ fn=lambda x: x,
+ inputs=[category_models],
+ outputs=[model_choice]
+ )
+
+ # Process uploaded images
+ def process_uploaded_images(files):
+ return [file.name for file in files]
+
+ image_upload_btn.upload(
+ fn=process_uploaded_images,
+ inputs=[image_upload_btn],
+ outputs=[images]
+ )
+
+ # Enhanced AI query function with all advanced parameters
+ def ask_ai(message, chatbot, model_choice, temperature, max_tokens, top_p,
+ frequency_penalty, presence_penalty, repetition_penalty, top_k,
+ min_p, seed, top_a, stream_output, response_format,
+ images, documents, reasoning_effort, system_message, transforms):
+ """Comprehensive AI query function with all parameters"""
+ if not message.strip() and not images and not documents:
+ return chatbot, ""
 
+ # Get model ID and context size
+ model_id = None
+ context_size = 0
+ for name, model_id_value, ctx_size in ALL_MODELS:
+ if name == model_choice:
+ model_id = model_id_value
+ context_size = ctx_size
+ break
 
+ if model_id is None:
+ logger.error(f"Model not found: {model_choice}")
+ return chatbot + [[message, "Error: Model not found"]], ""
 
+ # Create messages from chatbot history
+ messages = format_to_message_dict(chatbot)
 
+ # Add system message if provided
+ if system_message and system_message.strip():
+ # Insert at the beginning to override any existing system message
+ for i, msg in enumerate(messages):
+ if msg.get("role") == "system":
+ messages.pop(i)
+ break
+ messages.insert(0, {"role": "system", "content": system_message.strip()})
 
+ # Prepare message with images and documents if any
+ content = prepare_message_with_media(message, images, documents)
 
+ # Add current message
+ messages.append({"role": "user", "content": content})
 
+ # Call API
+ try:
+ logger.info(f"Sending request to model: {model_id}")
+
+ # Build the comprehensive payload with all parameters
+ payload = {
+ "model": model_id,
+ "messages": messages,
+ "temperature": temperature,
+ "max_tokens": max_tokens,
+ "top_p": top_p,
+ "frequency_penalty": frequency_penalty,
+ "presence_penalty": presence_penalty,
+ "repetition_penalty": repetition_penalty if repetition_penalty != 1.0 else None,
+ "top_k": top_k,
+ "min_p": min_p if min_p > 0 else None,
+ "seed": seed if seed > 0 else None,
+ "top_a": top_a if top_a > 0 else None,
+ "stream": stream_output
+ }
+
+ # Add response format if not default
+ if response_format == "json_object":
+ payload["response_format"] = {"type": "json_object"}
+
+ # Add reasoning if selected
+ if reasoning_effort != "none":
+ payload["reasoning"] = {
+ "effort": reasoning_effort
+ }
+
+ # Add transforms if selected
+ if transforms:
+ payload["transforms"] = transforms
+
+ # Remove None values
+ payload = {k: v for k, v in payload.items() if v is not None}
+
+ logger.info(f"Request payload: {json.dumps(payload, default=str)}")
+
+ response = requests.post(
+ "https://openrouter.ai/api/v1/chat/completions",
+ headers={
+ "Content-Type": "application/json",
+ "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+ "HTTP-Referer": "https://huggingface.co/spaces"
+ },
+ json=payload,
+ timeout=180, # Longer timeout for document processing and streaming
+ stream=stream_output
+ )
+
+ logger.info(f"Response status: {response.status_code}")
+
+ if stream_output and response.status_code == 200:
+ # Handle streaming response
+ chatbot = chatbot + [[message, ""]]
+
+ for line in response.iter_lines():
+ if line:
+ line = line.decode('utf-8')
+ if line.startswith('data: '):
+ data = line[6:]
+ if data.strip() == '[DONE]':
+ break
+ try:
+ chunk = json.loads(data)
+ if "choices" in chunk and len(chunk["choices"]) > 0:
+ delta = chunk["choices"][0].get("delta", {})
+ if "content" in delta and delta["content"]:
+ chatbot[-1][1] += delta["content"]
+ yield chatbot, ""
+ except json.JSONDecodeError:
+ continue
+ return chatbot, ""
+
+ elif response.status_code == 200:
+ # Handle normal response
+ result = response.json()
+ ai_response = result.get("choices", [{}])[0].get("message", {}).get("content", "")
+ chatbot = chatbot + [[message, ai_response]]
+
+ # Log token usage if available
+ if "usage" in result:
+ logger.info(f"Token usage: {result['usage']}")
+ else:
+ response_text = response.text
+ logger.info(f"Error response body: {response_text}")
+ error_message = f"Error: Status code {response.status_code}\n\nResponse: {response_text}"
+ chatbot = chatbot + [[message, error_message]]
+ except Exception as e:
+ logger.error(f"Exception during API call: {str(e)}")
+ chatbot = chatbot + [[message, f"Error: {str(e)}"]]
 
+ return chatbot, ""
+
+ # Function to clear chat and reset parameters
+ def clear_chat():
+ return [], "", [], [], 0.7, 1000, 0.8, 0.0, 0.0, 1.0, 40, 0.1, 0, 0.0, False, "default", "none", "", []
+
+ # Set up events for the submit button
+ submit_btn.click(
+ fn=ask_ai,
+ inputs=[
+ message, chatbot, model_choice, temperature, max_tokens,
+ top_p, frequency_penalty, presence_penalty, repetition_penalty,
+ top_k, min_p, seed, top_a, stream_output, response_format,
+ images, documents, reasoning_effort, system_message, transforms
+ ],
+ outputs=[chatbot, message]
+ )
+
+ # Set up events for message submission (pressing Enter)
+ message.submit(
+ fn=ask_ai,
+ inputs=[
+ message, chatbot, model_choice, temperature, max_tokens,
+ top_p, frequency_penalty, presence_penalty, repetition_penalty,
+ top_k, min_p, seed, top_a, stream_output, response_format,
+ images, documents, reasoning_effort, system_message, transforms
+ ],
+ outputs=[chatbot, message]
+ )
+
+ # Set up events for the clear button
+ clear_btn.click(
+ fn=clear_chat,
+ inputs=[],
+ outputs=[
+ chatbot, message, images, documents, temperature,
+ max_tokens, top_p, frequency_penalty, presence_penalty,
+ repetition_penalty, top_k, min_p, seed, top_a, stream_output,
+ response_format, reasoning_effort, system_message, transforms
+ ]
+ )
+
+ # Add a model information section
+ with gr.Accordion("About Selected Model", open=False):
+ model_info_display = gr.HTML(
+ value="<p>Select a model to see details</p>"
  )
 
+ # Update model info when model changes
+ def update_model_info(model_name):
+ model_info = get_model_info(model_name)
+ if model_info:
+ name, model_id, context_size = model_info
+ return f"""
+ <div class="model-info">
+ <h3>{name}</h3>
+ <p><strong>Model ID:</strong> {model_id}</p>
+ <p><strong>Context Size:</strong> {context_size:,} tokens</p>
+ <p><strong>Provider:</strong> {model_id.split('/')[0]}</p>
+ </div>
+ """
+ return "<p>Model information not available</p>"
+
+ model_choice.change(
+ fn=update_model_info,
+ inputs=[model_choice],
+ outputs=[model_info_display]
  )
 
+ # Add usage instructions
+ with gr.Accordion("Usage Instructions", open=False):
+ gr.Markdown("""
+ ## Basic Usage
+ 1. Type your message in the input box
+ 2. Select a model from the dropdown
+ 3. Click "Send" or press Enter
+
+ ## Working with Files
+ - **Images**: Upload images to use with vision-capable models like Llama 3.2 Vision
+ - **Documents**: Upload PDF, Markdown, or text files to analyze their content
+
+ ## Advanced Parameters
+ - **Temperature**: Controls randomness (higher = more creative, lower = more deterministic)
+ - **Max Tokens**: Maximum length of the response
+ - **Top P**: Nucleus sampling threshold (higher = consider more tokens)
+ - **Reasoning Effort**: Some models can show their reasoning process
+
+ ## Tips
+ - For code generation, use models like Qwen Coder
+ - For visual tasks, choose vision-capable models
+ - For long context, check the context window size next to the model name
+ """)
+
+ # Add a footer with version info
+ footer_md = gr.Markdown("""
+ ---
+ ### OpenRouter AI Chat Interface v1.0
+ Built with ❤️ using Gradio and OpenRouter API | Context sizes shown next to model names
+ """)
+
  # Launch directly with Gradio's built-in server
  if __name__ == "__main__":
  demo.launch(server_name="0.0.0.0", server_port=7860)
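
For context, a minimal standalone sketch of the OpenRouter chat-completions request that the updated app.py assembles above. This is a sketch only: it assumes the OPENROUTER_API_KEY environment variable is set and that the chosen free-tier model ID from the MODELS list is still available on OpenRouter.

import os
import requests

# Assumption: the API key is provided via the environment, as in app.py
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

payload = {
    # One of the free-tier model IDs listed in MODELS above (availability may change)
    "model": "meta-llama/llama-3.2-11b-vision-instruct:free",
    "messages": [{"role": "user", "content": "Hello!"}],
    "temperature": 0.7,
    "max_tokens": 1000,
}

response = requests.post(
    "https://openrouter.ai/api/v1/chat/completions",
    headers={
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "HTTP-Referer": "https://huggingface.co/spaces",
    },
    json=payload,
    timeout=180,
)

if response.status_code == 200:
    result = response.json()
    # Same response parsing as ask_ai() in app.py
    print(result.get("choices", [{}])[0].get("message", {}).get("content", ""))
else:
    print(f"Error: Status code {response.status_code}\n\n{response.text}")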