Athspi commited on
Commit
3fd0067
·
verified ·
1 Parent(s): c02bb52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -122
app.py CHANGED
@@ -1,138 +1,182 @@
 
 
1
  import gradio as gr
2
- import asyncio
3
- import numpy as np
4
  from google import genai
5
  from google.genai import types
6
- import soundfile as sf
7
- import io
8
 
9
- # Configuration
10
- SAMPLE_RATE = 24000
11
- MODEL = "gemini-2.0-flash-exp" # Correct experimental model name
 
 
 
 
 
 
 
 
 
12
 
13
class GeminiTTS:
    """Wraps the Gemini Live API to speak user-provided text aloud."""

    def __init__(self, api_key):
        # Fail fast on a missing key instead of surfacing an opaque API error later.
        if not api_key:
            raise ValueError("API key cannot be empty")
        # v1alpha is required for the experimental live (bidirectional) API.
        self.client = genai.Client(http_options={"api_version": "v1alpha"}, api_key=api_key)
        self.config = types.LiveConnectConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Puck")
                )
            ),
            # Instruct the model to echo the user's text verbatim (pure TTS behavior).
            system_instruction=types.Content(
                parts=[types.Part.from_text(text="Speak exactly what the user says")],
                role="user"
            ),
        )

    async def text_to_speech(self, text):
        """Send `text` through a live session.

        Returns a (sample_rate, wav_bytes) tuple for audio, a plain string for
        a text reply or an error message, or None if nothing was received.
        """
        try:
            async with self.client.aio.live.connect(model=MODEL, config=self.config) as session:
                # A single space keeps the request valid when text is empty.
                await session.send(input=text or " ", end_of_turn=True)

                async for response in session.receive():
                    if audio_data := response.data:
                        # NOTE(review): assumes the stream is float32 PCM — confirm;
                        # Gemini live audio is commonly 16-bit PCM instead.
                        audio_array = np.frombuffer(audio_data, dtype=np.float32)

                        # Handle empty/quiet audio with half a second of silence.
                        if audio_array.size == 0:
                            audio_array = np.zeros(int(SAMPLE_RATE * 0.5))  # 0.5s of silence

                        # Normalize audio to prevent processing warnings downstream.
                        max_val = np.max(np.abs(audio_array))
                        if max_val > 0:
                            audio_array = audio_array / max_val

                        # Convert to proper format for Gradio and return the first chunk.
                        return self._create_audio_response(audio_array)

                    if text_response := response.text:
                        return text_response

                return None
        except Exception as e:
            # Errors are returned as strings so the UI can display them inline.
            return f"Error: {str(e)}"

    def _create_audio_response(self, audio_array):
        """Create properly formatted audio response for Gradio."""
        # Convert normalized floats to 16-bit PCM format.
        audio_array = (audio_array * 32767).astype(np.int16)

        # Create a WAV file entirely in memory.
        with io.BytesIO() as wav_buffer:
            with sf.SoundFile(
                wav_buffer,
                mode='w',
                samplerate=SAMPLE_RATE,
                channels=1,
                format='WAV',
                subtype='PCM_16'
            ) as sf_file:
                sf_file.write(audio_array)
            wav_bytes = wav_buffer.getvalue()

        # NOTE(review): returns (rate, wav_bytes); gr.Audio usually expects a
        # filepath or (rate, np.ndarray) — verify against the interface wiring.
        return (SAMPLE_RATE, wav_bytes)
79
 
80
def create_interface():
    """Build the Gradio UI; the TTS engine is created lazily via init_engine
    once the user supplies an API key."""
    tts_engine = None  # set by init_engine through the closure below

    def init_engine(api_key):
        # Instantiate the engine once a key is provided; report status as text.
        nonlocal tts_engine
        try:
            tts_engine = GeminiTTS(api_key)
            return "✅ TTS Initialized Successfully"
        except Exception as e:
            return f"❌ Initialization Failed: {str(e)}"

    async def generate_speech(text):
        # Guard: the engine must be initialized before any synthesis request.
        if not tts_engine:
            raise gr.Error("Please initialize the TTS first")

        result = await tts_engine.text_to_speech(text)

        if isinstance(result, str):
            return None, result  # Return error message
        elif result:
            return result, ""  # Return audio and empty message
        return None, "No response received"

    with gr.Blocks(title="Gemini TTS") as app:
        gr.Markdown("# 🎤 Gemini Text-to-Speech")

        with gr.Row():
            api_key = gr.Textbox(
                label="API Key",
                type="password",
                placeholder="Enter your Gemini API key"
            )
            init_btn = gr.Button("Initialize")

        init_status = gr.Textbox(label="Status", interactive=False)
        init_btn.click(init_engine, inputs=api_key, outputs=init_status)

        with gr.Group():
            text_input = gr.Textbox(
                label="Input Text",
                lines=3,
                placeholder="Type something to speak..."
            )
            generate_btn = gr.Button("Generate Speech")

            # NOTE(review): type="filepath" but generate_speech returns a
            # (rate, bytes) tuple — likely a type mismatch; confirm what this
            # Gradio version accepts.
            audio_output = gr.Audio(label="Output Audio", type="filepath")
            text_output = gr.Textbox(label="Messages", interactive=False)

            generate_btn.click(
                generate_speech,
                inputs=text_input,
                outputs=[audio_output, text_output]
            )

    return app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
if __name__ == "__main__":
    # Build the UI and serve it on all interfaces at the standard Gradio port.
    gradio_app = create_interface()
    gradio_app.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import os
3
  import gradio as gr
4
+ import requests
5
+ import markdownify
6
  from google import genai
7
  from google.genai import types
8
+ from urllib.robotparser import RobotFileParser
9
+ from urllib.parse import urlparse
10
 
11
+ # Configure browser tools
12
def can_crawl_url(url: str, user_agent: str = "*") -> bool:
    """Return True if the host's robots.txt permits `user_agent` to fetch `url`.

    Any failure (malformed URL, unreachable robots.txt) is treated as
    "not allowed" and the error is reported on stdout.
    """
    try:
        parts = urlparse(url)
        parser = RobotFileParser(f"{parts.scheme}://{parts.netloc}/robots.txt")
        parser.read()
        allowed = parser.can_fetch(user_agent, url)
    except Exception as e:
        print(f"Error checking robots.txt: {e}")
        return False
    return allowed
23
 
24
def load_page(url: str) -> str:
    """Fetch `url` and return its HTML converted to markdown.

    Returns a human-readable error string when the URL fails the robots.txt
    check, the request fails, or the server answers with an HTTP error.
    """
    if not can_crawl_url(url):
        return f"URL {url} failed robots.txt check"
    try:
        response = requests.get(url, timeout=10)
        # BUG FIX: surface 4xx/5xx as errors instead of silently converting
        # the server's error page to markdown as if it were real content.
        response.raise_for_status()
        return markdownify.markdownify(response.text)
    except Exception as e:
        return f"Error loading page: {str(e)}"
33
+
34
# Configure Gemini client
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
MODEL = "gemini-2.0-flash"

# Schema for the custom browsing tool the model may call.
_LOAD_PAGE_DECL = types.FunctionDeclaration(
    name="load_page",
    description="Load webpage content as markdown",
    parameters={
        "type": "object",
        "properties": {
            "url": {"type": "string", "description": "Full URL to load"}
        },
        "required": ["url"]
    }
)

# Custom browser tool plus Google's built-in search and code execution.
TOOLS = [
    types.Tool(function_declarations=[_LOAD_PAGE_DECL]),
    types.Tool(google_search=types.GoogleSearch()),
    types.Tool(code_execution=types.ToolCodeExecution())
]

SYSTEM_INSTRUCTION = """You are an AI assistant with multiple capabilities:
1. Web browsing through search and direct page access
2. Code execution for calculations, simulations, and data analysis
3. File I/O operations for data processing

Use this decision tree:
- For factual questions: Use search
- For time-sensitive data: Use browser tool
- For math/data processing: Generate and execute code
- Always explain your reasoning"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
def format_code_response(parts):
    """Format Gemini response parts into a single Markdown string.

    Handles plain text, generated code (fenced as python), code-execution
    output, and inline images (rendered as base64 data URIs). The rendered
    fragments are joined by blank lines.
    """
    formatted = []
    for part in parts:
        if part.text:
            formatted.append(part.text)
        if part.executable_code:
            formatted.append(f"```python\n{part.executable_code.code}\n```")
        if part.code_execution_result:
            formatted.append(f"**Result**:\n{part.code_execution_result.output}")
        if part.inline_data:
            # IMPROVED: use the blob's declared mime type when available
            # instead of hard-coding image/png for every inline image.
            mime = getattr(part.inline_data, "mime_type", None) or "image/png"
            payload = base64.b64encode(part.inline_data.data).decode()
            formatted.append(f"![Generated Image](data:{mime};base64,{payload})")
    return "\n\n".join(formatted)
81
+
82
def generate_response(user_input):
    """Generator yielding progressively-formatted Markdown answers.

    Creates a fresh tool-enabled chat per query, yields after each response
    part so the UI updates live, and resolves any `load_page` function call
    the model issues before yielding the continuation.
    """
    chat = client.chats.create(
        model=MODEL,
        config=types.GenerateContentConfig(
            temperature=0.7,
            tools=TOOLS,
            system_instruction=SYSTEM_INSTRUCTION
        )
    )

    # Initial request
    response = chat.send_message(user_input)

    # Process all response parts, re-rendering the accumulated transcript
    # after each one. NOTE(review): the original diff's indentation is
    # ambiguous; the function-call handling is assumed to be per-part.
    response_parts = []
    for part in response.candidates[0].content.parts:
        response_parts.append(part)
        yield format_code_response(response_parts)

        # Handle function calls issued by the model.
        if part.function_call:
            fn = part.function_call
            if fn.name == "load_page":
                result = load_page(**fn.args)
                # BUG FIX: the reply to the FunctionResponse message IS the
                # model's continuation. The previous code discarded it and
                # then called chat.send_message("") — an empty message the
                # API rejects — to fetch a "final" response.
                final_response = chat.send_message(
                    types.Content(
                        parts=[
                            types.Part(
                                function_response=types.FunctionResponse(
                                    name=fn.name,
                                    id=fn.id,
                                    response={"result": result}
                                )
                            )
                        ]
                    )
                )
                for final_part in final_response.candidates[0].content.parts:
                    response_parts.append(final_part)
                    yield format_code_response(response_parts)
127
 
128
+ # Gradio Interface
129
+ with gr.Blocks(title="Gemini 2.0 AI Assistant") as demo:
130
+ gr.Markdown("# 🚀 Gemini 2.0 AI Assistant")
131
+ gr.Markdown("Web Access • Code Execution • Data Analysis")
132
+
133
+ with gr.Row():
134
+ input_box = gr.Textbox(
135
+ label="Your Query",
136
+ placeholder="Ask anything or request code execution...",
137
+ lines=3,
138
+ max_lines=10,
139
+ autofocus=True
140
+ )
141
+ output_box = gr.Markdown(
142
+ label="Assistant Response",
143
+ elem_classes="markdown-output"
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  )
145
 
146
+ with gr.Row():
147
+ submit_btn = gr.Button("Submit", variant="primary")
148
+ clear_btn = gr.Button("Clear")
149
+
150
+ def clear():
151
+ return ["", ""]
152
+
153
+ submit_btn.click(
154
+ fn=generate_response,
155
+ inputs=input_box,
156
+ outputs=output_box,
157
+ queue=True
158
+ )
159
+
160
+ clear_btn.click(
161
+ fn=clear,
162
+ inputs=[],
163
+ outputs=[input_box, output_box]
164
+ )
165
 
166
if __name__ == "__main__":
    # BUG FIX: Blocks.launch() does not accept a `css` keyword argument — it
    # raises TypeError, so the app never started. Custom CSS must be passed
    # to the gr.Blocks(...) constructor instead.
    # TODO(review): re-add the .markdown-output stylesheet via
    # gr.Blocks(css=...) where `demo` is created.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860
    )