Update app.py
Browse files
app.py
CHANGED
@@ -1,138 +1,182 @@
 import gradio as gr
-import
-import
 from google import genai
 from google.genai import types
-
-import
 
-            async for response in session.receive():
-                if audio_data := response.data:
-                    # Convert to numpy array
-                    audio_array = np.frombuffer(audio_data, dtype=np.float32)
-
-                    # Handle empty/quiet audio
-                    if audio_array.size == 0:
-                        audio_array = np.zeros(int(SAMPLE_RATE * 0.5))  # 0.5s of silence
-
-                    # Normalize audio to prevent processing warnings
-                    max_val = np.max(np.abs(audio_array))
-                    if max_val > 0:
-                        audio_array = audio_array / max_val
-
-                    # Convert to proper format for Gradio
-                    return self._create_audio_response(audio_array)
-
-                if text_response := response.text:
-                    return text_response
-
-            return None
-        except Exception as e:
-            return f"Error: {str(e)}"
-
-        # Create WAV file in memory
-        with io.BytesIO() as wav_buffer:
-            with sf.SoundFile(
-                wav_buffer,
-                mode='w',
-                samplerate=SAMPLE_RATE,
-                channels=1,
-                format='WAV',
-                subtype='PCM_16'
-            ) as sf_file:
-                sf_file.write(audio_array)
-            wav_bytes = wav_buffer.getvalue()
-
-        return (SAMPLE_RATE, wav_bytes)
-
-    def
-
-        try:
-            tts_engine = GeminiTTS(api_key)
-            return "✅ TTS Initialized Successfully"
-        except Exception as e:
-            return f"❌ Initialization Failed: {str(e)}"
-
-            label="Input Text",
-            lines=3,
-            placeholder="Type something to speak..."
-        )
-        generate_btn = gr.Button("Generate Speech")
-
-        audio_output = gr.Audio(label="Output Audio", type="filepath")
-        text_output = gr.Textbox(label="Messages", interactive=False)
-
-        generate_btn.click(
-            generate_speech,
-            inputs=text_input,
-            outputs=[audio_output, text_output]
-        )
 
 if __name__ == "__main__":
-
-
+import base64
+import os
 import gradio as gr
+import requests
+import markdownify
 from google import genai
 from google.genai import types
+from urllib.robotparser import RobotFileParser
+from urllib.parse import urlparse
 
+# Configure browser tools
+def can_crawl_url(url: str, user_agent: str = "*") -> bool:
+    """Check robots.txt permissions for a URL"""
+    try:
+        parsed_url = urlparse(url)
+        robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt"
+        rp = RobotFileParser(robots_url)
+        rp.read()
+        return rp.can_fetch(user_agent, url)
+    except Exception as e:
+        print(f"Error checking robots.txt: {e}")
+        return False
 
+def load_page(url: str) -> str:
+    """Load webpage content as markdown"""
+    if not can_crawl_url(url):
+        return f"URL {url} failed robots.txt check"
+    try:
+        response = requests.get(url, timeout=10)
+        return markdownify.markdownify(response.text)
+    except Exception as e:
+        return f"Error loading page: {str(e)}"
+
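The two helpers above are plain Python, so they can be smoke-tested outside the Space before the model ever calls them. A minimal sketch, assuming network access; the URL is only an example:

url = "https://example.com/"  # hypothetical test URL
if can_crawl_url(url):
    print(load_page(url)[:500])  # first 500 characters of the converted Markdown
else:
    print(f"robots.txt disallows fetching {url}")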
+# Configure Gemini client
+client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
+MODEL = "gemini-2.0-flash"
+TOOLS = [
+    types.Tool(
+        function_declarations=[
+            types.FunctionDeclaration(
+                name="load_page",
+                description="Load webpage content as markdown",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "url": {"type": "string", "description": "Full URL to load"}
+                    },
+                    "required": ["url"]
+                }
+            )
+        ]
+    ),
+    types.Tool(google_search=types.GoogleSearch()),
+    types.Tool(code_execution=types.ToolCodeExecution())
+]
 
+SYSTEM_INSTRUCTION = """You are an AI assistant with multiple capabilities:
+1. Web browsing through search and direct page access
+2. Code execution for calculations, simulations, and data analysis
+3. File I/O operations for data processing
+
+Use this decision tree:
+- For factual questions: Use search
+- For time-sensitive data: Use browser tool
+- For math/data processing: Generate and execute code
+- Always explain your reasoning"""
 
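The client, tool declarations, and system prompt can be exercised with a one-off request before the chat loop uses them. A minimal sketch, assuming GEMINI_API_KEY is set and that the API accepts this particular combination of function declarations, search, and code execution; the prompt is made up:

check = client.models.generate_content(
    model=MODEL,
    contents="What day of the week is 2025-03-01? Verify with code.",  # hypothetical prompt
    config=types.GenerateContentConfig(
        tools=TOOLS,
        system_instruction=SYSTEM_INSTRUCTION,
        temperature=0.7,
    ),
)
print(check.text)  # may be None if the model answered only with a function call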
+def format_code_response(parts):
+    """Format code execution parts for Markdown display"""
+    formatted = []
+    for part in parts:
+        if part.text:
+            formatted.append(part.text)
+        if part.executable_code:
+            formatted.append(f"```python\n{part.executable_code.code}\n```")
+        if part.code_execution_result:
+            formatted.append(f"**Result**:\n{part.code_execution_result.output}")
+        if part.inline_data:
+            # Embed returned image bytes as a base64 data-URI Markdown image
+            formatted.append(
+                f"![Image](data:image/png;base64,{base64.b64encode(part.inline_data.data).decode()})"
+            )
+    return "\n\n".join(formatted)
+
def generate_response(user_input):
|
83 |
+
full_response = ""
|
84 |
+
chat = client.chats.create(
|
85 |
+
model=MODEL,
|
86 |
+
config=types.GenerateContentConfig(
|
87 |
+
temperature=0.7,
|
88 |
+
tools=TOOLS,
|
89 |
+
system_instruction=SYSTEM_INSTRUCTION
|
90 |
+
)
|
91 |
+
)
|
92 |
|
93 |
+
# Initial request
|
94 |
+
response = chat.send_message(user_input)
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
+
# Process all response parts
|
97 |
+
response_parts = []
|
98 |
+
for part in response.candidates[0].content.parts:
|
99 |
+
response_parts.append(part)
|
100 |
+
full_response = format_code_response(response_parts)
|
101 |
+
yield full_response
|
102 |
|
103 |
+
# Handle function calls
|
104 |
+
if part.function_call:
|
105 |
+
fn = part.function_call
|
106 |
+
if fn.name == "load_page":
|
107 |
+
result = load_page(**fn.args)
|
108 |
+
chat.send_message(
|
109 |
+
types.Content(
|
110 |
+
parts=[
|
111 |
+
types.Part(
|
112 |
+
function_response=types.FunctionResponse(
|
113 |
+
name=fn.name,
|
114 |
+
id=fn.id,
|
115 |
+
response={"result": result}
|
116 |
+
)
|
117 |
+
)
|
118 |
+
]
|
119 |
+
)
|
120 |
+
)
|
121 |
+
# Get final response after tool execution
|
122 |
+
final_response = chat.send_message("")
|
123 |
+
for final_part in final_response.candidates[0].content.parts:
|
124 |
+
response_parts.append(final_part)
|
125 |
+
full_response = format_code_response(response_parts)
|
126 |
+
yield full_response
|
127 |
|
128 |
+
# Gradio Interface
|
129 |
+
with gr.Blocks(title="Gemini 2.0 AI Assistant") as demo:
|
130 |
+
gr.Markdown("# 🚀 Gemini 2.0 AI Assistant")
|
131 |
+
gr.Markdown("Web Access • Code Execution • Data Analysis")
|
132 |
+
|
133 |
+
with gr.Row():
|
134 |
+
input_box = gr.Textbox(
|
135 |
+
label="Your Query",
|
136 |
+
placeholder="Ask anything or request code execution...",
|
137 |
+
lines=3,
|
138 |
+
max_lines=10,
|
139 |
+
autofocus=True
|
140 |
+
)
|
141 |
+
output_box = gr.Markdown(
|
142 |
+
label="Assistant Response",
|
143 |
+
elem_classes="markdown-output"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
)
|
145 |
|
146 |
+
with gr.Row():
|
147 |
+
submit_btn = gr.Button("Submit", variant="primary")
|
148 |
+
clear_btn = gr.Button("Clear")
|
149 |
+
|
150 |
+
def clear():
|
151 |
+
return ["", ""]
|
152 |
+
|
153 |
+
submit_btn.click(
|
154 |
+
fn=generate_response,
|
155 |
+
inputs=input_box,
|
156 |
+
outputs=output_box,
|
157 |
+
queue=True
|
158 |
+
)
|
159 |
+
|
160 |
+
clear_btn.click(
|
161 |
+
fn=clear,
|
162 |
+
inputs=[],
|
163 |
+
outputs=[input_box, output_box]
|
164 |
+
)
|
165 |
|
 if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860
+    )