browser-use-sg / tests /test_browser_use.py
Sanjeev23oct's picture
Upload folder using huggingface_hub
f1d5e1c verified
import pdb
from dotenv import load_dotenv
load_dotenv()
import sys
sys.path.append(".")
import asyncio
import os
import sys
from pprint import pprint
from browser_use import Agent
from browser_use.agent.views import AgentHistoryList
from src.utils import utils
async def test_browser_use_org():
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import (
BrowserContextConfig,
BrowserContextWindowSize,
)
# llm = utils.get_llm_model(
# provider="azure_openai",
# model_name="gpt-4o",
# temperature=0.8,
# base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
# )
# llm = utils.get_llm_model(
# provider="deepseek",
# model_name="deepseek-chat",
# temperature=0.8
# )
llm = utils.get_llm_model(
provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
)
window_w, window_h = 1920, 1080
use_vision = False
use_own_browser = False
if use_own_browser:
chrome_path = os.getenv("CHROME_PATH", None)
if chrome_path == "":
chrome_path = None
else:
chrome_path = None
tool_calling_method = "json_schema" # setting to json_schema when using ollma
browser = Browser(
config=BrowserConfig(
headless=False,
disable_security=True,
chrome_instance_path=chrome_path,
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
)
)
async with await browser.new_context(
config=BrowserContextConfig(
trace_path="./tmp/traces",
save_recording_path="./tmp/record_videos",
no_viewport=False,
browser_window_size=BrowserContextWindowSize(
width=window_w, height=window_h
),
)
) as browser_context:
agent = Agent(
task="go to google.com and type 'OpenAI' click search and give me the first url",
llm=llm,
browser_context=browser_context,
use_vision=use_vision,
tool_calling_method=tool_calling_method
)
history: AgentHistoryList = await agent.run(max_steps=10)
print("Final Result:")
pprint(history.final_result(), indent=4)
print("\nErrors:")
pprint(history.errors(), indent=4)
# e.g. xPaths the model clicked on
print("\nModel Outputs:")
pprint(history.model_actions(), indent=4)
print("\nThoughts:")
pprint(history.model_thoughts(), indent=4)
# close browser
await browser.close()
async def test_browser_use_custom():
from browser_use.browser.context import BrowserContextWindowSize
from browser_use.browser.browser import BrowserConfig
from playwright.async_api import async_playwright
from src.agent.custom_agent import CustomAgent
from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt
from src.browser.custom_browser import CustomBrowser
from src.browser.custom_context import BrowserContextConfig
from src.controller.custom_controller import CustomController
window_w, window_h = 1280, 1100
# llm = utils.get_llm_model(
# provider="openai",
# model_name="gpt-4o",
# temperature=0.8,
# base_url=os.getenv("OPENAI_ENDPOINT", ""),
# api_key=os.getenv("OPENAI_API_KEY", ""),
# )
llm = utils.get_llm_model(
provider="azure_openai",
model_name="gpt-4o",
temperature=0.5,
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
)
# llm = utils.get_llm_model(
# provider="google",
# model_name="gemini-2.0-flash",
# temperature=0.6,
# api_key=os.getenv("GOOGLE_API_KEY", "")
# )
# llm = utils.get_llm_model(
# provider="deepseek",
# model_name="deepseek-reasoner",
# temperature=0.8
# )
# llm = utils.get_llm_model(
# provider="deepseek",
# model_name="deepseek-chat",
# temperature=0.8
# )
# llm = utils.get_llm_model(
# provider="ollama", model_name="qwen2.5:7b", temperature=0.5
# )
# llm = utils.get_llm_model(
# provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
# )
controller = CustomController()
use_own_browser = True
disable_security = True
use_vision = True # Set to False when using DeepSeek
max_actions_per_step = 10
playwright = None
browser = None
browser_context = None
try:
extra_chromium_args = [f"--window-size={window_w},{window_h}"]
if use_own_browser:
chrome_path = os.getenv("CHROME_PATH", None)
if chrome_path == "":
chrome_path = None
chrome_user_data = os.getenv("CHROME_USER_DATA", None)
if chrome_user_data:
extra_chromium_args += [f"--user-data-dir={chrome_user_data}"]
else:
chrome_path = None
browser = CustomBrowser(
config=BrowserConfig(
headless=False,
disable_security=disable_security,
chrome_instance_path=chrome_path,
extra_chromium_args=extra_chromium_args,
)
)
browser_context = await browser.new_context(
config=BrowserContextConfig(
trace_path="./tmp/traces",
save_recording_path="./tmp/record_videos",
no_viewport=False,
browser_window_size=BrowserContextWindowSize(
width=window_w, height=window_h
),
)
)
agent = CustomAgent(
task="open youtube in tab 1 , open google email in tab 2, open facebook in tab 3",
add_infos="", # some hints for llm to complete the task
llm=llm,
browser=browser,
browser_context=browser_context,
controller=controller,
system_prompt_class=CustomSystemPrompt,
agent_prompt_class=CustomAgentMessagePrompt,
use_vision=use_vision,
max_actions_per_step=max_actions_per_step,
generate_gif=True
)
history: AgentHistoryList = await agent.run(max_steps=100)
print("Final Result:")
pprint(history.final_result(), indent=4)
print("\nErrors:")
pprint(history.errors(), indent=4)
# e.g. xPaths the model clicked on
print("\nModel Outputs:")
pprint(history.model_actions(), indent=4)
print("\nThoughts:")
pprint(history.model_thoughts(), indent=4)
except Exception:
import traceback
traceback.print_exc()
finally:
# 显式关闭持久化上下文
if browser_context:
await browser_context.close()
# 关闭 Playwright 对象
if playwright:
await playwright.stop()
if browser:
await browser.close()
async def test_browser_use_parallel():
from browser_use.browser.context import BrowserContextWindowSize
from browser_use.browser.browser import BrowserConfig
from playwright.async_api import async_playwright
from browser_use.browser.browser import Browser
from src.agent.custom_agent import CustomAgent
from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt
from src.browser.custom_browser import CustomBrowser
from src.browser.custom_context import BrowserContextConfig
from src.controller.custom_controller import CustomController
window_w, window_h = 1920, 1080
# llm = utils.get_llm_model(
# provider="openai",
# model_name="gpt-4o",
# temperature=0.8,
# base_url=os.getenv("OPENAI_ENDPOINT", ""),
# api_key=os.getenv("OPENAI_API_KEY", ""),
# )
# llm = utils.get_llm_model(
# provider="azure_openai",
# model_name="gpt-4o",
# temperature=0.8,
# base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
# )
llm = utils.get_llm_model(
provider="gemini",
model_name="gemini-2.0-flash-exp",
temperature=1.0,
api_key=os.getenv("GOOGLE_API_KEY", "")
)
# llm = utils.get_llm_model(
# provider="deepseek",
# model_name="deepseek-reasoner",
# temperature=0.8
# )
# llm = utils.get_llm_model(
# provider="deepseek",
# model_name="deepseek-chat",
# temperature=0.8
# )
# llm = utils.get_llm_model(
# provider="ollama", model_name="qwen2.5:7b", temperature=0.5
# )
# llm = utils.get_llm_model(
# provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
# )
controller = CustomController()
use_own_browser = True
disable_security = True
use_vision = True # Set to False when using DeepSeek
max_actions_per_step = 1
playwright = None
browser = None
browser_context = None
browser = Browser(
config=BrowserConfig(
disable_security=True,
headless=False,
new_context_config=BrowserContextConfig(save_recording_path='./tmp/recordings'),
)
)
try:
agents = [
Agent(task=task, llm=llm, browser=browser)
for task in [
'Search Google for weather in Tokyo',
'Check Reddit front page title',
'Find NASA image of the day',
'Check top story on CNN',
# 'Search latest SpaceX launch date',
# 'Look up population of Paris',
# 'Find current time in Sydney',
# 'Check who won last Super Bowl',
# 'Search trending topics on Twitter',
]
]
history = await asyncio.gather(*[agent.run() for agent in agents])
pdb.set_trace()
print("Final Result:")
pprint(history.final_result(), indent=4)
print("\nErrors:")
pprint(history.errors(), indent=4)
# e.g. xPaths the model clicked on
print("\nModel Outputs:")
pprint(history.model_actions(), indent=4)
print("\nThoughts:")
pprint(history.model_thoughts(), indent=4)
# close browser
except Exception:
import traceback
traceback.print_exc()
finally:
# 显式关闭持久化上下文
if browser_context:
await browser_context.close()
# 关闭 Playwright 对象
if playwright:
await playwright.stop()
if browser:
await browser.close()
if __name__ == "__main__":
# asyncio.run(test_browser_use_org())
# asyncio.run(test_browser_use_parallel())
asyncio.run(test_browser_use_custom())