Has anyone tried this sample app using Pydantic? The app works just fine for most web automation tasks. However, it is not production grade, so we would like to convert it to Pydantic, following the way Pydantic registers models, tools, etc. Has anyone done this so far? I have seen different behaviour: my version does not work the way the original CUA app does. I have defined my tools like this:
def _register_tools(self):
def log_tool_call(tool_name, **kwargs):
    """Print a banner for a tool invocation followed by one line per argument.

    Args:
        tool_name: Name shown in the banner line.
        **kwargs: Arguments of the call; each is printed as ``key: value``.

    Returns:
        The ``kwargs`` dict, unchanged.
    """
    banner = f"\n🔧 Calling tool: {tool_name}"
    print(banner)
    for arg_name in kwargs:
        print(f" {arg_name}: {kwargs[arg_name]}")
    return kwargs
# Register all tools with logging and error handling
# Tool wrapper around playwright_computer.goto: logs the call, awaits the
# navigation, prints the returned value and hands it back to the agent.
# Any exception is printed and then re-raised unchanged.
@self.agent.tool
async def goto(ctx: RunContext[ComputerUseAgentDeps], url: str) -> str:
    """Navigate to a specific URL."""
    log_tool_call("goto", url=url)
    try:
        computer = ctx.deps.playwright_computer
        outcome = await computer.goto(url)
        print(f" ✅ Success: {outcome}")
    except Exception as exc:
        print(f" ❌ Error: {exc!s}")
        raise
    return outcome
# Tool wrapper around playwright_computer.goto_in_new_tab: logs the call,
# awaits it, prints the returned value and returns it. Failures are printed
# and re-raised so the caller still sees the original exception.
@self.agent.tool
async def goto_in_new_tab(ctx: RunContext[ComputerUseAgentDeps], url: str) -> str:
    """Open a new browser tab and go to a specific URL."""
    log_tool_call("goto_in_new_tab", url=url)
    try:
        computer = ctx.deps.playwright_computer
        outcome = await computer.goto_in_new_tab(url)
        print(f" ✅ Success: {outcome}")
    except Exception as exc:
        print(f" ❌ Error: {exc!s}")
        raise
    return outcome
# Tool wrapper around playwright_computer.click: logs the call, forwards
# (x, y, button) positionally, prints a confirmation and returns whatever the
# underlying call returned. Exceptions are printed and re-raised.
@self.agent.tool
async def click(ctx: RunContext[ComputerUseAgentDeps], x: int, y: int, button: str = "left") -> str:
    """Click at the given (x, y) coordinates with specified button."""
    log_tool_call("click", x=x, y=y, button=button)
    try:
        computer = ctx.deps.playwright_computer
        outcome = await computer.click(x, y, button)
        confirmation = f" ✅ Success: Clicked at ({x}, {y}) with {button} button"
        print(confirmation)
    except Exception as exc:
        print(f" ❌ Error: {exc!s}")
        raise
    return outcome
# Tool wrapper around playwright_computer.double_click: logs the call, awaits
# it with (x, y), prints a confirmation and returns the underlying result.
# Exceptions are printed and re-raised.
@self.agent.tool
async def double_click(ctx: RunContext[ComputerUseAgentDeps], x: int, y: int) -> str:
    """Double click at (x, y)."""
    log_tool_call("double_click", x=x, y=y)
    try:
        computer = ctx.deps.playwright_computer
        outcome = await computer.double_click(x, y)
        confirmation = f" ✅ Success: Double clicked at ({x}, {y})"
        print(confirmation)
    except Exception as exc:
        print(f" ❌ Error: {exc!s}")
        raise
    return outcome
# Tool wrapper around playwright_computer.scroll: logs the call, forwards
# (x, y, scroll_x, scroll_y) positionally, prints a confirmation and returns
# the underlying result. Exceptions are printed and re-raised.
@self.agent.tool
async def scroll(ctx: RunContext[ComputerUseAgentDeps], x: int, y: int, scroll_x: int, scroll_y: int) -> str:
    """Scroll from (x, y) by scroll_x and scroll_y."""
    log_tool_call("scroll", x=x, y=y, scroll_x=scroll_x, scroll_y=scroll_y)
    try:
        computer = ctx.deps.playwright_computer
        outcome = await computer.scroll(x, y, scroll_x, scroll_y)
        confirmation = f" ✅ Success: Scrolled at ({x}, {y}) by ({scroll_x}, {scroll_y})"
        print(confirmation)
    except Exception as exc:
        print(f" ❌ Error: {exc!s}")
        raise
    return outcome
# Tool wrapper around playwright_computer.type (exposed to the agent as
# "type_text"): logs the call, awaits it, echoes the typed text and returns
# the underlying result. Exceptions are printed and re-raised.
@self.agent.tool
async def type_text(ctx: RunContext[ComputerUseAgentDeps], text: str) -> str:
    """Type the provided text."""
    log_tool_call("type_text", text=text)
    try:
        computer = ctx.deps.playwright_computer
        outcome = await computer.type(text)
        print(f" ✅ Success: Typed: {text}")
    except Exception as exc:
        print(f" ❌ Error: {exc!s}")
        raise
    return outcome
# Tool wrapper around playwright_computer.screenshot: logs the call, awaits
# the capture, prints a confirmation and returns the underlying result.
# Exceptions are printed and re-raised.
#
# NOTE(review): the result is returned as a plain ``str``; presumably the model
# then receives the base64 payload as text rather than as an image, which may
# explain behaviour differing from the original CUA app. Confirm against
# pydantic-ai's support for image/binary tool return values.
@self.agent.tool
async def screenshot(ctx: RunContext[ComputerUseAgentDeps]) -> str:
    """Take a screenshot of the current view as base64 string."""
    log_tool_call("screenshot")
    try:
        computer = ctx.deps.playwright_computer
        outcome = await computer.screenshot()
        print(" ✅ Success: Screenshot taken")
    except Exception as exc:
        print(f" ❌ Error: {exc!s}")
        raise
    return outcome
# Tool wrapper around playwright_computer.wait: logs the call, awaits it with
# ``ms`` (1000 when the model omits it), prints a confirmation and returns the
# underlying result. Exceptions are printed and re-raised.
@self.agent.tool
async def wait(ctx: RunContext[ComputerUseAgentDeps], ms: int = 1000) -> str:
    """Wait for a given number of milliseconds."""
    log_tool_call("wait", ms=ms)
    try:
        computer = ctx.deps.playwright_computer
        outcome = await computer.wait(ms)
        print(f" ✅ Success: Waited for {ms}ms")
    except Exception as exc:
        print(f" ❌ Error: {exc!s}")
        raise
    return outcome
# Tool wrapper around playwright_computer.move: logs the call, awaits it with
# (x, y), prints a confirmation and returns the underlying result.
# Exceptions are printed and re-raised.
@self.agent.tool
async def move(ctx: RunContext[ComputerUseAgentDeps], x: int, y: int) -> str:
    """Move mouse to (x, y)."""
    log_tool_call("move", x=x, y=y)
    try:
        computer = ctx.deps.playwright_computer
        outcome = await computer.move(x, y)
        confirmation = f" ✅ Success: Moved mouse to ({x}, {y})"
        print(confirmation)
    except Exception as exc:
        print(f" ❌ Error: {exc!s}")
        raise
    return outcome
# Tool wrapper around playwright_computer.keypress: logs the call, passes the
# ``keys`` list through as-is, prints a confirmation and returns the
# underlying result. Exceptions are printed and re-raised.
@self.agent.tool
async def keypress(ctx: RunContext[ComputerUseAgentDeps], keys: list[str]) -> str:
    """Press and release a list of keys in sequence."""
    log_tool_call("keypress", keys=keys)
    try:
        computer = ctx.deps.playwright_computer
        outcome = await computer.keypress(keys)
        print(f" ✅ Success: Pressed keys: {keys}")
    except Exception as exc:
        print(f" ❌ Error: {exc!s}")
        raise
    return outcome
# Tool wrapper around playwright_computer.drag: logs the call, passes ``path``
# through as-is, prints a confirmation and returns the underlying result.
# Exceptions are printed and re-raised.
@self.agent.tool
async def drag(ctx: RunContext[ComputerUseAgentDeps], path: list[dict]) -> str:
    """Drag the mouse along a path of points (list of {'x': int, 'y': int})."""
    log_tool_call("drag", path=path)
    try:
        computer = ctx.deps.playwright_computer
        outcome = await computer.drag(path)
        print(" ✅ Success: Dragged along given path")
    except Exception as exc:
        print(f" ❌ Error: {exc!s}")
        raise
    return outcome
# Tool wrapper around playwright_computer.switch_tab_by_index: logs the call,
# awaits it with ``tab_index``, prints a confirmation and returns the
# underlying result. Exceptions are printed and re-raised.
@self.agent.tool
async def switch_tab_by_index(ctx: RunContext[ComputerUseAgentDeps], tab_index: int) -> str:
    """Switch to a browser tab by its index."""
    log_tool_call("switch_tab_by_index", tab_index=tab_index)
    try:
        computer = ctx.deps.playwright_computer
        outcome = await computer.switch_tab_by_index(tab_index)
        print(f" ✅ Success: Switched to tab {tab_index}")
    except Exception as exc:
        print(f" ❌ Error: {exc!s}")
        raise
    return outcome
# Tool wrapper around playwright_computer.back: logs the call, awaits it,
# prints a confirmation and returns the underlying result.
# Exceptions are printed and re-raised.
@self.agent.tool
async def back(ctx: RunContext[ComputerUseAgentDeps]) -> str:
    """Go back to the previous page in browser history."""
    log_tool_call("back")
    try:
        computer = ctx.deps.playwright_computer
        outcome = await computer.back()
        print(" ✅ Success: Navigated back")
    except Exception as exc:
        print(f" ❌ Error: {exc!s}")
        raise
    return outcome
# Tool wrapper around playwright_computer.forward: logs the call, awaits it,
# prints a confirmation and returns the underlying result.
# Exceptions are printed and re-raised.
@self.agent.tool
async def forward(ctx: RunContext[ComputerUseAgentDeps]) -> str:
    """Go forward in browser history."""
    log_tool_call("forward")
    try:
        computer = ctx.deps.playwright_computer
        outcome = await computer.forward()
        print(" ✅ Success: Navigated forward")
    except Exception as exc:
        print(f" ❌ Error: {exc!s}")
        raise
    return outcome