Every browser automation follows this pattern:

1. **Navigate**: `agent-browser open <url>`
2. **Snapshot**: `agent-browser snapshot -i` (get element refs like `@e1`, `@e2`)
```bash
agent-browser open <url>
agent-browser snapshot -i   # get element refs like @e1, @e2
```

```bash
agent-browser open https://example.com/form
agent-browser snapshot -i
# Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Submit"

agent-browser fill @e1 "user@example.com"
agent-browser fill @e2 "password123"
agent-browser click @e3
agent-browser wait --load networkidle
agent-browser snapshot -i  # Check result
```

## Essential Commands

```bash
# Navigation
agent-browser open <url>              # Navigate (aliases: goto, navigate)
agent-browser close                   # Close browser

# Snapshot
agent-browser snapshot -i             # Interactive elements with refs (recommended)
agent-browser snapshot -i -C          # Include cursor-interactive elements (divs with onclick, cursor:pointer)
agent-browser snapshot -s "#selector" # Scope to CSS selector

# Interaction (use @refs from snapshot)
agent-browser click @e1               # Click element
agent-browser fill @e2 "text"         # Clear and type text
agent-browser type @e2 "text"         # Type without clearing
agent-browser select @e1 "option"     # Select dropdown option
agent-browser check @e1               # Check checkbox
agent-browser press Enter             # Press key
agent-browser scroll down 500         # Scroll page

# Get information
agent-browser get text @e1            # Get element text
agent-browser get url                 # Get current URL
agent-browser get title               # Get page title

# Wait
agent-browser wait @e1                # Wait for element
agent-browser wait --load networkidle # Wait for network idle
agent-browser wait --url "**/page"    # Wait for URL pattern
agent-browser wait 2000               # Wait milliseconds

# Capture
agent-browser screenshot              # Screenshot to temp dir
agent-browser screenshot --full       # Full page screenshot
agent-browser pdf output.pdf          # Save as PDF
```
```bash
agent-browser open https://example.com/signup
agent-browser snapshot -i
agent-browser fill @e1 "Jane Doe"
agent-browser fill @e2 "jane@example.com"
agent-browser select @e3 "California"
agent-browser check @e4
agent-browser click @e5
agent-browser wait --load networkidle
```

### Authentication with State Persistence

```bash
# Login once and save state
agent-browser open https://app.example.com/login
agent-browser snapshot -i
agent-browser fill @e1 "$USERNAME"
agent-browser fill @e2 "$PASSWORD"
agent-browser click @e3
agent-browser wait --url "**/dashboard"
agent-browser state save auth.json

# Reuse in future sessions
agent-browser state load auth.json
agent-browser open https://app.example.com/dashboard
```

### Data Extraction

```bash
agent-browser open https://example.com/products
agent-browser snapshot -i
agent-browser get text @e5           # Get specific element text
agent-browser get text body > page.txt  # Get all page text

# JSON output for parsing
agent-browser snapshot -i --json
agent-browser get text @e1 --json
```

### Parallel Sessions

```bash
agent-browser --session site1 open https://site-a.com
agent-browser --session site2 open https://site-b.com

agent-browser --session site1 snapshot -i
agent-browser --session site2 snapshot -i

agent-browser session list
```

### Visual Browser (Debugging)

```bash
agent-browser --headed open https://example.com
agent-browser highlight @e1          # Highlight element
agent-browser record start demo.webm # Record session
```

### Local Files (PDFs, HTML)

```bash
# Open local files with file:// URLs
agent-browser --allow-file-access open file:///path/to/document.pdf
agent-browser --allow-file-access open file:///path/to/page.html
agent-browser screenshot output.png
```

### iOS Simulator (Mobile Safari)

```bash
# List available iOS simulators
agent-browser device list

# Launch Safari on a specific device
agent-browser -p ios --device "iPhone 16 Pro" open https://example.com

# Same workflow as desktop - snapshot, interact, re-snapshot
agent-browser -p ios snapshot -i
agent-browser -p ios tap @e1          # Tap (alias for click)
agent-browser -p ios fill @e2 "text"
agent-browser -p ios swipe up         # Mobile-specific gesture

# Take screenshot
agent-browser -p ios screenshot mobile.png

# Close session (shuts down simulator)
agent-browser -p ios close
```
iOS automation requires Appium with the XCUITest driver (`npm install -g appium && appium driver install xcuitest`).

To target a specific device, use `--device "<UDID>"` where UDID is from `xcrun xctrace list devices`.

Refs (`@e1`, `@e2`, etc.) are invalidated when the page changes. Always re-snapshot after any action that changes the page:

```bash
agent-browser click @e5              # Navigates to new page
agent-browser snapshot -i            # MUST re-snapshot
agent-browser click @e1              # Use new refs
```
```bash
agent-browser find text "Sign In" click
agent-browser find label "Email" fill "user@test.com"
agent-browser find role button click --name "Submit"
agent-browser find placeholder "Search" type "query"
agent-browser find testid "submit-btn" click
```
```bash
./templates/form-automation.sh https://example.com/form
./templates/authenticated-session.sh https://app.example.com/login
./templates/capture-workflow.sh https://example.com ./output
```