Browser automation for AI agents via [inference.sh](https://inference.sh). Uses Playwright under the hood with a simple `@e` ref system for element interaction.

```bash
curl -fsSL https://cli.inference.sh | sh && infsh login
```
`@e` ref system for element interaction.

```bash
# Install CLI
curl -fsSL https://cli.inference.sh | sh && infsh login

# Open a page and get interactive elements
infsh app run agentic-browser --function open --input '{"url": "https://example.com"}' --session new
```
`@e` refs for elements

```bash
# 1. Start session
RESULT=$(infsh app run agentic-browser --function open --session new --input '{ "url": "https://example.com/login" }')
SESSION_ID=$(echo $RESULT | jq -r '.session_id')
# Elements: @e1 [input] "Email", @e2 [input] "Password", @e3 [button] "Sign In"

# 2. Fill and submit
infsh app run agentic-browser --function interact --session $SESSION_ID --input '{ "action": "fill", "ref": "@e1", "text": "user@example.com" }'
infsh app run agentic-browser --function interact --session $SESSION_ID --input '{ "action": "fill", "ref": "@e2", "text": "password123" }'
infsh app run agentic-browser --function interact --session $SESSION_ID --input '{ "action": "click", "ref": "@e3" }'

# 3. Re-snapshot after navigation
infsh app run agentic-browser --function snapshot --session $SESSION_ID --input '{}'

# 4. Close when done
infsh app run agentic-browser --function close --session $SESSION_ID --input '{}'
```
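Functions:

- `open`
- `snapshot` — `@e` refs after DOM changes
- `interact` — `@e` refs (click, fill, drag, upload, etc.)
- `screenshot`
- `execute`
- `close`

Interact actions and their fields:

| Action | Fields |
|--------|--------|
| `click` | `ref` |
| `dblclick` | `ref` |
| `fill` | `ref`, `text` |
| `type` | `text` |
| `press` | `text` |
| `select` | `ref`, `text` |
| `hover` | `ref` |
| `check` | `ref` |
| `uncheck` | `ref` |
| `drag` | `ref`, `target_ref` |
| `upload` | `ref`, `file_paths` |
| `scroll` | `direction` (up/down/left/right), `scroll_amount` |
| `back` | |
| `wait` | `wait_ms` |
| `goto` | `url` |

`@e` refs:

```text
@e1 [a] "Home" href="/"
@e2 [input type="text"] placeholder="Search"
@e3 [button] "Submit"
@e4 [select] "Choose option"
@e5 [input type="checkbox"] name="agree"
```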
```bash
# Start with recording enabled (optionally show cursor indicator)
SESSION=$(infsh app run agentic-browser --function open --session new --input '{ "url": "https://example.com", "record_video": true, "show_cursor": true }' | jq -r '.session_id')

# ... perform actions ...

# Close to get the video file
infsh app run agentic-browser --function close --session $SESSION --input '{}'
# Returns: {"success": true, "video": <File>}
```
infsh app run agentic-browser --function open --session new --input '{ "url": "https://example.com", "show_cursor": true, "record_video": true }'
infsh app run agentic-browser --function open --session new --input '{ "url": "https://example.com", "proxy_url": "http://proxy.example.com:8080", "proxy_username": "user", "proxy_password": "pass" }'
infsh app run agentic-browser --function interact --session $SESSION --input '{ "action": "upload", "ref": "@e5", "file_paths": ["/path/to/file.pdf"] }'
infsh app run agentic-browser --function interact --session $SESSION --input '{ "action": "drag", "ref": "@e1", "target_ref": "@e2" }'
```bash
infsh app run agentic-browser --function execute --session $SESSION --input '{ "code": "document.querySelectorAll(\"h2\").length" }'
# Returns: {"result": "5", "screenshot": <File>}
```
```bash
SESSION=$(infsh app run agentic-browser --function open --session new --input '{ "url": "https://example.com/contact" }' | jq -r '.session_id')

# Get elements: @e1 [input] "Name", @e2 [input] "Email", @e3 [textarea], @e4 [button] "Send"
infsh app run agentic-browser --function interact --session $SESSION --input '{"action": "fill", "ref": "@e1", "text": "John Doe"}'
infsh app run agentic-browser --function interact --session $SESSION --input '{"action": "fill", "ref": "@e2", "text": "john@example.com"}'
infsh app run agentic-browser --function interact --session $SESSION --input '{"action": "fill", "ref": "@e3", "text": "Hello!"}'
infsh app run agentic-browser --function interact --session $SESSION --input '{"action": "click", "ref": "@e4"}'

infsh app run agentic-browser --function snapshot --session $SESSION --input '{}'
infsh app run agentic-browser --function close --session $SESSION --input '{}'
```

### Search and Extract

```bash
SESSION=$(infsh app run agentic-browser --function open --session new --input '{ "url": "https://google.com" }' | jq -r '.session_id')

infsh app run agentic-browser --function interact --session $SESSION --input '{"action": "fill", "ref": "@e1", "text": "weather today"}'
infsh app run agentic-browser --function interact --session $SESSION --input '{"action": "press", "text": "Enter"}'
infsh app run agentic-browser --function interact --session $SESSION --input '{"action": "wait", "wait_ms": 2000}'

infsh app run agentic-browser --function snapshot --session $SESSION --input '{}'
infsh app run agentic-browser --function close --session $SESSION --input '{}'
```

### Screenshot with Video

```bash
SESSION=$(infsh app run agentic-browser --function open --session new --input '{ "url": "https://example.com", "record_video": true }' | jq -r '.session_id')

# Take full page screenshot
infsh app run agentic-browser --function screenshot --session $SESSION --input '{ "full_page": true }'

# Close and get video
RESULT=$(infsh app run agentic-browser --function close --session $SESSION --input '{}')
echo $RESULT | jq '.video'
```
- `--session new` on first call
- `session_id` for subsequent calls

```bash
# Web search (for research + browse)
npx skills add inference-sh/skills@web-search

# LLM models (analyze extracted content)
npx skills add inference-sh/skills@llm-models
```