Name: Agent Browser
Author: CodePhiliaX

Agent Browser | Skills Pool

# Step 1: Open and discover elements (ONE tool call)
agent-browser open https://example.com/form && agent-browser wait --load load && agent-browser snapshot -i
# Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Submit"

# Step 2: Batch all interactions (ONE tool call)
agent-browser fill @e1 "user@example.com" && agent-browser fill @e2 "password123" && agent-browser click @e3 && agent-browser wait --load load

# Step 3: Only snapshot if you need to verify or discover new elements
agent-browser snapshot -i

# Navigation
agent-browser open <url>              # Navigate (aliases: goto, navigate)
agent-browser close                   # Close browser

# Snapshot
agent-browser snapshot -i             # Interactive elements with refs (recommended)
agent-browser snapshot -i -C          # Include cursor-interactive elements
agent-browser snapshot -s "#selector" # Scope to CSS selector

# Interaction (use @refs from snapshot)
agent-browser click @e1               # Click element
agent-browser fill @e2 "text"         # Clear and type text
agent-browser type @e2 "text"         # Type without clearing
agent-browser select @e1 "option"     # Select dropdown option
agent-browser check @e1               # Check checkbox
agent-browser press Enter             # Press key
agent-browser scroll down 500         # Scroll page

# Get information
agent-browser get text @e1            # Get element text
agent-browser get url                 # Get current URL
agent-browser get title               # Get page title

# Wait
agent-browser wait @e1                # Wait for element
agent-browser wait --load load        # Wait for the page "load" event (fast, preferred)
agent-browser wait --load networkidle # Wait for network idle (slow, use only when needed)
agent-browser wait --url "**/page"    # Wait for URL pattern
agent-browser wait 2000               # Wait milliseconds

# Capture
agent-browser screenshot              # Screenshot to temp dir
agent-browser screenshot --full       # Full page screenshot
agent-browser screenshot --annotate   # Annotated screenshot with numbered element labels
agent-browser pdf output.pdf          # Save as PDF

# Diff (compare page states)
agent-browser diff snapshot                          # Compare current vs last snapshot
agent-browser diff screenshot --baseline before.png  # Visual pixel diff
agent-browser diff url <url1> <url2>                 # Compare two pages

# Step 1: Open and discover elements (ONE call)
agent-browser open https://example.com/signup && agent-browser wait --load load && agent-browser snapshot -i

# Step 2: Fill and submit (ONE call)
agent-browser fill @e1 "Jane Doe" && agent-browser fill @e2 "jane@example.com" && agent-browser select @e3 "California" && agent-browser check @e4 && agent-browser click @e5 && agent-browser wait --load load

# Login: open + snapshot (ONE call)
agent-browser open https://app.example.com/login && agent-browser wait --load load && agent-browser snapshot -i

# Fill credentials + submit (ONE call)
agent-browser fill @e1 "$USERNAME" && agent-browser fill @e2 "$PASSWORD" && agent-browser click @e3 && agent-browser wait --url "**/dashboard"
agent-browser state save auth.json

# Reuse in future sessions
agent-browser state load auth.json && agent-browser open https://app.example.com/dashboard

# Open + snapshot in one call
agent-browser open https://example.com/products && agent-browser wait --load load && agent-browser snapshot -i
agent-browser get text @e5           # Get specific element text
agent-browser get text body > page.txt  # Get all page text

# JSON output for parsing
agent-browser snapshot -i --json

agent-browser click @e5              # Navigates to new page
agent-browser snapshot -i            # MUST re-snapshot
agent-browser click @e1              # Use new refs

agent-browser screenshot --annotate
# Output includes the image path and a legend:
#   [1] @e1 button "Submit"
#   [2] @e2 link "Home"
#   [3] @e3 textbox "Email"
agent-browser click @e2              # Click using ref from annotated screenshot

agent-browser find text "Sign In" click
agent-browser find label "Email" fill "user@example.com"
agent-browser find role button click --name "Submit"
agent-browser find placeholder "Search" type "query"

agent-browser close                    # Close default session
agent-browser --session agent1 close   # Close specific session

agent-browser --session my-profile --profile /path/to/profile --headed open https://app.example.com
agent-browser --session my-profile --profile /path/to/profile --headed snapshot -i

Agent Browser

Browser Automation with agent-browser

Performance Rules (CRITICAL)

Core Workflow

Agent Browser

Browser Automation with agent-browser

Performance Rules (CRITICAL)

Core Workflow

Essential Commands

Common Patterns

Form Submission

Authentication with State Persistence

Data Extraction

Ref Lifecycle (Important)

Annotated Screenshots (Vision Mode)

Semantic Locators (Alternative to Refs)

Session Management

Troubleshooting

Default Mode (No Profile)

Oracle

Blucli

Peekaboo

Add Dock Band

Add Fallback Commands

Add Adaptive Card Form

Agent Browser

Browser Automation with agent-browser

Performance Rules (CRITICAL)

Core Workflow

Agent Browser

Browser Automation with agent-browser

Performance Rules (CRITICAL)

Core Workflow

Essential Commands

Common Patterns

Form Submission

Authentication with State Persistence

Data Extraction

Ref Lifecycle (Important)

Annotated Screenshots (Vision Mode)

Semantic Locators (Alternative to Refs)

Session Management

Browser Profile (Persistent Login)

Troubleshooting

Default Mode (No Profile)

Oracle

Blucli

Peekaboo

Add Dock Band

Add Fallback Commands

Add Adaptive Card Form