Skip to content

Commit bfd6eca

Browse files
Paperlz authored and hsluoyz committed
feat: add OpenAgent Chrome extension browser use mode (#2205)
1 parent 99ee9d9 commit bfd6eca

9 files changed

Lines changed: 1066 additions & 14 deletions

File tree

controllers/chrome_connect.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Copyright 2026 The OpenAgent Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package controllers
16+
17+
import "github.com/the-open-agent/openagent/tool"
18+
19+
// ChromeConnect upgrades a local OpenAgent Chrome extension connection to the
20+
// in-process WebSocket bridge used by the browser_use tool in extension mode.
21+
func (c *ApiController) ChromeConnect() {
22+
c.EnableRender = false
23+
tool.HandleChromeConnectWebSocket(c.Ctx.ResponseWriter, c.Ctx.Request)
24+
}

routers/authz_filter.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ func permissionFilter(ctx *context.Context) {
6363
exemptedPaths := []string{
6464
// Auth endpoints — must remain public
6565
"signin", "signout", "health",
66+
// Local Chrome extension browser bridge
67+
"chrome-connect",
6668
// Get paths accessible to regular users
6769
"get-account", "get-signin-options", "get-chats", "get-forms", "get-global-videos", "get-videos", "get-video", "get-messages",
6870
"delete-welcome-message", "get-message-answer", "get-answer",

routers/cors_filter.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,16 @@ func setCorsHeaders(ctx *context.Context, origin string) {
4848
}
4949

5050
func CorsFilter(ctx *context.Context) {
51+
// The Chrome extension bridge is exempt from Casdoor-based CORS validation.
52+
// WebSocket upgrade requests from Chrome extensions carry a chrome-extension://
53+
// Origin that is not registered in Casdoor's redirect URIs and would be rejected
54+
// by the standard CORS check. The bridge handler performs its own access control:
55+
// loopback-only connections and chrome-extension:// origin validation via the
56+
// WebSocket Upgrader's CheckOrigin callback.
57+
if ctx.Request.URL.Path == "/api/chrome-connect" {
58+
return
59+
}
60+
5161
origin := ctx.Input.Header(headerOrigin)
5262

5363
if origin == "" || origin == "null" {

routers/router.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ func initAPI() {
4545
beego.Router("/api/get-account", &controllers.ApiController{}, "GET:GetAccount")
4646
beego.Router("/api/update-account", &controllers.ApiController{}, "POST:UpdateAccount")
4747
beego.Router("/api/get-signin-options", &controllers.ApiController{}, "GET:GetSigninOptions")
48+
beego.Router("/api/chrome-connect", &controllers.ApiController{}, "GET:ChromeConnect")
4849

4950
beego.Router("/api/get-global-sites", &controllers.ApiController{}, "GET:GetGlobalSites")
5051
beego.Router("/api/get-sites", &controllers.ApiController{}, "GET:GetSites")

tool/browser_use.go

Lines changed: 105 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ func NewBrowserUseTool(config Config) (*BrowserUseTool, error) {
7171
}
7272

7373
func (p *BrowserUseTool) BuiltinTools() []BuiltinTool {
74+
if p.isChromeExtMode() {
75+
return chromeConnectBuiltinTools()
76+
}
7477
return []BuiltinTool{
7578
&browserUseOpenBuiltin{provider: p},
7679
&browserUseSnapshotBuiltin{provider: p},
@@ -80,6 +83,7 @@ func (p *BrowserUseTool) BuiltinTools() []BuiltinTool {
8083
&browserUsePlayMediaBuiltin{provider: p},
8184
&browserUseTabsBuiltin{provider: p},
8285
&browserUseSwitchTabBuiltin{provider: p},
86+
&browserUseCloseTabBuiltin{provider: p},
8387
&browserUseCloseBuiltin{provider: p},
8488
}
8589
}
@@ -634,11 +638,13 @@ type browserUseElement struct {
634638
}
635639

636640
type browserUseTab struct {
637-
Index int
638-
ID target.ID
639-
Title string
640-
URL string
641-
Active bool
641+
Index int
642+
ID target.ID
643+
Title string
644+
URL string
645+
Active bool
646+
Controlled bool
647+
Protected bool
642648
}
643649

644650
func (s *browserUseSession) pageTargetsLocked() ([]*target.Info, error) {
@@ -781,11 +787,21 @@ func browserUseFormatTabs(tabs []browserUseTab) string {
781787
var builder strings.Builder
782788
builder.WriteString("Browser tabs:\n")
783789
for _, tab := range tabs {
784-
active := ""
790+
markers := []string{}
785791
if tab.Active {
786-
active = " active"
792+
markers = append(markers, "active")
793+
}
794+
if tab.Controlled {
795+
markers = append(markers, "controlled")
796+
}
797+
if tab.Protected {
798+
markers = append(markers, "protected")
799+
}
800+
markerText := ""
801+
if len(markers) > 0 {
802+
markerText = " " + strings.Join(markers, " ")
787803
}
788-
builder.WriteString(fmt.Sprintf("[%d]%s %s\n", tab.Index, active, strings.TrimSpace(tab.Title)))
804+
builder.WriteString(fmt.Sprintf("[%d]%s %s\n", tab.Index, markerText, strings.TrimSpace(tab.Title)))
789805
if strings.TrimSpace(tab.URL) != "" {
790806
builder.WriteString(fmt.Sprintf(" %s\n", tab.URL))
791807
}
@@ -1147,7 +1163,7 @@ type browserUseOpenBuiltin struct{ provider *BrowserUseTool }
11471163
func (b *browserUseOpenBuiltin) GetName() string { return "browser_use_open" }
11481164

11491165
func (b *browserUseOpenBuiltin) GetDescription() string {
1150-
return "Open or reuse the managed visible browser and navigate the active tab to a URL. Use this for real browser tasks only; do not claim a page was opened unless this tool succeeds. The browser keeps tabs, cookies, and media state across related user requests. This tool returns a fresh snapshot plus current browser state; use the returned element indexes only until the next page-changing action."
1166+
return "Open or reuse the managed visible browser and navigate the Browser Use controlled tab to a URL. In extension mode, OpenAgent UI tabs are protected and Browser Use uses a separate controlled tab. Use this for real browser tasks only; do not claim a page was opened unless this tool succeeds. The browser keeps tabs, cookies, and media state across related user requests. This tool returns a fresh snapshot plus current browser state; use the returned element indexes only until the next page-changing action."
11511167
}
11521168

11531169
func (b *browserUseOpenBuiltin) GetInputSchema() interface{} {
@@ -1190,7 +1206,7 @@ type browserUseSnapshotBuiltin struct{ provider *BrowserUseTool }
11901206
func (b *browserUseSnapshotBuiltin) GetName() string { return "browser_use_snapshot" }
11911207

11921208
func (b *browserUseSnapshotBuiltin) GetDescription() string {
1193-
return "Read the active tab in the existing managed browser and return visible text, indexed interactive elements, URL, title, active tab index, tab count, and media state. Treat this as the source of truth before acting. Use it at the start of a follow-up request and after every navigation, click, type, or key press before reusing element indexes. Do not invent page contents or completed browser actions that are not visible in this tool result."
1209+
return "Read the Browser Use controlled tab in the existing managed browser and return visible text, indexed interactive elements, URL, title, controlled tab index, tab count, and media state. Treat this as the source of truth before acting. Use it at the start of a follow-up request and after every navigation, click, type, or key press before reusing element indexes. Do not invent page contents or completed browser actions that are not visible in this tool result."
11941210
}
11951211

11961212
func (b *browserUseSnapshotBuiltin) GetInputSchema() interface{} {
@@ -1493,7 +1509,7 @@ type browserUsePlayMediaBuiltin struct{ provider *BrowserUseTool }
14931509
func (b *browserUsePlayMediaBuiltin) GetName() string { return "browser_use_play_media" }
14941510

14951511
func (b *browserUsePlayMediaBuiltin) GetDescription() string {
1496-
return "Play and unmute visible audio or video elements on the current browser tab. Use this after opening a page with music or video if playback is paused, muted, or silent. The result includes media playback state; do not tell the user audio is playing unless the returned state says a media element is playing."
1512+
return "Play and unmute visible audio or video elements on the Browser Use controlled tab. Use this after opening a page with music or video if playback is paused, muted, or silent. The result includes media playback state; do not tell the user audio is playing unless the returned state says a media element is playing."
14971513
}
14981514

14991515
func (b *browserUsePlayMediaBuiltin) GetInputSchema() interface{} {
@@ -1522,7 +1538,7 @@ type browserUseTabsBuiltin struct{ provider *BrowserUseTool }
15221538
func (b *browserUseTabsBuiltin) GetName() string { return "browser_use_tabs" }
15231539

15241540
func (b *browserUseTabsBuiltin) GetDescription() string {
1525-
return "List open browser tabs in the managed Browser Use window, including the active tab marker, titles, and URLs. Use this when a click opens a new tab, when the current page does not match what the user sees, or before switching tabs."
1541+
return "List open browser tabs available to Browser Use, including active, controlled, and protected tab markers, titles, and URLs. Use this when a click opens a new tab, when the current page does not match what the user sees, or before switching tabs."
15261542
}
15271543

15281544
func (b *browserUseTabsBuiltin) GetInputSchema() interface{} {
@@ -1550,7 +1566,7 @@ type browserUseSwitchTabBuiltin struct{ provider *BrowserUseTool }
15501566
func (b *browserUseSwitchTabBuiltin) GetName() string { return "browser_use_switch_tab" }
15511567

15521568
func (b *browserUseSwitchTabBuiltin) GetDescription() string {
1553-
return "Switch Browser Use to a tab returned by browser_use_tabs. The switch changes the active tab used by all Browser Use tools. This tool returns a fresh snapshot and browser state for the selected tab."
1569+
return "Switch Browser Use to a tab returned by browser_use_tabs. The switch sets the selected tab as the controlled tab used by Browser Use tools; protected OpenAgent UI tabs cannot be controlled. This tool returns a fresh snapshot and browser state for the selected tab."
15541570
}
15551571

15561572
func (b *browserUseSwitchTabBuiltin) GetInputSchema() interface{} {
@@ -1598,6 +1614,82 @@ func (b *browserUseSwitchTabBuiltin) Execute(ctx context.Context, arguments map[
15981614
return browserUseTextWithState(b.provider, snapshot), nil
15991615
}
16001616

1617+
// ---------------------------------------------------------------------------
1618+
// browser_use_close_tab
1619+
// ---------------------------------------------------------------------------
1620+
1621+
// browserUseCloseTabBuiltin is the builtin tool registered under the name
// "browser_use_close_tab". It keeps a reference to the BrowserUseTool provider
// whose session it acts on.
type browserUseCloseTabBuiltin struct {
	provider *BrowserUseTool
}
1622+
1623+
// GetName returns the tool identifier, "browser_use_close_tab".
func (b *browserUseCloseTabBuiltin) GetName() string {
	const toolName = "browser_use_close_tab"
	return toolName
}
1624+
1625+
func (b *browserUseCloseTabBuiltin) GetDescription() string {
1626+
return "Close a browser tab returned by browser_use_tabs without closing the whole Browser Use session. Use browser_use_tabs first, then pass the tab index to close."
1627+
}
1628+
1629+
func (b *browserUseCloseTabBuiltin) GetInputSchema() interface{} {
1630+
return map[string]interface{}{
1631+
"type": "object",
1632+
"additionalProperties": false,
1633+
"properties": map[string]interface{}{
1634+
"index": map[string]interface{}{
1635+
"type": "integer",
1636+
"description": "Tab index returned by browser_use_tabs.",
1637+
},
1638+
},
1639+
"required": []string{"index"},
1640+
}
1641+
}
1642+
1643+
func (b *browserUseCloseTabBuiltin) Execute(ctx context.Context, arguments map[string]interface{}) (*protocol.CallToolResult, error) {
1644+
rawIndex, ok := arguments["index"]
1645+
if !ok {
1646+
return browserToolError("missing required parameter: index"), nil
1647+
}
1648+
index, err := browserUsePositiveInt(rawIndex, "index")
1649+
if err != nil {
1650+
return browserToolError(err.Error()), nil
1651+
}
1652+
1653+
err = b.provider.runSession(func(session *browserUseSession) error {
1654+
tabs, err := session.pageTargetsLocked()
1655+
if err != nil {
1656+
return err
1657+
}
1658+
if index > len(tabs) {
1659+
return fmt.Errorf("tab index %d is out of range; there are %d tabs", index, len(tabs))
1660+
}
1661+
1662+
targetID := tabs[index-1].TargetID
1663+
active := targetID == session.currentTargetIDLocked()
1664+
chromeContext := chromedp.FromContext(session.browserCtx)
1665+
if chromeContext == nil || chromeContext.Browser == nil {
1666+
return fmt.Errorf("browser context is not ready")
1667+
}
1668+
timeoutCtx, cancel := context.WithTimeout(session.browserCtx, browserUseDefaultTimeout)
1669+
defer cancel()
1670+
if err = target.CloseTarget(targetID).Do(cdp.WithExecutor(timeoutCtx, chromeContext.Browser)); err != nil {
1671+
return err
1672+
}
1673+
if active {
1674+
time.Sleep(300 * time.Millisecond)
1675+
tabs, err = session.pageTargetsLocked()
1676+
if err != nil {
1677+
return err
1678+
}
1679+
if len(tabs) > 0 {
1680+
return session.switchToTargetLocked(tabs[0].TargetID)
1681+
}
1682+
session.activeTargetID = ""
1683+
session.ctx = session.browserCtx
1684+
}
1685+
return nil
1686+
})
1687+
if err != nil {
1688+
return browserUseErrorWithState(b.provider, fmt.Sprintf("browser use close tab failed: %s", err.Error())), nil
1689+
}
1690+
return browserUseTextWithState(b.provider, fmt.Sprintf("Closed tab %d.", index)), nil
1691+
}
1692+
16011693
// ---------------------------------------------------------------------------
16021694
// browser_use_close
16031695
// ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)