Skip to content

Update web driver to handle multiple tasks per agent #956

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ public class StateConst

public const string SUB_CONVERSATION_ID = "sub_conversation_id";
public const string ORIGIN_CONVERSATION_ID = "origin_conversation_id";
public const string WEB_DRIVER_TASK_ID = "web_driver_task_id";
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public async Task<bool> Execute(RoleDialogModel message)
message.Data = await _browser.ScreenshotAsync(new MessageInfo
{
AgentId = message.CurrentAgentId,
ContextId = convService.ConversationId,
ContextId = webDriverService.GetMessageContext(message),
MessageId = message.MessageId
}, path);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public async Task<bool> Execute(RoleDialogModel message)
message.Data = await _browser.ScreenshotAsync(new MessageInfo
{
AgentId = message.CurrentAgentId,
ContextId = convService.ConversationId,
ContextId = webDriverService.GetMessageContext(message),
MessageId = message.MessageId
}, path);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public async Task<bool> Execute(RoleDialogModel message)
message.Data = await _browser.ScreenshotAsync(new MessageInfo
{
AgentId = message.CurrentAgentId,
ContextId = convService.ConversationId,
ContextId = webDriverService.GetMessageContext(message),
MessageId = message.MessageId
}, path);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public async Task<bool> Execute(RoleDialogModel message)
message.Data = await _browser.ScreenshotAsync(new MessageInfo
{
AgentId = message.CurrentAgentId,
ContextId = convService.ConversationId,
ContextId = webDriverService.GetMessageContext(message),
MessageId = message.MessageId
}, path);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public async Task<bool> Execute(RoleDialogModel message)
message.Data = await _browser.ScreenshotAsync(new MessageInfo
{
AgentId = message.CurrentAgentId,
ContextId = convService.ConversationId,
ContextId = webDriverService.GetMessageContext(message),
MessageId = message.MessageId
}, path);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public async Task<bool> Execute(RoleDialogModel message)
message.Data = await _browser.ScreenshotAsync(new MessageInfo
{
AgentId = message.CurrentAgentId,
ContextId = convService.ConversationId,
ContextId = webDriverService.GetMessageContext(message),
MessageId = message.MessageId
}, path);

Expand Down
4 changes: 2 additions & 2 deletions src/Plugins/BotSharp.Plugin.WebDriver/Functions/GoToPageFn.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public async Task<bool> Execute(RoleDialogModel message)
var result = await _browser.GoToPage(new MessageInfo
{
AgentId = message.CurrentAgentId,
ContextId = convService.ConversationId,
ContextId = webDriverService.GetMessageContext(message),
MessageId = message.MessageId
}, new PageActionArgs
{
Expand All @@ -45,7 +45,7 @@ public async Task<bool> Execute(RoleDialogModel message)
message.Data = await _browser.ScreenshotAsync(new MessageInfo
{
AgentId = message.CurrentAgentId,
ContextId = convService.ConversationId,
ContextId = webDriverService.GetMessageContext(message),
MessageId = message.MessageId
}, path);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using BotSharp.Plugin.WebDriver.Services;

namespace BotSharp.Plugin.WebDriver.Functions;

public class HttpRequestFn : IFunctionCallback
Expand All @@ -20,12 +22,13 @@ public async Task<bool> Execute(RoleDialogModel message)
var args = JsonSerializer.Deserialize<HttpRequestParams>(message.FunctionArgs);

var agentService = _services.GetRequiredService<IAgentService>();
var webDriverService = _services.GetRequiredService<WebDriverService>();
var agent = await agentService.LoadAgent(message.CurrentAgentId);
var result = await _browser.SendHttpRequest(new MessageInfo
{
AgentId = agent.Id,
MessageId = message.MessageId,
ContextId = convService.ConversationId
ContextId = webDriverService.GetMessageContext(message)
}, args);

message.Content = result.IsSuccess ?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public async Task<bool> Execute(RoleDialogModel message)
message.Data = await _browser.ScreenshotAsync(new MessageInfo
{
AgentId = message.CurrentAgentId,
ContextId = convService.ConversationId,
ContextId = webDriverService.GetMessageContext(message),
MessageId = message.MessageId
}, path);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public async Task<bool> Execute(RoleDialogModel message)
message.Data = await _browser.ScreenshotAsync(new MessageInfo
{
AgentId = message.CurrentAgentId,
ContextId = convService.ConversationId,
ContextId = webDriverService.GetMessageContext(message),
MessageId = message.MessageId
}, path);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public async Task<bool> Execute(RoleDialogModel message)
var msgInfo = new MessageInfo
{
AgentId = message.CurrentAgentId,
ContextId = convService.ConversationId,
ContextId = webDriverService.GetMessageContext(message),
MessageId = message.MessageId
};
var result = await _browser.LaunchBrowser(msgInfo, new BrowserActionArgs
Expand All @@ -58,7 +58,7 @@ public async Task<bool> Execute(RoleDialogModel message)
message.Data = await _browser.ScreenshotAsync(new MessageInfo
{
AgentId = message.CurrentAgentId,
ContextId = convService.ConversationId,
ContextId = webDriverService.GetMessageContext(message),
MessageId = message.MessageId
}, path);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public async Task<bool> Execute(RoleDialogModel message)
message.Data = await _browser.ScreenshotAsync(new MessageInfo
{
AgentId = message.CurrentAgentId,
ContextId = convService.ConversationId,
ContextId = webDriverService.GetMessageContext(message),
MessageId = message.MessageId
}, path);
message.Content = "Took screenshot completed. You can take another screenshot if needed.";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@ public async Task<bool> Execute(RoleDialogModel message)
var args = JsonSerializer.Deserialize<BrowsingContextIn>(message.FunctionArgs);

var agentService = _services.GetRequiredService<IAgentService>();
var webDriverService = _services.GetRequiredService<WebDriverService>();
var agent = await agentService.LoadAgent(message.CurrentAgentId);

message.Data = await _browser.ScrollPage(new MessageInfo
{
AgentId = agent.Id,
ContextId = convService.ConversationId,
ContextId = webDriverService.GetMessageContext(message),
MessageId = message.MessageId
}, new PageActionArgs
{
Expand All @@ -35,13 +36,12 @@ public async Task<bool> Execute(RoleDialogModel message)

message.Content = "Scrolled. You can scroll more if needed.";

var webDriverService = _services.GetRequiredService<WebDriverService>();
var path = webDriverService.GetScreenshotFilePath(message.MessageId);

message.Data = await _browser.ScreenshotAsync(new MessageInfo
{
AgentId = message.CurrentAgentId,
ContextId = convService.ConversationId,
ContextId = webDriverService.GetMessageContext(message),
MessageId = message.MessageId
}, path);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
using BotSharp.Abstraction.Infrastructures.Enums;

namespace BotSharp.Plugin.WebDriver.Services
{
    public partial class WebDriverService
    {
        /// <summary>
        /// Resolves the context id to attach to web-driver calls made on behalf of
        /// <paramref name="message"/>.
        /// </summary>
        /// <param name="message">The dialog message whose agent id is used as the fallback context.</param>
        /// <returns>
        /// The value of the <see cref="StateConst.WEB_DRIVER_TASK_ID"/> conversation state when it
        /// is set to a non-blank value; otherwise <c>message.CurrentAgentId</c>.
        /// </returns>
        public string GetMessageContext(RoleDialogModel message)
        {
            // Use GetRequiredService so a missing registration fails with a descriptive
            // exception instead of a NullReferenceException on the next line.
            var states = _services.GetRequiredService<IConversationStateService>();
            var webDriverTaskId = states.GetState(StateConst.WEB_DRIVER_TASK_ID, "");

            // A task id, when present, takes precedence over the agent id.
            return string.IsNullOrWhiteSpace(webDriverTaskId)
                ? message.CurrentAgentId
                : webDriverTaskId;
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,17 @@ public async Task<bool> Execute(RoleDialogModel message)
var conv = _services.GetRequiredService<IConversationService>();

var browser = _services.GetRequiredService<IWebBrowser>();
var webDriverService = _services.GetRequiredService<WebDriverService>();
var msg = new MessageInfo
{
AgentId = message.CurrentAgentId,
MessageId = message.MessageId,
ContextId = message.CurrentAgentId,
ContextId = webDriverService.GetMessageContext(message),
};
var result = await browser.ActionOnElement(msg, locatorArgs, actionArgs);

message.Content = $"{actionArgs.Action} executed {(result.IsSuccess ? "success" : "failed")}";

var webDriverService = _services.GetRequiredService<WebDriverService>();
var path = webDriverService.GetScreenshotFilePath(message.MessageId);

message.Data = await browser.ScreenshotAsync(msg, path);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,19 @@ public UtilWebCloseBrowserFn(
public async Task<bool> Execute(RoleDialogModel message)
{
var conv = _services.GetRequiredService<IConversationService>();

var webDriverService = _services.GetRequiredService<WebDriverService>();
var browser = _services.GetRequiredService<IWebBrowser>();
var msg = new MessageInfo
{
AgentId = message.CurrentAgentId,
MessageId = message.MessageId,
ContextId = message.CurrentAgentId,
ContextId = webDriverService.GetMessageContext(message)
};

await browser.CloseBrowser(message.CurrentAgentId);

message.Content = $"Browser closed.";

var webDriverService = _services.GetRequiredService<WebDriverService>();
var path = webDriverService.GetScreenshotFilePath(message.MessageId);

message.Data = await browser.ScreenshotAsync(msg, path);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ public async Task<bool> Execute(RoleDialogModel message)
args.WaitTime = _webDriver.DefaultWaitTime;

var conv = _services.GetRequiredService<IConversationService>();

var webDriverService = _services.GetRequiredService<WebDriverService>();
var browser = _services.GetRequiredService<IWebBrowser>();
var msg = new MessageInfo
{
AgentId = message.CurrentAgentId,
MessageId = message.MessageId,
ContextId = message.CurrentAgentId,
ContextId = webDriverService.GetMessageContext(message)
};
if (!args.KeepBrowserOpen)
{
Expand All @@ -50,7 +50,6 @@ public async Task<bool> Execute(RoleDialogModel message)

message.Content = $"Open web page successfully.";

var webDriverService = _services.GetRequiredService<WebDriverService>();
var path = webDriverService.GetScreenshotFilePath(message.MessageId);

message.Data = await browser.ScreenshotAsync(msg, path);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,17 @@ public async Task<bool> Execute(RoleDialogModel message)
locatorArgs.Highlight = true;

var browser = _services.GetRequiredService<IWebBrowser>();
var webDriverService = _services.GetRequiredService<WebDriverService>();
var msg = new MessageInfo
{
AgentId = message.CurrentAgentId,
MessageId = message.MessageId,
ContextId = message.CurrentAgentId,
ContextId = webDriverService.GetMessageContext(message)
};
var result = await browser.LocateElement(msg, locatorArgs);

message.Content = $"Locating element {(result.IsSuccess ? "success" : "failed")}";

var webDriverService = _services.GetRequiredService<WebDriverService>();
var path = webDriverService.GetScreenshotFilePath(message.MessageId);

message.Data = await browser.ScreenshotAsync(msg, path);
Expand Down
2 changes: 1 addition & 1 deletion src/Plugins/BotSharp.Plugin.WebDriver/WebDriverPlugin.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public void RegisterDI(IServiceCollection services, IConfiguration config)
var settingService = provider.GetRequiredService<ISettingService>();
return settings;
});

services.AddSingleton<WebBrowsingSettings>();
services.AddScoped<PlaywrightWebDriver>();
services.AddSingleton<PlaywrightInstance>();

Expand Down
Loading