Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,8 @@ public interface IFileStorageService
string GetUserAvatar();
bool SaveUserAvatar(BotSharpFile file);
#endregion
#region Speech
/// <summary>
/// Saves speech audio data for a conversation under the given file name.
/// </summary>
/// <param name="conversationId">Identifier of the conversation the audio belongs to.</param>
/// <param name="fileName">Name of the file to store the audio under.</param>
/// <param name="data">The audio content to persist.</param>
Task SaveSpeechFileAsync(string conversationId, string fileName, BinaryData data);
/// <summary>
/// Retrieves speech audio for a conversation by file name.
/// </summary>
/// <param name="conversationId">Identifier of the conversation the audio belongs to.</param>
/// <param name="fileName">Name of the file to read.</param>
/// <returns>The stored audio content.</returns>
Task<BinaryData> RetrieveSpeechFileAsync(string conversationId, string fileName);
#endregion
}
23 changes: 23 additions & 0 deletions src/Infrastructure/BotSharp.Abstraction/MLTasks/ITextToSpeech.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
namespace BotSharp.Abstraction.MLTasks
{
/// <summary>
/// Contract for a text-to-speech provider that turns text into audio data.
/// </summary>
public interface ITextToSpeech
{
/// <summary>
/// The LLM provider like Microsoft Azure, OpenAI, Claude AI
/// </summary>
string Provider { get; }

/// <summary>
/// Set model name, one provider can consume different model or version(s)
/// </summary>
/// <param name="model">deployment name</param>
void SetModelName(string model);

/// <summary>
/// Generates speech audio for the given text.
/// </summary>
/// <param name="text">The text to convert to speech.</param>
/// <param name="options">Optional settings for the generation; defaults to <c>null</c>.</param>
/// <returns>The generated audio as binary data.</returns>
Task<BinaryData> GenerateSpeechFromTextAsync(string text, ITextToSpeechOptions? options = null);
}

/// <summary>
/// Options type accepted by <see cref="ITextToSpeech.GenerateSpeechFromTextAsync"/>.
/// Declares no members. NOTE(review): presumably a placeholder for provider-specific
/// settings — confirm intended usage.
/// </summary>
public interface ITextToSpeechOptions
{

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
using System.IO;

namespace BotSharp.Core.Files.Services
{
public partial class LocalFileStorageService
{
    /// <summary>
    /// Writes speech audio to
    /// <c>{_baseDir}/{CONVERSATION_FOLDER}/{TEXT_TO_SPEECH_FOLDER}/{conversationId}/{fileName}</c>,
    /// creating the conversation directory when needed and overwriting any existing file.
    /// </summary>
    /// <param name="conversationId">Conversation folder name; must be a single path segment.</param>
    /// <param name="fileName">Target file name; must be a single path segment.</param>
    /// <param name="data">Audio content to write.</param>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="conversationId"/> or <paramref name="fileName"/> is empty, rooted,
    /// or contains directory separators / parent-directory references.
    /// </exception>
    public async Task SaveSpeechFileAsync(string conversationId, string fileName, BinaryData data)
    {
        // Validate everything up front so a bad call never leaves an empty file behind.
        if (data is null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        var dir = GetSpeechDirectory(conversationId);
        var path = Path.Combine(dir, EnsureSinglePathSegment(fileName, nameof(fileName)));

        // CreateDirectory is a no-op when the directory already exists.
        Directory.CreateDirectory(dir);

        using var file = File.Create(path);
        using var input = data.ToStream();
        await input.CopyToAsync(file);
    }

    /// <summary>
    /// Reads speech audio previously stored at
    /// <c>{_baseDir}/{CONVERSATION_FOLDER}/{TEXT_TO_SPEECH_FOLDER}/{conversationId}/{fileName}</c>.
    /// </summary>
    /// <param name="conversationId">Conversation folder name; must be a single path segment.</param>
    /// <param name="fileName">File name to read; must be a single path segment.</param>
    /// <returns>The file content as binary data.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="conversationId"/> or <paramref name="fileName"/> is empty, rooted,
    /// or contains directory separators / parent-directory references.
    /// </exception>
    public async Task<BinaryData> RetrieveSpeechFileAsync(string conversationId, string fileName)
    {
        var dir = GetSpeechDirectory(conversationId);
        var path = Path.Combine(dir, EnsureSinglePathSegment(fileName, nameof(fileName)));

        using var file = new FileStream(path, FileMode.Open, FileAccess.Read);
        return await BinaryData.FromStreamAsync(file);
    }

    /// <summary>Builds the speech directory for a conversation after validating its id.</summary>
    private string GetSpeechDirectory(string conversationId)
    {
        var segment = EnsureSinglePathSegment(conversationId, nameof(conversationId));
        return Path.Combine(_baseDir, CONVERSATION_FOLDER, TEXT_TO_SPEECH_FOLDER, segment);
    }

    /// <summary>
    /// Ensures <paramref name="value"/> is one plain file/directory name, so that combining it
    /// with the storage root cannot escape that root (Path.Combine discards earlier segments
    /// when a later one is rooted, and ".." walks up the tree).
    /// </summary>
    private static string EnsureSinglePathSegment(string value, string paramName)
    {
        if (string.IsNullOrWhiteSpace(value)
            || value == "."
            || value == ".."
            || Path.IsPathRooted(value)
            || !string.Equals(Path.GetFileName(value), value, StringComparison.Ordinal))
        {
            throw new ArgumentException(
                "Value must be a single file or directory name without path separators.",
                paramName);
        }

        return value;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ public partial class LocalFileStorageService : IFileStorageService
private const string USERS_FOLDER = "users";
private const string USER_AVATAR_FOLDER = "avatar";
private const string SESSION_FOLDER = "sessions";
private const string TEXT_TO_SPEECH_FOLDER = "speeches";

public LocalFileStorageService(
BotSharpDatabaseSettings dbSettings,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,22 @@ public static ITextEmbedding GetTextEmbedding(IServiceProvider services,
return completer;
}

/// <summary>
/// Resolves the registered <see cref="ITextToSpeech"/> whose <c>Provider</c> equals
/// <paramref name="provider"/> and sets its model name.
/// </summary>
/// <param name="services">Service provider used to enumerate registered text-to-speech services.</param>
/// <param name="provider">Provider name to match against <c>ITextToSpeech.Provider</c>.</param>
/// <param name="model">Model name passed to <c>SetModelName</c> on the resolved service.</param>
/// <returns>The matching text-to-speech service with the model applied.</returns>
/// <exception cref="InvalidOperationException">No registered service matches <paramref name="provider"/>.</exception>
public static ITextToSpeech GetTextToSpeech(
    IServiceProvider services,
    string provider,
    string model)
{
    var completer = services.GetServices<ITextToSpeech>()
        .FirstOrDefault(x => x.Provider == provider);

    if (completer == null)
    {
        var logger = services.GetRequiredService<ILogger<CompletionProvider>>();
        // Use a message template so braces in the provider name cannot break log formatting.
        logger.LogError("Can't resolve text2speech provider by {Provider}", provider);

        // Fail with a descriptive error instead of a NullReferenceException on the next line.
        throw new InvalidOperationException($"Can't resolve text2speech provider by {provider}");
    }

    completer.SetModelName(model);
    return completer;
}

private static (string, string) GetProviderAndModel(IServiceProvider services,
string? provider = null,
string? model = null,
Expand Down
2 changes: 2 additions & 0 deletions src/Plugins/BotSharp.Plugin.OpenAI/OpenAiPlugin.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using BotSharp.Plugin.OpenAI.Providers.Text;
using BotSharp.Plugin.OpenAI.Providers.Chat;
using Microsoft.Extensions.Configuration;
using BotSharp.Plugin.OpenAI.Providers.Audio;

namespace BotSharp.Plugin.OpenAI;

Expand All @@ -30,5 +31,6 @@ public void RegisterDI(IServiceCollection services, IConfiguration config)
services.AddScoped<IChatCompletion, ChatCompletionProvider>();
services.AddScoped<ITextEmbedding, TextEmbeddingProvider>();
services.AddScoped<IImageCompletion, ImageCompletionProvider>();
services.AddScoped<ITextToSpeech, TextToSpeechProvider>();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
using OpenAI.Audio;

namespace BotSharp.Plugin.OpenAI.Providers.Audio
{
/// <summary>
/// <see cref="ITextToSpeech"/> implementation exposed under the "openai" provider name.
/// </summary>
public partial class TextToSpeechProvider : ITextToSpeech
{
    private readonly IServiceProvider _services;
    private string? _model;

    /// <summary>Creates the provider with the service provider used to obtain the client.</summary>
    public TextToSpeechProvider(IServiceProvider services) => _services = services;

    /// <summary>Provider name used to select this implementation.</summary>
    public string Provider => "openai";

    /// <summary>Stores the model name used for later speech generation calls.</summary>
    /// <param name="model">Model name to use.</param>
    public void SetModelName(string model) => _model = model;

    /// <summary>
    /// Generates speech for <paramref name="text"/> with the configured model.
    /// The voice is fixed to <c>GeneratedSpeechVoice.Alloy</c>; <paramref name="options"/> is not read.
    /// </summary>
    /// <param name="text">Text to convert to speech.</param>
    /// <param name="options">Accepted for interface compatibility; currently unused.</param>
    /// <returns>The generated audio as binary data.</returns>
    public async Task<BinaryData> GenerateSpeechFromTextAsync(string text, ITextToSpeechOptions? options = null)
    {
        var openAiClient = ProviderHelper.GetClient(Provider, _model, _services);
        var audioClient = openAiClient.GetAudioClient(_model);
        return await audioClient.GenerateSpeechFromTextAsync(text, GeneratedSpeechVoice.Alloy);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="StackExchange.Redis" Version="2.7.27" />
<PackageReference Include="StrongGrid" Version="0.108.0" />
<PackageReference Include="Twilio.AspNet.Common" Version="8.0.2" />
<PackageReference Include="Twilio.AspNet.Core" Version="8.0.2" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\..\Infrastructure\BotSharp.Abstraction\BotSharp.Abstraction.csproj" />
<ProjectReference Include="..\..\Infrastructure\BotSharp.Core\BotSharp.Core.csproj" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
using BotSharp.Abstraction.Files;
using BotSharp.Abstraction.Routing;
using BotSharp.Core.Infrastructures;
using BotSharp.Plugin.Twilio.Models;
using BotSharp.Plugin.Twilio.Services;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Mvc;
using System.IdentityModel.Tokens.Jwt;
using BotSharp.Plugin.Twilio.Services;
using BotSharp.Abstraction.Routing;

namespace BotSharp.Plugin.Twilio.Controllers;

[AllowAnonymous]
[Route("[controller]")]
public class TwilioVoiceController : TwilioController
{
private readonly TwilioSetting _settings;
Expand Down Expand Up @@ -80,4 +84,88 @@ public async Task<TwiMLResult> ReceivedVoiceMessage([FromRoute] string agentId,

return TwiML(response);
}


/// <summary>
/// Starts a voice session: derives a session id from the request's <c>CallSid</c> and returns
/// the TwiML produced by <c>TwilioService.ReturnInstructions</c> for the welcome audio,
/// pointing at the session's first "send" URL (sequence 0).
/// </summary>
/// <param name="request">Incoming Twilio voice request; its <c>CallSid</c> must be present.</param>
/// <returns>TwiML result wrapping the generated response.</returns>
/// <exception cref="ArgumentNullException">The request or its <c>CallSid</c> is null.</exception>
[HttpPost("start")]
public TwiMLResult InitiateConversation(VoiceRequest request)
{
    if (request?.CallSid is null)
    {
        throw new ArgumentNullException(nameof(VoiceRequest.CallSid));
    }

    var sessionId = $"TwilioVoice_{request.CallSid}";
    var twilioService = _services.GetRequiredService<TwilioService>();
    var sendUrl = $"twiliovoice/{sessionId}/send/0";

    var instructions = twilioService.ReturnInstructions("twilio/welcome.mp3", sendUrl, false);
    return TwiML(instructions);
}

/// <summary>
/// Collects the caller's speech for a sequence number: merges any previously staged messages
/// with the current <c>SpeechResult</c>, enqueues the combined text, and returns TwiML that
/// points at the matching "reply" URL. When there is no non-whitespace content, returns the
/// hang-up response instead.
/// </summary>
/// <param name="sessionId">Session identifier from the route.</param>
/// <param name="seqNum">Sequence number from the route.</param>
/// <param name="request">Incoming Twilio voice request carrying <c>SpeechResult</c> and <c>From</c>.</param>
/// <returns>TwiML result wrapping the generated response.</returns>
[HttpPost("{sessionId}/send/{seqNum}")]
public async Task<TwiMLResult> SendCallerMessage([FromRoute] string sessionId, [FromRoute] int seqNum, VoiceRequest request)
{
    var twilioService = _services.GetRequiredService<TwilioService>();
    var queue = _services.GetRequiredService<TwilioMessageQueue>();
    var sessions = _services.GetRequiredService<ITwilioSessionManager>();

    var pending = await sessions.RetrieveStagedCallerMessagesAsync(sessionId, seqNum);
    if (!string.IsNullOrWhiteSpace(request.SpeechResult))
    {
        pending.Add(request.SpeechResult);
    }

    var combined = string.Join("\r\n", pending);

    // Nothing usable was said: end the call.
    if (string.IsNullOrWhiteSpace(combined))
    {
        VoiceResponse hangUp = twilioService.HangUp("twilio/holdon.mp3");
        return TwiML(hangUp);
    }

    await queue.EnqueueAsync(new CallerMessage()
    {
        SessionId = sessionId,
        SeqNumber = seqNum,
        Content = combined,
        From = request.From
    });

    var replyUrl = $"twiliovoice/{sessionId}/reply/{seqNum}";
    VoiceResponse holdOn = twilioService.ReturnInstructions("twilio/holdon.mp3", replyUrl, true);
    return TwiML(holdOn);
}

/// <summary>
/// Delivers the assistant's reply for a sequence number. Any speech received with this request
/// is staged for the next sequence number. If no reply is available yet, the response points
/// back at this same "reply" URL; otherwise the reply is converted to speech, saved as
/// <c>{seqNum}.mp3</c>, and the response references that audio and the next "send" URL.
/// </summary>
/// <param name="sessionId">Session identifier from the route.</param>
/// <param name="seqNum">Sequence number from the route.</param>
/// <param name="request">Incoming Twilio voice request; its <c>SpeechResult</c> may be null.</param>
/// <returns>TwiML result wrapping the generated response.</returns>
[HttpPost("{sessionId}/reply/{seqNum}")]
public async Task<TwiMLResult> ReplyCallerMessage([FromRoute] string sessionId, [FromRoute] int seqNum, VoiceRequest request)
{
    var nextSeqNum = seqNum + 1;
    var sessions = _services.GetRequiredService<ITwilioSessionManager>();
    var twilioService = _services.GetRequiredService<TwilioService>();

    if (request.SpeechResult != null)
    {
        await sessions.StageCallerMessageAsync(sessionId, nextSeqNum, request.SpeechResult);
    }

    var reply = await sessions.GetAssistantReplyAsync(sessionId, seqNum);

    // No reply yet: send the caller back to this same endpoint.
    if (string.IsNullOrEmpty(reply))
    {
        VoiceResponse retry = twilioService.ReturnInstructions(null, $"twiliovoice/{sessionId}/reply/{seqNum}", true);
        return TwiML(retry);
    }

    var speechProvider = CompletionProvider.GetTextToSpeech(_services, "openai", "tts-1");
    var storage = _services.GetRequiredService<IFileStorageService>();
    var audio = await speechProvider.GenerateSpeechFromTextAsync(reply);
    var fileName = $"{seqNum}.mp3";
    await storage.SaveSpeechFileAsync(sessionId, fileName, audio);

    VoiceResponse playback = twilioService.ReturnInstructions(
        $"twiliovoice/speeches/{sessionId}/{fileName}",
        $"twiliovoice/{sessionId}/send/{nextSeqNum}",
        true);
    return TwiML(playback);
}

/// <summary>
/// Serves a stored speech file as an <c>audio/mpeg</c> download named after the requested file.
/// </summary>
/// <param name="conversationId">Conversation identifier from the route.</param>
/// <param name="fileName">File name from the route; also used as the download name.</param>
/// <returns>The file content with content type <c>audio/mpeg</c>.</returns>
/// <remarks>
/// NOTE(review): both route values are passed unchanged to the storage service, so that
/// service is responsible for rejecting values that would escape its storage folder.
/// </remarks>
[HttpGet("speeches/{conversationId}/{fileName}")]
public async Task<FileContentResult> RetrieveSpeechFile([FromRoute] string conversationId, [FromRoute] string fileName)
{
    var storage = _services.GetRequiredService<IFileStorageService>();
    var audio = await storage.RetrieveSpeechFileAsync(conversationId, fileName);

    return new FileContentResult(audio.ToArray(), "audio/mpeg")
    {
        FileDownloadName = fileName
    };
}
}
15 changes: 15 additions & 0 deletions src/Plugins/BotSharp.Plugin.Twilio/Models/CallerMessage.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
namespace BotSharp.Plugin.Twilio.Models
{
/// <summary>
/// A caller message captured for a voice session at a given sequence number.
/// </summary>
public class CallerMessage
{
    /// <summary>Identifier of the session the message belongs to.</summary>
    public string SessionId { get; set; }

    /// <summary>Sequence number of the message within the session.</summary>
    public int SeqNumber { get; set; }

    /// <summary>Text content of the message.</summary>
    public string Content { get; set; }

    /// <summary>Sender value copied from the incoming request.</summary>
    public string From { get; set; }

    /// <summary>Returns <c>SessionId</c> and <c>SeqNumber</c> joined by a hyphen.</summary>
    public override string ToString() => SessionId + "-" + SeqNumber;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using Task = System.Threading.Tasks.Task;

namespace BotSharp.Plugin.Twilio.Services
{
/// <summary>
/// Stores and retrieves messages keyed by session id and sequence number.
/// NOTE(review): the backing store and retention are not visible here — confirm with the implementation.
/// </summary>
public interface ITwilioSessionManager
{
/// <summary>Records the assistant reply for the given session and sequence number.</summary>
Task SetAssistantReplyAsync(string sessionId, int seqNum, string message);
/// <summary>
/// Gets the assistant reply for the given session and sequence number.
/// NOTE(review): presumably returns null or empty when no reply has been set yet — confirm.
/// </summary>
Task<string> GetAssistantReplyAsync(string sessionId, int seqNum);
/// <summary>Stages a caller message for the given session and sequence number.</summary>
Task StageCallerMessageAsync(string sessionId, int seqNum, string message);
/// <summary>Retrieves the caller messages staged for the given session and sequence number.</summary>
Task<List<string>> RetrieveStagedCallerMessagesAsync(string sessionId, int seqNum);
}
}
32 changes: 32 additions & 0 deletions src/Plugins/BotSharp.Plugin.Twilio/Services/TwilioMessageQueue.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
using BotSharp.Plugin.Twilio.Models;
using System.Threading.Channels;

namespace BotSharp.Plugin.Twilio.Services
{
/// <summary>
/// In-memory queue of <see cref="CallerMessage"/> items backed by a bounded channel
/// (capacity 100, writers wait when the channel is full).
/// </summary>
public class TwilioMessageQueue
{
    private readonly Channel<CallerMessage> _channel;

    /// <summary>Creates the queue and its underlying bounded channel.</summary>
    public TwilioMessageQueue()
    {
        _channel = Channel.CreateBounded<CallerMessage>(new BoundedChannelOptions(100)
        {
            FullMode = BoundedChannelFullMode.Wait
        });
    }

    /// <summary>Reader side of the channel, for consumers of queued messages.</summary>
    internal ChannelReader<CallerMessage> Reader => _channel.Reader;

    /// <summary>
    /// Writes a message to the channel after logging it to the console.
    /// </summary>
    /// <param name="request">The message to enqueue.</param>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public async ValueTask EnqueueAsync(CallerMessage request)
    {
        if (request is null)
        {
            throw new ArgumentNullException(nameof(request));
        }

        Console.WriteLine($"[{DateTime.UtcNow}] Enqueue {request}");
        var writer = _channel.Writer;
        await writer.WriteAsync(request);
    }

    /// <summary>Marks the channel's writer as complete so no further messages are accepted.</summary>
    internal void Stop()
    {
        Console.WriteLine($"[{DateTime.UtcNow}] Complete queue");
        var writer = _channel.Writer;
        writer.TryComplete();
    }
}
}
Loading