diff --git a/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/TokenStatsModel.cs b/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/TokenStatsModel.cs index 233888074..ca55e385e 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/TokenStatsModel.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/TokenStatsModel.cs @@ -6,6 +6,7 @@ public class TokenStatsModel public string Model { get; set; } public string Prompt { get; set; } public int PromptCount { get; set; } + public int CachedPromptCount { get; set; } public int CompletionCount { get; set; } public AgentLlmConfig LlmConfig { get; set; } } diff --git a/src/Infrastructure/BotSharp.Abstraction/MLTasks/Settings/LlmModelSetting.cs b/src/Infrastructure/BotSharp.Abstraction/MLTasks/Settings/LlmModelSetting.cs index a40415c70..2a68ade6b 100644 --- a/src/Infrastructure/BotSharp.Abstraction/MLTasks/Settings/LlmModelSetting.cs +++ b/src/Infrastructure/BotSharp.Abstraction/MLTasks/Settings/LlmModelSetting.cs @@ -62,12 +62,22 @@ public class LlmModelSetting /// public int Dimension { get; set; } + public LlmCost AdditionalCost { get; set; } = new(); + public override string ToString() { return $"[{Type}] {Name} {Endpoint}"; } } +public class LlmCost +{ + public float CachedPromptCost { get; set; } = 0f; + public float AudioPromptCost { get; set; } = 0f; + public float ReasoningCompletionCost { get; set; } = 0f; + public float AudioCompletionCost { get; set; } = 0f; +} + public enum LlmModelType { Text = 1, diff --git a/src/Infrastructure/BotSharp.Core/Conversations/Services/TokenStatistics.cs b/src/Infrastructure/BotSharp.Core/Conversations/Services/TokenStatistics.cs index 0698c0cb9..5a0614ed3 100644 --- a/src/Infrastructure/BotSharp.Core/Conversations/Services/TokenStatistics.cs +++ b/src/Infrastructure/BotSharp.Core/Conversations/Services/TokenStatistics.cs @@ -41,9 +41,11 @@ public void AddToken(TokenStatsModel stats, RoleDialogModel message) var settingsService =
_services.GetRequiredService(); var settings = settingsService.GetSetting(stats.Provider, _model); - var deltaPromptCost = stats.PromptCount / 1000f * settings.PromptCost; + var deltaPromptCost = (stats.PromptCount - stats.CachedPromptCount) / 1000f * settings.PromptCost; + var deltaCachedPromptCost = stats.CachedPromptCount / 1000f * (settings.AdditionalCost?.CachedPromptCost ?? 0f); var deltaCompletionCost = stats.CompletionCount / 1000f * settings.CompletionCost; - var deltaTotal = deltaPromptCost + deltaCompletionCost; + + var deltaTotal = deltaPromptCost + deltaCachedPromptCost + deltaCompletionCost; _promptCost += deltaPromptCost; _completionCost += deltaCompletionCost; @@ -53,6 +55,8 @@ public void AddToken(TokenStatsModel stats, RoleDialogModel message) stat.SetState("prompt_total", stats.PromptCount + inputCount, isNeedVersion: false, source: StateSource.Application); var outputCount = int.Parse(stat.GetState("completion_total", "0")); stat.SetState("completion_total", stats.CompletionCount + outputCount, isNeedVersion: false, source: StateSource.Application); + var cachedCount = int.Parse(stat.GetState("cached_prompt_total", "0")); + stat.SetState("cached_prompt_total", stats.CachedPromptCount + cachedCount, isNeedVersion: false, source: StateSource.Application); // Total cost var total_cost = float.Parse(stat.GetState("llm_total_cost", "0")); diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs index 031fab9c1..7d6bdcea5 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs @@ -84,6 +84,7 @@ public async Task GetChatCompletions(Agent agent, List