Skip to content

Commit 34d8d57

Browse files
committed
Fix usage token cache accounting
1 parent cde23b1 commit 34d8d57

4 files changed

Lines changed: 402 additions & 11 deletions

File tree

AGENTS.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
当前项目是基于 CLIProxyAPI(简称 CPA)做的 usage 统计程序 cpa-usage。
2+
3+
CLIProxyAPI 代码在 https://github.com/router-for-me/CLIProxyAPI
4+
已经同步到目录:/Users/jchen/Sources/github.com/router-for-me/CLIProxyAPI
5+
6+
该项目的目的是更好地观测 API 的使用情况。
7+
8+
2026-06-18 usage token 语义核查:
9+
- 以本地 CLIProxyAPI `origin/main` / `v7.2.16` 为准,队列中的 `tokens.input_tokens` 是 CPA 原样转发的上游 input/prompt 计数,不应全局理解为 `new/uncached`
10+
- OpenAI/Codex/OpenAI-compatible/Gemini 风格:`input_tokens`/`promptTokenCount` 通常已经是总输入,`cached_tokens`/`cachedContentTokenCount` 是其中的缓存命中部分;新输入应按 `input_tokens - cached_tokens` 估算,UI 不应再把二者相加为 Input。
11+
- Claude/Anthropic 风格:`input_tokens` 是最后 cache breakpoint 之后的未缓存输入,总输入应为 `input_tokens + cache_read_tokens + cache_creation_tokens`
12+
- CPA 的 `cached_tokens` 不是可靠的“cache read”字段:Claude 解析在 read 为 0 时会 fallback 到 cache creation;有 `cache_read_tokens/cache_creation_tokens` 时优先使用这两个拆分字段。
13+
- 当前修复策略:ingest 入口把新数据规范化为 `input_tokens=NEW`、`cached_tokens/cache_read_tokens=CACHE READ`、`cache_creation_tokens=CACHE WRITE`;历史数据用 `scripts/fix_usage_token_history.sh` 做同口径修复,脚本会先备份命中行。

internal/ingest/decoder.go

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ func Decode(message string) (storage.UsageEvent, error) {
3030
if ts.IsZero() {
3131
ts = time.Now().UTC()
3232
}
33+
tokens := normalizeUsageTokens(rec)
3334
return storage.UsageEvent{
3435
EventKey: requestID,
3536
Timestamp: ts.UTC(),
@@ -45,13 +46,13 @@ func Decode(message string) (storage.UsageEvent, error) {
4546
RequestID: requestID,
4647
LatencyMs: rec.LatencyMs,
4748
TTFTMs: rec.TTFTMs,
48-
InputTokens: rec.Tokens.InputTokens,
49-
OutputTokens: rec.Tokens.OutputTokens,
50-
ReasoningTokens: rec.Tokens.ReasoningTokens,
51-
CachedTokens: rec.Tokens.CachedTokens,
52-
CacheReadTokens: rec.Tokens.CacheReadTokens,
53-
CacheCreationTokens: rec.Tokens.CacheCreationTokens,
54-
TotalTokens: rec.Tokens.TotalTokens,
49+
InputTokens: tokens.InputTokens,
50+
OutputTokens: tokens.OutputTokens,
51+
ReasoningTokens: tokens.ReasoningTokens,
52+
CachedTokens: tokens.CachedTokens,
53+
CacheReadTokens: tokens.CacheReadTokens,
54+
CacheCreationTokens: tokens.CacheCreationTokens,
55+
TotalTokens: tokens.TotalTokens,
5556
Failed: rec.Failed,
5657
FailStatusCode: rec.Fail.StatusCode,
5758
FailBody: strings.TrimSpace(rec.Fail.Body),
@@ -94,6 +95,49 @@ func resolveAPIGroupKey(rec cpa.UsageRecord) string {
9495
return "unknown"
9596
}
9697

98+
func normalizeUsageTokens(rec cpa.UsageRecord) cpa.UsageTokens {
99+
tokens := rec.Tokens
100+
if isClaudeStyleUsage(rec) {
101+
if tokens.CacheReadTokens != 0 || tokens.CacheCreationTokens != 0 {
102+
tokens.CachedTokens = tokens.CacheReadTokens
103+
}
104+
return tokens
105+
}
106+
107+
cacheRead := tokens.CacheReadTokens
108+
if cacheRead == 0 {
109+
cacheRead = tokens.CachedTokens
110+
}
111+
cacheWrite := tokens.CacheCreationTokens
112+
cachedInput := cacheRead + cacheWrite
113+
if cachedInput > 0 {
114+
tokens.InputTokens = subtractFloor(tokens.InputTokens, cachedInput)
115+
tokens.CachedTokens = cacheRead
116+
tokens.CacheReadTokens = cacheRead
117+
}
118+
return tokens
119+
}
120+
121+
func isClaudeStyleUsage(rec cpa.UsageRecord) bool {
122+
for _, value := range []string{rec.Provider, rec.Model, rec.Endpoint} {
123+
v := strings.ToLower(strings.TrimSpace(value))
124+
if strings.Contains(v, "claude") || strings.Contains(v, "anthropic") {
125+
return true
126+
}
127+
}
128+
return false
129+
}
130+
131+
// subtractFloor returns value minus delta, clamped so the result is never
// negative when a positive delta is applied. A delta that is zero or negative
// is ignored and value is returned unchanged.
func subtractFloor(value, delta int64) int64 {
	switch {
	case delta <= 0:
		return value
	case value > delta:
		return value - delta
	default:
		return 0
	}
}
140+
97141
func compactRawJSON(raw json.RawMessage) string {
98142
raw = bytes.TrimSpace(raw)
99143
if len(raw) == 0 || bytes.Equal(raw, []byte("null")) || !json.Valid(raw) {

internal/ingest/decoder_test.go

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ func TestDecodeNewUsageQueueFields(t *testing.T) {
2121
"failed":true,
2222
"fail":{"status_code":429,"body":" rate limited "},
2323
"response_headers":{"Retry-After":["30"],"X-Upstream-Request-Id":["upstream-req-1"]},
24-
"provider":"openai",
25-
"model":"gpt-5.4",
26-
"alias":"client-gpt",
24+
"provider":"claude",
25+
"model":"claude-sonnet-4",
26+
"alias":"client-claude",
2727
"endpoint":"POST /v1/chat/completions",
2828
"auth_type":"apikey",
2929
"api_key":"test-key",
@@ -39,7 +39,7 @@ func TestDecodeNewUsageQueueFields(t *testing.T) {
3939
if ev.EventKey != "ctx-request-id" || ev.RequestID != "ctx-request-id" {
4040
t.Fatalf("request ids = event_key %q request_id %q", ev.EventKey, ev.RequestID)
4141
}
42-
if ev.Alias != "client-gpt" || ev.TTFTMs != 320 {
42+
if ev.Alias != "client-claude" || ev.TTFTMs != 320 {
4343
t.Fatalf("alias/ttft = %q/%d", ev.Alias, ev.TTFTMs)
4444
}
4545
if ev.CacheReadTokens != 4 || ev.CacheCreationTokens != 5 {
@@ -55,3 +55,63 @@ func TestDecodeNewUsageQueueFields(t *testing.T) {
5555
t.Fatalf("reasoning/service tier = %q/%q", ev.ReasoningEffort, ev.ServiceTier)
5656
}
5757
}
58+
59+
func TestDecodeNormalizesTotalInputStyleCacheTokens(t *testing.T) {
60+
raw := `{
61+
"timestamp":"2026-04-25T00:00:00Z",
62+
"provider":"openai",
63+
"model":"gpt-5",
64+
"request_id":"req-openai-cache",
65+
"tokens":{
66+
"input_tokens":1000,
67+
"output_tokens":50,
68+
"cached_tokens":900,
69+
"total_tokens":1050
70+
}
71+
}`
72+
73+
ev, err := Decode(raw)
74+
if err != nil {
75+
t.Fatalf("Decode: %v", err)
76+
}
77+
if ev.InputTokens != 100 {
78+
t.Fatalf("input/new tokens = %d, want 100", ev.InputTokens)
79+
}
80+
if ev.CachedTokens != 900 || ev.CacheReadTokens != 900 || ev.CacheCreationTokens != 0 {
81+
t.Fatalf("cache split = cached %d read %d write %d, want 900/900/0", ev.CachedTokens, ev.CacheReadTokens, ev.CacheCreationTokens)
82+
}
83+
if ev.TotalTokens != 1050 {
84+
t.Fatalf("total tokens = %d, want preserved 1050", ev.TotalTokens)
85+
}
86+
}
87+
88+
func TestDecodePreservesClaudeInputAndUsesExplicitCacheRead(t *testing.T) {
89+
raw := `{
90+
"timestamp":"2026-04-25T00:00:00Z",
91+
"provider":"claude",
92+
"model":"claude-sonnet-4",
93+
"request_id":"req-claude-cache",
94+
"tokens":{
95+
"input_tokens":100,
96+
"output_tokens":50,
97+
"cached_tokens":900,
98+
"cache_read_tokens":0,
99+
"cache_creation_tokens":900,
100+
"total_tokens":1050
101+
}
102+
}`
103+
104+
ev, err := Decode(raw)
105+
if err != nil {
106+
t.Fatalf("Decode: %v", err)
107+
}
108+
if ev.InputTokens != 100 {
109+
t.Fatalf("input/new tokens = %d, want 100", ev.InputTokens)
110+
}
111+
if ev.CachedTokens != 0 || ev.CacheReadTokens != 0 || ev.CacheCreationTokens != 900 {
112+
t.Fatalf("cache split = cached %d read %d write %d, want 0/0/900", ev.CachedTokens, ev.CacheReadTokens, ev.CacheCreationTokens)
113+
}
114+
if ev.TotalTokens != 1050 {
115+
t.Fatalf("total tokens = %d, want preserved 1050", ev.TotalTokens)
116+
}
117+
}

0 commit comments

Comments
 (0)