Skip to content

Commit 4124587

Browse files
committed
feat: implement 'estimateCost'
1 parent 44f5ca5 commit 4124587

42 files changed

Lines changed: 291 additions & 10 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

src/GptEncoding.test.ts

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import {
1010
chatModelParams,
1111
encodingNames,
1212
} from './mapping.js'
13+
import { models } from './models.js'
1314
import { resolveEncoding } from './resolveEncoding.js'
1415
import { EndOfText } from './specialTokens.js'
1516

@@ -287,6 +288,84 @@ describe.each(chatModelNames)('%s', (modelName) => {
287288
})
288289
})
289290

291+
describe('estimateCost functionality', () => {
  const gpt4oEncoding = GptEncoding.getEncodingApiForModel(
    'gpt-4o',
    resolveEncoding,
  )
  const gpt35Encoding = GptEncoding.getEncodingApiForModel(
    'gpt-3.5-turbo',
    resolveEncoding,
  )

  // Name of the first model whose pricing lists an input rate but no output
  // rate, or undefined when the models table contains no such entry.
  const inputOnlyModelName = Object.entries(models).find(
    ([, model]) =>
      model.cost?.input !== undefined && model.cost?.output === undefined,
  )?.[0]

  // Register the input-only test as skipped (rather than letting it pass
  // vacuously) when there is no suitable model to exercise it with, so the
  // gap is visible in the test report.
  const testWithInputOnlyModel =
    inputOnlyModelName === undefined ? test.skip : test

  test('estimates cost correctly for gpt-4o model', () => {
    const tokenCount = 1_000
    const cost = gpt4oEncoding.estimateCost(tokenCount)

    // gpt-4o has $2.5 per million tokens for input and $10 per million tokens for output
    expect(cost.input).toBeCloseTo(0.002_5, 6) // 1000/1M * $2.5
    expect(cost.output).toBeCloseTo(0.01, 6) // 1000/1M * $10
    expect(cost.batchInput).toBeCloseTo(0.001_25, 6) // 1000/1M * $1.25
    expect(cost.batchOutput).toBeCloseTo(0.005, 6) // 1000/1M * $5
  })

  test('estimates cost correctly for gpt-3.5-turbo model', () => {
    const tokenCount = 1_000
    const cost = gpt35Encoding.estimateCost(tokenCount)

    // gpt-3.5-turbo has $0.5 per million tokens for input and $1.5 per million tokens for output
    expect(cost.input).toBeCloseTo(0.000_5, 6) // 1000/1M * $0.5
    expect(cost.output).toBeCloseTo(0.001_5, 6) // 1000/1M * $1.5
    expect(cost.batchInput).toBeCloseTo(0.000_25, 6) // 1000/1M * $0.25
    expect(cost.batchOutput).toBeCloseTo(0.000_75, 6) // 1000/1M * $0.75
  })

  test('allows overriding model name', () => {
    const tokenCount = 1_000
    // Use gpt-4o encoding but override with gpt-3.5-turbo model name
    const cost = gpt4oEncoding.estimateCost(tokenCount, 'gpt-3.5-turbo')

    expect(cost.input).toBeCloseTo(0.000_5, 6) // 1000/1M * $0.5
    expect(cost.output).toBeCloseTo(0.001_5, 6) // 1000/1M * $1.5
  })

  test('throws error when model name is not provided', () => {
    const encoding = GptEncoding.getEncodingApi('cl100k_base', resolveEncoding)
    const tokenCount = 1_000

    // No model name was provided during initialization or function call
    expect(() => encoding.estimateCost(tokenCount)).toThrow(
      'Model name must be provided either during initialization or passed in to the method.',
    )
  })

  test('throws error for unknown model', () => {
    const tokenCount = 1_000
    // The cast deliberately bypasses the model-name type to reach the runtime check
    expect(() =>
      gpt4oEncoding.estimateCost(tokenCount, 'non-existent-model' as any),
    ).toThrow('Unknown model: non-existent-model')
  })

  testWithInputOnlyModel(
    'only includes properties that exist for the model',
    () => {
      // The name comes from Object.entries and is therefore a plain string;
      // the cast lets it through the model-name parameter type.
      const cost = gpt4oEncoding.estimateCost(1_000, inputOnlyModelName as any)

      expect(cost.input).toBeDefined()
      expect(cost.output).toBeUndefined()
    },
  )
})
368+
290369
function loadTestPlans() {
291370
const testPlanPath = path.join(__dirname, '../data/TestPlans.txt')
292371
const testPlanData = fs.readFileSync(testPlanPath, 'utf8')

src/GptEncoding.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import {
1515
type GetMergeableRanksFn,
1616
getEncodingParams,
1717
} from './modelParams.js'
18+
import { type CostEstimate, models } from './models.js'
1819
import {
1920
EndOfPrompt,
2021
EndOfText,
@@ -129,6 +130,7 @@ export class GptEncoding {
129130
this.countTokens = this.countTokens.bind(this)
130131
this.setMergeCacheSize = this.setMergeCacheSize.bind(this)
131132
this.clearMergeCache = this.clearMergeCache.bind(this)
133+
this.estimateCost = this.estimateCost.bind(this)
132134
this.modelName = modelName
133135
}
134136

@@ -460,4 +462,53 @@ export class GptEncoding {
460462

461463
return buffer
462464
}
465+
466+
/**
 * Estimates the cost of processing a given token count using the model's pricing.
 *
 * The model's `cost` entry is treated as a price per one million tokens; each
 * price component present on the model is scaled by `tokenCount / 1_000_000`.
 * Components the model does not define are omitted from the result.
 *
 * @param tokenCount - The number of tokens to estimate cost for (finite and non-negative)
 * @param modelName - Optional model name to use for cost calculation (defaults to this.modelName)
 * @returns Cost estimate object with applicable price components (input, output, batchInput, batchOutput)
 * @throws Error when no model name is available, the model is unknown, or the model has no cost information
 * @throws RangeError when `tokenCount` is NaN, infinite, or negative
 */
estimateCost(tokenCount: number, modelName = this.modelName): CostEstimate {
  if (!modelName) {
    throw new Error(
      'Model name must be provided either during initialization or passed in to the method.',
    )
  }

  // Own-property lookup only: inherited keys such as "constructor" or
  // "toString" must be reported as unknown models, not as priced entries.
  const model = Object.prototype.hasOwnProperty.call(models, modelName)
    ? models[modelName]
    : undefined
  if (!model) {
    throw new Error(`Unknown model: ${modelName}`)
  }

  if (!model.cost) {
    throw new Error(`No cost information available for model: ${modelName}`)
  }

  // Validated after the model checks so pre-existing errors keep their precedence.
  if (!Number.isFinite(tokenCount) || tokenCount < 0) {
    throw new RangeError(
      `Token count must be a finite, non-negative number, received: ${tokenCount}`,
    )
  }

  const costPerMillion = model.cost

  // Express the token count in units of one million tokens
  // eslint-disable-next-line no-magic-numbers
  const millionTokens = tokenCount / 1_000_000

  const result: CostEstimate = {}
  const priceComponents = [
    'input',
    'output',
    'batchInput',
    'batchOutput',
  ] as const

  for (const component of priceComponents) {
    const rate = costPerMillion[component]
    // Compare against undefined (not truthiness) so a rate of 0 is still reported
    if (rate !== undefined) {
      result[component] = rate * millionTokens
    }
  }

  return result
}
463514
}

src/encoding/cl100k_base.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ const {
1919
vocabularySize,
2020
setMergeCacheSize,
2121
clearMergeCache,
22+
estimateCost,
2223
} = api
2324
export {
2425
clearMergeCache,
@@ -30,6 +31,7 @@ export {
3031
encodeChat,
3132
encodeChatGenerator,
3233
encodeGenerator,
34+
estimateCost,
3335
isWithinTokenLimit,
3436
setMergeCacheSize,
3537
vocabularySize,

src/encoding/o200k_base.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ const {
1919
vocabularySize,
2020
setMergeCacheSize,
2121
clearMergeCache,
22+
estimateCost,
2223
} = api
2324
export {
2425
clearMergeCache,
@@ -30,6 +31,7 @@ export {
3031
encodeChat,
3132
encodeChatGenerator,
3233
encodeGenerator,
34+
estimateCost,
3335
isWithinTokenLimit,
3436
setMergeCacheSize,
3537
vocabularySize,

src/encoding/p50k_base.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ const {
1717
vocabularySize,
1818
setMergeCacheSize,
1919
clearMergeCache,
20+
estimateCost,
2021
} = api
2122
export {
2223
clearMergeCache,
@@ -26,6 +27,7 @@ export {
2627
decodeGenerator,
2728
encode,
2829
encodeGenerator,
30+
estimateCost,
2931
isWithinTokenLimit,
3032
setMergeCacheSize,
3133
vocabularySize,

src/encoding/p50k_edit.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,12 @@ const {
1414
encodeGenerator,
1515
isWithinTokenLimit,
1616
countTokens,
17+
encodeChat,
18+
encodeChatGenerator,
1719
vocabularySize,
1820
setMergeCacheSize,
1921
clearMergeCache,
22+
estimateCost,
2023
} = api
2124
export {
2225
clearMergeCache,
@@ -25,7 +28,10 @@ export {
2528
decodeAsyncGenerator,
2629
decodeGenerator,
2730
encode,
31+
encodeChat,
32+
encodeChatGenerator,
2833
encodeGenerator,
34+
estimateCost,
2935
isWithinTokenLimit,
3036
setMergeCacheSize,
3137
vocabularySize,

src/encoding/r50k_base.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ const {
1717
vocabularySize,
1818
setMergeCacheSize,
1919
clearMergeCache,
20+
estimateCost,
2021
} = api
2122
export {
2223
clearMergeCache,
@@ -26,6 +27,7 @@ export {
2627
decodeGenerator,
2728
encode,
2829
encodeGenerator,
30+
estimateCost,
2931
isWithinTokenLimit,
3032
setMergeCacheSize,
3133
vocabularySize,

src/model/gpt-3.5-turbo-0125.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@ const {
1818
encodeChatGenerator,
1919
vocabularySize,
2020
setMergeCacheSize,
21+
clearMergeCache,
22+
estimateCost,
2123
} = api
2224
export {
25+
clearMergeCache,
2326
countTokens,
2427
decode,
2528
decodeAsyncGenerator,
@@ -28,6 +31,7 @@ export {
2831
encodeChat,
2932
encodeChatGenerator,
3033
encodeGenerator,
34+
estimateCost,
3135
isWithinTokenLimit,
3236
setMergeCacheSize,
3337
vocabularySize,

src/model/gpt-3.5-turbo-0301.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@ const {
1818
encodeChatGenerator,
1919
vocabularySize,
2020
setMergeCacheSize,
21+
clearMergeCache,
22+
estimateCost,
2123
} = api
2224
export {
25+
clearMergeCache,
2326
countTokens,
2427
decode,
2528
decodeAsyncGenerator,
@@ -28,6 +31,7 @@ export {
2831
encodeChat,
2932
encodeChatGenerator,
3033
encodeGenerator,
34+
estimateCost,
3135
isWithinTokenLimit,
3236
setMergeCacheSize,
3337
vocabularySize,

src/model/gpt-3.5-turbo-0613.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@ const {
1818
encodeChatGenerator,
1919
vocabularySize,
2020
setMergeCacheSize,
21+
clearMergeCache,
22+
estimateCost,
2123
} = api
2224
export {
25+
clearMergeCache,
2326
countTokens,
2427
decode,
2528
decodeAsyncGenerator,
@@ -28,6 +31,7 @@ export {
2831
encodeChat,
2932
encodeChatGenerator,
3033
encodeGenerator,
34+
estimateCost,
3135
isWithinTokenLimit,
3236
setMergeCacheSize,
3337
vocabularySize,

0 commit comments

Comments
 (0)