Commit 56121ca

sabrenner authored and juan-fernandez committed
feat(openai): support token metrics for completions and chat completions when not provided (#4366)
* usage is included in the body
* completion tokens
* capture usage from chunk
* impl
* add tests
* simplify count tokens
* cleanup, comments
1 parent aff17f5 commit 56121ca
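
At a glance, the commit makes token metrics available even when the API response carries no `usage` object. A simplified sketch of the resolution order (an illustration only, not the plugin code; the function and parameter names here are hypothetical — the real logic lives in usageExtraction/countTokens in the diff below):

// Illustration only: resolution order for token usage after this change.
function resolveTokenUsage (body, countExactTokens, estimateTokens) {
  if (body.usage) return { ...body.usage, estimated: false } // API-reported usage wins
  try {
    return { ...countExactTokens(body), estimated: false } // exact count via tiktoken, when installed
  } catch {
    // tiktoken missing, model unknown to it, or content not encodable
  }
  return { ...estimateTokens(body), estimated: true } // rough heuristic, flagged as estimated
}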

File tree

5 files changed: +354 -64 lines changed

packages/datadog-instrumentations/src/openai.js

Lines changed: 3 additions & 2 deletions
@@ -143,7 +143,8 @@ addHook({ name: 'openai', file: 'dist/api.js', versions: ['>=3.0.0 <4'] }, expor
   return exports
 })
 
-function addStreamedChunkChoices (content, chunk) {
+function addStreamedChunk (content, chunk) {
+  content.usage = chunk.usage // add usage if it was specified to be returned
   for (const choice of chunk.choices) {
     const choiceIdx = choice.index
     const oldChoice = content.choices.find(choice => choice?.index === choiceIdx)
@@ -247,7 +248,7 @@ function wrapStreamIterator (response, options, n) {
         body = { ...chunks[0], choices: Array.from({ length: n }) }
         // start from the first chunk, and add its choices into the body
         for (let i = 0; i < chunks.length; i++) {
-          addStreamedChunkChoices(body, chunks[i])
+          addStreamedChunk(body, chunks[i])
         }
       }
     }
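
For context: on streamed requests the instrumentation buffers every chunk and folds them into one response body, and the renamed `addStreamedChunk` now also carries over the `usage` object that the API can attach to the final chunk when usage reporting was requested for the stream. A minimal sketch of that merge, with hypothetical chunk payloads (assumes the `addStreamedChunk` from the patch above is in scope):

// Only the final chunk carries `usage`, so the last assignment in addStreamedChunk wins.
const chunks = [
  { choices: [{ index: 0, delta: { content: 'Hello' } }] },
  { choices: [{ index: 0, delta: { content: ' world' } }] },
  { choices: [], usage: { prompt_tokens: 9, completion_tokens: 2, total_tokens: 11 } }
]

const body = { ...chunks[0], choices: Array.from({ length: 1 }) }
for (const chunk of chunks) {
  addStreamedChunk(body, chunk)
}
// body.usage → { prompt_tokens: 9, completion_tokens: 2, total_tokens: 11 }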

packages/datadog-plugin-openai/src/index.js

Lines changed: 157 additions & 17 deletions
@@ -15,6 +15,14 @@ const RE_TAB = /\t/g
 // TODO: In the future we should refactor config.js to make it requirable
 let MAX_TEXT_LEN = 128
 
+let encodingForModel
+try {
+  // eslint-disable-next-line import/no-extraneous-dependencies
+  encodingForModel = require('tiktoken').encoding_for_model
+} catch {
+  // we will use token count estimations in this case
+}
+
 class OpenApiPlugin extends TracingPlugin {
   static get id () { return 'openai' }
   static get operation () { return 'request' }
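
`tiktoken` is treated here as an optional dependency: when it is installed, counts are exact for any model the library recognizes; when the `require` throws, the plugin falls back to the estimator added further down in this file. A standalone sketch of the same pattern (the model name and sample string are arbitrary examples):

let encodingForModel
try {
  encodingForModel = require('tiktoken').encoding_for_model
} catch {
  // tiktoken not installed; estimation will be used instead
}

if (encodingForModel) {
  const encoder = encodingForModel('gpt-3.5-turbo') // example model
  const tokenCount = encoder.encode('Hello, world!').length // exact count for this model's encoding
  encoder.free() // tiktoken encoders must be freed explicitly
  console.log(tokenCount)
}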
@@ -232,27 +240,47 @@ class OpenApiPlugin extends TracingPlugin {
 
     super.finish()
     this.sendLog(methodName, span, tags, store, error)
-    this.sendMetrics(headers, body, endpoint, span._duration, error)
+    this.sendMetrics(headers, body, endpoint, span._duration, error, tags)
   }
 
-  sendMetrics (headers, body, endpoint, duration, error) {
+  sendMetrics (headers, body, endpoint, duration, error, spanTags) {
     const tags = [`error:${Number(!!error)}`]
     if (error) {
       this.metrics.increment('openai.request.error', 1, tags)
     } else {
       tags.push(`org:${headers['openai-organization']}`)
       tags.push(`endpoint:${endpoint}`) // just "/v1/models", no method
-      tags.push(`model:${headers['openai-model']}`)
+      tags.push(`model:${headers['openai-model'] || body.model}`)
     }
 
     this.metrics.distribution('openai.request.duration', duration * 1000, tags)
 
-    if (body && body.usage) {
-      const promptTokens = body.usage.prompt_tokens
-      const completionTokens = body.usage.completion_tokens
-      this.metrics.distribution('openai.tokens.prompt', promptTokens, tags)
-      this.metrics.distribution('openai.tokens.completion', completionTokens, tags)
-      this.metrics.distribution('openai.tokens.total', promptTokens + completionTokens, tags)
+    const promptTokens = spanTags['openai.response.usage.prompt_tokens']
+    const promptTokensEstimated = spanTags['openai.response.usage.prompt_tokens_estimated']
+
+    const completionTokens = spanTags['openai.response.usage.completion_tokens']
+    const completionTokensEstimated = spanTags['openai.response.usage.completion_tokens_estimated']
+
+    if (!error) {
+      if (promptTokensEstimated) {
+        this.metrics.distribution(
+          'openai.tokens.prompt', promptTokens, [...tags, 'openai.estimated:true'])
+      } else {
+        this.metrics.distribution('openai.tokens.prompt', promptTokens, tags)
+      }
+      if (completionTokensEstimated) {
+        this.metrics.distribution(
+          'openai.tokens.completion', completionTokens, [...tags, 'openai.estimated:true'])
+      } else {
+        this.metrics.distribution('openai.tokens.completion', completionTokens, tags)
+      }
+
+      if (promptTokensEstimated || completionTokensEstimated) {
+        this.metrics.distribution(
+          'openai.tokens.total', promptTokens + completionTokens, [...tags, 'openai.estimated:true'])
+      } else {
+        this.metrics.distribution('openai.tokens.total', promptTokens + completionTokens, tags)
+      }
     }
 
     if (headers) {
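
The practical effect of the new `spanTags` parameter: token distributions built from estimated counts carry an extra `openai.estimated:true` tag, so exact and estimated series stay distinguishable at query time. An illustrative call, where `metrics` stands in for the plugin's `this.metrics` client and the tag values are samples:

const tags = ['error:0', 'org:example-org', 'endpoint:/v1/chat/completions', 'model:gpt-3.5-turbo']

// exact count, reported by the API in body.usage:
metrics.distribution('openai.tokens.prompt', 42, tags)
// estimated count, derived because the response body had no usage:
metrics.distribution('openai.tokens.prompt', 42, [...tags, 'openai.estimated:true'])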
@@ -290,6 +318,89 @@ class OpenApiPlugin extends TracingPlugin {
   }
 }
 
+function countPromptTokens (methodName, payload, model) {
+  let promptTokens = 0
+  let promptEstimated = false
+  if (methodName === 'chat.completions.create') {
+    const messages = payload.messages
+    for (const message of messages) {
+      const content = message.content
+      const { tokens, estimated } = countTokens(content, model)
+      promptTokens += tokens
+      promptEstimated = estimated
+    }
+  } else if (methodName === 'completions.create') {
+    let prompt = payload.prompt
+    if (!Array.isArray(prompt)) prompt = [prompt]
+
+    for (const p of prompt) {
+      const { tokens, estimated } = countTokens(p, model)
+      promptTokens += tokens
+      promptEstimated = estimated
+    }
+  }
+
+  return { promptTokens, promptEstimated }
+}
+
+function countCompletionTokens (body, model) {
+  let completionTokens = 0
+  let completionEstimated = false
+  if (body?.choices) {
+    for (const choice of body.choices) {
+      const message = choice.message || choice.delta // delta for streamed responses
+      const text = choice.text
+      const content = text || message?.content
+
+      const { tokens, estimated } = countTokens(content, model)
+      completionTokens += tokens
+      completionEstimated = estimated
+    }
+  }
+
+  return { completionTokens, completionEstimated }
+}
+
+function countTokens (content, model) {
+  if (encodingForModel) {
+    try {
+      // try using tiktoken if it was available
+      const encoder = encodingForModel(model)
+      const tokens = encoder.encode(content).length
+      encoder.free()
+      return { tokens, estimated: false }
+    } catch {
+      // possible errors from tiktoken:
+      // * model not available for token counts
+      // * issue encoding content
+    }
+  }
+
+  return {
+    tokens: estimateTokens(content),
+    estimated: true
+  }
+}
+
+// If model is unavailable or tiktoken is not imported, then provide a very rough estimate of the number of tokens
+// Approximate using the following assumptions:
+//   * English text
+//   * 1 token ~= 4 chars
+//   * 1 token ~= ¾ words
+function estimateTokens (content) {
+  let estimatedTokens = 0
+  if (typeof content === 'string') {
+    const estimation1 = content.length / 4
+
+    const matches = content.match(/[\w']+|[.,!?;~@#$%^&*()+/-]/g)
+    const estimation2 = matches ? matches.length * 0.75 : 0 // in the case of an empty string
+    estimatedTokens = Math.round((1.5 * estimation1 + 0.5 * estimation2) / 2)
+  } else if (Array.isArray(content) && typeof content[0] === 'number') {
+    estimatedTokens = content.length
+  }
+  return estimatedTokens
+}
+
 function createEditRequestExtraction (tags, payload, store) {
   const instruction = payload.instruction
   tags['openai.request.instruction'] = instruction
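
To make the fallback heuristic concrete, here is the arithmetic worked by hand for one short string (a real tokenizer may land on a slightly different count):

const content = 'Hello, world!' // 13 characters

const estimation1 = content.length / 4 // 3.25, from "1 token ~= 4 chars"
const matches = content.match(/[\w']+|[.,!?;~@#$%^&*()+/-]/g)
// matches = ['Hello', ',', 'world', '!'], i.e. 4 word/punctuation units
const estimation2 = matches ? matches.length * 0.75 : 0 // 3, from "1 token ~= ¾ words"

// weighted blend, biased toward the character-based estimate:
const estimatedTokens = Math.round((1.5 * estimation1 + 0.5 * estimation2) / 2) // Math.round(3.1875) = 3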
@@ -348,7 +459,7 @@ function responseDataExtractionByMethod (methodName, tags, body, store) {
     case 'chat.completions.create':
     case 'createEdit':
     case 'edits.create':
-      commonCreateResponseExtraction(tags, body, store)
+      commonCreateResponseExtraction(tags, body, store, methodName)
       break
 
     case 'listFiles':
@@ -584,8 +695,8 @@ function createModerationResponseExtraction (tags, body) {
 }
 
 // createCompletion, createChatCompletion, createEdit
-function commonCreateResponseExtraction (tags, body, store) {
-  usageExtraction(tags, body)
+function commonCreateResponseExtraction (tags, body, store, methodName) {
+  usageExtraction(tags, body, methodName)
 
   if (!body.choices) return
 
@@ -625,11 +736,40 @@ function commonCreateResponseExtraction (tags, body, store) {
 }
 
 // createCompletion, createChatCompletion, createEdit, createEmbedding
-function usageExtraction (tags, body) {
-  if (typeof body.usage !== 'object' || !body.usage) return
-  tags['openai.response.usage.prompt_tokens'] = body.usage.prompt_tokens
-  tags['openai.response.usage.completion_tokens'] = body.usage.completion_tokens
-  tags['openai.response.usage.total_tokens'] = body.usage.total_tokens
+function usageExtraction (tags, body, methodName) {
+  let promptTokens = 0
+  let completionTokens = 0
+  let totalTokens = 0
+  if (body && body.usage) {
+    promptTokens = body.usage.prompt_tokens
+    completionTokens = body.usage.completion_tokens
+    totalTokens = body.usage.total_tokens
+  } else if (['chat.completions.create', 'completions.create'].includes(methodName)) {
+    // estimate tokens based on method name for completions and chat completions
+    const { model } = body
+    let promptEstimated = false
+    let completionEstimated = false
+
+    // prompt tokens
+    const payload = storage.getStore().openai
+    const promptTokensCount = countPromptTokens(methodName, payload, model)
+    promptTokens = promptTokensCount.promptTokens
+    promptEstimated = promptTokensCount.promptEstimated
+
+    // completion tokens
+    const completionTokensCount = countCompletionTokens(body, model)
+    completionTokens = completionTokensCount.completionTokens
+    completionEstimated = completionTokensCount.completionEstimated
+
+    // total tokens
+    totalTokens = promptTokens + completionTokens
+    if (promptEstimated) tags['openai.response.usage.prompt_tokens_estimated'] = true
+    if (completionEstimated) tags['openai.response.usage.completion_tokens_estimated'] = true
+  }
+
+  if (promptTokens) tags['openai.response.usage.prompt_tokens'] = promptTokens
+  if (completionTokens) tags['openai.response.usage.completion_tokens'] = completionTokens
+  if (totalTokens) tags['openai.response.usage.total_tokens'] = totalTokens
 }
 
 function truncateApiKey (apiKey) {
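
Net effect when a completions or chat completions response arrives without `usage`: the span still gets the usual token tags, now alongside `*_estimated` flags that `sendMetrics` reads to decide whether to append `openai.estimated:true`. A hypothetical resulting tag set:

// Hypothetical span tags for a chat completion whose response omitted usage:
const exampleSpanTags = {
  'openai.response.usage.prompt_tokens': 10,
  'openai.response.usage.prompt_tokens_estimated': true,
  'openai.response.usage.completion_tokens': 7,
  'openai.response.usage.completion_tokens_estimated': true,
  'openai.response.usage.total_tokens': 17
}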
