@@ -15,6 +15,14 @@ const RE_TAB = /\t/g
 // TODO: In the future we should refactor config.js to make it requirable
 let MAX_TEXT_LEN = 128
 
+let encodingForModel
+try {
+  // eslint-disable-next-line import/no-extraneous-dependencies
+  encodingForModel = require('tiktoken').encoding_for_model
+} catch {
+  // we will use token count estimations in this case
+}
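+// if tiktoken is unavailable, countTokens() (added below) falls back to the
+// rough character/word heuristic in estimateTokens()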
+
 class OpenApiPlugin extends TracingPlugin {
   static get id () { return 'openai' }
   static get operation () { return 'request' }
@@ -232,27 +240,47 @@ class OpenApiPlugin extends TracingPlugin {
     super.finish()
     this.sendLog(methodName, span, tags, store, error)
-    this.sendMetrics(headers, body, endpoint, span._duration, error)
+    this.sendMetrics(headers, body, endpoint, span._duration, error, tags)
   }
 
-  sendMetrics (headers, body, endpoint, duration, error) {
+  sendMetrics (headers, body, endpoint, duration, error, spanTags) {
     const tags = [`error:${Number(!!error)}`]
     if (error) {
      this.metrics.increment('openai.request.error', 1, tags)
     } else {
       tags.push(`org:${headers['openai-organization']}`)
       tags.push(`endpoint:${endpoint}`) // just "/v1/models", no method
-      tags.push(`model:${headers['openai-model']}`)
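+      // fall back to the model echoed in the response body when the
+      // openai-model response header is absent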
+      tags.push(`model:${headers['openai-model'] || body.model}`)
     }
 
     this.metrics.distribution('openai.request.duration', duration * 1000, tags)
 
-    if (body && body.usage) {
-      const promptTokens = body.usage.prompt_tokens
-      const completionTokens = body.usage.completion_tokens
-      this.metrics.distribution('openai.tokens.prompt', promptTokens, tags)
-      this.metrics.distribution('openai.tokens.completion', completionTokens, tags)
-      this.metrics.distribution('openai.tokens.total', promptTokens + completionTokens, tags)
+    const promptTokens = spanTags['openai.response.usage.prompt_tokens']
+    const promptTokensEstimated = spanTags['openai.response.usage.prompt_tokens_estimated']
+
+    const completionTokens = spanTags['openai.response.usage.completion_tokens']
+    const completionTokensEstimated = spanTags['openai.response.usage.completion_tokens_estimated']
+
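+    // token metrics derived from our own counting (rather than the API's usage
+    // object) carry an extra `openai.estimated:true` tag so they can be told
+    // apart downstream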
+    if (!error) {
+      if (promptTokensEstimated) {
+        this.metrics.distribution(
+          'openai.tokens.prompt', promptTokens, [...tags, 'openai.estimated:true'])
+      } else {
+        this.metrics.distribution('openai.tokens.prompt', promptTokens, tags)
+      }
+      if (completionTokensEstimated) {
+        this.metrics.distribution(
+          'openai.tokens.completion', completionTokens, [...tags, 'openai.estimated:true'])
+      } else {
+        this.metrics.distribution('openai.tokens.completion', completionTokens, tags)
+      }
+
+      if (promptTokensEstimated || completionTokensEstimated) {
+        this.metrics.distribution(
+          'openai.tokens.total', promptTokens + completionTokens, [...tags, 'openai.estimated:true'])
+      } else {
+        this.metrics.distribution('openai.tokens.total', promptTokens + completionTokens, tags)
+      }
     }
 
     if (headers) {
@@ -290,6 +318,89 @@ class OpenApiPlugin extends TracingPlugin {
   }
 }
 
+function countPromptTokens (methodName, payload, model) {
+  let promptTokens = 0
+  let promptEstimated = false
+  if (methodName === 'chat.completions.create') {
+    const messages = payload.messages
+    for (const message of messages) {
+      const content = message.content
+      const { tokens, estimated } = countTokens(content, model)
+      promptTokens += tokens
+      promptEstimated = estimated
+    }
+  } else if (methodName === 'completions.create') {
+    let prompt = payload.prompt
+    if (!Array.isArray(prompt)) prompt = [prompt]
+
+    for (const p of prompt) {
+      const { tokens, estimated } = countTokens(p, model)
+      promptTokens += tokens
+      promptEstimated = estimated
+    }
+  }
+
+  return { promptTokens, promptEstimated }
+}
+
+function countCompletionTokens (body, model) {
+  let completionTokens = 0
+  let completionEstimated = false
+  if (body?.choices) {
+    for (const choice of body.choices) {
+      const message = choice.message || choice.delta // delta for streamed responses
+      const text = choice.text
+      const content = text || message?.content
+
+      const { tokens, estimated } = countTokens(content, model)
+      completionTokens += tokens
+      completionEstimated = estimated
+    }
+  }
+
+  return { completionTokens, completionEstimated }
+}
+
+function countTokens (content, model) {
+  if (encodingForModel) {
+    try {
+      // try using tiktoken if it was available
+      const encoder = encodingForModel(model)
+      const tokens = encoder.encode(content).length
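+      // tiktoken encoders are WASM-backed, so the explicit free() here releases their memory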
+      encoder.free()
+      return { tokens, estimated: false }
+    } catch {
+      // possible errors from tiktoken:
+      // * model not available for token counts
+      // * issue encoding content
+    }
+  }
+
+  return {
+    tokens: estimateTokens(content),
+    estimated: true
+  }
+}
+
+// If model is unavailable or tiktoken is not imported, then provide a very rough estimate of the number of tokens
+// Approximate using the following assumptions:
+//   * English text
+//   * 1 token ~= 4 chars
+//   * 1 token ~= ¾ words
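+// e.g. for "hello world" (11 chars, 2 words): chars give 11 / 4 = 2.75, words give
+// 2 * 0.75 = 1.5, and round((1.5 * 2.75 + 0.5 * 1.5) / 2) = 2 estimated tokens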
+function estimateTokens (content) {
+  let estimatedTokens = 0
+  if (typeof content === 'string') {
+    const estimation1 = content.length / 4
+
+    const matches = content.match(/[\w']+|[.,!?;~@#$%^&*()+/-]/g)
+    const estimation2 = matches ? matches.length * 0.75 : 0 // in the case of an empty string
+    estimatedTokens = Math.round((1.5 * estimation1 + 0.5 * estimation2) / 2)
+  } else if (Array.isArray(content) && typeof content[0] === 'number') {
+    estimatedTokens = content.length
+  }
+  return estimatedTokens
+}
+
 function createEditRequestExtraction (tags, payload, store) {
   const instruction = payload.instruction
   tags['openai.request.instruction'] = instruction
@@ -348,7 +459,7 @@ function responseDataExtractionByMethod (methodName, tags, body, store) {
     case 'chat.completions.create':
     case 'createEdit':
     case 'edits.create':
-      commonCreateResponseExtraction(tags, body, store)
+      commonCreateResponseExtraction(tags, body, store, methodName)
       break
 
     case 'listFiles':
@@ -584,8 +695,8 @@ function createModerationResponseExtraction (tags, body) {
 }
 
 // createCompletion, createChatCompletion, createEdit
-function commonCreateResponseExtraction (tags, body, store) {
-  usageExtraction(tags, body)
+function commonCreateResponseExtraction (tags, body, store, methodName) {
+  usageExtraction(tags, body, methodName)
 
   if (!body.choices) return
 
@@ -625,11 +736,40 @@ function commonCreateResponseExtraction (tags, body, store) {
 }
 
 // createCompletion, createChatCompletion, createEdit, createEmbedding
-function usageExtraction (tags, body) {
-  if (typeof body.usage !== 'object' || !body.usage) return
-  tags['openai.response.usage.prompt_tokens'] = body.usage.prompt_tokens
-  tags['openai.response.usage.completion_tokens'] = body.usage.completion_tokens
-  tags['openai.response.usage.total_tokens'] = body.usage.total_tokens
+function usageExtraction (tags, body, methodName) {
+  let promptTokens = 0
+  let completionTokens = 0
+  let totalTokens = 0
+  if (body && body.usage) {
+    promptTokens = body.usage.prompt_tokens
+    completionTokens = body.usage.completion_tokens
+    totalTokens = body.usage.total_tokens
+  } else if (['chat.completions.create', 'completions.create'].includes(methodName)) {
+    // estimate tokens based on method name for completions and chat completions
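+    // (streamed responses, for example, typically omit the usage object entirely)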
+    const { model } = body
+    let promptEstimated = false
+    let completionEstimated = false
+
+    // prompt tokens
+    const payload = storage.getStore().openai
+    const promptTokensCount = countPromptTokens(methodName, payload, model)
+    promptTokens = promptTokensCount.promptTokens
+    promptEstimated = promptTokensCount.promptEstimated
+
+    // completion tokens
+    const completionTokensCount = countCompletionTokens(body, model)
+    completionTokens = completionTokensCount.completionTokens
+    completionEstimated = completionTokensCount.completionEstimated
+
+    // total tokens
+    totalTokens = promptTokens + completionTokens
+    if (promptEstimated) tags['openai.response.usage.prompt_tokens_estimated'] = true
+    if (completionEstimated) tags['openai.response.usage.completion_tokens_estimated'] = true
+  }
+
+  if (promptTokens) tags['openai.response.usage.prompt_tokens'] = promptTokens
+  if (completionTokens) tags['openai.response.usage.completion_tokens'] = completionTokens
+  if (totalTokens) tags['openai.response.usage.total_tokens'] = totalTokens
 }
 
 function truncateApiKey (apiKey) {