99 "os"
1010 "os/signal"
1111 "strconv"
12- "strings"
1312 "syscall"
1413 "time"
1514
@@ -324,7 +323,7 @@ func main() {
 	})
 
 	// Add chat endpoint with advanced tracing
-	mux.HandleFunc("/chat", handleChatWithTracing(client, model))
+	mux.HandleFunc("/chat", handleChat(client, model))
 
 	// Create HTTP server
 	server := &http.Server{
@@ -385,8 +384,8 @@ func getEnvOrDefault(key, defaultValue string) string {
 	return value
 }
 
-// handleChatWithTracing handles the chat endpoint with detailed tracing
-func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc {
+// handleChat handles the chat endpoint with simple tracing
+func handleChat(client *openai.Client, model string) http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Access-Control-Allow-Origin", "*")
 		w.Header().Set("Access-Control-Allow-Methods", "POST, OPTIONS")
@@ -407,7 +406,6 @@ func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc
 			log.Printf("Invalid request body: %v", err)
 			http.Error(w, "Invalid request body", http.StatusBadRequest)
 			requestCounter.WithLabelValues(r.Method, r.URL.Path, fmt.Sprintf("%d", http.StatusBadRequest)).Inc()
-			tracing.RecordError(r.Context(), err, "Failed to decode request body")
 			return
 		}
 
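Note: the `req` value decoded above is defined outside this diff. A plausible shape, inferred from the `req.Message`, `req.Messages`, `msg.Role`, and `msg.Content` accesses in the hunks below (hypothetical names; the real struct lives elsewhere in the file):

// Hypothetical request payload inferred from field accesses in this diff;
// the actual types are defined elsewhere in the file.
type ChatRequest struct {
	Message  string        `json:"message"`  // the new user message
	Messages []ChatMessage `json:"messages"` // prior conversation turns
}

type ChatMessage struct {
	Role    string `json:"role"` // "user", "assistant", or "system"
	Content string `json:"content"`
}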
@@ -417,10 +415,6 @@ func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc
 		w.Header().Set("Connection", "keep-alive")
 		w.Header().Set("Access-Control-Allow-Origin", "*")
 
-		// Start tracing for model inference
-		tracedInference := tracing.NewTracedModelInference(r.Context(), model)
-		defer tracedInference.End(0, nil) // Will be updated with actual token count
-
 		// Count input tokens (rough estimate)
 		inputTokens := 0
 		for _, msg := range req.Messages {
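The body of the token-counting loop sits outside this hunk. A minimal sketch of such a rough estimate, assuming the common ~4-characters-per-token heuristic (illustrative only, not the file's actual code):

// Rough estimate: ~4 characters per token; no real tokenizer involved.
inputTokens := 0
for _, msg := range req.Messages {
	inputTokens += len(msg.Content) / 4
}
inputTokens += len(req.Message) / 4 // include the new user message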
@@ -431,11 +425,6 @@ func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc
 		// Track metrics for input tokens
 		chatTokensCounter.WithLabelValues("input", model).Add(float64(inputTokens))
 
-		// Create span event for building message context
-		tracedInference.StartProcessing("build_messages")
-		tracing.AddAttribute(tracedInference.Ctx, "message_count", len(req.Messages)+1)
-		tracing.AddAttribute(tracedInference.Ctx, "input_tokens", inputTokens)
-
 		var messages []openai.ChatCompletionMessageParamUnion
 		for _, msg := range req.Messages {
 			var message openai.ChatCompletionMessageParamUnion
@@ -449,29 +438,20 @@ func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc
 			messages = append(messages, message)
 		}
 
-		// Add the user message to the conversation
-		messages = append(messages, openai.UserMessage(req.Message))
-		tracedInference.EndProcessing()
-
 		// Start model timing
-		tracedInference.StartProcessing("model_inference")
 		modelStartTime := time.Now()
 		var firstTokenTime time.Time
 		outputTokens := 0
 
+		// Add the user message to the conversation
+		messages = append(messages, openai.UserMessage(req.Message))
+
 		param := openai.ChatCompletionNewParams{
 			Messages: openai.F(messages),
 			Model:    openai.F(model),
 		}
 
 		ctx := r.Context()
-		tracing.AddAttributes(ctx,
-			attribute.String("model.name", model),
-			attribute.Int("input.tokens", inputTokens),
-		)
-
-		// Create stream event
-		tracing.CreateEvent(tracedInference.Ctx, "stream_start")
 		stream := client.Chat.Completions.NewStreaming(ctx, param)
 
 		for stream.Next() {
@@ -480,13 +460,6 @@ func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc
 			// Record first token time
 			if firstTokenTime.IsZero() && len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != "" {
 				firstTokenTime = time.Now()
-				ttft := firstTokenTime.Sub(modelStartTime)
-
-				// Create event for first token
-				tracedInference.RecordFirstToken(ttft)
-				tracing.CreateEvent(tracedInference.Ctx, "first_token_received",
-					attribute.Float64("time_to_first_token_ms", float64(ttft.Milliseconds())),
-				)
 			}
 
 			// Stream each chunk as it arrives
@@ -495,23 +468,15 @@ func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc
 				_, err := fmt.Fprintf(w, "%s", chunk.Choices[0].Delta.Content)
 				if err != nil {
 					log.Printf("Error writing to stream: %v", err)
-					tracing.RecordError(tracedInference.Ctx, err, "Error writing to stream")
 					return
 				}
 				w.(http.Flusher).Flush()
-
-				// Record token event every 50 tokens to avoid too many events
-				if outputTokens%50 == 0 {
-					tracing.CreateEvent(tracedInference.Ctx, "tokens_generated",
-						attribute.Int("tokens_so_far", outputTokens),
-					)
-				}
 			}
 		}
-
-		tracedInference.EndProcessing()
 
 		// Record metrics
+		requestDuration.WithLabelValues(r.Method, r.URL.Path).Observe(time.Since(start).Seconds())
+		requestCounter.WithLabelValues(r.Method, r.URL.Path, "200").Inc()
 		chatTokensCounter.WithLabelValues("output", model).Add(float64(outputTokens))
 		modelLatency.WithLabelValues(model, "inference").Observe(time.Since(modelStartTime).Seconds())
 
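The two added lines restore per-request metrics alongside the existing token and latency metrics; `start` is assumed to be captured with `start := time.Now()` at the top of the handler, outside this diff. The metric vectors are also declared elsewhere in the file; a sketch of declarations consistent with the label values used here (assumed metric names, using github.com/prometheus/client_golang/prometheus and prometheus/promauto):

// Plausible declarations inferred from the WithLabelValues calls in this diff.
var (
	requestCounter = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "http_requests_total",
		Help: "HTTP requests by method, path, and status code.",
	}, []string{"method", "path", "status"})

	requestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
		Name: "http_request_duration_seconds",
		Help: "HTTP request latency by method and path.",
	}, []string{"method", "path"})

	chatTokensCounter = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "chat_tokens_total",
		Help: "Estimated tokens processed, by direction and model.",
	}, []string{"direction", "model"})

	modelLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
		Name: "model_latency_seconds",
		Help: "Model latency by model and processing stage.",
	}, []string{"model", "stage"})

	firstTokenLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
		Name: "time_to_first_token_seconds",
		Help: "Time from request start to first streamed token, by model.",
	}, []string{"model"})
)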
@@ -521,18 +486,10 @@ func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc
 			firstTokenLatency.WithLabelValues(model).Observe(ttft)
 		}
 
-		// Record token counts in the span
-		tracedInference.RecordTokenCounts(inputTokens, outputTokens)
-
-		// Handle stream errors
 		if err := stream.Err(); err != nil {
 			log.Printf("Error in stream: %v", err)
-			tracing.RecordError(tracedInference.Ctx, err, "Error in stream")
 			http.Error(w, "Internal server error", http.StatusInternalServerError)
 			return
 		}
-
-		// Final update to the tracing end (with correct token count)
-		tracedInference.End(outputTokens, nil)
 	}
 }
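One caveat in the retained code: `w.(http.Flusher).Flush()` is an unchecked type assertion and panics if the ResponseWriter does not implement http.Flusher (for example, behind certain middleware wrappers). A defensive variant, as a sketch:

// Check the assertion once, before streaming begins.
flusher, ok := w.(http.Flusher)
if !ok {
	http.Error(w, "Streaming unsupported", http.StatusInternalServerError)
	return
}
// ...then inside the streaming loop:
flusher.Flush()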