Commit bbf7e07

Fix build issues: Add missing model.go and simplify main.go
1 parent 10694ea commit bbf7e07

File tree

main.go
pkg/tracing/model.go

2 files changed: +100 additions, −51 deletions

main.go

Lines changed: 8 additions & 51 deletions
@@ -9,7 +9,6 @@ import (
 	"os"
 	"os/signal"
 	"strconv"
-	"strings"
 	"syscall"
 	"time"

@@ -324,7 +323,7 @@ func main() {
 	})

 	// Add chat endpoint with advanced tracing
-	mux.HandleFunc("/chat", handleChatWithTracing(client, model))
+	mux.HandleFunc("/chat", handleChat(client, model))

 	// Create HTTP server
 	server := &http.Server{

@@ -385,8 +384,8 @@ func getEnvOrDefault(key, defaultValue string) string {
 	return value
 }

-// handleChatWithTracing handles the chat endpoint with detailed tracing
-func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc {
+// handleChat handles the chat endpoint with simple tracing
+func handleChat(client *openai.Client, model string) http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Access-Control-Allow-Origin", "*")
 		w.Header().Set("Access-Control-Allow-Methods", "POST, OPTIONS")

@@ -407,7 +406,6 @@ func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc
 			log.Printf("Invalid request body: %v", err)
 			http.Error(w, "Invalid request body", http.StatusBadRequest)
 			requestCounter.WithLabelValues(r.Method, r.URL.Path, fmt.Sprintf("%d", http.StatusBadRequest)).Inc()
-			tracing.RecordError(r.Context(), err, "Failed to decode request body")
 			return
 		}

@@ -417,10 +415,6 @@ func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc
 		w.Header().Set("Connection", "keep-alive")
 		w.Header().Set("Access-Control-Allow-Origin", "*")

-		// Start tracing for model inference
-		tracedInference := tracing.NewTracedModelInference(r.Context(), model)
-		defer tracedInference.End(0, nil) // Will be updated with actual token count
-
 		// Count input tokens (rough estimate)
 		inputTokens := 0
 		for _, msg := range req.Messages {

@@ -431,11 +425,6 @@ func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc
 		// Track metrics for input tokens
 		chatTokensCounter.WithLabelValues("input", model).Add(float64(inputTokens))

-		// Create span event for building message context
-		tracedInference.StartProcessing("build_messages")
-		tracing.AddAttribute(tracedInference.Ctx, "message_count", len(req.Messages)+1)
-		tracing.AddAttribute(tracedInference.Ctx, "input_tokens", inputTokens)
-
 		var messages []openai.ChatCompletionMessageParamUnion
 		for _, msg := range req.Messages {
 			var message openai.ChatCompletionMessageParamUnion

@@ -449,29 +438,20 @@ func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc
 			messages = append(messages, message)
 		}

-		// Add the user message to the conversation
-		messages = append(messages, openai.UserMessage(req.Message))
-		tracedInference.EndProcessing()
-
 		// Start model timing
-		tracedInference.StartProcessing("model_inference")
 		modelStartTime := time.Now()
 		var firstTokenTime time.Time
 		outputTokens := 0

+		// Add the user message to the conversation
+		messages = append(messages, openai.UserMessage(req.Message))
+
 		param := openai.ChatCompletionNewParams{
 			Messages: openai.F(messages),
 			Model:    openai.F(model),
 		}

 		ctx := r.Context()
-		tracing.AddAttributes(ctx,
-			attribute.String("model.name", model),
-			attribute.Int("input.tokens", inputTokens),
-		)
-
-		// Create stream event
-		tracing.CreateEvent(tracedInference.Ctx, "stream_start")
 		stream := client.Chat.Completions.NewStreaming(ctx, param)

 		for stream.Next() {

@@ -480,13 +460,6 @@ func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc
 			// Record first token time
 			if firstTokenTime.IsZero() && len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != "" {
 				firstTokenTime = time.Now()
-				ttft := firstTokenTime.Sub(modelStartTime)
-
-				// Create event for first token
-				tracedInference.RecordFirstToken(ttft)
-				tracing.CreateEvent(tracedInference.Ctx, "first_token_received",
-					attribute.Float64("time_to_first_token_ms", float64(ttft.Milliseconds())),
-				)
 			}

 			// Stream each chunk as it arrives

@@ -495,23 +468,15 @@ func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc
 				_, err := fmt.Fprintf(w, "%s", chunk.Choices[0].Delta.Content)
 				if err != nil {
 					log.Printf("Error writing to stream: %v", err)
-					tracing.RecordError(tracedInference.Ctx, err, "Error writing to stream")
 					return
 				}
 				w.(http.Flusher).Flush()
-
-				// Record token event every 50 tokens to avoid too many events
-				if outputTokens % 50 == 0 {
-					tracing.CreateEvent(tracedInference.Ctx, "tokens_generated",
-						attribute.Int("tokens_so_far", outputTokens),
-					)
-				}
 			}
 		}
-
-		tracedInference.EndProcessing()

 		// Record metrics
+		requestDuration.WithLabelValues(r.Method, r.URL.Path).Observe(time.Since(start).Seconds())
+		requestCounter.WithLabelValues(r.Method, r.URL.Path, "200").Inc()
 		chatTokensCounter.WithLabelValues("output", model).Add(float64(outputTokens))
 		modelLatency.WithLabelValues(model, "inference").Observe(time.Since(modelStartTime).Seconds())

@@ -521,18 +486,10 @@ func handleChatWithTracing(client *openai.Client, model string) http.HandlerFunc
 			firstTokenLatency.WithLabelValues(model).Observe(ttft)
 		}

-		// Record token counts in the span
-		tracedInference.RecordTokenCounts(inputTokens, outputTokens)
-
-		// Handle stream errors
 		if err := stream.Err(); err != nil {
 			log.Printf("Error in stream: %v", err)
-			tracing.RecordError(tracedInference.Ctx, err, "Error in stream")
 			http.Error(w, "Internal server error", http.StatusInternalServerError)
 			return
 		}
-
-		// Final update to the tracing end (with correct token count)
-		tracedInference.End(outputTokens, nil)
 	}
 }
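
For context, here is a minimal Go client sketch for exercising the simplified /chat endpoint. The "message" JSON field name and the localhost:8080 address are assumptions (the handler decodes a req.Message field, but the struct tags and server port are outside this diff); what the diff does confirm is that the handler writes plain-text chunks and flushes after each write, so the response body can be read incrementally:

package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
)

func main() {
	// Request body shape assumed from the handler's req.Message field.
	body := strings.NewReader(`{"message": "Hello there"}`)
	resp, err := http.Post("http://localhost:8080/chat", "application/json", body)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	defer resp.Body.Close()

	// Read the streamed response incrementally rather than all at once.
	buf := make([]byte, 4096)
	for {
		n, readErr := resp.Body.Read(buf)
		if n > 0 {
			fmt.Print(string(buf[:n])) // print tokens as they stream in
		}
		if readErr == io.EOF {
			break
		}
		if readErr != nil {
			fmt.Fprintln(os.Stderr, readErr)
			return
		}
	}
}

Because the handler calls w.(http.Flusher).Flush() after each chunk, the client sees tokens as they are generated instead of one buffered response at the end.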

pkg/tracing/model.go

Lines changed: 92 additions & 0 deletions
@@ -0,0 +1,92 @@
+package tracing
+
+import (
+	"context"
+	"time"
+
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/trace"
+)
+
+// TracedModelInference creates spans for model inference operations
+type TracedModelInference struct {
+	Ctx         context.Context
+	ModelName   string
+	ParentSpan  trace.Span
+	CurrentSpan trace.Span
+	StartTime   time.Time
+}
+
+// NewTracedModelInference creates a new traced model inference
+func NewTracedModelInference(ctx context.Context, modelName string) *TracedModelInference {
+	// Start the parent span for the overall inference
+	ctx, span := StartSpan(ctx, "model_inference")
+	span.SetAttributes(
+		attribute.String("model.name", modelName),
+		attribute.String("inference.type", "streaming"),
+	)
+
+	return &TracedModelInference{
+		Ctx:        ctx,
+		ModelName:  modelName,
+		ParentSpan: span,
+		StartTime:  time.Now(),
+	}
+}
+
+// StartProcessing starts a processing phase span
+func (t *TracedModelInference) StartProcessing(name string) {
+	_, span := StartChildSpan(t.Ctx, name)
+	t.CurrentSpan = span
+}
+
+// EndProcessing ends the current processing phase span
+func (t *TracedModelInference) EndProcessing() {
+	if t.CurrentSpan != nil {
+		t.CurrentSpan.End()
+		t.CurrentSpan = nil
+	}
+}
+
+// RecordFirstToken records when the first token is received
+func (t *TracedModelInference) RecordFirstToken(ttft time.Duration) {
+	if t.ParentSpan == nil {
+		return
+	}
+
+	t.ParentSpan.AddEvent("first_token", trace.WithAttributes(
+		attribute.Float64("time_to_first_token_ms", float64(ttft.Milliseconds())),
+	))
+	t.ParentSpan.SetAttributes(attribute.Float64("time_to_first_token_sec", ttft.Seconds()))
+}
+
+// RecordTokenCounts records the input and output token counts
+func (t *TracedModelInference) RecordTokenCounts(inputTokens, outputTokens int) {
+	if t.ParentSpan == nil {
+		return
+	}
+
+	t.ParentSpan.SetAttributes(
+		attribute.Int("tokens.input", inputTokens),
+		attribute.Int("tokens.output", outputTokens),
+	)
+}
+
+// End ends the parent span and records completion metrics
+func (t *TracedModelInference) End(outputTokens int, err error) {
+	if t.ParentSpan == nil {
+		return
+	}
+
+	totalDuration := time.Since(t.StartTime)
+	t.ParentSpan.SetAttributes(
+		attribute.Float64("duration_sec", totalDuration.Seconds()),
+		attribute.Int("tokens.output.total", outputTokens),
+	)
+
+	if err != nil {
+		RecordError(t.Ctx, err, "Model inference error")
+	}
+
+	t.ParentSpan.End()
+}
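
The lifecycle this type expects matches the code removed from main.go above: one parent span per request, a child span per phase, plus first-token and token-count annotations. A condensed usage sketch follows; the module import path is a placeholder, and StartSpan, StartChildSpan, and RecordError are assumed to be defined elsewhere in pkg/tracing, since this file calls but does not declare them:

package main

import (
	"context"
	"time"

	"yourmodule/pkg/tracing" // placeholder import path
)

// runTracedInference condenses the lifecycle the removed handler followed.
func runTracedInference(ctx context.Context, model string, inputTokens int) {
	ti := tracing.NewTracedModelInference(ctx, model)

	ti.StartProcessing("build_messages") // child span: assemble the prompt
	// ... append conversation history and the new user message ...
	ti.EndProcessing()

	ti.StartProcessing("model_inference") // child span: the streaming call
	start := time.Now()
	outputTokens := 0
	// ... on the first streamed chunk:
	ti.RecordFirstToken(time.Since(start))
	// ... increment outputTokens per chunk until the stream ends ...
	ti.EndProcessing()

	ti.RecordTokenCounts(inputTokens, outputTokens)
	ti.End(outputTokens, nil) // closes the parent span with totals
}

Note that CurrentSpan tracks a single phase at a time, so StartProcessing/EndProcessing pairs must not be nested: a second StartProcessing before EndProcessing would orphan the first child span.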
