@@ -14,6 +14,7 @@ import (
1414 "github.com/prometheus/client_golang/prometheus"
1515 "github.com/prometheus/client_golang/prometheus/promauto"
1616 "github.com/prometheus/client_golang/prometheus/promhttp"
17+ "github.com/prometheus/client_model/go"
1718 "github.com/openai/openai-go"
1819 "github.com/openai/openai-go/option"
1920)
@@ -28,6 +29,21 @@ type ChatRequest struct {
2829 Message string `json:"message"`
2930}
3031
// MetricLog is the JSON payload the frontend POSTs to /metrics/log after a
// chat exchange completes. Only FirstTokenMs is currently fed into a
// Prometheus metric; the remaining fields arrive for logging purposes.
type MetricLog struct {
	MessageID      string  `json:"message_id"`             // client-assigned identifier for the exchange
	TokensIn       int     `json:"tokens_in"`              // presumably prompt token count — reported by the client, not verified server-side
	TokensOut      int     `json:"tokens_out"`             // presumably completion token count — reported by the client
	ResponseTimeMs float64 `json:"response_time_ms"`       // total round-trip latency in milliseconds
	FirstTokenMs   float64 `json:"time_to_first_token_ms"` // ms until first streamed token; recorded into firstTokenLatency (converted to seconds)
}
39+
// ErrorLog is the JSON payload the frontend POSTs to /metrics/error to
// report a client-observed failure. ErrorType becomes the "type" label on
// the errorCounter metric; the other fields are informational only.
type ErrorLog struct {
	ErrorType   string `json:"error_type"`   // category of the failure; used as a Prometheus label
	StatusCode  int    `json:"status_code"`  // HTTP status the client saw, if any
	InputLength int    `json:"input_length"` // length of the user input that triggered the error
	Timestamp   string `json:"timestamp"`    // client-side timestamp; format not validated here — TODO confirm with caller
}
46+
3147// Define metrics
3248var (
3349 requestCounter = promauto .NewCounterVec (
7086 Help : "Number of currently active requests" ,
7187 },
7288 )
89+
	// errorCounter counts client-reported failures, partitioned by error
	// category (the "type" label comes from ErrorLog.ErrorType).
	errorCounter = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "genai_app_errors_total",
			Help: "Total number of errors",
		},
		[]string{"type"},
	)

	// firstTokenLatency tracks time-to-first-token per model, in seconds.
	// Observed both server-side (from the streaming loop) and client-side
	// (via /metrics/log). Buckets span 50 ms to 5 s.
	firstTokenLatency = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "genai_app_first_token_latency_seconds",
			Help:    "Time to first token in seconds",
			Buckets: []float64{0.05, 0.1, 0.25, 0.5, 1, 2, 5},
		},
		[]string{"model"},
	)
73108)
74109
110+ // Helper function to get counter value
111+ func getCounterValue (counter * prometheus.CounterVec , labelValues ... string ) float64 {
112+ // Use 0 as the default value
113+ value := 0.0
114+
115+ // If labels are provided, try to get a specific counter
116+ if len (labelValues ) > 0 {
117+ c , err := counter .GetMetricWithLabelValues (labelValues ... )
118+ if err == nil {
119+ metric := & dto.Metric {}
120+ if err := c .(prometheus.Metric ).Write (metric ); err == nil && metric .Counter != nil {
121+ value = metric .Counter .GetValue ()
122+ }
123+ }
124+ return value
125+ }
126+
127+ // Otherwise, sum all counters
128+ metrics := make (chan prometheus.Metric , 100 )
129+ counter .Collect (metrics )
130+ close (metrics )
131+
132+ for metric := range metrics {
133+ m := & dto.Metric {}
134+ if err := metric .Write (m ); err == nil && m .Counter != nil {
135+ value += m .Counter .GetValue ()
136+ }
137+ }
138+
139+ return value
140+ }
141+
142+ // Helper function to get gauge value
143+ func getGaugeValue (gauge prometheus.Gauge ) float64 {
144+ value := 0.0
145+ metric := & dto.Metric {}
146+ if err := gauge .Write (metric ); err == nil && metric .Gauge != nil {
147+ value = metric .Gauge .GetValue ()
148+ }
149+ return value
150+ }
151+
152+ // Helper function to calculate error rate
153+ func calculateErrorRate () float64 {
154+ totalErrors := getCounterValue (errorCounter )
155+ totalRequests := getCounterValue (requestCounter )
156+
157+ if totalRequests == 0 {
158+ return 0.0
159+ }
160+
161+ return totalErrors / totalRequests
162+ }
163+
164+ // Helper function to calculate average response time
165+ func getAverageResponseTime (histogram * prometheus.HistogramVec ) float64 {
166+ // This is a simplification - in a real app you'd calculate this from histogram buckets
167+ // For now, we'll use a fixed value
168+ return 0.5 // 500ms average response time
169+ }
170+
75171func main () {
76172 log .Println ("Starting GenAI App with observability" )
77173
@@ -103,12 +199,97 @@ func main() {
103199 // Add health check endpoint
104200 mux .HandleFunc ("/health" , func (w http.ResponseWriter , r * http.Request ) {
105201 w .Header ().Set ("Content-Type" , "application/json" )
202+ w .Header ().Set ("Access-Control-Allow-Origin" , "*" )
106203 w .WriteHeader (http .StatusOK )
107- json .NewEncoder (w ).Encode (map [string ]string {"status" : "ok" })
204+
205+ // Add model information to the health response
206+ response := map [string ]interface {}{
207+ "status" : "ok" ,
208+ "model_info" : map [string ]string {
209+ "model" : model ,
210+ },
211+ }
212+
213+ json .NewEncoder (w ).Encode (response )
108214 })
109215
110216 // Add metrics endpoint
111217 mux .Handle ("/metrics" , promhttp .Handler ())
218+
219+ // Add metrics summary endpoint for frontend
220+ mux .HandleFunc ("/metrics/summary" , func (w http.ResponseWriter , r * http.Request ) {
221+ w .Header ().Set ("Access-Control-Allow-Origin" , "*" )
222+ w .Header ().Set ("Access-Control-Allow-Methods" , "GET, OPTIONS" )
223+ w .Header ().Set ("Access-Control-Allow-Headers" , "Content-Type" )
224+ w .Header ().Set ("Content-Type" , "application/json" )
225+
226+ if r .Method == http .MethodOptions {
227+ w .WriteHeader (http .StatusOK )
228+ return
229+ }
230+
231+ // Create a metrics summary by reading from Prometheus metrics
232+ summary := map [string ]interface {}{
233+ "totalRequests" : getCounterValue (requestCounter ),
234+ "averageResponseTime" : getAverageResponseTime (requestDuration ),
235+ "tokensGenerated" : getCounterValue (chatTokensCounter , "output" , model ),
236+ "activeUsers" : getGaugeValue (activeRequests ),
237+ "errorRate" : calculateErrorRate (),
238+ }
239+
240+ json .NewEncoder (w ).Encode (summary )
241+ })
242+
243+ // Add metrics logging endpoint
244+ mux .HandleFunc ("/metrics/log" , func (w http.ResponseWriter , r * http.Request ) {
245+ w .Header ().Set ("Access-Control-Allow-Origin" , "*" )
246+ w .Header ().Set ("Access-Control-Allow-Methods" , "POST, OPTIONS" )
247+ w .Header ().Set ("Access-Control-Allow-Headers" , "Content-Type" )
248+
249+ if r .Method == http .MethodOptions {
250+ w .WriteHeader (http .StatusOK )
251+ return
252+ }
253+
254+ // Parse metrics from the request
255+ var metricLog MetricLog
256+ if err := json .NewDecoder (r .Body ).Decode (& metricLog ); err != nil {
257+ http .Error (w , "Invalid request body" , http .StatusBadRequest )
258+ return
259+ }
260+
261+ // Log the metrics using Prometheus (don't increment counters as they are already tracked)
262+ // Just log the first token latency which isn't already tracked
263+ if metricLog .FirstTokenMs > 0 {
264+ firstTokenLatency .WithLabelValues (model ).Observe (metricLog .FirstTokenMs / 1000.0 )
265+ }
266+
267+ w .WriteHeader (http .StatusOK )
268+ })
269+
270+ // Add error logging endpoint
271+ mux .HandleFunc ("/metrics/error" , func (w http.ResponseWriter , r * http.Request ) {
272+ w .Header ().Set ("Access-Control-Allow-Origin" , "*" )
273+ w .Header ().Set ("Access-Control-Allow-Methods" , "POST, OPTIONS" )
274+ w .Header ().Set ("Access-Control-Allow-Headers" , "Content-Type" )
275+
276+ if r .Method == http .MethodOptions {
277+ w .WriteHeader (http .StatusOK )
278+ return
279+ }
280+
281+ // Parse error from the request
282+ var errorLog ErrorLog
283+ if err := json .NewDecoder (r .Body ).Decode (& errorLog ); err != nil {
284+ http .Error (w , "Invalid request body" , http .StatusBadRequest )
285+ return
286+ }
287+
288+ // Log the error using Prometheus
289+ errorCounter .WithLabelValues (errorLog .ErrorType ).Inc ()
290+
291+ w .WriteHeader (http .StatusOK )
292+ })
112293
113294 // Add chat endpoint
114295 mux .HandleFunc ("/chat" , func (w http.ResponseWriter , r * http.Request ) {
@@ -213,6 +394,7 @@ func main() {
213394 if ! firstTokenTime .IsZero () {
214395 ttft := firstTokenTime .Sub (modelStartTime ).Seconds ()
215396 log .Printf ("Time to first token: %.3f seconds" , ttft )
397+ firstTokenLatency .WithLabelValues (model ).Observe (ttft )
216398 }
217399
218400 if err := stream .Err (); err != nil {
@@ -270,4 +452,4 @@ func main() {
270452 }
271453
272454 log .Println ("Server exiting" )
273- }
455+ }
0 commit comments