@@ -24,12 +24,108 @@ const AgentLoop = {
2424
// Numeric limits for the agent loop.
const MAX_ITERATIONS = 50;
const MAX_TOOL_CALLS_PER_ITERATION = 3;
const MAX_NO_PROGRESS_ITERATIONS = 5; // Max consecutive iterations without tool calls
const TOOL_EXECUTION_TIMEOUT_MS = 30000; // 30 second timeout per tool

// Mutable run state, reassigned while a cycle is running.
let _isRunning = false;
let _abortController = null;
let _modelConfig = null;

// Activity log entries and the length the log is checked against.
const MAX_ACTIVITY_LOG = 200;
const _activityLog = [];
3234
// Stuck loop detection state:
//   consecutiveNoToolCalls - iterations in a row whose response had no tool calls
//   lastResponseLength     - length of the most recently checked response
//   repeatedShortResponses - short responses seen in a row
// The object is only ever created inside _resetLoopHealth, so the zeroed
// shape is written down exactly once.
let _loopHealth;

/**
 * Replaces the stuck-loop counters with a fresh, zeroed object.
 * @returns {void}
 */
const _resetLoopHealth = () => {
  _loopHealth = {
    consecutiveNoToolCalls: 0,
    lastResponseLength: 0,
    repeatedShortResponses: 0,
  };
};

// Establish the initial state.
_resetLoopHealth();
49+
// Circuit breaker for failing tools
const CIRCUIT_THRESHOLD = 3; // Failures before circuit opens
const CIRCUIT_RESET_MS = 60000; // 1 minute cooldown
const _toolCircuits = new Map(); // tool -> { count, lastError, tripTime }
54+
/**
 * Reports whether calls to a tool are currently blocked by its circuit breaker.
 *
 * A tool is blocked while it has at least CIRCUIT_THRESHOLD recorded failures
 * and fewer than CIRCUIT_RESET_MS milliseconds have passed since `tripTime`.
 * Once the cooldown has elapsed the record is deleted, so the failure count
 * starts again from zero.
 *
 * @param {string} toolName - Key into `_toolCircuits`.
 * @returns {boolean} `true` if the tool should be skipped, otherwise `false`.
 */
const _isCircuitOpen = (toolName) => {
  const record = _toolCircuits.get(toolName);
  if (!record || record.count < CIRCUIT_THRESHOLD) {
    return false;
  }

  const elapsedMs = Date.now() - record.tripTime;
  if (elapsedMs < CIRCUIT_RESET_MS) {
    return true; // Circuit still open
  }

  // Cooldown is over: drop the record so the tool may be tried again.
  _toolCircuits.delete(toolName);
  logger.info(`[Agent] Circuit breaker reset for tool: ${toolName}`);
  return false;
};
70+
/**
 * Records one failure for a tool and trips its circuit breaker once the
 * failure count reaches CIRCUIT_THRESHOLD.
 *
 * When the threshold is reached, `tripTime` is set to the current time, a
 * warning is logged and a 'tool:circuit_open' event is emitted on EventBus.
 *
 * @param {string} toolName - Key into `_toolCircuits`.
 * @param {*} error - Stored as `lastError` and forwarded in the event payload.
 * @returns {void}
 */
const _recordToolFailure = (toolName, error) => {
  // First failure for this tool starts from an empty record.
  const record = _toolCircuits.get(toolName) ?? { count: 0, lastError: null, tripTime: 0 };
  record.count += 1;
  record.lastError = error;

  if (record.count >= CIRCUIT_THRESHOLD) {
    record.tripTime = Date.now();
    logger.warn(`[Agent] Circuit breaker TRIPPED for tool: ${toolName} after ${record.count} failures`);
    EventBus.emit('tool:circuit_open', { tool: toolName, failures: record.count, error });
  }

  _toolCircuits.set(toolName, record);
};
84+
/**
 * Clears any recorded failures for a tool after it has run successfully.
 *
 * @param {string} toolName - Key into `_toolCircuits`.
 * @returns {void}
 */
const _recordToolSuccess = (toolName) => {
  // Map#delete is a no-op for a missing key, so no has() check is needed.
  _toolCircuits.delete(toolName);
};
91+
/**
 * Removes every circuit-breaker record, for all tools at once.
 * @returns {void}
 */
const _resetCircuits = () => {
  _toolCircuits.clear();
};
95+
/**
 * Updates the stuck-loop counters in `_loopHealth` for one iteration and
 * reports whether the loop should be interrupted.
 *
 * Two conditions are tracked:
 *  1. MAX_NO_PROGRESS_ITERATIONS iterations in a row without any tool call
 *     -> action 'request_summary'.
 *  2. Three responses in a row that are shorter than 50 characters, contain
 *     no tool call, and occur after iteration 3 -> action 'force_stop'.
 *     A short response that does carry a tool call is treated as progress and
 *     resets this counter.
 *
 * @param {number} iteration - Current loop iteration number.
 * @param {number} toolCallCount - Number of tool calls found in the response.
 * @param {number} responseLength - Length of the response being checked.
 * @returns {{stuck: false} | {stuck: true, reason: string, action: string}}
 *   Verdict for this iteration.
 */
const _checkLoopHealth = (iteration, toolCallCount, responseLength) => {
  const shortResponseChars = 50;
  const warmupIterations = 3;
  const maxShortResponses = 3;

  // Record the length first so it is stored on every return path.
  _loopHealth.lastResponseLength = responseLength;

  const noToolCalls = toolCallCount === 0;

  // Check 1: No tool calls for too many iterations
  if (noToolCalls) {
    _loopHealth.consecutiveNoToolCalls += 1;
    if (_loopHealth.consecutiveNoToolCalls >= MAX_NO_PROGRESS_ITERATIONS) {
      return {
        stuck: true,
        reason: `No tool calls for ${MAX_NO_PROGRESS_ITERATIONS} consecutive iterations`,
        action: 'request_summary',
      };
    }
  } else {
    _loopHealth.consecutiveNoToolCalls = 0;
  }

  // Check 2: Response getting very short (model degradation).
  // Only counted when the response also made no tool call; otherwise terse
  // but productive iterations would be mistaken for degradation.
  const isDegraded =
    noToolCalls && responseLength < shortResponseChars && iteration > warmupIterations;
  if (isDegraded) {
    _loopHealth.repeatedShortResponses += 1;
    if (_loopHealth.repeatedShortResponses >= maxShortResponses) {
      return {
        stuck: true,
        reason: 'Model producing very short responses repeatedly',
        action: 'force_stop',
      };
    }
  } else {
    _loopHealth.repeatedShortResponses = 0;
  }

  return { stuck: false };
};
128+
33129 const _pushActivity = ( entry ) => {
34130 _activityLog . push ( { ts : Date . now ( ) , ...entry } ) ;
35131 if ( _activityLog . length > MAX_ACTIVITY_LOG ) {
@@ -43,6 +139,8 @@ const AgentLoop = {
43139
44140 _isRunning = true ;
45141 _abortController = new AbortController ( ) ;
142+ _resetLoopHealth ( ) ;
143+ _resetCircuits ( ) ;
46144
47145 logger . info ( `[Agent] Starting cycle. Goal: "${ goal } "` ) ;
48146 EventBus . emit ( 'agent:status' , { state : 'STARTING' , activity : 'Initializing...' } ) ;
@@ -81,6 +179,11 @@ const AgentLoop = {
81179
82180 context = await ContextManager . compact ( context , _modelConfig ) ;
83181
182+ // Emit token count for UI
183+ if ( ContextManager . emitTokens ) {
184+ ContextManager . emitTokens ( context ) ;
185+ }
186+
84187 let llmResponseText = '' ;
85188 const streamCallback = ( text ) => {
86189 EventBus . emit ( 'agent:stream' , text ) ;
@@ -101,6 +204,36 @@ const AgentLoop = {
101204 const toolCalls = ResponseParser . parseToolCalls ( responseContent ) ;
102205 context . push ( { role : 'assistant' , content : responseContent } ) ;
103206
207+ // Check for stuck loop
208+ const healthCheck = _checkLoopHealth ( iteration , toolCalls . length , responseContent . length ) ;
209+ if ( healthCheck . stuck ) {
210+ logger . warn ( `[Agent] STUCK LOOP DETECTED: ${ healthCheck . reason } ` ) ;
211+ EventBus . emit ( 'agent:warning' , {
212+ type : 'stuck_loop' ,
213+ reason : healthCheck . reason ,
214+ cycle : iteration
215+ } ) ;
216+
217+ if ( healthCheck . action === 'request_summary' ) {
218+ // Ask model to summarize and conclude
219+ context . push ( {
220+ role : 'user' ,
221+ content : 'SYSTEM: You appear to be stuck without making progress. Please summarize what you have accomplished so far and what remains to be done, then stop.'
222+ } ) ;
223+ // Get one more response then exit
224+ try {
225+ const summaryResponse = await LLMClient . chat ( context , _modelConfig ) ;
226+ _pushActivity ( { kind : 'stuck_summary' , cycle : iteration , content : summaryResponse . content } ) ;
227+ EventBus . emit ( 'agent:history' , { type : 'llm_response' , cycle : iteration , content : summaryResponse . content } ) ;
228+ } catch ( e ) {
229+ logger . error ( '[Agent] Failed to get summary response' , e ) ;
230+ }
231+ break ;
232+ } else if ( healthCheck . action === 'force_stop' ) {
233+ break ;
234+ }
235+ }
236+
104237 if ( toolCalls . length > 0 ) {
105238 let executedTools = 0 ;
106239 for ( const call of toolCalls ) {
@@ -113,16 +246,82 @@ const AgentLoop = {
113246
114247 if ( _abortController . signal . aborted ) break ;
115248
249+ // Check circuit breaker before executing
250+ if ( _isCircuitOpen ( call . name ) ) {
251+ const circuitRecord = _toolCircuits . get ( call . name ) ;
252+ const remainingMs = CIRCUIT_RESET_MS - ( Date . now ( ) - circuitRecord . tripTime ) ;
253+ const remainingSec = Math . ceil ( remainingMs / 1000 ) ;
254+ logger . warn ( `[Agent] Circuit breaker OPEN for ${ call . name } - skipping (${ remainingSec } s remaining)` ) ;
255+
256+ const skipMsg = `Tool ${ call . name } is temporarily disabled due to repeated failures. Last error: ${ circuitRecord . lastError } . Will retry in ${ remainingSec } s.` ;
257+ context . push ( { role : 'user' , content : `TOOL_RESULT (${ call . name } ):\nError: ${ skipMsg } ` } ) ;
258+ EventBus . emit ( 'tool:circuit_skip' , { tool : call . name , remainingMs, lastError : circuitRecord . lastError } ) ;
259+ continue ;
260+ }
261+
116262 logger . info ( `[Agent] Tool Call: ${ call . name } ` ) ;
117263 EventBus . emit ( 'agent:status' , { state : 'ACTING' , activity : `Executing tool: ${ call . name } ` } ) ;
118264
119265 let result ;
120- try {
121- const rawResult = await ToolRunner . execute ( call . name , call . args ) ;
122- result = typeof rawResult === 'string' ? rawResult : JSON . stringify ( rawResult , null , 2 ) ;
123- } catch ( err ) {
124- logger . error ( `[Agent] Tool Error: ${ call . name } ` , err ) ;
125- result = `Error: ${ err . message } ` ;
266+ const MAX_RETRIES = 2 ;
267+ let lastError = null ;
268+
269+ // Helper to execute with timeout
270+ const executeWithTimeout = async ( ) => {
271+ return Promise . race ( [
272+ ToolRunner . execute ( call . name , call . args ) ,
273+ new Promise ( ( _ , reject ) =>
274+ setTimeout ( ( ) => reject ( new Error ( `Tool timeout after ${ TOOL_EXECUTION_TIMEOUT_MS } ms` ) ) , TOOL_EXECUTION_TIMEOUT_MS )
275+ )
276+ ] ) ;
277+ } ;
278+
279+ for ( let attempt = 0 ; attempt <= MAX_RETRIES ; attempt ++ ) {
280+ try {
281+ const toolStartTime = Date . now ( ) ;
282+ const rawResult = await executeWithTimeout ( ) ;
283+ const toolDuration = Date . now ( ) - toolStartTime ;
284+
285+ // Warn on slow tools
286+ if ( toolDuration > TOOL_EXECUTION_TIMEOUT_MS * 0.7 ) {
287+ logger . warn ( `[Agent] Slow tool: ${ call . name } took ${ toolDuration } ms` ) ;
288+ EventBus . emit ( 'tool:slow' , { tool : call . name , ms : toolDuration , cycle : iteration } ) ;
289+ }
290+
291+ result = typeof rawResult === 'string' ? rawResult : JSON . stringify ( rawResult , null , 2 ) ;
292+ // Validate serialization didn't produce undefined
293+ if ( result === 'undefined' || result === undefined ) {
294+ result = '(Tool returned no output)' ;
295+ }
296+ lastError = null ;
297+ break ;
298+ } catch ( err ) {
299+ lastError = err ;
300+ const isTimeout = err . message ?. includes ( 'timeout' ) ;
301+
302+ if ( isTimeout ) {
303+ logger . error ( `[Agent] Tool ${ call . name } TIMEOUT - exceeded ${ TOOL_EXECUTION_TIMEOUT_MS } ms` ) ;
304+ result = `Error: Tool execution timed out after ${ TOOL_EXECUTION_TIMEOUT_MS / 1000 } s. The operation may still be running.` ;
305+ EventBus . emit ( 'tool:timeout' , { tool : call . name , timeout : TOOL_EXECUTION_TIMEOUT_MS , cycle : iteration } ) ;
306+ break ; // Don't retry on timeout
307+ }
308+
309+ if ( attempt < MAX_RETRIES ) {
310+ logger . warn ( `[Agent] Tool ${ call . name } failed (attempt ${ attempt + 1 } /${ MAX_RETRIES + 1 } ), retrying...` ) ;
311+ await new Promise ( r => setTimeout ( r , 100 * ( attempt + 1 ) ) ) ; // Linear backoff: 100 ms, then 200 ms
312+ }
313+ }
314+ }
315+
316+ if ( lastError && ! result ) {
317+ logger . error ( `[Agent] Tool Error: ${ call . name } ` , lastError ) ;
318+ result = `Error: ${ lastError . message } ` ;
319+ EventBus . emit ( 'tool:error' , { tool : call . name , error : lastError . message , cycle : iteration } ) ;
320+ // Record failure for circuit breaker
321+ _recordToolFailure ( call . name , lastError . message ) ;
322+ } else if ( ! lastError ) {
323+ // Record success - resets circuit breaker count
324+ _recordToolSuccess ( call . name ) ;
126325 }
127326
128327 // Smart truncation
0 commit comments