@@ -313,28 +313,35 @@ async def _callbacks_initializer(
313313 project_name : Optional [str ],
314314 client : LangChainPlusClient ,
315315 run_evaluators : Sequence [RunEvaluator ],
316+ evaluation_handler_collector : List [EvaluatorCallbackHandler ],
316317) -> List [BaseTracer ]:
317318 """
318319 Initialize a tracer to share across tasks.
319320
320321 Args:
321322 project_name: The project name for the tracer.
323+ client: The client to use for the tracer.
324+ run_evaluators: The evaluators to run.
325+ evaluation_handler_collector: A list to collect the evaluators.
326+ Used to wait for the evaluators to finish.
322327
323328 Returns:
324- A LangChainTracer instance with an active project .
329+ The callbacks for this thread .
325330 """
326331 callbacks : List [BaseTracer ] = []
327332 if project_name :
328333 callbacks .append (LangChainTracer (project_name = project_name ))
334+ evaluator_project_name = f"{ project_name } -evaluators" if project_name else None
329335 if run_evaluators :
330- callbacks .append (
331- EvaluatorCallbackHandler (
332- client = client ,
333- evaluators = run_evaluators ,
334- # We already have concurrency, don't want to overload the machine
335- max_workers = 1 ,
336- )
336+ callback = EvaluatorCallbackHandler (
337+ client = client ,
338+ evaluators = run_evaluators ,
339+ # We already have concurrency, don't want to overload the machine
340+ max_workers = 1 ,
341+ project_name = evaluator_project_name ,
337342 )
343+ callbacks .append (callback )
344+ evaluation_handler_collector .append (callback )
338345 return callbacks
339346
340347
@@ -382,12 +389,9 @@ async def arun_on_examples(
382389 """
383390 project_name = _get_project_name (project_name , llm_or_chain_factory , None )
384391 client_ = client or LangChainPlusClient ()
385- client_ .create_project (project_name , mode = "eval" )
392+ client_ .create_project (project_name )
386393
387394 results : Dict [str , List [Any ]] = {}
388- evaluation_handler = EvaluatorCallbackHandler (
389- evaluators = run_evaluators or [], client = client_
390- )
391395
392396 async def process_example (
393397 example : Example , callbacks : List [BaseCallbackHandler ], job_state : dict
@@ -410,17 +414,20 @@ async def process_example(
410414 flush = True ,
411415 )
412416
417+ evaluation_handlers : List [EvaluatorCallbackHandler ] = []
413418 await _gather_with_concurrency (
414419 concurrency_level ,
415420 functools .partial (
416421 _callbacks_initializer ,
417422 project_name = project_name ,
418423 client = client_ ,
424+ evaluation_handler_collector = evaluation_handlers ,
419425 run_evaluators = run_evaluators or [],
420426 ),
421427 * (functools .partial (process_example , e ) for e in examples ),
422428 )
423- evaluation_handler .wait_for_futures ()
429+ for handler in evaluation_handlers :
430+ handler .wait_for_futures ()
424431 return results
425432
426433
@@ -581,10 +588,13 @@ def run_on_examples(
581588 results : Dict [str , Any ] = {}
582589 project_name = _get_project_name (project_name , llm_or_chain_factory , None )
583590 client_ = client or LangChainPlusClient ()
584- client_ .create_project (project_name , mode = "eval" )
591+ client_ .create_project (project_name )
585592 tracer = LangChainTracer (project_name = project_name )
593+ evaluator_project_name = f"{ project_name } -evaluators"
586594 evalution_handler = EvaluatorCallbackHandler (
587- evaluators = run_evaluators or [], client = client_
595+ evaluators = run_evaluators or [],
596+ client = client_ ,
597+ project_name = evaluator_project_name ,
588598 )
589599 callbacks : List [BaseCallbackHandler ] = [tracer , evalution_handler ]
590600 for i , example in enumerate (examples ):
0 commit comments