Skip to content

Commit 75aa408

Browse files
authored
Send evaluator logs to new session (#7206)
Also stop specifying "eval" mode since explicit project modes are deprecated
1 parent 0dc700e commit 75aa408

File tree

2 files changed

+50
-18
lines changed

2 files changed

+50
-18
lines changed

langchain/callbacks/tracers/evaluation.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from langchainplus_sdk import LangChainPlusClient, RunEvaluator
88

9+
from langchain.callbacks.manager import tracing_v2_enabled
910
from langchain.callbacks.tracers.base import BaseTracer
1011
from langchain.callbacks.tracers.schemas import Run
1112

@@ -27,6 +28,8 @@ class EvaluatorCallbackHandler(BaseTracer):
2728
If not specified, a new instance will be created.
2829
example_id : Union[UUID, str], optional
2930
The example ID to be associated with the runs.
31+
project_name : str, optional
32+
The LangSmith project name to organize eval chain runs under.
3033
3134
Attributes
3235
----------
@@ -40,6 +43,8 @@ class EvaluatorCallbackHandler(BaseTracer):
4043
The thread pool executor used for running the evaluators.
4144
futures : Set[Future]
4245
The set of futures representing the running evaluators.
46+
project_name : Optional[str]
47+
The LangSmith project name to organize eval chain runs under.
4348
"""
4449

4550
name = "evaluator_callback_handler"
@@ -50,6 +55,7 @@ def __init__(
5055
max_workers: Optional[int] = None,
5156
client: Optional[LangChainPlusClient] = None,
5257
example_id: Optional[Union[UUID, str]] = None,
58+
project_name: Optional[str] = None,
5359
**kwargs: Any,
5460
) -> None:
5561
super().__init__(**kwargs)
@@ -62,10 +68,24 @@ def __init__(
6268
max_workers=max(max_workers or len(evaluators), 1)
6369
)
6470
self.futures: Set[Future] = set()
71+
self.project_name = project_name
6572

66-
def _evaluate_run(self, run: Run, evaluator: RunEvaluator) -> None:
73+
def _evaluate_in_project(self, run: Run, evaluator: RunEvaluator) -> None:
74+
"""Evaluate the run in the project.
75+
76+
Parameters
77+
----------
78+
run : Run
79+
The run to be evaluated.
80+
evaluator : RunEvaluator
81+
The evaluator to use for evaluating the run.
82+
83+
"""
6784
try:
68-
self.client.evaluate_run(run, evaluator)
85+
if self.project_name is None:
86+
self.client.evaluate_run(run, evaluator)
87+
with tracing_v2_enabled(project_name=self.project_name):
88+
self.client.evaluate_run(run, evaluator)
6989
except Exception as e:
7090
logger.error(
7191
f"Error evaluating run {run.id} with "
@@ -86,7 +106,9 @@ def _persist_run(self, run: Run) -> None:
86106
run_ = run.copy()
87107
run_.reference_example_id = self.example_id
88108
for evaluator in self.evaluators:
89-
self.futures.add(self.executor.submit(self._evaluate_run, run_, evaluator))
109+
self.futures.add(
110+
self.executor.submit(self._evaluate_in_project, run_, evaluator)
111+
)
90112

91113
def wait_for_futures(self) -> None:
92114
"""Wait for all futures to complete."""

langchain/client/runner_utils.py

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -313,28 +313,35 @@ async def _callbacks_initializer(
313313
project_name: Optional[str],
314314
client: LangChainPlusClient,
315315
run_evaluators: Sequence[RunEvaluator],
316+
evaluation_handler_collector: List[EvaluatorCallbackHandler],
316317
) -> List[BaseTracer]:
317318
"""
318319
Initialize a tracer to share across tasks.
319320
320321
Args:
321322
project_name: The project name for the tracer.
323+
client: The client to use for the tracer.
324+
run_evaluators: The evaluators to run.
325+
evaluation_handler_collector: A list to collect the evaluator callback handlers.
326+
Used to wait for the evaluators to finish.
322327
323328
Returns:
324-
A LangChainTracer instance with an active project.
329+
The callbacks for this thread.
325330
"""
326331
callbacks: List[BaseTracer] = []
327332
if project_name:
328333
callbacks.append(LangChainTracer(project_name=project_name))
334+
evaluator_project_name = f"{project_name}-evaluators" if project_name else None
329335
if run_evaluators:
330-
callbacks.append(
331-
EvaluatorCallbackHandler(
332-
client=client,
333-
evaluators=run_evaluators,
334-
# We already have concurrency, don't want to overload the machine
335-
max_workers=1,
336-
)
336+
callback = EvaluatorCallbackHandler(
337+
client=client,
338+
evaluators=run_evaluators,
339+
# We already have concurrency, don't want to overload the machine
340+
max_workers=1,
341+
project_name=evaluator_project_name,
337342
)
343+
callbacks.append(callback)
344+
evaluation_handler_collector.append(callback)
338345
return callbacks
339346

340347

@@ -382,12 +389,9 @@ async def arun_on_examples(
382389
"""
383390
project_name = _get_project_name(project_name, llm_or_chain_factory, None)
384391
client_ = client or LangChainPlusClient()
385-
client_.create_project(project_name, mode="eval")
392+
client_.create_project(project_name)
386393

387394
results: Dict[str, List[Any]] = {}
388-
evaluation_handler = EvaluatorCallbackHandler(
389-
evaluators=run_evaluators or [], client=client_
390-
)
391395

392396
async def process_example(
393397
example: Example, callbacks: List[BaseCallbackHandler], job_state: dict
@@ -410,17 +414,20 @@ async def process_example(
410414
flush=True,
411415
)
412416

417+
evaluation_handlers: List[EvaluatorCallbackHandler] = []
413418
await _gather_with_concurrency(
414419
concurrency_level,
415420
functools.partial(
416421
_callbacks_initializer,
417422
project_name=project_name,
418423
client=client_,
424+
evaluation_handler_collector=evaluation_handlers,
419425
run_evaluators=run_evaluators or [],
420426
),
421427
*(functools.partial(process_example, e) for e in examples),
422428
)
423-
evaluation_handler.wait_for_futures()
429+
for handler in evaluation_handlers:
430+
handler.wait_for_futures()
424431
return results
425432

426433

@@ -581,10 +588,13 @@ def run_on_examples(
581588
results: Dict[str, Any] = {}
582589
project_name = _get_project_name(project_name, llm_or_chain_factory, None)
583590
client_ = client or LangChainPlusClient()
584-
client_.create_project(project_name, mode="eval")
591+
client_.create_project(project_name)
585592
tracer = LangChainTracer(project_name=project_name)
593+
evaluator_project_name = f"{project_name}-evaluators"
586594
evalution_handler = EvaluatorCallbackHandler(
587-
evaluators=run_evaluators or [], client=client_
595+
evaluators=run_evaluators or [],
596+
client=client_,
597+
project_name=evaluator_project_name,
588598
)
589599
callbacks: List[BaseCallbackHandler] = [tracer, evalution_handler]
590600
for i, example in enumerate(examples):

0 commit comments

Comments
 (0)