@@ -9,6 +9,7 @@
 from io import BytesIO
 from pathlib import Path
 from typing import cast
+from uuid import UUID
 
 import httpx
 import numpy as np
@@ -387,25 +388,28 @@ def accum(x) -> None:
         outputs.append(x)
 
     llm = s.get_llm()
+    messages = [Message(content="The duck says")]
 
-    messages = [
-        Message(content="The duck says"),
-    ]
-    completion = await llm.call_single(
-        messages=messages,
-        callbacks=[accum],
-    )
+    # With callbacks uses streaming
+    completion = await llm.call_single(messages=messages, callbacks=[accum])
+    first_id = completion.id
+    assert isinstance(first_id, UUID)
+    assert completion.text
     assert completion.seconds_to_first_token > 0
     assert completion.prompt_count > 0
     assert completion.completion_count > 0
     assert str(completion) == "".join(outputs)
+    assert completion.cost > 0
 
-    completion = await llm.call_single(
-        messages=messages,
-    )
+    # Without callbacks we don't use streaming
+    completion = await llm.call_single(messages=messages)
+    assert isinstance(completion.id, UUID)
+    assert completion.id != first_id, "Expected different response ID"
+    assert completion.text
     assert completion.seconds_to_first_token == 0
     assert completion.seconds_to_last_token > 0
-
+    assert completion.prompt_count > 0
+    assert completion.completion_count > 0
     assert completion.cost > 0
 
 
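For context, a minimal usage sketch of the behavior this test exercises: call_single streams and invokes callbacks when they are passed, and returns a complete response otherwise. Only get_llm, Message, call_single, and the asserted attributes come from the diff above; the surrounding harness is hypothetical and not this repo's documented example.

# Sketch only: `llm` is assumed to come from the same fixture the test uses
# (s.get_llm()); Message and call_single are the names from the diff.
async def demo(llm) -> None:
    chunks: list[str] = []
    messages = [Message(content="The duck says")]

    # Streaming path: each generated text chunk is handed to the callbacks,
    # so timing fields like seconds_to_first_token are populated.
    streamed = await llm.call_single(messages=messages, callbacks=[chunks.append])
    assert str(streamed) == "".join(chunks)

    # Non-streaming path: no callbacks, so seconds_to_first_token stays 0,
    # while token counts and cost are still tracked on the result.
    plain = await llm.call_single(messages=messages)
    assert plain.id != streamed.id  # each call carries its own response UUID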