8 changes: 7 additions & 1 deletion tensorrt_llm/_torch/attention_backend/trtllm.py
@@ -763,8 +763,14 @@ def prepare(self) -> None:
         self.kv_cache_block_offsets[:, :self.num_seqs].copy_(
             self.host_kv_cache_block_offsets[:, :self.num_seqs],
             non_blocking=True)
+
+        error_message = (
+            f"The max KV cache length of input sequences ({self.kv_lens[:self.num_seqs].max()}) "
+            f"exceeds the KV cache manager's maximum supported length "
+            f"({self.kv_cache_manager.max_seq_len}).")
+
         assert self.kv_lens[:self.num_seqs].max(
-        ) <= self.kv_cache_manager.max_seq_len, f"Please set max_seq_len to at least {self.kv_lens[:self.num_seqs].max()} for kv cache manager."
+        ) <= self.kv_cache_manager.max_seq_len, error_message

         self.kv_lens_cuda_runtime = self.kv_lens_cuda[:self.num_seqs]
         self.kv_lens_runtime = self.kv_lens[:self.num_seqs]
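
For context, a minimal standalone sketch of the assertion pattern this change introduces: build the descriptive message first, then pass it to the assert. The limit and the per-sequence KV lengths below are hypothetical values chosen for illustration, not taken from TensorRT-LLM.

    # Hypothetical values, not from TensorRT-LLM.
    max_seq_len = 2048                 # hypothetical KV cache manager limit
    kv_lens = [1024, 4096, 512]        # hypothetical per-sequence KV lengths
    num_seqs = len(kv_lens)

    # Build the descriptive message first, then use it in the assert, mirroring
    # the diff: the message reports both the offending length and the limit.
    error_message = (
        f"The max KV cache length of input sequences ({max(kv_lens[:num_seqs])}) "
        f"exceeds the KV cache manager's maximum supported length "
        f"({max_seq_len}).")

    # With these values the check fails (4096 > 2048), so the assert raises:
    # AssertionError: The max KV cache length of input sequences (4096) exceeds
    # the KV cache manager's maximum supported length (2048).
    assert max(kv_lens[:num_seqs]) <= max_seq_len, error_message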