Skip to content

Commit 428e340

Browse files
authored
chore: remove unused variables in pyexecutor (#6280)
Signed-off-by: junq <[email protected]>
1 parent 31d3eff commit 428e340

File tree

2 files changed

+3
-17
lines changed

2 files changed

+3
-17
lines changed

tensorrt_llm/_torch/pyexecutor/executor_request_queue.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import torch
1111

1212
from tensorrt_llm._utils import nvtx_range
13-
from tensorrt_llm.bindings.executor import RequestType
1413

1514
from ..distributed import Distributed
1615
from .llm_request import ExecutorRequest, executor_request_to_llm_request
@@ -61,7 +60,6 @@ def __init__(self, dist: Distributed, enable_attention_dp: bool,
6160
self.num_fetch_requests_cur_rank = 0
6261
self.expected_num_active_requests = 0
6362
self.new_active_requests_queue_latency_ms = 0
64-
self.has_context_request = False
6563
self.is_shutdown = False
6664
self.should_exclude_last_generation_logits = False
6765

@@ -318,7 +316,6 @@ def _balance_requests_across_ranks(
318316
self, new_requests: List[RequestQueueItem],
319317
all_ranks_num_active_requests: List[int]) -> List[RequestQueueItem]:
320318
"""Balance requests across ranks for attention DP."""
321-
self.has_context_request = False
322319
new_requests_cur_rank = []
323320

324321
if new_requests and self.expected_num_active_requests > all_ranks_num_active_requests[
@@ -364,15 +361,6 @@ def _balance_requests_across_ranks(
364361
elif val.rank == self.dist.tp_rank:
365362
break
366363

367-
# Check for context requests
368-
if self.is_disaggregated:
369-
for req_item in new_requests_cur_rank:
370-
if req_item.request.request_type == RequestType.REQUEST_TYPE_CONTEXT_ONLY:
371-
self.has_context_request = True
372-
break
373-
else:
374-
self.has_context_request = len(new_requests_cur_rank) > 0
375-
376364
return new_requests_cur_rank
377365

378366
def _collect_py_objects_from_requests(

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,6 @@ def __init__(self,
169169
self.draft_model_engine = draft_model_engine
170170

171171
# enqueue and _fetch_new_requests used data
172-
self.active = True
173172
self.next_req_id = max_batch_size # The first max_batch_size request IDs are reserved for dummy requests
174173
self.max_beam_width = max_beam_width
175174
self.max_draft_len = max_draft_len
@@ -196,7 +195,6 @@ def __init__(self,
196195
self.max_num_active_requests = model_engine.get_max_num_sequences()
197196
self.active_requests: List[LlmRequest] = []
198197
self.expected_num_active_requests = 0
199-
self.has_context_request = False
200198
self.ctx_in_transmission_requests = []
201199
self.previous_batch: Optional[BatchState] = None
202200
self.num_scheduled_requests: int = 0
@@ -1148,7 +1146,7 @@ def _check_disagg_gen_transfer_status(self):
11481146
@nvtx_range("_pad_attention_dp_dummy_request")
11491147
def _pad_attention_dp_dummy_request(self):
11501148
"""
1151-
Pad with a dummy request, if required, to ensure every attention_dp rank has at least one active request.
1149+
Pad with a generation dummy request, if required, to ensure every attention_dp rank has at least one active request.
11521150
"""
11531151
if not self.enable_attention_dp:
11541152
return
@@ -1166,8 +1164,8 @@ def _pad_attention_dp_dummy_request(self):
11661164
if self.expected_num_active_requests - num_active_request > 0 and num_active_request == 0:
11671165
llm_request = self.kv_cache_manager.add_dummy_requests(
11681166
request_ids=[0],
1169-
is_gen=not self.has_context_request,
1170-
prepare_resource=not self.has_context_request,
1167+
is_gen=True,
1168+
prepare_resource=True,
11711169
max_num_draft_tokens=self.max_draft_len,
11721170
)[0]
11731171
llm_request.is_attention_dp_dummy = True

0 commit comments

Comments (0)