Commit 383526f

Merge branch 'main' into clean_unused
2 parents 4d78f1e + 6268a60

File tree: 14 files changed, +201 / -132 lines

cpp/tensorrt_llm/nanobind/batch_manager/bindings.cpp

Lines changed: 4 additions & 4 deletions

@@ -57,8 +57,8 @@ void initBindings(nb::module_& m)
     using GenLlmReq = tb::GenericLlmRequest<runtime::ITensor::SharedPtr>;

     // Create and register exceptions in module scope
-    nb::exception<tb::PeftTaskNotCachedException>(m, "PeftTaskNotCachedException");
-    nb::exception<tr::LoraCacheFullException>(m, "LoraCacheFullException");
+    static nb::object peft_exc = nb::exception<tb::PeftTaskNotCachedException>(m, "PeftTaskNotCachedException");
+    static nb::object lora_exc = nb::exception<tr::LoraCacheFullException>(m, "LoraCacheFullException");

     // Register with no captures
     nb::register_exception_translator(
@@ -71,11 +71,11 @@ void initBindings(nb::module_& m)
            }
            catch (const tb::PeftTaskNotCachedException& e)
            {
-                PyErr_SetString(nb::type<tb::PeftTaskNotCachedException>().ptr(), e.what());
+                PyErr_SetString(peft_exc.ptr(), e.what());
            }
            catch (const tr::LoraCacheFullException& e)
            {
-                PyErr_SetString(nb::type<tr::LoraCacheFullException>().ptr(), e.what());
+                PyErr_SetString(lora_exc.ptr(), e.what());
            }
        });

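The static nb::object handles keep the registered exception types alive for the translator lambda, which is registered without captures. From Python the change is invisible: callers keep catching the two exceptions by name. A minimal usage sketch, assuming the bindings module is importable as tensorrt_llm.bindings and using a hypothetical fetch_peft_task call (neither is confirmed by this commit):

# Hypothetical usage sketch; the import path and fetch_peft_task() call are
# illustrative assumptions, not taken from this commit.
try:
    from tensorrt_llm.bindings import PeftTaskNotCachedException, LoraCacheFullException
except ImportError:  # allow the sketch to run without the compiled extension
    class PeftTaskNotCachedException(RuntimeError): ...
    class LoraCacheFullException(RuntimeError): ...

def load_peft_task(executor, task_id):
    """Translate the C++ cache exceptions into a soft failure."""
    try:
        return executor.fetch_peft_task(task_id)  # hypothetical method name
    except PeftTaskNotCachedException:
        print(f"PEFT task {task_id} is not in the cache; schedule a reload")
    except LoraCacheFullException:
        print("LoRA cache is full; free older tasks before retrying")
    return None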
cpp/tensorrt_llm/nanobind/executor/request.cpp

Lines changed: 15 additions & 4 deletions

@@ -210,10 +210,21 @@ void initRequestBindings(nb::module_& m)
            nb::cast<std::optional<std::vector<tle::AdditionalModelOutput>>>(state[6]));
    };
    nb::class_<tle::OutputConfig>(m, "OutputConfig")
-        .def(nb::init<bool, bool, bool, bool, bool, bool, std::optional<std::vector<tle::AdditionalModelOutput>>>(),
-            nb::arg("return_log_probs").none() = false, nb::arg("return_context_logits") = false,
-            nb::arg("return_generation_logits") = false, nb::arg("exclude_input_from_output") = false,
-            nb::arg("return_encoder_output") = false, nb::arg("return_perf_metrics") = false,
+        .def(
+            "__init__",
+            [](tle::OutputConfig& self, std::optional<bool> return_log_probs, std::optional<bool> return_context_logits,
+                std::optional<bool> return_generation_logits, std::optional<bool> exclude_input_from_output,
+                std::optional<bool> return_encoder_output, std::optional<bool> return_perf_metrics,
+                std::optional<std::vector<tle::AdditionalModelOutput>> additional_model_outputs)
+            {
+                new (&self) tle::OutputConfig(return_log_probs.value_or(false), return_context_logits.value_or(false),
+                    return_generation_logits.value_or(false), exclude_input_from_output.value_or(false),
+                    return_encoder_output.value_or(false), return_perf_metrics.value_or(false),
+                    additional_model_outputs);
+            },
+            nb::arg("return_log_probs") = nb::none(), nb::arg("return_context_logits") = nb::none(),
+            nb::arg("return_generation_logits") = nb::none(), nb::arg("exclude_input_from_output") = nb::none(),
+            nb::arg("return_encoder_output") = nb::none(), nb::arg("return_perf_metrics") = nb::none(),
            nb::arg("additional_model_outputs") = nb::none())
        .def_rw("return_log_probs", &tle::OutputConfig::returnLogProbs)
        .def_rw("return_context_logits", &tle::OutputConfig::returnContextLogits)

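The replacement constructor lets every flag arrive as None from Python and falls back to false via value_or, so callers can forward keyword arguments unconditionally even when they have no opinion about them. A sketch of the intended Python-side behavior, assuming the class is exposed as tensorrt_llm.bindings.executor.OutputConfig; a stand-in is included so the sketch runs without the compiled extension:

# Sketch only: falls back to a tiny stand-in when the compiled bindings are absent.
try:
    from tensorrt_llm.bindings.executor import OutputConfig  # assumed module path
except ImportError:
    class OutputConfig:  # stand-in mirroring the new None-tolerant defaults
        def __init__(self, return_log_probs=None, return_context_logits=None, **kwargs):
            self.return_log_probs = bool(return_log_probs or False)
            self.return_context_logits = bool(return_context_logits or False)

# None (or omitting the argument) now means "use the default", i.e. False.
cfg = OutputConfig(return_log_probs=None, return_context_logits=True)
assert cfg.return_log_probs is False
assert cfg.return_context_logits is True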
cpp/tensorrt_llm/pybind/executor/executorConfig.cpp

Lines changed: 2 additions & 4 deletions

@@ -424,7 +424,7 @@ void initConfigBindings(pybind11::module_& m)
        .value("MPI", tle::CacheTransceiverConfig::BackendType::MPI)
        .value("UCX", tle::CacheTransceiverConfig::BackendType::UCX)
        .value("NIXL", tle::CacheTransceiverConfig::BackendType::NIXL)
-        .def(py::init(
+        .def("from_string",
            [](std::string const& str)
            {
                if (str == "DEFAULT" || str == "default")
@@ -436,9 +436,7 @@ void initConfigBindings(pybind11::module_& m)
                if (str == "NIXL" || str == "nixl")
                    return tle::CacheTransceiverConfig::BackendType::NIXL;
                throw std::runtime_error("Invalid backend type: " + str);
-            }));
-
-    py::implicitly_convertible<std::string, tle::CacheTransceiverConfig::BackendType>();
+            });

    py::class_<tle::CacheTransceiverConfig>(m, "CacheTransceiverConfig")
        .def(py::init<std::optional<tle::CacheTransceiverConfig::BackendType>, std::optional<size_t>>(),

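Dropping py::implicitly_convertible means a bare string no longer silently becomes a BackendType; callers are expected to convert explicitly through the new from_string helper. A hedged sketch of the expected call site (the module path and the exact calling convention of from_string are assumptions, not confirmed here):

# Assumed usage; the real module path and method binding may differ slightly.
from tensorrt_llm.bindings.executor import CacheTransceiverConfig

BackendType = CacheTransceiverConfig.BackendType
backend = BackendType.from_string("UCX")   # explicit, replaces the implicit str conversion
print(backend)                             # expected: BackendType.UCX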
jenkins/L0_MergeRequest.groovy

Lines changed: 46 additions & 74 deletions

@@ -105,15 +105,13 @@ def EXTRA_STAGE_LIST = "extra_stage"
 @Field
 def MULTI_GPU_FILE_CHANGED = "multi_gpu_file_changed"
 @Field
-def ONLY_PYTORCH_FILE_CHANGED = "only_pytorch_file_changed"
+def ONLY_ONE_GROUP_CHANGED = "only_one_group_changed"
 @Field
 def AUTO_TRIGGER_TAG_LIST = "auto_trigger_tag_list"
 @Field
 def DEBUG_MODE = "debug"
 @Field
 def DETAILED_LOG = "detailed_log"
-@Field
-def ONLY_DOCS_FILE_CHANGED = "only_docs_file_changed"

 def testFilter = [
     (REUSE_STAGE_LIST): trimForStageList(gitlabParamsFromBot.get(REUSE_STAGE_LIST, null)?.tokenize(',')),
@@ -127,11 +125,10 @@ def testFilter = [
     (DISABLE_MULTI_GPU_TEST): gitlabParamsFromBot.get((DISABLE_MULTI_GPU_TEST), false),
     (EXTRA_STAGE_LIST): trimForStageList(gitlabParamsFromBot.get((EXTRA_STAGE_LIST), null)?.tokenize(',')),
     (MULTI_GPU_FILE_CHANGED): false,
-    (ONLY_PYTORCH_FILE_CHANGED): false,
+    (ONLY_ONE_GROUP_CHANGED): "",
     (DEBUG_MODE): gitlabParamsFromBot.get(DEBUG_MODE, false),
     (AUTO_TRIGGER_TAG_LIST): [],
     (DETAILED_LOG): gitlabParamsFromBot.get(DETAILED_LOG, false),
-    (ONLY_DOCS_FILE_CHANGED): false,
 ]

 String reuseBuild = gitlabParamsFromBot.get('reuse_build', null)
@@ -324,9 +321,8 @@ def setupPipelineEnvironment(pipeline, testFilter, globalVars)
     echo "Env.gitlabMergeRequestLastCommit: ${env.gitlabMergeRequestLastCommit}."
     echo "Freeze GitLab commit. Branch: ${env.gitlabBranch}. Commit: ${env.gitlabCommit}."
     testFilter[(MULTI_GPU_FILE_CHANGED)] = getMultiGpuFileChanged(pipeline, testFilter, globalVars)
-    testFilter[(ONLY_PYTORCH_FILE_CHANGED)] = getOnlyPytorchFileChanged(pipeline, testFilter, globalVars)
+    testFilter[(ONLY_ONE_GROUP_CHANGED)] = getOnlyOneGroupChanged(pipeline, testFilter, globalVars)
     testFilter[(AUTO_TRIGGER_TAG_LIST)] = getAutoTriggerTagList(pipeline, testFilter, globalVars)
-    testFilter[(ONLY_DOCS_FILE_CHANGED)] = getOnlyDocsFileChanged(pipeline, testFilter, globalVars)
     getContainerURIs().each { k, v ->
         globalVars[k] = v
     }
@@ -644,86 +640,62 @@ def getMultiGpuFileChanged(pipeline, testFilter, globalVars)
     return relatedFileChanged
 }

-def getOnlyPytorchFileChanged(pipeline, testFilter, globalVars) {
+def getOnlyOneGroupChanged(pipeline, testFilter, globalVars) {
     def isOfficialPostMergeJob = (env.JOB_NAME ==~ /.*PostMerge.*/)
     if (env.alternativeTRT || isOfficialPostMergeJob) {
-        pipeline.echo("Force set ONLY_PYTORCH_FILE_CHANGED false.")
-        return false
+        pipeline.echo("Force set ONLY_ONE_GROUP_CHANGED \"\".")
+        return ""
     }
-    def pytorchOnlyList = [
-        "tensorrt_llm/_torch/",
-        "tensorrt_llm/scaffolding/",
-        "tests/unittest/_torch/",
-        "tests/unittest/scaffolding/",
-        "tests/unittest/llmapi/test_llm_pytorch.py",
-        "tests/unittest/llmapi/test_llm_multi_gpu_pytorch.py",
-        "tests/integration/defs/accuracy/test_llm_api_pytorch.py",
-        "tests/integration/defs/disaggregated/",
-        "examples/auto_deploy",
-        "examples/disaggregated",
-        "examples/pytorch/",
-        "examples/scaffolding/",
-        "docs/"
+    def groupFileMap = [
+        "Docs": [ // TODO: Add more docs path to the list, e.g. *.md files in other directories
+            "docs/",
+        ],
+        "PyTorch": [
+            "tensorrt_llm/_torch/",
+            "tensorrt_llm/scaffolding/",
+            "tests/unittest/_torch/",
+            "tests/unittest/scaffolding/",
+            "tests/unittest/llmapi/test_llm_pytorch.py",
+            "tests/unittest/llmapi/test_llm_multi_gpu_pytorch.py",
+            "tests/integration/defs/accuracy/test_llm_api_pytorch.py",
+            "tests/integration/defs/disaggregated/",
+            "examples/auto_deploy",
+            "examples/disaggregated",
+            "examples/pytorch/",
+            "examples/scaffolding/",
+            "docs/",
+        ],
+        "Triton": [
+            "tests/integration/defs/triton_server/",
+            "triton_backend/",
+        ],
     ]

     def changedFileList = getMergeRequestChangedFileList(pipeline, globalVars)
-
     if (!changedFileList || changedFileList.isEmpty()) {
-        return false
+        return ""
     }

-    def result = true
-    for (file in changedFileList) {
-        def isPytorchFile = false
-        for (prefix in pytorchOnlyList) {
-            if (file.startsWith(prefix)) {
-                isPytorchFile = true
-                break
-            }
+    for (group in groupFileMap.keySet()) {
+        def groupPrefixes = groupFileMap[group]
+        def allFilesInGroup = changedFileList.every { file ->
+            groupPrefixes.any { prefix -> file.startsWith(prefix) }
         }
-        if (!isPytorchFile) {
-            pipeline.echo("Found non-PyTorch file: ${file}")
-            result = false
-            break
-        }
-    }
-
-    pipeline.echo("Only PyTorch files changed: ${result}")
-    return result
-}
-
-def getOnlyDocsFileChanged(pipeline, testFilter, globalVars) {
-    def isOfficialPostMergeJob = (env.JOB_NAME ==~ /.*PostMerge.*/)
-    if (env.alternativeTRT || isOfficialPostMergeJob) {
-        pipeline.echo("Force set ONLY_DOCS_FILE_CHANGED false.")
-        return false
-    }
-
-    // TODO: Add more docs path to the list, e.g. *.md files in other directories
-    def docsFileList = [
-        "docs/",
-    ]
-
-    def changedFileList = getMergeRequestChangedFileList(pipeline, globalVars)
-    if (!changedFileList || changedFileList.isEmpty()) {
-        return false
-    }

-    for (file in changedFileList) {
-        def isDocsFile = false
-        for (prefix in docsFileList) {
-            if (file.startsWith(prefix)) {
-                isDocsFile = true
-                break
+        if (allFilesInGroup) {
+            pipeline.echo("Only ${group} files changed.")
+            return group
+        } else {
+            def nonGroupFile = changedFileList.find { file ->
+                !groupPrefixes.any { prefix -> file.startsWith(prefix) }
             }
+            if (nonGroupFile != null) {
+                pipeline.echo("Found non-${group} file: ${nonGroupFile}")
             }
-        }
-        if (!isDocsFile) {
-            pipeline.echo("Found non-docs file: ${file}")
-            return false
         }
     }
-    pipeline.echo("Only docs files changed.")
-    return true
+
+    return ""
 }

 def collectTestResults(pipeline, testFilter)
@@ -1040,7 +1012,7 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
         testStageName = "[Test-SBSA] Remote Run"
     }

-    if (testFilter[(ONLY_DOCS_FILE_CHANGED)]) {
+    if (testFilter[(ONLY_ONE_GROUP_CHANGED)] == "Docs") {
         echo "SBSA build job is skipped due to Jenkins configuration or conditional pipeline run"
         return
     }

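The new getOnlyOneGroupChanged collapses the two previous booleans into a single group name: the first group whose path prefixes cover every changed file wins, otherwise the empty string is returned and the full pipeline runs. The same check restated as a small Python sketch with abbreviated file lists (illustrative only; the pipeline itself uses the Groovy above):

# Illustrative restatement of the Groovy grouping check; lists are abbreviated.
GROUP_FILE_MAP = {
    "Docs": ["docs/"],
    "PyTorch": ["tensorrt_llm/_torch/", "examples/pytorch/", "docs/"],
    "Triton": ["tests/integration/defs/triton_server/", "triton_backend/"],
}

def get_only_one_group_changed(changed_files):
    """Return the first group whose prefixes cover every changed file, else ""."""
    if not changed_files:
        return ""
    for group, prefixes in GROUP_FILE_MAP.items():
        if all(any(f.startswith(p) for p in prefixes) for f in changed_files):
            return group
    return ""

print(get_only_one_group_changed(["docs/source/index.rst"]))         # Docs
print(get_only_one_group_changed(["tensorrt_llm/_torch/model.py"]))  # PyTorch
print(get_only_one_group_changed(["docs/a.md", "cpp/foo.cpp"]))      # "" -> run everything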
jenkins/L0_Test.groovy

Lines changed: 19 additions & 16 deletions

@@ -449,16 +449,14 @@ def EXTRA_STAGE_LIST = "extra_stage"
 @Field
 def MULTI_GPU_FILE_CHANGED = "multi_gpu_file_changed"
 @Field
-def ONLY_PYTORCH_FILE_CHANGED = "only_pytorch_file_changed"
+def ONLY_ONE_GROUP_CHANGED = "only_one_group_changed"
 @Field
 def AUTO_TRIGGER_TAG_LIST = "auto_trigger_tag_list"
 @Field
 def DEBUG_MODE = "debug"
 @Field
 def DETAILED_LOG = "detailed_log"
 @Field
-def ONLY_DOCS_FILE_CHANGED = "only_docs_file_changed"
-@Field
 def testFilter = [
     (REUSE_STAGE_LIST): null,
     (ENABLE_SKIP_TEST): false,
@@ -471,11 +469,10 @@ def testFilter = [
     (DISABLE_MULTI_GPU_TEST): false,
     (EXTRA_STAGE_LIST): null,
     (MULTI_GPU_FILE_CHANGED): false,
-    (ONLY_PYTORCH_FILE_CHANGED): false,
+    (ONLY_ONE_GROUP_CHANGED): "",
     (DEBUG_MODE): false,
     (AUTO_TRIGGER_TAG_LIST): [],
     (DETAILED_LOG): false,
-    (ONLY_DOCS_FILE_CHANGED): false,
 ]

 @Field
@@ -2209,22 +2206,28 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
        println parallelJobsFiltered.keySet()
    }

-    if (testFilter[(ONLY_PYTORCH_FILE_CHANGED)]) {
+    if (testFilter[(ONLY_ONE_GROUP_CHANGED)] == "Docs") {
+        echo "Only docs files are changed, run doc build stage only."
+        parallelJobsFiltered = docBuildJobs
+        println parallelJobsFiltered.keySet()
+    } else if (testFilter[(ONLY_ONE_GROUP_CHANGED)] != "") {
        if (testFilter[(TEST_BACKEND)] != null) {
-            echo "Force disable ONLY_PYTORCH_FILE_CHANGED mode. Backend mode set by flag: ${testFilter[(TEST_BACKEND)]}."
+            echo "Force disable ONLY_ONE_GROUP_CHANGED mode. Backend mode set by flag: ${testFilter[(TEST_BACKEND)]}."
        } else {
-            echo "ONLY_PYTORCH_FILE_CHANGED mode is true."
-            parallelJobsFiltered = parallelJobsFiltered.findAll { !it.key.contains("-CPP-") && !it.key.contains("-TensorRT-") }
+            echo "ONLY_ONE_GROUP_CHANGED mode is true. The group is: ${testFilter[(ONLY_ONE_GROUP_CHANGED)]}."
+            def excludedBackends = new HashMap()
+            excludedBackends["PyTorch"] = ["-CPP-", "-TensorRT-", "-Triton-"]
+            excludedBackends["Triton"] = ["-PyTorch-", "-CPP-", "-TensorRT-"]
+            def group = testFilter[(ONLY_ONE_GROUP_CHANGED)]
+            if (excludedBackends.containsKey(group)) {
+                parallelJobsFiltered = parallelJobsFiltered.findAll { key, value ->
+                    !excludedBackends[group].any { backend -> key.contains(backend) }
+                }
+            }
            println parallelJobsFiltered.keySet()
        }
    }

-    if (testFilter[(ONLY_DOCS_FILE_CHANGED)]) {
-        echo "Only docs files are changed, run doc build stage only."
-        parallelJobsFiltered = docBuildJobs
-        println parallelJobsFiltered.keySet()
-    }
-
    // Check --stage-list, only run the stages in stage-list.
    if (testFilter[TEST_STAGE_LIST] != null) {
        echo "Use TEST_STAGE_LIST for filtering. Stages: ${testFilter[(TEST_STAGE_LIST)]}."
@@ -2405,7 +2408,7 @@ pipeline {
            expression {
                // Only run the test list validation when necessary
                env.targetArch == X86_64_TRIPLE &&
-                testFilter[ONLY_DOCS_FILE_CHANGED] == false &&
+                testFilter[ONLY_ONE_GROUP_CHANGED] != "Docs" &&
                !(env.JOB_NAME ==~ /.*Multi-GPU.*/) &&
                !(env.JOB_NAME ==~ /.*BuildDockerImageSanityTest.*/)
            }

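On the test side, the single group name drives stage filtering: a "Docs"-only change runs just the doc build, while "PyTorch" or "Triton" changes drop stages whose names contain the other backends' markers. A Python sketch of that filtering rule (the stage names are invented for illustration):

# Sketch of the stage-filtering rule; stage names here are invented examples.
EXCLUDED_BACKENDS = {
    "PyTorch": ["-CPP-", "-TensorRT-", "-Triton-"],
    "Triton": ["-PyTorch-", "-CPP-", "-TensorRT-"],
}

def filter_stages(stages, group):
    markers = EXCLUDED_BACKENDS.get(group)
    if markers is None:  # unknown group; "Docs" is handled separately via docBuildJobs
        return stages
    return {name: job for name, job in stages.items()
            if not any(marker in name for marker in markers)}

stages = {"A100-PyTorch-1": "...", "A100-TensorRT-1": "...", "A100-Triton-1": "..."}
print(sorted(filter_stages(stages, "PyTorch")))  # ['A100-PyTorch-1']
print(sorted(filter_stages(stages, "Triton")))   # ['A100-Triton-1']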
tensorrt_llm/_torch/pyexecutor/llm_request.py

Lines changed: 1 addition & 0 deletions

@@ -303,6 +303,7 @@ def __init__(
         self.py_batch_idx = None
         self.py_rewind_len = 0
         self.py_draft_tokens = [] if self.draft_tokens is None else self.draft_tokens
+        self.py_last_context_chunk = (None, None)
         self.py_last_draft_tokens = None
         self.py_num_accepted_draft_tokens = 0
         self.py_decoding_iter = 0

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 4 additions & 0 deletions

@@ -1311,6 +1311,10 @@ def _update_request_states_tp(self, scheduled_requests: ScheduledRequests):

         for request in scheduled_requests.context_requests:
             if request.state != LlmRequestState.GENERATION_COMPLETE:  # skip failed requests
+                request.py_last_context_chunk = (
+                    request.context_current_position,
+                    request.context_current_position +
+                    request.context_chunk_size)
                 request.move_to_next_context_chunk()
                 if request.context_remaining_length == 0:
                     request.state = LlmRequestState.GENERATION_IN_PROGRESS

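Together, the two Python changes record the span of the most recent context chunk before the request advances: py_last_context_chunk holds the half-open [start, end) positions of the chunk that was just processed. A small stand-in illustrating the bookkeeping (FakeRequest is not the real LlmRequest; only the attribute names mirror the diff):

# FakeRequest is a stand-in for illustration only; it is not the real LlmRequest.
class FakeRequest:
    def __init__(self, chunk_size):
        self.context_current_position = 0
        self.context_chunk_size = chunk_size
        self.py_last_context_chunk = (None, None)  # matches the new default in llm_request.py

    def move_to_next_context_chunk(self):
        self.context_current_position += self.context_chunk_size

req = FakeRequest(chunk_size=256)
# What _update_request_states_tp now does before advancing the chunk:
req.py_last_context_chunk = (req.context_current_position,
                             req.context_current_position + req.context_chunk_size)
req.move_to_next_context_chunk()
print(req.py_last_context_chunk)  # (0, 256): the span that was just processed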