
Commit 227fed5

add helpful comments
Signed-off-by: Venky Ganesh <[email protected]>
1 parent cecdd61 commit 227fed5

File tree (4 files changed: +30 −16 lines)

  tensorrt_llm/executor/result.py
  tensorrt_llm/executor/worker.py
  tensorrt_llm/llmapi/llm.py
  tensorrt_llm/sampling_params.py

tensorrt_llm/executor/result.py

Lines changed: 12 additions & 6 deletions
@@ -244,10 +244,16 @@ def _handle_sequence(self,
             output.cumulative_logprob = response_tensors.cum_log_probs[src_idx]

         if logprobs_result:
-            output.prompt_logprobs = logprobs_result.prompt
+            output.prompt_logprobs = logprobs_result.prompt  # both backends
+            # The next line only matters for the TRT backend, where generation logprobs
+            # are calculated outside the engine, in logprobs_result.generation.
+            # For the PyTorch backend, generation logprobs are calculated in the sampler
+            # and are provided by response_tensors.log_probs in the lines below.
             output.logprobs = logprobs_result.generation

         if response_tensors.log_probs is not None:
+            # response_tensors.log_probs holds per-token generation logprobs that are
+            # coupled to the sampling strategy, hence they are provided by the sampler.
             output._last_logprobs_len = len(
                 output.logprobs) if output.logprobs is not None else 0
             output.logprobs = response_tensors.log_probs[src_idx]
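The ordering here matters: prompt logprobs always come from logprobs_result, while generation logprobs from logprobs_result are only the TRT-backend value, which the sampler-provided response_tensors.log_probs then overrides on PyTorch. A minimal sketch of that precedence, with a hypothetical helper name (the real code assigns fields on the output object directly):

```python
from typing import Optional

def resolve_generation_logprobs(
        postprocessed: Optional[list],     # like logprobs_result.generation (TRT path)
        sampler_provided: Optional[list],  # like response_tensors.log_probs[src_idx] (PyTorch path)
) -> Optional[list]:
    # Sampler-provided logprobs win: they are coupled to the sampling strategy.
    if sampler_provided is not None:
        return sampler_provided
    # Otherwise fall back to logprobs post-processed from generation logits (TRT).
    return postprocessed
```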
@@ -699,12 +705,12 @@ def compute_logprobs(
     output_token_ids: Optional[list[int]],
 ) -> LogProbsResult:
     """
-    Compute top-K logprobs and ranks for each token position.
+    Compute top-K logprobs from logits when the engine doesn't provide them directly.

-    Returns:
-        LogProbsResult, a NamedTuple containing:
-            - prompt: Optional[List[Dict[token_id, Logprob]]] logprobs for prompt tokens.
-            - generation: Optional[List[Dict[token_id, Logprob]]] logprobs for generated tokens.
+    Used for post-processing logits into logprobs:
+    - Prompt logprobs (from context_logits): always computed here.
+    - Generation logprobs (from generation_logits): computed here only when the backend does not produce them in its sampler (i.e., the TRT backend).
+    - Generation logprobs (PyTorch backend): not computed here; the sampler provides them.
     """

     def _topk_logprobs(logits: torch.Tensor, top_k: int,
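To make the post-processing concrete, here is a hedged sketch of the per-position computation compute_logprobs performs on raw logits: a log-softmax over the vocabulary followed by a top-k selection. The helper name and return layout are illustrative, not the library's exact implementation:

```python
import torch

def topk_logprobs_per_position(logits: torch.Tensor,
                               top_k: int) -> list[dict[int, float]]:
    # logits: [num_positions, vocab_size]
    logprobs = torch.log_softmax(logits.float(), dim=-1)
    top_vals, top_ids = torch.topk(logprobs, k=top_k, dim=-1)
    # One {token_id: logprob} dict per position, analogous to the
    # prompt/generation entries in LogProbsResult.
    return [
        {int(tok): float(val) for tok, val in zip(ids, vals)}
        for ids, vals in zip(top_ids, top_vals)
    ]
```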

tensorrt_llm/executor/worker.py

Lines changed: 12 additions & 4 deletions
@@ -1053,7 +1053,11 @@ def _get_params_for_first_rsp(
 def _get_logprobs(worker,
                   response: Union[tllm.Response, LlmResponse],
                   is_pytorch_backend=False) -> Optional[LogProbsResult]:
-    """Compute logprob and prompt logprob and clear out logits if applicable.
+    """Compute logprobs from response logits when needed.
+
+    Logprobs provenance varies by backend:
+    - PyTorch: generation logprobs are computed in the sampler; only prompt logprobs are computed here.
+    - TRT: both prompt and generation logprobs are computed here from logits.
     """

     logprobs_result = None
@@ -1066,10 +1070,14 @@ def _get_logprobs(worker,
     if logprob_params:
         if is_pytorch_backend:
             if not logprob_params.prompt_logprobs:
-                # generation logprobs are already calculated in PyTorch backend sampler
+                # PyTorch: generation logprobs are computed in the sampler, so no post-processing is needed
                 return
             else:
-                # Fallback: compute from context_logits if available
+                # PyTorch: compute only prompt logprobs from context logits.
+                # This can be done as a post-processing step instead of being coupled to the
+                # PyTorch engine, because the prompt_logprobs calculation is not complicated by
+                # generation sampling strategies; it is therefore simpler to do it here than in
+                # the PyTorch engine and to plumb it through the response.
                 logprobs_result = compute_logprobs(
                     logprob_params.prompt_logprobs, None,
                     response.result.context_logits, None, None)
@@ -1079,7 +1087,7 @@ def _get_logprobs(worker,
             response.clear_context_logits()
             return logprobs_result

-    # trt backend
+    # TRT backend: compute both prompt and generation logprobs from logits
     logprobs_result = compute_logprobs(logprob_params.prompt_logprobs,
                                        logprob_params.logprobs,
                                        response.result.context_logits,
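The branching in _get_logprobs reduces to a small decision table. A hedged sketch as a standalone helper (hypothetical name and signature, not the actual function) of which logprobs get post-processed here:

```python
def logprobs_to_postprocess(is_pytorch_backend: bool,
                            want_prompt_logprobs: bool,
                            want_generation_logprobs: bool) -> dict:
    if is_pytorch_backend:
        # PyTorch: generation logprobs already come from the sampler, so at most
        # prompt logprobs need to be derived from context logits here.
        return {"prompt": want_prompt_logprobs, "generation": False}
    # TRT: both prompt and generation logprobs are derived from logits here.
    return {"prompt": want_prompt_logprobs, "generation": want_generation_logprobs}
```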

tensorrt_llm/llmapi/llm.py

Lines changed: 0 additions & 6 deletions
@@ -569,12 +569,6 @@ def _check_arguments(self, prompt_len: int, query_len: int,
                          is_gen_only: bool) -> None:

         if self.args.backend in ["pytorch", "_autodeploy"]:
-            # TODO: remove these checks after PyTorch backend
-            # fully support TopK prompt and generation logprobs.
-            # if sampling_params.prompt_logprobs:
-            #     raise ValueError(
-            #         f"`prompt_logprobs` in sampling_params is not supported in the PyTorch backend yet. Received `prompt_logprobs={sampling_params.prompt_logprobs}`. Please unset this field."
-            #     )
             if sampling_params.logprobs and sampling_params.logprobs > 1:
                 raise ValueError(
                     f"PyTorch backend currently only supports `logprobs=1`. Received `logprobs={sampling_params.logprobs}` (Top{sampling_params.logprobs} logprobs). Please set `logprobs=1` in `sampling_params` instead."

tensorrt_llm/sampling_params.py

Lines changed: 6 additions & 0 deletions
@@ -8,6 +8,7 @@
 from pydantic import BaseModel

 from tensorrt_llm.bindings import executor as tllme
+from tensorrt_llm.logger import logger


 @dataclass(slots=True, kw_only=True)
@@ -453,6 +454,11 @@ def _get_output_config(self, is_pytorch_backend: bool = False) -> tllme.OutputCo
         # we need to internally enable context logits for prompt logprobs computation
         # They will be dropped after computation if the user didn't explicitly request them
         if self.prompt_logprobs and not self.return_context_logits:
+            logger.info(
+                "prompt_logprobs is requested but return_context_logits is False; "
+                "enabling context logits internally for prompt logprobs computation. "
+                "Context logits will be dropped after computation since the user did not explicitly request them."
+            )
             config_kwargs["return_context_logits"] = True
         else:
             config_kwargs["return_log_probs"] = self._return_log_probs
