Skip to content

Commit 91912cc

Browse files
fix t2i (#4163)
Co-authored-by: Yuanle Liu <[email protected]>
1 parent cc6e14d commit 91912cc

File tree

5 files changed

+8
-2
lines changed

5 files changed

+8
-2
lines changed

fastdeploy/config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -890,7 +890,9 @@ def __init__(self, args):
890890
self.kv_cache_ratio = 1.0
891891
else:
892892
self.kv_cache_ratio = 0.75
893-
self.enc_dec_block_num = 0 if current_platform.is_iluvatar() or current_platform.is_maca() else 2
893+
self.enc_dec_block_num = (
894+
0 if current_platform.is_iluvatar() or current_platform.is_maca() else envs.FD_ENC_DEC_BLOCK_NUM
895+
)
894896
self.prealloc_dec_block_slot_num_threshold = 12
895897
self.cache_dtype = "bfloat16"
896898
self.model_cfg = None

fastdeploy/engine/args_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -704,7 +704,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
704704
cache_group.add_argument(
705705
"--prealloc-dec-block-slot-num-threshold",
706706
type=int,
707-
default=12,
707+
default=EngineArgs.prealloc_dec_block_slot_num_threshold,
708708
help="Number of token slot threadshold to allocate next blocks for decoding.",
709709
)
710710

fastdeploy/engine/request.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ def to_dict(self):
304304
"index": self.index,
305305
"send_idx": self.send_idx,
306306
"token_ids": self.token_ids,
307+
"decode_type": self.decode_type,
307308
"logprob": self.logprob,
308309
"top_logprobs": self.top_logprobs,
309310
"logprobs": self.logprobs,

fastdeploy/envs.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@
8282
"EXPORTER_OTLP_HEADERS": lambda: os.getenv("EXPORTER_OTLP_HEADERS"),
8383
# enable kv cache block scheduler v1 (no need for kv_cache_ratio)
8484
"ENABLE_V1_KVCACHE_SCHEDULER": lambda: int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "1")),
85+
# Number of blocks to preallocate for the decoder (default: 2).
86+
"FD_ENC_DEC_BLOCK_NUM": lambda: int(os.getenv("FD_ENC_DEC_BLOCK_NUM", "2")),
8587
# Whether to use PLUGINS.
8688
"FD_PLUGINS": lambda: None if "FD_PLUGINS" not in os.environ else os.environ["FD_PLUGINS"].split(","),
8789
# set trace attribute job_id.

fastdeploy/scheduler/local_scheduler.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ def put_results(self, results: List[RequestOutput]):
306306
if response.request_id not in self.responses:
307307
self.responses[response.request_id] = [response]
308308
continue
309+
scheduler_logger.debug(f"append response {response.raw}")
309310
self.responses[response.request_id].append(response)
310311
self.responses_not_empty.notify_all()
311312

0 commit comments

Comments
 (0)