Skip to content

Commit 9eecdf2

Browse files
authored
[TRTLLM-7008][fix] cherrypick fix to 1.0 Add automatic shared memory delete if it already exists (#7433)
Signed-off-by: Dongxu Yang <[email protected]>
1 parent 95e0318 commit 9eecdf2

File tree

2 files changed

+21
-8
lines changed

2 files changed

+21
-8
lines changed

tensorrt_llm/_torch/modules/fused_moe/moe_load_balancer.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
import threading
23
from contextlib import nullcontext
34
from multiprocessing import resource_tracker, shared_memory
@@ -173,9 +174,20 @@ def finalize_layer_weights(self):
173174
total_size += aligned_size
174175

175176
shm_name = self.get_shared_memory_name()
176-
shm = shared_memory.SharedMemory(name=shm_name,
177-
create=True,
178-
size=total_size)
177+
try:
178+
shm = shared_memory.SharedMemory(name=shm_name,
179+
create=True,
180+
size=total_size)
181+
except FileExistsError:
182+
tensorrt_llm.logger.warning(
183+
f'Found exist EPLB shared memory name: {shm_name}, unlinking...'
184+
)
185+
existing_shm = shared_memory.SharedMemory(name=shm_name)
186+
existing_shm.close()
187+
existing_shm.unlink()
188+
shm = shared_memory.SharedMemory(name=shm_name,
189+
create=True,
190+
size=total_size)
179191
self.own_shm = shm
180192

181193
offset = 0
@@ -670,15 +682,15 @@ def __init__(self,
670682
ep_rank: int,
671683
ep_size: int,
672684
layer_updates_per_iter: int,
673-
shared_memory_base_name: str = 'moe_shared'):
685+
shared_memory_base_name: Optional[str] = None):
674686
"""
675687
Initialize a MoeLoadBalancer instance.
676688
677689
Args:
678690
ep_rank: The rank of the current process in expert parallelism
679691
ep_size: The total number of processes in expert parallelism
680692
layer_updates_per_iter: The number of layers to update per iteration
681-
shared_memory_base_name: Shared memory base name
693+
shared_memory_base_name: Shared memory base name, will use 'moe_shared' if None
682694
"""
683695
self.is_shutdown = True
684696
self.ep_rank = ep_rank
@@ -688,7 +700,8 @@ def __init__(self,
688700
layer_updates_per_iter)
689701
self._previous_balancer = None
690702
self.single_layer_load_balancers = []
691-
self.shared_memory_base_name = shared_memory_base_name
703+
self.shared_memory_base_name = shared_memory_base_name or os.getenv(
704+
'TRTLLM_EPLB_SHM_NAME', 'moe_shared')
692705
self._setup_mpi_comm()
693706
self.is_shutdown = False
694707

tests/integration/test_lists/test-db/l0_gb200.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ l0_gb200:
3333
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-pp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True]
3434
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=0-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
3535
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=0-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
36+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True]
37+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2]
3638
- condition:
3739
ranges:
3840
system_gpu_count:
@@ -64,5 +66,3 @@ l0_gb200:
6466
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-pp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
6567
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=2-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
6668
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=2-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
67-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True]
68-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2]

0 commit comments

Comments
 (0)