diff --git a/torchx/schedulers/aws_batch_scheduler.py b/torchx/schedulers/aws_batch_scheduler.py
index 3738df0c8..2777023b5 100644
--- a/torchx/schedulers/aws_batch_scheduler.py
+++ b/torchx/schedulers/aws_batch_scheduler.py
@@ -85,10 +85,10 @@ def _role_to_node_properties(idx: int, role: Role) -> Dict[str, object]:
         cpu = 1
     reqs.append({"type": "VCPU", "value": str(cpu)})
 
-    mem = resource.memMB
-    if mem <= 0:
-        mem = 1000
-    reqs.append({"type": "MEMORY", "value": str(mem)})
+    memMB = resource.memMB
+    if memMB <= 0:
+        memMB = 1000
+    reqs.append({"type": "MEMORY", "value": str(memMB)})
 
     if resource.gpu > 0:
         reqs.append({"type": "GPU", "value": str(resource.gpu)})
@@ -130,6 +130,11 @@ def _role_to_node_properties(idx: int, role: Role) -> Dict[str, object]:
         "image": role.image,
         "environment": [{"name": k, "value": v} for k, v in role.env.items()],
         "resourceRequirements": reqs,
+        "linuxParameters": {
+            # To support PyTorch dataloaders we need to set /dev/shm to larger
+            # than the 64M default.
+            "sharedMemorySize": memMB,
+        },
         "logConfiguration": {
             "logDriver": "awslogs",
         },
diff --git a/torchx/schedulers/docker_scheduler.py b/torchx/schedulers/docker_scheduler.py
index 31370a6c1..4ecb5655c 100644
--- a/torchx/schedulers/docker_scheduler.py
+++ b/torchx/schedulers/docker_scheduler.py
@@ -269,7 +269,11 @@ def _submit_dryrun(
                 }
                 resource = replica_role.resource
                 if resource.memMB >= 0:
-                    c.kwargs["mem_limit"] = f"{int(resource.memMB)}m"
+                    # To support PyTorch dataloaders we need to set /dev/shm to
+                    # larger than the 64M default.
+                    c.kwargs["mem_limit"] = c.kwargs[
+                        "shm_size"
+                    ] = f"{int(resource.memMB)}m"
                 if resource.cpu >= 0:
                     c.kwargs["nano_cpus"] = int(resource.cpu * 1e9)
                 if resource.gpu > 0:
diff --git a/torchx/schedulers/kubernetes_scheduler.py b/torchx/schedulers/kubernetes_scheduler.py
index accf143ba..76a8b72c0 100644
--- a/torchx/schedulers/kubernetes_scheduler.py
+++ b/torchx/schedulers/kubernetes_scheduler.py
@@ -172,6 +172,7 @@ def role_to_pod(name: str, role: Role, service_account: Optional[str]) -> "V1Pod
         V1Volume,
         V1HostPathVolumeSource,
         V1PersistentVolumeClaimVolumeSource,
+        V1EmptyDirVolumeSource,
     )
 
     requests = {}
@@ -189,8 +190,21 @@ def role_to_pod(name: str, role: Role, service_account: Optional[str]) -> "V1Pod
         requests=requests,
     )
 
-    volumes = []
-    volume_mounts = []
+    # To support PyTorch dataloaders we need to set /dev/shm to larger than the
+    # 64M default so we mount an unlimited sized tmpfs directory on it.
+    SHM_VOL = "dshm"
+    volumes = [
+        V1Volume(
+            name=SHM_VOL,
+            empty_dir=V1EmptyDirVolumeSource(
+                medium="Memory",
+            ),
+        ),
+    ]
+    volume_mounts = [
+        V1VolumeMount(name=SHM_VOL, mount_path="/dev/shm"),
+    ]
+
     for i, mount in enumerate(role.mounts):
         mount_name = f"mount-{i}"
         if isinstance(mount, BindMount):
diff --git a/torchx/schedulers/test/aws_batch_scheduler_test.py b/torchx/schedulers/test/aws_batch_scheduler_test.py
index 78e7eddaa..b54f90cf8 100644
--- a/torchx/schedulers/test/aws_batch_scheduler_test.py
+++ b/torchx/schedulers/test/aws_batch_scheduler_test.py
@@ -112,6 +112,9 @@ def test_submit_dryrun(self) -> None:
                         {"type": "MEMORY", "value": "3000"},
                         {"type": "GPU", "value": "4"},
                     ],
+                    "linuxParameters": {
+                        "sharedMemorySize": 3000,
+                    },
                     "logConfiguration": {"logDriver": "awslogs"},
                     "mountPoints": [
                         {
@@ -154,6 +157,9 @@ def test_submit_dryrun(self) -> None:
                         {"type": "MEMORY", "value": "3000"},
                         {"type": "GPU", "value": "4"},
                     ],
+                    "linuxParameters": {
+                        "sharedMemorySize": 3000,
+                    },
                     "logConfiguration": {"logDriver": "awslogs"},
                     "mountPoints": [
                         {
diff --git a/torchx/schedulers/test/docker_scheduler_test.py b/torchx/schedulers/test/docker_scheduler_test.py
index e8002095c..af76bd2fa 100644
--- a/torchx/schedulers/test/docker_scheduler_test.py
+++ b/torchx/schedulers/test/docker_scheduler_test.py
@@ -100,6 +100,7 @@ def test_submit_dryrun(self) -> None:
                             "torchx.pytorch.org/version": "0.1.2dev0",
                         },
                         "mem_limit": "3000m",
+                        "shm_size": "3000m",
                         "name": "app_name_42-trainer-0",
                         "hostname": "app_name_42-trainer-0",
                         "nano_cpus": int(2e9),
diff --git a/torchx/schedulers/test/kubernetes_scheduler_test.py b/torchx/schedulers/test/kubernetes_scheduler_test.py
index a7d1e4356..592a0999f 100644
--- a/torchx/schedulers/test/kubernetes_scheduler_test.py
+++ b/torchx/schedulers/test/kubernetes_scheduler_test.py
@@ -118,6 +118,7 @@ def test_role_to_pod(self) -> None:
             V1Volume,
             V1VolumeMount,
             V1HostPathVolumeSource,
+            V1EmptyDirVolumeSource,
         )
 
         app = _test_app()
@@ -148,11 +149,15 @@ def test_role_to_pod(self) -> None:
             resources=resources,
             ports=[V1ContainerPort(name="foo", container_port=1234)],
             volume_mounts=[
+                V1VolumeMount(
+                    name="dshm",
+                    mount_path="/dev/shm",
+                ),
                 V1VolumeMount(
                     name="mount-0",
                     mount_path="/dst",
                     read_only=True,
-                )
+                ),
             ],
         )
         want = V1Pod(
@@ -161,6 +166,12 @@ def test_role_to_pod(self) -> None:
                 restart_policy="Never",
                 service_account_name="srvacc",
                 volumes=[
+                    V1Volume(
+                        name="dshm",
+                        empty_dir=V1EmptyDirVolumeSource(
+                            medium="Memory",
+                        ),
+                    ),
                     V1Volume(
                         name="mount-0",
                         host_path=V1HostPathVolumeSource(
@@ -272,11 +283,16 @@ def test_submit_dryrun(self) -> None:
               memory: 3000M
               nvidia.com/gpu: '4'
           volumeMounts:
+          - mountPath: /dev/shm
+            name: dshm
           - mountPath: /dst
             name: mount-0
             readOnly: true
         restartPolicy: Never
         volumes:
+        - emptyDir:
+            medium: Memory
+          name: dshm
         - hostPath:
             path: /src
           name: mount-0
@@ -289,6 +305,7 @@ def test_volume_mounts(self) -> None:
             V1Volume,
             V1VolumeMount,
             V1PersistentVolumeClaimVolumeSource,
+            V1EmptyDirVolumeSource,
         )
 
         role = specs.Role(
@@ -302,6 +319,12 @@ def test_volume_mounts(self) -> None:
         self.assertEqual(
             pod.spec.volumes,
             [
+                V1Volume(
+                    name="dshm",
+                    empty_dir=V1EmptyDirVolumeSource(
+                        medium="Memory",
+                    ),
+                ),
                V1Volume(
                     name="mount-0",
                     persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
@@ -313,11 +336,15 @@ def test_volume_mounts(self) -> None:
         self.assertEqual(
             pod.spec.containers[0].volume_mounts,
             [
+                V1VolumeMount(
+                    name="dshm",
+                    mount_path="/dev/shm",
+                ),
                 V1VolumeMount(
                     name="mount-0",
                     mount_path="/dst",
                     read_only=True,
-                )
+                ),
             ],
         )
 
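
Background for the change above (illustration only, not part of the patch): PyTorch DataLoader worker processes hand batches back to the parent process through shared memory backed by /dev/shm, so the 64M default that Docker and Kubernetes give a container is easily exhausted once num_workers > 0 and batches are more than a few megabytes. A minimal sketch of the workload this patch accommodates; the tensor shapes, batch size, and worker count are arbitrary examples:

# sketch: DataLoader workers move batches through /dev/shm when num_workers > 0
import torch
from torch.utils.data import DataLoader, TensorDataset


def main() -> None:
    # Arbitrary example data: 256 "images" of shape 3x224x224 (~150MB total).
    dataset = TensorDataset(torch.randn(256, 3, 224, 224))
    # Two worker processes; each ~38MB batch is shared with the parent via /dev/shm,
    # which overflows a 64M shm mount almost immediately.
    loader = DataLoader(dataset, batch_size=64, num_workers=2)
    for (batch,) in loader:
        print(batch.shape)


if __name__ == "__main__":
    main()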