Skip to content

Commit 8e4192e

Browse files
committed
Version 1.4.0
1 parent 4a2cab6 commit 8e4192e

File tree

16 files changed

+296
-28
lines changed

16 files changed

+296
-28
lines changed

chart/mjs/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@ apiVersion: v2
33
name: mjs
44
description: A Helm chart for MATLAB (R) Job Scheduler in Kubernetes
55
type: application
6-
version: 1.3.0
7-
appVersion: 1.3.0
6+
version: 1.4.0
7+
appVersion: 1.4.0

chart/mjs/files/workergroup.config

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
import com.mathworks.toolbox.distcomp.mjs.service.ConfigUtil;
2+
import com.mathworks.toolbox.distcomp.control.PortConfig;
3+
import com.mathworks.util.PlatformInfo;
4+
import com.mathworks.toolbox.parallel.pctutil.logging.DistcompFileHandler;
5+
import com.mathworks.toolbox.parallel.pctutil.logging.DistcompSimpleFormatter;
6+
import com.mathworks.toolbox.parallel.pctutil.logging.DistcompLevel;
7+
8+
/* Copyright 2018-2024 The MathWorks, Inc. */
9+
10+
// IMPORTANT!!!: Please note that the system properties referenced here actually get set
11+
// in the start-workergroup.config file, are read by the service constructor on STDIN and then are
12+
// set using the java.lang.System.setProperty method before the service makes use of the
13+
// config file. This is because MATLAB does not accept -D arguments. In order to see a
14+
// property here, you need to add it to the String[] that is passed as the 6th argument
15+
// to the SharedActivatableServiceDescriptor in start-workergroup.config
16+
17+
com.mathworks.toolbox.distcomp.workergroup {
18+
private static configDir = "${com.mathworks.toolbox.distcomp.toolboxroot}${/}config${/}";
19+
20+
persistenceDirectory = "${com.mathworks.toolbox.distcomp.persistenceDir}";
21+
logDirectory = "${com.mathworks.toolbox.distcomp.logdir}";
22+
serviceName = "${com.mathworks.toolbox.distcomp.servicename}";
23+
24+
lookupServiceQueryIntervalSecs = 30;
25+
maxWaitBeforeShutdownSecs = 10;
26+
27+
codebase = "";
28+
policy = ConfigUtil.concat(new String[]{configDir,"jsk-all.policy"});
29+
minServiceExportPort = PortConfig.getMinDistcompServiceExportPort("${com.mathworks.toolbox.distcomp.base_port}");
30+
31+
// Each worker needs 2 ports, so need a large port range to accommodate
32+
// machines running many workers. This range will allow approximately 2000
33+
// workers.
34+
maxNumPorts = 4000;
35+
36+
defaultRmiClientConnectTimeoutSecs = 5;
37+
38+
logLevel = "${com.mathworks.toolbox.distcomp.loglevel}";
39+
40+
onDemand = "${com.mathworks.toolbox.distcomp.worker.onDemand}";
41+
idleKillTimeoutInSeconds = 5L;
42+
idleLicenseTimeoutInSeconds = 0L; // Immediately return licenses every time
43+
44+
// Messages logged via LOGGER.log() in the Worker JVM will end up in this file.
45+
serviceLogFilePattern = ConfigUtil.concat(new String[]{
46+
"${com.mathworks.toolbox.distcomp.logdir}${/}",
47+
"workergroup_",
48+
serviceName,
49+
".%u.%g", //unique number and sequence number
50+
".log"});
51+
serviceHandler = new DistcompFileHandler(serviceLogFilePattern,
52+
true,
53+
DistcompLevel.getLevelFromValue(Integer.parseInt(logLevel)),
54+
10,
55+
100000000,
56+
new DistcompSimpleFormatter());
57+
58+
securityLevel = "${com.mathworks.toolbox.distcomp.securityLevel}";
59+
securityDir = "${com.mathworks.toolbox.distcomp.securityDir}";
60+
61+
useSecureCommunication = Boolean.parseBoolean("${com.mathworks.toolbox.distcomp.rmi.useSecureCommunication}");
62+
63+
requireWebLicensing = Boolean.parseBoolean("${com.mathworks.toolbox.distcomp.requireWebLicensing}");
64+
65+
requireClientCertificate = Boolean.parseBoolean("${com.mathworks.toolbox.distcomp.rmi.requireClientCertificate}");
66+
67+
taskEvaluator = "com.mathworks.toolbox.distcomp.mjs.worker.matlab.VersionSwitchingTaskEvaluator";
68+
69+
matlabroot = "${com.mathworks.toolbox.distcomp.matlabroot}";
70+
71+
// These properties control how the Worker starts MATLAB.
72+
matlabExecutable = "${com.mathworks.toolbox.distcomp.matlabexecutable}";
73+
74+
// Use -noFigureWindows on Windows and -nodisplay everywhere else
75+
private static nodisplay = ConfigUtil.ifThenElse(
76+
/*if*/ PlatformInfo.isWindows(),
77+
/*then*/ "-noFigureWindows",
78+
/*else*/ "-nodisplay"
79+
);
80+
81+
82+
// Can add other command line arguments: e.g -timing or -jdb
83+
// In addition to these arguments, the PID of the Java worker service is automatically
84+
// added as an argument to the initworker.m script
85+
matlabArguments = new String[]{"-parallelserver", nodisplay, "-r", "initworker"};
86+
// Command line arguments for worker MATLABs from 18b and earlier
87+
matlabArgumentsDmlWorker = new String[]{"-dmlworker", nodisplay, "-r", "initworker"};
88+
// Command line arguments for worker MATLABs launched with MVM from 19b
89+
matlabArgumentsMvm = new String[]{"-parallelserver", nodisplay};
90+
91+
// Time to wait between polls on the MVM for whether it is still running
92+
mvmPollIntervalMillis = 1000L;
93+
94+
// Can be used to instrument the workers for debugging, with e.g. strace or gdb.
95+
// Will be placed in front of the MATLAB executable on the command line
96+
workerInstrumentation = new String[]{};
97+
98+
// Whether to launch each worker with "mpiexec -n 1 -launcher fork"
99+
// This is required for mpich3, but should be removed for other mpi
100+
// implementations
101+
shouldLaunchWithMpiexec = false;
102+
103+
// MATLAB stdout and stderr streams are written to this file.
104+
matlabOutputMaxTotalSize = 1000000000; // 1GB
105+
matlabOutputMaxNumFiles = 10;
106+
107+
matlabEnvironment = new String[]{
108+
"HOSTNAME", "${com.mathworks.toolbox.distcomp.hostname}",
109+
"BASE_PORT", "${com.mathworks.toolbox.distcomp.base_port}",
110+
"USE_SERVER_SPECIFIED_HOSTNAME", "${com.mathworks.toolbox.distcomp.rmi.useServerSpecifiedHostname}",
111+
"MDCS_PEERSESSION_KEEP_ALIVE_PERIOD", "${com.mathworks.toolbox.distcomp.pmode.keepAlivePeriod}",
112+
"MDCS_PEERSESSION_KEEP_ALIVE_TIME_UNIT", "${com.mathworks.toolbox.distcomp.pmode.keepAliveTimeUnit}",
113+
"MDCS_MAX_NUM_PORTS", Integer.toString(maxNumPorts),
114+
"MDCS_MATLAB_DRIVE_ENABLED_ON_WORKER", "${com.mathworks.toolbox.distcomp.matlabDriveEnabledOnWorker}",
115+
"MJS_IDLE_LICENSE_TIMEOUT_SECS", Long.toString(idleLicenseTimeoutInSeconds),
116+
"MW_MATLAB_DRIVE_FOLDER_LOCATION_CFG", "${com.mathworks.toolbox.distcomp.matlabDriveFolderLocationCfg}"};
117+
118+
// Other useful variables to set for debugging purposes are:
119+
//"PCTIPC_VERBOSE", "DEBUG4",
120+
//"PCTIPC_LOGFILE", ConfigUtil.concat("/tmp/pctipc_", serviceName, ".log")
121+
122+
// Set by mjs_def "WORKER_START_TIMEOUT" property
123+
matlabStartupTimeoutSecs = Long.parseLong("${com.mathworks.toolbox.distcomp.workerstarttimeout}");
124+
125+
// Maximum time to wait for a clean MATLAB shutdown before the process is hard-killed
126+
matlabShutdownTimeoutSecs = 60;
127+
128+
windowsDomain = "${com.mathworks.toolbox.distcomp.worker.windowsDomain}";
129+
130+
// By default do not use an activatable exporter.
131+
useActivatableExporter = false;
132+
}
133+

chart/mjs/templates/_derived.tpl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,9 @@
2424
# If we are using a secure LDAP server and not using a persistent volume claim for the job manager pod, we need to add the LDAP certificate to the job manager's secret store
2525
{{- define "derived.addLDAPCert" -}}
2626
{{ and (hasPrefix "ldaps://" .Values.ldapURL) (or (empty .Values.matlabPVC) (not .Values.jobManagerUsesPVC)) }}
27+
{{- end -}}
28+
29+
# Whether to override the workergroup config file
30+
{{- define "derived.overrideWorkergroupConfig" -}}
31+
{{ and (eq .Values.matlabRelease "r2024b") (not (empty .Values.networkLicenseManager)) }}
2732
{{- end -}}

chart/mjs/templates/controller-configmap.yaml

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,7 @@ data:
3434
"DeploymentName": {{ include "resources.controller" . | quote}},
3535
"EnableServiceLinks": {{ include "derived.enableServiceLinks" . }},
3636
"OpenMetricsPortOutsideKubernetes": {{ .Values.openMetricsPortOutsideKubernetes }},
37-
"ExtraWorkerEnvironment": {
38-
{{- $comma := "" }}
39-
{{- range $key, $value := .Values.extraWorkerEnv }}
40-
{{ $comma }}{{ $key | quote }}: {{ $value | quote }}
41-
{{- $comma = "," }}
42-
{{- end }}
43-
},
37+
"ExtraWorkerEnvironment": {{ toJson (.Values.extraWorkerEnv | default dict) }},
4438
"IdleStop": {{ .Values.idleStop }},
4539
"InternalClientsOnly": {{ .Values.internalClientsOnly }},
4640
"JobManagerImage": {{ printf "%s:%s" $jobManagerImage $jobManagerImageTag | quote }},
@@ -51,9 +45,11 @@ data:
5145
"JobManagerMemoryLimit": {{ .Values.jobManagerMemoryLimit | quote }},
5246
"JobManagerMemoryRequest": {{ .Values.jobManagerMemoryRequest | quote }},
5347
"JobManagerGroupID": {{ .Values.jobManagerGroupID }},
48+
"JobManagerNodeSelector": {{ toJson .Values.jobManagerNodeSelector }},
5449
"JobManagerUserID": {{ .Values.jobManagerUserID }},
50+
"JobManagerUsesPVC": {{ .Values.jobManagerUsesPVC }},
5551
"JobManagerUID": {{ uuidv4 | quote }},
56-
{{ if eq (include "derived.addLDAPCert" .) "true" -}}
52+
{{- if eq (include "derived.addLDAPCert" .) "true" -}}
5753
"LDAPCertPath": {{ include "paths.ldapCert" . | quote }},
5854
{{- end }}
5955
"LivenessProbeFailureThreshold": {{ .Values.livenessProbeFailureThreshold | default 3 }},
@@ -72,7 +68,9 @@ data:
7268
"MJSDefDir" : {{ include "paths.configDir" . | quote }},
7369
"Namespace": {{ .Release.Namespace | quote }},
7470
"NetworkLicenseManager": {{ .Values.networkLicenseManager | quote }},
71+
"OverrideWorkergroupConfig": {{ include "derived.overrideWorkergroupConfig" . }},
7572
"Period": {{ .Values.autoScalingPeriod }},
73+
"PreserveSecrets": {{ .Values.preserveSecrets | default false }},
7674
"PortsPerWorker": {{ .Values.portsPerWorker | default 2 }},
7775
"PoolProxyBasePort": {{ .Values.poolProxyBasePort }},
7876
"PoolProxyCPULimit": {{ .Values.poolProxyCPULimit | quote }},
@@ -98,6 +96,7 @@ data:
9896
"WorkerMemoryLimit": {{ .Values.workerMemoryLimit | quote }},
9997
"WorkerMemoryRequest": {{ .Values.workerMemoryRequest | quote }},
10098
"WorkerLogPVC": {{ .Values.workerLogPVC | quote }},
99+
"WorkerNodeSelector": {{ toJson .Values.workerNodeSelector }},
101100
"WorkerPassword": {{ .Values.workerPassword | quote }},
102101
"WorkersPerPoolProxy": {{ .Values.workersPerPoolProxy }},
103102
"WorkerUsername": {{ .Values.workerUsername | quote }},

chart/mjs/templates/controller-deployment.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ spec:
2222
# If set to false, disable creation of environment variables for services
2323
enableServiceLinks: {{ include "derived.enableServiceLinks" . }}
2424

25+
# Schedule on same nodes as the job manager
26+
nodeSelector: {{ toJson .Values.jobManagerNodeSelector }}
27+
2528
containers:
2629
- name: {{ $name }}
2730
image: {{ printf "%s:%s" .Values.controllerImage $controllerImageTag }}

chart/mjs/templates/ingress-proxy-deployment.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ spec:
2020
# If set to false, disable creation of environment variables for services
2121
enableServiceLinks: {{ include "derived.enableServiceLinks" . }}
2222

23+
# Schedule on same nodes as the job manager
24+
nodeSelector: {{ toJson .Values.jobManagerNodeSelector }}
25+
2326
containers:
2427
- name: haproxy
2528
image: {{ $.Values.haproxyImage }}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{{- if eq (include "derived.overrideWorkergroupConfig" .) "true" -}}
2+
# Config files for the MJS workers.
3+
# Copyright 2024 The MathWorks, Inc.
4+
apiVersion: v1
5+
kind: ConfigMap
6+
metadata:
7+
name: mjs-worker-config
8+
data:
9+
workergroup.config: |
10+
{{- .Files.Get "files/workergroup.config" | nindent 4 }}
11+
{{- end -}}

chart/mjs/values.schema.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,12 @@
105105
"jobManagerName": {
106106
"type": "string"
107107
},
108+
"jobManagerNodeSelector": {
109+
"type": "object",
110+
"additionalProperties": {
111+
"type": "string"
112+
}
113+
},
108114
"jobManagerUserID": {
109115
"type": "integer",
110116
"minimum": 0
@@ -181,6 +187,9 @@
181187
"openMetricsPortOutsideKubernetes": {
182188
"type": "boolean"
183189
},
190+
"preserveSecrets": {
191+
"type": "boolean"
192+
},
184193
"poolProxyBasePort": {
185194
"type": "integer",
186195
"minimum": 1024,
@@ -286,6 +295,12 @@
286295
{ "type": "number" }
287296
]
288297
},
298+
"workerNodeSelector": {
299+
"type": "object",
300+
"additionalProperties": {
301+
"type": "string"
302+
}
303+
},
289304
"workerPassword": {
290305
"type": "string"
291306
},

chart/mjs/values.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ poolProxyCPURequest: "0.5" # CPU request for each parallel pool proxy process
6161
poolProxyMemoryLimit: "" # Memory limit for each parallel pool proxy process
6262
poolProxyMemoryRequest: "500Mi" # Memory request for each parallel pool proxy process
6363

64+
# Node settings
65+
jobManagerNodeSelector: {} # Node selector for the job manager, specified as key-value pairs
66+
workerNodeSelector: {} # Node selector for the workers, specified as key-value pairs
67+
6468
# Auto-scaling settings
6569
idleStop: 300 # Time after which idle worker pods will be removed
6670
autoScalingPeriod: 15 # Period with which the controller checks the cluster's size requirements

helm_values.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ If you do not include a parameter in your YAML file, your configuration uses the
3030
`jobManagerMemoryLimit` | Memory limit for the job manager pod. | —
3131
`jobManagerMemoryRequest` | Memory request for the job manager pod. | `4Gi`
3232
`jobManagerName` | Name of the MATLAB Job Scheduler job manager. | `MJS_Kubernetes`
33+
`jobManagerNodeSelector` | Node selector for the job manager pod, specified as key-value pairs that match the labels of the Kubernetes nodes you want to run the job manager on. For example, to run the job manager on nodes with label `node-type=jobmanager`, set this parameter to `{"node-type":"jobmanager"}`. You must assign the appropriate labels to your nodes before you can use the `nodeSelector` feature. For more information, see [Assigning Pods to Nodes](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) on the Kubernetes website. | `{}`
3334
`jobManagerUserID` | User ID of the user account that MATLAB Job Scheduler uses to run the job manager pod. The user must have write permission for the checkpoint and log PersistentVolumes. To find the user ID, on a Linux machine, run `id -u` in the terminal. | `0`
3435
`jobManagerUsesPVC` | Flag to mount a MATLAB Parallel Server installation from a PersistentVolume onto the job manager pod if the `matlabPVC` parameter is set. If this flag is set to true, the job manager pod uses the image specified in the `matlabDepsImage` parameter. | `false`
3536
`ldapSecurityPrincipalFormat` | Format of a security principal (user) for your LDAP server. | —
@@ -68,6 +69,7 @@ If you do not include a parameter in your YAML file, your configuration uses the
6869
`workerLogPVC` | Name of the PersistentVolumeClaim that is bound to the PersistentVolume used to retain worker logs. | —
6970
`workerMemoryLimit` | Memory limit for each worker pod. | `8Gi`
7071
`workerMemoryRequest` | Memory request for each worker pod. | `8Gi`
72+
`workerNodeSelector` | Node selector for the worker pods, specified as key-value pairs that match the labels of the Kubernetes nodes you want to run the workers on. For example, to run the workers on nodes with label `node-type=worker`, set this parameter to `{"node-type":"worker"}`. You must assign the appropriate labels to your nodes before you can use the `nodeSelector` feature. For more information, see [Assigning Pods to Nodes](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) on the Kubernetes website. | `{}`
7173
`workerPassword` | Password of the username that MATLAB Parallel Server uses to run jobs. | `matlab`
7274
`workerUsername` | Username that MATLAB Parallel Server uses to run jobs. | `matlab`
7375
`workersPerPoolProxy` | Maximum number of workers using each parallel pool proxy. | `32`

0 commit comments

Comments
 (0)