@@ -99,6 +99,8 @@ MODEL_CACHE_DIR="/scratch.trt_llm_data/llm-models"
9999ENABLE_NGC_DEVEL_IMAGE_TEST = params. enableNgcDevelImageTest ?: false
100100ENABLE_NGC_RELEASE_IMAGE_TEST = params. enableNgcReleaseImageTest ?: false
101101
102+ COMMON_SSH_OPTIONS = " -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
103+
102104def uploadResults (def pipeline , SlurmCluster cluster , String nodeName , String stageName ){
103105 withCredentials([usernamePassword(credentialsId : ' svc_tensorrt' , usernameVariable : ' USERNAME' , passwordVariable : ' PASSWORD' )]) {
104106 def remote = [
@@ -113,7 +115,7 @@ def uploadResults(def pipeline, SlurmCluster cluster, String nodeName, String st
113115 pipeline. stage(' Submit Test Results' ) {
114116 sh " mkdir -p ${ stageName} "
115117 def resultsFilePath = " /home/svc_tensorrt/bloom/scripts/${ nodeName} /results/results.xml"
116- def downloadResultCmd = " sshpass -p '${ remote.passwd} ' scp -r -p -oStrictHostKeyChecking=no ${ remote.user} @${ remote.host} :${ resultsFilePath} ${ stageName} /"
118+ def downloadResultCmd = " sshpass -p '${ remote.passwd} ' scp -r -p ${ COMMON_SSH_OPTIONS } ${ remote.user} @${ remote.host} :${ resultsFilePath} ${ stageName} /"
117119 def downloadSucceed = sh(script : downloadResultCmd, returnStatus : true ) == 0
118120 if (downloadSucceed) {
119121 sh " ls ${ stageName} "
@@ -239,7 +241,7 @@ def runLLMTestlistOnSlurm(pipeline, platform, testList, config=VANILLA_CONFIG, p
239241
240242 Utils . exec(pipeline, script : " chmod +x ${ jenkinsSetupPath} " , returnStdout : true )
241243
242- Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p -oStrictHostKeyChecking=no ${ jenkinsSetupPath} ${ remote.user} @${ remote.host} :~/bloom/scripts/${ nodeName} -slurm_jenkins_agent_setup.sh" ,)
244+ Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p ${ COMMON_SSH_OPTIONS } ${ jenkinsSetupPath} ${ remote.user} @${ remote.host} :~/bloom/scripts/${ nodeName} -slurm_jenkins_agent_setup.sh" ,)
243245
244246 Utils . exec(
245247 pipeline,
@@ -327,7 +329,7 @@ def runLLMTestlistOnSlurm_MultiNodes(pipeline, platform, testList, config=VANILL
327329
328330 stage(' Prepare Testing' ) {
329331 // Create Job Workspace folder in Frontend Node
330- Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' ssh -oStrictHostKeyChecking=no ${ remote.user} @${ remote.host} 'mkdir ${ jobWorkspace} '" ,)
332+ Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' ssh ${ COMMON_SSH_OPTIONS } ${ remote.user} @${ remote.host} 'mkdir -p ${ jobWorkspace} '" ,)
331333
332334 // Download and Unzip Tar File
333335 trtllm_utils. llmExecStepWithRetry(pipeline, script : " cd ${ llmPath} && wget -nv ${ llmTarfile} " )
@@ -336,11 +338,11 @@ def runLLMTestlistOnSlurm_MultiNodes(pipeline, platform, testList, config=VANILL
336338 // Upload slurm_run_sh to Frontend node
337339 def scriptRunLocalPath = " ${ llmSrcLocal} /jenkins/scripts/slurm_run.sh"
338340 Utils . exec(pipeline, script : " chmod +x ${ scriptRunLocalPath} " , returnStdout : true )
339- Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p -oStrictHostKeyChecking=no ${ scriptRunLocalPath} ${ remote.user} @${ remote.host} :${ scriptRunNode} " ,)
341+ Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p ${ COMMON_SSH_OPTIONS } ${ scriptRunLocalPath} ${ remote.user} @${ remote.host} :${ scriptRunNode} " ,)
340342
341343 // Upload waives.txt to Frontend node
342344 def waivesListLocalPath = " ${ llmSrcLocal} /tests/integration/test_lists/waives.txt"
343- Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p -oStrictHostKeyChecking=no ${ waivesListLocalPath} ${ remote.user} @${ remote.host} :${ waivesListPathNode} " ,)
345+ Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p ${ COMMON_SSH_OPTIONS } ${ waivesListLocalPath} ${ remote.user} @${ remote.host} :${ waivesListPathNode} " ,)
344346
345347 // Generate Test List and Upload to Frontend Node
346348 def makoArgs = getMakoArgsFromStageName(stageName, true )
@@ -349,7 +351,7 @@ def runLLMTestlistOnSlurm_MultiNodes(pipeline, platform, testList, config=VANILL
349351 // if the line cannot be split by "=", just ignore that line.
350352 def makoOptsJson = transformMakoArgsToJson([" Mako options:" ] + makoArgs)
351353 def testListPath = renderTestDB(testList, llmSrcLocal, stageName, makoOptsJson)
352- Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p -oStrictHostKeyChecking=no ${ testListPath} ${ remote.user} @${ remote.host} :${ testListPathNode} " ,)
354+ Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p ${ COMMON_SSH_OPTIONS } ${ testListPath} ${ remote.user} @${ remote.host} :${ testListPathNode} " ,)
353355
354356 // Generate Multi Node Job Launch Script
355357 def container = LLM_DOCKER_IMAGE . replace(" urm.nvidia.com/" , " urm.nvidia.com#" )
@@ -393,7 +395,7 @@ def runLLMTestlistOnSlurm_MultiNodes(pipeline, platform, testList, config=VANILL
393395 """ . stripIndent()
394396 pipeline. writeFile(file : scriptLaunchDestPath, text : scriptContent)
395397 Utils . exec(pipeline, script : " chmod +x ${ scriptLaunchDestPath} " , returnStdout : true )
396- Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p -oStrictHostKeyChecking=no ${ scriptLaunchDestPath} ${ remote.user} @${ remote.host} :${ scriptLaunch} " ,)
398+ Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p ${ COMMON_SSH_OPTIONS } ${ scriptLaunchDestPath} ${ remote.user} @${ remote.host} :${ scriptLaunch} " ,)
397399 }
398400 stage(' Run Test' ) {
399401 def scriptLaunch = " ${ jobWorkspace} /slurm_launch.sh"
@@ -1089,7 +1091,7 @@ def getSSHConnectionPorts(portConfigFile, stageName)
10891091 usernamePassword(credentialsId : ' tensorrt_llm_infra_debug_vm_01_credentials' , usernameVariable : ' USERNAME' , passwordVariable : ' PASSWORD' ),
10901092 string(credentialsId : ' DEBUG_HOST_NAME' , variable : ' HOST_NAME' )
10911093 ]) {
1092- portUsage = sh(script : " ssh -v ${ USERNAME} @${ HOST_NAME} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null 'netstat -tuln'" ,returnStdout : true )
1094+ portUsage = sh(script : " ssh -v ${ USERNAME} @${ HOST_NAME} ${ COMMON_SSH_OPTIONS } 'netstat -tuln'" , returnStdout : true )
10931095 }
10941096 echo " Port Usage: ${ portUsage} "
10951097
@@ -1248,7 +1250,7 @@ def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CO
12481250 def llmRootConfig = " ${ LLM_ROOT}${ config} "
12491251 sh " mkdir ${ llmRootConfig} "
12501252
1251- def llmPath = sh (script : " realpath ${ llmRootConfig} " ,returnStdout : true ). trim()
1253+ def llmPath = sh (script : " realpath ${ llmRootConfig} " , returnStdout : true ). trim()
12521254 def llmSrc = " ${ llmPath} /TensorRT-LLM/src"
12531255 echoNodeAndGpuInfo(pipeline, stageName)
12541256
@@ -1362,9 +1364,9 @@ def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CO
13621364 usernamePassword(credentialsId : ' tensorrt_llm_infra_debug_vm_01_credentials' , usernameVariable : ' USERNAME' , passwordVariable : ' PASSWORD' ),
13631365 string(credentialsId : ' DEBUG_HOST_NAME' , variable : ' HOST_NAME' )
13641366 ]) {
1365- sh " sshpass -p ${ PASSWORD} -v ssh ${ USERNAME} @${ HOST_NAME} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null 'cat >> ~/.ssh/authorized_keys' < ~/.ssh/id_rsa.pub"
1366- sh " ssh -v ${ USERNAME} @${ HOST_NAME} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null 'echo \"\" > ~/.ssh/known_hosts && cat ~/.ssh/id_rsa.pub' >> ~/.ssh/authorized_keys"
1367- sh " ssh -v ${ USERNAME} @${ HOST_NAME} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null 'cat ~/.ssh/ports_config.txt' >> ${ portConfigFilePath} "
1367+ sh " sshpass -p ${ PASSWORD} -v ssh ${ USERNAME} @${ HOST_NAME} ${ COMMON_SSH_OPTIONS } 'cat >> ~/.ssh/authorized_keys' < ~/.ssh/id_rsa.pub"
1368+ sh " ssh -v ${ USERNAME} @${ HOST_NAME} ${ COMMON_SSH_OPTIONS } 'echo \"\" > ~/.ssh/known_hosts && cat ~/.ssh/id_rsa.pub' >> ~/.ssh/authorized_keys"
1369+ sh " ssh -v ${ USERNAME} @${ HOST_NAME} ${ COMMON_SSH_OPTIONS } 'cat ~/.ssh/ports_config.txt' >> ${ portConfigFilePath} "
13681370
13691371 def (int userPort, int monitorPort) = getSSHConnectionPorts(portConfigFilePath, stageName)
13701372 if (userPort == 0 ) {
@@ -1373,7 +1375,7 @@ def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CO
13731375 return
13741376 }
13751377
1376- sh " ssh -f -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -L 1111:127.0.0.1:${ monitorPort} -R ${ monitorPort} :127.0.0.1:1112 -NR ${ userPort} :localhost:22 ${ USERNAME} @${ HOST_NAME} "
1378+ sh " ssh -f ${ COMMON_SSH_OPTIONS } -L 1111:127.0.0.1:${ monitorPort} -R ${ monitorPort} :127.0.0.1:1112 -NR ${ userPort} :localhost:22 ${ USERNAME} @${ HOST_NAME} "
13771379 sh " autossh -fNR ${ userPort} :localhost:22 ${ USERNAME} @${ HOST_NAME} "
13781380 sh " ps aux | grep ssh"
13791381 try {
0 commit comments