Skip to content

Commit dd3a61d

Browse files
authored
Merge pull request #211 from pjh5/libtorch_timeout
Extending timeout for libtorch job
2 parents 46b7382 + 3653606 commit dd3a61d

File tree

2 files changed

+37
-11
lines changed

2 files changed

+37
-11
lines changed

cron/build_multiple.sh

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,13 @@ for config in "${all_configs[@]}"; do
111111
build_script="${NIGHTLIES_BUILDER_ROOT}/cron/build_docker.sh"
112112
fi
113113

114+
# Swap timeout out for libtorch
115+
if [[ "$package_type" == libtorch ]]; then
116+
_timeout="$PYTORCH_NIGHTLIES_LIBTORCH_TIMEOUT"
117+
else
118+
_timeout="$PYTORCH_NIGHTLIES_TIMEOUT"
119+
fi
120+
114121
set +x
115122
echo
116123
echo "##############################"
@@ -124,7 +131,7 @@ for config in "${all_configs[@]}"; do
124131
DESIRED_PYTHON="$py_ver" \
125132
DESIRED_CUDA="$cuda_ver" \
126133
ON_SUCCESS_WRITE_ME="$succeeded_log_loc" \
127-
$PORTABLE_TIMEOUT "$PYTORCH_NIGHTLIES_TIMEOUT" \
134+
$PORTABLE_TIMEOUT "$_timeout" \
128135
"$build_script" > "$log_name" 2>&1
129136
ret="$?"
130137
duration="$SECONDS"

cron/nightly_defaults.sh

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,9 @@ mkdir -p "$today" || true
6161

6262
# List of people to email when things go wrong. This is passed directly to
6363
# `mail -t`
64-
export NIGHTLIES_EMAIL_LIST='[email protected]'
64+
if [[ -z "$NIGHTLIES_EMAIL_LIST" ]]; then
65+
export NIGHTLIES_EMAIL_LIST='[email protected]'
66+
fi
6567

6668
# PYTORCH_CREDENTIALS_FILE
6769
# A bash file that exports credentials needed to upload to aws and anaconda.
@@ -79,15 +81,19 @@ fi
7981
# Location of the temporary miniconda that is downloaded to install conda-build
8082
# and aws to upload finished packages. TODO: this is messy to install this in
8183
# upload.sh and later use it in upload_logs.sh
82-
CONDA_UPLOADER_INSTALLATION="${today}/miniconda"
84+
if [[ -z "$CONDA_UPLOADER_INSTALLATION" ]]; then
85+
export CONDA_UPLOADER_INSTALLATION="${today}/miniconda"
86+
fi
8387

8488
# N.B. BUILDER_REPO and BUILDER_BRANCH are both set in cron_start.sh, as that
8589
# is the script that actually clones the builder repo that /this/ script is
8690
# running from.
8791
export NIGHTLIES_BUILDER_ROOT="$(cd $(dirname $0)/.. && pwd)"
8892

8993
# The shared pytorch repo to be used by all builds
90-
export NIGHTLIES_PYTORCH_ROOT="${today}/pytorch"
94+
if [[ -z "$NIGHTLIES_PYTORCH_ROOT" ]]; then
95+
export NIGHTLIES_PYTORCH_ROOT="${today}/pytorch"
96+
fi
9197

9298
# PYTORCH_REPO
9399
# The GitHub org/user whose fork of PyTorch to check out (git clone
@@ -190,15 +196,23 @@ nightlies_package_folder () {
190196
# should be empty. Logs are written out to RUNNING_LOG_DIR. When a build
191197
# fails, its log should be moved to FAILED_LOG_DIR, and similarly for
192198
# succeeded builds.
193-
export RUNNING_LOG_DIR="${today}/logs"
194-
export FAILED_LOG_DIR="${today}/logs/failed"
195-
export SUCCEEDED_LOG_DIR="${today}/logs/succeeded"
199+
if [[ -z "$RUNNING_LOG_DIR" ]]; then
200+
export RUNNING_LOG_DIR="${today}/logs"
201+
fi
202+
if [[ -z "$FAILED_LOG_DIR" ]]; then
203+
export FAILED_LOG_DIR="${today}/logs/failed"
204+
fi
205+
if [[ -z "$SUCCEEDED_LOG_DIR" ]]; then
206+
export SUCCEEDED_LOG_DIR="${today}/logs/succeeded"
207+
fi
196208

197209
# Log s3 directory, must not end in a /
198-
if [[ "$(uname)" == 'Darwin' ]]; then
199-
export LOGS_S3_DIR="nightly_logs/macos/$NIGHTLIES_DATE"
200-
else
201-
export LOGS_S3_DIR="nightly_logs/linux/$NIGHTLIES_DATE"
210+
if [[ -z "$LOGS_S3_DIR" ]]; then
211+
if [[ "$(uname)" == 'Darwin' ]]; then
212+
export LOGS_S3_DIR="nightly_logs/macos/$NIGHTLIES_DATE"
213+
else
214+
export LOGS_S3_DIR="nightly_logs/linux/$NIGHTLIES_DATE"
215+
fi
202216
fi
203217
# The location of the binary_sizes dir in s3 is hardcoded into
204218
# upload_binary_sizes.sh
@@ -236,6 +250,11 @@ if [[ -z "$PYTORCH_NIGHTLIES_TIMEOUT" ]]; then
236250
export PYTORCH_NIGHTLIES_TIMEOUT=4800
237251
fi
238252
fi
253+
if [[ -z "$PYTORCH_NIGHTLIES_LIBTORCH_TIMEOUT" ]]; then
254+
# The libtorch job actually runs for several cpu/cuda versions in sequence
255+
# and so takes a long time
256+
export PYTORCH_NIGHTLIES_LIBTORCH_TIMEOUT=10800
257+
fi
239258

240259
# PORTABLE_TIMEOUT
241260
# Command/executable of some timeout command. Defined here because the path

0 commit comments

Comments
 (0)