Skip to content

Commit 71b3c4d

Browse files
committed
ci: test gpu on self-hosted runners
1 parent 0e2b1cc commit 71b3c4d

File tree

1 file changed

+73
-42
lines changed

1 file changed

+73
-42
lines changed

.github/workflows/ci.yml

Lines changed: 73 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
name: CI
22

3-
on: [pull_request, push]
4-
#on: [push]
3+
on:
4+
pull_request:
5+
push:
6+
branches:
7+
- master
8+
- self-hosted
59

610
# Cancel a job if there's a new one started on the same branch.
711
# Based on https://stackoverflow.com/questions/58895283/stop-already-running-workflow-job-in-github-actions/67223051#67223051
@@ -23,7 +27,9 @@ jobs:
2327
steps:
2428
- uses: actions/checkout@v4
2529
- name: Install required packages
26-
run: sudo apt install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
30+
run: |
31+
sudo apt-get update
32+
sudo apt-get install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
2733
- name: Install cargo clippy
2834
run: rustup component add clippy
2935
- name: Run cargo clippy
@@ -40,42 +46,47 @@ jobs:
4046
run: cargo fmt --all -- --check
4147

4248
test_release:
43-
runs-on: ubuntu-24.04
49+
runs-on: ['self-hosted', 'linux', 'x64', '2xlarge']
4450
name: Test in release mode
4551
strategy:
4652
matrix:
4753
cargo-args: ['', '--features fixed-rows-to-discard']
54+
fail-fast: false
4855
env:
4956
# Run all tests with multicore-SDR enabled.
5057
FIL_PROOFS_USE_MULTICORE_SDR: true
5158
steps:
5259
- uses: actions/checkout@v4
5360
- name: Install required packages
54-
run: sudo apt install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
61+
run: |
62+
sudo apt-get update
63+
sudo apt-get install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
5564
5665
- name: Download the proof params
5766
uses: ./.github/actions/proof-params-download
5867
with:
5968
github-token: ${{ secrets.GITHUB_TOKEN }}
6069

70+
- uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
71+
with:
72+
toolchain: 1.83
73+
6174
- name: Run usual tests in release profile
6275
run: cargo test --verbose --release --workspace --all-targets ${{ matrix.cargo-args }} -- --nocapture
6376
- name: Run isolated PoRep tests in release profile
64-
# Getting the cores does not work on GitHub Actions, hence skip that
65-
# specific test.
66-
run: cargo test --release -p storage-proofs-porep --features isolated-testing ${{ matrix.cargo-args }} -- --nocapture --skip stacked::vanilla::cores::tests::test_checkout_cores
77+
run: cargo test --release -p storage-proofs-porep --features isolated-testing ${{ matrix.cargo-args }} -- --nocapture
6778
- name: Run isolated update tests in release profile
68-
# Some `storage-proofs-update` tests need to run sequentially due to
69-
# their high memory usage.
70-
run: cargo test --release -p storage-proofs-update --features isolated-testing ${{ matrix.cargo-args }} -- --nocapture --test-threads=1
79+
run: cargo test --release -p storage-proofs-update --features isolated-testing ${{ matrix.cargo-args }} -- --nocapture
7180

7281
test_ignored_release:
7382
runs-on: ubuntu-24.04
7483
name: Test ignored in release mode
7584
steps:
7685
- uses: actions/checkout@v4
7786
- name: Install required packages
78-
run: sudo apt install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
87+
run: |
88+
sudo apt-get update
89+
sudo apt-get install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
7990
8091
- name: Download the proof params
8192
uses: ./.github/actions/proof-params-download
@@ -86,60 +97,80 @@ jobs:
8697
run: cargo test --release --workspace -- ignored --nocapture
8798

8899
test_no_default_features:
89-
runs-on: ubuntu-24.04
100+
runs-on: ['self-hosted', 'linux', 'x64', '2xlarge']
90101
name: Test without default features
91102
steps:
92103
- uses: actions/checkout@v4
93104
- name: Install required packages
94-
run: sudo apt install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
105+
run: |
106+
sudo apt-get update
107+
sudo apt-get install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
95108
96109
- name: Download the proof params
97110
uses: ./.github/actions/proof-params-download
98111
with:
99112
github-token: ${{ secrets.GITHUB_TOKEN }}
100113

114+
- uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
115+
with:
116+
toolchain: 1.83
117+
101118
- name: Test ignored in release profile
102119
run: cargo test --release --workspace --no-default-features
103120

104121
build_gpu:
105-
runs-on: ubuntu-24.04
122+
runs-on: ['self-hosted', 'linux', 'x64', '2xlarge']
106123
name: Build with various GPU support enabled
107124
steps:
108125
- uses: actions/checkout@v4
109126
- name: Install required packages
110-
run: sudo apt install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
127+
run: |
128+
sudo apt-get update
129+
sudo apt-get install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
130+
131+
- uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
132+
with:
133+
toolchain: 1.83
111134

112135
- name: Build with `cuda` and `opencl` features enabled
113136
run: cargo build --workspace --features cuda,opencl
114137
- name: Build with `cuda-supraseal` feature enabled
115138
run: CC=gcc-12 CXX=g++-12 NVCC_PREPEND_FLAGS='-ccbin /usr/bin/g++-12' cargo build --workspace --no-default-features --features cuda-supraseal
116139

117-
# Commented out until we run it on hardware with actual GPUs.
118-
#test_gpu:
119-
# runs-on: ubuntu-24.04
120-
# name: Test on GPUs
121-
# strategy:
122-
# matrix:
123-
# test-args: ['', '--ignored']
124-
# env:
125-
# FIL_PROOFS_USE_GPU_COLUMN_BUILDER: true
126-
# FIL_PROOFS_USE_GPU_TREE_BUILDER: true
127-
# BELLMAN_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
128-
# NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
129-
# steps:
130-
# - uses: actions/checkout@v4
131-
# - name: Install required packages
132-
# run: sudo apt install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
133-
#
134-
# - name: Download the proof params
135-
# uses: ./.github/actions/proof-params-download
136-
# with:
137-
# github-token: ${{ secrets.GITHUB_TOKEN }}
138-
#
139-
# - name: Test with CUDA
140-
# run: cargo test --verbose --release --workspace --features cuda -- --nocapture ${{ matrix.test-args }}
141-
# - name: Test with `cuda-supraseal`
142-
# run: CC=gcc-12 CXX=g++-12 NVCC_PREPEND_FLAGS='-ccbin /usr/bin/g++-12' cargo test -p filecoin-proofs --release --no-default-features --features cuda-supraseal -- --nocapture --test-threads=1 ${{ matrix.test-args }}
140+
test_gpu:
141+
runs-on: ['self-hosted', 'linux', 'x64', '2xlarge+gpu']
142+
name: Test on GPUs
143+
strategy:
144+
matrix:
145+
test-args: ['', '--ignored']
146+
fail-fast: true
147+
# NOTE: We can only run one 2xlarge+gpu job at a time at the moment.
148+
max-parallel: 1
149+
env:
150+
FIL_PROOFS_USE_GPU_COLUMN_BUILDER: true
151+
FIL_PROOFS_USE_GPU_TREE_BUILDER: true
152+
BELLMAN_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
153+
NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
154+
steps:
155+
- uses: actions/checkout@v4
156+
- name: Install required packages
157+
run: |
158+
sudo apt-get update
159+
sudo apt-get install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
160+
161+
- name: Download the proof params
162+
uses: ./.github/actions/proof-params-download
163+
with:
164+
github-token: ${{ secrets.GITHUB_TOKEN }}
165+
166+
- uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
167+
with:
168+
toolchain: 1.83
169+
170+
- name: Test with CUDA
171+
run: cargo test --verbose --release --workspace --features cuda -- --nocapture ${{ matrix.test-args }}
172+
- name: Test with `cuda-supraseal`
173+
run: CC=gcc-12 CXX=g++-12 NVCC_PREPEND_FLAGS='-ccbin /usr/bin/g++-12' cargo test -p filecoin-proofs --release --no-default-features --features cuda-supraseal -- --nocapture ${{ matrix.test-args }}
143174

144175
test_macos:
145176
runs-on: macos-latest

0 commit comments

Comments
 (0)