Skip to content

Commit e76781b

Browse files
committed
ci: test gpu on self-hosted runners
1 parent 0e2b1cc commit e76781b

File tree

1 file changed

+80
-39
lines changed

1 file changed

+80
-39
lines changed

.github/workflows/ci.yml

Lines changed: 80 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
name: CI
22

3-
on: [pull_request, push]
4-
#on: [push]
3+
on:
4+
pull_request:
5+
push:
6+
branches:
7+
- master
8+
- self-hosted
59

610
# Cancel a job if there's a new one on the same branch started.
711
# Based on https://stackoverflow.com/questions/58895283/stop-already-running-workflow-job-in-github-actions/67223051#67223051
@@ -23,7 +27,9 @@ jobs:
2327
steps:
2428
- uses: actions/checkout@v4
2529
- name: Install required packages
26-
run: sudo apt install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
30+
run: |
31+
sudo apt-get update
32+
sudo apt-get install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
2733
- name: Install cargo clippy
2834
run: rustup component add clippy
2935
- name: Run cargo clippy
@@ -40,42 +46,49 @@ jobs:
4046
run: cargo fmt --all -- --check
4147

4248
test_release:
43-
runs-on: ubuntu-24.04
49+
runs-on: ['self-hosted', 'linux', 'x64', '2xlarge']
4450
name: Test in release mode
4551
strategy:
4652
matrix:
4753
cargo-args: ['', '--features fixed-rows-to-discard']
54+
fail-fast: false
4855
env:
4956
# Run all tests with multicore-SDR enabled.
5057
FIL_PROOFS_USE_MULTICORE_SDR: true
5158
steps:
5259
- uses: actions/checkout@v4
5360
- name: Install required packages
54-
run: sudo apt install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
61+
run: |
62+
sudo apt-get update
63+
sudo apt-get install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
5564
5665
- name: Download the proof params
5766
uses: ./.github/actions/proof-params-download
5867
with:
5968
github-token: ${{ secrets.GITHUB_TOKEN }}
6069

70+
- uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
71+
with:
72+
toolchain: 1.83
73+
6174
- name: Run usual tests in release profile
6275
run: cargo test --verbose --release --workspace --all-targets ${{ matrix.cargo-args }} -- --nocapture
6376
- name: Run isolated PoRep tests in release profile
6477
# Getting the cores does not work on GitHub Actions, hence skip that
6578
# specific test.
6679
run: cargo test --release -p storage-proofs-porep --features isolated-testing ${{ matrix.cargo-args }} -- --nocapture --skip stacked::vanilla::cores::tests::test_checkout_cores
6780
- name: Run isolated update tests in release profile
68-
# Some `storage-proofs-update` tests need to run sequentially due to
69-
# their high memory usage.
70-
run: cargo test --release -p storage-proofs-update --features isolated-testing ${{ matrix.cargo-args }} -- --nocapture --test-threads=1
81+
run: cargo test --release -p storage-proofs-update --features isolated-testing ${{ matrix.cargo-args }} -- --nocapture
7182

7283
test_ignored_release:
7384
runs-on: ubuntu-24.04
7485
name: Test ignored in release mode
7586
steps:
7687
- uses: actions/checkout@v4
7788
- name: Install required packages
78-
run: sudo apt install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
89+
run: |
90+
sudo apt-get update
91+
sudo apt-get install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
7992
8093
- name: Download the proof params
8194
uses: ./.github/actions/proof-params-download
@@ -86,60 +99,88 @@ jobs:
8699
run: cargo test --release --workspace -- ignored --nocapture
87100

88101
test_no_default_features:
89-
runs-on: ubuntu-24.04
102+
runs-on: ['self-hosted', 'linux', 'x64', '2xlarge']
90103
name: Test without default features
91104
steps:
92105
- uses: actions/checkout@v4
93106
- name: Install required packages
94-
run: sudo apt install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
107+
run: |
108+
sudo apt-get update
109+
sudo apt-get install --no-install-recommends --yes libhwloc-dev ocl-icd-opencl-dev
95110
96111
- name: Download the proof params
97112
uses: ./.github/actions/proof-params-download
98113
with:
99114
github-token: ${{ secrets.GITHUB_TOKEN }}
100115

116+
- uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
117+
with:
118+
toolchain: 1.83
119+
101120
- name: Test ignored in release profile
102121
run: cargo test --release --workspace --no-default-features
103122

104123
build_gpu:
105-
runs-on: ubuntu-24.04
124+
runs-on: ['self-hosted', 'linux', 'x64', '2xlarge']
106125
name: Build with various GPU support enabled
107126
steps:
108127
- uses: actions/checkout@v4
109128
- name: Install required packages
110-
run: sudo apt install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
129+
run: |
130+
sudo apt-get update
131+
sudo apt-get install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
132+
133+
- uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
134+
with:
135+
toolchain: 1.83
111136

112137
- name: Build with `cuda` and `opencl` features enabled
113138
run: cargo build --workspace --features cuda,opencl
114139
- name: Build with `cuda-supraseal` feature enabled
115140
run: CC=gcc-12 CXX=g++-12 NVCC_PREPEND_FLAGS='-ccbin /usr/bin/g++-12' cargo build --workspace --no-default-features --features cuda-supraseal
116141

117-
# Commented out until we run it on hardware with actual GPUs.
118-
#test_gpu:
119-
# runs-on: ubuntu-24.04
120-
# name: Test on GPUs
121-
# strategy:
122-
# matrix:
123-
# test-args: ['', '--ignored']
124-
# env:
125-
# FIL_PROOFS_USE_GPU_COLUMN_BUILDER: true
126-
# FIL_PROOFS_USE_GPU_TREE_BUILDER: true
127-
# BELLMAN_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
128-
# NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
129-
# steps:
130-
# - uses: actions/checkout@v4
131-
# - name: Install required packages
132-
# run: sudo apt install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
133-
#
134-
# - name: Download the proof params
135-
# uses: ./.github/actions/proof-params-download
136-
# with:
137-
# github-token: ${{ secrets.GITHUB_TOKEN }}
138-
#
139-
# - name: Test with CUDA
140-
# run: cargo test --verbose --release --workspace --features cuda -- --nocapture ${{ matrix.test-args }}
141-
# - name: Test with `cuda-supraseal`
142-
# run: CC=gcc-12 CXX=g++-12 NVCC_PREPEND_FLAGS='-ccbin /usr/bin/g++-12' cargo test -p filecoin-proofs --release --no-default-features --features cuda-supraseal -- --nocapture --test-threads=1 ${{ matrix.test-args }}
142+
test_gpu:
143+
runs-on: ['self-hosted', 'linux', 'x64', '2xlarge+gpu']
144+
name: Test on GPUs
145+
strategy:
146+
matrix:
147+
test-args: ['', '--ignored']
148+
fail-fast: true
149+
# NOTE: We can only run one 2xlarge+gpu job at a time at the moment.
150+
max-parallel: 1
151+
env:
152+
FIL_PROOFS_USE_GPU_COLUMN_BUILDER: true
153+
FIL_PROOFS_USE_GPU_TREE_BUILDER: true
154+
BELLMAN_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
155+
NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
156+
steps:
157+
- uses: actions/checkout@v4
158+
- name: Install CUDA drivers
159+
run: |
160+
curl -L -o nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb https://us.download.nvidia.com/tesla/570.148.08/nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb
161+
sudo dpkg -i nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb
162+
sudo cp /var/nvidia-driver-local-repo-ubuntu2404-570.148.08/nvidia-driver-local-*-keyring.gpg /usr/share/keyrings/
163+
sudo apt-get update
164+
sudo apt-get install --no-install-recommends --yes cuda-drivers
165+
rm nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb
166+
- name: Install required packages
167+
run: |
168+
sudo apt-get update
169+
sudo apt-get install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
170+
171+
- name: Download the proof params
172+
uses: ./.github/actions/proof-params-download
173+
with:
174+
github-token: ${{ secrets.GITHUB_TOKEN }}
175+
176+
- uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
177+
with:
178+
toolchain: 1.83
179+
180+
- name: Test with CUDA
181+
run: cargo test --verbose --release --workspace --features cuda -- --nocapture ${{ matrix.test-args }}
182+
- name: Test with `cuda-supraseal`
183+
run: CC=gcc-12 CXX=g++-12 NVCC_PREPEND_FLAGS='-ccbin /usr/bin/g++-12' cargo test -p filecoin-proofs --release --no-default-features --features cuda-supraseal -- --nocapture ${{ matrix.test-args }}
143184

144185
test_macos:
145186
runs-on: macos-latest

0 commit comments

Comments
 (0)