# Skip to content
#
# Update windows-cuda.yml #93
#
# Update windows-cuda.yml
#
# Update windows-cuda.yml #93
#
# Workflow file for this run

# Builds ONNX Runtime on Windows with CUDA and DML enabled, uploads the
# install tree as a workflow artifact, and publishes it as a pre-release.
name: Build (Windows, CUDA)
on: [push, workflow_dispatch]
jobs:
  build-windows:
    runs-on: windows-2025
    # The Release step creates a tag and a GitHub release, which requires
    # write access to repository contents.
    permissions:
      contents: write
    defaults:
      run:
        # Most steps use cmd syntax (%VAR% expansion, `type`, `move`).
        # Steps that rely on POSIX tools override this with `shell: bash`.
        shell: cmd
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4
        with:
          submodules: recursive
          # master (pinned commit)
          ref: 'ecb26fb7754d7c9edf24b1844ea807180a2e3e23'
          # Full history, so `git describe` in "Get description" can see tags.
          fetch-depth: 0

      - name: Fix
        # Wraps fpA_intB_gemm_profiler.cc in `#if USE_FPA_INTB_GEMM ... #endif`
        # and then prints the patched file to the log.
        run: |
          (echo #if USE_FPA_INTB_GEMM & type onnxruntime\contrib_ops\cuda\llm\fpA_intB_gemm_profiler.cc & echo #endif) > temp && move /y temp onnxruntime\contrib_ops\cuda\llm\fpA_intB_gemm_profiler.cc
          type onnxruntime\contrib_ops\cuda\llm\fpA_intB_gemm_profiler.cc

      - name: Setup MSVC
        uses: ilammy/msvc-dev-cmd@v1

      - name: Setup CMake and Ninja
        run: pip install cmake ninja

      - name: Cache CUDA
        id: cache-cuda
        uses: actions/cache@v4
        with:
          path: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
          key: ${{ runner.os }}-cuda-13.0.1

      - name: Setup CUDA
        # Only download and install the toolkit when the cache missed.
        if: steps.cache-cuda.outputs.cache-hit != 'true'
        # --fail makes curl exit non-zero on an HTTP error instead of saving
        # the error page as the installer; -S keeps error output despite -s.
        run: |
          curl --fail -s -S -o cuda_installer.exe -L https://developer.download.nvidia.com/compute/cuda/13.0.1/network_installers/cuda_13.0.1_windows_network.exe
          cuda_installer.exe -s nvcc_13.0 cudart_13.0 cublas_dev_13.0 cufft_dev_13.0 curand_dev_13.0 cusparse_dev_13.0 cupti_13.0 thrust_13.0 nvtx_13.0 crt_13.0 nvptxcompiler_13.0
          cmake --version

      - name: Download cuDNN inference library
        # POSIX commands (mkdir -p, cp -r, glob expansion): run them in bash.
        # Under cmd the builtin `mkdir` would create a directory named "-p",
        # and a failing command in the middle would not stop the script.
        shell: bash
        run: |
          curl --fail -LJ https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.13.0.50_cuda13-archive.zip -o cudnn.zip
          unzip cudnn.zip
          mkdir -p cudnn
          cp -r cudnn-windows-*/include cudnn/ -v
          cp -r cudnn-windows-*/lib cudnn/ -v

      - name: Configure
        # Folded scalar: the lines below are joined with single spaces into
        # one cmake command line. %CUDA_PATH% and %cd% are expanded by cmd.
        run: >-
          cmake -S cmake -B build -G Ninja -Wno-dev -LA
          -D CMAKE_BUILD_TYPE=Release
          -D CMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded -D ONNX_USE_MSVC_STATIC_RUNTIME=ON -D ABSL_MSVC_STATIC_RUNTIME=ON
          -D onnxruntime_BUILD_UNIT_TESTS=OFF -D onnxruntime_BUILD_SHARED_LIB=ON
          -D onnxruntime_ENABLE_LTO=ON
          -D onnxruntime_USE_FLASH_ATTENTION=OFF -D onnxruntime_USE_LEAN_ATTENTION=OFF
          -D onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION=OFF -D onnxruntime_USE_FPA_INTB_GEMM=OFF
          -D onnxruntime_ENABLE_CPU_FP16_OPS=OFF -D onnxruntime_USE_AVX=ON
          -D onnxruntime_USE_CUDA=ON -D onnxruntime_CUDA_HOME="%CUDA_PATH%" -D onnxruntime_NVCC_THREADS=1
          -D onnxruntime_USE_CUDA_NHWC_OPS=ON
          -D onnxruntime_CUDNN_HOME="%cd%\cudnn"
          -D onnxruntime_ENABLE_NVTX_PROFILE=OFF
          -D onnxruntime_USE_DML=ON
          -D CMAKE_CUDA_ARCHITECTURES="75-real;86-real;89-real;120-real"
          -D CMAKE_CUDA_COMPILER="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\bin\nvcc"
        env:
          CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0

      - name: Build
        run: cmake --build build --verbose

      - name: Install
        run: cmake --install build --prefix onnxruntime-gpu

      - name: Show
        # `ls` is a POSIX tool, so do not depend on it being reachable from cmd.
        shell: bash
        run: ls -R onnxruntime-gpu

      - name: Upload
        uses: actions/upload-artifact@v4
        with:
          name: onnxruntime-windows-cuda
          retention-days: 1
          path: onnxruntime-gpu

      - name: Package
        shell: pwsh
        run: Compress-Archive onnxruntime-gpu -DestinationPath onnxruntime-gpu-win64.zip

      - name: Get description
        # Exports ORT_VERSION (from `git describe`) and a UTC timestamp to
        # later steps through the GITHUB_ENV file.
        shell: bash
        run: |
          echo "ORT_VERSION=$(git describe)" >> "$GITHUB_ENV"
          echo "TIME=$(date -u +"%y%m%d-%H%M")" >> "$GITHUB_ENV"

      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          files: onnxruntime-gpu-win64.zip
          name: Build ${{ env.ORT_VERSION }}
          tag_name: ${{ env.ORT_VERSION }}-${{ env.TIME }}
          prerelease: true