# fix yaml interpretation (#88)
#
# Workflow file for this run
# Build onnxruntime (shared lib, CUDA + DML EPs) on a Windows runner and
# publish the result as an artifact and a prerelease.
name: Build (Windows, CUDA)

on: [push, workflow_dispatch]

jobs:
  build-windows:
    runs-on: windows-2025
    defaults:
      run:
        shell: cmd
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4
        with:
          submodules: recursive
          # master
          ref: 4754a1d64e5920a715b0396906f339e6c15742a0
          # full history so `git describe` works in the "Get description" step
          fetch-depth: 0

      # Wrap fpA_intB_gemm_profiler.cc in `#if USE_FPA_INTB_GEMM` so the file
      # compiles to nothing when onnxruntime_USE_FPA_INTB_GEMM=OFF (see the
      # Configure step below).
      # A block scalar is REQUIRED here: in a plain scalar, the ` #if` after a
      # space would start a YAML comment and silently truncate the command
      # after `(echo ` ("fix yaml interpretation"). Inside `|-` the `#` is
      # literal text.
      - name: Fix
        run: |-
          (echo '#if USE_FPA_INTB_GEMM' & type onnxruntime\contrib_ops\cuda\llm\fpA_intB_gemm_profiler.cc & echo '#endif') > temp && move /y temp onnxruntime\contrib_ops\cuda\llm\fpA_intB_gemm_profiler.cc

      - name: Setup MSVC
        uses: ilammy/msvc-dev-cmd@v1

      - name: Setup Ninja
        run: pip install ninja

      # Cache the CUDA toolkit install tree; the Setup CUDA step is skipped
      # on a cache hit.
      - name: Cache CUDA
        id: cache-cuda
        uses: actions/cache@v4
        with:
          # single-quoted so the backslashes are taken literally
          path: 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA'
          key: ${{ runner.os }}-cuda-12.9.1

      - name: Setup CUDA
        if: steps.cache-cuda.outputs.cache-hit != 'true'
        run: |
          curl -s -o cuda_installer.exe -L https://developer.download.nvidia.com/compute/cuda/12.9.1/network_installers/cuda_12.9.1_windows_network.exe
          cuda_installer.exe -s nvcc_12.9 cudart_12.9 cublas_dev_12.9 cufft_dev_12.9 curand_dev_12.9 cusparse_dev_12.9 cupti_12.9 thrust_12.9 nvtx_12.9

      # NOTE(review): this step uses unzip/cp/mkdir -p under the default cmd
      # shell — presumably relying on the GNU tools shipped on the runner
      # image being on PATH; confirm, or set `shell: bash` for this step.
      - name: Download cuDNN inference library
        run: |
          curl -LJ https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.12.0.46_cuda12-archive.zip -o cudnn.zip
          unzip cudnn.zip
          mkdir -p cudnn
          cp -r cudnn-windows-*/include cudnn/ -v
          cp -r cudnn-windows-*/lib cudnn/ -v

      # Folded scalar (>-): the option list below folds into ONE cmake
      # command line at runtime.
      - name: Configure
        run: >-
          cmake -S cmake -B build -G Ninja -Wno-dev -LA
          -D CMAKE_BUILD_TYPE=Release
          -D CMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded -D ONNX_USE_MSVC_STATIC_RUNTIME=ON -D ABSL_MSVC_STATIC_RUNTIME=ON
          -D onnxruntime_BUILD_UNIT_TESTS=OFF -D onnxruntime_BUILD_SHARED_LIB=ON
          -D onnxruntime_ENABLE_LTO=ON
          -D onnxruntime_USE_FLASH_ATTENTION=OFF -D onnxruntime_USE_LEAN_ATTENTION=OFF
          -D onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION=OFF -D onnxruntime_USE_FPA_INTB_GEMM=OFF
          -D onnxruntime_ENABLE_CPU_FP16_OPS=OFF -D onnxruntime_USE_AVX=ON
          -D onnxruntime_USE_CUDA=ON -D onnxruntime_CUDA_HOME="%CUDA_PATH%" -D onnxruntime_NVCC_THREADS=1
          -D onnxruntime_USE_CUDA_NHWC_OPS=ON
          -D onnxruntime_CUDNN_HOME="%cd%\cudnn"
          -D onnxruntime_ENABLE_NVTX_PROFILE=OFF
          -D onnxruntime_USE_DML=ON
          -D CMAKE_CUDA_ARCHITECTURES="75-real;86-real;89-real;120-real"
        env:
          CUDA_PATH: 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.9'

      - name: Build
        run: cmake --build build --verbose

      - name: Install
        run: cmake --install build --prefix onnxruntime-gpu

      - name: Show
        run: ls -R onnxruntime-gpu

      - name: Upload
        uses: actions/upload-artifact@v4
        with:
          name: onnxruntime-windows-cuda
          retention-days: 1
          path: onnxruntime-gpu

      - name: Package
        shell: pwsh
        run: Compress-Archive onnxruntime-gpu -DestinationPath onnxruntime-gpu-win64.zip

      # Export version/time strings for the Release step via GITHUB_ENV.
      - name: Get description
        shell: bash
        run: |
          echo ORT_VERSION=`git describe` >> $GITHUB_ENV
          echo TIME=`date -u +"%y%m%d-%H%M"` >> $GITHUB_ENV

      - name: Release
        uses: softprops/action-gh-release@v1
        with:
          files: onnxruntime-gpu-win64.zip
          name: Build ${{ env.ORT_VERSION }}
          tag_name: ${{ env.ORT_VERSION }}-${{ env.TIME }}
          prerelease: true