Update windows-cuda.yml #93
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds ONNX Runtime on Windows with CUDA (and DML) enabled, uploads the
# install tree as a workflow artifact, and publishes it as a GitHub prerelease.
name: Build (Windows, CUDA)
on: [push, workflow_dispatch]

# The Release step creates a tag/release, which requires a writable token.
permissions:
  contents: write

jobs:
  build-windows:
    runs-on: windows-2025
    defaults:
      run:
        # Every `run` step uses cmd unless it sets its own `shell`.
        shell: cmd
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4
        with:
          submodules: recursive
          # master
          ref: ecb26fb7754d7c9edf24b1844ea807180a2e3e23
          # Full history so `git describe` in "Get description" can find a tag.
          fetch-depth: 0

      - name: Fix
        # Wraps fpA_intB_gemm_profiler.cc in `#if USE_FPA_INTB_GEMM` / `#endif`
        # (via a temp file), then prints the patched file to the log.
        run: |
          (echo #if USE_FPA_INTB_GEMM & type onnxruntime\contrib_ops\cuda\llm\fpA_intB_gemm_profiler.cc & echo #endif) > temp && move /y temp onnxruntime\contrib_ops\cuda\llm\fpA_intB_gemm_profiler.cc
          type onnxruntime\contrib_ops\cuda\llm\fpA_intB_gemm_profiler.cc

      - name: Setup MSVC
        uses: ilammy/msvc-dev-cmd@v1

      - name: Setup CMake and Ninja
        run: pip install cmake ninja

      - name: Cache CUDA
        id: cache-cuda
        uses: actions/cache@v4
        with:
          path: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
          key: ${{ runner.os }}-cuda-13.0.1

      - name: Setup CUDA
        # Skipped when the toolkit directory was restored from the cache.
        if: steps.cache-cuda.outputs.cache-hit != 'true'
        # cmd does not stop on a failed line by itself, so abort explicitly if
        # the installer download fails instead of continuing without it.
        run: |
          curl -fsS -o cuda_installer.exe -L https://developer.download.nvidia.com/compute/cuda/13.0.1/network_installers/cuda_13.0.1_windows_network.exe || exit /b 1
          cuda_installer.exe -s nvcc_13.0 cudart_13.0 cublas_dev_13.0 cufft_dev_13.0 curand_dev_13.0 cusparse_dev_13.0 cupti_13.0 thrust_13.0 nvtx_13.0 crt_13.0 nvptxcompiler_13.0
          cmake --version

      - name: Download cuDNN inference library
        # These are POSIX commands (`mkdir -p`, `cp -r`, globs), so run them in
        # bash rather than the job's default cmd; bash also fails fast on the
        # first error. The result is a `cudnn` directory with include/ and lib/.
        shell: bash
        run: |
          curl -fLJ https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.13.0.50_cuda13-archive.zip -o cudnn.zip
          unzip cudnn.zip
          mkdir -p cudnn
          cp -r cudnn-windows-*/include cudnn/ -v
          cp -r cudnn-windows-*/lib cudnn/ -v

      - name: Configure
        # Folded scalar: the lines below are joined with single spaces into one
        # cmd command line. Keep every line at the same indentation, otherwise
        # the folding would preserve line breaks.
        run: >-
          cmake -S cmake -B build -G Ninja -Wno-dev -LA
          -D CMAKE_BUILD_TYPE=Release
          -D CMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded -D ONNX_USE_MSVC_STATIC_RUNTIME=ON -D ABSL_MSVC_STATIC_RUNTIME=ON
          -D onnxruntime_BUILD_UNIT_TESTS=OFF -D onnxruntime_BUILD_SHARED_LIB=ON
          -D onnxruntime_ENABLE_LTO=ON
          -D onnxruntime_USE_FLASH_ATTENTION=OFF -D onnxruntime_USE_LEAN_ATTENTION=OFF
          -D onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION=OFF -D onnxruntime_USE_FPA_INTB_GEMM=OFF
          -D onnxruntime_ENABLE_CPU_FP16_OPS=OFF -D onnxruntime_USE_AVX=ON
          -D onnxruntime_USE_CUDA=ON -D onnxruntime_CUDA_HOME="%CUDA_PATH%" -D onnxruntime_NVCC_THREADS=1
          -D onnxruntime_USE_CUDA_NHWC_OPS=ON
          -D onnxruntime_CUDNN_HOME="%cd%\cudnn"
          -D onnxruntime_ENABLE_NVTX_PROFILE=OFF
          -D onnxruntime_USE_DML=ON
          -D CMAKE_CUDA_ARCHITECTURES="75-real;86-real;89-real;120-real"
          -D CMAKE_CUDA_COMPILER="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\bin\nvcc"
        env:
          # Expanded by cmd as %CUDA_PATH% in the command above.
          CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0

      - name: Build
        run: cmake --build build --verbose

      - name: Install
        run: cmake --install build --prefix onnxruntime-gpu

      - name: Show
        run: ls -R onnxruntime-gpu

      - name: Upload
        uses: actions/upload-artifact@v4
        with:
          name: onnxruntime-windows-cuda
          retention-days: 1
          path: onnxruntime-gpu

      - name: Package
        shell: pwsh
        run: Compress-Archive onnxruntime-gpu -DestinationPath onnxruntime-gpu-win64.zip

      - name: Get description
        # Exports ORT_VERSION (from `git describe`) and a UTC timestamp TIME to
        # later steps through the GITHUB_ENV file.
        shell: bash
        run: |
          echo "ORT_VERSION=$(git describe)" >> "$GITHUB_ENV"
          echo "TIME=$(date -u +'%y%m%d-%H%M')" >> "$GITHUB_ENV"

      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          files: onnxruntime-gpu-win64.zip
          name: Build ${{ env.ORT_VERSION }}
          tag_name: ${{ env.ORT_VERSION }}-${{ env.TIME }}
          prerelease: true