# disable attentions #86
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Builds ONNX Runtime on a Windows runner with CUDA and DML enabled, uploads
# the install tree as an artifact, and publishes it as a prerelease.
name: Build (Windows, CUDA)
on: [push, workflow_dispatch]

jobs:
  build-windows:
    runs-on: windows-2025
    # The Release step creates a tag and a GitHub release, which requires
    # write access to repository contents.
    permissions:
      contents: write
    defaults:
      run:
        shell: cmd
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4
        with:
          submodules: recursive
          # master
          ref: 4754a1d64e5920a715b0396906f339e6c15742a0
          # Fetch the full history; `git describe` in the "Get description"
          # step runs against this checkout.
          fetch-depth: 0

      - name: Fix
        # Wraps fpA_intB_gemm_profiler.cc in `#if USE_FPA_INTB_GEMM` / `#endif`.
        # A block scalar is required here: in a plain scalar the " #" after
        # `echo` would start a YAML comment and cut the command short.
        run: >-
          (echo #if USE_FPA_INTB_GEMM & type onnxruntime/contrib_ops/cuda/llm/fpA_intB_gemm_profiler.cc & echo #endif) > temp && move /y temp onnxruntime/contrib_ops/cuda/llm/fpA_intB_gemm_profiler.cc

      - name: Setup MSVC
        uses: ilammy/msvc-dev-cmd@v1

      - name: Setup Ninja
        run: pip install ninja

      - name: Cache CUDA
        id: cache-cuda
        uses: actions/cache@v4
        with:
          path: 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA'
          key: ${{ runner.os }}-cuda-12.9.1

      - name: Setup CUDA
        # Only install the toolkit when the cache above did not restore it.
        if: steps.cache-cuda.outputs.cache-hit != 'true'
        # -f makes curl fail on an HTTP error instead of saving the error page
        # as the installer.
        run: |
          curl -f -s -o cuda_installer.exe -L https://developer.download.nvidia.com/compute/cuda/12.9.1/network_installers/cuda_12.9.1_windows_network.exe
          cuda_installer.exe -s nvcc_12.9 cudart_12.9 cublas_dev_12.9 cufft_dev_12.9 curand_dev_12.9 cusparse_dev_12.9 cupti_12.9 thrust_12.9 nvtx_12.9

      - name: Download cuDNN inference library
        # This script uses POSIX tools (unzip, mkdir -p, cp -r with globs), so
        # run it with bash instead of the job's default cmd shell: under cmd,
        # `mkdir -p` creates a directory literally named "-p" and a failure on
        # an earlier line does not fail the step.
        shell: bash
        run: |
          curl -fLJ https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.12.0.46_cuda12-archive.zip -o cudnn.zip
          unzip cudnn.zip
          mkdir -p cudnn
          cp -r cudnn-windows-*/include cudnn/ -v
          cp -r cudnn-windows-*/lib cudnn/ -v

      - name: Configure
        # Folded scalar: the lines below are joined with single spaces into one
        # cmake command line.
        run: >-
          cmake -S cmake -B build -G Ninja -Wno-dev -LA
          -D CMAKE_BUILD_TYPE=Release
          -D CMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded -D ONNX_USE_MSVC_STATIC_RUNTIME=ON -D ABSL_MSVC_STATIC_RUNTIME=ON
          -D onnxruntime_BUILD_UNIT_TESTS=OFF -D onnxruntime_BUILD_SHARED_LIB=ON
          -D onnxruntime_ENABLE_LTO=ON
          -D onnxruntime_USE_FLASH_ATTENTION=OFF -D onnxruntime_USE_LEAN_ATTENTION=OFF
          -D onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION=OFF -D onnxruntime_USE_FPA_INTB_GEMM=OFF
          -D onnxruntime_ENABLE_CPU_FP16_OPS=OFF -D onnxruntime_USE_AVX=ON
          -D onnxruntime_USE_CUDA=ON -D onnxruntime_CUDA_HOME="%CUDA_PATH%" -D onnxruntime_NVCC_THREADS=1
          -D onnxruntime_USE_CUDA_NHWC_OPS=ON
          -D onnxruntime_CUDNN_HOME="%cd%\cudnn"
          -D onnxruntime_ENABLE_NVTX_PROFILE=OFF
          -D onnxruntime_USE_DML=ON
          -D CMAKE_CUDA_ARCHITECTURES="75-real;86-real;89-real;120-real"
        env:
          # Expanded by cmd as %CUDA_PATH% in the command above.
          CUDA_PATH: 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.9'

      - name: Build
        run: cmake --build build --verbose

      - name: Install
        run: cmake --install build --prefix onnxruntime-gpu

      - name: Show
        run: ls -R onnxruntime-gpu

      - name: Upload
        uses: actions/upload-artifact@v4
        with:
          name: onnxruntime-windows-cuda
          retention-days: 1
          path: onnxruntime-gpu

      - name: Package
        shell: pwsh
        run: Compress-Archive onnxruntime-gpu -DestinationPath onnxruntime-gpu-win64.zip

      - name: Get description
        # Exports ORT_VERSION and TIME for the release name and tag below.
        shell: bash
        run: |
          echo "ORT_VERSION=$(git describe)" >> "$GITHUB_ENV"
          echo "TIME=$(date -u +"%y%m%d-%H%M")" >> "$GITHUB_ENV"

      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          files: onnxruntime-gpu-win64.zip
          name: Build ${{ env.ORT_VERSION }}
          tag_name: ${{ env.ORT_VERSION }}-${{ env.TIME }}
          prerelease: true