Update windows-cuda.yml #93

Workflow file for this run

.github/workflows/windows-cuda.yml at 2a8e0e1

	# Builds ONNX Runtime on a Windows runner with the CUDA and DML options
	# enabled, uploads the install tree as a workflow artifact, and publishes
	# the zipped tree as a GitHub pre-release.
	name: Build (Windows, CUDA)

	on: [push, workflow_dispatch]

	# The Release step creates a tag and a release with GITHUB_TOKEN, which
	# requires write access to the repository contents.
	permissions:
	  contents: write

	jobs:
	  build-windows:
	    runs-on: windows-2025

	    # Steps run under cmd unless they set their own `shell`. Note that cmd
	    # does not stop a multi-line script when an intermediate command fails.
	    defaults:
	      run:
	        shell: cmd

	    steps:
	      - name: Checkout repo
	        uses: actions/checkout@v4
	        with:
	          submodules: recursive
	          # master
	          ref: 'ecb26fb7754d7c9edf24b1844ea807180a2e3e23'
	          # Full history so that `git describe` in "Get description" can
	          # find a tag to describe from.
	          fetch-depth: 0

	      # Wraps fpA_intB_gemm_profiler.cc in `#if USE_FPA_INTB_GEMM` / `#endif`
	      # and prints the patched file. Presumably needed because Configure
	      # passes onnxruntime_USE_FPA_INTB_GEMM=OFF -- TODO confirm.
	      - name: Fix
	        run: |
	          (echo #if USE_FPA_INTB_GEMM & type onnxruntime\contrib_ops\cuda\llm\fpA_intB_gemm_profiler.cc & echo #endif) > temp && move /y temp onnxruntime\contrib_ops\cuda\llm\fpA_intB_gemm_profiler.cc
	          type onnxruntime\contrib_ops\cuda\llm\fpA_intB_gemm_profiler.cc

	      - name: Setup MSVC
	        uses: ilammy/msvc-dev-cmd@v1

	      - name: Setup CMake and Ninja
	        run: pip install cmake ninja

	      # The cache key carries the CUDA version; bump it together with the
	      # installer URL and component names in "Setup CUDA".
	      - name: Cache CUDA
	        id: cache-cuda
	        uses: actions/cache@v4
	        with:
	          path: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
	          key: ${{ runner.os }}-cuda-13.0.1

	      # Only runs on a cache miss. `-f -S` plus `|| exit /b 1` make a failed
	      # download fail the step instead of saving an HTTP error page as the
	      # installer and carrying on.
	      - name: Setup CUDA
	        if: steps.cache-cuda.outputs.cache-hit != 'true'
	        run: |
	          curl -f -s -S -o cuda_installer.exe -L https://developer.download.nvidia.com/compute/cuda/13.0.1/network_installers/cuda_13.0.1_windows_network.exe || exit /b 1
	          cuda_installer.exe -s nvcc_13.0 cudart_13.0 cublas_dev_13.0 cufft_dev_13.0 curand_dev_13.0 cusparse_dev_13.0 cupti_13.0 thrust_13.0 nvtx_13.0 crt_13.0 nvptxcompiler_13.0
	          cmake --version

	      # Uses bash because the script relies on Unix tools and flags
	      # (`mkdir -p`, `cp -r`, `unzip`, globbing); under cmd, `mkdir -p cudnn`
	      # would also create a directory literally named `-p`.
	      - name: Download cuDNN inference library
	        shell: bash
	        run: |
	          curl -f -LJ https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.13.0.50_cuda13-archive.zip -o cudnn.zip
	          unzip cudnn.zip
	          mkdir -p cudnn
	          cp -r cudnn-windows-*/include cudnn/ -v
	          cp -r cudnn-windows-*/lib cudnn/ -v

	      # Folded scalar: the lines below are joined with single spaces into one
	      # cmake command line. Keep every line at the same indentation.
	      - name: Configure
	        run: >-
	          cmake -S cmake -B build -G Ninja -Wno-dev -LA
	          -D CMAKE_BUILD_TYPE=Release
	          -D CMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded -D ONNX_USE_MSVC_STATIC_RUNTIME=ON -D ABSL_MSVC_STATIC_RUNTIME=ON
	          -D onnxruntime_BUILD_UNIT_TESTS=OFF -D onnxruntime_BUILD_SHARED_LIB=ON
	          -D onnxruntime_ENABLE_LTO=ON
	          -D onnxruntime_USE_FLASH_ATTENTION=OFF -D onnxruntime_USE_LEAN_ATTENTION=OFF
	          -D onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION=OFF -D onnxruntime_USE_FPA_INTB_GEMM=OFF
	          -D onnxruntime_ENABLE_CPU_FP16_OPS=OFF -D onnxruntime_USE_AVX=ON
	          -D onnxruntime_USE_CUDA=ON -D onnxruntime_CUDA_HOME="%CUDA_PATH%" -D onnxruntime_NVCC_THREADS=1
	          -D onnxruntime_USE_CUDA_NHWC_OPS=ON
	          -D onnxruntime_CUDNN_HOME="%cd%\cudnn"
	          -D onnxruntime_ENABLE_NVTX_PROFILE=OFF
	          -D onnxruntime_USE_DML=ON
	          -D CMAKE_CUDA_ARCHITECTURES="75-real;86-real;89-real;120-real"
	          -D CMAKE_CUDA_COMPILER="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\bin\nvcc"
	        env:
	          CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0

	      - name: Build
	        run: cmake --build build --verbose

	      - name: Install
	        run: cmake --install build --prefix onnxruntime-gpu

	      # Lists the install tree in the log; `ls` is a Unix tool, so run it
	      # under bash rather than the default cmd shell.
	      - name: Show
	        shell: bash
	        run: ls -R onnxruntime-gpu

	      - name: Upload
	        uses: actions/upload-artifact@v4
	        with:
	          name: onnxruntime-windows-cuda
	          retention-days: 1
	          path: onnxruntime-gpu

	      - name: Package
	        shell: pwsh
	        run: Compress-Archive onnxruntime-gpu -DestinationPath onnxruntime-gpu-win64.zip

	      # Exports ORT_VERSION (output of `git describe`) and TIME (UTC
	      # timestamp, yymmdd-HHMM) for the Release step.
	      - name: Get description
	        shell: bash
	        run: |
	          echo ORT_VERSION=`git describe` >> $GITHUB_ENV
	          echo TIME=`date -u +"%y%m%d-%H%M"` >> $GITHUB_ENV

	      # Publishes the zip as a pre-release tagged <ORT_VERSION>-<TIME>.
	      - name: Release
	        uses: softprops/action-gh-release@v2
	        with:
	          files: onnxruntime-gpu-win64.zip
	          name: Build ${{ env.ORT_VERSION }}
	          tag_name: ${{ env.ORT_VERSION }}-${{ env.TIME }}
	          prerelease: true

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Update windows-cuda.yml #93

Workflow file

Update windows-cuda.yml #93

Uh oh!

Workflow file for this run