From 7af3feeae86fdc264b172d9a9e24ecaf51a33945 Mon Sep 17 00:00:00 2001 From: Mariusz Sikora Date: Thu, 23 May 2024 13:30:53 +0200 Subject: [PATCH 1/2] [AMDGPU] Update tests for last-use in global/scratch/flat/buffer load instructions --- .../llvm.amdgcn.buffer.load-last-use.ll | 27 +++++++++++++++++++ .../AMDGPU/memory-legalizer-flat-lastuse.ll | 7 ++--- .../AMDGPU/memory-legalizer-global-lastuse.ll | 7 ++--- .../memory-legalizer-private-lastuse.ll | 7 ++--- 4 files changed, 33 insertions(+), 15 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load-last-use.ll diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load-last-use.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load-last-use.ll new file mode 100644 index 0000000000000..e19db7f7eb15f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load-last-use.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck %s --check-prefix=GCN +;RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck %s --check-prefix=GCN + +define amdgpu_ps float @raw_buffer_load(<4 x i32> inreg) { +; GCN-LABEL: raw_buffer_load: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: buffer_load_b32 v0, off, s[0:3], null th:TH_LOAD_LU +; GCN-NEXT: s_wait_loadcnt 0x0 +; GCN-NEXT: ; return to shader part epilog +main_body: + %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %0, i32 0, i32 0, i32 3) + ret float %data +} + +define amdgpu_ps float @struct_buffer_load(<4 x i32> inreg) { +; GCN-LABEL: struct_buffer_load: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: buffer_load_b32 v0, v0, s[0:3], null idxen th:TH_LOAD_LU +; GCN-NEXT: s_wait_loadcnt 0x0 +; GCN-NEXT: ; return to shader part epilog +main_body: + %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 3) + ret float %data +} + diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll index e7c6044b3fb6b..fb40274cac1ba 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12,GFX12-WGP %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12,GFX12-CU %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s define amdgpu_kernel void @flat_last_use_load_0(ptr %in, ptr %out) { ; GFX12-LABEL: flat_last_use_load_0: @@ -107,6 +107,3 @@ entry: !0 = !{i32 1} declare i32 @llvm.amdgcn.workitem.id.x() -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX12-CU: {{.*}} -; GFX12-WGP: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll index c889c67a5ca37..7a9cb992a0cd1 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12,GFX12-WGP %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12,GFX12-CU %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s define amdgpu_kernel void @global_last_use_load_0(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; GFX12-LABEL: global_last_use_load_0: @@ -92,6 +92,3 @@ entry: } !0 = !{i32 1} declare i32 @llvm.amdgcn.workitem.id.x() -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX12-CU: {{.*}} -; GFX12-WGP: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll index 1f835349b12b0..61cec731feb56 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12,GFX12-WGP %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12,GFX12-CU %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s define amdgpu_kernel void @private_last_use_load_0(ptr addrspace(5) %in, ptr addrspace(1) %out) { ; GFX12-LABEL: private_last_use_load_0: @@ -85,6 +85,3 @@ entry: !0 = !{i32 1} declare i32 @llvm.amdgcn.workitem.id.x() -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX12-CU: {{.*}} -; GFX12-WGP: {{.*}} From 91161a8acb6238ca80113e0eb00e1ea9ca81c1e1 Mon Sep 17 00:00:00 2001 From: Mariusz Sikora Date: Mon, 10 Jun 2024 17:10:30 +0200 Subject: [PATCH 2/2] Update test --- .../llvm.amdgcn.buffer.load-last-use.ll | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load-last-use.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load-last-use.ll index e19db7f7eb15f..de484e3db18ab 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load-last-use.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load-last-use.ll @@ -1,25 +1,35 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck %s --check-prefix=GCN -;RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck %s --check-prefix=GCN +;RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 | FileCheck %s --check-prefix=GCN +;RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 | FileCheck %s --check-prefix=GCN -define amdgpu_ps float @raw_buffer_load(<4 x i32> inreg) { +define float @raw_buffer_load(<4 x i32> inreg) { ; GCN-LABEL: raw_buffer_load: ; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_wait_loadcnt_dscnt 0x0 +; GCN-NEXT: s_wait_expcnt 0x0 +; GCN-NEXT: s_wait_samplecnt 0x0 +; GCN-NEXT: s_wait_bvhcnt 0x0 +; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: buffer_load_b32 v0, off, s[0:3], null th:TH_LOAD_LU ; GCN-NEXT: s_wait_loadcnt 0x0 -; GCN-NEXT: ; return to shader part epilog +; GCN-NEXT: s_setpc_b64 s[30:31] main_body: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %0, i32 0, i32 0, i32 3) ret float %data } -define amdgpu_ps float @struct_buffer_load(<4 x i32> inreg) { +define float @struct_buffer_load(<4 x i32> inreg) { ; GCN-LABEL: struct_buffer_load: ; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_wait_loadcnt_dscnt 0x0 +; GCN-NEXT: s_wait_expcnt 0x0 +; GCN-NEXT: s_wait_samplecnt 0x0 +; GCN-NEXT: s_wait_bvhcnt 0x0 +; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: buffer_load_b32 v0, v0, s[0:3], null idxen th:TH_LOAD_LU ; GCN-NEXT: s_wait_loadcnt 0x0 -; GCN-NEXT: ; return to shader part epilog +; GCN-NEXT: s_setpc_b64 s[30:31] main_body: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 3) ret float %data