diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index ed493d50712a2..773c97f7b4dc0 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -995,7 +995,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // actions computeRegisterProperties(STI.getRegisterInfo()); - setMinCmpXchgSizeInBits(STI.hasAtomCas16() ? 16 : 32); + setMinCmpXchgSizeInBits(STI.getMinCmpXchgSizeInBits()); setMaxAtomicSizeInBitsSupported(64); setMaxDivRemBitWidthSupported(64); } diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index 3b5c28e357e0c..919f487c70141 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -128,6 +128,8 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { // set of equivalent memory operations with a scalar data-type, executed in // an unspecified order on the elements in the vector. 
unsigned getMaxRequiredAlignment() const { return 8; } + // Atomics emulated by 32/64-bit CAS loops produce better SASS than 16-bit CAS + unsigned getMinCmpXchgSizeInBits() const { return 32; } unsigned getPTXVersion() const { return PTXVersion; } diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll index b76b3e59e9e6d..b180928af82a4 100644 --- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll +++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll @@ -134,10 +134,10 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half % ; CHECKPTX62-NEXT: @%p4 bra $L__BB0_7; ; CHECKPTX62-NEXT: // %bb.8: // %atomicrmw.end ; CHECKPTX62-NEXT: ret; - %r1 = atomicrmw fadd ptr %dp0, half %val seq_cst - %r2 = atomicrmw fadd ptr %dp0, half 1.0 seq_cst - %r3 = atomicrmw fadd ptr addrspace(1) %dp1, half %val seq_cst - %r4 = atomicrmw fadd ptr addrspace(3) %dp3, half %val seq_cst + %r1 = atomicrmw fadd ptr %dp0, half %val monotonic + %r2 = atomicrmw fadd ptr %dp0, half 1.0 monotonic + %r3 = atomicrmw fadd ptr addrspace(1) %dp1, half %val monotonic + %r4 = atomicrmw fadd ptr addrspace(3) %dp3, half %val monotonic ret void } diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll index 67552b95e0491..67abfe8295a62 100644 --- a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll +++ b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll @@ -46,59 +46,101 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat ; CHECKPTX71-LABEL: test( ; CHECKPTX71: { ; CHECKPTX71-NEXT: .reg .pred %p<5>; -; CHECKPTX71-NEXT: .reg .b16 %rs<26>; -; CHECKPTX71-NEXT: .reg .b32 %r<4>; +; CHECKPTX71-NEXT: .reg .b16 %rs<14>; +; CHECKPTX71-NEXT: .reg .b32 %r<58>; ; CHECKPTX71-EMPTY: ; CHECKPTX71-NEXT: // %bb.0: -; CHECKPTX71-NEXT: ld.param.b16 %rs13, [test_param_3]; -; CHECKPTX71-NEXT: ld.param.u32 %r3, [test_param_2]; -; CHECKPTX71-NEXT: ld.param.u32 %r2, [test_param_1]; -; CHECKPTX71-NEXT: ld.param.u32 %r1, [test_param_0]; -; 
CHECKPTX71-NEXT: ld.b16 %rs22, [%r1]; -; CHECKPTX71-NEXT: $L__BB0_1: // %atomicrmw.start14 +; CHECKPTX71-NEXT: ld.param.b16 %rs1, [test_param_3]; +; CHECKPTX71-NEXT: ld.param.u32 %r23, [test_param_2]; +; CHECKPTX71-NEXT: ld.param.u32 %r22, [test_param_1]; +; CHECKPTX71-NEXT: ld.param.u32 %r24, [test_param_0]; +; CHECKPTX71-NEXT: and.b32 %r1, %r24, -4; +; CHECKPTX71-NEXT: and.b32 %r25, %r24, 3; +; CHECKPTX71-NEXT: shl.b32 %r2, %r25, 3; +; CHECKPTX71-NEXT: mov.b32 %r26, 65535; +; CHECKPTX71-NEXT: shl.b32 %r27, %r26, %r2; +; CHECKPTX71-NEXT: not.b32 %r3, %r27; +; CHECKPTX71-NEXT: ld.u32 %r54, [%r1]; +; CHECKPTX71-NEXT: $L__BB0_1: // %atomicrmw.start45 ; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECKPTX71-NEXT: mov.b16 %rs14, 0x3F80; -; CHECKPTX71-NEXT: fma.rn.bf16 %rs15, %rs22, %rs14, %rs13; -; CHECKPTX71-NEXT: atom.cas.b16 %rs3, [%r1], %rs22, %rs15; -; CHECKPTX71-NEXT: setp.ne.s16 %p1, %rs3, %rs22; -; CHECKPTX71-NEXT: mov.u16 %rs22, %rs3; +; CHECKPTX71-NEXT: shr.u32 %r28, %r54, %r2; +; CHECKPTX71-NEXT: cvt.u16.u32 %rs2, %r28; +; CHECKPTX71-NEXT: mov.b16 %rs3, 0x3F80; +; CHECKPTX71-NEXT: fma.rn.bf16 %rs4, %rs2, %rs3, %rs1; +; CHECKPTX71-NEXT: cvt.u32.u16 %r29, %rs4; +; CHECKPTX71-NEXT: shl.b32 %r30, %r29, %r2; +; CHECKPTX71-NEXT: and.b32 %r31, %r54, %r3; +; CHECKPTX71-NEXT: or.b32 %r32, %r31, %r30; +; CHECKPTX71-NEXT: atom.cas.b32 %r6, [%r1], %r54, %r32; +; CHECKPTX71-NEXT: setp.ne.s32 %p1, %r6, %r54; +; CHECKPTX71-NEXT: mov.u32 %r54, %r6; ; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1; -; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end13 -; CHECKPTX71-NEXT: ld.b16 %rs23, [%r1]; -; CHECKPTX71-NEXT: $L__BB0_3: // %atomicrmw.start8 +; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end44 +; CHECKPTX71-NEXT: ld.u32 %r55, [%r1]; +; CHECKPTX71-NEXT: $L__BB0_3: // %atomicrmw.start27 ; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECKPTX71-NEXT: mov.b16 %rs16, 0x3F80; -; CHECKPTX71-NEXT: fma.rn.bf16 %rs17, %rs23, %rs16, %rs16; -; CHECKPTX71-NEXT: atom.cas.b16 %rs6, 
[%r1], %rs23, %rs17; -; CHECKPTX71-NEXT: setp.ne.s16 %p2, %rs6, %rs23; -; CHECKPTX71-NEXT: mov.u16 %rs23, %rs6; +; CHECKPTX71-NEXT: shr.u32 %r33, %r55, %r2; +; CHECKPTX71-NEXT: cvt.u16.u32 %rs5, %r33; +; CHECKPTX71-NEXT: mov.b16 %rs6, 0x3F80; +; CHECKPTX71-NEXT: fma.rn.bf16 %rs7, %rs5, %rs6, %rs6; +; CHECKPTX71-NEXT: cvt.u32.u16 %r34, %rs7; +; CHECKPTX71-NEXT: shl.b32 %r35, %r34, %r2; +; CHECKPTX71-NEXT: and.b32 %r36, %r55, %r3; +; CHECKPTX71-NEXT: or.b32 %r37, %r36, %r35; +; CHECKPTX71-NEXT: atom.cas.b32 %r9, [%r1], %r55, %r37; +; CHECKPTX71-NEXT: setp.ne.s32 %p2, %r9, %r55; +; CHECKPTX71-NEXT: mov.u32 %r55, %r9; ; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3; -; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end7 -; CHECKPTX71-NEXT: ld.global.b16 %rs24, [%r2]; -; CHECKPTX71-NEXT: $L__BB0_5: // %atomicrmw.start2 +; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end26 +; CHECKPTX71-NEXT: and.b32 %r10, %r22, -4; +; CHECKPTX71-NEXT: shl.b32 %r38, %r22, 3; +; CHECKPTX71-NEXT: and.b32 %r11, %r38, 24; +; CHECKPTX71-NEXT: mov.b32 %r39, 65535; +; CHECKPTX71-NEXT: shl.b32 %r40, %r39, %r11; +; CHECKPTX71-NEXT: not.b32 %r12, %r40; +; CHECKPTX71-NEXT: ld.global.u32 %r56, [%r10]; +; CHECKPTX71-NEXT: $L__BB0_5: // %atomicrmw.start9 ; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECKPTX71-NEXT: mov.b16 %rs18, 0x3F80; -; CHECKPTX71-NEXT: fma.rn.bf16 %rs19, %rs24, %rs18, %rs13; -; CHECKPTX71-NEXT: atom.global.cas.b16 %rs9, [%r2], %rs24, %rs19; -; CHECKPTX71-NEXT: setp.ne.s16 %p3, %rs9, %rs24; -; CHECKPTX71-NEXT: mov.u16 %rs24, %rs9; +; CHECKPTX71-NEXT: shr.u32 %r41, %r56, %r11; +; CHECKPTX71-NEXT: cvt.u16.u32 %rs8, %r41; +; CHECKPTX71-NEXT: mov.b16 %rs9, 0x3F80; +; CHECKPTX71-NEXT: fma.rn.bf16 %rs10, %rs8, %rs9, %rs1; +; CHECKPTX71-NEXT: cvt.u32.u16 %r42, %rs10; +; CHECKPTX71-NEXT: shl.b32 %r43, %r42, %r11; +; CHECKPTX71-NEXT: and.b32 %r44, %r56, %r12; +; CHECKPTX71-NEXT: or.b32 %r45, %r44, %r43; +; CHECKPTX71-NEXT: atom.global.cas.b32 %r15, [%r10], %r56, %r45; +; CHECKPTX71-NEXT: 
setp.ne.s32 %p3, %r15, %r56; +; CHECKPTX71-NEXT: mov.u32 %r56, %r15; ; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5; -; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end1 -; CHECKPTX71-NEXT: ld.shared.b16 %rs25, [%r3]; +; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end8 +; CHECKPTX71-NEXT: and.b32 %r16, %r23, -4; +; CHECKPTX71-NEXT: shl.b32 %r46, %r23, 3; +; CHECKPTX71-NEXT: and.b32 %r17, %r46, 24; +; CHECKPTX71-NEXT: mov.b32 %r47, 65535; +; CHECKPTX71-NEXT: shl.b32 %r48, %r47, %r17; +; CHECKPTX71-NEXT: not.b32 %r18, %r48; +; CHECKPTX71-NEXT: ld.shared.u32 %r57, [%r16]; ; CHECKPTX71-NEXT: $L__BB0_7: // %atomicrmw.start ; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECKPTX71-NEXT: mov.b16 %rs20, 0x3F80; -; CHECKPTX71-NEXT: fma.rn.bf16 %rs21, %rs25, %rs20, %rs13; -; CHECKPTX71-NEXT: atom.shared.cas.b16 %rs12, [%r3], %rs25, %rs21; -; CHECKPTX71-NEXT: setp.ne.s16 %p4, %rs12, %rs25; -; CHECKPTX71-NEXT: mov.u16 %rs25, %rs12; +; CHECKPTX71-NEXT: shr.u32 %r49, %r57, %r17; +; CHECKPTX71-NEXT: cvt.u16.u32 %rs11, %r49; +; CHECKPTX71-NEXT: mov.b16 %rs12, 0x3F80; +; CHECKPTX71-NEXT: fma.rn.bf16 %rs13, %rs11, %rs12, %rs1; +; CHECKPTX71-NEXT: cvt.u32.u16 %r50, %rs13; +; CHECKPTX71-NEXT: shl.b32 %r51, %r50, %r17; +; CHECKPTX71-NEXT: and.b32 %r52, %r57, %r18; +; CHECKPTX71-NEXT: or.b32 %r53, %r52, %r51; +; CHECKPTX71-NEXT: atom.shared.cas.b32 %r21, [%r16], %r57, %r53; +; CHECKPTX71-NEXT: setp.ne.s32 %p4, %r21, %r57; +; CHECKPTX71-NEXT: mov.u32 %r57, %r21; ; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7; ; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end ; CHECKPTX71-NEXT: ret; - %r1 = atomicrmw fadd ptr %dp0, bfloat %val seq_cst - %r2 = atomicrmw fadd ptr %dp0, bfloat 1.0 seq_cst - %r3 = atomicrmw fadd ptr addrspace(1) %dp1, bfloat %val seq_cst - %r4 = atomicrmw fadd ptr addrspace(3) %dp3, bfloat %val seq_cst + %r1 = atomicrmw fadd ptr %dp0, bfloat %val monotonic + %r2 = atomicrmw fadd ptr %dp0, bfloat 1.0 monotonic + %r3 = atomicrmw fadd ptr addrspace(1) %dp1, bfloat %val monotonic + %r4 = 
atomicrmw fadd ptr addrspace(3) %dp3, bfloat %val monotonic ret void } diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg.ll b/llvm/test/CodeGen/NVPTX/cmpxchg.ll index 608dbb3a0ba73..33a1f15c6a5cd 100644 --- a/llvm/test/CodeGen/NVPTX/cmpxchg.ll +++ b/llvm/test/CodeGen/NVPTX/cmpxchg.ll @@ -53,43 +53,44 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-LABEL: relaxed_sys_i8( ; SM70: { ; SM70-NEXT: .reg .pred %p<3>; -; SM70-NEXT: .reg .b16 %rs<17>; -; SM70-NEXT: .reg .b32 %r<3>; -; SM70-NEXT: .reg .b64 %rd<5>; +; SM70-NEXT: .reg .b16 %rs<2>; +; SM70-NEXT: .reg .b32 %r<21>; +; SM70-NEXT: .reg .b64 %rd<3>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: -; SM70-NEXT: ld.param.u8 %rs9, [relaxed_sys_i8_param_2]; +; SM70-NEXT: ld.param.u8 %rs1, [relaxed_sys_i8_param_2]; ; SM70-NEXT: ld.param.u64 %rd2, [relaxed_sys_i8_param_0]; -; SM70-NEXT: and.b64 %rd1, %rd2, -2; -; SM70-NEXT: ld.param.u8 %rs10, [relaxed_sys_i8_param_1]; -; SM70-NEXT: and.b64 %rd3, %rd2, 1; -; SM70-NEXT: shl.b64 %rd4, %rd3, 3; -; SM70-NEXT: cvt.u32.u64 %r1, %rd4; -; SM70-NEXT: mov.b16 %rs11, 255; -; SM70-NEXT: shl.b16 %rs12, %rs11, %r1; -; SM70-NEXT: not.b16 %rs2, %rs12; -; SM70-NEXT: shl.b16 %rs3, %rs9, %r1; -; SM70-NEXT: shl.b16 %rs4, %rs10, %r1; -; SM70-NEXT: ld.u16 %rs13, [%rd1]; -; SM70-NEXT: and.b16 %rs16, %rs13, %rs2; +; SM70-NEXT: and.b64 %rd1, %rd2, -4; +; SM70-NEXT: cvt.u32.u64 %r9, %rd2; +; SM70-NEXT: and.b32 %r10, %r9, 3; +; SM70-NEXT: shl.b32 %r1, %r10, 3; +; SM70-NEXT: mov.b32 %r11, 255; +; SM70-NEXT: shl.b32 %r12, %r11, %r1; +; SM70-NEXT: not.b32 %r2, %r12; +; SM70-NEXT: cvt.u32.u16 %r13, %rs1; +; SM70-NEXT: and.b32 %r14, %r13, 255; +; SM70-NEXT: shl.b32 %r3, %r14, %r1; +; SM70-NEXT: ld.param.u8 %r15, [relaxed_sys_i8_param_1]; +; SM70-NEXT: shl.b32 %r4, %r15, %r1; +; SM70-NEXT: ld.u32 %r16, [%rd1]; +; SM70-NEXT: and.b32 %r20, %r16, %r2; ; SM70-NEXT: $L__BB0_1: // %partword.cmpxchg.loop ; SM70-NEXT: // =>This Inner Loop Header: Depth=1 -; SM70-NEXT: or.b16 %rs14, %rs16, %rs3; -; SM70-NEXT: 
or.b16 %rs15, %rs16, %rs4; -; SM70-NEXT: atom.cas.b16 %rs7, [%rd1], %rs15, %rs14; -; SM70-NEXT: setp.eq.s16 %p1, %rs7, %rs15; +; SM70-NEXT: or.b32 %r17, %r20, %r3; +; SM70-NEXT: or.b32 %r18, %r20, %r4; +; SM70-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; +; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; ; SM70-NEXT: @%p1 bra $L__BB0_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1 -; SM70-NEXT: and.b16 %rs8, %rs7, %rs2; -; SM70-NEXT: setp.ne.s16 %p2, %rs16, %rs8; -; SM70-NEXT: mov.u16 %rs16, %rs8; +; SM70-NEXT: and.b32 %r8, %r7, %r2; +; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; +; SM70-NEXT: mov.u32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB0_1; ; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end -; SM70-NEXT: cvt.u32.u16 %r2, %rs9; -; SM70-NEXT: st.param.b32 [func_retval0], %r2; +; SM70-NEXT: st.param.b32 [func_retval0], %r13; ; SM70-NEXT: ret; - %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new seq_cst seq_cst + %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new monotonic monotonic ret i8 %new } @@ -137,19 +138,44 @@ define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; ; SM70-LABEL: relaxed_sys_i16( ; SM70: { -; SM70-NEXT: .reg .b16 %rs<4>; -; SM70-NEXT: .reg .b32 %r<2>; -; SM70-NEXT: .reg .b64 %rd<2>; +; SM70-NEXT: .reg .pred %p<3>; +; SM70-NEXT: .reg .b16 %rs<2>; +; SM70-NEXT: .reg .b32 %r<20>; +; SM70-NEXT: .reg .b64 %rd<3>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: -; SM70-NEXT: ld.param.u64 %rd1, [relaxed_sys_i16_param_0]; -; SM70-NEXT: ld.param.u16 %rs1, [relaxed_sys_i16_param_1]; -; SM70-NEXT: ld.param.u16 %rs2, [relaxed_sys_i16_param_2]; -; SM70-NEXT: atom.cas.b16 %rs3, [%rd1], %rs1, %rs2; -; SM70-NEXT: cvt.u32.u16 %r1, %rs2; -; SM70-NEXT: st.param.b32 [func_retval0], %r1; +; SM70-NEXT: ld.param.u16 %rs1, [relaxed_sys_i16_param_2]; +; SM70-NEXT: ld.param.u64 %rd2, [relaxed_sys_i16_param_0]; +; SM70-NEXT: and.b64 %rd1, %rd2, -4; +; SM70-NEXT: ld.param.u16 %r9, [relaxed_sys_i16_param_1]; +; SM70-NEXT: cvt.u32.u64 %r10, %rd2; 
+; SM70-NEXT: and.b32 %r11, %r10, 3; +; SM70-NEXT: shl.b32 %r1, %r11, 3; +; SM70-NEXT: mov.b32 %r12, 65535; +; SM70-NEXT: shl.b32 %r13, %r12, %r1; +; SM70-NEXT: not.b32 %r2, %r13; +; SM70-NEXT: cvt.u32.u16 %r14, %rs1; +; SM70-NEXT: shl.b32 %r3, %r14, %r1; +; SM70-NEXT: shl.b32 %r4, %r9, %r1; +; SM70-NEXT: ld.u32 %r15, [%rd1]; +; SM70-NEXT: and.b32 %r19, %r15, %r2; +; SM70-NEXT: $L__BB1_1: // %partword.cmpxchg.loop +; SM70-NEXT: // =>This Inner Loop Header: Depth=1 +; SM70-NEXT: or.b32 %r16, %r19, %r3; +; SM70-NEXT: or.b32 %r17, %r19, %r4; +; SM70-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; +; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; +; SM70-NEXT: @%p1 bra $L__BB1_3; +; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure +; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1 +; SM70-NEXT: and.b32 %r8, %r7, %r2; +; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; +; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: @%p2 bra $L__BB1_1; +; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end +; SM70-NEXT: st.param.b32 [func_retval0], %r14; ; SM70-NEXT: ret; - %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new seq_cst seq_cst + %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new monotonic monotonic ret i16 %new } @@ -180,7 +206,7 @@ define i32 @relaxed_sys_i32(ptr %addr, i32 %cmp, i32 %new) { ; SM70-NEXT: atom.cas.b32 %r3, [%rd1], %r1, %r2; ; SM70-NEXT: st.param.b32 [func_retval0], %r2; ; SM70-NEXT: ret; - %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new seq_cst seq_cst + %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new monotonic monotonic ret i32 %new } @@ -209,7 +235,7 @@ define i64 @relaxed_sys_i64(ptr %addr, i64 %cmp, i64 %new) { ; SM70-NEXT: atom.cas.b64 %rd4, [%rd1], %rd2, %rd3; ; SM70-NEXT: st.param.b64 [func_retval0], %rd3; ; SM70-NEXT: ret; - %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new seq_cst seq_cst + %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new monotonic monotonic ret i64 %new } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: