From 02909a4046fbfffbe4332f796ea2089854c12bab Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Mon, 20 Jan 2025 07:55:40 +0000 Subject: [PATCH 1/6] [compiler-rt] rtsan pipe2 interception for Linux. (#123517) completing fpurge interception for mac too. --- .../lib/rtsan/rtsan_interceptors_posix.cpp | 15 +++++++++++++++ .../rtsan/tests/rtsan_test_interceptors_posix.cpp | 9 +++++++++ 2 files changed, 24 insertions(+) diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp index 7ab54c24a002f..c4cf7791b53cf 100644 --- a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp @@ -302,6 +302,9 @@ INTERCEPTOR(int, fpurge, FILE *stream) { __rtsan_notify_intercepted_call("fpurge"); return REAL(fpurge)(stream); } +#define RTSAN_MAYBE_INTERCEPT_FPURGE INTERCEPT_FUNCTION(fpurge) +#else +#define RTSAN_MAYBE_INTERCEPT_FPURGE #endif INTERCEPTOR(FILE *, fdopen, int fd, const char *mode) { @@ -1031,6 +1034,16 @@ INTERCEPTOR(int, pipe, int pipefd[2]) { return REAL(pipe)(pipefd); } +#if !SANITIZER_APPLE +INTERCEPTOR(int, pipe2, int pipefd[2], int flags) { + __rtsan_notify_intercepted_call("pipe2"); + return REAL(pipe2)(pipefd, flags); +} +#define RTSAN_MAYBE_INTERCEPT_PIPE2 INTERCEPT_FUNCTION(pipe2) +#else +#define RTSAN_MAYBE_INTERCEPT_PIPE2 +#endif + INTERCEPTOR(int, mkfifo, const char *pathname, mode_t mode) { __rtsan_notify_intercepted_call("mkfifo"); return REAL(mkfifo)(pathname, mode); @@ -1133,6 +1146,8 @@ void __rtsan::InitializeInterceptors() { INTERCEPT_FUNCTION(puts); INTERCEPT_FUNCTION(fputs); INTERCEPT_FUNCTION(fflush); + RTSAN_MAYBE_INTERCEPT_FPURGE; + RTSAN_MAYBE_INTERCEPT_PIPE2; INTERCEPT_FUNCTION(fdopen); INTERCEPT_FUNCTION(freopen); RTSAN_MAYBE_INTERCEPT_FOPENCOOKIE; diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp index 0e03b19e80b6c..3a7952efb1cf9 100644 --- 
a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp @@ -1349,6 +1349,15 @@ TEST(TestRtsanInterceptors, PipeDiesWhenRealtime) { ExpectNonRealtimeSurvival(Func); } +#if !SANITIZER_APPLE +TEST(TestRtsanInterceptors, Pipe2DiesWhenRealtime) { + int fds[2]; + auto Func = [&fds]() { pipe2(fds, O_CLOEXEC); }; + ExpectRealtimeDeath(Func, "pipe2"); + ExpectNonRealtimeSurvival(Func); +} +#endif + #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdeprecated-declarations" TEST(TestRtsanInterceptors, SyscallDiesWhenRealtime) { From 6972788bf3d330b7a6136e2ddd840782882b8dd0 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Mon, 20 Jan 2025 08:55:54 +0100 Subject: [PATCH 2/6] [clang][bytecode] Fix discarding DerivedToBase casts (#123523) --- clang/lib/AST/ByteCode/Compiler.cpp | 6 ++++++ clang/test/AST/ByteCode/records.cpp | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 1a0e5ff45587f..66ab27bdd13da 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -253,6 +253,9 @@ bool Compiler::VisitCastExpr(const CastExpr *CE) { case CK_UncheckedDerivedToBase: case CK_DerivedToBase: { + if (DiscardResult) + return this->discard(SubExpr); + if (!this->delegate(SubExpr)) return false; @@ -282,6 +285,9 @@ bool Compiler::VisitCastExpr(const CastExpr *CE) { } case CK_BaseToDerived: { + if (DiscardResult) + return this->discard(SubExpr); + if (!this->delegate(SubExpr)) return false; diff --git a/clang/test/AST/ByteCode/records.cpp b/clang/test/AST/ByteCode/records.cpp index d329219264d89..9470e7d8e3dcb 100644 --- a/clang/test/AST/ByteCode/records.cpp +++ b/clang/test/AST/ByteCode/records.cpp @@ -1684,3 +1684,18 @@ namespace ExplicitThisInTemporary { constexpr bool g(B b) { return &b == b.p; } static_assert(g({}), ""); } + +namespace IgnoredMemberExpr { + class A { + 
public: + int a; + }; + class B : public A { + public: + constexpr int foo() { + a; // both-warning {{expression result unused}} + return 0; + } + }; + static_assert(B{}.foo() == 0, ""); +} From 84220eccb6ce5413f9782590b3877bd689c9b43c Mon Sep 17 00:00:00 2001 From: ZhaoQi Date: Mon, 20 Jan 2025 16:11:09 +0800 Subject: [PATCH 3/6] [LoongArch] Add generation support for `preld` instruction (#118436) Instruction `preld` is used to prefetch one cache-line of data from memory in advance into the cache. This commit allows it to be generated automatically. --- .../LoongArch/LoongArchISelDAGToDAG.cpp | 22 ++++++ .../Target/LoongArch/LoongArchISelDAGToDAG.h | 1 + .../LoongArch/LoongArchISelLowering.cpp | 2 + .../Target/LoongArch/LoongArchInstrInfo.td | 9 +++ llvm/test/CodeGen/LoongArch/preld.ll | 67 +++++++++++++++++++ 5 files changed, 101 insertions(+) create mode 100644 llvm/test/CodeGen/LoongArch/preld.ll diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp index d330f95355601..cb0fb9bc9c7f9 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp @@ -245,6 +245,28 @@ bool LoongArchDAGToDAGISel::selectNonFIBaseAddr(SDValue Addr, SDValue &Base) { return true; } +bool LoongArchDAGToDAGISel::SelectAddrRegImm12(SDValue Addr, SDValue &Base, + SDValue &Offset) { + SDLoc DL(Addr); + MVT VT = Addr.getSimpleValueType(); + + // The address is the result of an ADD. Here we only consider reg+simm12. + if (CurDAG->isBaseWithConstantOffset(Addr)) { + int64_t Imm = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); + if (isInt<12>(Imm)) { + Base = Addr.getOperand(0); + Offset = CurDAG->getTargetConstant(SignExtend64<12>(Imm), DL, VT); + return true; + } + } + + // Otherwise, we assume Addr as the base address and use constant 0 as the + // offset.
+ Base = Addr; + Offset = CurDAG->getTargetConstant(0, DL, VT); + return true; +} + bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt) { // Shift instructions on LoongArch only read the lower 5 or 6 bits of the diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h index 363b4f0ca7cf0..8a7eba418d804 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h @@ -43,6 +43,7 @@ class LoongArchDAGToDAGISel : public SelectionDAGISel { bool SelectBaseAddr(SDValue Addr, SDValue &Base); bool SelectAddrConstant(SDValue Addr, SDValue &Base, SDValue &Offset); bool selectNonFIBaseAddr(SDValue Addr, SDValue &Base); + bool SelectAddrRegImm12(SDValue Addr, SDValue &Base, SDValue &Offset); bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt); bool selectShiftMaskGRLen(SDValue N, SDValue &ShAmt) { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 96e6f71344a78..2417455808751 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -99,6 +99,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::PREFETCH, MVT::Other, Legal); + // Expand bitreverse.i16 with native-width bitrev and shift for now, before // we get to know which of sll and revb.2h is faster. 
setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index 2101aa058305f..62cb6fa1d88a8 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -522,6 +522,7 @@ def HI16ForAddu16idAddiPair: SDNodeXForm; def AddrConstant : ComplexPattern; def NonFIBaseAddr : ComplexPattern; +def AddrRegImm : ComplexPattern; def fma_nsz : PatFrag<(ops node:$fj, node:$fk, node:$fa), (fma node:$fj, node:$fk, node:$fa), [{ @@ -2011,6 +2012,14 @@ class PseudoMaskedAMMinMax def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMMinMax; def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax; +// Data prefetch + +// TODO: Supports for preldx instruction. +def : Pat<(prefetch (AddrRegImm GPR:$rj, simm12:$imm12), (i32 0), timm, (i32 1)), + (PRELD 0, GPR:$rj, simm12:$imm12)>; // data prefetch for loads +def : Pat<(prefetch (AddrRegImm GPR:$rj, simm12:$imm12), (i32 1), timm, (i32 1)), + (PRELD 8, GPR:$rj, simm12:$imm12)>; // data prefetch for stores + /// Compare and exchange class PseudoCmpXchg diff --git a/llvm/test/CodeGen/LoongArch/preld.ll b/llvm/test/CodeGen/LoongArch/preld.ll new file mode 100644 index 0000000000000..18057ac871f75 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/preld.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +declare void @llvm.prefetch(ptr, i32, i32, i32) + +define void @load_prefetch_no_offset(ptr %a) { +; LA32-LABEL: load_prefetch_no_offset: +; LA32: # %bb.0: # %entry +; LA32-NEXT: preld 0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_prefetch_no_offset: +; LA64: # %bb.0: # %entry +; LA64-NEXT: preld 0, $a0, 0 +; LA64-NEXT: ret +entry: + call void @llvm.prefetch(ptr %a, i32 0, 
i32 3, i32 1) + ret void +} + +define void @store_prefetch_no_offset(ptr %a) { +; LA32-LABEL: store_prefetch_no_offset: +; LA32: # %bb.0: # %entry +; LA32-NEXT: preld 8, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_prefetch_no_offset: +; LA64: # %bb.0: # %entry +; LA64-NEXT: preld 8, $a0, 0 +; LA64-NEXT: ret +entry: + call void @llvm.prefetch(ptr %a, i32 1, i32 3, i32 1) + ret void +} + +define void @load_prefetch_with_offset(ptr %a) { +; LA32-LABEL: load_prefetch_with_offset: +; LA32: # %bb.0: # %entry +; LA32-NEXT: preld 0, $a0, 200 +; LA32-NEXT: ret +; +; LA64-LABEL: load_prefetch_with_offset: +; LA64: # %bb.0: # %entry +; LA64-NEXT: preld 0, $a0, 200 +; LA64-NEXT: ret +entry: + %addr = getelementptr i8, ptr %a, i64 200 + call void @llvm.prefetch(ptr %addr, i32 0, i32 3, i32 1) + ret void +} + +define void @store_prefetch_with_offset(ptr %a) { +; LA32-LABEL: store_prefetch_with_offset: +; LA32: # %bb.0: # %entry +; LA32-NEXT: preld 8, $a0, 200 +; LA32-NEXT: ret +; +; LA64-LABEL: store_prefetch_with_offset: +; LA64: # %bb.0: # %entry +; LA64-NEXT: preld 8, $a0, 200 +; LA64-NEXT: ret +entry: + %addr = getelementptr i8, ptr %a, i64 200 + call void @llvm.prefetch(ptr %addr, i32 1, i32 3, i32 1) + ret void +} From 18d5d84d761d9f6c12dcfd3d23a965203cd5f886 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Mon, 20 Jan 2025 08:11:33 +0000 Subject: [PATCH 4/6] [compiler-rt][rtsan] intercept getpeername/recvmmsg/sendmmsg (#123484) --- .../lib/rtsan/rtsan_interceptors_posix.cpp | 36 +++++++++++++++++++ .../tests/rtsan_test_interceptors_posix.cpp | 28 +++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp index c4cf7791b53cf..34c2d4cb37fd0 100644 --- a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp @@ -843,6 +843,17 @@ INTERCEPTOR(int, getsockname, int socket, struct sockaddr *sa, #define 
RTSAN_MAYBE_INTERCEPT_GETSOCKNAME #endif +#if SANITIZER_INTERCEPT_GETPEERNAME +INTERCEPTOR(int, getpeername, int socket, struct sockaddr *sa, + socklen_t *salen) { + __rtsan_notify_intercepted_call("getpeername"); + return REAL(getpeername)(socket, sa, salen); +} +#define RTSAN_MAYBE_INTERCEPT_GETPEERNAME INTERCEPT_FUNCTION(getpeername) +#else +#define RTSAN_MAYBE_INTERCEPT_GETPEERNAME +#endif + INTERCEPTOR(int, bind, int socket, const struct sockaddr *address, socklen_t address_len) { __rtsan_notify_intercepted_call("bind"); @@ -882,6 +893,17 @@ INTERCEPTOR(ssize_t, sendmsg, int socket, const struct msghdr *message, return REAL(sendmsg)(socket, message, flags); } +#if SANITIZER_INTERCEPT_SENDMMSG +INTERCEPTOR(int, sendmmsg, int socket, struct mmsghdr *message, + unsigned int len, int flags) { + __rtsan_notify_intercepted_call("sendmmsg"); + return REAL(sendmmsg)(socket, message, len, flags); +} +#define RTSAN_MAYBE_INTERCEPT_SENDMMSG INTERCEPT_FUNCTION(sendmmsg) +#else +#define RTSAN_MAYBE_INTERCEPT_SENDMMSG +#endif + INTERCEPTOR(ssize_t, sendto, int socket, const void *buffer, size_t length, int flags, const struct sockaddr *dest_addr, socklen_t dest_len) { __rtsan_notify_intercepted_call("sendto"); @@ -904,6 +926,17 @@ INTERCEPTOR(ssize_t, recvmsg, int socket, struct msghdr *message, int flags) { return REAL(recvmsg)(socket, message, flags); } +#if SANITIZER_INTERCEPT_RECVMMSG +INTERCEPTOR(int, recvmmsg, int socket, struct mmsghdr *message, + unsigned int len, int flags, struct timespec *timeout) { + __rtsan_notify_intercepted_call("recvmmsg"); + return REAL(recvmmsg)(socket, message, len, flags, timeout); +} +#define RTSAN_MAYBE_INTERCEPT_RECVMMSG INTERCEPT_FUNCTION(recvmmsg) +#else +#define RTSAN_MAYBE_INTERCEPT_RECVMMSG +#endif + INTERCEPTOR(int, shutdown, int socket, int how) { __rtsan_notify_intercepted_call("shutdown"); return REAL(shutdown)(socket, how); @@ -1209,13 +1242,16 @@ void __rtsan::InitializeInterceptors() { INTERCEPT_FUNCTION(recv); 
INTERCEPT_FUNCTION(recvfrom); INTERCEPT_FUNCTION(recvmsg); + RTSAN_MAYBE_INTERCEPT_RECVMMSG; INTERCEPT_FUNCTION(send); INTERCEPT_FUNCTION(sendmsg); + RTSAN_MAYBE_INTERCEPT_SENDMMSG; INTERCEPT_FUNCTION(sendto); INTERCEPT_FUNCTION(shutdown); INTERCEPT_FUNCTION(socket); RTSAN_MAYBE_INTERCEPT_ACCEPT4; RTSAN_MAYBE_INTERCEPT_GETSOCKNAME; + RTSAN_MAYBE_INTERCEPT_GETPEERNAME; RTSAN_MAYBE_INTERCEPT_SELECT; INTERCEPT_FUNCTION(pselect); diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp index 3a7952efb1cf9..c858a5a771fe4 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp @@ -1118,6 +1118,15 @@ TEST(TestRtsanInterceptors, SendmsgToASocketDiesWhenRealtime) { ExpectNonRealtimeSurvival(Func); } +#if SANITIZER_INTERCEPT_SENDMMSG +TEST(TestRtsanInterceptors, SendmmsgOnASocketDiesWhenRealtime) { + mmsghdr msg{}; + auto Func = [&]() { sendmmsg(0, &msg, 0, 0); }; + ExpectRealtimeDeath(Func, "sendmmsg"); + ExpectNonRealtimeSurvival(Func); +} +#endif + TEST(TestRtsanInterceptors, SendtoToASocketDiesWhenRealtime) { sockaddr addr{}; socklen_t len{}; @@ -1147,6 +1156,15 @@ TEST(TestRtsanInterceptors, RecvmsgOnASocketDiesWhenRealtime) { ExpectNonRealtimeSurvival(Func); } +#if SANITIZER_INTERCEPT_RECVMMSG +TEST(TestRtsanInterceptors, RecvmmsgOnASocketDiesWhenRealtime) { + mmsghdr msg{}; + auto Func = [&]() { recvmmsg(0, &msg, 0, 0, nullptr); }; + ExpectRealtimeDeath(Func, "recvmmsg"); + ExpectNonRealtimeSurvival(Func); +} +#endif + TEST(TestRtsanInterceptors, ShutdownOnASocketDiesWhenRealtime) { auto Func = [&]() { shutdown(0, 0); }; ExpectRealtimeDeath(Func, "shutdown"); @@ -1163,6 +1181,16 @@ TEST(TestRtsanInterceptors, GetsocknameOnASocketDiesWhenRealtime) { } #endif +#if SANITIZER_INTERCEPT_GETPEERNAME +TEST(TestRtsanInterceptors, GetpeernameOnASocketDiesWhenRealtime) { + sockaddr addr{}; + socklen_t len{}; + 
auto Func = [&]() { getpeername(0, &addr, &len); }; + ExpectRealtimeDeath(Func, "getpeername"); + ExpectNonRealtimeSurvival(Func); +} +#endif + /* I/O Multiplexing */ From ca4886bf96f0b6dcc151c03bd8c7df414f3f659b Mon Sep 17 00:00:00 2001 From: ZhaoQi Date: Mon, 20 Jan 2025 16:20:15 +0800 Subject: [PATCH 5/6] [LoongArch] Impl TTI hooks for LoongArch to support LoopDataPrefetch pass (#118437) Inspired by https://reviews.llvm.org/D146600, this commit adds some TTI hooks for LoongArch to make the LoopDataPrefetch pass actually work, including: - `getCacheLineSize()`: 64 for loongarch64. - `getPrefetchDistance()`: After testing SPEC CPU 2017, the improvements brought by prefetching are most obvious when PrefetchDistance is set to 200 (results shown below), although the best choice differs between benchmarks. - `enableWritePrefetching()`: store prefetch is supported by LoongArch, so WritePrefetching is set to true by default. - `getMinPrefetchStride()` and `getMaxPrefetchIterationsAhead()` still use the default values (1 and UINT_MAX), so they are not overridden. After this commit, the test added by https://reviews.llvm.org/D146600 generates the llvm.prefetch intrinsic IR correctly. Results of spec2017rate benchmarks (testing data: ref, copies: 1): - For all C/C++ benchmarks, compared to O3+novec/lsx/lasx, prefetch brings about -1.58%/0.31%/0.07% performance improvement for int benchmarks and 3.26%/3.73%/3.78% improvement for floating point benchmarks. (Only O3+novec+prefetch regresses when testing intrate.) - But prefetch results in a performance reduction for almost every Fortran benchmark compiled by flang. Considering all C/C++/Fortran benchmarks together, performance with prefetch decreases by about 1% ~ 5%. FIXME: Keep the `loongarch-enable-loop-data-prefetch` option defaulting to false for now due to the bad effect on Fortran.
--- .../LoongArchTargetTransformInfo.cpp | 6 ++++ .../LoongArch/LoongArchTargetTransformInfo.h | 4 +++ .../LoopDataPrefetch/LoongArch/basic.ll | 33 +++++++++++++++---- 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp index 5fbc7c734168d..cbc9c3f3beca0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp @@ -89,4 +89,10 @@ LoongArchTTIImpl::getPopcntSupport(unsigned TyWidth) { return ST->hasExtLSX() ? TTI::PSK_FastHardware : TTI::PSK_Software; } +unsigned LoongArchTTIImpl::getCacheLineSize() const { return 64; } + +unsigned LoongArchTTIImpl::getPrefetchDistance() const { return 200; } + +bool LoongArchTTIImpl::enableWritePrefetching() const { return true; } + // TODO: Implement more hooks to provide TTI machinery for LoongArch. diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h index f7ce75173be20..b3edf131c584c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h @@ -47,6 +47,10 @@ class LoongArchTTIImpl : public BasicTTIImplBase { const char *getRegisterClassName(unsigned ClassID) const; TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); + unsigned getCacheLineSize() const override; + unsigned getPrefetchDistance() const override; + bool enableWritePrefetching() const override; + // TODO: Implement more hooks to provide TTI machinery for LoongArch. 
}; diff --git a/llvm/test/Transforms/LoopDataPrefetch/LoongArch/basic.ll b/llvm/test/Transforms/LoopDataPrefetch/LoongArch/basic.ll index 8553171ac68ac..0313bbd883287 100644 --- a/llvm/test/Transforms/LoopDataPrefetch/LoongArch/basic.ll +++ b/llvm/test/Transforms/LoopDataPrefetch/LoongArch/basic.ll @@ -1,16 +1,38 @@ -;; Tag this 'XFAIL' because we need a few more TTIs and ISels. -; XFAIL: * -; RUN: opt --mtriple=loongarch64 -mattr=+d --passes=loop-data-prefetch -loongarch-enable-loop-data-prefetch -S < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt --mtriple=loongarch64 -mattr=+d --passes=loop-data-prefetch -S < %s | FileCheck %s define void @foo(ptr %a, ptr %b) { +; CHECK-LABEL: define void @foo( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 200 +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 200 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: call void @llvm.prefetch.p0(ptr [[SCEVGEP]], i32 0, i32 3, i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP4]], 1.000000e+00 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: call void @llvm.prefetch.p0(ptr [[SCEVGEP1]], i32 1, i32 3, i32 1) +; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX2]], align 8 +; 
CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1600 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: ret void +; entry: br label %for.body -; CHECK: for.body: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds double, ptr %b, i64 %indvars.iv -; CHECK: call void @llvm.prefetch %0 = load double, ptr %arrayidx, align 8 %add = fadd double %0, 1.000000e+00 %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %indvars.iv @@ -19,7 +41,6 @@ for.body: ; preds = %for.body, %entry %exitcond = icmp eq i64 %indvars.iv.next, 1600 br i1 %exitcond, label %for.end, label %for.body -; CHECK: for.end: for.end: ; preds = %for.body ret void } From a6bb8a707c4fb7c0953cfd8ae6c5aeb4f3a1feb8 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 20 Jan 2025 09:21:50 +0100 Subject: [PATCH 6/6] [MLIR] Add missing include (NFC) Needed for libstdc++ 15 compatibility. --- mlir/include/mlir/Target/SPIRV/Serialization.h | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/include/mlir/Target/SPIRV/Serialization.h b/mlir/include/mlir/Target/SPIRV/Serialization.h index 613f0a423f9f8..225777e25d607 100644 --- a/mlir/include/mlir/Target/SPIRV/Serialization.h +++ b/mlir/include/mlir/Target/SPIRV/Serialization.h @@ -14,6 +14,7 @@ #define MLIR_TARGET_SPIRV_SERIALIZATION_H #include "mlir/Support/LLVM.h" +#include <cstdint> namespace mlir { class MLIRContext;