diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
index 361538a58e47c..21efa2251219d 100644
--- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake
+++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
@@ -158,11 +158,11 @@ function(add_compiler_rt_runtime name type)
   set(libnames)
   # Until we support this some other way, build compiler-rt runtime without LTO
   # to allow non-LTO projects to link with it.
-  if(COMPILER_RT_HAS_FNO_LTO_FLAG)
-    set(NO_LTO_FLAGS "-fno-lto")
-  else()
-    set(NO_LTO_FLAGS "")
-  endif()
+  # if(COMPILER_RT_HAS_FNO_LTO_FLAG)
+  #   set(NO_LTO_FLAGS "-fno-lto")
+  # else()
+  #   set(NO_LTO_FLAGS "")
+  # endif()
 
   # By default do not instrument or use profdata for compiler-rt.
   set(NO_PGO_FLAGS "")
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index ad3b98799c5c9..f2caf3650eab0 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -38,6 +38,7 @@ asm(\"cas w0, w1, [x2]\");
 
 set(ARM64 aarch64)
 set(ARM32 arm armhf armv6m armv7m armv7em armv7 armv7s armv7k)
+set(DPU dpu)
 set(HEXAGON hexagon)
 set(X86 i386)
 set(X86_64 x86_64)
@@ -60,7 +61,7 @@ if(APPLE)
 endif()
 
 set(ALL_BUILTIN_SUPPORTED_ARCH
-  ${X86} ${X86_64} ${ARM32} ${ARM64}
+  ${X86} ${X86_64} ${ARM32} ${ARM64} ${DPU}
   ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC32} ${PPC64}
   ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
   ${WASM32} ${WASM64} ${VE})
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 73b6bead84245..b2f2adf0c88d0 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -263,6 +263,14 @@ if (NOT MSVC)
   )
 endif ()
 
+set(dpu_SOURCES
+  dpu/udivsi3.c
+  ${GENERIC_SOURCES}
+  )
+# Keep GENERIC_SOURCES intact for every other architecture; fixdfsi.c and
+# fixunsdfsi.c are dropped from the DPU source list only.
+list(REMOVE_ITEM dpu_SOURCES fixdfsi.c fixunsdfsi.c)
+
 # Implement extended-precision builtins, assuming long double is 80 bits.
 # long double is not 80 bits on Android or MSVC.
 set(x86_80_BIT_SOURCES
diff --git a/compiler-rt/lib/builtins/dpu/udiv32_stepper.c b/compiler-rt/lib/builtins/dpu/udiv32_stepper.c
new file mode 100644
index 0000000000000..ce4a1960ad5bf
--- /dev/null
+++ b/compiler-rt/lib/builtins/dpu/udiv32_stepper.c
@@ -0,0 +1,73 @@
+/* Copyright 2024 UPMEM. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include <stdint.h>
+
+/* Unsigned 32-bit division helper built from an unrolled chain of DPU
+ * div_step instructions. The 64-bit value written by that chain is returned
+ * unchanged; __udivsi3 reads the quotient from its upper 32 bits.
+ * The first clz branches to label 1 (fault 2) on its "max" condition --
+ * presumably a zero divider; confirm against the DPU ISA reference.
+ */
+uint64_t __attribute__((used)) __udiv32(uint32_t dividend, uint32_t divider)
+{
+    uint64_t dest;
+
+    uint32_t temp0;
+    uint32_t temp1;
+
+    /* clang-format off */
+    __asm__ volatile(" clz %[temp0], %[divider], max, 1f\n" // %[temp0] = by how many the divider can be shifted on 32-bit
+                     " clz %[temp1], %[dividend]\n" // %[temp1] = number of useless bits of the dividend
+                     " sub %[temp0], %[temp1], %[temp0], gtu, 2f\n" // %[temp0] = the maximal shift to be done
+                     " move %[temp1], %[divider]\n"
+                     " move.u %[dest], %[dividend]\n"
+                     " jump %[temp0], 3f\n" // As we will jump backward relatively to label 3 forward
+                     " div_step %[dest], %[temp1], %[dest], 31\n"
+                     " div_step %[dest], %[temp1], %[dest], 30\n"
+                     " div_step %[dest], %[temp1], %[dest], 29\n"
+                     " div_step %[dest], %[temp1], %[dest], 28\n"
+                     " div_step %[dest], %[temp1], %[dest], 27\n"
+                     " div_step %[dest], %[temp1], %[dest], 26\n"
+                     " div_step %[dest], %[temp1], %[dest], 25\n"
+                     " div_step %[dest], %[temp1], %[dest], 24\n"
+                     " div_step %[dest], %[temp1], %[dest], 23\n"
+                     " div_step %[dest], %[temp1], %[dest], 22\n"
+                     " div_step %[dest], %[temp1], %[dest], 21\n"
+                     " div_step %[dest], %[temp1], %[dest], 20\n"
+                     " div_step %[dest], %[temp1], %[dest], 19\n"
+                     " div_step %[dest], %[temp1], %[dest], 18\n"
+                     " div_step %[dest], %[temp1], %[dest], 17\n"
+                     " div_step %[dest], %[temp1], %[dest], 16\n"
+                     " div_step %[dest], %[temp1], %[dest], 15\n"
+                     " div_step %[dest], %[temp1], %[dest], 14\n"
+                     " div_step %[dest], %[temp1], %[dest], 13\n"
+                     " div_step %[dest], %[temp1], %[dest], 12\n"
+                     " div_step %[dest], %[temp1], %[dest], 11\n"
+                     " div_step %[dest], %[temp1], %[dest], 10\n"
+                     " div_step %[dest], %[temp1], %[dest], 9\n"
+                     " div_step %[dest], %[temp1], %[dest], 8\n"
+                     " div_step %[dest], %[temp1], %[dest], 7\n"
+                     " div_step %[dest], %[temp1], %[dest], 6\n"
+                     " div_step %[dest], %[temp1], %[dest], 5\n"
+                     " div_step %[dest], %[temp1], %[dest], 4\n"
+                     " div_step %[dest], %[temp1], %[dest], 3\n"
+                     " div_step %[dest], %[temp1], %[dest], 2\n"
+                     " div_step %[dest], %[temp1], %[dest], 1\n"
+                     "3:\n"
+                     " div_step %[dest], %[temp1], %[dest], 0\n"
+                     "4:\n"
+                     " jump 5f\n"
+                     "2:\n"
+                     " move.u %[dest], %[dividend], true, 4b\n"
+                     "1:\n"
+                     " fault 2\n"
+                     "5:\n"
+                     : [dest] "=r"(dest), [temp0] "=&r" (temp0), [temp1] "=&r" (temp1)
+                     : [dividend] "r"(dividend), [divider] "r"(divider));
+    /* clang-format on */
+
+    return dest;
+}
diff --git a/compiler-rt/lib/builtins/dpu/udivsi3.c b/compiler-rt/lib/builtins/dpu/udivsi3.c
new file mode 100644
index 0000000000000..299208b0a5a29
--- /dev/null
+++ b/compiler-rt/lib/builtins/dpu/udivsi3.c
@@ -0,0 +1,12 @@
+#include "../int_lib.h"
+
+// Pulls in the definition of
+//   uint64_t __udiv32(uint32_t dividend, uint32_t divider);
+#include "udiv32_stepper.c"
+
+// Returns: a / b, read from the upper 32 bits of the 64-bit value
+// produced by __udiv32.
+COMPILER_RT_ABI su_int __udivsi3(su_int a, su_int b) {
+  uint64_t res = __udiv32(a, b);
+  return (su_int)(res >> 32);
+}
diff --git a/llvm/lib/Target/DPU/DPUTargetLowering.cpp b/llvm/lib/Target/DPU/DPUTargetLowering.cpp
index 95ed30c7086ec..74801610df51c 100644
--- a/llvm/lib/Target/DPU/DPUTargetLowering.cpp
+++ b/llvm/lib/Target/DPU/DPUTargetLowering.cpp
@@ -89,10 +89,10 @@ DPUTargetLowering::DPUTargetLowering(const TargetMachine &TM, DPUSubtarget &STI)
   PredictableSelectIsExpensive = true;
   setJumpIsExpensive(false);
 
-  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
-  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
-  setLibcallName(RTLIB::SDIV_I32, "__div32");
-  setLibcallName(RTLIB::UDIV_I32, "__udiv32");
+  // setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
+  // setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
+  // setLibcallName(RTLIB::SDIV_I32, "__div32");
+  // setLibcallName(RTLIB::UDIV_I32, "__udiv32");
 
   // Set up the register classes.
   addRegisterClass(MVT::i32, &DPU::GP_REGRegClass);
diff --git a/llvm/lib/Target/DPU/MCTargetDesc/DPUMCCodeEmitter.cpp b/llvm/lib/Target/DPU/MCTargetDesc/DPUMCCodeEmitter.cpp
index 4eaaec5755b07..b7acf108155c5 100644
--- a/llvm/lib/Target/DPU/MCTargetDesc/DPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/DPU/MCTargetDesc/DPUMCCodeEmitter.cpp
@@ -36,6 +36,11 @@ void DPUMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
                                          SmallVectorImpl<MCFixup> &Fixups,
                                          const MCSubtargetInfo &STI) const {
   // Get instruction encoding and emit it
+  // Trace each instruction only under -debug / -debug-only=mccodeemitter.
+  DEBUG_WITH_TYPE("mccodeemitter", {
+    MI.dump_pretty(dbgs());
+    dbgs() << "\n";
+  });
   uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
 
   // Emit bytes in little-endian