diff --git a/openmp/README.rst b/openmp/README.rst index 2dfc8630858b8..52c4787eeef1c 100644 --- a/openmp/README.rst +++ b/openmp/README.rst @@ -141,7 +141,7 @@ Options for all Libraries Options for ``libomp`` ---------------------- -**LIBOMP_ARCH** = ``aarch64|aarch64_32|arm|i386|loongarch64|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64|s390x`` +**LIBOMP_ARCH** = ``aarch64|aarch64_32|arm|i386|loongarch64|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv32|riscv64|s390x`` The default value for this option is chosen based on probing the compiler for architecture macros (e.g., is ``__x86_64__`` predefined by compiler?). @@ -198,7 +198,7 @@ Optional Features **LIBOMP_OMPT_SUPPORT** = ``ON|OFF`` Include support for the OpenMP Tools Interface (OMPT). This option is supported and ``ON`` by default for x86, x86_64, AArch64, - PPC64, RISCV64, LoongArch64, and s390x on Linux* and macOS*. + PPC64, RISCV32, RISCV64, LoongArch64, and s390x on Linux* and macOS*. This option is ``OFF`` if this feature is not supported for the platform. **LIBOMP_OMPT_OPTIONAL** = ``ON|OFF`` diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt index bcae02eba6a59..d3bc494369eee 100644 --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -30,7 +30,7 @@ if(${OPENMP_STANDALONE_BUILD}) # If adding a new architecture, take a look at cmake/LibompGetArchitecture.cmake libomp_get_architecture(LIBOMP_DETECTED_ARCH) set(LIBOMP_ARCH ${LIBOMP_DETECTED_ARCH} CACHE STRING - "The architecture to build for (x86_64/i386/arm/ppc/ppc64/ppc64le/aarch64/aarch64_32/mic/mips/mips64/riscv64/loongarch64/ve/s390x/wasm32).") + "The architecture to build for (x86_64/i386/arm/ppc/ppc64/ppc64le/aarch64/aarch64_32/mic/mips/mips64/riscv32/riscv64/loongarch64/ve/s390x/wasm32).") # Should assertions be enabled? They are on by default. 
set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL "enable assertions?") @@ -63,6 +63,8 @@ else() # Part of LLVM build set(LIBOMP_ARCH aarch64) elseif(LIBOMP_NATIVE_ARCH MATCHES "arm") set(LIBOMP_ARCH arm) + elseif(LIBOMP_NATIVE_ARCH MATCHES "riscv32") + set(LIBOMP_ARCH riscv32) elseif(LIBOMP_NATIVE_ARCH MATCHES "riscv64") set(LIBOMP_ARCH riscv64) elseif(LIBOMP_NATIVE_ARCH MATCHES "loongarch64") @@ -93,7 +95,7 @@ if(LIBOMP_ARCH STREQUAL "aarch64") endif() endif() -libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc ppc64 ppc64le aarch64 aarch64_32 aarch64_a64fx mic mips mips64 riscv64 loongarch64 ve s390x wasm32) +libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc ppc64 ppc64le aarch64 aarch64_32 aarch64_a64fx mic mips mips64 riscv32 riscv64 loongarch64 ve s390x wasm32) set(LIBOMP_LIB_TYPE normal CACHE STRING "Performance,Profiling,Stubs library (normal/profile/stubs)") @@ -180,6 +182,7 @@ set(PPC64 FALSE) set(MIC FALSE) set(MIPS64 FALSE) set(MIPS FALSE) +set(RISCV32 FALSE) set(RISCV64 FALSE) set(LOONGARCH64 FALSE) set(VE FALSE) @@ -212,6 +215,8 @@ elseif("${LIBOMP_ARCH}" STREQUAL "mips") # MIPS architecture set(MIPS TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "mips64") # MIPS64 architecture set(MIPS64 TRUE) +elseif("${LIBOMP_ARCH}" STREQUAL "riscv32") # RISCV32 architecture + set(RISCV32 TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "riscv64") # RISCV64 architecture set(RISCV64 TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "loongarch64") # LoongArch64 architecture diff --git a/openmp/runtime/README.txt b/openmp/runtime/README.txt index ddd8b0e4282d8..4e478b87c0dd3 100644 --- a/openmp/runtime/README.txt +++ b/openmp/runtime/README.txt @@ -53,7 +53,8 @@ Architectures Supported * IBM(R) Power architecture (big endian) * IBM(R) Power architecture (little endian) * MIPS and MIPS64 architecture -* RISCV64 architecture +* RISC-V 32-bit architecture +* RISC-V 64-bit architecture * LoongArch64 architecture Supported RTL Build Configurations diff --git
a/openmp/runtime/cmake/LibompGetArchitecture.cmake b/openmp/runtime/cmake/LibompGetArchitecture.cmake index 2d5c6622c9f7d..5d9accdf115ba 100644 --- a/openmp/runtime/cmake/LibompGetArchitecture.cmake +++ b/openmp/runtime/cmake/LibompGetArchitecture.cmake @@ -49,6 +49,8 @@ function(libomp_get_architecture return_arch) #error ARCHITECTURE=mips64 #elif defined(__mips__) && !defined(__mips64) #error ARCHITECTURE=mips + #elif defined(__riscv) && __riscv_xlen == 32 + #error ARCHITECTURE=riscv32 #elif defined(__riscv) && __riscv_xlen == 64 #error ARCHITECTURE=riscv64 #elif defined(__loongarch__) && __loongarch_grlen == 64 diff --git a/openmp/runtime/cmake/LibompMicroTests.cmake b/openmp/runtime/cmake/LibompMicroTests.cmake index 0d48246cb6ec5..b9843712c10bd 100644 --- a/openmp/runtime/cmake/LibompMicroTests.cmake +++ b/openmp/runtime/cmake/LibompMicroTests.cmake @@ -208,7 +208,7 @@ else() elseif(${MIPS} OR ${MIPS64}) libomp_append(libomp_expected_library_deps libc.so.6) libomp_append(libomp_expected_library_deps ld.so.1) - elseif(${RISCV64}) + elseif(${RISCV32} OR ${RISCV64}) libomp_append(libomp_expected_library_deps libc.so.6) libomp_append(libomp_expected_library_deps ld.so.1) elseif(${LOONGARCH64}) diff --git a/openmp/runtime/cmake/LibompUtils.cmake b/openmp/runtime/cmake/LibompUtils.cmake index 58b93b384a76a..5111a2c96ed78 100644 --- a/openmp/runtime/cmake/LibompUtils.cmake +++ b/openmp/runtime/cmake/LibompUtils.cmake @@ -109,6 +109,8 @@ function(libomp_get_legal_arch return_arch_string) set(${return_arch_string} "MIPS" PARENT_SCOPE) elseif(${MIPS64}) set(${return_arch_string} "MIPS64" PARENT_SCOPE) + elseif(${RISCV32}) + set(${return_arch_string} "RISCV32" PARENT_SCOPE) elseif(${RISCV64}) set(${return_arch_string} "RISCV64" PARENT_SCOPE) elseif(${LOONGARCH64}) diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake index ac2bbb902161e..77014009d9c82 100644 --- a/openmp/runtime/cmake/config-ix.cmake +++ 
b/openmp/runtime/cmake/config-ix.cmake @@ -314,6 +314,7 @@ else() (LIBOMP_ARCH STREQUAL aarch64_a64fx) OR (LIBOMP_ARCH STREQUAL ppc64le) OR (LIBOMP_ARCH STREQUAL ppc64) OR + (LIBOMP_ARCH STREQUAL riscv32) OR (LIBOMP_ARCH STREQUAL riscv64) OR (LIBOMP_ARCH STREQUAL loongarch64) OR (LIBOMP_ARCH STREQUAL s390x)) diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h index 9ab2c0cc70d8c..7a1e5ee328bd8 100644 --- a/openmp/runtime/src/kmp_affinity.h +++ b/openmp/runtime/src/kmp_affinity.h @@ -278,7 +278,7 @@ class KMPHwlocAffinity : public KMPAffinity { #elif __NR_sched_getaffinity != 123 #error Wrong code for getaffinity system call. #endif /* __NR_sched_getaffinity */ -#elif KMP_ARCH_RISCV64 +#elif KMP_ARCH_RISCV64 || KMP_ARCH_RISCV32 #ifndef __NR_sched_setaffinity #define __NR_sched_setaffinity 122 #elif __NR_sched_setaffinity != 122 diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h index 2252f5e7e97a7..f83eeaccc11fd 100644 --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -179,7 +179,7 @@ typedef unsigned long long kmp_uint64; #endif /* KMP_OS_UNIX */ #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM || \ - KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 + KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 || KMP_ARCH_RISCV32 #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC #elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ @@ -1050,8 +1050,9 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); #endif /* KMP_OS_WINDOWS */ #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \ - KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ - KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV32 || KMP_ARCH_RISCV64 || \ + KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC || \ + KMP_ARCH_AARCH64_32 #if KMP_OS_WINDOWS 
#undef KMP_MB #define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst) diff --git a/openmp/runtime/src/kmp_platform.h b/openmp/runtime/src/kmp_platform.h index 9c2215140467d..805d4f28fbac9 100644 --- a/openmp/runtime/src/kmp_platform.h +++ b/openmp/runtime/src/kmp_platform.h @@ -118,6 +118,7 @@ #define KMP_ARCH_PPC_XCOFF 0 #define KMP_ARCH_MIPS 0 #define KMP_ARCH_MIPS64 0 +#define KMP_ARCH_RISCV32 0 #define KMP_ARCH_RISCV64 0 #define KMP_ARCH_LOONGARCH64 0 #define KMP_ARCH_VE 0 @@ -178,6 +179,9 @@ #undef KMP_ARCH_MIPS #define KMP_ARCH_MIPS 1 #endif +#elif defined __riscv && __riscv_xlen == 32 +#undef KMP_ARCH_RISCV32 +#define KMP_ARCH_RISCV32 1 #elif defined __riscv && __riscv_xlen == 64 #undef KMP_ARCH_RISCV64 #define KMP_ARCH_RISCV64 1 @@ -263,8 +267,8 @@ // TODO: Fixme - This is clever, but really fugly #if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \ KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \ - KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64 + KMP_ARCH_VE + \ - KMP_ARCH_S390X + KMP_ARCH_WASM + KMP_ARCH_PPC + \ + KMP_ARCH_RISCV32 + KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64 + \ + KMP_ARCH_VE + KMP_ARCH_S390X + KMP_ARCH_WASM + KMP_ARCH_PPC + \ KMP_ARCH_AARCH64_32) #error Unknown or unsupported architecture #endif diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 2c8d9304c46bc..83235819f94b9 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -8947,8 +8947,8 @@ __kmp_determine_reduction_method( int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ - KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ - KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV32 || KMP_ARCH_RISCV64 || \ + KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ 
KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD || \ diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S index cc5344cdd124a..c253695c4e7c9 100644 --- a/openmp/runtime/src/z_Linux_asm.S +++ b/openmp/runtime/src/z_Linux_asm.S @@ -1788,7 +1788,17 @@ __kmp_invoke_microtask: #endif /* KMP_ARCH_PPC64 */ -#if KMP_ARCH_RISCV64 +#if KMP_ARCH_RISCV32 || KMP_ARCH_RISCV64 + +#if KMP_ARCH_RISCV32 +#define REG_L lw +#define REG_S sw +#define SZREG 4 +#else +#define REG_L ld +#define REG_S sd +#define SZREG 8 +#endif //------------------------------------------------------------------------ // @@ -1846,24 +1856,24 @@ __kmp_invoke_microtask: .cfi_startproc // First, save ra and fp - addi sp, sp, -16 - sd ra, 8(sp) - sd fp, 0(sp) - addi fp, sp, 16 + addi sp, sp, -2*SZREG + REG_S ra, 1*SZREG(sp) + REG_S fp, 0*SZREG(sp) + addi fp, sp, 2*SZREG .cfi_def_cfa fp, 0 - .cfi_offset ra, -8 - .cfi_offset fp, -16 + .cfi_offset ra, -1*SZREG + .cfi_offset fp, -2*SZREG // Compute the dynamic stack size: // // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by // reference - // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' - // function by register. Given that we have 8 of such registers (a[0-7]) - // and two + 'argc' arguments (consider &gtid and &tid), we need to - // reserve max(0, argc - 6)*8 extra bytes + // - We need XLenBytes (4 for riscv32 and 8 for riscv64) bytes for each argument + // that cannot be passed to the 'pkfn' function by register.
Given that we have + // 8 of such registers (a[0-7]) and two + 'argc' arguments (consider &gtid and + // &tid), we need to reserve max(0, argc - 6)*XLenBytes extra bytes // - // The total number of bytes is then max(0, argc - 6)*8 + 8 + // The total number of bytes is then max(0, argc - 6)*XLenBytes + 8 // Compute max(0, argc - 6) using the following bithack: // max(0, x) = x - (x & (x >> 31)), where x := argc - 6 @@ -1874,8 +1884,11 @@ __kmp_invoke_microtask: sub t0, t0, t1 addi t0, t0, 1 - +#if KMP_ARCH_RISCV32 + slli t0, t0, 2 +#else slli t0, t0, 3 +#endif sub sp, sp, t0 // Align the stack to 16 bytes @@ -1887,7 +1900,7 @@ __kmp_invoke_microtask: #if OMPT_SUPPORT // Save frame pointer into exit_frame - sd fp, 0(a5) + REG_S fp, 0(a5) #endif // Prepare arguments for the pkfn function (first 8 using a0-a7 registers) @@ -1899,37 +1912,37 @@ __kmp_invoke_microtask: addi a1, fp, __tid beqz t3, .L_kmp_3 - ld a2, 0(t4) + REG_L a2, 0*SZREG(t4) addi t3, t3, -1 beqz t3, .L_kmp_3 - ld a3, 8(t4) + REG_L a3, 1*SZREG(t4) addi t3, t3, -1 beqz t3, .L_kmp_3 - ld a4, 16(t4) + REG_L a4, 2*SZREG(t4) addi t3, t3, -1 beqz t3, .L_kmp_3 - ld a5, 24(t4) + REG_L a5, 3*SZREG(t4) addi t3, t3, -1 beqz t3, .L_kmp_3 - ld a6, 32(t4) + REG_L a6, 4*SZREG(t4) addi t3, t3, -1 beqz t3, .L_kmp_3 - ld a7, 40(t4) + REG_L a7, 5*SZREG(t4) // Prepare any additional argument passed through the stack - addi t4, t4, 48 + addi t4, t4, 6*SZREG mv t1, sp j .L_kmp_2 .L_kmp_1: - ld t2, 0(t4) - sd t2, 0(t1) - addi t4, t4, 8 - addi t1, t1, 8 + REG_L t2, 0(t4) + REG_S t2, 0(t1) + addi t4, t4, SZREG + addi t1, t1, SZREG .L_kmp_2: addi t3, t3, -1 bnez t3, .L_kmp_1 @@ -1942,10 +1955,10 @@ __kmp_invoke_microtask: addi a0, zero, 1 - addi sp, fp, -16 - ld fp, 0(sp) - ld ra, 8(sp) - addi sp, sp, 16 + addi sp, fp, -2*SZREG + REG_L fp, 0*SZREG(sp) + REG_L ra, 1*SZREG(sp) + addi sp, sp, 2*SZREG ret .Lfunc_end0: .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask @@ -1953,7 +1966,7 @@ __kmp_invoke_microtask: // --
End __kmp_invoke_microtask -#endif /* KMP_ARCH_RISCV64 */ +#endif /* KMP_ARCH_RISCV32 || KMP_ARCH_RISCV64 */ #if KMP_ARCH_LOONGARCH64 @@ -2475,8 +2488,8 @@ KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): #endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32 */ #if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || \ - KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || \ - KMP_ARCH_S390X + KMP_ARCH_RISCV32 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ + KMP_ARCH_VE || KMP_ARCH_S390X #ifndef KMP_PREFIX_UNDERSCORE # define KMP_PREFIX_UNDERSCORE(x) x #endif @@ -2491,8 +2504,8 @@ KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8 #endif #endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || - KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || - KMP_ARCH_S390X */ + KMP_ARCH_RISCV32 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || + KMP_ARCH_VE || KMP_ARCH_S390X */ #if KMP_OS_LINUX # if KMP_ARCH_ARM || KMP_ARCH_AARCH64 diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp index bdb7a12cef5e9..a939f73b68344 100644 --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -2743,9 +2743,9 @@ int __kmp_get_load_balance(int max) { #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \ ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \ - KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ - KMP_ARCH_ARM || KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC_XCOFF || \ - KMP_ARCH_AARCH64_32) + KMP_ARCH_PPC64 || KMP_ARCH_RISCV32 || KMP_ARCH_RISCV64 || \ + KMP_ARCH_LOONGARCH64 || KMP_ARCH_ARM || KMP_ARCH_VE || KMP_ARCH_S390X || \ + KMP_ARCH_PPC_XCOFF || KMP_ARCH_AARCH64_32) // Because WebAssembly will use `call_indirect` to invoke the microtask and // WebAssembly indirect calls check that the called signature is a precise