Skip to content

Commit 87c3949

Browse files
committed
util/crc32c: always compile FastCRC32 with SSE4.2 instructions
The use of this function is guarded by a CPUID check. That is, compiling this function with SSE4.2 crc32q instructions will never cause a SIGILL, even on CPUs that don't support SSE4.2, because the function can never be reached on those CPUs. The previous solution to enable hardware support for CRC32C required compiling all of RocksDB with `-msse4.2` or the equivalent. This gave the compiler permission to emit SSE4.2 instructions in functions that were not guarded by a CPUID check, resulting in SIGILLs on non-SSE4.2 CPUs.
1 parent 0403a97 commit 87c3949

File tree

1 file changed

+5
-28
lines changed

1 file changed

+5
-28
lines changed

util/crc32c.cc

Lines changed: 5 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -15,14 +15,7 @@
1515
#include "util/crc32c.h"
1616

1717
#include <stdint.h>
18-
#ifdef __SSE4_2__
19-
#include <nmmintrin.h>
20-
#endif
21-
#if defined(_WIN64)
22-
#ifdef __AVX2__
2318
#include <nmmintrin.h>
24-
#endif
25-
#endif
2619
#include "util/coding.h"
2720

2821
namespace rocksdb {
@@ -298,21 +291,12 @@ static inline uint32_t LE_LOAD32(const uint8_t *p) {
298291
return DecodeFixed32(reinterpret_cast<const char*>(p));
299292
}
300293

301-
#ifdef __SSE4_2__
302-
#ifdef __LP64__
294+
#if defined(__LP64__) || defined(_WIN64)
303295
static inline uint64_t LE_LOAD64(const uint8_t *p) {
304296
return DecodeFixed64(reinterpret_cast<const char*>(p));
305297
}
306298
#endif
307-
#endif
308299

309-
#if defined(_WIN64)
310-
#ifdef __AVX2__
311-
static inline uint64_t LE_LOAD64(const uint8_t *p) {
312-
return DecodeFixed64(reinterpret_cast<const char*>(p));
313-
}
314-
#endif
315-
#endif
316300
static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) {
317301
uint32_t c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p));
318302
*p += 4;
@@ -329,8 +313,8 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) {
329313
table0_[c >> 24];
330314
}
331315

316+
__attribute__((target("sse4.2")))
332317
static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) {
333-
#ifdef __SSE4_2__
334318
#ifdef __LP64__
335319
*l = _mm_crc32_u64(*l, LE_LOAD64(*p));
336320
*p += 8;
@@ -340,16 +324,6 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) {
340324
*l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p));
341325
*p += 4;
342326
#endif
343-
#elif defined(_WIN64)
344-
#ifdef __AVX2__
345-
*l = _mm_crc32_u64(*l, LE_LOAD64(*p));
346-
*p += 8;
347-
#else
348-
Slow_CRC32(l, p);
349-
#endif
350-
#else
351-
Slow_CRC32(l, p);
352-
#endif
353327
}
354328

355329
template<void (*CRC32)(uint64_t*, uint8_t const**)>
@@ -411,6 +385,9 @@ static bool isSSE42() {
411385
#endif
412386
}
413387

388+
template __attribute__((target("sse4.2")))
389+
uint32_t ExtendImpl<Fast_CRC32>(uint32_t, const char*, size_t);
390+
414391
typedef uint32_t (*Function)(uint32_t, const char*, size_t);
415392

416393
static inline Function Choose_Extend() {

0 commit comments

Comments
 (0)