intel · dian-lun-lin · Jun 26, 2025 · Jun 26, 2025 · Jun 26, 2025
@@ -25,27 +25,19 @@
 #include <cstring>
 #include <iostream>
 #include <type_traits>
-#if defined(__F16C__)
-#include <x86intrin.h>
-#endif
 
 namespace svs {
 namespace float16 {
 namespace detail {
 
-// TODO: Update to `bitcast` if its available in the standard library.
 inline uint32_t bitcast_float_to_uint32(const float x) {
     static_assert(sizeof(float) == sizeof(uint32_t));
-    uint32_t u;
-    memcpy(&u, &x, sizeof(x));
-    return u;
+    return std::bit_cast<uint32_t>(x);
 }
 
 inline float bitcast_uint32_to_float(const uint32_t x) {
     static_assert(sizeof(float) == sizeof(uint32_t));
-    float f;
-    memcpy(&f, &x, sizeof(x));
-    return f;
+    return std::bit_cast<float>(x);
 }
 
 // reference:
@@ -72,38 +64,12 @@ inline uint16_t float_to_float16_untyped_slow(const float x) {
                0x7FFF; // sign : normalized : denormalized : saturate
 }
 
-// If the processor is new enough, we can use hardware intrinsics to perform the conversion
-// without using bit-level manipulation.
-//
-// Here, we check if the `F16C` set is enabled and if so, we define the intrinsic based
-// conversion functions.
-//
-// The entry point for users of the conversion is `*_to_*_untyped`, which will dispatch
-// to either the slow or fast version, depending on the architecture.
-#if defined(__F16C__)
-inline float float16_to_float_untyped_fast(const uint16_t x) {
-    auto converted = _mm_cvtph_ps(_mm_set1_epi16(std::bit_cast<int16_t>(x)));
-    return _mm_cvtss_f32(converted);
-}
-inline uint16_t float_to_float16_untyped_fast(const float x) {
-    auto converted = _mm_cvtps_ph(__m128{x}, _MM_FROUND_NO_EXC);
-    return _mm_extract_epi16(converted, 0);
-}
-
-inline float float16_to_float_untyped(const uint16_t x) {
-    return float16_to_float_untyped_fast(x);
-}
-inline uint16_t float_to_float16_untyped(const float x) {
-    return float_to_float16_untyped_fast(x);
-}
-#else
 inline float float16_to_float_untyped(const uint16_t x) {
     return float16_to_float_untyped_slow(x);
 }
 inline uint16_t float_to_float16_untyped(const float x) {
     return float_to_float16_untyped_slow(x);
 }
-#endif
 } // namespace detail
 
 // On GCC - we need to add this attribute so that Float16 members can appear inside