Skip to content

Fix svs::Float16 issue #151

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 26, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 2 additions & 36 deletions include/svs/lib/float16.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,27 +25,19 @@
#include <cstring>
#include <iostream>
#include <type_traits>
#if defined(__F16C__)
#include <x86intrin.h>
#endif

namespace svs {
namespace float16 {
namespace detail {

// Reinterpret the object representation of a 32-bit `float` as a `uint32_t`.
//
// The bytes are copied with `std::memcpy` instead of going through a pointer cast, so
// the conversion does not rely on type-punning through incompatible pointer types.
//
// TODO: Update to `std::bit_cast` if it's available in the standard library.
inline uint32_t bitcast_float_to_uint32(const float x) {
    // The copy below is only meaningful if both types occupy the same number of bytes.
    static_assert(sizeof(float) == sizeof(uint32_t));
    uint32_t u{};
    std::memcpy(&u, &x, sizeof(x));
    return u;
}

// Reinterpret a 32-bit unsigned integer as the `float` with the same object
// representation. This is the inverse of the float-to-integer helper in this namespace.
//
// The bytes are copied with `std::memcpy` instead of going through a pointer cast, so
// the conversion does not rely on type-punning through incompatible pointer types.
inline float bitcast_uint32_to_float(const uint32_t x) {
    // The copy below is only meaningful if both types occupy the same number of bytes.
    static_assert(sizeof(float) == sizeof(uint32_t));
    float f{};
    std::memcpy(&f, &x, sizeof(x));
    return f;
}

// reference:
Expand All @@ -72,38 +64,12 @@ inline uint16_t float_to_float16_untyped_slow(const float x) {
0x7FFF; // sign : normalized : denormalized : saturate
}

// If the processor is new enough, we can use hardware intrinsics to perform the conversion
// without using bit-level manipulation.
//
// Here, we check if the `F16C` instruction set is enabled and, if so, define the
// intrinsic-based conversion functions.
//
// The entry point for users of the conversion is `*_to_*_untyped`, which will dispatch
// to either the slow or fast version, depending on the architecture.
#if defined(__F16C__)
// Intrinsic-based widening of a raw 16-bit half-precision bit pattern to `float`.
//
// The pattern is reinterpreted as a signed 16-bit value (the type `_mm_set1_epi16`
// takes), broadcast to every 16-bit lane, converted with `_mm_cvtph_ps`, and the lowest
// single-precision lane of the result is returned.
inline float float16_to_float_untyped_fast(const uint16_t x) {
    const auto half_bits = std::bit_cast<int16_t>(x);
    const auto widened = _mm_cvtph_ps(_mm_set1_epi16(half_bits));
    return _mm_cvtss_f32(widened);
}
// Intrinsic-based narrowing of a `float` to a raw 16-bit half-precision bit pattern.
//
// `x` is placed in the first lane of a 128-bit vector, converted with `_mm_cvtps_ph`
// using `_MM_FROUND_NO_EXC` as the rounding/exception control, and the 16-bit lane at
// index 0 of the result is returned.
inline uint16_t float_to_float16_untyped_fast(const float x) {
    const auto source_lanes = __m128{x};
    const auto packed_halves = _mm_cvtps_ph(source_lanes, _MM_FROUND_NO_EXC);
    return _mm_extract_epi16(packed_halves, 0);
}

// Entry point for half -> single conversion when `__F16C__` is defined: forwards to the
// intrinsic-based implementation.
inline float float16_to_float_untyped(const uint16_t x) {
    const float widened = float16_to_float_untyped_fast(x);
    return widened;
}
// Entry point for single -> half conversion when `__F16C__` is defined: forwards to the
// intrinsic-based implementation.
inline uint16_t float_to_float16_untyped(const float x) {
    const uint16_t narrowed = float_to_float16_untyped_fast(x);
    return narrowed;
}
#else
// Entry point for half -> single conversion when `__F16C__` is not defined: forwards to
// the bit-manipulation implementation.
inline float float16_to_float_untyped(const uint16_t x) {
    const float widened = float16_to_float_untyped_slow(x);
    return widened;
}
// Entry point for single -> half conversion when `__F16C__` is not defined: forwards to
// the bit-manipulation implementation.
inline uint16_t float_to_float16_untyped(const float x) {
    const uint16_t narrowed = float_to_float16_untyped_slow(x);
    return narrowed;
}
#endif
} // namespace detail

// On GCC - we need to add this attribute so that Float16 members can appear inside
Expand Down
Loading