We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 195acec, commit 7f97c39. Copy full SHA for 7f97c39
mlx/backend/cuda/quantized/cuda_fp4.h
@@ -81,3 +81,16 @@ struct __nv_fp4_e2m1 {
81
}
82
uint8_t __x{0};
83
};
84
+
85
// Four FP4 (E2M1) codes packed into two bytes, two 4-bit codes per byte.
struct __nv_fp4x4_e2m1 {
  // Unpacks the four 4-bit codes and converts each one to float through
  // __nv_fp4_e2m1's float conversion.
  //
  // Lane order of the result:
  //   x = low  nibble of __high
  //   y = high nibble of __high
  //   z = low  nibble of __low
  //   w = high nibble of __low
  __device__ operator float4() {
    // The reinterpretation below views a single byte as an __nv_fp4_e2m1, so
    // the wrapper must be exactly one byte wide.
    static_assert(
        sizeof(__nv_fp4_e2m1) == sizeof(uint8_t),
        "__nv_fp4_e2m1 must be a one-byte wrapper");

    // Copy every code into its own addressable byte. A byte *value* must never
    // be cast to a pointer (that dereferences addresses 0..255), and the
    // result of a shift is a temporary whose address cannot be taken.
    // NOTE(review): the low-nibble lanes are masked so only the 4-bit code is
    // handed to the conversion; this assumes __nv_fp4_e2m1 keeps its code in
    // the low 4 bits of __x -- confirm against its operator float().
    uint8_t high_lo = static_cast<uint8_t>(__high & 0x0F);
    uint8_t high_hi = static_cast<uint8_t>(__high >> 4);
    uint8_t low_lo = static_cast<uint8_t>(__low & 0x0F);
    uint8_t low_hi = static_cast<uint8_t>(__low >> 4);

    float4 out;
    out.x = float(*reinterpret_cast<__nv_fp4_e2m1*>(&high_lo));
    out.y = float(*reinterpret_cast<__nv_fp4_e2m1*>(&high_hi));
    out.z = float(*reinterpret_cast<__nv_fp4_e2m1*>(&low_lo));
    out.w = float(*reinterpret_cast<__nv_fp4_e2m1*>(&low_hi));
    return out;
  }

  uint8_t __high{0}; // Holds the codes returned in lanes x and y.
  uint8_t __low{0}; // Holds the codes returned in lanes z and w.
};
0 commit comments