Closed
Description
// Thin wrapper class whose only data member is one SVE float vector.
class SimdFloat
{
private:
// Fixed-length form of svfloat32_t: the arm_sve_vector_bits(512) attribute
// pins the vector width to 512 bits.
typedef svfloat32_t simdInternalType_
__attribute__((arm_sve_vector_bits(512)));
public:
// Default constructor: the body is empty, so simdInternal_ is not assigned here.
SimdFloat() {}
// Stores the result of svdup_n_f32(f) in simdInternal_. Not marked explicit,
// so a float converts to SimdFloat implicitly.
SimdFloat(const float f) { this->simdInternal_ = svdup_n_f32(f); }
// Wraps an existing svfloat32_t value. Also not marked explicit.
SimdFloat(svfloat32_t simd) : simdInternal_(simd) {}
// The wrapped vector; declared in the public section, so it is directly accessible.
simdInternalType_ simdInternal_;
};
...
// Reproducer function: takes two SimdFloat values by value plus two float
// pointers, and returns the sum of a, b and the values obtained from
// simdLoadFloat(n) and simdLoadFloat(m).
SimdFloat foo(SimdFloat a, SimdFloat b, const float* m, const float* n)
{
    // Load from m first, then from n, in the same order as the declarations
    // this replaces.
    SimdFloat loadedFromM = simdLoadFloat(m);
    SimdFloat loadedFromN = simdLoadFloat(n);

    // Parentheses spell out the left-to-right association of the sum.
    return ((a + b) + loadedFromN) + loadedFromM;
}
- For the above test case, GCC uses registers z0 and z1 to pass the arguments a and b, while LLVM passes them in memory.
Smaller example: https://gcc.godbolt.org/z/K484P4zEr