Skip to content

Commit 7e20f80

Browse files
committed
Use pointers to pass in s32fc arguments
This avoids undefined behaviour arising from incompatibility between complex numbers in C and C++.

Signed-off-by: Clayton Smith <argilo@gmail.com>
1 parent 56a893c commit 7e20f80

File tree

7 files changed

+134
-131
lines changed

7 files changed

+134
-131
lines changed

.github/workflows/run-tests.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,6 @@ jobs:
114114
submodules: 'recursive'
115115
- uses: uraimo/run-on-arch-action@v2.5.0
116116
name: Build in non-x86 container
117-
continue-on-error: ${{ contains(fromJson('["ppc64le", "s390x"]'), matrix.arch) }}
118117
id: build
119118
with:
120119
arch: ${{ matrix.arch }}
@@ -153,7 +152,9 @@ jobs:
153152
cmake -DCMAKE_CXX_FLAGS="-Werror" -DBUILD_EXECUTABLE=ON ..
154153
echo "Build with $(nproc) thread(s)"
155154
make -j$(nproc)
156-
./cpu_features/list_cpu_features
155+
if [ -f ./cpu_features/list_cpu_features ]; then
156+
./cpu_features/list_cpu_features
157+
fi
157158
./apps/volk-config-info --alignment
158159
./apps/volk-config-info --avail-machines
159160
./apps/volk-config-info --all-machines

kernels/volk/volk_32fc_s32fc_multiply_32fc.h

Lines changed: 41 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@
1818
* <b>Dispatcher Prototype</b>
1919
* \code
2020
* void volk_32fc_s32fc_multiply_32fc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const
21-
* lv_32fc_t scalar, unsigned int num_points); \endcode
21+
* lv_32fc_t* scalar, unsigned int num_points); \endcode
2222
*
2323
* \b Inputs
2424
* \li aVector: The input vector to be multiplied.
25-
* \li scalar The complex scalar to multiply against aVector.
25+
* \li scalar: The complex scalar to multiply against aVector.
2626
* \li num_points: The number of complex values in aVector.
2727
*
2828
* \b Outputs
@@ -46,7 +46,7 @@
4646
* in[ii+N/2] = lv_cmake(-real, -imag);
4747
* }
4848
*
49-
* volk_32fc_s32fc_multiply_32fc(out, in, scalar, N);
49+
* volk_32fc_s32fc_multiply_32fc(out, in, &scalar, N);
5050
*
5151
* printf(" mag phase | mag phase\n");
5252
* for(unsigned int ii = 0; ii < N; ++ii){
@@ -73,7 +73,7 @@
7373

7474
static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
7575
const lv_32fc_t* aVector,
76-
const lv_32fc_t scalar,
76+
const lv_32fc_t* scalar,
7777
unsigned int num_points)
7878
{
7979
unsigned int number = 0;
@@ -85,8 +85,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
8585
const lv_32fc_t* a = aVector;
8686

8787
// Set up constant scalar vector
88-
yl = _mm256_set1_ps(lv_creal(scalar));
89-
yh = _mm256_set1_ps(lv_cimag(scalar));
88+
yl = _mm256_set1_ps(lv_creal(*scalar));
89+
yh = _mm256_set1_ps(lv_cimag(*scalar));
9090

9191
for (; number < quarterPoints; number++) {
9292
x = _mm256_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
@@ -107,7 +107,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
107107
}
108108

109109
for (i = num_points - isodd; i < num_points; i++) {
110-
*c++ = (*a++) * scalar;
110+
*c++ = (*a++) * (*scalar);
111111
}
112112
}
113113
#endif /* LV_HAVE_AVX && LV_HAVE_FMA */
@@ -117,7 +117,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
117117

118118
static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
119119
const lv_32fc_t* aVector,
120-
const lv_32fc_t scalar,
120+
const lv_32fc_t* scalar,
121121
unsigned int num_points)
122122
{
123123
unsigned int number = 0;
@@ -129,8 +129,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
129129
const lv_32fc_t* a = aVector;
130130

131131
// Set up constant scalar vector
132-
yl = _mm256_set1_ps(lv_creal(scalar));
133-
yh = _mm256_set1_ps(lv_cimag(scalar));
132+
yl = _mm256_set1_ps(lv_creal(*scalar));
133+
yh = _mm256_set1_ps(lv_cimag(*scalar));
134134

135135
for (; number < quarterPoints; number++) {
136136
x = _mm256_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
@@ -151,7 +151,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
151151
}
152152

153153
for (i = num_points - isodd; i < num_points; i++) {
154-
*c++ = (*a++) * scalar;
154+
*c++ = (*a++) * (*scalar);
155155
}
156156
}
157157
#endif /* LV_HAVE_AVX */
@@ -161,7 +161,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
161161

162162
static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
163163
const lv_32fc_t* aVector,
164-
const lv_32fc_t scalar,
164+
const lv_32fc_t* scalar,
165165
unsigned int num_points)
166166
{
167167
unsigned int number = 0;
@@ -172,8 +172,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
172172
const lv_32fc_t* a = aVector;
173173

174174
// Set up constant scalar vector
175-
yl = _mm_set_ps1(lv_creal(scalar));
176-
yh = _mm_set_ps1(lv_cimag(scalar));
175+
yl = _mm_set_ps1(lv_creal(*scalar));
176+
yh = _mm_set_ps1(lv_cimag(*scalar));
177177

178178
for (; number < halfPoints; number++) {
179179

@@ -195,7 +195,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
195195
}
196196

197197
if ((num_points % 2) != 0) {
198-
*c = (*a) * scalar;
198+
*c = (*a) * (*scalar);
199199
}
200200
}
201201
#endif /* LV_HAVE_SSE */
@@ -204,7 +204,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
204204

205205
static inline void volk_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector,
206206
const lv_32fc_t* aVector,
207-
const lv_32fc_t scalar,
207+
const lv_32fc_t* scalar,
208208
unsigned int num_points)
209209
{
210210
lv_32fc_t* cPtr = cVector;
@@ -213,20 +213,20 @@ static inline void volk_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector,
213213

214214
// unwrap loop
215215
while (number >= 8) {
216-
*cPtr++ = (*aPtr++) * scalar;
217-
*cPtr++ = (*aPtr++) * scalar;
218-
*cPtr++ = (*aPtr++) * scalar;
219-
*cPtr++ = (*aPtr++) * scalar;
220-
*cPtr++ = (*aPtr++) * scalar;
221-
*cPtr++ = (*aPtr++) * scalar;
222-
*cPtr++ = (*aPtr++) * scalar;
223-
*cPtr++ = (*aPtr++) * scalar;
216+
*cPtr++ = (*aPtr++) * (*scalar);
217+
*cPtr++ = (*aPtr++) * (*scalar);
218+
*cPtr++ = (*aPtr++) * (*scalar);
219+
*cPtr++ = (*aPtr++) * (*scalar);
220+
*cPtr++ = (*aPtr++) * (*scalar);
221+
*cPtr++ = (*aPtr++) * (*scalar);
222+
*cPtr++ = (*aPtr++) * (*scalar);
223+
*cPtr++ = (*aPtr++) * (*scalar);
224224
number -= 8;
225225
}
226226

227227
// clean up any remaining
228228
while (number-- > 0)
229-
*cPtr++ = *aPtr++ * scalar;
229+
*cPtr++ = *aPtr++ * (*scalar);
230230
}
231231
#endif /* LV_HAVE_GENERIC */
232232

@@ -245,7 +245,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector,
245245

246246
static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
247247
const lv_32fc_t* aVector,
248-
const lv_32fc_t scalar,
248+
const lv_32fc_t* scalar,
249249
unsigned int num_points)
250250
{
251251
unsigned int number = 0;
@@ -257,8 +257,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
257257
const lv_32fc_t* a = aVector;
258258

259259
// Set up constant scalar vector
260-
yl = _mm256_set1_ps(lv_creal(scalar));
261-
yh = _mm256_set1_ps(lv_cimag(scalar));
260+
yl = _mm256_set1_ps(lv_creal(*scalar));
261+
yh = _mm256_set1_ps(lv_cimag(*scalar));
262262

263263
for (; number < quarterPoints; number++) {
264264
x = _mm256_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
@@ -279,7 +279,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
279279
}
280280

281281
for (i = num_points - isodd; i < num_points; i++) {
282-
*c++ = (*a++) * scalar;
282+
*c++ = (*a++) * (*scalar);
283283
}
284284
}
285285
#endif /* LV_HAVE_AVX && LV_HAVE_FMA */
@@ -290,7 +290,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
290290

291291
static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
292292
const lv_32fc_t* aVector,
293-
const lv_32fc_t scalar,
293+
const lv_32fc_t* scalar,
294294
unsigned int num_points)
295295
{
296296
unsigned int number = 0;
@@ -302,8 +302,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
302302
const lv_32fc_t* a = aVector;
303303

304304
// Set up constant scalar vector
305-
yl = _mm256_set1_ps(lv_creal(scalar));
306-
yh = _mm256_set1_ps(lv_cimag(scalar));
305+
yl = _mm256_set1_ps(lv_creal(*scalar));
306+
yh = _mm256_set1_ps(lv_cimag(*scalar));
307307

308308
for (; number < quarterPoints; number++) {
309309
x = _mm256_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
@@ -324,7 +324,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
324324
}
325325

326326
for (i = num_points - isodd; i < num_points; i++) {
327-
*c++ = (*a++) * scalar;
327+
*c++ = (*a++) * (*scalar);
328328
}
329329
}
330330
#endif /* LV_HAVE_AVX */
@@ -334,7 +334,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
334334

335335
static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
336336
const lv_32fc_t* aVector,
337-
const lv_32fc_t scalar,
337+
const lv_32fc_t* scalar,
338338
unsigned int num_points)
339339
{
340340
unsigned int number = 0;
@@ -345,8 +345,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
345345
const lv_32fc_t* a = aVector;
346346

347347
// Set up constant scalar vector
348-
yl = _mm_set_ps1(lv_creal(scalar));
349-
yh = _mm_set_ps1(lv_cimag(scalar));
348+
yl = _mm_set_ps1(lv_creal(*scalar));
349+
yh = _mm_set_ps1(lv_cimag(*scalar));
350350

351351
for (; number < halfPoints; number++) {
352352

@@ -368,7 +368,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
368368
}
369369

370370
if ((num_points % 2) != 0) {
371-
*c = (*a) * scalar;
371+
*c = (*a) * (*scalar);
372372
}
373373
}
374374
#endif /* LV_HAVE_SSE */
@@ -378,7 +378,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
378378

379379
static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector,
380380
const lv_32fc_t* aVector,
381-
const lv_32fc_t scalar,
381+
const lv_32fc_t* scalar,
382382
unsigned int num_points)
383383
{
384384
lv_32fc_t* cPtr = cVector;
@@ -389,8 +389,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector,
389389
float32x4x2_t a_val, scalar_val;
390390
float32x4x2_t tmp_imag;
391391

392-
scalar_val.val[0] = vld1q_dup_f32((const float*)&scalar);
393-
scalar_val.val[1] = vld1q_dup_f32(((const float*)&scalar) + 1);
392+
scalar_val.val[0] = vld1q_dup_f32((const float*)scalar);
393+
scalar_val.val[1] = vld1q_dup_f32(((const float*)scalar) + 1);
394394
for (number = 0; number < quarter_points; ++number) {
395395
a_val = vld2q_f32((float*)aPtr);
396396
tmp_imag.val[1] = vmulq_f32(a_val.val[1], scalar_val.val[0]);
@@ -405,7 +405,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector,
405405
}
406406

407407
for (number = quarter_points * 4; number < num_points; number++) {
408-
*cPtr++ = *aPtr++ * scalar;
408+
*cPtr++ = *aPtr++ * (*scalar);
409409
}
410410
}
411411
#endif /* LV_HAVE_NEON */

0 commit comments

Comments
 (0)