; RUN: llc < %s -mtriple=x86_64-- -mcpu=nehalem | FileCheck %s --check-prefixes=NHM
; RUN: llc < %s -mtriple=x86_64-- -mcpu=sandybridge | FileCheck %s --check-prefixes=FAST-SCALAR,SNB
; RUN: llc < %s -mtriple=x86_64-- -mcpu=broadwell | FileCheck %s --check-prefixes=FAST-SCALAR,BDW
- ; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=FAST-SCALAR,SKL
- ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s --check-prefixes=SLOW-SCALAR,ZN1
- ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=SLOW-SCALAR,ZN3
+ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
+ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
+ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR

define float @f32_no_daz(float %f) #0 {
; NHM-LABEL: f32_no_daz:
@@ -26,19 +26,6 @@ define float @f32_no_daz(float %f) #0 {
; FAST-SCALAR: # %bb.0:
; FAST-SCALAR-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; FAST-SCALAR-NEXT: retq
- ;
- ; SLOW-SCALAR-LABEL: f32_no_daz:
- ; SLOW-SCALAR: # %bb.0:
- ; SLOW-SCALAR-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
- ; SLOW-SCALAR-NEXT: vbroadcastss {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN]
- ; SLOW-SCALAR-NEXT: vmulss %xmm1, %xmm0, %xmm2
- ; SLOW-SCALAR-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm2 * xmm1) + mem
- ; SLOW-SCALAR-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
- ; SLOW-SCALAR-NEXT: vandps %xmm3, %xmm0, %xmm0
- ; SLOW-SCALAR-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
- ; SLOW-SCALAR-NEXT: vmulss %xmm1, %xmm2, %xmm1
- ; SLOW-SCALAR-NEXT: vandnps %xmm1, %xmm0, %xmm0
- ; SLOW-SCALAR-NEXT: retq
%call = tail call fast float @llvm.sqrt.f32(float %f) #2
ret float %call
}
@@ -91,42 +78,10 @@ define <4 x float> @v4f32_no_daz(<4 x float> %f) #0 {
; BDW-NEXT: vandps %xmm1, %xmm0, %xmm0
; BDW-NEXT: retq
;
- ; SKL-LABEL: v4f32_no_daz:
- ; SKL: # %bb.0:
- ; SKL-NEXT: vsqrtps %xmm0, %xmm0
- ; SKL-NEXT: retq
- ;
- ; ZN1-LABEL: v4f32_no_daz:
- ; ZN1: # %bb.0:
- ; ZN1-NEXT: vrsqrtps %xmm0, %xmm1
- ; ZN1-NEXT: vbroadcastss {{.*#+}} xmm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
- ; ZN1-NEXT: vbroadcastss {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN]
- ; ZN1-NEXT: vmulps %xmm1, %xmm0, %xmm2
- ; ZN1-NEXT: vfmadd231ps {{.*#+}} xmm3 = (xmm2 * xmm1) + xmm3
- ; ZN1-NEXT: vbroadcastss {{.*#+}} xmm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
- ; ZN1-NEXT: vandps %xmm4, %xmm0, %xmm0
- ; ZN1-NEXT: vmulps %xmm1, %xmm2, %xmm1
- ; ZN1-NEXT: vmulps %xmm3, %xmm1, %xmm1
- ; ZN1-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
- ; ZN1-NEXT: vcmpleps %xmm0, %xmm3, %xmm0
- ; ZN1-NEXT: vandps %xmm1, %xmm0, %xmm0
- ; ZN1-NEXT: retq
- ;
- ; ZN3-LABEL: v4f32_no_daz:
- ; ZN3: # %bb.0:
- ; ZN3-NEXT: vbroadcastss {{.*#+}} xmm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
- ; ZN3-NEXT: vrsqrtps %xmm0, %xmm1
- ; ZN3-NEXT: vbroadcastss {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN]
- ; ZN3-NEXT: vmulps %xmm1, %xmm0, %xmm2
- ; ZN3-NEXT: vfmadd231ps {{.*#+}} xmm3 = (xmm2 * xmm1) + xmm3
- ; ZN3-NEXT: vbroadcastss {{.*#+}} xmm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
- ; ZN3-NEXT: vandps %xmm4, %xmm0, %xmm0
- ; ZN3-NEXT: vmulps %xmm1, %xmm2, %xmm1
- ; ZN3-NEXT: vmulps %xmm3, %xmm1, %xmm1
- ; ZN3-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
- ; ZN3-NEXT: vcmpleps %xmm0, %xmm3, %xmm0
- ; ZN3-NEXT: vandps %xmm1, %xmm0, %xmm0
- ; ZN3-NEXT: retq
+ ; FAST-VECTOR-LABEL: v4f32_no_daz:
+ ; FAST-VECTOR: # %bb.0:
+ ; FAST-VECTOR-NEXT: vsqrtps %xmm0, %xmm0
+ ; FAST-VECTOR-NEXT: retq
%call = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %f) #2
ret <4 x float> %call
}
@@ -194,42 +149,10 @@ define <8 x float> @v8f32_no_daz(<8 x float> %f) #0 {
; BDW-NEXT: vandps %ymm1, %ymm0, %ymm0
; BDW-NEXT: retq
;
- ; SKL-LABEL: v8f32_no_daz:
- ; SKL: # %bb.0:
- ; SKL-NEXT: vsqrtps %ymm0, %ymm0
- ; SKL-NEXT: retq
- ;
- ; ZN1-LABEL: v8f32_no_daz:
- ; ZN1: # %bb.0:
- ; ZN1-NEXT: vrsqrtps %ymm0, %ymm1
- ; ZN1-NEXT: vbroadcastss {{.*#+}} ymm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
- ; ZN1-NEXT: vbroadcastss {{.*#+}} ymm4 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
- ; ZN1-NEXT: vmulps %ymm1, %ymm0, %ymm2
- ; ZN1-NEXT: vandps %ymm4, %ymm0, %ymm0
- ; ZN1-NEXT: vfmadd231ps {{.*#+}} ymm3 = (ymm2 * ymm1) + ymm3
- ; ZN1-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
- ; ZN1-NEXT: vmulps %ymm1, %ymm2, %ymm1
- ; ZN1-NEXT: vmulps %ymm3, %ymm1, %ymm1
- ; ZN1-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
- ; ZN1-NEXT: vcmpleps %ymm0, %ymm3, %ymm0
- ; ZN1-NEXT: vandps %ymm1, %ymm0, %ymm0
- ; ZN1-NEXT: retq
- ;
- ; ZN3-LABEL: v8f32_no_daz:
- ; ZN3: # %bb.0:
- ; ZN3-NEXT: vbroadcastss {{.*#+}} ymm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
- ; ZN3-NEXT: vrsqrtps %ymm0, %ymm1
- ; ZN3-NEXT: vbroadcastss {{.*#+}} ymm4 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
- ; ZN3-NEXT: vmulps %ymm1, %ymm0, %ymm2
- ; ZN3-NEXT: vfmadd231ps {{.*#+}} ymm3 = (ymm2 * ymm1) + ymm3
- ; ZN3-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
- ; ZN3-NEXT: vandps %ymm4, %ymm0, %ymm0
- ; ZN3-NEXT: vmulps %ymm1, %ymm2, %ymm1
- ; ZN3-NEXT: vmulps %ymm3, %ymm1, %ymm1
- ; ZN3-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
- ; ZN3-NEXT: vcmpleps %ymm0, %ymm3, %ymm0
- ; ZN3-NEXT: vandps %ymm1, %ymm0, %ymm0
- ; ZN3-NEXT: retq
+ ; FAST-VECTOR-LABEL: v8f32_no_daz:
+ ; FAST-VECTOR: # %bb.0:
+ ; FAST-VECTOR-NEXT: vsqrtps %ymm0, %ymm0
+ ; FAST-VECTOR-NEXT: retq
%call = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %f) #2
ret <8 x float> %call
}
@@ -256,18 +179,6 @@ define float @f32_daz(float %f) #1 {
; FAST-SCALAR: # %bb.0:
; FAST-SCALAR-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; FAST-SCALAR-NEXT: retq
- ;
- ; SLOW-SCALAR-LABEL: f32_daz:
- ; SLOW-SCALAR: # %bb.0:
- ; SLOW-SCALAR-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
- ; SLOW-SCALAR-NEXT: vmulss %xmm1, %xmm0, %xmm2
- ; SLOW-SCALAR-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm2 * xmm1) + mem
- ; SLOW-SCALAR-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
- ; SLOW-SCALAR-NEXT: vmulss %xmm1, %xmm2, %xmm1
- ; SLOW-SCALAR-NEXT: vxorps %xmm2, %xmm2, %xmm2
- ; SLOW-SCALAR-NEXT: vcmpeqss %xmm2, %xmm0, %xmm0
- ; SLOW-SCALAR-NEXT: vandnps %xmm1, %xmm0, %xmm0
- ; SLOW-SCALAR-NEXT: retq
%call = tail call fast float @llvm.sqrt.f32(float %f) #2
ret float %call
}
@@ -315,38 +226,10 @@ define <4 x float> @v4f32_daz(<4 x float> %f) #1 {
; BDW-NEXT: vandps %xmm1, %xmm0, %xmm0
; BDW-NEXT: retq
;
- ; SKL-LABEL: v4f32_daz:
- ; SKL: # %bb.0:
- ; SKL-NEXT: vsqrtps %xmm0, %xmm0
- ; SKL-NEXT: retq
- ;
- ; ZN1-LABEL: v4f32_daz:
- ; ZN1: # %bb.0:
- ; ZN1-NEXT: vrsqrtps %xmm0, %xmm1
- ; ZN1-NEXT: vbroadcastss {{.*#+}} xmm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
- ; ZN1-NEXT: vmulps %xmm1, %xmm0, %xmm2
- ; ZN1-NEXT: vfmadd231ps {{.*#+}} xmm3 = (xmm2 * xmm1) + xmm3
- ; ZN1-NEXT: vbroadcastss {{.*#+}} xmm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
- ; ZN1-NEXT: vmulps %xmm1, %xmm2, %xmm1
- ; ZN1-NEXT: vxorps %xmm2, %xmm2, %xmm2
- ; ZN1-NEXT: vcmpneqps %xmm2, %xmm0, %xmm0
- ; ZN1-NEXT: vmulps %xmm3, %xmm1, %xmm1
- ; ZN1-NEXT: vandps %xmm1, %xmm0, %xmm0
- ; ZN1-NEXT: retq
- ;
- ; ZN3-LABEL: v4f32_daz:
- ; ZN3: # %bb.0:
- ; ZN3-NEXT: vbroadcastss {{.*#+}} xmm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
- ; ZN3-NEXT: vrsqrtps %xmm0, %xmm1
- ; ZN3-NEXT: vmulps %xmm1, %xmm0, %xmm2
- ; ZN3-NEXT: vfmadd231ps {{.*#+}} xmm3 = (xmm2 * xmm1) + xmm3
- ; ZN3-NEXT: vbroadcastss {{.*#+}} xmm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
- ; ZN3-NEXT: vmulps %xmm1, %xmm2, %xmm1
- ; ZN3-NEXT: vxorps %xmm2, %xmm2, %xmm2
- ; ZN3-NEXT: vcmpneqps %xmm2, %xmm0, %xmm0
- ; ZN3-NEXT: vmulps %xmm3, %xmm1, %xmm1
- ; ZN3-NEXT: vandps %xmm1, %xmm0, %xmm0
- ; ZN3-NEXT: retq
+ ; FAST-VECTOR-LABEL: v4f32_daz:
+ ; FAST-VECTOR: # %bb.0:
+ ; FAST-VECTOR-NEXT: vsqrtps %xmm0, %xmm0
+ ; FAST-VECTOR-NEXT: retq
%call = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %f) #2
ret <4 x float> %call
}
@@ -405,38 +288,10 @@ define <8 x float> @v8f32_daz(<8 x float> %f) #1 {
; BDW-NEXT: vandps %ymm1, %ymm0, %ymm0
; BDW-NEXT: retq
;
- ; SKL-LABEL: v8f32_daz:
- ; SKL: # %bb.0:
- ; SKL-NEXT: vsqrtps %ymm0, %ymm0
- ; SKL-NEXT: retq
- ;
- ; ZN1-LABEL: v8f32_daz:
- ; ZN1: # %bb.0:
- ; ZN1-NEXT: vrsqrtps %ymm0, %ymm1
- ; ZN1-NEXT: vbroadcastss {{.*#+}} ymm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
- ; ZN1-NEXT: vmulps %ymm1, %ymm0, %ymm2
- ; ZN1-NEXT: vfmadd231ps {{.*#+}} ymm3 = (ymm2 * ymm1) + ymm3
- ; ZN1-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
- ; ZN1-NEXT: vmulps %ymm1, %ymm2, %ymm1
- ; ZN1-NEXT: vxorps %xmm2, %xmm2, %xmm2
- ; ZN1-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
- ; ZN1-NEXT: vmulps %ymm3, %ymm1, %ymm1
- ; ZN1-NEXT: vandps %ymm1, %ymm0, %ymm0
- ; ZN1-NEXT: retq
- ;
- ; ZN3-LABEL: v8f32_daz:
- ; ZN3: # %bb.0:
- ; ZN3-NEXT: vbroadcastss {{.*#+}} ymm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
- ; ZN3-NEXT: vrsqrtps %ymm0, %ymm1
- ; ZN3-NEXT: vmulps %ymm1, %ymm0, %ymm2
- ; ZN3-NEXT: vfmadd231ps {{.*#+}} ymm3 = (ymm2 * ymm1) + ymm3
- ; ZN3-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
- ; ZN3-NEXT: vmulps %ymm1, %ymm2, %ymm1
- ; ZN3-NEXT: vxorps %xmm2, %xmm2, %xmm2
- ; ZN3-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
- ; ZN3-NEXT: vmulps %ymm3, %ymm1, %ymm1
- ; ZN3-NEXT: vandps %ymm1, %ymm0, %ymm0
- ; ZN3-NEXT: retq
+ ; FAST-VECTOR-LABEL: v8f32_daz:
+ ; FAST-VECTOR: # %bb.0:
+ ; FAST-VECTOR-NEXT: vsqrtps %ymm0, %ymm0
+ ; FAST-VECTOR-NEXT: retq
%call = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %f) #2
ret <8 x float> %call
}