Skip to content

Commit 7ad12a7

Browse files
authored
[ARM] Add tan intrinsic lowering (#95439)
- `ARMISelLowering.cpp` - Add tan support for the f16 type and for NEON and MVE vector types
1 parent 597d2f7 commit 7ad12a7

File tree

8 files changed: 220 additions (+220) and 0 deletions (-0)

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -365,6 +365,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
365365
setOperationAction(ISD::FSQRT, VT, Expand);
366366
setOperationAction(ISD::FSIN, VT, Expand);
367367
setOperationAction(ISD::FCOS, VT, Expand);
368+
setOperationAction(ISD::FTAN, VT, Expand);
368369
setOperationAction(ISD::FPOW, VT, Expand);
369370
setOperationAction(ISD::FLOG, VT, Expand);
370371
setOperationAction(ISD::FLOG2, VT, Expand);
@@ -875,6 +876,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
875876
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
876877
setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
877878
setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
879+
setOperationAction(ISD::FTAN, MVT::v2f64, Expand);
878880
setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
879881
setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
880882
setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
@@ -897,6 +899,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
897899
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
898900
setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
899901
setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
902+
setOperationAction(ISD::FTAN, MVT::v4f32, Expand);
900903
setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
901904
setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
902905
setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
@@ -914,6 +917,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
914917
setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
915918
setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
916919
setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
920+
setOperationAction(ISD::FTAN, MVT::v2f32, Expand);
917921
setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
918922
setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
919923
setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
@@ -1540,6 +1544,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
15401544
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
15411545
setOperationAction(ISD::FSIN, MVT::f16, Promote);
15421546
setOperationAction(ISD::FCOS, MVT::f16, Promote);
1547+
setOperationAction(ISD::FTAN, MVT::f16, Promote);
15431548
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
15441549
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
15451550
setOperationAction(ISD::FPOW, MVT::f16, Promote);

llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,37 @@ L.entry:
5656

5757
declare <4 x float> @llvm.cos.v4f32(<4 x float>) nounwind readonly
5858

59+
define void @test_tan(ptr %X) nounwind {
60+
61+
; CHECK-LABEL: test_tan:
62+
63+
; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
64+
; CHECK: movt [[reg0]], :upper16:{{.*}}
65+
; CHECK: vld1.64
66+
67+
; CHECK: {{v?mov(.32)?}} r0,
68+
; CHECK: bl {{.*}}tanf
69+
70+
; CHECK: {{v?mov(.32)?}} r0,
71+
; CHECK: bl {{.*}}tanf
72+
73+
; CHECK: {{v?mov(.32)?}} r0,
74+
; CHECK: bl {{.*}}tanf
75+
76+
; CHECK: {{v?mov(.32)?}} r0,
77+
; CHECK: bl {{.*}}tanf
78+
79+
; CHECK: vst1.64
80+
81+
L.entry:
82+
%0 = load <4 x float>, ptr @A, align 16
83+
%1 = call <4 x float> @llvm.tan.v4f32(<4 x float> %0)
84+
store <4 x float> %1, ptr %X, align 16
85+
ret void
86+
}
87+
88+
declare <4 x float> @llvm.tan.v4f32(<4 x float>) nounwind readonly
89+
5990
define void @test_exp(ptr %X) nounwind {
6091

6192
; CHECK-LABEL: test_exp:

llvm/test/CodeGen/ARM/fp16-fullfp16.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,23 @@ define void @test_cos(ptr %p) {
281281
ret void
282282
}
283283

284+
define void @test_tan(ptr %p) {
285+
; CHECK-LABEL: test_tan:
286+
; CHECK: .save {r4, lr}
287+
; CHECK-NEXT: push {r4, lr}
288+
; CHECK-NEXT: vldr.16 s0, [r0]
289+
; CHECK-NEXT: mov r4, r0
290+
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
291+
; CHECK-NEXT: bl tanf
292+
; CHECK-NEXT: vcvtb.f16.f32 s0, s0
293+
; CHECK-NEXT: vstr.16 s0, [r4]
294+
; CHECK-NEXT: pop {r4, pc}
295+
%a = load half, ptr %p, align 2
296+
%r = call half @llvm.tan.f16(half %a)
297+
store half %r, ptr %p
298+
ret void
299+
}
300+
284301
define void @test_pow(ptr %p, ptr %q) {
285302
; CHECK-LABEL: test_pow:
286303
; CHECK: .save {r4, lr}
@@ -588,6 +605,7 @@ declare half @llvm.sqrt.f16(half %a)
588605
declare half @llvm.powi.f16.i32(half %a, i32 %b)
589606
declare half @llvm.sin.f16(half %a)
590607
declare half @llvm.cos.f16(half %a)
608+
declare half @llvm.tan.f16(half %a)
591609
declare half @llvm.pow.f16(half %a, half %b)
592610
declare half @llvm.exp.f16(half %a)
593611
declare half @llvm.exp2.f16(half %a)

llvm/test/CodeGen/ARM/fp16-promote.ll

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,7 @@ declare half @llvm.sqrt.f16(half %a) #0
393393
declare half @llvm.powi.f16.i32(half %a, i32 %b) #0
394394
declare half @llvm.sin.f16(half %a) #0
395395
declare half @llvm.cos.f16(half %a) #0
396+
declare half @llvm.tan.f16(half %a) #0
396397
declare half @llvm.pow.f16(half %a, half %b) #0
397398
declare half @llvm.exp.f16(half %a) #0
398399
declare half @llvm.exp2.f16(half %a) #0
@@ -472,6 +473,21 @@ define void @test_cos(ptr %p) #0 {
472473
ret void
473474
}
474475

476+
; CHECK-FP16-LABEL: test_tan:
477+
; CHECK-FP16: vcvtb.f32.f16
478+
; CHECK-FP16: bl tanf
479+
; CHECK-FP16: vcvtb.f16.f32
480+
; CHECK-LIBCALL-LABEL: test_tan:
481+
; CHECK-LIBCALL: bl __aeabi_h2f
482+
; CHECK-LIBCALL: bl tanf
483+
; CHECK-LIBCALL: bl __aeabi_f2h
484+
define void @test_tan(ptr %p) #0 {
485+
%a = load half, ptr %p, align 2
486+
%r = call half @llvm.tan.f16(half %a)
487+
store half %r, ptr %p
488+
ret void
489+
}
490+
475491
; CHECK-FP16-LABEL: test_pow:
476492
; CHECK-FP16: vcvtb.f32.f16
477493
; CHECK-FP16: vcvtb.f32.f16

llvm/test/CodeGen/ARM/vfloatintrinsics.ll

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ define %v2f32 @test_v2f32.cos(%v2f32 %a) {
2929
%1 = call %v2f32 @llvm.cos.v2f32(%v2f32 %a)
3030
ret %v2f32 %1
3131
}
32+
; CHECK-LABEL: test_v2f32.tan:{{.*}}
33+
define %v2f32 @test_v2f32.tan(%v2f32 %a) {
34+
; CHECK: tan
35+
%1 = call %v2f32 @llvm.tan.v2f32(%v2f32 %a)
36+
ret %v2f32 %1
37+
}
3238
; CHECK-LABEL: test_v2f32.pow:{{.*}}
3339
define %v2f32 @test_v2f32.pow(%v2f32 %a, %v2f32 %b) {
3440
; CHECK: pow
@@ -112,6 +118,7 @@ declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0
112118
declare %v2f32 @llvm.powi.v2f32.i32(%v2f32, i32) #0
113119
declare %v2f32 @llvm.sin.v2f32(%v2f32) #0
114120
declare %v2f32 @llvm.cos.v2f32(%v2f32) #0
121+
declare %v2f32 @llvm.tan.v2f32(%v2f32) #0
115122
declare %v2f32 @llvm.pow.v2f32(%v2f32, %v2f32) #0
116123
declare %v2f32 @llvm.exp.v2f32(%v2f32) #0
117124
declare %v2f32 @llvm.exp2.v2f32(%v2f32) #0
@@ -153,6 +160,12 @@ define %v4f32 @test_v4f32.cos(%v4f32 %a) {
153160
%1 = call %v4f32 @llvm.cos.v4f32(%v4f32 %a)
154161
ret %v4f32 %1
155162
}
163+
; CHECK-LABEL: test_v4f32.tan:{{.*}}
164+
define %v4f32 @test_v4f32.tan(%v4f32 %a) {
165+
; CHECK: tan
166+
%1 = call %v4f32 @llvm.tan.v4f32(%v4f32 %a)
167+
ret %v4f32 %1
168+
}
156169
; CHECK-LABEL: test_v4f32.pow:{{.*}}
157170
define %v4f32 @test_v4f32.pow(%v4f32 %a, %v4f32 %b) {
158171
; CHECK: pow
@@ -236,6 +249,7 @@ declare %v4f32 @llvm.sqrt.v4f32(%v4f32) #0
236249
declare %v4f32 @llvm.powi.v4f32.i32(%v4f32, i32) #0
237250
declare %v4f32 @llvm.sin.v4f32(%v4f32) #0
238251
declare %v4f32 @llvm.cos.v4f32(%v4f32) #0
252+
declare %v4f32 @llvm.tan.v4f32(%v4f32) #0
239253
declare %v4f32 @llvm.pow.v4f32(%v4f32, %v4f32) #0
240254
declare %v4f32 @llvm.exp.v4f32(%v4f32) #0
241255
declare %v4f32 @llvm.exp2.v4f32(%v4f32) #0
@@ -277,6 +291,12 @@ define %v2f64 @test_v2f64.cos(%v2f64 %a) {
277291
%1 = call %v2f64 @llvm.cos.v2f64(%v2f64 %a)
278292
ret %v2f64 %1
279293
}
294+
; CHECK-LABEL: test_v2f64.tan:{{.*}}
295+
define %v2f64 @test_v2f64.tan(%v2f64 %a) {
296+
; CHECK: tan
297+
%1 = call %v2f64 @llvm.tan.v2f64(%v2f64 %a)
298+
ret %v2f64 %1
299+
}
280300
; CHECK-LABEL: test_v2f64.pow:{{.*}}
281301
define %v2f64 @test_v2f64.pow(%v2f64 %a, %v2f64 %b) {
282302
; CHECK: pow
@@ -361,6 +381,7 @@ declare %v2f64 @llvm.sqrt.v2f64(%v2f64) #0
361381
declare %v2f64 @llvm.powi.v2f64.i32(%v2f64, i32) #0
362382
declare %v2f64 @llvm.sin.v2f64(%v2f64) #0
363383
declare %v2f64 @llvm.cos.v2f64(%v2f64) #0
384+
declare %v2f64 @llvm.tan.v2f64(%v2f64) #0
364385
declare %v2f64 @llvm.pow.v2f64(%v2f64, %v2f64) #0
365386
declare %v2f64 @llvm.exp.v2f64(%v2f64) #0
366387
declare %v2f64 @llvm.exp2.v2f64(%v2f64) #0

llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,15 @@ define double @cos_d(double %a) {
4141
ret double %1
4242
}
4343

44+
declare double @llvm.tan.f64(double %Val)
45+
define double @tan_d(double %a) {
46+
; CHECK-LABEL: tan_d:
47+
; SOFT: {{(bl|b)}} tan
48+
; HARD: b tan
49+
%1 = call double @llvm.tan.f64(double %a)
50+
ret double %1
51+
}
52+
4453
declare double @llvm.pow.f64(double %Val, double %power)
4554
define double @pow_d(double %a, double %b) {
4655
; CHECK-LABEL: pow_d:

llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,15 @@ define float @cos_f(float %a) {
4242
ret float %1
4343
}
4444

45+
declare float @llvm.tan.f32(float %Val)
46+
define float @tan_f(float %a) {
47+
; CHECK-LABEL: tan_f:
48+
; SOFT: bl tanf
49+
; HARD: b tanf
50+
%1 = call float @llvm.tan.f32(float %a)
51+
ret float %1
52+
}
53+
4554
declare float @llvm.pow.f32(float %Val, float %power)
4655
define float @pow_f(float %a, float %b) {
4756
; CHECK-LABEL: pow_f:

llvm/test/CodeGen/Thumb2/mve-fmath.ll

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,117 @@ entry:
288288
ret <2 x double> %0
289289
}
290290

291+
define arm_aapcs_vfpcc <4 x float> @tan_float32_t(<4 x float> %src) {
292+
; CHECK-LABEL: tan_float32_t:
293+
; CHECK: @ %bb.0: @ %entry
294+
; CHECK-NEXT: .save {r4, r5, r7, lr}
295+
; CHECK-NEXT: push {r4, r5, r7, lr}
296+
; CHECK-NEXT: .vsave {d8, d9}
297+
; CHECK-NEXT: vpush {d8, d9}
298+
; CHECK-NEXT: vmov q4, q0
299+
; CHECK-NEXT: vmov r0, r4, d9
300+
; CHECK-NEXT: bl tanf
301+
; CHECK-NEXT: mov r5, r0
302+
; CHECK-NEXT: mov r0, r4
303+
; CHECK-NEXT: bl tanf
304+
; CHECK-NEXT: vmov r4, r1, d8
305+
; CHECK-NEXT: vmov s19, r0
306+
; CHECK-NEXT: vmov s18, r5
307+
; CHECK-NEXT: mov r0, r1
308+
; CHECK-NEXT: bl tanf
309+
; CHECK-NEXT: vmov s17, r0
310+
; CHECK-NEXT: mov r0, r4
311+
; CHECK-NEXT: bl tanf
312+
; CHECK-NEXT: vmov s16, r0
313+
; CHECK-NEXT: vmov q0, q4
314+
; CHECK-NEXT: vpop {d8, d9}
315+
; CHECK-NEXT: pop {r4, r5, r7, pc}
316+
entry:
317+
%0 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %src)
318+
ret <4 x float> %0
319+
}
320+
321+
define arm_aapcs_vfpcc <8 x half> @tan_float16_t(<8 x half> %src) {
322+
; CHECK-LABEL: tan_float16_t:
323+
; CHECK: @ %bb.0: @ %entry
324+
; CHECK-NEXT: .save {r7, lr}
325+
; CHECK-NEXT: push {r7, lr}
326+
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
327+
; CHECK-NEXT: vpush {d8, d9, d10, d11}
328+
; CHECK-NEXT: vmov q4, q0
329+
; CHECK-NEXT: vcvtb.f32.f16 s0, s16
330+
; CHECK-NEXT: vmov r0, s0
331+
; CHECK-NEXT: bl tanf
332+
; CHECK-NEXT: vcvtt.f32.f16 s0, s16
333+
; CHECK-NEXT: vmov s16, r0
334+
; CHECK-NEXT: vmov r1, s0
335+
; CHECK-NEXT: mov r0, r1
336+
; CHECK-NEXT: bl tanf
337+
; CHECK-NEXT: vmov s0, r0
338+
; CHECK-NEXT: vcvtb.f16.f32 s20, s16
339+
; CHECK-NEXT: vcvtt.f16.f32 s20, s0
340+
; CHECK-NEXT: vcvtb.f32.f16 s0, s17
341+
; CHECK-NEXT: vmov r0, s0
342+
; CHECK-NEXT: bl tanf
343+
; CHECK-NEXT: vmov s0, r0
344+
; CHECK-NEXT: vcvtb.f16.f32 s21, s0
345+
; CHECK-NEXT: vcvtt.f32.f16 s0, s17
346+
; CHECK-NEXT: vmov r0, s0
347+
; CHECK-NEXT: bl tanf
348+
; CHECK-NEXT: vmov s0, r0
349+
; CHECK-NEXT: vcvtt.f16.f32 s21, s0
350+
; CHECK-NEXT: vcvtb.f32.f16 s0, s18
351+
; CHECK-NEXT: vmov r0, s0
352+
; CHECK-NEXT: bl tanf
353+
; CHECK-NEXT: vmov s0, r0
354+
; CHECK-NEXT: vcvtb.f16.f32 s22, s0
355+
; CHECK-NEXT: vcvtt.f32.f16 s0, s18
356+
; CHECK-NEXT: vmov r0, s0
357+
; CHECK-NEXT: bl tanf
358+
; CHECK-NEXT: vmov s0, r0
359+
; CHECK-NEXT: vcvtt.f16.f32 s22, s0
360+
; CHECK-NEXT: vcvtb.f32.f16 s0, s19
361+
; CHECK-NEXT: vmov r0, s0
362+
; CHECK-NEXT: bl tanf
363+
; CHECK-NEXT: vmov s0, r0
364+
; CHECK-NEXT: vcvtb.f16.f32 s23, s0
365+
; CHECK-NEXT: vcvtt.f32.f16 s0, s19
366+
; CHECK-NEXT: vmov r0, s0
367+
; CHECK-NEXT: bl tanf
368+
; CHECK-NEXT: vmov s0, r0
369+
; CHECK-NEXT: vcvtt.f16.f32 s23, s0
370+
; CHECK-NEXT: vmov q0, q5
371+
; CHECK-NEXT: vpop {d8, d9, d10, d11}
372+
; CHECK-NEXT: pop {r7, pc}
373+
entry:
374+
%0 = call fast <8 x half> @llvm.tan.v8f16(<8 x half> %src)
375+
ret <8 x half> %0
376+
}
377+
378+
define arm_aapcs_vfpcc <2 x double> @tan_float64_t(<2 x double> %src) {
379+
; CHECK-LABEL: tan_float64_t:
380+
; CHECK: @ %bb.0: @ %entry
381+
; CHECK-NEXT: .save {r7, lr}
382+
; CHECK-NEXT: push {r7, lr}
383+
; CHECK-NEXT: .vsave {d8, d9}
384+
; CHECK-NEXT: vpush {d8, d9}
385+
; CHECK-NEXT: vmov q4, q0
386+
; CHECK-NEXT: vmov r0, r1, d9
387+
; CHECK-NEXT: bl tan
388+
; CHECK-NEXT: vmov r2, r3, d8
389+
; CHECK-NEXT: vmov d9, r0, r1
390+
; CHECK-NEXT: mov r0, r2
391+
; CHECK-NEXT: mov r1, r3
392+
; CHECK-NEXT: bl tan
393+
; CHECK-NEXT: vmov d8, r0, r1
394+
; CHECK-NEXT: vmov q0, q4
395+
; CHECK-NEXT: vpop {d8, d9}
396+
; CHECK-NEXT: pop {r7, pc}
397+
entry:
398+
%0 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %src)
399+
ret <2 x double> %0
400+
}
401+
291402
define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) {
292403
; CHECK-LABEL: exp_float32_t:
293404
; CHECK: @ %bb.0: @ %entry

0 commit comments

Comments
 (0)