; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
- ; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=CHECK
+ ; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=CHECK
; Exercise tail folding on RISCV w/scalable vectors.
@@ -330,17 +330,44 @@ for.end:
define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
; CHECK-LABEL: @uniform_load(
; CHECK-NEXT: entry:
+ ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+ ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+ ; CHECK: vector.ph:
+ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+ ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
+ ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+ ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
- ; CHECK: for.body:
- ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+ ; CHECK: vector.body:
+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[B:%.*]], align 8
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
- ; CHECK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
- ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
- ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
- ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+ ; CHECK-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[ARRAYIDX]], align 8
+ ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP5]]
+ ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
+ ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+ ; CHECK: middle.block:
+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
+ ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+ ; CHECK: scalar.ph:
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
+ ; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
+ ; CHECK: for.body:
+ ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ]
+ ; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[B]], align 8
+ ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV1]]
+ ; CHECK-NEXT: store i64 [[V1]], ptr [[ARRAYIDX1]], align 8
+ ; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
+ ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], 1025
+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: for.end:
- ; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ]
+ ; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V1]], [[FOR_BODY1]] ], [ [[V]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[V_LCSSA]]
;
entry:
@@ -389,7 +416,7 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
- ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+ ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: scalar.ph:
@@ -403,7 +430,7 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
- ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;