@@ -813,4 +813,70 @@ extern "C" void acc_combined(int N, int cond) {
813
813
// CHECK-NEXT: acc.yield
814
814
// CHECK-NEXT: } loc
815
815
816
+ #pragma acc parallel loop num_workers(cond)
817
+ for (unsigned I = 0 ; I < N; ++I);
818
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
819
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
820
+ // CHECK-NEXT: acc.parallel combined(loop) num_workers(%[[CONV_CAST]] : si32) {
821
+ // CHECK-NEXT: acc.loop combined(parallel) {
822
+ // CHECK: acc.yield
823
+ // CHECK-NEXT: } loc
824
+ // CHECK-NEXT: acc.yield
825
+ // CHECK-NEXT: } loc
826
+
827
+ #pragma acc kernels loop num_workers(cond) device_type(nvidia) num_workers(2u)
828
+ for (unsigned I = 0 ; I < N; ++I);
829
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
830
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
831
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i
832
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32
833
+ // CHECK-NEXT: acc.kernels combined(loop) num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type<nvidia>]) {
834
+ // CHECK-NEXT: acc.loop combined(kernels) {
835
+ // CHECK: acc.yield
836
+ // CHECK-NEXT: } loc
837
+ // CHECK-NEXT: acc.terminator
838
+ // CHECK-NEXT: } loc
839
+
840
+ #pragma acc parallel loop num_workers(cond) device_type(nvidia, host) num_workers(2) device_type(radeon) num_workers(3)
841
+ for (unsigned I = 0 ; I < N; ++I);
842
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
843
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
844
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
845
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
846
+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
847
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
848
+ // CHECK-NEXT: acc.parallel combined(loop) num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[TWO_CAST]] : si32 [#acc.device_type<host>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
849
+ // CHECK-NEXT: acc.loop combined(parallel) {
850
+ // CHECK: acc.yield
851
+ // CHECK-NEXT: } loc
852
+ // CHECK-NEXT: acc.yield
853
+ // CHECK-NEXT: } loc
854
+
855
+ #pragma acc kernels loop num_workers(cond) device_type(nvidia) num_workers(2) device_type(radeon, multicore) num_workers(4)
856
+ for (unsigned I = 0 ; I < N; ++I);
857
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
858
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
859
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
860
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
861
+ // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i
862
+ // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32
863
+ // CHECK-NEXT: acc.kernels combined(loop) num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[FOUR_CAST]] : si32 [#acc.device_type<radeon>], %[[FOUR_CAST]] : si32 [#acc.device_type<multicore>]) {
864
+ // CHECK-NEXT: acc.loop combined(kernels) {
865
+ // CHECK: acc.yield
866
+ // CHECK-NEXT: } loc
867
+ // CHECK-NEXT: acc.terminator
868
+ // CHECK-NEXT: } loc
869
+
870
+ #pragma acc parallel loop device_type(nvidia) num_workers(2) device_type(radeon) num_workers(3)
871
+ for (unsigned I = 0 ; I < N; ++I);
872
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
873
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
874
+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
875
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
876
+ // CHECK-NEXT: acc.parallel combined(loop) num_workers(%[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
877
+ // CHECK-NEXT: acc.loop combined(parallel) {
878
+ // CHECK: acc.yield
879
+ // CHECK-NEXT: } loc
880
+ // CHECK-NEXT: acc.yield
881
+ // CHECK-NEXT: } loc
816
882
}
0 commit comments