diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index beeb1362b7ab3..bc3c3c014c4d2 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -1,16 +1,14 @@
 // RUN: mlir-opt %s -test-vector-transfer-collapse-inner-most-dims -split-input-file | FileCheck %s
 
-// TODO: Unify how memref and vectors are named
-
 //-----------------------------------------------------------------------------
 // 1. vector.transfer_read
 //-----------------------------------------------------------------------------
 
-func.func @contiguous_inner_most(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
+func.func @contiguous_inner_most(%src: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
-  return %0 : vector<1x8x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
+  return %v : vector<1x8x1xf32>
 }
 // CHECK:      func @contiguous_inner_most(%[[SRC:.+]]: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>
@@ -23,13 +21,13 @@ func.func @contiguous_inner_most(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1
 
 // Same as the top example within this split, but with the inner vector
 // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_scalable_inner_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x[8]x1xf32>{
+func.func @contiguous_inner_most_scalable_inner_dim(%src: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x[8]x1xf32>{
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x[8]x1xf32>
-  return %0 : vector<1x[8]x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x[8]x1xf32>
+  return %v : vector<1x[8]x1xf32>
 }
 // CHECK:      func @contiguous_inner_most_scalable_inner_dim(%[[SRC:.+]]: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>
@@ -43,11 +41,11 @@ func.func @contiguous_inner_most_scalable_inner_dim(%in: memref<1x1x8x1xf32, str
 
 // Same as the top example within this split, but the trailing unit dim was
 // replaced with a dyn dim - not supported
 
-func.func @negative_dynamic_trailing_dim(%in: memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
+func.func @negative_dynamic_trailing_dim(%src: memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
-  return %0 : vector<1x8x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
+  return %v : vector<1x8x1xf32>
 }
 // CHECK-LABEL: func @negative_dynamic_trailing_dim
@@ -57,11 +55,11 @@ func.func @negative_dynamic_trailing_dim(%in: memref<1x1x8x?xf32, strided<[3072,
 
 // Same as the top example within this split, but with a "scalable unit" dim in
 // the output vector - not supported (scalable 1, [1], is _not_ a unit dimension).
 
-func.func @negative_scalable_one_trailing_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x[1]xf32>{
+func.func @negative_scalable_one_trailing_dim(%src: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x[1]xf32>{
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x[1]xf32>
-  return %0 : vector<1x8x[1]xf32>
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x[1]xf32>
+  return %v : vector<1x8x[1]xf32>
 }
 // CHECK-LABEL: func @negative_scalable_one_trailing_dim
 // CHECK-NOT:   memref.subview
@@ -69,10 +67,10 @@ func.func @negative_scalable_one_trailing_dim(%in: memref<1x1x8x1xf32, strided<[
 
 // -----
 
-func.func @contiguous_inner_most_dynamic_outer(%a: index, %b: index, %memref: memref<?x?x8x1xf32>) -> vector<8x1xf32> {
+func.func @contiguous_inner_most_dynamic_outer(%i: index, %ii: index, %memref: memref<?x?x8x1xf32>) -> vector<8x1xf32> {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %v = vector.transfer_read %memref[%a, %b, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<8x1xf32>
+  %v = vector.transfer_read %memref[%i, %ii, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<8x1xf32>
   return %v : vector<8x1xf32>
 }
 // CHECK:      func.func @contiguous_inner_most_dynamic_outer
@@ -93,12 +91,12 @@ func.func @contiguous_inner_most_dynamic_outer(%a: index, %b: index, %memref: me
 
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%a: index, %b: index, %memref: memref<?x?x8x1xf32>) -> vector<[8]x1xf32> {
+func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%i: index, %ii: index, %memref: memref<?x?x8x1xf32>) -> vector<[8]x1xf32> {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %v = vector.transfer_read %memref[%a, %b, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<[8]x1xf32>
+  %v = vector.transfer_read %memref[%i, %ii, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<[8]x1xf32>
   return %v : vector<[8]x1xf32>
 }
 // CHECK-LABEL: func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim
@@ -118,11 +116,11 @@ func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%a: index, %b:
 
 // The index to be dropped is == 0, so it's safe to collapse. The other index
 // should be preserved correctly.
-func.func @contiguous_inner_most_zero_idx_in_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+func.func @contiguous_inner_most_zero_idx_in_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %pad = arith.constant 0.0 : f32
   %c0 = arith.constant 0 : index
-  %1 = vector.transfer_read %A[%i, %c0], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
-  return %1 : vector<8x1xf32>
+  %v = vector.transfer_read %src[%i, %c0], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
+  return %v : vector<8x1xf32>
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_zero_idx_in_bounds(
 // CHECK-SAME:    %[[MEM:.*]]: memref<16x1xf32>,
@@ -135,11 +133,11 @@ func.func @contiguous_inner_most_zero_idx_in_bounds(%A: memref<16x1xf32>, %i:ind
 
 // The index to be dropped is == 0, so it's safe to collapse. The "out of
 // bounds" attribute is too conservative and will be folded to "in bounds"
 // before the pattern runs. The other index should be preserved correctly.
-func.func @contiguous_inner_most_zero_idx_out_of_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+func.func @contiguous_inner_most_zero_idx_out_of_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %pad = arith.constant 0.0 : f32
   %c0 = arith.constant 0 : index
-  %1 = vector.transfer_read %A[%i, %c0], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
-  return %1 : vector<8x1xf32>
+  %v = vector.transfer_read %src[%i, %c0], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
+  return %v : vector<8x1xf32>
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_zero_idx_out_of_bounds(
 // CHECK-SAME:    %[[MEM:.*]]: memref<16x1xf32>,
@@ -151,10 +149,10 @@ func.func @contiguous_inner_most_zero_idx_out_of_bounds(%A: memref<16x1xf32>, %i
 
 // The index to be dropped is unknown, but since it's "in bounds", it has to be
 // == 0. It's safe to collapse the corresponding dim.
-func.func @contiguous_inner_most_non_zero_idx_in_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+func.func @contiguous_inner_most_non_zero_idx_in_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %pad = arith.constant 0.0 : f32
-  %1 = vector.transfer_read %A[%i, %i], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
-  return %1 : vector<8x1xf32>
+  %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
+  return %v : vector<8x1xf32>
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_non_zero_idx_in_bounds(
 // CHECK-SAME:    %[[MEM:.*]]: memref<16x1xf32>,
@@ -164,12 +162,29 @@ func.func @contiguous_inner_most_non_zero_idx_in_bounds(%A: memref<16x1xf32>, %i
 // CHECK:         %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = [true]} : memref<16xf32, strided<[1]>>, vector<8xf32>
 // CHECK:         vector.shape_cast %[[READ]] : vector<8xf32> to vector<8x1xf32>
 
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(%src: memref<16x1xf32>, %i:index) -> (vector<[8]x1xf32>) {
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<[8]x1xf32>
+  return %v : vector<[8]x1xf32>
+}
+// CHECK-LABEL: func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable
+// CHECK-SAME:    %[[MEM:.*]]: memref<16x1xf32>,
+// CHECK-SAME:    %[[IDX:.*]]: index) -> vector<[8]x1xf32> {
+// CHECK:         %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:         %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
+// CHECK:         %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = [true]} : memref<16xf32, strided<[1]>>, vector<[8]xf32>
+// CHECK:         vector.shape_cast %[[READ]] : vector<[8]xf32> to vector<[8]x1xf32>
+
 // The index to be dropped is unknown and "out of bounds" - not safe to
 // collapse.
-func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %pad = arith.constant 0.0 : f32
-  %1 = vector.transfer_read %A[%i, %i], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
-  return %1 : vector<8x1xf32>
+  %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
+  return %v : vector<8x1xf32>
 }
 // CHECK-LABEL: func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(
 // CHECK-NOT:   memref.subview
@@ -179,12 +194,12 @@ func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(%A: memref<
 
 // -----
 
-func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<4x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview(%src: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<4x1xf32>) {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
-  %1 = vector.transfer_read %0[%ii, %c0], %cst {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<4x1xf32>
-  return %1 : vector<4x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %sv = memref.subview %src[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  %v = vector.transfer_read %sv[%ii, %c0], %pad {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<4x1xf32>
+  return %v : vector<4x1xf32>
 }
 // CHECK:      func @contiguous_inner_most_dim_with_subview(%[[SRC:.+]]: memref<1000x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1xf32>
 // CHECK:        %[[SRC_0:.+]] = memref.subview %[[SRC]]
@@ -195,14 +210,14 @@ func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:ind
 
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%A: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<[4]x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%src: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<[4]x1xf32>) {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
-  %1 = vector.transfer_read %0[%ii, %c0], %cst {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<[4]x1xf32>
-  return %1 : vector<[4]x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %sv = memref.subview %src[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  %v = vector.transfer_read %sv[%ii, %c0], %pad {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<[4]x1xf32>
+  return %v : vector<[4]x1xf32>
 }
 // CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_scalable_inner_dim
 // CHECK-SAME:    %[[SRC:.+]]: memref<1000x1xf32>
@@ -213,12 +228,12 @@ func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%A: memref<
 
 // -----
 
-func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<4x1x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_2d(%src: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<4x1x1xf32>) {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
-  %1 = vector.transfer_read %0[%ii, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<4x1x1xf32>
-  return %1 : vector<4x1x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %sv = memref.subview %src[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  %v = vector.transfer_read %sv[%ii, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<4x1x1xf32>
+  return %v : vector<4x1x1xf32>
 }
 // CHECK:      func @contiguous_inner_most_dim_with_subview_2d(%[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1x1xf32>
 // CHECK:        %[[SRC_0:.+]] = memref.subview %[[SRC]]
@@ -229,14 +244,14 @@ func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %
 
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(%A: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<[4]x1x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(%src: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<[4]x1x1xf32>) {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
-  %1 = vector.transfer_read %0[%ii, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<[4]x1x1xf32>
-  return %1 : vector<[4]x1x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %sv = memref.subview %src[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  %v = vector.transfer_read %sv[%ii, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<[4]x1x1xf32>
+  return %v : vector<[4]x1x1xf32>
 }
 // CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(
 // CHECK-SAME:    %[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<[4]x1x1xf32>
@@ -251,11 +266,11 @@ func.func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(%A: memr
 
 // NOTE: This is an out-of-bounds access.
 
-func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>) -> vector<4x8xf32> {
+func.func @negative_non_unit_inner_vec_dim(%src: memref<4x1xf32>) -> vector<4x8xf32> {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = vector.transfer_read %arg0[%c0, %c0], %cst : memref<4x1xf32>, vector<4x8xf32>
-  return %0 : vector<4x8xf32>
+  %pad = arith.constant 0.000000e+00 : f32
+  %v = vector.transfer_read %src[%c0, %c0], %pad : memref<4x1xf32>, vector<4x8xf32>
+  return %v : vector<4x8xf32>
 }
 // CHECK:     func.func @negative_non_unit_inner_vec_dim
 // CHECK-NOT:   memref.subview
@@ -263,11 +278,11 @@ func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>) -> vector<4x8
 
 // -----
 
-func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>) -> vector<4x1xf32> {
+func.func @negative_non_unit_inner_memref_dim(%src: memref<4x8xf32>) -> vector<4x1xf32> {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = vector.transfer_read %arg0[%c0, %c0], %cst : memref<4x8xf32>, vector<4x1xf32>
-  return %0 : vector<4x1xf32>
+  %pad = arith.constant 0.000000e+00 : f32
+  %v = vector.transfer_read %src[%c0, %c0], %pad : memref<4x8xf32>, vector<4x1xf32>
+  return %v : vector<4x1xf32>
 }
 // CHECK:     func.func @negative_non_unit_inner_memref_dim
 // CHECK-NOT:   memref.subview
@@ -275,13 +290,28 @@ func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>) -> vector<
 
 // -----
 
+// The innermost unit dims cannot be dropped if the strides are not one.
+
+func.func @negative_non_unit_strides(%src: memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, %i: index) -> vector<16x16x1xf32> {
+  %c0 = arith.constant 0 : index
+  %pad = arith.constant 0.000000e+00 : f32
+  %v = vector.transfer_read %src[%i, %c0, %c0], %pad
+    {in_bounds = [true, true, true]}
+    : memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, vector<16x16x1xf32>
+  return %v : vector<16x16x1xf32>
+}
+// CHECK:     func.func @negative_non_unit_strides
+// CHECK-NOT:   memref.subview
+
+// -----
+
 //-----------------------------------------------------------------------------
 // 2. vector.transfer_write
 //-----------------------------------------------------------------------------
 
-func.func @contiguous_inner_most(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x16x1x1xf32>, %arg2: index) {
+func.func @contiguous_inner_most(%dest: memref<1x512x16x1x1xf32>, %v: vector<1x16x16x1x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
+  vector.transfer_write %v, %dest[%c0, %i, %c0, %c0, %c0]
     {in_bounds = [true, true, true, true, true]}
     : vector<1x16x16x1x1xf32>, memref<1x512x16x1x1xf32>
   return
@@ -299,11 +329,11 @@ func.func @contiguous_inner_most(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<
 
 // Same as the top example within this split, but with the inner vector
 // dim scalable. Note that this example only makes sense when "16 = [16]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_scalable_inner_dim(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x[16]x1x1xf32>, %arg2: index) {
+func.func @contiguous_inner_most_scalable_inner_dim(%dest: memref<1x512x16x1x1xf32>, %v: vector<1x16x[16]x1x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
+  vector.transfer_write %v, %dest[%c0, %i, %c0, %c0, %c0]
     {in_bounds = [true, true, true, true, true]}
    : vector<1x16x[16]x1x1xf32>, memref<1x512x16x1x1xf32>
   return
@@ -322,9 +352,9 @@ func.func @contiguous_inner_most_scalable_inner_dim(%arg0: memref<1x512x16x1x1xf
 
 // Same as the top example within this split, but the trailing unit dim was
 // replaced with a dyn dim - not supported
 
-func.func @negative_dynamic_trailing_dim(%arg0: memref<1x512x16x1x?xf32>, %arg1: vector<1x16x16x1x1xf32>, %arg2: index) {
+func.func @negative_dynamic_trailing_dim(%dest: memref<1x512x16x1x?xf32>, %v: vector<1x16x16x1x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
+  vector.transfer_write %v, %dest[%c0, %i, %c0, %c0, %c0]
     {in_bounds = [true, true, true, true, true]}
     : vector<1x16x16x1x1xf32>, memref<1x512x16x1x?xf32>
   return
@@ -336,9 +366,9 @@ func.func @negative_dynamic_trailing_dim(%arg0: memref<1x512x16x1x?xf32>, %arg1:
 
 // Same as the top example within this split, but with a "scalable unit" dim in
 // the input vector - not supported (scalable 1, [1], is _not_ a unit dimension).
 
-func.func @negative_scalable_one_trailing_dim(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x16x1x[1]xf32>, %arg2: index) {
+func.func @negative_scalable_one_trailing_dim(%dest: memref<1x512x16x1x1xf32>, %v: vector<1x16x16x1x[1]xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
+  vector.transfer_write %v, %dest[%c0, %i, %c0, %c0, %c0]
    {in_bounds = [true, true, true, true, true]}
    : vector<1x16x16x1x[1]xf32>, memref<1x512x16x1x1xf32>
   return
@@ -350,9 +380,9 @@ func.func @negative_scalable_one_trailing_dim(%arg0: memref<1x512x16x1x1xf32>, %
 
 // -----
 
-func.func @contiguous_inner_most_dynamic_outer(%a: index, %b: index, %arg0: memref<?x?x16x1xf32>, %arg1: vector<8x1xf32>) {
+func.func @contiguous_inner_most_dynamic_outer(%i: index, %ii: index, %dest: memref<?x?x16x1xf32>, %v: vector<8x1xf32>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%a, %b, %c0, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<?x?x16x1xf32>
+  vector.transfer_write %v, %dest[%i, %ii, %c0, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<?x?x16x1xf32>
   return
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_dynamic_outer(
@@ -369,11 +399,11 @@ func.func @contiguous_inner_most_dynamic_outer(%a: index, %b: index, %arg0: memr
 
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(%a: index, %b: index, %arg0: memref<?x?x16x1xf32>, %arg1: vector<[8]x1xf32>) {
+func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(%i: index, %ii: index, %dest: memref<?x?x16x1xf32>, %v: vector<[8]x1xf32>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%a, %b, %c0, %c0] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<?x?x16x1xf32>
+  vector.transfer_write %v, %dest[%i, %ii, %c0, %c0] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<?x?x16x1xf32>
   return
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(
@@ -395,9 +425,9 @@ func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(%a: index, %b:
 
 // The index to be dropped is == 0, so it's safe to collapse. The other index
 // should be preserved correctly.
 
-func.func @contiguous_inner_most_zero_idx_in_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
+func.func @contiguous_inner_most_zero_idx_in_bounds(%dest: memref<16x1xf32>, %v: vector<8x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%i, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
+  vector.transfer_write %v, %dest[%i, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
   return
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_zero_idx_in_bounds(
@@ -411,9 +441,9 @@ func.func @contiguous_inner_most_zero_idx_in_bounds(%arg0: memref<16x1xf32>, %ar
 
 // The index to be dropped is == 0, so it's safe to collapse. The "out of
 // bounds" attribute is too conservative and will be folded to "in bounds"
 // before the pattern runs. The other index should be preserved correctly.
 
-func.func @contiguous_inner_most_zero_idx_out_of_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
+func.func @contiguous_inner_most_zero_idx_out_of_bounds(%dest: memref<16x1xf32>, %v: vector<8x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%i, %c0] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
+  vector.transfer_write %v, %dest[%i, %c0] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
   return
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_zero_idx_out_of_bounds
@@ -426,8 +456,8 @@ func.func @contiguous_inner_most_zero_idx_out_of_bounds(%arg0: memref<16x1xf32>,
 
 // The index to be dropped is unknown, but since it's "in bounds", it has to be
 // == 0. It's safe to collapse the corresponding dim.
 
-func.func @contiguous_inner_most_dim_non_zero_idx_in_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
-  vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
+func.func @contiguous_inner_most_dim_non_zero_idx_in_bounds(%dest: memref<16x1xf32>, %v: vector<8x1xf32>, %i: index) {
+  vector.transfer_write %v, %dest[%i, %i] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
   return
 }
 // CHECK-LABEL: func @contiguous_inner_most_dim_non_zero_idx_in_bounds
@@ -442,10 +472,8 @@ func.func @contiguous_inner_most_dim_non_zero_idx_in_bounds(%arg0: memref<16x1xf
 
 // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
 // vscale = 1). This is assumed via the `in_bounds` attribute.
 
-// TODO: Add a similar test for xfer_write
-
-func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(%arg0: memref<16x1xf32>, %arg1: vector<[8]x1xf32>, %i: index) {
-  vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<16x1xf32>
+func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(%dest: memref<16x1xf32>, %v: vector<[8]x1xf32>, %i: index) {
+  vector.transfer_write %v, %dest[%i, %i] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<16x1xf32>
   return
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(
@@ -458,8 +486,8 @@ func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(%arg0: memref<1
 
 // The index to be dropped is unknown and "out of bounds" - not safe to
 // collapse.
-func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
-  vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
+func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%dest: memref<16x1xf32>, %v: vector<8x1xf32>, %i: index) {
+  vector.transfer_write %v, %dest[%i, %i] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
   return
 }
 // CHECK-LABEL: func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds
@@ -471,10 +499,10 @@ func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%arg0:
 
 // Verify that the transformation does work even when the input is a "subview"
 
-func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:index, %ii:index, %vec: vector<4x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview(%dest: memref<1000x1xf32>, %i:index, %ii:index, %vec: vector<4x1xf32>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  %0 = memref.subview %dest[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
   vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<4x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
   return
 }
@@ -492,10 +520,10 @@ func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:ind
 
 // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
 // vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%A: memref<1000x1xf32>, %i:index, %ii:index, %vec: vector<[4]x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%dest: memref<1000x1xf32>, %i:index, %ii:index, %vec: vector<[4]x1xf32>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  %0 = memref.subview %dest[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
   vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<[4]x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
   return
 }
@@ -511,10 +539,10 @@ func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%A: memref<
 
 // -----
 
-func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %i:index, %ii:index, %vec: vector<4x1x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_2d(%dest: memref<1000x1x1xf32>, %i:index, %ii:index, %vec: vector<4x1x1xf32>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  %0 = memref.subview %dest[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
   vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<4x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
   return
 }
@@ -529,12 +557,12 @@ func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %
 
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_dim_with_subview_2d_scalable(%A: memref<1000x1x1xf32>, %i:index, %ii:index, %vec: vector<[4]x1x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_2d_scalable(%dest: memref<1000x1x1xf32>, %i:index, %ii:index, %vec: vector<[4]x1x1xf32>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  %0 = memref.subview %dest[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
   vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<[4]x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
   return
 }
@@ -549,31 +577,11 @@ func.func @contiguous_inner_most_dim_with_subview_2d_scalable(%A: memref<1000x1x
 
 // -----
 
-func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>, %arg1: vector<1x16x16x1xf32>, %arg2: index) {
-  %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0]
-    {in_bounds = [true, true, true, true]}
-    : vector<1x16x16x1xf32>, memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>
-  return
-}
-// CHECK:      func.func @drop_inner_most_dim
-// CHECK-SAME:   %[[DEST:[a-zA-Z0-9]+]]
-// CHECK-SAME:   %[[VEC:[a-zA-Z0-9]+]]
-// CHECK-SAME:   %[[IDX:[a-zA-Z0-9]+]]
-// CHECK-DAG:    %[[C0:.+]] = arith.constant 0 : index
-// CHECK:        %[[SUBVIEW:.+]] = memref.subview %[[DEST]]
-// CHECK-SAME:     memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>> to memref<1x512x16xf32, strided<[8192, 16, 1], offset: ?>>
-// CHECK:        %[[CAST:.+]] = vector.shape_cast %[[VEC]] : vector<1x16x16x1xf32> to vector<1x16x16xf32>
-// CHECK:        vector.transfer_write %[[CAST]], %[[SUBVIEW]]
-// CHECK-SAME:     [%[[C0]], %[[IDX]], %[[C0]]]
-
-// -----
-
 // NOTE: This is an out-of-bounds access.
 
-func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>, %vec: vector<4x8xf32>) {
+func.func @negative_non_unit_inner_vec_dim(%dest: memref<4x1xf32>, %vec: vector<4x8xf32>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vec, %arg0[%c0, %c0] : vector<4x8xf32>, memref<4x1xf32>
+  vector.transfer_write %vec, %dest[%c0, %c0] : vector<4x8xf32>, memref<4x1xf32>
   return
 }
 // CHECK:     func.func @negative_non_unit_inner_vec_dim
 // CHECK-NOT:   memref.subview
@@ -582,9 +590,9 @@ func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>, %vec: vector<
 
 // -----
 
-func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>, %vec: vector<4x1xf32>) {
+func.func @negative_non_unit_inner_memref_dim(%dest: memref<4x8xf32>, %vec: vector<4x1xf32>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vec, %arg0[%c0, %c0] : vector<4x1xf32>, memref<4x8xf32>
+  vector.transfer_write %vec, %dest[%c0, %c0] : vector<4x1xf32>, memref<4x8xf32>
   return
 }
 // CHECK:     func.func @negative_non_unit_inner_memref_dim
 // CHECK-NOT:   memref.subview
@@ -593,13 +601,14 @@ func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>, %vec: vect
 
 // -----
 
-func.func @non_unit_strides(%arg0: memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, %arg1: vector<16x16x1xf32>, %arg2: index) {
+// The innermost unit dims cannot be dropped if the strides are not one.
+
+func.func @negative_non_unit_strides(%dest: memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, %v: vector<16x16x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%arg2, %c0, %c0]
+  vector.transfer_write %v, %dest[%i, %c0, %c0]
     {in_bounds = [true, true, true]}
     : vector<16x16x1xf32>, memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>
   return
 }
-// The inner most unit dims can not be dropped if the strides are not ones.
-// CHECK: func.func @non_unit_strides
+// CHECK: func.func @negative_non_unit_strides
 // CHECK-NOT: memref.subview
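
For reference, the rewrite exercised by these tests drops the trailing unit dimension through a rank-reducing memref.subview and restores the original vector shape with a vector.shape_cast. A minimal before/after sketch of the read case, assembled from the CHECK lines above (illustrative only, not part of the patch):

// Before: trailing unit dim on both the source memref and the result vector.
%v = vector.transfer_read %src[%i, %i], %pad {in_bounds = [true, true]}
    : memref<16x1xf32>, vector<8x1xf32>

// After: read through the collapsed rank-1 view, then shape_cast back to rank 2.
%sv = memref.subview %src[0, 0] [16, 1] [1, 1]
    : memref<16x1xf32> to memref<16xf32, strided<[1]>>
%r = vector.transfer_read %sv[%i], %pad {in_bounds = [true]}
    : memref<16xf32, strided<[1]>>, vector<8xf32>
%v = vector.shape_cast %r : vector<8xf32> to vector<8x1xf32>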