[mlir][tensor] Remove folding of tensor.extract_slice during tiling
Blindly folding tensor.extract_slice makes the bufferization
transformation harder. This kind of transformation should happen
separately, if needed, rather than inside makeShape, which is
called during tiling.
Also remove makeComposedExtractSliceOp, as it is not tested outside of
this code.

Differential Revision: https://reviews.llvm.org/D132666
ThomasRaoux committed Aug 26, 2022
1 parent e84784e commit 2e34599
Showing 7 changed files with 22 additions and 79 deletions.
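To make the change concrete, here is the example from the removed documentation of makeComposedExtractSliceOp (see the first diff below), shown as before/after IR; the surrounding tiling loop is omitted:

```mlir
// Old behavior: during tiling, a slice of a slice was blindly folded into a
// single op by adding the offsets ([3, 4] + [0, 5] = [3, 9]):
%1 = tensor.extract_slice %arg0[3, 9] [3, 4] [1, 1]
    : tensor<64x64xf32> to tensor<3x4xf32>

// New behavior: tiling keeps the chain explicit; any such folding is left to
// a separate transformation, which keeps the IR easier for bufferization:
%0 = tensor.extract_slice %arg0[3, 4] [3, 32] [1, 1]
    : tensor<64x64xf32> to tensor<3x32xf32>
%1 = tensor.extract_slice %0[0, 5] [3, 4] [1, 1]
    : tensor<3x32xf32> to tensor<3x4xf32>
```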
19 changes: 0 additions & 19 deletions mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -93,25 +93,6 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap,
 /// (boundsMap = affine.map<() -> (42)>)
 FailureOr<int64_t> getConstantUpperBoundForIndex(Value value);
 
-/// Create an ExtractSliceOp and, if `source` is defined by an ExtractSliceOp,
-/// fold it by adding the offsets.
-///
-/// Example:
-/// ```
-/// %0 = tensor.extract_slice %arg0[3, 4][3, 32][1, 1] : tensor<64x64xf32> to
-/// tensor<3x32xf32>
-/// %1 = tensor.extract_slice %0[0, 5][3, 4][1, 1] : tensor<3x32xf32> to
-/// tensor<3x4xf32>
-/// ```
-/// folds into:
-/// ```
-/// %1 = tensor.extract_slice %arg0[3, 9][3, 4][1, 1] : tensor<64x64xf32> to
-/// tensor<3x4xf32>
-/// ```
-tensor::ExtractSliceOp makeComposedExtractSliceOp(
-    OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
-    ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides);
-
 /// Create a tensor::PadOp that pads `source` to the size of the statically
 /// sized `type` whose static sizes are assumed to be greater than the dynamic
 /// `source` size. The padding introduces trailing `pad` values until the target
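The example above uses unit strides throughout. For intuition on why the implementation below refuses to compose non-unit strides: the consumer offsets must be scaled by the producer strides, and the strides themselves multiply. A hypothetical 1-D example (not from the patch):

```mlir
// The producer reads every other element starting at index 3: 3, 5, 7, ...
%0 = tensor.extract_slice %arg0[3] [16] [2] : tensor<64xf32> to tensor<16xf32>
%1 = tensor.extract_slice %0[5] [4] [1] : tensor<16xf32> to tensor<4xf32>
// %1 reads source indices 13, 15, 17, 19, so a correct fold must compute the
// offset as 3 + 5 * 2 = 13 and the stride as 1 * 2 = 2:
%folded = tensor.extract_slice %arg0[13] [4] [2] : tensor<64xf32> to tensor<4xf32>
```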
46 changes: 2 additions & 44 deletions mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -346,48 +346,6 @@ FailureOr<int64_t> getConstantUpperBoundForIndex(Value value) {
   return *std::min_element(constantBounds.begin(), constantBounds.end());
 }
 
-tensor::ExtractSliceOp makeComposedExtractSliceOp(
-    OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
-    ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides) {
-  assert(source && "expect source to be nonzero");
-
-  // Do not fold if the producer is not an ExtractSliceOp.
-  auto producerOp = source.getDefiningOp<tensor::ExtractSliceOp>();
-  if (!producerOp)
-    return b.create<tensor::ExtractSliceOp>(loc, source, offsets, sizes,
-                                            strides);
-
-  // Do not fold if the producer is rank reducing or if there are any non-unit
-  // strides. Supporting non-unit strides complicates the offset computation
-  // since the consumer offsets need to be multiplied by the producer strides.
-  // TODO: support non-unit strides once there are use cases.
-  SmallVector<OpFoldResult> allStrides = producerOp.getMixedStrides();
-  allStrides.append(strides.begin(), strides.end());
-  bool hasNonUnitStride = any_of(allStrides, [](OpFoldResult ofr) {
-    return getConstantIntValue(ofr) != static_cast<int64_t>(1);
-  });
-  if (hasNonUnitStride ||
-      producerOp.getSourceType().getRank() !=
-          producerOp.getResult().getType().cast<ShapedType>().getRank())
-    return b.create<tensor::ExtractSliceOp>(loc, source, offsets, sizes,
-                                            strides);
-
-  // Fold the producer by adding the offests and extracting the slice directly
-  // from the producer source tensor.
-  SmallVector<OpFoldResult> foldedOffsets(offsets.begin(), offsets.end());
-  AffineExpr dim1, dim2;
-  bindDims(b.getContext(), dim1, dim2);
-  for (const auto &en : enumerate(producerOp.getMixedOffsets())) {
-    SmallVector<Value> offsetValues = {
-        getValueOrCreateConstantIndexOp(b, loc, foldedOffsets[en.index()]),
-        getValueOrCreateConstantIndexOp(b, loc, en.value())};
-    foldedOffsets[en.index()] =
-        makeComposedAffineApply(b, loc, dim1 + dim2, offsetValues).getResult();
-  }
-  return b.create<tensor::ExtractSliceOp>(loc, producerOp.getSource(),
-                                          foldedOffsets, sizes, strides);
-}
-
 Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
                             Value source, Value pad, bool nofold) {
   // Exit if `source` is not defined by an ExtractSliceOp.
@@ -777,8 +735,8 @@ static Value materializeTiledShape(OpBuilder &builder, Location loc,
                                 sliceParams.sizes, sliceParams.strides);
           })
           .Case([&](RankedTensorType) {
-            return makeComposedExtractSliceOp(
-                builder, loc, valueToTile, sliceParams.offsets,
+            return builder.create<tensor::ExtractSliceOp>(
+                loc, valueToTile, sliceParams.offsets,
                 sliceParams.sizes, sliceParams.strides);
           })
           .Default([](ShapedType) -> Operation * {
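Since the helper is gone, clients that still want the composition must perform it separately. Below is a hypothetical sketch (not part of this commit, and not an existing upstream pattern at this revision) that repackages the deleted logic as a standalone rewrite, with the same unit-stride and non-rank-reducing restrictions:

```cpp
#include "llvm/ADT/STLExtras.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arithmetic/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/PatternMatch.h"

using namespace mlir;

/// Hypothetical pattern that reinstates the deleted folding as a separate
/// rewrite: compose extract_slice(extract_slice) by adding offsets, with the
/// same restrictions makeComposedExtractSliceOp had (unit strides only, no
/// rank-reducing producer).
struct ComposeExtractSliceChain
    : public OpRewritePattern<tensor::ExtractSliceOp> {
  using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(tensor::ExtractSliceOp consumer,
                                PatternRewriter &rewriter) const override {
    auto producer =
        consumer.getSource().getDefiningOp<tensor::ExtractSliceOp>();
    if (!producer)
      return failure();

    // Composing non-unit strides would require scaling the consumer offsets
    // by the producer strides and multiplying the strides; bail out, as the
    // deleted helper did.
    auto isUnit = [](OpFoldResult ofr) {
      return getConstantIntValue(ofr) == static_cast<int64_t>(1);
    };
    if (!llvm::all_of(producer.getMixedStrides(), isUnit) ||
        !llvm::all_of(consumer.getMixedStrides(), isUnit))
      return failure();

    // A rank-reducing producer drops dimensions, so the offsets of the two
    // ops would no longer pair up one-to-one.
    if (producer.getSourceType().getRank() !=
        producer.getResult().getType().cast<ShapedType>().getRank())
      return failure();

    // Compose the offsets dimension-wise: newOffset = prodOffset + consOffset.
    Location loc = consumer.getLoc();
    AffineExpr d0, d1;
    bindDims(rewriter.getContext(), d0, d1);
    SmallVector<OpFoldResult> newOffsets;
    for (auto it : llvm::zip(producer.getMixedOffsets(),
                             consumer.getMixedOffsets())) {
      SmallVector<Value> operands = {
          getValueOrCreateConstantIndexOp(rewriter, loc, std::get<0>(it)),
          getValueOrCreateConstantIndexOp(rewriter, loc, std::get<1>(it))};
      newOffsets.push_back(
          makeComposedAffineApply(rewriter, loc, d0 + d1, operands)
              .getResult());
    }

    // Slice directly from the producer's source, keeping the consumer's
    // result type, sizes, and (unit) strides.
    rewriter.replaceOpWithNewOp<tensor::ExtractSliceOp>(
        consumer, consumer.getResult().getType().cast<RankedTensorType>(),
        producer.getSource(), newOffsets, consumer.getMixedSizes(),
        consumer.getMixedStrides());
    return success();
  }
};
```

A client pass could apply this with applyPatternsAndFoldGreedily only where the folding is known to help, which is exactly the separation the commit message asks for.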
7 changes: 5 additions & 2 deletions mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
@@ -47,13 +47,16 @@ func.func @two_d(%arg0: tensor<10x34xf32>,
 // The canonicalizer is able to recover static shapes of for linalg.generic
 // instances, use those to differentiate the quadrants.
 
+// CHECK: %[[SLICE_1_IN:.+]] = tensor.extract_slice %[[IN]][0, 0] [4, 34] [1, 1]
 // CHECK: %[[SLICE_1:.+]] = tensor.extract_slice %[[OUT]][0, 0] [4, 34] [1, 1]
 // CHECK: scf.for %[[I1:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_1:.+]] = %[[SLICE_1]])
+// CHECK: %[[OUTSLICE_1_IN:.+]] = tensor.extract_slice %[[SLICE_1_IN]][%[[I1]], 0] [2, 34] [1, 1]
 // CHECK: %[[OUTSLICE_1:.+]] = tensor.extract_slice %[[ITERARG_1]][%[[I1]], 0] [2, 34] [1, 1]
 
-// CHECK: %[[SLICE_2:.+]] = tensor.extract_slice %[[ITERARG_1]][%[[I1]], 0] [2, 16] [1, 1]
+// CHECK: %[[SLICE_2_IN:.+]] = tensor.extract_slice %[[OUTSLICE_1_IN]][0, 0] [2, 16] [1, 1]
+// CHECK: %[[SLICE_2:.+]] = tensor.extract_slice %[[OUTSLICE_1]][0, 0] [2, 16] [1, 1]
 // CHECK: %[[LOOPRES:.+]] = scf.for %[[I2:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_2:.+]] = %[[SLICE_2]])
-// CHECK: %[[INSLICE_2:.+]] = tensor.extract_slice %[[IN]][%[[I1]], %[[I2]]] [2, 8] [1, 1]
+// CHECK: %[[INSLICE_2:.+]] = tensor.extract_slice %[[SLICE_2_IN]][0, %[[I2]]] [2, 8] [1, 1]
 // CHECK: %[[OUTSLICE_2:.+]] = tensor.extract_slice %[[ITERARG_2]][0, %[[I2]]] [2, 8] [1, 1]
 // CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) outs(%[[OUTSLICE_2]] : tensor<2x8xf32>)
 // CHECK: %[[RESPARTIAL:.+]] = tensor.insert_slice %[[RESSLICE_1]] into %[[ITERARG_2]]
6 changes: 4 additions & 2 deletions mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
@@ -23,11 +23,13 @@ func.func @fill_matmul_tensors(
 // CHECK: %[[LBX:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]]
 // CHECK: %[[STEPX:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]]
 // CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xf32>) {
+// CHECK: %[[OUTSLICEA:.+]] = tensor.extract_slice %{{.*}}[%{{.*}}, 0] [%{{.*}}, %{{.*}}] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK: %[[OUTSLICEB:.+]] = tensor.extract_slice %{{.*}}[0, %{{.*}}] [%{{.*}}, %{{.*}}] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
 // CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[TC1]]
 // CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[SLICE]]
 // CHECK: %[[sTD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[FILL]]) -> (tensor<?x?xf32>) {
-// CHECK: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
-// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK: %[[sTA:.*]] = tensor.extract_slice %[[OUTSLICEA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[OUTSLICEB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
 // CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
 // CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<?x?xf32>, tensor<?x?xf32>)
 // CHECK-SAME: outs(%[[sTC]] : tensor<?x?xf32>) -> tensor<?x?xf32>
10 changes: 4 additions & 6 deletions mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -77,8 +77,6 @@ func.func @generic_op_tensors(
 // -----
 
 // CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0) -> (d0 + 3)>
-// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0) -> (d0 + 4)>
 
 // CHECK: fold_extract_slice
 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<?x128xf32>
@@ -93,15 +91,15 @@ func.func @fold_extract_slice(
   %0 = tensor.dim %arg1, %c0 : tensor<?x42xf32>
   %1 = tensor.extract_slice %arg0[3, 4] [%0, 42] [1, 1] : tensor<?x128xf32> to tensor<?x42xf32>
 
+// CHECK: %[[E:.*]] = tensor.extract_slice %[[ARG0]][3, 4] [%[[DIM]], 42] [1, 1] : tensor<?x128xf32> to tensor<?x42xf32>
+
 // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
 // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] =
 
 // Fold the existing extract slice op into the one created by the tiling.
 // CHECK: %[[SIZE0:.*]] = affine.min #[[MAP0]](%[[IV0]])[%[[DIM]]
-// CHECK: %[[OFF0:.*]] = affine.apply #[[MAP1]](%[[IV0]]
-// CHECK: %[[OFF1:.*]] = affine.apply #[[MAP2]](%[[IV1]]
-// CHECK: %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
-// CHECK-SAME: %[[OFF0]], %[[OFF1]]
+// CHECK: %[[T0:.*]] = tensor.extract_slice %[[E]]
+// CHECK-SAME: %[[IV0]], %[[IV1]]
 // CHECK-SAME: %[[SIZE0]], 3
 // CHECK-SAME: 1, 1
 // CHECK: {{.*}} = linalg.generic {{.*}} ins(%[[T0]]
7 changes: 4 additions & 3 deletions mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -66,11 +66,12 @@ func.func @interchange_reduction(%input: tensor<12x7x25xf32>) -> tensor<12x25xf32>
 // CHECK-DAG: %[[C7:.+]] = arith.constant 7 : index
 // CHECK: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %[[C5]] iter_args(%[[FOR_ARG0:.+]] = %[[INIT]])
 // CHECK: scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %[[C7]] iter_args(%[[FOR_ARG1:.+]] = %[[FOR_ARG0]])
-// CHECK: %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
-// CHECK: %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE0]] : tensor<?x?xf32>)
+// CHECK: %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], 0, %[[IV1]]]
+// CHECK: %[[OUT_SLICE1:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
+// CHECK: %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE1]] : tensor<?x?xf32>)
 // CHECK: %[[C4:.+]] = arith.constant 4 : index
 // CHECK: scf.for %[[IV2:.+]] = %{{.+}} to %{{.+}} step %[[C4]] iter_args(%[[FOR_ARG2:.+]] = %[[FILL]])
-// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[INPUT]]
+// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[OUT_SLICE0]]
 // CHECK: %[[OUT_SLICE2:.+]] = tensor.extract_slice %[[FOR_ARG2]][0, 0]
 // CHECK: linalg.generic {{.+}} ins(%[[IN_SLICE]] : tensor<?x?x?xf32>) outs(%[[OUT_SLICE2]] : tensor<?x?xf32>)
6 changes: 3 additions & 3 deletions mlir/test/Dialect/Linalg/transform-op-split.mlir
@@ -153,14 +153,14 @@ func.func @two_d(%arg0: tensor<10x34xf32>,
 // CHECK: %[[OUT_2:.+]] = tensor.extract_slice %[[PARTIAL_1]]
 // Note that `extract_slice` taking a slice from another `extract_slice` result
 // is folded to use the operand of the first `extract_slice`.
-// CHECK: %[[IN_21:.+]] = tensor.extract_slice %[[IN]]
-// CHECK: %[[OUT_21:.+]] = tensor.extract_slice %[[PARTIAL_1]]
+// CHECK: %[[IN_21:.+]] = tensor.extract_slice %[[IN_2]]
+// CHECK: %[[OUT_21:.+]] = tensor.extract_slice %[[OUT_2]]
 // CHECK: %[[RES_21:.+]] = linalg.generic
 // CHECK-SAME:   ins(%[[IN_21]] : tensor<6x16xf32>)
 // CHECK-SAME:   outs(%[[OUT_21]] : tensor<6x16xf32>)
 // CHECK: %[[PARTIAL_21:.+]] = tensor.insert_slice %[[RES_21]] into %[[OUT_2]]
 //
-// CHECK: %[[IN_22:.+]] = tensor.extract_slice %[[IN]]
+// CHECK: %[[IN_22:.+]] = tensor.extract_slice %[[IN_2]]
 // CHECK: %[[OUT_22:.+]] = tensor.extract_slice %[[PARTIAL_21]]
 // CHECK: %[[RES_22:.+]] = linalg.generic
 // CHECK-SAME:   ins(%[[IN_22]] : tensor<6x18xf32>)
