test2

hanhanW committed Aug 25, 2022
1 parent 0f58a76 commit f4e8518
Showing 2 changed files with 10 additions and 29 deletions.
37 changes: 9 additions & 28 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -1123,6 +1123,7 @@ static LogicalResult setElementwiseGenericOpRootConfig(
/*allowIncompleteTile=*/true);

// Adjust the number of workload per workgroup to at least 4096.
constexpr int64_t kMinimumWorkload = 4096;
auto shape = genericOp.getStaticLoopRanges();
int64_t numWorkload = 1;
for (auto en : llvm::enumerate(shape)) {
@@ -1137,8 +1138,6 @@ static LogicalResult setElementwiseGenericOpRootConfig(
}
numWorkload *= size;
}

constexpr int64_t kMinimumWorkload = 4096;
for (unsigned currDim = 0;
numWorkload < kMinimumWorkload && currDim < numLoops;) {
int64_t currSize = flowTileSizes[currDim];
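Between these two hunks, the change so far only moves kMinimumWorkload up so the constant sits next to the comment that motivates it. For readers without the full file, below is a minimal self-contained sketch of the growth loop the constant drives; the loop body hidden by the collapsed hunk is reconstructed under assumptions, and growToMinWorkload with plain std::vector parameters is an illustrative stand-in, not the IREE signature.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    constexpr int64_t kMinimumWorkload = 4096;

    // Doubles one tile size at a time (capped at its loop range) until the
    // per-workgroup workload reaches kMinimumWorkload or all dims are exhausted.
    void growToMinWorkload(std::vector<int64_t> &flowTileSizes,
                           const std::vector<int64_t> &loopRanges,
                           int64_t numWorkload) {
      for (unsigned currDim = 0;
           numWorkload < kMinimumWorkload && currDim < flowTileSizes.size();) {
        int64_t currSize = flowTileSizes[currDim];
        // Assumed hidden logic: skip dims that are untiled or already maximal.
        if (currSize == 0 || currSize >= loopRanges[currDim]) {
          ++currDim;
          continue;
        }
        int64_t newSize = std::min<int64_t>(currSize * 2, loopRanges[currDim]);
        numWorkload = numWorkload / currSize * newSize;
        flowTileSizes[currDim] = newSize;
      }
    }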
@@ -1153,34 +1152,16 @@ static LogicalResult setElementwiseGenericOpRootConfig(
flowTileSizes[currDim] = newSize;
}

// Set the next level tile sizes.
SmallVector<int64_t> parallelTileSizes;
auto inputOutputOpOperands = genericOp.getInputAndOutputOperands();
for (auto map : llvm::enumerate(genericOp.getIndexingMapsArray())) {
// Check the fastest varying dimension of the operand. Set the vector size
// of the corresponding loop to the vector size.
if (map.value().getNumResults() == 0) continue;
auto fastestVaryingDimExpr =
map.value().getResults().back().dyn_cast<AffineDimExpr>();
if (!fastestVaryingDimExpr) continue;
unsigned fastestVaryingDim = fastestVaryingDimExpr.getPosition();

// If the indexing map has result it has to be a shaped type.
auto operandType =
inputOutputOpOperands[map.index()]->get().getType().cast<ShapedType>();
int64_t tileSize = getVectorSize(entryPointFn, operandType);
// Vectorization of reductions is driven by input tensors and considering
// the output's fastest varying dim leads to large unroll factors. We limit
// the tile size for this case to 'maxUnrollFactor'.
minTileSizes[fastestVaryingDim] =
std::min<int64_t>(minTileSizes[fastestVaryingDim], 8);
minTileSizes[fastestVaryingDim] =
std::min<int64_t>(minTileSizes[fastestVaryingDim], tileSize);
// Limit the tiling sizes to avoid large unroll factors. Most of the use cases
// are i32 and f32, so we divide the vector size by four by default. This can
// be relaxed once we have better control on vector unrolling.
SmallVector<int64_t> parallelTileSizes(minTileSizes.begin(),
minTileSizes.end());
int64_t vectorSize = getNativeVectorSizeInBytes(entryPointFn).value() / 4;
for (auto &size : parallelTileSizes) {
size = std::min<int64_t>(size, vectorSize);
}

setX86WorkgroupTileSizes(genericOp, flowTileSizes, minTileSizes,
parallelTileSizes, /*allowIncompleteTile=*/true);

// Setting reduction tile sizes is a workaround to kick in peeling transform.
// The tiling won't happen because the sizes are zeros.
SmallVector<int64_t> reductionTileSizes(numLoops, 0);
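The replacement code above drops the per-operand scan of fastest-varying dimensions in favor of one uniform clamp against the native vector width. Below is a hedged sketch of that clamp in isolation; clampParallelTileSizes and the plain std::vector types are illustrative stand-ins, and the division by four assumes 4-byte (i32/f32) elements, as the new comment states.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Clamp each parallel tile size to the lane count of one native vector,
    // where lanes = vector bytes / 4 under the 4-byte-element assumption.
    std::vector<int64_t> clampParallelTileSizes(
        const std::vector<int64_t> &minTileSizes,
        int64_t nativeVectorSizeInBytes) {
      int64_t vectorSize = nativeVectorSizeInBytes / 4;
      std::vector<int64_t> parallelTileSizes(minTileSizes.begin(),
                                             minTileSizes.end());
      for (auto &size : parallelTileSizes)
        size = std::min<int64_t>(size, vectorSize);
      return parallelTileSizes;
    }

For example, with 32-byte (256-bit AVX2) vectors the clamp is 8 lanes, so a minimum tile size of 16 becomes 8, keeping vector unrolling modest until finer control is available.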
@@ -342,7 +342,7 @@ LinalgMatmulOpToLinalgMmt4DOpPattern::chooseTileParams(Value lhs, Value rhs,
int m0k0n0ForVecMat[3] = {m0k0n0ForMatVec[2], m0k0n0ForMatVec[1],
m0k0n0ForMatVec[0]};
return Mmt4DTileParams(m0k0n0ForVecMat, comment + ", vector*matrix");
}else {
} else {
return Mmt4DTileParams(m0k0n0, comment);
}
};
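The context around this formatting fix shows how vector*matrix tile shapes are derived: the matrix*vector parameters are reused with the M0 and N0 entries swapped, since vecmat is the transpose of matvec. A small sketch of just that swap, assuming the {M0, K0, N0} array layout used in the patch; vecmatTileFromMatvec is an illustrative name, not an IREE helper.

    #include <array>

    // {M0, K0, N0} for vector*matrix = {N0, K0, M0} chosen for matrix*vector;
    // K0, the shared reduction dim, stays in the middle.
    std::array<int, 3> vecmatTileFromMatvec(const std::array<int, 3> &matvec) {
      return {matvec[2], matvec[1], matvec[0]};
    }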