diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 0ded98f162abfb..01624de190d510 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -728,6 +728,9 @@ class TargetTransformInfoImplBase {
     switch (ICA.getID()) {
     default:
       break;
+    case Intrinsic::experimental_vector_histogram_add:
+      // For now, we want explicit support from the target for histograms.
+      return InstructionCost::getInvalid();
     case Intrinsic::allow_runtime_check:
     case Intrinsic::allow_ubsan_check:
     case Intrinsic::annotation:
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index c0abbd32eeec41..1941cfcf807739 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -61,6 +61,11 @@ static cl::opt<bool> EnableOrLikeSelectOpt("enable-aarch64-or-like-select",
 static cl::opt<bool> EnableLSRCostOpt("enable-aarch64-lsr-cost-opt",
                                       cl::init(true), cl::Hidden);
 
+// A complete guess as to a reasonable cost.
+static cl::opt<unsigned>
+    BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden,
+                    cl::desc("The cost of a histcnt instruction"));
+
 namespace {
 class TailFoldingOption {
   // These bitfields will only ever be set to something non-zero in operator=,
@@ -508,11 +513,39 @@ static bool isUnpackedVectorVT(EVT VecVT) {
          VecVT.getSizeInBits().getKnownMinValue() < AArch64::SVEBitsPerBlock;
 }
 
+static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
+  Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Type of vector of pointers
+  Type *EltTy = ICA.getArgTypes()[1];        // Type of bucket elements
+
+  // Only allow (32b and 64b) integers or pointers for now...
+  if ((!EltTy->isIntegerTy() && !EltTy->isPointerTy()) ||
+      (EltTy->getScalarSizeInBits() != 32 &&
+       EltTy->getScalarSizeInBits() != 64))
+    return InstructionCost::getInvalid();
+
+  // FIXME: Hacky check for legal vector types. We can promote smaller types
+  //        but we cannot legalize vectors via splitting for histcnt.
+  // FIXME: We should be able to generate histcnt for fixed-length vectors
+  //        using ptrue with a specific VL.
+  if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy))
+    if ((VTy->getElementCount().getKnownMinValue() != 2 &&
+         VTy->getElementCount().getKnownMinValue() != 4) ||
+        VTy->getPrimitiveSizeInBits().getKnownMinValue() > 128 ||
+        !VTy->isScalableTy())
+      return InstructionCost::getInvalid();
+
+  return InstructionCost(BaseHistCntCost);
+}
+
 InstructionCost
 AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                       TTI::TargetCostKind CostKind) {
   auto *RetTy = ICA.getReturnType();
   switch (ICA.getID()) {
+  case Intrinsic::experimental_vector_histogram_add:
+    if (!ST->hasSVE2())
+      return InstructionCost::getInvalid();
+    return getHistogramCost(ICA);
   case Intrinsic::umin:
   case Intrinsic::umax:
   case Intrinsic::smin:
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 1ff280d75b4e90..1993023c91e261 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -909,6 +909,123 @@ define void @masked_scatter_v1i128(<1 x i128> %data, <1 x ptr> %ptrs, <1 x i1> %
   ret void
 }
 
+define void @histogram_nxv2i64(<vscale x 2 x ptr> %buckets, <vscale x 2 x i1> %mask) #3 {
+; CHECK-LABEL: 'histogram_nxv2i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> %buckets, i64 1, <vscale x 2 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_nxv2i64'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> %buckets, i64 1, <vscale x 2 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> %buckets, i64 1, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @histogram_nxv4i32(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) #3 {
+; CHECK-LABEL: 'histogram_nxv4i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> %buckets, i32 1, <vscale x 4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_nxv4i32'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> %buckets, i32 1, <vscale x 4 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> %buckets, i32 1, <vscale x 4 x i1> %mask)
+  ret void
+}
+
+define void @histogram_nxv8i16(<vscale x 8 x ptr> %buckets, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: 'histogram_nxv8i16'
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_nxv8i16'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
+  ret void
+}
+
+define void @histogram_nxv16i8(<vscale x 16 x ptr> %buckets, <vscale x 16 x i1> %mask) {
+; CHECK-LABEL: 'histogram_nxv16i8'
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_nxv16i8'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
+  ret void
+}
+
+define void @histogram_v2i64(<2 x ptr> %buckets, <2 x i1> %mask) {
+; CHECK-LABEL: 'histogram_v2i64'
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_v2i64'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
+  ret void
+}
+
+define void @histogram_v4i32(<4 x ptr> %buckets, <4 x i1> %mask) {
+; CHECK-LABEL: 'histogram_v4i32'
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_v4i32'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
+  ret void
+}
+
+define void @histogram_v8i16(<8 x ptr> %buckets, <8 x i1> %mask) {
+; CHECK-LABEL: 'histogram_v8i16'
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_v8i16'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
+  ret void
+}
+
+define void @histogram_v16i8(<16 x ptr> %buckets, <16 x i1> %mask) {
+; CHECK-LABEL: 'histogram_v16i8'
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_v16i8'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
+  ret void
+}
+
+define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: 'histogram_nxv4i64'
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_nxv4i64'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
+  ret void
+}
+
 declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64, i64)
 declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64, i64)
 declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)
@@ -949,3 +1066,4 @@ declare void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs,
 attributes #0 = { "target-features"="+sve,+bf16" }
 attributes #1 = { "target-features"="+sve" vscale_range(1,16) }
 attributes #2 = { "target-features"="+sve" vscale_range(2, 16) }
+attributes #3 = { "target-features"="+sve,+sve2" vscale_range(1,16) }