Revert "AMDGPU: Add 24-bit mul intrinsics"

This reverts commit b508009.
ROCm · Aug 24, 2019 · b7f8762
1 parent 2397af0
commit b7f8762
Show file tree

Hide file tree

Showing 9 changed files with 11 additions and 751 deletions.
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1358,16 +1358,6 @@ def int_amdgcn_alignbyte : GCCBuiltin<"__builtin_amdgcn_alignbyte">,
   [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_mul_i24 : Intrinsic<[llvm_i32_ty],
-  [llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem, IntrSpeculatable]
->;
-
-def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty],
-  [llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem, IntrSpeculatable]
->;
-
 // llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id)
 //
 // bar_val is the total number of waves that will wait on this

diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -61,7 +61,6 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
   AssumptionCache *AC = nullptr;
   LegacyDivergenceAnalysis *DA = nullptr;
   Module *Mod = nullptr;
-  const DataLayout *DL = nullptr;
   bool HasUnsafeFPMath = false;
 
   /// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
@@ -134,16 +133,6 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
   /// \returns True.
   bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;
 
-
-  unsigned numBitsUnsigned(Value *Op, unsigned ScalarSize) const;
-  unsigned numBitsSigned(Value *Op, unsigned ScalarSize) const;
-  bool isI24(Value *V, unsigned ScalarSize) const;
-  bool isU24(Value *V, unsigned ScalarSize) const;
-
-  /// Replace mul instructions with llvm.amdgcn.mul.u24 or llvm.amdgcn.mul.s24.
-  /// SelectionDAG has an issue where an and asserting the bits are known
-  bool replaceMulWithMul24(BinaryOperator &I) const;
-
   /// Expands 24 bit div or rem.
   Value* expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I,
                         Value *Num, Value *Den,
@@ -403,118 +392,6 @@ bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
   return true;
 }
 
-unsigned AMDGPUCodeGenPrepare::numBitsUnsigned(Value *Op,
-                                               unsigned ScalarSize) const {
-  KnownBits Known = computeKnownBits(Op, *DL, 0, AC);
-  return ScalarSize - Known.countMinLeadingZeros();
-}
-
-unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op,
-                                             unsigned ScalarSize) const {
-  // In order for this to be a signed 24-bit value, bit 23, must
-  // be a sign bit.
-  return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC);
-}
-
-bool AMDGPUCodeGenPrepare::isI24(Value *V, unsigned ScalarSize) const {
-  return ScalarSize >= 24 && // Types less than 24-bit should be treated
-                                     // as unsigned 24-bit values.
-    numBitsSigned(V, ScalarSize) < 24;
-}
-
-bool AMDGPUCodeGenPrepare::isU24(Value *V, unsigned ScalarSize) const {
-  return numBitsUnsigned(V, ScalarSize) <= 24;
-}
-
-static void extractValues(IRBuilder<> &Builder,
-                          SmallVectorImpl<Value *> &Values, Value *V) {
-  VectorType *VT = dyn_cast<VectorType>(V->getType());
-  if (!VT) {
-    Values.push_back(V);
-    return;
-  }
-
-  for (int I = 0, E = VT->getNumElements(); I != E; ++I)
-    Values.push_back(Builder.CreateExtractElement(V, I));
-}
-
-static Value *insertValues(IRBuilder<> &Builder,
-                           Type *Ty,
-                           SmallVectorImpl<Value *> &Values) {
-  if (Values.size() == 1)
-    return Values[0];
-
-  Value *NewVal = UndefValue::get(Ty);
-  for (int I = 0, E = Values.size(); I != E; ++I)
-    NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);
-
-  return NewVal;
-}
-
-bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
-  if (I.getOpcode() != Instruction::Mul)
-    return false;
-
-  Type *Ty = I.getType();
-  unsigned Size = Ty->getScalarSizeInBits();
-  if (Size <= 16 && ST->has16BitInsts())
-    return false;
-
-  // Prefer scalar if this could be s_mul_i32
-  if (DA->isUniform(&I))
-    return false;
-
-  Value *LHS = I.getOperand(0);
-  Value *RHS = I.getOperand(1);
-  IRBuilder<> Builder(&I);
-  Builder.SetCurrentDebugLocation(I.getDebugLoc());
-
-  Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
-
-  // TODO: Should this try to match mulhi24?
-  if (ST->hasMulU24() && isU24(LHS, Size) && isU24(RHS, Size)) {
-    IntrID = Intrinsic::amdgcn_mul_u24;
-  } else if (ST->hasMulI24() && isI24(LHS, Size) && isI24(RHS, Size)) {
-    IntrID = Intrinsic::amdgcn_mul_i24;
-  } else
-    return false;
-
-  SmallVector<Value *, 4> LHSVals;
-  SmallVector<Value *, 4> RHSVals;
-  SmallVector<Value *, 4> ResultVals;
-  extractValues(Builder, LHSVals, LHS);
-  extractValues(Builder, RHSVals, RHS);
-
-
-  IntegerType *I32Ty = Builder.getInt32Ty();
-  FunctionCallee Intrin = Intrinsic::getDeclaration(Mod, IntrID);
-  for (int I = 0, E = LHSVals.size(); I != E; ++I) {
-    Value *LHS, *RHS;
-    if (IntrID == Intrinsic::amdgcn_mul_u24) {
-      LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
-      RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
-    } else {
-      LHS = Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty);
-      RHS = Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty);
-    }
-
-    Value *Result = Builder.CreateCall(Intrin, {LHS, RHS});
-
-    if (IntrID == Intrinsic::amdgcn_mul_u24) {
-      ResultVals.push_back(Builder.CreateZExtOrTrunc(Result,
-                                                     LHSVals[I]->getType()));
-    } else {
-      ResultVals.push_back(Builder.CreateSExtOrTrunc(Result,
-                                                     LHSVals[I]->getType()));
-    }
-  }
-
-  I.replaceAllUsesWith(insertValues(Builder, Ty, ResultVals));
-  I.eraseFromParent();
-
-  return true;
-}
-
 static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv, bool HasDenormals) {
   const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
   if (!CNum)
@@ -879,9 +756,6 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
       DA->isUniform(&I) && promoteUniformOpToI32(I))
     return true;
 
-  if (replaceMulWithMul24(I))
-    return true;
-
   bool Changed = false;
   Instruction::BinaryOps Opc = I.getOpcode();
   Type *Ty = I.getType();
@@ -1008,7 +882,6 @@ bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
 
 bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
   Mod = &M;
-  DL = &Mod->getDataLayout();
   return false;
 }
 

diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5929,11 +5929,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::amdgcn_cos:
     return DAG.getNode(AMDGPUISD::COS_HW, DL, VT, Op.getOperand(1));
 
-  case Intrinsic::amdgcn_mul_u24:
-    return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT, Op.getOperand(1), Op.getOperand(2));
-  case Intrinsic::amdgcn_mul_i24:
-    return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT, Op.getOperand(1), Op.getOperand(2));
-
   case Intrinsic::amdgcn_log_clamp: {
     if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
       return SDValue();