diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index b4e091fe021423..fcbdf51b03c1fc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -336,8 +336,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FLOG2, MVT::f32, Custom); setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom); - setOperationAction({ISD::FLOG, ISD::FLOG10, ISD::FEXP, ISD::FEXP2}, MVT::f32, - Custom); + setOperationAction( + {ISD::FLOG, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10}, MVT::f32, + Custom); setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom); @@ -352,7 +353,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom); } - setOperationAction({ISD::FLOG10, ISD::FLOG, ISD::FEXP}, MVT::f16, Custom); + setOperationAction({ISD::FLOG10, ISD::FLOG, ISD::FEXP, ISD::FEXP10}, MVT::f16, + Custom); // FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches // scalarization code. Can be removed when IS_FPCLASS expand isn't called by @@ -457,14 +459,17 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, for (MVT VT : FloatVectorTypes) { setOperationAction( - {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, - ISD::FCEIL, ISD::FCOS, ISD::FDIV, ISD::FEXP2, - ISD::FEXP, ISD::FLOG2, ISD::FREM, ISD::FLOG, - ISD::FLOG10, ISD::FPOW, ISD::FFLOOR, ISD::FTRUNC, - ISD::FMUL, ISD::FMA, ISD::FRINT, ISD::FNEARBYINT, - ISD::FSQRT, ISD::FSIN, ISD::FSUB, ISD::FNEG, - ISD::VSELECT, ISD::SELECT_CC, ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE, - ISD::SETCC, ISD::FCANONICALIZE, ISD::FROUNDEVEN}, + {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM, + ISD::FADD, ISD::FCEIL, ISD::FCOS, + ISD::FDIV, ISD::FEXP2, ISD::FEXP, + ISD::FEXP10, ISD::FLOG2, ISD::FREM, + ISD::FLOG, ISD::FLOG10, ISD::FPOW, + ISD::FFLOOR, ISD::FTRUNC, ISD::FMUL, + ISD::FMA, ISD::FRINT, ISD::FNEARBYINT, + ISD::FSQRT, ISD::FSIN, ISD::FSUB, + ISD::FNEG, ISD::VSELECT, ISD::SELECT_CC, + ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE, ISD::SETCC, + ISD::FCANONICALIZE, ISD::FROUNDEVEN}, VT, Expand); } @@ -1322,6 +1327,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::FLOG10: return LowerFLOGCommon(Op, DAG); case ISD::FEXP: + case ISD::FEXP10: return lowerFEXP(Op, DAG); case ISD::FEXP2: return lowerFEXP2(Op, DAG); @@ -1367,6 +1373,7 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Lowered); return; case ISD::FEXP: + case ISD::FEXP10: if (SDValue Lowered = lowerFEXP(SDValue(N, 0), DAG)) Results.push_back(Lowered); return; @@ -2841,12 +2848,66 @@ SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue X, const SDLoc &SL, Flags); } +/// Emit approx-funcs appropriate lowering for exp10. inf/nan should still be +/// handled correctly. +SDValue AMDGPUTargetLowering::lowerFEXP10Unsafe(SDValue X, const SDLoc &SL, + SelectionDAG &DAG, + SDNodeFlags Flags) const { + const EVT VT = X.getValueType(); + const unsigned Exp2Op = VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2; + + if (VT != MVT::f32 || !needsDenormHandlingF32(DAG, X, Flags)) { + // exp2(x * 0x1.a92000p+1f) * exp2(x * 0x1.4f0978p-11f); + SDValue K0 = DAG.getConstantFP(0x1.a92000p+1f, SL, VT); + SDValue K1 = DAG.getConstantFP(0x1.4f0978p-11f, SL, VT); + + SDValue Mul0 = DAG.getNode(ISD::FMUL, SL, VT, X, K0, Flags); + SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags); + SDValue Mul1 = DAG.getNode(ISD::FMUL, SL, VT, X, K1, Flags); + SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags); + return DAG.getNode(ISD::FMUL, SL, VT, Exp2_0, Exp2_1); + } + + // bool s = x < -0x1.2f7030p+5f; + // x += s ? 0x1.0p+5f : 0.0f; + // exp10 = exp2(x * 0x1.a92000p+1f) * + // exp2(x * 0x1.4f0978p-11f) * + // (s ? 0x1.9f623ep-107f : 1.0f); + + EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + + SDValue Threshold = DAG.getConstantFP(-0x1.2f7030p+5f, SL, VT); + SDValue NeedsScaling = DAG.getSetCC(SL, SetCCVT, X, Threshold, ISD::SETOLT); + + SDValue ScaleOffset = DAG.getConstantFP(0x1.0p+5f, SL, VT); + SDValue ScaledX = DAG.getNode(ISD::FADD, SL, VT, X, ScaleOffset, Flags); + SDValue AdjustedX = + DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X); + + SDValue K0 = DAG.getConstantFP(0x1.a92000p+1f, SL, VT); + SDValue K1 = DAG.getConstantFP(0x1.4f0978p-11f, SL, VT); + + SDValue Mul0 = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, K0, Flags); + SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags); + SDValue Mul1 = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, K1, Flags); + SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags); + + SDValue MulExps = DAG.getNode(ISD::FMUL, SL, VT, Exp2_0, Exp2_1, Flags); + + SDValue ResultScaleFactor = DAG.getConstantFP(0x1.9f623ep-107f, SL, VT); + SDValue AdjustedResult = + DAG.getNode(ISD::FMUL, SL, VT, MulExps, ResultScaleFactor, Flags); + + return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, MulExps, + Flags); +} + SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc SL(Op); SDValue X = Op.getOperand(0); SDNodeFlags Flags = Op->getFlags(); - const bool IsExp10 = false; // TODO: For some reason exp10 is missing + const bool IsExp10 = Op.getOpcode() == ISD::FEXP10; if (VT.getScalarType() == MVT::f16) { // v_exp_f16 (fmul x, log2e) @@ -2871,8 +2932,8 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { // TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying // library behavior. Also, is known-not-daz source sufficient? if (allowApproxFunc(DAG, Flags)) { - assert(!IsExp10 && "todo exp10 support"); - return lowerFEXPUnsafe(X, SL, DAG, Flags); + return IsExp10 ? lowerFEXP10Unsafe(X, SL, DAG, Flags) + : lowerFEXPUnsafe(X, SL, DAG, Flags); } // Algorithm: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 51f95ef97a3308..6841067e31b3b4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -80,6 +80,8 @@ class AMDGPUTargetLowering : public TargetLowering { SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, SDNodeFlags Flags) const; + SDValue lowerFEXP10Unsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, + SDNodeFlags Flags) const; SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index bd5ad4fa8d3a0a..ccdbd3216e2604 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1173,7 +1173,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, Log2Ops.scalarize(0) .lower(); - auto &LogOps = getActionDefinitionsBuilder({G_FLOG, G_FLOG10, G_FEXP}); + auto &LogOps = + getActionDefinitionsBuilder({G_FLOG, G_FLOG10, G_FEXP, G_FEXP10}); LogOps.customFor({S32, S16}); LogOps.clampScalar(0, MinScalarFPTy, S32) .scalarize(0); @@ -2045,6 +2046,7 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper, case TargetOpcode::G_FEXP2: return legalizeFExp2(MI, B); case TargetOpcode::G_FEXP: + case TargetOpcode::G_FEXP10: return legalizeFExp(MI, B); case TargetOpcode::G_FPOW: return legalizeFPow(MI, B); @@ -3466,7 +3468,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, LLT Ty = MRI.getType(Dst); const LLT F16 = LLT::scalar(16); const LLT F32 = LLT::scalar(32); - const bool IsExp10 = false; // TODO: For some reason exp10 is missing + const bool IsExp10 = MI.getOpcode() == TargetOpcode::G_FEXP10; if (Ty == F16) { // v_exp_f16 (fmul x, log2e) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll new file mode 100644 index 00000000000000..eee254398aefc2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll @@ -0,0 +1,7560 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GCN-SDAG,VI,VI-SDAG %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,VI,VI-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-SDAG,GFX900,GFX900-SDAG %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,GFX900,GFX900-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-SDAG %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-GISEL %s + +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s +; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s + +; FIXME: Fallback enabled due to bfloat extensions + +define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) { +; VI-SDAG-LABEL: s_exp10_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x40549000 +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: s_and_b32 s3, s2, 0xfffff000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, s2, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s3, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x3a2784bc +; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, s3, v3 +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v2 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v1 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x421a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: s_exp10_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x40549000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3a2784bc +; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: s_and_b32 s3, s2, 0xfffff000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, s3 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, s2, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, s3, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 +; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 +; VI-GISEL-NEXT: s_endpgm +; +; GFX900-SDAG-LABEL: s_exp10_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x33979a37 +; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v2 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v3 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v1 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x421a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1] +; GFX900-SDAG-NEXT: s_endpgm +; +; GFX900-GISEL-LABEL: s_exp10_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x33979a37 +; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s2, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v0, s2, v0, -v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_fma_f32 v0, s2, v1, v0 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX900-GISEL-NEXT: s_endpgm +; +; SI-SDAG-LABEL: s_exp10_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dword s4, s[0:1], 0xb +; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x33979a37 +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v2, s4, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; SI-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v2 +; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v3 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v1 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x421a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: s_exp10_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dword s2, s[0:1], 0xb +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x33979a37 +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v0 +; SI-GISEL-NEXT: v_fma_f32 v0, s2, v0, -v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_fma_f32 v0, s2, v1, v0 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-GISEL-NEXT: s_endpgm +; +; R600-LABEL: s_exp10_f32: +; R600: ; %bb.0: +; R600-NEXT: ALU 59, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: ALU clause starting at 4: +; R600-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD T1.W, KC0[2].Z, -PV.W, +; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x, +; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; R600-NEXT: RNDNE T3.W, PS, +; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x, +; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS, +; R600-NEXT: TRUNC * T4.W, PV.W, +; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; R600-NEXT: FLT_TO_INT T0.Z, PS, +; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W, +; R600-NEXT: ADD * T1.W, T2.W, -T3.W, +; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; R600-NEXT: ADD T1.Z, PS, PV.W, +; R600-NEXT: MAX_INT T0.W, PV.Z, literal.x, +; R600-NEXT: MIN_INT * T1.W, PV.Z, literal.y, +; R600-NEXT: -330(nan), 381(5.338947e-43) +; R600-NEXT: ADD_INT T0.X, PS, literal.x, +; R600-NEXT: ADD_INT T0.Y, PV.W, literal.y, +; R600-NEXT: ADD_INT T2.Z, T0.Z, literal.z, +; R600-NEXT: SETGT_UINT T0.W, T0.Z, literal.w, +; R600-NEXT: EXP_IEEE * T1.X, PV.Z, +; R600-NEXT: -254(nan), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -229(nan) +; R600-NEXT: ADD_INT T2.X, T0.Z, literal.x, +; R600-NEXT: SETGT_UINT T1.Y, T0.Z, literal.y, +; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z, +; R600-NEXT: SETGT_INT T1.W, T0.Z, literal.x, +; R600-NEXT: MUL_IEEE * T2.W, PS, literal.z, +; R600-NEXT: -127(nan), 254(3.559298e-43) +; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x, +; R600-NEXT: MUL_IEEE T0.Y, PS, literal.y, +; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z, +; R600-NEXT: CNDE_INT T3.W, PV.Y, PV.X, T0.X, +; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.z, +; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31) +; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W, +; R600-NEXT: CNDE_INT T0.W, T0.W, PV.Y, T2.W, +; R600-NEXT: MUL_IEEE * T2.W, PV.X, literal.x, +; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T1.Z, T1.Y, T3.X, PS, +; R600-NEXT: CNDE_INT T0.W, T1.W, PV.W, T1.X, +; R600-NEXT: LSHL * T1.W, PV.Z, literal.x, +; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; R600-NEXT: ADD_INT T1.W, PS, literal.x, +; R600-NEXT: CNDE_INT * T0.W, T4.W, PV.W, PV.Z, +; R600-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T0.W, PS, PV.W, +; R600-NEXT: SETGT * T1.W, literal.x, KC0[2].Z, +; R600-NEXT: -1036817932(-4.485347e+01), 0(0.000000e+00) +; R600-NEXT: CNDE T0.W, PS, PV.W, 0.0, +; R600-NEXT: SETGT * T1.W, KC0[2].Z, literal.x, +; R600-NEXT: 1109008539(3.853184e+01), 0(0.000000e+00) +; R600-NEXT: CNDE T0.X, PS, PV.W, literal.x, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, +; R600-NEXT: 2139095040(INF), 2(2.802597e-45) +; +; CM-LABEL: s_exp10_f32: +; CM: ; %bb.0: +; CM-NEXT: ALU 64, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: ALU clause starting at 4: +; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x, +; CM-NEXT: ADD * T1.W, KC0[2].Z, -PV.W, +; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x, +; CM-NEXT: RNDNE * T2.W, PV.Z, +; CM-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; CM-NEXT: TRUNC T2.Z, PV.W, +; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z, +; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T0.Y, T0.W, literal.x, PV.W, +; CM-NEXT: ADD T0.Z, T0.Z, -T2.W, +; CM-NEXT: FLT_TO_INT * T0.W, PV.Z, +; CM-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; CM-NEXT: MIN_INT T1.Z, PV.W, literal.x, +; CM-NEXT: ADD * T1.W, PV.Z, PV.Y, +; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00) +; CM-NEXT: EXP_IEEE T0.X, T1.W, +; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W, +; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W, +; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W, +; CM-NEXT: MUL_IEEE T0.Y, PV.X, literal.x, +; CM-NEXT: ADD_INT T0.Z, T1.Z, literal.y, +; CM-NEXT: MAX_INT * T1.W, T0.W, literal.z, +; CM-NEXT: 2130706432(1.701412e+38), -254(nan) +; CM-NEXT: -330(nan), 0(0.000000e+00) +; CM-NEXT: ADD_INT T1.X, T0.W, literal.x, +; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y, +; CM-NEXT: ADD_INT T1.Z, T0.W, literal.z, +; CM-NEXT: SETGT_UINT * T1.W, T0.W, literal.w, +; CM-NEXT: -127(nan), 204(2.858649e-43) +; CM-NEXT: 102(1.429324e-43), -229(nan) +; CM-NEXT: SETGT_UINT T2.X, T0.W, literal.x, +; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z, +; CM-NEXT: SETGT_INT T1.Z, T0.W, literal.y, +; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.z, +; CM-NEXT: 254(3.559298e-43), -127(nan) +; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x, +; CM-NEXT: CNDE_INT T1.Y, PV.Z, PV.Y, T0.W, +; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.X, T0.Z, +; CM-NEXT: SETGT_INT * T0.W, T0.W, literal.y, +; CM-NEXT: 209715200(1.972152e-31), 127(1.779649e-43) +; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z, +; CM-NEXT: CNDE_INT T0.Z, T1.W, PV.X, T2.W, +; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.x, +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T0.Y, T2.X, T0.Y, PV.W, +; CM-NEXT: CNDE_INT T0.Z, T1.Z, PV.Z, T0.X, +; CM-NEXT: LSHL * T1.W, PV.Y, literal.x, +; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; CM-NEXT: ADD_INT T1.Z, PV.W, literal.x, +; CM-NEXT: CNDE_INT * T0.W, T0.W, PV.Z, PV.Y, +; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T0.Z, PV.W, PV.Z, +; CM-NEXT: SETGT * T0.W, literal.x, KC0[2].Z, +; CM-NEXT: -1036817932(-4.485347e+01), 0(0.000000e+00) +; CM-NEXT: CNDE T0.Z, PV.W, PV.Z, 0.0, +; CM-NEXT: SETGT * T0.W, KC0[2].Z, literal.x, +; CM-NEXT: 1109008539(3.853184e+01), 0(0.000000e+00) +; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x, +; CM-NEXT: 2139095040(INF), 0(0.000000e+00) +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %result = call float @llvm.exp10.f32(float %in) + store float %result, ptr addrspace(1) %out + ret void +} + +; FIXME: We should be able to merge these packets together on Cayman so we +; have a maximum of 4 instructions. +define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in) { +; VI-SDAG-LABEL: s_exp10_v2f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x40549000 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: s_and_b32 s4, s3, 0xfffff000 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, s4 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, s3, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x3a2784bc +; VI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, s4, v4 +; VI-SDAG-NEXT: s_and_b32 s4, s2, 0xfffff000 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; VI-SDAG-NEXT: v_mov_b32_e32 v6, s4 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2 +; VI-SDAG-NEXT: v_sub_f32_e32 v6, s2, v6 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3a2784bc, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x40549000, v6 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: v_rndne_f32_e32 v5, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v6, v6, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, s4, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v5 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v5 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v2 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x421a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v3 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v3 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: s_exp10_v2f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x40549000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3a2784bc +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: s_and_b32 s4, s2, 0xfffff000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, s2, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, s4, v0 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, s4, v1 +; VI-GISEL-NEXT: s_and_b32 s4, s3, 0xfffff000 +; VI-GISEL-NEXT: v_mov_b32_e32 v5, s4 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v5, s3, v5 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3a2784bc, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x40549000, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 +; VI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5 +; VI-GISEL-NEXT: v_rndne_f32_e32 v5, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v5 +; VI-GISEL-NEXT: v_exp_f32_e32 v5, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v2, v2, v3 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v1, v5, v1 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v4 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-GISEL-NEXT: s_endpgm +; +; GFX900-SDAG-LABEL: s_exp10_v2f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x33979a37 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0xc23369f4 +; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s2, v0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v6 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v6 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v6, v7 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v8, v0 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v7 +; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v5 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x421a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v3 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v6 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v5 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v3 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; GFX900-SDAG-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX900-SDAG-NEXT: s_endpgm +; +; GFX900-GISEL-LABEL: s_exp10_v2f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x33979a37 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000 +; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s2, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v3, s2, v0, -v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v2 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s3, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v3, s2, v1, v3 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4 +; GFX900-GISEL-NEXT: v_fma_f32 v0, s3, v0, -v5 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_fma_f32 v0, s3, v1, v0 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v5 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v5, v5, v1 +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v5, v0 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4 +; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v5, v1 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX900-GISEL-NEXT: s_endpgm +; +; SI-SDAG-LABEL: s_exp10_v2f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x33979a37 +; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; SI-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2 +; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; SI-SDAG-NEXT: v_mul_f32_e32 v5, s2, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v5 +; SI-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v5 +; SI-SDAG-NEXT: v_sub_f32_e32 v7, v5, v6 +; SI-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v7, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v5, v6 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v3 +; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x421a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v6, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v6, v2, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v5 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v3 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v4 +; SI-SDAG-NEXT: s_mov_b32 s4, s0 +; SI-SDAG-NEXT: s_mov_b32 s5, s1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: s_exp10_v2f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x33979a37 +; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v0 +; SI-GISEL-NEXT: v_fma_f32 v3, s2, v0, -v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2 +; SI-GISEL-NEXT: v_mul_f32_e32 v5, s3, v0 +; SI-GISEL-NEXT: v_fma_f32 v3, s2, v1, v3 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4 +; SI-GISEL-NEXT: v_fma_f32 v0, s3, v0, -v5 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_fma_f32 v0, s3, v1, v0 +; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v5 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v5, v5, v1 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v5, v0 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v5, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v5, v1 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-GISEL-NEXT: s_endpgm +; +; R600-LABEL: s_exp10_v2f32: +; R600: ; %bb.0: +; R600-NEXT: ALU 96, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: ALU 12, @101, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: ALU clause starting at 4: +; R600-NEXT: AND_INT * T0.W, KC0[3].X, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD * T1.W, KC0[3].X, -PV.W, +; R600-NEXT: AND_INT T0.Z, KC0[2].W, literal.x, +; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.y, +; R600-NEXT: MUL_IEEE * T3.W, T0.W, literal.z, +; R600-NEXT: -4096(nan), 975668412(6.390323e-04) +; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; R600-NEXT: RNDNE T1.Z, PS, +; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W, +; R600-NEXT: ADD * T2.W, KC0[2].W, -PV.Z, +; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T0.Y, PS, literal.x, +; R600-NEXT: MUL_IEEE T2.Z, T0.Z, literal.y, +; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W, +; R600-NEXT: ADD * T1.W, T3.W, -PV.Z, +; R600-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00) +; R600-NEXT: ADD T3.Z, PS, PV.W, +; R600-NEXT: RNDNE T0.W, PV.Z, +; R600-NEXT: MULADD_IEEE * T1.W, T2.W, literal.x, PV.Y, BS:VEC_021/SCL_122 +; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; R600-NEXT: TRUNC T0.Y, T1.Z, +; R600-NEXT: MULADD_IEEE T0.Z, T0.Z, literal.x, PS, BS:VEC_120/SCL_212 +; R600-NEXT: ADD T1.W, T2.Z, -PV.W, BS:VEC_201 +; R600-NEXT: EXP_IEEE * T0.X, PV.Z, +; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; R600-NEXT: ADD T0.Z, PV.W, PV.Z, +; R600-NEXT: FLT_TO_INT T1.W, PV.Y, +; R600-NEXT: MUL_IEEE * T2.W, PS, literal.x, +; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T1.Z, PS, literal.x, +; R600-NEXT: SETGT_UINT T3.W, PV.W, literal.y, +; R600-NEXT: EXP_IEEE * T0.Y, PV.Z, +; R600-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43) +; R600-NEXT: CNDE_INT T1.X, PV.W, T2.W, PV.Z, +; R600-NEXT: MUL_IEEE T1.Y, PS, literal.x, +; R600-NEXT: MAX_INT T0.Z, T1.W, literal.y, +; R600-NEXT: MIN_INT T2.W, T1.W, literal.z, +; R600-NEXT: TRUNC * T0.W, T0.W, +; R600-NEXT: 2130706432(1.701412e+38), -330(nan) +; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00) +; R600-NEXT: FLT_TO_INT T2.X, PS, +; R600-NEXT: ADD_INT T2.Y, PV.W, literal.x, +; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y, +; R600-NEXT: ADD_INT T0.W, T1.W, literal.z, +; R600-NEXT: SETGT_UINT * T2.W, T1.W, literal.w, +; R600-NEXT: -254(nan), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -229(nan) +; R600-NEXT: ADD_INT T3.X, T1.W, literal.x, +; R600-NEXT: CNDE_INT T3.Y, PS, PV.Z, PV.W, +; R600-NEXT: SETGT_INT T0.Z, T1.W, literal.x, +; R600-NEXT: MUL_IEEE T0.W, T0.X, literal.y, +; R600-NEXT: MUL_IEEE * T4.W, T0.Y, literal.y, +; R600-NEXT: -127(nan), 209715200(1.972152e-31) +; R600-NEXT: MUL_IEEE T4.X, PS, literal.x, +; R600-NEXT: MUL_IEEE T4.Y, PV.W, literal.x, +; R600-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, T1.W, +; R600-NEXT: CNDE_INT T3.W, T3.W, PV.X, T2.Y, +; R600-NEXT: MAX_INT * T5.W, T2.X, literal.y, +; R600-NEXT: 209715200(1.972152e-31), -330(nan) +; R600-NEXT: SETGT_INT T3.X, T1.W, literal.x, +; R600-NEXT: ADD_INT T2.Y, PS, literal.y, +; R600-NEXT: ADD_INT T2.Z, T2.X, literal.z, +; R600-NEXT: SETGT_UINT * T1.W, T2.X, literal.w, +; R600-NEXT: 127(1.779649e-43), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -229(nan) +; R600-NEXT: MIN_INT * T5.W, T2.X, literal.x, +; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00) +; R600-NEXT: ADD_INT T5.X, PV.W, literal.x, +; R600-NEXT: ADD_INT T3.Y, T2.X, literal.y, +; R600-NEXT: SETGT_UINT T3.Z, T2.X, literal.z, +; R600-NEXT: CNDE_INT T5.W, T1.W, T2.Y, T2.Z, +; R600-NEXT: SETGT_INT * T6.W, T2.X, literal.y, +; R600-NEXT: -254(nan), -127(nan) +; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T6.X, PS, PV.W, T2.X, +; R600-NEXT: CNDE_INT T2.Y, PV.Z, PV.Y, PV.X, +; R600-NEXT: SETGT_INT T2.Z, T2.X, literal.x, BS:VEC_120/SCL_212 +; R600-NEXT: CNDE_INT T3.W, T3.X, T1.Z, T3.W, BS:VEC_021/SCL_122 +; R600-NEXT: CNDE_INT * T0.W, T2.W, T4.Y, T0.W, +; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T0.X, T0.Z, PS, T0.X, +; R600-NEXT: LSHL T3.Y, PV.W, literal.x, +; R600-NEXT: CNDE_INT T0.Z, PV.Z, PV.X, PV.Y, +; R600-NEXT: CNDE_INT T0.W, T1.W, T4.X, T4.W, +; R600-NEXT: MUL_IEEE * T1.W, T1.Y, literal.y, +; R600-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38) +; R600-NEXT: CNDE_INT T2.X, T3.Z, T1.Y, PS, +; R600-NEXT: CNDE_INT T0.Y, T6.W, PV.W, T0.Y, +; R600-NEXT: LSHL T0.Z, PV.Z, literal.x, +; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y, +; R600-NEXT: CNDE_INT * T1.W, T3.X, PV.X, T1.X, +; R600-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00) +; R600-NEXT: MUL_IEEE T1.Y, PS, PV.W, +; R600-NEXT: SETGT T1.Z, literal.x, KC0[3].X, +; R600-NEXT: ADD_INT * T0.W, PV.Z, literal.y, +; R600-NEXT: -1036817932(-4.485347e+01), 1065353216(1.000000e+00) +; R600-NEXT: ALU clause starting at 101: +; R600-NEXT: CNDE_INT * T1.W, T2.Z, T0.Y, T2.X, +; R600-NEXT: MUL_IEEE T0.Y, PV.W, T0.W, +; R600-NEXT: SETGT T0.Z, literal.x, KC0[2].W, +; R600-NEXT: CNDE T0.W, T1.Z, T1.Y, 0.0, +; R600-NEXT: SETGT * T1.W, KC0[3].X, literal.y, +; R600-NEXT: -1036817932(-4.485347e+01), 1109008539(3.853184e+01) +; R600-NEXT: CNDE T1.Y, PS, PV.W, literal.x, +; R600-NEXT: CNDE T0.W, PV.Z, PV.Y, 0.0, +; R600-NEXT: SETGT * T1.W, KC0[2].W, literal.y, +; R600-NEXT: 2139095040(INF), 1109008539(3.853184e+01) +; R600-NEXT: CNDE T1.X, PS, PV.W, literal.x, +; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.y, +; R600-NEXT: 2139095040(INF), 2(2.802597e-45) +; +; CM-LABEL: s_exp10_v2f32: +; CM: ; %bb.0: +; CM-NEXT: ALU 100, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: ALU 18, @105, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X +; CM-NEXT: CF_END +; CM-NEXT: ALU clause starting at 4: +; CM-NEXT: AND_INT * T0.W, KC0[2].W, literal.x, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x, +; CM-NEXT: ADD * T1.W, KC0[2].W, -PV.W, +; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x, +; CM-NEXT: RNDNE * T2.W, PV.Z, +; CM-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; CM-NEXT: TRUNC T0.Y, PV.W, +; CM-NEXT: AND_INT T2.Z, KC0[3].X, literal.x, +; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.y, PV.Z, +; CM-NEXT: -4096(nan), 1079283712(3.321289e+00) +; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W, +; CM-NEXT: MUL_IEEE T1.Y, PV.Z, literal.y, +; CM-NEXT: FLT_TO_INT T1.Z, PV.Y, +; CM-NEXT: ADD * T0.W, KC0[3].X, -PV.Z, +; CM-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00) +; CM-NEXT: ADD T1.X, T0.Z, -T2.W, +; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x, +; CM-NEXT: MAX_INT T0.Z, PV.Z, literal.y, +; CM-NEXT: RNDNE * T1.W, PV.Y, +; CM-NEXT: 975668412(6.390323e-04), -330(nan) +; CM-NEXT: TRUNC T2.X, PV.W, +; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.x, +; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.y, PV.Y, +; CM-NEXT: ADD * T0.W, PV.X, T0.X, +; CM-NEXT: 204(2.858649e-43), 1079283712(3.321289e+00) +; CM-NEXT: EXP_IEEE T0.X, T0.W, +; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W, +; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W, +; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W, +; CM-NEXT: ADD_INT T1.X, T1.Z, literal.x, +; CM-NEXT: MULADD_IEEE T0.Y, T2.Z, literal.y, T0.Z, BS:VEC_102/SCL_221 +; CM-NEXT: ADD T0.Z, T1.Y, -T1.W, +; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.z, +; CM-NEXT: 102(1.429324e-43), 975668412(6.390323e-04) +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: SETGT_UINT T3.X, T1.Z, literal.x, +; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.y, +; CM-NEXT: SETGT_UINT T2.Z, T1.Z, literal.z, +; CM-NEXT: ADD * T1.W, PV.Z, PV.Y, +; CM-NEXT: -229(nan), 2130706432(1.701412e+38) +; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00) +; CM-NEXT: EXP_IEEE T0.X (MASKED), T1.W, +; CM-NEXT: EXP_IEEE T0.Y, T1.W, +; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W, +; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W, +; CM-NEXT: CNDE_INT T4.X, T2.Z, T0.W, T1.Y, +; CM-NEXT: CNDE_INT T1.Y, T3.X, T2.Y, T1.X, +; CM-NEXT: FLT_TO_INT T0.Z, T2.X, BS:VEC_120/SCL_212 +; CM-NEXT: MUL_IEEE * T0.W, PV.Y, literal.x, +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: SETGT_INT T1.X, T1.Z, literal.x, +; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.y, +; CM-NEXT: MUL_IEEE T3.Z, PV.W, literal.z, +; CM-NEXT: SETGT_UINT * T1.W, PV.Z, literal.w, +; CM-NEXT: -127(nan), 209715200(1.972152e-31) +; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43) +; CM-NEXT: CNDE_INT T2.X, PV.W, T0.W, PV.Z, +; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x, +; CM-NEXT: CNDE_INT T3.Z, PV.X, T1.Y, T1.Z, +; CM-NEXT: MAX_INT * T0.W, T0.Z, literal.y, +; CM-NEXT: 209715200(1.972152e-31), -330(nan) +; CM-NEXT: ADD_INT T5.X, PV.W, literal.x, +; CM-NEXT: ADD_INT T1.Y, T0.Z, literal.y, +; CM-NEXT: SETGT_UINT T4.Z, T0.Z, literal.z, +; CM-NEXT: MUL_IEEE * T0.W, T0.Y, literal.w, +; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43) +; CM-NEXT: -229(nan), 209715200(1.972152e-31) +; CM-NEXT: MUL_IEEE T6.X, PV.W, literal.x, +; CM-NEXT: MIN_INT T4.Y, T0.Z, literal.y, +; CM-NEXT: CNDE_INT T5.Z, PV.Z, PV.X, PV.Y, +; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.z, +; CM-NEXT: 209715200(1.972152e-31), 381(5.338947e-43) +; CM-NEXT: -127(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T0.Z, +; CM-NEXT: MIN_INT T1.Y, T1.Z, literal.x, +; CM-NEXT: ADD_INT T5.Z, PV.Y, literal.y, +; CM-NEXT: ADD_INT * T3.W, T0.Z, literal.z, BS:VEC_120/SCL_212 +; CM-NEXT: 381(5.338947e-43), -254(nan) +; CM-NEXT: -127(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T7.X, T1.W, PV.W, PV.Z, +; CM-NEXT: SETGT_INT T4.Y, T0.Z, literal.x, +; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y, +; CM-NEXT: ADD_INT * T1.W, T1.Z, literal.z, BS:VEC_120/SCL_212 +; CM-NEXT: 127(1.779649e-43), -254(nan) +; CM-NEXT: -127(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T8.X, T2.Z, PV.W, PV.Z, +; CM-NEXT: SETGT_INT T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212 +; CM-NEXT: CNDE_INT T0.Z, PV.Y, T5.X, PV.X, +; CM-NEXT: CNDE_INT * T0.W, T4.Z, T6.X, T0.W, BS:VEC_201 +; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T5.X, T2.W, PV.W, T0.Y, +; CM-NEXT: LSHL T0.Y, PV.Z, literal.x, +; CM-NEXT: CNDE_INT T0.Z, PV.Y, T3.Z, PV.X, +; CM-NEXT: CNDE_INT * T0.W, T3.X, T3.Y, T2.Y, BS:VEC_201 +; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T0.X, T1.X, PV.W, T0.X, +; CM-NEXT: LSHL T2.Y, PV.Z, literal.x, +; CM-NEXT: ADD_INT * T0.Z, PV.Y, literal.y, +; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00) +; CM-NEXT: ALU clause starting at 105: +; CM-NEXT: CNDE_INT * T0.W, T4.Y, T5.X, T2.X, +; CM-NEXT: MUL_IEEE T1.X, PV.W, T0.Z, +; CM-NEXT: SETGT T0.Y, literal.x, KC0[3].X, +; CM-NEXT: ADD_INT T0.Z, T2.Y, literal.y, +; CM-NEXT: CNDE_INT * T0.W, T1.Y, T0.X, T4.X, BS:VEC_120/SCL_212 +; CM-NEXT: -1036817932(-4.485347e+01), 1065353216(1.000000e+00) +; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z, +; CM-NEXT: SETGT T1.Y, literal.x, KC0[2].W, +; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0, +; CM-NEXT: SETGT * T0.W, KC0[3].X, literal.y, +; CM-NEXT: -1036817932(-4.485347e+01), 1109008539(3.853184e+01) +; CM-NEXT: CNDE T0.Y, PV.W, PV.Z, literal.x, +; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0, +; CM-NEXT: SETGT * T0.W, KC0[2].W, literal.y, +; CM-NEXT: 2139095040(INF), 1109008539(3.853184e+01) +; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x, +; CM-NEXT: 2139095040(INF), 0(0.000000e+00) +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %result = call <2 x float> @llvm.exp10.v2f32(<2 x float> %in) + store <2 x float> %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in) { +; VI-SDAG-LABEL: s_exp10_v3f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x40549000 +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: s_and_b32 s2, s6, 0xfffff000 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, s6, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x3a2784bc +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, s2, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_and_b32 s2, s5, 0xfffff000 +; VI-SDAG-NEXT: v_mov_b32_e32 v7, s2 +; VI-SDAG-NEXT: v_sub_f32_e32 v7, s5, v7 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3a2784bc, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x40549000, v7 +; VI-SDAG-NEXT: v_rndne_f32_e32 v6, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, s2, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6 +; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v7 +; VI-SDAG-NEXT: v_exp_f32_e32 v7, v2 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v3 +; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x421a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v5 +; VI-SDAG-NEXT: s_and_b32 s2, s4, 0xfffff000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v1, v7, v6 +; VI-SDAG-NEXT: v_mov_b32_e32 v7, s2 +; VI-SDAG-NEXT: v_sub_f32_e32 v7, s4, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x3a2784bc, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x40549000, v7 +; VI-SDAG-NEXT: v_rndne_f32_e32 v6, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v9 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, s2, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v6 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v6 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v3 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v5 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v3 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v5 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, s1 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v3, s0 +; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2] +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: s_exp10_v3f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3a2784bc +; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: s_and_b32 s2, s4, 0xfffff000 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: v_sub_f32_e32 v0, s4, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, s2, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, s2, v2 +; VI-GISEL-NEXT: s_and_b32 s2, s5, 0xfffff000 +; VI-GISEL-NEXT: v_mov_b32_e32 v5, s2 +; VI-GISEL-NEXT: v_sub_f32_e32 v5, s5, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3a2784bc, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x40549000, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, s2, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v7, v5 +; VI-GISEL-NEXT: v_rndne_f32_e32 v7, v6 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v7 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v6, v5 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 +; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v4, v0 +; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v3 +; VI-GISEL-NEXT: s_and_b32 s2, s6, 0xfffff000 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_ldexp_f32 v5, v5, v6 +; VI-GISEL-NEXT: v_mov_b32_e32 v6, s2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v3, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, s6, v6 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3a2784bc, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x40549000, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, s2, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v6 +; VI-GISEL-NEXT: v_rndne_f32_e32 v6, v1 +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v3 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v3 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v6 +; VI-GISEL-NEXT: v_exp_f32_e32 v6, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s5, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v2, v6, v2 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v4 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, s1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v3, s0 +; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2] +; VI-GISEL-NEXT: s_endpgm +; +; GFX900-SDAG-LABEL: s_exp10_v3f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x33979a37 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x421a209b +; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s5, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v6 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v6, v7 +; GFX900-SDAG-NEXT: v_fma_f32 v6, s5, v0, -v6 +; GFX900-SDAG-NEXT: v_fma_f32 v6, s5, v1, v6 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v6, v8, v6 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v4, s6, v0, -v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v6, v6 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_fma_f32 v4, s6, v1, v4 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v6, v6, v7 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s4, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v9, v7 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v7 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v10, v7, v9 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0xc23369f4 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v10, v0 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v3 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v9 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v5 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v3 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v5 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v7 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v3 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v5 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; GFX900-SDAG-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX900-SDAG-NEXT: s_endpgm +; +; GFX900-GISEL-LABEL: s_exp10_v3f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x33979a37 +; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s5, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v6, s5, v1, -v5 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v7, v5 +; GFX900-GISEL-NEXT: v_fma_f32 v6, s5, v2, v6 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v5, v5, v7 +; GFX900-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v5 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v3, s4, v1, -v0 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v3, s4, v2, v3 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v3 +; GFX900-GISEL-NEXT: v_ldexp_f32 v5, v5, v6 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s6, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, s6, v1, -v6 +; GFX900-GISEL-NEXT: v_fma_f32 v1, s6, v2, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v2, v6 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v6, v6, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v6, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v3 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v4 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s5, v3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v6, v2 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0 +; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX900-GISEL-NEXT: s_endpgm +; +; SI-SDAG-LABEL: s_exp10_v3f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd +; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x33979a37 +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v5, s4, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v5 +; SI-SDAG-NEXT: v_sub_f32_e32 v7, v5, v6 +; SI-SDAG-NEXT: v_fma_f32 v5, s4, v0, -v5 +; SI-SDAG-NEXT: v_fma_f32 v5, s4, v2, v5 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, s5, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v5, v7, v5 +; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; SI-SDAG-NEXT: v_fma_f32 v4, s5, v0, -v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v5, v5 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-SDAG-NEXT: v_fma_f32 v4, s5, v2, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v5, v5, v6 +; SI-SDAG-NEXT: v_mul_f32_e32 v6, s6, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v8, v6 +; SI-SDAG-NEXT: v_fma_f32 v0, s6, v0, -v6 +; SI-SDAG-NEXT: v_sub_f32_e32 v9, v6, v8 +; SI-SDAG-NEXT: v_fma_f32 v0, s6, v2, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v3 +; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc23369f4 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v9, v0 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v3 +; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x421a209b +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v0 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v8 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v7, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v3 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v6 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v3 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v4 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc +; SI-SDAG-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8 +; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: s_exp10_v3f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x33979a37 +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_mul_f32_e32 v5, s5, v1 +; SI-GISEL-NEXT: v_fma_f32 v6, s5, v1, -v5 +; SI-GISEL-NEXT: v_rndne_f32_e32 v7, v5 +; SI-GISEL-NEXT: v_fma_f32 v6, s5, v2, v6 +; SI-GISEL-NEXT: v_sub_f32_e32 v5, v5, v7 +; SI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 +; SI-GISEL-NEXT: v_exp_f32_e32 v5, v5 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v1 +; SI-GISEL-NEXT: v_fma_f32 v3, s4, v1, -v0 +; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v0 +; SI-GISEL-NEXT: v_fma_f32 v3, s4, v2, v3 +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v5, v5, v6 +; SI-GISEL-NEXT: v_mul_f32_e32 v6, s6, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, s6, v1, -v6 +; SI-GISEL-NEXT: v_fma_f32 v1, s6, v2, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v2, v6 +; SI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v3 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v4 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_exp_f32_e32 v6, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s5, v3 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v6, v2 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v3 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-GISEL-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8 +; SI-GISEL-NEXT: s_endpgm +; +; R600-LABEL: s_exp10_v3f32: +; R600: ; %bb.0: +; R600-NEXT: ALU 100, @6, KC0[CB0:0-32], KC1[] +; R600-NEXT: ALU 69, @107, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: ALU clause starting at 6: +; R600-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD T1.W, KC0[3].Y, -PV.W, +; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x, +; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; R600-NEXT: RNDNE T3.W, PS, +; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x, +; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS, +; R600-NEXT: TRUNC * T4.W, PV.W, +; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; R600-NEXT: FLT_TO_INT T0.Z, PS, +; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W, +; R600-NEXT: ADD * T1.W, T2.W, -T3.W, +; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; R600-NEXT: ADD T0.W, PS, PV.W, +; R600-NEXT: MAX_INT * T1.W, PV.Z, literal.x, +; R600-NEXT: -330(nan), 0(0.000000e+00) +; R600-NEXT: ADD_INT T0.Y, PS, literal.x, +; R600-NEXT: ADD_INT T1.Z, T0.Z, literal.y, +; R600-NEXT: SETGT_UINT T1.W, T0.Z, literal.z, +; R600-NEXT: EXP_IEEE * T0.X, PV.W, +; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43) +; R600-NEXT: -229(nan), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z, +; R600-NEXT: SETGT_INT T0.W, T0.Z, literal.x, +; R600-NEXT: MUL_IEEE * T2.W, PS, literal.y, +; R600-NEXT: -127(nan), 209715200(1.972152e-31) +; R600-NEXT: MUL_IEEE T0.Y, PS, literal.x, +; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z, +; R600-NEXT: MIN_INT T3.W, T0.Z, literal.y, +; R600-NEXT: AND_INT * T4.W, KC0[3].W, literal.z, +; R600-NEXT: 209715200(1.972152e-31), 381(5.338947e-43) +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T1.X, T0.X, literal.x, +; R600-NEXT: ADD T1.Y, KC0[3].W, -PS, +; R600-NEXT: ADD_INT T2.Z, PV.W, literal.y, +; R600-NEXT: ADD_INT T3.W, T0.Z, literal.z, +; R600-NEXT: SETGT_UINT * T5.W, T0.Z, literal.w, +; R600-NEXT: 2130706432(1.701412e+38), -254(nan) +; R600-NEXT: -127(nan), 254(3.559298e-43) +; R600-NEXT: CNDE_INT T2.X, PS, PV.W, PV.Z, +; R600-NEXT: SETGT_INT T2.Y, T0.Z, literal.x, +; R600-NEXT: MUL_IEEE T0.Z, PV.Y, literal.y, +; R600-NEXT: MUL_IEEE T3.W, T4.W, literal.z, +; R600-NEXT: MUL_IEEE * T6.W, PV.X, literal.w, +; R600-NEXT: 127(1.779649e-43), 975668412(6.390323e-04) +; R600-NEXT: 1079283712(3.321289e+00), 2130706432(1.701412e+38) +; R600-NEXT: CNDE_INT T1.X, T5.W, T1.X, PS, BS:VEC_120/SCL_212 +; R600-NEXT: RNDNE T3.Y, PV.W, +; R600-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z, +; R600-NEXT: CNDE_INT T5.W, PV.Y, T1.Z, PV.X, +; R600-NEXT: CNDE_INT * T1.W, T1.W, T0.Y, T2.W, +; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T0.X, T0.W, PS, T0.X, +; R600-NEXT: LSHL T0.Y, PV.W, literal.x, +; R600-NEXT: AND_INT T1.Z, KC0[3].Z, literal.y, +; R600-NEXT: MULADD_IEEE T0.W, T4.W, literal.z, PV.Z, BS:VEC_120/SCL_212 +; R600-NEXT: ADD * T1.W, T3.W, -PV.Y, +; R600-NEXT: 23(3.222986e-44), -4096(nan) +; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; R600-NEXT: ADD T1.Y, PS, PV.W, +; R600-NEXT: MUL_IEEE T0.Z, PV.Z, literal.x, +; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y, +; R600-NEXT: CNDE_INT * T1.W, T2.Y, PV.X, T1.X, +; R600-NEXT: 1079283712(3.321289e+00), 1065353216(1.000000e+00) +; R600-NEXT: MUL_IEEE T0.X, PS, PV.W, +; R600-NEXT: ADD T0.Y, KC0[3].Z, -T1.Z, +; R600-NEXT: RNDNE T2.Z, PV.Z, +; R600-NEXT: TRUNC T0.W, T3.Y, +; R600-NEXT: EXP_IEEE * T1.X, PV.Y, +; R600-NEXT: SETGT T2.X, literal.x, KC0[3].Y, +; R600-NEXT: FLT_TO_INT T1.Y, PV.W, +; R600-NEXT: TRUNC T3.Z, PV.Z, +; R600-NEXT: MUL_IEEE T0.W, PV.Y, literal.y, +; R600-NEXT: MUL_IEEE * T1.W, PS, literal.z, +; R600-NEXT: -1036817932(-4.485347e+01), 975668412(6.390323e-04) +; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x, +; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y, +; R600-NEXT: MULADD_IEEE T4.Z, T0.Y, literal.z, PV.W, +; R600-NEXT: FLT_TO_INT T0.W, PV.Z, +; R600-NEXT: MIN_INT * T2.W, PV.Y, literal.w, +; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31) +; R600-NEXT: 1079283712(3.321289e+00), 381(5.338947e-43) +; R600-NEXT: ADD_INT T4.X, PS, literal.x, +; R600-NEXT: MAX_INT T0.Y, PV.W, literal.y, +; R600-NEXT: MULADD_IEEE T1.Z, T1.Z, literal.z, PV.Z, +; R600-NEXT: ADD T2.W, T0.Z, -T2.Z, BS:VEC_120/SCL_212 +; R600-NEXT: MIN_INT * T3.W, PV.W, literal.w, +; R600-NEXT: -254(nan), -330(nan) +; R600-NEXT: 975668412(6.390323e-04), 381(5.338947e-43) +; R600-NEXT: ADD_INT T5.X, PS, literal.x, +; R600-NEXT: ADD T3.Y, PV.W, PV.Z, +; R600-NEXT: ADD_INT T0.Z, PV.Y, literal.y, +; R600-NEXT: ADD_INT T2.W, T0.W, literal.z, +; R600-NEXT: SETGT_UINT * T3.W, T0.W, literal.w, +; R600-NEXT: -254(nan), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -229(nan) +; R600-NEXT: ADD_INT * T6.X, T0.W, literal.x, +; R600-NEXT: -127(nan), 0(0.000000e+00) +; R600-NEXT: ALU clause starting at 107: +; R600-NEXT: SETGT_UINT T0.Y, T0.W, literal.x, +; R600-NEXT: CNDE_INT T0.Z, T3.W, T0.Z, T2.W, BS:VEC_102/SCL_221 +; R600-NEXT: SETGT_INT T2.W, T0.W, literal.y, +; R600-NEXT: EXP_IEEE * T1.Z, T3.Y, +; R600-NEXT: 254(3.559298e-43), -127(nan) +; R600-NEXT: ADD_INT T7.X, T1.Y, literal.x, +; R600-NEXT: MUL_IEEE T3.Y, PS, literal.y, +; R600-NEXT: CNDE_INT T0.Z, PV.W, PV.Z, T0.W, +; R600-NEXT: CNDE_INT T4.W, PV.Y, T6.X, T5.X, +; R600-NEXT: SETGT_INT * T0.W, T0.W, literal.z, +; R600-NEXT: -127(nan), 209715200(1.972152e-31) +; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; R600-NEXT: SETGT_UINT T5.X, T1.Y, literal.x, +; R600-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W, +; R600-NEXT: MAX_INT T0.Z, T1.Y, literal.y, +; R600-NEXT: MUL_IEEE T4.W, T1.Z, literal.z, +; R600-NEXT: MUL_IEEE * T5.W, PV.Y, literal.w, +; R600-NEXT: 254(3.559298e-43), -330(nan) +; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31) +; R600-NEXT: CNDE_INT T6.X, T3.W, PS, T3.Y, BS:VEC_021/SCL_122 +; R600-NEXT: MUL_IEEE T3.Y, PV.W, literal.x, +; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y, +; R600-NEXT: ADD_INT T3.W, T1.Y, literal.z, +; R600-NEXT: SETGT_UINT * T5.W, T1.Y, literal.w, +; R600-NEXT: 2130706432(1.701412e+38), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -229(nan) +; R600-NEXT: CNDE_INT T8.X, PS, PV.Z, PV.W, +; R600-NEXT: SETGT_INT T5.Y, T1.Y, literal.x, +; R600-NEXT: CNDE_INT T0.Z, T0.Y, T4.W, PV.Y, BS:VEC_120/SCL_212 +; R600-NEXT: CNDE_INT T2.W, T2.W, PV.X, T1.Z, +; R600-NEXT: LSHL * T3.W, T4.Y, literal.y, +; R600-NEXT: -127(nan), 23(3.222986e-44) +; R600-NEXT: ADD_INT T6.X, PS, literal.x, +; R600-NEXT: CNDE_INT T0.Y, T0.W, PV.W, PV.Z, +; R600-NEXT: CNDE_INT T0.Z, PV.Y, PV.X, T1.Y, +; R600-NEXT: CNDE_INT T0.W, T5.X, T7.X, T4.X, +; R600-NEXT: SETGT_INT * T2.W, T1.Y, literal.y, +; R600-NEXT: 1065353216(1.000000e+00), 127(1.779649e-43) +; R600-NEXT: CNDE_INT T4.X, PS, PV.Z, PV.W, +; R600-NEXT: MUL_IEEE T0.Y, PV.Y, PV.X, +; R600-NEXT: SETGT T0.Z, literal.x, KC0[3].Z, +; R600-NEXT: CNDE_INT T0.W, T5.W, T2.Y, T1.W, +; R600-NEXT: MUL_IEEE * T1.W, T3.X, literal.y, +; R600-NEXT: -1036817932(-4.485347e+01), 2130706432(1.701412e+38) +; R600-NEXT: CNDE_INT T3.X, T5.X, T3.X, PS, +; R600-NEXT: CNDE_INT T1.Y, T5.Y, PV.W, T1.X, +; R600-NEXT: CNDE T0.Z, PV.Z, PV.Y, 0.0, +; R600-NEXT: SETGT T0.W, KC0[3].Z, literal.x, +; R600-NEXT: LSHL * T1.W, PV.X, literal.y, +; R600-NEXT: 1109008539(3.853184e+01), 23(3.222986e-44) +; R600-NEXT: ADD_INT T1.X, PS, literal.x, +; R600-NEXT: CNDE T0.Y, PV.W, PV.Z, literal.y, +; R600-NEXT: CNDE_INT T0.Z, T2.W, PV.Y, PV.X, +; R600-NEXT: CNDE T0.W, T2.X, T0.X, 0.0, +; R600-NEXT: SETGT * T1.W, KC0[3].Y, literal.z, +; R600-NEXT: 1065353216(1.000000e+00), 2139095040(INF) +; R600-NEXT: 1109008539(3.853184e+01), 0(0.000000e+00) +; R600-NEXT: CNDE T0.X, PS, PV.W, literal.x, +; R600-NEXT: MUL_IEEE T0.W, PV.Z, PV.X, +; R600-NEXT: SETGT * T1.W, literal.y, KC0[3].W, +; R600-NEXT: 2139095040(INF), -1036817932(-4.485347e+01) +; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x, +; R600-NEXT: CNDE T0.W, PS, PV.W, 0.0, +; R600-NEXT: SETGT * T1.W, KC0[3].W, literal.y, +; R600-NEXT: 2(2.802597e-45), 1109008539(3.853184e+01) +; R600-NEXT: CNDE T2.X, PS, PV.W, literal.x, +; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; R600-NEXT: 2139095040(INF), 8(1.121039e-44) +; R600-NEXT: LSHR * T3.X, PV.W, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; CM-LABEL: s_exp10_v3f32: +; CM: ; %bb.0: +; CM-NEXT: ALU 102, @6, KC0[CB0:0-32], KC1[] +; CM-NEXT: ALU 80, @109, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T3.X +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: ALU clause starting at 6: +; CM-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x, +; CM-NEXT: ADD * T1.W, KC0[3].Y, -PV.W, +; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x, +; CM-NEXT: RNDNE * T2.W, PV.Z, +; CM-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; CM-NEXT: TRUNC T2.Z, PV.W, +; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z, +; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T0.Y, T0.W, literal.x, PV.W, +; CM-NEXT: ADD T0.Z, T0.Z, -T2.W, +; CM-NEXT: FLT_TO_INT * T0.W, PV.Z, +; CM-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; CM-NEXT: MIN_INT T1.Z, PV.W, literal.x, +; CM-NEXT: ADD * T1.W, PV.Z, PV.Y, +; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00) +; CM-NEXT: EXP_IEEE T0.X, T1.W, +; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W, +; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W, +; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W, +; CM-NEXT: MUL_IEEE T0.Y, PV.X, literal.x, +; CM-NEXT: ADD_INT T0.Z, T1.Z, literal.y, +; CM-NEXT: MAX_INT * T1.W, T0.W, literal.z, +; CM-NEXT: 2130706432(1.701412e+38), -254(nan) +; CM-NEXT: -330(nan), 0(0.000000e+00) +; CM-NEXT: ADD_INT T1.X, T0.W, literal.x, +; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y, +; CM-NEXT: ADD_INT T1.Z, T0.W, literal.z, +; CM-NEXT: SETGT_UINT * T1.W, T0.W, literal.w, +; CM-NEXT: -127(nan), 204(2.858649e-43) +; CM-NEXT: 102(1.429324e-43), -229(nan) +; CM-NEXT: SETGT_UINT T2.X, T0.W, literal.x, +; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z, +; CM-NEXT: SETGT_INT T1.Z, T0.W, literal.y, +; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.z, +; CM-NEXT: 254(3.559298e-43), -127(nan) +; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x, +; CM-NEXT: CNDE_INT T1.Y, PV.Z, PV.Y, T0.W, +; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.X, T0.Z, +; CM-NEXT: SETGT_INT * T0.W, T0.W, literal.y, +; CM-NEXT: 209715200(1.972152e-31), 127(1.779649e-43) +; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z, +; CM-NEXT: CNDE_INT T0.Z, T1.W, PV.X, T2.W, +; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.x, +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T1.X, T2.X, T0.Y, PV.W, +; CM-NEXT: CNDE_INT T0.Y, T1.Z, PV.Z, T0.X, +; CM-NEXT: LSHL T0.Z, PV.Y, literal.x, +; CM-NEXT: AND_INT * T1.W, KC0[3].Z, literal.y, +; CM-NEXT: 23(3.222986e-44), -4096(nan) +; CM-NEXT: MUL_IEEE T0.X, PV.W, literal.x, +; CM-NEXT: ADD T1.Y, KC0[3].Z, -PV.W, +; CM-NEXT: ADD_INT T0.Z, PV.Z, literal.y, +; CM-NEXT: CNDE_INT * T0.W, T0.W, PV.Y, PV.X, +; CM-NEXT: 1079283712(3.321289e+00), 1065353216(1.000000e+00) +; CM-NEXT: MUL_IEEE T0.Y, PV.W, PV.Z, +; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x, +; CM-NEXT: RNDNE * T0.W, PV.X, +; CM-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; CM-NEXT: SETGT T1.X, literal.x, KC0[3].Y, +; CM-NEXT: TRUNC T2.Y, PV.W, +; CM-NEXT: AND_INT T1.Z, KC0[3].W, literal.y, +; CM-NEXT: MULADD_IEEE * T2.W, T1.Y, literal.z, PV.Z, +; CM-NEXT: -1036817932(-4.485347e+01), -4096(nan) +; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T2.X, T1.W, literal.x, PV.W, +; CM-NEXT: MUL_IEEE T1.Y, PV.Z, literal.y, +; CM-NEXT: FLT_TO_INT T0.Z, PV.Y, +; CM-NEXT: ADD * T1.W, KC0[3].W, -PV.Z, +; CM-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00) +; CM-NEXT: ADD T0.X, T0.X, -T0.W, +; CM-NEXT: MUL_IEEE T2.Y, PV.W, literal.x, +; CM-NEXT: MAX_INT T2.Z, PV.Z, literal.y, +; CM-NEXT: RNDNE * T0.W, PV.Y, +; CM-NEXT: 975668412(6.390323e-04), -330(nan) +; CM-NEXT: TRUNC T3.X, PV.W, +; CM-NEXT: ADD_INT T3.Y, PV.Z, literal.x, +; CM-NEXT: MULADD_IEEE T2.Z, T1.W, literal.y, PV.Y, +; CM-NEXT: ADD * T1.W, PV.X, T2.X, +; CM-NEXT: 204(2.858649e-43), 1079283712(3.321289e+00) +; CM-NEXT: EXP_IEEE T0.X, T1.W, +; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W, +; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W, +; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W, +; CM-NEXT: ADD_INT T2.X, T0.Z, literal.x, +; CM-NEXT: MULADD_IEEE T2.Y, T1.Z, literal.y, T2.Z, BS:VEC_102/SCL_221 +; CM-NEXT: ADD T1.Z, T1.Y, -T0.W, +; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.z, +; CM-NEXT: 102(1.429324e-43), 975668412(6.390323e-04) +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: SETGT_UINT T4.X, T0.Z, literal.x, +; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.y, +; CM-NEXT: SETGT_UINT T2.Z, T0.Z, literal.z, +; CM-NEXT: ADD * T1.W, PV.Z, PV.Y, +; CM-NEXT: -229(nan), 2130706432(1.701412e+38) +; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00) +; CM-NEXT: EXP_IEEE T1.X (MASKED), T1.W, +; CM-NEXT: EXP_IEEE T1.Y (MASKED), T1.W, +; CM-NEXT: EXP_IEEE T1.Z, T1.W, +; CM-NEXT: EXP_IEEE * T1.W (MASKED), T1.W, +; CM-NEXT: ALU clause starting at 109: +; CM-NEXT: CNDE_INT T5.X, T2.Z, T0.W, T1.Y, +; CM-NEXT: CNDE_INT T1.Y, T4.X, T3.Y, T2.X, +; CM-NEXT: FLT_TO_INT T3.Z, T3.X, BS:VEC_120/SCL_212 +; CM-NEXT: MUL_IEEE * T0.W, T1.Z, literal.x, BS:VEC_120/SCL_212 +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: SETGT_INT T2.X, T0.Z, literal.x, +; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.y, +; CM-NEXT: MUL_IEEE T4.Z, PV.W, literal.z, +; CM-NEXT: SETGT_UINT * T1.W, PV.Z, literal.w, +; CM-NEXT: -127(nan), 209715200(1.972152e-31) +; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43) +; CM-NEXT: CNDE_INT T3.X, PV.W, T0.W, PV.Z, +; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x, +; CM-NEXT: CNDE_INT T4.Z, PV.X, T1.Y, T0.Z, +; CM-NEXT: MAX_INT * T0.W, T3.Z, literal.y, +; CM-NEXT: 209715200(1.972152e-31), -330(nan) +; CM-NEXT: ADD_INT T6.X, PV.W, literal.x, +; CM-NEXT: ADD_INT T1.Y, T3.Z, literal.y, +; CM-NEXT: SETGT_UINT T5.Z, T3.Z, literal.z, +; CM-NEXT: MUL_IEEE * T0.W, T1.Z, literal.w, BS:VEC_120/SCL_212 +; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43) +; CM-NEXT: -229(nan), 209715200(1.972152e-31) +; CM-NEXT: MUL_IEEE T7.X, PV.W, literal.x, +; CM-NEXT: MIN_INT T4.Y, T3.Z, literal.y, +; CM-NEXT: CNDE_INT T6.Z, PV.Z, PV.X, PV.Y, +; CM-NEXT: SETGT_INT * T2.W, T3.Z, literal.z, +; CM-NEXT: 209715200(1.972152e-31), 381(5.338947e-43) +; CM-NEXT: -127(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T6.X, PV.W, PV.Z, T3.Z, +; CM-NEXT: MIN_INT T1.Y, T0.Z, literal.x, +; CM-NEXT: ADD_INT T6.Z, PV.Y, literal.y, +; CM-NEXT: ADD_INT * T3.W, T3.Z, literal.z, BS:VEC_120/SCL_212 +; CM-NEXT: 381(5.338947e-43), -254(nan) +; CM-NEXT: -127(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T8.X, T1.W, PV.W, PV.Z, +; CM-NEXT: SETGT_INT T4.Y, T3.Z, literal.x, +; CM-NEXT: ADD_INT T3.Z, PV.Y, literal.y, +; CM-NEXT: ADD_INT * T1.W, T0.Z, literal.z, BS:VEC_120/SCL_212 +; CM-NEXT: 127(1.779649e-43), -254(nan) +; CM-NEXT: -127(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T9.X, T2.Z, PV.W, PV.Z, +; CM-NEXT: SETGT_INT T1.Y, T0.Z, literal.x, BS:VEC_120/SCL_212 +; CM-NEXT: CNDE_INT T0.Z, PV.Y, T6.X, PV.X, +; CM-NEXT: CNDE_INT * T0.W, T5.Z, T7.X, T0.W, BS:VEC_201 +; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T6.X, T2.W, PV.W, T1.Z, +; CM-NEXT: LSHL T5.Y, PV.Z, literal.x, +; CM-NEXT: CNDE_INT T0.Z, PV.Y, T4.Z, PV.X, +; CM-NEXT: CNDE_INT * T0.W, T4.X, T3.Y, T2.Y, +; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T0.X, T2.X, PV.W, T0.X, +; CM-NEXT: LSHL T2.Y, PV.Z, literal.x, +; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y, +; CM-NEXT: CNDE_INT * T0.W, T4.Y, PV.X, T3.X, BS:VEC_021/SCL_122 +; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00) +; CM-NEXT: MUL_IEEE T2.X, PV.W, PV.Z, +; CM-NEXT: SETGT T3.Y, literal.x, KC0[3].W, +; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y, +; CM-NEXT: CNDE_INT * T0.W, T1.Y, PV.X, T5.X, +; CM-NEXT: -1036817932(-4.485347e+01), 1065353216(1.000000e+00) +; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z, +; CM-NEXT: SETGT T1.Y, literal.x, KC0[3].Z, +; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0, +; CM-NEXT: SETGT * T0.W, KC0[3].W, literal.y, +; CM-NEXT: -1036817932(-4.485347e+01), 1109008539(3.853184e+01) +; CM-NEXT: CNDE T2.X, PV.W, PV.Z, literal.x, +; CM-NEXT: CNDE T1.Y, PV.Y, PV.X, 0.0, +; CM-NEXT: SETGT T0.Z, KC0[3].Z, literal.y, +; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, +; CM-NEXT: 2139095040(INF), 1109008539(3.853184e+01) +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: LSHR T0.X, PV.W, literal.x, +; CM-NEXT: CNDE T1.Y, PV.Z, PV.Y, literal.y, +; CM-NEXT: CNDE T0.Z, T1.X, T0.Y, 0.0, +; CM-NEXT: SETGT * T0.W, KC0[3].Y, literal.z, +; CM-NEXT: 2(2.802597e-45), 2139095040(INF) +; CM-NEXT: 1109008539(3.853184e+01), 0(0.000000e+00) +; CM-NEXT: CNDE * T1.X, PV.W, PV.Z, literal.x, +; CM-NEXT: 2139095040(INF), 0(0.000000e+00) +; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %result = call <3 x float> @llvm.exp10.v3f32(<3 x float> %in) + store <3 x float> %result, ptr addrspace(1) %out + ret void +} + +; FIXME: We should be able to merge these packets together on Cayman so we +; have a maximum of 4 instructions. +define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) { +; VI-SDAG-LABEL: s_exp10_v4f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x40549000 +; VI-SDAG-NEXT: v_mov_b32_e32 v6, 0x421a209b +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: s_and_b32 s2, s7, 0xfffff000 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, s7, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x3a2784bc +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, s2, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_and_b32 s2, s6, 0xfffff000 +; VI-SDAG-NEXT: v_mov_b32_e32 v7, s2 +; VI-SDAG-NEXT: v_sub_f32_e32 v7, s6, v7 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3a2784bc, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x40549000, v7 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, s2, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v7 +; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v3 +; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0xc23369f4 +; VI-SDAG-NEXT: s_and_b32 s2, s5, 0xfffff000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s7, v5 +; VI-SDAG-NEXT: v_mov_b32_e32 v9, s2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s7, v6 +; VI-SDAG-NEXT: v_sub_f32_e32 v9, s5, v9 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, v8, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v1, v2, v7 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v10, 0x3a2784bc, v9 +; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x40549000, v9 +; VI-SDAG-NEXT: v_rndne_f32_e32 v7, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v9, v9, v10 +; VI-SDAG-NEXT: v_mul_f32_e32 v10, s2, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v7 +; VI-SDAG-NEXT: v_add_f32_e32 v9, v10, v9 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v9 +; VI-SDAG-NEXT: v_exp_f32_e32 v9, v2 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v5 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v6 +; VI-SDAG-NEXT: s_and_b32 s2, s4, 0xfffff000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v1, v9, v7 +; VI-SDAG-NEXT: v_mov_b32_e32 v9, s2 +; VI-SDAG-NEXT: v_sub_f32_e32 v9, s4, v9 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v10, 0x3a2784bc, v9 +; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x40549000, v9 +; VI-SDAG-NEXT: v_rndne_f32_e32 v7, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v9, v9, v10 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, s2, v4 +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v7 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v9 +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v7 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v5 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v6 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v5 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v6 +; VI-SDAG-NEXT: v_mov_b32_e32 v5, s1 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v4, s0 +; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3] +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: s_exp10_v4f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x40549000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3a2784bc +; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x421a209b +; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: s_and_b32 s2, s4, 0xfffff000 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: v_sub_f32_e32 v0, s4, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, s2, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, s2, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v4, v0 +; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v1 +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: s_and_b32 s2, s5, 0xfffff000 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s2 +; VI-GISEL-NEXT: v_sub_f32_e32 v1, s5, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, s2, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v7, v1 +; VI-GISEL-NEXT: v_rndne_f32_e32 v7, v6 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v7 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: s_and_b32 s2, s6, 0xfffff000 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, s2, v2 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v4 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v6 +; VI-GISEL-NEXT: v_mov_b32_e32 v6, s2 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, s6, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3a2784bc, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x40549000, v6 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v9 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, s2, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v9, v6 +; VI-GISEL-NEXT: v_rndne_f32_e32 v9, v8 +; VI-GISEL-NEXT: v_sub_f32_e32 v8, v8, v9 +; VI-GISEL-NEXT: v_add_f32_e32 v6, v8, v6 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9 +; VI-GISEL-NEXT: v_exp_f32_e32 v6, v6 +; VI-GISEL-NEXT: s_and_b32 s2, s7, 0xfffff000 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, s2, v3 +; VI-GISEL-NEXT: v_ldexp_f32 v6, v6, v8 +; VI-GISEL-NEXT: v_mov_b32_e32 v8, s2 +; VI-GISEL-NEXT: v_sub_f32_e32 v8, s7, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3a2784bc, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x40549000, v8 +; VI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v8 +; VI-GISEL-NEXT: v_rndne_f32_e32 v8, v2 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v4 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v8 +; VI-GISEL-NEXT: v_exp_f32_e32 v8, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s5, v5 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v5 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v3, v8, v3 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s7, v5 +; VI-GISEL-NEXT: v_mov_b32_e32 v5, s1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v4, s0 +; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3] +; VI-GISEL-NEXT: s_endpgm +; +; GFX900-SDAG-LABEL: s_exp10_v4f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x33979a37 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0xc23369f4 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x421a209b +; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s7, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v4, s7, v0, -v2 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_fma_f32 v4, s7, v1, v4 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s7, v5 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v9, 0x7f800000 +; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s6, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v3 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v3, v7 +; GFX900-SDAG-NEXT: v_fma_f32 v3, s6, v0, -v3 +; GFX900-SDAG-NEXT: v_fma_f32 v3, s6, v1, v3 +; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v8, v3 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v8, v3 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s7, v6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, v9, v2, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v8, v7 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s5, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v8, v7 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v10, v7, v8 +; GFX900-SDAG-NEXT: v_fma_f32 v7, s5, v0, -v7 +; GFX900-SDAG-NEXT: v_fma_f32 v7, s5, v1, v7 +; GFX900-SDAG-NEXT: v_add_f32_e32 v7, v10, v7 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v7, v7 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v8, v8 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v5 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v6 +; GFX900-SDAG-NEXT: v_ldexp_f32 v7, v7, v8 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v8, s4, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v10, v8 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v8 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v11, v8, v10 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v11, v0 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v8, v10 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v5 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v8 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v5 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v6 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc +; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX900-SDAG-NEXT: s_endpgm +; +; GFX900-GISEL-LABEL: s_exp10_v4f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x40549a78 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x421a209b +; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v2 +; GFX900-GISEL-NEXT: v_fma_f32 v1, s4, v2, -v0 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, s4, v3, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v4 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s5, v2 +; GFX900-GISEL-NEXT: v_fma_f32 v6, s5, v2, -v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v7, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v6, s5, v3, v6 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v7 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v5 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v6 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s6, v2 +; GFX900-GISEL-NEXT: v_fma_f32 v8, s6, v2, -v6 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v9, v6 +; GFX900-GISEL-NEXT: v_fma_f32 v8, s6, v3, v8 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v6, v6, v9 +; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v8 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v6 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v6, v6, v8 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, s7, v2 +; GFX900-GISEL-NEXT: v_fma_f32 v2, s7, v2, -v8 +; GFX900-GISEL-NEXT: v_fma_f32 v2, s7, v3, v2 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v8 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v8, v8, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v8, v2 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v8, v2 +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s5, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v8, v3 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s7, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX900-GISEL-NEXT: s_endpgm +; +; SI-SDAG-LABEL: s_exp10_v4f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd +; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x33979a37 +; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x421a209b +; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; SI-SDAG-NEXT: v_fma_f32 v4, s7, v0, -v2 +; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_fma_f32 v4, s7, v1, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s7, v4 +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v3, s6, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v3 +; SI-SDAG-NEXT: v_sub_f32_e32 v7, v3, v6 +; SI-SDAG-NEXT: v_fma_f32 v3, s6, v0, -v3 +; SI-SDAG-NEXT: v_fma_f32 v3, s6, v1, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v3, v7, v3 +; SI-SDAG-NEXT: v_exp_f32_e32 v7, v3 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s7, v5 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, v8, v2, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v7, v6 +; SI-SDAG-NEXT: v_mul_f32_e32 v6, s5, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v7, v6 +; SI-SDAG-NEXT: v_sub_f32_e32 v9, v6, v7 +; SI-SDAG-NEXT: v_fma_f32 v6, s5, v0, -v6 +; SI-SDAG-NEXT: v_fma_f32 v6, s5, v1, v6 +; SI-SDAG-NEXT: v_add_f32_e32 v6, v9, v6 +; SI-SDAG-NEXT: v_exp_f32_e32 v6, v6 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v5 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v6, v6, v7 +; SI-SDAG-NEXT: v_mul_f32_e32 v7, s4, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v9, v7 +; SI-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v7 +; SI-SDAG-NEXT: v_sub_f32_e32 v10, v7, v9 +; SI-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v10, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v9 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v5 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v7 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v5 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: s_exp10_v4f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x40549a78 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x421a209b +; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v2 +; SI-GISEL-NEXT: v_fma_f32 v1, s4, v2, -v0 +; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, s4, v3, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v4 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, s5, v2 +; SI-GISEL-NEXT: v_fma_f32 v6, s5, v2, -v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v7, v1 +; SI-GISEL-NEXT: v_fma_f32 v6, s5, v3, v6 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v7 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v5 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v6 +; SI-GISEL-NEXT: v_mul_f32_e32 v6, s6, v2 +; SI-GISEL-NEXT: v_fma_f32 v8, s6, v2, -v6 +; SI-GISEL-NEXT: v_rndne_f32_e32 v9, v6 +; SI-GISEL-NEXT: v_fma_f32 v8, s6, v3, v8 +; SI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v9 +; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9 +; SI-GISEL-NEXT: v_exp_f32_e32 v6, v6 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v6, v6, v8 +; SI-GISEL-NEXT: v_mul_f32_e32 v8, s7, v2 +; SI-GISEL-NEXT: v_fma_f32 v2, s7, v2, -v8 +; SI-GISEL-NEXT: v_fma_f32 v2, s7, v3, v2 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v8 +; SI-GISEL-NEXT: v_sub_f32_e32 v8, v8, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v8, v2 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v8, v2 +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s5, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v8, v3 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s7, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-GISEL-NEXT: s_endpgm +; +; R600-LABEL: s_exp10_v4f32: +; R600: ; %bb.0: +; R600-NEXT: ALU 98, @6, KC0[CB0:0-32], KC1[] +; R600-NEXT: ALU 98, @105, KC0[CB0:0-32], KC1[] +; R600-NEXT: ALU 24, @204, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: ALU clause starting at 6: +; R600-NEXT: AND_INT * T0.W, KC0[3].Z, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD T1.W, KC0[3].Z, -PV.W, +; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x, +; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; R600-NEXT: RNDNE T3.W, PS, +; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x, +; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS, +; R600-NEXT: TRUNC * T4.W, PV.W, +; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; R600-NEXT: FLT_TO_INT T0.Z, PS, +; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W, +; R600-NEXT: ADD * T1.W, T2.W, -T3.W, +; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; R600-NEXT: ADD T1.Z, PS, PV.W, +; R600-NEXT: MAX_INT T0.W, PV.Z, literal.x, +; R600-NEXT: MIN_INT * T1.W, PV.Z, literal.y, +; R600-NEXT: -330(nan), 381(5.338947e-43) +; R600-NEXT: ADD_INT T0.X, PS, literal.x, +; R600-NEXT: ADD_INT T0.Y, PV.W, literal.y, +; R600-NEXT: ADD_INT T2.Z, T0.Z, literal.z, +; R600-NEXT: SETGT_UINT T0.W, T0.Z, literal.w, +; R600-NEXT: EXP_IEEE * T1.X, PV.Z, +; R600-NEXT: -254(nan), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -229(nan) +; R600-NEXT: ADD_INT T2.X, T0.Z, literal.x, +; R600-NEXT: SETGT_UINT T1.Y, T0.Z, literal.y, +; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z, +; R600-NEXT: SETGT_INT T1.W, T0.Z, literal.x, +; R600-NEXT: MUL_IEEE * T2.W, PS, literal.z, +; R600-NEXT: -127(nan), 254(3.559298e-43) +; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x, +; R600-NEXT: MUL_IEEE T0.Y, PS, literal.y, +; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z, +; R600-NEXT: CNDE_INT T3.W, PV.Y, PV.X, T0.X, +; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.z, +; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31) +; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; R600-NEXT: AND_INT T2.Y, KC0[4].X, literal.x, +; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W, +; R600-NEXT: CNDE_INT T0.W, T0.W, PV.Y, T2.W, +; R600-NEXT: MUL_IEEE * T2.W, PV.X, literal.y, +; R600-NEXT: -4096(nan), 2130706432(1.701412e+38) +; R600-NEXT: CNDE_INT T0.X, T1.Y, T3.X, PS, +; R600-NEXT: CNDE_INT T0.Y, T1.W, PV.W, T1.X, +; R600-NEXT: LSHL T0.Z, PV.Z, literal.x, +; R600-NEXT: ADD T0.W, KC0[4].X, -PV.Y, +; R600-NEXT: MUL_IEEE * T1.W, PV.Y, literal.y, +; R600-NEXT: 23(3.222986e-44), 1079283712(3.321289e+00) +; R600-NEXT: RNDNE T1.Y, PS, +; R600-NEXT: MUL_IEEE T1.Z, PV.W, literal.x, +; R600-NEXT: ADD_INT T2.W, PV.Z, literal.y, +; R600-NEXT: CNDE_INT * T3.W, T4.W, PV.Y, PV.X, +; R600-NEXT: 975668412(6.390323e-04), 1065353216(1.000000e+00) +; R600-NEXT: MUL_IEEE T0.Y, PS, PV.W, +; R600-NEXT: AND_INT T0.Z, KC0[3].W, literal.x, +; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.y, PV.Z, +; R600-NEXT: TRUNC * T2.W, PV.Y, +; R600-NEXT: -4096(nan), 1079283712(3.321289e+00) +; R600-NEXT: SETGT T0.X, literal.x, KC0[3].Z, +; R600-NEXT: FLT_TO_INT T3.Y, PS, +; R600-NEXT: MULADD_IEEE T1.Z, T2.Y, literal.y, PV.W, +; R600-NEXT: ADD T0.W, T1.W, -T1.Y, +; R600-NEXT: MUL_IEEE * T1.W, PV.Z, literal.z, +; R600-NEXT: -1036817932(-4.485347e+01), 975668412(6.390323e-04) +; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; R600-NEXT: RNDNE T1.X, PS, +; R600-NEXT: AND_INT T1.Y, KC0[3].Y, literal.x, +; R600-NEXT: ADD T1.Z, PV.W, PV.Z, +; R600-NEXT: MAX_INT T0.W, PV.Y, literal.y, +; R600-NEXT: MIN_INT * T2.W, PV.Y, literal.z, +; R600-NEXT: -4096(nan), -330(nan) +; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00) +; R600-NEXT: ADD_INT T2.X, PS, literal.x, +; R600-NEXT: ADD_INT T2.Y, PV.W, literal.y, +; R600-NEXT: ADD_INT T2.Z, T3.Y, literal.z, +; R600-NEXT: SETGT_UINT T0.W, T3.Y, literal.w, +; R600-NEXT: EXP_IEEE * T1.Z, PV.Z, +; R600-NEXT: -254(nan), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -229(nan) +; R600-NEXT: ADD_INT T3.X, T3.Y, literal.x, +; R600-NEXT: SETGT_UINT T4.Y, T3.Y, literal.y, +; R600-NEXT: CNDE_INT T2.Z, PV.W, PV.Y, PV.Z, +; R600-NEXT: SETGT_INT T2.W, T3.Y, literal.x, +; R600-NEXT: MUL_IEEE * T3.W, PS, literal.z, +; R600-NEXT: -127(nan), 254(3.559298e-43) +; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T4.X, T1.Z, literal.x, +; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y, +; R600-NEXT: CNDE_INT T2.Z, PV.W, PV.Z, T3.Y, +; R600-NEXT: CNDE_INT T4.W, PV.Y, PV.X, T2.X, +; R600-NEXT: SETGT_INT * T5.W, T3.Y, literal.z, +; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31) +; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; R600-NEXT: ADD T2.X, KC0[3].W, -T0.Z, +; R600-NEXT: CNDE_INT T3.Y, PS, PV.Z, PV.W, +; R600-NEXT: CNDE_INT * T2.Z, T0.W, PV.Y, T3.W, +; R600-NEXT: ALU clause starting at 105: +; R600-NEXT: MUL_IEEE T0.W, T4.X, literal.x, +; R600-NEXT: ADD * T3.W, KC0[3].Y, -T1.Y, +; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T3.X, PS, literal.x, +; R600-NEXT: MUL_IEEE T2.Y, T1.Y, literal.y, +; R600-NEXT: CNDE_INT T3.Z, T4.Y, T4.X, PV.W, BS:VEC_120/SCL_212 +; R600-NEXT: CNDE_INT T0.W, T2.W, T2.Z, T1.Z, +; R600-NEXT: LSHL * T2.W, T3.Y, literal.z, +; R600-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00) +; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; R600-NEXT: ADD_INT T4.X, PS, literal.x, +; R600-NEXT: CNDE_INT T3.Y, T5.W, PV.W, PV.Z, +; R600-NEXT: RNDNE T1.Z, PV.Y, +; R600-NEXT: MULADD_IEEE T0.W, T3.W, literal.y, PV.X, BS:VEC_120/SCL_212 +; R600-NEXT: MUL_IEEE * T2.W, T2.X, literal.z, +; R600-NEXT: 1065353216(1.000000e+00), 1079283712(3.321289e+00) +; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T2.X, T2.X, literal.x, PS, +; R600-NEXT: MULADD_IEEE T1.Y, T1.Y, literal.y, PV.W, +; R600-NEXT: ADD T2.Z, T2.Y, -PV.Z, BS:VEC_120/SCL_212 +; R600-NEXT: MUL_IEEE T0.W, PV.Y, PV.X, +; R600-NEXT: SETGT * T2.W, literal.z, KC0[4].X, +; R600-NEXT: 1079283712(3.321289e+00), 975668412(6.390323e-04) +; R600-NEXT: -1036817932(-4.485347e+01), 0(0.000000e+00) +; R600-NEXT: CNDE T3.X, PS, PV.W, 0.0, +; R600-NEXT: ADD T1.Y, PV.Z, PV.Y, +; R600-NEXT: TRUNC T1.Z, T1.Z, +; R600-NEXT: MULADD_IEEE T0.W, T0.Z, literal.x, PV.X, BS:VEC_120/SCL_212 +; R600-NEXT: ADD * T1.W, T1.W, -T1.X, +; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00) +; R600-NEXT: SETGT T2.X, KC0[4].X, literal.x, +; R600-NEXT: ADD T2.Y, PS, PV.W, +; R600-NEXT: FLT_TO_INT T0.Z, PV.Z, +; R600-NEXT: TRUNC T0.W, T1.X, +; R600-NEXT: EXP_IEEE * T1.X, PV.Y, +; R600-NEXT: 1109008539(3.853184e+01), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T4.X, PS, literal.x, +; R600-NEXT: FLT_TO_INT T1.Y, PV.W, +; R600-NEXT: MAX_INT T1.Z, PV.Z, literal.y, +; R600-NEXT: MUL_IEEE T0.W, PS, literal.z, +; R600-NEXT: EXP_IEEE * T1.W, PV.Y, +; R600-NEXT: 2130706432(1.701412e+38), -330(nan) +; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T5.X, PV.W, literal.x, +; R600-NEXT: MUL_IEEE T2.Y, PS, literal.x, +; R600-NEXT: ADD_INT T1.Z, PV.Z, literal.y, +; R600-NEXT: ADD_INT T2.W, T0.Z, literal.z, +; R600-NEXT: MAX_INT * T3.W, PV.Y, literal.w, +; R600-NEXT: 209715200(1.972152e-31), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), -330(nan) +; R600-NEXT: SETGT_UINT T6.X, T0.Z, literal.x, +; R600-NEXT: ADD_INT T3.Y, PS, literal.y, +; R600-NEXT: ADD_INT T2.Z, T1.Y, literal.z, +; R600-NEXT: SETGT_UINT T3.W, T1.Y, literal.x, +; R600-NEXT: MIN_INT * T4.W, T1.Y, literal.w, +; R600-NEXT: -229(nan), 204(2.858649e-43) +; R600-NEXT: 102(1.429324e-43), 381(5.338947e-43) +; R600-NEXT: ADD_INT T7.X, PS, literal.x, +; R600-NEXT: ADD_INT T4.Y, T1.Y, literal.y, +; R600-NEXT: SETGT_UINT T3.Z, T1.Y, literal.z, +; R600-NEXT: CNDE_INT T4.W, PV.W, PV.Y, PV.Z, +; R600-NEXT: SETGT_INT * T5.W, T1.Y, literal.y, +; R600-NEXT: -254(nan), -127(nan) +; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T8.X, PS, PV.W, T1.Y, +; R600-NEXT: CNDE_INT T3.Y, PV.Z, PV.Y, PV.X, +; R600-NEXT: SETGT_INT T2.Z, T1.Y, literal.x, +; R600-NEXT: CNDE_INT T2.W, T6.X, T1.Z, T2.W, +; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.y, +; R600-NEXT: 127(1.779649e-43), -127(nan) +; R600-NEXT: CNDE_INT T7.X, PS, PV.W, T0.Z, +; R600-NEXT: CNDE_INT T1.Y, PV.Z, PV.X, PV.Y, +; R600-NEXT: MIN_INT T1.Z, T0.Z, literal.x, +; R600-NEXT: MUL_IEEE T2.W, T1.W, literal.y, +; R600-NEXT: MUL_IEEE * T6.W, T2.Y, literal.z, +; R600-NEXT: 381(5.338947e-43), 2130706432(1.701412e+38) +; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T8.X, T3.W, PS, T2.Y, +; R600-NEXT: MUL_IEEE T2.Y, PV.W, literal.x, +; R600-NEXT: ADD_INT T1.Z, PV.Z, literal.y, +; R600-NEXT: ADD_INT T3.W, T0.Z, literal.z, +; R600-NEXT: SETGT_UINT * T6.W, T0.Z, literal.w, +; R600-NEXT: 2130706432(1.701412e+38), -254(nan) +; R600-NEXT: -127(nan), 254(3.559298e-43) +; R600-NEXT: CNDE_INT T9.X, PS, PV.W, PV.Z, +; R600-NEXT: SETGT_INT T3.Y, T0.Z, literal.x, +; R600-NEXT: CNDE_INT T0.Z, T3.Z, T2.W, PV.Y, BS:VEC_120/SCL_212 +; R600-NEXT: CNDE_INT T1.W, T5.W, PV.X, T1.W, BS:VEC_021/SCL_122 +; R600-NEXT: LSHL * T2.W, T1.Y, literal.y, +; R600-NEXT: 127(1.779649e-43), 23(3.222986e-44) +; R600-NEXT: ADD_INT T8.X, PS, literal.x, +; R600-NEXT: CNDE_INT T1.Y, T2.Z, PV.W, PV.Z, +; R600-NEXT: CNDE_INT T0.Z, PV.Y, T7.X, PV.X, +; R600-NEXT: CNDE_INT * T0.W, T6.X, T5.X, T0.W, BS:VEC_021/SCL_122 +; R600-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE * T1.W, T4.X, literal.x, +; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; R600-NEXT: CNDE_INT T4.X, T6.W, T4.X, PV.W, +; R600-NEXT: CNDE_INT * T2.Y, T4.W, T0.W, T1.X, BS:VEC_120/SCL_212 +; R600-NEXT: ALU clause starting at 204: +; R600-NEXT: LSHL T0.Z, T0.Z, literal.x, +; R600-NEXT: MUL_IEEE T0.W, T1.Y, T8.X, +; R600-NEXT: SETGT * T1.W, literal.y, KC0[3].W, +; R600-NEXT: 23(3.222986e-44), -1036817932(-4.485347e+01) +; R600-NEXT: CNDE T1.X, PS, PV.W, 0.0, +; R600-NEXT: SETGT T1.Y, KC0[3].W, literal.x, +; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y, +; R600-NEXT: CNDE_INT T0.W, T3.Y, T2.Y, T4.X, BS:VEC_120/SCL_212 +; R600-NEXT: CNDE * T1.W, T2.X, T3.X, literal.z, +; R600-NEXT: 1109008539(3.853184e+01), 1065353216(1.000000e+00) +; R600-NEXT: 2139095040(INF), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE T2.X, PV.W, PV.Z, +; R600-NEXT: SETGT T2.Y, literal.x, KC0[3].Y, +; R600-NEXT: CNDE T1.Z, PV.Y, PV.X, literal.y, +; R600-NEXT: CNDE T0.W, T0.X, T0.Y, 0.0, +; R600-NEXT: SETGT * T2.W, KC0[3].Z, literal.z, +; R600-NEXT: -1036817932(-4.485347e+01), 2139095040(INF) +; R600-NEXT: 1109008539(3.853184e+01), 0(0.000000e+00) +; R600-NEXT: CNDE T1.Y, PS, PV.W, literal.x, +; R600-NEXT: CNDE T0.W, PV.Y, PV.X, 0.0, +; R600-NEXT: SETGT * T2.W, KC0[3].Y, literal.y, +; R600-NEXT: 2139095040(INF), 1109008539(3.853184e+01) +; R600-NEXT: CNDE T1.X, PS, PV.W, literal.x, +; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.y, +; R600-NEXT: 2139095040(INF), 2(2.802597e-45) +; +; CM-LABEL: s_exp10_v4f32: +; CM: ; %bb.0: +; CM-NEXT: ALU 97, @6, KC0[CB0:0-32], KC1[] +; CM-NEXT: ALU 100, @104, KC0[CB0:0-32], KC1[] +; CM-NEXT: ALU 36, @205, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: ALU clause starting at 6: +; CM-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: ADD * T1.W, KC0[3].Y, -PV.W, +; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x, +; CM-NEXT: MUL_IEEE T0.Z, T0.W, literal.y, +; CM-NEXT: AND_INT * T2.W, KC0[3].W, literal.z, +; CM-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00) +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: ADD T1.Y, KC0[3].W, -PV.W, +; CM-NEXT: RNDNE T1.Z, PV.Z, +; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Y, +; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W, +; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z, +; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x, +; CM-NEXT: MUL_IEEE * T0.W, T2.W, literal.y, BS:VEC_120/SCL_212 +; CM-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00) +; CM-NEXT: TRUNC T1.X, T1.Z, +; CM-NEXT: RNDNE T2.Y, PV.W, +; CM-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z, +; CM-NEXT: ADD * T1.W, PV.Y, PV.X, +; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00) +; CM-NEXT: EXP_IEEE T0.X, T1.W, +; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W, +; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W, +; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W, +; CM-NEXT: MULADD_IEEE T2.X, T2.W, literal.x, T0.Z, +; CM-NEXT: ADD T0.Y, T0.W, -T2.Y, BS:VEC_120/SCL_212 +; CM-NEXT: FLT_TO_INT T0.Z, T1.X, +; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y, +; CM-NEXT: 975668412(6.390323e-04), 209715200(1.972152e-31) +; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x, +; CM-NEXT: MUL_IEEE T1.Y, T0.X, literal.y, +; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.z, +; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.w, +; CM-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38) +; CM-NEXT: -330(nan), 381(5.338947e-43) +; CM-NEXT: ADD_INT T3.X, PV.W, literal.x, +; CM-NEXT: ADD_INT T3.Y, PV.Z, literal.y, +; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z, +; CM-NEXT: SETGT_UINT * T1.W, T0.Z, literal.w, +; CM-NEXT: -254(nan), 204(2.858649e-43) +; CM-NEXT: 102(1.429324e-43), -229(nan) +; CM-NEXT: ADD_INT T4.X, T0.Z, literal.x, +; CM-NEXT: SETGT_UINT T4.Y, T0.Z, literal.y, +; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z, +; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.x, +; CM-NEXT: -127(nan), 254(3.559298e-43) +; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T0.Z, +; CM-NEXT: CNDE_INT T3.Y, PV.Y, PV.X, T3.X, +; CM-NEXT: SETGT_INT T0.Z, T0.Z, literal.x, +; CM-NEXT: MUL_IEEE * T3.W, T1.Y, literal.y, +; CM-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38) +; CM-NEXT: CNDE_INT T3.X, T4.Y, T1.Y, PV.W, +; CM-NEXT: AND_INT T1.Y, KC0[3].Z, literal.x, +; CM-NEXT: CNDE_INT T1.Z, PV.Z, PV.X, PV.Y, +; CM-NEXT: CNDE_INT * T0.W, T1.W, T1.X, T0.W, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T0.X, T2.W, PV.W, T0.X, +; CM-NEXT: LSHL T3.Y, PV.Z, literal.x, +; CM-NEXT: TRUNC T1.Z, T2.Y, +; CM-NEXT: ADD * T0.W, KC0[3].Z, -PV.Y, +; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x, +; CM-NEXT: FLT_TO_INT T2.Y, PV.Z, +; CM-NEXT: ADD_INT T1.Z, PV.Y, literal.y, +; CM-NEXT: CNDE_INT * T1.W, T0.Z, PV.X, T3.X, +; CM-NEXT: 975668412(6.390323e-04), 1065353216(1.000000e+00) +; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z, +; CM-NEXT: MIN_INT T3.Y, PV.Y, literal.x, +; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.y, PV.X, +; CM-NEXT: ADD * T0.W, T0.Y, T2.X, +; CM-NEXT: 381(5.338947e-43), 1079283712(3.321289e+00) +; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W, +; CM-NEXT: EXP_IEEE T0.Y, T0.W, +; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W, +; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W, +; CM-NEXT: MULADD_IEEE T1.X, T1.Y, literal.x, T0.Z, +; CM-NEXT: MUL_IEEE T4.Y, PV.Y, literal.y, +; CM-NEXT: ADD_INT T0.Z, T3.Y, literal.z, BS:VEC_120/SCL_212 +; CM-NEXT: MAX_INT * T0.W, T2.Y, literal.w, BS:VEC_201 +; CM-NEXT: 975668412(6.390323e-04), 2130706432(1.701412e+38) +; CM-NEXT: -254(nan), -330(nan) +; CM-NEXT: ADD_INT T2.X, T2.Y, literal.x, +; CM-NEXT: ADD_INT T3.Y, PV.W, literal.y, +; CM-NEXT: ADD_INT T1.Z, T2.Y, literal.z, +; CM-NEXT: SETGT_UINT * T0.W, T2.Y, literal.w, +; CM-NEXT: -127(nan), 204(2.858649e-43) +; CM-NEXT: 102(1.429324e-43), -229(nan) +; CM-NEXT: SETGT_UINT T3.X, T2.Y, literal.x, +; CM-NEXT: CNDE_INT T3.Y, PV.W, PV.Y, PV.Z, +; CM-NEXT: SETGT_INT T1.Z, T2.Y, literal.y, +; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.z, BS:VEC_120/SCL_212 +; CM-NEXT: 254(3.559298e-43), -127(nan) +; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T4.X, PV.W, literal.x, +; CM-NEXT: CNDE_INT * T3.Y, PV.Z, PV.Y, T2.Y, +; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) +; CM-NEXT: ALU clause starting at 104: +; CM-NEXT: CNDE_INT T0.Z, T3.X, T2.X, T0.Z, +; CM-NEXT: SETGT_INT * T2.W, T2.Y, literal.x, +; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T2.X, T1.Y, literal.x, +; CM-NEXT: CNDE_INT T1.Y, PV.W, T3.Y, PV.Z, +; CM-NEXT: CNDE_INT T0.Z, T0.W, T4.X, T1.W, +; CM-NEXT: MUL_IEEE * T0.W, T4.Y, literal.y, BS:VEC_201 +; CM-NEXT: 1079283712(3.321289e+00), 2130706432(1.701412e+38) +; CM-NEXT: AND_INT T4.X, KC0[4].X, literal.x, +; CM-NEXT: CNDE_INT T2.Y, T3.X, T4.Y, PV.W, +; CM-NEXT: CNDE_INT T0.Z, T1.Z, PV.Z, T0.Y, +; CM-NEXT: LSHL * T0.W, PV.Y, literal.y, +; CM-NEXT: -4096(nan), 23(3.222986e-44) +; CM-NEXT: ADD_INT T3.X, PV.W, literal.x, +; CM-NEXT: CNDE_INT T0.Y, T2.W, PV.Z, PV.Y, +; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.y, +; CM-NEXT: RNDNE * T0.W, T2.X, +; CM-NEXT: 1065353216(1.000000e+00), 1079283712(3.321289e+00) +; CM-NEXT: ADD T2.X, T2.X, -PV.W, +; CM-NEXT: RNDNE T1.Y, PV.Z, +; CM-NEXT: MUL_IEEE T1.Z, PV.Y, PV.X, +; CM-NEXT: SETGT * T1.W, literal.x, KC0[3].W, +; CM-NEXT: -1036817932(-4.485347e+01), 0(0.000000e+00) +; CM-NEXT: CNDE T3.X, PV.W, PV.Z, 0.0, +; CM-NEXT: TRUNC T0.Y, T0.W, +; CM-NEXT: TRUNC T1.Z, PV.Y, +; CM-NEXT: ADD * T0.W, PV.X, T1.X, +; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W, +; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W, +; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W, +; CM-NEXT: EXP_IEEE * T0.W, T0.W, +; CM-NEXT: FLT_TO_INT T1.X, T1.Z, +; CM-NEXT: FLT_TO_INT T0.Y, T0.Y, +; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x, +; CM-NEXT: ADD * T1.W, KC0[4].X, -T4.X, +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE T2.X, PV.W, literal.x, +; CM-NEXT: MUL_IEEE T2.Y, T0.W, literal.y, +; CM-NEXT: MUL_IEEE T2.Z, PV.Z, literal.z, +; CM-NEXT: SETGT_UINT * T2.W, PV.Y, literal.w, +; CM-NEXT: 975668412(6.390323e-04), 209715200(1.972152e-31) +; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43) +; CM-NEXT: CNDE_INT T5.X, PV.W, T1.Z, PV.Z, +; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x, +; CM-NEXT: MULADD_IEEE T1.Z, T1.W, literal.y, PV.X, +; CM-NEXT: MAX_INT * T1.W, T1.X, literal.z, +; CM-NEXT: 209715200(1.972152e-31), 1079283712(3.321289e+00) +; CM-NEXT: -330(nan), 0(0.000000e+00) +; CM-NEXT: ADD_INT T2.X, PV.W, literal.x, +; CM-NEXT: ADD_INT T4.Y, T1.X, literal.y, +; CM-NEXT: MULADD_IEEE T1.Z, T4.X, literal.z, PV.Z, BS:VEC_120/SCL_212 +; CM-NEXT: MAX_INT * T1.W, T0.Y, literal.w, +; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43) +; CM-NEXT: 975668412(6.390323e-04), -330(nan) +; CM-NEXT: ADD T4.X, T0.Z, -T1.Y, +; CM-NEXT: ADD_INT T1.Y, PV.W, literal.x, +; CM-NEXT: ADD_INT T0.Z, T0.Y, literal.y, +; CM-NEXT: SETGT_UINT * T1.W, T0.Y, literal.z, +; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43) +; CM-NEXT: -229(nan), 0(0.000000e+00) +; CM-NEXT: SETGT_UINT T6.X, T1.X, literal.x, +; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z, +; CM-NEXT: SETGT_INT T0.Z, T0.Y, literal.y, +; CM-NEXT: ADD * T3.W, PV.X, T1.Z, +; CM-NEXT: -229(nan), -127(nan) +; CM-NEXT: EXP_IEEE T1.X (MASKED), T3.W, +; CM-NEXT: EXP_IEEE T1.Y (MASKED), T3.W, +; CM-NEXT: EXP_IEEE T1.Z, T3.W, +; CM-NEXT: EXP_IEEE * T1.W (MASKED), T3.W, +; CM-NEXT: CNDE_INT T4.X, T0.Z, T1.Y, T0.Y, +; CM-NEXT: CNDE_INT T1.Y, T6.X, T2.X, T4.Y, BS:VEC_120/SCL_212 +; CM-NEXT: SETGT_INT T2.Z, T1.X, literal.x, +; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.y, +; CM-NEXT: -127(nan), 209715200(1.972152e-31) +; CM-NEXT: MUL_IEEE T2.X, T1.Z, literal.x, +; CM-NEXT: MUL_IEEE T4.Y, PV.W, literal.y, +; CM-NEXT: CNDE_INT T3.Z, PV.Z, PV.Y, T1.X, +; CM-NEXT: MIN_INT * T4.W, T1.X, literal.z, +; CM-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31) +; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00) +; CM-NEXT: MIN_INT T7.X, T0.Y, literal.x, +; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y, +; CM-NEXT: ADD_INT T4.Z, T1.X, literal.z, +; CM-NEXT: SETGT_UINT * T4.W, T1.X, literal.w, +; CM-NEXT: 381(5.338947e-43), -254(nan) +; CM-NEXT: -127(nan), 254(3.559298e-43) +; CM-NEXT: CNDE_INT T8.X, PV.W, PV.Z, PV.Y, +; CM-NEXT: SETGT_INT T1.Y, T1.X, literal.x, +; CM-NEXT: ADD_INT T4.Z, PV.X, literal.y, +; CM-NEXT: ADD_INT * T5.W, T0.Y, literal.z, +; CM-NEXT: 127(1.779649e-43), -254(nan) +; CM-NEXT: -127(nan), 0(0.000000e+00) +; CM-NEXT: CNDE_INT T1.X, T2.W, PV.W, PV.Z, +; CM-NEXT: CNDE_INT T5.Y, PV.Y, T3.Z, PV.X, +; CM-NEXT: CNDE_INT T3.Z, T6.X, T4.Y, T3.W, +; CM-NEXT: MUL_IEEE * T2.W, T2.X, literal.x, BS:VEC_120/SCL_212 +; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) +; CM-NEXT: SETGT_INT T6.X, T0.Y, literal.x, +; CM-NEXT: CNDE_INT T0.Y, T4.W, T2.X, PV.W, +; CM-NEXT: CNDE_INT * T1.Z, T2.Z, PV.Z, T1.Z, +; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00) +; CM-NEXT: ALU clause starting at 205: +; CM-NEXT: LSHL * T2.W, T5.Y, literal.x, +; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; CM-NEXT: ADD_INT T2.X, PV.W, literal.x, +; CM-NEXT: CNDE_INT T0.Y, T1.Y, T1.Z, T0.Y, +; CM-NEXT: CNDE_INT * T1.Z, T6.X, T4.X, T1.X, +; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00) +; CM-NEXT: CNDE_INT * T1.W, T1.W, T3.Y, T2.Y, +; CM-NEXT: CNDE_INT T1.X, T0.Z, PV.W, T0.W, +; CM-NEXT: LSHL T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212 +; CM-NEXT: MUL_IEEE T0.Z, T0.Y, T2.X, +; CM-NEXT: SETGT * T0.W, literal.y, KC0[4].X, +; CM-NEXT: 23(3.222986e-44), -1036817932(-4.485347e+01) +; CM-NEXT: CNDE T2.X, PV.W, PV.Z, 0.0, +; CM-NEXT: SETGT T0.Y, KC0[4].X, literal.x, +; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y, +; CM-NEXT: CNDE_INT * T0.W, T6.X, PV.X, T5.X, +; CM-NEXT: 1109008539(3.853184e+01), 1065353216(1.000000e+00) +; CM-NEXT: SETGT T1.X, KC0[3].W, literal.x, +; CM-NEXT: MUL_IEEE T1.Y, PV.W, PV.Z, +; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].Z, +; CM-NEXT: CNDE * T0.W, PV.Y, PV.X, literal.z, +; CM-NEXT: 1109008539(3.853184e+01), -1036817932(-4.485347e+01) +; CM-NEXT: 2139095040(INF), 0(0.000000e+00) +; CM-NEXT: SETGT T2.X, literal.x, KC0[3].Y, +; CM-NEXT: CNDE T0.Y, PV.Z, PV.Y, 0.0, +; CM-NEXT: CNDE T0.Z, PV.X, T3.X, literal.y, +; CM-NEXT: SETGT * T1.W, KC0[3].Z, literal.z, +; CM-NEXT: -1036817932(-4.485347e+01), 2139095040(INF) +; CM-NEXT: 1109008539(3.853184e+01), 0(0.000000e+00) +; CM-NEXT: CNDE T0.Y, PV.W, PV.Y, literal.x, +; CM-NEXT: CNDE T1.Z, PV.X, T0.X, 0.0, +; CM-NEXT: SETGT * T1.W, KC0[3].Y, literal.y, +; CM-NEXT: 2139095040(INF), 1109008539(3.853184e+01) +; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x, +; CM-NEXT: 2139095040(INF), 0(0.000000e+00) +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %result = call <4 x float> @llvm.exp10.v4f32(<4 x float> %in) + store <4 x float> %result, ptr addrspace(1) %out + ret void +} + +define float @v_exp10_f32(float %in) { +; VI-SDAG-LABEL: v_exp10_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_fabs_f32(float %in) { +; VI-SDAG-LABEL: v_exp10_fabs_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e64 v4, |v0|, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_fabs_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 +; VI-GISEL-NEXT: v_sub_f32_e64 v2, |v0|, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_fabs_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_fabs_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, |v0|, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v1, |v0|, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, |v0|, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_fabs_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_fabs_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e64 v2, |v0|, v1 +; SI-GISEL-NEXT: v_fma_f32 v1, |v0|, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, |v0|, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_fabs_f32: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_fabs_f32: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %fabs = call float @llvm.fabs.f32(float %in) + %result = call float @llvm.exp10.f32(float %fabs) + ret float %result +} + +define float @v_exp10_fneg_fabs_f32(float %in) { +; VI-SDAG-LABEL: v_exp10_fneg_fabs_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_or_b32_e32 v1, 0x80000000, v0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e64 v4, -|v0|, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x423369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc21a209b +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_fneg_fabs_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_or_b32_e32 v1, 0x80000000, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 +; VI-GISEL-NEXT: v_sub_f32_e64 v2, -|v0|, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_fneg_fabs_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc0549a78 +; GFX900-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xb3979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x423369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc21a209b +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_fneg_fabs_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, -|v0|, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v1, -|v0|, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, -|v0|, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_fneg_fabs_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc0549a78 +; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xb3979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x423369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc21a209b +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_fneg_fabs_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e64 v2, -|v0|, v1 +; SI-GISEL-NEXT: v_fma_f32 v1, -|v0|, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, -|v0|, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_fneg_fabs_f32: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_fneg_fabs_f32: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %fabs = call float @llvm.fabs.f32(float %in) + %fneg.fabs = fneg float %fabs + %result = call float @llvm.exp10.f32(float %fneg.fabs) + ret float %result +} + +define float @v_exp10_fneg_f32(float %in) { +; VI-SDAG-LABEL: v_exp10_fneg_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_xor_b32_e32 v1, 0x80000000, v0 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e64 v4, -v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x423369f4 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc21a209b +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_fneg_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_xor_b32_e32 v1, 0x80000000, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 +; VI-GISEL-NEXT: v_sub_f32_e64 v2, -v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_fneg_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0xc0549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc0549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xb3979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x423369f4 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc21a209b +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_fneg_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, -v0, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v1, -v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, -v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_fneg_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0xc0549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc0549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xb3979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x423369f4 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc21a209b +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_fneg_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e64 v2, -v0, v1 +; SI-GISEL-NEXT: v_fma_f32 v1, -v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, -v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] +; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_fneg_f32: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_fneg_f32: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %fneg = fneg float %in + %result = call float @llvm.exp10.f32(float %fneg) + ret float %result +} + +define float @v_exp10_f32_fast(float %in) { +; GCN-SDAG-LABEL: v_exp10_f32_fast: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc217b818 +; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42000000, v0 +; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GCN-GISEL-LABEL: v_exp10_f32_fast: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_fast: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc217b818 +; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42000000, v0 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_fast: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_fast: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_fast: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call fast float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { +; GCN-SDAG-LABEL: v_exp10_f32_unsafe_math_attr: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc217b818 +; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42000000, v0 +; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GCN-GISEL-LABEL: v_exp10_f32_unsafe_math_attr: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_unsafe_math_attr: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc217b818 +; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42000000, v0 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_unsafe_math_attr: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_unsafe_math_attr: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_unsafe_math_attr: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { +; GCN-SDAG-LABEL: v_exp10_f32_approx_fn_attr: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc217b818 +; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42000000, v0 +; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GCN-GISEL-LABEL: v_exp10_f32_approx_fn_attr: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_approx_fn_attr: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc217b818 +; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42000000, v0 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_approx_fn_attr: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_approx_fn_attr: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_approx_fn_attr: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_ninf(float %in) { +; VI-SDAG-LABEL: v_exp10_f32_ninf: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_ninf: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_ninf: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_ninf: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_ninf: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_ninf: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_ninf: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_ninf: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call ninf float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_afn(float %in) { +; GCN-SDAG-LABEL: v_exp10_f32_afn: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc217b818 +; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42000000, v0 +; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GCN-GISEL-LABEL: v_exp10_f32_afn: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_afn: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc217b818 +; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42000000, v0 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_afn: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_afn: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_afn: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call afn float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_afn_daz(float %in) #0 { +; GCN-SDAG-LABEL: v_exp10_f32_afn_daz: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GCN-GISEL-LABEL: v_exp10_f32_afn_daz: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_afn_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_afn_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_afn_daz: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_afn_daz: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call afn float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_afn_dynamic(float %in) #1 { +; GCN-SDAG-LABEL: v_exp10_f32_afn_dynamic: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc217b818 +; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42000000, v0 +; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GCN-GISEL-LABEL: v_exp10_f32_afn_dynamic: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_afn_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc217b818 +; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42000000, v0 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_afn_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_afn_dynamic: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_afn_dynamic: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call afn float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_fabs_exp10_f32_afn(float %in) { +; GCN-SDAG-LABEL: v_fabs_exp10_f32_afn: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc217b818 +; GCN-SDAG-NEXT: s_mov_b32 s5, 0x42000000 +; GCN-SDAG-NEXT: v_add_f32_e64 v1, |v0|, s5 +; GCN-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GCN-SDAG-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0 +; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GCN-GISEL-LABEL: v_fabs_exp10_f32_afn: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GCN-GISEL-NEXT: v_add_f32_e64 v2, |v0|, v2 +; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GCN-GISEL-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_fabs_exp10_f32_afn: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc217b818 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x42000000 +; SI-SDAG-NEXT: v_add_f32_e64 v1, |v0|, s5 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_fabs_exp10_f32_afn: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_add_f32_e64 v2, |v0|, v2 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_fabs_exp10_f32_afn: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_fabs_exp10_f32_afn: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %fabs = call float @llvm.fabs.f32(float %in) + %result = call afn float @llvm.exp10.f32(float %fabs) + ret float %result +} + +define float @v_exp10_f32_daz(float %in) #0 { +; VI-SDAG-LABEL: v_exp10_f32_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_daz: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_daz: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_nnan(float %in) { +; VI-SDAG-LABEL: v_exp10_f32_nnan: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_nnan: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_nnan: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_nnan: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_nnan: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_nnan: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_nnan: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_nnan: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call nnan float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_nnan_daz(float %in) #0 { +; VI-SDAG-LABEL: v_exp10_f32_nnan_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_nnan_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_nnan_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_nnan_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_nnan_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_nnan_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_nnan_daz: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_nnan_daz: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call nnan float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_nnan_dynamic(float %in) #1 { +; VI-SDAG-LABEL: v_exp10_f32_nnan_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_nnan_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_nnan_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_nnan_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_nnan_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_nnan_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_nnan_dynamic: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_nnan_dynamic: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call nnan float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_ninf_daz(float %in) #0 { +; VI-SDAG-LABEL: v_exp10_f32_ninf_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_ninf_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_ninf_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_ninf_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_ninf_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_ninf_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_ninf_daz: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_ninf_daz: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call ninf float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_ninf_dynamic(float %in) #1 { +; VI-SDAG-LABEL: v_exp10_f32_ninf_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_ninf_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_ninf_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_ninf_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_ninf_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_ninf_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_ninf_dynamic: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_ninf_dynamic: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call ninf float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_nnan_ninf(float %in) { +; VI-SDAG-LABEL: v_exp10_f32_nnan_ninf: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_nnan_ninf: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_nnan_ninf: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_nnan_ninf: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_nnan_ninf: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_nnan_ninf: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_nnan_ninf: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_nnan_ninf: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call nnan ninf float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 { +; VI-SDAG-LABEL: v_exp10_f32_nnan_ninf_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_nnan_ninf_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_nnan_ninf_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_nnan_ninf_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_nnan_ninf_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_nnan_ninf_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_nnan_ninf_daz: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_nnan_ninf_daz: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call nnan ninf float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 { +; VI-SDAG-LABEL: v_exp10_f32_nnan_ninf_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_nnan_ninf_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_nnan_ninf_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_nnan_ninf_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_nnan_ninf_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_nnan_ninf_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_nnan_ninf_dynamic: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_nnan_ninf_dynamic: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call nnan ninf float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_fast_daz(float %in) #0 { +; GCN-SDAG-LABEL: v_exp10_f32_fast_daz: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GCN-GISEL-LABEL: v_exp10_f32_fast_daz: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_fast_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_fast_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_fast_daz: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_fast_daz: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call fast float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_dynamic_mode(float %in) #1 { +; VI-SDAG-LABEL: v_exp10_f32_dynamic_mode: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_dynamic_mode: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_dynamic_mode: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_dynamic_mode: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_dynamic_mode: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_dynamic_mode: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_dynamic_mode: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_dynamic_mode: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_undef() { +; VI-SDAG-LABEL: v_exp10_f32_undef: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_rndne_f32_e32 v0, 0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x7fc00000 +; VI-SDAG-NEXT: v_add_f32_e64 v1, -v0, s4 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_undef: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_sub_f32_e64 v0, s4, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3a2784bc +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v2, v0 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v1 +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_undef: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xffc00000 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v0, v1 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, 0x7fc00000 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, 0x7fc00000, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_undef: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s4, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v0, s4, v0, -v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v0, s4, v2, v0 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_undef: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xffc00000 +; SI-SDAG-NEXT: v_fma_f32 v0, s4, v0, v1 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v1, 0x7fc00000 +; SI-SDAG-NEXT: v_sub_f32_e32 v2, 0x7fc00000, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_undef: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v0 +; SI-GISEL-NEXT: v_fma_f32 v0, s4, v0, -v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v0, s4, v2, v0 +; SI-GISEL-NEXT: v_rndne_f32_e32 v2, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_undef: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_undef: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call float @llvm.exp10.f32(float undef) + ret float %result +} + +define float @v_exp10_f32_0() { +; GCN-LABEL: v_exp10_f32_0: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, 1.0 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; SI-LABEL: v_exp10_f32_0: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, 1.0 +; SI-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_0: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_0: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call float @llvm.exp10.f32(float 0.0) + ret float %result +} + +define float @v_exp10_f32_from_fpext_f16(i16 %src.i) { +; VI-SDAG-LABEL: v_exp10_f32_from_fpext_f16: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x40549000, v3 +; VI-SDAG-NEXT: v_rndne_f32_e32 v4, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v4 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_from_fpext_f16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_from_fpext_f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x33979a37 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s5, v3 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v3 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_from_fpext_f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x33979a37 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v3, v4 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_from_fpext_f16: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x33979a37 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v3, v0, s5, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_from_fpext_f16: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x33979a37 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3 +; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v3, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_from_fpext_f16: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_from_fpext_f16: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %src = bitcast i16 %src.i to half + %fpext = fpext half %src to float + %result = call float @llvm.exp10.f32(float %fpext) + ret float %result +} + +define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { +; VI-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_from_fpext_math_f16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x33979a37 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_from_fpext_math_f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x33979a37 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_from_fpext_math_f16: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x33979a37 +; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3 +; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v3, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_from_fpext_math_f16: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_from_fpext_math_f16: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %src0 = bitcast i16 %src0.i to half + %src1 = bitcast i16 %src1.i to half + %fadd = fadd half %src0, %src1 + %fpext = fpext half %fadd to float + %result = call float @llvm.exp10.f32(float %fpext) + ret float %result +} + +define float @v_exp10_f32_from_fpext_bf16(bfloat %src) { +; VI-LABEL: v_exp10_f32_from_fpext_bf16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-NEXT: v_rndne_f32_e32 v3, v2 +; VI-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-NEXT: v_exp_f32_e32 v1, v1 +; VI-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-NEXT: s_mov_b32 s4, 0x421a209b +; VI-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: v_exp10_f32_from_fpext_bf16: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-NEXT: s_mov_b32 s4, 0x421a209b +; GFX900-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SI-LABEL: v_exp10_f32_from_fpext_bf16: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-NEXT: v_rndne_f32_e32 v2, v1 +; SI-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-NEXT: v_exp_f32_e32 v1, v1 +; SI-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-NEXT: s_mov_b32 s4, 0x421a209b +; SI-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_from_fpext_bf16: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_from_fpext_bf16: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %fpext = fpext bfloat %src to float + %result = call float @llvm.exp10.f32(float %fpext) + ret float %result +} + +define float @v_exp10_f32_from_fpext_math_f16_fast(i16 %src0.i, i16 %src1.i) { +; GCN-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16_fast: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 +; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GCN-GISEL-LABEL: v_exp10_f32_from_fpext_math_f16_fast: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 +; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16_fast: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc217b818 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42000000, v0 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_from_fpext_math_f16_fast: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_from_fpext_math_f16_fast: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_from_fpext_math_f16_fast: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %src0 = bitcast i16 %src0.i to half + %src1 = bitcast i16 %src1.i to half + %fadd = fadd half %src0, %src1 + %fpext = fpext half %fadd to float + %result = call fast float @llvm.exp10.f32(float %fpext) + ret float %result +} + +define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 { +; VI-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_from_fpext_math_f16_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x33979a37 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_from_fpext_math_f16_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x33979a37 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_from_fpext_math_f16_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x33979a37 +; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3 +; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v3 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v3, v4 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_from_fpext_math_f16_daz: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_from_fpext_math_f16_daz: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %src0 = bitcast i16 %src0.i to half + %src1 = bitcast i16 %src1.i to half + %fadd = fadd half %src0, %src1 + %fpext = fpext half %fadd to float + %result = call float @llvm.exp10.f32(float %fpext) + ret float %result +} + +; FIXME: Fold out fp16_to_fp (FP_TO_FP16) on no-f16 targets +define half @v_exp10_f16(half %in) { +; GCN-LABEL: v_exp10_f16: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-NEXT: v_exp_f32_e32 v0, v0 +; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f16: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f16: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f16: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f16: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call half @llvm.exp10.f16(half %in) + ret half %result +} + +define half @v_exp10_fabs_f16(half %in) { +; GCN-LABEL: v_exp10_fabs_f16: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-NEXT: v_exp_f32_e32 v0, v0 +; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_fabs_f16: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_fabs_f16: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_fabs_f16: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_fabs_f16: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %fabs = call half @llvm.fabs.f16(half %in) + %result = call half @llvm.exp10.f16(half %fabs) + ret half %result +} + +define half @v_exp10_fneg_fabs_f16(half %in) { +; GCN-SDAG-LABEL: v_exp10_fneg_fabs_f16: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GCN-GISEL-LABEL: v_exp10_fneg_fabs_f16: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0| +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_fneg_fabs_f16: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_fneg_fabs_f16: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0| +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_fneg_fabs_f16: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_fneg_fabs_f16: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %fabs = call half @llvm.fabs.f16(half %in) + %fneg.fabs = fneg half %fabs + %result = call half @llvm.exp10.f16(half %fneg.fabs) + ret half %result +} + +define half @v_exp10_fneg_f16(half %in) { +; GCN-SDAG-LABEL: v_exp10_fneg_f16: +; GCN-SDAG: ; %bb.0: +; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0 +; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GCN-GISEL-LABEL: v_exp10_fneg_f16: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_fneg_f16: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_fneg_f16: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_fneg_f16: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_fneg_f16: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %fneg = fneg half %in + %result = call half @llvm.exp10.f16(half %fneg) + ret half %result +} + +define half @v_exp10_f16_fast(half %in) { +; GCN-LABEL: v_exp10_f16_fast: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0 +; GCN-NEXT: v_exp_f16_e32 v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f16_fast: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f16_fast: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, 0x3dc5 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f16_fast: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f16_fast: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call fast half @llvm.exp10.f16(half %in) + ret half %result +} + +define <2 x half> @v_exp10_v2f16(<2 x half> %in) { +; VI-SDAG-LABEL: v_exp10_v2f16: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_v2f16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: v_exp10_v2f16: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX900-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_v2f16: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_v2f16: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_v2f16: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_v2f16: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call <2 x half> @llvm.exp10.v2f16(<2 x half> %in) + ret <2 x half> %result +} + +define <2 x half> @v_exp10_fabs_v2f16(<2 x half> %in) { +; VI-SDAG-LABEL: v_exp10_fabs_v2f16: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_fabs_v2f16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_fabs_v2f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v0| +; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_fabs_v2f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_fabs_v2f16: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1| +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_fabs_v2f16: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_fabs_v2f16: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_fabs_v2f16: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) + %result = call <2 x half> @llvm.exp10.v2f16(<2 x half> %fabs) + ret <2 x half> %result +} + +define <2 x half> @v_exp10_fneg_fabs_v2f16(<2 x half> %in) { +; VI-SDAG-LABEL: v_exp10_fneg_fabs_v2f16: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0| +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_fneg_fabs_v2f16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_fneg_fabs_v2f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -|v0| +; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_fneg_fabs_v2f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_fneg_fabs_v2f16: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_fneg_fabs_v2f16: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_fneg_fabs_v2f16: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_fneg_fabs_v2f16: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) + %fneg.fabs = fneg <2 x half> %fabs + %result = call <2 x half> @llvm.exp10.v2f16(<2 x half> %fneg.fabs) + ret <2 x half> %result +} + +define <2 x half> @v_exp10_fneg_v2f16(<2 x half> %in) { +; VI-SDAG-LABEL: v_exp10_fneg_v2f16: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_fneg_v2f16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_fneg_v2f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -v0 +; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_fneg_v2f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_fneg_v2f16: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_fneg_v2f16: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_fneg_v2f16: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_fneg_v2f16: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %fneg = fneg <2 x half> %in + %result = call <2 x half> @llvm.exp10.v2f16(<2 x half> %fneg) + ret <2 x half> %result +} + +define <2 x half> @v_exp10_v2f16_fast(<2 x half> %in) { +; VI-SDAG-LABEL: v_exp10_v2f16_fast: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x3dc5 +; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0 +; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0 +; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_v2f16_fast: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3dc5 +; VI-GISEL-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0 +; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-GISEL-NEXT: v_exp_f16_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_v2f16_fast: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5 +; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0 +; GFX900-SDAG-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v1 +; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_v2f16_fast: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3dc5 +; GFX900-GISEL-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0 +; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX900-GISEL-NEXT: v_exp_f16_e32 v2, v2 +; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0 +; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_v2f16_fast: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_v2f16_fast: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, 0x3dc5 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_v2f16_fast: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_v2f16_fast: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call fast <2 x half> @llvm.exp10.v2f16(<2 x half> %in) + ret <2 x half> %result +} + +define <3 x half> @v_exp10_v3f16(<3 x half> %in) { +; VI-LABEL: v_exp10_v3f16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; VI-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 +; VI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; VI-NEXT: v_exp_f32_e32 v2, v2 +; VI-NEXT: v_exp_f32_e32 v0, v0 +; VI-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; VI-NEXT: v_exp_f32_e32 v1, v1 +; VI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; VI-NEXT: v_or_b32_e32 v0, v2, v0 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: v_exp10_v3f16: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX900-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 +; GFX900-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GFX900-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; GFX900-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX900-NEXT: v_pack_b32_f16 v0, v2, v0 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_v3f16: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_v3f16: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_v3f16: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_v3f16: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call <3 x half> @llvm.exp10.v3f16(<3 x half> %in) + ret <3 x half> %result +} + +define <3 x half> @v_exp10_v3f16_afn(<3 x half> %in) { +; VI-SDAG-LABEL: v_exp10_v3f16_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x3dc5 +; VI-SDAG-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0 +; VI-SDAG-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-SDAG-NEXT: v_exp_f16_e32 v2, v2 +; VI-SDAG-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-SDAG-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1 +; VI-SDAG-NEXT: v_exp_f16_e32 v1, v1 +; VI-SDAG-NEXT: v_or_b32_e32 v0, v2, v0 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_v3f16_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5 +; VI-GISEL-NEXT: v_mul_f16_e32 v3, 0x3dc5, v0 +; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-GISEL-NEXT: v_exp_f16_e32 v3, v3 +; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1 +; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1 +; VI-GISEL-NEXT: v_or_b32_e32 v0, v3, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_v3f16_afn: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5 +; GFX900-SDAG-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0 +; GFX900-SDAG-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX900-SDAG-NEXT: v_exp_f16_e32 v2, v2 +; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1 +; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v1 +; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v2, v0 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_v3f16_afn: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5 +; GFX900-GISEL-NEXT: v_mul_f16_e32 v3, 0x3dc5, v0 +; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX900-GISEL-NEXT: v_exp_f16_e32 v3, v3 +; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1 +; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1 +; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v3 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_v3f16_afn: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_v3f16_afn: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, 0x3dc5 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v3 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_v3f16_afn: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_v3f16_afn: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call afn <3 x half> @llvm.exp10.v3f16(<3 x half> %in) + ret <3 x half> %result +} + +define float @v_exp10_f32_contract(float %in) { +; VI-SDAG-LABEL: v_exp10_f32_contract: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_contract: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_contract: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_contract: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_contract: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_contract: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_contract: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_contract: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call contract float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_contract_daz(float %in) #0 { +; VI-SDAG-LABEL: v_exp10_f32_contract_daz: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_contract_daz: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_contract_daz: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_contract_daz: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_contract_daz: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x421a209b +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_contract_daz: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_contract_daz: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_contract_daz: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call contract float @llvm.exp10.f32(float %in) + ret float %result +} + +define float @v_exp10_f32_contract_nnan_ninf(float %in) { +; VI-SDAG-LABEL: v_exp10_f32_contract_nnan_ninf: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x40549000, v4 +; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp10_f32_contract_nnan_ninf: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549000, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x40549000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp10_f32_contract_nnan_ninf: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp10_f32_contract_nnan_ninf: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SI-SDAG-LABEL: v_exp10_f32_contract_nnan_ninf: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v0 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x40549a78 +; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x33979a37 +; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc23369f4 +; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_exp10_f32_contract_nnan_ninf: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40549a78 +; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x33979a37 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 +; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 +; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 +; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; R600-LABEL: v_exp10_f32_contract_nnan_ninf: +; R600: ; %bb.0: +; R600-NEXT: CF_END +; R600-NEXT: PAD +; +; CM-LABEL: v_exp10_f32_contract_nnan_ninf: +; CM: ; %bb.0: +; CM-NEXT: CF_END +; CM-NEXT: PAD + %result = call contract nnan ninf float @llvm.exp10.f32(float %in) + ret float %result +} + +declare float @llvm.fabs.f32(float) #2 +declare float @llvm.exp10.f32(float) #2 +declare <2 x float> @llvm.exp10.v2f32(<2 x float>) #2 +declare <3 x float> @llvm.exp10.v3f32(<3 x float>) #2 +declare <4 x float> @llvm.exp10.v4f32(<4 x float>) #2 +declare half @llvm.fabs.f16(half) #2 +declare half @llvm.exp10.f16(half) #2 +declare <2 x half> @llvm.exp10.v2f16(<2 x half>) #2 +declare <3 x half> @llvm.exp10.v3f16(<3 x half>) #2 +declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2 + +attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" } +attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" } +attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }