Skip to content

Commit

Permalink
SWDEV-179955 - OpenCL/LC - Merge branch master into amd-master
Browse files Browse the repository at this point in the history
Change-Id: I38322d1f54cd45b23977939c21a7b5aefb0be25c
  • Loading branch information
Jenkins committed May 15, 2019
2 parents bd58ea0 + 7c3b5ad commit 0cd1eb7
Show file tree
Hide file tree
Showing 38 changed files with 825 additions and 241 deletions.
4 changes: 2 additions & 2 deletions docs/CommandGuide/FileCheck.rst
Original file line number Diff line number Diff line change
Expand Up @@ -593,13 +593,13 @@ For example:
The above example would match the line:

-.. code-block:: llvm
+.. code-block:: gas
add r5, r5, r6
but would not match the line:

-.. code-block:: llvm
+.. code-block:: gas
add r5, r5, r7
Expand Down
14 changes: 10 additions & 4 deletions lib/Analysis/InstructionSimplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4316,16 +4316,22 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
(FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
return Op0;

-// With nnan: (+/-0.0 - X) + X --> 0.0 (and commuted variant)
+// With nnan: -X + X --> 0.0 (and commuted variant)
// We don't have to explicitly exclude infinities (ninf): INF + -INF == NaN.
// Negative zeros are allowed because we always end up with positive zero:
// X = -0.0: (-0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0
// X = -0.0: ( 0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0
// X = 0.0: (-0.0 - ( 0.0)) + ( 0.0) == (-0.0) + ( 0.0) == 0.0
// X = 0.0: ( 0.0 - ( 0.0)) + ( 0.0) == ( 0.0) + ( 0.0) == 0.0
-if (FMF.noNaNs() && (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) ||
-match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0)))))
-return ConstantFP::getNullValue(Op0->getType());
+if (FMF.noNaNs()) {
+if (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) ||
+match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0))))
+return ConstantFP::getNullValue(Op0->getType());
+
+if (match(Op0, m_FNeg(m_Specific(Op1))) ||
+match(Op1, m_FNeg(m_Specific(Op0))))
+return ConstantFP::getNullValue(Op0->getType());
+}

// (X - Y) + Y --> X
// Y + (X - Y) --> X
Expand Down
2 changes: 2 additions & 0 deletions lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19830,6 +19830,8 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
return false;
}

+// Try to prove that there is aliasing, or that there is no aliasing. Either
+// way, we can return now. If nothing can be proved, proceed with more tests.
bool IsAlias;
if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
DAG, IsAlias))
Expand Down
6 changes: 3 additions & 3 deletions lib/Support/APFloat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4418,9 +4418,9 @@ APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
return;
}
if (usesLayout<DoubleAPFloat>(Semantics)) {
-const fltSemantics IEEESemantics = F.getSemantics();
-new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), IEEESemantics),
-APFloat(semIEEEdouble));
+new (&Double)
+DoubleAPFloat(Semantics, APFloat(std::move(F), F.getSemantics()),
+APFloat(semIEEEdouble));
return;
}
llvm_unreachable("Unexpected semantics");
Expand Down
22 changes: 20 additions & 2 deletions lib/Target/AMDGPU/AMDGPUCallingConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,16 @@ def CC_SI : CallingConv<[
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
-SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
+SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
+SGPR40, SGPR41, SGPR42, SGPR43, SGPR44, SGPR45, SGPR46, SGPR47,
+SGPR48, SGPR49, SGPR50, SGPR51, SGPR52, SGPR53, SGPR54, SGPR55,
+SGPR56, SGPR57, SGPR58, SGPR59, SGPR60, SGPR61, SGPR62, SGPR63,
+SGPR64, SGPR65, SGPR66, SGPR67, SGPR68, SGPR69, SGPR70, SGPR71,
+SGPR72, SGPR73, SGPR74, SGPR75, SGPR76, SGPR77, SGPR78, SGPR79,
+SGPR80, SGPR81, SGPR82, SGPR83, SGPR84, SGPR85, SGPR86, SGPR87,
+SGPR88, SGPR89, SGPR90, SGPR91, SGPR92, SGPR93, SGPR94, SGPR95,
+SGPR96, SGPR97, SGPR98, SGPR99, SGPR100, SGPR101, SGPR102, SGPR103,
+SGPR104, SGPR105
]>>>,

// We have no way of referring to the generated register tuples
Expand Down Expand Up @@ -59,7 +68,16 @@ def RetCC_SI_Shader : CallingConv<[
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
-SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
+SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
+SGPR40, SGPR41, SGPR42, SGPR43, SGPR44, SGPR45, SGPR46, SGPR47,
+SGPR48, SGPR49, SGPR50, SGPR51, SGPR52, SGPR53, SGPR54, SGPR55,
+SGPR56, SGPR57, SGPR58, SGPR59, SGPR60, SGPR61, SGPR62, SGPR63,
+SGPR64, SGPR65, SGPR66, SGPR67, SGPR68, SGPR69, SGPR70, SGPR71,
+SGPR72, SGPR73, SGPR74, SGPR75, SGPR76, SGPR77, SGPR78, SGPR79,
+SGPR80, SGPR81, SGPR82, SGPR83, SGPR84, SGPR85, SGPR86, SGPR87,
+SGPR88, SGPR89, SGPR90, SGPR91, SGPR92, SGPR93, SGPR94, SGPR95,
+SGPR96, SGPR97, SGPR98, SGPR99, SGPR100, SGPR101, SGPR102, SGPR103,
+SGPR104, SGPR105
]>>,

// 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
Expand Down
4 changes: 2 additions & 2 deletions lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
case MVT::v2f32:
case MVT::v4i16:
case MVT::v4f16: {
-// Up to SGPR0-SGPR39
+// Up to SGPR0-SGPR105
return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
-&AMDGPU::SGPR_64RegClass, 20);
+&AMDGPU::SGPR_64RegClass, 53);
}
default:
return false;
Expand Down
20 changes: 15 additions & 5 deletions lib/Target/ARM/ARM.td
Original file line number Diff line number Diff line change
Expand Up @@ -978,21 +978,27 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
FeatureHasSlowFPVMLx,
FeatureAvoidPartialCPSR]>;

-def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m,
+def : ProcessorModel<"cortex-m3", CortexM4Model, [ARMv7m,
ProcM3,
FeaturePrefLoopAlign32,
+FeatureUseMISched,
+FeatureUseAA,
FeatureHasNoBranchPredictor]>;

-def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m,
+def : ProcessorModel<"sc300", CortexM4Model, [ARMv7m,
ProcM3,
+FeatureUseMISched,
+FeatureUseAA,
FeatureHasNoBranchPredictor]>;

-def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em,
+def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em,
FeatureVFP4,
FeatureVFPOnlySP,
FeatureD16,
FeaturePrefLoopAlign32,
FeatureHasSlowFPVMLx,
+FeatureUseMISched,
+FeatureUseAA,
FeatureHasNoBranchPredictor]>;

def : ProcNoItin<"cortex-m7", [ARMv7em,
Expand All @@ -1002,22 +1008,26 @@ def : ProcNoItin<"cortex-m7", [ARMv7em,
def : ProcNoItin<"cortex-m23", [ARMv8mBaseline,
FeatureNoMovt]>;

-def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline,
+def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline,
FeatureDSP,
FeatureFPARMv8,
FeatureD16,
FeatureVFPOnlySP,
FeaturePrefLoopAlign32,
FeatureHasSlowFPVMLx,
+FeatureUseMISched,
+FeatureUseAA,
FeatureHasNoBranchPredictor]>;

-def : ProcessorModel<"cortex-m35p", CortexM3Model, [ARMv8mMainline,
+def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline,
FeatureDSP,
FeatureFPARMv8,
FeatureD16,
FeatureVFPOnlySP,
FeaturePrefLoopAlign32,
FeatureHasSlowFPVMLx,
+FeatureUseMISched,
+FeatureUseAA,
FeatureHasNoBranchPredictor]>;


Expand Down
2 changes: 1 addition & 1 deletion lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1184,7 +1184,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setStackPointerRegisterToSaveRestore(ARM::SP);

if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
-!Subtarget->hasVFP2())
+!Subtarget->hasVFP2() || Subtarget->hasMinSize())
setSchedulingPreference(Sched::RegPressure);
else
setSchedulingPreference(Sched::Hybrid);
Expand Down
28 changes: 14 additions & 14 deletions lib/Target/ARM/ARMInstrThumb.td
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 10 in
def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
"ldr", "\t$Rt, $addr",
[(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
-T1Encoding<{0,1,0,0,1,?}> {
+T1Encoding<{0,1,0,0,1,?}>, Sched<[WriteLd]> {
// A6.2 & A8.6.59
bits<3> Rt;
bits<8> addr;
Expand All @@ -677,7 +677,7 @@ let canFoldAsLoad = 1 in
def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
"ldr", "\t$Rt, $addr",
[(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
-T1LdStSP<{1,?,?}> {
+T1LdStSP<{1,?,?}>, Sched<[WriteLd]> {
bits<3> Rt;
bits<8> addr;
let Inst{10-8} = Rt;
Expand Down Expand Up @@ -728,39 +728,39 @@ multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr,
t_addrmode_is4, AddrModeT1_4,
IIC_iLoad_r, IIC_iLoad_i, "ldr",
-load>;
+load>, Sched<[WriteLd]>;

// A8.6.64 & A8.6.61
defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr,
t_addrmode_is1, AddrModeT1_1,
IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
-zextloadi8>;
+zextloadi8>, Sched<[WriteLd]>;

// A8.6.76 & A8.6.73
defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr,
t_addrmode_is2, AddrModeT1_2,
IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
-zextloadi16>;
+zextloadi16>, Sched<[WriteLd]>;

let AddedComplexity = 10 in
def tLDRSB : // A8.6.80
T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr_sext:$addr),
AddrModeT1_1, IIC_iLoad_bh_r,
"ldrsb", "\t$Rt, $addr",
-[(set tGPR:$Rt, (sextloadi8 t_addrmode_rr_sext:$addr))]>;
+[(set tGPR:$Rt, (sextloadi8 t_addrmode_rr_sext:$addr))]>, Sched<[WriteLd]>;

let AddedComplexity = 10 in
def tLDRSH : // A8.6.84
T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr_sext:$addr),
AddrModeT1_2, IIC_iLoad_bh_r,
"ldrsh", "\t$Rt, $addr",
-[(set tGPR:$Rt, (sextloadi16 t_addrmode_rr_sext:$addr))]>;
+[(set tGPR:$Rt, (sextloadi16 t_addrmode_rr_sext:$addr))]>, Sched<[WriteLd]>;


def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
"str", "\t$Rt, $addr",
[(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
-T1LdStSP<{0,?,?}> {
+T1LdStSP<{0,?,?}>, Sched<[WriteST]> {
bits<3> Rt;
bits<8> addr;
let Inst{10-8} = Rt;
Expand All @@ -771,19 +771,19 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr,
t_addrmode_is4, AddrModeT1_4,
IIC_iStore_r, IIC_iStore_i, "str",
-store>;
+store>, Sched<[WriteST]>;

// A8.6.197 & A8.6.195
defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr,
t_addrmode_is1, AddrModeT1_1,
IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
-truncstorei8>;
+truncstorei8>, Sched<[WriteST]>;

// A8.6.207 & A8.6.205
defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr,
t_addrmode_is2, AddrModeT1_2,
IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
-truncstorei16>;
+truncstorei16>, Sched<[WriteST]>;


//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -843,7 +843,7 @@ let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1,
def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
IIC_iPop,
"pop${p}\t$regs", []>,
-T1Misc<{1,1,0,?,?,?,?}> {
+T1Misc<{1,1,0,?,?,?,?}>, Sched<[WriteLd]> {
bits<16> regs;
let Inst{8} = regs{15};
let Inst{7-0} = regs{7-0};
Expand All @@ -853,7 +853,7 @@ let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
IIC_iStore_m,
"push${p}\t$regs", []>,
-T1Misc<{0,1,0,?,?,?,?}> {
+T1Misc<{0,1,0,?,?,?,?}>, Sched<[WriteST]> {
bits<16> regs;
let Inst{8} = regs{14};
let Inst{7-0} = regs{7-0};
Expand Down Expand Up @@ -1214,7 +1214,7 @@ def tMUL : // A8.6.105 T1
Thumb1sI<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), AddrModeNone, 2,
IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", "$Rm = $Rd",
[(set tGPR:$Rd, (mul tGPR:$Rn, tGPR:$Rm))]>,
-T1DataProcessing<0b1101> {
+T1DataProcessing<0b1101>, Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
bits<3> Rd;
bits<3> Rn;
let Inst{5-3} = Rn;
Expand Down
Loading

0 comments on commit 0cd1eb7

Please sign in to comment.