From 90494c279db4cb3b7a4d0e5c2c4ce4bf98569e57 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 14 Aug 2023 11:59:20 -0400 Subject: [PATCH] Fix Expr(:loopinfo) codegen (#50663) We used a loop-marker intrinsic because the LoopID used to be dropped by optimization passes (this seems no longer true). #50660 is an example of a miscompilation where a loop of length 1 got optimized by simplifycfg to the point where the loop-marker is now attached to the wrong back-edge. This PR drops the loop-marker and uses the LoopID metadata node directly. --- doc/src/devdocs/llvm-passes.md | 4 +- src/codegen.cpp | 45 ++-- src/jl_exported_funcs.inc | 2 +- src/llvm-julia-passes.inc | 2 +- src/llvm-simdloop.cpp | 241 +++++++----------- src/passes.h | 10 +- src/pipeline.cpp | 2 +- test/llvmpasses/julia-simdloop-memoryssa.ll | 55 ++++ .../{simdloop.ll => julia-simdloop.ll} | 29 +-- test/llvmpasses/loopinfo.jl | 22 +- test/llvmpasses/parsing.ll | 2 +- 11 files changed, 210 insertions(+), 204 deletions(-) create mode 100644 test/llvmpasses/julia-simdloop-memoryssa.ll rename test/llvmpasses/{simdloop.ll => julia-simdloop.ll} (75%) diff --git a/doc/src/devdocs/llvm-passes.md b/doc/src/devdocs/llvm-passes.md index eec8b07c2701e..d4d5b92421277 100644 --- a/doc/src/devdocs/llvm-passes.md +++ b/doc/src/devdocs/llvm-passes.md @@ -58,9 +58,9 @@ This pass removes the non-integral address spaces from the module's datalayout s * Filename: `llvm-simdloop.cpp` * Class Name: `LowerSIMDLoopPass` -* Opt Name: `module(LowerSIMDLoop)` +* Opt Name: `loop(LowerSIMDLoop)` -This pass acts as the main driver of the `@simd` annotation. Codegen inserts a call to a marker intrinsic (`julia.simdloop`), which this pass uses to identify loops that were originally marked with `@simd`. Then, this pass looks for a chain of floating point operations that form a reduce and adds the `contract` and `reassoc` fast math flags to allow reassociation (and thus vectorization). This pass does not preserve either loop information nor inference correctness, so it may violate Julia semantics in surprising ways. If the loop was annotated with `ivdep` as well, then the pass marks the loop as having no loop-carried dependencies (the resulting behavior is undefined if the user annotation was incorrect or gets applied to the wrong loop). +This pass acts as the main driver of the `@simd` annotation. Codegen inserts a `!llvm.loopid` marker at the back branch of a loop, which this pass uses to identify loops that were originally marked with `@simd`. Then, this pass looks for a chain of floating point operations that form a reduce and adds the `contract` and `reassoc` fast math flags to allow reassociation (and thus vectorization). This pass does not preserve either loop information nor inference correctness, so it may violate Julia semantics in surprising ways. If the loop was annotated with `ivdep` as well, then the pass marks the loop as having no loop-carried dependencies (the resulting behavior is undefined if the user annotation was incorrect or gets applied to the wrong loop). ### LowerPTLS diff --git a/src/codegen.cpp b/src/codegen.cpp index 119b9fcafa2a4..3c0f76709e771 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -992,14 +992,7 @@ static const auto jl_typeof_func = new JuliaFunction<>{ Attributes(C, {Attribute::NonNull}), None); }, }; -static const auto jl_loopinfo_marker_func = new JuliaFunction<>{ - "julia.loopinfo_marker", - [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), false); }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::ReadOnly, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}), - AttributeSet(), - None); }, -}; + static const auto jl_write_barrier_func = new JuliaFunction<>{ "julia.write_barrier", [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), @@ -1609,6 +1602,7 @@ class jl_codectx_t { jl_codegen_params_t &emission_context; llvm::MapVector call_targets; Function *f = NULL; + MDNode* LoopID = NULL; // local var info. globals are not in here. std::vector slots; std::map phic_slots; @@ -5773,16 +5767,22 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ } else if (head == jl_loopinfo_sym) { // parse Expr(:loopinfo, "julia.simdloop", ("llvm.loop.vectorize.width", 4)) + // to LLVM LoopID SmallVector MDs; + + // Reserve first location for self reference to the LoopID metadata node. + TempMDTuple TempNode = MDNode::getTemporary(ctx.builder.getContext(), None); + MDs.push_back(TempNode.get()); + for (int i = 0, ie = nargs; i < ie; ++i) { Metadata *MD = to_md_tree(args[i], ctx.builder.getContext()); if (MD) MDs.push_back(MD); } - MDNode* MD = MDNode::get(ctx.builder.getContext(), MDs); - CallInst *I = ctx.builder.CreateCall(prepare_call(jl_loopinfo_marker_func)); - I->setMetadata("julia.loopinfo", MD); + ctx.LoopID = MDNode::getDistinct(ctx.builder.getContext(), MDs); + // Replace the temporary node with a self-reference. + ctx.LoopID->replaceOperandWith(0, ctx.LoopID); return jl_cgval_t(); } else if (head == jl_leave_sym || head == jl_coverageeffect_sym @@ -8125,6 +8125,7 @@ static jl_llvm_functions_t std::map BB; std::map come_from_bb; int cursor = 0; + int current_label = 0; auto find_next_stmt = [&] (int seq_next) { // new style ir is always in dominance order, but frontend IR might not be // `seq_next` is the next statement we want to emit @@ -8141,6 +8142,7 @@ static jl_llvm_functions_t workstack.pop_back(); auto nextbb = BB.find(item + 1); if (nextbb == BB.end()) { + // Not a BB cursor = item; return; } @@ -8151,8 +8153,10 @@ static jl_llvm_functions_t seq_next = -1; // if this BB is non-empty, we've visited it before so skip it if (!nextbb->second->getTerminator()) { + // New BB ctx.builder.SetInsertPoint(nextbb->second); cursor = item; + current_label = item; return; } } @@ -8399,7 +8403,12 @@ static jl_llvm_functions_t if (jl_is_gotonode(stmt)) { int lname = jl_gotonode_label(stmt); come_from_bb[cursor+1] = ctx.builder.GetInsertBlock(); - ctx.builder.CreateBr(BB[lname]); + auto br = ctx.builder.CreateBr(BB[lname]); + // Check if backwards branch + if (ctx.LoopID && lname <= current_label) { + br->setMetadata(LLVMContext::MD_loop, ctx.LoopID); + ctx.LoopID = NULL; + } find_next_stmt(lname - 1); continue; } @@ -8417,10 +8426,17 @@ static jl_llvm_functions_t workstack.push_back(lname - 1); BasicBlock *ifnot = BB[lname]; BasicBlock *ifso = BB[cursor+2]; + Instruction *br; if (ifnot == ifso) - ctx.builder.CreateBr(ifnot); + br = ctx.builder.CreateBr(ifnot); else - ctx.builder.CreateCondBr(isfalse, ifnot, ifso); + br = ctx.builder.CreateCondBr(isfalse, ifnot, ifso); + + // Check if backwards branch + if (ctx.LoopID && lname <= current_label) { + br->setMetadata(LLVMContext::MD_loop, ctx.LoopID); + ctx.LoopID = NULL; + } find_next_stmt(cursor + 1); continue; } @@ -9163,7 +9179,6 @@ static void init_jit_functions(void) add_named_global(jl_object_id__func, &jl_object_id_); add_named_global(jl_alloc_obj_func, (void*)NULL); add_named_global(jl_newbits_func, (void*)jl_new_bits); - add_named_global(jl_loopinfo_marker_func, (void*)NULL); add_named_global(jl_typeof_func, (void*)NULL); add_named_global(jl_write_barrier_func, (void*)NULL); add_named_global(jldlsym_func, &jl_load_and_lookup); diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 7d54d13d699d0..704235f3b21eb 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -582,7 +582,6 @@ YY(LLVMExtraAddCPUFeaturesPass) \ YY(LLVMExtraMPMAddCPUFeaturesPass) \ YY(LLVMExtraMPMAddRemoveNIPass) \ - YY(LLVMExtraMPMAddLowerSIMDLoopPass) \ YY(LLVMExtraMPMAddMultiVersioningPass) \ YY(LLVMExtraMPMAddRemoveJuliaAddrspacesPass) \ YY(LLVMExtraMPMAddRemoveAddrspacesPass) \ @@ -596,6 +595,7 @@ YY(LLVMExtraFPMAddGCInvariantVerifierPass) \ YY(LLVMExtraFPMAddFinalLowerGCPass) \ YY(LLVMExtraLPMAddJuliaLICMPass) \ + YY(LLVMExtraLPMAddLowerSIMDLoopPass) \ YY(JLJITGetLLVMOrcExecutionSession) \ YY(JLJITGetJuliaOJIT) \ YY(JLJITGetExternalJITDylib) \ diff --git a/src/llvm-julia-passes.inc b/src/llvm-julia-passes.inc index 198ab0e868538..bd89c01c6fdfe 100644 --- a/src/llvm-julia-passes.inc +++ b/src/llvm-julia-passes.inc @@ -2,7 +2,6 @@ #ifdef MODULE_PASS MODULE_PASS("CPUFeatures", CPUFeaturesPass, CPUFeaturesPass()) MODULE_PASS("RemoveNI", RemoveNIPass, RemoveNIPass()) -MODULE_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass()) MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass, MultiVersioningPass()) MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass, RemoveJuliaAddrspacesPass()) MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass, RemoveAddrspacesPass()) @@ -24,4 +23,5 @@ FUNCTION_PASS("FinalLowerGC", FinalLowerGCPass, FinalLowerGCPass()) //Loop passes #ifdef LOOP_PASS LOOP_PASS("JuliaLICM", JuliaLICMPass, JuliaLICMPass()) +LOOP_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass()) #endif diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index a2ac6fcfbf7a0..f6a1e3a501a98 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -73,7 +74,7 @@ static unsigned getReduceOpcode(Instruction *J, Instruction *operand) JL_NOTSAFE /// If Phi is part of a reduction cycle of FAdd, FSub, FMul or FDiv, /// mark the ops as permitting reassociation/commuting. /// As of LLVM 4.0, FDiv is not handled by the loop vectorizer -static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT +static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution *SE) JL_NOTSAFEPOINT { typedef SmallVector chainVector; chainVector chain; @@ -84,7 +85,7 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRe // Find the user of instruction I that is within loop L. for (User *UI : I->users()) { /*}*/ Instruction *U = cast(UI); - if (L->contains(U)) { + if (L.contains(U)) { if (J) { LLVM_DEBUG(dbgs() << "LSL: not a reduction var because op has two internal uses: " << *I << "\n"); REMARK([&]() { @@ -151,128 +152,94 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRe }); (*K)->setHasAllowReassoc(true); (*K)->setHasAllowContract(true); + if (SE) + SE->forgetValue(*K); ++length; } ReductionChainLength += length; MaxChainLength.updateMax(length); } -static bool markLoopInfo(Module &M, Function *marker, function_ref GetLI) JL_NOTSAFEPOINT +static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution *SE) JL_NOTSAFEPOINT { - bool Changed = false; - std::vector ToDelete; - for (User *U : marker->users()) { - ++TotalMarkedLoops; - Instruction *I = cast(U); - ToDelete.push_back(I); - - BasicBlock *B = I->getParent(); - OptimizationRemarkEmitter ORE(B->getParent()); - LoopInfo &LI = GetLI(*B->getParent()); - Loop *L = LI.getLoopFor(B); - if (!L) { - I->removeFromParent(); - continue; - } - - LLVM_DEBUG(dbgs() << "LSL: loopinfo marker found\n"); - bool simd = false; - bool ivdep = false; - SmallVector MDs; - - BasicBlock *Lh = L->getHeader(); - LLVM_DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n"); - - // Reserve first location for self reference to the LoopID metadata node. - TempMDTuple TempNode = MDNode::getTemporary(Lh->getContext(), None); - MDs.push_back(TempNode.get()); - - // Walk `julia.loopinfo` metadata and filter out `julia.simdloop` and `julia.ivdep` - if (I->hasMetadataOtherThanDebugLoc()) { - MDNode *JLMD= I->getMetadata("julia.loopinfo"); - if (JLMD) { - LLVM_DEBUG(dbgs() << "LSL: has julia.loopinfo metadata with " << JLMD->getNumOperands() <<" operands\n"); - for (unsigned i = 0, ie = JLMD->getNumOperands(); i < ie; ++i) { - Metadata *Op = JLMD->getOperand(i); - const MDString *S = dyn_cast(Op); - if (S) { - LLVM_DEBUG(dbgs() << "LSL: found " << S->getString() << "\n"); - if (S->getString().startswith("julia")) { - if (S->getString().equals("julia.simdloop")) - simd = true; - if (S->getString().equals("julia.ivdep")) - ivdep = true; - continue; - } - } - MDs.push_back(Op); - } - } - } - - LLVM_DEBUG(dbgs() << "LSL: simd: " << simd << " ivdep: " << ivdep << "\n"); - - REMARK([=]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", I->getDebugLoc(), B) - << "Loop marked for SIMD vectorization with flags { \"simd\": " << (simd ? "true" : "false") << ", \"ivdep\": " << (ivdep ? "true" : "false") << " }"; - }); - - MDNode *n = L->getLoopID(); - if (n) { - // Loop already has a LoopID so copy over Metadata - // original loop id is operand 0 - for (unsigned i = 1, ie = n->getNumOperands(); i < ie; ++i) { - Metadata *Op = n->getOperand(i); - MDs.push_back(Op); + MDNode *LoopID = L.getLoopID(); + if (!LoopID) + return false; + bool simd = false; + bool ivdep = false; + + BasicBlock *Lh = L.getHeader(); + LLVM_DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n"); + + SmallVector MDs(1); + // First Operand is self-reference + // Drop `julia.` prefixes + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + Metadata *Op = LoopID->getOperand(i); + const MDString *S = dyn_cast(Op); + if (S) { + LLVM_DEBUG(dbgs() << "LSL: found " << S->getString() << "\n"); + if (S->getString().startswith("julia")) { + if (S->getString().equals("julia.simdloop")) + simd = true; + if (S->getString().equals("julia.ivdep")) + ivdep = true; + continue; } } - MDNode *LoopID = MDNode::getDistinct(Lh->getContext(), MDs); - // Replace the temporary node with a self-reference. - LoopID->replaceOperandWith(0, LoopID); - L->setLoopID(LoopID); - assert(L->getLoopID()); + MDs.push_back(Op); + } + LLVM_DEBUG(dbgs() << "LSL: simd: " << simd << " ivdep: " << ivdep << "\n"); + if (!simd && !ivdep) + return false; + + LLVMContext &Context = L.getHeader()->getContext(); + LoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself + LoopID->replaceOperandWith(0, LoopID); + L.setLoopID(LoopID); + + REMARK([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", L.getStartLoc(), L.getHeader()) + << "Loop marked for SIMD vectorization with flags { \"simd\": " << (simd ? "true" : "false") << ", \"ivdep\": " << (ivdep ? "true" : "false") << " }"; + }); + + // If ivdep is true we assume that there is no memory dependency between loop iterations + // This is a fairly strong assumption and does often not hold true for generic code. + if (ivdep) { + ++IVDepLoops; MDNode *m = MDNode::get(Lh->getContext(), ArrayRef(LoopID)); - - // If ivdep is true we assume that there is no memory dependency between loop iterations - // This is a fairly strong assumption and does often not hold true for generic code. - if (ivdep) { - ++IVDepLoops; - // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop. - for (BasicBlock *BB : L->blocks()) { - for (Instruction &I : *BB) { - if (I.mayReadOrWriteMemory()) { - ++IVDepInstructions; - I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m); - } - } + // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop. + for (BasicBlock *BB : L.blocks()) { + for (Instruction &I : *BB) { + if (I.mayReadOrWriteMemory()) { + ++IVDepInstructions; + I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m); + } } - assert(L->isAnnotatedParallel()); } + assert(L.isAnnotatedParallel()); + } - if (simd) { - ++SimdLoops; - // Mark floating-point reductions as okay to reassociate/commute. - for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) { - if (PHINode *Phi = dyn_cast(I)) - enableUnsafeAlgebraIfReduction(Phi, L, ORE); - else - break; - } + if (simd) { + ++SimdLoops; + // Mark floating-point reductions as okay to reassociate/commute. + for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) { + if (PHINode *Phi = dyn_cast(I)) + enableUnsafeAlgebraIfReduction(Phi, L, ORE, SE); + else + break; } - I->removeFromParent(); - - Changed = true; + if (SE) + SE->forgetLoopDispositions(&L); } - for (Instruction *I : ToDelete) - I->deleteValue(); - marker->eraseFromParent(); #ifdef JL_VERIFY_PASSES - assert(!verifyLLVMIR(M)); + assert(!verifyLLVMIR(L)); #endif - return Changed; + return true; } } // end anonymous namespace @@ -283,23 +250,19 @@ static bool markLoopInfo(Module &M, Function *marker, function_ref(M).getManager(); +PreservedAnalyses LowerSIMDLoopPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) - auto GetLI = [&FAM](Function &F) -> LoopInfo & { - return FAM.getResult(F); - }; - - if (markLoopInfo(M, loopinfo_marker, GetLI)) { - auto preserved = PreservedAnalyses::allInSet(); - preserved.preserve(); +{ + OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); + if (processLoop(L, ORE, &AR.SE)) { +#ifdef JL_DEBUG_BUILD + if (AR.MSSA) + AR.MSSA->verifyMemorySSA(); +#endif + auto preserved = getLoopPassPreservedAnalyses(); + preserved.preserveSet(); + preserved.preserve(); return preserved; } @@ -307,37 +270,23 @@ PreservedAnalyses LowerSIMDLoopPass::run(Module &M, ModuleAnalysisManager &AM) } namespace { -class LowerSIMDLoopLegacy : public ModulePass { - //LowerSIMDLoop Impl; +class LowerSIMDLoopLegacy : public LoopPass { public: - static char ID; + static char ID; - LowerSIMDLoopLegacy() : ModulePass(ID) { - } - - bool runOnModule(Module &M) override { - bool Changed = false; - - Function *loopinfo_marker = M.getFunction("julia.loopinfo_marker"); - - auto GetLI = [this](Function &F) JL_NOTSAFEPOINT -> LoopInfo & { - return getAnalysis(F).getLoopInfo(); - }; - - if (loopinfo_marker) - Changed |= markLoopInfo(M, loopinfo_marker, GetLI); + LowerSIMDLoopLegacy() : LoopPass(ID) { + } - return Changed; - } + bool runOnLoop(Loop *L, LPPassManager &LPM) override + { + OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); + return processLoop(*L, ORE, nullptr); + } - void getAnalysisUsage(AnalysisUsage &AU) const override - { - ModulePass::getAnalysisUsage(AU); - AU.addRequired(); - AU.addPreserved(); - AU.setPreservesCFG(); - } + void getAnalysisUsage(AnalysisUsage &AU) const override { + getLoopAnalysisUsage(AU); + } }; } // end anonymous namespace diff --git a/src/passes.h b/src/passes.h index 1f4aa2e4bb89a..9c3b0421670b5 100644 --- a/src/passes.h +++ b/src/passes.h @@ -62,11 +62,6 @@ struct RemoveNIPass : PassInfoMixin { static bool isRequired() { return true; } }; -struct LowerSIMDLoopPass : PassInfoMixin { - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; - static bool isRequired() { return true; } -}; - struct MultiVersioningPass : PassInfoMixin { bool external_use; MultiVersioningPass(bool external_use = false) : external_use(external_use) {} @@ -103,6 +98,11 @@ struct JuliaLICMPass : PassInfoMixin { LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT; }; +struct LowerSIMDLoopPass : PassInfoMixin { + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT; +}; + #define MODULE_MARKER_PASS(NAME) \ struct NAME##MarkerPass : PassInfoMixin { \ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT { return PreservedAnalyses::all(); } \ diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 6830252997460..3ed40f018f23f 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -402,6 +402,7 @@ static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB FPM.addPass(BeforeLoopOptimizationMarkerPass()); { LoopPassManager LPM; + LPM.addPass(LowerSIMDLoopPass()); if (O.getSpeedupLevel() >= 2) { LPM.addPass(LoopRotatePass()); } @@ -562,7 +563,6 @@ static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationL buildEarlySimplificationPipeline(MPM, PB, O, options); MPM.addPass(AlwaysInlinerPass()); buildEarlyOptimizerPipeline(MPM, PB, O, options); - MPM.addPass(LowerSIMDLoopPass()); { FunctionPassManager FPM; buildLoopOptimizerPipeline(FPM, PB, O, options); diff --git a/test/llvmpasses/julia-simdloop-memoryssa.ll b/test/llvmpasses/julia-simdloop-memoryssa.ll new file mode 100644 index 0000000000000..0c1c4ac021996 --- /dev/null +++ b/test/llvmpasses/julia-simdloop-memoryssa.ll @@ -0,0 +1,55 @@ +; COM: NewPM-only test, tests that memoryssa is preserved correctly + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(LowerSIMDLoop),print)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(LowerSIMDLoop),print)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK + +; CHECK-LABEL: MemorySSA for function: simd_test +; CHECK-LABEL: @simd_test( +define void @simd_test(double *%a, double *%b) { +; CHECK: top: +top: + br label %loop +; CHECK: loop: +loop: +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({top,liveOnEntry},{loop,[[MSSA_USE:[0-9]+]]}) + %i = phi i64 [0, %top], [%nexti, %loop] + %aptr = getelementptr double, double *%a, i64 %i + %bptr = getelementptr double, double *%b, i64 %i +; CHECK: MemoryUse([[MPHI]]) MayAlias +; CHECK: llvm.mem.parallel_loop_access + %aval = load double, double *%aptr +; CHECK: MemoryUse([[MPHI]]) MayAlias + %bval = load double, double *%aptr + %cval = fadd double %aval, %bval +; CHECK: [[MSSA_USE]] = MemoryDef([[MPHI]]) + store double %cval, double *%bptr + %nexti = add i64 %i, 1 + %done = icmp sgt i64 %nexti, 500 + br i1 %done, label %loopdone, label %loop, !llvm.loop !1 +loopdone: + ret void +} + +; CHECK-LABEL: MemorySSA for function: simd_test_sub2 +; CHECK-LABEL: @simd_test_sub2( +define double @simd_test_sub2(double *%a) { +top: + br label %loop +loop: + %i = phi i64 [0, %top], [%nexti, %loop] + %v = phi double [0.000000e+00, %top], [%nextv, %loop] + %aptr = getelementptr double, double *%a, i64 %i +; CHECK: MemoryUse(liveOnEntry) MayAlias + %aval = load double, double *%aptr + %nextv = fsub double %v, %aval +; CHECK: fsub reassoc contract double %v, %aval + %nexti = add i64 %i, 1 + %done = icmp sgt i64 %nexti, 500 + br i1 %done, label %loopdone, label %loop, !llvm.loop !0 +loopdone: + ret double %nextv +} + +!0 = distinct !{!0, !"julia.simdloop"} +!1 = distinct !{!1, !"julia.simdloop", !"julia.ivdep"} \ No newline at end of file diff --git a/test/llvmpasses/simdloop.ll b/test/llvmpasses/julia-simdloop.ll similarity index 75% rename from test/llvmpasses/simdloop.ll rename to test/llvmpasses/julia-simdloop.ll index 929fbeea2c3f5..ad3e125893068 100644 --- a/test/llvmpasses/simdloop.ll +++ b/test/llvmpasses/julia-simdloop.ll @@ -1,12 +1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license ; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S %s | FileCheck %s ; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s - -declare void @julia.loopinfo_marker() +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S %s | FileCheck %s ; CHECK-LABEL: @simd_test( define void @simd_test(double *%a, double *%b) { @@ -22,9 +20,8 @@ loop: %cval = fadd double %aval, %bval store double %cval, double *%bptr %nexti = add i64 %i, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !3 %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop + br i1 %done, label %loopdone, label %loop, !llvm.loop !1 loopdone: ret void } @@ -42,9 +39,8 @@ loop: %nextv = fsub double %v, %aval ; CHECK: fsub reassoc contract double %v, %aval %nexti = add i64 %i, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !3 %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop + br i1 %done, label %loopdone, label %loop, !llvm.loop !1 loopdone: ret double %nextv } @@ -61,9 +57,8 @@ loop: %nextv = fsub double %v, %aval ; CHECK: fsub reassoc contract double %v, %aval %nexti = add i64 %i, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !2 %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop + br i1 %done, label %loopdone, label %loop, !llvm.loop !0 loopdone: ret double %nextv } @@ -82,20 +77,16 @@ for.body: ; preds = %for.body, %entry %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv store i32 %add, i32* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !4 %exitcond = icmp eq i64 %indvars.iv.next, 48 ; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]] - br i1 %exitcond, label %for.end, label %for.body + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2 for.end: ; preds = %for.body %1 = load i32, i32* %a, align 4 ret i32 %1 } -!1 = !{} -!2 = !{!"julia.simdloop"} -!3 = !{!"julia.simdloop", !"julia.ivdep"} -!4 = !{!"julia.simdloop", !"julia.ivdep", !5} -!5 = !{!"llvm.loop.vectorize.disable", i1 0} -; CHECK: [[LOOP]] = distinct !{[[LOOP]], [[LOOP_DISABLE:![0-9]+]]} -; CHECK-NEXT: [[LOOP_DISABLE]] = !{!"llvm.loop.vectorize.disable", i1 false} +!0 = distinct !{!0, !"julia.simdloop"} +!1 = distinct !{!1, !"julia.simdloop", !"julia.ivdep"} +!2 = distinct !{!2, !"julia.simdloop", !"julia.ivdep", !3} +!3 = !{!"llvm.loop.vectorize.disable", i1 0} diff --git a/test/llvmpasses/loopinfo.jl b/test/llvmpasses/loopinfo.jl index b9b388c73d0c5..559793b70b27f 100644 --- a/test/llvmpasses/loopinfo.jl +++ b/test/llvmpasses/loopinfo.jl @@ -3,7 +3,7 @@ # RUN: julia --startup-file=no %s %t && llvm-link -S %t/* -o %t/module.ll # RUN: cat %t/module.ll | FileCheck %s # RUN: cat %t/module.ll | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S - | FileCheck %s -check-prefix=LOWER -# RUN: cat %t/module.ll | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S - | FileCheck %s -check-prefix=LOWER +# RUN: cat %t/module.ll | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S - | FileCheck %s -check-prefix=LOWER # RUN: julia --startup-file=no %s %t -O && llvm-link -S %t/* -o %t/module.ll # RUN: cat %t/module.ll | FileCheck %s -check-prefix=FINAL @@ -27,10 +27,9 @@ function simdf(X) acc = zero(eltype(X)) @simd for x in X acc += x -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO:![0-9]+]] +# CHECK: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]] # LOWER-NOT: llvm.mem.parallel_loop_access # LOWER: fadd reassoc contract double -# LOWER-NOT: call void @julia.loopinfo_marker() # LOWER: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]] # FINAL: fadd reassoc contract <{{(vscale x )?}}{{[0-9]+}} x double> end @@ -43,9 +42,8 @@ function simdf2(X) acc = zero(eltype(X)) @simd ivdep for x in X acc += x -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO2:![0-9]+]] +# CHECK: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]] # LOWER: llvm.mem.parallel_loop_access -# LOWER-NOT: call void @julia.loopinfo_marker() # LOWER: fadd reassoc contract double # LOWER: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]] end @@ -61,8 +59,7 @@ end for i in 1:N iteration(i) $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.count"), 3))) -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO3:![0-9]+]] -# LOWER-NOT: call void @julia.loopinfo_marker() +# CHECK: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]] # LOWER: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]] # FINAL: call {{(swiftcc )?}}void @j_iteration # FINAL: call {{(swiftcc )?}}void @j_iteration @@ -87,8 +84,7 @@ end iteration(i) end $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.full"),))) -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO4:![0-9]+]] -# LOWER-NOT: call void @julia.loopinfo_marker() +# CHECK: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]] # LOWER: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]] # FINAL: call {{(swiftcc )?}}void @j_iteration # FINAL: call {{(swiftcc )?}}void @j_iteration @@ -119,11 +115,11 @@ end end ## Check all the MD nodes -# CHECK: [[LOOPINFO]] = !{!"julia.simdloop"} -# CHECK: [[LOOPINFO2]] = !{!"julia.simdloop", !"julia.ivdep"} -# CHECK: [[LOOPINFO3]] = !{[[LOOPUNROLL:![0-9]+]]} +# CHECK: [[LOOPID]] = distinct !{[[LOOPID]], !"julia.simdloop"} +# CHECK: [[LOOPID2]] = distinct !{[[LOOPID2]], !"julia.simdloop", !"julia.ivdep"} +# CHECK: [[LOOPID3]] = distinct !{[[LOOPID3]], [[LOOPUNROLL:![0-9]+]]} # CHECK: [[LOOPUNROLL]] = !{!"llvm.loop.unroll.count", i64 3} -# CHECK: [[LOOPINFO4]] = !{[[LOOPUNROLL2:![0-9]+]]} +# CHECK: [[LOOPID4]] = distinct !{[[LOOPID4]], [[LOOPUNROLL2:![0-9]+]]} # CHECK: [[LOOPUNROLL2]] = !{!"llvm.loop.unroll.full"} # LOWER: [[LOOPID]] = distinct !{[[LOOPID]]} # LOWER: [[LOOPID2]] = distinct !{[[LOOPID2]]} diff --git a/test/llvmpasses/parsing.ll b/test/llvmpasses/parsing.ll index 26f0b6b63fbf2..6a5909ff5fd40 100644 --- a/test/llvmpasses/parsing.ll +++ b/test/llvmpasses/parsing.ll @@ -1,6 +1,6 @@ ; COM: NewPM-only test, tests for ability to parse Julia passes -; RUN: opt --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,LowerSIMDLoop,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,CombineMulAdd,LateLowerGCFrame,FinalLowerGC,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(JuliaLICM),GCInvariantVerifier,GCInvariantVerifier),LowerPTLSPass,LowerPTLSPass,JuliaMultiVersioning,JuliaMultiVersioning)' -S %s -o /dev/null +; RUN: opt --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,CombineMulAdd,LateLowerGCFrame,FinalLowerGC,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(LowerSIMDLoop,JuliaLICM),GCInvariantVerifier,GCInvariantVerifier),LowerPTLSPass,LowerPTLSPass,JuliaMultiVersioning,JuliaMultiVersioning)' -S %s -o /dev/null define void @test() { ret void