From cafcd7cdabf5be7a727d888ad2933d4953b310e7 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 26 Sep 2017 16:40:57 -0400 Subject: [PATCH 1/2] codegen-opt: fix stack alignment previously, stack-alignment of, for example, a 12-byte allocation would be computed to be 16-bytes during codegen (because of gc-alignment rounding up), but 8-bytes here (because of this code rounding down) --- src/llvm-alloc-opt.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 2ef844a4f7af3..4a67c39f841aa 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -715,10 +715,7 @@ bool AllocOpt::runOnFunction(Function &F) sz += align; } else if (sz > 1) { - align = JL_SMALL_BYTE_ALIGNMENT; - while (sz < align) { - align = align / 2; - } + align = llvm::MinAlign(JL_SMALL_BYTE_ALIGNMENT, llvm::NextPowerOf2(sz)); } // No debug info for prolog instructions IRBuilder<> prolog_builder(&entry.front()); From 9eb9a10d112281d2fd20d0685f8ee77bf1b39472 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 26 Sep 2017 17:54:57 -0400 Subject: [PATCH 2/2] codegen: fix vector alignment --- src/cgutils.cpp | 23 ++++++++++++----------- src/codegen.cpp | 10 +++++----- src/intrinsics.cpp | 17 ++--------------- 3 files changed, 19 insertions(+), 31 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 239068286ef09..4987ea22e6daf 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -1168,10 +1168,7 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v // If given alignment is 0 and LLVM's assumed alignment for a load/store via ptr // might be stricter than the Julia alignment for jltype, return the alignment of jltype. // Otherwise return the given alignment. -// -// Parameter ptr should be the pointer argument for the LoadInst or StoreInst. -// It is currently unused, but might be used in the future for a more precise answer. -static unsigned julia_alignment(Value* /*ptr*/, jl_value_t *jltype, unsigned alignment) +static unsigned julia_alignment(jl_value_t *jltype, unsigned alignment) { if (!alignment) { alignment = jl_datatype_align(jltype); @@ -1207,7 +1204,7 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j //} //else { Instruction *load = ctx.builder.CreateAlignedLoad(data, isboxed ? - alignment : julia_alignment(data, jltype, alignment), false); + alignment : julia_alignment(jltype, alignment), false); if (isboxed) load = maybe_mark_load_dereferenceable(load, true, jltype); if (tbaa) { @@ -1254,7 +1251,7 @@ static void typed_store(jl_codectx_t &ctx, data = ptr; } Instruction *store = ctx.builder.CreateAlignedStore(r, ctx.builder.CreateGEP(data, - idx_0based), isboxed ? alignment : julia_alignment(r, jltype, alignment)); + idx_0based), isboxed ? alignment : julia_alignment(jltype, alignment)); if (tbaa) tbaa_decorate(tbaa, store); } @@ -1290,6 +1287,9 @@ static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x, Type *astype static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, Value *src, uint64_t sz, unsigned align, bool is_volatile, MDNode *tbaa) { + if (sz == 0) + return; + assert(align && "align must be specified"); // If the types are small and simple, use load and store directly. // Going through memcpy can cause LLVM (e.g. SROA) to create bitcasts between float and int // that interferes with other optimizations. @@ -1466,7 +1466,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st addr = ctx.builder.CreateStructGEP(lt, ptr, idx); } } - int align = jl_field_align(jt, idx); + unsigned align = jl_field_align(jt, idx); if (jl_field_isptr(jt, idx)) { bool maybe_null = idx >= (unsigned)jt->ninitialized; Instruction *Load = maybe_mark_load_dereferenceable( @@ -1733,7 +1733,7 @@ static Value *emit_array_nd_index( #endif Value **idxs = (Value**)alloca(sizeof(Value*) * nidxs); for (size_t k = 0; k < nidxs; k++) { - idxs[k] = emit_unbox(ctx, T_size, argv[k], NULL); + idxs[k] = emit_unbox(ctx, T_size, argv[k], (jl_value_t*)jl_long_type); // type asserted by caller } Value *ii; for (size_t k = 0; k < nidxs; k++) { @@ -2154,7 +2154,8 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, const jl_cgval_t &src dest = emit_bitcast(ctx, dest, T_pint8); if (skip) // copy dest -> dest to simulate an undef value / conditional copy src_ptr = ctx.builder.CreateSelect(skip, dest, src_ptr); - emit_memcpy(ctx, dest, src_ptr, jl_datatype_size(typ), 0, isVolatile, tbaa); + unsigned alignment = julia_alignment(typ, 0); + emit_memcpy(ctx, dest, src_ptr, jl_datatype_size(typ), alignment, isVolatile, tbaa); } } } @@ -2172,7 +2173,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, const jl_cgval_t &src bool allunboxed = for_each_uniontype_small( [&](unsigned idx, jl_datatype_t *jt) { unsigned nb = jl_datatype_size(jt); - unsigned alignment = 0; + unsigned alignment = julia_alignment((jl_value_t*)jt, 0); BasicBlock *tempBB = BasicBlock::Create(jl_LLVMContext, "union_move", ctx.f); ctx.builder.SetInsertPoint(tempBB); switchInst->addCase(ConstantInt::get(T_int8, idx), tempBB); @@ -2310,7 +2311,7 @@ static void emit_setfield(jl_codectx_t &ctx, } } else { - int align = jl_field_align(sty, idx0); + unsigned align = jl_field_align(sty, idx0); typed_store(ctx, addr, ConstantInt::get(T_size, 0), rhs, jfty, strct.tbaa, data_pointer(ctx, strct, T_pjlvalue), align); } diff --git a/src/codegen.cpp b/src/codegen.cpp index a6fb5c717ea02..9a453361e2b6f 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -4050,7 +4050,7 @@ static void emit_cfunc_invalidate( } else { gf_ret = emit_bitcast(ctx, gf_ret, gfrt->getPointerTo()); - ctx.builder.CreateRet(ctx.builder.CreateLoad(gf_ret)); + ctx.builder.CreateRet(ctx.builder.CreateAlignedLoad(gf_ret, julia_alignment(astrt, 0))); } break; } @@ -4598,7 +4598,7 @@ static Function *gen_jlcall_wrapper(jl_method_instance_t *lam, const jl_returnin if (lty != NULL && !isboxed) { theArg = decay_derived(emit_bitcast(ctx, theArg, PointerType::get(lty, 0))); if (!lty->isAggregateType()) // keep "aggregate" type values in place as pointers - theArg = ctx.builder.CreateAlignedLoad(theArg, julia_alignment(theArg, ty, 0)); + theArg = ctx.builder.CreateAlignedLoad(theArg, julia_alignment(ty, 0)); } assert(dyn_cast(theArg) == NULL); args[idx] = theArg; @@ -4684,7 +4684,7 @@ static jl_returninfo_t get_specsig_function(Module *M, const std::string &name, jl_returninfo_t props = {}; SmallVector fsig; Type *rt; - if (jlrettype == (jl_value_t*)jl_void_type) { + if (jl_is_structtype(jlrettype) && jl_is_datatype_singleton((jl_datatype_t*)jlrettype)) { rt = T_void; props.cc = jl_returninfo_t::Register; } @@ -5695,9 +5695,9 @@ static std::unique_ptr emit_function( if (returninfo.cc == jl_returninfo_t::SRet) { assert(jl_is_leaf_type(jlrettype)); emit_memcpy(ctx, sret, retvalinfo, jl_datatype_size(jlrettype), - returninfo.union_minalign); + julia_alignment(jlrettype, 0)); } - else { + else { // must be jl_returninfo_t::Union emit_unionmove(ctx, sret, retvalinfo, isboxed_union, false, NULL); } } diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 288005b1d0fbc..425941888d77b 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -341,17 +341,8 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va return NULL; } - int alignment; - if (jt) { - alignment = julia_alignment(p, jt, 0); - } - else { - // stack has default alignment - alignment = 0; - } + unsigned alignment = julia_alignment(jt, 0); if (dest) { - // callers using the dest argument only use it for a stack slot for now - alignment = 0; MDNode *tbaa = x.tbaa; // the memcpy intrinsic does not allow to specify different alias tags // for the load part (x.tbaa) and the store part (tbaa_stack). @@ -363,11 +354,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va return NULL; } else { - Instruction *load; - if (alignment) - load = ctx.builder.CreateAlignedLoad(p, alignment); - else - load = ctx.builder.CreateLoad(p); + Instruction *load = ctx.builder.CreateAlignedLoad(p, alignment); return tbaa_decorate(x.tbaa, load); } }