Skip to content

Commit

Permalink
Remove new BitCastInst unless it's in a typed pointer context (#50338)
Browse files Browse the repository at this point in the history
* Add debug_level module flag
* Make ptls use IRBuilder
* Only use 'new BitCastInst' when we know we are not in opaque pointer mode
  • Loading branch information
pchintalapudi authored Jun 30, 2023
1 parent b303d0e commit 734cafa
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 52 deletions.
1 change: 1 addition & 0 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7271,6 +7271,7 @@ static jl_llvm_functions_t
// allocate Function declarations and wrapper objects
//Safe because params holds ctx lock
Module *M = TSM.getModuleUnlocked();
M->addModuleFlag(Module::Warning, "julia.debug_level", ctx.emission_context.debug_level);
jl_debugcache_t debuginfo;
debuginfo.initialize(M);
jl_returninfo_t returninfo = {};
Expand Down
2 changes: 2 additions & 0 deletions src/llvm-alloc-opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,8 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
auto replace_i = new_i;
Type *new_t = new_i->getType();
if (cast_t != new_t) {
// Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
assert(cast_t->getContext().supportsTypedPointers());
replace_i = new BitCastInst(replace_i, cast_t, "", user);
replace_i->setDebugLoc(user->getDebugLoc());
replace_i->takeName(user);
Expand Down
35 changes: 7 additions & 28 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,37 +81,16 @@ Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
unsigned nRoots = cast<ConstantInt>(target->getArgOperand(0))->getLimitedValue(INT_MAX);

// Create the GC frame.
unsigned allocaAddressSpace = F.getParent()->getDataLayout().getAllocaAddrSpace();
AllocaInst *gcframe_alloca = new AllocaInst(
T_prjlvalue,
allocaAddressSpace,
ConstantInt::get(Type::getInt32Ty(F.getContext()), nRoots + 2),
Align(16));
gcframe_alloca->insertAfter(target);
Instruction *gcframe;
if (allocaAddressSpace) {
// addrspacecast as needed for non-0 alloca addrspace
gcframe = new AddrSpaceCastInst(gcframe_alloca, T_prjlvalue->getPointerTo(0));
gcframe->insertAfter(gcframe_alloca);
} else {
gcframe = gcframe_alloca;
}
IRBuilder<> builder(target->getNextNode());
auto gcframe_alloca = builder.CreateAlloca(T_prjlvalue, ConstantInt::get(Type::getInt32Ty(F.getContext()), nRoots + 2));
gcframe_alloca->setAlignment(Align(16));
// addrspacecast as needed for non-0 alloca addrspace
auto gcframe = cast<Instruction>(builder.CreateAddrSpaceCast(gcframe_alloca, T_prjlvalue->getPointerTo(0)));
gcframe->takeName(target);

// Zero out the GC frame.
BitCastInst *tempSlot_i8 = new BitCastInst(gcframe, Type::getInt8PtrTy(F.getContext()), "");
tempSlot_i8->insertAfter(gcframe);
Type *argsT[2] = {tempSlot_i8->getType(), Type::getInt32Ty(F.getContext())};
Function *memset = Intrinsic::getDeclaration(F.getParent(), Intrinsic::memset, makeArrayRef(argsT));
Value *args[4] = {
tempSlot_i8, // dest
ConstantInt::get(Type::getInt8Ty(F.getContext()), 0), // val
ConstantInt::get(Type::getInt32Ty(F.getContext()), sizeof(jl_value_t*) * (nRoots + 2)), // len
ConstantInt::get(Type::getInt1Ty(F.getContext()), 0)}; // volatile
CallInst *zeroing = CallInst::Create(memset, makeArrayRef(args));
cast<MemSetInst>(zeroing)->setDestAlignment(Align(16));
zeroing->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
zeroing->insertAfter(tempSlot_i8);
auto ptrsize = F.getParent()->getDataLayout().getPointerSize();
builder.CreateMemSet(gcframe, Constant::getNullValue(Type::getInt8Ty(F.getContext())), ptrsize * (nRoots + 2), Align(16), tbaa_gcframe);

return gcframe;
}
Expand Down
19 changes: 16 additions & 3 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -702,8 +702,11 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) {
ConstantInt::get(Type::getInt32Ty(Cond->getContext()), i),
"", SI);
}
if (FalseElem->getType() != TrueElem->getType())
if (FalseElem->getType() != TrueElem->getType()) {
// Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
assert(FalseElem->getContext().supportsTypedPointers());
FalseElem = new BitCastInst(FalseElem, TrueElem->getType(), "", SI);
}
SelectInst *SelectBase = SelectInst::Create(Cond, TrueElem, FalseElem, "gclift", SI);
int Number = ++S.MaxPtrNumber;
S.AllPtrNumbering[SelectBase] = Number;
Expand Down Expand Up @@ -776,6 +779,8 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) {
else
BaseElem = IncomingBases[i];
if (BaseElem->getType() != T_prjlvalue) {
// Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
assert(BaseElem->getContext().supportsTypedPointers());
auto &remap = CastedRoots[i][BaseElem];
if (!remap) {
if (auto constant = dyn_cast<Constant>(BaseElem)) {
Expand Down Expand Up @@ -2595,8 +2600,11 @@ void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColor
// Pointee types don't have semantics, so the optimizer is
// free to rewrite them if convenient. We need to change
// it back here for the store.
if (Val->getType() != T_prjlvalue)
if (Val->getType() != T_prjlvalue) {
// Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
assert(Val->getContext().supportsTypedPointers());
Val = new BitCastInst(Val, T_prjlvalue, "", InsertBefore);
}
new StoreInst(Val, slotAddress, InsertBefore);
}

Expand Down Expand Up @@ -2677,6 +2685,8 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State
}
if (slotAddress->getType() != AI->getType()) {
// If we're replacing an ArrayAlloca, the pointer element type may need to be fixed up
// Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
assert(slotAddress->getContext().supportsTypedPointers());
auto BCI = new BitCastInst(slotAddress, AI->getType());
BCI->insertAfter(slotAddress);
slotAddress = BCI;
Expand Down Expand Up @@ -2705,8 +2715,11 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State
slotAddress->insertAfter(gcframe);
auto ValExpr = std::make_pair(Base, isa<PointerType>(Base->getType()) ? -1 : i);
auto Elem = MaybeExtractScalar(S, ValExpr, SI);
if (Elem->getType() != T_prjlvalue)
if (Elem->getType() != T_prjlvalue) {
// Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
assert(Elem->getContext().supportsTypedPointers());
Elem = new BitCastInst(Elem, T_prjlvalue, "", SI);
}
//auto Idxs = makeArrayRef(Tracked[i]);
//Value *Elem = ExtractScalar(Base, true, Idxs, SI);
Value *shadowStore = new StoreInst(Elem, slotAddress, SI);
Expand Down
2 changes: 2 additions & 0 deletions src/llvm-propagate-addrspaces.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc
if (LiftingMap.count(CurrentV))
CurrentV = LiftingMap[CurrentV];
if (CurrentV->getType() != TargetType) {
// Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
assert(CurrentV->getContext().supportsTypedPointers());
auto *BCI = new BitCastInst(CurrentV, TargetType);
ToInsert.push_back(std::make_pair(BCI, InsertPt));
CurrentV = BCI;
Expand Down
48 changes: 27 additions & 21 deletions src/llvm-ptls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ void LowerPTLS::set_pgcstack_attrs(CallInst *pgcstack) const

Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefore) const
{
IRBuilder<> builder(insertBefore);
Value *tls;
if (TargetTriple.isX86() && insertBefore->getFunction()->callsFunctionThatReturnsTwice()) {
// Workaround LLVM bug by hiding the offset computation
Expand All @@ -87,15 +88,15 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor
if (offset) {
std::vector<Type*> args(0);
args.push_back(offset->getType());
auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), args, false),
auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(builder.getContext()), args, false),
dyn_asm_str, "=&r,r,~{dirflag},~{fpsr},~{flags}", false);
tls = CallInst::Create(tp, offset, "pgcstack_i8", insertBefore);
tls = builder.CreateCall(tp, {offset}, "pgcstack");
}
else {
auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), false),
const_asm_str.c_str(), "=r,~{dirflag},~{fpsr},~{flags}",
false);
tls = CallInst::Create(tp, "pgcstack_i8", insertBefore);
tls = builder.CreateCall(tp, {}, "tls_pgcstack");
}
} else {
// AArch64/ARM doesn't seem to have this issue.
Expand All @@ -118,12 +119,12 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor
}
if (!offset)
offset = ConstantInt::getSigned(T_size, jl_tls_offset);
auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), false), asm_str, "=r", false);
tls = CallInst::Create(tp, "thread_ptr", insertBefore);
tls = GetElementPtrInst::Create(Type::getInt8Ty(insertBefore->getContext()), tls, {offset}, "ppgcstack_i8", insertBefore);
auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(builder.getContext()), false), asm_str, "=r", false);
tls = builder.CreateCall(tp, {}, "thread_ptr");
tls = builder.CreateGEP(Type::getInt8Ty(builder.getContext()), tls, {offset}, "tls_ppgcstack");
}
tls = new BitCastInst(tls, T_pppjlvalue->getPointerTo(), "ppgcstack", insertBefore);
return new LoadInst(T_pppjlvalue, tls, "pgcstack", false, insertBefore);
tls = builder.CreateBitCast(tls, T_pppjlvalue->getPointerTo());
return builder.CreateLoad(T_pppjlvalue, tls, "tls_pgcstack");
}

GlobalVariable *LowerPTLS::create_hidden_global(Type *T, StringRef name) const
Expand Down Expand Up @@ -153,15 +154,16 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
// if (!retboxed)
// foreach(retinst)
// emit_gc_unsafe_leave(ctx, last_gc_state);
auto phi = PHINode::Create(pgcstack->getType(), 2, "");
phi->insertAfter(pgcstack);
IRBuilder<> builder(pgcstack->getNextNode());
auto phi = builder.CreatePHI(pgcstack->getType(), 2, "pgcstack");
pgcstack->replaceAllUsesWith(phi);
MDBuilder MDB(pgcstack->getContext());
SmallVector<uint32_t, 2> Weights{9, 1};
TerminatorInst *fastTerm;
TerminatorInst *slowTerm;
assert(pgcstack->getType()); // Static analyzer
auto cmp = new ICmpInst(phi, CmpInst::ICMP_NE, pgcstack, Constant::getNullValue(pgcstack->getType()));
builder.SetInsertPoint(phi);
auto cmp = builder.CreateICmpNE(pgcstack, Constant::getNullValue(pgcstack->getType()));
SplitBlockAndInsertIfThenElse(cmp, phi, &fastTerm, &slowTerm,
MDB.createBranchWeights(Weights));
if (CFGModified)
Expand All @@ -180,7 +182,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
adopt->insertBefore(slowTerm);
phi->addIncoming(adopt, slowTerm->getParent());
// emit fast branch code
IRBuilder<> builder(fastTerm->getParent());
builder.SetInsertPoint(fastTerm->getParent());
fastTerm->removeFromParent();
MDNode *tbaa = tbaa_gcframe;
Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, pgcstack), tbaa), true);
Expand All @@ -194,24 +196,24 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
last_gc_state->addIncoming(prior, fastTerm->getParent());
for (auto &BB : *pgcstack->getParent()->getParent()) {
if (isa<ReturnInst>(BB.getTerminator())) {
IRBuilder<> builder(BB.getTerminator());
builder.SetInsertPoint(BB.getTerminator());
emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, phi), tbaa), last_gc_state, true);
}
}
}
}

if (imaging_mode) {
IRBuilder<> builder(pgcstack);
if (jl_tls_elf_support) {
// if (offset != 0)
// pgcstack = tp + offset; // fast
// else
// pgcstack = getter(); // slow
auto offset = new LoadInst(T_size, pgcstack_offset, "", false, pgcstack);
auto offset = builder.CreateLoad(T_size, pgcstack_offset);
offset->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
offset->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None));
auto cmp = new ICmpInst(pgcstack, CmpInst::ICMP_NE, offset,
Constant::getNullValue(offset->getType()));
auto cmp = builder.CreateICmpNE(offset, Constant::getNullValue(offset->getType()));
MDBuilder MDB(pgcstack->getContext());
SmallVector<uint32_t, 2> Weights{9, 1};
TerminatorInst *fastTerm;
Expand All @@ -222,10 +224,14 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
*CFGModified = true;

auto fastTLS = emit_pgcstack_tp(offset, fastTerm);
auto phi = PHINode::Create(T_pppjlvalue, 2, "", pgcstack);
// refresh the basic block in the builder
builder.SetInsertPoint(pgcstack);
auto phi = builder.CreatePHI(T_pppjlvalue, 2, "pgcstack");
pgcstack->replaceAllUsesWith(phi);
pgcstack->moveBefore(slowTerm);
auto getter = new LoadInst(T_pgcstack_getter, pgcstack_func_slot, "", false, pgcstack);
// refresh the basic block in the builder
builder.SetInsertPoint(pgcstack);
auto getter = builder.CreateLoad(T_pgcstack_getter, pgcstack_func_slot);
getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None));
pgcstack->setCalledFunction(pgcstack->getFunctionType(), getter);
Expand All @@ -240,14 +246,14 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
// variable to be filled (in `staticdata.c`) at initialization time of the sysimg.
// This way we can bypass the extra indirection in `jl_get_pgcstack`
// since we may not know which getter function to use ahead of time.
auto getter = new LoadInst(T_pgcstack_getter, pgcstack_func_slot, "", false, pgcstack);
auto getter = builder.CreateLoad(T_pgcstack_getter, pgcstack_func_slot);
getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None));
if (TargetTriple.isOSDarwin()) {
auto key = new LoadInst(T_size, pgcstack_key_slot, "", false, pgcstack);
auto key = builder.CreateLoad(T_size, pgcstack_key_slot);
key->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
key->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None));
auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, getter, {key}, "", pgcstack);
auto new_pgcstack = builder.CreateCall(FT_pgcstack_getter, getter, {key});
new_pgcstack->takeName(pgcstack);
pgcstack->replaceAllUsesWith(new_pgcstack);
pgcstack->eraseFromParent();
Expand Down

2 comments on commit 734cafa

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your package evaluation job has completed - possible new issues were detected.
A full report can be found here.

Please sign in to comment.