From 7804cf11a90e0a1ea05eb7f4ff7e580083fe89fa Mon Sep 17 00:00:00 2001
From: Lukas Diekmann
Date: Thu, 4 Nov 2021 15:17:54 +0000
Subject: [PATCH] Annotate intrinsic calls with inlining decisions.

When using intrinsics, such as `memcpy`, in LLVM IR, the compiler may
decide to inline/optimise them during codegen, or leave them as is.
This makes mapping a PT trace back to LLVM IR difficult, since we don't
know whether to expect a hole in the trace (from the call to the
intrinsic) or not.

There are two ways to solve this: 1) remove all optimisations that
inline intrinsics, or 2) annotate intrinsics with metadata so we can
identify inlined intrinsics during trace construction (in
`JITModBuilder`).

The problem with the first approach is that these optimisations are not
in a single place, but spread across different optimisation levels and
even architectures. This makes them easy to miss, which we would only
notice when traces behave in unexpected ways (if we're lucky, the trace
compiler crashes).

We can solve this problem with the second approach. By annotating an
intrinsic, we can check during trace construction whether it was
inlined and behave accordingly. And by annotating intrinsics in both
the inlined and non-inlined cases, we can detect when we've missed an
intrinsic (i.e. it has no annotation) and crash the trace compiler.

This second solution sounds much better, but comes with a small caveat:
it requires a nasty cast from a constant to a non-constant in the
codegen part of LLVM. I can picture the horror written on @ltratt's
face upon reading this, but here's my reasoning: 1) casting from const
to non-const is only UB if the underlying object is a real `const`,
which LLVM IR instructions are not; 2) I believe the reason LLVM passes
instructions around as `const` here is so that codegen doesn't
accidentally alter the IR. Adding metadata doesn't semantically change
the IR and so has no effect on codegen.

I thus believe the second solution to be the better option, which I
have implemented here, starting with the `memcpy` intrinsic.
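For illustration only (not part of this patch): a minimal sketch of how
the trace-construction side might query the annotation. The helper name
and surrounding code are hypothetical; only the `yk.intrinsic.inlined`
metadata key and its 1-bit payload come from this patch.

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/Metadata.h"
  #include "llvm/Support/ErrorHandling.h"

  using namespace llvm;

  // Hypothetical helper: returns true if codegen inlined this intrinsic
  // call, false if it was lowered to a real call. A missing annotation
  // means an intrinsic-lowering path was not instrumented, so abort loudly.
  static bool intrinsicWasInlined(const CallInst *CI) {
    MDNode *N = CI->getMetadata("yk.intrinsic.inlined");
    if (!N)
      report_fatal_error("intrinsic call has no yk.intrinsic.inlined annotation");
    ConstantInt *Flag = mdconst::extract<ConstantInt>(N->getOperand(0));
    return !Flag->isZero();
  }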
---
 llvm/lib/Target/X86/X86FastISel.cpp | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index bb95ed3ccdc53a..7ef800a35d9a96 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -2588,6 +2588,17 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
   return true;
 }
 
+// Add an annotation to an intrinsic instruction, specifying whether the
+// intrinsic has been inlined or not.
+void annotateIntrinsic(const IntrinsicInst *II, bool Inlined) {
+  IntrinsicInst *CI = const_cast<IntrinsicInst *>(II);
+  LLVMContext &C = CI->getContext();
+  ConstantInt *CInt;
+  CInt = ConstantInt::get(C, APInt(1, Inlined ? 1 : 0));
+  MDNode *N = MDNode::get(C, ConstantAsMetadata::get(CInt));
+  CI->setMetadata("yk.intrinsic.inlined", N);
+}
+
 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
   // FIXME: Handle more intrinsics.
   switch (II->getIntrinsicID()) {
@@ -2725,6 +2736,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
       // without a call if possible.
       uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
       if (IsMemcpySmall(Len)) {
+        annotateIntrinsic(II, true);
         X86AddressMode DestAM, SrcAM;
         if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
             !X86SelectAddress(MCI->getRawSource(), SrcAM))
@@ -2741,6 +2753,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
       return false;
 
+    annotateIntrinsic(II, false);
     return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
   }
   case Intrinsic::memset: {