Skip to content

Commit

Permalink
[NativeAOT] Simplifying access to thread static variables (#84566)
Browse files Browse the repository at this point in the history
Fixes: #84373

- [x] separate the "fast" inlinable case (used for single-module, non-dynamic cases when optimizing).
- [x] make the storage for fast threadstatics a single "combo" instance instead of array of instances.
  • Loading branch information
VSadov authored May 4, 2023
1 parent 15aa8d3 commit 36c507f
Show file tree
Hide file tree
Showing 33 changed files with 571 additions and 160 deletions.
2 changes: 0 additions & 2 deletions src/coreclr/nativeaot/Runtime/AsmOffsets.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,6 @@ ASM_OFFSET( 0, 78, Thread, m_uHijackedReturnValueFlags)
ASM_OFFSET( 48, 80, Thread, m_pExInfoStackHead)
ASM_OFFSET( 4c, 88, Thread, m_threadAbortException)

ASM_OFFSET( 50, 90, Thread, m_pThreadLocalModuleStatics)

ASM_SIZEOF( 14, 20, EHEnum)

ASM_OFFSET( 0, 0, gc_alloc_context, alloc_ptr)
Expand Down
19 changes: 19 additions & 0 deletions src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,25 @@ RuntimeInstance::TypeManagerList& RuntimeInstance::GetTypeManagerList()
return m_TypeManagerList;
}

// Returns the registered TypeManager when the type manager list holds exactly
// one entry. Returns NULL when the list is empty or has more than one entry.
TypeManager* RuntimeInstance::GetSingleTypeManager()
{
    auto first = m_TypeManagerList.GetHead();

    // Guard: no entries, or a second entry follows the first one.
    if (first == NULL || first->m_pNext != NULL)
    {
        return NULL;
    }

    return first->m_pTypeManager;
}

// Runtime helper: wraps the single registered type manager in a handle.
// Asserts that RuntimeInstance::GetSingleTypeManager() returned a manager.
COOP_PINVOKE_HELPER(TypeManagerHandle, RhGetSingleTypeManager, ())
{
    auto runtime = GetRuntimeInstance();
    TypeManager* typeManager = runtime->GetSingleTypeManager();

    // The local keeps its original name so the asserted expression text is unchanged.
    ASSERT(typeManager != NULL);

    return TypeManagerHandle::Create(typeManager);
}

// static
bool RuntimeInstance::Initialize(HANDLE hPalInstance)
{
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/nativeaot/Runtime/RuntimeInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class RuntimeInstance

bool RegisterTypeManager(TypeManager * pTypeManager);
TypeManagerList& GetTypeManagerList();
TypeManager* GetSingleTypeManager();
OsModuleList* GetOsModuleList();

bool RegisterUnboxingStubs(PTR_VOID pvStartRange, uint32_t cbRange);
Expand Down
15 changes: 11 additions & 4 deletions src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc
Original file line number Diff line number Diff line change
Expand Up @@ -235,11 +235,11 @@ Name dq offset AddressToExport
_tls_array equ 58h ;; offsetof(TEB, ThreadLocalStoragePointer)

;;
;; __declspec(thread) version
;; __declspec(thread) variable
;;
INLINE_GETTHREAD macro destReg, trashReg
INLINE_GET_TLS_VAR macro destReg, trashReg, variable
EXTERN _tls_index : DWORD
EXTERN tls_CurrentThread:DWORD
EXTERN variable:DWORD

;;
;; construct 'eax' from 'rax' so that the register size and data size match
Expand All @@ -255,11 +255,18 @@ endif
mov destRegDWORD, [_tls_index]
mov trashReg, gs:[_tls_array]
mov trashReg, [trashReg + destReg * 8]
mov destRegDWORD, SECTIONREL tls_CurrentThread
mov destRegDWORD, SECTIONREL variable
add destReg, trashReg

endm

;;
;; __declspec(thread) tls_CurrentThread
;;
INLINE_GETTHREAD macro destReg, trashReg
INLINE_GET_TLS_VAR destReg, trashReg, tls_CurrentThread
endm

INLINE_THREAD_UNHIJACK macro threadReg, trashReg1, trashReg2
;;
;; Thread::Unhijack()
Expand Down
53 changes: 8 additions & 45 deletions src/coreclr/nativeaot/Runtime/amd64/MiscStubs.S
Original file line number Diff line number Diff line change
Expand Up @@ -45,57 +45,20 @@ LOCAL_LABEL(ProbeLoop):
ret
NESTED_END RhpStackProbe, _TEXT

NESTED_ENTRY RhpGetThreadStaticBaseForType, _TEXT, NoHandler
// On entry:
// rdi - TypeManagerSlot*
// rsi - type index
NESTED_ENTRY RhpGetInlinedThreadStaticBase, _TEXT, NoHandler
// On exit:
// rax - the thread static base for the given type

push_nonvol_reg rbx
push_nonvol_reg r12

mov rbx, rdi // Save TypeManagerSlot*
mov r12, rsi // Save type index

// rax = GetThread()
INLINE_GETTHREAD

mov r8d, [rbx + 8] // Get ModuleIndex out of the TypeManagerSlot
// rdi = &tls_InlinedThreadStatics
INLINE_GET_TLS_VAR tls_InlinedThreadStatics
mov rdi, rax

// get per-thread storage
mov rax, [rax + OFFSETOF__Thread__m_pThreadLocalModuleStatics]

// get per-module storage
mov rax, [rdi]
test rax, rax
jz LOCAL_LABEL(RhpGetThreadStaticBaseForType_RarePath)
cmp r8d, [rax + OFFSETOF__Array__m_Length]
jae LOCAL_LABEL(RhpGetThreadStaticBaseForType_RarePath)
mov rax, [rax + r8 * 8 + 0x10]
jz C_FUNC(RhpGetInlinedThreadStaticBaseSlow) // rdi contains the storage ref

// get the actual per-type storage
test rax, rax
jz LOCAL_LABEL(RhpGetThreadStaticBaseForType_RarePath)
cmp r12d, [rax + OFFSETOF__Array__m_Length]
jae LOCAL_LABEL(RhpGetThreadStaticBaseForType_RarePath)
mov rax, [rax + r12 * 8 + 0x10]

// if have storage, return it
test rax, rax
jz LOCAL_LABEL(RhpGetThreadStaticBaseForType_RarePath)

.cfi_remember_state
pop_nonvol_reg r12
pop_nonvol_reg rbx
// return it
ret
NESTED_END RhpGetInlinedThreadStaticBase, _TEXT

.cfi_restore_state
.cfi_def_cfa_offset 24 // workaround cfi_restore_state bug
LOCAL_LABEL(RhpGetThreadStaticBaseForType_RarePath):
mov rdi, rbx // restore TypeManagerSlot*
mov rsi, r12 // restore type index

pop_nonvol_reg r12
pop_nonvol_reg rbx
jmp C_FUNC(RhpGetThreadStaticBaseForTypeSlow)
NESTED_END RhpGetThreadStaticBaseForType, _TEXT
37 changes: 8 additions & 29 deletions src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

include AsmMacros.inc

EXTERN RhpGetThreadStaticBaseForTypeSlow : PROC
EXTERN RhpGetInlinedThreadStaticBaseSlow : PROC

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The following helper will access ("probe") a word on each page of the stack
Expand Down Expand Up @@ -39,41 +39,20 @@ ProbeLoop:

LEAF_END RhpStackProbe, _TEXT

LEAF_ENTRY RhpGetThreadStaticBaseForType, _TEXT
; On entry and thorough the procedure:
; rcx - TypeManagerSlot*
; rdx - type index
LEAF_ENTRY RhpGetInlinedThreadStaticBase, _TEXT
; On exit:
; rax - the thread static base for the given type

;; rax = GetThread(), TRASHES r8
INLINE_GETTHREAD rax, r8

mov r8d, [rcx + 8] ; Get ModuleIndex out of the TypeManagerSlot
;; rcx = &tls_InlinedThreadStatics, TRASHES r8
INLINE_GET_TLS_VAR rcx, r8, tls_InlinedThreadStatics

;; get per-thread storage
mov rax, [rax + OFFSETOF__Thread__m_pThreadLocalModuleStatics]

;; get per-module storage
test rax, rax
jz RhpGetThreadStaticBaseForTypeSlow
cmp r8d, [rax + OFFSETOF__Array__m_Length]
jae RhpGetThreadStaticBaseForTypeSlow
mov rax, [rax + r8 * 8 + 10h]

;; get the actual per-type storage
mov rax, [rcx]
test rax, rax
jz RhpGetThreadStaticBaseForTypeSlow
cmp edx, [rax + OFFSETOF__Array__m_Length]
jae RhpGetThreadStaticBaseForTypeSlow
mov rax, [rax + rdx * 8 + 10h]

;; if have storage, return it
test rax, rax
jz RhpGetThreadStaticBaseForTypeSlow
jz RhpGetInlinedThreadStaticBaseSlow ;; rcx contains the storage ref

;; return it
ret

LEAF_END RhpGetThreadStaticBaseForType, _TEXT
LEAF_END RhpGetInlinedThreadStaticBase, _TEXT

end
18 changes: 18 additions & 0 deletions src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,21 @@
#include <unixasmmacros.inc>
#include "AsmOffsets.inc"

NESTED_ENTRY RhpGetInlinedThreadStaticBase, _TEXT, NoHandler
// Fast path for reading the value stored at tls_InlinedThreadStatics.
// When the stored value is zero, control is transferred to
// RhpGetInlinedThreadStaticBaseSlow.
//
// On exit:
// x0 - the thread static base for the given type

// x1 = &tls_InlinedThreadStatics
INLINE_GET_TLS_VAR x1, C_FUNC(tls_InlinedThreadStatics)

// load the value stored at the start of the TLS variable
ldr x0, [x1]
cbnz x0, HaveValue
// nothing stored yet: x0 = &tls_InlinedThreadStatics for the slow helper
// (presumably its first argument - confirm against the helper's signature)
mov x0, x1
b C_FUNC(RhpGetInlinedThreadStaticBaseSlow)

HaveValue:
// return it
ret

NESTED_END RhpGetInlinedThreadStaticBase, _TEXT
19 changes: 19 additions & 0 deletions src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,25 @@

#include "AsmMacros.h"

EXTERN RhpGetInlinedThreadStaticBaseSlow

TEXTAREA

;; Fast path for reading the value stored at tls_InlinedThreadStatics.
;; When the stored value is zero, control is transferred to
;; RhpGetInlinedThreadStaticBaseSlow.
;;
;; On exit:
;; x0 - the thread static base for the given type
LEAF_ENTRY RhpGetInlinedThreadStaticBase
;; x1 = &tls_InlinedThreadStatics, TRASHES x2
INLINE_GET_TLS_VAR x1, x2, tls_InlinedThreadStatics

;; load the value stored at the start of the TLS variable
ldr x0, [x1]
cbnz x0, HaveValue
;; nothing stored yet: x0 = &tls_InlinedThreadStatics for the slow helper
;; (presumably its first argument - confirm against the helper's signature)
mov x0, x1
b RhpGetInlinedThreadStaticBaseSlow

HaveValue
;; return it
ret
LEAF_END RhpGetInlinedThreadStaticBase

end
8 changes: 8 additions & 0 deletions src/coreclr/nativeaot/Runtime/gcrhscan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,14 @@ void GCToEEInterface::GcScanRoots(EnumGcRefCallbackFunc * fn, int condemned, in
else
#endif
{
InlinedThreadStaticRoot* pRoot = pThread->GetInlinedThreadStaticList();
while (pRoot != NULL)
{
STRESS_LOG2(LF_GC | LF_GCROOTS, LL_INFO100, "{ Scanning Thread's %p inline thread statics root %p. \n", pThread, pRoot);
GcEnumObject(&pRoot->m_threadStaticsBase, 0 /*flags*/, fn, sc);
pRoot = pRoot->m_next;
}

STRESS_LOG1(LF_GC | LF_GCROOTS, LL_INFO100, "{ Scanning Thread's %p thread statics root. \n", pThread);
GcEnumObject(pThread->GetThreadStaticStorage(), 0 /*flags*/, fn, sc);

Expand Down
25 changes: 22 additions & 3 deletions src/coreclr/nativeaot/Runtime/thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,8 @@ void Thread::Construct()

// Everything else should be initialized to 0 via the static initialization of tls_CurrentThread.

ASSERT(m_pThreadLocalModuleStatics == NULL);
ASSERT(m_pThreadLocalStatics == NULL);
ASSERT(m_pInlinedThreadLocalStatics == NULL);

ASSERT(m_pGCFrameRegistrations == NULL);

Expand Down Expand Up @@ -1266,15 +1267,33 @@ COOP_PINVOKE_HELPER(Object *, RhpGetThreadAbortException, ())

Object** Thread::GetThreadStaticStorage()
{
return &m_pThreadLocalModuleStatics;
return &m_pThreadLocalStatics;
}

COOP_PINVOKE_HELPER(Object**, RhGetThreadStaticStorage, ())
{
Thread * pCurrentThread = ThreadStore::RawGetCurrentThread();
Thread* pCurrentThread = ThreadStore::RawGetCurrentThread();
return pCurrentThread->GetThreadStaticStorage();
}

// Returns the head of this thread's list of inlined thread static roots
// (NULL when no root has been registered).
InlinedThreadStaticRoot* Thread::GetInlinedThreadStaticList()
{
    InlinedThreadStaticRoot* listHead = m_pInlinedThreadLocalStatics;
    return listHead;
}

// Pushes newRoot onto the front of this thread's list of inlined thread
// static roots. The root must not already be linked (m_next == NULL).
void Thread::RegisterInlinedThreadStaticRoot(InlinedThreadStaticRoot* newRoot)
{
    ASSERT(newRoot->m_next == NULL);

    // Remember the current head, then make newRoot the new head that points at it.
    InlinedThreadStaticRoot* previousHead = m_pInlinedThreadLocalStatics;
    newRoot->m_next = previousHead;
    m_pInlinedThreadLocalStatics = newRoot;
}

// Runtime helper: registers an inlined thread statics root with the current thread.
// `root` is reinterpreted as a pointer to the InlinedThreadStaticRoot it belongs to.
COOP_PINVOKE_HELPER(void, RhRegisterInlinedThreadStaticRoot, (Object** root))
{
    InlinedThreadStaticRoot* pRoot = reinterpret_cast<InlinedThreadStaticRoot*>(root);
    ThreadStore::RawGetCurrentThread()->RegisterInlinedThreadStaticRoot(pRoot);
}

// This function is used to quickly query a value that can uniquely identify a thread
COOP_PINVOKE_HELPER(uint8_t*, RhCurrentNativeThreadId, ())
{
Expand Down
12 changes: 11 additions & 1 deletion src/coreclr/nativeaot/Runtime/thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ struct GCFrameRegistration
int m_MaybeInterior;
};

// Node in a thread's singly linked list of inlined thread static roots.
struct InlinedThreadStaticRoot
{
// The thread statics base object held by this root.
// NOTE(review): appears to be required as the first field - code elsewhere
// casts an Object** pointing at this slot to InlinedThreadStaticRoot*. Confirm
// before reordering.
Object* m_threadStaticsBase;
// Next root in the owning thread's list, or NULL at the end of the list.
InlinedThreadStaticRoot* m_next;
};

struct ThreadBuffer
{
uint8_t m_rgbAllocContextBuffer[SIZEOF_ALLOC_CONTEXT];
Expand All @@ -88,7 +94,8 @@ struct ThreadBuffer
uintptr_t m_uHijackedReturnValueFlags;
PTR_ExInfo m_pExInfoStackHead;
Object* m_threadAbortException; // ThreadAbortException instance -set only during thread abort
Object* m_pThreadLocalModuleStatics;
Object* m_pThreadLocalStatics;
InlinedThreadStaticRoot* m_pInlinedThreadLocalStatics;
GCFrameRegistration* m_pGCFrameRegistrations;
PTR_VOID m_pStackLow;
PTR_VOID m_pStackHigh;
Expand Down Expand Up @@ -288,6 +295,9 @@ class Thread : private ThreadBuffer

Object** GetThreadStaticStorage();

InlinedThreadStaticRoot* GetInlinedThreadStaticList();
void RegisterInlinedThreadStaticRoot(InlinedThreadStaticRoot* newRoot);

NATIVE_CONTEXT* GetInterruptedContext();

void PushGCFrameRegistration(GCFrameRegistration* pRegistration);
Expand Down
12 changes: 11 additions & 1 deletion src/coreclr/nativeaot/Runtime/threadstore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,13 +430,23 @@ C_ASSERT(sizeof(Thread) == sizeof(ThreadBuffer));

#ifndef _MSC_VER
__thread ThreadBuffer tls_CurrentThread;

// the root of inlined threadstatics storage
// there is only one now,
// eventually this will be emitted by ILC and we may have more than one such variable
__thread InlinedThreadStaticRoot tls_InlinedThreadStatics;
#endif

// Returns the address of the calling thread's tls_CurrentThread buffer.
EXTERN_C ThreadBuffer* RhpGetThread()
{
    ThreadBuffer* pCurrent = &tls_CurrentThread;
    return pCurrent;
}

// Runtime helper: returns the address of the m_threadStaticsBase slot inside
// the calling thread's tls_InlinedThreadStatics root.
COOP_PINVOKE_HELPER(Object**, RhGetInlinedThreadStaticStorage, ())
{
    InlinedThreadStaticRoot& root = tls_InlinedThreadStatics;
    return &root.m_threadStaticsBase;
}

#endif // !DACCESS_COMPILE

#ifdef _WIN32
Expand Down Expand Up @@ -505,4 +515,4 @@ void ThreadStore::SaveCurrentThreadOffsetForDAC()
{
}

#endif // _WIN32
#endif // _WIN32
6 changes: 6 additions & 0 deletions src/coreclr/nativeaot/Runtime/threadstore.inl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,14 @@
#ifdef _MSC_VER
// a workaround to prevent tls_CurrentThread from becoming dynamically checked/initialized.
EXTERN_C __declspec(selectany) __declspec(thread) ThreadBuffer tls_CurrentThread;

// the root of inlined threadstatics storage
// there is only one now,
// eventually this will be emitted by ILC and we may have more than one such variable
EXTERN_C __declspec(selectany) __declspec(thread) InlinedThreadStaticRoot tls_InlinedThreadStatics;
#else
EXTERN_C __thread ThreadBuffer tls_CurrentThread;
EXTERN_C __thread InlinedThreadStaticRoot tls_InlinedThreadStatics;
#endif

// static
Expand Down
Loading

0 comments on commit 36c507f

Please sign in to comment.