From b04f2a0f01902d11e21afb690e14d12d2c464184 Mon Sep 17 00:00:00 2001
From: Sergey Vojtovich
Date: Fri, 1 Dec 2017 15:48:03 +0400
Subject: [PATCH] MDEV-14529 - InnoDB rw-locks: optimize memory barriers

Relax memory barriers for lock_word.

rw_lock_lock_word_decr() - used to acquire the rw-lock, thus we only
need to issue ACQUIRE when locking succeeds.

rw_lock_x_lock_func_nowait() - same as above, but used to attempt to
acquire the X-lock.

rw_lock_s_unlock_func() - used to release the S-lock; RELEASE is all we
need here.

rw_lock_x_unlock_func() - used to release the X-lock. Ideally we would
need only RELEASE here, but because of the mess with waiters (they must
be loaded after lock_word is stored) we have to issue both ACQUIRE and
RELEASE.

rw_lock_sx_unlock_func() - same as above, but used to release the
SX-lock.

rw_lock_s_lock_spin(), rw_lock_x_lock_func(), rw_lock_sx_lock_func() -
the fetch-and-store to waiters has to issue only an ACQUIRE memory
barrier, so that waiters is stored before lock_word is loaded.

Note that there is a violation of the RELEASE-ACQUIRE protocol here,
because on lock we do:

  my_atomic_fas32_explicit((int32*) &lock->waiters, 1,
                           MY_MEMORY_ORDER_ACQUIRE);
  my_atomic_load32_explicit(&lock->lock_word, MY_MEMORY_ORDER_RELAXED);

and on unlock:

  my_atomic_add32_explicit(&lock->lock_word, X_LOCK_DECR,
                           MY_MEMORY_ORDER_ACQ_REL);
  my_atomic_load32_explicit((int32*) &lock->waiters,
                            MY_MEMORY_ORDER_RELAXED);

That is, we effectively synchronize an ACQUIRE on lock_word with an
ACQUIRE on waiters. This was already the case before this patch. A
simple fix may have a negative performance impact; a proper fix
requires refactoring of lock_word.
---
 storage/innobase/include/sync0rw.ic | 36 +++++++++++++++++++----------
 storage/innobase/sync/sync0rw.cc    |  6 ++---
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
index e9ee2bfb518b6..cbbf421d9f248 100644
--- a/storage/innobase/include/sync0rw.ic
+++ b/storage/innobase/include/sync0rw.ic
@@ -216,8 +216,11 @@ rw_lock_lock_word_decr(
 	int32_t lock_copy = my_atomic_load32_explicit(&lock->lock_word,
 						       MY_MEMORY_ORDER_RELAXED);
 	while (lock_copy > threshold) {
-		if (my_atomic_cas32(&lock->lock_word, &lock_copy,
-				    lock_copy - amount)) {
+		if (my_atomic_cas32_strong_explicit(&lock->lock_word,
+						    &lock_copy,
+						    lock_copy - amount,
+						    MY_MEMORY_ORDER_ACQUIRE,
+						    MY_MEMORY_ORDER_RELAXED)) {
 			return(true);
 		}
 	}
@@ -307,7 +310,9 @@ rw_lock_x_lock_func_nowait(
 {
 	int32_t oldval = X_LOCK_DECR;
 
-	if (my_atomic_cas32(&lock->lock_word, &oldval, 0)) {
+	if (my_atomic_cas32_strong_explicit(&lock->lock_word, &oldval, 0,
+					    MY_MEMORY_ORDER_ACQUIRE,
+					    MY_MEMORY_ORDER_RELAXED)) {
 		lock->writer_thread = os_thread_get_curr_id();
 
 	} else if (os_thread_eq(lock->writer_thread, os_thread_get_curr_id())) {
@@ -367,7 +372,8 @@ rw_lock_s_unlock_func(
 	ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_S));
 
 	/* Increment lock_word to indicate 1 less reader */
-	int32_t lock_word = my_atomic_add32(&lock->lock_word, 1) + 1;
+	int32_t lock_word = my_atomic_add32_explicit(&lock->lock_word, 1,
+					MY_MEMORY_ORDER_RELEASE) + 1;
 	if (lock_word == 0 || lock_word == -X_LOCK_HALF_DECR) {
 
 		/* wait_ex waiter exists. It may not be asleep, but we signal
@@ -407,11 +413,12 @@ rw_lock_x_unlock_func(
 	ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_X));
 
 	if (lock_word == 0 || lock_word == -X_LOCK_HALF_DECR) {
-		/* There is 1 x-lock */
-		/* atomic increment is needed, because it is last */
-		if (my_atomic_add32(&lock->lock_word, X_LOCK_DECR) <= -X_LOCK_DECR) {
-			ut_error;
-		}
+		/* Last X-lock owned by this thread, it may still hold SX-locks.
+		ACQ_REL due to...
+		RELEASE: we release rw-lock
+		ACQUIRE: we want waiters to be loaded after lock_word is stored */
+		my_atomic_add32_explicit(&lock->lock_word, X_LOCK_DECR,
+					 MY_MEMORY_ORDER_ACQ_REL);
 
 		/* This no longer has an X-lock but it may still have
 		an SX-lock. So it is now free for S-locks by other threads.
@@ -465,10 +472,15 @@ rw_lock_sx_unlock_func(
 		/* Last caller in a possible recursive chain. */
 		if (lock_word > 0) {
 			lock->writer_thread = 0;
+			ut_ad(lock_word <= INT_MAX32 - X_LOCK_HALF_DECR);
+
+			/* Last SX-lock owned by this thread, doesn't own X-lock.
+			ACQ_REL due to...
+			RELEASE: we release rw-lock
+			ACQUIRE: we want waiters to be loaded after lock_word is stored */
+			my_atomic_add32_explicit(&lock->lock_word, X_LOCK_HALF_DECR,
+						 MY_MEMORY_ORDER_ACQ_REL);
-			if (my_atomic_add32(&lock->lock_word, X_LOCK_HALF_DECR) <= 0) {
-				ut_error;
-			}
 			/* Lock is now free. May have to signal read/write
 			waiters.
 			We do not need to signal wait_ex waiters, since they
 			cannot exist when there is an sx-lock
diff --git a/storage/innobase/sync/sync0rw.cc b/storage/innobase/sync/sync0rw.cc
index 93898756a2737..509fd9cf19bad 100644
--- a/storage/innobase/sync/sync0rw.cc
+++ b/storage/innobase/sync/sync0rw.cc
@@ -361,7 +361,7 @@ rw_lock_s_lock_spin(
 
 		/* Set waiters before checking lock_word to ensure wake-up
 		signal is sent. This may lead to some unnecessary signals. */
-		my_atomic_fas32((int32*) &lock->waiters, 1);
+		my_atomic_fas32_explicit(&lock->waiters, 1, MY_MEMORY_ORDER_ACQUIRE);
 
 		if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
 
@@ -745,7 +745,7 @@ rw_lock_x_lock_func(
 
 	/* Waiters must be set before checking lock_word, to ensure signal
 	is sent. This could lead to a few unnecessary wake-up signals. */
-	my_atomic_fas32((int32*) &lock->waiters, 1);
+	my_atomic_fas32_explicit(&lock->waiters, 1, MY_MEMORY_ORDER_ACQUIRE);
 
 	if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
 		sync_array_free_cell(sync_arr, cell);
@@ -850,7 +850,7 @@ rw_lock_sx_lock_func(
 
 	/* Waiters must be set before checking lock_word, to ensure signal
 	is sent. This could lead to a few unnecessary wake-up signals. */
-	my_atomic_fas32((int32*) &lock->waiters, 1);
+	my_atomic_fas32_explicit(&lock->waiters, 1, MY_MEMORY_ORDER_ACQUIRE);
 
 	if (rw_lock_sx_lock_low(lock, pass, file_name, line)) {
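
For reference, below is a minimal, self-contained sketch of the ordering
rules the commit message describes. It is not InnoDB code: C++11
std::atomic stands in for the my_atomic_*_explicit wrappers, the names
toy_rw_lock, s_lock_nowait, s_unlock, x_unlock, wake_waiters and
s_lock_or_register_waiter are invented for illustration, and recursion,
SX-locks and the sync array are left out.

  #include <atomic>
  #include <stdint.h>

  static const int32_t X_LOCK_DECR = 0x20000000;

  struct toy_rw_lock {
  	std::atomic<int32_t>	lock_word{X_LOCK_DECR};
  	std::atomic<int32_t>	waiters{0};
  };

  /* Stand-in for os_event_set(); does nothing in this sketch. */
  static void wake_waiters(toy_rw_lock*) {}

  /* Lock path: only the successful CAS needs ACQUIRE, the failure
  ordering can stay RELAXED (cf. rw_lock_lock_word_decr()). */
  static bool s_lock_nowait(toy_rw_lock* lock)
  {
  	int32_t lock_copy = lock->lock_word.load(std::memory_order_relaxed);
  	while (lock_copy > 0) {
  		if (lock->lock_word.compare_exchange_strong(
  			    lock_copy, lock_copy - 1,
  			    std::memory_order_acquire,
  			    std::memory_order_relaxed)) {
  			return true;
  		}
  	}
  	return false;
  }

  /* S-unlock: RELEASE alone publishes the writes made under the lock
  (cf. rw_lock_s_unlock_func()). */
  static void s_unlock(toy_rw_lock* lock)
  {
  	int32_t lock_word
  		= lock->lock_word.fetch_add(1, std::memory_order_release) + 1;
  	if (lock_word == 0) {
  		wake_waiters(lock);	/* a wait_ex waiter exists */
  	}
  }

  /* X-unlock: ACQ_REL, because waiters must be loaded only after
  lock_word has been stored (cf. rw_lock_x_unlock_func()). */
  static void x_unlock(toy_rw_lock* lock)
  {
  	lock->lock_word.fetch_add(X_LOCK_DECR, std::memory_order_acq_rel);
  	if (lock->waiters.load(std::memory_order_relaxed)) {
  		lock->waiters.store(0, std::memory_order_relaxed);
  		wake_waiters(lock);
  	}
  }

  /* Slow lock path: the fetch-and-store of waiters carries ACQUIRE so
  that waiters is stored before lock_word is re-checked
  (cf. rw_lock_s_lock_spin() and friends). */
  static bool s_lock_or_register_waiter(toy_rw_lock* lock)
  {
  	if (s_lock_nowait(lock)) {
  		return true;
  	}
  	lock->waiters.exchange(1, std::memory_order_acquire);
  	/* The real code would now block in the sync array; the sketch
  	simply retries once. */
  	return s_lock_nowait(lock);
  }

As the commit message points out, the ACQ_REL on the unlock-side
fetch_add is what keeps this waiters/lock_word handshake working despite
the missing RELEASE-ACQUIRE pairing; a cleaner scheme would require
refactoring lock_word itself.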