From a8654596f0371c2604c4d475422c48f4fc6a56c9 Mon Sep 17 00:00:00 2001
From: Waiman Long
Date: Thu, 4 Apr 2019 13:43:19 -0400
Subject: [PATCH] locking/rwsem: Enable lock event counting

Add lock event counting calls so that we can track the number of lock
events happening in the rwsem code.

With CONFIG_LOCK_EVENT_COUNTS on and booting a 4-socket 112-thread x86-64
system, the rwsem counts after system bootup were as follows:

  rwsem_opt_fail=261
  rwsem_opt_wlock=50636
  rwsem_rlock=445
  rwsem_rlock_fail=0
  rwsem_rlock_fast=22
  rwsem_rtrylock=810144
  rwsem_sleep_reader=441
  rwsem_sleep_writer=310
  rwsem_wake_reader=355
  rwsem_wake_writer=2335
  rwsem_wlock=261
  rwsem_wlock_fail=0
  rwsem_wtrylock=20583

It can be seen that most of the lock acquisitions in the slowpath were
write-locks in the optimistic spinning code path with no sleeping at
all. For this system, over 97% of the locks were acquired via optimistic
spinning. This illustrates the importance of optimistic spinning in
improving the performance of rwsem.

Signed-off-by: Waiman Long
Acked-by: Peter Zijlstra
Acked-by: Davidlohr Bueso
Cc: Andrew Morton
Cc: Arnd Bergmann
Cc: Borislav Petkov
Cc: Davidlohr Bueso
Cc: Linus Torvalds
Cc: Paul E. McKenney
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Tim Chen
Cc: Will Deacon
Link: http://lkml.kernel.org/r/20190404174320.22416-11-longman@redhat.com
Signed-off-by: Ingo Molnar
---
 arch/Kconfig                      |  1 -
 kernel/locking/lock_events_list.h | 17 +++++++++++++++++
 kernel/locking/rwsem-xadd.c       | 11 +++++++++++
 kernel/locking/rwsem.h            |  4 ++++
 4 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 28c0f1ad80d7..02cb4e6a3e38 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -904,7 +904,6 @@ config ARCH_USE_MEMREMAP_PROT
 config LOCK_EVENT_COUNTS
 	bool "Locking event counts collection"
 	depends on DEBUG_FS
-	depends on QUEUED_SPINLOCKS
 	---help---
 	  Enable light-weight counting of various locking related events
 	  in the system with minimal performance impact. This reduces
diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
index 8b4d2e180475..ad7668cfc9da 100644
--- a/kernel/locking/lock_events_list.h
+++ b/kernel/locking/lock_events_list.h
@@ -48,3 +48,20 @@ LOCK_EVENT(lock_use_node3)	/* # of locking ops that use 3rd percpu node */
 LOCK_EVENT(lock_use_node4)	/* # of locking ops that use 4th percpu node */
 LOCK_EVENT(lock_no_node)	/* # of locking ops w/o using percpu node */
 #endif /* CONFIG_QUEUED_SPINLOCKS */
+
+/*
+ * Locking events for rwsem
+ */
+LOCK_EVENT(rwsem_sleep_reader)	/* # of reader sleeps			*/
+LOCK_EVENT(rwsem_sleep_writer)	/* # of writer sleeps			*/
+LOCK_EVENT(rwsem_wake_reader)	/* # of reader wakeups			*/
+LOCK_EVENT(rwsem_wake_writer)	/* # of writer wakeups			*/
+LOCK_EVENT(rwsem_opt_wlock)	/* # of write locks opt-spin acquired	*/
+LOCK_EVENT(rwsem_opt_fail)	/* # of failed opt-spinnings		*/
+LOCK_EVENT(rwsem_rlock)		/* # of read locks acquired		*/
+LOCK_EVENT(rwsem_rlock_fast)	/* # of fast read locks acquired	*/
+LOCK_EVENT(rwsem_rlock_fail)	/* # of failed read lock acquisitions	*/
+LOCK_EVENT(rwsem_rtrylock)	/* # of read trylock calls		*/
+LOCK_EVENT(rwsem_wlock)		/* # of write locks acquired		*/
+LOCK_EVENT(rwsem_wlock_fail)	/* # of failed write lock acquisitions	*/
+LOCK_EVENT(rwsem_wtrylock)	/* # of write trylock calls		*/
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index f6198e1a58f6..6b3ee9948bf1 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -147,6 +147,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 			 * will notice the queued writer.
 			 */
 			wake_q_add(wake_q, waiter->task);
+			lockevent_inc(rwsem_wake_writer);
 		}
 
 		return;
@@ -214,6 +215,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 	}
 
 	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+	lockevent_cond_inc(rwsem_wake_reader, woken);
 	if (list_empty(&sem->wait_list)) {
 		/* hit end of list above */
 		adjustment -= RWSEM_WAITING_BIAS;
@@ -265,6 +267,7 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 		if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
 					count + RWSEM_ACTIVE_WRITE_BIAS)) {
 			rwsem_set_owner(sem);
+			lockevent_inc(rwsem_opt_wlock);
 			return true;
 		}
 	}
@@ -389,6 +392,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
 	osq_unlock(&sem->osq);
 done:
 	preempt_enable();
+	lockevent_cond_inc(rwsem_opt_fail, !taken);
 	return taken;
 }
 
@@ -436,6 +440,7 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
 		if (atomic_long_read(&sem->count) >= 0) {
 			raw_spin_unlock_irq(&sem->wait_lock);
 			rwsem_set_reader_owned(sem);
+			lockevent_inc(rwsem_rlock_fast);
 			return sem;
 		}
 		adjustment += RWSEM_WAITING_BIAS;
@@ -472,9 +477,11 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
 			break;
 		}
 		schedule();
+		lockevent_inc(rwsem_sleep_reader);
 	}
 
 	__set_current_state(TASK_RUNNING);
+	lockevent_inc(rwsem_rlock);
 	return sem;
 out_nolock:
 	list_del(&waiter.list);
@@ -482,6 +489,7 @@ out_nolock:
 		atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
 	raw_spin_unlock_irq(&sem->wait_lock);
 	__set_current_state(TASK_RUNNING);
+	lockevent_inc(rwsem_rlock_fail);
 	return ERR_PTR(-EINTR);
 }
 
@@ -575,6 +583,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 				goto out_nolock;
 
 			schedule();
+			lockevent_inc(rwsem_sleep_writer);
 			set_current_state(state);
 		} while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);
 
@@ -583,6 +592,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 	__set_current_state(TASK_RUNNING);
 	list_del(&waiter.list);
 	raw_spin_unlock_irq(&sem->wait_lock);
+	lockevent_inc(rwsem_wlock);
 
 	return ret;
 
@@ -596,6 +606,7 @@ out_nolock:
 	__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 	raw_spin_unlock_irq(&sem->wait_lock);
 	wake_up_q(&wake_q);
+	lockevent_inc(rwsem_wlock_fail);
 
 	return ERR_PTR(-EINTR);
 }
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index 3059a2dc39f8..37db17890e36 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -23,6 +23,8 @@
  * is involved. Ideally we would like to track all the readers that own
  * a rwsem, but the overhead is simply too big.
  */
+#include "lock_events.h"
+
 #define RWSEM_READER_OWNED	(1UL << 0)
 #define RWSEM_ANONYMOUSLY_OWNED	(1UL << 1)
 
@@ -200,6 +202,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 	 */
 	long tmp = RWSEM_UNLOCKED_VALUE;
 
+	lockevent_inc(rwsem_rtrylock);
 	do {
 		if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
 					tmp + RWSEM_ACTIVE_READ_BIAS)) {
@@ -241,6 +244,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
 {
 	long tmp;
 
+	lockevent_inc(rwsem_wtrylock);
 	tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
 					  RWSEM_ACTIVE_WRITE_BIAS);
 	if (tmp == RWSEM_UNLOCKED_VALUE) {
-- 
2.11.0
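
The lockevent_inc()/lockevent_cond_inc() hooks added above come from the
kernel's lock event counting framework (kernel/locking/lock_events.h), which
expands the LOCK_EVENT() entries in lock_events_list.h into per-CPU counters
and reports the totals through debugfs when CONFIG_LOCK_EVENT_COUNTS is set.
The code below is only a minimal, self-contained userspace sketch of the
underlying X-macro pattern, not the kernel implementation; names such as
EVENT_LIST, event_inc(), event_cond_inc() and ENABLE_EVENT_COUNTS are made up
for the illustration, and the per-CPU storage and debugfs reporting are
deliberately left out.

#include <stdio.h>

/* One central list of events, analogous to lock_events_list.h. */
#define EVENT_LIST			\
	LOCK_EVENT(rwsem_wlock)		\
	LOCK_EVENT(rwsem_rlock)		\
	LOCK_EVENT(rwsem_sleep_writer)

/* First expansion: turn the list into an enum of counter indices. */
#define LOCK_EVENT(name)	LOCKEVENT_ ## name,
enum lock_events { EVENT_LIST lockevent_num };
#undef LOCK_EVENT

/* Second expansion: turn the same list into a table of counter names. */
#define LOCK_EVENT(name)	#name,
static const char * const event_names[] = { EVENT_LIST };
#undef LOCK_EVENT

#ifdef ENABLE_EVENT_COUNTS
static unsigned long event_counts[lockevent_num];

#define event_inc(ev)		(event_counts[LOCKEVENT_ ## ev]++)
#define event_cond_inc(ev, c)	do { if (c) event_inc(ev); } while (0)
#else
/* Counting disabled: the hooks compile away to nothing. */
#define event_inc(ev)		do { } while (0)
#define event_cond_inc(ev, c)	do { (void)(c); } while (0)
#endif

int main(void)
{
	/*
	 * Call sites mirror the lockevent_inc()/lockevent_cond_inc()
	 * calls added by the patch.
	 */
	event_inc(rwsem_wlock);
	event_cond_inc(rwsem_sleep_writer, 1);
	event_cond_inc(rwsem_rlock, 0);

#ifdef ENABLE_EVENT_COUNTS
	for (int i = 0; i < lockevent_num; i++)
		printf("%s=%lu\n", event_names[i], event_counts[i]);
#endif
	return 0;
}

Built with -DENABLE_EVENT_COUNTS, the sketch prints one name=value line per
counter, in the same spirit as the bootup numbers quoted in the changelog;
built without it, every hook expands to an empty statement, which mirrors how
the kernel hooks cost nothing when the config option is off.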