From ec9bb56264525d3b0bc01fbd02c7790bd49d2d41 Mon Sep 17 00:00:00 2001 From: kanoi Date: Sun, 15 May 2016 23:58:38 +1000 Subject: [PATCH] ckdb - allow the shift summarisation lock to be held longer --- src/ckdb.c | 4 +-- src/ckdb.h | 2 +- src/ckdb_data.c | 4 +-- src/ckdb_dbio.c | 2 +- src/klist.h | 67 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 73 insertions(+), 6 deletions(-) diff --git a/src/ckdb.c b/src/ckdb.c index 1a9f4e77..b856ac10 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -2101,7 +2101,7 @@ static bool setup_data() LOGWARNING("reload complete %.0fm %.3fs", min, sec); // full lock access since mark processing can occur - K_WLOCK(process_pplns_free); + K_KLONGWLOCK(process_pplns_free); K_WLOCK(workerstatus_free); K_RLOCK(sharesummary_free); @@ -3945,7 +3945,7 @@ static void summarise_blocks() ss_look.data = (void *)(&looksharesummary); // We don't want them in an indeterminate state due to pplns - K_WLOCK(process_pplns_free); + K_KLONGWLOCK(process_pplns_free); // For now, just lock all 3 K_RLOCK(sharesummary_free); diff --git a/src/ckdb.h b/src/ckdb.h index ba82aeac..7e9da3a9 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -52,7 +52,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.111" +#define CKDB_VERSION DB_VERSION"-2.112" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ diff --git a/src/ckdb_data.c b/src/ckdb_data.c index e2f9203f..f28fd314 100644 --- a/src/ckdb_data.c +++ b/src/ckdb_data.c @@ -3711,7 +3711,7 @@ bool process_pplns(int32_t height, char *blockhash, tv_t *addr_cd) * and simply avoids the problems that would cause without much more * strict locking than is used already */ - K_WLOCK(process_pplns_free); + K_KLONGWLOCK(process_pplns_free); setnow(&now); @@ -5636,7 +5636,7 @@ bool make_markersummaries(bool msg, char *by, char *code, char *inet, * payout is being generated * N.B. 
this is a long lock since it stores the markersummaries */ setnow(&proc_lock_stt); - K_WLOCK(process_pplns_free); + K_KLONGWLOCK(process_pplns_free); setnow(&proc_lock_got); ok = sharesummaries_to_markersummaries(conn, workmarkers, by, code, inet, &now, trf_root); diff --git a/src/ckdb_dbio.c b/src/ckdb_dbio.c index f1ec707c..8209d9ff 100644 --- a/src/ckdb_dbio.c +++ b/src/ckdb_dbio.c @@ -6398,7 +6398,7 @@ K_ITEM *payouts_full_expire(PGconn *conn, int64_t payoutid, tv_t *now, bool lock // If not already done before calling if (lock) - K_WLOCK(process_pplns_free); + K_KLONGWLOCK(process_pplns_free); // This will be rare so a full lock is best K_WLOCK(payouts_free); diff --git a/src/klist.h b/src/klist.h index c1712dc4..06b92e02 100644 --- a/src/klist.h +++ b/src/klist.h @@ -176,6 +176,66 @@ extern K_LISTS *all_klists; */ #define K_STORE K_LIST +// Extended ck wlock to allow >1 minute: seconds waited per single timed attempt in k_longwlock() +#define SINGLE_TIMEOUT_S 10 +/* Attempts per lock before quitfrom() is called: 30 x SINGLE_TIMEOUT_S = 5mins + * for each of the mutex and the rwlock - should never happen, but the longest + * lock: shift summarisation, will get slower over time as the share rate rises + * Currently, only the code that uses the process_pplns_free lock, uses the + * k_longwlock function (via K_KLONGWLOCK) to acquire the lock, so that CKDB doesn't + * exit if a shift summarisation takes longer than the normal timeout limit - nothing else should need to */ +#define TIMEOUT_RETRIES 30 +static inline int wr_timedlock(pthread_rwlock_t *lock, int timeout) +{ + tv_t now; + ts_t abs; + int ret; + + tv_time(&now); + tv_to_ts(&abs, &now); + abs.tv_sec += timeout; + + ret = pthread_rwlock_timedwrlock(lock, &abs); + + return ret; +} + +static inline void k_longwlock(cklock_t *lock, KLIST_FFL_ARGS) +{ + int ret, retries = 0; + +retrym: + ret = _mutex_timedlock(&(lock->mutex), SINGLE_TIMEOUT_S, file, func, line); + if (unlikely(ret)) { + if (likely(ret == ETIMEDOUT)) { + LOGERR("WARNING: Prolonged mutex longlock contention from %s %s:%d, held by %s %s:%d", + file, func, line, lock->mutex.file, lock->mutex.func, lock->mutex.line); + if 
(++retries < TIMEOUT_RETRIES) + goto retrym; + quitfrom(1, file, func, line, "FAILED TO GRAB LONGMUTEX!"); + } + quitfrom(1, file, func, line, "WTF MUTEX ERROR ON LONGLOCK!"); + } + + retries = 0; +retry: + ret = wr_timedlock(&(lock->rwlock.rwlock), SINGLE_TIMEOUT_S); + if (unlikely(ret)) { + if (likely(ret == ETIMEDOUT)) { + LOGERR("WARNING: Prolonged longwrite lock contention from %s %s:%d, held by %s %s:%d", + file, func, line, lock->rwlock.file, lock->rwlock.func, lock->rwlock.line); + if (++retries < TIMEOUT_RETRIES) + goto retry; + quitfrom(1, file, func, line, "FAILED TO GRAB LONGWRITE LOCK!"); + } + quitfrom(1, file, func, line, "WTF ERROR ON LONGWRITE LOCK!"); + } + lock->rwlock.file = file; + lock->rwlock.func = func; + lock->rwlock.line = line; +} +#define ck_KLONGW(_lock) k_longwlock(_lock, __FILE__, __func__, __LINE__) + #if LOCK_CHECK #define LOCK_MAYBE /* The simple lock_check_init check is in case someone incorrectly changes ckdb.c ... @@ -407,6 +467,8 @@ extern K_LISTS *all_klists; #define CHECK_WLOCK(_list) CHECK_LOCK(_list, wlock, \ LOCK_MODE_LOCK, LOCK_TYPE_WRITE) +#define CHECK_KLONGWLOCK(_list) CHECK_LOCK(_list, KLONGW, \ + LOCK_MODE_LOCK, LOCK_TYPE_WRITE) #define CHECK_WUNLOCK(_list) CHECK_LOCK(_list, wunlock, \ LOCK_MODE_UNLOCK, LOCK_TYPE_WRITE) #define CHECK_RLOCK(_list) CHECK_LOCK(_list, rlock, \ @@ -535,6 +597,7 @@ static inline K_ITEM *list_rtail(K_LIST *list) lock_check_init = true; \ } while (0) #define CHECK_WLOCK(_list) ck_wlock((_list)->lock) +#define CHECK_KLONGWLOCK(_list) ck_KLONGW((_list)->lock) #define CHECK_WUNLOCK(_list) ck_wunlock((_list)->lock) #define CHECK_RLOCK(_list) ck_rlock((_list)->lock) #define CHECK_RUNLOCK(_list) ck_runlock((_list)->lock) @@ -563,6 +626,10 @@ static inline K_ITEM *list_rtail(K_LIST *list) CHECK_lock(_list); \ CHECK_WLOCK(_list); \ } while (0) +#define K_KLONGWLOCK(_list) do { \ + CHECK_lock(_list); \ + CHECK_KLONGWLOCK(_list); \ + } while (0) #define K_WUNLOCK(_list) do { \ CHECK_lock(_list); \ 
CHECK_WUNLOCK(_list); \