From e0528a5fb27e58a9da8f1746bcf2dc778eb50130 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 22 Jan 2015 16:57:52 +1100 Subject: [PATCH 1/5] Add error message if ref count drops below zero --- src/stratifier.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/stratifier.c b/src/stratifier.c index d5c052d4..927d7374 100644 --- a/src/stratifier.c +++ b/src/stratifier.c @@ -1104,11 +1104,18 @@ static stratum_instance_t *ref_instance_by_id(sdata_t *sdata, int64_t id) } /* Decrease the reference count of instance. */ -static void __dec_instance_ref(stratum_instance_t *instance) +static void ___dec_instance_ref(stratum_instance_t *instance, const char *file, const char *func, + const int line) { instance->ref--; + if (unlikely(instance->ref < 0)) { + LOGERR("Instance ref count dropped below zero from %s %s:%d", file, func, line); + instance->ref = 0; + } } +#define __dec_instance_ref(instance) ___dec_instance_ref(instance, __FILE__, __func__, __LINE__) + static void dec_instance_ref(sdata_t *sdata, stratum_instance_t *instance) { ck_wlock(&sdata->instance_lock); From c68b30c08edcfd463504c79d0b90e4362d2e3b83 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 22 Jan 2015 17:07:01 +1100 Subject: [PATCH 2/5] Grab the caller of dec_instance_ref for debugging --- src/stratifier.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/stratifier.c b/src/stratifier.c index 927d7374..d837df18 100644 --- a/src/stratifier.c +++ b/src/stratifier.c @@ -1104,25 +1104,19 @@ static stratum_instance_t *ref_instance_by_id(sdata_t *sdata, int64_t id) } /* Decrease the reference count of instance. */ -static void ___dec_instance_ref(stratum_instance_t *instance, const char *file, const char *func, - const int line) +static void _dec_instance_ref(sdata_t *sdata, stratum_instance_t *instance, const char *file, + const char *func, const int line) { - instance->ref--; - if (unlikely(instance->ref < 0)) { + ck_wlock(&sdata->instance_lock); + if (unlikely(--instance->ref < 0)) { LOGERR("Instance ref count dropped below zero from %s %s:%d", file, func, line); instance->ref = 0; } -} - -#define __dec_instance_ref(instance) ___dec_instance_ref(instance, __FILE__, __func__, __LINE__) - -static void dec_instance_ref(sdata_t *sdata, stratum_instance_t *instance) -{ - ck_wlock(&sdata->instance_lock); - __dec_instance_ref(instance); ck_wunlock(&sdata->instance_lock); } +#define dec_instance_ref(sdata, instance) _dec_instance_ref(sdata, instance, __FILE__, __func__, __LINE__) + /* Enter with write instance_lock held */ static stratum_instance_t *__stratum_add_instance(ckpool_t *ckp, int64_t id, int server) { From e11cb5c57e66afa3bb3eb61460c8b97b6aaa7d81 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 22 Jan 2015 17:45:02 +1100 Subject: [PATCH 3/5] Drop clients that are holding a reference count only after they've dropped it --- src/stratifier.c | 60 ++++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/src/stratifier.c b/src/stratifier.c index d837df18..a8347a64 100644 --- a/src/stratifier.c +++ b/src/stratifier.c @@ -244,6 +244,7 @@ struct stratum_instance { char address[INET6_ADDRSTRLEN]; bool subscribed; bool authorised; + bool dropped; bool idle; int reject; /* Indicator that this client is having a run of rejects * or other problem and should be dropped lazily if @@ -1103,6 +1104,32 @@ static stratum_instance_t *ref_instance_by_id(sdata_t *sdata, int64_t id) return instance; } +static void __drop_client(sdata_t *sdata, stratum_instance_t *client, user_instance_t *instance, + int64_t id) +{ + stratum_instance_t *old_client = NULL; + + HASH_DEL(sdata->stratum_instances, client); + if (instance) + DL_DELETE(instance->instances, client); + HASH_FIND(hh, sdata->disconnected_instances, &client->enonce1_64, sizeof(uint64_t), old_client); + /* Only keep around one copy of the old client in server mode */ + if (!client->ckp->proxy && !old_client && client->enonce1_64 && client->authorised) { + LOGNOTICE("Disconnecting client %ld %s %s", id, client->workername, + client->dropped ? "lazily" : ""); + HASH_ADD(hh, sdata->disconnected_instances, enonce1_64, sizeof(uint64_t), client); + sdata->stats.disconnected++; + client->disconnected_time = time(NULL); + } else { + if (client->workername) + LOGNOTICE("Dropping client %ld %s %s", id, client->workername, + client->dropped ? "lazily" : ""); + else + LOGINFO("Dropping workerless client %ld %s", id, client->dropped ? "lazily" : ""); + __add_dead(sdata, client); + } +} + /* Decrease the reference count of instance. */ static void _dec_instance_ref(sdata_t *sdata, stratum_instance_t *instance, const char *file, const char *func, const int line) @@ -1112,6 +1139,10 @@ static void _dec_instance_ref(sdata_t *sdata, stratum_instance_t *instance, cons LOGERR("Instance ref count dropped below zero from %s %s:%d", file, func, line); instance->ref = 0; } + /* See if there are any instances that were dropped that could not be + * moved due to holding a reference and drop them now. */ + if (unlikely(instance->dropped && !instance->ref)) + __drop_client(sdata, instance, instance->user_instance, instance->id); ck_wunlock(&sdata->instance_lock); } @@ -1261,33 +1292,18 @@ static void drop_client(sdata_t *sdata, int64_t id) ck_wlock(&sdata->instance_lock); client = __instance_by_id(sdata, id); - if (client && likely(!client->ref)) { - stratum_instance_t *old_client = NULL; - + if (client) { instance = client->user_instance; if (client->authorised) { dec = true; - client->authorised = false; ckp = client->ckp; } - - HASH_DEL(sdata->stratum_instances, client); - if (instance) - DL_DELETE(instance->instances, client); - HASH_FIND(hh, sdata->disconnected_instances, &client->enonce1_64, sizeof(uint64_t), old_client); - /* Only keep around one copy of the old client in server mode */ - if (!client->ckp->proxy && !old_client && client->enonce1_64 && dec) { - LOGNOTICE("Disconnecting client %ld %s", client->id, client->workername); - HASH_ADD(hh, sdata->disconnected_instances, enonce1_64, sizeof(uint64_t), client); - sdata->stats.disconnected++; - client->disconnected_time = time(NULL); - } else { - if (client->workername) - LOGNOTICE("Dropping client %ld %s", client->id, client->workername); - else - LOGINFO("Dropping workerless client %ld", client->id); - __add_dead(sdata, client); - } + /* If the client is still holding a reference, don't drop them + * now but wait till the reference is dropped */ + if (likely(!client->ref)) + __drop_client(sdata, client, instance, id); + else + client->dropped = true; } /* Old disconnected instances will not have any valid shares so remove From 1624d527b9de84a55ed00da1f84759a2d1523381 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 23 Jan 2015 15:23:07 +1100 Subject: [PATCH 4/5] Use a portable boolean to tell us when ckdb is offline and change it under lock --- src/stratifier.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/src/stratifier.c b/src/stratifier.c index a8347a64..839e70eb 100644 --- a/src/stratifier.c +++ b/src/stratifier.c @@ -286,6 +286,8 @@ struct stratifier_data { /* Serialises sends/receives to ckdb if possible */ pthread_mutex_t ckdb_lock; + bool ckdb_offline; + /* Variable length enonce1 always refers back to a u64 */ union { uint64_t u64; @@ -3326,9 +3328,33 @@ static void parse_ckdb_cmd(ckpool_t __maybe_unused *ckp, const char *cmd) json_decref(val); } +/* Test a value under lock and set it, returning the original value */ +static bool test_and_set(bool *val, pthread_mutex_t *lock) +{ + bool ret; + + mutex_lock(lock); + ret = *val; + *val = true; + mutex_unlock(lock); + + return ret; +} + +static bool test_and_clear(bool *val, pthread_mutex_t *lock) +{ + bool ret; + + mutex_lock(lock); + ret = *val; + *val = false; + mutex_unlock(lock); + + return ret; +} + static void ckdbq_process(ckpool_t *ckp, char *msg) { - static bool failed = false; sdata_t *sdata = ckp->data; char *buf = NULL; @@ -3338,18 +3364,15 @@ static void ckdbq_process(ckpool_t *ckp, char *msg) mutex_unlock(&sdata->ckdb_lock); if (unlikely(!buf)) { - if (!failed) { - failed = true; + if (!test_and_set(&sdata->ckdb_offline, &sdata->ckdb_lock)) LOGWARNING("Failed to talk to ckdb, queueing messages"); - } sleep(5); } } free(msg); - if (failed) { - failed = false; + if (test_and_clear(&sdata->ckdb_offline, &sdata->ckdb_lock)) LOGWARNING("Successfully resumed talking to ckdb"); - } + /* TODO: Process any requests from ckdb that are heartbeat responses * with specific requests. */ if (likely(buf)) { From 0bd96f22e66d035b53d4192e7ef4c9d695469d20 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 23 Jan 2015 15:26:04 +1100 Subject: [PATCH 5/5] Don't queue workerstats while ckdb is offline --- src/stratifier.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/stratifier.c b/src/stratifier.c index 839e70eb..c69221a1 100644 --- a/src/stratifier.c +++ b/src/stratifier.c @@ -3507,6 +3507,11 @@ static void update_workerstats(ckpool_t *ckp, sdata_t *sdata) time_t now_t; ts_t ts_now; + if (sdata->ckdb_offline) { + LOGDEBUG("Not queueing workerstats due to ckdb offline"); + return; + } + if (++sdata->stats.userstats_cycle > 0x1f) sdata->stats.userstats_cycle = 0;