From db77dfc7889ad8f34ab1a79dbe36002cd693398e Mon Sep 17 00:00:00 2001 From: kanoi Date: Tue, 16 Aug 2016 13:54:26 +1000 Subject: [PATCH 01/23] ckdb - make share processing thread safe and enable a cmd socket during key_update --- src/ckdb.c | 715 ++++++++++++++++++++++++++++++------------------ src/ckdb.h | 33 +-- src/ckdb_cmd.c | 14 +- src/ckdb_data.c | 173 ++++++++---- src/ckdb_dbio.c | 231 +++++++++++----- 5 files changed, 739 insertions(+), 427 deletions(-) diff --git a/src/ckdb.c b/src/ckdb.c index 77b785f0..3f4f9350 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -117,8 +117,15 @@ static bool blistener_using_data; static bool breakdown_using_data; static bool replier_using_data; +// Define the array size for thread data +#define THREAD_LIMIT 99 +/* Use -Q to set it higher + * Setting it higher can degrade performance if the server can't + * handle the extra locking or is swapping */ +static int queue_threads = 1; // -B to override calculated value static int breakdown_threads = -1; +#define BREAKDOWN_RATIO 3 static int reload_breakdown_count = 0; static int cmd_breakdown_count = 0; /* Lock for access to *breakdown_count @@ -275,6 +282,10 @@ int64_t confirm_last_workinfoid; #define WORKINFO_AGE 660 static tv_t reload_timestamp; +// Shared by threads - accessed under breakqueue_free lock +static uint64_t reload_processed = 0; +// Shared by threads - accessed under workqueue_free lock +static uint64_t workqueue_proc0 = 0, workqueue_proc1 = 0; /* Allow overriding the workinfoid range used in the db load of * workinfo and sharesummary */ @@ -297,7 +308,7 @@ bool prereload = true; // Different input data handling bool reloading = false; // Start marks processing during a larger reload -static bool reloaded_N_files = false; +bool reloaded_N_files = false; // Data load is complete bool startup_complete = false; // Set to true when pool0 completes, pool0 = socket data during reload @@ -389,6 +400,9 @@ K_STORE *logqueue_store; K_LIST *msgline_free; K_STORE *msgline_store; 
+// This can be set with the -q option +static int reload_queue_limit = RELOAD_QUEUE_LIMIT; + // BREAKQUEUE K_LIST *breakqueue_free; K_STORE *reload_breakqueue_store; @@ -498,6 +512,8 @@ K_TREE *users_root; K_TREE *userid_root; K_LIST *users_free; K_STORE *users_store; +// Emulate a list for lock checking +K_LIST *users_db_free; // USERATTS K_TREE *useratts_root; @@ -556,6 +572,8 @@ tv_t last_bc; // current network diff double current_ndiff; bool txn_tree_store = true; +// avoid trying to run 2 ages at the same time +bool workinfo_age_lock = false; // SHARES shares.id.json={...} K_TREE *shares_root; @@ -3716,7 +3734,7 @@ static void *breaker(void *arg) // Is this a reload thread or a cmd thread? reload = *(bool *)(arg); if (reload) { - queue_limit = RELOAD_QUEUE_LIMIT; + queue_limit = reload_queue_limit; queue_sleep = RELOAD_QUEUE_SLEEP_MS; when_add.tv_sec = RELOAD_QUEUE_SLEEP_MS / 1000; when_add.tv_nsec = (RELOAD_QUEUE_SLEEP_MS % 1000) * 1000000; @@ -4353,7 +4371,7 @@ static void make_a_shift_mark() int32_t prev_height; char wi_bits[TXT_SML+1]; bool was_block = false, ok, oc_look = true; - char cd_buf[DATE_BUFSIZ], cd_buf2[DATE_BUFSIZ], cd_buf3[DATE_BUFSIZ]; + char cd_buf[DATE_BUFSIZ], cd_buf2[DATE_BUFSIZ]; int used_wid; /* If there are no CURRENT marks, make the first one by @@ -4433,11 +4451,10 @@ static void make_a_shift_mark() if (ss_item) { tv_to_buf(&(sharesummary->lastshareacc), cd_buf, sizeof(cd_buf)); tv_to_buf(&(sharesummary->lastshare), cd_buf2, sizeof(cd_buf2)); - tv_to_buf(&(sharesummary->createdate), cd_buf3, sizeof(cd_buf3)); - LOGDEBUG("%s() last sharesummary %s/%s/%"PRId64"/%s/%s/%s", + LOGDEBUG("%s() last sharesummary %s/%s/%"PRId64"/%s/%s", __func__, sharesummary->complete, sharesummary->workername, - ss_age_wid, cd_buf, cd_buf2, cd_buf3); + ss_age_wid, cd_buf, cd_buf2); } LOGDEBUG("%s() age sharesummary limit wid %"PRId64, __func__, ss_age_wid); @@ -4676,14 +4693,12 @@ static void make_a_shift_mark() cd_buf, sizeof(cd_buf)); 
tv_to_buf(&(sharesummary->lastshare), cd_buf2, sizeof(cd_buf2)); - tv_to_buf(&(sharesummary->createdate), - cd_buf3, sizeof(cd_buf3)); LOGEMERG("%s() ERR unaged sharesummary " - "%s/%s/%"PRId64"/%s/%s/%s", + "%s/%s/%"PRId64"/%s/%s", __func__, sharesummary->complete, sharesummary->workername, sharesummary->workinfoid, - cd_buf, cd_buf2, cd_buf3); + cd_buf, cd_buf2); return; } } @@ -5367,6 +5382,34 @@ static void *process_socket(void *arg) DATA_BREAKQUEUE(bq, bq_item); DATA_MSGLINE(msgline, bq->ml_item); + + // Limited commands available during key_update + if (key_update) { + switch (bq->cmdnum) { + case CMD_TERMINATE: + case CMD_PING: + case CMD_VERSION: + case CMD_LOGLEVEL: + case CMD_FLUSH: + case CMD_STATS: + case CMD_SHSTA: + case CMD_CHKPASS: + case CMD_GETATTS: + case CMD_HOMEPAGE: + break; + default: + snprintf(reply, sizeof(reply), + "%s.%ld.unavailable.%s", + msgline->id, + bq->now.tv_sec, + msgline->cmd); + setnow(&(msgline->processed)); + ckdb_unix_msg(REPLIER_CMD, bq->sockd, + reply, msgline, true); + goto skippy; + } + } + if (SEQALL_LOG) { K_ITEM *seqall; if (msgline->trf_root) { @@ -5378,6 +5421,7 @@ static void *process_socket(void *arg) } } } + replied = btc = false; switch (bq->cmdnum) { case CMD_AUTH: @@ -5584,7 +5628,7 @@ static void *process_socket(void *arg) case CMD_BLOCKSTATUS: case CMD_MARKS: case CMD_QUERY: - if (!startup_complete) { + if (!startup_complete && !key_update) { snprintf(reply, sizeof(reply), "%s.%ld.loading.%s", msgline->id, @@ -5729,6 +5773,9 @@ static void *process_socket(void *arg) msgline, true); break; } + +skippy: + if (bq->sockd >= 0) dec_sockd = true; else @@ -5905,28 +5952,167 @@ static void *socketer(void *arg) return NULL; } -static void *process_reload(__maybe_unused void *arg) +static void process_reload_item(PGconn *conn, K_ITEM *bq_item) { - PGconn *conn = NULL; MSGLINE *msgline = NULL; - K_ITEM *bq_item = NULL; BREAKQUEUE *bq = NULL; enum cmd_values cmdnum; char *ans, *st = NULL; + + DATA_BREAKQUEUE(bq, 
bq_item); + DATA_MSGLINE(msgline, bq->ml_item); + if (SEQALL_LOG) { + K_ITEM *seqall; + if (msgline->trf_root) { + seqall = find_transfer(msgline->trf_root, SEQALL); + if (seqall) { + LOGNOTICE("%s() SEQALL %d %s", + __func__, bq->cmdnum, + transfer_data(seqall)); + } + } + } + switch (bq->cmdnum) { + // Ignore + case CMD_REPLY: + case CMD_ALERTEVENT: + case CMD_ALERTOVENT: + break; + // Shouldn't be there + case CMD_TERMINATE: + case CMD_PING: + case CMD_VERSION: + case CMD_LOGLEVEL: + case CMD_FLUSH: + // Non pool commands, shouldn't be there + case CMD_ADDUSER: + case CMD_NEWPASS: + case CMD_CHKPASS: + case CMD_2FA: + case CMD_USERSET: + case CMD_WORKERSET: + case CMD_BLOCKLIST: + case CMD_BLOCKSTATUS: + case CMD_NEWID: + case CMD_PAYMENTS: + case CMD_WORKERS: + case CMD_ALLUSERS: + case CMD_HOMEPAGE: + case CMD_GETATTS: + case CMD_SETATTS: + case CMD_EXPATTS: + case CMD_GETOPTS: + case CMD_SETOPTS: + case CMD_DSP: + case CMD_STATS: + case CMD_PPLNS: + case CMD_PPLNS2: + case CMD_PAYOUTS: + case CMD_MPAYOUTS: + case CMD_SHIFTS: + case CMD_USERSTATUS: + case CMD_MARKS: + case CMD_PSHIFT: + case CMD_SHSTA: + case CMD_USERINFO: + case CMD_BTCSET: + case CMD_QUERY: + case CMD_LOCKS: + case CMD_EVENTS: + case CMD_HIGH: + LOGERR("%s() INVALID message line %"PRIu64 + " ignored '%.42s...", + __func__, bq->count, + st = safe_text(msgline->msg)); + FREENULL(st); + break; + case CMD_HEARTBEAT: + case CMD_POOLSTAT: + case CMD_USERSTAT: + case CMD_WORKERSTAT: + case CMD_BLOCK: + if (key_update) + break; + case CMD_AUTH: + case CMD_ADDRAUTH: + if (confirm_sharesummary) + break; + case CMD_SHARELOG: + // This will return the same cmdnum or DUP + cmdnum = process_seq(msgline); + if (cmdnum != CMD_DUPSEQ) { + ans = ckdb_cmds[msgline->which_cmds].func(conn, + msgline->cmd, + msgline->id, + &(msgline->now), + by_default, + (char *)__func__, + inet_default, + &(msgline->cd), + msgline->trf_root, true); + FREENULL(ans); + } + // TODO: time stats from each msgline tv_t + break; + 
default: + // Force this switch to be updated if new cmds are added + quithere(1, "%s line %"PRIu64" '%s' - not " + "handled by reload", + bq->filename, bq->count, + st = safe_text_nonull(msgline->cmd)); + // Won't get here ... + FREENULL(st); + break; + } + + if (bq->ml_item) { + free_msgline_data(bq->ml_item, true, true); + K_WLOCK(msgline_free); + k_add_head(msgline_free, bq->ml_item); + K_WUNLOCK(msgline_free); + bq->ml_item = NULL; + } + free(bq->buf); +} + +static void *process_reload(__maybe_unused void *arg) +{ + pthread_t *procrel_pt; + PGconn *conn = NULL; + K_ITEM *bq_item = NULL; + char buf[128]; time_t now; - uint64_t processed = 0; + int i, *n, zeros; ts_t when, when_add; int ret; - pthread_detach(pthread_self()); + if (arg) + i = *(int *)(arg); + else { + pthread_detach(pthread_self()); - when_add.tv_sec = RELOAD_QUEUE_SLEEP_MS / 1000; - when_add.tv_nsec = (RELOAD_QUEUE_SLEEP_MS % 1000) * 1000000; + n = malloc(queue_threads * sizeof(int)); + procrel_pt = malloc(queue_threads * sizeof(*procrel_pt)); + for (i = 1; i < queue_threads; i++) { + n[i] = i; + create_pthread(&(procrel_pt[i]), process_reload, &(n[i])); + } + i = 0; - LOCK_INIT("db_procreload"); - rename_proc("db_procreload"); + LOGNOTICE("%s() starting", __func__); + } - LOGNOTICE("%s() starting", __func__); + if (queue_threads < 10) + zeros = 1; + else + zeros = (int)log10(queue_threads) + 1; + + snprintf(buf, sizeof(buf), "db_p%0*drload", zeros, i); + LOCK_INIT(buf); + rename_proc(buf); + + when_add.tv_sec = RELOAD_QUEUE_SLEEP_MS / 1000; + when_add.tv_nsec = (RELOAD_QUEUE_SLEEP_MS % 1000) * 1000000; conn = dbconnect(); now = time(NULL); @@ -5934,8 +6120,10 @@ static void *process_reload(__maybe_unused void *arg) while (!everyone_die) { K_WLOCK(breakqueue_free); bq_item = k_unlink_head(reload_done_breakqueue_store); - if (bq_item) + if (bq_item) { reload_processing++; + reload_processed++; + } K_WUNLOCK(breakqueue_free); if (!bq_item) { @@ -5958,8 +6146,6 @@ static void 
*process_reload(__maybe_unused void *arg) continue; } - processed++; - // Don't keep a connection for more than ~10s ... of processing if ((time(NULL) - now) > 10) { PQfinish(conn); @@ -5967,120 +6153,7 @@ static void *process_reload(__maybe_unused void *arg) now = time(NULL); } - DATA_BREAKQUEUE(bq, bq_item); - DATA_MSGLINE(msgline, bq->ml_item); - if (SEQALL_LOG) { - K_ITEM *seqall; - if (msgline->trf_root) { - seqall = find_transfer(msgline->trf_root, SEQALL); - if (seqall) { - LOGNOTICE("%s() SEQALL %d %s", - __func__, bq->cmdnum, - transfer_data(seqall)); - } - } - } - switch (bq->cmdnum) { - // Ignore - case CMD_REPLY: - case CMD_ALERTEVENT: - case CMD_ALERTOVENT: - break; - // Shouldn't be there - case CMD_TERMINATE: - case CMD_PING: - case CMD_VERSION: - case CMD_LOGLEVEL: - case CMD_FLUSH: - // Non pool commands, shouldn't be there - case CMD_ADDUSER: - case CMD_NEWPASS: - case CMD_CHKPASS: - case CMD_2FA: - case CMD_USERSET: - case CMD_WORKERSET: - case CMD_BLOCKLIST: - case CMD_BLOCKSTATUS: - case CMD_NEWID: - case CMD_PAYMENTS: - case CMD_WORKERS: - case CMD_ALLUSERS: - case CMD_HOMEPAGE: - case CMD_GETATTS: - case CMD_SETATTS: - case CMD_EXPATTS: - case CMD_GETOPTS: - case CMD_SETOPTS: - case CMD_DSP: - case CMD_STATS: - case CMD_PPLNS: - case CMD_PPLNS2: - case CMD_PAYOUTS: - case CMD_MPAYOUTS: - case CMD_SHIFTS: - case CMD_USERSTATUS: - case CMD_MARKS: - case CMD_PSHIFT: - case CMD_SHSTA: - case CMD_USERINFO: - case CMD_BTCSET: - case CMD_QUERY: - case CMD_LOCKS: - case CMD_EVENTS: - case CMD_HIGH: - LOGERR("%s() INVALID message line %"PRIu64 - " ignored '%.42s...", - __func__, bq->count, - st = safe_text(msgline->msg)); - FREENULL(st); - break; - case CMD_HEARTBEAT: - case CMD_POOLSTAT: - case CMD_USERSTAT: - case CMD_WORKERSTAT: - case CMD_BLOCK: - if (key_update) - break; - case CMD_AUTH: - case CMD_ADDRAUTH: - if (confirm_sharesummary) - break; - case CMD_SHARELOG: - // This will return the same cmdnum or DUP - cmdnum = process_seq(msgline); - if 
(cmdnum != CMD_DUPSEQ) { - ans = ckdb_cmds[msgline->which_cmds].func(conn, - msgline->cmd, - msgline->id, - &(msgline->now), - by_default, - (char *)__func__, - inet_default, - &(msgline->cd), - msgline->trf_root, true); - FREENULL(ans); - } - // TODO: time stats from each msgline tv_t - break; - default: - // Force this switch to be updated if new cmds are added - quithere(1, "%s line %"PRIu64" '%s' - not " - "handled by reload", - bq->filename, bq->count, - st = safe_text_nonull(msgline->cmd)); - // Won't get here ... - FREENULL(st); - break; - } - - if (bq->ml_item) { - free_msgline_data(bq->ml_item, true, true); - K_WLOCK(msgline_free); - k_add_head(msgline_free, bq->ml_item); - K_WUNLOCK(msgline_free); - bq->ml_item = NULL; - } - free(bq->buf); + process_reload_item(conn, bq_item); K_WLOCK(breakqueue_free); reload_processing--; @@ -6092,7 +6165,13 @@ static void *process_reload(__maybe_unused void *arg) PQfinish(conn); - LOGNOTICE("%s() exiting, processed %"PRIu64, __func__, processed); + if (!arg) { + for (i = 1; i < queue_threads; i++) + join_pthread(procrel_pt[i]); + + LOGNOTICE("%s() exiting, processed %"PRIu64, + __func__, reload_processed); + } return NULL; } @@ -6164,7 +6243,7 @@ static void reload_line(char *filename, char *buf, uint64_t count) pthread_cond_signal(&bq_reload_waitcond); mutex_unlock(&bq_reload_waitlock); - while (qcount > RELOAD_QUEUE_LIMIT) { + while (qcount > reload_queue_limit) { cksleep_ms(RELOAD_QUEUE_SLEEP_MS); K_RLOCK(breakqueue_free); qcount = reload_breakqueue_store->count; @@ -6546,129 +6625,57 @@ static void free_lost(SEQDATA *seqdata) } } -// TODO: equivalent of api_allow -static void *listener(void *arg) +static void *pqproc(void *arg) { + /* Process queued work - ensure pool0 is emptied first, + * even if there is pending pool0 data being processed by breaker() */ + static bool pool0 = true; + static tv_t wq_stt, wq_fin; + + pthread_t *queue_pt; PGconn *conn = NULL; - pthread_t log_pt; - pthread_t sock_pt; - pthread_t 
summ_pt; - pthread_t mark_pt; - pthread_t break_pt; K_ITEM *wq_item; time_t now = 0; - int bq, bqp, bqd, wq0count, wqcount, wqgot; - char ooo_buf[256]; - tv_t wq_stt, wq_fin; - double min, sec; + bool switch_msg = false, complete_msg; + int wqcount, wqgot; + char buf[128]; + double min, sec = 0; SEQSET *seqset = NULL; SEQDATA *seqdata; K_ITEM *ss_item; - int cpus, i; - bool reloader, cmder, pool0, switch_msg = false; - uint64_t proc0 = 0, proc1 = 0; + int i, *n, zeros; ts_t when, when_add; int ret; - pthread_detach(pthread_self()); - - when_add.tv_sec = CMD_QUEUE_SLEEP_MS / 1000; - when_add.tv_nsec = (CMD_QUEUE_SLEEP_MS % 1000) * 1000000; - - LOCK_INIT("db_plistener"); - rename_proc("db_plistener"); - - logqueue_free = k_new_list("LogQueue", sizeof(LOGQUEUE), - ALLOC_LOGQUEUE, LIMIT_LOGQUEUE, true); - logqueue_store = k_new_store(logqueue_free); - - breakqueue_free = k_new_list("BreakQueue", sizeof(BREAKQUEUE), - ALLOC_BREAKQUEUE, LIMIT_BREAKQUEUE, true); - reload_breakqueue_store = k_new_store(breakqueue_free); - reload_done_breakqueue_store = k_new_store(breakqueue_free); - cmd_breakqueue_store = k_new_store(breakqueue_free); - cmd_done_breakqueue_store = k_new_store(breakqueue_free); - -#if LOCK_CHECK - DLPRIO(logqueue, 94); - DLPRIO(breakqueue, PRIO_TERMINAL); -#endif - if (breakdown_threads <= 0) { - cpus = sysconf(_SC_NPROCESSORS_ONLN) ? : 1; - breakdown_threads = (int)(cpus / 3) ? 
: 1; - } - LOGWARNING("%s(): creating %d*2 breaker threads ...", - __func__, breakdown_threads); - reloader = true; - for (i = 0; i < breakdown_threads; i++) - create_pthread(&break_pt, breaker, &reloader); - cmder = false; - for (i = 0; i < breakdown_threads; i++) - create_pthread(&break_pt, breaker, &cmder); - - if (no_data_log == false) - create_pthread(&log_pt, logger, NULL); - - if (!confirm_sharesummary) - create_pthread(&sock_pt, socketer, arg); - - create_pthread(&summ_pt, summariser, NULL); - - create_pthread(&mark_pt, marker, NULL); - - plistener_using_data = true; + if (!arg) { + setnow(&wq_stt); - if (!setup_data()) { - if (!everyone_die) { - LOGEMERG("ABORTING"); - everyone_die = true; + n = malloc(queue_threads * sizeof(int)); + queue_pt = malloc(queue_threads * sizeof(*queue_pt)); + for (i = 1; i < queue_threads; i++) { + n[i] = i; + create_pthread(&(queue_pt[i]), pqproc, &(n[i])); } - goto sayonara; - } - - if (!everyone_die) { - K_RLOCK(workqueue_free); - wq0count = pool0_workqueue_store->count; - wqcount = pool_workqueue_store->count; - K_RUNLOCK(workqueue_free); - K_RLOCK(breakqueue_free); - bq = cmd_breakqueue_store->count; - bqp = cmd_processing; - bqd = cmd_done_breakqueue_store->count; - K_RUNLOCK(breakqueue_free); - - LOGWARNING("reload shares OoO %s", - ooo_status(ooo_buf, sizeof(ooo_buf))); - sequence_report(true); + } else { + i = *(int *)(arg); - LOGWARNING("%s(): ckdb ready, pool queue %d (%d/%d/%d/%d/%d)", - __func__, bq+bqp+bqd+wq0count+wqcount, - bq, bqp, bqd, wq0count, wqcount); + if (queue_threads < 10) + zeros = 1; + else + zeros = (int)log10(queue_threads) + 1; - /* Until startup_complete, the values should be ignored - * Setting them to 'now' means that they won't time out - * until after startup_complete */ - ck_wlock(&last_lock); - setnow(&last_heartbeat); - copy_tv(&last_workinfo, &last_heartbeat); - copy_tv(&last_share, &last_heartbeat); - copy_tv(&last_share_acc, &last_heartbeat); - copy_tv(&last_share_inv, 
&last_heartbeat); - copy_tv(&last_auth, &last_heartbeat); - ck_wunlock(&last_lock); + snprintf(buf, sizeof(buf), "db_p%0*dqproc", zeros, i); + LOCK_INIT(buf); + rename_proc(buf); + } - startup_complete = true; + when_add.tv_sec = CMD_QUEUE_SLEEP_MS / 1000; + when_add.tv_nsec = (CMD_QUEUE_SLEEP_MS % 1000) * 1000000; - setnow(&wq_stt); - conn = dbconnect(); - now = time(NULL); - wqgot = 0; - } + now = time(NULL); + conn = dbconnect(); + wqgot = 0; - LOGNOTICE("%s() processing pool0", __func__); - /* Process queued work - ensure pool0 is emptied first, - * even if there is pending pool0 data being processed by breaker() */ - pool0 = true; // Override checking until pool0 is complete wqcount = -1; while (!everyone_die) { @@ -6688,17 +6695,24 @@ static void *listener(void *arg) wq_item = k_unlink_head(pool_workqueue_store); wqcount = pool_workqueue_store->count; } + + if (wqcount == 0 && wq_stt.tv_sec != 0L) + setnow(&wq_fin); + + if (wq_item) { + if (pool0) + workqueue_proc0++; + else + workqueue_proc1++; + } K_WUNLOCK(workqueue_free); if (switch_msg) { switch_msg = false; LOGNOTICE("%s() pool0 complete, processed %"PRIu64, - __func__, proc0); + __func__, workqueue_proc0); } - if (wqcount == 0 && wq_stt.tv_sec != 0L) - setnow(&wq_fin); - /* Don't keep a connection for more than ~10s or ~10000 items * but always have a connection open */ if ((time(NULL) - now) > 10 || wqgot > 10000) { @@ -6709,24 +6723,27 @@ static void *listener(void *arg) } if (wq_item) { - if (pool0) - proc0++; - else - proc1++; wqgot++; process_queued(conn, wq_item); tick(); } + complete_msg = false; + K_WLOCK(workqueue_free); if (wqcount == 0 && wq_stt.tv_sec != 0L) { sec = tvdiff(&wq_fin, &wq_stt); - min = floor(sec / 60.0); - sec -= min * 60.0; - LOGWARNING("pool queue completed %.0fm %.3fs", min, sec); + complete_msg = true; // Used as the flag to display the message once wq_stt.tv_sec = 0L; reload_queue_complete = true; } + K_WUNLOCK(workqueue_free); + if (complete_msg) { + min = floor(sec / 
60.0); + sec -= min * 60.0; + LOGWARNING("%s() pool queue completed %.0fm %.3fs", + __func__, min, sec); + } /* Checked outside lock but only changed under lock * This avoids taking out the lock repeatedly and the cleanup @@ -6771,15 +6788,127 @@ static void *listener(void *arg) } } -sayonara: + if (conn) + PQfinish(conn); + + if (!arg) { + for (i = 1; i < queue_threads; i++) + join_pthread(queue_pt[i]); + } + + return NULL; +} + +static void *listener(void *arg) +{ + pthread_t log_pt; + pthread_t sock_pt; + pthread_t summ_pt; + pthread_t mark_pt; + pthread_t break_pt; + int bq, bqp, bqd, wq0count, wqcount; + char ooo_buf[256]; + char buf[128]; + int cpus, i; + bool reloader, cmder; + + pthread_detach(pthread_self()); + + snprintf(buf, sizeof(buf), "db_p0qproc"); + LOCK_INIT(buf); + rename_proc(buf); + + logqueue_free = k_new_list("LogQueue", sizeof(LOGQUEUE), + ALLOC_LOGQUEUE, LIMIT_LOGQUEUE, true); + logqueue_store = k_new_store(logqueue_free); + + breakqueue_free = k_new_list("BreakQueue", sizeof(BREAKQUEUE), + ALLOC_BREAKQUEUE, LIMIT_BREAKQUEUE, true); + reload_breakqueue_store = k_new_store(breakqueue_free); + reload_done_breakqueue_store = k_new_store(breakqueue_free); + cmd_breakqueue_store = k_new_store(breakqueue_free); + cmd_done_breakqueue_store = k_new_store(breakqueue_free); + +#if LOCK_CHECK + DLPRIO(logqueue, 94); + DLPRIO(breakqueue, PRIO_TERMINAL); +#endif + if (breakdown_threads <= 0) { + cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (cpus < 1) + cpus = 1; + breakdown_threads = (int)(cpus / BREAKDOWN_RATIO) ? 
: 1; + } + LOGWARNING("%s(): creating %d*2 breaker threads ...", + __func__, breakdown_threads); + reloader = true; + for (i = 0; i < breakdown_threads; i++) + create_pthread(&break_pt, breaker, &reloader); + cmder = false; + for (i = 0; i < breakdown_threads; i++) + create_pthread(&break_pt, breaker, &cmder); + + if (no_data_log == false) + create_pthread(&log_pt, logger, NULL); + + if (!confirm_sharesummary) + create_pthread(&sock_pt, socketer, arg); + + create_pthread(&summ_pt, summariser, NULL); + + create_pthread(&mark_pt, marker, NULL); + + plistener_using_data = true; + + if (!setup_data()) { + if (!everyone_die) { + LOGEMERG("ABORTING"); + everyone_die = true; + } + } + + if (!everyone_die) { + K_RLOCK(workqueue_free); + wq0count = pool0_workqueue_store->count; + wqcount = pool_workqueue_store->count; + K_RUNLOCK(workqueue_free); + K_RLOCK(breakqueue_free); + bq = cmd_breakqueue_store->count; + bqp = cmd_processing; + bqd = cmd_done_breakqueue_store->count; + K_RUNLOCK(breakqueue_free); + + LOGWARNING("reload shares OoO %s", + ooo_status(ooo_buf, sizeof(ooo_buf))); + sequence_report(true); + + LOGWARNING("%s(): ckdb ready, pool queue %d (%d/%d/%d/%d/%d)", + __func__, bq+bqp+bqd+wq0count+wqcount, + bq, bqp, bqd, wq0count, wqcount); + + /* Until startup_complete, the values should be ignored + * Setting them to 'now' means that they won't time out + * until after startup_complete */ + ck_wlock(&last_lock); + setnow(&last_heartbeat); + copy_tv(&last_workinfo, &last_heartbeat); + copy_tv(&last_share, &last_heartbeat); + copy_tv(&last_share_acc, &last_heartbeat); + copy_tv(&last_share_inv, &last_heartbeat); + copy_tv(&last_auth, &last_heartbeat); + ck_wunlock(&last_lock); + + startup_complete = true; + + LOGNOTICE("%s() processing pool0", __func__); + pqproc(NULL); + } + LOGNOTICE("%s() exiting, pool0 %"PRIu64" pool %"PRIu64, - __func__, proc0, proc1); + __func__, workqueue_proc0, workqueue_proc1); plistener_using_data = false; - if (conn) - PQfinish(conn); - 
POOLINSTANCE_RESET_MSG("exiting"); return NULL; @@ -7062,14 +7191,14 @@ static void update_check(int64_t markerid_stt, int64_t markerid_fin) LOGWARNING("update complete %.0fm %.3fs", min, sec); } -static void update_keysummary() +static void update_keysummary(ckpool_t *ckp) { int64_t markerid_stt, markerid_fin; char *tmp, *minus; tv_t db_stt, db_fin; - pthread_t break_pt; + pthread_t break_pt, sock_pt; double min, sec; - bool reloader; + bool reloader, cmder; int cpus, i; // Simple value check to abort early @@ -7132,14 +7261,20 @@ static void update_keysummary() DLPRIO(breakqueue, PRIO_TERMINAL); #endif if (breakdown_threads <= 0) { - cpus = sysconf(_SC_NPROCESSORS_ONLN) ? : 1; - breakdown_threads = (int)(cpus / 3) ? : 1; + cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (cpus < 1) + cpus = 1; + breakdown_threads = (int)(cpus / BREAKDOWN_RATIO) ? : 1; } - LOGWARNING("%s(): creating %d breaker threads ...", + LOGWARNING("%s(): creating %d+1 breaker threads ...", __func__, breakdown_threads); reloader = true; for (i = 0; i < breakdown_threads; i++) create_pthread(&break_pt, breaker, &reloader); + cmder = false; + // Only needs one + for (i = 0; i < 1; i++) + create_pthread(&break_pt, breaker, &cmder); alloc_storage(); @@ -7147,6 +7282,8 @@ static void update_keysummary() setnow(&db_stt); + create_pthread(&sock_pt, socketer, &(ckp->main)); + if (!getdata1() || everyone_die) return; @@ -7773,6 +7910,8 @@ static struct option long_options[] = { { "name", required_argument, 0, 'n' }, { "dbpass", required_argument, 0, 'p' }, { "btc-pass", required_argument, 0, 'P' }, + { "reload-queue-limit", required_argument, 0, 'q' }, + { "queue-threads", required_argument, 0, 'Q' }, { "ckpool-logdir", required_argument, 0, 'r' }, { "logdir", required_argument, 0, 'R' }, { "sockdir", required_argument, 0, 's' }, @@ -7822,7 +7961,7 @@ int main(int argc, char **argv) memset(&ckp, 0, sizeof(ckp)); ckp.loglevel = LOG_NOTICE; - while ((c = getopt_long(argc, argv, 
"a:b:B:c:d:D:ghi:IkK:l:L:mM:n:p:P:r:R:s:S:t:Tu:U:vw:yY:", long_options, &i)) != -1) { + while ((c = getopt_long(argc, argv, "a:b:B:c:d:D:ghi:IkK:l:L:mM:n:p:P:q:Q:r:R:s:S:t:Tu:U:vw:yY:", long_options, &i)) != -1) { switch(c) { case '?': case ':': @@ -7850,11 +7989,11 @@ int main(int argc, char **argv) case 'B': { int bt = atoi(optarg); - if (bt < 1) { + if (bt < 1 || bt > THREAD_LIMIT) { quit(1, "Invalid breakdown " "thread count %d " - "- must be > 0", - bt); + "- must be >0 and <=%d", + bt, THREAD_LIMIT); } breakdown_threads = bt; } @@ -7985,6 +8124,29 @@ int main(int argc, char **argv) while (*kill) *(kill++) = '\0'; break; + case 'q': + { + int rql = atoi(optarg); + if (rql < 1) { + quit(1, "Invalid reload queue " + "limit %d - must be > 0", + rql); + } + reload_queue_limit = rql; + } + break; + case 'Q': + { + int qt = atoi(optarg); + if (qt < 1 || qt > THREAD_LIMIT) { + quit(1, "Invalid queue " + "thread count %d " + "- must be >0 and <=%d", + qt, THREAD_LIMIT); + } + queue_threads = qt; + } + break; case 'r': restorefrom = strdup(optarg); break; @@ -8156,11 +8318,13 @@ int main(int argc, char **argv) // Emulate a list for lock checking process_pplns_free = k_lock_only_list("ProcessPPLNS"); workers_db_free = k_lock_only_list("WorkersDB"); + users_db_free = k_lock_only_list("UsersDB"); event_limits_free = k_lock_only_list("EventLimits"); #if LOCK_CHECK DLPRIO(process_pplns, 99); DLPRIO(workers_db, 98); + DLPRIO(users_db, 97); DLPRIO(event_limits, 46); // events-2 #endif @@ -8173,7 +8337,12 @@ int main(int argc, char **argv) } if (key_update) { - update_keysummary(); + ckp.main.sockname = strdup("klistener"); + write_namepid(&ckp.main); + create_process_unixsock(&ckp.main); + fcntl(ckp.main.us.sockd, F_SETFD, FD_CLOEXEC); + + update_keysummary(&ckp); everyone_die = true; } else if (confirm_sharesummary) { // TODO: add a system lock to stop running 2 at once? 
diff --git a/src/ckdb.h b/src/ckdb.h index 8f8b760d..20796597 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -55,7 +55,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.305" +#define CKDB_VERSION DB_VERSION"-2.400" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ @@ -338,6 +338,8 @@ extern bool db_load_complete; extern bool prereload; // Different input data handling extern bool reloading; +// Start marks processing during a larger reload +extern bool reloaded_N_files; // Data load is complete extern bool startup_complete; // Tell everyone to die @@ -1624,6 +1626,8 @@ extern K_TREE *users_root; extern K_TREE *userid_root; extern K_LIST *users_free; extern K_STORE *users_store; +// Emulate a list for lock checking +extern K_LIST *users_db_free; // USERATTS typedef struct useratts { @@ -1890,6 +1894,8 @@ extern tv_t last_bc; // current network diff extern double current_ndiff; extern bool txn_tree_store; +// avoid trying to run 2 ages at the same time +extern bool workinfo_age_lock; // Offset in binary coinbase1 of the block number #define BLOCKNUM_OFFSET 42 @@ -2013,7 +2019,6 @@ typedef struct sharesummary { tv_t lastshareacc; double lastdiffacc; char complete[TXT_FLAG+1]; - MODIFYDATECONTROLPOINTERS; } SHARESUMMARY; /* After this many shares added, we need to update the DB record @@ -2697,7 +2702,6 @@ typedef struct keysharesummary { tv_t lastshareacc; double lastdiffacc; char complete[TXT_FLAG+1]; - SIMPLEDATECONTROLPOINTERS; } KEYSHARESUMMARY; #define ALLOC_KEYSHARESUMMARY 1000 @@ -3142,10 +3146,9 @@ extern cmp_t cmp_workinfo_height(K_ITEM *a, K_ITEM *b); #define find_workinfo(_wid, _ctx) _find_workinfo(_wid, false, _ctx); extern K_ITEM *_find_workinfo(int64_t workinfoid, bool gotlock, K_TREE_CTX *ctx); extern K_ITEM *next_workinfo(int64_t workinfoid, K_TREE_CTX *ctx); -extern bool workinfo_age(int64_t workinfoid, char *poolinstance, char *by, - char *code, char *inet, tv_t *cd, 
tv_t *ss_first, - tv_t *ss_last, int64_t *ss_count, int64_t *s_count, - int64_t *s_diff); +extern bool workinfo_age(int64_t workinfoid, char *poolinstance, tv_t *cd, + tv_t *ss_first, tv_t *ss_last, int64_t *ss_count, + int64_t *s_count, int64_t *s_diff); extern double coinbase_reward(int32_t height); extern double workinfo_pps(K_ITEM *w_item, int64_t workinfoid); extern cmp_t cmp_shares(K_ITEM *a, K_ITEM *b); @@ -3166,8 +3169,7 @@ extern void zero_sharesummary(SHARESUMMARY *row); extern K_ITEM *_find_sharesummary(int64_t userid, char *workername, int64_t workinfoid, bool pool); extern K_ITEM *find_last_sharesummary(int64_t userid, char *workername); -extern void auto_age_older(int64_t workinfoid, char *poolinstance, char *by, - char *code, char *inet, tv_t *cd); +extern void auto_age_older(int64_t workinfoid, char *poolinstance, tv_t *cd); #define dbhash2btchash(_hash, _buf, _siz) \ _dbhash2btchash(_hash, _buf, _siz, WHERE_FFL_HERE) void _dbhash2btchash(char *hash, char *buf, size_t siz, WHERE_FFL_ARGS); @@ -3411,16 +3413,11 @@ extern bool sharesummaries_to_markersummaries(PGconn *conn, WORKMARKERS *workmar tv_t *cd, K_TREE *trf_root); extern bool delete_markersummaries(PGconn *conn, WORKMARKERS *wm); extern char *ooo_status(char *buf, size_t siz); -#define sharesummary_update(_s_row, _e_row, _by, _code, _inet, _cd) \ - _sharesummary_update(_s_row, _e_row, _by, _code, _inet, _cd, \ - WHERE_FFL_HERE) -extern bool _sharesummary_update(SHARES *s_row, SHAREERRORS *e_row, char *by, - char *code, char *inet, tv_t *cd, +#define sharesummary_update(_s_row, _e_row, _cd) \ + _sharesummary_update(_s_row, _e_row, _cd, WHERE_FFL_HERE) +extern bool _sharesummary_update(SHARES *s_row, SHAREERRORS *e_row, tv_t *cd, WHERE_FFL_ARGS); -#define sharesummary_age(_ss_item, _by, _code, _inet, _cd) \ - _sharesummary_age(_ss_item, _by, _code, _inet, _cd, WHERE_FFL_HERE) -extern bool _sharesummary_age(K_ITEM *ss_item, char *by, char *code, char *inet, - tv_t *cd, WHERE_FFL_ARGS); +extern 
bool sharesummary_age(K_ITEM *ss_item); extern bool keysharesummary_age(K_ITEM *kss_item); extern bool sharesummary_fill(PGconn *conn); extern bool blocks_stats(PGconn *conn, int32_t height, char *blockhash, diff --git a/src/ckdb_cmd.c b/src/ckdb_cmd.c index ee34e613..dfe8b576 100644 --- a/src/ckdb_cmd.c +++ b/src/ckdb_cmd.c @@ -2865,18 +2865,20 @@ seconf: } ok = workinfo_age(workinfoid, transfer_data(i_poolinstance), - by, code, inet, cd, &ss_first, &ss_last, - &ss_count, &s_count, &s_diff); + cd, &ss_first, &ss_last, &ss_count, &s_count, + &s_diff); if (!ok) { LOGERR("%s(%s) %s.failed.DATA", __func__, cmd, id); return strdup("failed.DATA"); } else { - /* Don't slow down the reload - do them later */ - if (!reloading || key_update) { + /* Don't slow down the reload - do them later, + * unless it's a long reload since: + * Any pool restarts in the reload data will cause + * unaged workinfos and thus would stop marker() */ + if (!reloading || key_update || reloaded_N_files) { // Aging is a queued item thus the reply is ignored auto_age_older(workinfoid, - transfer_data(i_poolinstance), - by, code, inet, cd); + transfer_data(i_poolinstance), cd); } } LOGDEBUG("%s.ok.aged %"PRId64, id, workinfoid); diff --git a/src/ckdb_data.c b/src/ckdb_data.c index c8702d67..20c6d88e 100644 --- a/src/ckdb_data.c +++ b/src/ckdb_data.c @@ -89,12 +89,6 @@ void free_sharesummary_data(K_ITEM *item) DATA_SHARESUMMARY(sharesummary, item); LIST_MEM_SUB(sharesummary_free, sharesummary->workername); FREENULL(sharesummary->workername); - SET_CREATEBY(sharesummary_free, sharesummary->createby, EMPTY); - SET_CREATECODE(sharesummary_free, sharesummary->createcode, EMPTY); - SET_CREATEINET(sharesummary_free, sharesummary->createinet, EMPTY); - SET_MODIFYBY(sharesummary_free, sharesummary->modifyby, EMPTY); - SET_MODIFYCODE(sharesummary_free, sharesummary->modifycode, EMPTY); - SET_MODIFYINET(sharesummary_free, sharesummary->modifyinet, EMPTY); } void free_optioncontrol_data(K_ITEM *item) @@ -128,9 
+122,6 @@ void free_keysharesummary_data(K_ITEM *item) DATA_KEYSHARESUMMARY(keysharesummary, item); LIST_MEM_SUB(keysharesummary_free, keysharesummary->key); FREENULL(keysharesummary->key); - SET_CREATEBY(keysharesummary_free, keysharesummary->createby, EMPTY); - SET_CREATECODE(keysharesummary_free, keysharesummary->createcode, EMPTY); - SET_CREATEINET(keysharesummary_free, keysharesummary->createinet, EMPTY); } void free_keysummary_data(K_ITEM *item) @@ -2144,6 +2135,8 @@ static void discard_shares(int64_t *shares_tot, int64_t *shares_dumped, SHARES lookshares, *shares; K_TREE_CTX s_ctx[1]; char error[1024]; + bool multiple = false; + int64_t curr_userid; error[0] = '\0'; INIT_SHARES(&s_look); @@ -2154,6 +2147,7 @@ static void discard_shares(int64_t *shares_tot, int64_t *shares_dumped, DATE_ZERO(&(lookshares.createdate)); s_look.data = (void *)(&lookshares); + curr_userid = userid; K_WLOCK(shares_free); s_item = find_after_in_ktree(shares_root, &s_look, s_ctx); while (s_item) { @@ -2167,37 +2161,61 @@ static void discard_shares(int64_t *shares_tot, int64_t *shares_dumped, break; } + // Avoid releasing the lock the first time in + if (curr_userid == DISCARD_ALL) + curr_userid = shares->userid; + + /* The shares being removed here wont be touched by any other + * code, so we don't need to hold the shares_free lock the + * whole time, since that would slow down incoming share + * processing too much - this only affects DISCARD_ALL + * TODO: delete the shares when they are summarised in the + * sharesummary */ + if (shares->userid != curr_userid) { + K_WUNLOCK(shares_free); + curr_userid = shares->userid; + K_WLOCK(shares_free); + } + (*shares_tot)++; if (shares->errn == SE_NONE) (*diff_tot) += shares->diff; + if (reloading && skipupdate) { + (*shares_dumped)++; + if (error[0]) + multiple = true; + else { + snprintf(error, sizeof(error), + "%"PRId64"/%"PRId64"/%s/%s%.0f", + shares->workinfoid, + shares->userid, + shares->workername, + (shares->errn == SE_NONE) ? 
"" : "*", + shares->diff); + } + } tmp_item = next_in_ktree(s_ctx); remove_from_ktree(shares_root, s_item); k_unlink_item(shares_store, s_item); - if (reloading && skipupdate) - (*shares_dumped)++; - if (reloading && skipupdate && !error[0]) { - snprintf(error, sizeof(error), - "reload found aged share: %"PRId64 - "/%"PRId64"/%s/%s%.0f", - shares->workinfoid, - shares->userid, - shares->workername, - (shares->errn == SE_NONE) ? "" : "*", - shares->diff); - } k_add_head(shares_free, s_item); s_item = tmp_item; } K_WUNLOCK(shares_free); - if (error[0]) - LOGERR("%s(): %s", __func__, error); + if (error[0]) { + LOGERR("%s(): reload found %s aged share%s%s: %s", + __func__, multiple ? "multiple" : "an", + multiple ? "s" : EMPTY, + multiple ? ", the first was" : EMPTY, + error); + } + } // Duplicates during a reload are set to not show messages -bool workinfo_age(int64_t workinfoid, char *poolinstance, char *by, char *code, - char *inet, tv_t *cd, tv_t *ss_first, tv_t *ss_last, - int64_t *ss_count, int64_t *s_count, int64_t *s_diff) +bool workinfo_age(int64_t workinfoid, char *poolinstance, tv_t *cd, + tv_t *ss_first, tv_t *ss_last, int64_t *ss_count, + int64_t *s_count, int64_t *s_diff) { K_ITEM *wi_item, ss_look, *ss_item; K_ITEM ks_look, *ks_item, *wm_item; @@ -2208,6 +2226,7 @@ bool workinfo_age(int64_t workinfoid, char *poolinstance, char *by, char *code, int64_t diff_tot; KEYSHARESUMMARY lookkeysharesummary, *keysharesummary; SHARESUMMARY looksharesummary, *sharesummary; + char complete[TXT_FLAG+1]; WORKINFO *workinfo; bool ok = false, ksok = false, skipupdate = false; @@ -2269,8 +2288,10 @@ bool workinfo_age(int64_t workinfoid, char *poolinstance, char *by, char *code, ss_look.data = (void *)(&looksharesummary); K_RLOCK(sharesummary_free); ss_item = find_after_in_ktree(sharesummary_workinfoid_root, &ss_look, ss_ctx); - K_RUNLOCK(sharesummary_free); DATA_SHARESUMMARY_NULL(sharesummary, ss_item); + // complete could change, the id fields wont be changed/removed yet 
+ STRNCPY(complete, sharesummary->complete); + K_RUNLOCK(sharesummary_free); while (ss_item && sharesummary->workinfoid == workinfoid) { ss_tot++; skipupdate = false; @@ -2278,7 +2299,7 @@ bool workinfo_age(int64_t workinfoid, char *poolinstance, char *by, char *code, * so finding an aged sharesummary here is an error * N.B. this can only happen with (very) old reload files */ if (reloading) { - if (sharesummary->complete[0] == SUMMARY_COMPLETE) { + if (complete[0] == SUMMARY_COMPLETE) { ss_already++; skipupdate = true; if (confirm_sharesummary) { @@ -2292,7 +2313,9 @@ bool workinfo_age(int64_t workinfoid, char *poolinstance, char *by, char *code, } if (!skipupdate) { - if (!sharesummary_age(ss_item, by, code, inet, cd)) { + K_WLOCK(sharesummary_free); + if (!sharesummary_age(ss_item)) { + K_WUNLOCK(sharesummary_free); ss_failed++; LOGERR("%s(): Failed to age sharesummary %"PRId64"/%s/%"PRId64, __func__, sharesummary->userid, @@ -2308,6 +2331,7 @@ bool workinfo_age(int64_t workinfoid, char *poolinstance, char *by, char *code, copy_tv(ss_first, &(sharesummary->firstshare)); if (tv_newer(ss_last, &(sharesummary->lastshare))) copy_tv(ss_last, &(sharesummary->lastshare)); + K_WUNLOCK(sharesummary_free); } } @@ -2318,8 +2342,9 @@ bool workinfo_age(int64_t workinfoid, char *poolinstance, char *by, char *code, K_RLOCK(sharesummary_free); ss_item = next_in_ktree(ss_ctx); - K_RUNLOCK(sharesummary_free); DATA_SHARESUMMARY_NULL(sharesummary, ss_item); + STRNCPY(complete, sharesummary->complete); + K_RUNLOCK(sharesummary_free); } if (ss_already || ss_failed || shares_dumped) { @@ -2350,8 +2375,10 @@ skip_ss: ks_look.data = (void *)(&lookkeysharesummary); K_RLOCK(keysharesummary_free); ks_item = find_after_in_ktree(keysharesummary_root, &ks_look, ks_ctx); - K_RUNLOCK(keysharesummary_free); DATA_KEYSHARESUMMARY_NULL(keysharesummary, ks_item); + // complete could change, the id fields wont be changed/removed yet + STRNCPY(complete, keysharesummary->complete); + 
K_RUNLOCK(keysharesummary_free); while (ks_item && keysharesummary->workinfoid == workinfoid) { ks_tot++; skipupdate = false; @@ -2359,7 +2386,7 @@ skip_ss: * so finding an aged keysharesummary here is an error * N.B. this can only happen with (very) old reload files */ if (reloading && !key_update) { - if (keysharesummary->complete[0] == SUMMARY_COMPLETE) { + if (complete[0] == SUMMARY_COMPLETE) { ks_already++; skipupdate = true; if (confirm_sharesummary) { @@ -2373,20 +2400,25 @@ skip_ss: } if (!skipupdate) { + K_WLOCK(keysharesummary_free); if (!keysharesummary_age(ks_item)) { ks_failed++; + K_WUNLOCK(keysharesummary_free); LOGERR("%s(): Failed to age keysharesummary %"PRId64"/%s/%s", __func__, keysharesummary->workinfoid, keysharesummary->keytype, keysharesummary->key); ksok = false; + } else { + K_WUNLOCK(keysharesummary_free); } } K_RLOCK(keysharesummary_free); ks_item = next_in_ktree(ks_ctx); - K_RUNLOCK(keysharesummary_free); DATA_KEYSHARESUMMARY_NULL(keysharesummary, ks_item); + STRNCPY(complete, keysharesummary->complete); + K_RUNLOCK(keysharesummary_free); } /* All shares should have been discarded during sharesummary @@ -2527,19 +2559,17 @@ cmp_t cmp_shareerrors(K_ITEM *a, K_ITEM *b) void dsp_sharesummary(K_ITEM *item, FILE *stream) { - char createdate_buf[DATE_BUFSIZ]; SHARESUMMARY *s; if (!item) fprintf(stream, "%s() called with (null) item\n", __func__); else { DATA_SHARESUMMARY(s, item); - tv_to_buf(&(s->createdate), createdate_buf, sizeof(createdate_buf)); fprintf(stream, " uid=%"PRId64" wn='%s' wid=%"PRId64" " - "da=%f ds=%f ss=%f c='%s' cd=%s\n", + "da=%f ds=%f ss=%f c='%s'\n", s->userid, s->workername, s->workinfoid, s->diffacc, s->diffsta, s->sharesta, - s->complete, createdate_buf); + s->complete); } } @@ -2632,8 +2662,7 @@ K_ITEM *find_last_sharesummary(int64_t userid, char *workername) } // key_update must age keysharesummary directly -static void key_auto_age_older(int64_t workinfoid, char *poolinstance, char *by, - char *code, char *inet, 
tv_t *cd) +static void key_auto_age_older(int64_t workinfoid, char *poolinstance, tv_t *cd) { static int64_t last_attempted_id = -1; static int64_t prev_found = 0; @@ -2651,6 +2680,14 @@ static void key_auto_age_older(int64_t workinfoid, char *poolinstance, char *by, int64_t age_id, do_id, to_id; bool ok, found; + K_WLOCK(workinfo_free); + if (workinfo_age_lock) { + K_WUNLOCK(workinfo_free); + return; + } else + workinfo_age_lock = true; + K_WUNLOCK(workinfo_free); + LOGDEBUG("%s(): workinfoid=%"PRId64" prev=%"PRId64, __func__, workinfoid, prev_found); age_id = prev_found; @@ -2663,15 +2700,15 @@ static void key_auto_age_older(int64_t workinfoid, char *poolinstance, char *by, INIT_KEYSHARESUMMARY(&look); look.data = (void *)(&lookkeysharesummary); - K_RLOCK(keysharesummary_free); - kss_item = find_after_in_ktree(keysharesummary_root, &look, ctx); - DATA_KEYSHARESUMMARY_NULL(keysharesummary, kss_item); - DATE_ZERO(&kss_first_min); DATE_ZERO(&kss_last_max); kss_count_tot = s_count_tot = s_diff_tot = 0; - found = false; + + K_RLOCK(keysharesummary_free); + kss_item = find_after_in_ktree(keysharesummary_root, &look, ctx); + DATA_KEYSHARESUMMARY_NULL(keysharesummary, kss_item); + while (kss_item && keysharesummary->workinfoid < workinfoid) { if (keysharesummary->complete[0] == SUMMARY_NEW) { age_id = keysharesummary->workinfoid; @@ -2686,9 +2723,9 @@ static void key_auto_age_older(int64_t workinfoid, char *poolinstance, char *by, LOGDEBUG("%s(): age_id=%"PRId64" found=%d", __func__, age_id, found); // Don't repeat searching old items to avoid accessing their ram - if (!found) + if (!found) { prev_found = workinfoid; - else { + } else { /* Process all the consecutive keysharesummaries that's aren't aged * This way we find each oldest 'batch' of keysharesummaries that have * been missed and can report the range of data that was aged, @@ -2700,9 +2737,9 @@ static void key_auto_age_older(int64_t workinfoid, char *poolinstance, char *by, do_id = age_id; to_id = 0; do { - ok 
= workinfo_age(do_id, poolinstance, by, code, inet, - cd, &kss_first, &kss_last, &kss_count, - &s_count, &s_diff); + ok = workinfo_age(do_id, poolinstance, cd, &kss_first, + &kss_last, &kss_count, &s_count, + &s_diff); kss_count_tot += kss_count; s_count_tot += s_count; @@ -2774,12 +2811,14 @@ static void key_auto_age_older(int64_t workinfoid, char *poolinstance, char *by, idrange, keysharerange); } } + K_WLOCK(workinfo_free); + workinfo_age_lock = false; + K_WUNLOCK(workinfo_free); } /* TODO: markersummary checking? * However, there should be no issues since the sharesummaries are removed */ -void auto_age_older(int64_t workinfoid, char *poolinstance, char *by, - char *code, char *inet, tv_t *cd) +void auto_age_older(int64_t workinfoid, char *poolinstance, tv_t *cd) { static int64_t last_attempted_id = -1; static int64_t prev_found = 0; @@ -2798,10 +2837,21 @@ void auto_age_older(int64_t workinfoid, char *poolinstance, char *by, bool ok, found; if (key_update) { - key_auto_age_older(workinfoid, poolinstance, by, code, inet, cd); + key_auto_age_older(workinfoid, poolinstance, cd); return; } + /* Simply lock out more than one from running at the same time + * This locks access to prev_found, repeat and last_attempted_id + * If any are missed they'll be aged by the next age_workinfo in 30s */ + K_WLOCK(workinfo_free); + if (workinfo_age_lock) { + K_WUNLOCK(workinfo_free); + return; + } else + workinfo_age_lock = true; + K_WUNLOCK(workinfo_free); + LOGDEBUG("%s(): workinfoid=%"PRId64" prev=%"PRId64, __func__, workinfoid, prev_found); age_id = prev_found; @@ -2814,15 +2864,15 @@ void auto_age_older(int64_t workinfoid, char *poolinstance, char *by, INIT_SHARESUMMARY(&look); look.data = (void *)(&looksharesummary); - K_RLOCK(sharesummary_free); - ss_item = find_after_in_ktree(sharesummary_workinfoid_root, &look, ctx); - DATA_SHARESUMMARY_NULL(sharesummary, ss_item); - DATE_ZERO(&ss_first_min); DATE_ZERO(&ss_last_max); ss_count_tot = s_count_tot = s_diff_tot = 0; - found 
= false; + + K_RLOCK(sharesummary_free); + ss_item = find_after_in_ktree(sharesummary_workinfoid_root, &look, ctx); + DATA_SHARESUMMARY_NULL(sharesummary, ss_item); + while (ss_item && sharesummary->workinfoid < workinfoid) { if (sharesummary->complete[0] == SUMMARY_NEW) { age_id = sharesummary->workinfoid; @@ -2852,9 +2902,9 @@ void auto_age_older(int64_t workinfoid, char *poolinstance, char *by, do_id = age_id; to_id = 0; do { - ok = workinfo_age(do_id, poolinstance, by, code, inet, - cd, &ss_first, &ss_last, &ss_count, - &s_count, &s_diff); + ok = workinfo_age(do_id, poolinstance, cd, &ss_first, + &ss_last, &ss_count, &s_count, + &s_diff); ss_count_tot += ss_count; s_count_tot += s_count; @@ -2926,6 +2976,9 @@ void auto_age_older(int64_t workinfoid, char *poolinstance, char *by, idrange, sharerange); } } + K_WLOCK(workinfo_free); + workinfo_age_lock = false; + K_WUNLOCK(workinfo_free); } void _dbhash2btchash(char *hash, char *buf, size_t siz, WHERE_FFL_ARGS) diff --git a/src/ckdb_dbio.c b/src/ckdb_dbio.c index a6cec406..ec866d05 100644 --- a/src/ckdb_dbio.c +++ b/src/ckdb_dbio.c @@ -214,6 +214,17 @@ char *pqerrmsg(PGconn *conn) #undef PQexec #undef PQexecParams +/* Debug level to display write transactions - 0 removes the code + * Also enables checking the isread flag */ +#define CKPQ_SHOW_WRITE 0 + +#define CKPQ_ISREAD1 "select " +#define CKPQ_ISREAD1LEN (sizeof(CKPQ_ISREAD1)-1) +#define CKPQ_ISREAD2 "declare " +#define CKPQ_ISREAD2LEN (sizeof(CKPQ_ISREAD2)-1) +#define CKPQ_ISREAD3 "fetch " +#define CKPQ_ISREAD3LEN (sizeof(CKPQ_ISREAD3)-1) + // Bug check to ensure no unexpected write txns occur PGresult *_CKPQexec(PGconn *conn, const char *qry, bool isread, WHERE_FFL_ARGS) { @@ -221,6 +232,40 @@ PGresult *_CKPQexec(PGconn *conn, const char *qry, bool isread, WHERE_FFL_ARGS) if (!isread && confirm_sharesummary) quitfrom(1, file, func, line, "BUG: write txn during confirm"); +#if CKPQ_SHOW_WRITE + if (isread) { + if ((strncmp(qry, CKPQ_ISREAD1, CKPQ_ISREAD1LEN) 
!= 0) && + (strncmp(qry, CKPQ_ISREAD2, CKPQ_ISREAD2LEN) != 0) && + (strncmp(qry, CKPQ_ISREAD3, CKPQ_ISREAD3LEN) != 0)) { + LOGERR("%s() ERR: query flagged as read, but isn't" + WHERE_FFL, __func__, WHERE_FFL_PASS); + isread = false; + } + } else { + if ((strncmp(qry, CKPQ_ISREAD1, CKPQ_ISREAD1LEN) == 0) || + (strncmp(qry, CKPQ_ISREAD2, CKPQ_ISREAD2LEN) == 0) || + (strncmp(qry, CKPQ_ISREAD3, CKPQ_ISREAD3LEN) == 0)) { + LOGERR("%s() ERR: query flagged as write, but isn't" + WHERE_FFL, __func__, WHERE_FFL_PASS); + isread = true; + } + } + if (!isread) { + char *buf = NULL, ffl[128]; + size_t len, off; + + APPEND_REALLOC_INIT(buf, off, len); + APPEND_REALLOC(buf, off, len, __func__); + APPEND_REALLOC(buf, off, len, "() W: '"); + APPEND_REALLOC(buf, off, len, qry); + APPEND_REALLOC(buf, off, len, "'"); + snprintf(ffl, sizeof(ffl), WHERE_FFL, WHERE_FFL_PASS); + APPEND_REALLOC(buf, off, len, ffl); + LOGMSGBUF(CKPQ_SHOW_WRITE, buf); + FREENULL(buf); + } +#endif + return PQexec(conn, qry); } @@ -237,6 +282,47 @@ PGresult *_CKPQexecParams(PGconn *conn, const char *qry, if (!isread && confirm_sharesummary) quitfrom(1, file, func, line, "BUG: write txn during confirm"); +#if CKPQ_SHOW_WRITE + if (isread) { + if ((strncmp(qry, CKPQ_ISREAD1, CKPQ_ISREAD1LEN) != 0) && + (strncmp(qry, CKPQ_ISREAD2, CKPQ_ISREAD2LEN) != 0) && + (strncmp(qry, CKPQ_ISREAD3, CKPQ_ISREAD3LEN) != 0)) { + LOGERR("%s() ERR: query flagged as read, but isn't" + WHERE_FFL, __func__, WHERE_FFL_PASS); + isread = false; + } + } else { + if ((strncmp(qry, CKPQ_ISREAD1, CKPQ_ISREAD1LEN) == 0) || + (strncmp(qry, CKPQ_ISREAD2, CKPQ_ISREAD2LEN) == 0) || + (strncmp(qry, CKPQ_ISREAD3, CKPQ_ISREAD3LEN) == 0)) { + LOGERR("%s() ERR: query flagged as write, but isn't" + WHERE_FFL, __func__, WHERE_FFL_PASS); + isread = true; + } + } + if (!isread) { + char *buf = NULL, num[16], ffl[128]; + size_t len, off; + int i; + + APPEND_REALLOC_INIT(buf, off, len); + APPEND_REALLOC(buf, off, len, __func__); + APPEND_REALLOC(buf, off, 
len, "() W: '"); + APPEND_REALLOC(buf, off, len, qry); + APPEND_REALLOC(buf, off, len, "'"); + for (i = 0; i < nParams; i++) { + snprintf(num, sizeof(num), " $%d='", i+1); + APPEND_REALLOC(buf, off, len, num); + APPEND_REALLOC(buf, off, len, paramValues[i]); + APPEND_REALLOC(buf, off, len, "'"); + } + snprintf(ffl, sizeof(ffl), WHERE_FFL, WHERE_FFL_PASS); + APPEND_REALLOC(buf, off, len, ffl); + LOGMSGBUF(CKPQ_SHOW_WRITE, buf); + FREENULL(buf); + } +#endif + return PQexecParams(conn, qry, nParams, paramTypes, paramValues, paramLengths, paramFormats, resultFormat); } @@ -558,6 +644,17 @@ K_ITEM *users_add(PGconn *conn, char *username, char *emailaddress, LOGDEBUG("%s(): add", __func__); + /* 2 attempts to add the same user at the same time will only do it once + * The 2nd attempt will get back the data provided by the 1st + * and thus throw away any differences in the 2nd */ + K_WLOCK(users_db_free); + + item = find_users(username); + if (item) { + ok = true; + goto already; + } + K_WLOCK(users_free); item = k_unlink_head(users_free); K_WUNLOCK(users_free); @@ -665,6 +762,10 @@ unitem: } K_WUNLOCK(users_free); +already: + + K_WUNLOCK(users_db_free); + if (ok) return item; else @@ -1469,6 +1570,11 @@ bool workers_update(PGconn *conn, K_ITEM *item, char *difficultydefault, LOGDEBUG("%s(): update", __func__); + /* Two attempts to update the same worker at the same time + * will determine the final state based on which gets the lock last, + * i.e. 
randomly, but without overwriting at the same time */ + K_WLOCK(workers_db_free); + DATA_WORKERS(row, item); if (check) { @@ -1583,6 +1689,9 @@ unparam: for (n = 0; n < par; n++) free(params[n]); early: + + K_WUNLOCK(workers_db_free); + return ok; } @@ -3399,6 +3508,7 @@ static bool shares_process(PGconn *conn, SHARES *shares, K_ITEM *wi_item, { K_ITEM *w_item, *wm_item, *ss_item; SHARESUMMARY *sharesummary; + char complete[TXT_FLAG+1]; WORKINFO *workinfo; char *st = NULL; @@ -3479,13 +3589,14 @@ static bool shares_process(PGconn *conn, SHARES *shares, K_ITEM *wi_item, K_RLOCK(sharesummary_free); ss_item = find_sharesummary(shares->userid, shares->workername, shares->workinfoid); - K_RUNLOCK(sharesummary_free); if (ss_item) { DATA_SHARESUMMARY(sharesummary, ss_item); if (sharesummary->complete[0] != SUMMARY_NEW) { + STRNCPY(complete, sharesummary->complete); + K_RUNLOCK(sharesummary_free); LOGDEBUG("%s(): '%s' sharesummary exists " "%"PRId64" %"PRId64"/%s/%ld,%ld", - __func__, sharesummary->complete, + __func__, complete, shares->workinfoid, shares->userid, st = safe_text_nonull(shares->workername), shares->createdate.tv_sec, @@ -3495,6 +3606,7 @@ static bool shares_process(PGconn *conn, SHARES *shares, K_ITEM *wi_item, return true; } } + K_RUNLOCK(sharesummary_free); } if (!key_update && !confirm_sharesummary) { @@ -3504,8 +3616,7 @@ static bool shares_process(PGconn *conn, SHARES *shares, K_ITEM *wi_item, K_WUNLOCK(userinfo_free); } - sharesummary_update(shares, NULL, shares->createby, shares->createcode, - shares->createinet, &(shares->createdate)); + sharesummary_update(shares, NULL, &(shares->createdate)); return true; } @@ -3660,6 +3771,8 @@ bool shares_add(PGconn *conn, char *workinfoid, char *username, char *workername K_RLOCK(users_free); u_item = find_users(username); K_RUNLOCK(users_free); + /* Can't change outside lock since we don't delete users + * or change their *userid */ if (!u_item) { btv_to_buf(cd, cd_buf, sizeof(cd_buf)); /* This should never 
happen unless there's a bug in ckpool @@ -3672,7 +3785,6 @@ bool shares_add(PGconn *conn, char *workinfoid, char *username, char *workername goto tisbad; } DATA_USERS(users, u_item); - shares->userid = users->userid; TXT_TO_BIGINT("workinfoid", workinfoid, shares->workinfoid); @@ -4142,6 +4254,7 @@ static bool shareerrors_process(PGconn *conn, SHAREERRORS *shareerrors, { K_ITEM *w_item, *wm_item, *ss_item; SHARESUMMARY *sharesummary; + char complete[TXT_FLAG+1]; char *st = NULL; LOGDEBUG("%s() add", __func__); @@ -4185,13 +4298,14 @@ static bool shareerrors_process(PGconn *conn, SHAREERRORS *shareerrors, ss_item = find_sharesummary(shareerrors->userid, shareerrors->workername, shareerrors->workinfoid); - K_RUNLOCK(sharesummary_free); if (ss_item) { DATA_SHARESUMMARY(sharesummary, ss_item); if (sharesummary->complete[0] != SUMMARY_NEW) { + STRNCPY(complete, sharesummary->complete); + K_RUNLOCK(sharesummary_free); LOGDEBUG("%s(): '%s' sharesummary exists " "%"PRId64" %"PRId64"/%s/%ld,%ld", - __func__, sharesummary->complete, + __func__, complete, shareerrors->workinfoid, shareerrors->userid, st = safe_text_nonull(shareerrors->workername), @@ -4201,11 +4315,10 @@ static bool shareerrors_process(PGconn *conn, SHAREERRORS *shareerrors, return false; } } + K_RUNLOCK(sharesummary_free); } - sharesummary_update(NULL, shareerrors, shareerrors->createby, - shareerrors->createcode, shareerrors->createinet, - &(shareerrors->createdate)); + sharesummary_update(NULL, shareerrors, &(shareerrors->createdate)); return true; } @@ -5182,14 +5295,13 @@ flail: return ok; } +// Requires K_WLOCK(sharesummary_free) static void set_sharesummary_stats(SHARESUMMARY *row, SHARES *s_row, SHAREERRORS *e_row, bool new, double *tdf, double *tdl) { tv_t *createdate; - K_WLOCK(sharesummary_free); - if (s_row) createdate = &(s_row->createdate); else @@ -5251,15 +5363,11 @@ static void set_sharesummary_stats(SHARESUMMARY *row, SHARES *s_row, *tdf = tvdiff(createdate, &(row->firstshare)); *tdl = 
tvdiff(createdate, &(row->lastshare)); } - - K_WUNLOCK(sharesummary_free); } static void set_keysharesummary_stats(KEYSHARESUMMARY *row, SHARES *s_row, bool new) { - K_WLOCK(keysharesummary_free); - if (new) { zero_keysharesummary(row); copy_tv(&(row->firstshare), &(s_row->createdate)); @@ -5307,8 +5415,6 @@ static void set_keysharesummary_stats(KEYSHARESUMMARY *row, SHARES *s_row, row->sharerej++; break; } - - K_WUNLOCK(keysharesummary_free); } /* Keep some simple stats on how often shares are out of order @@ -5330,15 +5436,15 @@ char *ooo_status(char *buf, size_t siz) /* sharesummaries are no longer stored in the DB but fields are updated as b4 * This creates/updates both the sharesummaries and the keysharesummaries */ -bool _sharesummary_update(SHARES *s_row, SHAREERRORS *e_row, char *by, - char *code, char *inet, tv_t *cd, WHERE_FFL_ARGS) +bool _sharesummary_update(SHARES *s_row, SHAREERRORS *e_row, tv_t *cd, + WHERE_FFL_ARGS) { WORKMARKERS *wm; SHARESUMMARY *row, *p_row; KEYSHARESUMMARY *ki_row = NULL, *ka_row = NULL; K_ITEM *ss_item, *kiss_item = NULL, *kass_item = NULL, *wm_item, *p_item = NULL; bool new = false, p_new = false, ki_new = false, ka_new = false; - int64_t userid, workinfoid; + int64_t userid, workinfoid, markerid; char *workername, *address = NULL, *agent = NULL; char *st = NULL, *db = NULL; char ooo_buf[256]; @@ -5371,33 +5477,30 @@ bool _sharesummary_update(SHARES *s_row, SHAREERRORS *e_row, char *by, K_RLOCK(workmarkers_free); wm_item = find_workmarkers(workinfoid, false, MARKER_PROCESSED, NULL); - K_RUNLOCK(workmarkers_free); if (wm_item) { DATA_WORKMARKERS(wm, wm_item); + markerid = wm->markerid; + K_RUNLOCK(workmarkers_free); LOGERR("%s(): attempt to update sharesummary " "with %s %"PRId64"/%"PRId64"/%s "CDDB" %s" " but processed workmarkers %"PRId64" exists", __func__, s_row ? 
"shares" : "shareerrors", workinfoid, userid, st = safe_text(workername), - db = ctv_to_buf(cd, NULL, 0), - wm->markerid); + db = ctv_to_buf(cd, NULL, 0), markerid); FREENULL(st); FREENULL(db); return false; } + K_RUNLOCK(workmarkers_free); - K_RLOCK(sharesummary_free); + K_WLOCK(sharesummary_free); ss_item = find_sharesummary(userid, workername, workinfoid); - p_item = find_sharesummary_p(workinfoid); - K_RUNLOCK(sharesummary_free); if (ss_item) { DATA_SHARESUMMARY(row, ss_item); } else { new = true; - K_WLOCK(sharesummary_free); ss_item = k_unlink_head(sharesummary_free); - K_WUNLOCK(sharesummary_free); DATA_SHARESUMMARY(row, ss_item); bzero(row, sizeof(*row)); row->userid = userid; @@ -5409,20 +5512,23 @@ bool _sharesummary_update(SHARES *s_row, SHAREERRORS *e_row, char *by, // N.B. this directly updates the non-key data set_sharesummary_stats(row, s_row, e_row, new, &tdf, &tdl); + if (new) { + add_to_ktree(sharesummary_root, ss_item); + add_to_ktree(sharesummary_workinfoid_root, ss_item); + k_add_head(sharesummary_store, ss_item); + } + K_WUNLOCK(sharesummary_free); + // Ignore shareerrors for keysummaries if (s_row) { - K_RLOCK(keysharesummary_free); + K_WLOCK(keysharesummary_free); kiss_item = find_keysharesummary(workinfoid, KEYTYPE_IP, address); - kass_item = find_keysharesummary(workinfoid, KEYTYPE_AGENT, agent); - K_RUNLOCK(keysharesummary_free); if (kiss_item) { DATA_KEYSHARESUMMARY(ki_row, kiss_item); } else { ki_new = true; - K_WLOCK(keysharesummary_free); kiss_item = k_unlink_head(keysharesummary_free); - K_WUNLOCK(keysharesummary_free); DATA_KEYSHARESUMMARY(ki_row, kiss_item); bzero(ki_row, sizeof(*ki_row)); ki_row->workinfoid = workinfoid; @@ -5433,14 +5539,20 @@ bool _sharesummary_update(SHARES *s_row, SHAREERRORS *e_row, char *by, // N.B. 
this directly updates the non-key data set_keysharesummary_stats(ki_row, s_row, ki_new); + if (ki_new) { + add_to_ktree(keysharesummary_root, kiss_item); + k_add_head(keysharesummary_store, kiss_item); + } + K_WUNLOCK(keysharesummary_free); + + K_WLOCK(keysharesummary_free); + kass_item = find_keysharesummary(workinfoid, KEYTYPE_AGENT, agent); if (kass_item) { DATA_KEYSHARESUMMARY(ka_row, kass_item); } else { ka_new = true; - K_WLOCK(keysharesummary_free); kass_item = k_unlink_head(keysharesummary_free); - K_WUNLOCK(keysharesummary_free); DATA_KEYSHARESUMMARY(ka_row, kass_item); bzero(ka_row, sizeof(*ka_row)); ka_row->workinfoid = workinfoid; @@ -5451,6 +5563,11 @@ bool _sharesummary_update(SHARES *s_row, SHAREERRORS *e_row, char *by, // N.B. this directly updates the non-key data set_keysharesummary_stats(ka_row, s_row, ka_new); + if (ka_new) { + add_to_ktree(keysharesummary_root, kass_item); + k_add_head(keysharesummary_store, kass_item); + } + K_WUNLOCK(keysharesummary_free); } if (!new) { @@ -5506,13 +5623,14 @@ bool _sharesummary_update(SHARES *s_row, SHAREERRORS *e_row, char *by, } } + K_WLOCK(sharesummary_free); + p_item = find_sharesummary_p(workinfoid); + if (p_item) { DATA_SHARESUMMARY(p_row, p_item); } else { p_new = true; - K_WLOCK(sharesummary_free); p_item = k_unlink_head(sharesummary_free); - K_WUNLOCK(sharesummary_free); DATA_SHARESUMMARY(p_row, p_item); bzero(p_row, sizeof(*p_row)); POOL_SS(p_row); @@ -5521,42 +5639,17 @@ bool _sharesummary_update(SHARES *s_row, SHAREERRORS *e_row, char *by, set_sharesummary_stats(p_row, s_row, e_row, p_new, &tdf, &tdl); - MODIFYDATEPOINTERS(sharesummary_free, row, cd, by, code, inet); - - // Store either new item - if (new || p_new) { - K_WLOCK(sharesummary_free); - if (new) { - add_to_ktree(sharesummary_root, ss_item); - add_to_ktree(sharesummary_workinfoid_root, ss_item); - k_add_head(sharesummary_store, ss_item); - } - if (p_new) { - add_to_ktree(sharesummary_pool_root, p_item); - 
k_add_head(sharesummary_pool_store, p_item); - } - K_WUNLOCK(sharesummary_free); - } - - if (ki_new || ka_new) { - K_WLOCK(keysharesummary_free); - if (ki_new) { - add_to_ktree(keysharesummary_root, kiss_item); - k_add_head(keysharesummary_store, kiss_item); - } - if (ka_new) { - add_to_ktree(keysharesummary_root, kass_item); - k_add_head(keysharesummary_store, kass_item); - } - K_WUNLOCK(keysharesummary_free); + if (p_new) { + add_to_ktree(sharesummary_pool_root, p_item); + k_add_head(sharesummary_pool_store, p_item); } + K_WUNLOCK(sharesummary_free); return true; } // No key fields are modified -bool _sharesummary_age(K_ITEM *ss_item, char *by, char *code, char *inet, - tv_t *cd, WHERE_FFL_ARGS) +bool sharesummary_age(K_ITEM *ss_item) { SHARESUMMARY *row; @@ -5566,8 +5659,6 @@ bool _sharesummary_age(K_ITEM *ss_item, char *by, char *code, char *inet, row->complete[0] = SUMMARY_COMPLETE; row->complete[1] = '\0'; - MODIFYDATEPOINTERS(sharesummary_free, row, cd, by, code, inet); - return true; } From 366a36a9ae09536a3950b888a2a609eccb6135fb Mon Sep 17 00:00:00 2001 From: kanoi Date: Wed, 17 Aug 2016 10:27:27 +1000 Subject: [PATCH 02/23] ckdb - allow adjusting the reload process thread count --- src/ckdb.c | 118 ++++++++++++++++++++++++++++++++++++++----------- src/ckdb.h | 15 ++++++- src/ckdb_cmd.c | 56 +++++++++++++++++++++++ 3 files changed, 161 insertions(+), 28 deletions(-) diff --git a/src/ckdb.c b/src/ckdb.c index 3f4f9350..afd1d524 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -117,14 +117,17 @@ static bool blistener_using_data; static bool breakdown_using_data; static bool replier_using_data; -// Define the array size for thread data -#define THREAD_LIMIT 99 +/* Flag to notify thread changes + * Set/checked under the function's main loop's first lock + * This is always a 'delta' value meaning add or subtract that many */ +int queue_threads_delta = 0; /* Use -Q to set it higher - * Setting it higher can degrade performance if the server can't - * handle the 
extra locking or is swapping */ + * Setting it higher can degrade performance if the CPUs can't + * handle the extra locking or the threads are swapping */ static int queue_threads = 1; // -B to override calculated value static int breakdown_threads = -1; +// cpu count to breakdown thread ratio #define BREAKDOWN_RATIO 3 static int reload_breakdown_count = 0; static int cmd_breakdown_count = 0; @@ -5395,6 +5398,7 @@ static void *process_socket(void *arg) case CMD_SHSTA: case CMD_CHKPASS: case CMD_GETATTS: + case CMD_THREADS: case CMD_HOMEPAGE: break; default: @@ -5586,6 +5590,7 @@ static void *process_socket(void *arg) case CMD_LOCKS: case CMD_EVENTS: case CMD_HIGH: + case CMD_THREADS: msgline->sockd = bq->sockd; bq->sockd = -1; K_WLOCK(workqueue_free); @@ -6020,6 +6025,7 @@ static void process_reload_item(PGconn *conn, K_ITEM *bq_item) case CMD_LOCKS: case CMD_EVENTS: case CMD_HIGH: + case CMD_THREADS: LOGERR("%s() INVALID message line %"PRIu64 " ignored '%.42s...", __func__, bq->count, @@ -6077,37 +6083,37 @@ static void process_reload_item(PGconn *conn, K_ITEM *bq_item) static void *process_reload(__maybe_unused void *arg) { - pthread_t *procrel_pt; + static pthread_t procrel_pt[THREAD_LIMIT]; + static int n[THREAD_LIMIT]; + static bool running[THREAD_LIMIT]; + PGconn *conn = NULL; K_ITEM *bq_item = NULL; char buf[128]; time_t now; - int i, *n, zeros; ts_t when, when_add; - int ret; + int i, mythread, threads_delta = 0, done, tot, ret; if (arg) - i = *(int *)(arg); + mythread = *(int *)(arg); else { pthread_detach(pthread_self()); - n = malloc(queue_threads * sizeof(int)); - procrel_pt = malloc(queue_threads * sizeof(*procrel_pt)); - for (i = 1; i < queue_threads; i++) { + for (i = 0; i < THREAD_LIMIT; i++) { n[i] = i; - create_pthread(&(procrel_pt[i]), process_reload, &(n[i])); + running[i] = false; } - i = 0; + + mythread = 0; + running[0] = true; + + // Set to create the rest of the threads + queue_threads_delta = queue_threads - 1; LOGNOTICE("%s() starting", 
__func__); } - if (queue_threads < 10) - zeros = 1; - else - zeros = (int)log10(queue_threads) + 1; - - snprintf(buf, sizeof(buf), "db_p%0*drload", zeros, i); + snprintf(buf, sizeof(buf), "db_p%02drload", mythread); LOCK_INIT(buf); rename_proc(buf); @@ -6119,13 +6125,70 @@ static void *process_reload(__maybe_unused void *arg) while (!everyone_die) { K_WLOCK(breakqueue_free); - bq_item = k_unlink_head(reload_done_breakqueue_store); - if (bq_item) { - reload_processing++; - reload_processed++; + if (mythread == 0 && queue_threads_delta != 0) { + threads_delta = queue_threads_delta; + queue_threads_delta = 0; + } else { + bq_item = k_unlink_head(reload_done_breakqueue_store); + if (bq_item) { + reload_processing++; + reload_processed++; + } } K_WUNLOCK(breakqueue_free); + if (!running[mythread]) + break; + + // TODO: deal with thread creation/shutdown failure + if (threads_delta != 0) { + if (threads_delta > 0) { + // Add threads + tot = 1; + done = 0; + for (i = 1; i < THREAD_LIMIT; i++) { + if (!running[i]) { + if (threads_delta > 0) { + threads_delta--; + running[i] = true; + create_pthread(&(procrel_pt[i]), + process_reload, + &(n[i])); + done++; + tot++; + } + } else + tot++; + } + LOGWARNING("%s() created %d thread%s total now" + " %d", + __func__, done, + (done == 1) ? EMPTY : "s", tot); + } else { + // Notify and wait for each to exit + tot = 1; + done = 0; + i = THREAD_LIMIT - 1; + for (i = THREAD_LIMIT - 1; i > 0; i--) { + if (running[i]) { + if (threads_delta < 0) { + threads_delta++; + running[i] = false; + join_pthread(procrel_pt[i]); + done++; + } else + tot++; + } + } + LOGWARNING("%s() stopped %d thread%s total now" + " %d", + __func__, done, + (done == 1) ? EMPTY : "s", tot); + } + threads_delta = 0; + continue; + } + if (!bq_item) { // Finished reloading? 
if (!reloading) @@ -6165,9 +6228,12 @@ static void *process_reload(__maybe_unused void *arg) PQfinish(conn); - if (!arg) { - for (i = 1; i < queue_threads; i++) - join_pthread(procrel_pt[i]); + // Only when everyone_die is true + if (mythread == 0) { + for (i = 1; i < THREAD_LIMIT; i++) { + if (running[i]) + join_pthread(procrel_pt[i]); + } LOGNOTICE("%s() exiting, processed %"PRIu64, __func__, reload_processed); diff --git a/src/ckdb.h b/src/ckdb.h index 20796597..ddf8213a 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -11,8 +11,11 @@ #ifndef CKDB_H #define CKDB_H -// Remove this line if you have an old GCC version +#ifdef __GNUC__ +#if __GNUC__ >= 6 #pragma GCC diagnostic ignored "-Wtautological-compare" +#endif +#endif #include "config.h" @@ -55,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.400" +#define CKDB_VERSION DB_VERSION"-2.401" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ @@ -115,6 +118,13 @@ enum free_modes { extern enum free_modes free_mode; +// Define the array size for thread data +#define THREAD_LIMIT 99 +/* Flag to notify thread changes + * Set/checked under the function's main loop's first lock + * This is always a 'delta' value meaning add or subtract that many */ +extern int queue_threads_delta; + #define BLANK " " extern char *EMPTY; extern const char *nullstr; @@ -722,6 +732,7 @@ enum cmd_values { CMD_LOCKS, CMD_EVENTS, CMD_HIGH, + CMD_THREADS, CMD_END }; diff --git a/src/ckdb_cmd.c b/src/ckdb_cmd.c index dfe8b576..a0f432be 100644 --- a/src/ckdb_cmd.c +++ b/src/ckdb_cmd.c @@ -8356,6 +8356,61 @@ static char *cmd_high(PGconn *conn, char *cmd, char *id, return buf; } +// Running thread adjustments +static char *cmd_threads(__maybe_unused PGconn *conn, char *cmd, char *id, + __maybe_unused tv_t *now, __maybe_unused char *by, + __maybe_unused char *code, __maybe_unused char *inet, + __maybe_unused tv_t *cd, K_TREE *trf_root, + __maybe_unused bool 
reload_data) +{ + K_ITEM *i_name, *i_delta; + char *name, *delta; + char reply[1024] = ""; + size_t siz = sizeof(reply); + char *buf = NULL; + int delta_value = 0; + + LOGDEBUG("%s(): cmd '%s'", __func__, cmd); + + i_name = require_name(trf_root, "name", 1, NULL, reply, siz); + if (!i_name) + return strdup(reply); + name = transfer_data(i_name); + i_delta = require_name(trf_root, "delta", 2, NULL, reply, siz); + if (!i_delta) + return strdup(reply); + delta = transfer_data(i_delta); + if (*delta != '+' && *delta != '-') { + snprintf(reply, siz, "invalid delta '%s'", delta); + LOGERR("%s() %s.%s", __func__, id, reply); + return strdup(reply); + } + delta_value = atoi(delta+1); + if (delta_value < 1 || delta_value >= THREAD_LIMIT) { + snprintf(reply, siz, "invalid delta range '%s'", delta); + LOGERR("%s() %s.%s", __func__, id, reply); + return strdup(reply); + } + if (*delta == '-') + delta_value = -delta_value; + + if (strcasecmp(name, "pr") == 0 || + strcasecmp(name, "process_reload") == 0) { + K_WLOCK(breakqueue_free); + // Just overwrite whatever's there + queue_threads_delta = delta_value; + K_WUNLOCK(breakqueue_free); + snprintf(reply, siz, "ok.delta %d request sent", delta_value); + return strdup(reply); + } else { + snprintf(reply, siz, "unknown name '%s'", name); + LOGERR("%s() %s.%s", __func__, id, reply); + return strdup(reply); + } + + return buf; +} + /* The socket command format is as follows: * Basic structure: * cmd.ID.fld1=value1 FLDSEP fld2=value2 FLDSEP fld3=... 
@@ -8468,5 +8523,6 @@ struct CMDS ckdb_cmds[] = { { CMD_LOCKS, "locks", false, false, cmd_locks, SEQ_NONE, ACCESS_SYSTEM }, { CMD_EVENTS, "events", false, false, cmd_events, SEQ_NONE, ACCESS_SYSTEM }, { CMD_HIGH, "high", false, false, cmd_high, SEQ_NONE, ACCESS_SYSTEM }, + { CMD_THREADS, "threads", false, false, cmd_threads, SEQ_NONE, ACCESS_SYSTEM }, { CMD_END, NULL, false, false, NULL, SEQ_NONE, 0 } }; From bfb0f065a4871c6da388e603d34c75d2eafa8c60 Mon Sep 17 00:00:00 2001 From: kanoi Date: Wed, 17 Aug 2016 11:30:54 +1000 Subject: [PATCH 03/23] ckdb - disable all lock checking if thread limits are exceeded --- src/ckdb.c | 28 ++++++++++++++++++++-------- src/ckdb.h | 2 +- src/klist.c | 1 + src/klist.h | 27 +++++++++++++++++++-------- 4 files changed, 41 insertions(+), 17 deletions(-) diff --git a/src/ckdb.c b/src/ckdb.c index afd1d524..652679a0 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -6160,10 +6160,16 @@ static void *process_reload(__maybe_unused void *arg) } else tot++; } - LOGWARNING("%s() created %d thread%s total now" - " %d", - __func__, done, - (done == 1) ? EMPTY : "s", tot); + LOGWARNING("%s() created %d thread%s total=%d" +#if LOCK_CHECK + " next_thread_id=%d" +#endif + , __func__, done, + (done == 1) ? EMPTY : "s", tot +#if LOCK_CHECK + , next_thread_id +#endif + ); } else { // Notify and wait for each to exit tot = 1; @@ -6180,10 +6186,16 @@ static void *process_reload(__maybe_unused void *arg) tot++; } } - LOGWARNING("%s() stopped %d thread%s total now" - " %d", - __func__, done, - (done == 1) ? EMPTY : "s", tot); + LOGWARNING("%s() stopped %d thread%s total=%d " +#if LOCK_CHECK + " next_thread_id=%d" +#endif + , __func__, done, + (done == 1) ? 
EMPTY : "s", tot +#if LOCK_CHECK + , next_thread_id +#endif + ); } threads_delta = 0; continue; diff --git a/src/ckdb.h b/src/ckdb.h index ddf8213a..ca156cf9 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.401" +#define CKDB_VERSION DB_VERSION"-2.402" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ diff --git a/src/klist.c b/src/klist.c index 0b88a8d8..1741489e 100644 --- a/src/klist.c +++ b/src/klist.c @@ -13,6 +13,7 @@ const char *tree_node_list_name = "TreeNodes"; #if LOCK_CHECK +bool disable_checks = false; bool check_locks = true; const char *thread_noname = "UNSET"; int next_thread_id = 0; diff --git a/src/klist.h b/src/klist.h index 06b92e02..8450dd6c 100644 --- a/src/klist.h +++ b/src/klist.h @@ -75,10 +75,13 @@ extern const char *tree_node_list_name; * default to false, * or turn off check_locks during ckdb startup with a ckpmsg 'locks.ID.locks' * If you turn deadlock prediction on with ckpmsg 'locks.1.deadlocks=y' - * it will not re-enable it for any thread that has alread predicted + * it will not re-enable it for any thread that has already predicted * a deadlock */ #if LOCK_CHECK +// Disable all lock checks permanently if thread limits are exceeded +extern bool disable_checks; + // We disable lock checking if an error is encountered extern bool check_locks; /* Maximum number of threads preallocated @@ -239,7 +242,9 @@ retry: #if LOCK_CHECK #define LOCK_MAYBE /* The simple lock_check_init check is in case someone incorrectly changes ckdb.c ... - * It's not fool proof :P */ + * It's not fool proof :P + * If LOCK_INIT() is called too many times, i.e. 
too many threads, + * it will report and disable lock checking */ #define LOCK_INIT(_name) do { \ if (!lock_check_init) { \ quithere(1, "In thread %s, lock_check_lock has not been " \ @@ -248,6 +253,12 @@ retry: ck_wlock(&lock_check_lock); \ my_thread_id = next_thread_id++; \ ck_wunlock(&lock_check_lock); \ + if (my_thread_id >= MAX_THREADS) { \ + disable_checks = true; \ + LOGERR("WARNING: all lock checking disabled due to " \ + "initialising too many threads - limit %d", \ + MAX_THREADS); \ + } \ my_thread_name = strdup(_name); \ } while (0) #define FIRST_LOCK_INIT(_name) do { \ @@ -313,7 +324,7 @@ retry: static const char *_fl = __FILE__; \ static const char *_f = __func__; \ static const int _l = __LINE__; \ - if (my_check_locks && check_locks) { \ + if (!disable_checks && my_check_locks && check_locks) { \ if (_mode == LOCK_MODE_LOCK) { \ if (THRLCK(_list).first_held || \ (THRLCK(_list).r_count != 0) || \ @@ -384,7 +395,7 @@ retry: } \ } \ } \ - if (check_deadlocks && my_check_deadlocks) { \ + if (!disable_checks && check_deadlocks && my_check_deadlocks) { \ int _dp = (_list)->deadlock_priority; \ if (my_lock_level == 0) { \ if (_mode == LOCK_MODE_LOCK) { \ @@ -477,7 +488,7 @@ retry: LOCK_MODE_UNLOCK, LOCK_TYPE_READ) #define _LIST_WRITE(_list, _chklock, _file, _func, _line) do { \ - if (my_check_locks && check_locks && _chklock) { \ + if (!disable_checks && my_check_locks && check_locks && _chklock) { \ if (!THRLCK(_list).first_held || \ (THRLCK(_list).r_count != 0) || \ (THRLCK(_list).w_count != 1)) { \ @@ -498,7 +509,7 @@ retry: } \ } while (0) #define _LIST_WRITE2(_list, _chklock) do { \ - if (my_check_locks && check_locks && _chklock) { \ + if (!disable_checks && my_check_locks && check_locks && _chklock) { \ if (!THRLCK(_list).first_held || \ (THRLCK(_list).r_count != 0) || \ (THRLCK(_list).w_count != 1)) { \ @@ -519,7 +530,7 @@ retry: } while (0) // read is ok under read or write #define _LIST_READ(_list, _chklock, _file, _func, _line) do { \ - if 
(my_check_locks && check_locks && _chklock) { \ + if (!disable_checks && my_check_locks && check_locks && _chklock) { \ if (!THRLCK(_list).first_held || \ (THRLCK(_list).r_count + \ THRLCK(_list).w_count) != 1) { \ @@ -540,7 +551,7 @@ retry: } \ } while (0) #define _LIST_READ2(_list, _chklock) do { \ - if (my_check_locks && check_locks && _chklock) { \ + if (!disable_checks && my_check_locks && check_locks && _chklock) { \ if (!THRLCK(_list).first_held || \ (THRLCK(_list).r_count + \ THRLCK(_list).w_count) != 1) { \ From de85c3fb5ad057ab02ad878e54075c0c53be9d8f Mon Sep 17 00:00:00 2001 From: kanoi Date: Thu, 18 Aug 2016 14:03:07 +1000 Subject: [PATCH 04/23] ckdb - breaker() thread management --- src/ckdb.c | 341 ++++++++++++++++++++++++++++++++++--------------- src/ckdb.h | 5 +- src/ckdb_cmd.c | 16 +++ 3 files changed, 260 insertions(+), 102 deletions(-) diff --git a/src/ckdb.c b/src/ckdb.c index 652679a0..712fe132 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -126,13 +126,16 @@ int queue_threads_delta = 0; * handle the extra locking or the threads are swapping */ static int queue_threads = 1; // -B to override calculated value -static int breakdown_threads = -1; +static int reload_breakdown_threads = -1; +// This is normally the same as above, but key_update only requires 1 +static int cmd_breakdown_threads = -1; // cpu count to breakdown thread ratio #define BREAKDOWN_RATIO 3 -static int reload_breakdown_count = 0; -static int cmd_breakdown_count = 0; -/* Lock for access to *breakdown_count - * Any change to/from 0 will update breakdown_using_data */ +// Flags to notify thread changes +int reload_breakdown_threads_delta = 0; +int cmd_breakdown_threads_delta = 0; + +// Lock used to determine when the last breakdown thread exits static cklock_t breakdown_lock; static int replier_count = 0; @@ -3719,23 +3722,63 @@ nogood: return CMD_REPLY; } +struct breaker_setup { + bool reload; + int thread; +}; + +#define ISRELOAD 0 +#define ISCMD 1 + static void *breaker(void 
*arg) { + static pthread_t breaker_pt[2][THREAD_LIMIT]; + static struct breaker_setup breaker_setup[2][THREAD_LIMIT]; + static bool breaker_running[2][THREAD_LIMIT]; + static bool reload0 = false; + static bool cmd0 = false; + + struct breaker_setup *setup; K_ITEM *bq_item = NULL; BREAKQUEUE *bq = NULL; MSGLINE *msgline = NULL; char buf[128]; - int thr, zeros; - bool reload, was_null, msg = false; + bool reload, was_null, msg; int queue_sleep, queue_limit, count; uint64_t processed = 0; ts_t when, when_add; - int ret; + int i, typ, mythread, done, tot, ret; + int breaker_delta = 0; - pthread_detach(pthread_self()); + setup = (struct breaker_setup *)(arg); + mythread = setup->thread; + if ((reload = setup->reload)) + typ = ISRELOAD; + else + typ = ISCMD; + + if (mythread == 0) { + pthread_detach(pthread_self()); + + for (i = 0; i < THREAD_LIMIT; i++) { + breaker_setup[typ][i].thread = i; + breaker_setup[typ][i].reload = reload; + breaker_running[typ][i] = false; + } + breaker_running[typ][0] = true; + + if (reload) { + reload0 = true; + breaker_delta = reload_breakdown_threads - 1; + } else { + cmd0 = true; + breaker_delta = cmd_breakdown_threads - 1; + } + + LOGNOTICE("%s() %s initialised - delta %d", + __func__, reload ? "reload" : "cmd", breaker_delta); + } - // Is this a reload thread or a cmd thread? - reload = *(bool *)(arg); if (reload) { queue_limit = reload_queue_limit; queue_sleep = RELOAD_QUEUE_SLEEP_MS; @@ -3748,37 +3791,22 @@ static void *breaker(void *arg) when_add.tv_nsec = (CMD_QUEUE_SLEEP_MS % 1000) * 1000000; } - ck_wlock(&breakdown_lock); - if (reload) - thr = ++reload_breakdown_count; - else - thr = ++cmd_breakdown_count; - breakdown_using_data = true; - ck_wunlock(&breakdown_lock); - - if (breakdown_threads < 10) - zeros = 1; - else - zeros = (int)log10(breakdown_threads) + 1; - - snprintf(buf, sizeof(buf), "db_%c%0*d%s", - reload ? 'r' : 'c', zeros, thr, __func__); + snprintf(buf, sizeof(buf), "db_%c%02d%s", + reload ? 
'r' : 'c', mythread, __func__); LOCK_INIT(buf); rename_proc(buf); - LOGNOTICE("%s() %s %s starting", - __func__, buf, reload ? "reload" : "cmd"); + LOGNOTICE("%s() %s starting", __func__, buf); if (reload) { /* reload has to wait for the reload to start, however, also * check for startup_complete in case we miss the reload */ while (!everyone_die && !reloading && !startup_complete) cksleep_ms(queue_sleep); - - LOGNOTICE("%s() %s reload processing", __func__, buf); - } + LOGNOTICE("%s() %s processing", __func__, buf); + // The first one to start K_WLOCK(breakqueue_free); if (reload) { @@ -3790,31 +3818,112 @@ static void *breaker(void *arg) } K_WUNLOCK(breakqueue_free); while (!everyone_die) { - K_WLOCK(breakqueue_free); - bq_item = NULL; - was_null = false; - if (reload) - count = reload_done_breakqueue_store->count; - else - count = cmd_done_breakqueue_store->count; + if (mythread && !breaker_running[typ][mythread]) + break; - // Don't unlink if we are above the limit - if (count <= queue_limit) { - if (reload) - bq_item = k_unlink_head(reload_breakqueue_store); - else - bq_item = k_unlink_head(cmd_breakqueue_store); - if (!bq_item) - was_null = true; - } - if (bq_item) { + K_WLOCK(breakqueue_free); + if (mythread == 0 && reload && reload_breakdown_threads_delta != 0) { + breaker_delta = reload_breakdown_threads_delta; + reload_breakdown_threads_delta = 0; + } else if (mythread == 0 && !reload && cmd_breakdown_threads_delta != 0) { + breaker_delta = cmd_breakdown_threads_delta; + cmd_breakdown_threads_delta = 0; + } else { + bq_item = NULL; + was_null = false; if (reload) - break_reload_processed++; + count = reload_done_breakqueue_store->count; else - break_cmd_processed++; + count = cmd_done_breakqueue_store->count; + + // Don't unlink if we are above the limit + if (count <= queue_limit) { + if (reload) + bq_item = k_unlink_head(reload_breakqueue_store); + else + bq_item = k_unlink_head(cmd_breakqueue_store); + if (!bq_item) + was_null = true; + } + if 
(bq_item) { + if (reload) + break_reload_processed++; + else + break_cmd_processed++; + } } K_WUNLOCK(breakqueue_free); + // TODO: deal with thread creation/shutdown failure + if (breaker_delta != 0) { + if (breaker_delta > 0) { + // Add threads + tot = 1; + done = 0; + for (i = 1; i < THREAD_LIMIT; i++) { + if (!breaker_running[typ][i]) { + if (breaker_delta > 0) { + breaker_delta--; + breaker_running[typ][i] = true; + create_pthread(&(breaker_pt[typ][i]), + breaker, + &(breaker_setup[typ][i])); + done++; + tot++; + } + } else + tot++; + } + LOGWARNING("%s() created %d %s thread%s total=%d" +#if LOCK_CHECK + " next_thread_id=%d" +#endif + , __func__, + done, + reload ? "reload" : "cmd", + (done == 1) ? EMPTY : "s", + tot +#if LOCK_CHECK + , next_thread_id +#endif + ); + } else { + // Notify and wait for each to exit + tot = 1; + done = 0; + for (i = THREAD_LIMIT - 1; i > 0; i--) { + if (breaker_running[typ][i]) { + if (breaker_delta < 0) { + breaker_delta++; + LOGNOTICE("%s() %s stopping %d", + __func__, + reload ? "reload" : "cmd", + i); + breaker_running[typ][i] = false; + join_pthread(breaker_pt[typ][i]); + done++; + } else + tot++; + } + } + LOGWARNING("%s() stopped %d %s thread%s total=%d" +#if LOCK_CHECK + " next_thread_id=%d" +#endif + , __func__, + done, + reload ? "reload" : "cmd", + (done == 1) ? EMPTY : "s", + tot +#if LOCK_CHECK + , next_thread_id +#endif + ); + } + breaker_delta = 0; + continue; + } + if (!bq_item) { // Is the queue empty and the reload completed? 
if (was_null && reload && !reloading) @@ -3913,23 +4022,37 @@ static void *breaker(void *arg) count = max_sockd_count; K_RUNLOCK(breakqueue_free); - ck_wlock(&breakdown_lock); - if (reload) { - reload_breakdown_count--; - // The last one to finish - updated each exit - setnow(&break_reload_fin); - } else - cmd_breakdown_count--; + if (mythread == 0) { + for (i = 1; i < THREAD_LIMIT; i++) { + if (breaker_running[typ][i]) { + breaker_running[typ][i] = false; + LOGNOTICE("%s() %s waiting for %d", + __func__, buf, i); + join_pthread(breaker_pt[typ][i]); + } + } - if ((reload_breakdown_count + cmd_breakdown_count) < 1) { - breakdown_using_data = false; - msg = true; - } - ck_wunlock(&breakdown_lock); + if (reload) + setnow(&break_reload_fin); + + msg = false; + ck_wlock(&breakdown_lock); + if (reload) + reload0 = false; + else + cmd0 = false; - if (msg) { - LOGWARNING("%s() threads shut down - max_sockd_count=%d", - __func__, count); + if (reload0 == false && cmd0 == false) { + breakdown_using_data = false; + msg = true; + } + ck_wunlock(&breakdown_lock); + + if (msg) { + LOGWARNING("%s() threads shut down - " + "max_sockd_count=%d", + __func__, count); + } } return NULL; @@ -6124,6 +6247,9 @@ static void *process_reload(__maybe_unused void *arg) now = time(NULL); while (!everyone_die) { + if (mythread && !running[mythread]) + break; + K_WLOCK(breakqueue_free); if (mythread == 0 && queue_threads_delta != 0) { threads_delta = queue_threads_delta; @@ -6137,9 +6263,6 @@ static void *process_reload(__maybe_unused void *arg) } K_WUNLOCK(breakqueue_free); - if (!running[mythread]) - break; - // TODO: deal with thread creation/shutdown failure if (threads_delta != 0) { if (threads_delta > 0) { @@ -6174,11 +6297,12 @@ static void *process_reload(__maybe_unused void *arg) // Notify and wait for each to exit tot = 1; done = 0; - i = THREAD_LIMIT - 1; for (i = THREAD_LIMIT - 1; i > 0; i--) { if (running[i]) { if (threads_delta < 0) { threads_delta++; + LOGNOTICE("%s() stopping 
%d", + __func__, i); running[i] = false; join_pthread(procrel_pt[i]); done++; @@ -6240,11 +6364,13 @@ static void *process_reload(__maybe_unused void *arg) PQfinish(conn); - // Only when everyone_die is true if (mythread == 0) { for (i = 1; i < THREAD_LIMIT; i++) { - if (running[i]) + if (running[i]) { + running[i] = false; + LOGNOTICE("%s() waiting for %d", __func__, i); join_pthread(procrel_pt[i]); + } } LOGNOTICE("%s() exiting, processed %"PRIu64, @@ -6883,12 +7009,13 @@ static void *listener(void *arg) pthread_t sock_pt; pthread_t summ_pt; pthread_t mark_pt; - pthread_t break_pt; + pthread_t reload_break_pt; + pthread_t cmd_break_pt; int bq, bqp, bqd, wq0count, wqcount; char ooo_buf[256]; char buf[128]; - int cpus, i; - bool reloader, cmder; + int cpus; + struct breaker_setup reloader, cmder; pthread_detach(pthread_self()); @@ -6911,20 +7038,25 @@ static void *listener(void *arg) DLPRIO(logqueue, 94); DLPRIO(breakqueue, PRIO_TERMINAL); #endif - if (breakdown_threads <= 0) { + if (reload_breakdown_threads <= 0) { cpus = sysconf(_SC_NPROCESSORS_ONLN); if (cpus < 1) cpus = 1; - breakdown_threads = (int)(cpus / BREAKDOWN_RATIO) ? : 1; + reload_breakdown_threads = (int)(cpus / BREAKDOWN_RATIO) ? 
: 1; } + cmd_breakdown_threads = reload_breakdown_threads; LOGWARNING("%s(): creating %d*2 breaker threads ...", - __func__, breakdown_threads); - reloader = true; - for (i = 0; i < breakdown_threads; i++) - create_pthread(&break_pt, breaker, &reloader); - cmder = false; - for (i = 0; i < breakdown_threads; i++) - create_pthread(&break_pt, breaker, &cmder); + __func__, reload_breakdown_threads); + + breakdown_using_data = true; + + reloader.reload = true; + reloader.thread = 0; + create_pthread(&reload_break_pt, breaker, &reloader); + + cmder.reload = false; + cmder.thread = 0; + create_pthread(&cmd_break_pt, breaker, &cmder); if (no_data_log == false) create_pthread(&log_pt, logger, NULL); @@ -7274,10 +7406,12 @@ static void update_keysummary(ckpool_t *ckp) int64_t markerid_stt, markerid_fin; char *tmp, *minus; tv_t db_stt, db_fin; - pthread_t break_pt, sock_pt; + pthread_t reload_break_pt; + pthread_t cmd_break_pt; + pthread_t sock_pt; double min, sec; - bool reloader, cmder; - int cpus, i; + int cpus; + struct breaker_setup reloader, cmder; // Simple value check to abort early if (!key_range || !(*key_range)) { @@ -7338,21 +7472,26 @@ static void update_keysummary(ckpool_t *ckp) #if LOCK_CHECK DLPRIO(breakqueue, PRIO_TERMINAL); #endif - if (breakdown_threads <= 0) { + if (reload_breakdown_threads <= 0) { cpus = sysconf(_SC_NPROCESSORS_ONLN); if (cpus < 1) cpus = 1; - breakdown_threads = (int)(cpus / BREAKDOWN_RATIO) ? : 1; + reload_breakdown_threads = (int)(cpus / BREAKDOWN_RATIO) ? 
: 1; } - LOGWARNING("%s(): creating %d+1 breaker threads ...", - __func__, breakdown_threads); - reloader = true; - for (i = 0; i < breakdown_threads; i++) - create_pthread(&break_pt, breaker, &reloader); - cmder = false; // Only needs one - for (i = 0; i < 1; i++) - create_pthread(&break_pt, breaker, &cmder); + cmd_breakdown_threads = 1; + LOGWARNING("%s(): creating %d+1 breaker threads ...", + __func__, reload_breakdown_threads); + + breakdown_using_data = true; + + reloader.reload = true; + reloader.thread = 0; + create_pthread(&reload_break_pt, breaker, &reloader); + + cmder.reload = false; + cmder.thread = 0; + create_pthread(&cmd_break_pt, breaker, &cmder); alloc_storage(); @@ -8039,7 +8178,7 @@ int main(int argc, char **argv) memset(&ckp, 0, sizeof(ckp)); ckp.loglevel = LOG_NOTICE; - while ((c = getopt_long(argc, argv, "a:b:B:c:d:D:ghi:IkK:l:L:mM:n:p:P:q:Q:r:R:s:S:t:Tu:U:vw:yY:", long_options, &i)) != -1) { + while ((c = getopt_long(argc, argv, "a:b:B:c:d:D:f:ghi:IkK:l:L:mM:n:p:P:q:Q:r:R:s:S:t:Tu:U:vw:yY:", long_options, &i)) != -1) { switch(c) { case '?': case ':': @@ -8073,7 +8212,7 @@ int main(int argc, char **argv) "- must be >0 and <=%d", bt, THREAD_LIMIT); } - breakdown_threads = bt; + reload_breakdown_threads = bt; } break; case 'c': @@ -8237,12 +8376,12 @@ int main(int argc, char **argv) case 'S': btc_server = strdup(optarg); break; - case 'T': - txn_tree_store = false; - break; case 't': btc_timeout = atoi(optarg); break; + case 'T': + txn_tree_store = false; + break; case 'u': db_user = strdup(optarg); kill = optarg; diff --git a/src/ckdb.h b/src/ckdb.h index ca156cf9..a96f7d9f 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.402" +#define CKDB_VERSION DB_VERSION"-2.403" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ @@ -124,6 +124,9 @@ extern enum free_modes free_mode; * Set/checked under the function's 
main loop's first lock * This is always a 'delta' value meaning add or subtract that many */ extern int queue_threads_delta; +/* Flags to notify thread changes */ +extern int reload_breakdown_threads_delta; +extern int cmd_breakdown_threads_delta; #define BLANK " " extern char *EMPTY; diff --git a/src/ckdb_cmd.c b/src/ckdb_cmd.c index a0f432be..1082560e 100644 --- a/src/ckdb_cmd.c +++ b/src/ckdb_cmd.c @@ -8402,6 +8402,22 @@ static char *cmd_threads(__maybe_unused PGconn *conn, char *cmd, char *id, K_WUNLOCK(breakqueue_free); snprintf(reply, siz, "ok.delta %d request sent", delta_value); return strdup(reply); + } else if (strcasecmp(name, "rb") == 0 || + strcasecmp(name, "reload_breaker") == 0) { + K_WLOCK(breakqueue_free); + // Just overwrite whatever's there + reload_breakdown_threads_delta = delta_value; + K_WUNLOCK(breakqueue_free); + snprintf(reply, siz, "ok.delta %d request sent", delta_value); + return strdup(reply); + } else if (strcasecmp(name, "cb") == 0 || + strcasecmp(name, "cmd_breaker") == 0) { + K_WLOCK(breakqueue_free); + // Just overwrite whatever's there + cmd_breakdown_threads_delta = delta_value; + K_WUNLOCK(breakqueue_free); + snprintf(reply, siz, "ok.delta %d request sent", delta_value); + return strdup(reply); } else { snprintf(reply, siz, "unknown name '%s'", name); LOGERR("%s() %s.%s", __func__, id, reply); From 920070f07ce96660d8360f0ecca25818776bb877 Mon Sep 17 00:00:00 2001 From: kanoi Date: Fri, 19 Aug 2016 00:37:36 +1000 Subject: [PATCH 05/23] ckdb - add missing null checks in workinfo_age --- src/ckdb.c | 4 ++-- src/ckdb.h | 6 +++--- src/ckdb_data.c | 28 ++++++++++++++++++---------- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/ckdb.c b/src/ckdb.c index 712fe132..ccb994a6 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -117,7 +117,7 @@ static bool blistener_using_data; static bool breakdown_using_data; static bool replier_using_data; -/* Flag to notify thread changes +/* To notify thread changes * Set/checked under 
the function's main loop's first lock * This is always a 'delta' value meaning add or subtract that many */ int queue_threads_delta = 0; @@ -131,7 +131,7 @@ static int reload_breakdown_threads = -1; static int cmd_breakdown_threads = -1; // cpu count to breakdown thread ratio #define BREAKDOWN_RATIO 3 -// Flags to notify thread changes +// To notify thread changes int reload_breakdown_threads_delta = 0; int cmd_breakdown_threads_delta = 0; diff --git a/src/ckdb.h b/src/ckdb.h index a96f7d9f..8cec4bc3 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.403" +#define CKDB_VERSION DB_VERSION"-2.404" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ @@ -120,11 +120,11 @@ extern enum free_modes free_mode; // Define the array size for thread data #define THREAD_LIMIT 99 -/* Flag to notify thread changes +/* To notify thread changes * Set/checked under the function's main loop's first lock * This is always a 'delta' value meaning add or subtract that many */ extern int queue_threads_delta; -/* Flags to notify thread changes */ +// To notify thread changes extern int reload_breakdown_threads_delta; extern int cmd_breakdown_threads_delta; diff --git a/src/ckdb_data.c b/src/ckdb_data.c index 20c6d88e..c5d79ce4 100644 --- a/src/ckdb_data.c +++ b/src/ckdb_data.c @@ -2288,9 +2288,11 @@ bool workinfo_age(int64_t workinfoid, char *poolinstance, tv_t *cd, ss_look.data = (void *)(&looksharesummary); K_RLOCK(sharesummary_free); ss_item = find_after_in_ktree(sharesummary_workinfoid_root, &ss_look, ss_ctx); - DATA_SHARESUMMARY_NULL(sharesummary, ss_item); - // complete could change, the id fields wont be changed/removed yet - STRNCPY(complete, sharesummary->complete); + if (ss_item) { + DATA_SHARESUMMARY(sharesummary, ss_item); + // complete could change, the id fields wont be changed/removed yet + STRNCPY(complete, sharesummary->complete); + } 
K_RUNLOCK(sharesummary_free); while (ss_item && sharesummary->workinfoid == workinfoid) { ss_tot++; @@ -2342,8 +2344,10 @@ bool workinfo_age(int64_t workinfoid, char *poolinstance, tv_t *cd, K_RLOCK(sharesummary_free); ss_item = next_in_ktree(ss_ctx); - DATA_SHARESUMMARY_NULL(sharesummary, ss_item); - STRNCPY(complete, sharesummary->complete); + if (ss_item) { + DATA_SHARESUMMARY(sharesummary, ss_item); + STRNCPY(complete, sharesummary->complete); + } K_RUNLOCK(sharesummary_free); } @@ -2375,9 +2379,11 @@ skip_ss: ks_look.data = (void *)(&lookkeysharesummary); K_RLOCK(keysharesummary_free); ks_item = find_after_in_ktree(keysharesummary_root, &ks_look, ks_ctx); - DATA_KEYSHARESUMMARY_NULL(keysharesummary, ks_item); - // complete could change, the id fields wont be changed/removed yet - STRNCPY(complete, keysharesummary->complete); + if (ks_item) { + DATA_KEYSHARESUMMARY(keysharesummary, ks_item); + // complete could change, the id fields wont be changed/removed yet + STRNCPY(complete, keysharesummary->complete); + } K_RUNLOCK(keysharesummary_free); while (ks_item && keysharesummary->workinfoid == workinfoid) { ks_tot++; @@ -2416,8 +2422,10 @@ skip_ss: K_RLOCK(keysharesummary_free); ks_item = next_in_ktree(ks_ctx); - DATA_KEYSHARESUMMARY_NULL(keysharesummary, ks_item); - STRNCPY(complete, keysharesummary->complete); + if (ks_item) { + DATA_KEYSHARESUMMARY(keysharesummary, ks_item); + STRNCPY(complete, keysharesummary->complete); + } K_RUNLOCK(keysharesummary_free); } From 4b302af97fabdd7892f84118ccc85e2e4d9ccc3d Mon Sep 17 00:00:00 2001 From: kanoi Date: Fri, 19 Aug 2016 11:38:26 +1000 Subject: [PATCH 06/23] ckdb - pqproc() thread management --- src/ckdb.c | 200 ++++++++++++++++++++++++++++++++++++------------- src/ckdb.h | 5 +- src/ckdb_cmd.c | 10 ++- 3 files changed, 161 insertions(+), 54 deletions(-) diff --git a/src/ckdb.c b/src/ckdb.c index ccb994a6..a24a51eb 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -120,11 +120,13 @@ static bool replier_using_data; /* To 
notify thread changes * Set/checked under the function's main loop's first lock * This is always a 'delta' value meaning add or subtract that many */ -int queue_threads_delta = 0; -/* Use -Q to set it higher - * Setting it higher can degrade performance if the CPUs can't +int reload_queue_threads_delta = 0; +int proc_queue_threads_delta = 0; +/* Use -Q to set them higher + * Setting them higher can degrade performance if the CPUs can't * handle the extra locking or the threads are swapping */ -static int queue_threads = 1; +static int reload_queue_threads = 1; +static int proc_queue_threads = 1; // -B to override calculated value static int reload_breakdown_threads = -1; // This is normally the same as above, but key_update only requires 1 @@ -6231,7 +6233,7 @@ static void *process_reload(__maybe_unused void *arg) running[0] = true; // Set to create the rest of the threads - queue_threads_delta = queue_threads - 1; + reload_queue_threads_delta = reload_queue_threads - 1; LOGNOTICE("%s() starting", __func__); } @@ -6251,9 +6253,9 @@ static void *process_reload(__maybe_unused void *arg) break; K_WLOCK(breakqueue_free); - if (mythread == 0 && queue_threads_delta != 0) { - threads_delta = queue_threads_delta; - queue_threads_delta = 0; + if (mythread == 0 && reload_queue_threads_delta != 0) { + threads_delta = reload_queue_threads_delta; + reload_queue_threads_delta = 0; } else { bq_item = k_unlink_head(reload_done_breakqueue_store); if (bq_item) { @@ -6362,7 +6364,8 @@ static void *process_reload(__maybe_unused void *arg) tick(); } - PQfinish(conn); + if (conn) + PQfinish(conn); if (mythread == 0) { for (i = 1; i < THREAD_LIMIT; i++) { @@ -6831,12 +6834,15 @@ static void free_lost(SEQDATA *seqdata) static void *pqproc(void *arg) { + static pthread_t pqproc_pt[THREAD_LIMIT]; + static int n[THREAD_LIMIT]; + static bool running[THREAD_LIMIT]; + /* Process queued work - ensure pool0 is emptied first, * even if there is pending pool0 data being processed by breaker() */ 
static bool pool0 = true; static tv_t wq_stt, wq_fin; - pthread_t *queue_pt; PGconn *conn = NULL; K_ITEM *wq_item; time_t now = 0; @@ -6847,28 +6853,26 @@ static void *pqproc(void *arg) SEQSET *seqset = NULL; SEQDATA *seqdata; K_ITEM *ss_item; - int i, *n, zeros; ts_t when, when_add; - int ret; + int i, mythread, threads_delta = 0, done, tot, ret; if (!arg) { setnow(&wq_stt); - n = malloc(queue_threads * sizeof(int)); - queue_pt = malloc(queue_threads * sizeof(*queue_pt)); - for (i = 1; i < queue_threads; i++) { + for (i = 0; i < THREAD_LIMIT; i++) { n[i] = i; - create_pthread(&(queue_pt[i]), pqproc, &(n[i])); + running[i] = false; } - } else { - i = *(int *)(arg); - if (queue_threads < 10) - zeros = 1; - else - zeros = (int)log10(queue_threads) + 1; + mythread = 0; + running[0] = true; - snprintf(buf, sizeof(buf), "db_p%0*dqproc", zeros, i); + // Set to create the rest of the threads + proc_queue_threads_delta = proc_queue_threads - 1; + } else { + mythread = *(int *)(arg); + + snprintf(buf, sizeof(buf), "db_p%02dqproc", mythread); LOCK_INIT(buf); rename_proc(buf); } @@ -6883,34 +6887,104 @@ static void *pqproc(void *arg) // Override checking until pool0 is complete wqcount = -1; while (!everyone_die) { + if (mythread && !running[mythread]) + break; + wq_item = NULL; K_WLOCK(workqueue_free); - if (pool0) { - if (earlysock_left == 0) { - pool0 = false; - switch_msg = true; - } else { - wq_item = k_unlink_head(pool0_workqueue_store); - if (wq_item) - earlysock_left--; + if (mythread == 0 && proc_queue_threads_delta != 0) { + threads_delta = proc_queue_threads_delta; + proc_queue_threads_delta = 0; + } else { + if (pool0) { + if (earlysock_left == 0) { + pool0 = false; + switch_msg = true; + } else { + wq_item = k_unlink_head(pool0_workqueue_store); + if (wq_item) + earlysock_left--; + } + } + if (!pool0) { + wq_item = k_unlink_head(pool_workqueue_store); + wqcount = pool_workqueue_store->count; } - } - if (!pool0) { - wq_item = k_unlink_head(pool_workqueue_store); - 
wqcount = pool_workqueue_store->count; - } - if (wqcount == 0 && wq_stt.tv_sec != 0L) - setnow(&wq_fin); + if (wqcount == 0 && wq_stt.tv_sec != 0L) + setnow(&wq_fin); - if (wq_item) { - if (pool0) - workqueue_proc0++; - else - workqueue_proc1++; + if (wq_item) { + if (pool0) + workqueue_proc0++; + else + workqueue_proc1++; + } } K_WUNLOCK(workqueue_free); + // TODO: deal with thread creation/shutdown failure + if (threads_delta != 0) { + if (threads_delta > 0) { + // Add threads + tot = 1; + done = 0; + for (i = 1; i < THREAD_LIMIT; i++) { + if (!running[i]) { + if (threads_delta > 0) { + threads_delta--; + running[i] = true; + create_pthread(&(pqproc_pt[i]), + pqproc, + &(n[i])); + done++; + tot++; + } + } else + tot++; + } + LOGWARNING("%s() created %d thread%s total=%d" +#if LOCK_CHECK + " next_thread_id=%d" +#endif + , __func__, done, + (done == 1) ? EMPTY : "s", tot +#if LOCK_CHECK + , next_thread_id +#endif + ); + } else { + // Notify and wait for each to exit + tot = 1; + done = 0; + for (i = THREAD_LIMIT - 1; i > 0; i--) { + if (running[i]) { + if (threads_delta < 0) { + threads_delta++; + LOGNOTICE("%s() stopping %d", + __func__, i); + running[i] = false; + join_pthread(pqproc_pt[i]); + done++; + } else + tot++; + } + } + LOGWARNING("%s() stopped %d thread%s total=%d " +#if LOCK_CHECK + " next_thread_id=%d" +#endif + , __func__, done, + (done == 1) ? 
EMPTY : "s", tot +#if LOCK_CHECK + , next_thread_id +#endif + ); + } + threads_delta = 0; + continue; + } + if (switch_msg) { switch_msg = false; LOGNOTICE("%s() pool0 complete, processed %"PRIu64, @@ -6995,9 +7069,14 @@ static void *pqproc(void *arg) if (conn) PQfinish(conn); - if (!arg) { - for (i = 1; i < queue_threads; i++) - join_pthread(queue_pt[i]); + if (mythread == 0) { + for (i = 1; i < THREAD_LIMIT; i++) { + if (running[i]) { + running[i] = false; + LOGNOTICE("%s() waiting for %d", __func__, i); + join_pthread(pqproc_pt[i]); + } + } } return NULL; @@ -7019,7 +7098,7 @@ static void *listener(void *arg) pthread_detach(pthread_self()); - snprintf(buf, sizeof(buf), "db_p0qproc"); + snprintf(buf, sizeof(buf), "db_p00qproc"); LOCK_INIT(buf); rename_proc(buf); @@ -8354,14 +8433,33 @@ int main(int argc, char **argv) break; case 'Q': { - int qt = atoi(optarg); + // N for both or N:M + char *q = strdup(optarg); + char *colon = strchr(q, ':'); + int qt; + if (colon) + *(colon++) = '\0'; + qt = atoi(q); if (qt < 1 || qt > THREAD_LIMIT) { quit(1, "Invalid queue " - "thread count %d " + "thread count '%s' " "- must be >0 and <=%d", - qt, THREAD_LIMIT); + colon ? 
q : optarg, + THREAD_LIMIT); + } + reload_queue_threads = qt; + if (!colon) + proc_queue_threads = qt; + else { + qt = atoi(colon); + if (qt < 1 || qt > THREAD_LIMIT) { + quit(1, "Invalid 2nd queue " + "thread count '%s' " + "- must be >0 and <=%d", + colon, THREAD_LIMIT); + } + proc_queue_threads = qt; } - queue_threads = qt; } break; case 'r': diff --git a/src/ckdb.h b/src/ckdb.h index 8cec4bc3..2c8d8b02 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.404" +#define CKDB_VERSION DB_VERSION"-2.405" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ @@ -123,7 +123,8 @@ extern enum free_modes free_mode; /* To notify thread changes * Set/checked under the function's main loop's first lock * This is always a 'delta' value meaning add or subtract that many */ -extern int queue_threads_delta; +extern int reload_queue_threads_delta; +extern int proc_queue_threads_delta; // To notify thread changes extern int reload_breakdown_threads_delta; extern int cmd_breakdown_threads_delta; diff --git a/src/ckdb_cmd.c b/src/ckdb_cmd.c index 1082560e..61ef1fff 100644 --- a/src/ckdb_cmd.c +++ b/src/ckdb_cmd.c @@ -8398,10 +8398,18 @@ static char *cmd_threads(__maybe_unused PGconn *conn, char *cmd, char *id, strcasecmp(name, "process_reload") == 0) { K_WLOCK(breakqueue_free); // Just overwrite whatever's there - queue_threads_delta = delta_value; + reload_queue_threads_delta = delta_value; K_WUNLOCK(breakqueue_free); snprintf(reply, siz, "ok.delta %d request sent", delta_value); return strdup(reply); + } else if (strcasecmp(name, "pq") == 0 || + strcasecmp(name, "pqproc") == 0) { + K_WLOCK(workqueue_free); + // Just overwrite whatever's there + proc_queue_threads_delta = delta_value; + K_WUNLOCK(workqueue_free); + snprintf(reply, siz, "ok.delta %d request sent", delta_value); + return strdup(reply); } else if (strcasecmp(name, "rb") == 0 || 
strcasecmp(name, "reload_breaker") == 0) { K_WLOCK(breakqueue_free); From 649461ff0142416139c48db74eb8f8b258a4d561 Mon Sep 17 00:00:00 2001 From: kanoi Date: Fri, 19 Aug 2016 12:33:25 +1000 Subject: [PATCH 07/23] ckdb - add a simple socketer timeout for quiet pools :) --- src/ckdb.c | 25 +++++++++++++++++++++++-- src/ckdb.h | 2 +- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/ckdb.c b/src/ckdb.c index a24a51eb..2412e174 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -5940,8 +5940,9 @@ static void *socketer(void *arg) char *end, *buf = NULL; K_ITEM *bq_item = NULL; BREAKQUEUE *bq = NULL; - int sockd; - tv_t now, nowacc, now1, now2; + int ret, sockd; + fd_set rfds; + tv_t now, nowacc, now1, now2, tmo; pthread_detach(pthread_self()); @@ -5972,9 +5973,29 @@ static void *socketer(void *arg) create_pthread(&proc_pt, process_socket, arg); } + ret = 0; setnow(&sock_stt); while (!everyone_die) { setnow(&now1); + while (!everyone_die) { + FD_ZERO(&rfds); + FD_SET(us->sockd, &rfds); + tmo.tv_sec = 1; + tmo.tv_usec = 0; + ret = select(us->sockd + 1, &rfds, NULL, NULL, &tmo); + if (ret > 0) + break; + if (ret < 0) { + int e = errno; + LOGERR("%s() Failed to select on socket (%d:%s)", + __func__, e, strerror(e)); + break; + } + } + // Timeout exit on no input (or error) + if (everyone_die || ret < 0) + break; + sockd = accept(us->sockd, NULL, NULL); if (sockd < 0) { int e = errno; diff --git a/src/ckdb.h b/src/ckdb.h index 2c8d8b02..c2e3d54e 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.405" +#define CKDB_VERSION DB_VERSION"-2.406" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ From 1268d4c3ab3a2f6f8a36194f2c83fc942f7d7d12 Mon Sep 17 00:00:00 2001 From: kanoi Date: Fri, 19 Aug 2016 13:13:33 +1000 Subject: [PATCH 08/23] ckdb - add dbcode to all thread names --- src/ckdb.c | 70 
++++++++++++++++++++++++++++++++---------------------- src/ckdb.h | 2 +- 2 files changed, 43 insertions(+), 29 deletions(-) diff --git a/src/ckdb.c b/src/ckdb.c index 2412e174..18845864 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -3793,8 +3793,8 @@ static void *breaker(void *arg) when_add.tv_nsec = (CMD_QUEUE_SLEEP_MS % 1000) * 1000000; } - snprintf(buf, sizeof(buf), "db_%c%02d%s", - reload ? 'r' : 'c', mythread, __func__); + snprintf(buf, sizeof(buf), "db%s_%c%02d%s", + dbcode, reload ? 'r' : 'c', mythread, __func__); LOCK_INIT(buf); rename_proc(buf); @@ -4371,12 +4371,14 @@ static void summarise_blocks() static void *summariser(__maybe_unused void *arg) { bool orphan_check = false; + char buf[128]; int i; pthread_detach(pthread_self()); - LOCK_INIT("db_summariser"); - rename_proc("db_summariser"); + snprintf(buf, sizeof(buf), "db%s_%s", dbcode, __func__); + LOCK_INIT(buf); + rename_proc(buf); /* Don't do any summarisation until the reload queue completes coz: * 1) It locks/accesses a lot of data - workinfo/markersummary that @@ -4961,12 +4963,14 @@ static void make_a_workmarker() static void *marker(__maybe_unused void *arg) { + char buf[128]; int i; pthread_detach(pthread_self()); - LOCK_INIT("db_marker"); - rename_proc("db_marker"); + snprintf(buf, sizeof(buf), "db%s_%s", dbcode, __func__); + LOCK_INIT(buf); + rename_proc(buf); /* We want this to start during the CCL reload so that if we run a * large reload and it fails at some point, the next reload will not @@ -5038,7 +5042,7 @@ static void *logger(__maybe_unused void *arg) pthread_detach(pthread_self()); - snprintf(buf, sizeof(buf), "db%s_logger", dbcode); + snprintf(buf, sizeof(buf), "db%s_%s", dbcode, __func__); LOCK_INIT(buf); rename_proc(buf); @@ -5156,7 +5160,7 @@ static void *replier(void *arg) break; } - snprintf(buf, sizeof(buf), "db_%c%s", typ, __func__); + snprintf(buf, sizeof(buf), "db%s_%c%s", dbcode, typ, __func__); LOCK_INIT(buf); rename_proc(buf); @@ -5334,20 +5338,22 @@ static void 
*clistener(__maybe_unused void *arg) PGconn *conn = NULL; K_ITEM *wq_item; tv_t now1, now2; + char buf[128]; time_t now; ts_t when, when_add; int ret; pthread_detach(pthread_self()); - when_add.tv_sec = CMD_QUEUE_SLEEP_MS / 1000; - when_add.tv_nsec = (CMD_QUEUE_SLEEP_MS % 1000) * 1000000; - - LOCK_INIT("db_clistener"); - rename_proc("db_clistener"); + snprintf(buf, sizeof(buf), "db%s_%s", dbcode, __func__); + LOCK_INIT(buf); + rename_proc(buf); LOGNOTICE("%s() processing", __func__); + when_add.tv_sec = CMD_QUEUE_SLEEP_MS / 1000; + when_add.tv_nsec = (CMD_QUEUE_SLEEP_MS % 1000) * 1000000; + clistener_using_data = true; conn = dbconnect(); @@ -5401,20 +5407,22 @@ static void *blistener(__maybe_unused void *arg) PGconn *conn = NULL; K_ITEM *wq_item; tv_t now1, now2; + char buf[128]; time_t now; ts_t when, when_add; int ret; pthread_detach(pthread_self()); - when_add.tv_sec = CMD_QUEUE_SLEEP_MS / 1000; - when_add.tv_nsec = (CMD_QUEUE_SLEEP_MS % 1000) * 1000000; - - LOCK_INIT("db_blistener"); - rename_proc("db_blistener"); + snprintf(buf, sizeof(buf), "db%s_%s", dbcode, __func__); + LOCK_INIT(buf); + rename_proc(buf); LOGNOTICE("%s() processing", __func__); + when_add.tv_sec = CMD_QUEUE_SLEEP_MS / 1000; + when_add.tv_nsec = (CMD_QUEUE_SLEEP_MS % 1000) * 1000000; + blistener_using_data = true; now = time(NULL); @@ -5474,16 +5482,18 @@ static void *process_socket(void *arg) char reply[1024+1]; char *ans = NULL, *rep = NULL, *tmp, *st; size_t siz; + char buf[128]; ts_t when, when_add; pthread_detach(pthread_self()); + snprintf(buf, sizeof(buf), "db%s_procsock", dbcode); + LOCK_INIT(buf); + rename_proc(buf); + when_add.tv_sec = CMD_QUEUE_SLEEP_MS / 1000; when_add.tv_nsec = (CMD_QUEUE_SLEEP_MS % 1000) * 1000000; - LOCK_INIT("db_procsock"); - rename_proc("db_procsock"); - want_first = true; while (!everyone_die) { K_WLOCK(breakqueue_free); @@ -5942,12 +5952,14 @@ static void *socketer(void *arg) BREAKQUEUE *bq = NULL; int ret, sockd; fd_set rfds; + char nbuf[128]; tv_t now, 
nowacc, now1, now2, tmo; pthread_detach(pthread_self()); - LOCK_INIT("db_socketer"); - rename_proc("db_socketer"); + snprintf(nbuf, sizeof(nbuf), "db%s_%s", dbcode, __func__); + LOCK_INIT(nbuf); + rename_proc(nbuf); while (!everyone_die && !db_users_complete) cksem_mswait(&socketer_sem, 420); @@ -6259,7 +6271,7 @@ static void *process_reload(__maybe_unused void *arg) LOGNOTICE("%s() starting", __func__); } - snprintf(buf, sizeof(buf), "db_p%02drload", mythread); + snprintf(buf, sizeof(buf), "db%s_p%02drload", dbcode, mythread); LOCK_INIT(buf); rename_proc(buf); @@ -6893,7 +6905,7 @@ static void *pqproc(void *arg) } else { mythread = *(int *)(arg); - snprintf(buf, sizeof(buf), "db_p%02dqproc", mythread); + snprintf(buf, sizeof(buf), "db%s_p%02dqproc", dbcode, mythread); LOCK_INIT(buf); rename_proc(buf); } @@ -7119,7 +7131,7 @@ static void *listener(void *arg) pthread_detach(pthread_self()); - snprintf(buf, sizeof(buf), "db_p00qproc"); + snprintf(buf, sizeof(buf), "db%s_p00qproc", dbcode); LOCK_INIT(buf); rename_proc(buf); @@ -7318,10 +7330,12 @@ static bool make_keysummaries() static void *keymarker(__maybe_unused void *arg) { pthread_detach(pthread_self()); + char buf[128]; bool ok = true; - LOCK_INIT("db_keymarker"); - rename_proc("db_keymarker"); + snprintf(buf, sizeof(buf), "db%s_%s", dbcode, __func__); + LOCK_INIT(buf); + rename_proc(buf); if (!everyone_die) { LOGWARNING("%s() Start key processing...", __func__); diff --git a/src/ckdb.h b/src/ckdb.h index c2e3d54e..4e70198f 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.406" +#define CKDB_VERSION DB_VERSION"-2.407" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ From 798ad0cc2846a236d4a5782ea632625a76d5623c Mon Sep 17 00:00:00 2001 From: kanoi Date: Fri, 19 Aug 2016 17:51:33 +1000 Subject: [PATCH 09/23] ckdb - remove useless compile warning --- src/ckdb.c | 8 ++++---- 
src/ckdb.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ckdb.c b/src/ckdb.c index 18845864..e3e8eb25 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -3824,6 +3824,8 @@ static void *breaker(void *arg) break; K_WLOCK(breakqueue_free); + bq_item = NULL; + was_null = false; if (mythread == 0 && reload && reload_breakdown_threads_delta != 0) { breaker_delta = reload_breakdown_threads_delta; reload_breakdown_threads_delta = 0; @@ -3831,8 +3833,6 @@ static void *breaker(void *arg) breaker_delta = cmd_breakdown_threads_delta; cmd_breakdown_threads_delta = 0; } else { - bq_item = NULL; - was_null = false; if (reload) count = reload_done_breakqueue_store->count; else @@ -6345,7 +6345,7 @@ static void *process_reload(__maybe_unused void *arg) tot++; } } - LOGWARNING("%s() stopped %d thread%s total=%d " + LOGWARNING("%s() stopped %d thread%s total=%d" #if LOCK_CHECK " next_thread_id=%d" #endif @@ -7003,7 +7003,7 @@ static void *pqproc(void *arg) tot++; } } - LOGWARNING("%s() stopped %d thread%s total=%d " + LOGWARNING("%s() stopped %d thread%s total=%d" #if LOCK_CHECK " next_thread_id=%d" #endif diff --git a/src/ckdb.h b/src/ckdb.h index 4e70198f..872d139c 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.407" +#define CKDB_VERSION DB_VERSION"-2.408" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ From cb13b08eea5eb478c9fff7132c3ef1047c086578 Mon Sep 17 00:00:00 2001 From: kanoi Date: Fri, 19 Aug 2016 23:00:54 +1000 Subject: [PATCH 10/23] ckdb - missing lock on users access --- src/ckdb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ckdb.h b/src/ckdb.h index 872d139c..b5028120 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.408" +#define CKDB_VERSION DB_VERSION"-2.409" #define WHERE_FFL " - from 
%s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ From 96b6a9fc6b98ead07237f278be9055cd28fe9cf3 Mon Sep 17 00:00:00 2001 From: kanoi Date: Fri, 19 Aug 2016 23:02:49 +1000 Subject: [PATCH 11/23] ckdb - missing lock on users access - part 2 :) --- src/ckdb_dbio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ckdb_dbio.c b/src/ckdb_dbio.c index ec866d05..393a8b2e 100644 --- a/src/ckdb_dbio.c +++ b/src/ckdb_dbio.c @@ -649,7 +649,9 @@ K_ITEM *users_add(PGconn *conn, char *username, char *emailaddress, * and thus throw away any differences in the 2nd */ K_WLOCK(users_db_free); + K_RLOCK(users_free); item = find_users(username); + K_RUNLOCK(users_free); if (item) { ok = true; goto already; From a25111dfb80e0c6fc1ba0dfe5b77f65a54ad626a Mon Sep 17 00:00:00 2001 From: kanoi Date: Sun, 21 Aug 2016 01:55:24 +1000 Subject: [PATCH 12/23] ckdb - display trans found msgs as ranges --- src/ckdb.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++-- src/ckdb.h | 12 +++- src/ckdb_data.c | 27 +++++++++ 3 files changed, 185 insertions(+), 7 deletions(-) diff --git a/src/ckdb.c b/src/ckdb.c index e3e8eb25..578bb942 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -503,6 +503,8 @@ K_LIST *seqset_free; static K_STORE *seqset_store; // Initialised when seqset_free is allocated static char *seqnam[SEQ_MAX]; +// Lock access to the static found data in update_seq() +static cklock_t seq_found_lock; // Full lock for access to sequence processing data #define SEQLOCK() K_WLOCK(seqset_free); @@ -2604,6 +2606,62 @@ static void seq_reloadmax() SEQUNLOCK(); } +/* Local structure for update_seq() to remember trans found as ranges + * It's in use if last.tv_sec != 0 */ +typedef struct seqfound { + tv_t last; + int set; + uint64_t seqstt; + uint64_t seqpid; + // seq range + uint64_t seq1, seq2; + // cd range + tv_t cd1, cd2; + bool forced_msg; +} SEQFOUND; + +static void msgs_seq(SEQFOUND *found_msgs) +{ + char t_buf[DATE_BUFSIZ], t_buf2[DATE_BUFSIZ]; + char r_buf[64], 
t_buf3[DATE_BUFSIZ], c_buf[64]; + int i; + + for (i = 0; i < SEQ_MAX; i++) { + if (found_msgs[i].last.tv_sec != 0) { + btu64_to_buf(&(found_msgs[i].seqstt), t_buf, + sizeof(t_buf)); + bt_to_buf(&(found_msgs[i].cd1.tv_sec), t_buf2, + sizeof(t_buf2)); + if (found_msgs[i].seq2 == found_msgs[i].seq1) { + r_buf[0] = '\0'; + c_buf[0] = '\0'; + } else { + snprintf(r_buf, sizeof(r_buf), + "-%"PRIu64, + found_msgs[i].seq2); + snprintf(c_buf, sizeof(c_buf), + " (%"PRIu64")", + found_msgs[i].seq2 + 1 - + found_msgs[i].seq1); + } + if (found_msgs[i].cd1.tv_sec == found_msgs[i].cd2.tv_sec) + t_buf3[0] = '\0'; + else { + ms_to_buf(&(found_msgs[i].cd2.tv_sec), + t_buf3, sizeof(t_buf3)); + } + LOGWARNING("Seq found trans %s %"PRIu64"%s%s" + " set:%d/%"PRIu64"=%s/%"PRIu64 + " %s%s%s", + seqnam[i], found_msgs[i].seq1, r_buf, c_buf, + found_msgs[i].set, + found_msgs[i].seqstt, t_buf, + found_msgs[i].seqpid, t_buf2, + t_buf3[0] ? ".." : EMPTY, t_buf3); + } + } +} + /* Most of the extra message logic in here is to avoid putting too many * messages or incorrect messages on the console when errors occur * It wont lose msglines from the reload or the queue, since if there is any @@ -2616,11 +2674,15 @@ static bool update_seq(enum seq_num seq, uint64_t n_seqcmd, char *nam, tv_t *now, tv_t *cd, char *code, int seqentryflags, char *msg) { + static SEQFOUND found[SEQ_MAX]; + // flag to avoid always accessing the lock since trans is extremely rare + static bool has_found; + char t_buf[DATE_BUFSIZ], t_buf2[DATE_BUFSIZ], *st = NULL; bool firstseq, newseq, expseq, gothigh, okhi, gotstale, gotstalestart; SEQSET *seqset = NULL, *seqset0 = NULL, seqset_pre = { 0 }; SEQSET seqset_exp = { 0 }, seqset_copy = { 0 }; - bool dup, wastrans, doitem, dotime, gotrecover; + bool dup, wastrans, doitem, dotime, gotrecover, used; SEQDATA *seqdata; SEQENTRY *seqentry, seqentry_copy, *u_entry; K_ITEM *seqset_item = NULL, *st_item = NULL, *stl_item = NULL; @@ -2630,6 +2692,8 @@ static bool update_seq(enum seq_num 
seq, uint64_t n_seqcmd, uint64_t u; int set = -1, expset = -1, highlimit, i; K_STORE *lost = NULL; + SEQFOUND found_msgs[SEQ_MAX]; + tv_t found_now; LOGDEBUG("%s() SQ %c:%d/%s/%"PRIu64"/%"PRIu64"/%"PRIu64"/%s '%.80s...", __func__, SECHR(seqentryflags), seq, nam, n_seqcmd, n_seqstt, @@ -3111,12 +3175,90 @@ setitemdata: FREENULL(st); } - if (wastrans || gotrecover) { + if (wastrans) { + for (i = 0; i < SEQ_MAX; i++) + found_msgs[i].last.tv_sec = 0; + used = false; + setnow(&found_now); + ck_wlock(&seq_found_lock); + if (found[seq].last.tv_sec != 0) { + // Can we append it? + if (found[seq].seq2 == (n_seqcmd - 1) && + found[seq].set == set && + found[seq].seqstt == n_seqstt && + found[seq].seqpid == n_seqpid) { + found[seq].seq2++; + if (tv_newer(cd, &(found[seq].cd1))) + copy_tv(&(found[seq].cd1), cd); + if (tv_newer(&(found[seq].cd2), cd)) + copy_tv(&(found[seq].cd2), cd); + used = true; + } else { + // No, so force display it + found[seq].forced_msg = true; + } + } + // Check if there are any ranges >= 2s old (or forced) + for (i = 0; i < SEQ_MAX; i++) { + if (found[i].forced_msg || (found[i].last.tv_sec != 0 && + tvdiff(&found_now, &(found[i].last)) >= 2.0)) { + memcpy(&(found_msgs[i]), &(found[i]), + sizeof(SEQFOUND)); + // will be displayed, so erase it + found[i].last.tv_sec = 0; + found[i].forced_msg = false; + } + } + // Store it - found[seq] will (now) be unused + if (!used) { + copy_tv(&(found[seq].last), &found_now); + found[seq].set = set; + found[seq].seqstt = n_seqstt; + found[seq].seqpid = n_seqpid; + found[seq].seq1 = found[seq].seq2 = n_seqcmd; + copy_tv(&(found[seq].cd1), cd); + copy_tv(&(found[seq].cd2), cd); + } + has_found = false; + for (i = 0; i < SEQ_MAX; i++) { + if (found[i].last.tv_sec != 0) + has_found = true; + } + ck_wunlock(&seq_found_lock); + msgs_seq(found_msgs); + } else { + if (has_found) { + for (i = 0; i < SEQ_MAX; i++) + found_msgs[i].last.tv_sec = 0; + ck_wlock(&seq_found_lock); + if (has_found) { + for (i = 0; i < SEQ_MAX; i++) 
{ + if (found[i].last.tv_sec != 0 && + tvdiff(&found_now, &(found[i].last)) >= 2.0) { + memcpy(&(found_msgs[i]), + &(found[i]), + sizeof(SEQFOUND)); + // will be displayed, so erase it + found[i].last.tv_sec = 0; + found[i].forced_msg = false; + } + } + has_found = false; + for (i = 0; i < SEQ_MAX; i++) { + if (found[i].last.tv_sec != 0) + has_found = true; + } + } + ck_wunlock(&seq_found_lock); + msgs_seq(found_msgs); + } + } + + if (gotrecover) { btu64_to_buf(&n_seqstt, t_buf, sizeof(t_buf)); bt_to_buf(&(cd->tv_sec), t_buf2, sizeof(t_buf2)); - LOGWARNING("%s %s %"PRIu64" set:%d/%"PRIu64"=%s/%"PRIu64 - " %s/%s", - gotrecover ? "SEQ recovered" : "Seq found trans", + LOGWARNING("SEQ recovered %s %"PRIu64" set:%d/%"PRIu64 + "=%s/%"PRIu64" %s/%s", nam, n_seqcmd, set, n_seqstt, t_buf, n_seqpid, t_buf2, code); } @@ -8647,6 +8789,7 @@ int main(int argc, char **argv) cklock_init(&last_lock); cklock_init(&btc_lock); cklock_init(&poolinstance_lock); + cklock_init(&seq_found_lock); mutex_init(&bq_reload_waitlock); mutex_init(&bq_cmd_waitlock); diff --git a/src/ckdb.h b/src/ckdb.h index b5028120..83117ffe 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.409" +#define CKDB_VERSION DB_VERSION"-2.410" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ @@ -279,7 +279,9 @@ enum data_type { TYPE_BLOB, TYPE_DOUBLE, TYPE_T, - TYPE_BT + TYPE_BT, + TYPE_HMS, + TYPE_MS }; // BLOB does what PTR needs @@ -3037,6 +3039,8 @@ extern char *_data_to_buf(enum data_type typ, void *data, char *buf, size_t siz, #define t_to_buf(_data, _buf, _siz) _t_to_buf(_data, _buf, _siz, WHERE_FFL_HERE) #define bt_to_buf(_data, _buf, _siz) _bt_to_buf(_data, _buf, _siz, WHERE_FFL_HERE) #define btu64_to_buf(_data, _buf, _siz) _btu64_to_buf(_data, _buf, _siz, WHERE_FFL_HERE) +#define hms_to_buf(_data, _buf, _siz) _hms_to_buf(_data, _buf, _siz, WHERE_FFL_HERE) +#define 
ms_to_buf(_data, _buf, _siz) _ms_to_buf(_data, _buf, _siz, WHERE_FFL_HERE) extern char *_str_to_buf(char data[], char *buf, size_t siz, WHERE_FFL_ARGS); extern char *_bigint_to_buf(int64_t data, char *buf, size_t siz, WHERE_FFL_ARGS); @@ -3059,6 +3063,10 @@ extern char *_t_to_buf(time_t *data, char *buf, size_t siz, WHERE_FFL_ARGS); // Convert seconds (only) time to (brief) M-DD/HH:MM:SS extern char *_bt_to_buf(time_t *data, char *buf, size_t siz, WHERE_FFL_ARGS); extern char *_btu64_to_buf(uint64_t *data, char *buf, size_t siz, WHERE_FFL_ARGS); +// Convert tv to HH:MM:SS +extern char *_hms_to_buf(time_t *data, char *buf, size_t siz, WHERE_FFL_ARGS); +// Convert tv to MM:SS +extern char *_ms_to_buf(time_t *data, char *buf, size_t siz, WHERE_FFL_ARGS); extern char *_transfer_data(K_ITEM *item, WHERE_FFL_ARGS); extern void dsp_transfer(K_ITEM *item, FILE *stream); diff --git a/src/ckdb_data.c b/src/ckdb_data.c index c5d79ce4..52c2fad8 100644 --- a/src/ckdb_data.c +++ b/src/ckdb_data.c @@ -511,6 +511,8 @@ char *_data_to_buf(enum data_type typ, void *data, char *buf, size_t siz, WHERE_ case TYPE_BTV: case TYPE_T: case TYPE_BT: + case TYPE_HMS: + case TYPE_MS: siz = DATE_BUFSIZ; break; case TYPE_CTV: @@ -597,6 +599,19 @@ char *_data_to_buf(enum data_type typ, void *data, char *buf, size_t siz, WHERE_ tm.tm_min, tm.tm_sec); break; + case TYPE_HMS: + gmtime_r((time_t *)data, &tm); + snprintf(buf, siz, "%02d:%02d:%02d", + tm.tm_hour, + tm.tm_min, + tm.tm_sec); + break; + case TYPE_MS: + gmtime_r((time_t *)data, &tm); + snprintf(buf, siz, "%02d:%02d", + tm.tm_min, + tm.tm_sec); + break; } return buf; @@ -674,6 +689,18 @@ char *_btu64_to_buf(uint64_t *data, char *buf, size_t siz, WHERE_FFL_ARGS) return _data_to_buf(TYPE_BT, (void *)&t, buf, siz, WHERE_FFL_PASS); } +// Convert to HH:MM:SS +char *_hms_to_buf(time_t *data, char *buf, size_t siz, WHERE_FFL_ARGS) +{ + return _data_to_buf(TYPE_HMS, (void *)data, buf, siz, WHERE_FFL_PASS); +} + +// Convert to MM:SS +char 
*_ms_to_buf(time_t *data, char *buf, size_t siz, WHERE_FFL_ARGS) +{ + return _data_to_buf(TYPE_MS, (void *)data, buf, siz, WHERE_FFL_PASS); +} + // For mutiple variable function calls that need the data char *_transfer_data(K_ITEM *item, WHERE_FFL_ARGS) { From 146142ff2c6bcdd85fe52b5160739b5c0bc75f2f Mon Sep 17 00:00:00 2001 From: kanoi Date: Sun, 21 Aug 2016 02:15:28 +1000 Subject: [PATCH 13/23] ckdb - missing time initialisation --- src/ckdb.c | 1 + src/ckdb.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ckdb.c b/src/ckdb.c index 578bb942..c8f3c9f0 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -3232,6 +3232,7 @@ setitemdata: found_msgs[i].last.tv_sec = 0; ck_wlock(&seq_found_lock); if (has_found) { + setnow(&found_now); for (i = 0; i < SEQ_MAX; i++) { if (found[i].last.tv_sec != 0 && tvdiff(&found_now, &(found[i].last)) >= 2.0) { diff --git a/src/ckdb.h b/src/ckdb.h index 83117ffe..628bcb5f 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.410" +#define CKDB_VERSION DB_VERSION"-2.411" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ From 1c42be20fffe726204a5886d451a5dded498bdc2 Mon Sep 17 00:00:00 2001 From: kanoi Date: Sun, 21 Aug 2016 03:28:07 +1000 Subject: [PATCH 14/23] ckdb - add a file processing time ratio to the reload messages --- src/ckdb.c | 13 ++++++++----- src/ckdb.h | 6 +++--- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/ckdb.c b/src/ckdb.c index c8f3c9f0..68496fd2 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -6730,7 +6730,7 @@ static bool reload_from(tv_t *start, const tv_t *finish) char *filename = NULL; uint64_t count, total; tv_t now, begin, file_begin, file_end; - double diff; + double diff, ratio; FILE *fp = NULL; int file_N_limit; time_t tick_time, tmp_time; @@ -6815,12 +6815,14 @@ static bool reload_from(tv_t *start, const tv_t *finish) diff = 
tvdiff(&file_end, &file_begin); if (diff == 0) diff = 1; + ratio = (double)ROLL_S / diff; - LOGWARNING("%s(): %sread %"PRIu64" line%s %.2f/s from %s", + LOGWARNING("%s(): %sread %"PRIu64" line%s %.2f/s (%.1fx)" + " from %s", __func__, everyone_die ? "Terminate, aborting - " : "", count, count == 1 ? "" : "s", (count / diff), - filename); + ratio, filename); total += count; if (apipe) { pclose(fp); @@ -6905,14 +6907,15 @@ static bool reload_from(tv_t *start, const tv_t *finish) diff = tvdiff(&now, &begin); if (diff == 0) diff = 1; + ratio = (double)(processing * ROLL_S) / diff; snprintf(reload_buf, MAX_READ, "reload.%s.%"PRIu64, run, total); LOGQUE(reload_buf, true); LOGQUE(reload_buf, false); - LOGWARNING("%s(): read %d file%s, total %"PRIu64" line%s %.2f/s", + LOGWARNING("%s(): read %d file%s, total %"PRIu64" line%s %.2f/s (%.1fx)", __func__, processing, processing == 1 ? "" : "s", - total, total == 1 ? "" : "s", (total / diff)); + total, total == 1 ? "" : "s", (total / diff), ratio); if (everyone_die) return true; diff --git a/src/ckdb.h b/src/ckdb.h index 628bcb5f..37669622 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.411" +#define CKDB_VERSION DB_VERSION"-2.412" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ @@ -3063,9 +3063,9 @@ extern char *_t_to_buf(time_t *data, char *buf, size_t siz, WHERE_FFL_ARGS); // Convert seconds (only) time to (brief) M-DD/HH:MM:SS extern char *_bt_to_buf(time_t *data, char *buf, size_t siz, WHERE_FFL_ARGS); extern char *_btu64_to_buf(uint64_t *data, char *buf, size_t siz, WHERE_FFL_ARGS); -// Convert tv to HH:MM:SS +// Convert to HH:MM:SS extern char *_hms_to_buf(time_t *data, char *buf, size_t siz, WHERE_FFL_ARGS); -// Convert tv to MM:SS +// Convert to MM:SS extern char *_ms_to_buf(time_t *data, char *buf, size_t siz, WHERE_FFL_ARGS); extern char *_transfer_data(K_ITEM *item, 
WHERE_FFL_ARGS); From 72374c328cf394d6fd18fe84f17d412648307183 Mon Sep 17 00:00:00 2001 From: kanoi Date: Sun, 21 Aug 2016 22:17:22 +1000 Subject: [PATCH 15/23] ckdb - calculate file time lengths for the reload ratio --- src/ckdb.c | 41 +++++++++++++++++++++++++++++++++-------- src/ckdb.h | 2 +- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/src/ckdb.c b/src/ckdb.c index 68496fd2..ba09f3b6 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -290,6 +290,8 @@ int64_t confirm_last_workinfoid; #define WORKINFO_AGE 660 static tv_t reload_timestamp; +// CDTRF in the last reloaded record - locked under breakqueue_free +static char last_reload_cd[CDATE_BUFSIZ]; // Shared by threads - accessed under breakqueue_free lock static uint64_t reload_processed = 0; // Shared by threads - accessed under workqueue_free lock @@ -3884,9 +3886,10 @@ static void *breaker(void *arg) static bool cmd0 = false; struct breaker_setup *setup; - K_ITEM *bq_item = NULL; + K_ITEM *bq_item = NULL, *cd_item = NULL; BREAKQUEUE *bq = NULL; MSGLINE *msgline = NULL; + TRANSFER *cd_trf; char buf[128]; bool reload, was_null, msg; int queue_sleep, queue_limit, count; @@ -4137,8 +4140,14 @@ static void *breaker(void *arg) } } } + if (reload) + cd_item = find_transfer(msgline->trf_root, CDTRF); K_WLOCK(breakqueue_free); if (reload) { + if (cd_item) { + DATA_TRANSFER(cd_trf, cd_item); + STRNCPY(last_reload_cd, cd_trf->mvalue); + } k_add_tail(reload_done_breakqueue_store, bq_item); mutex_lock(&process_reload_waitlock); process_reload_signals++; @@ -6729,11 +6738,12 @@ static bool reload_from(tv_t *start, const tv_t *finish) bool finished = false, ret = true, ok, apipe = false; char *filename = NULL; uint64_t count, total; - tv_t now, begin, file_begin, file_end; + tv_t now, begin, file_begin, file_end, last_cd; + time_t last_file = 0; double diff, ratio; FILE *fp = NULL; int file_N_limit; - time_t tick_time, tmp_time; + time_t tick_time, tmp_time, last_sec, tot_sec = 0; reload_buf = malloc(MAX_READ); 
if (!reload_buf) @@ -6755,6 +6765,7 @@ static bool reload_from(tv_t *start, const tv_t *finish) filename = hour_filename(restorefrom, restorename, reload_timestamp.tv_sec); if (!logopen(&filename, &fp, &apipe)) quithere(1, "Failed to open '%s'", filename); + last_file = reload_timestamp.tv_sec; setnow(&now); copy_tv(&begin, &now); @@ -6815,7 +6826,17 @@ static bool reload_from(tv_t *start, const tv_t *finish) diff = tvdiff(&file_end, &file_begin); if (diff == 0) diff = 1; - ratio = (double)ROLL_S / diff; + // Work out how long the file was + K_RLOCK(breakqueue_free); + txt_to_ctv(CDTRF, last_reload_cd, &last_cd, sizeof(last_cd)); + K_RUNLOCK(breakqueue_free); + last_sec = last_cd.tv_sec - last_file; + if (last_cd.tv_usec) + last_sec++; + if (last_sec < 2 || last_sec > ROLL_S) + last_sec = ROLL_S; + ratio = (double)last_sec / diff; + tot_sec += last_sec; LOGWARNING("%s(): %sread %"PRIu64" line%s %.2f/s (%.1fx)" " from %s", @@ -6854,7 +6875,9 @@ static bool reload_from(tv_t *start, const tv_t *finish) filename = hour_filename(restorefrom, restorename, reload_timestamp.tv_sec); ok = logopen(&filename, &fp, &apipe); - if (!ok) { + if (ok) + last_file = reload_timestamp.tv_sec; + else { missingfirst = strdup(filename); FREENULL(filename); errno = 0; @@ -6874,8 +6897,10 @@ static bool reload_from(tv_t *start, const tv_t *finish) } filename = hour_filename(restorefrom, restorename, reload_timestamp.tv_sec); ok = logopen(&filename, &fp, &apipe); - if (ok) + if (ok) { + last_file = reload_timestamp.tv_sec; break; + } errno = 0; if (missing_count++ > 1) free(missinglast); @@ -6907,7 +6932,7 @@ static bool reload_from(tv_t *start, const tv_t *finish) diff = tvdiff(&now, &begin); if (diff == 0) diff = 1; - ratio = (double)(processing * ROLL_S) / diff; + ratio = (double)tot_sec / diff; snprintf(reload_buf, MAX_READ, "reload.%s.%"PRIu64, run, total); LOGQUE(reload_buf, true); @@ -7466,7 +7491,7 @@ static bool make_keysummaries() &now, NULL); K_WUNLOCK(process_pplns_free); 
setnow(&proc_lock_fin); - LOGWARNING("%s() pplns lock time %.3fs+%.3fs", + LOGWARNING("%s() pplns lock time %.3f+%.3fs", __func__, tvdiff(&proc_lock_got, &proc_lock_stt), tvdiff(&proc_lock_fin, &proc_lock_got)); diff --git a/src/ckdb.h b/src/ckdb.h index 37669622..936daf1c 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.412" +#define CKDB_VERSION DB_VERSION"-2.413" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ From 29d2612c6d05bcdf087603ff1bd471ec42eaa501 Mon Sep 17 00:00:00 2001 From: kanoi Date: Sun, 21 Aug 2016 23:04:00 +1000 Subject: [PATCH 16/23] ckdb - log the workinfo height for close to block, shares --- src/ckdb.h | 2 +- src/ckdb_dbio.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ckdb.h b/src/ckdb.h index 936daf1c..32b158be 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.413" +#define CKDB_VERSION DB_VERSION"-2.414" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ diff --git a/src/ckdb_dbio.c b/src/ckdb_dbio.c index 393a8b2e..f45c8aab 100644 --- a/src/ckdb_dbio.c +++ b/src/ckdb_dbio.c @@ -3546,9 +3546,9 @@ static bool shares_process(PGconn *conn, SHARES *shares, K_ITEM *wi_item, 100.0 * pool.diffacc / workinfo->diff_target); } - LOGWARNING("%s %s Diff %.1f%% (%.0f/%.1f) %s " - "Pool %.1f%s%s", - block ? "BLOCK!" : "Share", + LOGWARNING("%s (%"PRIu32") %s Diff %.1f%% (%.0f/%.1f) " + "%s Pool %.1f%s%s", + block ? "BLOCK!" : "Share", workinfo->height, (sta == NULL) ? 
"ok" : sta, 100.0 * shares->sdiff / workinfo->diff_target, shares->sdiff, workinfo->diff_target, From e57d79701469cb75e99e40c57b0db8bd62426340 Mon Sep 17 00:00:00 2001 From: kanoi Date: Mon, 22 Aug 2016 01:38:38 +1000 Subject: [PATCH 17/23] ckdb - add shareinfo to cmd_query and allow cmd_query during key_update --- src/ckdb.c | 1 + src/ckdb.h | 2 +- src/ckdb_cmd.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/src/ckdb.c b/src/ckdb.c index ba09f3b6..59db2800 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -5687,6 +5687,7 @@ static void *process_socket(void *arg) case CMD_GETATTS: case CMD_THREADS: case CMD_HOMEPAGE: + case CMD_QUERY: break; default: snprintf(reply, sizeof(reply), diff --git a/src/ckdb.h b/src/ckdb.h index 32b158be..383e5d4c 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.414" +#define CKDB_VERSION DB_VERSION"-2.415" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ diff --git a/src/ckdb_cmd.c b/src/ckdb_cmd.c index 61ef1fff..fed39d11 100644 --- a/src/ckdb_cmd.c +++ b/src/ckdb_cmd.c @@ -7665,6 +7665,67 @@ static char *cmd_query(__maybe_unused PGconn *conn, char *cmd, char *id, "Payouts", FLDSEP, "", FLDSEP); APPEND_REALLOC(buf, off, len, tmp); + ok = true; + } else if (strcasecmp(request, "shareinfo") == 0) { + /* return share information for the workinfo with wid>=value + * if wid=0 then find the oldest workinfo that has shares */ + K_ITEM *i_wid, s_look, *s_item; + SHARES lookshares, *shares; + int64_t selwid, wid, s_count = 0, s_diff = 0, s_sdiff = 0; + bool found; + + i_wid = require_name(trf_root, "wid", + 1, (char *)intpatt, + reply, siz); + if (!i_wid) + return strdup(reply); + TXT_TO_BIGINT("wid", transfer_data(i_wid), selwid); + + INIT_SHARES(&s_look); + lookshares.workinfoid = selwid; + lookshares.userid = -1; + 
lookshares.workername[0] = '\0'; + DATE_ZERO(&(lookshares.createdate)); + s_look.data = (void *)(&lookshares); + found = false; + K_RLOCK(shares_free); + s_item = find_after_in_ktree(shares_root, &s_look, ctx); + if (s_item) { + found = true; + DATA_SHARES(shares, s_item); + wid = shares->workinfoid; + while (s_item) { + DATA_SHARES(shares, s_item); + if (shares->workinfoid != wid) + break; + s_count++; + s_diff += shares->diff; + if (s_sdiff < shares->sdiff) + s_sdiff = shares->sdiff; + s_item = next_in_ktree(ctx); + } + } + K_RUNLOCK(shares_free); + + if (found) { + snprintf(tmp, sizeof(tmp), "selwid=%"PRId64"%c", + selwid, FLDSEP); + APPEND_REALLOC(buf, off, len, tmp); + snprintf(tmp, sizeof(tmp), "wid=%"PRId64"%c", + wid, FLDSEP); + APPEND_REALLOC(buf, off, len, tmp); + snprintf(tmp, sizeof(tmp), "shares=%"PRId64"%c", + s_count, FLDSEP); + APPEND_REALLOC(buf, off, len, tmp); + snprintf(tmp, sizeof(tmp), "diff=%"PRId64"%c", + s_diff, FLDSEP); + APPEND_REALLOC(buf, off, len, tmp); + snprintf(tmp, sizeof(tmp), "maxsdiff=%"PRId64"%c", + s_sdiff, FLDSEP); + APPEND_REALLOC(buf, off, len, tmp); + rows++; + } + ok = true; } else { free(buf); From e6510bba846fefe2c3cbd9b8379f77d94532aa30 Mon Sep 17 00:00:00 2001 From: kanoi Date: Mon, 22 Aug 2016 17:34:00 +1000 Subject: [PATCH 18/23] ckdb - discard shares as soon as they've been summarised --- src/ckdb.h | 2 +- src/ckdb_data.c | 6 ++++++ src/ckdb_dbio.c | 15 ++++++++------- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/ckdb.h b/src/ckdb.h index 383e5d4c..557903ad 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.415" +#define CKDB_VERSION DB_VERSION"-2.416" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ diff --git a/src/ckdb_data.c b/src/ckdb_data.c index 52c2fad8..84cb34f2 100644 --- a/src/ckdb_data.c +++ b/src/ckdb_data.c @@ -2153,6 +2153,11 @@ K_ITEM 
*next_workinfo(int64_t workinfoid, K_TREE_CTX *ctx) } #define DISCARD_ALL -1 +/* No longer required since we already discard the shares after being added + * to the sharesummary */ +#if 1 +#define discard_shares(...) +#else // userid = DISCARD_ALL will dump all shares for the given workinfoid static void discard_shares(int64_t *shares_tot, int64_t *shares_dumped, int64_t *diff_tot, bool skipupdate, @@ -2238,6 +2243,7 @@ static void discard_shares(int64_t *shares_tot, int64_t *shares_dumped, } } +#endif // Duplicates during a reload are set to not show messages bool workinfo_age(int64_t workinfoid, char *poolinstance, tv_t *cd, diff --git a/src/ckdb_dbio.c b/src/ckdb_dbio.c index f45c8aab..00eca084 100644 --- a/src/ckdb_dbio.c +++ b/src/ckdb_dbio.c @@ -3714,8 +3714,8 @@ keep: early_shares->oldcount, early_shares->redo); FREENULL(st); K_WLOCK(shares_free); - add_to_ktree(shares_root, es_item); - k_add_head(shares_store, es_item); + // Discard it, it's been processed + k_add_head(shares_free, es_item); K_WUNLOCK(shares_free); return; discard: @@ -3752,6 +3752,7 @@ bool shares_add(PGconn *conn, char *workinfoid, char *username, char *workername USERS *users; bool ok = false, dup = false; char *st = NULL; + tv_t share_cd; LOGDEBUG("%s(): %s/%s/%s/%s/%ld,%ld", __func__, @@ -3876,9 +3877,10 @@ bool shares_add(PGconn *conn, char *workinfoid, char *username, char *workername ok = shares_process(conn, shares, wi_item, trf_root); if (ok) { + copy_tv(&share_cd, &(shares->createdate)); K_WLOCK(shares_free); - add_to_ktree(shares_root, s_item); - k_add_head(shares_store, s_item); + // Discard it, it's been processed + k_add_head(shares_free, s_item); if (s2_item) { // Discard duplicates tmp_item = find_in_ktree(shares_db_root, s2_item, ctx); @@ -3905,11 +3907,10 @@ bool shares_add(PGconn *conn, char *workinfoid, char *username, char *workername FREENULL(st); } - shares_process_early(conn, wi_item, &(shares->createdate), - trf_root); + shares_process_early(conn, wi_item, 
&share_cd, trf_root); // Call both since shareerrors may be rare shareerrors_process_early(conn, shares->workinfoid, - &(shares->createdate), trf_root); + &share_cd, trf_root); // The original share was ok return true; From dbab97f3dafd6cec311d16072fe9d100b961511b Mon Sep 17 00:00:00 2001 From: kanoi Date: Mon, 22 Aug 2016 17:35:37 +1000 Subject: [PATCH 19/23] ckdb - key update: discard sharesummaries since they are not used --- src/ckdb.h | 2 +- src/ckdb_dbio.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/ckdb.h b/src/ckdb.h index 557903ad..57768337 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.416" +#define CKDB_VERSION DB_VERSION"-2.417" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ diff --git a/src/ckdb_dbio.c b/src/ckdb_dbio.c index 00eca084..a9580be1 100644 --- a/src/ckdb_dbio.c +++ b/src/ckdb_dbio.c @@ -4787,6 +4787,39 @@ bool sharesummaries_to_markersummaries(PGconn *conn, WORKMARKERS *workmarkers, dokey: + if (key_update) { + setnow(&add_stt); + + // Discard the sharesummaries + looksharesummary.workinfoid = workmarkers->workinfoidend; + looksharesummary.userid = MAXID; + looksharesummary.workername = EMPTY; + + INIT_SHARESUMMARY(&ss_look); + ss_look.data = (void *)(&looksharesummary); + /* Since shares come in from ckpool at a high rate, + * we don't want to lock sharesummary for long + * Those incoming shares will not be touching the sharesummaries + * we are processing here */ + K_RLOCK(sharesummary_free); + ss_item = find_before_in_ktree(sharesummary_workinfoid_root, + &ss_look, ss_ctx); + K_RUNLOCK(sharesummary_free); + while (ss_item) { + DATA_SHARESUMMARY(sharesummary, ss_item); + if (sharesummary->workinfoid < workmarkers->workinfoidstart) + break; + K_WLOCK(sharesummary_free); + ss_prev = prev_in_ktree(ss_ctx); + 
k_unlink_item(sharesummary_store, ss_item); + K_WUNLOCK(sharesummary_free); + k_add_head_nolock(old_sharesummary_store, ss_item); + + ss_item = ss_prev; + } + setnow(&add_fin); + } + setnow(&kadd_stt); INIT_KEYSUMMARY(&ks_look); From e9dcdd3bd281828543fa1ac6bd3dfa8f2c4fb569 Mon Sep 17 00:00:00 2001 From: kanoi Date: Tue, 23 Aug 2016 11:17:54 +1000 Subject: [PATCH 20/23] ckdb - use -N to change the listener socket name --- src/ckdb.c | 14 +++++++++++--- src/ckdb.h | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/ckdb.c b/src/ckdb.c index 59db2800..54e12ec3 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -807,6 +807,7 @@ K_TREE *userinfo_root; K_LIST *userinfo_free; K_STORE *userinfo_store; +static char *listener_base = "listener"; static char logname_db[512]; static char logname_io[512]; static char *dbcode; @@ -8411,6 +8412,8 @@ static struct option long_options[] = { { "marker", no_argument, 0, 'm' }, { "markstart", required_argument, 0, 'M' }, { "name", required_argument, 0, 'n' }, + // base socket name to use instead of 'listener' + { "listener", required_argument, 0, 'N' }, { "dbpass", required_argument, 0, 'p' }, { "btc-pass", required_argument, 0, 'P' }, { "reload-queue-limit", required_argument, 0, 'q' }, @@ -8464,7 +8467,7 @@ int main(int argc, char **argv) memset(&ckp, 0, sizeof(ckp)); ckp.loglevel = LOG_NOTICE; - while ((c = getopt_long(argc, argv, "a:b:B:c:d:D:f:ghi:IkK:l:L:mM:n:p:P:q:Q:r:R:s:S:t:Tu:U:vw:yY:", long_options, &i)) != -1) { + while ((c = getopt_long(argc, argv, "a:b:B:c:d:D:f:ghi:IkK:l:L:mM:n:N:p:P:q:Q:r:R:s:S:t:Tu:U:vw:yY:", long_options, &i)) != -1) { switch(c) { case '?': case ':': @@ -8611,6 +8614,9 @@ int main(int argc, char **argv) case 'n': ckp.name = strdup(optarg); break; + case 'N': + listener_base = strdup(optarg); + break; case 'p': db_pass = strdup(optarg); kill = optarg; @@ -8860,7 +8866,9 @@ int main(int argc, char **argv) } if (key_update) { - ckp.main.sockname = strdup("klistener"); + char buf[64]; + 
snprintf(buf, sizeof(buf), "k%s", listener_base); + ckp.main.sockname = strdup(buf); write_namepid(&ckp.main); create_process_unixsock(&ckp.main); fcntl(ckp.main.us.sockd, F_SETFD, FD_CLOEXEC); @@ -8872,7 +8880,7 @@ int main(int argc, char **argv) confirm_summaries(); everyone_die = true; } else { - ckp.main.sockname = strdup("listener"); + ckp.main.sockname = strdup(listener_base); write_namepid(&ckp.main); create_process_unixsock(&ckp.main); fcntl(ckp.main.us.sockd, F_SETFD, FD_CLOEXEC); diff --git a/src/ckdb.h b/src/ckdb.h index 57768337..c67b02c9 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.417" +#define CKDB_VERSION DB_VERSION"-2.418" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ From 1858acbc56b9e7adbfc5571e0223d17a821b5797 Mon Sep 17 00:00:00 2001 From: kanoi Date: Tue, 23 Aug 2016 11:41:57 +1000 Subject: [PATCH 21/23] ckdb - change the main processname with -o --- src/ckdb.c | 10 ++++++++-- src/ckdb.h | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/ckdb.c b/src/ckdb.c index 54e12ec3..8243f507 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -808,6 +808,7 @@ K_LIST *userinfo_free; K_STORE *userinfo_store; static char *listener_base = "listener"; +static char *process_name = "main"; static char logname_db[512]; static char logname_io[512]; static char *dbcode; @@ -8414,6 +8415,8 @@ static struct option long_options[] = { { "name", required_argument, 0, 'n' }, // base socket name to use instead of 'listener' { "listener", required_argument, 0, 'N' }, + // process name to use instead of "main" + { "process", required_argument, 0, 'o' }, { "dbpass", required_argument, 0, 'p' }, { "btc-pass", required_argument, 0, 'P' }, { "reload-queue-limit", required_argument, 0, 'q' }, @@ -8467,7 +8470,7 @@ int main(int argc, char **argv) memset(&ckp, 0, sizeof(ckp)); ckp.loglevel = LOG_NOTICE; - while ((c = 
getopt_long(argc, argv, "a:b:B:c:d:D:f:ghi:IkK:l:L:mM:n:N:p:P:q:Q:r:R:s:S:t:Tu:U:vw:yY:", long_options, &i)) != -1) { + while ((c = getopt_long(argc, argv, "a:b:B:c:d:D:f:ghi:IkK:l:L:mM:n:N:o:p:P:q:Q:r:R:s:S:t:Tu:U:vw:yY:", long_options, &i)) != -1) { switch(c) { case '?': case ':': @@ -8617,6 +8620,9 @@ int main(int argc, char **argv) case 'N': listener_base = strdup(optarg); break; + case 'o': + process_name = strdup(optarg); + break; case 'p': db_pass = strdup(optarg); kill = optarg; @@ -8818,7 +8824,7 @@ int main(int argc, char **argv) srandom((unsigned int)(now.tv_usec * 4096 + now.tv_sec % 4096)); ckp.main.ckp = &ckp; - ckp.main.processname = strdup("main"); + ckp.main.processname = strdup(process_name); cklock_init(&breakdown_lock); cklock_init(&replier_lock); diff --git a/src/ckdb.h b/src/ckdb.h index c67b02c9..d76dd319 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.418" +#define CKDB_VERSION DB_VERSION"-2.419" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ From ee10a5b339aee79a42445ad0ac4090515a37de7b Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Tue, 23 Aug 2016 22:42:08 +1000 Subject: [PATCH 22/23] Bump version to 0.9.4 --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 2043d020..ba4082cd 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT(ckpool, 0.9.3, kernel@kolivas.org) +AC_INIT(ckpool, 0.9.4, kernel@kolivas.org) AC_CANONICAL_SYSTEM AC_CONFIG_MACRO_DIR([m4]) From 654baf6bf512f475276e9ca9109bca220bc3cef3 Mon Sep 17 00:00:00 2001 From: kanoi Date: Wed, 24 Aug 2016 17:28:11 +1000 Subject: [PATCH 23/23] ckdb - use a separate thread for message logging --- src/ckdb.c | 409 +++++++++++++++++++++++++++++++++++++++++------------ src/ckdb.h | 20 ++- 2 files changed, 340 insertions(+), 89 deletions(-) diff --git a/src/ckdb.c 
b/src/ckdb.c index 8243f507..52994d67 100644 --- a/src/ckdb.c +++ b/src/ckdb.c @@ -327,6 +327,15 @@ static bool reload_queue_complete = false; bool everyone_die = false; // Set to true every time a store is created static bool seqdata_reload_lost = false; +// Tell the ioqueue thread to exit +static bool ioqueue_die = false; +/* Tell the console ioqueue to clear its list as fast as possible + * In the case where a console log unexpectedly ends up having a very large + * number of messages, since it limits the number of messages per second + * this could take a long time to clear + * To resolve this, you can disconnect from the console if your IO speed to + * the console is slow, then send it all by sending flush.2 */ +static bool ioqueue_flush = false; /* These are included in cmd_homepage * to help identify when ckpool locks up (or dies) */ @@ -402,6 +411,14 @@ int64_t mismatch_all_workmarkers; int64_t mismatch_all_marks; int64_t mismatch_all_total; +// IOQUEUE +static K_LIST *ioqueue_free; +static K_STORE *ioqueue_store; +static K_STORE *console_ioqueue_store; +// Trigger ioqueue_store processing +static mutex_t f_ioqueue_waitlock; +static pthread_cond_t f_ioqueue_waitcond; + // LOGQUEUE K_LIST *logqueue_free; K_STORE *logqueue_store; @@ -823,6 +840,245 @@ static bool no_data_log = false; * 'restorefrom/' */ static char *logpath; +static void ioprocess(IOQUEUE *io) +{ + char stamp[128], tzinfo[16], tzch; + long minoff, hroff; + struct tm tm; + int ms; + + if (io->when.tv_sec == 0) + stamp[0] = '\0'; + else { + ms = (int)(io->when.tv_usec / 1000); + localtime_r(&(io->when.tv_sec), &tm); + minoff = tm.tm_gmtoff / 60; + if (minoff < 0) { + tzch = '-'; + minoff *= -1; + } else + tzch = '+'; + hroff = minoff / 60; + if (minoff % 60) { + snprintf(tzinfo, sizeof(tzinfo), + "%c%02ld:%02ld", + tzch, hroff, minoff % 60); + } else { + snprintf(tzinfo, sizeof(tzinfo), + "%c%02ld", + tzch, hroff); + } + snprintf(stamp, sizeof(stamp), + "[%d-%02d-%02d %02d:%02d:%02d.%03d%s] ",
+ tm.tm_year + 1900, + tm.tm_mon + 1, + tm.tm_mday, + tm.tm_hour, + tm.tm_min, + tm.tm_sec, ms, + tzinfo); + } + + if (io->logfd && global_ckp) { + int logfd = global_ckp->logfd; + if (logfd) { + FILE *LOGFP = global_ckp->logfp; + + flock(logfd, LOCK_EX); + if (io->errn) { + fprintf(LOGFP, "%s%s with errno %d: %s\n", + stamp, io->msg, + io->errn, strerror(io->errn)); + } else + fprintf(LOGFP, "%s%s\n", stamp, io->msg); + flock(logfd, LOCK_UN); + } + } + + if (io->logout) { + if (io->errn) { + fprintf(stdout, "%s%s with errno %d: %s%s", + stamp, io->msg, + io->errn, strerror(io->errn), + io->eol ? "\n" : "\r"); + } else { + fprintf(stdout, "%s%s%s", + stamp, io->msg, + io->eol ? "\n" : "\r"); + } + if (io->flush) + fflush(stdout); + } + + if (io->logerr) { + if (io->errn) { + fprintf(stderr, "%s%s with errno %d: %s%s", + stamp, io->msg, + io->errn, strerror(io->errn), + io->eol ? "\n" : "\r"); + } else { + fprintf(stderr, "%s%s%s", + stamp, io->msg, + io->eol ? "\n" : "\r"); + } + if (io->flush) + fflush(stderr); + } +} + +static void *iomsgs(void *consol) +{ + ts_t when, when_add; + K_ITEM *io_item; + IOQUEUE *io; + char buf[64]; + + snprintf(buf, sizeof(buf), + "db%s_%s%s", + dbcode, consol ? "c" : "f", __func__); + LOCK_INIT(buf); + rename_proc(buf); + + when_add.tv_sec = 0; + when_add.tv_nsec = 100000000; // 100ms + + while (0x80085) { + // WARNING taking locks can produce messages ... + K_WLOCK(ioqueue_free); + if (consol) + io_item = k_unlink_head(console_ioqueue_store); + else + io_item = k_unlink_head(ioqueue_store); + K_WUNLOCK(ioqueue_free); + + if (io_item) { + DATA_IOQUEUE(io, io_item); + ioprocess(io); + free(io->msg); + // WARNING taking locks can produce messages ... 
+ K_WLOCK(ioqueue_free); + k_add_head(ioqueue_free, io_item); + K_WUNLOCK(ioqueue_free); + } else { + if (ioqueue_die) + break; + if (consol) { + // The queue is clear + ioqueue_flush = false; + } + } + + if (consol) { + if (!ioqueue_flush) { + // max 50 per second + cksleep_ms(20); + } + } else { + if (!io_item) { + setnowts(&when); + timeraddspec(&when, &when_add); + + mutex_lock(&f_ioqueue_waitlock); + cond_timedwait(&f_ioqueue_waitcond, + &f_ioqueue_waitlock, + &when); + mutex_unlock(&f_ioqueue_waitlock); + } + } + } + + return NULL; +} + +#define io_msg(stamp, msg, errn, logfd, logerr) \ + _io_msg(stamp, msg, false, errn, logfd, false, logerr, true, true, \ + WHERE_FFL_HERE) +#define cr_msg(stamp, msg) \ + _io_msg(stamp, msg, true, 0, false, true, false, false, true, WHERE_FFL_HERE) +#define lf_msg(stamp, msg) \ + _io_msg(stamp, msg, true, 0, false, true, false, true, true, WHERE_FFL_HERE) +#define err_msg(stamp, msg, errn) \ + _io_msg(stamp, msg, true, errn, false, false, true, true, true, WHERE_FFL_HERE) + +static void _io_msg(bool stamp, char *msg, bool alloc, int errn, bool logfd, + bool logout, bool logerr, bool eol, bool flush, + WHERE_FFL_ARGS) +{ + K_ITEM *fio_item = NULL, *cio_item = NULL; + bool msgused = false; + IOQUEUE *io; + tv_t now; + + if (!logfd && !logout && !logerr) { + quitfrom(1, file, func, line, + "%s() called without output", __func__); + } + + if (stamp) + setnow(&now); + else + now.tv_sec = 0; + + // WARNING taking locks can produce messages ... 
+ K_WLOCK(ioqueue_free); + if (logfd) + fio_item = k_unlink_head(ioqueue_free); + if (logout || logerr) + cio_item = k_unlink_head(ioqueue_free); + K_WUNLOCK(ioqueue_free); + + if (logfd) { + DATA_IOQUEUE(io, fio_item); + if (!alloc) { + io->msg = msg; + msgused = true; + } else { + io->msg = strdup(msg); + if (!(io->msg)) + quithere(1, "strdup (%d) OOM", (int)strlen(msg)); + } + copy_tv(&(io->when), &now); + io->errn = errn; + io->logfd = logfd; + io->logout = false; + io->logerr = false; + io->eol = eol; + io->flush = flush; + } + + if (logout || logerr) { + DATA_IOQUEUE(io, cio_item); + if (!alloc && !msgused) + io->msg = msg; + else { + io->msg = strdup(msg); + if (!(io->msg)) + quithere(1, "strdup (%d) OOM", (int)strlen(msg)); + } + copy_tv(&(io->when), &now); + io->errn = errn; + io->logfd = false; + io->logout = logout; + io->logerr = logerr; + io->eol = eol; + io->flush = flush; + } + + // WARNING taking locks can produce messages ... + K_WLOCK(ioqueue_free); + if (fio_item) + k_add_tail(ioqueue_store, fio_item); + if (cio_item) + k_add_tail(console_ioqueue_store, cio_item); + K_WUNLOCK(ioqueue_free); + + if (fio_item) { + mutex_lock(&f_ioqueue_waitlock); + pthread_cond_signal(&f_ioqueue_waitcond); + mutex_unlock(&f_ioqueue_waitlock); + } +} + static void replace_ymd(char *srch, char *match, int val) { char buf[32], *ptr, *found; @@ -914,85 +1170,30 @@ static void log_queue_message(char *msg, bool db) void logmsg(int loglevel, const char *fmt, ...) 
{ - int logfd = 0; char *buf = NULL; - struct tm tm; - tv_t now_tv; - int ms; va_list ap; - char stamp[128]; - char *extra = EMPTY; - char tzinfo[16]; - char tzch; - long minoff, hroff; + int errn; - if (loglevel > global_ckp->loglevel) - return; - - tv_time(&now_tv); - ms = (int)(now_tv.tv_usec / 1000); - localtime_r(&(now_tv.tv_sec), &tm); - minoff = tm.tm_gmtoff / 60; - if (minoff < 0) { - tzch = '-'; - minoff *= -1; - } else - tzch = '+'; - hroff = minoff / 60; - if (minoff % 60) { - snprintf(tzinfo, sizeof(tzinfo), - "%c%02ld:%02ld", - tzch, hroff, minoff % 60); - } else { - snprintf(tzinfo, sizeof(tzinfo), - "%c%02ld", - tzch, hroff); - } - snprintf(stamp, sizeof(stamp), - "[%d-%02d-%02d %02d:%02d:%02d.%03d%s]", - tm.tm_year + 1900, - tm.tm_mon + 1, - tm.tm_mday, - tm.tm_hour, - tm.tm_min, - tm.tm_sec, ms, - tzinfo); + errn = errno; + errno = 0; if (!fmt) { - fprintf(stderr, "%s %s() called without fmt\n", stamp, __func__); + err_msg(true, "logmsg() called without fmt", errn); return; } - if (!global_ckp) - extra = " !!NULL global_ckp!!"; - else - logfd = global_ckp->logfd; + if (loglevel > global_ckp->loglevel) + return; va_start(ap, fmt); VASPRINTF(&buf, fmt, ap); va_end(ap); - if (logfd) { - FILE *LOGFP = global_ckp->logfp; - - flock(logfd, LOCK_EX); - fprintf(LOGFP, "%s %s", stamp, buf); - if (loglevel <= LOG_ERR && errno != 0) - fprintf(LOGFP, " with errno %d: %s", errno, strerror(errno)); - errno = 0; - fprintf(LOGFP, "\n"); - flock(logfd, LOCK_UN); - } - if (loglevel <= LOG_WARNING) { - if (loglevel <= LOG_ERR && errno != 0) { - fprintf(stderr, "%s %s with errno %d: %s%s\n", - stamp, buf, errno, strerror(errno), extra); - errno = 0; - } else - fprintf(stderr, "%s %s%s\n", stamp, buf, extra); - fflush(stderr); - } - free(buf); + // iomsgs() will free buf + if (loglevel <= LOG_ERR) + io_msg(true, buf, errn, true, loglevel <= LOG_WARNING); + else + io_msg(true, buf, 0, true, loglevel <= LOG_WARNING); } void setnowts(ts_t *now) @@ -1230,15 +1431,14 @@ 
static time_t last_tick; void tick() { time_t now; - char ch; + char ch[2]; now = time(NULL); if (now != last_tick) { last_tick = now; - ch = status_chars[ticks++ & 0x3]; - putchar(ch); - putchar('\r'); - fflush(stdout); + ch[0] = status_chars[ticks++ & 0x3]; + ch[1] = '\0'; + cr_msg(false, ch); } } @@ -3907,6 +4107,11 @@ static void *breaker(void *arg) else typ = ISCMD; + snprintf(buf, sizeof(buf), "db%s_%c%02d%s", + dbcode, reload ? 'r' : 'c', mythread, __func__); + LOCK_INIT(buf); + rename_proc(buf); + if (mythread == 0) { pthread_detach(pthread_self()); @@ -3941,11 +4146,6 @@ static void *breaker(void *arg) when_add.tv_nsec = (CMD_QUEUE_SLEEP_MS % 1000) * 1000000; } - snprintf(buf, sizeof(buf), "db%s_%c%02d%s", - dbcode, reload ? 'r' : 'c', mythread, __func__); - LOCK_INIT(buf); - rename_proc(buf); - LOGNOTICE("%s() %s starting", __func__, buf); if (reload) { @@ -5856,6 +6056,11 @@ static void *process_socket(void *arg) fflush(stderr); if (global_ckp && global_ckp->logfd) fflush(global_ckp->logfp); + if (*(msgline->id)) { + // If you set the flush id to 2 + if(atoi(msgline->id) == 2) + ioqueue_flush = true; + } setnow(&(msgline->processed)); break; case CMD_USERSET: @@ -6409,7 +6614,14 @@ static void *process_reload(__maybe_unused void *arg) if (arg) mythread = *(int *)(arg); - else { + else + mythread = 0; + + snprintf(buf, sizeof(buf), "db%s_p%02drload", dbcode, mythread); + LOCK_INIT(buf); + rename_proc(buf); + + if (!arg) { pthread_detach(pthread_self()); for (i = 0; i < THREAD_LIMIT; i++) { @@ -6417,7 +6629,6 @@ static void *process_reload(__maybe_unused void *arg) running[i] = false; } - mythread = 0; running[0] = true; // Set to create the rest of the threads @@ -6426,10 +6637,6 @@ static void *process_reload(__maybe_unused void *arg) LOGNOTICE("%s() starting", __func__); } - snprintf(buf, sizeof(buf), "db%s_p%02drload", dbcode, mythread); - LOCK_INIT(buf); - rename_proc(buf); - when_add.tv_sec = RELOAD_QUEUE_SLEEP_MS / 1000; when_add.tv_nsec = 
(RELOAD_QUEUE_SLEEP_MS % 1000) * 1000000; @@ -6734,7 +6941,7 @@ static bool reload_from(tv_t *start, const tv_t *finish) { // proc_pt could exit after this returns static pthread_t proc_pt; - char buf[DATE_BUFSIZ+1], run[DATE_BUFSIZ+1]; + char buf[DATE_BUFSIZ+1], run[DATE_BUFSIZ+1], tickmsg[256]; size_t rflen = strlen(restorefrom); char *missingfirst = NULL, *missinglast = NULL, *st = NULL; int missing_count, processing, counter; @@ -6816,11 +7023,12 @@ static bool reload_from(tv_t *start, const tv_t *finish) poolq = pool_workqueue_store->count; // pool_workqueue_store should be zero K_RUNLOCK(workqueue_free); - printf(TICK_PREFIX"reload %"PRIu64"/%d/%d" - " ckp %d/%d/%d/%d (%d) \r", + snprintf(tickmsg, sizeof(tickmsg), + TICK_PREFIX"reload %"PRIu64"/%d/%d" + " ckp %d/%d/%d/%d (%d) ", total+count, relq, relqd, cmdq, cmdqd, pool0q, poolq, mx); - fflush(stdout); + cr_msg(false, tickmsg); tick_time = tmp_time; } } @@ -8450,6 +8658,7 @@ int main(int argc, char **argv) struct sigaction handler; char *btc_user = "user"; char *btc_pass = "p"; + pthread_t f_iomsgs_pt, c_iomsgs_pt; char buf[512]; ckpool_t ckp; int c, ret, i = 0, j; @@ -8850,6 +9059,23 @@ int main(int argc, char **argv) cond_init(&wq_cmd_waitcond); cond_init(&wq_btc_waitcond); + mutex_init(&f_ioqueue_waitlock); + cond_init(&f_ioqueue_waitcond); + + // Initialise IOQUEUE before anything needs it + ioqueue_free = k_new_list("IOQueue", sizeof(IOQUEUE), + ALLOC_IOQUEUE, LIMIT_IOQUEUE, true); + ioqueue_store = k_new_store(ioqueue_free); + console_ioqueue_store = k_new_store(ioqueue_free); + +#if LOCK_CHECK + DLPRIO(ioqueue, PRIO_TERMINAL); +#endif + + create_pthread(&f_iomsgs_pt, iomsgs, NULL); + bool consol = true; + create_pthread(&c_iomsgs_pt, iomsgs, &consol); + // Emulate a list for lock checking process_pplns_free = k_lock_only_list("ProcessPPLNS"); workers_db_free = k_lock_only_list("WorkersDB"); @@ -8923,7 +9149,8 @@ int main(int argc, char **argv) } if (msg) { trigger = curr; - printf("%s %ds due 
to%s%s%s%s%s%s%s%s%s\n", + snprintf(buf, sizeof(buf), + "%s %ds due to%s%s%s%s%s%s%s%s%s\n", msg, (int)(curr - start), socketer_using_data ? " socketer" : EMPTY, summariser_using_data ? " summariser" : EMPTY, @@ -8934,13 +9161,19 @@ int main(int argc, char **argv) marker_using_data ? " marker" : EMPTY, breakdown_using_data ? " breakdown" : EMPTY, replier_using_data ? " replier" : EMPTY); - fflush(stdout); + lf_msg(true, buf); } sleep(1); } dealloc_storage(); + ioqueue_die = true; + join_pthread(f_iomsgs_pt); + join_pthread(c_iomsgs_pt); + FREE_STORE(console_ioqueue); + FREE_LISTS(ioqueue); + clean_up(&ckp); return 0; diff --git a/src/ckdb.h b/src/ckdb.h index d76dd319..bf623a9c 100644 --- a/src/ckdb.h +++ b/src/ckdb.h @@ -58,7 +58,7 @@ #define DB_VLOCK "1" #define DB_VERSION "1.0.7" -#define CKDB_VERSION DB_VERSION"-2.419" +#define CKDB_VERSION DB_VERSION"-2.420" #define WHERE_FFL " - from %s %s() line %d" #define WHERE_FFL_HERE __FILE__, __func__, __LINE__ @@ -1115,6 +1115,23 @@ enum cmd_values { (_row)->pointers = (_row)->pointers; \ } while (0) +// IOQUEUE +typedef struct ioqueue { + char *msg; + tv_t when; + int errn; + bool logfd; + bool logout; + bool logerr; + bool eol; + bool flush; +} IOQUEUE; + +#define ALLOC_IOQUEUE 1024 +#define LIMIT_IOQUEUE 0 +#define INIT_IOQUEUE(_item) INIT_GENERIC(_item, ioqueue) +#define DATA_IOQUEUE(_var, _item) DATA_GENERIC(_var, _item, ioqueue, true) + // LOGQUEUE typedef struct logqueue { char *msg; @@ -2945,6 +2962,7 @@ enum reply_type { }; extern void logmsg(int loglevel, const char *fmt, ...); +extern void setnowts(ts_t *now); extern void setnow(tv_t *now); extern void tick(); extern PGconn *dbconnect();