ckdb - startup documentation to be implemented

11 years ago · 2b3f618b63
1 changed files with 61 additions and 8 deletions
--- a/src/ckdb.c
+++ b/src/ckdb.c
@ -72,10 +72,63 @@ static char *status_chars = "|/-\\";
 static char *restorefrom;
-/* Restart data needed
+/* Startup
- * -------------------
+ * -------
- * After the DB load, load "ckpool's ckdb logfile" (CCL), and all
+ * During startup we load the DB and track where it is up to with
- * later CCLs, that contains the oldest date of all of the following:
+ *  dbstatus, we then reload "ckpool's ckdb logfiles" (CCLs) based
 *  on dbstatus
 *TODO:
 * Once the DB is loaded, we can immediately start receiving ckpool
 *  messages since ckpool already has logged all messages to the CCLs
 *  and ckpool only verifies authorise responses
 *  Thus we can queue all messages:
 *	workinfo, shares, shareerror, ageworkinfo, poolstats, userstats
 *	and block
 *  to be processed after the reload completes and just process authorise
 *  messages immediately while the reload runs
 *  This can't cause a duplicate process of an authorise message since a
 *  reload will ignore any messages before the last DB auths message,
 *  however, if ckdb and ckpool get out of sync due to ckpool starting
 *  during the reload (as mentioned below) it is possible for ckdb to
 *  find an authorise message in the CCLs that was processed in the
 *  message queue and thus is already in the DB.
 *  This error would be very rare and also not an issue
 * The first ckpool message also allows us to know where ckpool is up to
 *  in the CCLs and thus where to stop processing the CCLs to stay in
 *  sync with ckpool
 * If ckpool isn't running, then the reload will complete at the end of
 *  the last CCL file, however if a message arrives from ckpool while
 *  processing the CCLs, that will mark the point where to stop processing
 *  but can also produce a fatal error at the end of processing, reporting
 *  the full ckpool message, if the message was not found in the CCL
 *  processing after the message was received
 *  This can be caused by two circumstances:
 *  1) the disk had not yet written it to the CCL when ckdb read EOF and
 *	ckpool was started at about the same time as the reload completed.
 *	This can be seen if the message displayed in the fatal error IS NOT
 *	in ckdb's message logfile. A ckdb restart will resolve this
 *  2) ckpool was started at the time of the end of the reload, but the
 *	authorise message was written to disk and found in the CCL before
 *	it was processed in the message queue. This can be seen if the
 *	message displayed in the fatal error IS in ckdb's message logfile
 *	and means the messages after it in the logfile have already been
 *	processed. Again, a ckdb restart will resolve this
 *  In both the above (very rare) cases, if ckdb was to continue running,
 *  it would break the synchronisation and could cause DB problems, so
 *  ckdb aborting and needing a restart resolves this
 * The users table, required for the authorise messages, is always updated
 *  immediately and is not affected by ckpool messages until we allow
 *  bitcoin addresses (TODO) - this will also need to be handled
 *  while filling the queue during reload, once we allow BTC addresses
 * During the reload we can use the userstats createdate as 'now' for
 *  the userstats summarisation process to allow the summarisation to
 *  run during the reload
 */
 /* Reload data needed
 * ------------------
 * After the DB load completes, load the "ckpool's ckdb logfile" (CCL) that
 * contains the oldest date of all of the following, and all later CCLs:
 *  RAM shares: oldest DB sharesummary firstshare where complete='n'
 *	All shares before this have been summarised to the DB with
 *	complete='a' (or 'y') and were deleted from RAM
@ -108,7 +161,7 @@ static char *restorefrom;
 *	will be after the last DB workinfo
 *  DB+RAM accountbalance (TODO): resolved by shares/workinfo/blocks
 *  RAM workerstatus: last_auth, last_share, last_stats all handled by
- *	DB load up to whatever the CCL restart point is, and then
+ *	DB load up to whatever the CCL reload point is, and then
 *	corrected with the CCL reload
 *	last_idle will be the last idle userstats in the CCL load or 0
 *	Code currently doesn't use last_idle, so for now this is OK
@ -161,7 +214,7 @@ typedef struct loadstatus {
 } LOADSTATUS;
 static LOADSTATUS dbstatus;
-/* Temporary while doing restart - it (of course) contains the fields
+/* Temporary while doing reload - it (of course) contains the fields
 * required to track the newest userstats per user/worker
 */
 static K_TREE *userstats_db_root;
@ -2878,7 +2931,7 @@ static bool _sharesummary_update(PGconn *conn, SHARES *s_row, SHAREERRORS *e_row
 static double cmp_sharesummary_workinfoid(K_ITEM *a, K_ITEM *b);
 static double cmp_shares(K_ITEM *a, K_ITEM *b);
-/* N.B. a DB check can be done to find sharesummaries that were missed being
+/* N.B. a DB check can be done to find sharesummaries that have missed being
 *  aged (and a possible problem with the aging process):
 *  e.g. for a date D in the past of at least a few hours
 *	select count(*) from sharesummary where createdate<'D' and complete='n';
@ -2886,7 +2939,7 @@ static double cmp_shares(K_ITEM *a, K_ITEM *b);
 *	update sharesummary set complete='a' where createdate<'D' and complete='n';
 * It's important to make sure the D value is far enough in the past such that
 *  all the matching sharesummary records in ckdb have certainly completed
- *  ckdb would need a restart to get the updated DB information though it would
+ *  ckdb would need to restart to get the updated DB information though it would
 *  not affect current ckdb code
 */
 static bool workinfo_age(PGconn *conn, char *workinfoidstr, char *poolinstance,