ckdb - startup documentation to be implemented

11 years ago · 2b3f618b63
1 changed files with 61 additions and 8 deletions
--- a/src/ckdb.c
+++ b/src/ckdb.c
@ -72,10 +72,63 @@ static char *status_chars = "|/-\\";

 static char *restorefrom;

-/* Restart data needed
- * -------------------
- * After the DB load, load "ckpool's ckdb logfile" (CCL), and all
- * later CCLs, that contains the oldest date of all of the following:
+/* Startup
+ * -------
+ * During startup we load the DB and track where it is up to with
+ *  dbstatus, we then reload "ckpool's ckdb logfiles" (CCLs) based
+ *  on dbstatus
+ *TODO:
+ * Once the DB is loaded, we can immediately start receiving ckpool
+ *  messages since ckpool already has logged all messages to the CLLs
+ *  and ckpool only verifies authorise responses
+ *  Thus we can queue all messages:
+ *	workinfo, shares, shareerror, ageworkinfo, poolstats, userstats
+ *	and block
+ *  to be processed after the reload completes and just process authorise
+ *  messages immediately while the reload runs
+ *  This can't cause a duplicate process of an authorise message since a
+ *  reload will ignore any messages before the last DB auths message,
+ *  however, if ckdb and ckpool get out of sync due to ckpool starting
+ *  during the reload (as mentioned below) it is possible for ckdb to
+ *  find an authorise message in the CCLs that was processed in the
+ *  message queue and thus is already in the DB.
+ *  This error would be very rare and also not an issue
+ * The first ckpool message also allows us to know where ckpool is up to
+ *  in the CCLs and thus where to stop processing the CCLs to stay in
+ *  sync with ckpool
+ * If ckpool isn't running, then the reload will complete at the end of
+ *  the last CCL file, however if a message arrives from ckpool while
+ *  processing the CCLs, that will mark the point where to stop processing
+ *  but can also produce a fatal error at the end of processing, reporting
+ *  the full ckpool message, if the message was not found in the CCL
+ *  processing after the message was received
+ *  This can be caused by two circumstances:
+ *  1) the disk had not yet written it to the CCL when ckdb read EOF and
+ *	ckpool was started at about the same time as the reload completed.
+ *	This can be seen if the message displayed in the fatal error IS NOT
+ *	in ckdb's message logfile. A ckdb restart will resolve this
+ *  2) ckpool was started at the time of the end of the reload, but the
+ *	authorise message was written to disk and found in the CCL before
+ *	it was processed in the message queue. This can be seen if the
+ *	message displayed in the fatal error IS in ckdb's message logfile
+ *	and means the messages after it in the logfile have already been
+ *	processed. Again, a ckdb restart will resolve this
+ *  In both the above (very rare) cases, if ckdb was to continue running,
+ *  it would break the synchronisation and could cause DB problems, so
+ *  ckdb aborting and needing a restart resolves this
+ * The users table, required for the authorise messages, is always updated
+ *  immediately and is not affected by ckpool messages until we
+ *   TODO: allow bitcoin addresses - this will also need to be handled
+ *    while filling the queue during reload, once we allow BTC addresses
+ * During the reload we can use the userstats createdate as 'now' for
+ *  the userstats summarisation process to allow the summarisation to
+ *  run during the reload
+ */
+
+/* Reload data needed
+ * ------------------
+ * After the DB load completes, load "ckpool's ckdb logfile" (CCL), and
+ * all later CCLs, that contains the oldest date of all of the following:
 *  RAM shares: oldest DB sharesummary firstshare where complete='n'
 *	All shares before this have been summarised to the DB with
 *	complete='a' (or 'y') and were deleted from RAM
@ -108,7 +161,7 @@ static char *restorefrom;
 *	will be after the last DB workinfo
 *  DB+RAM accountbalance (TODO): resolved by shares/workinfo/blocks
 *  RAM workerstatus: last_auth, last_share, last_stats all handled by
- *	DB load up to whatever the CCL restart point is, and then
+ *	DB load up to whatever the CCL reload point is, and then
 *	corrected with the CCL reload
 *	last_idle will be the last idle userstats in the CCL load or 0
 *	Code currently doesn't use last_idle, so for now this is OK
@ -161,7 +214,7 @@ typedef struct loadstatus {
 } LOADSTATUS;
 static LOADSTATUS dbstatus;

-/* Temporary while doing restart - it (of course) contains the fields
+/* Temporary while doing reload - it (of course) contains the fields
 * required to track the newest userstats per user/worker
 */
 static K_TREE *userstats_db_root;
@ -2878,7 +2931,7 @@ static bool _sharesummary_update(PGconn *conn, SHARES *s_row, SHAREERRORS *e_row
 static double cmp_sharesummary_workinfoid(K_ITEM *a, K_ITEM *b);
 static double cmp_shares(K_ITEM *a, K_ITEM *b);

-/* N.B. a DB check can be done to find sharesummaries that were missed being
+/* N.B. a DB check can be done to find sharesummaries that have missed being
 *  aged (and a possible problem with the aging process):
 *  e.g. for a date D in the past of at least a few hours
 *	select count(*) from sharesummary where createdate<'D' and complete='n';
@ -2886,7 +2939,7 @@ static double cmp_shares(K_ITEM *a, K_ITEM *b);
 *	update sharesummary set complete='a' where createdate<'D' and complete='n';
 * It's important to make sure the D value is far enough in the past such that
 *  all the matching sharesummary records in ckdb have certainly completed
- *  ckdb would need a restart to get the updated DB information though it would
+ *  ckdb would need to restart to get the updated DB information though it would
 *  not affect current ckdb code
 */
 static bool workinfo_age(PGconn *conn, char *workinfoidstr, char *poolinstance,