What the replicate task uses: all new files go in with "nexttry" = 0, rather than relying on "devcount".
/* Queue of files the replicate task still has to process; one row per fid. */
CREATE TABLE needs_replicate (
    fid INT UNSIGNED NOT NULL PRIMARY KEY,
    /* New files are inserted with nexttry = 0. Presumably a time value
       (later attempts use a higher nexttry) -- TODO confirm the unit. */
    nexttry INT UNSIGNED NOT NULL,
    INDEX (nexttry),
    fromdevid INT UNSIGNED, /* NULL means no preference */
    /* NOTE(review): presumably the number of failed attempts so far; nullable as written. */
    failcount TINYINT UNSIGNED
    /* flags? one bit means corrupt, choose source where two files are the same md5?
       reserved for future use. Not declared yet: the column has no type decided. */
);
server_settings:
'fsck_enable' = {off,locations,quick,full}: locations=file_on only, quick=file_on + HEAD requests, full=contents
'fsck_highest_fid_checked' = <fidnumber>
update server_settings SET val=foo WHERE key='highest_fid_inserted' and val < foo;
/* Files queued for an fsck pass; one row per fid.
   IF NOT EXISTS because the fsck job creates this table when it is missing
   (it may be dropped to force a global re-index). */
CREATE TABLE IF NOT EXISTS fsck (
    fid INT UNSIGNED NOT NULL PRIMARY KEY,
    /* When this fid is next due for checking; indexed so the job can find due rows.
       NOTE(review): unit not stated here -- presumably the same time base as
       needs_replicate.nexttry; confirm. */
    nextcheck INT UNSIGNED NOT NULL,
    INDEX (nextcheck)
);
# todo: replicate_now command to set nexttry=0 on all needs_replicate rows.
# todo: fsck, if no copies, still puts in needs_replicate but with high nexttry time and of course no fromdevid.
# how often do we recheck files? -- never, only when they kick it off.
# todo: a replication policy saying "not quite happy" puts the file into the "needs_replicate" table, to be fixed later
# todo: stat command to get count of rows in needs_replicate (!repl replacement)
# provide command to drop fsck table to force global re-indexing
# interface from fsck job to replicate job. or do it ourselves sharing code. (probably sharing code)
# perhaps command-line version is just running:
# mogilefsd --run-only-job=fsck --foreground --verbose
#
while (1) {
# create the fsck table if it does not exist.
# while (find files to check from fsck table),
# randomize
# check them, inserting into needs_replicate or updating nextcheck time, or deleting if okay.
# once no more files to check....
# find files that are so new they don't have records in fsck
while (max(file.fid) > server_settings.fsck_highest_fid_checked && fsck enabled) {
insert into fsck select fid, ? ? ? ? from file where fid > max(fsck) order by fid limit 500
}
}