blob: c0d02e65dfc1253d8c4cb9fd02abc59e80060853 [file] [log] [blame]
/*
* Mirror one arena partition onto another.
* Be careful to copy only new data.
*/
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
 * Channel on which copy() hands Write requests to writeproc().
 * threadmain creates it unbuffered and sends nil on it to stop writeproc.
 */
Channel *writechan;
typedef struct Write Write;
/*
 * One write request for dst: n bytes at p, to be written at offset o.
 * writeproc sets error to 1 if the write fails.
 */
struct Write
{
uchar *p;
int n;
uvlong o;
int error;
};
/* source partition; threadmain opens it OREAD */
Part *src;
/* destination partition; threadmain opens it ORDWR */
Part *dst;
/* set by -F: mirror() copies the whole arena unconditionally */
int force;
/* incremented by each -v; gates progress messages */
int verbose;
/* cleared by -s: mirror() then skips SHA1 verification of sealed arenas */
int dosha1 = 1;
/* exit status handed to threadexitsall; set to "errors" when a problem is found */
char *status;
/* byte range of the arena being mirrored, set by mirror() and asserted by copy() */
uvlong astart, aend;
/*
 * Print the command synopsis on file descriptor 2 and
 * exit all procs with status "usage".
 */
void
usage(void)
{
	static char synopsis[] = "usage: mirrorarenas [-sv] src dst [ranges]\n";

	fprint(2, "%s", synopsis);
	threadexitsall("usage");
}
/*
 * State recorded by tag() for the arena currently being processed:
 * tagged is a formatted header line that chat() prints (once) before
 * its next message; tagname and tagindx are read by setstatus().
 */
char *tagged;
char *tagname;
int tagindx;

/*
 * Record arena index indx and name, and format a header line from
 * fmt and the remaining arguments.  A header left over from an
 * earlier call that was never printed is discarded.
 */
void
tag(int indx, char *name, char *fmt, ...)
{
	va_list ap;

	if(tagged != nil){
		free(tagged);
		tagged = nil;
	}

	tagname = name;
	tagindx = indx;

	va_start(ap, fmt);
	tagged = vsmprint(fmt, ap);
	va_end(ap);
}
/*
 * Arena status bits passed to setstatus().
 * Callers combine them by addition, e.g. Sealed+Mirrored.
 */
enum
{
Sealed = 1,
Mirrored = 2,
Empty = 4,
};
/*
 * Report arena status, coalescing consecutive arenas that share the
 * same status into a single "first-last status" line.
 *
 * bits is a combination of Sealed, Mirrored and Empty describing the
 * arena most recently named by tag().  When bits differs from the
 * status of the run in progress, that run is printed and a new run
 * starts at the current arena; otherwise the run is extended.
 * A negative bits value flushes the run in progress without
 * starting a new one.
 */
void
setstatus(int bits)
{
	static int startindx = -1;
	static char *startname, *endname;
	static int lastbits;
	char buf[100];

	if(bits != lastbits) {
		if(startindx >= 0) {
			switch(lastbits) {
			case Sealed:
				snprint(buf, sizeof buf, "sealed");
				break;
			case Mirrored:
				snprint(buf, sizeof buf, "mirrored");
				break;
			case Sealed+Mirrored:
				snprint(buf, sizeof buf, "mirrored sealed");
				break;
			case Empty:
				snprint(buf, sizeof buf, "empty");
				break;
			default:
				/*
				 * The line describes the run being closed,
				 * so show its status, not the incoming one.
				 */
				snprint(buf, sizeof buf, "%d", lastbits);
				break;
			}
			print("%T %s-%s %s\n", startname, endname, buf);
		}
		lastbits = bits;
		startindx = tagindx;
		startname = tagname;
		endname = tagname;
	} else {
		endname = tagname;
	}
	if(bits < 0) {
		/* flush request: no run is in progress any more */
		startindx = -1;
		return;
	}
}
/*
 * Print a formatted message on file descriptor 1.
 * Any pending status run is flushed first, and if tag() left a
 * header line that has not been shown yet, it is written (and
 * released) ahead of the message.
 */
void
chat(char *fmt, ...)
{
	va_list ap;
	char *hdr;

	setstatus(-1);

	hdr = tagged;
	if(hdr != nil){
		tagged = nil;
		write(1, hdr, strlen(hdr));
		free(hdr);
	}

	va_start(ap, fmt);
	vfprint(1, fmt, ap);
	va_end(ap);
}
#pragma varargck argpos tag 3
#pragma varargck argpos chat 1
/*
 * Read count bytes at offset from partition p into buf.
 * Returns 0 on success; on a short or failed read, reports
 * the error with chat() and returns -1.
 */
int
ereadpart(Part *p, u64int offset, u8int *buf, u32int count)
{
	if(readpart(p, offset, buf, count) == count)
		return 0;

	chat("%T readpart %s at %#llux+%ud: %r\n", p->name, offset, count);
	return -1;
}
/*
 * Write count bytes from buf to partition p at offset, then flush p.
 * Returns 0 on success; if the write is short or the flush fails,
 * reports the error with chat() and returns -1.
 */
int
ewritepart(Part *p, u64int offset, u8int *buf, u32int count)
{
	/* flushpart is attempted only after a complete write */
	if(writepart(p, offset, buf, count) == count && flushpart(p) >= 0)
		return 0;

	chat("%T writepart %s at %#llux+%ud: %r\n", p->name, offset, count);
	return -1;
}
/*
* Extra proc to do writes to dst, so that we can overlap reading
* src with writing dst during copy. This is an easy factor of two
* (almost) in performance.
*/
static Write wsync;
static void
writeproc(void *v)
{
Write *w;
USED(v);
while((w = recvp(writechan)) != nil){
if(w == &wsync)
continue;
if(ewritepart(dst, w->o, w->p, w->n) < 0)
w->error = 1;
}
}
/*
 * Copy the bytes [start, end) of src to the same offsets of dst,
 * overlapping reads of src with writes done by writeproc.
 *
 * what names the region for the verbose progress message.
 * If ds is non-nil, every chunk read is also folded into that
 * SHA1 state.
 *
 * Two Chunk-sized buffers alternate: while writeproc writes one,
 * the next chunk is read into the other.  Sending on writechan
 * only completes once writeproc has finished its previous write
 * and is receiving again, so after a send of w[1-i] the request
 * in w[i] is known to be done and its buffer may be reused.
 *
 * Returns 0 on success, -1 if any read or write failed.
 * The range must lie within the current arena [astart, aend].
 */
int
copy(uvlong start, uvlong end, char *what, DigestState *ds)
{
	int i, n;
	uvlong o;
	enum {
		Chunk = 1024*1024
	};
	static uchar tmpbuf[2*Chunk+MaxIo];
	static uchar *tmp[2];
	uchar *p;
	Write w[2];

	assert(start <= end);
	assert(astart <= start && start < aend);
	assert(astart <= end && end <= aend);

	/* align the buffers so readpart/writepart can do big transfers */
	p = tmpbuf;
	if((uintptr)p%MaxIo)
		p += MaxIo - (uintptr)p%MaxIo;
	tmp[0] = p;
	tmp[1] = p + Chunk;

	if(verbose && start != end)
		chat("%T copy %,llud-%,llud %s\n", start, end, what);

	i = 0;
	memset(w, 0, sizeof w);
	for(o=start; o<end; o+=n){
		/* w[i] was written two iterations ago and has completed */
		if(w[i].error)
			goto error;
		n = Chunk;
		if(o+n > end)
			n = end - o;
		if(ereadpart(src, o, tmp[i], n) < 0)
			goto error;
		w[i].p = tmp[i];
		w[i].o = o;
		w[i].n = n;
		w[i].error = 0;
		sendp(writechan, &w[i]);
		if(ds)
			sha1(tmp[i], n, nil, ds);
		i = 1-i;
	}
	if(w[i].error)
		goto error;

	/*
	 * wait for queued write to finish
	 */
	sendp(writechan, &wsync);
	i = 1-i;
	if(w[i].error)
		return -1;
	return 0;

error:
	/*
	 * Sync with write proc before returning, so that it is no
	 * longer using w[] or tmp[].  Send the static wsync rather
	 * than an element of w: writeproc dereferences a request
	 * after the send completes, by which time this stack frame
	 * may already be gone.
	 */
	sendp(writechan, &wsync);
	return -1;
}
/*
 * Single-threaded version of copy, kept for reference:
 * read each chunk of [start, end) from src, optionally add it
 * to the SHA1 state ds, and write it to dst at the same offset.
 * Returns 0 on success, -1 on the first read or write failure.
 */
int
copy1(uvlong start, uvlong end, char *what, DigestState *ds)
{
	static uchar tmp[1024*1024];
	uvlong off;
	int len;

	assert(start <= end);
	assert(astart <= start && start < aend);
	assert(astart <= end && end <= aend);

	if(verbose && start != end)
		chat("%T copy %,llud-%,llud %s\n", start, end, what);

	off = start;
	while(off < end){
		/* last chunk may be short */
		len = sizeof tmp;
		if(off+len > end)
			len = end - off;

		if(ereadpart(src, off, tmp, len) < 0)
			return -1;
		if(ds)
			sha1(tmp, len, nil, ds);
		if(ewritepart(dst, off, tmp, len) < 0)
			return -1;

		off += len;
	}
	return 0;
}
/*
 * Fold the bytes [start, end) of partition p into the SHA1 state ds.
 * An empty range is a no-op.  Returns 0 on success, -1 if a read fails.
 */
int
asha1(Part *p, uvlong start, uvlong end, DigestState *ds)
{
	static uchar tmp[1024*1024];
	uvlong off;
	int len;

	if(start == end)
		return 0;
	assert(start < end);

	if(verbose)
		chat("%T sha1 %,llud-%,llud\n", start, end);

	off = start;
	while(off < end){
		/* last chunk may be short */
		len = sizeof tmp;
		if(off+len > end)
			len = end - off;

		if(ereadpart(p, off, tmp, len) < 0)
			return -1;
		sha1(tmp, len, nil, ds);

		off += len;
	}
	return 0;
}
/*
 * Round a down to a multiple of b.
 */
uvlong
rdown(uvlong a, int b)
{
	uvlong rem;

	rem = a % b;
	return a - rem;
}
/*
 * Round a up to a multiple of b; a is returned unchanged
 * when it is already a multiple.
 */
uvlong
rup(uvlong a, int b)
{
	uvlong rem;

	rem = a % b;
	if(rem != 0)
		a += b - rem;
	return a;
}
/*
 * Bring arena da on dst up to date with arena sa on src.
 *
 * indx is the arena's index, used only to label output via tag().
 * With the force flag set the whole arena, head and tail blocks
 * included, is copied blindly.  Otherwise only what dst is missing
 * is copied: the arena head is rewritten, then new data blocks,
 * then (for a sealed source) the hole between data and directory,
 * then new directory blocks, and finally the tail.
 *
 * When the source is sealed with a non-zero score and dosha1 is set,
 * a SHA1 is accumulated over the destination arena in address order
 * (hashing blocks already on dst with asha1 and blocks being copied
 * via copy) and compared with the source score before the score is
 * written to dst.  shaoff tracks how far that hash has progressed.
 *
 * Problems are reported with chat(); inconsistencies between the
 * two arenas set status to "errors".  I/O failures abandon the
 * arena after ereadpart/ewritepart have reported them.
 */
void
mirror(int indx, Arena *sa, Arena *da)
{
	vlong v, si, di, end;
	int clumpmax, blocksize, sealed;
	static uchar buf[MaxIoSize];
	ArenaHead h;
	DigestState xds, *ds;
	vlong shaoff, base;

	base = sa->base;
	blocksize = sa->blocksize;
	end = sa->base + sa->size;

	/* the arena occupies one extra block on each side: head and tail */
	astart = base - blocksize;
	aend = end + blocksize;

	tag(indx, sa->name, "%T %s (%,llud-%,llud)\n", sa->name, astart, aend);

	if(force){
		copy(astart, aend, "all", nil);
		return;
	}

	if(sa->diskstats.sealed && da->diskstats.sealed && scorecmp(da->score, zeroscore) != 0){
		if(scorecmp(sa->score, da->score) == 0){
			setstatus(Sealed+Mirrored);
			if(verbose > 1)
				chat("%T %s: %V sealed mirrored\n", sa->name, sa->score);
			return;
		}
		chat("%T %s: warning: sealed score mismatch %V vs %V\n", sa->name, sa->score, da->score);
		/* Keep executing; will correct seal if possible. */
	}
	if(!sa->diskstats.sealed && da->diskstats.sealed && scorecmp(da->score, zeroscore) != 0){
		chat("%T %s: dst is sealed, src is not\n", sa->name);
		status = "errors";
		return;
	}
	if(sa->diskstats.used < da->diskstats.used){
		chat("%T %s: src used %,lld < dst used %,lld\n", sa->name, sa->diskstats.used, da->diskstats.used);
		status = "errors";
		return;
	}

	if(da->clumpmagic != sa->clumpmagic){
		/*
		 * Write this now to reduce the window in which
		 * the head and tail disagree about clumpmagic.
		 */
		da->clumpmagic = sa->clumpmagic;
		memset(buf, 0, sizeof buf);
		packarena(da, buf);
		if(ewritepart(dst, end, buf, blocksize) < 0)
			return;
	}

	/* rebuild and write the arena head block */
	memset(&h, 0, sizeof h);
	h.version = da->version;
	strcpy(h.name, da->name);
	h.blocksize = da->blocksize;
	h.size = da->size + 2*da->blocksize;
	h.clumpmagic = da->clumpmagic;
	memset(buf, 0, sizeof buf);
	packarenahead(&h, buf);
	if(ewritepart(dst, base - blocksize, buf, blocksize) < 0)
		return;

	shaoff = 0;
	ds = nil;
	sealed = sa->diskstats.sealed && scorecmp(sa->score, zeroscore) != 0;
	if(sealed && dosha1){
		/* start sha1 state with header */
		memset(&xds, 0, sizeof xds);
		ds = &xds;
		sha1(buf, blocksize, nil, ds);
		shaoff = base;
	}

	if(sa->diskstats.used != da->diskstats.used){
		di = base+rdown(da->diskstats.used, blocksize);
		si = base+rup(sa->diskstats.used, blocksize);
		if(ds && asha1(dst, shaoff, di, ds) < 0)
			return;
		if(copy(di, si, "data", ds) < 0)
			return;
		shaoff = si;
	}

	clumpmax = sa->clumpmax;
	di = end - da->diskstats.clumps/clumpmax * blocksize;
	si = end - (sa->diskstats.clumps+clumpmax-1)/clumpmax * blocksize;

	if(sa->diskstats.sealed){
		/*
		 * might be a small hole between the end of the
		 * data and the beginning of the directory.
		 */
		v = base+rup(sa->diskstats.used, blocksize);
		if(ds && asha1(dst, shaoff, v, ds) < 0)
			return;
		if(copy(v, si, "hole", ds) < 0)
			return;
		shaoff = si;
	}

	if(da->diskstats.clumps != sa->diskstats.clumps){
		if(ds && asha1(dst, shaoff, si, ds) < 0)
			return;
		if(copy(si, di, "directory", ds) < 0)	/* si < di because clumpinfo blocks grow down */
			return;
		shaoff = di;
	}

	da->ctime = sa->ctime;
	da->wtime = sa->wtime;
	da->diskstats = sa->diskstats;
	da->diskstats.sealed = 0;

	/*
	 * Repack the arena tail information
	 * and save it for next time...
	 */
	memset(buf, 0, sizeof buf);
	packarena(da, buf);
	if(ewritepart(dst, end, buf, blocksize) < 0)
		return;

	if(sealed){
		/*
		 * ... but on the final pass, copy the encoding
		 * of the tail information from the source
		 * arena itself.  There are multiple possible
		 * ways to write the tail info out (the exact
		 * details have changed as venti went through
		 * revisions), and to keep the SHA1 hash the
		 * same, we have to use what the disk uses.
		 *
		 * Hash the rest of dst only when a hash is being
		 * computed: with -s, ds is nil and there is nothing
		 * to accumulate into, exactly as for the guarded
		 * asha1 calls above.
		 */
		if((ds && asha1(dst, shaoff, end, ds) < 0)
		|| copy(end, end+blocksize-VtScoreSize, "tail", ds) < 0)
			return;
		if(dosha1){
			/* the score field itself is hashed as zeros */
			memset(buf, 0, VtScoreSize);
			sha1(buf, VtScoreSize, da->score, ds);
			if(scorecmp(sa->score, da->score) == 0){
				setstatus(Sealed+Mirrored);
				if(verbose > 1)
					chat("%T %s: %V sealed mirrored\n", sa->name, sa->score);
				if(ewritepart(dst, end+blocksize-VtScoreSize, da->score, VtScoreSize) < 0)
					return;
			}else{
				chat("%T %s: sealing dst: score mismatch: %V vs %V\n", sa->name, sa->score, da->score);
				/*
				 * For diagnosis, hash the whole dst arena from
				 * scratch and report that score.  The digest must
				 * be finalized into da->score, otherwise the line
				 * below would just repeat the score printed above.
				 */
				memset(&xds, 0, sizeof xds);
				if(asha1(dst, base-blocksize, end+blocksize-VtScoreSize, &xds) == 0){
					sha1(buf, VtScoreSize, da->score, &xds);
					chat("%T reseal: %V\n", da->score);
				}
				status = "errors";
			}
		}else{
			setstatus(Mirrored);
			if(verbose > 1)
				chat("%T %s: %V mirrored\n", sa->name, sa->score);
			if(ewritepart(dst, end+blocksize-VtScoreSize, sa->score, VtScoreSize) < 0)
				return;
		}
	}else{
		if(sa->diskstats.used > 0 || verbose > 1) {
			chat("%T %s: %,lld used mirrored\n",
				sa->name, sa->diskstats.used);
		}
		if(sa->diskstats.used > 0)
			setstatus(Mirrored);
		else
			setstatus(Empty);
	}
}
/*
 * Mirror a selection of arenas from sp to dp.
 *
 * range selects the arenas:
 *	nil	every arena;
 *	"none"	nothing;
 *	otherwise a comma-separated list of items of the form
 *	N, N-M, N- (N through the last arena) or -M (0 through M),
 *	where N and M are arena indices parsed by strtol.
 *
 * The range string is modified in place (commas are overwritten).
 * A malformed item, or one that names an arena outside
 * 0..narenas-1, is reported with chat() and skipped.
 */
void
mirrormany(ArenaPart *sp, ArenaPart *dp, char *range)
{
	int i, lo, hi, narenas;
	char *s, *t;
	Arena *sa, *da;

	narenas = sp->narenas;

	if(range == nil){
		for(i=0; i<narenas; i++){
			sa = sp->arenas[i];
			da = dp->arenas[i];
			mirror(i, sa, da);
		}
		setstatus(-1);
		return;
	}
	if(strcmp(range, "none") == 0)
		return;

	for(s=range; *s; s=t){
		/* split off the next comma-separated item */
		t = strchr(s, ',');
		if(t)
			*t++ = 0;
		else
			t = s+strlen(s);

		if(*s == '-')
			lo = 0;
		else
			lo = strtol(s, &s, 0);
		hi = lo;
		if(*s == '-'){
			s++;
			if(*s == 0)
				hi = narenas-1;
			else
				hi = strtol(s, &s, 0);
		}
		if(*s != 0){
			chat("%T bad arena range: %s\n", s);
			continue;
		}

		/*
		 * The indices come straight from the command line;
		 * refuse anything that would index past the arena tables.
		 */
		if(lo < 0 || hi >= narenas){
			chat("%T arena range %d-%d out of bounds: have %d arenas\n", lo, hi, narenas);
			continue;
		}

		for(i=lo; i<=hi; i++){
			sa = sp->arenas[i];
			da = dp->arenas[i];
			mirror(i, sa, da);
		}
		setstatus(-1);
	}
}
/*
 * Program entry point: parse flags, open the source partition
 * read-only and the destination read-write, load both arena
 * partitions, check that their arenas agree in count, version,
 * block size, size and name, then start writeproc and mirror
 * the requested arenas.  Exits with the accumulated status.
 */
void
threadmain(int argc, char **argv)
{
	int k;
	char *ranges;
	Arena *a, *b;
	ArenaPart *spart, *dpart;

	ventifmtinstall();

	ARGBEGIN{
	case 'F':
		force = 1;
		break;
	case 'v':
		verbose++;
		break;
	case 's':
		dosha1 = 0;
		break;
	default:
		usage();
	}ARGEND

	if(argc < 2 || argc > 3)
		usage();
	ranges = argc == 3 ? argv[2] : nil;

	src = initpart(argv[0], OREAD);
	if(src == nil)
		sysfatal("initpart %s: %r", argv[0]);
	dst = initpart(argv[1], ORDWR);
	if(dst == nil)
		sysfatal("initpart %s: %r", argv[1]);

	spart = initarenapart(src);
	if(spart == nil)
		sysfatal("initarenapart %s: %r", argv[0]);
	for(k=0; k<spart->narenas; k++)
		delarena(spart->arenas[k]);

	dpart = initarenapart(dst);
	if(dpart == nil)
		sysfatal("loadarenapart %s: %r", argv[1]);
	for(k=0; k<dpart->narenas; k++)
		delarena(dpart->arenas[k]);

	/*
	 * The arena geometries must match or all bets are off.
	 */
	if(spart->narenas != dpart->narenas)
		sysfatal("arena count mismatch: %d vs %d", spart->narenas, dpart->narenas);
	for(k=0; k<spart->narenas; k++){
		a = spart->arenas[k];
		b = dpart->arenas[k];
		if(a->version != b->version)
			sysfatal("arena %d: version mismatch: %d vs %d", k, a->version, b->version);
		if(a->blocksize != b->blocksize)
			sysfatal("arena %d: blocksize mismatch: %d vs %d", k, a->blocksize, b->blocksize);
		if(a->size != b->size)
			sysfatal("arena %d: size mismatch: %,lld vs %,lld", k, a->size, b->size);
		if(strcmp(a->name, b->name) != 0)
			sysfatal("arena %d: name mismatch: %s vs %s", k, a->name, b->name);
	}

	/*
	 * Mirror one arena at a time.
	 * The channel is unbuffered, so each send to writeproc is a rendezvous.
	 */
	writechan = chancreate(sizeof(void*), 0);
	vtproc(writeproc, nil);

	mirrormany(spart, dpart, ranges);

	/* nil tells writeproc to return */
	sendp(writechan, nil);
	threadexitsall(status);
}