| /* |
| * Check and fix an arena partition. |
| * |
| * This is a lot grittier than the rest of Venti because |
| * it can't just give up if a byte here or there is wrong. |
| * |
| * The rule here (hopefully followed!) is that block corruption |
| * only ever has a local effect -- there are no blocks that you |
| * can wipe out that will cause large portions of |
| * uncorrupted data blocks to be useless. |
| */ |
| |
| #include "stdinc.h" |
| #include "dat.h" |
| #include "fns.h" |
| #include "whack.h" |
| |
/* Round x up to a multiple of n; the mask trick requires n to be a power of two. */
#define ROUNDUP(x, n)	(((x) + (n) - 1) & ~((n) - 1))
| |
| #pragma varargck type "z" uvlong |
| #pragma varargck type "z" vlong |
| #pragma varargck type "t" uint |
| |
/* Byte-size units, plus the fixed probe size used when peeking at the disk. */
enum
{
	K = 1<<10,
	M = 1<<20,
	G = 1<<30,

	Block = 4*K,
};
| |
| int debugsha1; |
| |
| int verbose; |
| Part *part; |
| char *file; |
| char *basename; |
| char *dumpbase; |
| int fix; |
| int badreads; |
| int unseal; |
| uchar zero[MaxDiskBlock]; |
| |
| Arena lastarena; |
| ArenaPart ap; |
| uvlong arenasize; |
| int nbadread; |
| int nbad; |
| uvlong partend; |
| void checkarena(vlong, int); |
| |
/*
 * Print the command synopsis on standard error and terminate all threads.
 * The exit status is the non-empty string "usage" so that the invoking
 * shell sees the failure; a nil/0 status would report success.
 */
void
usage(void)
{
	fprint(2, "usage: fixarenas [-fv] [-a arenasize] [-b blocksize] file [ranges]\n");
	threadexitsall("usage");
}
| |
| /* |
| * Format number in simplest way that is okay with unittoull. |
| */ |
| static int |
| zfmt(Fmt *fmt) |
| { |
| vlong x; |
| |
| x = va_arg(fmt->args, vlong); |
| if(x == 0) |
| return fmtstrcpy(fmt, "0"); |
| if(x%G == 0) |
| return fmtprint(fmt, "%lldG", x/G); |
| if(x%M == 0) |
| return fmtprint(fmt, "%lldM", x/M); |
| if(x%K == 0) |
| return fmtprint(fmt, "%lldK", x/K); |
| return fmtprint(fmt, "%lld", x); |
| } |
| |
| /* |
| * Format time like ctime without newline. |
| */ |
| static int |
| tfmt(Fmt *fmt) |
| { |
| uint t; |
| char buf[30]; |
| |
| t = va_arg(fmt->args, uint); |
| strcpy(buf, ctime(t)); |
| buf[28] = 0; |
| return fmtstrcpy(fmt, buf); |
| } |
| |
| /* |
| * Coalesce messages about unreadable sectors into larger ranges. |
| * bad(0, 0) flushes the buffer. |
| */ |
| static void |
| bad(char *msg, vlong o, int len) |
| { |
| static vlong lb0, lb1; |
| static char *lmsg; |
| |
| if(msg == nil) |
| msg = lmsg; |
| if(o == -1){ |
| lmsg = nil; |
| lb0 = 0; |
| lb1 = 0; |
| return; |
| } |
| if(lb1 != o || (msg && lmsg && strcmp(msg, lmsg) != 0)){ |
| if(lb0 != lb1) |
| print("%s %#llux+%#llux (%,lld+%,lld)\n", |
| lmsg, lb0, lb1-lb0, lb0, lb1-lb0); |
| lb0 = o; |
| } |
| lmsg = msg; |
| lb1 = o+len; |
| } |
| |
| /* |
| * Read in the len bytes of data at the offset. If can't for whatever reason, |
| * fill it with garbage but print an error. |
| */ |
| static uchar* |
| readdisk(uchar *buf, vlong offset, int len) |
| { |
| int i, j, k, n; |
| |
| if(offset >= partend){ |
| memset(buf, 0xFB, len); |
| return buf; |
| } |
| |
| if(offset+len > partend){ |
| memset(buf, 0xFB, len); |
| len = partend - offset; |
| } |
| |
| if(readpart(part, offset, buf, len) >= 0) |
| return buf; |
| |
| /* |
| * The read failed. Clear the buffer to nonsense, and |
| * then try reading in smaller pieces. If that fails, |
| * read in even smaller pieces. And so on down to sectors. |
| */ |
| memset(buf, 0xFD, len); |
| for(i=0; i<len; i+=64*K){ |
| n = 64*K; |
| if(i+n > len) |
| n = len-i; |
| if(readpart(part, offset+i, buf+i, n) >= 0) |
| continue; |
| for(j=i; j<len && j<i+64*K; j+=4*K){ |
| n = 4*K; |
| if(j+n > len) |
| n = len-j; |
| if(readpart(part, offset+j, buf+j, n) >= 0) |
| continue; |
| for(k=j; k<len && k<j+4*K; k+=512){ |
| if(readpart(part, offset+k, buf+k, 512) >= 0) |
| continue; |
| bad("disk read failed at", k, 512); |
| badreads++; |
| } |
| } |
| } |
| bad(nil, 0, 0); |
| return buf; |
| } |
| |
| /* |
| * Buffer to support running SHA1 hash of the disk. |
| */ |
| typedef struct Shabuf Shabuf; |
| struct Shabuf |
| { |
| int fd; |
| vlong offset; |
| DigestState state; |
| int rollback; |
| vlong r0; |
| DigestState *hist; |
| int nhist; |
| }; |
| |
| void |
| sbdebug(Shabuf *sb, char *file) |
| { |
| int fd; |
| |
| if(sb->fd > 0){ |
| close(sb->fd); |
| sb->fd = 0; |
| } |
| if((fd = create(file, OWRITE, 0666)) < 0) |
| return; |
| if(fd == 0){ |
| fd = dup(fd, -1); |
| close(0); |
| } |
| sb->fd = fd; |
| } |
| |
| void |
| sbupdate(Shabuf *sb, uchar *p, vlong offset, int len) |
| { |
| int n, x; |
| vlong o; |
| |
| if(sb->rollback && !sb->hist){ |
| sb->r0 = offset; |
| sb->nhist = 1; |
| sb->hist = vtmalloc(sb->nhist*sizeof *sb->hist); |
| memset(sb->hist, 0, sizeof sb->hist[0]); |
| } |
| if(sb->r0 == 0) |
| sb->r0 = offset; |
| |
| if(sb->offset < offset || sb->offset >= offset+len){ |
| if(0) print("sbupdate %p %#llux+%d but offset=%#llux\n", |
| p, offset, len, sb->offset); |
| return; |
| } |
| x = sb->offset - offset; |
| if(0) print("sbupdate %p %#llux+%d skip %d\n", |
| sb, offset, len, x); |
| if(x){ |
| p += x; |
| offset += x; |
| len -= x; |
| } |
| assert(sb->offset == offset); |
| |
| if(sb->fd > 0) |
| pwrite(sb->fd, p, len, offset - sb->r0); |
| |
| if(!sb->rollback){ |
| sha1(p, len, nil, &sb->state); |
| sb->offset += len; |
| return; |
| } |
| |
| /* save state every 4M so we can roll back quickly */ |
| o = offset - sb->r0; |
| while(len > 0){ |
| n = 4*M - o%(4*M); |
| if(n > len) |
| n = len; |
| sha1(p, n, nil, &sb->state); |
| sb->offset += n; |
| o += n; |
| p += n; |
| len -= n; |
| if(o%(4*M) == 0){ |
| x = o/(4*M); |
| if(x >= sb->nhist){ |
| if(x != sb->nhist) |
| print("oops! x=%d nhist=%d\n", x, sb->nhist); |
| sb->nhist += 32; |
| sb->hist = vtrealloc(sb->hist, sb->nhist*sizeof *sb->hist); |
| } |
| sb->hist[x] = sb->state; |
| } |
| } |
| } |
| |
| void |
| sbdiskhash(Shabuf *sb, vlong eoffset) |
| { |
| static uchar dbuf[4*M]; |
| int n; |
| |
| while(sb->offset < eoffset){ |
| n = sizeof dbuf; |
| if(sb->offset+n > eoffset) |
| n = eoffset - sb->offset; |
| readdisk(dbuf, sb->offset, n); |
| sbupdate(sb, dbuf, sb->offset, n); |
| } |
| } |
| |
| void |
| sbrollback(Shabuf *sb, vlong offset) |
| { |
| int x; |
| vlong o; |
| Dir d; |
| |
| if(!sb->rollback || !sb->r0){ |
| print("cannot rollback sha\n"); |
| return; |
| } |
| if(offset >= sb->offset) |
| return; |
| o = offset - sb->r0; |
| x = o/(4*M); |
| if(x >= sb->nhist){ |
| print("cannot rollback sha\n"); |
| return; |
| } |
| sb->state = sb->hist[x]; |
| sb->offset = sb->r0 + x*4*M; |
| assert(sb->offset <= offset); |
| |
| if(sb->fd > 0){ |
| nulldir(&d); |
| d.length = sb->offset - sb->r0; |
| dirfwstat(sb->fd, &d); |
| } |
| } |
| |
| void |
| sbscore(Shabuf *sb, uchar *score) |
| { |
| if(sb->hist){ |
| free(sb->hist); |
| sb->hist = nil; |
| } |
| sha1(nil, 0, score, &sb->state); |
| } |
| |
| /* |
| * If we're fixing arenas, then editing this memory edits the disk! |
| * It will be written back out as new data is paged in. |
| */ |
| uchar buf[4*M]; |
| uchar sbuf[4*M]; |
| vlong bufoffset; |
| int buflen; |
| |
| static void pageout(void); |
| static uchar* |
| pagein(vlong offset, int len) |
| { |
| pageout(); |
| if(offset >= partend){ |
| memset(buf, 0xFB, sizeof buf); |
| return buf; |
| } |
| |
| if(offset+len > partend){ |
| memset(buf, 0xFB, sizeof buf); |
| len = partend - offset; |
| } |
| bufoffset = offset; |
| buflen = len; |
| readdisk(buf, offset, len); |
| memmove(sbuf, buf, len); |
| return buf; |
| } |
| |
| static void |
| pageout(void) |
| { |
| if(buflen==0 || !fix || memcmp(buf, sbuf, buflen) == 0){ |
| buflen = 0; |
| return; |
| } |
| if(writepart(part, bufoffset, buf, buflen) < 0) |
| print("disk write failed at %#llux+%#ux (%,lld+%,d)\n", |
| bufoffset, buflen, bufoffset, buflen); |
| buflen = 0; |
| } |
| |
| static void |
| zerorange(vlong offset, int len) |
| { |
| int i; |
| vlong ooff; |
| int olen; |
| enum { MinBlock = 4*K, MaxBlock = 8*K }; |
| |
| if(0) |
| if(bufoffset <= offset && offset+len <= bufoffset+buflen){ |
| memset(buf+(offset-bufoffset), 0, len); |
| return; |
| } |
| |
| ooff = bufoffset; |
| olen = buflen; |
| |
| i = offset%MinBlock; |
| if(i+len < MaxBlock){ |
| pagein(offset-i, (len+MinBlock-1)&~(MinBlock-1)); |
| memset(buf+i, 0, len); |
| }else{ |
| pagein(offset-i, MaxBlock); |
| memset(buf+i, 0, MaxBlock-i); |
| offset += MaxBlock-i; |
| len -= MaxBlock-i; |
| while(len >= MaxBlock){ |
| pagein(offset, MaxBlock); |
| memset(buf, 0, MaxBlock); |
| offset += MaxBlock; |
| len -= MaxBlock; |
| } |
| pagein(offset, (len+MinBlock-1)&~(MinBlock-1)); |
| memset(buf, 0, len); |
| } |
| pagein(ooff, olen); |
| } |
| |
| /* |
| * read/write integers |
| * |
| static void |
| p16(uchar *p, u16int u) |
| { |
| p[0] = (u>>8) & 0xFF; |
| p[1] = u & 0xFF; |
| } |
| */ |
| |
/* Decode the big-endian 16-bit integer stored at p. */
static u16int
u16(uchar *p)
{
	u16int hi, lo;

	hi = p[0];
	lo = p[1];
	return (hi<<8) | lo;
}
| |
/* Store u at p as a big-endian 32-bit integer. */
static void
p32(uchar *p, u32int u)
{
	int i;

	/* fill from the least significant byte backwards */
	for(i=3; i>=0; i--){
		p[i] = u & 0xFF;
		u >>= 8;
	}
}
| |
/*
 * Decode the big-endian 32-bit integer stored at p.
 * Each byte is widened to u32int before shifting: a uchar promotes to
 * (signed) int, and shifting a value >= 0x80 left by 24 would overflow it.
 */
static u32int
u32(uchar *p)
{
	return ((u32int)p[0]<<24) | ((u32int)p[1]<<16) | ((u32int)p[2]<<8) | (u32int)p[3];
}
| |
| /* |
| static void |
| p64(uchar *p, u64int u) |
| { |
| p32(p, u>>32); |
| p32(p+4, u); |
| } |
| */ |
| |
| static u64int |
| u64(uchar *p) |
| { |
| return ((u64int)u32(p)<<32) | u32(p+4); |
| } |
| |
/*
 * qsort comparison function for arrays of vlong:
 * returns -1, 0 or 1 as *va is less than, equal to or greater than *vb.
 */
static int
vlongcmp(const void *va, const void *vb)
{
	vlong a, b;

	a = *(const vlong*)va;
	b = *(const vlong*)vb;
	if(a < b)
		return -1;
	if(a > b)
		return 1;
	return 0;
}
| |
/* D and S are in draw.h */
#define D VD
#define S VS

/*
 * Display hints or'ed into the upper bits of Info.len; showdiffs uses
 * them to choose how a field is printed.  N masks off the byte length.
 */
enum
{
	D = 1<<16,	/* decimal number */
	Z = 2<<16,	/* size, printed with %z */
	S = 3<<16,	/* string */
	T = 4<<16,	/* time, printed with %t */
	N = (1<<16)-1
};
/*
 * One field of an on-disk structure, as described to showdiffs.
 * A table of these ends with an entry whose length is 0.
 */
typedef struct Info Info;
struct Info
{
	int len;	/* byte length (len&N), optionally or'ed with D, Z, S or T */
	char *name;	/* field name used in messages */
};
| |
| Info partinfo[] = { |
| 4, "magic", |
| D|4, "version", |
| Z|4, "blocksize", |
| 4, "arenabase", |
| 0 |
| }; |
| |
| Info headinfo4[] = { |
| 4, "magic", |
| D|4, "version", |
| S|ANameSize, "name", |
| Z|4, "blocksize", |
| Z|8, "size", |
| 0 |
| }; |
| |
| Info headinfo5[] = { |
| 4, "magic", |
| D|4, "version", |
| S|ANameSize, "name", |
| Z|4, "blocksize", |
| Z|8, "size", |
| 4, "clumpmagic", |
| 0 |
| }; |
| |
| Info tailinfo4[] = { |
| 4, "magic", |
| D|4, "version", |
| S|ANameSize, "name", |
| D|4, "clumps", |
| D|4, "cclumps", |
| T|4, "ctime", |
| T|4, "wtime", |
| D|8, "used", |
| D|8, "uncsize", |
| 1, "sealed", |
| 0 |
| }; |
| |
| Info tailinfo4a[] = { |
| /* tailinfo 4 */ |
| 4, "magic", |
| D|4, "version", |
| S|ANameSize, "name", |
| D|4, "clumps", |
| D|4, "cclumps", |
| T|4, "ctime", |
| T|4, "wtime", |
| D|8, "used", |
| D|8, "uncsize", |
| 1, "sealed", |
| |
| /* mem stats */ |
| 1, "extension", |
| D|4, "mem.clumps", |
| D|4, "mem.cclumps", |
| D|8, "mem.used", |
| D|8, "mem.uncsize", |
| 1, "mem.sealed", |
| 0 |
| }; |
| |
| Info tailinfo5[] = { |
| 4, "magic", |
| D|4, "version", |
| S|ANameSize, "name", |
| D|4, "clumps", |
| D|4, "cclumps", |
| T|4, "ctime", |
| T|4, "wtime", |
| 4, "clumpmagic", |
| D|8, "used", |
| D|8, "uncsize", |
| 1, "sealed", |
| 0 |
| }; |
| |
| Info tailinfo5a[] = { |
| /* tailinfo 5 */ |
| 4, "magic", |
| D|4, "version", |
| S|ANameSize, "name", |
| D|4, "clumps", |
| D|4, "cclumps", |
| T|4, "ctime", |
| T|4, "wtime", |
| 4, "clumpmagic", |
| D|8, "used", |
| D|8, "uncsize", |
| 1, "sealed", |
| |
| /* mem stats */ |
| 1, "extension", |
| D|4, "mem.clumps", |
| D|4, "mem.cclumps", |
| D|8, "mem.used", |
| D|8, "mem.uncsize", |
| 1, "mem.sealed", |
| 0 |
| }; |
| |
| void |
| showdiffs(uchar *want, uchar *have, int len, Info *info) |
| { |
| int n; |
| |
| while(len > 0 && (n=info->len&N) > 0){ |
| if(memcmp(have, want, n) != 0){ |
| switch(info->len){ |
| case 1: |
| print("\t%s: correct=%d disk=%d\n", |
| info->name, *want, *have); |
| break; |
| case 4: |
| print("\t%s: correct=%#ux disk=%#ux\n", |
| info->name, u32(want), u32(have)); |
| break; |
| case D|4: |
| print("\t%s: correct=%,ud disk=%,ud\n", |
| info->name, u32(want), u32(have)); |
| break; |
| case T|4: |
| print("\t%s: correct=%t\n\t\tdisk=%t\n", |
| info->name, u32(want), u32(have)); |
| break; |
| case Z|4: |
| print("\t%s: correct=%z disk=%z\n", |
| info->name, (uvlong)u32(want), (uvlong)u32(have)); |
| break; |
| case D|8: |
| print("\t%s: correct=%,lld disk=%,lld\n", |
| info->name, u64(want), u64(have)); |
| break; |
| case Z|8: |
| print("\t%s: correct=%z disk=%z\n", |
| info->name, u64(want), u64(have)); |
| break; |
| case S|ANameSize: |
| print("\t%s: correct=%s disk=%.*s\n", |
| info->name, (char*)want, |
| utfnlen((char*)have, ANameSize-1), |
| (char*)have); |
| break; |
| default: |
| print("\t%s: correct=%.*H disk=%.*H\n", |
| info->name, n, want, n, have); |
| break; |
| } |
| } |
| have += n; |
| want += n; |
| len -= n; |
| info++; |
| } |
| if(len > 0 && memcmp(have, want, len) != 0){ |
| if(memcmp(want, zero, len) != 0) |
| print("!!\textra want data in showdiffs (bug in fixarenas)\n"); |
| else |
| print("\tnon-zero data on disk after structure\n"); |
| if(verbose > 1){ |
| print("want: %.*H\n", len, want); |
| print("have: %.*H\n", len, have); |
| } |
| } |
| } |
| |
| /* |
| * Does part begin with an arena? |
| */ |
| int |
| isonearena(void) |
| { |
| return u32(pagein(0, Block)) == ArenaHeadMagic; |
| } |
| |
| static int tabsizes[] = { 16*1024, 64*1024, 512*1024, 768*1024, }; |
| /* |
| * Poke around on the disk to guess what the ArenaPart numbers are. |
| */ |
| void |
| guessgeometry(void) |
| { |
| int i, j, n, bestn, ndiff, nhead, ntail; |
| uchar *p, *ep, *sp; |
| u64int diff[100], head[20], tail[20]; |
| u64int offset, bestdiff; |
| |
| ap.version = ArenaPartVersion; |
| |
| if(arenasize == 0 || ap.blocksize == 0){ |
| /* |
| * The ArenaPart block at offset PartBlank may be corrupt or just wrong. |
| * Instead, look for the individual arena headers and tails, which there |
| * are many of, and once we've seen enough, infer the spacing. |
| * |
| * Of course, nothing in the file format requires that arenas be evenly |
| * spaced, but fmtarenas always does that for us. |
| */ |
| nhead = 0; |
| ntail = 0; |
| for(offset=PartBlank; offset<partend; offset+=4*M){ |
| p = pagein(offset, 4*M); |
| for(sp=p, ep=p+4*M; p<ep; p+=K){ |
| if(u32(p) == ArenaHeadMagic && nhead < nelem(head)){ |
| if(verbose) |
| print("arena head at %#llx\n", offset+(p-sp)); |
| head[nhead++] = offset+(p-sp); |
| } |
| if(u32(p) == ArenaMagic && ntail < nelem(tail)){ |
| tail[ntail++] = offset+(p-sp); |
| if(verbose) |
| print("arena tail at %#llx\n", offset+(p-sp)); |
| } |
| } |
| if(nhead == nelem(head) && ntail == nelem(tail)) |
| break; |
| } |
| if(nhead < 3 && ntail < 3) |
| sysfatal("too few intact arenas: %d heads, %d tails", nhead, ntail); |
| |
| /* |
| * Arena size is likely the most common |
| * inter-head or inter-tail spacing. |
| */ |
| ndiff = 0; |
| for(i=1; i<nhead; i++) |
| diff[ndiff++] = head[i] - head[i-1]; |
| for(i=1; i<ntail; i++) |
| diff[ndiff++] = tail[i] - tail[i-1]; |
| qsort(diff, ndiff, sizeof diff[0], vlongcmp); |
| bestn = 0; |
| bestdiff = 0; |
| for(i=1, n=1; i<=ndiff; i++, n++){ |
| if(i==ndiff || diff[i] != diff[i-1]){ |
| if(n > bestn){ |
| bestn = n; |
| bestdiff = diff[i-1]; |
| } |
| n = 0; |
| } |
| } |
| print("arena size likely %z (%d of %d)\n", bestdiff, bestn, ndiff); |
| if(arenasize != 0 && arenasize != bestdiff) |
| print("using user-specified size %z instead\n", arenasize); |
| else |
| arenasize = bestdiff; |
| |
| /* |
| * The arena tail for an arena is arenasize-blocksize from the head. |
| */ |
| ndiff = 0; |
| for(i=j=0; i<nhead && j<ntail; ){ |
| if(tail[j] < head[i]){ |
| j++; |
| continue; |
| } |
| if(tail[j] < head[i]+arenasize){ |
| diff[ndiff++] = head[i]+arenasize - tail[j]; |
| j++; |
| continue; |
| } |
| i++; |
| } |
| if(ndiff < 3) |
| sysfatal("too few intact arenas: %d head, tail pairs", ndiff); |
| qsort(diff, ndiff, sizeof diff[0], vlongcmp); |
| bestn = 0; |
| bestdiff = 0; |
| for(i=1, n=1; i<=ndiff; i++, n++){ |
| if(i==ndiff || diff[i] != diff[i-1]){ |
| if(n > bestn){ |
| bestn = n; |
| bestdiff = diff[i-1]; |
| } |
| n = 0; |
| } |
| } |
| print("block size likely %z (%d of %d)\n", bestdiff, bestn, ndiff); |
| if(ap.blocksize != 0 && ap.blocksize != bestdiff) |
| print("using user-specified size %z instead\n", (vlong)ap.blocksize); |
| else |
| ap.blocksize = bestdiff; |
| if(ap.blocksize == 0 || ap.blocksize&(ap.blocksize-1)) |
| sysfatal("block size not a power of two"); |
| if(ap.blocksize > MaxDiskBlock) |
| sysfatal("block size too big (max=%d)", MaxDiskBlock); |
| |
| /* |
| * Use head/tail information to deduce arena base. |
| */ |
| ndiff = 0; |
| for(i=0; i<nhead; i++) |
| diff[ndiff++] = head[i]%arenasize; |
| for(i=0; i<ntail; i++) |
| diff[ndiff++] = (tail[i]+ap.blocksize)%arenasize; |
| qsort(diff, ndiff, sizeof diff[0], vlongcmp); |
| bestn = 0; |
| bestdiff = 0; |
| for(i=1, n=1; i<=ndiff; i++, n++){ |
| if(i==ndiff || diff[i] != diff[i-1]){ |
| if(n > bestn){ |
| bestn = n; |
| bestdiff = diff[i-1]; |
| } |
| n = 0; |
| } |
| } |
| ap.arenabase = bestdiff; |
| } |
| |
| ap.tabbase = ROUNDUP(PartBlank+HeadSize, ap.blocksize); |
| /* |
| * XXX pick up table, check arenabase. |
| * XXX pick up table, record base name. |
| */ |
| |
| /* |
| * Somewhat standard computation. |
| * Fmtarenas used to use 64k tab, now uses 512k tab. |
| */ |
| if(ap.arenabase == 0){ |
| print("trying standard arena bases...\n"); |
| for(i=0; i<nelem(tabsizes); i++){ |
| ap.arenabase = ROUNDUP(PartBlank+HeadSize+tabsizes[i], ap.blocksize); |
| p = pagein(ap.arenabase, Block); |
| if(u32(p) == ArenaHeadMagic) |
| break; |
| } |
| } |
| p = pagein(ap.arenabase, Block); |
| print("arena base likely %z%s\n", (vlong)ap.arenabase, |
| u32(p)!=ArenaHeadMagic ? " (but no arena head there)" : ""); |
| |
| ap.tabsize = ap.arenabase - ap.tabbase; |
| } |
| |
| /* |
| * Check the arena partition blocks and then the arenas listed in range. |
| */ |
| void |
| checkarenas(char *range) |
| { |
| char *s, *t; |
| int i, lo, hi, narena; |
| uchar dbuf[HeadSize]; |
| uchar *p; |
| |
| guessgeometry(); |
| |
| partend -= partend%ap.blocksize; |
| |
| memset(dbuf, 0, sizeof dbuf); |
| packarenapart(&ap, dbuf); |
| p = pagein(PartBlank, Block); |
| if(memcmp(p, dbuf, HeadSize) != 0){ |
| print("on-disk arena part superblock incorrect\n"); |
| showdiffs(dbuf, p, HeadSize, partinfo); |
| } |
| memmove(p, dbuf, HeadSize); |
| |
| narena = (partend-ap.arenabase + arenasize-1)/arenasize; |
| if(range == nil){ |
| for(i=0; i<narena; i++) |
| checkarena(ap.arenabase+(vlong)i*arenasize, i); |
| }else if(strcmp(range, "none") == 0){ |
| /* nothing */ |
| }else{ |
| /* parse, e.g., -4,8-9,10- */ |
| for(s=range; *s; s=t){ |
| t = strchr(s, ','); |
| if(t) |
| *t++ = 0; |
| else |
| t = s+strlen(s); |
| if(*s == '-') |
| lo = 0; |
| else |
| lo = strtol(s, &s, 0); |
| hi = lo; |
| if(*s == '-'){ |
| s++; |
| if(*s == 0) |
| hi = narena-1; |
| else |
| hi = strtol(s, &s, 0); |
| } |
| if(*s != 0){ |
| print("bad arena range: %s\n", s); |
| continue; |
| } |
| for(i=lo; i<=hi; i++) |
| checkarena(ap.arenabase+(vlong)i*arenasize, i); |
| } |
| } |
| } |
| |
| /* |
| * Is there a clump here at p? |
| */ |
| static int |
| isclump(uchar *p, Clump *cl, u32int *pmagic) |
| { |
| int n; |
| u32int magic; |
| uchar score[VtScoreSize], *bp; |
| Unwhack uw; |
| uchar ubuf[70*1024]; |
| |
| bp = p; |
| magic = u32(p); |
| if(magic == 0) |
| return 0; |
| p += U32Size; |
| |
| cl->info.type = vtfromdisktype(*p); |
| if(cl->info.type == 0xFF) |
| return 0; |
| p++; |
| cl->info.size = u16(p); |
| p += U16Size; |
| cl->info.uncsize = u16(p); |
| if(cl->info.size > cl->info.uncsize) |
| return 0; |
| p += U16Size; |
| scorecp(cl->info.score, p); |
| p += VtScoreSize; |
| cl->encoding = *p; |
| p++; |
| cl->creator = u32(p); |
| p += U32Size; |
| cl->time = u32(p); |
| p += U32Size; |
| |
| switch(cl->encoding){ |
| case ClumpENone: |
| if(cl->info.size != cl->info.uncsize) |
| return 0; |
| scoremem(score, p, cl->info.size); |
| if(scorecmp(score, cl->info.score) != 0) |
| return 0; |
| break; |
| case ClumpECompress: |
| if(cl->info.size >= cl->info.uncsize) |
| return 0; |
| unwhackinit(&uw); |
| n = unwhack(&uw, ubuf, cl->info.uncsize, p, cl->info.size); |
| if(n != cl->info.uncsize) |
| return 0; |
| scoremem(score, ubuf, cl->info.uncsize); |
| if(scorecmp(score, cl->info.score) != 0) |
| return 0; |
| break; |
| default: |
| return 0; |
| } |
| p += cl->info.size; |
| |
| /* it all worked out in the end */ |
| *pmagic = magic; |
| return p - bp; |
| } |
| |
| /* |
| * All ClumpInfos seen in this arena. |
| * Kept in binary tree so we can look up by score. |
| */ |
| typedef struct Cit Cit; |
| struct Cit |
| { |
| int left; |
| int right; |
| vlong corrupt; |
| ClumpInfo ci; |
| }; |
| Cit *cibuf; |
| int ciroot; |
| int ncibuf, mcibuf; |
| |
| void |
| resetcibuf(void) |
| { |
| ncibuf = 0; |
| ciroot = -1; |
| } |
| |
| int* |
| ltreewalk(int *p, uchar *score) |
| { |
| int i; |
| |
| for(;;){ |
| if(*p == -1) |
| return p; |
| i = scorecmp(cibuf[*p].ci.score, score); |
| if(i == 0) |
| return p; |
| if(i < 0) |
| p = &cibuf[*p].right; |
| else |
| p = &cibuf[*p].left; |
| } |
| } |
| |
| void |
| addcibuf(ClumpInfo *ci, vlong corrupt) |
| { |
| Cit *cit; |
| |
| if(ncibuf == mcibuf){ |
| mcibuf += 131072; |
| cibuf = vtrealloc(cibuf, mcibuf*sizeof cibuf[0]); |
| } |
| cit = &cibuf[ncibuf]; |
| cit->ci = *ci; |
| cit->left = -1; |
| cit->right = -1; |
| cit->corrupt = corrupt; |
| if(!corrupt) |
| *ltreewalk(&ciroot, ci->score) = ncibuf; |
| ncibuf++; |
| } |
| |
| void |
| addcicorrupt(vlong len) |
| { |
| static ClumpInfo zci; |
| |
| addcibuf(&zci, len); |
| } |
| |
| int |
| haveclump(uchar *score) |
| { |
| int i; |
| int p; |
| |
| p = ciroot; |
| for(;;){ |
| if(p == -1) |
| return 0; |
| i = scorecmp(cibuf[p].ci.score, score); |
| if(i == 0) |
| return 1; |
| if(i < 0) |
| p = cibuf[p].right; |
| else |
| p = cibuf[p].left; |
| } |
| } |
| |
| int |
| matchci(ClumpInfo *ci, uchar *p) |
| { |
| if(ci->type != vtfromdisktype(p[0])) |
| return 0; |
| if(ci->size != u16(p+1)) |
| return 0; |
| if(ci->uncsize != u16(p+3)) |
| return 0; |
| if(scorecmp(ci->score, p+5) != 0) |
| return 0; |
| return 1; |
| } |
| |
| int |
| sealedarena(uchar *p, int blocksize) |
| { |
| int v, n; |
| |
| v = u32(p+4); |
| switch(v){ |
| default: |
| return 0; |
| case ArenaVersion4: |
| n = ArenaSize4; |
| break; |
| case ArenaVersion5: |
| n = ArenaSize5; |
| break; |
| } |
| if(p[n-1] != 1){ |
| print("arena tail says not sealed\n"); |
| return 0; |
| } |
| if(memcmp(p+n, zero, blocksize-VtScoreSize-n) != 0){ |
| print("arena tail followed by non-zero data\n"); |
| return 0; |
| } |
| if(memcmp(p+blocksize-VtScoreSize, zero, VtScoreSize) == 0){ |
| print("arena score zero\n"); |
| return 0; |
| } |
| return 1; |
| } |
| |
/*
 * Is name plausible for arena number n?  It must pass nameok and end
 * with the decimal digits of n.  For n == 0 the required suffix is
 * empty, so any name accepted by nameok will do.
 * Returns 1 if plausible, 0 if not.
 */
int
okayname(char *name, int n)
{
	char suffix[20];
	int nlen, slen;

	if(nameok(name) < 0)
		return 0;
	sprint(suffix, "%d", n);
	if(n == 0)
		suffix[0] = 0;
	nlen = strlen(name);
	slen = strlen(suffix);
	if(nlen < slen)
		return 0;
	return strcmp(name+nlen-slen, suffix) == 0;
}
| |
| int |
| clumpinfocmp(ClumpInfo *a, ClumpInfo *b) |
| { |
| if(a->type != b->type) |
| return a->type - b->type; |
| if(a->size != b->size) |
| return a->size - b->size; |
| if(a->uncsize != b->uncsize) |
| return a->uncsize - b->uncsize; |
| return scorecmp(a->score, b->score); |
| } |
| |
| ClumpInfo* |
| loadci(vlong offset, Arena *arena, int nci) |
| { |
| int i, j, per; |
| uchar *p, *sp; |
| ClumpInfo *bci, *ci; |
| |
| per = arena->blocksize/ClumpInfoSize; |
| bci = vtmalloc(nci*sizeof bci[0]); |
| ci = bci; |
| offset += arena->size - arena->blocksize; |
| p = sp = nil; |
| for(i=0; i<nci; i+=per){ |
| if(p == sp){ |
| sp = pagein(offset-4*M, 4*M); |
| p = sp+4*M; |
| } |
| p -= arena->blocksize; |
| offset -= arena->blocksize; |
| for(j=0; j<per && i+j<nci; j++) |
| unpackclumpinfo(ci++, p+j*ClumpInfoSize); |
| } |
| return bci; |
| } |
| |
| vlong |
| writeci(vlong offset, Arena *arena, ClumpInfo *ci, int nci) |
| { |
| int i, j, per; |
| uchar *p, *sp; |
| |
| per = arena->blocksize/ClumpInfoSize; |
| offset += arena->size - arena->blocksize; |
| p = sp = nil; |
| for(i=0; i<nci; i+=per){ |
| if(p == sp){ |
| sp = pagein(offset-4*M, 4*M); |
| p = sp+4*M; |
| } |
| p -= arena->blocksize; |
| offset -= arena->blocksize; |
| memset(p, 0, arena->blocksize); |
| for(j=0; j<per && i+j<nci; j++) |
| packclumpinfo(ci++, p+j*ClumpInfoSize); |
| } |
| pageout(); |
| return offset; |
| } |
| |
| void |
| loadarenabasics(vlong offset0, int anum, ArenaHead *head, Arena *arena) |
| { |
| char dname[ANameSize]; |
| static char lastbase[ANameSize]; |
| uchar *p; |
| Arena oarena; |
| ArenaHead ohead; |
| |
| /* |
| * Fmtarenas makes all arenas the same size |
| * except the last, which may be smaller. |
| * It uses the same block size for arenas as for |
| * the arena partition blocks. |
| */ |
| arena->size = arenasize; |
| if(offset0+arena->size > partend) |
| arena->size = partend - offset0; |
| head->size = arena->size; |
| |
| arena->blocksize = ap.blocksize; |
| head->blocksize = arena->blocksize; |
| |
| /* |
| * Look for clump magic and name in head/tail blocks. |
| * All the other info we will reconstruct just in case. |
| */ |
| p = pagein(offset0, arena->blocksize); |
| memset(&ohead, 0, sizeof ohead); |
| if(unpackarenahead(&ohead, p) >= 0){ |
| head->version = ohead.version; |
| head->clumpmagic = ohead.clumpmagic; |
| if(okayname(ohead.name, anum)) |
| strcpy(head->name, ohead.name); |
| } |
| |
| p = pagein(offset0+arena->size-arena->blocksize, |
| arena->blocksize); |
| memset(&oarena, 0, sizeof oarena); |
| if(unpackarena(&oarena, p) >= 0){ |
| arena->version = oarena.version; |
| arena->clumpmagic = oarena.clumpmagic; |
| if(okayname(oarena.name, anum)) |
| strcpy(arena->name, oarena.name); |
| arena->diskstats.clumps = oarena.diskstats.clumps; |
| print("old arena: sealed=%d\n", oarena.diskstats.sealed); |
| arena->diskstats.sealed = oarena.diskstats.sealed; |
| } |
| |
| /* Head trumps arena. */ |
| if(head->version){ |
| arena->version = head->version; |
| arena->clumpmagic = head->clumpmagic; |
| } |
| if(arena->version == 0) |
| arena->version = ArenaVersion5; |
| if(basename){ |
| if(anum == -1) |
| snprint(arena->name, ANameSize, "%s", basename); |
| else |
| snprint(arena->name, ANameSize, "%s%d", basename, anum); |
| }else if(lastbase[0]) |
| snprint(arena->name, ANameSize, "%s%d", lastbase, anum); |
| else if(head->name[0]) |
| strcpy(arena->name, head->name); |
| else if(arena->name[0] == 0) |
| sysfatal("cannot determine base name for arena; use -n"); |
| strcpy(lastbase, arena->name); |
| sprint(dname, "%d", anum); |
| lastbase[strlen(lastbase)-strlen(dname)] = 0; |
| |
| /* Was working in arena, now copy to head. */ |
| head->version = arena->version; |
| memmove(head->name, arena->name, sizeof head->name); |
| head->blocksize = arena->blocksize; |
| head->size = arena->size; |
| } |
| |
| void |
| shahead(Shabuf *sb, vlong offset0, ArenaHead *head) |
| { |
| uchar headbuf[MaxDiskBlock]; |
| |
| sb->offset = offset0; |
| memset(headbuf, 0, sizeof headbuf); |
| packarenahead(head, headbuf); |
| sbupdate(sb, headbuf, offset0, head->blocksize); |
| } |
| |
| u32int |
| newclumpmagic(int version) |
| { |
| u32int m; |
| |
| if(version == ArenaVersion4) |
| return _ClumpMagic; |
| do{ |
| m = fastrand(); |
| }while(m==0 || m == _ClumpMagic); |
| return m; |
| } |
| |
| /* |
| * Poke around in the arena to find the clump data |
| * and compute the relevant statistics. |
| */ |
| void |
| guessarena(vlong offset0, int anum, ArenaHead *head, Arena *arena, |
| uchar *oldscore, uchar *score) |
| { |
| uchar dbuf[MaxDiskBlock]; |
| int needtozero, clumps, nb1, nb2, minclumps; |
| int inbad, n, ncib, printed, sealing, smart; |
| u32int magic; |
| uchar *sp, *ep, *p; |
| vlong boffset, eoffset, lastclumpend, leaked; |
| vlong offset, toffset, totalcorrupt, v; |
| Clump cl; |
| ClumpInfo *bci, *ci, *eci, *xci; |
| Cit *bcit, *cit, *ecit; |
| Shabuf oldsha, newsha; |
| |
| /* |
| * We expect to find an arena, with data, between offset |
| * and offset+arenasize. With any luck, the data starts at |
| * offset+ap.blocksize. The blocks have variable size and |
| * aren't padded at all, which doesn't give us any alignment |
| * constraints. The blocks are compressed or high entropy, |
| * but the headers are pretty low entropy (except the score): |
| * |
| * type[1] (range 0 thru 9, 13) |
| * size[2] |
| * uncsize[2] (<= size) |
| * |
| * so we can look for these. We check the scores as we go, |
| * so we can't make any wrong turns. If we find ourselves |
| * in a dead end, scan forward looking for a new start. |
| */ |
| |
| resetcibuf(); |
| memset(head, 0, sizeof *head); |
| memset(arena, 0, sizeof *arena); |
| memset(oldscore, 0, VtScoreSize); |
| memset(score, 0, VtScoreSize); |
| memset(&oldsha, 0, sizeof oldsha); |
| memset(&newsha, 0, sizeof newsha); |
| newsha.rollback = 1; |
| |
| if(0){ |
| sbdebug(&oldsha, "old.sha"); |
| sbdebug(&newsha, "new.sha"); |
| } |
| |
| loadarenabasics(offset0, anum, head, arena); |
| |
| /* start the clump hunt */ |
| |
| clumps = 0; |
| totalcorrupt = 0; |
| sealing = 1; |
| boffset = offset0 + arena->blocksize; |
| offset = boffset; |
| eoffset = offset0+arena->size - arena->blocksize; |
| toffset = eoffset; |
| sp = pagein(offset0, 4*M); |
| |
| if(arena->diskstats.sealed){ |
| oldsha.offset = offset0; |
| sbupdate(&oldsha, sp, offset0, 4*M); |
| } |
| ep = sp+4*M; |
| p = sp + (boffset - offset0); |
| ncib = arena->blocksize / ClumpInfoSize; /* ci per block in index */ |
| lastclumpend = offset; |
| nbad = 0; |
| inbad = 0; |
| needtozero = 0; |
| minclumps = 0; |
| while(offset < eoffset){ |
| /* |
| * Shift buffer if we're running out of room. |
| */ |
| if(p+70*K >= ep){ |
| /* |
| * Start the post SHA1 buffer. By now we should know the |
| * clumpmagic and arena version, so we can create a |
| * correct head block to get things going. |
| */ |
| if(sealing && fix && newsha.offset == 0){ |
| newsha.offset = offset0; |
| if(arena->clumpmagic == 0){ |
| if(arena->version == 0) |
| arena->version = ArenaVersion5; |
| arena->clumpmagic = newclumpmagic(arena->version); |
| } |
| head->clumpmagic = arena->clumpmagic; |
| shahead(&newsha, offset0, head); |
| } |
| n = 4*M-256*K; |
| if(sealing && fix){ |
| sbdiskhash(&newsha, bufoffset); |
| sbupdate(&newsha, buf, bufoffset, 4*M-256*K); |
| } |
| pagein(bufoffset+n, 4*M); |
| p -= n; |
| if(arena->diskstats.sealed) |
| sbupdate(&oldsha, buf, bufoffset, 4*M); |
| } |
| |
| /* |
| * Check for a clump at p, which is at offset in the disk. |
| * Duplicate clumps happen in corrupted disks |
| * (the same pattern gets written many times in a row) |
| * and should never happen during regular use. |
| */ |
| magic = 0; |
| if((n = isclump(p, &cl, &magic)) > 0){ |
| /* |
| * If we were in the middle of some corrupted data, |
| * flush a warning about it and then add any clump |
| * info blocks as necessary. |
| */ |
| if(inbad){ |
| inbad = 0; |
| v = offset-lastclumpend; |
| if(needtozero){ |
| zerorange(lastclumpend, v); |
| sbrollback(&newsha, lastclumpend); |
| print("corrupt clump data - %#llux+%#llux (%,llud bytes)\n", |
| lastclumpend, v, v); |
| } |
| addcicorrupt(v); |
| totalcorrupt += v; |
| nb1 = (minclumps+ncib-1)/ncib; |
| minclumps += (v+ClumpSize+VtMaxLumpSize-1)/(ClumpSize+VtMaxLumpSize); |
| nb2 = (minclumps+ncib-1)/ncib; |
| eoffset -= (nb2-nb1)*arena->blocksize; |
| } |
| |
| if(haveclump(cl.info.score)) |
| print("warning: duplicate clump %d %V at %#llux+%#d\n", cl.info.type, cl.info.score, offset, n); |
| |
| /* |
| * If clumps use different magic numbers, we don't care. |
| * We'll just use the first one we find and make the others |
| * follow suit. |
| */ |
| if(arena->clumpmagic == 0){ |
| print("clump type %d size %d score %V magic %x\n", |
| cl.info.type, cl.info.size, cl.info.score, magic); |
| arena->clumpmagic = magic; |
| if(magic == _ClumpMagic) |
| arena->version = ArenaVersion4; |
| else |
| arena->version = ArenaVersion5; |
| } |
| if(magic != arena->clumpmagic) |
| p32(p, arena->clumpmagic); |
| if(clumps == 0) |
| arena->ctime = cl.time; |
| |
| /* |
| * Record the clump, update arena stats, |
| * grow clump info blocks if needed. |
| */ |
| if(verbose > 1) |
| print("\tclump %d: %d %V at %#llux+%#ux (%d)\n", |
| clumps, cl.info.type, cl.info.score, offset, n, n); |
| addcibuf(&cl.info, 0); |
| if(minclumps%ncib == 0) |
| eoffset -= arena->blocksize; |
| minclumps++; |
| clumps++; |
| if(cl.encoding != ClumpENone) |
| arena->diskstats.cclumps++; |
| arena->diskstats.uncsize += cl.info.uncsize; |
| arena->wtime = cl.time; |
| |
| /* |
| * Move to next clump. |
| */ |
| offset += n; |
| p += n; |
| lastclumpend = offset; |
| }else{ |
| /* |
| * Overwrite malformed clump data with zeros later. |
| * For now, just record whether it needs to be overwritten. |
| * Bad regions must be of size at least ClumpSize. |
| * Postponing the overwriting keeps us from writing past |
| * the end of the arena data (which might be directory data) |
| * with zeros. |
| */ |
| if(!inbad){ |
| inbad = 1; |
| needtozero = 0; |
| if(memcmp(p, zero, ClumpSize) != 0) |
| needtozero = 1; |
| p += ClumpSize; |
| offset += ClumpSize; |
| nbad++; |
| }else{ |
| if(*p != 0) |
| needtozero = 1; |
| p++; |
| offset++; |
| } |
| } |
| } |
| pageout(); |
| |
| if(verbose) |
| print("readable clumps: %d; min. directory entries: %d\n", |
| clumps, minclumps); |
| arena->diskstats.used = lastclumpend - boffset; |
| leaked = eoffset - lastclumpend; |
| if(verbose) |
| print("used from %#llux to %#llux = %,lld (%,lld unused)\n", |
| boffset, lastclumpend, arena->diskstats.used, leaked); |
| |
| /* |
| * Finish the SHA1 of the old data. |
| */ |
| if(arena->diskstats.sealed){ |
| sbdiskhash(&oldsha, toffset); |
| readdisk(dbuf, toffset, arena->blocksize); |
| scorecp(dbuf+arena->blocksize-VtScoreSize, zero); |
| sbupdate(&oldsha, dbuf, toffset, arena->blocksize); |
| sbscore(&oldsha, oldscore); |
| } |
| |
| /* |
| * If we still don't know the clump magic, the arena |
| * must be empty. It still needs a value, so make |
| * something up. |
| */ |
| if(arena->version == 0) |
| arena->version = ArenaVersion5; |
| if(arena->clumpmagic == 0){ |
| if(arena->version == ArenaVersion4) |
| arena->clumpmagic = _ClumpMagic; |
| else{ |
| do |
| arena->clumpmagic = fastrand(); |
| while(arena->clumpmagic==_ClumpMagic |
| ||arena->clumpmagic==0); |
| } |
| head->clumpmagic = arena->clumpmagic; |
| } |
| |
| /* |
| * Guess at number of clumpinfo blocks to load. |
| * If we guess high, it's no big deal. If we guess low, |
| * we'll be forced into rewriting the whole directory. |
| * Still not such a big deal. |
| */ |
| if(clumps == 0 || arena->diskstats.used == totalcorrupt) |
| goto Nocib; |
| if(clumps < arena->diskstats.clumps) |
| clumps = arena->diskstats.clumps; |
| if(clumps < ncibuf) |
| clumps = ncibuf; |
| clumps += totalcorrupt/ |
| ((arena->diskstats.used - totalcorrupt)/clumps); |
| clumps += totalcorrupt/2000; |
| if(clumps < minclumps) |
| clumps = minclumps; |
| clumps += ncib-1; |
| clumps -= clumps%ncib; |
| |
| /* |
| * Can't write into the actual data. |
| */ |
| v = offset0 + arena->size - arena->blocksize; |
| v -= (clumps+ncib-1)/ncib * arena->blocksize; |
| if(v < lastclumpend){ |
| v = offset0 + arena->size - arena->blocksize; |
| clumps = (v-lastclumpend)/arena->blocksize * ncib; |
| } |
| |
| if(clumps < minclumps) |
| print("cannot happen?\n"); |
| |
| /* |
| * Check clumpinfo blocks against directory we created. |
| * The tricky part is handling the corrupt sections of arena. |
| * If possible, we remark just the affected directory entries |
| * rather than slide everything down. |
| * |
| * Allocate clumps+1 blocks and check that we don't need |
| * the last one at the end. |
| */ |
| bci = loadci(offset0, arena, clumps+1); |
| eci = bci+clumps+1; |
| bcit = cibuf; |
| ecit = cibuf+ncibuf; |
| |
| smart = 0; /* Somehow the smart code doesn't do corrupt clumps right. */ |
| Again: |
| nbad = 0; |
| ci = bci; |
| for(cit=bcit; cit<ecit && ci<eci; cit++){ |
| if(cit->corrupt){ |
| vlong n, m; |
| if(smart){ |
| /* |
| * If we can, just mark existing entries as corrupt. |
| */ |
| n = cit->corrupt; |
| for(xci=ci; n>0 && xci<eci; xci++) |
| n -= ClumpSize+xci->size; |
| if(n > 0 || xci >= eci) |
| goto Dumb; |
| printed = 0; |
| for(; ci<xci; ci++){ |
| if(verbose && ci->type != VtCorruptType){ |
| if(!printed){ |
| print("marking directory %d-%d as corrupt\n", |
| (int)(ci-bci), (int)(xci-bci)); |
| printed = 1; |
| } |
| print("\ttype=%d size=%d uncsize=%d score=%V\n", |
| ci->type, ci->size, ci->uncsize, ci->score); |
| } |
| ci->type = VtCorruptType; |
| } |
| }else{ |
| Dumb: |
| print("\trewriting clump directory\n"); |
| /* |
| * Otherwise, blaze a new trail. |
| */ |
| n = cit->corrupt; |
| while(n > 0 && ci < eci){ |
| if(n < ClumpSize) |
| sysfatal("bad math in clump corrupt"); |
| if(n <= VtMaxLumpSize+ClumpSize) |
| m = n; |
| else{ |
| m = VtMaxLumpSize+ClumpSize; |
| if(n-m < ClumpSize) |
| m -= ClumpSize; |
| } |
| ci->type = VtCorruptType; |
| ci->size = m-ClumpSize; |
| ci->uncsize = m-ClumpSize; |
| memset(ci->score, 0, VtScoreSize); |
| ci++; |
| n -= m; |
| } |
| } |
| continue; |
| } |
| if(clumpinfocmp(&cit->ci, ci) != 0){ |
| if(verbose && (smart || verbose>1)){ |
| print("clumpinfo %d\n", (int)(ci-bci)); |
| print("\twant: %d %d %d %V\n", |
| cit->ci.type, cit->ci.size, |
| cit->ci.uncsize, cit->ci.score); |
| print("\thave: %d %d %d %V\n", |
| ci->type, ci->size, |
| ci->uncsize, ci->score); |
| } |
| *ci = cit->ci; |
| nbad++; |
| } |
| ci++; |
| } |
| if(ci >= eci || cit < ecit){ |
| print("ran out of space editing existing directory; rewriting\n"); |
| print("# eci %ld ci %ld ecit %ld cit %ld\n", eci-bci, ci-bci, ecit-bcit, cit-bcit); |
| assert(smart); /* can't happen second time thru */ |
| smart = 0; |
| goto Again; |
| } |
| |
| assert(ci <= eci); |
| arena->diskstats.clumps = ci-bci; |
| eoffset = writeci(offset0, arena, bci, ci-bci); |
| if(sealing && fix) |
| sbrollback(&newsha, v); |
| print("eoffset=%lld lastclumpend=%lld diff=%lld unseal=%d\n", eoffset, lastclumpend, eoffset-lastclumpend, unseal); |
| if(lastclumpend > eoffset) |
| print("arena directory overwrote blocks! cannot happen!\n"); |
| free(bci); |
| if(smart && nbad) |
| print("arena directory has %d bad or missing entries\n", nbad); |
| Nocib: |
| if(eoffset - lastclumpend > 64*1024 && (!arena->diskstats.sealed || unseal)){ |
| if(arena->diskstats.sealed) |
| print("unsealing arena\n"); |
| sealing = 0; |
| memset(oldscore, 0, VtScoreSize); |
| } |
| |
| /* |
| * Finish the SHA1 of the new data - only meaningful |
| * if we've been writing to disk (`fix'). |
| */ |
| arena->diskstats.sealed = sealing; |
| arena->memstats = arena->diskstats; |
| if(sealing && fix){ |
| uchar tbuf[MaxDiskBlock]; |
| |
| sbdiskhash(&newsha, toffset); |
| memset(tbuf, 0, sizeof tbuf); |
| packarena(arena, tbuf); |
| sbupdate(&newsha, tbuf, toffset, arena->blocksize); |
| sbscore(&newsha, score); |
| } |
| } |
| |
| /* |
| * Copy the raw bytes of one arena into the file "dumpbase.anum". |
| * |
| * offset is where the arena starts in the partition, anum is used |
| * only to name the output file, and arena->size gives the number |
| * of bytes to copy.  Data is paged in and written in chunks of at |
| * most 4*M bytes.  Failures are reported on fd 2 and abandon the |
| * dump; the output file is closed on every path once it is open. |
| */ |
| void |
| dumparena(vlong offset, int anum, Arena *arena) |
| { |
| 	char buf[1000]; |
| 	vlong o, e; |
| 	int fd, n; |
| |
| 	snprint(buf, sizeof buf, "%s.%d", dumpbase, anum); |
| 	if((fd = create(buf, OWRITE, 0666)) < 0){ |
| 		fprint(2, "create %s: %r\n", buf); |
| 		return; |
| 	} |
| 	e = offset+arena->size; |
| 	for(o=offset; o<e; o+=n){ |
| 		/* last chunk may be short */ |
| 		n = 4*M; |
| 		if(o+n > e) |
| 			n = e-o; |
| 		if(pwrite(fd, pagein(o, n), n, o-offset) != n){ |
| 			/* report before close so %r still describes the write */ |
| 			fprint(2, "write %s at %#llux: %r\n", buf, o-offset); |
| 			break; |
| 		} |
| 	} |
| 	close(fd); |
| } |
| |
| /* |
| * Check the arena that starts at offset in the partition. |
| * anum is used for labelling output and for naming the dump file. |
| * |
| * guessarena supplies the head and tail this arena should have; |
| * the head block and tail block are then packed from that, compared |
| * with what is paged in from disk, differences are printed, and the |
| * packed blocks are copied over the paged-in data. |
| * If dumpbase is set the arena is dumped to a file instead and no |
| * head/tail comparison is done. |
| */ |
| void |
| checkarena(vlong offset, int anum) |
| { |
| uchar dbuf[MaxDiskBlock]; |
| uchar *p, oldscore[VtScoreSize], score[VtScoreSize]; |
| Arena arena, oarena; |
| ArenaHead head; |
| Info *fmt, *fmta; |
| int sz; |
| |
| print("# arena %d: offset %#llux\n", anum, offset); |
| |
| if(offset >= partend){ |
| print("arena offset out of bounds\n"); |
| return; |
| } |
| |
| /* everything below relies on head, arena, oldscore and score as set here */ |
| guessarena(offset, anum, &head, &arena, oldscore, score); |
| |
| if(verbose){ |
| print("#\tversion=%d name=%s blocksize=%d size=%z", |
| head.version, head.name, head.blocksize, head.size); |
| if(head.clumpmagic) |
| print(" clumpmagic=%#.8ux", head.clumpmagic); |
| print("\n#\tclumps=%d cclumps=%d used=%,lld uncsize=%,lld\n", |
| arena.diskstats.clumps, arena.diskstats.cclumps, |
| arena.diskstats.used, arena.diskstats.uncsize); |
| print("#\tctime=%t\n", arena.ctime); |
| print("#\twtime=%t\n", arena.wtime); |
| if(arena.diskstats.sealed) |
| print("#\tsealed score=%V\n", score); |
| } |
| |
| /* dump mode: copy the arena out and skip the head/tail checks */ |
| if(dumpbase){ |
| dumparena(offset, anum, &arena); |
| return; |
| } |
| |
| /* |
| * Head block: pack the expected contents, report any difference |
| * from the paged-in block, then overwrite the paged-in copy. |
| */ |
| memset(dbuf, 0, sizeof dbuf); |
| packarenahead(&head, dbuf); |
| p = pagein(offset, arena.blocksize); |
| if(memcmp(dbuf, p, arena.blocksize) != 0){ |
| print("on-disk arena header incorrect\n"); |
| showdiffs(dbuf, p, arena.blocksize, |
| arena.version==ArenaVersion4 ? headinfo4 : headinfo5); |
| } |
| memmove(p, dbuf, arena.blocksize); |
| |
| /* |
| * Tail block: the last block of the arena.  Its final VtScoreSize |
| * bytes hold the seal score, which is compared separately below. |
| */ |
| memset(dbuf, 0, sizeof dbuf); |
| packarena(&arena, dbuf); |
| if(arena.diskstats.sealed) |
| scorecp(dbuf+arena.blocksize-VtScoreSize, score); |
| p = pagein(offset+arena.size-arena.blocksize, arena.blocksize); |
| memset(&oarena, 0, sizeof oarena); |
| unpackarena(&oarena, p); |
| if(arena.version == ArenaVersion4){ |
| sz = ArenaSize4; |
| fmt = tailinfo4; |
| fmta = tailinfo4a; |
| }else{ |
| sz = ArenaSize5; |
| fmt = tailinfo5; |
| fmta = tailinfo5a; |
| } |
| /* NOTE(review): p[sz]==1 appears to flag the extended tail layout -- confirm */ |
| if(p[sz] == 1){ |
| fmt = fmta; |
| if(oarena.diskstats.sealed){ |
| /* |
| * some arenas were sealed with the extension |
| * before we adopted the convention that if it didn't |
| * add new information it gets dropped. |
| */ |
| _packarena(&arena, dbuf, 1); |
| } |
| } |
| if(memcmp(dbuf, p, arena.blocksize-VtScoreSize) != 0){ |
| print("on-disk arena tail incorrect\n"); |
| showdiffs(dbuf, p, arena.blocksize-VtScoreSize, fmt); |
| } |
| if(arena.diskstats.sealed){ |
| /* disk claimed to be sealed: its score should match oldscore */ |
| if(oarena.diskstats.sealed) |
| if(scorecmp(p+arena.blocksize-VtScoreSize, oldscore) != 0){ |
| print("on-disk arena seal score incorrect\n"); |
| print("\tcorrect=%V\n", oldscore); |
| print("\t disk=%V\n", p+arena.blocksize-VtScoreSize); |
| } |
| /* when fixing, announce a (re)seal if the stored score will change */ |
| if(fix && scorecmp(p+arena.blocksize-VtScoreSize, score) != 0){ |
| print("%ssealing arena%s: %V\n", |
| oarena.diskstats.sealed ? "re" : "", |
| scorecmp(oldscore, score) == 0 ? |
| "" : " after changes", score); |
| } |
| } |
| memmove(p, dbuf, arena.blocksize); |
| |
| /* NOTE(review): presumably writes back the modified page when not readonly -- confirm in pageout */ |
| pageout(); |
| } |
| |
| /* |
| * Build an arena map by probing the partition. |
| * |
| * Starting at ap.arenabase and stepping by arenasize until partend, |
| * page in one Block at each position and try to unpack it as an |
| * arena head.  Every position that unpacks successfully adds one |
| * entry recording its start, its stop (start plus the head's size) |
| * and its name.  Returns the newly allocated map. |
| */ |
| AMapN* |
| buildamap(void) |
| { |
| 	vlong pos; |
| 	ArenaHead hd; |
| 	AMapN *amap; |
| 	AMap *ent; |
| |
| 	amap = vtmallocz(sizeof *amap); |
| 	pos = ap.arenabase; |
| 	while(pos < partend){ |
| 		if(unpackarenahead(&hd, pagein(pos, Block)) >= 0){ |
| 			/* grow the table by one and fill in the new slot */ |
| 			amap->map = vtrealloc(amap->map, (amap->n+1)*sizeof amap->map[0]); |
| 			ent = &amap->map[amap->n]; |
| 			amap->n++; |
| 			ent->start = pos; |
| 			ent->stop = pos+hd.size; |
| 			strcpy(ent->name, hd.name); |
| 		} |
| 		pos += arenasize; |
| 	} |
| 	return amap; |
| } |
| |
| /* |
| * Check the arena partition map against the arenas actually found. |
| * |
| * The expected map text is generated from buildamap(): a count line |
| * followed by one "name\tstart\tstop" line per arena.  It is compared |
| * with the first len bytes of the table at ap.tabbase; on mismatch |
| * the generated text is copied over the paged-in table.  Text longer |
| * than ap.tabsize is reported and truncated to fit. |
| * |
| * The map and the formatted string are temporary and are freed |
| * before returning. |
| */ |
| void |
| checkmap(void) |
| { |
| 	char *s; |
| 	uchar *p; |
| 	int i, len; |
| 	AMapN *an; |
| 	Fmt fmt; |
| |
| 	an = buildamap(); |
| 	fmtstrinit(&fmt); |
| 	fmtprint(&fmt, "%ud\n", an->n); |
| 	for(i=0; i<an->n; i++) |
| 		fmtprint(&fmt, "%s\t%lld\t%lld\n", |
| 			an->map[i].name, an->map[i].start, an->map[i].stop); |
| 	s = fmtstrflush(&fmt); |
| |
| 	/* the map is only needed to produce the text */ |
| 	free(an->map); |
| 	free(an); |
| |
| 	/* fmtstrflush yields nil if the string could not be allocated */ |
| 	if(s == nil) |
| 		sysfatal("out of memory formatting arena map"); |
| |
| 	len = strlen(s); |
| 	if(len > ap.tabsize){ |
| 		print("arena partition map too long: need %z bytes have %z\n", |
| 			(vlong)len, (vlong)ap.tabsize); |
| 		len = ap.tabsize; |
| 	} |
| |
| 	if(ap.tabsize >= 4*M){	/* can't happen - max arenas is 2000 */ |
| 		print("arena partition map *way* too long\n"); |
| 		free(s); |
| 		return; |
| 	} |
| |
| 	p = pagein(ap.tabbase, ap.tabsize); |
| 	if(memcmp(p, s, len) != 0){ |
| 		print("arena partition map incorrect; rewriting.\n"); |
| 		memmove(p, s, len); |
| 	} |
| 	pageout(); |
| 	free(s); |
| } |
| |
| /* |
| * Stack size for the main thread (presumably picked up by libthread |
| * before threadmain runs -- confirm against thread(2)). |
| * checkarena alone keeps a MaxDiskBlock-byte buffer on its stack. |
| */ |
| int mainstacksize = 512*1024; |
| |
| /* |
| * Program entry point. |
| * |
| * Options: |
| *	-a size	set arenasize |
| *	-b size	set ap.blocksize |
| *	-f	fix: open the partition read-write and clear readonly |
| *	-n name	set basename |
| *	-v	increase verbosity |
| *	-x base	set dumpbase (arenas are dumped rather than checked) |
| *	-U	set unseal |
| * |
| * Takes the partition file and an optional ranges argument. |
| * A partition that isonearena() accepts is checked as a single |
| * arena at offset 0; otherwise every arena is checked and then |
| * the partition map. |
| */ |
| void |
| threadmain(int argc, char **argv) |
| { |
| 	int omode; |
| 	char *ranges; |
| |
| 	omode = OREAD; |
| 	readonly = 1; |
| 	ARGBEGIN{ |
| 	case 'a': |
| 		arenasize = unittoull(EARGF(usage())); |
| 		break; |
| 	case 'b': |
| 		ap.blocksize = unittoull(EARGF(usage())); |
| 		break; |
| 	case 'f': |
| 		fix = 1; |
| 		omode = ORDWR; |
| 		readonly = 0; |
| 		break; |
| 	case 'n': |
| 		basename = EARGF(usage()); |
| 		break; |
| 	case 'v': |
| 		verbose++; |
| 		break; |
| 	case 'x': |
| 		dumpbase = EARGF(usage()); |
| 		break; |
| 	case 'U': |
| 		unseal = 1; |
| 		break; |
| 	default: |
| 		usage(); |
| 	}ARGEND |
| |
| 	/* exactly one file, optionally followed by ranges */ |
| 	if(argc < 1 || argc > 2) |
| 		usage(); |
| |
| 	file = argv[0]; |
| 	ranges = nil; |
| 	if(argc > 1) |
| 		ranges = argv[1]; |
| |
| 	ventifmtinstall(); |
| 	fmtinstall('z', zfmt); |
| 	fmtinstall('t', tfmt); |
| 	quotefmtinstall(); |
| |
| 	part = initpart(file, omode|ODIRECT); |
| 	if(part == nil) |
| 		sysfatal("can't open %s: %r", file); |
| 	partend = part->size; |
| |
| 	if(isonearena()) |
| 		checkarena(0, -1); |
| 	else{ |
| 		checkarenas(ranges); |
| 		checkmap(); |
| 	} |
| 	threadexitsall(nil); |
| } |
| |