return of venti
diff --git a/src/cmd/venti/copy.c b/src/cmd/venti/copy.c
new file mode 100644
index 0000000..89fbbac
--- /dev/null
+++ b/src/cmd/venti/copy.c
@@ -0,0 +1,170 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <libsec.h>
+#include <thread.h>
+
+int changes;
+int rewrite;
+int ignoreerrors;
+int fast;
+int verbose;
+VtConn *zsrc, *zdst;
+
+void
+usage(void)	/* print the command synopsis and exit */
+{
+	fprint(2, "usage: copy [-fir] [-t type] srchost dsthost score\n");	/* fd 2 */
+	threadexitsall("usage");	/* exit status string is "usage" */
+}
+
+/*
+ * Copy block score (of type) from zsrc to zdst, children first.  In rewrite
+ * mode an unreadable block's score is overwritten in place with vtzeroscore.
+ */
+void
+walk(uchar score[VtScoreSize], uint type, int base)
+{
+	int i, n;
+	uchar *buf;
+	VtEntry e;
+	VtRoot root;
+
+	if(memcmp(score, vtzeroscore, VtScoreSize) == 0)
+		return;
+
+	buf = vtmallocz(VtMaxLumpSize);
+	if(fast && vtread(zdst, score, type, buf, VtMaxLumpSize) >= 0){
+		if(verbose)
+			fprint(2, "skip %V\n", score);
+		free(buf);
+		return;
+	}
+
+	n = vtread(zsrc, score, type, buf, VtMaxLumpSize);
+	if(n < 0){
+		if(rewrite){
+			changes++;
+			memmove(score, vtzeroscore, VtScoreSize);
+		}else if(!ignoreerrors)
+			sysfatal("reading block %V (type %d): %r", score, type);
+		free(buf);	/* don't leak the lump buffer when a bad block is skipped */
+		return;
+	}
+
+	switch(type){
+	case VtRootType:
+		if(vtrootunpack(&root, buf) < 0){
+			fprint(2, "warning: could not unpack root in %V %d\n", score, type);
+			break;
+		}
+		walk(root.score, VtDirType, 0);
+		walk(root.prev, VtRootType, 0);
+		vtrootpack(&root, buf);	/* walk might have changed score */
+		break;
+	case VtDirType:
+		for(i=0; i<n/VtEntrySize; i++){
+			if(vtentryunpack(&e, buf, i) < 0){
+				fprint(2, "warning: could not unpack entry #%d in %V %d\n", i, score, type);
+				continue;
+			}
+			if(!(e.flags & VtEntryActive))
+				continue;
+			walk(e.score, e.type, e.type&VtTypeBaseMask);
+			vtentrypack(&e, buf, i);
+		}
+		break;
+	case VtDataType:
+		break;
+	default:	/* pointers */
+		for(i=0; i<n; i+=VtScoreSize)
+			if(memcmp(buf+i, vtzeroscore, VtScoreSize) != 0)
+				walk(buf+i, type-1, base);
+		break;
+	}
+	if(vtwrite(zdst, score, type, buf, n) < 0){
+		/* recompute the score for the message: the argument may be stale if contents changed */
+		n = vtzerotruncate(type, buf, n);
+		sha1(buf, n, score, nil);
+		sysfatal("writing block %V (type %d): %r", score, type);
+	}
+	free(buf);
+}
+
+void
+threadmain(int argc, char *argv[])	/* copy the tree rooted at score argv[2] from server argv[0] to server argv[1] */
+{
+	int type, n;
+	uchar score[VtScoreSize];
+	uchar *buf;
+	char *prefix;
+
+	fmtinstall('F', vtfcallfmt);
+	fmtinstall('V', vtscorefmt);
+
+	type = -1;	/* -1 means no -t was given: probe every type below */
+	ARGBEGIN{
+	case 'f':
+		fast = 1;
+		break;
+	case 'i':	/* -i and -r are mutually exclusive */
+		if(rewrite)
+			usage();
+		ignoreerrors = 1;
+		break;
+	case 'r':
+		if(ignoreerrors)
+			usage();
+		rewrite = 1;
+		break;
+	case 't':
+		type = atoi(EARGF(usage()));
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc != 3)
+		usage();
+
+	if(vtparsescore(argv[2], &prefix, score) < 0)
+		sysfatal("could not parse score: %r");
+
+	buf = vtmallocz(VtMaxLumpSize);	/* scratch for the probe reads below; never freed before exit */
+
+	zsrc = vtdial(argv[0]);
+	if(zsrc == nil)
+		sysfatal("could not dial src server: %r");
+	if(vtconnect(zsrc) < 0)
+		sysfatal("vtconnect src: %r");
+
+	zdst = vtdial(argv[1]);
+	if(zdst == nil)
+		sysfatal("could not dial dst server: %r");
+	if(vtconnect(zdst) < 0)
+		sysfatal("vtconnect dst: %r");
+
+	if(type != -1){
+		n = vtread(zsrc, score, type, buf, VtMaxLumpSize);	/* only checks that the block exists with this type */
+		if(n < 0)
+			sysfatal("could not read block: %r");
+	}else{
+		for(type=0; type<VtMaxType; type++){	/* the first type that reads successfully wins */
+			n = vtread(zsrc, score, type, buf, VtMaxLumpSize);
+			if(n >= 0)
+				break;
+		}
+		if(type == VtMaxType)
+			sysfatal("could not find block %V of any type", score);
+	}
+
+	walk(score, type, VtDirType);
+	if(changes)	/* changes is incremented by walk each time it zeroes an unreadable score */
+		print("%s:%V (%d pointers rewritten)\n", prefix, score, changes);
+
+	if(vtsync(zdst) < 0)
+		sysfatal("could not sync dst server: %r");
+
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/devnull.c b/src/cmd/venti/devnull.c
new file mode 100644
index 0000000..29cf9ec
--- /dev/null
+++ b/src/cmd/venti/devnull.c
@@ -0,0 +1,80 @@
+/* Copyright (c) 2004 Russ Cox */
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <thread.h>
+#include <libsec.h>
+
+#ifndef _UNISTD_H_
+#pragma varargck type "F" VtFcall*
+#pragma varargck type "T" void
+#endif
+
+int verbose;
+
+enum
+{
+	STACK = 8192,
+};
+
+void
+usage(void)	/* print the command synopsis and exit */
+{
+	fprint(2, "usage: venti/devnull [-v] [-a address]\n");	/* fd 2 */
+	threadexitsall("usage");	/* exit status string is "usage" */
+}
+
+/* Venti server that stores nothing: reads fail with "no such block", writes are answered */
+/* with the score packetsha1 computes for the data, other requests get the default reply. */
+void
+threadmain(int argc, char **argv)
+{
+	VtReq *r;
+	VtSrv *srv;
+	char *address;
+
+	fmtinstall('V', vtscorefmt);
+	fmtinstall('F', vtfcallfmt);
+	address = "tcp!*!venti";
+
+	ARGBEGIN{
+	case 'v':
+		verbose++;
+		break;
+	case 'a':
+		address = EARGF(usage());
+		break;
+	default:
+		usage();
+	}ARGEND
+
+	srv = vtlisten(address);
+	if(srv == nil)
+		sysfatal("vtlisten %s: %r", address);	/* report the address actually used, not argv[1] */
+
+	while((r = vtgetreq(srv)) != nil){
+		r->rx.msgtype = r->tx.msgtype+1;	/* default reply type is request type + 1 */
+		if(verbose)
+			fprint(2, "<- %F\n", &r->tx);
+		switch(r->tx.msgtype){
+		case VtTping:
+			break;
+		case VtTgoodbye:
+			break;
+		case VtTread:
+			r->rx.error = vtstrdup("no such block");
+			r->rx.msgtype = VtRerror;
+			break;
+		case VtTwrite:
+			packetsha1(r->tx.data, r->rx.score);
+			break;
+		case VtTsync:
+			break;
+		}
+		if(verbose)
+			fprint(2, "-> %F\n", &r->rx);
+		vtrespond(r);
+	}
+	threadexitsall(nil);
+}
+
diff --git a/src/cmd/venti/mkfile b/src/cmd/venti/mkfile
index 1a1b652..bba8d2e 100644
--- a/src/cmd/venti/mkfile
+++ b/src/cmd/venti/mkfile
@@ -1,3 +1,13 @@
-%:VQ:
-	echo venti will return once it is debugged.
+<$PLAN9/src/mkhdr
+
+DIRS=srv
+
+TARG=\
+	copy\
+	read\
+	sync\
+	write\
+
+<$PLAN9/src/mkmany
+<$PLAN9/src/mkdirs
 
diff --git a/src/cmd/venti/mkroot.c b/src/cmd/venti/mkroot.c
new file mode 100644
index 0000000..f18cbf3
--- /dev/null
+++ b/src/cmd/venti/mkroot.c
@@ -0,0 +1,59 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+char *host;
+
+void
+usage(void)	/* print the command synopsis and exit */
+{
+	fprint(2, "usage: mkroot [-h host] name type score blocksize prev\n");	/* fd 2 */
+	threadexitsall("usage");	/* exit status string is "usage" */
+}
+
+/* Build a VtRoot from the five arguments (name type score blocksize prev), */
+/* write it to the venti server as a VtRootType block and print its score. */
+void
+threadmain(int argc, char *argv[])
+{
+	uchar score[VtScoreSize];
+	uchar buf[VtRootSize];
+	VtConn *z;
+	VtRoot root;
+
+	ARGBEGIN{
+	case 'h':
+		host = EARGF(usage());
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc != 5)
+		usage();
+	ventifmtinstall();
+
+	memset(&root, 0, sizeof root);	/* root is a local: bytes not set below would otherwise be garbage when packed */
+	strecpy(root.name, root.name+sizeof root.name, argv[0]);
+	strecpy(root.type, root.type+sizeof root.type, argv[1]);
+	if(vtparsescore(argv[2], nil, root.score) < 0)	/* (string, prefix, score) form, as the other venti commands use */
+		sysfatal("bad score '%s'", argv[2]);
+	root.blocksize = atoi(argv[3]);
+	if(vtparsescore(argv[4], nil, root.prev) < 0)
+		sysfatal("bad score '%s'", argv[4]);
+	vtrootpack(&root, buf);
+
+	z = vtdial(host);
+	if(z == nil)
+		sysfatal("could not connect to server: %r");
+	if(vtconnect(z) < 0)
+		sysfatal("vtconnect: %r");
+	if(vtwrite(z, score, VtRootType, buf, VtRootSize) < 0)
+		sysfatal("vtwrite: %r");
+	if(vtsync(z) < 0)
+		sysfatal("vtsync: %r");
+	vthangup(z);
+	print("%V\n", score);
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/randtest.c b/src/cmd/venti/randtest.c
new file mode 100644
index 0000000..b7a09ef
--- /dev/null
+++ b/src/cmd/venti/randtest.c
@@ -0,0 +1,334 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <libsec.h>
+#include <thread.h>
+
+
+enum { STACK = 32768 };
+void xxxsrand(long);
+long xxxlrand(void);
+
+Channel *cw;
+Channel *cr;
+char *host;
+int blocksize, seed, randpct;
+int doread, dowrite, packets, permute;
+vlong totalbytes, cur;
+VtConn *z;
+int multi;
+int maxpackets;
+int sequence;
+int doublecheck = 1;
+uint *order;
+
+void
+usage(void)	/* print the options threadmain's ARGBEGIN switch accepts, then exit */
+{
+	fprint(2, "usage: randtest [-PrSVw] [-b blocksize] [-h host] [-M maxblocks] [-m nproc] [-n totalbytes] [-p randpct] [-s seed]\n");
+	threadexitsall("usage");
+}
+
+void
+wr(char *buf, char *buf2)	/* write one block to z; buf2 is unused, so wr and rd share the fn type run expects */
+{
+	uchar score[VtScoreSize], score2[VtScoreSize];
+	DigestState ds;
+	memset(&ds, 0, sizeof ds);
+	memset(score, 0, sizeof score);	/* keeps the vtwrite failure message defined when doublecheck is off */
+	if(doublecheck)
+		sha1((uchar*)buf, blocksize, score, &ds);
+	if(vtwrite(z, score2, VtDataType, (uchar*)buf, blocksize) < 0)
+		sysfatal("vtwrite %V at %,lld: %r", score, cur);
+	if(doublecheck && memcmp(score, score2, VtScoreSize) != 0)	/* server's score must match the local SHA1 */
+		sysfatal("score mismatch! %V %V", score, score2);
+}
+
+void
+wrthread(void *v)	/* writer proc: write every block received on cw */
+{
+	char *p;
+
+	USED(v);
+	while((p = recvp(cw)) != nil){	/* a nil pointer on cw ends the loop */
+		wr(p, nil);
+		free(p);	/* the received block is released here */
+	}
+}
+
+void
+rd(char *buf, char *buf2)	/* read the block whose contents should equal buf into buf2 and compare */
+{
+	uchar score[VtScoreSize];
+	DigestState ds;
+
+	memset(&ds, 0, sizeof ds);
+	sha1((uchar*)buf, blocksize, score, &ds);	/* the score to fetch is the SHA1 of the expected data */
+	if(vtread(z, score, VtDataType, (uchar*)buf2, blocksize) < 0)
+		sysfatal("vtread %V at %,lld: %r", score, cur);
+	if(memcmp(buf, buf2, blocksize) != 0)	/* NOTE(review): compares blocksize bytes even if vtread returned fewer */
+		sysfatal("bad data read! %V", score);
+}
+
+void
+rdthread(void *v)	/* reader proc: verify every block received on cr */
+{
+	char *p, *buf2;
+
+	buf2 = vtmalloc(blocksize);	/* per-proc scratch buffer for rd; not freed when the loop ends */
+	USED(v);
+	while((p = recvp(cr)) != nil){	/* a nil pointer on cr ends the loop */
+		rd(p, buf2);
+		free(p);
+	}
+}
+
+char *template;
+
+/* Stamp each block with its (optionally permuted) index; send it on c, or call fn if c is nil. */
+void
+run(void (*fn)(char*, char*), Channel *c)
+{
+	int i, t, j, packets;
+	char *buf2, *buf;
+	buf2 = vtmalloc(blocksize);
+	buf = vtmalloc(blocksize);
+	cur = 0;
+	packets = totalbytes/blocksize;
+	if(maxpackets == 0)
+		maxpackets = packets;
+	order = vtmalloc(packets*sizeof order[0]);
+	for(i=0; i<packets; i++)
+		order[i] = i;
+	if(permute)
+		for(i=1; i<packets; i++){	/* shuffle: swap order[i] with a random order[0..i] */
+			j = nrand(i+1);
+			t = order[i];
+			order[i] = order[j];
+			order[j] = t;
+		}
+	for(i=0; i<packets && i<maxpackets; i++){
+		memmove(buf, template, blocksize);
+		*(uint*)buf = order[i];	/* the first word makes each block distinct */
+		if(c){
+			sendp(c, buf);	/* the receiving proc frees this buffer */
+			buf = vtmalloc(blocksize);
+		}else
+			(*fn)(buf, buf2);
+		cur += blocksize;
+	}
+	free(order);
+	free(buf);	/* either the reused buffer or the last, unsent allocation */
+	free(buf2);
+}
+
+#define TWID64	((u64int)~(u64int)0)
+
+/* Parse a number with an optional k/m/g/t (powers of 1024) suffix; TWID64 on nil or trailing junk. */
+u64int
+unittoull(char *s)
+{
+	char *es;
+	u64int n;
+	if(s == nil)
+		return TWID64;
+	n = strtoul(s, &es, 0);
+	if(*es == 'k' || *es == 'K'){
+		n *= 1024;
+		es++;
+	}else if(*es == 'm' || *es == 'M'){
+		n *= 1024*1024;
+		es++;
+	}else if(*es == 'g' || *es == 'G'){
+		n *= 1024*1024*1024;
+		es++;
+	}else if(*es == 't' || *es == 'T'){
+		n *= (u64int)1024*1024*1024*1024;	/* cast first so the product is computed in 64 bits */
+		es++;	/* consume the suffix; without this every "Nt" value was rejected below */
+	}
+	if(*es != '\0')
+		return TWID64;
+	return n;
+}
+
+void
+threadmain(int argc, char *argv[])	/* write and/or read back generated blocks against a venti server and report throughput */
+{
+	int i, max;
+	vlong t0;
+	double t;
+
+	blocksize = 8192;
+	seed = 0;
+	randpct = 50;
+	host = nil;
+	doread = 0;
+	dowrite = 0;
+	totalbytes = 1*1024*1024*1024;
+	fmtinstall('V', vtscorefmt);
+	fmtinstall('F', vtfcallfmt);
+
+	ARGBEGIN{
+	case 'b':
+		blocksize = unittoull(EARGF(usage()));	/* NOTE(review): unittoull's TWID64 error value is not checked */
+		break;
+	case 'h':
+		host = EARGF(usage());
+		break;
+	case 'M':
+		maxpackets = unittoull(EARGF(usage()));
+		break;
+	case 'm':
+		multi = atoi(EARGF(usage()));
+		break;
+	case 'n':
+		totalbytes = unittoull(EARGF(usage()));
+		break;
+	case 'p':
+		randpct = atoi(EARGF(usage()));
+		break;
+	case 'P':
+		permute = 1;
+		break;
+	case 'S':	/* turn off wr's local score check and clear ventidoublechecksha1 */
+		doublecheck = 0;
+		ventidoublechecksha1 = 0;
+		break;
+	case 's':
+		seed = atoi(EARGF(usage()));
+		break;
+	case 'r':
+		doread = 1;
+		break;
+	case 'w':
+		dowrite = 1;
+		break;
+	case 'V':
+		chattyventi++;
+		break;
+	default:
+		usage();
+	}ARGEND
+
+	if(doread==0 && dowrite==0){	/* neither -r nor -w given: do both */
+		doread = 1;
+		dowrite = 1;
+	}
+
+	z = vtdial(host);
+	if(z == nil)
+		sysfatal("could not connect to server: %r");
+	if(vtconnect(z) < 0)
+		sysfatal("vtconnect: %r");
+
+	if(multi){	/* -m n: start n wrthread and n rdthread procs fed through cw and cr */
+		cr = chancreate(sizeof(void*), 0);
+		cw = chancreate(sizeof(void*), 0);
+		for(i=0; i<multi; i++){
+			proccreate(wrthread, nil, STACK);
+			proccreate(rdthread, nil, STACK);
+		}
+	}
+
+	template = vtmalloc(blocksize);
+	xxxsrand(seed);
+	max = (256*randpct)/100;	/* template bytes are drawn from 0..max-1 */
+	if(max == 0)
+		max = 1;
+	for(i=0; i<blocksize; i++)
+		template[i] = xxxlrand()%max;
+	if(dowrite){
+		t0 = nsec();
+		run(wr, cw);	/* cw is nil unless -m was given, in which case run calls wr directly */
+		for(i=0; i<multi; i++)
+			sendp(cw, nil);	/* one nil per writer proc ends its loop */
+		t = (nsec() - t0)/1.e9;
+		print("write: %lld bytes / %.3f seconds = %.6f MB/s\n",
+			totalbytes, t, (double)totalbytes/1e6/t);	/* NOTE(review): reports totalbytes even when -M made run stop early */
+	}
+	if(doread){
+		t0 = nsec();
+		run(rd, cr);
+		for(i=0; i<multi; i++)
+			sendp(cr, nil);	/* one nil per reader proc ends its loop */
+		t = (nsec() - t0)/1.e9;
+		print("read: %lld bytes / %.3f seconds = %.6f MB/s\n",
+			totalbytes, t, (double)totalbytes/1e6/t);
+	}
+	threadexitsall(nil);
+}
+
+
+/*
+ *	algorithm by
+ *	D. P. Mitchell & J. A. Reeds
+ */
+
+#define	LEN	607
+#define	TAP	273
+#define	MASK	0x7fffffffL
+#define	A	48271
+#define	M	2147483647
+#define	Q	44488
+#define	R	3399
+#define	NORM	(1.0/(1.0+MASK))
+
+static	ulong	rng_vec[LEN];
+static	ulong*	rng_tap = rng_vec;
+static	ulong*	rng_feed = 0;
+
+static void
+isrand(long seed)	/* fill rng_vec from seed and reset the tap and feed pointers */
+{
+	long lo, hi, x;
+	int i;
+
+	rng_tap = rng_vec;
+	rng_feed = rng_vec+LEN-TAP;
+	seed = seed%M;	/* fold the seed into 1..M-1; 0 is replaced by a fixed constant below */
+	if(seed < 0)
+		seed += M;
+	if(seed == 0)
+		seed = 89482311;
+	x = seed;
+	/*
+	 *	Initialize by x[n+1] = 48271 * x[n] mod (2**31 - 1)
+	 */
+	for(i = -20; i < LEN; i++) {	/* the first 20 values (i < 0) are generated but not stored */
+		hi = x / Q;
+		lo = x % Q;
+		x = A*lo - R*hi;	/* Q is M/A and R is M%A, so this forms A*x mod M from small products */
+		if(x < 0)
+			x += M;
+		if(i >= 0)
+			rng_vec[i] = x;
+	}
+}
+
+void
+xxxsrand(long seed)	/* public entry point for seeding: forwards to isrand */
+{
+	isrand(seed);
+}
+
+long
+xxxlrand(void)	/* return the next value, masked with MASK, from the generator state in rng_vec */
+{
+	ulong x;
+
+	rng_tap--;
+	if(rng_tap < rng_vec) {	/* tap pointer ran off the front: wrap it to the end */
+		if(rng_feed == 0) {	/* never seeded: seed with 1 first */
+			isrand(1);
+			rng_tap--;
+		}
+		rng_tap += LEN;
+	}
+	rng_feed--;
+	if(rng_feed < rng_vec)
+		rng_feed += LEN;
+	x = (*rng_feed + *rng_tap) & MASK;	/* sum of the two slots, masked with MASK */
+	*rng_feed = x;	/* the result also replaces the feed slot */
+
+	return x;
+}
+
diff --git a/src/cmd/venti/read.c b/src/cmd/venti/read.c
new file mode 100644
index 0000000..3f3441e
--- /dev/null
+++ b/src/cmd/venti/read.c
@@ -0,0 +1,75 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <libsec.h>
+#include <thread.h>
+
+void
+usage(void)	/* print the command synopsis and exit */
+{
+	fprint(2, "usage: read [-h host] [-t type] score\n");	/* fd 2 */
+	threadexitsall("usage");	/* exit status string is "usage" */
+}
+
+/* Read the block named by the score argument from a venti server and write it to fd 1. */
+/* Without -t every type is tried in turn and the one that works is reported on fd 2. */
+void
+threadmain(int argc, char *argv[])
+{
+	int type, n;
+	uchar score[VtScoreSize];
+	uchar *buf;
+	VtConn *z;
+	char *host;
+
+	fmtinstall('F', vtfcallfmt);
+	fmtinstall('V', vtscorefmt);
+
+	host = nil;
+	type = -1;
+	ARGBEGIN{
+	case 'h':
+		host = EARGF(usage());
+		break;
+	case 't':
+		type = atoi(EARGF(usage()));	/* EARGF consumes the option argument; argv[1] did not */
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc != 1)
+		usage();
+
+	if(vtparsescore(argv[0], nil, score) < 0)
+		sysfatal("could not parse score '%s': %r", argv[0]);
+
+	buf = vtmallocz(VtMaxLumpSize);
+
+	z = vtdial(host);
+	if(z == nil)
+		sysfatal("could not connect to server: %r");
+
+	if(vtconnect(z) < 0)
+		sysfatal("vtconnect: %r");
+
+	if(type == -1){
+		n = -1;
+		for(type=0; type<VtMaxType; type++){
+			n = vtread(z, score, type, buf, VtMaxLumpSize);
+			if(n >= 0){
+				fprint(2, "venti/read%s%s %V %d\n", host ? " -h" : "", host ? host : "",
+					score, type);
+				break;
+			}
+		}
+	}else	/* type came from -t; argv[1] is past the end of the arguments here */
+		n = vtread(z, score, type, buf, VtMaxLumpSize);
+	vthangup(z);
+	if(n < 0)
+		sysfatal("could not read block: %r");
+	if(write(1, buf, n) != n)
+		sysfatal("write: %r");
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/readlist.c b/src/cmd/venti/readlist.c
new file mode 100644
index 0000000..bb1d9b6
--- /dev/null
+++ b/src/cmd/venti/readlist.c
@@ -0,0 +1,112 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include <venti.h>
+#include <bio.h>
+
+char *host;
+Biobuf b;
+VtConn *z;
+uchar *buf;
+void run(Biobuf*);
+int nn;
+
+void
+usage(void)	/* print the command synopsis and exit */
+{
+	fprint(2, "usage: readlist [-h host] list\n");	/* fd 2 */
+	threadexitsall("usage");	/* exit status string is "usage" */
+}
+
+/*
+ * Decode n hex digits from buf into the VtScoreSize-byte score.
+ * Returns 0 on success; on a wrong length or a non-hex character
+ * sets the error string and returns -1 (score may be partly filled).
+ */
+int
+parsescore(uchar *score, char *buf, int n)
+{
+	int pos, ch, nib;
+	memset(score, 0, VtScoreSize);
+	if(n != VtScoreSize*2){
+		werrstr("score wrong length %d", n);
+		return -1;
+	}
+	for(pos=0; pos<VtScoreSize*2; pos++){
+		ch = buf[pos];
+		if('0' <= ch && ch <= '9')
+			nib = ch - '0';
+		else if('a' <= ch && ch <= 'f')
+			nib = ch - 'a' + 10;
+		else if('A' <= ch && ch <= 'F')
+			nib = ch - 'A' + 10;
+		else{
+			werrstr("bad score char %d '%c'", ch, ch);
+			return -1;
+		}
+		/* even positions hold the high nibble of byte pos/2 */
+		score[pos/2] |= (pos%2 == 0) ? nib<<4 : nib;
+	}
+	return 0;
+}
+
+/* Read every "score type" line from the named list files (or fd 0 if none) from the server. */
+void
+threadmain(int argc, char *argv[])
+{
+	int fd, i;
+
+	ARGBEGIN{
+	case 'h':
+		host = EARGF(usage());
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+	fmtinstall('V', vtscorefmt);
+	buf = vtmallocz(VtMaxLumpSize);
+	z = vtdial(host);
+	if(z == nil)
+		sysfatal("could not connect to server: %r");
+	if(vtconnect(z) < 0)
+		sysfatal("vtconnect: %r");
+
+	if(argc == 0){
+		Binit(&b, 0, OREAD);
+		run(&b);
+	}
+	for(i=0; i<argc; i++){	/* does nothing when argc is 0 */
+		if((fd = open(argv[i], OREAD)) < 0)
+			sysfatal("open %s: %r", argv[i]);
+		Binit(&b, fd, OREAD);
+		run(&b);
+		Bterm(&b);	/* release the Biobuf before b is reused for the next file */
+		close(fd);	/* b came from Binit, so the descriptor is closed explicitly */
+	}
+	threadexitsall(nil);
+}
+
+void
+run(Biobuf *b)	/* read each "score type" line from b and fetch that block from z */
+{
+	char *p, *f[10];
+	int nf;
+	uchar score[20];
+	int type, n;
+
+	while((p = Brdline(b, '\n')) != nil){
+		p[Blinelen(b)-1] = 0;	/* overwrite the last byte of the line (the '\n' delimiter) with NUL */
+		nf = tokenize(p, f, nelem(f));
+		if(nf != 2)
+			sysfatal("syntax error in work list");
+		if(parsescore(score, f[0], strlen(f[0])) < 0)
+			sysfatal("bad score %s in work list", f[0]);
+		type = atoi(f[1]);
+		n = vtread(z, score, type, buf, VtMaxLumpSize);
+		if(n < 0)
+			sysfatal("could not read %s %s: %r", f[0], f[1]);
+		// write(1, buf, n);
+		if(++nn%1000 == 0)	/* progress report every 1000 blocks */
+			print("%d...", nn);
+	}
+}
diff --git a/src/cmd/venti/ro.c b/src/cmd/venti/ro.c
new file mode 100644
index 0000000..541dae4
--- /dev/null
+++ b/src/cmd/venti/ro.c
@@ -0,0 +1,112 @@
+/* Copyright (c) 2004 Russ Cox */
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <thread.h>
+#include <libsec.h>
+
+#ifndef _UNISTD_H_
+#pragma varargck type "F" VtFcall*
+#pragma varargck type "T" void
+#endif
+
+VtConn *z;
+int verbose;
+
+enum
+{
+	STACK = 8192,
+};
+
+void
+usage(void)	/* print the command synopsis and exit */
+{
+	fprint(2, "usage: venti/ro [-v] [-a address] [-h address]\n");	/* fd 2 */
+	threadexitsall("usage");	/* exit status string is "usage" */
+}
+
+void
+readthread(void *v)	/* serve one read request (v is the VtReq) by reading the block from z, then respond */
+{
+	char err[ERRMAX];
+	VtReq *r;
+	uchar *buf;
+	int n;
+	/* threadmain starts one of these per VtTread and leaves the response to it */
+	r = v;
+	buf = vtmalloc(r->tx.count);
+	if((n=vtread(z, r->tx.score, r->tx.blocktype, buf, r->tx.count)) < 0){
+		r->rx.msgtype = VtRerror;	/* pass the upstream error string back to the client */
+		rerrstr(err, sizeof err);
+		r->rx.error = vtstrdup(err);
+		free(buf);
+	}else{
+		r->rx.data = packetforeign(buf, n, free, buf);	/* presumably free(buf) runs when the packet is released - confirm */
+	}
+	if(verbose)
+		fprint(2, "-> %F\n", &r->rx);
+	vtrespond(r);
+}
+
+void
+threadmain(int argc, char **argv)	/* read-only venti proxy: listen on address, forward reads to the server dialed as z */
+{
+	VtReq *r;
+	VtSrv *srv;
+	char *address, *ventiaddress;
+
+	fmtinstall('F', vtfcallfmt);
+	fmtinstall('V', vtscorefmt);
+	
+	address = "tcp!*!venti";
+	ventiaddress = nil;	/* nil is passed straight to vtdial unless -h is given */
+	
+	ARGBEGIN{
+	case 'v':
+		verbose++;
+		break;
+	case 'a':
+		address = EARGF(usage());
+		break;
+	case 'h':
+		ventiaddress = EARGF(usage());
+		break;
+	default:
+		usage();
+	}ARGEND
+
+	if((z = vtdial(ventiaddress)) == nil)
+		sysfatal("vtdial %s: %r", ventiaddress);
+	if(vtconnect(z) < 0)
+		sysfatal("vtconnect: %r");
+
+	srv = vtlisten(address);
+	if(srv == nil)
+		sysfatal("vtlisten %s: %r", address);
+
+	while((r = vtgetreq(srv)) != nil){
+		r->rx.msgtype = r->tx.msgtype+1;	/* default reply type is request type + 1 */
+		if(verbose)
+			fprint(2, "<- %F\n", &r->tx);
+		switch(r->tx.msgtype){
+		case VtTping:
+			break;
+		case VtTgoodbye:
+			break;
+		case VtTread:
+			threadcreate(readthread, r, 16384);	/* readthread sends the response itself */
+			continue;	/* so skip the vtrespond at the bottom of the loop */
+		case VtTwrite:
+			r->rx.error = vtstrdup("read-only server");
+			r->rx.msgtype = VtRerror;
+			break;
+		case VtTsync:
+			break;
+		}
+		if(verbose)
+			fprint(2, "-> %F\n", &r->rx);
+		vtrespond(r);
+	}
+	threadexitsall(nil);
+}
+
diff --git a/src/cmd/venti/srv/arena.c b/src/cmd/venti/srv/arena.c
new file mode 100644
index 0000000..15bf44d
--- /dev/null
+++ b/src/cmd/venti/srv/arena.c
@@ -0,0 +1,737 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+typedef struct ASum ASum;
+
+struct ASum	/* one entry on the sumq list of arenas waiting for sumproc */
+{
+	Arena	*arena;	/* arena to hand to sumarena */
+	ASum	*next;	/* next entry on sumq */
+};
+
+static void	sealarena(Arena *arena);
+static int	okarena(Arena *arena);
+static int	loadarena(Arena *arena);
+static CIBlock	*getcib(Arena *arena, int clump, int writing, CIBlock *rock);
+static void	putcib(Arena *arena, CIBlock *cib);
+static void	sumproc(void *);
+
+static QLock	sumlock;
+static Rendez	sumwait;
+static ASum	*sumq;
+static uchar zero[8192];
+
+int	arenasumsleeptime;
+
+int
+initarenasum(void)	/* start the proc that runs sumproc; returns 0, or -1 if vtproc fails */
+{
+	sumwait.l = &sumlock;	/* backsumarena does nothing while sumwait.l is still nil */
+
+	if(vtproc(sumproc, nil) < 0){
+		seterr(EOk, "can't start arena checksum slave: %r");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * make an Arena from the on-disk header and trailer; returns nil on failure.
+ */
+Arena*
+initarena(Part *part, u64int base, u64int size, u32int blocksize)
+{
+	Arena *arena;
+
+	arena = MKZ(Arena);
+	arena->part = part;
+	arena->blocksize = blocksize;
+	arena->clumpmax = arena->blocksize / ClumpInfoSize;	/* ClumpInfo entries per block */
+	arena->base = base + blocksize;	/* one block before arena->base is where wbarenahead puts the header */
+	arena->size = size - 2 * blocksize;	/* one block each is left out for header and trailer */
+
+	if(loadarena(arena) < 0){
+		seterr(ECorrupt, "arena header or trailer corrupted");
+		freearena(arena);
+		return nil;
+	}
+	if(okarena(arena) < 0){
+		freearena(arena);
+		return nil;
+	}
+
+	if(arena->diskstats.sealed && scorecmp(zeroscore, arena->score)==0)	/* sealed but no checksum recorded yet */
+		backsumarena(arena);
+
+	return arena;
+}
+
+void
+freearena(Arena *arena)	/* release an Arena structure; nil is accepted */
+{
+	if(arena == nil)
+		return;
+	free(arena);	/* only the structure itself is freed here */
+}
+
+/*
+ * create a new, empty arena occupying size bytes of part at base: write its
+ * trailer and header and zero the start of the clump area.  returns nil on error.
+ */
+Arena*
+newarena(Part *part, u32int vers, char *name, u64int base, u64int size, u32int blocksize)
+{
+	int bsize;
+	Arena *arena;
+
+	if(nameok(name) < 0){
+		seterr(EOk, "illegal arena name %s", name);	/* the format lacked a verb for name */
+		return nil;
+	}
+	arena = MKZ(Arena);
+	arena->part = part;
+	arena->version = vers;
+	if(vers == ArenaVersion4)
+		arena->clumpmagic = _ClumpMagic;
+	else{
+		do	/* any other version: random magic, but never _ClumpMagic or 0 */
+			arena->clumpmagic = fastrand();
+		while(arena->clumpmagic==_ClumpMagic || arena->clumpmagic==0);
+	}
+	arena->blocksize = blocksize;
+	arena->clumpmax = arena->blocksize / ClumpInfoSize;
+	arena->base = base + blocksize;	/* the header block precedes arena->base */
+	arena->size = size - 2 * blocksize;	/* header and trailer blocks are not counted */
+	namecp(arena->name, name);
+	bsize = sizeof zero;	/* zero at most one block, and at most sizeof zero bytes */
+	if(bsize > arena->blocksize)
+		bsize = arena->blocksize;
+	if(wbarena(arena)<0 || wbarenahead(arena)<0
+	|| writepart(arena->part, arena->base, zero, bsize)<0){
+		freearena(arena);
+		return nil;
+	}
+	return arena;
+}
+
+int
+readclumpinfo(Arena *arena, int clump, ClumpInfo *ci)	/* unpack the directory entry for clump into *ci; 0 or -1 */
+{
+	CIBlock *cib, r;	/* r is the storage passed to getcib as its rock argument */
+
+	cib = getcib(arena, clump, 0, &r);	/* 0: not writing */
+	if(cib == nil)
+		return -1;
+	unpackclumpinfo(ci, &cib->data->data[cib->offset]);
+	putcib(arena, cib);
+	return 0;
+}
+
+int
+readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n)	/* read up to n entries starting at clump; returns how many were read */
+{
+	CIBlock *cib, r;
+	int i;
+
+	for(i = 0; i < n; i++){
+		cib = getcib(arena, clump + i, 0, &r);
+		if(cib == nil)
+			break;	/* stop at the first failure; entries before i are valid */
+		unpackclumpinfo(&cis[i], &cib->data->data[cib->offset]);
+		putcib(arena, cib);
+	}
+	return i;
+}
+
+/*
+ * write directory information for one clump
+ * must be called with the arena locked
+ */
+int
+writeclumpinfo(Arena *arena, int clump, ClumpInfo *ci)	/* returns 0, or -1 if getcib fails */
+{
+	CIBlock *cib, r;
+
+	cib = getcib(arena, clump, 1, &r);	/* 1: writing */
+	if(cib == nil)
+		return -1;
+	dirtydblock(cib->data, DirtyArenaCib);	/* mark the block dirty before packing into it */
+	packclumpinfo(ci, &cib->data->data[cib->offset]);
+	putcib(arena, cib);
+	return 0;
+}
+
+u64int
+arenadirsize(Arena *arena, u32int clumps)	/* bytes of clump directory needed for clumps entries */
+{
+	return ((clumps / arena->clumpmax) + 1) * arena->blocksize;	/* clumps/clumpmax + 1 whole blocks */
+}
+
+/*
+ * read a clump of data
+ * n is a hint of the size of the data, not including the header
+ * make sure it won't run off the end, then return the number of bytes actually read
+ */
+u32int
+readarena(Arena *arena, u64int aa, u8int *buf, long n)	/* aa is an offset relative to arena->base */
+{
+	DBlock *b;
+	u64int a;
+	u32int blocksize, off, m;
+	long nn;
+
+	if(n == 0)
+		return -1;	/* the return type is u32int, so -1 comes back converted to unsigned */
+
+	qlock(&arena->lock);
+	a = arena->size - arenadirsize(arena, arena->memstats.clumps);	/* limit of clump storage: size minus the directory */
+	qunlock(&arena->lock);
+	if(aa >= a){
+		seterr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->memstats.clumps, aa, a, arena->size - arenadirsize(arena, arena->memstats.clumps - 1));
+		return -1;
+	}
+	if(aa + n > a)
+		n = a - aa;	/* clip the request at the limit */
+
+	blocksize = arena->blocksize;
+	a = arena->base + aa;
+	off = a & (blocksize - 1);	/* offset within the block; presumably blocksize is a power of two */
+	a -= off;
+	nn = 0;
+	for(;;){	/* copy one block's worth per iteration until n bytes are gathered */
+		b = getdblock(arena->part, a, OREAD);
+		if(b == nil)
+			return -1;
+		m = blocksize - off;
+		if(m > n - nn)
+			m = n - nn;
+		memmove(&buf[nn], &b->data[off], m);
+		putdblock(b);
+		nn += m;
+		if(nn == n)
+			break;
+		off = 0;	/* every block after the first is read from its start */
+		a += blocksize;
+	}
+	return n;
+}
+
+/*
+ * write some data to the clump section at a given offset
+ * used to fix up corrupted arenas.
+ */
+u32int
+writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n)	/* returns n, or -1 (as u32int) on error */
+{
+	DBlock *b;
+	u64int a;
+	u32int blocksize, off, m;
+	long nn;
+	int ok;
+
+	if(n == 0)
+		return -1;
+
+	qlock(&arena->lock);	/* held for the whole write, unlike readarena */
+	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
+	if(aa >= a || aa + n > a){
+		qunlock(&arena->lock);
+		seterr(EOk, "writing beyond arena clump storage");
+		return -1;
+	}
+
+	blocksize = arena->blocksize;
+	a = arena->base + aa;
+	off = a & (blocksize - 1);
+	a -= off;
+	nn = 0;
+	for(;;){
+		/* NOTE(review): the mode test uses the total n, not the bytes still to write - confirm a partial last block is handled */
+		b = getdblock(arena->part, a, off != 0 || off + n < blocksize ? ORDWR : OWRITE);
+		if(b == nil){
+			qunlock(&arena->lock);
+			return -1;
+		}
+		dirtydblock(b, DirtyArena);
+		m = blocksize - off;
+		if(m > n - nn)
+			m = n - nn;
+		memmove(&b->data[off], &clbuf[nn], m);
+		// ok = writepart(arena->part, a, b->data, blocksize);
+		ok = 0;	/* the direct writepart is disabled, so the ok < 0 branch below cannot run */
+		putdblock(b);
+		if(ok < 0){
+			qunlock(&arena->lock);
+			return -1;
+		}
+		nn += m;
+		if(nn == n)
+			break;
+		off = 0;
+		a += blocksize;
+	}
+	qunlock(&arena->lock);
+	return n;
+}
+
+/*
+ * allocate space for the clump and write it, updating the arena directory.
+ * returns the clump's offset within the arena and stores start plus that
+ * offset in *pa; returns TWID64 if the arena is sealed or full, or on error.
+ * ZZZ question: should full-arena and real write errors be distinguished?
+ */
+u64int
+writeaclump(Arena *arena, Clump *c, u8int *clbuf, u64int start, u64int *pa)
+{
+	DBlock *b;
+	u64int a, aa;
+	u32int clump, n, nn, m, off, blocksize;
+	int ok;
+	AState as;
+
+	n = c->info.size + ClumpSize + U32Size;	/* U32Size more bytes are written than used advances by below */
+	qlock(&arena->lock);
+	aa = arena->memstats.used;
+	if(arena->memstats.sealed
+	|| aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){
+		if(!arena->memstats.sealed){	/* no room: mark the arena sealed in memory and publish that state */
+			trace(0, "seal memstats %s", arena->name);
+			arena->memstats.sealed = 1;
+			as.arena = arena;
+			as.aa = start+aa;
+			as.stats = arena->memstats;
+			setdcachestate(&as);
+		}
+		qunlock(&arena->lock);
+		return TWID64;
+	}
+	if(packclump(c, &clbuf[0], arena->clumpmagic) < 0){
+		qunlock(&arena->lock);
+		return TWID64;
+	}
+
+	/*
+	 * write the data out one block at a time
+	 */
+	blocksize = arena->blocksize;
+	a = arena->base + aa;
+	off = a & (blocksize - 1);
+	a -= off;
+	nn = 0;
+	for(;;){
+		b = getdblock(arena->part, a, off != 0 ? ORDWR : OWRITE);	/* ORDWR only when starting inside a block */
+		if(b == nil){
+			qunlock(&arena->lock);
+			return TWID64;
+		}
+		dirtydblock(b, DirtyArena);
+		m = blocksize - off;
+		if(m > n - nn)
+			m = n - nn;
+		memmove(&b->data[off], &clbuf[nn], m);
+	//	ok = writepart(arena->part, a, b->data, blocksize);
+		ok = 0;	/* the direct writepart is disabled, so the ok < 0 branch below cannot run */
+		putdblock(b);
+		if(ok < 0){
+			qunlock(&arena->lock);
+			return TWID64;
+		}
+		nn += m;
+		if(nn == n)
+			break;
+		off = 0;
+		a += blocksize;
+	}
+
+	arena->memstats.used += c->info.size + ClumpSize;
+	arena->memstats.uncsize += c->info.uncsize;
+	if(c->info.size < c->info.uncsize)	/* stored size smaller than uncsize: count it in cclumps */
+		arena->memstats.cclumps++;
+
+	clump = arena->memstats.clumps++;
+	if(arena->memstats.clumps == 0)
+		sysfatal("clumps wrapped");
+	arena->wtime = now();
+	if(arena->ctime == 0)
+		arena->ctime = arena->wtime;
+
+	writeclumpinfo(arena, clump, &c->info);	/* NOTE(review): the return value is ignored */
+
+	/* set up for call to setdcachestate */
+	as.arena = arena;
+	as.aa = start+arena->memstats.used;
+	as.stats = arena->memstats;
+
+	/* update this before calling setdcachestate so it cannot be behind dcache.diskstate */
+	*pa = start+aa;
+	setdcachestate(&as);
+	qunlock(&arena->lock);
+
+	return aa;
+}
+
+/*
+ * Order two ATailStats records.  Returns -1 if a sorts before b,
+ * 1 if a sorts after b, and 0 if every compared field is equal.
+ * Fields are examined in the order used, clumps, cclumps, uncsize,
+ * sealed; the first field that differs decides the result.
+ */
+int
+atailcmp(ATailStats *a, ATailStats *b)
+{
+	/* good test */
+	if(a->used != b->used)
+		return a->used < b->used ? -1 : 1;
+
+	/*
+	 * suspect tests - why order this way? (no one cares)
+	 */
+	if(a->clumps != b->clumps)
+		return a->clumps < b->clumps ? -1 : 1;
+	if(a->cclumps != b->cclumps)
+		return a->cclumps < b->cclumps ? -1 : 1;
+	if(a->uncsize != b->uncsize)
+		return a->uncsize < b->uncsize ? -1 : 1;
+	if(a->sealed != b->sealed)
+		return a->sealed < b->sealed ? -1 : 1;
+
+	/*
+	 * everything matches
+	 */
+	return 0;
+}
+
+void
+setatailstate(AState *as)	/* make as->stats the disk state of as->arena and write out the trailers affected */
+{
+	int i, j, osealed;
+	Arena *a;
+	Index *ix;
+
+	trace(0, "setatailstate %s 0x%llux clumps %d", as->arena->name, as->aa, as->stats.clumps);
+
+	ix = mainindex;
+	for(i=0; i<ix->narenas; i++)	/* find as->arena in the main index */
+		if(ix->arenas[i] == as->arena)
+			break;
+	if(i==ix->narenas || as->aa < ix->amap[i].start || as->aa >= ix->amap[i].stop || as->arena != ix->arenas[i]){
+		fprint(2, "funny settailstate 0x%llux\n", as->aa);	/* arena not found, or aa outside its amap range */
+		return;
+	}
+
+	for(j=i; --j>=0; ){	/* step back to the nearest earlier arena whose disk and memory stats agree */
+		a = ix->arenas[j];
+		if(atailcmp(&a->diskstats, &a->memstats) == 0)
+			break;
+	}
+	for(j++; j<=i; j++){	/* update every arena after that one, up to and including i */
+		a = ix->arenas[j];
+		qlock(&a->lock);
+		osealed = a->diskstats.sealed;
+		if(j == i)
+			a->diskstats = as->stats;	/* the target arena takes the state passed in */
+		else
+			a->diskstats = a->memstats;	/* earlier arenas take their current in-memory state */
+		wbarena(a);	/* NOTE(review): the return value is ignored */
+		if(a->diskstats.sealed != osealed && !a->inqueue)	/* sealed flag changed and arena not yet queued */
+			sealarena(a);
+		qunlock(&a->lock);
+	}
+}
+
+/*
+ * once sealed, an arena never has any data added to it.
+ * it should only be changed to fix errors.
+ * NOTE(review): no clump directory sync is visible in this function - confirm where it happens.
+ */
+static void
+sealarena(Arena *arena)
+{
+	arena->inqueue = 1;	/* setatailstate checks inqueue to avoid sealing the same arena twice */
+	backsumarena(arena);	/* queue the arena for background checksumming */
+}
+
+void
+backsumarena(Arena *arena)	/* put arena on sumq for sumproc and wake it */
+{
+	ASum *as;
+
+	if(sumwait.l == nil)	/* initarenasum has not run: nothing will service the queue */
+		return;
+
+	as = MK(ASum);
+	if(as == nil)
+		return;	/* the request is dropped without any report */
+	qlock(&sumlock);
+	as->arena = arena;
+	as->next = sumq;	/* pushed on the front, so the most recent request is served first */
+	sumq = as;
+	rwakeup(&sumwait);
+	qunlock(&sumlock);
+}
+
+static void
+sumproc(void *unused)	/* loop forever: take an arena off sumq and run sumarena on it */
+{
+	ASum *as;
+	Arena *arena;
+
+	USED(unused);
+
+	for(;;){
+		qlock(&sumlock);
+		while(sumq == nil)	/* wait on sumwait until backsumarena queues something */
+			rsleep(&sumwait);
+		as = sumq;
+		sumq = as->next;
+		qunlock(&sumlock);
+		arena = as->arena;
+		free(as);
+
+		sumarena(arena);	/* runs after sumlock has been released */
+	}
+}
+
+void
+sumarena(Arena *arena)	/* SHA1 the arena from header block through trailer and record the score in the trailer */
+{
+	ZBlock *b;
+	DigestState s;
+	u64int a, e;
+	u32int bs;
+	u8int score[VtScoreSize];
+
+	bs = MaxIoSize;	/* read in units of MaxIoSize, or blocksize if that is larger */
+	if(bs < arena->blocksize)
+		bs = arena->blocksize;
+
+	/*
+	 * read & sum all blocks except the last one
+	 */
+	memset(&s, 0, sizeof s);
+	b = alloczblock(bs, 0, arena->part->blocksize);	/* NOTE(review): result is not checked for nil, unlike in loadarena */
+	e = arena->base + arena->size;	/* address of the trailer block */
+	for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){	/* starts at the header block */
+		sleep(arenasumsleeptime);
+		if(a + bs > e)	/* a full-size read would overlap the trailer: fall back to single blocks */
+			bs = arena->blocksize;
+		if(readpart(arena->part, a, b->data, bs) < 0)
+			goto ReadErr;
+		addstat(StatSumRead, 1);
+		addstat(StatSumReadBytes, bs);
+		sha1(b->data, bs, nil, &s);
+	}
+
+	/*
+	 * the last one is special, since it may already have the checksum included
+	 */
+	bs = arena->blocksize;
+	if(readpart(arena->part, e, b->data, bs) < 0){
+ReadErr:
+		logerr(EOk, "sumarena can't sum %s, read at %lld failed: %r", arena->name, a);	/* NOTE(review): prints a even when the failed read was at e */
+		freezblock(b);
+		return;
+	}
+	addstat(StatSumRead, 1);
+	addstat(StatSumReadBytes, bs);
+
+	sha1(b->data, bs-VtScoreSize, nil, &s);	/* hash the trailer without its final VtScoreSize bytes */
+	sha1(zeroscore, VtScoreSize, nil, &s);	/* and hash zeroscore in their place */
+	sha1(nil, 0, score, &s);	/* finish the digest into score */
+
+	/*
+	 * check for no checksum or the same
+	 *
+	 * the writepart is okay because we flushed the dcache in sealarena
+	 */
+	if(scorecmp(score, &b->data[bs - VtScoreSize]) != 0){
+		if(scorecmp(zeroscore, &b->data[bs - VtScoreSize]) != 0)	/* a different, nonzero score was already stored */
+			logerr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
+				arena->name, &b->data[bs - VtScoreSize], score);
+		scorecp(&b->data[bs - VtScoreSize], score);
+		if(writepart(arena->part, e, b->data, bs) < 0)
+			logerr(EOk, "sumarena can't write sum for %s: %r", arena->name);
+	}
+	freezblock(b);
+
+	qlock(&arena->lock);
+	scorecp(arena->score, score);	/* arena->score is updated even if the writepart above failed */
+	qunlock(&arena->lock);
+}
+
+/*
+ * write the arena trailer block to the partition
+ */
+int
+wbarena(Arena *arena)	/* returns 0 on success, -1 on failure */
+{
+	DBlock *b;
+	int bad;
+
+	if((b = getdblock(arena->part, arena->base + arena->size, OWRITE)) == nil){	/* the trailer is at base + size */
+		logerr(EAdmin, "can't write arena trailer: %r");
+		return -1;
+	}
+	dirtydblock(b, DirtyArenaTrailer);	/* the block is marked dirty before the checks below run */
+	bad = okarena(arena)<0 || packarena(arena, b->data)<0;
+	putdblock(b);
+	if(bad)
+		return -1;
+	return 0;
+}
+
+int
+wbarenahead(Arena *arena)	/* pack an ArenaHead from arena and write it to the block before arena->base; 0 or -1 */
+{
+	ZBlock *b;
+	ArenaHead head;
+	int bad;
+
+	namecp(head.name, arena->name);
+	head.version = arena->version;
+	head.size = arena->size + 2 * arena->blocksize;	/* the header records the size including header and trailer blocks */
+	head.blocksize = arena->blocksize;
+	head.clumpmagic = arena->clumpmagic;
+	b = alloczblock(arena->blocksize, 1, arena->part->blocksize);
+	if(b == nil){
+		logerr(EAdmin, "can't write arena header: %r");
+/* ZZZ add error message?  (the logerr above already reports this failure) */
+		return -1;
+	}
+	/*
+	 * this writepart is okay because it only happens
+	 * during initialization.
+	 */
+	bad = packarenahead(&head, b->data)<0 ||
+	      writepart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize)<0;
+	freezblock(b);
+	if(bad)
+		return -1;
+	return 0;
+}
+
+/*
+ * read the arena trailer (required) and header (checked, mismatches only logged) from disk
+ */
+static int
+loadarena(Arena *arena)
+{
+	ArenaHead head;
+	ZBlock *b;
+
+	b = alloczblock(arena->blocksize, 0, arena->part->blocksize);
+	if(b == nil)
+		return -1;
+	if(readpart(arena->part, arena->base + arena->size, b->data, arena->blocksize) < 0){	/* trailer block */
+		freezblock(b);
+		return -1;
+	}
+	if(unpackarena(arena, b->data) < 0){
+		freezblock(b);
+		return -1;
+	}
+	if(arena->version != ArenaVersion4 && arena->version != ArenaVersion5){
+		seterr(EAdmin, "unknown arena version %d", arena->version);
+		freezblock(b);
+		return -1;
+	}
+	scorecp(arena->score, &b->data[arena->blocksize - VtScoreSize]);	/* score occupies the tail of the trailer block */
+
+	if(readpart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize) < 0){	/* header block */
+		logerr(EAdmin, "can't read arena header: %r");
+		freezblock(b);
+		return 0;	/* an unreadable header is logged, not treated as failure */
+	}
+	if(unpackarenahead(&head, b->data) < 0)
+		logerr(ECorrupt, "corrupted arena header: %r");
+	else if(namecmp(arena->name, head.name)!=0
+	     || arena->clumpmagic != head.clumpmagic
+	     || arena->version != head.version
+	     || arena->blocksize != head.blocksize
+	     || arena->size + 2 * arena->blocksize != head.size){
+		if(namecmp(arena->name, head.name)!=0)
+			logerr(ECorrupt, "arena tail name %s head %s", 
+				arena->name, head.name);
+		else if(arena->clumpmagic != head.clumpmagic)
+			logerr(ECorrupt, "arena tail clumpmagic 0x%lux head 0x%lux",
+				(ulong)arena->clumpmagic, (ulong)head.clumpmagic);
+		else if(arena->version != head.version)
+			logerr(ECorrupt, "arena tail version %d head version %d",
+				arena->version, head.version);
+		else if(arena->blocksize != head.blocksize)
+			logerr(ECorrupt, "arena tail block size %d head %d",
+				arena->blocksize, head.blocksize);
+		else if(arena->size+2*arena->blocksize != head.size)
+			logerr(ECorrupt, "arena tail size %lud head %lud",
+				(ulong)arena->size+2*arena->blocksize, head.size);	/* NOTE(review): confirm %lud matches head.size's type */
+		else	/* not reached: the enclosing test guarantees one case above matches */
+			logerr(ECorrupt, "arena header inconsistent with arena data");
+	}
+	freezblock(b);
+
+	return 0;
+}
+
+static int
+okarena(Arena *arena)
+{
+	ATailStats *st;
+	u64int dsize;
+	int rc;
+
+	st = &arena->diskstats;
+	rc = 0;
+	dsize = arenadirsize(arena, st->clumps);
+	/* the only fatal check: used space plus arenadirsize() must fit in the arena */
+	if(st->used + dsize > arena->size){
+		seterr(ECorrupt, "arena used > size");
+		rc = -1;
+	}
+	/* the remaining inconsistencies are logged but do not change the result */
+	if(st->cclumps > st->clumps)
+		logerr(ECorrupt, "arena has more compressed clumps than total clumps");
+	if(st->uncsize + st->clumps * ClumpSize + arena->blocksize < st->used)
+		logerr(ECorrupt, "arena uncompressed size inconsistent with used space %lld %d %lld", st->uncsize, st->clumps, st->used);
+	if(arena->ctime > arena->wtime)
+		logerr(ECorrupt, "arena creation time after last write time");
+	return rc;
+}
+
+static CIBlock*
+getcib(Arena *arena, int clump, int writing, CIBlock *rock)
+{
+	u32int blk, byteoff;
+	int how;
+
+	if(clump >= arena->memstats.clumps){
+		seterr(EOk, "clump directory access out of range");
+		return nil;
+	}
+	/* locate the directory block holding this clump's entry, and the entry's offset in it */
+	blk = clump / arena->clumpmax;
+	byteoff = (clump - blk * arena->clumpmax) * ClumpInfoSize;
+	rock->block = blk;
+	rock->offset = byteoff;
+
+	/*
+	 * writing the entry of the last clump at the very start of a
+	 * block opens it OWRITE; every other write is ORDWR.
+	 */
+	how = OREAD;
+	if(writing)
+		how = (byteoff == 0 && clump == arena->memstats.clumps-1) ? OWRITE : ORDWR;
+
+	/* directory blocks are addressed downward from arena->base + arena->size */
+	rock->data = getdblock(arena->part,
+		arena->base + arena->size - (blk + 1) * arena->blocksize, how);
+	if(rock->data == nil)
+		return nil;
+	return rock;
+}
+
+static void
+putcib(Arena *arena, CIBlock *cib)	/* arena is not used */
+{
+	putdblock(cib->data);	/* release the block that getcib obtained */
+	cib->data = nil;	/* clear the now-released reference */
+}
diff --git a/src/cmd/venti/srv/arenas.c b/src/cmd/venti/srv/arenas.c
new file mode 100644
index 0000000..2ad1bb0
--- /dev/null
+++ b/src/cmd/venti/srv/arenas.c
@@ -0,0 +1,414 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+typedef struct AHash	AHash;
+
+/*
+ * hash table for finding arenas based on their names.
+ */
+struct AHash
+{
+	AHash	*next;		/* next entry chained in the same bucket */
+	Arena	*arena;
+};
+
+enum
+{
+	AHashSize	= 512		/* buckets are selected with & (AHashSize - 1), so keep this a power of 2 */
+};
+
+static AHash	*ahash[AHashSize];
+
+static u32int
+hashstr(char *s)
+{
+	u32int sum;
+	int ch, mixed;
+
+	/* two mixing steps per character: one on a folded copy, one on the raw value */
+	for(sum = 0; *s != 0; s++){
+		ch = *s;
+		mixed = ch ^ (ch << 6);
+		sum += (mixed << 11) ^ (mixed >> 1);
+		sum ^= (ch << 14) + (ch << 7) + (ch << 4) + ch;
+	}
+	return sum;
+}
+
+int
+addarena(Arena *arena)
+{
+	AHash *ent, **slot;
+
+	/* allocate the node, then push it on the front of its bucket's chain */
+	ent = MK(AHash);
+	if(ent == nil)
+		return -1;
+	slot = &ahash[hashstr(arena->name) & (AHashSize - 1)];
+	ent->arena = arena;
+	ent->next = *slot;
+	*slot = ent;
+	return 0;
+}
+
+Arena*
+findarena(char *name)
+{
+	AHash *ent;
+
+	/* search only the chain of the bucket that name hashes to */
+	ent = ahash[hashstr(name) & (AHashSize - 1)];
+	while(ent != nil && strcmp(ent->arena->name, name) != 0)
+		ent = ent->next;
+	/* nil when no registered arena has this name */
+	return ent != nil ? ent->arena : nil;
+}
+
+int
+delarena(Arena *arena)
+{
+	AHash **link, *ent;
+
+	/*
+	 * link always points at the pointer that refers to ent,
+	 * so unlinking needs no special case for the chain head.
+	 */
+	link = &ahash[hashstr(arena->name) & (AHashSize - 1)];
+	while((ent = *link) != nil){
+		if(ent->arena == arena){
+			*link = ent->next;
+			free(ent);
+			return 0;
+		}
+		link = &ent->next;
+	}
+	/* this arena is not in the table */
+	return -1;
+}
+
+/*
+ * Read and validate the arena partition header and arena table from part,
+ * initialize every arena listed in the table, and register the arenas by name.
+ * Returns nil if anything cannot be read or is inconsistent.
+ */
+ArenaPart*
+initarenapart(Part *part)
+{
+	AMapN amn;
+	ArenaPart *ap;
+	ZBlock *b;
+	u32int i;
+	int ok;
+
+	b = alloczblock(HeadSize, 0, 0);
+	if(b == nil || readpart(part, PartBlank, b->data, HeadSize) < 0){
+		seterr(EAdmin, "can't read arena partition header: %r");
+		freezblock(b);	/* b is allocated when only the read failed */
+		return nil;
+	}
+	ap = MKZ(ArenaPart);
+	if(ap == nil){
+		freezblock(b);
+		return nil;
+	}
+	ap->part = part;
+	ok = unpackarenapart(ap, b->data);
+	freezblock(b);
+	if(ok < 0){
+		freearenapart(ap, 0);
+		return nil;
+	}
+	ap->tabbase = (PartBlank + HeadSize + ap->blocksize - 1) & ~(ap->blocksize - 1);
+	if(ap->version != ArenaPartVersion){
+		seterr(ECorrupt, "unknown arena partition version %d", ap->version);
+		freearenapart(ap, 0);
+		return nil;
+	}
+	if(ap->blocksize == 0 || (ap->blocksize & (ap->blocksize - 1))){	/* 0 would slip through the mask test */
+		seterr(ECorrupt, "illegal non-power-of-2 block size %d\n", ap->blocksize);
+		freearenapart(ap, 0);
+		return nil;
+	}
+	if(ap->tabbase >= ap->arenabase){
+		seterr(ECorrupt, "arena partition table overlaps with arena storage");
+		freearenapart(ap, 0);
+		return nil;
+	}
+	ap->tabsize = ap->arenabase - ap->tabbase;
+	partblocksize(part, ap->blocksize);
+	ap->size = ap->part->size & ~(u64int)(ap->blocksize - 1);
+	if(readarenamap(&amn, part, ap->tabbase, ap->tabsize) < 0){
+		freearenapart(ap, 0);
+		return nil;
+	}
+	ap->narenas = amn.n;
+	ap->map = amn.map;
+	if(okamap(ap->map, ap->narenas, ap->arenabase, ap->size, "arena table") < 0){
+		freearenapart(ap, 0);
+		return nil;
+	}
+
+	ap->arenas = MKNZ(Arena*, ap->narenas);
+	for(i = 0; i < ap->narenas; i++){
+		ap->arenas[i] = initarena(part, ap->map[i].start, ap->map[i].stop - ap->map[i].start, ap->blocksize);
+		if(ap->arenas[i] == nil){
+			seterr(ECorrupt, "%s: %r", ap->map[i].name);
+			freearenapart(ap, 1);
+			return nil;
+		}
+		if(namecmp(ap->map[i].name, ap->arenas[i]->name) != 0){
+			seterr(ECorrupt, "arena name mismatches with expected name: %s vs. %s", ap->map[i].name, ap->arenas[i]->name);
+			freearenapart(ap, 1);
+			return nil;
+		}
+		if(findarena(ap->arenas[i]->name)){
+			seterr(ECorrupt, "duplicate arena name %s in %s", ap->map[i].name, ap->part->name);
+			freearenapart(ap, 1);
+			return nil;
+		}
+	}
+	/* register the arenas by name only once all of them have passed */
+	for(i = 0; i < ap->narenas; i++)
+		addarena(ap->arenas[i]);
+	return ap;
+}
+
+/* Create a new, empty arena partition on part and write out its header and arena table. */
+ArenaPart*
+newarenapart(Part *part, u32int blocksize, u32int tabsize)
+{
+	ArenaPart *ap;
+
+	/* zero passes the mask test below but is not a usable block size */
+	if(blocksize == 0 || (blocksize & (blocksize - 1))){
+		seterr(ECorrupt, "illegal non-power-of-2 block size %d\n", blocksize);
+		return nil;
+	}
+	ap = MKZ(ArenaPart);
+	if(ap == nil)
+		return nil;
+	/* layout: header at PartBlank, then the table, then arena storage, each rounded up to a block */
+	ap->version = ArenaPartVersion;
+	ap->part = part;
+	ap->blocksize = blocksize;
+	partblocksize(part, blocksize);
+	ap->size = part->size & ~(u64int)(blocksize - 1);
+	ap->tabbase = (PartBlank + HeadSize + blocksize - 1) & ~(blocksize - 1);
+	ap->arenabase = (ap->tabbase + tabsize + blocksize - 1) & ~(blocksize - 1);
+	ap->tabsize = ap->arenabase - ap->tabbase;
+	ap->narenas = 0;
+	if(wbarenapart(ap) < 0){
+		freearenapart(ap, 0);
+		return nil;
+	}
+	return ap;
+}
+
+int
+wbarenapart(ArenaPart *ap)
+{
+	ZBlock *hdr;
+	int rc;
+
+	/* refuse to write a table that okamap rejects */
+	if(okamap(ap->map, ap->narenas, ap->arenabase, ap->size, "arena table") < 0)
+		return -1;
+	hdr = alloczblock(HeadSize, 1, 0);
+	if(hdr == nil)
+		return -1;	/* TODO(review): no error string is set on this path */
+	rc = 0;
+	if(packarenapart(ap, hdr->data) < 0){
+		seterr(ECorrupt, "can't make arena partition header: %r");
+		rc = -1;
+	}else if(writepart(ap->part, PartBlank, hdr->data, HeadSize) < 0){
+		seterr(EAdmin, "can't write arena partition header: %r");
+		rc = -1;
+	}
+	freezblock(hdr);
+	if(rc < 0)
+		return -1;
+	/* the header has been written; now write the arena table itself */
+	return wbarenamap(ap->map, ap->narenas, ap->part, ap->tabbase, ap->tabsize);
+}
+
+void
+freearenapart(ArenaPart *ap, int freearenas)
+{
+	Arena *a;
+	int i;
+
+	if(ap == nil)
+		return;
+	for(i = 0; freearenas && i < ap->narenas; i++){
+		a = ap->arenas[i];
+		if(a != nil){
+			delarena(a);	/* drop it from the name table before freeing it */
+			freearena(a);
+		}
+	}
+	free(ap->map);
+	free(ap->arenas);
+	free(ap);
+}
+
+int
+okamap(AMap *am, int n, u64int start, u64int stop, char *what)
+{
+	AMap *m;
+	u64int lo;
+	u32int i;
+
+	/* lo is the lowest address at which the next range may start */
+	lo = start;
+	for(i = 0; i < n; i++){
+		m = &am[i];
+		if(m->start < lo){
+			seterr(ECorrupt, i == 0 ? "invalid start address in %s" : "overlapping ranges in %s", what);
+			return -1;
+		}
+		if(m->stop < m->start){
+			seterr(ECorrupt, "invalid range in %s", what);
+			return -1;
+		}
+		lo = m->stop;
+	}
+	if(lo > stop){	/* the last range must not run past stop */
+		seterr(ECorrupt, "invalid ending address in %s", what);
+		return -1;
+	}
+	return 0;
+}
+
+int
+maparenas(AMap *am, Arena **arenas, int n, char *what)
+{
+	u32int i;
+
+	/* resolve each map entry by name; give up at the first one that is not registered */
+	for(i = 0; i < n; i++){
+		if((arenas[i] = findarena(am[i].name)) == nil){
+			seterr(EAdmin, "can't find arena '%s' for '%s'\n", am[i].name, what);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/* parse the arena map stored in the size bytes at base on part into amn; returns parseamap's status */
+int
+readarenamap(AMapN *amn, Part *part, u64int base, u32int size)
+{
+	IFile f;
+	int ok;	/* signed, so parseamap's -1 is not round-tripped through an unsigned type */
+	if(partifile(&f, part, base, size) < 0)
+		return -1;
+	ok = parseamap(&f, amn);
+	freeifile(&f);
+	return ok;
+}
+
+int
+wbarenamap(AMap *am, int n, Part *part, u64int base, u64int size)
+{
+	Fmt out;
+	ZBlock *zb;
+	int rc;
+
+	zb = alloczblock(size, 1, part->blocksize);
+	if(zb == nil)
+		return -1;
+
+	/*
+	 * format the table as text into the block, then write the whole block at base
+	 */
+	fmtzbinit(&out, zb);
+	rc = -1;
+	if(outputamap(&out, am, n) < 0)
+		seterr(ECorrupt, "arena set size too small");
+	else if(writepart(part, base, zb->data, size) < 0)
+		seterr(EAdmin, "can't write arena set: %r");
+	else
+		rc = 0;
+	freezblock(zb);
+	return rc;
+}
+
+/*
+ * amap: n '\n' amapelem * n
+ * n: u32int
+ * amapelem: name '\t' astart '\t' astop '\n'
+ * astart, astop: u64int
+ */
+int
+parseamap(IFile *f, AMapN *amn)
+{
+	AMap *am;
+	u64int v64;
+	u32int v;
+	char *s, *t, *flds[4];
+	int i, n;
+
+	/*
+	 * element count: range-check it while still unsigned, before it sizes the allocation
+	 */
+	if(ifileu32int(f, &v) < 0){
+		seterr(ECorrupt, "syntax error: bad number of elements in %s", f->name);
+		return -1;
+	}
+	if(v > MaxAMap){
+		seterr(ECorrupt, "illegal number of elements in %s", f->name);
+		return -1;
+	}
+	n = v;
+	am = MKNZ(AMap, n);
+	if(am == nil){
+		fprint(2, "out of memory\n");
+		return -1;
+	}
+	for(i = 0; i < n; i++){
+		s = ifileline(f);
+		t = s != nil ? estrdup(s) : nil;	/* getfields splits s in place; keep a copy for the message */
+		if(s == nil || getfields(s, flds, 4, 0, "\t") != 3){
+			fprint(2, "early eof after %d of %d, %s:#%d: %s\n", i, n, f->name, f->pos, t);
+			free(t);
+			free(am);
+			return -1;
+		}
+		free(t);
+		if(nameok(flds[0]) < 0){
+			free(am);
+			return -1;
+		}
+		namecp(am[i].name, flds[0]);
+		if(stru64int(flds[1], &v64) < 0){
+			seterr(ECorrupt, "syntax error: bad arena base address in %s", f->name);
+			free(am);
+			return -1;
+		}
+		am[i].start = v64;
+		if(stru64int(flds[2], &v64) < 0){
+			seterr(ECorrupt, "syntax error: bad arena size in %s", f->name);
+			free(am);
+			return -1;
+		}
+		am[i].stop = v64;
+	}
+
+	amn->map = am;	/* amn is only filled in once every entry has parsed */
+	amn->n = n;
+	return 0;
+}
+
+int
+outputamap(Fmt *f, AMap *am, int n)
+{
+	int i, rc;
+
+	/* the count goes on the first line, */
+	/* followed by one name/start/stop line per entry; the first failed print ends the output */
+	rc = fmtprint(f, "%ud\n", n);
+	for(i = 0; rc >= 0 && i < n; i++)
+		rc = fmtprint(f, "%s\t%llud\t%llud\n", am[i].name, am[i].start, am[i].stop);
+	return rc < 0 ? -1 : 0;
+}
diff --git a/src/cmd/venti/srv/bloom.c b/src/cmd/venti/srv/bloom.c
new file mode 100644
index 0000000..5c50a0d
--- /dev/null
+++ b/src/cmd/venti/srv/bloom.c
@@ -0,0 +1,210 @@
+/*
+ * Bloom filter tracking which scores are present in our arenas
+ * and (more importantly) which are not.  
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+int
+bloominit(Bloom *b, vlong vsize, u8int *data)
+{
+	ulong size;
+	/* vsize is the filter size in bytes; data, if non-nil, is a packed header handed to unpackbloomhead */
+	size = vsize;
+	if(size != vsize){	/* truncation */
+		werrstr("bloom data too big");
+		return -1;
+	}
+	
+	b->size = size;
+	b->nhash = 32;	/* will be fixed by caller on initialization */
+	if(data != nil)
+		if(unpackbloomhead(b, data) < 0)	/* presumably overwrites size/nhash: readbloom passes vsize 0 and then uses b->size */
+			return -1;
+	/* debugging output */
+fprint(2, "bloom size %lud nhash %d\n", b->size, b->nhash);
+	b->mask = b->size-1;	/* NOTE(review): size is in bytes but mask is applied to bit indices in _markbloomfilter/_inbloomfilter; confirm intended */
+	b->data = data;
+	return 0;
+}
+
+void
+wbbloomhead(Bloom *b)
+{
+	packbloomhead(b, b->data);	/* pack the header into the start of the in-memory filter; nothing is written to disk here */
+}
+
+/*
+ * read the bloom filter stored at the start of p; returns nil on failure.
+ */
+Bloom*
+readbloom(Part *p)
+{
+	int i, n;
+	uint ones;
+	uchar buf[512], *data;
+	u32int *a;
+	Bloom *b;
+	
+	/* read the header before allocating b, so a failed read leaves nothing to free */
+	if(readpart(p, 0, buf, sizeof buf) < 0)
+		return nil;
+fprint(2, "header %.16H\n", buf);
+	b = vtmallocz(sizeof *b);
+	if(bloominit(b, 0, buf) < 0){
+		vtfree(b);
+		return nil;
+	}
+	data = vtmallocz(b->size);	/* the whole filter, header included, is read from offset 0 */
+	if(readpart(p, 0, data, b->size) < 0){
+		vtfree(b);
+		vtfree(data);
+		return nil;
+	}
+	b->data = data;
+	b->part = p;
+	a = (u32int*)b->data;	/* count the set bits to seed StatBloomOnes */
+	n = b->size/4;
+	ones = 0;
+	for(i=0; i<n; i++)
+		ones += countbits(a[i]);
+	addstat(StatBloomOnes, ones);
+	if(b->size == MaxBloomSize)	/* 2^32 overflows ulong */
+		addstat(StatBloomBits, b->size*8-1);
+	else
+		addstat(StatBloomBits, b->size*8);
+	return b;
+}
+
+int
+writebloom(Bloom *b)
+{
+	wbbloomhead(b);	/* refresh the packed header inside b->data first */
+	return writepart(b->part, 0, b->data, b->size);	/* then write the whole filter at offset 0 */
+}
+
+/*
+ * Derive two random 32-bit quantities a, b from the score
+ * and then use a+b*i as a sequence of bloom filter indices.
+ * Michael Mitzenmacher has a recent (2005) paper saying this is okay.
+ * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header.
+ */
+static void
+gethashes(u8int *score, ulong *h)	/* fills h[0..BloomMaxHash-1] */
+{
+	int i;
+	u32int a, b;
+
+	a = 0;
+	b = 0;
+	for(i=4; i+8<=VtScoreSize; i+=8){	/* starts at byte 4: the first four score bytes are not used */
+		a ^= *(u32int*)(score+i);	/* NOTE(review): reads score through u32int*; assumes such loads are acceptable on the target */
+		b ^= *(u32int*)(score+i+4);
+	}
+	if(i+4 <= VtScoreSize)	/* fold in a trailing 4-byte word if one is left over */
+		a ^= *(u32int*)(score+i);
+	for(i=0; i<BloomMaxHash; i++, a+=b)
+		h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;	/* values below the header's bit range are raised to its end */
+}
+
+/* set the filter bits selected by score's hashes, adding the newly set ones to StatBloomOnes */
+static void
+_markbloomfilter(Bloom *b, u8int *score)
+{
+	int i, nnew;
+	ulong h[BloomMaxHash];
+	u32int x, *y, z, *tab;
+
+	trace("markbloomfilter", "markbloomfilter %V", score);
+	gethashes(score, h);
+	nnew = 0;
+	tab = (u32int*)b->data;
+	for(i=0; i<b->nhash; i++){
+		x = h[i];
+		y = &tab[(x&b->mask)>>5];	/* word holding the bit */
+		z = (u32int)1<<(x&31);	/* unsigned: 1<<31 does not fit in a signed int */
+		if(!(*y&z)){
+			nnew++;
+			*y |= z;
+		}
+	}
+	if(nnew)
+		addstat(StatBloomOnes, nnew);
+	trace("markbloomfilter", "markbloomfilter exit");
+}
+
+static int
+_inbloomfilter(Bloom *b, u8int *score)	/* 0 as soon as one selected bit is clear, 1 if all are set */
+{
+	int i;
+	ulong h[BloomMaxHash], x;
+	u32int *tab;
+
+	gethashes(score, h);
+	tab = (u32int*)b->data;
+	for(i=0; i<b->nhash; i++){
+		x = h[i];
+		if(!(tab[(x&b->mask)>>5] & ((u32int)1<<(x&31))))	/* unsigned: 1<<31 does not fit in a signed int */
+			return 0;
+	}
+	return 1;
+}
+
+/* report whether score may be present; with no filter (b == nil) the answer is always 1 */
+int
+inbloomfilter(Bloom *b, u8int *score)
+{
+	int r;
+	uint ms;
+
+	if(b == nil)
+		return 1;
+	ms = msec();
+	rlock(&b->lk);
+	r = _inbloomfilter(b, score);
+	runlock(&b->lk);
+	ms = msec() - ms;	/* elapsed time is end minus start */
+	addstat2(StatBloomLookup, 1, StatBloomLookupTime, ms);
+	if(r)
+		addstat(StatBloomMiss, 1);	/* the filter could not rule the score out */
+	else
+		addstat(StatBloomHit, 1);
+	return r;
+}
+
+void
+markbloomfilter(Bloom *b, u8int *score)
+{
+	if(b == nil)
+		return;
+	/* b->lk is taken for reading, then b->mod around the update; both are released in reverse order */
+	rlock(&b->lk);
+	qlock(&b->mod);
+	_markbloomfilter(b, score);
+	qunlock(&b->mod);
+	runlock(&b->lk);
+}
+
+static void
+bloomwriteproc(void *v)
+{
+	Bloom *b;
+	/* loop forever: wait on writechan, write the filter out, then signal writedonechan */
+	b = v;
+	for(;;){
+		recv(b->writechan, 0);
+		if(writebloom(b) < 0)
+			fprint(2, "oops! writing bloom: %r\n");	/* a failed write is only reported; the loop continues */
+		send(b->writedonechan, 0);
+	}
+}
+
+void
+startbloomproc(Bloom *b)
+{
+	b->writechan = chancreate(sizeof(void*), 0);	/* requests to write the filter */
+	b->writedonechan = chancreate(sizeof(void*), 0);	/* completion notices from bloomwriteproc */
+	vtproc(bloomwriteproc, b);	/* start the writer */
+}
diff --git a/src/cmd/venti/srv/buildbuck.c b/src/cmd/venti/srv/buildbuck.c
new file mode 100644
index 0000000..240e77d
--- /dev/null
+++ b/src/cmd/venti/srv/buildbuck.c
@@ -0,0 +1,132 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * An IEStream reads a sorted list of packed index entries from a partition through a buffer.
+ */
+struct IEStream
+{
+	Part	*part;		/* partition the entries are read from */
+	u64int	off;		/* read position within part */
+	u64int	n;		/* number of valid ientries left to read */
+	u32int	size;		/* allocated space in buffer */
+	u8int	*buf;		/* the buffer itself */
+	u8int	*pos;		/* current place in buffer */
+	u8int	*epos;		/* end of valid buffer contents */
+};
+
+IEStream*
+initiestream(Part *part, u64int off, u64int clumps, u32int size)	/* nil if an allocation fails */
+{
+	IEStream *ies;
+
+	if((ies = MKZ(IEStream)) == nil || (ies->buf = MKN(u8int, size)) == nil){	/* callers test the result for nil */
+		free(ies);
+		return nil;
+	}
+	ies->pos = ies->epos = ies->buf;	/* the buffer starts out empty */
+	ies->off = off;
+	ies->n = clumps;
+	ies->size = size;
+	ies->part = part;
+	return ies;
+}
+
+void
+freeiestream(IEStream *ies)
+{
+	if(ies != nil){	/* freeing a nil stream is a no-op */
+		free(ies->buf);
+		free(ies);
+	}
+}
+
+/*
+ * Return the next IEntry (still packed) in the stream, refilling the buffer when needed; nil on error or when nothing is left.
+ */
+static u8int*
+peekientry(IEStream *ies)
+{
+	u32int n, nn;
+
+	n = ies->epos - ies->pos;	/* bytes still unread in the buffer */
+	if(n < IEntrySize){
+		memmove(ies->buf, ies->pos, n);	/* slide the leftover bytes to the front */
+		ies->epos = &ies->buf[n];
+		ies->pos = ies->buf;
+		nn = ies->size;
+		if(nn > ies->n * IEntrySize)	/* never hold more than the entries that remain */
+			nn = ies->n * IEntrySize;
+		nn -= n;	/* what must actually be read */
+		if(nn == 0)
+			return nil;
+//fprint(2, "peek %d from %llud into %p\n", nn, ies->off, ies->epos);
+		if(readpart(ies->part, ies->off, ies->epos, nn) < 0){
+			seterr(EOk, "can't read sorted index entries: %r");
+			return nil;
+		}
+		ies->epos += nn;
+		ies->off += nn;
+	}
+	return ies->pos;	/* the entry is not consumed; the caller advances pos */
+}
+
+/*
+ * Compute the bucket number for the given IEntry.
+ * Knows that the score is the first thing in the packed
+ * representation.
+ */
+static u32int
+iebuck(Index *ix, u8int *b, IBucket *ib, IEStream *ies)
+{
+	USED(ies);	/* ies and ib are accepted but not needed */
+	USED(ib);
+	return hashbits(b, 32) / ix->div;	/* hashbits() of the packed entry, divided by ix->div */
+}
+
+/*
+ * Fill ib with the next bucket in the stream; returns its bucket number, or TWID32 at end of stream or on error.
+ */
+u32int
+buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint maxdata)
+{
+	IEntry ie1, ie2;
+	u8int *b;
+	u32int buck;
+
+	buck = TWID32;	/* stays TWID32 if the stream is already empty */
+	ib->n = 0;
+	while(ies->n){
+		b = peekientry(ies);
+		if(b == nil)
+			return TWID32;	/* indistinguishable, for the caller, from end of stream */
+//fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, iebuck(ix, b, ib, ies), b);
+		if(ib->n == 0)
+			buck = iebuck(ix, b, ib, ies);	/* the first entry decides which bucket this is */
+		else{
+			if(buck != iebuck(ix, b, ib, ies))
+				break;	/* next entry belongs to a later bucket; leave it in the stream */
+			if(ientrycmp(&ib->data[(ib->n - 1)* IEntrySize], b) == 0){
+				/*
+				 * guess that the larger address is the correct one to use
+				 */
+				unpackientry(&ie1, &ib->data[(ib->n - 1)* IEntrySize]);
+				unpackientry(&ie2, b);
+				seterr(EOk, "duplicate index entry for score=%V type=%d", ie1.score, ie1.ia.type);
+				ib->n--;	/* drop the stored copy; one of the two is stored again below */
+				if(ie1.ia.addr > ie2.ia.addr)
+					memmove(b, &ib->data[ib->n * IEntrySize], IEntrySize);
+			}
+		}
+		if((ib->n+1)*IEntrySize > maxdata){
+			seterr(EOk, "bucket overflow");
+			return TWID32;
+		}
+		memmove(&ib->data[ib->n * IEntrySize], b, IEntrySize);
+		ib->n++;
+		ies->n--;
+		ies->pos += IEntrySize;	/* consume the entry that peekientry returned */
+	}
+	return buck;
+}
diff --git a/src/cmd/venti/srv/buildindex.c b/src/cmd/venti/srv/buildindex.c
new file mode 100644
index 0000000..8714474
--- /dev/null
+++ b/src/cmd/venti/srv/buildindex.c
@@ -0,0 +1,160 @@
+/*
+ * Rebuild the Venti index from scratch.
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * Write one bucket to its index section.  A large buffer here
+ * could absorb sporadic runs of blocks into a single write
+ * and so avoid disk seeks.
+ */
+static int
+writebucket(Index *ix, u32int buck, IBucket *ib, ZBlock *b)
+{
+	ISect *sect;
+	u64int addr;
+
+	sect = ix->sects[indexsect0(ix, buck)];
+	if(buck < sect->start || buck >= sect->stop){
+		seterr(EAdmin, "cannot find index section for bucket %lud\n", (ulong)buck);
+		return -1;
+	}
+
+	/*
+	 * buck is an index-wide bucket number; within its section the
+	 * buckets are laid out from blockbase, 1<<blocklog bytes apart.
+	 */
+	addr = sect->blockbase + ((u64int)(buck - sect->start) << sect->blocklog);
+	packibucket(ib, b->data, sect->bucketmagic);
+	return writepart(sect->part, addr, b->data, sect->blocksize);
+}
+
+/* write the index buckets built from the sorted entries in part; -1 on allocation, range or write failure */
+static int
+buildindex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
+{
+	IEStream *ies;
+	IBucket ib, zib;
+	ZBlock *z, *b;
+	u32int next, buck;
+	int ok;
+	uint nbuck;
+	u64int found = 0;
+//ZZZ make buffer size configurable
+	b = alloczblock(ix->blocksize, 0, ix->blocksize);
+	z = alloczblock(ix->blocksize, 1, ix->blocksize);
+	ies = initiestream(part, off, clumps, 64*1024);
+	if(b == nil || z == nil || ies == nil){
+		/* report the failure; the cleanup at breakout frees whatever was allocated */
+		ok = -1;
+		goto breakout;
+	}
+	ok = 0;
+	next = 0;
+	memset(&ib, 0, sizeof ib);
+	ib.data = b->data + IBucketSize;
+	zib.data = z->data + IBucketSize;
+	zib.n = 0;
+	nbuck = 0;
+	for(;;){
+		buck = buildbucket(ix, ies, &ib, ix->blocksize-IBucketSize);
+		found += ib.n;
+		if(zero){
+			/* write empty buckets for every bucket number skipped since the last one written */
+			for(; next != buck; next++){
+				if(next == ix->buckets){
+					if(buck != TWID32){
+						fprint(2, "bucket out of range\n");
+						ok = -1;
+					}
+					goto breakout;
+				}
+				if(writebucket(ix, next, &zib, z) < 0){
+					fprint(2, "can't write zero bucket to buck=%d: %r\n", next);
+					ok = -1;
+				}
+			}
+		}
+		if(buck >= ix->buckets){
+			if(buck == TWID32)	/* NOTE(review): buildbucket also returns TWID32 on error, which ends the loop with ok unchanged */
+				break;
+			fprint(2, "bucket out of range\n");
+			ok = -1;
+			goto breakout;
+		}
+		if(writebucket(ix, buck, &ib, b) < 0){
+			fprint(2, "bad bucket found=%lld: %r\n", found);
+			ok = -1;
+		}
+		next = buck + 1;
+		if(++nbuck%10000 == 0)
+			fprint(2, "\t%,d buckets written...\n", nbuck);
+	}
+breakout:;
+	fprint(2, "wrote index with %lld entries\n", found);
+	freeiestream(ies);
+	freezblock(z);
+	freezblock(b);
+	return ok;
+}
+
+void
+usage(void)
+{
+	fprint(2, "usage: buildindex [-Z] [-B blockcachesize] config tmppart\n");
+	threadexitsall("usage");	/* non-empty exit string: a usage error is a failure */
+}
+
+Config conf;
+
+void
+threadmain(int argc, char *argv[])
+{
+	Part *part;
+	u64int clumps, base;
+	u32int bcmem;
+	int zero;
+
+	zero = 1;
+	bcmem = 0;
+	ARGBEGIN{
+	case 'B':
+		bcmem = unittoull(EARGF(usage()));	/* ARGF would return nil if -B had no value */
+		break;
+	case 'Z':
+		zero = 0;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc != 2)
+		usage();
+
+	if(initventi(argv[0], &conf) < 0)
+		sysfatal("can't init venti: %r");
+
+	if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))	/* enforce a minimum cache size */
+		bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
+	if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
+	initdcache(bcmem);
+
+	fprint(2, "building a new index %s using %s for temporary storage\n", mainindex->name, argv[1]);
+
+	part = initpart(argv[1], ORDWR|ODIRECT);
+	if(part == nil)
+		sysfatal("can't initialize temporary partition: %r");
+
+	clumps = sortrawientries(mainindex, part, &base, mainindex->bloom);
+	if(clumps == TWID64)
+		sysfatal("can't build sorted index: %r");
+	fprint(2, "found and sorted index entries for clumps=%lld at %lld\n", clumps, base);
+
+	if(buildindex(mainindex, part, base, clumps, zero) < 0)
+		sysfatal("can't build new index: %r");
+	
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/checkarenas.c b/src/cmd/venti/srv/checkarenas.c
new file mode 100644
index 0000000..525a634
--- /dev/null
+++ b/src/cmd/venti/srv/checkarenas.c
@@ -0,0 +1,135 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+static int	verbose;
+
+static void
+checkarena(Arena *arena, int scan, int fix)
+{
+	ATailStats old;
+	int err, e;
+
+	if(verbose && arena->memstats.clumps)
+		printarena(2, arena);
+
+	old = arena->memstats;	/* remembered for comparison after the sync */
+
+	if(scan){	/* discard the recorded totals before calling syncarena */
+		arena->memstats.used = 0;
+		arena->memstats.clumps = 0;
+		arena->memstats.cclumps = 0;
+		arena->memstats.uncsize = 0;
+	}
+
+	err = 0;
+	for(;;){	/* repeat syncarena for as long as it keeps reporting SyncHeader */
+		e = syncarena(arena, 0, 1000, 0, fix);
+		err |= e;
+		if(!(e & SyncHeader))
+			break;
+		if(verbose && arena->memstats.clumps)
+			fprint(2, ".");
+	}
+	if(verbose && arena->memstats.clumps)
+		fprint(2, "\n");
+	/* SyncHeader is recomputed here by comparing the totals with the remembered ones */
+	err &= ~SyncHeader;
+	if(arena->memstats.used != old.used
+	|| arena->memstats.clumps != old.clumps
+	|| arena->memstats.cclumps != old.cclumps
+	|| arena->memstats.uncsize != old.uncsize){
+		fprint(2, "%s: incorrect arena header fields\n", arena->name);
+		printarena(2, arena);
+		err |= SyncHeader;
+	}
+
+	if(!err || !fix)
+		return;
+	/* fixing was requested and something was wrong: write the totals back */
+	fprint(2, "%s: writing fixed arena header fields\n", arena->name);
+	arena->diskstats = arena->memstats;
+	if(wbarena(arena) < 0)
+		fprint(2, "arena header write failed: %r\n");
+	flushdcache();
+}
+
+void
+usage(void)
+{
+	fprint(2, "usage: checkarenas [-afv] file [arenaname...]\n");
+	threadexitsall("usage");	/* non-empty exit string: a usage error is a failure */
+}
+
+int
+should(char *name, int argc, char **argv)
+{
+	/* an empty list selects every name */
+	if(argc == 0)
+		return 1;
+	/* otherwise name must be listed explicitly */
+	while(argc-- > 0)
+		if(strcmp(name, *argv++) == 0)
+			return 1;
+	return 0;
+}
+
+void
+threadmain(int argc, char *argv[])
+{
+	ArenaPart *ap;
+	Part *part;
+	char *file;
+	int i, fix, scan;
+
+	ventifmtinstall();
+	statsinit();
+
+	fix = 0;
+	scan = 0;
+	ARGBEGIN{
+	case 'f':
+		fix++;
+		break;
+	case 'a':
+		scan = 1;
+		break;
+	case 'v':
+		verbose++;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(!fix)	/* without -f the global readonly flag is set */
+		readonly = 1;
+
+	if(argc < 1)
+		usage();
+
+	file = argv[0];
+
+	part = initpart(file, ORDWR|ODIRECT);
+	if(part == nil)
+		sysfatal("can't open partition %s: %r", file);
+
+	ap = initarenapart(part);
+	if(ap == nil)
+		sysfatal("can't initialize arena partition in %s: %r", file);
+
+	if(verbose > 1){
+		printarenapart(2, ap);
+		fprint(2, "\n");
+	}
+
+	initdcache(8 * MaxDiskBlock);
+	/* argv[0] (the file) is passed along too; should() just compares it as another name */
+	for(i = 0; i < ap->narenas; i++)
+		if(should(ap->arenas[i]->name, argc, argv))
+			checkarena(ap->arenas[i], scan, fix);
+
+	if(verbose > 1)
+		printstats();
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/checkindex.c b/src/cmd/venti/srv/checkindex.c
new file mode 100644
index 0000000..f7040d1
--- /dev/null
+++ b/src/cmd/venti/srv/checkindex.c
@@ -0,0 +1,293 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+static int extra, missing, wrong;	/* running totals of index discrepancies, reported by checkindex */
+
+static void
+phdr(DBlock *eb)
+{
+	static int did;
+	/* the legend line is printed once, before the first block header */
+	if(!did){
+		did = 1;
+		print("# diff actual correct\n");
+	}
+	print("%s block 0x%llux\n", eb->part->name, eb->addr);
+}
+
+static void
+pie(IEntry *ie, char c)	/* print one entry as a diff line; checkbucket passes '<' for on-disk and '>' for computed entries */
+{
+	print("%c %V %22lld %3d %5d %3d\n",
+		c, ie->score, ie->ia.addr, ie->ia.type, ie->ia.size, ie->ia.blocks);
+}
+
+/* diff the computed bucket ib against bucket buck on disk; -1 if entries are missing or spurious */
+static int
+checkbucket(Index *ix, u32int buck, IBucket *ib)
+{
+	ISect *is;
+	DBlock *eb;
+	IBucket eib;
+	IEntry ie, eie;
+	int i, ei, ok, c, hdr;
+
+	is = ix->sects[indexsect0(ix, buck)];
+	if(buck < is->start || buck >= is->stop){
+		seterr(EAdmin, "cannot find index section for bucket %lud\n", (ulong)buck);
+		return -1;
+	}
+	buck -= is->start;
+	eb = getdblock(is->part, is->blockbase + ((u64int)buck << is->blocklog), OREAD);
+	if(eb == nil)
+		return -1;
+	unpackibucket(&eib, eb->data, is->bucketmagic);
+
+	ok = ei = hdr = 0;
+	for(i = 0; i < ib->n; i++){
+		while(ei < eib.n){
+			c = ientrycmp(&ib->data[i * IEntrySize], &eib.data[ei * IEntrySize]);
+			if(c == 0){
+				unpackientry(&ie, &ib->data[i * IEntrySize]);
+				unpackientry(&eie, &eib.data[ei * IEntrySize]);
+				if(iaddrcmp(&ie.ia, &eie.ia) != 0){
+					if(!hdr){
+						phdr(eb);
+						hdr = 1;
+					}
+					wrong++;	/* NOTE(review): counted and printed, but ok is left unchanged; confirm intended */
+					pie(&eie, '<');
+					pie(&ie, '>');
+				}
+				ei++;
+				goto cont;
+			}
+			if(c < 0)
+				break;
+			if(!hdr){
+				phdr(eb);
+				hdr = 1;
+			}
+			unpackientry(&eie, &eib.data[ei*IEntrySize]);
+			extra++;
+			pie(&eie, '<');
+			ei++;
+			ok = -1;
+		}
+		if(!hdr){
+			phdr(eb);
+			hdr = 1;
+		}
+		unpackientry(&ie, &ib->data[i*IEntrySize]);
+		missing++;
+		pie(&ie, '>');
+		ok = -1;
+	cont:;
+	}
+	for(; ei < eib.n; ei++){
+		if(!hdr){
+			phdr(eb);
+			hdr = 1;
+		}
+		unpackientry(&eie, &eib.data[ei*IEntrySize]);
+		extra++;	/* leftover on-disk entries are spurious too; count them like the ones above */
+		pie(&eie, '<');
+		ok = -1;
+	}
+	putdblock(eb);
+	return ok;
+}
+
+int
+checkindex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
+{
+	IEStream *ies;
+	IBucket ib, zib;
+	ZBlock *z, *b;
+	u32int next, buck;
+	int ok, bok;
+u64int found = 0;
+
+//ZZZ make buffer size configurable
+	b = alloczblock(ix->blocksize, 0, ix->blocksize);
+	z = alloczblock(ix->blocksize, 1, ix->blocksize);
+	ies = initiestream(part, off, clumps, 64*1024);
+	if(b == nil || z == nil || ies == nil){
+		werrstr("allocating: %r");
+		ok = -1;
+		goto breakout;
+		return -1;	/* not reached */
+	}
+	ok = 0;
+	next = 0;
+	memset(&ib, 0, sizeof ib);
+	ib.data = b->data;
+	zib.data = z->data;
+	zib.n = 0;	/* zib stands for an empty bucket */
+	zib.buck = 0;
+	for(;;){
+		buck = buildbucket(ix, ies, &ib, ix->blocksize-IBucketSize);
+		found += ib.n;
+		if(zero){
+			for(; next != buck; next++){	/* buckets with no computed entries are checked against the empty bucket */
+				if(next == ix->buckets){
+					if(buck != TWID32){
+						ok = -1;
+						werrstr("internal error: bucket out of range");
+					}
+					if(ok < 0)
+						werrstr("%d spurious entries, %d missing, %d wrong", extra, missing, wrong);
+					goto breakout;
+				}
+				bok = checkbucket(ix, next, &zib);
+				if(bok < 0)
+					ok = -1;
+			}
+		}
+		if(buck >= ix->buckets){
+			if(buck == TWID32)	/* NOTE(review): buildbucket also returns TWID32 on error */
+				break;
+			werrstr("internal error: bucket out of range");
+			ok = -1;
+			goto breakout;
+		}
+		bok = checkbucket(ix, buck, &ib);
+		if(bok < 0)
+			ok = -1;
+		next = buck + 1;
+	}
+breakout:
+	freeiestream(ies);
+	freezblock(z);
+	freezblock(b);
+	return ok;
+}
+
+int
+checkbloom(Bloom *b1, Bloom *b2, int fix)
+{
+	u32int *a1, *a2;
+	int i, n, extra, missing;	/* these shadow the file-level counters of the same names */
+	
+	if(b1==nil && b2==nil)
+		return 0;
+	if(b1==nil || b2==nil){
+		werrstr("nil/non-nil");
+		return -1;
+	}
+	wbbloomhead(b1);	/* pack both headers so they can be compared as bytes */
+	wbbloomhead(b2);
+	if(memcmp(b1->data, b2->data, BloomHeadSize) != 0){
+		werrstr("bloom header mismatch");
+		return -1;
+	}
+	a1 = (u32int*)b1->data;
+	a2 = (u32int*)b2->data;
+	n = b1->size/4;
+	extra = 0;
+	missing = 0;
+	for(i=BloomHeadSize/4; i<n; i++){	/* compare word by word, skipping the header */
+		if(a1[i] != a2[i]){
+print("%.8ux/%.8ux.", a1[i], a2[i]);
+			extra += countbits(a1[i] & ~a2[i]);	/* bits set in b1 only */
+			missing += countbits(a2[i] & ~a1[i]);	/* bits set in b2 only */
+		}
+	}
+	if(extra || missing)
+		fprint(2, "bloom filter: %d spurious bits, %d missing bits\n", extra, missing);
+	else
+		fprint(2, "bloom filter: correct\n");
+	if(!fix && missing){	/* without fix, only missing bits are an error; spurious bits are just reported */
+		werrstr("missing bits");
+		return -1;
+	}
+	if(fix && (missing || extra)){
+		memmove(b1->data, b2->data, b1->size);	/* replace b1's contents with b2's and write b1 out */
+		return writebloom(b1);
+	}
+	return 0;
+}
+
+
+void
+usage(void)
+{
+	fprint(2, "usage: checkindex [-fZ] [-B blockcachesize] config tmp\n");
+	threadexitsall("usage");	/* non-empty exit string: a usage error is a failure */
+}
+
+Config conf;
+
+void
+threadmain(int argc, char *argv[])
+{
+	Bloom *oldbloom, *newbloom;
+	Part *part;
+	u64int clumps, base;
+	u32int bcmem;
+	int fix, skipz, ok;
+
+	fix = 0;
+	bcmem = 0;
+	skipz = 0;
+	ARGBEGIN{
+	case 'B':
+		bcmem = unittoull(EARGF(usage()));	/* ARGF would return nil if -B had no value */
+		break;
+	case 'f':
+		fix++;
+		break;
+	case 'Z':
+		skipz = 1;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc != 2)
+		usage();
+
+	ventifmtinstall();
+
+	part = initpart(argv[1], ORDWR|ODIRECT);
+	if(part == nil)
+		sysfatal("can't initialize temporary partition: %r");
+
+	if(!fix)	/* without -f the global readonly flag is set */
+		readonly = 1;
+
+	if(initventi(argv[0], &conf) < 0)
+		sysfatal("can't init venti: %r");
+	oldbloom = mainindex->bloom;
+	newbloom = nil;
+	if(oldbloom){	/* build a fresh filter of the same size, to compare with the old one afterwards */
+		newbloom = vtmallocz(sizeof *newbloom);
+		bloominit(newbloom, oldbloom->size, nil);
+		newbloom->data = vtmallocz(oldbloom->size);
+	}
+	if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))	/* enforce a minimum cache size */
+		bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
+	if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
+	initdcache(bcmem);
+
+	fprint(2, "checkindex: building entry list\n");
+	clumps = sortrawientries(mainindex, part, &base, newbloom);
+	if(clumps == TWID64)
+		sysfatal("can't build sorted index: %r");
+	fprint(2, "checkindex: checking %lld entries at %lld\n", clumps, base);
+	ok = 0;
+	if(checkindex(mainindex, part, base, clumps, !skipz) < 0){
+		fprint(2, "checkindex: %r\n");
+		ok = -1;
+	}
+	if(checkbloom(oldbloom, newbloom, fix) < 0){
+		fprint(2, "checkbloom: %r\n");
+		ok = -1;
+	}
+	if(ok < 0)
+		sysfatal("errors found");
+	fprint(2, "checkindex: index is correct\n");
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/clump.c b/src/cmd/venti/srv/clump.c
new file mode 100644
index 0000000..88ebdb5
--- /dev/null
+++ b/src/cmd/venti/srv/clump.c
@@ -0,0 +1,222 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "whack.h"
+
+/*
+ * Write lump zb (score sc) to disk as a clump, compressed when that is
+ * smaller.  Fills in ia with the new clump's index address; returns 0 or -1.
+ * Upon return the lump is in the disk cache but will likely not be on disk yet.
+ */
+int
+storeclump(Index *ix, ZBlock *zb, u8int *sc, int type, u32int creator, IAddr *ia)
+{
+	ZBlock *cb;
+	Clump cl;
+	u64int a;
+	u8int bh[VtScoreSize];
+	int size, dsize;
+
+	trace(TraceLump, "storeclump enter", sc, type);
+	size = zb->len;
+	if(size > VtMaxLumpSize){
+		seterr(EStrange, "lump too large");
+		return -1;
+	}
+	if(vttypevalid(type) < 0){	/* NOTE(review): only fires if vttypevalid returns a negative value for bad types - confirm */
+		seterr(EStrange, "invalid lump type");
+		return -1;
+	}
+
+	if(0){	/* disabled: check that the data really hashes to sc */
+		scoremem(bh, zb->data, size);
+		if(scorecmp(sc, bh) != 0){
+			seterr(ECorrupt, "storing clump: corrupted; expected=%V got=%V, size=%d", sc, bh, size);
+			return -1;
+		}
+	}
+
+	cb = alloczblock(size + ClumpSize + U32Size, 0, 0);	/* room for header, data and the 4 trailing bytes cleared below */
+	if(cb == nil)
+		return -1;
+
+	cl.info.type = type;
+	cl.info.uncsize = size;
+	cl.creator = creator;
+	cl.time = now();
+	scorecp(cl.info.score, sc);
+
+	trace(TraceLump, "storeclump whackblock");
+	dsize = whackblock(&cb->data[ClumpSize], zb->data, size);
+	if(dsize > 0 && dsize < size){	/* keep the compressed form only if it is strictly smaller */
+		cl.encoding = ClumpECompress;
+	}else{
+		if(dsize > size){
+			fprint(2, "whack error: dsize=%d size=%d\n", dsize, size);
+			abort();
+		}
+		cl.encoding = ClumpENone;
+		dsize = size;
+		memmove(&cb->data[ClumpSize], zb->data, size);	/* store the data uncompressed */
+	}
+	memset(cb->data+ClumpSize+dsize, 0, 4);	/* presumably clears where the next clump's magic would go - confirm */
+	cl.info.size = dsize;
+
+	ia->addr = 0;
+	ia->type = type;
+	ia->size = size;
+	ia->blocks = (dsize + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;	/* on-disk length rounded up to whole arena blocks */
+
+	a = writeiclump(ix, &cl, cb->data, &ia->addr);
+
+	trace(TraceLump, "storeclump exit %lld", a);
+
+	freezblock(cb);
+	if(a == TWID64)	/* TWID64 is writeiclump's failure value */
+		return -1;
+
+/*
+	qlock(&stats.lock);
+	stats.clumpwrites++;
+	stats.clumpbwrites += size;
+	stats.clumpbcomp += dsize;
+	qunlock(&stats.lock);
+*/
+
+	return 0;
+}
+
+u32int
+clumpmagic(Arena *arena, u64int aa)
+{
+	u8int hdr[U32Size];
+
+	/* read the magic stored at address aa; TWID32 stands for "could not read" */
+	return readarena(arena, aa, hdr, U32Size) < 0 ? TWID32 : unpackmagic(hdr);
+
+}
+
+/*
+ * fetch the clump stored at address aa in arena and return its
+ * uncompressed data, or nil on error.  cl is filled in with the clump
+ * header and score with the block's score.  blocks is roughly the length
+ * of the clump on disk; if zero, the length is unknown.
+ */
+ZBlock*
+loadclump(Arena *arena, u64int aa, int blocks, Clump *cl, u8int *score, int verify)
+{
+	Unwhack uw;
+	ZBlock *zb, *cb;
+	u8int bh[VtScoreSize], *buf;
+	u32int n;
+	int nunc;
+
+/*
+	qlock(&stats.lock);
+	stats.clumpreads++;
+	qunlock(&stats.lock);
+*/
+
+	if(blocks <= 0)
+		blocks = 1;
+
+	trace(TraceLump, "loadclump enter");
+
+	cb = alloczblock(blocks << ABlockLog, 0, 0);
+	if(cb == nil)
+		return nil;
+	nunc = readarena(arena, aa, cb->data, blocks << ABlockLog);	/* kept signed so a negative (error) return is caught below */
+	if(nunc < ClumpSize){
+		if(nunc > 0)	/* on 0 or an error return, whatever error readarena left is kept */
+			seterr(ECorrupt, "loadclump read less than a header");
+		freezblock(cb);
+		return nil;
+	}
+	trace(TraceLump, "loadclump unpack");
+	if(unpackclump(cl, cb->data, arena->clumpmagic) < 0){
+		seterr(ECorrupt, "loadclump %s %llud: %r", arena->name, aa);
+		freezblock(cb);
+		return nil;
+	}
+	n = nunc - ClumpSize;	/* data bytes already in hand after the header */
+	if(n < cl->info.size){	/* first read was too short: reread just the data at its full size */
+		freezblock(cb);
+		n = cl->info.size;
+		cb = alloczblock(n, 0, 0);
+		if(cb == nil)
+			return nil;
+		if(readarena(arena, aa + ClumpSize, cb->data, n) != n){
+			seterr(ECorrupt, "loadclump read too little data");
+			freezblock(cb);
+			return nil;
+		}
+		buf = cb->data;
+	}else
+		buf = cb->data + ClumpSize;
+
+	scorecp(score, cl->info.score);
+
+	zb = alloczblock(cl->info.uncsize, 0, 0);
+	if(zb == nil){
+		freezblock(cb);
+		return nil;
+	}
+	switch(cl->encoding){
+	case ClumpECompress:
+		trace(TraceLump, "loadclump decompress");
+		unwhackinit(&uw);
+		nunc = unwhack(&uw, zb->data, cl->info.uncsize, buf, cl->info.size);
+		if(nunc != cl->info.uncsize){
+			if(nunc < 0)
+				seterr(ECorrupt, "decompression of %llud failed: %s", aa, uw.err);
+			else
+				seterr(ECorrupt, "decompression of %llud gave partial block: %d/%d", aa, nunc, cl->info.uncsize);
+			freezblock(cb);
+			freezblock(zb);
+			return nil;
+		}
+		break;
+	case ClumpENone:
+		if(cl->info.size != cl->info.uncsize){
+			seterr(ECorrupt, "loading clump: bad uncompressed size for uncompressed block %llud", aa);
+			freezblock(cb);
+			freezblock(zb);
+			return nil;
+		}
+		scoremem(bh, buf, cl->info.uncsize);
+		if(scorecmp(cl->info.score, bh) != 0)	/* diagnostic only: records the error but does not fail the load */
+			seterr(ECorrupt, "pre-copy sha1 wrong at %s %llud: expected=%V got=%V", arena->name, aa, cl->info.score, bh);
+		memmove(zb->data, buf, cl->info.uncsize);
+		break;
+	default:
+		seterr(ECorrupt, "unknown encoding in loadclump %llud", aa);
+		freezblock(cb);
+		freezblock(zb);
+		return nil;
+	}
+	freezblock(cb);
+
+	if(verify){	/* rehash the uncompressed data and compare with the score in the header */
+		trace(TraceLump, "loadclump verify");
+		scoremem(bh, zb->data, cl->info.uncsize);
+		if(scorecmp(cl->info.score, bh) != 0){
+			seterr(ECorrupt, "loading clump: corrupted at %s %llud; expected=%V got=%V", arena->name, aa, cl->info.score, bh);
+			freezblock(zb);
+			return nil;
+		}
+		if(vttypevalid(cl->info.type) < 0){
+			seterr(ECorrupt, "loading lump at %s %llud: invalid lump type %d", arena->name, aa, cl->info.type);
+			freezblock(zb);
+			return nil;
+		}
+	}
+
+	trace(TraceLump, "loadclump exit");
+/*
+	qlock(&stats.lock);
+	stats.clumpbreads += cl->info.size;
+	stats.clumpbuncomp += cl->info.uncsize;
+	qunlock(&stats.lock);
+*/
+	return zb;
+}
diff --git a/src/cmd/venti/srv/clumpstats.c b/src/cmd/venti/srv/clumpstats.c
new file mode 100644
index 0000000..d2cfe25
--- /dev/null
+++ b/src/cmd/venti/srv/clumpstats.c
@@ -0,0 +1,127 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+int	count[VtMaxLumpSize][VtMaxType];	/* count[uncsize][type]: number of clumps seen with that size and type */
+Config conf;
+
+enum
+{
+	ClumpChunks	= 32*1024	/* most ClumpInfo entries fetched by one readclumpinfos call */
+};
+
+static u32int	/* u32int, not int, so that TWID32 reaches the caller unchanged */
+readarenainfo(Arena *arena)	/* tally arena's clump directory into count[][]; returns clumps read, or TWID32 on failure */
+{
+	ClumpInfo *ci, *cis;
+	u32int clump;
+	int i, n, ok;
+
+	if(arena->memstats.clumps)
+		fprint(2, "reading directory for arena=%s with %d entries\n", arena->name, arena->memstats.clumps);
+
+	cis = MKN(ClumpInfo, ClumpChunks);
+	ok = 0;
+	for(clump = 0; clump < arena->memstats.clumps; clump += n){
+		n = ClumpChunks;
+
+		if(n > arena->memstats.clumps - clump)	/* last chunk may be short */
+			n = arena->memstats.clumps - clump;
+
+		if((i=readclumpinfos(arena, clump, cis, n)) != n){
+			seterr(EOk, "arena directory read failed %d not %d: %r", i, n);
+			ok = -1;
+			break;
+		}
+
+		for(i = 0; i < n; i++){
+			ci = &cis[i];
+			if(ci->type >= VtMaxType || ci->uncsize >= VtMaxLumpSize) {	/* outside count[][]; note uncsize == VtMaxLumpSize lands here too */
+				fprint(2, "bad clump: %d: type = %d: size = %d\n", clump+i, ci->type, ci->uncsize);
+				continue;
+			}
+			count[ci->uncsize][ci->type]++;
+		}
+	}
+	free(cis);
+	if(ok < 0)
+		return TWID32;
+	return clump;
+}
+
+static void
+clumpstats(Index *ix)	/* tally every arena of ix, then print a table of clump counts by size and type */
+{
+	ulong clumps;
+	u32int n;
+	int i, j, t;
+
+	clumps = 0;
+	for(i = 0; i < ix->narenas; i++){
+		/* n is 32 bits wide so the TWID32 comparison also holds where ulong is 64 bits */
+		n = readarenainfo(ix->arenas[i]);
+		if(n == TWID32){
+			fprint(2, "clumpstats: %r\n");	/* readarenainfo set the error string before failing */
+			return;
+		}
+		clumps += n;
+	}
+
+	print("clumps = %lud\n", clumps);
+	/*
+	 * one line per uncompressed size that occurred: size, total, then the count for each type
+	 */
+	for(i=0; i<VtMaxLumpSize; i++) {
+		t = 0;
+		for(j=0; j<VtMaxType; j++)
+			t += count[i][j];
+		if(t == 0)
+			continue;
+		print("%d\t%d", i, t);
+		for(j=0; j<VtMaxType; j++)
+			print("\t%d", count[i][j]);
+		print("\n");
+	}
+}
+
+
+void
+usage(void)	/* print the command synopsis on standard error and exit with a failure status */
+{
+	fprint(2, "usage: clumpstats [-B blockcachesize] config\n");
+	threadexitsall("usage");	/* non-nil exit string: a bad command line is not success */
+}
+
+void
+threadmain(int argc, char *argv[])	/* clumpstats: argv[0] is the venti configuration file */
+{
+	u32int bcmem;
+
+	bcmem = 0;
+
+	ARGBEGIN{
+	case 'B':
+		bcmem = unittoull(EARGF(usage()));	/* a missing size is a usage error rather than a nil argument */
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	readonly = 1;
+
+	if(argc != 1)
+		usage();
+
+	if(initventi(argv[0], &conf) < 0)
+		sysfatal("can't init venti: %r");
+
+	if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))	/* enforce a minimum cache size */
+		bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
+	if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
+	initdcache(bcmem);
+
+	clumpstats(mainindex);
+
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/config.c b/src/cmd/venti/srv/config.c
new file mode 100644
index 0000000..e6232d5
--- /dev/null
+++ b/src/cmd/venti/srv/config.c
@@ -0,0 +1,245 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+Index			*mainindex;
+int			paranoid = 1;		/* should verify hashes on disk read */
+
+static ArenaPart	*configarenas(char *file);
+static ISect		*configisect(char *file);
+static Bloom		*configbloom(char *file);
+
+int
+initventi(char *file, Config *conf)
+{
+	/*
+	 * statistics first, then the configuration file, then the index.
+	 * no error string is set here when initindex fails.
+	 */
+	statsinit();
+
+	if(file == nil)
+		seterr(EOk, "no configuration file");
+	else if(runconfig(file, conf) < 0)
+		seterr(EOk, "can't initialize venti: %r");
+	else if((mainindex = initindex(conf->index, conf->sects, conf->nsects)) != nil){
+		mainindex->bloom = conf->bloom;
+		return 0;
+	}
+	return -1;
+}
+
+static int
+numok(char *s)	/* 0 if s is a number optionally followed by one of K, M, G (either case); -1 otherwise */
+{
+	char *p;
+
+	strtoull(s, &p, 0);	/* only the end pointer is wanted here */
+	if(p == s)
+		return -1;
+	if(*p == 0)
+		return 0;
+	if(p[1] == 0 && strchr("MmGgKk", *p))
+		return 0;
+	return -1;	/* anything else after the digits is junk */
+}
+
+/*
+ * configuration file grammar:
+ *
+ * configs	:
+ *		| configs config
+ * config	: ("isect" | "arenas" | "bloom") filename
+ *		| "index" name
+ *		| ("bcmem" | "mem" | "icmem") num
+ *		| "queuewrites"
+ *		| ("httpaddr" | "addr") address
+ *		| "webroot" dir
+ *
+ * '#' and \n delimit comments
+ */
+
+enum
+{
+	MaxArgs	= 2
+};
+int
+runconfig(char *file, Config *config)	/* parse configuration file into *config; returns 0 at end of file, -1 on the first bad line */
+{
+	ArenaPart **av;
+	ISect **sv;
+	IFile f;
+	char *s, *line, *flds[MaxArgs + 1];
+	int i, ok;
+
+	if(readifile(&f, file) < 0)
+		return -1;
+	memset(config, 0, sizeof *config);
+	config->mem = 0xFFFFFFFFUL;	/* sentinel: no "mem" line seen yet (tested below) */
+	ok = -1;	/* every break out of the loop except end of file is a failure */
+	line = nil;
+	for(;;){
+		s = ifileline(&f);
+		if(s == nil){
+			ok = 0;
+			break;
+		}
+		line = estrdup(s);	/* separate copy of the line for the "illegal line" message */
+		i = getfields(s, flds, MaxArgs + 1, 1, " \t\r");
+		if(i == 2 && strcmp(flds[0], "isect") == 0){
+			sv = MKN(ISect*, config->nsects + 1);	/* grow the section array by one slot */
+			for(i = 0; i < config->nsects; i++)
+				sv[i] = config->sects[i];
+			free(config->sects);
+			config->sects = sv;
+			config->sects[config->nsects] = configisect(flds[1]);
+			if(config->sects[config->nsects] == nil)
+				break;
+			config->nsects++;
+		}else if(i == 2 && strcmp(flds[0], "arenas") == 0){
+			av = MKN(ArenaPart*, config->naparts + 1);	/* grow the arena partition array by one slot */
+			for(i = 0; i < config->naparts; i++)
+				av[i] = config->aparts[i];
+			free(config->aparts);
+			config->aparts = av;
+			config->aparts[config->naparts] = configarenas(flds[1]);
+			if(config->aparts[config->naparts] == nil)
+				break;
+			config->naparts++;
+		}else if(i == 2 && strcmp(flds[0], "bloom") == 0){
+			if(config->bloom){
+				seterr(EAdmin, "duplicate bloom lines in configuration file %s", file);
+				break;
+			}
+			if((config->bloom = configbloom(flds[1])) == nil)
+				break;
+		}else if(i == 2 && strcmp(flds[0], "index") == 0){
+			if(nameok(flds[1]) < 0){
+				seterr(EAdmin, "illegal index name %s in config file %s", flds[1], file);
+				break;
+			}
+			if(config->index != nil){
+				seterr(EAdmin, "duplicate indices in config file %s", file);
+				break;
+			}
+			config->index = estrdup(flds[1]);
+		}else if(i == 2 && strcmp(flds[0], "bcmem") == 0){
+			if(numok(flds[1]) < 0){
+				seterr(EAdmin, "illegal size %s in config file %s",
+					flds[1], file);
+				break;
+			}
+			if(config->bcmem != 0){	/* zero doubles as "not yet set" */
+				seterr(EAdmin, "duplicate bcmem lines in config file %s", file);
+				break;
+			}
+			config->bcmem = unittoull(flds[1]);
+		}else if(i == 2 && strcmp(flds[0], "mem") == 0){
+			if(numok(flds[1]) < 0){
+				seterr(EAdmin, "illegal size %s in config file %s",
+					flds[1], file);
+				break;
+			}
+			if(config->mem != 0xFFFFFFFFUL){
+				seterr(EAdmin, "duplicate mem lines in config file %s", file);
+				break;
+			}
+			config->mem = unittoull(flds[1]);
+		}else if(i == 2 && strcmp(flds[0], "icmem") == 0){
+			if(numok(flds[1]) < 0){
+				seterr(EAdmin, "illegal size %s in config file %s",
+					flds[1], file);
+				break;
+			}
+			if(config->icmem != 0){	/* zero doubles as "not yet set" */
+				seterr(EAdmin, "duplicate icmem lines in config file %s", file);
+				break;
+			}
+			config->icmem = unittoull(flds[1]);
+		}else if(i == 1 && strcmp(flds[0], "queuewrites") == 0){
+			config->queuewrites = 1;
+		}else if(i == 2 && strcmp(flds[0], "httpaddr") == 0){
+			if(config->haddr){
+				seterr(EAdmin, "duplicate httpaddr lines in configuration file %s", file);
+				break;
+			}
+			config->haddr = estrdup(flds[1]);
+		}else if(i == 2 && strcmp(flds[0], "webroot") == 0){
+			if(config->webroot){
+				seterr(EAdmin, "duplicate webroot lines in configuration file %s", file);
+				break;
+			}
+			config->webroot = estrdup(flds[1]);
+		}else if(i == 2 && strcmp(flds[0], "addr") == 0){
+			if(config->vaddr){
+				seterr(EAdmin, "duplicate addr lines in configuration file %s", file);
+				break;
+			}
+			config->vaddr = estrdup(flds[1]);
+		}else{
+			seterr(EAdmin, "illegal line '%s' in configuration file %s", line, file);
+			break;
+		}
+		free(line);
+		line = nil;
+	}
+	free(line);	/* still set only when the loop was left on an error */
+	freeifile(&f);
+	if(ok < 0){	/* only the two arrays are released here; the rest of *config stays as parsed so far */
+		free(config->sects);
+		config->sects = nil;
+		free(config->aparts);
+		config->aparts = nil;
+	}
+	return ok;
+}
+
+static ISect*
+configisect(char *file)
+{
+	ISect *sect;
+	Part *p;
+
+	/* open the partition named by file and read the index section on it */
+	if(0) fprint(2, "configure index section in %s\n", file);
+	if((p = initpart(file, ORDWR|ODIRECT)) == nil)
+		return nil;
+	if((sect = initisect(p)) != nil)
+		return sect;
+	/* name the partition in the error string before reporting failure */
+	werrstr("%s: %r", file);
+	return nil;
+}
+
+static ArenaPart*
+configarenas(char *file)
+{
+	ArenaPart *arenas;
+	Part *p;
+
+	if(0) fprint(2, "configure arenas in %s\n", file);
+	if((p = initpart(file, ORDWR|ODIRECT)) == nil)
+		return nil;
+	if((arenas = initarenapart(p)) != nil)
+		return arenas;
+	/* name the partition in the error string before reporting failure */
+	werrstr("%s: %r", file);
+	return nil;
+}
+
+static Bloom*
+configbloom(char *file)
+{
+	Bloom *bloom;
+	Part *p;
+
+	if(0) fprint(2, "configure bloom in %s\n", file);
+	if((p = initpart(file, ORDWR|ODIRECT)) == nil)
+		return nil;
+	if((bloom = readbloom(p)) != nil)
+		return bloom;
+	/* name the partition in the error string before reporting failure */
+	werrstr("%s: %r", file);
+	return nil;
+}
+
diff --git a/src/cmd/venti/srv/conv.c b/src/cmd/venti/srv/conv.c
new file mode 100644
index 0000000..13afc7d
--- /dev/null
+++ b/src/cmd/venti/srv/conv.c
@@ -0,0 +1,632 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * disk structure conversion routines
+ */
+#define	U8GET(p)	((p)[0])
+#define	U16GET(p)	(((p)[0]<<8)|(p)[1])
+#define	U32GET(p)	((u32int)(((p)[0]<<24)|((p)[1]<<16)|((p)[2]<<8)|(p)[3]))
+#define	U64GET(p)	(((u64int)U32GET(p)<<32)|(u64int)U32GET((p)+4))
+
+#define	U8PUT(p,v)	(p)[0]=(v)&0xFF
+#define	U16PUT(p,v)	(p)[0]=((v)>>8)&0xFF;(p)[1]=(v)&0xFF
+#define	U32PUT(p,v)	(p)[0]=((v)>>24)&0xFF;(p)[1]=((v)>>16)&0xFF;(p)[2]=((v)>>8)&0xFF;(p)[3]=(v)&0xFF
+#define	U64PUT(p,v,t32)	t32=(v)>>32;U32PUT(p,t32);t32=(v);U32PUT((p)+4,t32)
+
+static struct {
+	u32int m;
+	char *s;
+} magics[] = {
+	ArenaPartMagic, "ArenaPartMagic",
+	ArenaHeadMagic, "ArenaHeadMagic",
+	ArenaMagic, "ArenaMagic",
+	ISectMagic, "ISectMagic",
+	BloomMagic, "BloomMagic",
+};
+
+static char*
+fmtmagic(char *s, u32int m)
+{
+	int k;
+
+	for(k=0; k<nelem(magics) && magics[k].m != m; k++)
+		;	/* stop at the first table entry whose magic equals m */
+	if(k == nelem(magics))	/* unknown magic: format it in hex into the caller's buffer */
+		return sprint(s, "0x%08ux", m), s;
+	return magics[k].s;
+}
+
+u32int
+unpackmagic(u8int *buf)	/* return the big-endian 32-bit value stored in the first 4 bytes of buf */
+{
+	return U32GET(buf);
+}
+
+void
+packmagic(u32int magic, u8int *buf)	/* store magic big-endian in the first 4 bytes of buf */
+{
+	U32PUT(buf, magic);
+}
+
+int
+unpackarenapart(ArenaPart *ap, u8int *buf)	/* decode an arena partition header from buf; 0 on success, -1 on bad magic */
+{
+	u8int *p;
+	u32int m;
+	char fbuf[20];	/* scratch for fmtmagic when the magic is not a known one */
+
+	p = buf;
+
+	m = U32GET(p);
+	if(m != ArenaPartMagic){
+		seterr(ECorrupt, "arena set has wrong magic number: %s expected ArenaPartMagic (%lux)", fmtmagic(fbuf, m), ArenaPartMagic);
+		return -1;
+	}
+	p += U32Size;
+	ap->version = U32GET(p);
+	p += U32Size;
+	ap->blocksize = U32GET(p);
+	p += U32Size;
+	ap->arenabase = U32GET(p);
+	p += U32Size;
+
+	if(buf + ArenaPartSize != p)	/* consistency check between this code and ArenaPartSize */
+		sysfatal("unpackarenapart unpacked wrong amount");
+
+	return 0;
+}
+
+int
+packarenapart(ArenaPart *ap, u8int *buf)	/* encode ap into buf in the layout unpackarenapart reads; always returns 0 */
+{
+	u8int *p;
+
+	p = buf;
+
+	U32PUT(p, ArenaPartMagic);
+	p += U32Size;
+	U32PUT(p, ap->version);
+	p += U32Size;
+	U32PUT(p, ap->blocksize);
+	p += U32Size;
+	U32PUT(p, ap->arenabase);
+	p += U32Size;
+
+	if(buf + ArenaPartSize != p)	/* consistency check between this code and ArenaPartSize */
+		sysfatal("packarenapart packed wrong amount");
+
+	return 0;
+}
+
+int
+unpackarena(Arena *arena, u8int *buf)	/* decode an arena trailer from buf; 0 on success, -1 on bad magic or version */
+{
+	int sz;
+	u8int *p;
+	u32int m;
+	char fbuf[20];
+
+	p = buf;
+
+	m = U32GET(p);
+	if(m != ArenaMagic){
+		seterr(ECorrupt, "arena has wrong magic number: %s expected ArenaMagic (%lux)", fmtmagic(fbuf, m), ArenaMagic);
+		return -1;
+	}
+	p += U32Size;
+	arena->version = U32GET(p);
+	p += U32Size;
+	namecp(arena->name, (char*)p);
+	p += ANameSize;
+	arena->diskstats.clumps = U32GET(p);
+	p += U32Size;
+	arena->diskstats.cclumps = U32GET(p);
+	p += U32Size;
+	arena->ctime = U32GET(p);
+	p += U32Size;
+	arena->wtime = U32GET(p);
+	p += U32Size;
+	if(arena->version == ArenaVersion5){	/* only version 5 stores a clump magic */
+		arena->clumpmagic = U32GET(p);
+		p += U32Size;
+	}
+	arena->diskstats.used = U64GET(p);
+	p += U64Size;
+	arena->diskstats.uncsize = U64GET(p);
+	p += U64Size;
+	arena->diskstats.sealed = U8GET(p);
+	p += U8Size;
+
+	arena->memstats = arena->diskstats;	/* in-memory stats start as a copy of the on-disk ones */
+
+	switch(arena->version){	/* the version is validated only here, after the fields were read */
+	case ArenaVersion4:
+		sz = ArenaSize4;
+		arena->clumpmagic = _ClumpMagic;	/* version 4 has no stored magic; use the fixed one */
+		break;
+	case ArenaVersion5:
+		sz = ArenaSize5;
+		break;
+	default:
+		seterr(ECorrupt, "arena has bad version number %d", arena->version);
+		return -1;
+	}
+	if(buf + sz != p)
+		sysfatal("unpackarena unpacked wrong amount");
+
+	return 0;
+}
+
+int
+packarena(Arena *arena, u8int *buf)	/* encode arena's trailer (with diskstats) into buf; sysfatal on unknown version */
+{
+	int sz;
+	u8int *p;
+	u32int t32;	/* scratch required by U64PUT */
+
+	switch(arena->version){
+	case ArenaVersion4:
+		sz = ArenaSize4;
+		if(arena->clumpmagic != _ClumpMagic)	/* version 4 has no field to hold a different magic */
+			fprint(2, "warning: writing old arena tail loses clump magic 0x%lux != 0x%lux\n",
+				(ulong)arena->clumpmagic, (ulong)_ClumpMagic);
+		break;
+	case ArenaVersion5:
+		sz = ArenaSize5;
+		break;
+	default:
+		sysfatal("packarena unknown version %d", arena->version);
+		return -1;
+	}
+
+	p = buf;
+
+	U32PUT(p, ArenaMagic);
+	p += U32Size;
+	U32PUT(p, arena->version);
+	p += U32Size;
+	namecp((char*)p, arena->name);
+	p += ANameSize;
+	U32PUT(p, arena->diskstats.clumps);
+	p += U32Size;
+	U32PUT(p, arena->diskstats.cclumps);
+	p += U32Size;
+	U32PUT(p, arena->ctime);
+	p += U32Size;
+	U32PUT(p, arena->wtime);
+	p += U32Size;
+	if(arena->version == ArenaVersion5){
+		U32PUT(p, arena->clumpmagic);
+		p += U32Size;
+	}
+	U64PUT(p, arena->diskstats.used, t32);
+	p += U64Size;
+	U64PUT(p, arena->diskstats.uncsize, t32);
+	p += U64Size;
+	U8PUT(p, arena->diskstats.sealed);
+	p += U8Size;
+
+	if(buf + sz != p)	/* consistency check between this code and ArenaSize4/ArenaSize5 */
+		sysfatal("packarena packed wrong amount");
+
+	return 0;
+}
+
+int
+unpackarenahead(ArenaHead *head, u8int *buf)	/* decode an arena header from buf; 0 on success, -1 on bad magic or version */
+{
+	u8int *p;
+	u32int m;
+	int sz;
+	char fbuf[20];	/* scratch for fmtmagic when the magic is not a known one */
+
+	p = buf;
+	m = U32GET(p);
+	if(m != ArenaHeadMagic){	/* reject anything that is not what packarenahead writes */
+		seterr(ECorrupt, "arena head has wrong magic number: %s expected ArenaHeadMagic (%lux)", fmtmagic(fbuf, m), (ulong)ArenaHeadMagic);
+		return -1;
+	}
+	p += U32Size;
+	head->version = U32GET(p);
+	p += U32Size;
+	namecp(head->name, (char*)p);
+	p += ANameSize;
+	head->blocksize = U32GET(p);
+	p += U32Size;
+	head->size = U64GET(p);
+	p += U64Size;
+	if(head->version == ArenaVersion5){	/* only version 5 stores a clump magic */
+		head->clumpmagic = U32GET(p);
+		p += U32Size;
+	}
+	switch(head->version){
+	case ArenaVersion4:
+		sz = ArenaHeadSize4;
+		head->clumpmagic = _ClumpMagic;	/* version 4 has no stored magic; use the fixed one */
+		break;
+	case ArenaVersion5:
+		sz = ArenaHeadSize5;
+		break;
+	default:
+		seterr(ECorrupt, "arena head has unexpected version %d", head->version);
+		return -1;
+	}
+	if(buf + sz != p)	/* consistency check between this code and ArenaHeadSize4/ArenaHeadSize5 */
+		sysfatal("unpackarenahead unpacked wrong amount");
+
+	return 0;
+}
+
+int
+packarenahead(ArenaHead *head, u8int *buf)	/* encode head into buf; sysfatal on unknown version */
+{
+	u8int *p;
+	int sz;
+	u32int t32;	/* scratch required by U64PUT */
+
+	switch(head->version){
+	case ArenaVersion4:
+		sz = ArenaHeadSize4;
+		if(head->clumpmagic != _ClumpMagic)	/* version 4 has no field to hold a different magic */
+			fprint(2, "warning: writing old arena header loses clump magic 0x%lux != 0x%lux\n",
+				(ulong)head->clumpmagic, (ulong)_ClumpMagic);
+		break;
+	case ArenaVersion5:
+		sz = ArenaHeadSize5;
+		break;
+	default:
+		sysfatal("packarenahead unknown version %d", head->version);
+		return -1;
+	}
+
+	p = buf;
+
+	U32PUT(p, ArenaHeadMagic);
+	p += U32Size;
+	U32PUT(p, head->version);
+	p += U32Size;
+	namecp((char*)p, head->name);
+	p += ANameSize;
+	U32PUT(p, head->blocksize);
+	p += U32Size;
+	U64PUT(p, head->size, t32);
+	p += U64Size;
+	if(head->version == ArenaVersion5){
+		U32PUT(p, head->clumpmagic);
+		p += U32Size;
+	}
+	if(buf + sz != p)	/* consistency check between this code and ArenaHeadSize4/ArenaHeadSize5 */
+		sysfatal("packarenahead packed wrong amount");
+
+	return 0;
+}
+
+static int
+checkclump(Clump *w)
+{
+	switch(w->encoding){	/* sizes must fit the encoding: equal when raw, strictly smaller when compressed */
+	case ClumpENone:
+		if(w->info.size == w->info.uncsize)
+			return 0;
+		seterr(ECorrupt, "uncompressed wad size mismatch");
+		break;
+	case ClumpECompress:
+		if(w->info.size < w->info.uncsize)
+			return 0;
+		seterr(ECorrupt, "compressed lump has inconsistent block sizes %d %d", w->info.size, w->info.uncsize);
+		break;
+	default:
+		seterr(ECorrupt, "clump has illegal encoding");
+		break;
+	}
+	return -1;
+}
+
+int
+unpackclump(Clump *c, u8int *buf, u32int cmagic)	/* decode a clump header from buf; -1 if the magic is not cmagic or checkclump fails */
+{
+	u8int *p;
+	u32int magic;
+
+	p = buf;
+	magic = U32GET(p);
+	if(magic != cmagic){
+		seterr(ECorrupt, "clump has bad magic number=%#8.8ux != %#8.8ux", magic, cmagic);
+		return -1;
+	}
+	p += U32Size;
+
+	c->info.type = vtfromdisktype(U8GET(p));	/* type is stored in its on-disk encoding */
+	p += U8Size;
+	c->info.size = U16GET(p);
+	p += U16Size;
+	c->info.uncsize = U16GET(p);
+	p += U16Size;
+	scorecp(c->info.score, p);
+	p += VtScoreSize;
+
+	c->encoding = U8GET(p);
+	p += U8Size;
+	c->creator = U32GET(p);
+	p += U32Size;
+	c->time = U32GET(p);
+	p += U32Size;
+
+	if(buf + ClumpSize != p)	/* consistency check between this code and ClumpSize */
+		sysfatal("unpackclump unpacked wrong amount");
+
+	return checkclump(c);
+}
+
+int
+packclump(Clump *c, u8int *buf, u32int magic)	/* encode clump header c into buf under magic; returns checkclump's verdict */
+{
+	u8int *p;
+
+	p = buf;
+	U32PUT(p, magic);
+	p += U32Size;
+
+	U8PUT(p, vttodisktype(c->info.type));	/* type is stored in its on-disk encoding */
+	p += U8Size;
+	U16PUT(p, c->info.size);
+	p += U16Size;
+	U16PUT(p, c->info.uncsize);
+	p += U16Size;
+	scorecp(p, c->info.score);
+	p += VtScoreSize;
+
+	U8PUT(p, c->encoding);
+	p += U8Size;
+	U32PUT(p, c->creator);
+	p += U32Size;
+	U32PUT(p, c->time);
+	p += U32Size;
+
+	if(buf + ClumpSize != p)	/* consistency check between this code and ClumpSize */
+		sysfatal("packclump packed wrong amount");
+
+	return checkclump(c);	/* note: buf has already been written when this reports -1 */
+}
+
+void
+unpackclumpinfo(ClumpInfo *ci, u8int *buf)	/* decode one clump directory entry (type, sizes, score) from buf */
+{
+	u8int *p;
+
+	p = buf;
+	ci->type = vtfromdisktype(U8GET(p));	/* type is stored in its on-disk encoding */
+	p += U8Size;
+	ci->size = U16GET(p);
+	p += U16Size;
+	ci->uncsize = U16GET(p);
+	p += U16Size;
+	scorecp(ci->score, p);
+	p += VtScoreSize;
+
+	if(buf + ClumpInfoSize != p)	/* consistency check between this code and ClumpInfoSize */
+		sysfatal("unpackclumpinfo unpacked wrong amount");
+}
+
+void
+packclumpinfo(ClumpInfo *ci, u8int *buf)	/* encode one clump directory entry into buf in the layout unpackclumpinfo reads */
+{
+	u8int *p;
+
+	p = buf;
+	U8PUT(p, vttodisktype(ci->type));	/* type is stored in its on-disk encoding */
+	p += U8Size;
+	U16PUT(p, ci->size);
+	p += U16Size;
+	U16PUT(p, ci->uncsize);
+	p += U16Size;
+	scorecp(p, ci->score);
+	p += VtScoreSize;
+
+	if(buf + ClumpInfoSize != p)	/* consistency check between this code and ClumpInfoSize */
+		sysfatal("packclumpinfo packed wrong amount");
+}
+
+int
+unpackisect(ISect *is, u8int *buf)	/* decode an index section header from buf; 0 on success, -1 on bad magic */
+{
+	u8int *p;
+	u32int m;
+	char fbuf[20];
+
+	p = buf;
+
+	/* note: the version field itself is not validated in this function */
+	m = U32GET(p);
+	if(m != ISectMagic){
+		seterr(ECorrupt, "index section has wrong magic number: %s expected ISectMagic (%lux)",
+			fmtmagic(fbuf, m), ISectMagic);
+		return -1;
+	}
+	p += U32Size;
+	is->version = U32GET(p);
+	p += U32Size;
+	namecp(is->name, (char*)p);
+	p += ANameSize;
+	namecp(is->index, (char*)p);
+	p += ANameSize;
+	is->blocksize = U32GET(p);
+	p += U32Size;
+	is->blockbase = U32GET(p);
+	p += U32Size;
+	is->blocks = U32GET(p);
+	p += U32Size;
+	is->start = U32GET(p);
+	p += U32Size;
+	is->stop = U32GET(p);
+	p += U32Size;
+	if(buf + ISectSize1 != p)	/* the fields above make up the version 1 header */
+		sysfatal("unpackisect unpacked wrong amount");
+	is->bucketmagic = 0;	/* stays 0 unless the header is version 2 */
+	if(is->version == ISectVersion2){
+		is->bucketmagic = U32GET(p);
+		p += U32Size;
+		if(buf + ISectSize2 != p)
+			sysfatal("unpackisect unpacked wrong amount");
+	}
+
+	return 0;
+}
+
+int
+packisect(ISect *is, u8int *buf)	/* encode index section header is into buf; always returns 0 */
+{
+	u8int *p;
+
+	p = buf;
+
+	U32PUT(p, ISectMagic);
+	p += U32Size;
+	U32PUT(p, is->version);
+	p += U32Size;
+	namecp((char*)p, is->name);
+	p += ANameSize;
+	namecp((char*)p, is->index);
+	p += ANameSize;
+	U32PUT(p, is->blocksize);
+	p += U32Size;
+	U32PUT(p, is->blockbase);
+	p += U32Size;
+	U32PUT(p, is->blocks);
+	p += U32Size;
+	U32PUT(p, is->start);
+	p += U32Size;
+	U32PUT(p, is->stop);
+	p += U32Size;
+	if(buf + ISectSize1 != p)	/* the fields above make up the version 1 header */
+		sysfatal("packisect packed wrong amount");
+	if(is->version == ISectVersion2){	/* version 2 appends the bucket magic */
+		U32PUT(p, is->bucketmagic);
+		p += U32Size;
+		if(buf + ISectSize2 != p)
+			sysfatal("packisect packed wrong amount");
+	}
+
+	return 0;
+}
+
+void
+unpackientry(IEntry *ie, u8int *buf)	/* decode one on-disk index entry from buf into ie */
+{
+	u8int *p;
+
+	p = buf;
+
+	scorecp(ie->score, p);
+	p += VtScoreSize;
+	ie->wtime = U32GET(p);
+	p += U32Size;
+	ie->train = U16GET(p);
+	p += U16Size;
+	ie->ia.addr = U64GET(p);
+	p += U64Size;
+	ie->ia.size = U16GET(p);
+	p += U16Size;
+	/* IEntryTypeOff must be the offset of the type byte decoded next */
+	if(p - buf != IEntryTypeOff)
+		sysfatal("unpackientry bad IEntryTypeOff amount");
+	ie->ia.type = vtfromdisktype(U8GET(p));	/* type is stored in its on-disk encoding */
+	p += U8Size;
+	ie->ia.blocks = U8GET(p);
+	p += U8Size;
+
+	if(p - buf != IEntrySize)	/* consistency check between this code and IEntrySize */
+		sysfatal("unpackientry unpacked wrong amount");
+}
+
+void
+packientry(IEntry *ie, u8int *buf)	/* encode index entry ie into buf in the layout unpackientry reads */
+{
+	u32int t32;	/* scratch required by U64PUT */
+	u8int *p;
+
+	p = buf;
+
+	scorecp(p, ie->score);
+	p += VtScoreSize;
+	U32PUT(p, ie->wtime);
+	p += U32Size;
+	U16PUT(p, ie->train);
+	p += U16Size;
+	U64PUT(p, ie->ia.addr, t32);
+	p += U64Size;
+	U16PUT(p, ie->ia.size);
+	p += U16Size;
+	U8PUT(p, vttodisktype(ie->ia.type));	/* type is stored in its on-disk encoding */
+	p += U8Size;
+	U8PUT(p, ie->ia.blocks);
+	p += U8Size;
+
+	if(p - buf != IEntrySize)	/* consistency check between this code and IEntrySize */
+		sysfatal("packientry packed wrong amount");
+}
+
+void
+unpackibucket(IBucket *b, u8int *buf, u32int magic)
+{
+	/* entries start right after the bucket header, inside buf itself */
+	b->data = buf + IBucketSize;
+	/* with a nonzero magic, a bucket stamped with a different one counts as empty */
+	b->n = (magic != 0 && magic != U32GET(buf+U16Size)) ? 0 : U16GET(buf);
+}
+
+void
+packibucket(IBucket *b, u8int *buf, u32int magic)	/* write only the bucket header: entry count, then magic; b->data is not copied */
+{
+	U16PUT(buf, b->n);
+	U32PUT(buf+U16Size, magic);
+}
+
+void
+packbloomhead(Bloom *b, u8int *buf)	/* encode the bloom filter header: magic, version, nhash, size */
+{
+	u8int *p;
+
+	p = buf;
+	U32PUT(p, BloomMagic);
+	U32PUT(p+4, BloomVersion);	/* fixed 4-byte offsets rather than an advancing pointer */
+	U32PUT(p+8, b->nhash);
+	U32PUT(p+12, b->size);
+}
+
+int
+unpackbloomhead(Bloom *b, u8int *buf)	/* decode a bloom filter header from buf; 0 on success, -1 on bad magic or version */
+{
+	u8int *p;
+	u32int m;
+	char fbuf[20];	/* scratch for fmtmagic when the magic is not a known one */
+
+	p = buf;
+
+	m = U32GET(p);
+	if(m != BloomMagic){
+		seterr(ECorrupt, "bloom filter has wrong magic number: %s expected BloomMagic (%lux)", fmtmagic(fbuf, m), (ulong)BloomMagic);
+		return -1;
+	}
+	p += U32Size;
+
+	m = U32GET(p);	/* m is reused for the version word */
+	if(m != BloomVersion){
+		seterr(ECorrupt, "bloom filter has wrong version %ud expected %ud", (uint)m, (uint)BloomVersion);
+		return -1;
+	}
+	p += U32Size;
+
+	b->nhash = U32GET(p);
+	p += U32Size;
+
+	b->size = U32GET(p);
+	p += U32Size;
+
+	if(buf + BloomHeadSize != p)	/* consistency check between this code and BloomHeadSize */
+		sysfatal("unpackbloomhead unpacked wrong amount");
+
+	return 0;
+}
diff --git a/src/cmd/venti/srv/dat.h b/src/cmd/venti/srv/dat.h
new file mode 100644
index 0000000..5f6d1a3
--- /dev/null
+++ b/src/cmd/venti/srv/dat.h
@@ -0,0 +1,718 @@
+typedef struct Config		Config;
+typedef struct AMap		AMap;
+typedef struct AMapN		AMapN;
+typedef struct Arena		Arena;
+typedef struct AState	AState;
+typedef struct ArenaHead	ArenaHead;
+typedef struct ArenaPart	ArenaPart;
+typedef struct ArenaTail	ArenaTail;
+typedef struct ATailStats	ATailStats;
+typedef struct CIBlock		CIBlock;
+typedef struct Clump		Clump;
+typedef struct ClumpInfo	ClumpInfo;
+typedef struct Graph Graph;
+typedef struct IAddr		IAddr;
+typedef struct IBucket		IBucket;
+typedef struct IEStream		IEStream;
+typedef struct IEntry		IEntry;
+typedef struct IFile		IFile;
+typedef struct ISect		ISect;
+typedef struct Index		Index;
+typedef struct Lump		Lump;
+typedef struct DBlock		DBlock;
+typedef struct Part		Part;
+typedef struct Statbin Statbin;
+typedef struct Statdesc	Statdesc;
+typedef struct Stats		Stats;
+typedef struct ZBlock		ZBlock;
+typedef struct Round	Round;
+typedef struct Bloom	Bloom;
+
+/*
+ * all-ones values of each unsigned width
+ */
+#define TWID32	((u32int)~(u32int)0)
+#define TWID64	((u64int)~(u64int)0)
+#define	TWID8	((u8int)~(u8int)0)
+
+/*
+ * global constants: size limits, syncarena return codes, error
+ * severities, on-disk magic numbers, format versions, on-disk
+ * structure sizes, and dirty tags.
+ */
+enum
+{
+	ABlockLog		= 9,		/* log2(512), the quantum for reading arenas */
+	ANameSize		= 64,		/* bytes in a name field (see the char name[ANameSize] members) */
+	MaxDiskBlock		= 64*1024,	/* max. allowed size for a disk block */
+	MaxIoSize		= 64*1024,	/* max. allowed size for a disk io operation */
+	PartBlank		= 256*1024,	/* untouched section at beginning of partition */
+	HeadSize		= 512,		/* size of a header after PartBlank */
+	MinArenaSize		= 1*1024*1024,	/* smallest reasonable arena size */
+	IndexBase		= 1024*1024,	/* initial address to use in an index */
+	MaxIo			= 64*1024,	/* max size of a single read or write operation; same value as MaxIoSize */
+	ICacheBits		= 16,		/* default bits for indexing icache */
+	ICacheDepth		= 4,		/* default depth of an icache hash chain */
+	MaxAMap			= 2*1024,	/* max. allowed arenas in an address mapping; must be < 32*1024 */
+
+	/*
+	 * return codes from syncarena
+	 * (distinct bits, so several can be or'ed together)
+	 */
+	SyncDataErr	= 1 << 0,		/* problem reading the clump data */
+	SyncCIErr	= 1 << 1,		/* found erroneous clump directory entries */
+	SyncCIZero	= 1 << 2,		/* found unwritten clump directory entries */
+	SyncFixErr	= 1 << 3,		/* error writing fixed data */
+	SyncHeader	= 1 << 4,		/* altered header fields */
+
+	/*
+	 * error severity
+	 */
+	EOk			= 0,		/* error expected in normal operation */
+	EStrange,				/* strange error that should be logged */
+	ECorrupt,				/* corrupted data found in arenas */
+	EICorrupt,				/* corrupted data found in index */
+	EAdmin,					/* should be brought to administrators' attention */
+	ECrash,					/* really bad internal error */
+	EBug,					/* a limitation which should be fixed */
+	EInconsist,				/* inconsistencies between index and arena */
+	EMax,
+
+	/*
+	 * internal disk formats for the venti archival storage system
+	 */
+	/*
+	 * magic numbers on disk
+	 */
+	_ClumpMagic		= 0xd15cb10c,	/* clump header, deprecated */
+	ClumpFreeMagic		= 0,		/* free clump; terminates active clump log */
+
+	ArenaPartMagic		= 0xa9e4a5e7,	/* arena partition header */
+	ArenaMagic		= 0xf2a14ead,	/* arena trailer */
+	ArenaHeadMagic		= 0xd15c4ead,	/* arena header */
+	
+	BloomMagic		= 0xb1004ead,	/* bloom filter header */
+	BloomMaxHash	= 32,			/* same value as MaxBloomHash below */
+
+	ISectMagic		= 0xd15c5ec7,	/* index header */
+
+	/*
+	 * on-disk format versions
+	 */
+	ArenaPartVersion	= 3,
+	ArenaVersion4		= 4,
+	ArenaVersion5		= 5,
+	BloomVersion		= 1,
+	IndexVersion		= 1,
+	ISectVersion1		= 1,
+	ISectVersion2		= 2,
+
+	/*
+	 * encodings of clumps on disk
+	 */
+	ClumpEErr		= 0,		/* can't happen */
+	ClumpENone,				/* plain */
+	ClumpECompress,				/* compressed */
+	ClumpEMax,
+
+	/*
+	 * sizes in bytes on disk
+	 */
+	U8Size			= 1,
+	U16Size			= 2,
+	U32Size			= 4,
+	U64Size			= 8,
+
+	ArenaPartSize		= 4 * U32Size,
+	ArenaSize4		= 2 * U64Size + 6 * U32Size + ANameSize + U8Size,
+	ArenaSize5			= ArenaSize4 + U32Size,
+	ArenaHeadSize4		= U64Size + 3 * U32Size + ANameSize,
+	ArenaHeadSize5		= ArenaHeadSize4 + U32Size,
+	BloomHeadSize	= 4 * U32Size,		/* magic, version, nhash, size */
+	ISectSize1		= 7 * U32Size + 2 * ANameSize,
+	ISectSize2		= ISectSize1 + U32Size,
+	ClumpInfoSize		= U8Size + 2 * U16Size + VtScoreSize,
+	ClumpSize		= ClumpInfoSize + U8Size + 3 * U32Size,
+	MaxBloomSize		= 1<<(32-3),	/* in bytes, i.e. 2^32 bits */
+	MaxBloomHash	= 32,		/* bits per score */
+	/*
+	 * BUG - The various block copies that manipulate entry buckets
+	 * would be faster if we bumped IBucketSize up to 8 and IEntrySize up to 40,
+	 * so that everything is word-aligned.  Buildindex is actually cpu-bound
+	 * by the (byte at a time) copying in qsort.
+	 */
+	IBucketSize		= U32Size + U16Size,	/* 16-bit count plus 32-bit magic */
+	IEntrySize		= U64Size + U32Size + 2*U16Size + 2*U8Size + VtScoreSize,
+	IEntryTypeOff		= VtScoreSize + U64Size + U32Size + 2 * U16Size,	/* offset of the type byte in a packed IEntry */
+
+	/* arena blocks needed for the largest clump: header plus VtMaxLumpSize data, rounded up */
+	MaxClumpBlocks		=  (VtMaxLumpSize + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog,
+
+	/*
+	 * dirty flags - order controls disk write order
+	 */
+	DirtyArena		= 1,
+	DirtyArenaCib,
+	DirtyArenaTrailer,
+	DirtyMax,
+
+	VentiZZZZZZZZ		/* presumably just a terminator so every real entry ends with a comma */
+};
+
+extern char TraceDisk[];
+extern char TraceLump[];
+extern char TraceBlock[];
+extern char TraceProc[];
+extern char TraceWork[];
+extern char TraceQuiet[];
+extern char TraceRpc[];
+
+/*
+ * results of parsing and initializing a config file
+ */
+struct Config
+{
+	char		*index;			/* name of the index to initialize */
+	int		naparts;		/* arena partitions initialized */
+	ArenaPart	**aparts;		/* the naparts arena partitions */
+	int		nsects;			/* index sections initialized */
+	ISect		**sects;		/* the nsects index sections */
+	Bloom	*bloom;		/* bloom filter */
+	/* memory sizes; presumably for the block cache, lump cache and index cache -- confirm against the config parser */
+	u32int	bcmem;
+	u32int	mem;
+	u32int	icmem;
+	int		queuewrites;
+	/* NOTE(review): names suggest http address, venti address and web root directory -- confirm */
+	char*	haddr;
+	char*	vaddr;
+	char*	webroot;
+};
+
+/*
+ * a Part is the low level interface to files or disks.
+ * there are two main types of partitions
+ *	arena partitions, which hold some number of arenas, each in a sub-partition.
+ *	index partitions, which only have one subpartition.
+ */
+struct Part
+{
+	int		fd;			/* rock for accessing the disk */
+	int		mode;
+	u64int		offset;
+	u64int		size;			/* size of the partition */
+	u32int		blocksize;		/* block size for reads and writes */
+	u32int		fsblocksize;	/* minimum file system block size */
+	char		*name;
+	char		*filename;
+	Channel		*writechan;		/* chan[dcache.nblock](DBlock*); created lazily by dirtydblock */
+};
+
+/*
+ * a cached block from the partition
+ * yuck -- most of this is internal structure for the cache
+ * all other routines should only use data
+ */
+struct DBlock
+{
+	u8int	*data;
+
+	Part	*part;			/* partition in which cached */
+	u64int	addr;			/* base address on the partition */
+	u32int	size;			/* amount of data available, not amount allocated; should go away */
+	u32int	mode;			/* mode of the last _getdblock; tells putdblock which lock to release */
+	u32int	dirty;			/* dirty tag (a Dirty* value), or 0 if clean */
+	u32int	dirtying;
+	DBlock	*next;			/* doubly linked hash chains */
+	DBlock	*prev;
+	u32int	heap;			/* index in heap table; TWID32 when not in the heap */
+	u32int	used;			/* last reference times */
+	u32int	used2;
+	u32int	ref;			/* reference count */
+	RWLock	lock;			/* for access to data only */
+	Channel	*writedonechan;	
+	void*	chanbuf[1];		/* buffer for the chan! */
+};
+
+/*
+ * a cached block from the partition
+ * yuck -- most of this is internal structure for the cache
+ * all other routines should only use data
+ * double yuck -- this is mostly the same as a DBlock,
+ * except that the data is a Packet and the block is
+ * labelled with a score and type rather than an address.
+ */
+struct Lump
+{
+	Packet	*data;
+
+	Part	*part;			/* partition in which cached */
+	u8int	score[VtScoreSize];	/* score of packet */
+	u8int	type;			/* type of packet */
+	u32int	size;			/* amount of data allocated to hold packet */
+	Lump	*next;			/* doubly linked hash chains */
+	Lump	*prev;
+	u32int	heap;			/* index in heap table */
+	u32int	used;			/* last reference times */
+	u32int	used2;
+	u32int	ref;			/* reference count */
+	QLock	lock;			/* for access to data only */
+};
+
+/*
+ * mapping between names and address ranges
+ */
+struct AMap
+{
+	u64int		start;			/* first address of the range */
+	u64int		stop;			/* end of the range; presumably exclusive -- confirm */
+	char		name[ANameSize];
+};
+
+/*
+ * an AMap along with a length
+ */
+struct AMapN
+{
+	int		n;			/* number of entries in map */
+	AMap		*map;
+};
+
+/*
+ * an ArenaPart is a partition made up of Arenas
+ * it exists because most os's don't support many partitions,
+ * and we want to have many different Arenas
+ */
+struct ArenaPart
+{
+	Part		*part;			/* underlying partition */
+	u64int		size;			/* size of underlying partition, rounded down to blocks */
+	Arena		**arenas;
+	u32int		tabbase;		/* base address of arena table on disk */
+	u32int		tabsize;		/* max. bytes in arena table */
+
+	/*
+	 * fields stored on disk
+	 */
+	u32int		version;
+	u32int		blocksize;		/* "optimal" block size for reads and writes */
+	u32int		arenabase;		/* base address of first arena */
+
+	/*
+	 * stored in the arena mapping table on disk
+	 */
+	AMap		*map;
+	int		narenas;		/* presumably the length of map and arenas -- confirm */
+};
+
+/*
+ * info about one block in the clump info cache
+ */
+struct CIBlock
+{
+	u32int		block;			/* blocks in the directory */
+	int		offset;			/* offsets of one clump in the data */
+	DBlock		*data;			/* disk cache block involved */
+};
+
+/*
+ * Statistics kept in the tail. 
+ */
+struct ATailStats
+{
+	u32int		clumps;		/* number of clumps */
+	u32int		cclumps;		/* number of compressed clumps */
+	u64int		used;		/* NOTE(review): presumably bytes of the arena used by clumps -- confirm */
+	u64int		uncsize;	/* NOTE(review): presumably total uncompressed data size -- confirm */
+	u8int		sealed;
+};
+
+/*
+ * Arena state - represents a point in the data log
+ */
+struct AState
+{
+	Arena		*arena;			/* arena the point lies in; may be nil (setdcachestate checks for it) */
+	u64int		aa;			/* index address */
+	ATailStats		stats;		/* tail statistics at that point */
+};
+
+/*
+ * an Arena is a log of Clumps, preceded by an ArenaHead,
+ * and followed by an Arena (the trailer), each in one disk block.
+ * struct on disk is not always up to date, but should be self-consistent.
+ * to sync after reboot, follow clumps starting at used until ClumpFreeMagic is found.
+ * <struct name="Arena" type="Arena *">
+ *	<field name="name" val="s->name" type="AName"/>
+ *	<field name="version" val="s->version" type="U32int"/>
+ *	<field name="partition" val="s->part->name" type="AName"/>
+ *	<field name="blocksize" val="s->blocksize" type="U32int"/>
+ *	<field name="start" val="s->base" type="U64int"/>
+ *	<field name="stop" val="s->base+2*s->blocksize" type="U64int"/>
+ *	<field name="created" val="s->ctime" type="U32int"/>
+ *	<field name="modified" val="s->wtime" type="U32int"/>
+ *	<field name="sealed" val="s->sealed" type="Sealed"/>
+ *	<field name="score" val="s->score" type="Score"/>
+ *	<field name="clumps" val="s->clumps" type="U32int"/>
+ *	<field name="compressedclumps" val="s->cclumps" type="U32int"/>
+ *	<field name="data" val="s->uncsize" type="U64int"/>
+ *	<field name="compresseddata" val="s->used - s->clumps * ClumpSize" type="U64int"/>
+ *	<field name="storage" val="s->used + s->clumps * ClumpInfoSize" type="U64int"/>
+ * </struct>
+ * NOTE(review): the description above names s->sealed, s->clumps,
+ * s->cclumps, s->used and s->uncsize, but in the struct below those
+ * values live inside memstats/diskstats.
+ */
+struct Arena
+{
+	QLock		lock;			/* lock for arena fields, writing to disk */
+	Part		*part;			/* partition in which arena lives */
+	int		blocksize;		/* size of block to read or write */
+	u64int		base;			/* base address on disk */
+	u64int		size;			/* total space in the arena */
+	u64int		limit;			/* storage limit for clumps */
+	u8int		score[VtScoreSize];	/* score of the entire sealed & summed arena */
+
+	int		clumpmax;		/* ClumpInfos per block */
+	AState		mem;
+	int		inqueue;
+	DigestState	sha1;
+
+	/*
+	 * fields stored on disk
+	 */
+	u32int		version;
+	char		name[ANameSize];	/* text label */
+	ATailStats		memstats;
+	ATailStats		diskstats;
+	u32int		ctime;			/* first time a block was written */
+	u32int		wtime;			/* last time a block was written */
+	u32int		clumpmagic;
+};
+
+/*
+ * redundant storage of some fields at the beginning of each arena
+ */
+struct ArenaHead
+{
+	u32int		version;
+	char		name[ANameSize];	/* text label, as in Arena */
+	u32int		blocksize;
+	u64int		size;
+	u32int		clumpmagic;
+};
+
+/*
+ * most interesting meta information for a clump.
+ * stored in each clump's header and in the Arena's directory,
+ * stored in reverse order just prior to the arena trailer.
+ * packed size on disk is ClumpInfoSize.
+ */
+struct ClumpInfo
+{
+	u8int		type;
+	u16int		size;			/* size of disk data, not including header */
+	u16int		uncsize;		/* size of uncompressed data */
+	u8int		score[VtScoreSize];	/* score of the uncompressed data only */
+};
+
+/*
+ * header for an immutable clump of data
+ * packed size on disk is ClumpSize.
+ */
+struct Clump
+{
+	ClumpInfo	info;
+	u8int		encoding;		/* presumably one of the ClumpE* encodings -- confirm */
+	u32int		creator;		/* initial client which wrote the block */
+	u32int		time;			/* creation at gmt seconds since 1/1/1970 */
+};
+
+/*
+ * index of all clumps according to their score
+ * this is just a wrapper to tie together the index sections
+ * <struct name="Index" type="Index *">
+ *	<field name="name" val="s->name" type="AName"/>
+ *	<field name="version" val="s->version" type="U32int"/>
+ *	<field name="blocksize" val="s->blocksize" type="U32int"/>
+ *	<field name="tabsize" val="s->tabsize" type="U32int"/>
+ *	<field name="buckets" val="s->buckets" type="U32int"/>
+ *	<field name="buckdiv" val="s->div" type="U32int"/>
+ *	<field name="bitblocks" val="s->div" type="U32int"/>
+ *	<field name="maxdepth" val="s->div" type="U32int"/>
+ *	<field name="bitkeylog" val="s->div" type="U32int"/>
+ *	<field name="bitkeymask" val="s->div" type="U32int"/>
+ *	<array name="sect" val="&s->smap[i]" elems="s->nsects" type="Amap"/>
+ *	<array name="amap" val="&s->amap[i]" elems="s->narenas" type="Amap"/>
+ *	<array name="arena" val="s->arenas[i]" elems="s->narenas" type="Arena"/>
+ * </struct>
+ * <struct name="Amap" type="AMap *">
+ *	<field name="name" val="s->name" type="AName"/>
+ *	<field name="start" val="s->start" type="U64int"/>
+ *	<field name="stop" val="s->stop" type="U64int"/>
+ * </struct>
+ * NOTE(review): bitblocks, maxdepth, bitkeylog and bitkeymask above
+ * all use val="s->div"; looks like a copy-paste -- confirm.
+ */
+struct Index
+{
+	u32int		div;			/* divisor for mapping score to bucket */
+	u32int		buckets;		/* last bucket used in disk hash table */
+	u32int		blocksize;
+	u32int		tabsize;		/* max. bytes in index config */
+	u32int		bitblocks;	//XXX remove these fields
+	u32int		maxdepth;
+	u32int		bitkeylog;
+	u32int		bitkeymask;
+
+	int		mapalloc;		/* first arena to check when adding a lump */
+	Arena		**arenas;		/* arenas in the mapping */
+	ISect		**sects;		/* sections which hold the buckets */
+	Bloom		*bloom;	/* bloom filter */
+
+	/*
+	 * fields stored in config file 
+	 */
+	u32int		version;
+	char		name[ANameSize];	/* text label */
+	int		nsects;
+	AMap		*smap;			/* mapping of buckets to index sections */
+	int		narenas;
+	AMap		*amap;			/* mapping from index addresses to arenas */
+};
+
+/*
+ * one part of the bucket storage for an index.
+ * the index blocks are sequentially allocated
+ * across all of the sections.
+ */
+struct ISect
+{
+	Part		*part;			/* partition holding this section */
+	int		blocklog;		/* log2(blocksize) */
+	int		buckmax;		/* max. entries in an index bucket */
+	u32int		tabbase;		/* base address of index config table on disk */
+	u32int		tabsize;		/* max. bytes in index config */
+	Channel	*writechan;
+	Channel	*writedonechan;
+
+	/*
+	 * fields stored on disk
+	 */
+	u32int		version;
+	u32int		bucketmagic;		/* presumably the magic passed to packibucket/unpackibucket -- confirm */
+	char		name[ANameSize];	/* text label */
+	char		index[ANameSize];	/* index owning the section */
+	u32int		blocksize;		/* size of hash buckets in index */
+	u32int		blockbase;		/* address of start of on disk index table */
+	u32int		blocks;			/* total blocks on disk; some may be unused */
+	u32int		start;			/* first bucket in this section */
+	u32int		stop;			/* limit of buckets in this section */
+};
+
+/*
+ * externally interesting part of an IEntry
+ */
+struct IAddr
+{
+	u64int		addr;			/* NOTE(review): presumably an index address -- confirm */
+	u16int		size;			/* uncompressed size */
+	u8int		type;			/* type of block */
+	u8int		blocks;			/* arena io quanta for Clump + data */
+};
+
+/*
+ * entries in the index
+ * kept in IBuckets in the disk index table,
+ * cached in the memory ICache.
+ * packientry writes score, wtime, train and ia to disk;
+ * next, nextdirty, rac and dirty are not written by it.
+ */
+struct IEntry
+{
+	u8int		score[VtScoreSize];
+	IEntry		*next;			/* next in hash chain */
+	IEntry		*nextdirty; 		/* next in dirty chain */
+	u32int		wtime;			/* last write time */
+	u16int		train;			/* relative train containing the most recent ref; 0 if no ref, 1 if in same car */
+	u8int		rac;			/* read ahead count */
+	u8int		dirty;		/* is dirty */
+	IAddr		ia;
+};
+
+/*
+ * buckets in the on disk index table
+ */
+struct IBucket
+{
+	u16int		n;			/* number of active indices */
+	u32int		buck;		/* used by buildindex/checkindex only */
+	u8int		*data;			/* entries; unpackibucket points this just past the bucket header */
+};
+
+/*
+ * temporary buffers used by individual threads
+ */
+struct ZBlock
+{
+	u32int		len;
+	u32int		_size;
+	u8int		*data;
+	u8int		*free;			/* NOTE(review): presumably the pointer to hand back to the allocator -- confirm */
+};
+
+/*
+ * simple input buffer for a '\0' terminated text file;
+ * the whole file is held in b and pos tracks the read position.
+ */
+struct IFile
+{
+	char		*name;				/* name of the file */
+	ZBlock		*b;				/* entire contents of file */
+	u32int		pos;				/* current position in the file */
+};
+
+/*
+ * description of one statistic; statdesc[] holds one per Stat* index.
+ */
+struct Statdesc
+{
+	char *name;
+	ulong max;		/* NOTE(review): meaning not visible here -- see stats.c */
+};
+
+/*
+ * indices of the individual statistics.
+ * they index Stats.n[] and statdesc[], both of which have NStat entries.
+ * keep in sync with stats.c:/statdesc and httpd.c:/graphname
+ */
+enum
+{
+	StatRpcTotal,
+	StatRpcRead,
+	StatRpcReadOk,
+	StatRpcReadFail,
+	StatRpcReadBytes,
+	StatRpcReadTime,
+	StatRpcReadCached,
+	StatRpcReadCachedTime,
+	StatRpcReadUncached,
+	StatRpcReadUncachedTime,
+	StatRpcWrite,
+	StatRpcWriteNew,
+	StatRpcWriteOld,
+	StatRpcWriteFail,
+	StatRpcWriteBytes,
+	StatRpcWriteTime,
+	StatRpcWriteNewTime,
+	StatRpcWriteOldTime,
+
+	StatLcacheHit,
+	StatLcacheMiss,
+	StatLcacheRead,
+	StatLcacheWrite,
+	StatLcacheSize,
+	StatLcacheStall,
+	StatLcacheReadTime,
+
+	StatDcacheHit,
+	StatDcacheMiss,
+	StatDcacheLookup,
+	StatDcacheRead,
+	StatDcacheWrite,
+	StatDcacheDirty,
+	StatDcacheSize,
+	StatDcacheFlush,
+	StatDcacheStall,
+	StatDcacheLookupTime,
+
+	StatDblockStall,
+	StatLumpStall,
+
+	StatIcacheHit,
+	StatIcacheMiss,
+	StatIcacheRead,
+	StatIcacheWrite,
+	StatIcacheFill,
+	StatIcachePrefetch,
+	StatIcacheDirty,
+	StatIcacheSize,
+	StatIcacheFlush,
+	StatIcacheStall,
+	StatIcacheReadTime,
+
+	StatBloomHit,
+	StatBloomMiss,
+	StatBloomFalseMiss,
+	StatBloomLookup,
+	StatBloomOnes,
+	StatBloomBits,
+	StatBloomLookupTime,
+
+	StatApartRead,
+	StatApartReadBytes,
+	StatApartWrite,
+	StatApartWriteBytes,
+
+	StatIsectRead,
+	StatIsectReadBytes,
+	StatIsectWrite,
+	StatIsectWriteBytes,
+
+	StatSumRead,
+	StatSumReadBytes,
+
+	NStat			/* number of statistics; must stay last */
+};
+
+extern Statdesc statdesc[NStat];
+
+/*
+ * statistics about the operation of the server
+ * mainly for performance monitoring and profiling.
+ */
+struct Stats
+{
+	ulong		now;		/* NOTE(review): presumably the time of this snapshot -- confirm */
+	ulong		n[NStat];	/* one counter per Stat* index */
+};
+
+/*
+ * NOTE(review): by the field names, a summary of a set of samples
+ * (count, minimum, maximum, average) -- confirm against the users.
+ */
+struct Statbin
+{
+	uint nsamp;
+	uint min;
+	uint max;
+	uint avg;
+};
+
+/*
+ * parameters for one graph of statistics.
+ * fn is handed two Stats pointers and arg, and returns a long.
+ * NOTE(review): the remaining fields look like the time range (t0, t1),
+ * value range (min, max) and pixel size (wid, ht) -- confirm in httpd.c.
+ */
+struct Graph
+{
+	long (*fn)(Stats*, Stats*, void*);
+	void *arg;
+	long t0;
+	long t1;
+	long min;
+	long max;
+	long wid;
+	long ht;
+	int fill;
+};
+
+/*
+ * for kicking background processes that run one round after another after another
+ * (the disk cache sets one up with initround and drives it with
+ * kickround and delaykickround)
+ */
+struct Round
+{
+	QLock	lock;
+	Rendez	start;
+	Rendez	finish;
+	Rendez	delaywait;
+	int		delaytime;	/* NOTE(review): presumably the delay given to initround -- confirm */
+	int		delaykick;
+	char*	name;
+	int		last;
+	int		current;
+	int		next;
+	int		doanother;
+};
+
+/*
+ * Bloom filter of stored block hashes
+ * NOTE(review): the lock comments below mention nbits, tab, mb and nb,
+ * none of which are fields of this struct; they probably refer to
+ * older names for size/data -- confirm.
+ */
+struct Bloom
+{
+	RWLock lk;		/* protects nhash, nbits, tab, mb */
+	QLock mod;		/* one marker at a time, protects nb */
+	int nhash;		/* stored in the on-disk header (see packbloomhead) */
+	ulong size;		/* bytes in tab */
+	ulong mask;		/* to produce index */
+	u8int *data;
+	Part *part;
+	Channel *writechan;
+	Channel *writedonechan;
+};
+
+extern	Index		*mainindex;
+extern	u32int		maxblocksize;		/* max. block size used by any partition */
+extern	int		paranoid;		/* should verify hashes on disk read */
+extern	int		queuewrites;		/* put all lump writes on a queue and finish later */
+extern	int		readonly;		/* only allowed to read the disk data */
+extern	Stats		stats;
+extern	u8int		zeroscore[VtScoreSize];
+extern	int		compressblocks;
+extern	int		writestodevnull;	/* dangerous - for performance debugging */
+extern	int		collectstats;
+extern	QLock	memdrawlock;
+extern	int		icachesleeptime;
+extern	int		arenasumsleeptime;
+
+#ifndef PLAN9PORT
+#pragma varargck type "V" uchar*
+#define ODIRECT 0
+#endif
diff --git a/src/cmd/venti/srv/dcache.c b/src/cmd/venti/srv/dcache.c
new file mode 100644
index 0000000..72aaafa
--- /dev/null
+++ b/src/cmd/venti/srv/dcache.c
@@ -0,0 +1,816 @@
+/*
+ * Disk cache.
+ * 
+ * Caches raw disk blocks.  Getdblock() gets a block, putdblock puts it back.
+ * Getdblock has a mode parameter that determines i/o and access to a block:
+ * if mode is OREAD or ORDWR, it is read from disk if not already in memory.
+ * If mode is ORDWR or OWRITE, it is locked for exclusive use before being returned.
+ * It is *not* marked dirty -- once changes have been made, they should be noted
+ * by using dirtydblock() before putdblock().  
+ *
+ * There is a global cache lock as well as a lock on each block. 
+ * Within a thread, the cache lock can be acquired while holding a block lock,
+ * but not vice versa; and a block cannot be locked if you already hold the lock
+ * on another block.
+ * 
+ * The flush proc writes out dirty blocks in batches, one batch per dirty tag.
+ * For example, the DirtyArena blocks are all written to disk before any of the
+ * DirtyArenaCib blocks.
+ *
+ * This code used to be in charge of flushing the dirty index blocks out to 
+ * disk, but updating the index turned out to benefit from extra care.
+ * Now cached index blocks are never marked dirty.  The index.c code takes
+ * care of updating them behind our back, and uses _getdblock to update any
+ * cached copies of the blocks as it changes them on disk.
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+typedef struct DCache	DCache;
+
+/*
+ * geometry of the address hash table (dcache.heads)
+ */
+enum
+{
+	HashLog		= 9,			/* log2 of the number of hash chains */
+	HashSize	= 1<<HashLog,		/* number of hash chains */
+	HashMask	= HashSize - 1,		/* reduces a hash value to a chain index */
+};
+
+/*
+ * the disk cache itself; there is a single instance, dcache.
+ */
+struct DCache
+{
+	QLock		lock;			/* the global cache lock */
+	RWLock		dirtylock;		/* must be held to inspect or set b->dirty */
+	Rendez		full;			/* slept on when no block can be recycled; woken by putdblock */
+	Round		round;			/* flush rounds, kicked when blocks become dirty */
+	DBlock		*free;			/* list of never-used blocks, linked through next */
+	u32int		now;			/* ticks for usage timestamps */
+	int		size;			/* max. size of any block; allocated to each block */
+	DBlock		**heads;		/* hash table for finding address */
+	int		nheap;			/* number of available victims */
+	DBlock		**heap;			/* heap for locating victims */
+	int		nblocks;		/* number of blocks allocated */
+	DBlock		*blocks;		/* array of block descriptors */
+	DBlock		**write;		/* array of block pointers to be written */
+	u8int		*mem;			/* memory for all block data plus the readahead buffer */
+	int		ndirty;			/* number of dirty blocks */
+	int		maxdirty;		/* max. number of dirty blocks */
+	Channel	*ra;				/* readahead requests (Ra) for raproc */
+	u8int		*rabuf;			/* readahead buffer, carved out of mem */
+	u32int		ramax;			/* size of rabuf in bytes */
+	u32int		rasize;			/* valid bytes currently in rabuf */
+	u64int		raaddr;			/* partition address of rabuf[0] */
+	Part		*rapart;		/* partition rabuf was read from; nil if none */
+
+	AState	diskstate;			/* returned by diskstate() */
+	AState	state;				/* set by setdcachestate() */
+};
+
+/*
+ * a readahead request, sent on dcache.ra to raproc
+ */
+typedef struct Ra Ra;
+struct Ra
+{
+	Part *part;		/* partition to read from */
+	u64int addr;		/* address of the block to fetch */
+};
+
+static DCache	dcache;
+
+static int	downheap(int i, DBlock *b);
+static int	upheap(int i, DBlock *b);
+static DBlock	*bumpdblock(void);
+static void	delheap(DBlock *db);
+static void	fixheap(int i, DBlock *b);
+static void	flushproc(void*);
+static void	writeproc(void*);
+static void raproc(void*);
+
+/*
+ * set up the disk cache with mem bytes of block storage.
+ * every block is maxblocksize bytes; mem must hold at least two.
+ * allocates the hash table, heap, descriptors and data, threads
+ * all blocks onto the free list, and starts the flush, delayed-kick
+ * and readahead procs.
+ */
+void
+initdcache(u32int mem)
+{
+	DBlock *b, *last;
+	u32int nblocks, blocksize;
+	int i;
+	u8int *p;
+
+	if(mem < maxblocksize * 2)
+		sysfatal("need at least %d bytes for the disk cache", maxblocksize * 2);
+	/* checked before the division below */
+	if(maxblocksize == 0)
+		sysfatal("no max. block size given for disk cache");
+	blocksize = maxblocksize;
+	nblocks = mem / blocksize;
+	dcache.full.l = &dcache.lock;
+	dcache.nblocks = nblocks;
+	dcache.maxdirty = (nblocks * 2) / 3;
+	trace(TraceProc, "initialize disk cache with %d blocks of %d bytes, maximum %d dirty blocks\n",
+			nblocks, blocksize, dcache.maxdirty);
+	dcache.size = blocksize;
+	dcache.heads = MKNZ(DBlock*, HashSize);
+	dcache.heap = MKNZ(DBlock*, nblocks);
+	dcache.blocks = MKNZ(DBlock, nblocks);
+	dcache.write = MKNZ(DBlock*, nblocks);
+	/* one extra block of slack for alignment, 128 more for the readahead buffer */
+	dcache.mem = MKNZ(u8int, (nblocks+1+128) * blocksize);
+	dcache.ra = chancreate(sizeof(Ra), 0);
+
+	last = nil;
+	/* round the data area up to a multiple of blocksize; the mask assumes blocksize is a power of two */
+	p = (u8int*)(((ulong)dcache.mem+blocksize-1)&~(ulong)(blocksize-1));
+	for(i = 0; i < nblocks; i++){
+		b = &dcache.blocks[i];
+		b->data = &p[i * blocksize];
+		b->heap = TWID32;	/* not in the heap */
+		b->writedonechan = chancreate(sizeof(void*), 1);
+		b->next = last;
+		last = b;
+	}
+	/* the readahead buffer follows the last block's data */
+	dcache.rabuf = &p[i*blocksize];
+	dcache.ramax = 128*blocksize;
+	dcache.raaddr = 0;
+	dcache.rapart = nil;
+
+	dcache.free = last;
+	dcache.nheap = 0;
+	setstat(StatDcacheSize, nblocks);
+	initround(&dcache.round, "dcache", 120*1000);
+
+	vtproc(flushproc, nil);
+	vtproc(delaykickroundproc, &dcache.round);
+	vtproc(raproc, nil);
+}
+
+/*
+ * record *a as the current arena state of the cache,
+ * under the cache lock.
+ */
+void
+setdcachestate(AState *a)
+{
+	char *aname;
+
+	aname = nil;
+	if(a->arena)
+		aname = a->arena->name;
+	trace(TraceBlock, "setdcachestate %s 0x%llux clumps %d", aname, a->aa, a->stats.clumps);
+
+	qlock(&dcache.lock);
+	dcache.state = *a;
+	qunlock(&dcache.lock);
+}
+
+/*
+ * return a copy of dcache.diskstate, taken under the cache lock.
+ */
+AState
+diskstate(void)
+{
+	AState snap;
+
+	qlock(&dcache.lock);
+	snap = dcache.diskstate;
+	qunlock(&dcache.lock);
+
+	return snap;
+}
+
+/*
+ * readahead proc: for each request received on dcache.ra,
+ * pull the block into the cache (load mode 2) and release it again.
+ * requests at or beyond the end of the partition are ignored.
+ * exits when recv stops returning 1.
+ */
+static void
+raproc(void *v)
+{
+	Ra req;
+	DBlock *blk;
+
+	USED(v);
+	for(;;){
+		if(recv(dcache.ra, &req) != 1)
+			break;
+		if(req.addr >= req.part->size)
+			continue;
+		blk = _getdblock(req.part, req.addr, OREAD, 2);
+		putdblock(blk);
+	}
+}
+
+/*
+ * readahead heuristic, called by _getdblock on cache hits (miss == 0)
+ * and misses (miss != 0).
+ *
+ * NOTE: currently disabled -- the unconditional return below makes
+ * everything after it unreachable, so no request is ever sent to raproc
+ * from here.
+ *
+ * the disabled logic: when two consecutive misses on the same partition
+ * are exactly one cache block apart, remember the direction and ask
+ * raproc for the next block that way; when a hit lands on the block
+ * last asked for, ask for the one after it.  requests are sent with
+ * nbsend, so they are dropped rather than waited for if raproc is busy.
+ */
+void
+dreadahead(Part *part, u64int addr, int miss)
+{
+	Ra ra;
+	static struct {
+		Part *part;
+		u64int addr;
+	} lastmiss;
+	static struct {
+		Part *part;
+		u64int addr;
+		int dir;
+	} lastra;
+
+/* readahead is switched off */
+return;
+	if(miss){
+		if(lastmiss.part==part && lastmiss.addr==addr-dcache.size){
+		XRa:
+			lastra.part = part;
+			lastra.dir = addr-lastmiss.addr;
+			lastra.addr = addr+lastra.dir;
+			ra.part = part;
+			ra.addr = lastra.addr;
+			nbsend(dcache.ra, &ra);
+		}else if(lastmiss.part==part && lastmiss.addr==addr+dcache.size){
+			addr -= dcache.size;
+			goto XRa;
+		}
+	}else{
+		if(lastra.part==part && lastra.addr==addr){
+			lastra.addr += lastra.dir;
+			ra.part = part;
+			ra.addr = lastra.addr;
+			nbsend(dcache.ra, &ra);
+		}
+	}
+
+	if(miss){
+		lastmiss.part = part;
+		lastmiss.addr = addr;
+	}
+
+//	fprint(2, "%s %llx %s\n", part->name, addr, miss ? "miss" : "hit");
+}
+
+/*
+ * read n bytes at addr on part into buf, going through the
+ * readahead buffer.
+ *
+ * if the range is already in the readahead buffer it is copied
+ * from there.  otherwise, unless load == 2 (a readahead request),
+ * the read goes straight to readpart.  for load == 2 the readahead
+ * buffer is refilled starting at addr (up to dcache.ramax bytes,
+ * clipped to the end of the partition) and the first n bytes are
+ * copied out.
+ *
+ * requests that start or end beyond the partition, or that are larger
+ * than the readahead buffer, are handed to readpart so that it reports
+ * the error; filling the buffer for them would copy out bytes that
+ * were never read.
+ *
+ * returns 0 on success, or readpart's result / -1 on failure.
+ */
+int
+rareadpart(Part *part, u64int addr, u8int *buf, uint n, int load)
+{
+	uint nn;
+	static RWLock ralock;
+
+	rlock(&ralock);
+	if(dcache.rapart==part && dcache.raaddr <= addr && addr+n <= dcache.raaddr+dcache.rasize){
+		memmove(buf, dcache.rabuf+(addr-dcache.raaddr), n);
+		runlock(&ralock);
+		return 0;
+	}
+	/* anything the readahead buffer cannot satisfy completely: let readpart do the work (and the error) */
+	if(load != 2 || addr >= part->size || n > part->size - addr || n > dcache.ramax){
+		runlock(&ralock);
+		return readpart(part, addr, buf, n);
+	}
+
+	/* trade the read lock for the write lock to refill the buffer */
+	runlock(&ralock);
+	wlock(&ralock);
+fprint(2, "raread %s %llx\n", part->name, addr);
+	nn = dcache.ramax;
+	if(addr+nn > part->size)
+		nn = part->size - addr;
+	if(readpart(part, addr, dcache.rabuf, nn) < 0){
+		wunlock(&ralock);
+		return -1;
+	}
+	/* nn >= n is guaranteed by the checks above */
+	memmove(buf, dcache.rabuf, n);	
+	dcache.rapart = part;
+	dcache.rasize = nn;
+	dcache.raaddr = addr;
+	wunlock(&ralock);
+
+	/* the caller accounts for its own n bytes; count the extra ones here */
+	addstat(StatApartReadBytes, nn-n);
+	return 0;
+}
+
+/*
+ * hash a partition address to a chain index in dcache.heads:
+ * fold the two 32-bit halves together, then apply a
+ * multiplicative hash and keep the top HashLog bits.
+ */
+static u32int
+pbhash(u64int addr)
+{
+	u32int folded;
+
+#define hashit(c)	((((c) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
+	folded = addr ^ (addr >> 32);
+	return hashit(folded);
+}
+
+/*
+ * get the block at addr on part, loading it if needed,
+ * and account for the lookup in the statistics:
+ * a read and/or write is counted according to mode, and the
+ * elapsed milliseconds are added to the lookup time.
+ * returns whatever _getdblock returns (nil on failure).
+ */
+DBlock*
+getdblock(Part *part, u64int addr, int mode)
+{
+	DBlock *blk;
+	uint start, elapsed;
+
+	start = msec();
+	blk = _getdblock(part, addr, mode, 1);
+
+	switch(mode){
+	case OREAD:
+		addstat(StatDcacheRead, 1);
+		break;
+	case OWRITE:
+		addstat(StatDcacheWrite, 1);
+		break;
+	case ORDWR:
+		addstat(StatDcacheRead, 1);
+		addstat(StatDcacheWrite, 1);
+		break;
+	}
+
+	elapsed = msec() - start;
+	addstat2(StatDcacheLookup, 1, StatDcacheLookupTime, elapsed);
+	return blk;
+}
+
+/*
+ * find or load the cache block for addr on part and return it locked.
+ *
+ * mode: OREAD takes the block's read lock; anything else takes the
+ *	write lock.  with OWRITE missing data is zeroed instead of read.
+ * load: 0 only looks in the cache and returns nil on a miss;
+ *	nonzero recycles a block on a miss and fills it from disk.
+ *	2 marks a readahead request: it suppresses further readahead
+ *	hints and is passed on to rareadpart.
+ *
+ * returns nil if the partition's block size exceeds the cache block
+ * size, on a miss with load == 0, or if the disk read fails.
+ * the caller releases the block with putdblock.
+ */
+DBlock*
+_getdblock(Part *part, u64int addr, int mode, int load)
+{
+	DBlock *b;
+	u32int h, size;
+
+	trace(TraceBlock, "getdblock enter %s 0x%llux", part->name, addr);
+	size = part->blocksize;
+	if(size > dcache.size){
+		seterr(EAdmin, "block size %d too big for cache with size %d", size, dcache.size);
+		return nil;
+	}
+	h = pbhash(addr);
+
+	/*
+	 * look for the block in the cache
+	 */
+//checkdcache();
+	qlock(&dcache.lock);
+again:
+	for(b = dcache.heads[h]; b != nil; b = b->next){
+		if(b->part == part && b->addr == addr){
+			/*
+			qlock(&stats.lock);
+			stats.pchit++;
+			qunlock(&stats.lock);
+			*/
+			if(load){
+				addstat(StatDcacheHit, 1);
+				if(load != 2 && mode != OWRITE)
+					dreadahead(part, b->addr, 0);
+			}
+			goto found;
+		}
+	}
+
+	/*
+	 * missed: locate the block with the oldest second to last use.
+	 * remove it from the heap, and fix up the heap.
+	 */
+	if(!load){
+		qunlock(&dcache.lock);
+		return nil;
+	}
+
+	addstat(StatDcacheMiss, 1);
+
+	b = bumpdblock();
+	if(b == nil){
+		/* nothing to recycle: wait for putdblock to wake us, then search again */
+		trace(TraceBlock, "all disk cache blocks in use");
+		addstat(StatDcacheStall, 1);
+		rsleep(&dcache.full);
+		addstat(StatDcacheStall, -1);
+		goto again;
+	}
+
+	assert(!b->dirty);
+
+	/*
+	 * the new block has no last use, so assume it happens sometime in the middle
+ZZZ this is not reasonable
+	 */
+	b->used = (b->used2 + dcache.now) / 2;
+
+	/*
+	 * rechain the block on the correct hash chain
+	 */
+	b->next = dcache.heads[h];
+	dcache.heads[h] = b;
+	if(b->next != nil)
+		b->next->prev = b;
+	b->prev = nil;
+
+	b->addr = addr;
+	b->part = part;
+	b->size = 0;	/* no valid data yet; filled in below */
+	if(load != 2 && mode != OWRITE)
+		dreadahead(part, b->addr, 1);
+
+found:
+	b->ref++;
+	b->used2 = b->used;
+	b->used = dcache.now++;
+	if(b->heap != TWID32)
+		fixheap(b->heap, b);
+
+	qunlock(&dcache.lock);
+//checkdcache();
+
+	/* the block lock is taken only after the cache lock has been dropped */
+	trace(TraceBlock, "getdblock lock");
+	addstat(StatDblockStall, 1);
+	if(mode == OREAD)
+		rlock(&b->lock);
+	else
+		wlock(&b->lock);
+	addstat(StatDblockStall, -1);
+	trace(TraceBlock, "getdblock locked");
+
+	if(b->size != size){
+		/* the data must be changed, so a reader trades up to the write lock */
+		if(mode == OREAD){
+			addstat(StatDblockStall, 1);
+			runlock(&b->lock);
+			wlock(&b->lock);
+			addstat(StatDblockStall, -1);
+		}
+		if(b->size < size){
+			if(mode == OWRITE)
+				memset(&b->data[b->size], 0, size - b->size);
+			else{
+				trace(TraceBlock, "getdblock readpart %s 0x%llux", part->name, addr);
+				if(rareadpart(part, addr + b->size, &b->data[b->size], size - b->size, load) < 0){
+					b->mode = ORDWR;	/* so putdblock wunlocks */
+					putdblock(b);
+					return nil;
+				}
+				trace(TraceBlock, "getdblock readpartdone");
+				addstat(StatApartRead, 1);
+				addstat(StatApartReadBytes, size-b->size);
+			}
+		}
+		b->size = size;
+		/* and trades back down to the read lock afterwards */
+		if(mode == OREAD){
+			addstat(StatDblockStall, 1);
+			wunlock(&b->lock);
+			rlock(&b->lock);
+			addstat(StatDblockStall, -1);
+		}
+	}
+
+	b->mode = mode;	/* remembered for putdblock */
+	trace(TraceBlock, "getdblock exit");
+	return b;
+}
+
+/*
+ * release a block obtained from getdblock/_getdblock.
+ * drops the block lock (read or write, according to b->mode) and
+ * the reference.  when the last reference to a clean block goes
+ * away, the block becomes a candidate for reuse and anyone waiting
+ * for a free block is woken.  a nil b is ignored.
+ */
+void
+putdblock(DBlock *b)
+{
+	int slot;
+
+	if(b == nil)
+		return;
+
+	trace(TraceBlock, "putdblock %s 0x%llux", b->part->name, b->addr);
+
+	if(b->mode != OREAD)
+		wunlock(&b->lock);
+	else
+		runlock(&b->lock);
+
+//checkdcache();
+	qlock(&dcache.lock);
+	b->ref--;
+	if(b->ref == 0 && !b->dirty){
+		if(b->heap == TWID32){
+			/* not yet a victim candidate: append to the heap */
+			slot = dcache.nheap++;
+			upheap(slot, b);
+		}
+		rwakeupall(&dcache.full);
+	}
+	qunlock(&dcache.lock);
+//checkdcache();
+}
+
+/*
+ * mark b dirty with the given dirty tag.
+ * b must be referenced and held in ORDWR or OWRITE mode.
+ * a block that is already dirty may only be dirtied again with
+ * the same tag.  the first time a partition gets a dirty block,
+ * its write channel and write proc are created.
+ * when a clean block becomes dirty the dirty count goes up and a
+ * flush round is kicked: immediately once maxdirty is reached,
+ * otherwise after a delay.
+ */
+void
+dirtydblock(DBlock *b, int dirty)
+{
+	int odirty;
+	Part *p;
+
+
+	trace(TraceBlock, "dirtydblock enter %s 0x%llux %d from 0x%lux", b->part->name, b->addr, dirty, getcallerpc(&b));
+	assert(b->ref != 0);
+	assert(b->mode==ORDWR || b->mode==OWRITE);
+
+	odirty = b->dirty;
+	if(b->dirty)
+		assert(b->dirty == dirty);
+	else
+		b->dirty = dirty;
+
+	p = b->part;
+	/* NOTE(review): this test and creation happen without dcache.lock held -- confirm callers cannot race here */
+	if(p->writechan == nil){
+		trace(TraceBlock, "dirtydblock allocwriteproc %s", p->name);
+		/* XXX hope this doesn't fail! */
+		p->writechan = chancreate(sizeof(DBlock*), dcache.nblocks);
+		vtproc(writeproc, p);
+	}
+	qlock(&dcache.lock);
+	if(!odirty){
+		dcache.ndirty++;
+		setstat(StatDcacheDirty, dcache.ndirty);
+		if(dcache.ndirty >= dcache.maxdirty)
+			kickround(&dcache.round, 0);
+		else
+			delaykickround(&dcache.round);
+	}
+	qunlock(&dcache.lock);
+}
+
+/*
+ * remove some block from use and update the free list and counters.
+ * prefers a never-used block from the free list; otherwise pops
+ * victims off the heap until one is neither referenced nor dirty,
+ * and unlinks it from its hash chain.
+ * returns nil (after kicking the flusher) if the heap runs out.
+ * called from _getdblock with dcache.lock held.
+ */
+static DBlock*
+bumpdblock(void)
+{
+	DBlock *b;
+	ulong h;
+
+	trace(TraceBlock, "bumpdblock enter");
+	b = dcache.free;
+	if(b != nil){
+		dcache.free = b->next;
+		return b;
+	}
+
+	if(dcache.ndirty >= dcache.maxdirty)
+		kickdcache();
+
+	/*
+	 * remove blocks until we find one that is unused
+	 * referenced blocks are left in the heap even though
+	 * they can't be scavenged; this is simply a speed optimization
+	 */
+	for(;;){
+		if(dcache.nheap == 0){
+			kickdcache();
+			trace(TraceBlock, "bumpdblock gotnothing");
+			return nil;
+		}
+		b = dcache.heap[0];
+		delheap(b);
+		if(!b->ref && !b->dirty)
+			break;
+	}
+
+	trace(TraceBlock, "bumpdblock bumping %s 0x%llux", b->part->name, b->addr);
+
+	/*
+	 * unchain the block
+	 */
+	if(b->prev == nil){
+		/* no predecessor, so it must be the head of its chain */
+		h = pbhash(b->addr);
+		if(dcache.heads[h] != b)
+			sysfatal("bad hash chains in disk cache");
+		dcache.heads[h] = b->next;
+	}else
+		b->prev->next = b->next;
+	if(b->next != nil)
+		b->next->prev = b->prev;
+
+	return b;
+}
+
+/*
+ * take db off the heap, if it is on it, by re-sifting the last heap entry into db's slot
+ */
+static void
+delheap(DBlock *db)
+{
+	if(db->heap != TWID32){
+		fixheap(db->heap, dcache.heap[--dcache.nheap]);
+		db->heap = TWID32;	/* TWID32 marks a block that is not on the heap */
+	}
+}
+
+/* sift b, nominally at slot i, up; only if it did not rise, sift it down instead */
+static void
+fixheap(int i, DBlock *b)
+{
+	int slot;
+	slot = upheap(i, b);
+	if(slot == i)
+		downheap(i, b);
+}
+
+static int
+upheap(int i, DBlock *b)
+{
+	DBlock *above;
+	u32int now;
+	int parent;
+	/* climb from slot i while b sorts before its parent; returns the slot b ends up in */
+	now = dcache.now;	/* keys are compared as unsigned offsets of used2 from now */
+	while(i != 0){
+		parent = (i - 1) >> 1;
+		above = dcache.heap[parent];
+		if(b->used2 - now >= above->used2 - now)
+			break;
+		dcache.heap[i] = above;
+		above->heap = i;
+		i = parent;
+	}
+	dcache.heap[i] = b;
+	b->heap = i;
+	return i;
+}
+
+static int
+downheap(int i, DBlock *b)
+{
+	DBlock *child;
+	u32int now;
+	int k;
+
+	/* descend from slot i while b sorts after its smaller child; returns the slot b ends up in */
+	now = dcache.now;	/* keys are compared as unsigned offsets of used2 from now */
+	while((k = (i << 1) + 1) < dcache.nheap){
+		/* k is the left child; switch to the right one when its key is smaller */
+		if(k + 1 < dcache.nheap && dcache.heap[k]->used2 - now > dcache.heap[k + 1]->used2 - now)
+			k++;
+		child = dcache.heap[k];
+		if(b->used2 - now <= child->used2 - now)
+			break;
+		dcache.heap[i] = child;
+		child->heap = i;
+		i = k;
+	}
+
+	dcache.heap[i] = b;
+	b->heap = i;
+	return i;
+}
+
+/* sanity check: die unless bb is on its hash chain and each prev link on the way is right */
+static void
+findblock(DBlock *bb)
+{
+	DBlock *cur, *before;
+	int h;
+
+	h = pbhash(bb->addr);
+	before = nil;
+	for(cur = dcache.heads[h]; cur != nil; before = cur, cur = cur->next){
+		if(cur->prev != before)
+			sysfatal("bad prev link");
+		if(cur == bb)
+			return;
+	}
+	sysfatal("block missing from hash table");
+}
+
+void
+checkdcache(void)
+{
+	DBlock *b;
+	u32int size, now;
+	int i, k, refed, nfree;
+	/* debugging aid: sysfatal unless the cache's heap, block table and free list agree with each other */
+	qlock(&dcache.lock);
+	size = dcache.size;
+	now = dcache.now;
+	for(i = 0; i < dcache.nheap; i++){	/* heap: back-pointers, and ordering against the parent and both children */
+		if(dcache.heap[i]->heap != i)
+			sysfatal("dc: mis-heaped at %d: %d", i, dcache.heap[i]->heap);
+		if(i > 0 && dcache.heap[(i - 1) >> 1]->used2 - now > dcache.heap[i]->used2 - now)
+			sysfatal("dc: bad heap ordering");
+		k = (i << 1) + 1;
+		if(k < dcache.nheap && dcache.heap[i]->used2 - now > dcache.heap[k]->used2 - now)
+			sysfatal("dc: bad heap ordering");
+		k++;
+		if(k < dcache.nheap && dcache.heap[i]->used2 - now > dcache.heap[k]->used2 - now)
+			sysfatal("dc: bad heap ordering");
+	}
+	/* every block: data pointer, hash-chain membership and heap back-pointer */
+	refed = 0;
+	for(i = 0; i < dcache.nblocks; i++){
+		b = &dcache.blocks[i];
+		if(b->data != &dcache.mem[i * size])
+			sysfatal("dc: mis-blocked at %d", i);
+		if(b->ref && b->heap == TWID32)	/* referenced and off the heap */
+			refed++;
+		if(b->addr)
+			findblock(b);
+		if(b->heap != TWID32
+		&& dcache.heap[b->heap] != b)
+			sysfatal("dc: spurious heap value");
+	}
+	/* free-list entries must have a zero address and be off the heap */
+	nfree = 0;
+	for(b = dcache.free; b != nil; b = b->next){
+		if(b->addr != 0 || b->heap != TWID32)
+			sysfatal("dc: bad free list");
+		nfree++;
+	}
+	/* heap entries, free entries and referenced off-heap blocks must add up to every block */
+	if(dcache.nheap + nfree + refed != dcache.nblocks)
+		sysfatal("dc: missing blocks: %d %d %d", dcache.nheap, refed, dcache.nblocks);
+	qunlock(&dcache.lock);
+}
+
+void
+flushdcache(void)
+{
+	trace(TraceProc, "flushdcache enter");
+	kickround(&dcache.round, 1);	/* kick the flush round with kickround's wait argument set */
+	trace(TraceProc, "flushdcache exit");
+}
+
+void
+kickdcache(void)
+{
+	kickround(&dcache.round, 0);	/* kick the flush round with kickround's wait argument clear */
+}
+
+static int
+parallelwrites(DBlock **b, DBlock **eb, int dirty)
+{
+	DBlock **p, **sent;
+	p = b;
+	while(p<eb && (*p)->dirty == dirty){	/* queue the leading run of blocks at this dirty level */
+		assert(b<=p && p<eb);
+		sendp((*p)->part->writechan, *p);
+		p++;
+	}
+	for(sent=p, p=b; p<sent; p++){	/* then wait for one completion per block queued */
+		assert(b<=p && p<eb);
+		recvp((*p)->writedonechan);
+	}
+	return sent-b;	/* number of blocks written */
+}
+
+/*
+ * Sort first by dirty flag, then by partition, then by address in partition.
+ * Entries whose keys are all equal compare as 0, so that cmp(a, b) and
+ * cmp(b, a) never both claim "greater", as qsort expects of its comparator.
+ */
+static int
+writeblockcmp(const void *va, const void *vb)
+{
+	DBlock *a, *b;
+
+	a = *(DBlock**)va;
+	b = *(DBlock**)vb;
+
+	if(a->dirty != b->dirty)
+		return a->dirty - b->dirty;
+	if(a->part != b->part)
+		return a->part < b->part ? -1 : 1;
+	if(a->addr < b->addr)
+		return -1;
+	if(a->addr > b->addr)
+		return 1;
+	return 0;
+}
+
+static void
+flushproc(void *v)
+{
+	int i, j, n;
+	ulong t0;
+	DBlock *b, **write;
+	AState as;
+	/* flusher: after each kick of dcache.round, write out every dirty block, one dirty level at a time */
+	USED(v);
+	threadsetname("flushproc");
+	for(;;){
+		waitforkick(&dcache.round);
+
+		trace(TraceWork, "start");
+		qlock(&dcache.lock);
+		as = dcache.state;
+		qunlock(&dcache.lock);
+		/* the state captured above becomes dcache.diskstate once the writes below are done */
+		t0 = nsec()/1000;
+		/* collect every dirty block into dcache.write (dcache.lock is not held during the scan) */
+		trace(TraceProc, "build t=%lud", (ulong)(nsec()/1000)-t0);
+		write = dcache.write;
+		n = 0;
+		for(i=0; i<dcache.nblocks; i++){
+			b = &dcache.blocks[i];
+			if(b->dirty)
+				write[n++] = b;
+		}
+		/* order by dirty level, then partition, then address (writeblockcmp) */
+		qsort(write, n, sizeof(write[0]), writeblockcmp);
+
+		/* Write each stage of blocks out. */
+		trace(TraceProc, "writeblocks t=%lud", (ulong)(nsec()/1000)-t0);
+		i = 0;
+		for(j=1; j<DirtyMax; j++){
+			trace(TraceProc, "writeblocks.%d t=%lud", j, (ulong)(nsec()/1000)-t0);
+			i += parallelwrites(write+i, write+n, j);
+		}
+		if(i != n){	/* the stages did not consume every collected block: dump the list and abort */
+			fprint(2, "in flushproc i=%d n=%d\n", i, n);
+			for(i=0; i<n; i++)
+				fprint(2, "\tblock %d: dirty=%d\n", i, write[i]->dirty);
+			abort();
+		}
+
+/* XXX
+* the locking here is suspect.  what if a block is redirtied
+* after the write happens?  we'll still decrement dcache.ndirty here.
+*/
+		trace(TraceProc, "undirty.%d t=%lud", j, (ulong)(nsec()/1000)-t0);
+		qlock(&dcache.lock);
+		dcache.diskstate = as;
+		for(i=0; i<n; i++){
+			b = write[i];
+			--dcache.ndirty;
+			if(b->ref == 0 && b->heap == TWID32){	/* unreferenced and off the heap: put it back and wake waiters on dcache.full */
+				upheap(dcache.nheap++, b);
+				rwakeupall(&dcache.full);
+			}
+		}
+		setstat(StatDcacheDirty, dcache.ndirty);
+		qunlock(&dcache.lock);
+		addstat(StatDcacheFlush, 1);
+		trace(TraceWork, "finish");
+	}
+}
+
+static void
+writeproc(void *v)
+{
+	DBlock *b;
+	Part *p;
+	/* per-partition writer: take blocks from p->writechan, write each, then answer on its writedonechan */
+	p = v;
+
+	threadsetname("writeproc:%s", p->name);
+	for(;;){
+		b = recvp(p->writechan);
+		trace(TraceWork, "start");
+		assert(b->part == p);
+		trace(TraceProc, "wlock %s 0x%llux", p->name, b->addr);
+		wlock(&b->lock);
+		trace(TraceProc, "writepart %s 0x%llux", p->name, b->addr);
+		if(writepart(p, b->addr, b->data, b->size) < 0)	/* say which partition and address failed */
+			fprint(2, "writeproc: part %s addr 0x%llux: write error: %r\n", p->name, b->addr);
+		addstat(StatApartWrite, 1);
+		addstat(StatApartWriteBytes, b->size);
+		b->dirty = 0;	/* cleared even when the write failed; the error is only reported */
+		wunlock(&b->lock);
+		trace(TraceProc, "finish %s 0x%llux", p->name, b->addr);
+		trace(TraceWork, "finish");
+		sendp(b->writedonechan, b);
+	}
+}
diff --git a/src/cmd/venti/srv/dump.c b/src/cmd/venti/srv/dump.c
new file mode 100644
index 0000000..fa2bfb7
--- /dev/null
+++ b/src/cmd/venti/srv/dump.c
@@ -0,0 +1,47 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+void
+printindex(int fd, Index *ix)
+{
+	int s;
+	AMap *m;
+	/* index header, then one line per index section and one per arena */
+	fprint(fd, "index=%s version=%d blocksize=%d tabsize=%d\n", ix->name, ix->version, ix->blocksize, ix->tabsize);
+	fprint(fd, "\tbuckets=%d div=%d\n", ix->buckets, ix->div);
+	for(s = 0; s < ix->nsects; s++)
+		fprint(fd, "\tsect=%s for buckets [%lld,%lld)\n", ix->smap[s].name, ix->smap[s].start, ix->smap[s].stop);
+	for(m = ix->amap; m < ix->amap + ix->narenas; m++)
+		fprint(fd, "\tarena=%s at [%lld,%lld)\n", m->name, m->start, m->stop);
+}
+
+void
+printarenapart(int fd, ArenaPart *ap)
+{
+	AMap *m;
+	/* partition summary, then the address range of each arena in the map */
+	fprint(fd, "arena partition=%s\n\tversion=%d blocksize=%d arenas=%d\n\tsetbase=%d setsize=%d\n", ap->part->name,
+		ap->version, ap->blocksize, ap->narenas, ap->tabbase, ap->tabsize);
+	for(m = ap->map; m < ap->map + ap->narenas; m++)
+		fprint(fd, "\tarena=%s at [%lld,%lld)\n", m->name, m->start, m->stop);
+}
+
+void
+printarena(int fd, Arena *arena)
+{
+	fprint(fd, "arena='%s' [%lld,%lld)\n\tversion=%d created=%d modified=%d",
+		arena->name, arena->base, arena->base + arena->size + 2 * arena->blocksize,
+		arena->version, arena->ctime, arena->wtime);
+	if(arena->memstats.sealed)	/* finish the first line on fd as well, not on standard error */
+		fprint(fd, " sealed\n");
+	else
+		fprint(fd, "\n");
+	if(scorecmp(zeroscore, arena->score) != 0)	/* the score is shown only when it is not the zero score */
+		fprint(fd, "\tscore=%V\n", arena->score);
+	/* clump counts and sizes; like everything above, written to fd */
+	fprint(fd, "\tclumps=%,d compressed clumps=%,d data=%,lld compressed data=%,lld disk storage=%,lld\n",
+		arena->memstats.clumps, arena->memstats.cclumps, arena->memstats.uncsize,
+		arena->memstats.used - arena->memstats.clumps * ClumpSize,
+		arena->memstats.used + arena->memstats.clumps * ClumpInfoSize);
+}
diff --git a/src/cmd/venti/srv/findscore.c b/src/cmd/venti/srv/findscore.c
new file mode 100644
index 0000000..226d97a
--- /dev/null
+++ b/src/cmd/venti/srv/findscore.c
@@ -0,0 +1,121 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+enum
+{
+	ClumpChunks	= 32*1024	/* most ClumpInfo entries findscore reads from an arena directory at once */
+};
+
+static int	verbose;
+
+int
+clumpinfoeq(ClumpInfo *c, ClumpInfo *d)
+{
+	/* 1 when type, size, uncsize and score all agree, otherwise 0 */
+	if(c->type != d->type || c->size != d->size || c->uncsize != d->uncsize)
+		return 0;
+	return scorecmp(c->score, d->score) == 0;
+}
+
+int
+findscore(Arena *arena, uchar *score)
+{
+	/* count the directory entries of arena whose score equals score, reporting each match on standard error */
+	ClumpInfo *ci, *cis;
+	u64int a;
+	u32int clump;
+	int i, n, found;
+
+//ZZZ remove fprint?
+	if(arena->memstats.clumps)
+		fprint(2, "reading directory for arena=%s with %d entries\n", arena->name, arena->memstats.clumps);
+
+	cis = MKN(ClumpInfo, ClumpChunks);
+	found = 0;
+	a = 0;
+	/* a accumulates size + ClumpSize per entry: the position reported for a match */
+	for(clump = 0; clump < arena->memstats.clumps; clump += n){
+		n = ClumpChunks;	/* read the directory at most ClumpChunks entries at a time */
+		if(n > arena->memstats.clumps - clump)
+			n = arena->memstats.clumps - clump;
+		if(readclumpinfos(arena, clump, cis, n) != n){
+			seterr(EOk, "arena directory read failed: %r");
+			break;	/* give up on this arena; matches counted so far are still returned */
+		}
+
+		for(i = 0; i < n; i++){
+			ci = &cis[i];
+			if(scorecmp(score, ci->score)==0){
+				fprint(2, "found at clump=%d with type=%d size=%d csize=%d position=%lld\n",
+					clump + i, ci->type, ci->uncsize, ci->size, a);
+				found++;
+			}
+			a += ci->size + ClumpSize;
+		}
+	}
+	free(cis);
+	return found;
+}
+
+void
+usage(void)
+{
+	fprint(2, "usage: findscore [-v] arenafile score\n");
+	threadexitsall("usage");	/* non-nil status: a usage error must not look like success */
+}
+
+void
+threadmain(int argc, char *argv[])
+{
+	ArenaPart *ap;
+	Part *part;
+	char *file;
+	u8int score[VtScoreSize];
+	int i, found;
+	/* findscore [-v] arenafile score: count occurrences of score in every arena of the partition */
+	ventifmtinstall();
+
+	ARGBEGIN{
+	case 'v':
+		verbose++;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	readonly = 1;
+
+	if(argc != 2)
+		usage();
+
+	file = argv[0];
+	if(strscore(argv[1], score) < 0)
+		sysfatal("bad score %s", argv[1]);	/* no "\n": sysfatal ends the line itself */
+
+	part = initpart(file, OREAD|ODIRECT);
+	if(part == nil)
+		sysfatal("can't open partition %s: %r", file);
+
+	ap = initarenapart(part);
+	if(ap == nil)
+		sysfatal("can't initialize arena partition in %s: %r", file);
+
+	if(verbose > 1){	/* the partition dump needs -v given at least twice */
+		printarenapart(2, ap);
+		fprint(2, "\n");
+	}
+
+	initdcache(8 * MaxDiskBlock);
+
+	found = 0;
+	for(i = 0; i < ap->narenas; i++)
+		found += findscore(ap->arenas[i], score);
+
+	print("found %d occurrences of %V\n", found, score);
+
+	if(verbose > 1)
+		printstats();
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/fmtarenas.c b/src/cmd/venti/srv/fmtarenas.c
new file mode 100644
index 0000000..44c975e
--- /dev/null
+++ b/src/cmd/venti/srv/fmtarenas.c
@@ -0,0 +1,135 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#ifndef ODIRECT
+#define ODIRECT 0
+#endif
+
+void
+usage(void)
+{
+	fprint(2, "usage: fmtarenas [-Z] [-b blocksize] [-a arenasize] name file\n");
+	threadexitsall("usage");	/* non-nil status: a usage error must not look like success */
+}
+
+void
+threadmain(int argc, char *argv[])
+{
+	int vers;
+	ArenaPart *ap;
+	Part *part;
+	Arena *arena;
+	u64int addr, limit, asize, apsize;
+	char *file, *name, aname[ANameSize];
+	int i, n, blocksize, tabsize, zero;
+	/* format file as an arena partition holding arenas named name0, name1, ... of about asize bytes each */
+	ventifmtinstall();
+	statsinit();
+
+	blocksize = 8 * 1024;
+	asize = 512 * 1024 *1024;
+	tabsize = 512 * 1024;		/* BUG: should be determine from number of arenas */
+	zero = -1;
+	vers = ArenaVersion5;
+	ARGBEGIN{
+	case 'D':
+		settrace(EARGF(usage()));
+		break;
+	case 'a':
+		asize = unittoull(ARGF());
+		if(asize == TWID64 || asize == 0)	/* a zero arena size would divide by zero below */
+			usage();
+		break;
+	case 'b':
+		blocksize = unittoull(ARGF());
+		if(blocksize == ~0)
+			usage();
+		if(blocksize > MaxDiskBlock){
+			fprint(2, "block size too large, max %d\n", MaxDiskBlock);
+			threadexitsall("usage");
+		}
+		break;
+	case '4':
+		vers = ArenaVersion4;
+		break;
+	case 'Z':
+		zero = 0;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(zero == -1){	/* no -Z given: by default only version 4 partitions are zeroed first */
+		if(vers == ArenaVersion4)
+			zero = 1;
+		else
+			zero = 0;
+	}
+
+	if(argc != 2)
+		usage();
+
+	name = argv[0];
+	file = argv[1];
+
+	if(nameok(name) < 0)
+		sysfatal("illegal name template %s", name);
+
+	part = initpart(file, ORDWR|ODIRECT);
+	if(part == nil)
+		sysfatal("can't open partition %s: %r", file);
+
+	if(zero)
+		zeropart(part, blocksize);
+
+	maxblocksize = blocksize;
+	initdcache(20*blocksize);
+
+	ap = newarenapart(part, blocksize, tabsize);
+	if(ap == nil)
+		sysfatal("can't initialize arena: %r");
+
+	apsize = ap->size - ap->arenabase;
+	n = apsize / asize;
+	if(apsize - (n * asize) >= MinArenaSize)	/* the remainder is big enough for one more arena */
+		n++;
+
+	fprint(2, "fmtarenas %s: %,d arenas, %,lld bytes storage, %,d bytes for index map\n",
+		file, n, apsize, ap->tabsize);
+
+	ap->narenas = n;
+	ap->map = MKNZ(AMap, n);
+	ap->arenas = MKNZ(Arena*, n);
+
+	addr = ap->arenabase;
+	for(i = 0; i < n; i++){
+		limit = addr + asize;
+		if(limit >= ap->size || ap->size - limit < MinArenaSize){	/* last arena: it absorbs any undersized tail */
+			limit = ap->size;
+			if(limit - addr < MinArenaSize)
+				sysfatal("bad arena set math: runt arena at %lld,%lld %lld", addr, limit, ap->size);
+		}
+
+		snprint(aname, ANameSize, "%s%d", name, i);
+
+		if(0) fprint(2, "adding arena %s at [%lld,%lld)\n", aname, addr, limit);
+
+		arena = newarena(part, vers, aname, addr, limit - addr, blocksize);
+		if(!arena)
+			fprint(2, "can't make new arena %s: %r\n", aname);
+		freearena(arena);	/* NOTE(review): also reached with nil after a failure; presumably freearena tolerates that */
+
+		ap->map[i].start = addr;
+		ap->map[i].stop = limit;
+		namecp(ap->map[i].name, aname);
+
+		addr = limit;
+	}
+
+	if(wbarenapart(ap) < 0)
+		fprint(2, "can't write back arena partition header for %s: %r\n", file);
+
+	flushdcache();
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/fmtbloom.c b/src/cmd/venti/srv/fmtbloom.c
new file mode 100644
index 0000000..3c50d82
--- /dev/null
+++ b/src/cmd/venti/srv/fmtbloom.c
@@ -0,0 +1,115 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+Bloom b;
+
+void
+usage(void)
+{
+	fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n");
+	threadexitsall("usage");	/* non-nil status: a usage error must not look like success */
+}
+
+void
+threadmain(int argc, char *argv[])
+{
+	Part *part;
+	char *file;
+	vlong bits, size, size2;
+	int nhash;
+	vlong nblocks;
+	/* format file as a zero-filled bloom filter; -s, -n and -N choose its size and hash count */
+	ventifmtinstall();
+	statsinit();
+
+	size = 0;
+	nhash = nblocks = 0;
+	ARGBEGIN{
+	case 'n':
+		if(nhash || nblocks)
+			usage();
+		nblocks = unittoull(EARGF(usage()));
+		break;
+	case 'N':
+		if(nhash || nblocks)
+			usage();
+		nhash = unittoull(EARGF(usage()));
+		if(nhash > BloomMaxHash){
+			fprint(2, "maximum possible is -N %d\n", BloomMaxHash);
+			usage();
+		}
+		break;
+	case 's':
+		size = unittoull(ARGF());
+		if(size == ~0)
+			usage();
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc != 1)
+		usage();
+
+	file = argv[0];
+
+	part = initpart(file, ORDWR|ODIRECT);
+	if(part == nil)
+		sysfatal("can't open partition %s: %r", file);
+
+	if(size == 0)	/* no -s given: use the whole partition */
+		size = part->size;
+	
+	if(size < 1024*1024)
+		sysfatal("bloom filter too small");
+
+	if(size > MaxBloomSize){
+		fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
+			size, (vlong)MaxBloomSize);	/* cast so the argument matches %lld whatever the constant's type */
+		size = MaxBloomSize;
+	}
+	if(size&(size-1)){	/* not a power of two: round down to one */
+		for(size2=1; size2<size; size2*=2)
+			;
+		size = size2/2;
+		fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
+	}
+
+	if(nblocks){
+		/*
+		 * no use for more than 32 bits per block
+		 * shoot for less than 64 bits per block
+		 */
+		size2 = size;
+		while(size2*8 >= nblocks*64)
+			size2 >>= 1;
+		if(size2 != size){
+			size = size2;
+			fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
+				size/1024/1024);
+		}
+
+		/*
+		 * optimal is to use ln 2 times as many hash functions as we have bits per blocks.  
+		 */
+		bits = (8*size)/nblocks;
+		nhash = bits*7/10;
+		if(nhash > BloomMaxHash)
+			nhash = BloomMaxHash;
+	}
+	if(!nhash)
+		nhash = BloomMaxHash;
+	if(bloominit(&b, size, nil) < 0)
+		sysfatal("bloominit: %r");
+	b.nhash = nhash;
+	bits = nhash*10/7;
+	nblocks = (8*size)/bits;
+	fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);	/* size is in bytes; report MB as labelled */
+	b.data = vtmallocz(size);
+	b.part = part;
+	if(writebloom(&b) < 0)
+		sysfatal("writing %s: %r", file);
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/fmtindex.c b/src/cmd/venti/srv/fmtindex.c
new file mode 100644
index 0000000..a0eb6b1
--- /dev/null
+++ b/src/cmd/venti/srv/fmtindex.c
@@ -0,0 +1,120 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+void
+usage(void)
+{
+	fprint(2, "usage: fmtindex [-a] config\n");
+	threadexitsall("usage");	/* non-nil status: a usage error must not look like success */
+}
+
+void
+threadmain(int argc, char *argv[])
+{
+	Config conf;
+	Index *ix;
+	ArenaPart *ap;
+	Arena **arenas;
+	AMap *amap;
+	u64int addr;
+	char *file;
+	u32int i, j, n, narenas;
+	int add;
+	/* build the index's arena map from the configured arena partitions and write the index back; -a starts from the existing index */
+	ventifmtinstall();
+	statsinit();
+
+	add = 0;
+	ARGBEGIN{
+	case 'a':
+		add = 1;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc != 1)
+		usage();
+
+	file = argv[0];
+
+	if(runconfig(file, &conf) < 0)
+		sysfatal("can't initialize config %s: %r", file);
+	if(conf.index == nil)
+		sysfatal("no index specified in %s", file);
+	if(nameok(conf.index) < 0)
+		sysfatal("illegal index name %s", conf.index);
+
+	narenas = 0;
+	for(i = 0; i < conf.naparts; i++){
+		ap = conf.aparts[i];
+		narenas += ap->narenas;
+	}
+
+	if(add){
+		ix = initindex(conf.index, conf.sects, conf.nsects);
+		if(ix == nil)
+			sysfatal("can't initialize index %s: %r", conf.index);
+	}else{
+		ix = newindex(conf.index, conf.sects, conf.nsects);
+		if(ix == nil)
+			sysfatal("can't create new index %s: %r", conf.index);
+
+		n = 0;
+		for(i = 0; i < ix->nsects; i++)
+			n += ix->sects[i]->blocks;
+
+		if(0) fprint(2, "using %ud buckets of %ud; div=%d\n", ix->buckets, n, ix->div);
+	}
+	amap = MKNZ(AMap, narenas);
+	arenas = MKNZ(Arena*, narenas);
+
+	addr = IndexBase;
+	n = 0;
+	for(i = 0; i < conf.naparts; i++){
+		ap = conf.aparts[i];
+		for(j = 0; j < ap->narenas; j++){
+			if(n >= narenas)
+				sysfatal("too few slots in index's arena set");
+
+			arenas[n] = ap->arenas[j];
+			if(n < ix->narenas){	/* slot already in the index: it must name the same arena at the same address */
+				if(arenas[n] != ix->arenas[n])
+					sysfatal("mismatched arenas %s and %s at slot %d",
+						arenas[n]->name, ix->arenas[n]->name, n);
+				amap[n] = ix->amap[n];
+				if(amap[n].start != addr)
+					sysfatal("mis-located arena %s in index %s", arenas[n]->name, ix->name);
+				addr = amap[n].stop;
+			}else{	/* new slot: append the arena right after the previous one */
+				amap[n].start = addr;
+				addr += ap->arenas[j]->size;
+				amap[n].stop = addr;
+				namecp(amap[n].name, ap->arenas[j]->name);
+				if(0) fprint(2, "add arena %s at [%lld,%lld)\n",
+					amap[n].name, amap[n].start, amap[n].stop);
+			}
+
+			n++;
+		}
+	}
+	if(0){
+		fprint(2, "configured index=%s with arenas=%d and storage=%lld\n",
+			ix->name, n, addr - IndexBase);
+		fprint(2, "\tbitblocks=%d maxdepth=%d buckets=%d\n",
+			ix->bitblocks, ix->maxdepth, ix->buckets);
+	}
+	fprint(2, "fmtindex: %,d arenas, %,d index buckets, %,lld bytes storage\n",
+		n, ix->buckets, addr-IndexBase);
+
+	ix->amap = amap;
+	ix->arenas = arenas;
+	ix->narenas = narenas;
+
+	if(wbindex(ix) < 0)	/* it is the index that failed to write, so say so */
+		fprint(2, "can't write back index %s for %s: %r\n", ix->name, file);
+
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/fmtisect.c b/src/cmd/venti/srv/fmtisect.c
new file mode 100644
index 0000000..28b88de
--- /dev/null
+++ b/src/cmd/venti/srv/fmtisect.c
@@ -0,0 +1,83 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+void
+usage(void)
+{
+	fprint(2, "usage: fmtisect [-Z] [-b blocksize] name file\n");
+	threadexitsall("usage");	/* non-nil status: a usage error must not look like success */
+}
+
+void
+threadmain(int argc, char *argv[])
+{
+	int vers;
+	ISect *is;
+	Part *part;
+	char *file, *name;
+	int blocksize, setsize, zero;
+	/* format file as a new index section called name and write its header back */
+	ventifmtinstall();
+	statsinit();
+
+	blocksize = 8 * 1024;
+	setsize = 512 * 1024;
+	zero = -1;
+	vers = ISectVersion2;
+	ARGBEGIN{
+	case 'b':
+		blocksize = unittoull(ARGF());
+		if(blocksize == ~0)
+			usage();
+		if(blocksize > MaxDiskBlock){
+			fprint(2, "block size too large, max %d\n", MaxDiskBlock);
+			threadexitsall("usage");
+		}
+		break;
+	case '1':
+		vers = ISectVersion1;
+		break;
+	case 'Z':
+		zero = 0;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(zero == -1){	/* no -Z given: by default only version 1 sections are zeroed first */
+		if(vers == ISectVersion1)
+			zero = 1;
+		else
+			zero = 0;
+	}
+
+	if(argc != 2)
+		usage();
+
+	name = argv[0];
+	file = argv[1];
+
+	if(nameok(name) < 0)
+		sysfatal("illegal name %s", name);
+
+	part = initpart(file, ORDWR|ODIRECT);
+	if(part == nil)
+		sysfatal("can't open partition %s: %r", file);
+
+	if(zero)
+		zeropart(part, blocksize);
+
+	is = newisect(part, vers, name, blocksize, setsize);
+	if(is == nil)
+		sysfatal("can't initialize new index: %r");
+
+	fprint(2, "fmtisect %s: %,d buckets of %,d entries, %,d bytes for index map\n",
+		file, is->blocks, is->buckmax, setsize);
+
+	if(wbisect(is) < 0)	/* a failed write-back is only reported; the exit status below is still 0 */
+		fprint(2, "can't write back index section header for %s: %r\n", file);
+
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/fns.h b/src/cmd/venti/srv/fns.h
new file mode 100644
index 0000000..f35580e
--- /dev/null
+++ b/src/cmd/venti/srv/fns.h
@@ -0,0 +1,206 @@
+/*
+ * sorted by 4,/^$/|sort -bd +1
+ */
+int		addarena(Arena *name);
+void		addstat(int, int);
+void		addstat2(int, int, int, int);
+ZBlock		*alloczblock(u32int size, int zeroed, uint alignment);
+Arena		*amapitoa(Index *index, u64int a, u64int *aa);
+u64int		arenadirsize(Arena *arena, u32int clumps);
+void		arenaupdate(Arena *arena, u32int size, u8int *score);
+void		backsumarena(Arena *arena);
+void	binstats(long (*fn)(Stats *s0, Stats *s1, void*), void *arg, long t0, long t1, Statbin *bin, int nbin);
+int		bloominit(Bloom*, vlong, uchar*);
+int		bucklook(u8int*, int, u8int*, int);
+u32int		buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint);
+void		checkdcache(void);
+void		checklumpcache(void);
+int		clumpinfoeq(ClumpInfo *c, ClumpInfo *d);
+int		clumpinfoeq(ClumpInfo *c, ClumpInfo *d);
+u32int		clumpmagic(Arena *arena, u64int aa);
+uint		countbits(uint n);
+int		delarena(Arena *arena);
+void		delaykickicache(void);
+void		delaykickround(Round*);
+void		delaykickroundproc(void*);
+void		dirtydblock(DBlock*, int);
+AState	diskstate(void);
+void		*emalloc(ulong);
+void		*erealloc(void *, ulong);
+char		*estrdup(char*);
+void		*ezmalloc(ulong);
+Arena		*findarena(char *name);
+int		flushciblocks(Arena *arena);
+void		flushdcache(void);
+void		flushicache(void);
+void		flushqueue(void);
+void		fmtzbinit(Fmt *f, ZBlock *b);
+void		freearena(Arena *arena);
+void		freearenapart(ArenaPart *ap, int freearenas);
+void		freeiestream(IEStream *ies);
+void		freeifile(IFile *f);
+void		freeisect(ISect *is);
+void		freeindex(Index *index);
+void		freepart(Part *part);
+void		freezblock(ZBlock *b);
+DBlock		*_getdblock(Part *part, u64int addr, int mode, int load);
+DBlock		*getdblock(Part *part, u64int addr, int mode);
+u32int		hashbits(u8int *score, int nbits);
+int		httpdinit(char *address, char *webroot);
+int		iaddrcmp(IAddr *ia1, IAddr *ia2);
+IEntry*	icachedirty(u32int, u32int, u64int);
+void		icacheclean(IEntry*);
+int		ientrycmp(const void *vie1, const void *vie2);
+char		*ifileline(IFile *f);
+int		ifilename(IFile *f, char *dst);
+int		ifileu32int(IFile *f, u32int *r);
+int		inbloomfilter(Bloom*, u8int*);
+int		indexsect(Index *ix, u8int *score);
+int		indexsect0(Index *ix, u32int buck);
+Arena		*initarena(Part *part, u64int base, u64int size, u32int blocksize);
+ArenaPart	*initarenapart(Part *part);
+int		initarenasum(void);
+void		initbloomfilter(Index*);
+void		initdcache(u32int mem);
+void		initicache(int bits, int depth);
+void		initicachewrite(void);
+IEStream	*initiestream(Part *part, u64int off, u64int clumps, u32int size);
+ISect		*initisect(Part *part);
+Index		*initindex(char *name, ISect **sects, int n);
+void		initlumpcache(u32int size, u32int nblocks);
+int		initlumpqueues(int nq);
+Part*		initpart(char *name, int mode);
+void		initround(Round*, char*, int);
+int		initventi(char *config, Config *conf);
+void		insertlump(Lump *lump, Packet *p);
+int		insertscore(u8int *score, IAddr *ia, int write);
+void		kickdcache(void);
+void		kickicache(void);
+void		kickround(Round*, int wait);
+ZBlock		*loadclump(Arena *arena, u64int aa, int blocks, Clump *cl, u8int *score, int verify);
+DBlock	*loadibucket(Index *index, u8int *score, ISect **is, u32int *buck, IBucket *ib);
+int		loadientry(Index *index, u8int *score, int type, IEntry *ie);
+void		logerr(int severity, char *fmt, ...);
+Lump		*lookuplump(u8int *score, int type);
+int		lookupscore(u8int *score, int type, IAddr *ia, int *rac);
+int		maparenas(AMap *am, Arena **arenas, int n, char *what);
+void		markbloomfilter(Bloom*, u8int*);
+uint		msec(void);
+int		namecmp(char *s, char *t);
+void		namecp(char *dst, char *src);
+int		nameok(char *name);
+Arena		*newarena(Part *part, u32int, char *name, u64int base, u64int size, u32int blocksize);
+ArenaPart	*newarenapart(Part *part, u32int blocksize, u32int tabsize);
+ISect		*newisect(Part *part, u32int vers, char *name, u32int blocksize, u32int tabsize);
+Index		*newindex(char *name, ISect **sects, int n);
+u32int		now(void);
+int		okamap(AMap *am, int n, u64int start, u64int stop, char *what);
+int		okibucket(IBucket*, ISect*);
+int		outputamap(Fmt *f, AMap *am, int n);
+int		outputindex(Fmt *f, Index *ix);
+int		packarena(Arena *arena, u8int *buf);
+int		packarenahead(ArenaHead *head, u8int *buf);
+int		packarenapart(ArenaPart *as, u8int *buf);
+void		packbloomhead(Bloom*, u8int*);
+int		packclump(Clump *c, u8int *buf, u32int);
+void		packclumpinfo(ClumpInfo *ci, u8int *buf);
+void		packibucket(IBucket *b, u8int *buf, u32int magic);
+void		packientry(IEntry *i, u8int *buf);
+int		packisect(ISect *is, u8int *buf);
+void		packmagic(u32int magic, u8int *buf);
+ZBlock		*packet2zblock(Packet *p, u32int size);
+int		parseamap(IFile *f, AMapN *amn);
+int		parseindex(IFile *f, Index *ix);
+void		partblocksize(Part *part, u32int blocksize);
+int		partifile(IFile *f, Part *part, u64int start, u32int size);
+void		printarenapart(int fd, ArenaPart *ap);
+void		printarena(int fd, Arena *arena);
+void		printindex(int fd, Index *ix);
+void		printstats(void);
+void		putdblock(DBlock *b);
+void		putlump(Lump *b);
+int		queuewrite(Lump *b, Packet *p, int creator, uint ms);
+u32int		readarena(Arena *arena, u64int aa, u8int *buf, long n);
+int		readarenamap(AMapN *amn, Part *part, u64int base, u32int size);
+Bloom	*readbloom(Part*);
+int		readclumpinfo(Arena *arena, int clump, ClumpInfo *ci);
+int		readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n);
+ZBlock		*readfile(char *name);
+int		readifile(IFile *f, char *name);
+Packet		*readlump(u8int *score, int type, u32int size, int *cached);
+int		readpart(Part *part, u64int addr, u8int *buf, u32int n);
+int		runconfig(char *config, Config*);
+int		scorecmp(u8int *, u8int *);
+void		scoremem(u8int *score, u8int *buf, int size);
+void		setatailstate(AState*);
+void		setdcachestate(AState*);
+void		seterr(int severity, char *fmt, ...);
+void		setstat(int, long);
+void		settrace(char *type);
+u64int		sortrawientries(Index *ix, Part *tmp, u64int *tmpoff, Bloom *bloom);
+void		startbloomproc(Bloom*);
+Memimage*	statgraph(Graph *g);
+void		statsinit(void);
+int		storeclump(Index *index, ZBlock *b, u8int *score, int type, u32int creator, IAddr *ia);
+int		storeientry(Index *index, IEntry *m);
+int		strscore(char *s, u8int *score);
+int		stru32int(char *s, u32int *r);
+int		stru64int(char *s, u64int *r);
+void		sumarena(Arena *arena);
+int		syncarena(Arena *arena, u64int start, u32int n, int zok, int fix);
+int		syncarenaindex(Index *ix, Arena *arena, u32int clump, u64int a, int fix, int *pflush, int check);
+int		syncindex(Index *ix, int fix, int mustflushicache, int check);
+void		trace(char *type, char*, ...);
+void		traceinit(void);
+int		u64log2(u64int v);
+u64int		unittoull(char *s);
+int		unpackarena(Arena *arena, u8int *buf);
+int		unpackarenahead(ArenaHead *head, u8int *buf);
+int		unpackarenapart(ArenaPart *as, u8int *buf);
+int		unpackbloomhead(Bloom*, u8int*);
+int		unpackclump(Clump *c, u8int *buf, u32int);
+void		unpackclumpinfo(ClumpInfo *ci, u8int *buf);
+void		unpackibucket(IBucket *b, u8int *buf, u32int magic);
+void		unpackientry(IEntry *i, u8int *buf);
+int		unpackisect(ISect *is, u8int *buf);
+u32int		unpackmagic(u8int *buf);
+void		ventifmtinstall(void);
+void		vtloghdump(Hio*, VtLog*);
+void		vtloghlist(Hio*);
+int		vtproc(void(*)(void*), void*);
+int		vttypevalid(int type);
+void		waitforkick(Round*);
+int		wbarena(Arena *arena);
+int		wbarenahead(Arena *arena);
+int		wbarenamap(AMap *am, int n, Part *part, u64int base, u64int size);
+int		wbarenapart(ArenaPart *ap);
+void		wbbloomhead(Bloom*);
+int		wbisect(ISect *is);
+int		wbindex(Index *ix);
+int		whackblock(u8int *dst, u8int *src, int ssize);
+u64int		writeaclump(Arena *a, Clump *c, u8int *clbuf, u64int, u64int*);
+u32int		writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n);
+int		writebloom(Bloom*);
+int		writeclumpinfo(Arena *arean, int clump, ClumpInfo *ci);
+int		writepng(Hio*, Memimage*);
+u64int		writeiclump(Index *ix, Clump *c, u8int *clbuf, u64int*);
+int		writelump(Packet *p, u8int *score, int type, u32int creator, uint ms);
+int		writepart(Part *part, u64int addr, u8int *buf, u32int n);
+int		writeqlump(Lump *u, Packet *p, int creator, uint ms);
+Packet		*zblock2packet(ZBlock *zb, u32int size);
+void		zeropart(Part *part, int blocksize);
+
+/*
+#pragma	varargck	argpos	sysfatal		1
+#pragma	varargck	argpos	logerr		2
+#pragma	varargck	argpos	seterr		2
+*/
+
+#define scorecmp(h1,h2)		memcmp((h1),(h2),VtScoreSize)	/* compare two scores; 0 when equal */
+#define scorecp(h1,h2)		memmove((h1),(h2),VtScoreSize)	/* copy score h2 into h1 */
+/* typed allocation helpers: the plain forms call emalloc, the Z forms ezmalloc (presumably zeroing) */
+#define MK(t)			((t*)emalloc(sizeof(t)))	/* one t */
+#define MKZ(t)			((t*)ezmalloc(sizeof(t)))
+#define MKN(t,n)		((t*)emalloc((n)*sizeof(t)))	/* array of n t */
+#define MKNZ(t,n)		((t*)ezmalloc((n)*sizeof(t)))
+#define MKNA(t,at,n)		((t*)emalloc(sizeof(t) + (n)*sizeof(at)))	/* one t followed by room for n at */
diff --git a/src/cmd/venti/srv/graph.c b/src/cmd/venti/srv/graph.c
new file mode 100644
index 0000000..157b82d
--- /dev/null
+++ b/src/cmd/venti/srv/graph.c
@@ -0,0 +1,202 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+enum
+{
+	Top = 1,	/* margins between the image edge and the plot rectangle */
+	Bottom = 1,
+	Left = 40,	/* the y labels are drawn to the left of the plot rectangle */
+	Right = 0,
+	MinWidth = Left+Right+2,	/* statgraph replaces a size at or below the minimum with the default */
+	MinHeight = Top+Bottom+2,
+	DefaultWidth = Left+Right+500,
+	DefaultHeight = Top+Bottom+40
+};
+
+QLock memdrawlock;
+static Memsubfont *smallfont;
+static Memimage *black;
+static Memimage *blue;
+static Memimage *red;
+static Memimage *lofill[6];
+static Memimage *hifill[6];
+static Memimage *grid;
+
+static ulong fill[] = {	/* color pairs: ginit builds lofill[i] from entry 2*i and hifill[i] from entry 2*i+1 */
+	0xFFAAAAFF,	0xBB5D5DFF,	/* peach */
+	DPalegreygreen, DPurpleblue,	/* aqua */
+	DDarkyellow, DYellowgreen,	/* yellow */
+	DMedgreen, DDarkgreen,		/* green */
+	0x00AAFFFF, 0x0088CCFF,	/* blue */
+	0xCCCCCCFF, 0x888888FF,	/* grey */
+};
+
+Memimage*
+allocrepl(ulong color)
+{
+	Memimage *m;	/* 1x1 image of color, replicated (Frepl) across a very large clip rectangle */
+	m = allocmemimage(Rect(0,0,1,1), RGB24);
+	if(m == nil) return nil;	/* report the failure to the caller instead of dereferencing nil below */
+	memfillcolor(m, color);
+	m->flags |= Frepl;
+	m->clipr = Rect(-1000000, -1000000, 1000000, 1000000);
+	return m;
+}
+
+static void
+ginit(void)
+{
+	static int ready;
+	int k;
+
+	/* one-time setup of the font and the solid-color images used to draw graphs */
+	if(ready)
+		return;
+	ready = 1;
+	memimageinit();
+	smallfont = openmemsubfont(unsharp("#9/font/lucidasans/lstr.10"));
+	black = memblack;
+	blue = allocrepl(DBlue);
+	red = allocrepl(DRed);
+	grid = allocrepl(0x77777777);
+	for(k=0; k<nelem(fill)/2 && k<nelem(lofill) && k<nelem(hifill); k++){
+		lofill[k] = allocrepl(fill[2*k]);	/* first color of pair k */
+		hifill[k] = allocrepl(fill[2*k+1]);	/* second color of pair k */
+	}
+}
+
+static void
+mklabel(char *str, int v)
+{
+	if(v < 0){	/* write the sign, then format the magnitude */
+		*str++ = '-';
+		v = -v;
+	}
+	if(v >= 10000000)	/* ten million and up: whole millions */
+		sprint(str, "%dM", v/1000000);
+	else if(v >= 10000)	/* ten thousand and up: whole thousands */
+		sprint(str, "%dk", v/1000);
+	else
+		sprint(str, "%d", v);
+}
+
+static void
+drawlabel(Memimage *m, Point p, int n)
+{
+	char text[30];	/* label for n, drawn in smallfont so that it ends 5 to the left of p.x */
+	Point size, at;
+	mklabel(text, n);
+	size = memsubfontwidth(smallfont, text);
+	at = Pt(p.x-5-size.x, p.y);
+	memimagestring(m, at, memblack, ZP, smallfont, text);
+}
+
+static int
+scalept(int val, int valmin, int valmax, int ptmin, int ptmax)
+{
+	vlong off;	/* val is clamped to [valmin, valmax], then mapped linearly onto [ptmin, ptmax] */
+	val = val < valmin ? valmin : val;
+	val = val > valmax ? valmax : val;
+	if(valmax == valmin)	/* empty range: widen it so the division below is defined */
+		valmax++;
+	off = (vlong)(val-valmin)*(ptmax-ptmin)/(valmax-valmin);
+	return ptmin + off;
+}
+
+Memimage*
+statgraph(Graph *g)
+{
+	int i, lastlo, nbin, x, lo, hi, min, max, first;
+	Memimage *m;
+	Rectangle r;
+	Statbin *b, bin[2000];	/* 32 kB, but whack is worse */
+	/* render the statistic described by g as a new image; returns nil (with the error string set when graphics setup failed) on failure */
+	needstack(8192);	/* double check that bin didn't kill us */
+	
+	if(g->wid <= MinWidth)
+		g->wid = DefaultWidth;
+	if(g->ht <= MinHeight)
+		g->ht = DefaultHeight;
+	if(g->wid > nelem(bin))
+		g->wid = nelem(bin);
+	if(g->fill < 0)
+		g->fill = ((uint)g->arg>>8)%nelem(lofill);
+	if(g->fill >= nelem(lofill))	/* >=, not >: nelem(lofill) itself is already past the last fill */
+		g->fill %= nelem(lofill);
+	
+	nbin = g->wid - (Left+Right);
+	binstats(g->fn, g->arg, g->t0, g->t1, bin, nbin);
+
+	/*
+	 * compute bounds
+	 */
+	min = g->min;
+	max = g->max;
+	if(min < 0 || max <= min){	/* no usable range given: take it from the bins that have samples */
+		min = max = 0;
+		first = 1;
+		for(i=0; i<nbin; i++){
+			b = &bin[i];
+			if(b->nsamp == 0)
+				continue;
+			if(first || b->min < min)
+				min = b->min;
+			if(first || b->max > max)
+				max = b->max;
+			first = 0;
+		}
+	}
+
+	qlock(&memdrawlock);
+	ginit();
+	if(smallfont==nil || black==nil || blue==nil || red==nil || hifill[g->fill]==nil || lofill[g->fill]==nil){	/* test the fill images themselves; the arrays are never nil */
+		werrstr("graphics initialization failed");
+		qunlock(&memdrawlock);
+		return nil;
+	}
+
+	/* fresh image */
+	m = allocmemimage(Rect(0,0,g->wid,g->ht), ABGR32);
+	if(m == nil){
+		qunlock(&memdrawlock);
+		return nil;
+	}
+	r = Rect(Left, Top, g->wid-Right, g->ht-Bottom);
+	memfillcolor(m, DTransparent);
+	
+	/* x axis */
+	memimagedraw(m, Rect(r.min.x, r.max.y, r.max.x, r.max.y+1), black, ZP, memopaque, ZP, S);
+
+	/* y labels */
+	drawlabel(m, r.min, max);
+	if(min != 0)
+		drawlabel(m, Pt(r.min.x, r.max.y-smallfont->height), min);
+	
+	/* actual data */
+	lastlo = -1;
+	for(i=0; i<nbin; i++){
+		b = &bin[i];
+		if(b->nsamp == 0)
+			continue;
+		lo = scalept(b->min, min, max, r.max.y, r.min.y);
+		hi = scalept(b->max, min, max, r.max.y, r.min.y);
+		x = r.min.x+i;
+		hi-=2;
+		if(0)	/* disabled: joins neighbouring columns */
+		if(lastlo != -1){
+			if(lastlo < lo)
+				memimagedraw(m, Rect(x-1, lastlo, x, lo), hifill[g->fill], ZP, memopaque, ZP, S);
+			else if(lastlo > lo)
+				memimagedraw(m, Rect(x-1, lo, x, lastlo), hifill[g->fill], ZP, memopaque, ZP, S);
+		}
+		memimagedraw(m, Rect(x, hi, x+1,lo), hifill[g->fill], ZP, memopaque, ZP, S);
+		memimagedraw(m, Rect(x, lo, x+1, r.max.y), lofill[g->fill], ZP, memopaque, ZP, S);
+		lastlo = lo;
+	}
+
+	if(bin[nbin-1].nsamp)	/* label the right edge with the last bin's average */
+		drawlabel(m, Pt(r.max.x, r.min.y+(Dy(r)-smallfont->height)/2), bin[nbin-1].avg);
+	qunlock(&memdrawlock);
+	return m;
+}
diff --git a/src/cmd/venti/srv/httpd.c b/src/cmd/venti/srv/httpd.c
new file mode 100644
index 0000000..5f1a00e
--- /dev/null
+++ b/src/cmd/venti/srv/httpd.c
@@ -0,0 +1,988 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "xml.h"
+
+typedef struct HttpObj	HttpObj;
+extern QLock memdrawlock;
+
+enum
+{
+	ObjNameSize	= 64,	/* size of HttpObj.name; httpdobj rejects names that would not fit with their NUL */
+	MaxObjs		= 16	/* capacity of the objs handler table */
+};
+
+struct HttpObj
+{
+	char	name[ObjNameSize];	/* URI to match; httpproc treats a trailing '/' as a prefix match, anything else as exact */
+	int	(*f)(HConnect*);	/* handler; a negative return makes httpproc drop the connection */
+};
+
+static HttpObj	objs[MaxObjs];
+
+static char *webroot;
+
+static	void		listenproc(void*);
+static	int		estats(HConnect *c);
+static	int		dindex(HConnect *c);
+static	int		xindex(HConnect *c);
+static	int		xlog(HConnect *c);
+static	int		sindex(HConnect *c);
+static	int		hicacheflush(HConnect *c);
+static	int		hdcacheflush(HConnect *c);
+static	int		notfound(HConnect *c);
+static	int		httpdobj(char *name, int (*f)(HConnect*));
+static	int		xgraph(HConnect *c);
+static	int		xset(HConnect *c);
+static	int		fromwebdir(HConnect *c);
+
+int
+httpdinit(char *address, char *dir)	/* register the built-in handlers and start the listener proc; dir becomes webroot; -1 if vtproc fails */
+{
+	fmtinstall('D', hdatefmt);
+/*	fmtinstall('H', httpfmt); */
+	fmtinstall('U', hurlfmt);
+
+	if(address == nil)
+		address = "tcp!*!http";
+	webroot = dir;
+	
+	httpdobj("/stats", estats);
+	httpdobj("/index", dindex);
+	httpdobj("/storage", sindex);
+	httpdobj("/xindex", xindex);
+	httpdobj("/flushicache", hicacheflush);
+	httpdobj("/flushdcache", hdcacheflush);
+	httpdobj("/graph/", xgraph);	/* names ending in '/' are matched by prefix in httpproc */
+	httpdobj("/set/", xset);
+	httpdobj("/log", xlog);
+	httpdobj("/log/", xlog);
+
+	if(vtproc(listenproc, address) < 0)
+		return -1;
+	return 0;
+}
+
+/* Register handler f for URI name.  Returns 0, or -1 if name is nil,
+ * too long for HttpObj.name, already registered, or the table is full. */
+static int
+httpdobj(char *name, int (*f)(HConnect*))
+{
+	HttpObj *o;
+
+	if(name == nil || strlen(name) >= ObjNameSize)
+		return -1;
+	for(o = objs; o < objs+MaxObjs && o->name[0] != '\0'; o++)
+		if(strcmp(o->name, name) == 0)
+			return -1;	/* duplicate */
+	if(o == objs+MaxObjs)
+		return -1;	/* no free slot */
+	strcpy(o->name, name);	/* fits: length was checked above */
+	o->f = f;
+	return 0;
+}
+
+static HConnect*
+mkconnect(void)	/* allocate an HConnect with hpos and hstop both at the start of header; sysfatal if mallocz fails */
+{
+	HConnect *c;
+
+	c = mallocz(sizeof(HConnect), 1);
+	if(c == nil)
+		sysfatal("out of memory");
+	c->replog = nil;
+	c->hpos = c->header;
+	c->hstop = c->header;
+	return c;
+}
+
+void httpproc(void*);
+
+static void
+listenproc(void *vaddress)	/* proc body: announce on the address string vaddress and start an httpproc per accepted call */
+{
+	HConnect *c;
+	char *address, ndir[NETPATHLEN], dir[NETPATHLEN];
+	int ctl, nctl, data;
+
+//sleep(1000);	/* let strace find us */
+
+	address = vaddress;
+	ctl = announce(address, dir);
+	if(ctl < 0){
+		fprint(2, "venti: httpd can't announce on %s: %r\n", address);
+		return;
+	}
+
+	if(0) print("announce ctl %d dir %s\n", ctl, dir);
+	for(;;){
+		/*
+		 *  wait for a call (or an error)
+		 */
+		nctl = listen(dir, ndir);
+		if(0) print("httpd listen %d %s...\n", nctl, ndir);
+		if(nctl < 0){
+			fprint(2, "venti: httpd can't listen on %s: %r\n", address);
+			return;	/* a listen failure ends this proc, so no further calls are accepted */
+		}
+
+		data = accept(ctl, ndir);
+		if(0) print("httpd accept %d...\n", data);
+		if(data < 0){
+			fprint(2, "venti: httpd accept: %r\n");
+			close(nctl);
+			continue;	/* a failed accept only loses this one call */
+		}
+		if(0) print("httpd close nctl %d\n", nctl);
+		close(nctl);
+		c = mkconnect();
+		hinit(&c->hin, data, Hread);	/* input and output share the one data fd */
+		hinit(&c->hout, data, Hwrite);
+		vtproc(httpproc, c);	/* httpproc closes the fd and frees c when the connection ends */
+	}
+}
+
+void
+httpproc(void *v)	/* proc body: serve requests on one connection; v is the HConnect built by listenproc */
+{
+	HConnect *c;
+	int ok, i, n;
+
+//sleep(1000);	/* let strace find us */
+	c = v;
+
+	for(;;){
+		/*
+		 * No timeout because the signal appears to hit every
+		 * proc, not just us.
+		 */
+		if(hparsereq(c, 0) < 0)
+			break;
+
+		ok = -1;
+		for(i = 0; i < MaxObjs && objs[i].name[0]; i++){	/* first matching entry wins */
+			n = strlen(objs[i].name);
+			if((objs[i].name[n-1] == '/' && strncmp(c->req.uri, objs[i].name, n) == 0)	/* name ends in '/': prefix match */
+			|| (objs[i].name[n-1] != '/' && strcmp(c->req.uri, objs[i].name) == 0)){	/* otherwise: exact match */
+				ok = (*objs[i].f)(c);
+				goto found;
+			}
+		}
+		ok = fromwebdir(c);	/* no handler matched: try a file under webroot */
+	found:
+		if(c->head.closeit)	/* a requested close ends the loop just like a handler failure */
+			ok = -1;
+		hreqcleanup(c);
+
+		if(ok < 0)
+			break;
+	}
+	hreqcleanup(c);
+	close(c->hin.fd);
+	free(c);
+}
+
+static int
+percent(long v, long total)	/* v as a percentage of total; a zero divisor is replaced by 1 */
+{
+	long denom = total != 0 ? total : 1;
+
+	if(v < 1000*1000)
+		return (v * 100) / denom;
+	denom /= 100;	/* for large v scale the divisor instead - presumably to keep v*100 from overflowing */
+	if(denom == 0)
+		denom = 1;
+	return v / denom;
+}
+
+/* Parse the request headers; refuse methods other than GET and HEAD, and
+ * requests with expectother or expectcont set.  Returns 0 when acceptable. */
+static int
+preq(HConnect *c)
+{
+	char *meth;
+	if(hparseheaders(c, 0) < 0)
+		return -1;
+	meth = c->req.meth;
+	if(strcmp(meth, "GET") != 0 && strcmp(meth, "HEAD") != 0)
+		return hunallowed(c, "GET, HEAD");
+	return (c->head.expectother || c->head.expectcont) ? hfail(c, HExpectFail, nil) : 0;
+}
+
+static int
+preqtype(HConnect *c, char *type)	/* run preq, then send OK headers with Content-type type; 0 on success, preq's negative result otherwise */
+{
+	Hio *hout;
+	int r;
+
+	r = preq(c);
+	if(r < 0)
+		return r;
+
+	hout = &c->hout;
+	if(c->req.vermaj){	/* NOTE(review): presumably vermaj == 0 is an HTTP/0.9 request, which gets no headers - confirm */
+		hokheaders(c);
+		hprint(hout, "Content-type: %s\r\n", type);
+		if(http11(c))
+			hprint(hout, "Transfer-Encoding: chunked\r\n");
+		hprint(hout, "\r\n");
+	}
+
+	if(http11(c))
+		hxferenc(hout, 1);	/* presumably switches hout to chunked output, matching the header sent above */
+	else
+		c->head.closeit = 1;	/* httpproc ends the connection after this reply */
+	return 0;
+}
+
+static int
+preqtext(HConnect *c)	/* preqtype for a text/plain reply */
+{
+	return preqtype(c, "text/plain");
+}
+
+/* fail the request with HNotFound for its URI, once it has passed preq */
+static int
+notfound(HConnect *c)
+{
+	int status;
+
+	if((status = preq(c)) < 0)
+		return status;
+	return hfail(c, HNotFound, c->req.uri);
+}
+
+struct {	/* file-name suffix to Content-type table used by fromwebdir; the list ends at a nil ext */
+	char *ext;
+	char *type;
+} exttab[] = {
+	".html",	"text/html",
+	".txt",	"text/plain",
+	".xml",	"text/xml",
+	".png",	"image/png",
+	".gif",	"image/gif",
+	0
+};
+
+/* Serve the file the URI names under webroot (index.html for a directory).
+ * Returns notfound's result when the file cannot be opened, otherwise 0. */
+static int
+fromwebdir(HConnect *c)
+{
+	char buf[4096], *p, *ext, *type;
+	int i, fd, n, defaulted;
+	Dir *d;
+
+	if(webroot == nil || strstr(c->req.uri, ".."))	/* refuse any URI containing "..", which could name a file outside webroot */
+		return notfound(c);
+	snprint(buf, sizeof buf-20, "%s/%s", webroot, c->req.uri+1);	/* the 20 spare bytes leave room for "/index.html" below */
+	defaulted = 0;
+reopen:
+	if((fd = open(buf, OREAD)) < 0)
+		return notfound(c);
+	d = dirfstat(fd);
+	if(d == nil){
+		close(fd);
+		return notfound(c);
+	}
+	if(d->mode&DMDIR){
+		free(d);
+		close(fd);	/* close on both paths, so a directory named index.html does not leak the fd */
+		if(defaulted)
+			return notfound(c);
+		defaulted = 1;	/* retry exactly once with index.html appended */
+		strcat(buf, "/index.html");
+		goto reopen;
+	}
+	free(d);
+	p = buf+strlen(buf);
+	type = "application/octet-stream";	/* used when no exttab suffix matches */
+	for(i=0; exttab[i].ext; i++){
+		ext = exttab[i].ext;
+		if(p-strlen(ext) >= buf && strcmp(p-strlen(ext), ext) == 0){
+			type = exttab[i].type;
+			break;
+		}
+	}
+	if(preqtype(c, type) < 0){
+		close(fd);
+		return 0;
+	}
+	while((n = read(fd, buf, sizeof buf)) > 0)	/* buf is reused as the copy buffer; the path is no longer needed */
+		if(hwrite(&c->hout, buf, n) < 0)
+			break;
+	close(fd);
+	hflush(&c->hout);
+	return 0;
+}
+
+static struct	/* integer variables that xset can show and assign by name; the list ends at a nil name */
+{
+	char *name;
+	int *p;
+} namedints[] =
+{
+	"compress",	&compressblocks,
+	"devnull",	&writestodevnull,
+	"logging",	&ventilogging,
+	"stats",	&collectstats,
+	"icachesleeptime",	&icachesleeptime,
+	"arenasumsleeptime",	&arenasumsleeptime,
+	0
+};
+
+/* /set/name prints the named int; /set/name/value assigns it first. */
+static int
+xset(HConnect *c)
+{
+	int i, nf, r;
+	char *f[10], *s;
+
+	s = estrdup(c->req.uri);	/* getfields splits its argument in place, so work on a private copy */
+	nf = getfields(s+strlen("/set/"), f, nelem(f), 1, "/");
+	for(i=0; nf >= 1 && namedints[i].name; i++){
+		if(strcmp(f[0], namedints[i].name) != 0)
+			continue;
+		if(nf >= 2)
+			*namedints[i].p = atoi(f[1]);
+		r = preqtext(c);	/* preqtext yields 0 or a negative error */
+		if(r >= 0){
+			hprint(&c->hout, "%s = %d\n", f[0], *namedints[i].p);
+			hflush(&c->hout);
+		}
+		free(s);	/* f[] points into s, so release it only after the reply is written */
+		return r;
+	}
+	free(s);
+	return notfound(c);
+}
+
+static int
+estats(HConnect *c)	/* /stats handler: currently sends only the text/plain headers; the counter report below is commented out */
+{
+	Hio *hout;
+	int r;
+
+	r = preqtext(c);
+	if(r < 0)
+		return r;
+
+
+	hout = &c->hout;
+/*
+	hprint(hout, "lump writes=%,ld\n", stats.lumpwrites);
+	hprint(hout, "lump reads=%,ld\n", stats.lumpreads);
+	hprint(hout, "lump cache read hits=%,ld\n", stats.lumphit);
+	hprint(hout, "lump cache read misses=%,ld\n", stats.lumpmiss);
+
+	hprint(hout, "clump disk writes=%,ld\n", stats.clumpwrites);
+	hprint(hout, "clump disk bytes written=%,lld\n", stats.clumpbwrites);
+	hprint(hout, "clump disk bytes compressed=%,lld\n", stats.clumpbcomp);
+	hprint(hout, "clump disk reads=%,ld\n", stats.clumpreads);
+	hprint(hout, "clump disk bytes read=%,lld\n", stats.clumpbreads);
+	hprint(hout, "clump disk bytes uncompressed=%,lld\n", stats.clumpbuncomp);
+
+	hprint(hout, "clump directory disk writes=%,ld\n", stats.ciwrites);
+	hprint(hout, "clump directory disk reads=%,ld\n", stats.cireads);
+
+	hprint(hout, "index disk writes=%,ld\n", stats.indexwrites);
+	hprint(hout, "index disk reads=%,ld\n", stats.indexreads);
+	hprint(hout, "index disk bloom filter hits=%,ld %d%% falsemisses=%,ld %d%%\n",
+		stats.indexbloomhits,
+		percent(stats.indexbloomhits, stats.indexreads),
+		stats.indexbloomfalsemisses,
+		percent(stats.indexbloomfalsemisses, stats.indexreads));
+	hprint(hout, "bloom filter bits=%,ld of %,ld %d%%\n",
+		stats.bloomones, stats.bloombits, percent(stats.bloomones, stats.bloombits));
+	hprint(hout, "index disk reads for modify=%,ld\n", stats.indexwreads);
+	hprint(hout, "index disk reads for allocation=%,ld\n", stats.indexareads);
+	hprint(hout, "index block splits=%,ld\n", stats.indexsplits);
+
+	hprint(hout, "index cache lookups=%,ld\n", stats.iclookups);
+	hprint(hout, "index cache hits=%,ld %d%%\n", stats.ichits,
+		percent(stats.ichits, stats.iclookups));
+	hprint(hout, "index cache fills=%,ld %d%%\n", stats.icfills,
+		percent(stats.icfills, stats.iclookups));
+	hprint(hout, "index cache inserts=%,ld\n", stats.icinserts);
+
+	hprint(hout, "disk cache hits=%,ld\n", stats.pchit);
+	hprint(hout, "disk cache misses=%,ld\n", stats.pcmiss);
+	hprint(hout, "disk cache reads=%,ld\n", stats.pcreads);
+	hprint(hout, "disk cache bytes read=%,lld\n", stats.pcbreads);
+
+	hprint(hout, "disk cache writes=%,ld\n", stats.dirtydblocks);
+	hprint(hout, "disk cache writes absorbed=%,ld %d%%\n", stats.absorbedwrites,
+		percent(stats.absorbedwrites, stats.dirtydblocks));
+
+	hprint(hout, "disk cache flushes=%,ld\n", stats.dcacheflushes);
+	hprint(hout, "disk cache flush writes=%,ld (%,ld per flush)\n", 
+		stats.dcacheflushwrites,
+		stats.dcacheflushwrites/(stats.dcacheflushes ? stats.dcacheflushes : 1));
+
+	hprint(hout, "disk writes=%,ld\n", stats.diskwrites);
+	hprint(hout, "disk bytes written=%,lld\n", stats.diskbwrites);
+	hprint(hout, "disk reads=%,ld\n", stats.diskreads);
+	hprint(hout, "disk bytes read=%,lld\n", stats.diskbreads);
+*/
+
+	hflush(hout);
+	return 0;
+}
+
+/* /storage handler: report totals across the main index's arenas as plain text. */
+static int
+sindex(HConnect *c)
+{
+	Hio *hout;
+	Index *ix;
+	Arena *arena;
+	vlong clumps, cclumps, uncsize, used, size;
+	int i, r, active;
+
+	r = preqtext(c);
+	if(r < 0)
+		return r;
+	hout = &c->hout;
+	ix = mainindex;
+	hprint(hout, "index=%s\n", ix->name);
+	active = 0;
+	clumps = 0;
+	cclumps = 0;
+	uncsize = 0;
+	used = 0;
+	size = 0;
+	for(i = 0; i < ix->narenas; i++){
+		arena = ix->arenas[i];
+		if(arena == nil)	/* skip empty slots before reading any field, size included */
+			continue;
+		size += arena->size;	/* every present arena counts toward total space */
+		if(arena->memstats.clumps != 0){	/* "active" means the arena holds at least one clump */
+			active++;
+			clumps += arena->memstats.clumps;
+			cclumps += arena->memstats.cclumps;
+			uncsize += arena->memstats.uncsize;
+			used += arena->memstats.used;
+		}
+	}
+	hprint(hout, "total arenas=%,d active=%,d\n", ix->narenas, active);
+	hprint(hout, "total space=%,lld used=%,lld\n", size, used + clumps * ClumpInfoSize);
+	hprint(hout, "clumps=%,lld compressed clumps=%,lld data=%,lld compressed data=%,lld\n",
+		clumps, cclumps, uncsize, used - clumps * ClumpSize);
+	hflush(hout);
+	return 0;
+}
+
+static void
+darena(Hio *hout, Arena *arena)	/* print one arena: location, version and times, seal flags, score if nonzero, then mem and disk clump statistics */
+{
+	hprint(hout, "arena='%s' on %s at [%lld,%lld)\n\tversion=%d created=%d modified=%d",
+		arena->name, arena->part->name, arena->base, arena->base + arena->size + 2 * arena->blocksize,
+		arena->version, arena->ctime, arena->wtime);
+	if(arena->memstats.sealed)
+		hprint(hout, " mem=sealed");
+	if(arena->diskstats.sealed)
+		hprint(hout, " disk=sealed");
+	hprint(hout, "\n");
+	if(scorecmp(zeroscore, arena->score) != 0)
+		hprint(hout, "\tscore=%V\n", arena->score);
+
+	hprint(hout, "\tmem: clumps=%d compressed clumps=%d data=%,lld compressed data=%,lld storage=%,lld\n",
+		arena->memstats.clumps, arena->memstats.cclumps, arena->memstats.uncsize,
+		arena->memstats.used - arena->memstats.clumps * ClumpSize,
+		arena->memstats.used + arena->memstats.clumps * ClumpInfoSize);
+	hprint(hout, "\tdisk: clumps=%d compressed clumps=%d data=%,lld compressed data=%,lld storage=%,lld\n",
+		arena->diskstats.clumps, arena->diskstats.cclumps, arena->diskstats.uncsize,
+		arena->diskstats.used - arena->diskstats.clumps * ClumpSize,
+		arena->diskstats.used + arena->diskstats.clumps * ClumpInfoSize);
+}
+
+/*
+ * /flushicache handler: call flushicache, then confirm in plain text.
+ */
+static int
+hicacheflush(HConnect *c)
+{
+	int status;
+
+	if((status = preqtext(c)) < 0)
+		return status;
+
+	flushicache();
+	hprint(&c->hout, "flushed icache\n");
+	hflush(&c->hout);
+	return 0;
+}
+
+/*
+ * /flushdcache handler: call flushdcache, then confirm in plain text.
+ */
+static int
+hdcacheflush(HConnect *c)
+{
+	int status;
+
+	if((status = preqtext(c)) < 0)
+		return status;
+
+	flushdcache();
+	hprint(&c->hout, "flushed dcache\n");
+	hflush(&c->hout);
+	return 0;
+}
+
+static int
+dindex(HConnect *c)	/* /index handler: plain-text dump of the main index's parameters, sections, and each arena that holds clumps */
+{
+	Hio *hout;
+	Index *ix;
+	int i, r;
+
+	r = preqtext(c);
+	if(r < 0)
+		return r;
+	hout = &c->hout;
+
+
+	ix = mainindex;
+	hprint(hout, "index=%s version=%d blocksize=%d tabsize=%d\n",
+		ix->name, ix->version, ix->blocksize, ix->tabsize);
+	hprint(hout, "\tbuckets=%d div=%d\n", ix->buckets, ix->div);
+	for(i = 0; i < ix->nsects; i++)
+		hprint(hout, "\tsect=%s for buckets [%lld,%lld) buckmax=%d\n", ix->smap[i].name, ix->smap[i].start, ix->smap[i].stop, ix->sects[i]->buckmax);
+	for(i = 0; i < ix->narenas; i++){
+		if(ix->arenas[i] != nil && ix->arenas[i]->memstats.clumps != 0){	/* arenas with no clumps are not listed */
+			hprint(hout, "arena=%s at index [%lld,%lld)\n\t", ix->amap[i].name, ix->amap[i].start, ix->amap[i].stop);
+			darena(hout, ix->arenas[i]);
+		}
+	}
+	hflush(hout);
+	return 0;
+}
+
+typedef struct Arg Arg;
+struct Arg
+{
+	int index;	/* Stats.n slot read by rawgraph, diffgraph, pctgraph and pctdiffgraph */
+	int index2;	/* second Stats.n slot: the divisor for pctgraph and pctdiffgraph */
+};
+
+static long
+rawgraph(Stats *s, Stats *t, void *va)	/* value of the selected counter in t; s is not consulted */
+{
+	int slot;
+
+	slot = ((Arg*)va)->index;
+	return t->n[slot];
+}
+
+static long
+diffgraph(Stats *s, Stats *t, void *va)	/* change in the selected counter from s to t */
+{
+	int slot;
+
+	slot = ((Arg*)va)->index;
+	return t->n[slot] - s->n[slot];
+}
+
+static long
+pctgraph(Stats *s, Stats *t, void *va)	/* counter index as a percentage of counter index2, both taken from t */
+{
+	Arg *sel;
+
+	sel = va;
+	return percent(t->n[sel->index], t->n[sel->index2]);
+}
+
+static long
+pctdiffgraph(Stats *s, Stats *t, void *va)	/* like pctgraph, but computed on the changes from s to t */
+{
+	Arg *sel;
+
+	sel = va;
+	return percent(t->n[sel->index] - s->n[sel->index], t->n[sel->index2] - s->n[sel->index2]);
+}
+
+/*
+ * sum of the RPC read-byte and write-byte counters in s
+ */
+static long
+netbw(Stats *s)
+{
+	return s->n[StatRpcReadBytes]+s->n[StatRpcWriteBytes];	/* not exactly right */
+}
+
+static long
+diskbw(Stats *s)	/* sum of the arena-partition, index-section and checksum byte counters in s */
+{
+	ulong *n;
+
+	n = s->n;
+	return n[StatApartReadBytes]+n[StatApartWriteBytes]	
+		+ n[StatIsectReadBytes]+n[StatIsectWriteBytes]
+		+ n[StatSumReadBytes];
+}
+
+static long
+iobw(Stats *s)	/* network plus disk byte counters */
+{
+	return netbw(s)+diskbw(s);
+}
+
+static long
+diskgraph(Stats *s, Stats *t, void *va)	/* change in diskbw from s to t; va is ignored */
+{
+	USED(va);
+	return diskbw(t)-diskbw(s);
+}
+
+static long
+netgraph(Stats *s, Stats *t, void *va)	/* change in netbw from s to t; va is ignored */
+{
+	USED(va);
+	return netbw(t)-netbw(s);
+}
+
+static long
+iograph(Stats *s, Stats *t, void *va)	/* change in iobw from s to t; va is ignored */
+{
+	USED(va);
+	return iobw(t)-iobw(s);
+}
+
+
+static char* graphname[] =	/* names accepted in /graph/ URIs; xgraph uses a name's position here as the Stats.n index, so order matters */
+{
+	"rpctotal",
+	"rpcread",
+	"rpcreadok",
+	"rpcreadfail",
+	"rpcreadbyte",
+	"rpcreadtime",
+	"rpcreadcached",
+	"rpcreadcachedtime",
+	"rpcreaduncached",
+	"rpcreaduncachedtime",
+	"rpcwrite",
+	"rpcwritenew",
+	"rpcwriteold",
+	"rpcwritefail",
+	"rpcwritebyte",
+	"rpcwritetime",
+	"rpcwritenewtime",
+	"rpcwriteoldtime",
+
+	"lcachehit",
+	"lcachemiss",
+	"lcachelookup",
+	"lcachewrite",
+	"lcachesize",
+	"lcachestall",
+	"lcachelookuptime",
+	
+	"dcachehit",
+	"dcachemiss",
+	"dcachelookup",
+	"dcacheread",
+	"dcachewrite",
+	"dcachedirty",
+	"dcachesize",
+	"dcacheflush",
+	"dcachestall",
+	"dcachelookuptime",
+
+	"dblockstall",
+	"lumpstall",
+
+	"icachehit",
+	"icachemiss",
+	"icachelookup",
+	"icachewrite",
+	"icachefill",
+	"icacheprefetch",
+	"icachedirty",
+	"icachesize",
+	"icacheflush",
+	"icachestall",
+	"icachelookuptime",
+
+	"bloomhit",
+	"bloommiss",
+	"bloomfalsemiss",
+	"bloomlookup",
+	"bloomones",
+	"bloombits",
+	"bloomlookuptime",
+
+	"apartread",
+	"apartreadbyte",
+	"apartwrite",
+	"apartwritebyte",
+
+	"isectread",
+	"isectreadbyte",
+	"isectwrite",
+	"isectwritebyte",
+
+	"sumread",
+	"sumreadbyte",
+};
+
+static int
+findname(char *s)	/* position of s in graphname, or -1 (after a note on fd 2) when it is not listed */
+{
+	int i = 0;
+	while(i < nelem(graphname) && strcmp(graphname[i], s) != 0)
+		i++;
+	if(i < nelem(graphname))
+		return i;
+	fprint(2, "no name '%s'\n", s);
+	return -1;
+}
+
+static void
+dotextbin(Hio *io, Graph *g)	/* bin the graph's data with binstats and print the first 100 bins as text, one line each */
+{
+	int i, nbin;
+	Statbin *b, bin[2000];	/* 32 kB, but whack is worse */
+
+	needstack(8192);	/* double check that bin didn't kill us */
+	nbin = 100;	/* only the first 100 of the 2000 slots are filled and printed */
+	binstats(g->fn, g->arg, g->t0, g->t1, bin, nbin);
+
+	hprint(io, "stats\n\n");
+	for(i=0; i<nbin; i++){
+		b = &bin[i];
+		hprint(io, "%d: nsamp=%d min=%d max=%d avg=%d\n",
+			i, b->nsamp, b->min, b->max, b->avg);
+	}
+}
+
+/* /graph/name[/option...]: send one statistic as a PNG graph, or its bins as
+ * text for the "text" option.  name "*" is only valid with diskbw, iobw or netbw. */
+static int
+xgraph(HConnect *c)
+{
+	char *f[20], *s;
+	Memimage *m;
+	int i, nf, dotext, r;
+	Graph g;
+	Arg arg;
+
+	s = estrdup(c->req.uri);
+if(0) fprint(2, "graph %s\n" ,s);
+	memset(&g, 0, sizeof g);
+	nf = getfields(s+strlen("/graph/"), f, nelem(f), 1, "/");
+	if(nf < 1)
+		goto notfound;
+	if((arg.index = findname(f[0])) == -1 && strcmp(f[0], "*") != 0)
+		goto notfound;
+	g.arg = &arg;
+	g.t0 = -120;
+	g.t1 = 0;
+	g.min = -1;
+	g.max = -1;
+	g.fn = rawgraph;
+	g.wid = -1;
+	g.ht = -1;
+	dotext = 0;
+	g.fill = -1;
+	for(i=1; i<nf; i++){
+		if(strncmp(f[i], "t0=", 3) == 0)
+			g.t0 = atoi(f[i]+3);
+		else if(strncmp(f[i], "t1=", 3) == 0)
+			g.t1 = atoi(f[i]+3);
+		else if(strncmp(f[i], "min=", 4) == 0)
+			g.min = atoi(f[i]+4);
+		else if(strncmp(f[i], "max=", 4) == 0)
+			g.max = atoi(f[i]+4);
+		else if(strncmp(f[i], "pct=", 4) == 0){
+			if((arg.index2 = findname(f[i]+4)) == -1)
+				goto notfound;
+			g.fn = pctgraph;
+			g.min = 0;
+			g.max = 100;
+		}else if(strncmp(f[i], "pctdiff=", 8) == 0){
+			if((arg.index2 = findname(f[i]+8)) == -1)
+				goto notfound;
+			g.fn = pctdiffgraph;
+			g.min = 0;
+			g.max = 100;
+		}else if(strcmp(f[i], "diff") == 0)
+			g.fn = diffgraph;
+		else if(strcmp(f[i], "text") == 0)
+			dotext = 1;
+		else if(strncmp(f[i], "wid=", 4) == 0)
+			g.wid = atoi(f[i]+4);
+		else if(strncmp(f[i], "ht=", 3) == 0)
+			g.ht = atoi(f[i]+3);
+		else if(strncmp(f[i], "fill=", 5) == 0)
+			g.fill = atoi(f[i]+5);
+		else if(strcmp(f[i], "diskbw") == 0)
+			g.fn = diskgraph;
+		else if(strcmp(f[i], "iobw") == 0)
+			g.fn = iograph;
+		else if(strcmp(f[i], "netbw") == 0)
+			g.fn = netgraph;
+	}
+	if(arg.index == -1 && g.fn != diskgraph && g.fn != iograph && g.fn != netgraph)	/* the other graph functions would index Stats.n with -1 */
+		goto notfound;
+	m = nil;
+	if(!dotext && (m = statgraph(&g)) == nil)	/* draw before any headers go out, so a failure can still answer "not found" */
+		goto notfound;
+	r = preqtype(c, dotext ? "text/plain" : "image/png");
+	if(r >= 0){	/* send a body only when the headers were accepted */
+		if(dotext)
+			dotextbin(&c->hout, &g);
+		else
+			writepng(&c->hout, m);
+		hflush(&c->hout);
+	}
+	if(m != nil){	/* release the image on success and on header failure alike */
+		qlock(&memdrawlock);
+		freememimage(m);
+		qunlock(&memdrawlock);
+	}
+	free(s);
+	return r < 0 ? -1 : 0;
+
+notfound:
+	free(s);
+	return notfound(c);
+}
+
+static int
+xloglist(HConnect *c)	/* send the HTML list of logs produced by vtloghlist */
+{
+	if(preqtype(c, "text/html") < 0)
+		return -1;
+	vtloghlist(&c->hout);
+	hflush(&c->hout);
+	return 0;
+}
+
+static int
+xlog(HConnect *c)	/* /log and /log/ list the logs; /log/name dumps the log called name as HTML */
+{
+	char *name;
+	VtLog *l;
+
+	if(strcmp(c->req.uri, "/log") == 0 || strcmp(c->req.uri, "/log/") == 0)
+		return xloglist(c);
+	if(strncmp(c->req.uri, "/log/", 5) != 0)
+		return notfound(c);
+	name = c->req.uri + strlen("/log/");	/* everything after the prefix is the log name */
+	l = vtlogopen(name, 0);
+	if(l == nil)
+		return notfound(c);
+	if(preqtype(c, "text/html") < 0){
+		vtlogclose(l);
+		return -1;
+	}
+	vtloghdump(&c->hout, l);
+	vtlogclose(l);
+	hflush(&c->hout);
+	return 0;
+}
+
+static int
+xindex(HConnect *c)	/* /xindex handler: send xmlindex's output for the main index as text/xml */
+{
+	if(preqtype(c, "text/xml") < 0)
+		return -1;
+	xmlindex(&c->hout, mainindex, "index", 0);
+	hflush(&c->hout);
+	return 0;
+}
+
+void
+xmlindent(Hio *hout, int indent)	/* write indent tab characters to hout */
+{
+	int i;
+
+	for(i = 0; i < indent; i++)
+		hputc(hout, '\t');
+}
+
+void
+xmlaname(Hio *hout, char *v, char *tag)	/* write the attribute tag="v"; v is written as is, with no XML escaping */
+{
+	hprint(hout, " %s=\"%s\"", tag, v);
+}
+
+void
+xmlscore(Hio *hout, u8int *v, char *tag)	/* write the attribute tag="score" using %V; nothing is written for the zero score */
+{
+	if(scorecmp(zeroscore, v) == 0)
+		return;
+	hprint(hout, " %s=\"%V\"", tag, v);
+}
+
+void
+xmlsealed(Hio *hout, int v, char *tag)	/* write the attribute tag="yes" when v is nonzero; nothing otherwise */
+{
+	if(!v)
+		return;
+	hprint(hout, " %s=\"yes\"", tag);
+}
+
+void
+xmlu32int(Hio *hout, u32int v, char *tag)	/* write the attribute tag="v" with v in unsigned decimal */
+{
+	hprint(hout, " %s=\"%ud\"", tag, v);
+}
+
+void
+xmlu64int(Hio *hout, u64int v, char *tag)	/* write the attribute tag="v" with v in unsigned 64-bit decimal */
+{
+	hprint(hout, " %s=\"%llud\"", tag, v);
+}
+
+void
+vtloghdump(Hio *h, VtLog *l)	/* write log l to h as an HTML page; a nil l gives a page with no log text, titled <nil> */
+{
+	int i;
+	VtLogChunk *c;
+	char *name;
+	
+	name = l ? l->name : "&lt;nil&gt;";
+
+fprint(2, "hdump xfer %d\n", h->xferenc);
+	hprint(h, "<html><head>\n");
+	hprint(h, "<title>Venti Server Log: %s</title>\n", name);
+	hprint(h, "</head><body>\n");
+	hprint(h, "<b>Venti Server Log: %s</b>\n<p>\n", name);
+	
+	if(l){
+		c = l->w;
+		for(i=0; i<l->nchunk; i++){	/* start at the chunk after l->w and wrap, so l->w itself is written last */
+			if(++c == l->chunk+l->nchunk)
+				c = l->chunk;
+			hwrite(h, c->p, c->wp-c->p);	/* the bytes between the chunk's p and wp */
+		}
+	}
+	hprint(h, "</body></html>\n");
+}
+
+static int
+strpcmp(const void *va, const void *vb)	/* qsort comparator for an array of char*: orders by strcmp of the strings */
+{
+	return strcmp(*(char**)va, *(char**)vb);
+}
+
+void
+vtloghlist(Hio *h)	/* write an HTML page linking each log name from vtlognames to /log/name, in sorted order */
+{
+	char **p;
+	int i, n;
+	
+	hprint(h, "<html><head>\n");
+	hprint(h, "<title>Venti Server Logs</title>\n");
+	hprint(h, "</head><body>\n");
+	hprint(h, "<b>Venti Server Logs</b>\n<p>\n");
+	
+	p = vtlognames(&n);
+	qsort(p, n, sizeof(p[0]), strpcmp);
+	for(i=0; i<n; i++)
+		hprint(h, "<a href=\"/log/%s\">%s</a><br>\n", p[i], p[i]);
+	vtfree(p);	/* NOTE(review): only the array is freed here - presumably the names live in the same allocation; confirm */
+	hprint(h, "</body></html>\n");
+}
diff --git a/src/cmd/venti/srv/icache.c b/src/cmd/venti/srv/icache.c
new file mode 100644
index 0000000..46d411e
--- /dev/null
+++ b/src/cmd/venti/srv/icache.c
@@ -0,0 +1,348 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+typedef struct ICache ICache;
+struct ICache
+{
+	QLock	lock;			/* locks hash table & all associated data */
+	Rendez	full;			/* icachealloc sleeps here while ndirty == entries; icacheclean wakes all sleepers */
+	IEntry	**heads;		/* heads of all the hash chains */
+	int	bits;			/* bits to use for indexing heads */
+	u32int	size;			/* number of heads; == 1 << bits, should be < entries */
+	IEntry	*base;			/* all allocated hash table entries */
+	u32int	entries;		/* elements in base */
+	IEntry	*dirty;		/* chain of dirty elements */
+	u32int	ndirty;			/* incremented by insertscore on writes, decremented by icacheclean */
+	u32int	maxdirty;		/* insertscore calls kickicache once ndirty reaches this */
+	u32int	unused;			/* index of first unused element in base */
+	u32int	stolen;			/* last head from which an element was stolen */
+
+	Arena	*last[4];		/* arenas of the most recent lookupscore misses, filled round-robin */
+	Arena	*lastload;		/* arena most recently chosen by lookupscore for preloading */
+	int		nlast;			/* running count of misses recorded in last */
+};
+
+static ICache icache;
+
+static IEntry	*icachealloc(IAddr *ia, u8int *score);
+
+/*
+ * bits is the number of bits in the icache hash table
+ * depth is the average depth
+ * memory usage is about (1<<bits) * depth * sizeof(IEntry) + (1<<bits) * sizeof(IEntry*)
+ */
+void
+initicache(int bits, int depth)
+{
+	icache.bits = bits;
+	icache.size = 1 << bits;
+	icache.entries = depth * icache.size;
+	icache.maxdirty = icache.entries/2;	/* insertscore kicks the writer once half the entries are dirty */
+	icache.base = MKNZ(IEntry, icache.entries);
+	icache.heads = MKNZ(IEntry*, icache.size);
+	icache.full.l = &icache.lock;	/* the rendez is tied to icache.lock, which callers of rsleep and rwakeupall hold */
+	setstat(StatIcacheSize, icache.entries);
+}
+
+u32int
+hashbits(u8int *sc, int bits)	/* the top bits (at most 32) of the score's first four bytes, read big-endian */
+{
+	u32int v;
+
+	v = ((u32int)sc[0] << 24) | ((u32int)sc[1] << 16) | ((u32int)sc[2] << 8) | sc[3];	/* cast before shifting: sc[0]<<24 as int overflows when sc[0] >= 0x80 */
+	if(bits < 32)
+		 v >>= (32 - bits);
+	return v;
+}
+
+static void
+loadarenaclumps(Arena *arena, u64int aa)	/* prefetch: insert every clump of arena into the icache; aa is the index address of its first clump */
+{
+	ulong i;
+	ClumpInfo ci;
+	IAddr ia;
+
+fprint(2, "seed index cache with arena @%llud, (map %llud), %d clumps\n", arena->base, aa, arena->memstats.clumps);
+	for(i=0; i<arena->memstats.clumps; i++){
+		if(readclumpinfo(arena, i, &ci) < 0)
+			break;	/* stop at the first unreadable directory entry */
+		ia.type = ci.type;
+		ia.size = ci.uncsize;
+		ia.blocks = (ci.size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;	/* clump plus header, rounded up to whole blocks */
+		ia.addr = aa;
+		aa += ClumpSize + ci.size;	/* the next clump follows this one directly */
+		if(ia.type != VtCorruptType)
+			insertscore(ci.score, &ia, 0);	/* write=0: counted as a prefetch, entry stays clean */
+	}
+}
+
+/*
+ZZZ need to think about evicting the correct IEntry,
+and writing back the wtime.
+ * look up data score in the index cache
+ * if this fails, pull it in from the disk index table, if it exists.
+ *
+ * must be called with the lump for this score locked
+ *
+ * returns 0 with *ia and *rac filled in, or -1 if loadientry fails
+ */
+int
+lookupscore(u8int *score, int type, IAddr *ia, int *rac)
+{
+	IEntry d, *ie, *last;
+	u32int h;
+	u64int aa;
+	Arena *load;
+	int i;
+	uint ms;
+
+	load = nil;
+	aa = 0;
+	ms = msec();
+	
+	trace(TraceLump, "lookupscore %V.%d", score, type);
+
+	qlock(&icache.lock);
+	h = hashbits(score, icache.bits);
+	last = nil;
+	for(ie = icache.heads[h]; ie != nil; ie = ie->next){
+		if(ie->ia.type == type && scorecmp(ie->score, score)==0){
+			if(last != nil)	/* unlink the hit; it is put back at the head of the chain at found: */
+				last->next = ie->next;
+			else
+				icache.heads[h] = ie->next;
+			addstat(StatIcacheHit, 1);
+			ie->rac = 1;
+			trace(TraceLump, "lookupscore incache");
+			goto found;
+		}
+		last = ie;
+	}
+	addstat(StatIcacheMiss, 1);
+	qunlock(&icache.lock);	/* the disk lookup below runs without icache.lock */
+
+	if(loadientry(mainindex, score, type, &d) < 0){
+		ms = msec() - ms;
+		addstat2(StatIcacheRead, 1, StatIcacheReadTime, ms);
+		return -1;
+	}
+
+	addstat(StatIcacheFill, 1);
+
+	trace(TraceLump, "lookupscore loaded");
+
+	/*
+	 * no one else can load an entry for this score,
+	 * since we have the overall score lock.
+	 */
+	qlock(&icache.lock);
+
+	/*
+	 * If we notice that all the hits are coming from one arena,
+	 * load the table of contents for that arena into the cache.
+	 */
+	ie = icachealloc(&d.ia, score);
+	icache.last[icache.nlast++%nelem(icache.last)] = amapitoa(mainindex, ie->ia.addr, &aa);
+	aa = ie->ia.addr - aa;	/* compute base addr of arena */
+	for(i=0; i<nelem(icache.last); i++)
+		if(icache.last[i] != icache.last[0])
+			break;
+	if(i==nelem(icache.last) && icache.lastload != icache.last[0]){	/* every slot names the same arena and it was not the last one preloaded */
+		load = icache.last[0];
+		icache.lastload = load;
+	}
+
+found:
+	ie->next = icache.heads[h];
+	icache.heads[h] = ie;
+
+	*ia = ie->ia;
+	*rac = ie->rac;
+
+	qunlock(&icache.lock);
+
+	if(load){	/* the preload runs after the lock is dropped; insertscore takes it again per clump */
+		trace(TraceProc, "preload 0x%llux", aa);
+		loadarenaclumps(load, aa);
+	}
+	ms = msec() - ms;
+	addstat2(StatIcacheRead, 1, StatIcacheReadTime, ms);
+
+	return 0;
+}
+
+/*
+ * insert a new element in the hash table.
+ * write != 0 marks the entry dirty and counts as a write, otherwise as a prefetch; always returns 0.
+ */
+int
+insertscore(u8int *score, IAddr *ia, int write)
+{
+	IEntry *ie;
+	u32int h;
+
+	trace(TraceLump, "insertscore enter");
+	if(write)
+		addstat(StatIcacheWrite, 1);
+	else
+		addstat(StatIcachePrefetch, 1);
+
+	qlock(&icache.lock);
+	h = hashbits(score, icache.bits);
+
+	ie = icachealloc(ia, score);	/* returns the entry unlinked from its chain; it is linked at the head below */
+	if(write){
+		icache.ndirty++;
+		setstat(StatIcacheDirty, icache.ndirty);
+		delaykickicache();
+		ie->dirty = 1;
+	}
+	ie->next = icache.heads[h];
+	icache.heads[h] = ie;
+
+	qunlock(&icache.lock);
+
+	if(write && icache.ndirty >= icache.maxdirty)
+		kickicache();
+
+	/*
+	 * It's okay not to do this under icache.lock.
+	 * Calling insertscore only happens when we hold
+	 * the lump, meaning any searches for this block
+	 * will hit in the lump cache until after we return.
+	 */
+	markbloomfilter(mainindex->bloom, score);
+
+	return 0;
+}
+
+/*
+ * allocate a index cache entry which hasn't been used in a while.
+ * must be called with icache.lock locked
+ * if the score is already in the table, that entry is unlinked and returned
+ * with rac set; its ia is left as it was.
+ */
+static IEntry *
+icachealloc(IAddr *ia, u8int *score)
+{
+	int i;
+	IEntry *ie, *last, *clean, *lastclean;
+	u32int h;
+
+	h = hashbits(score, icache.bits);
+	last = nil;
+	for(ie = icache.heads[h]; ie != nil; ie = ie->next){
+		if(ie->ia.type == ia->type && scorecmp(ie->score, score)==0){
+			if(last != nil)
+				last->next = ie->next;
+			else
+				icache.heads[h] = ie->next;
+			trace(TraceLump, "icachealloc hit");
+			ie->rac = 1;
+			return ie;
+		}
+		last = ie;
+	}
+
+	h = icache.unused;	/* h is reused from here on: first as an index into base, then as a head index */
+	if(h < icache.entries){
+		ie = &icache.base[h++];
+		icache.unused = h;
+		trace(TraceLump, "icachealloc unused");
+		goto Found;
+	}
+
+	h = icache.stolen;
+	for(i=0;; i++){
+		h++;
+		if(h >= icache.size)
+			h = 0;
+		if(i == icache.size){	/* a full pass over the heads found no clean entry */
+			trace(TraceLump, "icachealloc sleep");
+			addstat(StatIcacheStall, 1);
+			while(icache.ndirty == icache.entries){
+				/*
+				 * This is a bit suspect.  Kickicache will wake up the
+				 * icachewritecoord, but if all the index entries are for
+				 * unflushed disk blocks, icachewritecoord won't be
+				 * able to do much.  It always rewakes everyone when
+				 * it thinks it is done, though, so at least we'll go around
+				 * the while loop again.  Also, if icachewritecoord sees
+				 * that the disk state hasn't change at all since the last
+				 * time around, it kicks the disk.  This needs to be
+				 * rethought, but it shouldn't deadlock anymore.
+				 */
+				kickicache();
+				rsleep(&icache.full);
+			}
+			addstat(StatIcacheStall, -1);
+			i = 0;
+		}
+		lastclean = nil;
+		clean = nil;
+		last = nil;
+		for(ie=icache.heads[h]; ie; last=ie, ie=ie->next){	/* keeps the LAST clean entry on the chain, and its predecessor */
+			if(!ie->dirty){
+				clean = ie;
+				lastclean = last;
+			}
+		}
+		if(clean){
+			if(lastclean)
+				lastclean->next = clean->next;
+			else
+				icache.heads[h] = clean->next;
+			clean->next = nil;
+			icache.stolen = h;
+			ie = clean;
+			trace(TraceLump, "icachealloc steal");
+			goto Found;
+		}
+	}
+
+Found:
+	ie->ia = *ia;
+	scorecp(ie->score, score);
+	ie->rac = 0;	
+	return ie;	/* returned unlinked; the caller puts it on a hash chain */
+}
+
+IEntry*
+icachedirty(u32int lo, u32int hi, u64int limit)	/* chain, via nextdirty, the dirty entries with nonzero addr below limit whose 32-bit hash lies in [lo,hi] */
+{
+	int i;
+	u32int h;
+	IEntry *ie, *dirty;
+
+	dirty = nil;
+	trace(TraceProc, "icachedirty enter");
+	qlock(&icache.lock);
+	for(i=0; i<icache.size; i++)
+	for(ie = icache.heads[i]; ie; ie=ie->next)
+		if(ie->dirty && ie->ia.addr != 0 && ie->ia.addr < limit){
+			h = hashbits(ie->score, 32);
+			if(lo <= h && h <= hi){
+				ie->nextdirty = dirty;	/* push on the front of the result list */
+				dirty = ie;
+			}
+		}
+	qunlock(&icache.lock);
+	trace(TraceProc, "icachedirty exit");
+	if(dirty == nil)	/* nothing qualified: flush the disk cache instead */
+		flushdcache();
+	return dirty;
+}
+
+void
+icacheclean(IEntry *ie)	/* mark every entry on the nextdirty list ie clean, then wake everyone sleeping on icache.full */
+{
+	trace(TraceProc, "icacheclean enter");
+	qlock(&icache.lock);
+	for(; ie; ie=ie->nextdirty){
+		icache.ndirty--;
+		ie->dirty = 0;
+	}
+	setstat(StatIcacheDirty, icache.ndirty);
+	rwakeupall(&icache.full);	/* icachealloc may be waiting for a clean entry to steal */
+	qunlock(&icache.lock);
+	trace(TraceProc, "icacheclean exit");
+}
+
diff --git a/src/cmd/venti/srv/icachewrite.c b/src/cmd/venti/srv/icachewrite.c
new file mode 100644
index 0000000..900af87
--- /dev/null
+++ b/src/cmd/venti/srv/icachewrite.c
@@ -0,0 +1,318 @@
+/*
+ * Write the dirty icache entries to disk.  Random seeks are
+ * so expensive that it makes sense to wait until we have
+ * a lot and then just make a sequential pass over the disk.
+ */
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+static void icachewriteproc(void*);
+static void icachewritecoord(void*);
+static IEntry *iesort(IEntry*);
+
+int icachesleeptime = 1000;	/* milliseconds */
+
+/*
+ * Bufsize bounds the span of index buckets handled in one
+ * read-modify-write pass: nextchunk ends a chunk before a bucket
+ * that starts Bufsize or more bytes past the chunk's first bucket.
+ */
+enum
+{
+	Bufsize = 8*1024*1024
+};
+
+/*
+ * State shared by the icache write procs.
+ */
+typedef struct IWrite IWrite;
+struct IWrite
+{
+	Round round;	/* kicked to request a flush; see flushicache, kickicache */
+	AState as;	/* disk state sampled by icachewritecoord; as.aa is the address limit passed to icachedirty */
+};
+
+static IWrite iwrite;
+
+/*
+ * Set up the icache write machinery: initialize the flush round,
+ * give each index section of mainindex a pair of channels and its own
+ * icachewriteproc, then start the coordinator and the delayed-kick proc.
+ */
+void
+initicachewrite(void)
+{
+	int i;
+	Index *ix;
+	ISect *is;
+
+	ix = mainindex;
+	initround(&iwrite.round, "icache", 120*60*1000);
+	for(i=0; i<ix->nsects; i++){
+		is = ix->sects[i];
+		is->writechan = chancreate(sizeof(ulong), 1);
+		is->writedonechan = chancreate(sizeof(ulong), 1);
+		vtproc(icachewriteproc, is);
+	}
+	vtproc(icachewritecoord, nil);
+	vtproc(delaykickroundproc, &iwrite.round);
+}
+
+/*
+ * Split the next chunk of work off the front of the dirty list *pie.
+ *
+ * Starting with the first entry, entries are kept in the chunk as long
+ * as their bucket starts less than Bufsize bytes after the first
+ * entry's bucket.  The chunk is nil-terminated and returned; *pie is
+ * advanced to the first entry left out (nil if none), *paddr is set to
+ * the disk address of the first bucket, and *pnbuf to the number of
+ * bytes from the start of the first bucket through the end of the last
+ * bucket in the chunk.
+ *
+ * NOTE(review): the nbuf computation assumes the list is in ascending
+ * bucket order; the caller in this file sorts it with iesort first.
+ */
+static IEntry*
+nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf)
+{
+	u64int addr, naddr;
+	uint nbuf;
+	int bsize;
+	IEntry *iefirst, *ie, **l;
+
+	bsize = 1<<is->blocklog;
+	iefirst = *pie;
+	/* disk address of the bucket holding the first entry */
+	addr = is->blockbase + ((u64int)(hashbits(iefirst->score, 32) / ix->div - is->start) << is->blocklog);
+	nbuf = 0;
+	/* l trails the scan so the list can be cut right before the first entry that does not fit */
+	for(l=&iefirst->nextdirty; (ie=*l)!=nil; l=&(*l)->nextdirty){
+		naddr = is->blockbase + ((u64int)(hashbits(ie->score, 32) / ix->div - is->start) << is->blocklog);
+		if(naddr - addr >= Bufsize)
+			break;
+		nbuf = naddr-addr;
+	}
+	/* nbuf was the offset of the last bucket; include that bucket itself */
+	nbuf += bsize;
+
+	*l = nil;
+	*pie = ie;
+	*paddr = addr;
+	*pnbuf = nbuf;
+	return iefirst;
+}
+	
+/*
+ * Write the dirty icache entries that belong to index section is.
+ *
+ * The dirty entries in the section's hash range are collected with
+ * icachedirty, sorted with iesort, and processed one chunk at a time
+ * (see nextchunk): the span of buckets is read into buf, each entry is
+ * merged into its bucket image, any cached copy of the bucket is
+ * refreshed, and the whole span is written back.  Entries that made
+ * it to disk are marked clean with icacheclean.  Entries whose bucket
+ * is bad or full are unlinked from the chunk, so they stay dirty.
+ *
+ * buf must be large enough for the biggest chunk nextchunk can return.
+ * Returns 0 if every entry was written, -1 if anything failed.
+ */
+static int
+icachewritesect(Index *ix, ISect *is, u8int *buf)
+{
+	int err, h, bsize;
+	u32int lo, hi;
+	u64int addr, naddr;
+	uint nbuf, off;
+	DBlock *b;
+	IBucket ib;
+	IEntry *ie, *iedirty, **l, *chunk;
+
+	/* range of 32-bit hash values served by this section */
+	lo = is->start * ix->div;
+	if(TWID32/ix->div < is->stop)
+		hi = TWID32;
+	else
+		hi = is->stop * ix->div - 1;
+
+	trace(TraceProc, "icachewritesect enter %ud %ud %llud", lo, hi, iwrite.as.aa);
+
+	iedirty = icachedirty(lo, hi, iwrite.as.aa);
+	iedirty = iesort(iedirty);
+	bsize = 1<<is->blocklog;
+	err = 0;
+
+	while(iedirty){
+		sleep(icachesleeptime);
+		trace(TraceProc, "icachewritesect nextchunk");
+		chunk = nextchunk(ix, is, &iedirty, &addr, &nbuf);
+
+		trace(TraceProc, "icachewritesect readpart 0x%llux+0x%ux", addr, nbuf);
+		if(readpart(is->part, addr, buf, nbuf) < 0){
+			// XXX
+			fprint(2, "icachewriteproc readpart: %r\n");
+			err  = -1;
+			continue;
+		}
+		trace(TraceProc, "icachewritesect updatebuf");
+		addstat(StatIsectReadBytes, nbuf);
+		addstat(StatIsectRead, 1);
+
+		/* l points at the link to ie so a failed entry can be unlinked from chunk */
+		for(l=&chunk; (ie=*l)!=nil; l=&ie->nextdirty){
+		again:
+			naddr = is->blockbase + ((u64int)(hashbits(ie->score, 32) / ix->div - is->start) << is->blocklog);
+			off = naddr - addr;
+			if(off+bsize > nbuf){
+				fprint(2, "whoops! addr=0x%llux nbuf=%ud addr+nbuf=0x%llux naddr=0x%llux\n",
+					addr, nbuf, addr+nbuf, naddr);
+				assert(off+bsize <= nbuf);
+			}
+			unpackibucket(&ib, buf+off, is->bucketmagic);
+			if(okibucket(&ib, is) < 0){
+				fprint(2, "bad bucket XXX\n");
+				goto skipit;
+			}
+			trace(TraceProc, "icachewritesect add %V at 0x%llux", ie->score, naddr);
+			h = bucklook(ie->score, ie->ia.type, ib.data, ib.n);
+			if(h & 1){
+				/* already present: overwrite in place */
+				h ^= 1;
+				packientry(ie, &ib.data[h]);
+			}else if(ib.n < is->buckmax){
+				/* open a slot at offset h and insert */
+				memmove(&ib.data[h+IEntrySize], &ib.data[h], ib.n*IEntrySize - h);
+				ib.n++;
+				packientry(ie, &ib.data[h]);
+			}else{
+				fprint(2, "bucket overflow XXX\n");
+			skipit:
+				/* drop ie from the chunk so icacheclean leaves it dirty */
+				err = -1;
+				*l = ie->nextdirty;
+				ie = *l;
+				if(ie)
+					goto again;
+				else
+					break;
+			}
+			packibucket(&ib, buf+off, is->bucketmagic);
+			/* refresh the bucket in the disk block cache if _getdblock returns one */
+			if((b = _getdblock(is->part, naddr, ORDWR, 0)) != nil){
+				memmove(b->data, buf+off, bsize);
+				putdblock(b);
+			}
+		}
+
+		/* the format now names the arguments, matching the readpart trace above */
+		trace(TraceProc, "icachewritesect writepart 0x%llux+0x%ux", addr, nbuf);
+		if(writepart(is->part, addr, buf, nbuf) < 0){
+			// XXX
+			fprint(2, "icachewriteproc writepart: %r\n");
+			err = -1;
+			continue;
+		}
+		addstat(StatIsectWriteBytes, nbuf);
+		addstat(StatIsectWrite, 1);
+		icacheclean(chunk);
+	}
+
+	trace(TraceProc, "icachewritesect done");
+	return err;
+}
+
+/*
+ * Writer proc for one index section; v is the ISect*.
+ * Allocates a buffer of Bufsize plus one bucket, rounds the pointer up
+ * to a multiple of the bucket size, and then loops forever: wait for a
+ * message on is->writechan, flush the section with icachewritesect,
+ * and reply on is->writedonechan.  The result of icachewritesect is
+ * not examined here.
+ */
+static void
+icachewriteproc(void *v)
+{
+	uint bsize;
+	ISect *is;
+	Index *ix;
+	u8int *buf;
+
+	ix = mainindex;
+	is = v;
+	threadsetname("icachewriteproc:%s", is->part->name);
+
+	bsize = 1<<is->blocklog;
+	buf = emalloc(Bufsize+bsize);
+	/* round buf up to a bsize boundary; the extra bsize bytes above leave room for this */
+	buf = (u8int*)(((ulong)buf+bsize-1)&~(ulong)(bsize-1));
+
+	for(;;){
+		trace(TraceProc, "icachewriteproc recv");
+		recv(is->writechan, 0);
+		trace(TraceWork, "start");
+		icachewritesect(ix, is, buf);
+		trace(TraceProc, "icachewriteproc send");
+		trace(TraceWork, "finish");
+		send(is->writedonechan, 0);
+	}
+}
+
+/*
+ * Coordinator proc for icache flushes.
+ * Each time the round is kicked it samples the disk state, tells every
+ * index section writer (and the bloom filter writer, if there is one)
+ * to run, waits for all of them to finish, records the new tail state
+ * with setatailstate, and finally calls icacheclean(nil) to wake
+ * anyone waiting on the icache.
+ */
+static void
+icachewritecoord(void *v)
+{
+	int i;
+	Index *ix;
+	AState as;
+
+	USED(v);
+
+	threadsetname("icachewritecoord");
+
+	ix = mainindex;
+	iwrite.as = diskstate();
+
+	for(;;){
+		trace(TraceProc, "icachewritecoord sleep");
+		waitforkick(&iwrite.round);
+		trace(TraceWork, "start");
+		as = diskstate();
+		/* disk state unchanged since the previous pass: kick the dcache */
+		if(as.arena==iwrite.as.arena && as.aa==iwrite.as.aa){
+			/* will not be able to do anything more than last flush - kick disk */
+			trace(TraceProc, "icachewritecoord flush dcache");
+			kickdcache();
+			trace(TraceProc, "icachewritecoord flushed dcache");
+		}
+		iwrite.as = as;
+
+		trace(TraceProc, "icachewritecoord start flush");
+		if(iwrite.as.arena){
+			/* start all writers first, then collect all the replies */
+			for(i=0; i<ix->nsects; i++)
+				send(ix->sects[i]->writechan, 0);
+			if(ix->bloom)
+				send(ix->bloom->writechan, 0);
+		
+			for(i=0; i<ix->nsects; i++)
+				recv(ix->sects[i]->writedonechan, 0);
+			if(ix->bloom)
+				recv(ix->bloom->writedonechan, 0);
+
+			trace(TraceProc, "icachewritecoord donewrite");
+			setatailstate(&iwrite.as);
+		}
+		icacheclean(nil);	/* wake up anyone waiting */
+		trace(TraceWork, "finish");
+		addstat(StatIcacheFlush, 1);
+	}
+}
+
+/*
+ * Kick the icache write round, passing 1 as kickround's second
+ * argument.
+ * NOTE(review): presumably 1 means "wait for the round to finish";
+ * confirm against kickround.
+ */
+void
+flushicache(void)
+{
+	trace(TraceProc, "flushicache enter");
+	kickround(&iwrite.round, 1);
+	trace(TraceProc, "flushicache exit");
+}
+
+/*
+ * Kick the icache write round, passing 0 as kickround's second
+ * argument.
+ * NOTE(review): presumably 0 means "do not wait"; confirm against
+ * kickround.
+ */
+void
+kickicache(void)
+{
+	kickround(&iwrite.round, 0);
+}
+
+/*
+ * Request a delayed kick of the icache write round via delaykickround.
+ */
+void
+delaykickicache(void)
+{
+	delaykickround(&iwrite.round);
+}
+
+/*
+ * Merge sort a list of IEntries linked through nextdirty into
+ * ascending score order (as defined by scorecmp).
+ * Returns the head of the sorted list.  When two scores compare
+ * equal, the entry from the first half is placed first.
+ */
+static IEntry*
+iesort(IEntry *ie)
+{
+	int cmp;
+	IEntry **l;
+	IEntry *ie1, *ie2, *sorted;
+
+	/* lists of zero or one element are already sorted */
+	if(ie == nil || ie->nextdirty == nil)
+		return ie;
+
+	/* split the lists */
+	/* ie2 advances two nodes for each step of ie1, so ie1 stops near the middle */
+	ie1 = ie;
+	ie2 = ie;
+	if(ie2)
+		ie2 = ie2->nextdirty;
+	if(ie2)
+		ie2 = ie2->nextdirty;
+	while(ie1 && ie2){
+		ie1 = ie1->nextdirty;
+		ie2 = ie2->nextdirty;
+		if(ie2)
+			ie2 = ie2->nextdirty;
+	}
+	/* cut after ie1: the first half is ie..ie1, the second half starts at ie2 */
+	if(ie1){
+		ie2 = ie1->nextdirty;
+		ie1->nextdirty = nil;
+	}
+
+	/* sort the lists */
+	ie1 = iesort(ie);
+	ie2 = iesort(ie2);
+
+	/* merge the lists */
+	/* l always points at the link where the next smallest entry goes */
+	sorted = nil;
+	l = &sorted;
+	cmp = 0;
+	while(ie1 || ie2){
+		if(ie1 && ie2)
+			cmp = scorecmp(ie1->score, ie2->score);
+		if(ie1==nil || (ie2 && cmp > 0)){
+			*l = ie2;
+			l = &ie2->nextdirty;
+			ie2 = ie2->nextdirty;
+		}else{
+			*l = ie1;
+			l = &ie1->nextdirty;
+			ie1 = ie1->nextdirty;
+		}
+	}
+	*l = nil;
+	return sorted;
+}
+
diff --git a/src/cmd/venti/srv/ifile.c b/src/cmd/venti/srv/ifile.c
new file mode 100644
index 0000000..fc784c9
--- /dev/null
+++ b/src/cmd/venti/srv/ifile.c
@@ -0,0 +1,93 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * Load the file called name into f and position f at its beginning.
+ * f->name keeps the caller's pointer; the string is not copied.
+ * Returns 0 on success, -1 if readfile fails.
+ */
+int
+readifile(IFile *f, char *name)
+{
+	ZBlock *zb;
+
+	if((zb = readfile(name)) == nil)
+		return -1;
+	f->pos = 0;
+	f->b = zb;
+	f->name = name;
+	return 0;
+}
+
+/*
+ * Release the buffer held by f and reset its read position.
+ * f itself is not freed.
+ */
+void
+freeifile(IFile *f)
+{
+	freezblock(f->b);
+	f->pos = 0;
+	f->b = nil;
+}
+
+/*
+ * Fill f with size bytes read from part starting at offset start,
+ * and position f at the beginning of that data.
+ * Returns 0 on success; -1 if the buffer cannot be allocated or the
+ * read fails (a failed read also sets the error string).
+ */
+int
+partifile(IFile *f, Part *part, u64int start, u32int size)
+{
+	ZBlock *zb;
+
+	zb = alloczblock(size, 0, part->blocksize);
+	if(zb == nil)
+		return -1;
+	if(readpart(part, start, zb->data, size) >= 0){
+		f->name = part->name;
+		f->b = zb;
+		f->pos = 0;
+		return 0;
+	}
+	seterr(EAdmin, "can't read %s: %r", part->name);
+	freezblock(zb);
+	return -1;
+}
+
+/*
+ * Return the next input line that is not blank once its # comment
+ * has been cut off, with leading spaces, tabs and carriage returns
+ * skipped.  The line is NUL-terminated in place inside f's buffer and
+ * f->pos is moved past it.  Returns nil when no newline-terminated
+ * line remains.
+ */
+char*
+ifileline(IFile *f)
+{
+	char *line, *nl, *hash;
+
+	for(;;){
+		line = (char*)f->b->data + f->pos;
+		nl = memchr(line, '\n', f->b->len - f->pos);
+		if(nl == nil)
+			return nil;
+		*nl = '\0';
+		f->pos = (nl + 1) - (char*)f->b->data;
+		hash = strchr(line, '#');
+		if(hash != nil)
+			*hash = '\0';
+		while(*line == ' ' || *line == '\t' || *line == '\r')
+			line++;
+		if(*line != '\0')
+			return line;
+	}
+}
+
+/*
+ * Read the next line of f as a name and copy it into dst with namecp.
+ * Returns 0 on success, -1 if there is no line or the line is
+ * ANameSize characters or longer.
+ */
+int
+ifilename(IFile *f, char *dst)
+{
+	char *line;
+
+	line = ifileline(f);
+	if(line == nil)
+		return -1;
+	if(strlen(line) >= ANameSize)
+		return -1;
+	namecp(dst, line);
+	return 0;
+}
+
+/*
+ * Read the next line of f and convert it with stru32int, storing the
+ * value through r.  Returns -1 if there is no line; otherwise returns
+ * whatever stru32int returns.
+ */
+int
+ifileu32int(IFile *f, u32int *r)
+{
+	char *line;
+
+	if((line = ifileline(f)) != nil)
+		return stru32int(line, r);
+	return -1;
+}
diff --git a/src/cmd/venti/srv/index.c b/src/cmd/venti/srv/index.c
new file mode 100644
index 0000000..46bf91e
--- /dev/null
+++ b/src/cmd/venti/srv/index.c
@@ -0,0 +1,819 @@
+/*
+ * Index, mapping scores to log positions. 
+ *
+ * The index is made up of some number of index sections, each of
+ * which is typically stored on a different disk.  The blocks in all the 
+ * index sections are logically numbered, with each index section 
+ * responsible for a range of blocks.  Blocks are typically 8kB.
+ *
+ * The N index blocks are treated as a giant hash table.  The top 32 bits
+ * of score are used as the key for a lookup.  Each index block holds
+ * one hash bucket, which is responsible for ceil(2^32 / N) of the key space.
+ * 
+ * The index is sized so that a particular bucket is extraordinarily 
+ * unlikely to overflow: assuming compressed data blocks are 4kB 
+ * on disk, and assuming each block has a 40 byte index entry,
+ * the index data will be 1% of the total data.  Since scores are essentially
+ * random, all buckets should be about the same fullness.
+ * A factor of 5 gives us a wide comfort boundary to account for 
+ * random variation.  So the index disk space should be 5% of the arena disk space.
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+//static int	bucklook(u8int *score, int type, u8int *data, int n);
+//static int	writebucket(ISect *is, u32int buck, IBucket *ib, DBlock *b);
+//static int	okibucket(IBucket *ib, ISect *is);
+static int	initindex1(Index*);
+static ISect	*initisect1(ISect *is);
+//static int	splitiblock(Index *ix, DBlock *b, ISect *is, u32int buck, IBucket *ib);
+
+#define KEY(k,d)	((d) ? (k)>>(32-(d)) : 0)
+
+//static QLock	indexlock;	//ZZZ
+
+static char IndexMagic[] = "venti index configuration";
+
+/*
+ * Load the existing index called name, made up of the n index
+ * sections in sects.
+ *
+ * The index configuration is read from the config table of the first
+ * section and parsed; then every section is checked against it: index
+ * name, block size, table size, section name, and a bucket range that
+ * must start where the previous section stopped.  Finally the derived
+ * divisor is validated and the arenas are mapped.
+ *
+ * Returns the new Index, or nil with the error string set.  On every
+ * failure after the Index has been allocated it is released with
+ * freeindex.
+ */
+Index*
+initindex(char *name, ISect **sects, int n)
+{
+	IFile f;
+	Index *ix;
+	ISect *is;
+	u32int last, blocksize, tabsize;
+	int i;
+
+	if(n <= 0){
+		fprint(2, "bad n\n");
+		seterr(EOk, "no index sections to initialize index");
+		return nil;
+	}
+	ix = MKZ(Index);
+	if(ix == nil){
+		fprint(2, "no mem\n");
+		seterr(EOk, "can't initialize index: out of memory");
+		return nil;
+	}
+
+	tabsize = sects[0]->tabsize;
+	if(partifile(&f, sects[0]->part, sects[0]->tabbase, tabsize) < 0){
+		/* ix is still all zero here; release it rather than leak it */
+		freeindex(ix);
+		return nil;
+	}
+	if(parseindex(&f, ix) < 0){
+		freeifile(&f);
+		freeindex(ix);
+		return nil;
+	}
+	freeifile(&f);
+	if(namecmp(ix->name, name) != 0){
+		seterr(ECorrupt, "mismatched index name: found %s expected %s", ix->name, name);
+		/* ix->sects is not set yet, so this frees only the parsed maps and ix */
+		freeindex(ix);
+		return nil;
+	}
+	if(ix->nsects != n){
+		/* "found" is what the configuration says, "expected" is what the caller supplied */
+		seterr(ECorrupt, "mismatched number index sections: found %d expected %d", ix->nsects, n);
+		freeindex(ix);
+		return nil;
+	}
+	ix->sects = sects;
+	last = 0;
+	blocksize = ix->blocksize;
+	for(i = 0; i < ix->nsects; i++){
+		is = sects[i];
+		if(namecmp(ix->name, is->index) != 0
+		|| is->blocksize != blocksize
+		|| is->tabsize != tabsize
+		|| namecmp(is->name, ix->smap[i].name) != 0
+		|| is->start != ix->smap[i].start
+		|| is->stop != ix->smap[i].stop
+		|| last != is->start
+		|| is->start > is->stop){
+			seterr(ECorrupt, "inconsistent index sections in %s", ix->name);
+			freeindex(ix);
+			return nil;
+		}
+		last = is->stop;
+	}
+	ix->tabsize = tabsize;
+	ix->buckets = last;
+
+	if(initindex1(ix) < 0){
+		freeindex(ix);
+		return nil;
+	}
+
+	ix->arenas = MKNZ(Arena*, ix->narenas);
+	if(maparenas(ix->amap, ix->arenas, ix->narenas, ix->name) < 0){
+		freeindex(ix);
+		return nil;
+	}
+
+	return ix;
+}
+
+/*
+ * Compute ix->div, the number of 32-bit hash values covered by each
+ * bucket, from ix->buckets, and verify that dividing the hash space by
+ * that divisor gives back exactly ix->buckets buckets.
+ * Returns 0 on success, -1 with the error string set otherwise.
+ */
+static int
+initindex1(Index *ix)
+{
+	u32int buckets;
+
+	/* an index with no buckets would make the division below divide by zero */
+	if(ix->buckets == 0){
+		seterr(ECorrupt, "inconsistent math for divisor and buckets in %s", ix->name);
+		return -1;
+	}
+	ix->div = (((u64int)1 << 32) + ix->buckets - 1) / ix->buckets;
+	buckets = (((u64int)1 << 32) - 1) / ix->div + 1;
+	if(buckets != ix->buckets){
+		seterr(ECorrupt, "inconsistent math for divisor and buckets in %s", ix->name);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Write the index configuration back to disk: format it into a
+ * tabsize-byte block, store that block in the config table of every
+ * index section, and then rewrite each section header with wbisect.
+ * Returns 0 on success, -1 on failure.
+ */
+int
+wbindex(Index *ix)
+{
+	Fmt f;
+	ZBlock *zb;
+	ISect *is;
+	int i;
+
+	if(ix->nsects == 0){
+		seterr(EOk, "no sections in index %s", ix->name);
+		return -1;
+	}
+	zb = alloczblock(ix->tabsize, 1, ix->blocksize);
+	if(zb == nil){
+		seterr(EOk, "can't write index configuration: out of memory");
+		return -1;
+	}
+	fmtzbinit(&f, zb);
+	if(outputindex(&f, ix) < 0){
+		seterr(EOk, "can't make index configuration: table storage too small %d", ix->tabsize);
+		freezblock(zb);
+		return -1;
+	}
+	for(i = 0; i < ix->nsects; i++){
+		is = ix->sects[i];
+		if(writepart(is->part, is->tabbase, zb->data, ix->tabsize) >= 0)
+			continue;
+		seterr(EOk, "can't write index: %r");
+		freezblock(zb);
+		return -1;
+	}
+	freezblock(zb);
+
+	for(i = 0; i < ix->nsects; i++){
+		if(wbisect(ix->sects[i]) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Format the index configuration into f.
+ *
+ * index: IndexMagic '\n' version '\n' name '\n' blocksize '\n' [V2: bitblocks '\n'] sections arenas
+ * version, blocksize: u32int
+ * name: max. ANameSize string
+ * sections, arenas: AMap
+ *
+ * This routine emits the magic, version, name and block size followed
+ * by the section map and the arena map; it writes no bitblocks line.
+ * Returns 0 on success, -1 if any formatting step fails.
+ */
+int
+outputindex(Fmt *f, Index *ix)
+{
+	if(fmtprint(f, "%s\n%ud\n%s\n%ud\n", IndexMagic, ix->version, ix->name, ix->blocksize) < 0)
+		return -1;
+	if(outputamap(f, ix->smap, ix->nsects) < 0)
+		return -1;
+	if(outputamap(f, ix->amap, ix->narenas) < 0)
+		return -1;
+	return 0;
+}
+
+/*
+ * Parse an index configuration from f into ix, in the order written
+ * by outputindex: magic line, version, name, block size, section map,
+ * arena map.  The version must equal IndexVersion.
+ * Returns 0 on success, -1 on failure.  The maps parsed so far remain
+ * attached to ix on failure.
+ */
+int
+parseindex(IFile *f, Index *ix)
+{
+	AMapN amn;
+	u32int v;
+	char *s;
+
+	/*
+	 * magic
+	 */
+	s = ifileline(f);
+	if(s == nil || strcmp(s, IndexMagic) != 0){
+		seterr(ECorrupt, "bad index magic for %s", f->name);
+		return -1;
+	}
+
+	/*
+	 * version
+	 */
+	if(ifileu32int(f, &v) < 0){
+		seterr(ECorrupt, "syntax error: bad version number in %s", f->name);
+		return -1;
+	}
+	ix->version = v;
+	if(ix->version != IndexVersion){
+		seterr(ECorrupt, "bad version number in %s", f->name);
+		return -1;
+	}
+
+	/*
+	 * name
+	 */
+	if(ifilename(f, ix->name) < 0){
+		seterr(ECorrupt, "syntax error: bad index name in %s", f->name);
+		return -1;
+	}
+
+	/*
+	 * block size
+	 */
+	if(ifileu32int(f, &v) < 0){
+		seterr(ECorrupt, "syntax error: bad block size number in %s", f->name);
+		return -1;
+	}
+	ix->blocksize = v;
+
+	/*
+	 * section map, then arena map
+	 */
+	if(parseamap(f, &amn) < 0)
+		return -1;
+	ix->nsects = amn.n;
+	ix->smap = amn.map;
+
+	if(parseamap(f, &amn) < 0)
+		return -1;
+	ix->narenas = amn.n;
+	ix->amap = amn.map;
+
+	return 0;
+}
+
+/*
+ * initialize an entirely new index
+ *
+ * The n sections in sects must all be unused (no bucket range and no
+ * index name), share one block size and config table size, and have
+ * distinct names.  The 32-bit hash space is divided over the available
+ * blocks, each section is assigned a contiguous bucket range, and the
+ * section map is built.  Note that the sections are modified in place
+ * before the Index itself is allocated.
+ *
+ * Returns the new Index, or nil with the error string set.
+ */
+Index *
+newindex(char *name, ISect **sects, int n)
+{
+	Index *ix;
+	AMap *smap;
+	u64int nb;
+	u32int div, ub, xb, fb, start, stop, blocksize, tabsize;
+	int i, j;
+
+	if(n < 1){
+		seterr(EOk, "creating index with no index sections");
+		return nil;
+	}
+
+	/*
+	 * compute the total buckets available in the index,
+	 * and the total buckets which are used.
+	 */
+	nb = 0;
+	blocksize = sects[0]->blocksize;
+	tabsize = sects[0]->tabsize;
+	for(i = 0; i < n; i++){
+		if(sects[i]->start != 0 || sects[i]->stop != 0
+		|| sects[i]->index[0] != '\0'){
+			seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
+			return nil;
+		}
+		if(blocksize != sects[i]->blocksize){
+			seterr(EOk, "mismatched block sizes in index sections");
+			return nil;
+		}
+		if(tabsize != sects[i]->tabsize){
+			seterr(EOk, "mismatched config table sizes in index sections");
+			return nil;
+		}
+		nb += sects[i]->blocks;
+	}
+
+	/*
+	 * check for duplicate names
+	 */
+	for(i = 0; i < n; i++){
+		for(j = i + 1; j < n; j++){
+			if(namecmp(sects[i]->name, sects[j]->name) == 0){
+				seterr(EOk, "duplicate section name %s for index %s", sects[i]->name, name);
+				return nil;
+			}
+		}
+	}
+
+	/* nb is the divisor below, so sections with no blocks at all must be rejected */
+	if(nb == 0){
+		seterr(EBug, "index sections contain no blocks");
+		return nil;
+	}
+	if(nb >= ((u64int)1 << 32)){
+		seterr(EBug, "index too large");
+		return nil;
+	}
+
+	fb = 0;
+	/* div: hash values per bucket; ub: buckets actually used; xb: blocks left over */
+	div = (((u64int)1 << 32) + nb - 1) / nb;
+	ub = (((u64int)1 << 32) - 1) / div + 1;
+	if(div < 100){
+		seterr(EBug, "index divisor too coarse");
+		return nil;
+	}
+	if(ub > nb){
+		seterr(EBug, "index initialization math wrong");
+		return nil;
+	}
+	xb = nb - ub;
+
+	/*
+	 * initialize each of the index sections
+	 * and the section map table
+	 */
+	smap = MKNZ(AMap, n);
+	if(smap == nil){
+		seterr(EOk, "can't create new index: out of memory");
+		return nil;
+	}
+	start = 0;
+	for(i = 0; i < n; i++){
+		/* each section gives up an equal share of the unused blocks; the last one ends at ub */
+		stop = start + sects[i]->blocks - xb / n;
+		if(i == n - 1)
+			stop = ub;
+		sects[i]->start = start;
+		sects[i]->stop = stop;
+		namecp(sects[i]->index, name);
+
+		smap[i].start = start;
+		smap[i].stop = stop;
+		namecp(smap[i].name, sects[i]->name);
+		start = stop;
+	}
+
+	/*
+	 * initialize the index itself
+	 */
+	ix = MKZ(Index);
+	if(ix == nil){
+		seterr(EOk, "can't create new index: out of memory");
+		free(smap);
+		return nil;
+	}
+	ix->version = IndexVersion;
+	namecp(ix->name, name);
+	ix->sects = sects;
+	ix->smap = smap;
+	ix->nsects = n;
+	ix->blocksize = blocksize;
+	ix->buckets = ub;
+	ix->tabsize = tabsize;
+	ix->div = div;
+	ix->bitblocks = fb;
+
+	if(initindex1(ix) < 0){
+		/*
+		 * free only what was allocated here; freeindex would also
+		 * free the caller's sects
+		 */
+		free(smap);
+		free(ix);
+		return nil;
+	}
+
+	return ix;
+}
+
+/*
+ * Load an existing index section from part: read and unpack the
+ * section header stored at PartBlank, check that the version is one
+ * this code understands, and compute the derived fields with
+ * initisect1.
+ * Returns the new ISect, or nil with the error string set.
+ */
+ISect*
+initisect(Part *part)
+{
+	ISect *is;
+	ZBlock *b;
+	int ok;
+
+	b = alloczblock(HeadSize, 0, 0);
+	if(b == nil){
+		seterr(EAdmin, "can't read index section header: %r");
+		return nil;
+	}
+	if(readpart(part, PartBlank, b->data, HeadSize) < 0){
+		seterr(EAdmin, "can't read index section header: %r");
+		/* the header buffer was allocated; release it on this path too */
+		freezblock(b);
+		return nil;
+	}
+
+	is = MKZ(ISect);
+	if(is == nil){
+		freezblock(b);
+		return nil;
+	}
+	is->part = part;
+	ok = unpackisect(is, b->data);
+	freezblock(b);
+	if(ok < 0){
+		seterr(ECorrupt, "corrupted index section header: %r");
+		freeisect(is);
+		return nil;
+	}
+
+	if(is->version != ISectVersion1 && is->version != ISectVersion2){
+		seterr(EAdmin, "unknown index section version %d", is->version);
+		freeisect(is);
+		return nil;
+	}
+
+	return initisect1(is);
+}
+
+/*
+ * Create a brand new, empty index section on part.
+ * The config table starts at the first blocksize boundary after the
+ * header, and the buckets start at the first blocksize boundary after
+ * the table.  Version 2 sections get a random nonzero bucket magic;
+ * others use 0.  The derived fields are filled in by initisect1.
+ * Returns the new ISect, or nil on failure.
+ */
+ISect*
+newisect(Part *part, u32int vers, char *name, u32int blocksize, u32int tabsize)
+{
+	ISect *is;
+	u32int mask, tabbase;
+
+	is = MKZ(ISect);
+	if(is == nil)
+		return nil;
+
+	mask = ~(blocksize - 1);
+	tabbase = (PartBlank + HeadSize + blocksize - 1) & mask;
+
+	namecp(is->name, name);
+	is->version = vers;
+	is->part = part;
+	is->blocksize = blocksize;
+	is->start = 0;
+	is->stop = 0;
+	is->blockbase = (tabbase + tabsize + blocksize - 1) & mask;
+	is->blocks = is->part->size / blocksize - is->blockbase / blocksize;
+	is->bucketmagic = 0;
+	if(is->version == ISectVersion2){
+		/* keep drawing until the magic is nonzero */
+		while(is->bucketmagic == 0)
+			is->bucketmagic = fastrand();
+	}
+	return initisect1(is);
+}
+
+/*
+ * initialize the computed parameters for an index section
+ * (buckmax, blocklog, tabbase, tabsize) and validate its layout.
+ * Returns is on success.  On failure the section is freed with
+ * freeisect, the error string is set, and nil is returned.
+ */
+static ISect*
+initisect1(ISect *is)
+{
+	u64int v;
+
+	/* number of index entries that fit in one bucket after the bucket header */
+	is->buckmax = (is->blocksize - IBucketSize) / IEntrySize;
+	is->blocklog = u64log2(is->blocksize);
+	if(is->blocksize != (1 << is->blocklog)){
+		seterr(ECorrupt, "illegal non-power-of-2 bucket size %d\n", is->blocksize);
+		freeisect(is);
+		return nil;
+	}
+	partblocksize(is->part, is->blocksize);
+	/* the config table begins at the first block boundary after the header */
+	is->tabbase = (PartBlank + HeadSize + is->blocksize - 1) & ~(is->blocksize - 1);
+	if(is->tabbase >= is->blockbase){
+		seterr(ECorrupt, "index section config table overlaps bucket storage");
+		freeisect(is);
+		return nil;
+	}
+	is->tabsize = is->blockbase - is->tabbase;
+	/* v: partition size rounded down to a whole number of blocks */
+	v = is->part->size & ~(u64int)(is->blocksize - 1);
+	if(is->blockbase + (u64int)is->blocks * is->blocksize != v){
+		/* only the error string is set; the failure path below is commented out, so this is not fatal */
+		seterr(ECorrupt, "invalid blocks in index section %s", is->name);
+//ZZZZZZZZZ
+//		freeisect(is);
+//		return nil;
+	}
+
+	if(is->stop - is->start > is->blocks){
+		seterr(ECorrupt, "index section overflows available space");
+		freeisect(is);
+		return nil;
+	}
+	if(is->start > is->stop){
+		seterr(ECorrupt, "invalid index section range");
+		freeisect(is);
+		return nil;
+	}
+
+	return is;
+}
+
+/*
+ * Pack the header of index section is and write it to the section's
+ * partition at PartBlank.
+ * Returns 0 on success, -1 on failure.  No error string is set if
+ * the header buffer cannot be allocated.
+ */
+int
+wbisect(ISect *is)
+{
+	ZBlock *hdr;
+	int rv;
+
+	hdr = alloczblock(HeadSize, 1, 0);
+	if(hdr == nil)	/* ZZZ set error? */
+		return -1;
+
+	rv = 0;
+	if(packisect(is, hdr->data) < 0){
+		seterr(ECorrupt, "can't make index section header: %r");
+		rv = -1;
+	}else if(writepart(is->part, PartBlank, hdr->data, HeadSize) < 0){
+		seterr(EAdmin, "can't write index section header: %r");
+		rv = -1;
+	}
+	freezblock(hdr);
+
+	return rv;
+}
+
+/*
+ * Free an index section descriptor.  A nil is is ignored.
+ * Only the ISect structure itself is freed; is->part is left alone.
+ */
+void
+freeisect(ISect *is)
+{
+	if(is == nil)
+		return;
+	free(is);
+}
+
+/*
+ * Free an index and everything hanging off it: the arena map, the
+ * arena table, every index section listed in ix->sects, the sects
+ * array itself, and the section map.  A nil ix is ignored.
+ */
+void
+freeindex(Index *ix)
+{
+	int i;
+
+	if(ix == nil)
+		return;
+	free(ix->amap);
+	free(ix->arenas);
+	for(i = 0; ix->sects != nil && i < ix->nsects; i++)
+		freeisect(ix->sects[i]);
+	free(ix->sects);
+	free(ix->smap);
+	free(ix);
+}
+
+/*
+ * write a clump to an available arena in the index
+ * and return the address of the clump within the index.
+ * Arenas are tried in order starting at ix->mapalloc; the first one
+ * that accepts the clump becomes the new ix->mapalloc.
+ * Returns TWID64, with the error string set, if no arena takes it.
+ *
+ * ZZZ question: should this distinguish between an arena
+ * filling up and real errors writing the clump?
+ */
+u64int
+writeiclump(Index *ix, Clump *c, u8int *clbuf, u64int *pa)
+{
+	u64int addr;
+	int i;
+
+	trace(TraceLump, "writeiclump enter");
+	for(i = ix->mapalloc; i < ix->narenas; i++){
+		addr = writeaclump(ix->arenas[i], c, clbuf, ix->amap[i].start, pa);
+		if(addr == TWID64)
+			continue;
+		ix->mapalloc = i;	/* assuming write is atomic, race is okay */
+		trace(TraceLump, "writeiclump exit");
+		return addr;
+	}
+
+	seterr(EAdmin, "no space left in arenas");
+	trace(TraceLump, "writeiclump failed");
+	return TWID64;
+}
+
+/*
+ * convert an index address to an arena and an arena-relative address:
+ * find the arena map entry covering a, store a minus that entry's
+ * start in *aa, and return the corresponding arena.
+ * Returns nil, with the error string set, if a lies beyond the chosen
+ * entry's stop or if that arena is not loaded.
+ */
+Arena*
+amapitoa(Index *ix, u64int a, u64int *aa)
+{
+	int i, r, l, m;
+
+	/*
+	 * binary search for the last entry whose start is <= a.
+	 * the search begins at entry 1, so entry 0 is the result
+	 * when no later entry qualifies.
+	 */
+	l = 1;
+	r = ix->narenas - 1;
+	while(l <= r){
+		m = (r + l) / 2;
+		if(ix->amap[m].start <= a)
+			l = m + 1;
+		else
+			r = m - 1;
+	}
+	l--;
+
+	if(a > ix->amap[l].stop){
+/* debugging output: dump the whole arena map before reporting the error */
+for(i=0; i<ix->narenas; i++)
+	print("arena %d: %llux - %llux\n", i, ix->amap[i].start, ix->amap[i].stop);
+print("want arena %d for %llux\n", l, a);
+		seterr(ECrash, "unmapped address passed to amapitoa");
+		return nil;
+	}
+
+	if(ix->arenas[l] == nil){
+		seterr(ECrash, "unmapped arena selected in amapitoa");
+		return nil;
+	}
+	*aa = a - ix->amap[l].start;
+	return ix->arenas[l];
+}
+
+/*
+ * Compare two index addresses field by field.
+ * Returns 0 if type, size, blocks and addr all match, 1 otherwise.
+ */
+int
+iaddrcmp(IAddr *ia1, IAddr *ia2)
+{
+	if(ia1->type != ia2->type)
+		return 1;
+	if(ia1->size != ia2->size)
+		return 1;
+	if(ia1->blocks != ia2->blocks)
+		return 1;
+	if(ia1->addr != ia2->addr)
+		return 1;
+	return 0;
+}
+
+/*
+ * lookup the score in the partition
+ *
+ * nothing needs to be explicitly locked:
+ * only static parts of ix are used, and
+ * the bucket is locked by the DBlock lock.
+ *
+ * On success the entry for score/type is unpacked into *ie and 0 is
+ * returned.  Returns -1 if the bloom filter rules the score out, the
+ * bucket cannot be loaded or is corrupt, or the bucket has no such
+ * entry.
+ */
+int
+loadientry(Index *ix, u8int *score, int type, IEntry *ie)
+{
+	ISect *is;
+	DBlock *b;
+	IBucket ib;
+	u32int buck;
+	int h, ok;
+
+	ok = -1;
+
+	trace(TraceLump, "loadientry enter");
+
+	/*
+	qlock(&stats.lock);
+	stats.indexreads++;
+	qunlock(&stats.lock);
+	*/
+
+	/*
+	 * the score is definitely absent if the bloom filter rejects it.
+	 * note that the filter consulted is mainindex's, not ix's.
+	 */
+	if(!inbloomfilter(mainindex->bloom, score)){
+		trace(TraceLump, "loadientry bloomhit");
+		return -1;
+	}
+
+	trace(TraceLump, "loadientry loadibucket");
+	b = loadibucket(ix, score, &is, &buck, &ib);
+	trace(TraceLump, "loadientry loadedibucket");
+	if(b == nil)
+		return -1;
+
+	if(okibucket(&ib, is) < 0){
+		trace(TraceLump, "loadientry badbucket");
+		goto out;
+	}
+
+	/* bucklook sets the low bit of its result when the entry was found */
+	h = bucklook(score, type, ib.data, ib.n);
+	if(h & 1){
+		h ^= 1;
+		trace(TraceLump, "loadientry found");
+		unpackientry(ie, &ib.data[h]);
+		ok = 0;
+		goto out;
+	}
+	/* the bloom filter let the score through but the bucket does not have it */
+	trace(TraceLump, "loadientry notfound");
+	addstat(StatBloomFalseMiss, 1);
+out:
+	putdblock(b);
+	trace(TraceLump, "loadientry exit");
+	return ok;
+}
+
+/*
+ * Sanity-check an unpacked bucket: its entry count must not exceed
+ * the section's per-bucket maximum.
+ * Returns 0 if the bucket is acceptable; otherwise sets the error
+ * string and returns -1.
+ */
+int
+okibucket(IBucket *ib, ISect *is)
+{
+	if(ib->n <= is->buckmax)
+		return 0;
+
+	/*
+	 * is->start and is->stop are printed with %ud like the other
+	 * 32-bit bucket numbers in this file; %lud would consume a long.
+	 */
+	seterr(EICorrupt, "corrupted disk index bucket: n=%ud max=%ud, range=[%ud,%ud)",
+		ib->n, is->buckmax, is->start, is->stop);
+	return -1;
+}
+
+/*
+ * look for score within data;
+ * return 1 | byte index of matching index,
+ * or 0 | index of least element > score
+ *
+ * data holds n packed entries of IEntrySize bytes each, searched by
+ * binary search.  Entries are ordered by score bytes first and then by
+ * the disk type byte at IEntryTypeOff.  Callers test the low bit of the
+ * result to tell a match from an insertion point.
+ */
+int
+bucklook(u8int *score, int otype, u8int *data, int n)
+{
+	int i, r, l, m, h, c, cc, type;
+
+	/* entries store the on-disk type code, so convert before comparing */
+	type = vttodisktype(otype);
+	l = 0;
+	r = n - 1;
+	while(l <= r){
+		m = (r + l) >> 1;
+		h = m * IEntrySize;
+		/* compare scores byte by byte; the first difference decides the direction */
+		for(i = 0; i < VtScoreSize; i++){
+			c = score[i];
+			cc = data[h + i];
+			if(c != cc){
+				if(c > cc)
+					l = m + 1;
+				else
+					r = m - 1;
+				goto cont;
+			}
+		}
+		/* scores are equal: the type breaks the tie */
+		cc = data[h + IEntryTypeOff];
+		if(type != cc){
+			if(type > cc)
+				l = m + 1;
+			else
+				r = m - 1;
+			goto cont;
+		}
+		return h | 1;
+	cont:;
+	}
+
+	/* not found: l is the position where the entry would be inserted */
+	return l * IEntrySize;
+}
+
+/*
+ * compare two IEntries; consistent with bucklook
+ *
+ * Both arguments point at packed entries.  The score bytes are
+ * compared first, then the type byte at IEntryTypeOff.
+ * Returns -1, 0 or 1.
+ */
+int
+ientrycmp(const void *vie1, const void *vie2)
+{
+	u8int *a, *b;
+	int i;
+
+	a = (u8int*)vie1;
+	b = (u8int*)vie2;
+	for(i = 0; i < VtScoreSize; i++)
+		if(a[i] != b[i])
+			return a[i] < b[i] ? -1 : 1;
+	if(a[IEntryTypeOff] != b[IEntryTypeOff])
+		return a[IEntryTypeOff] < b[IEntryTypeOff] ? -1 : 1;
+	return 0;
+}
+
+/*
+ * find the number of the index section holding bucket #buck:
+ * the last section whose start is <= buck, found by binary search.
+ * Section 0 is the answer when no later section qualifies.
+ */
+int
+indexsect0(Index *ix, u32int buck)
+{
+	int lo, hi, mid;
+
+	lo = 1;
+	hi = ix->nsects - 1;
+	while(lo <= hi){
+		mid = (lo + hi) >> 1;
+		if(buck < ix->sects[mid]->start)
+			hi = mid - 1;
+		else
+			lo = mid + 1;
+	}
+	return lo - 1;
+}
+
+/*
+ * load the index block at bucket #buck
+ *
+ * The block is fetched with getdblock in the given mode.  If pis,
+ * pbuck or ib are non-nil they receive the owning section, the bucket
+ * number relative to that section, and the unpacked bucket.
+ * Returns the DBlock, or nil on failure.
+ */
+static DBlock*
+loadibucket0(Index *ix, u32int buck, ISect **pis, u32int *pbuck, IBucket *ib, int mode)
+{
+	ISect *is;
+	DBlock *blk;
+
+	is = ix->sects[indexsect0(ix, buck)];
+	if(buck < is->start || is->stop <= buck){
+		seterr(EAdmin, "index lookup out of range: %ud not found in index\n", buck);
+		return nil;
+	}
+
+	/* make buck relative to the start of its section */
+	buck -= is->start;
+	blk = getdblock(is->part, is->blockbase + ((u64int)buck << is->blocklog), mode);
+	if(blk == nil)
+		return nil;
+
+	if(pis != nil)
+		*pis = is;
+	if(pbuck != nil)
+		*pbuck = buck;
+	if(ib != nil)
+		unpackibucket(ib, blk->data, is->bucketmagic);
+	return blk;
+}
+
+/*
+ * find the number of the index section holding score:
+ * the bucket number is the top 32 bits of the score divided by ix->div.
+ */
+static int
+indexsect1(Index *ix, u8int *score)
+{
+	return indexsect0(ix, hashbits(score, 32) / ix->div);
+}
+
+/*
+ * load the index block responsible for score.
+ * The block is requested in OREAD mode; the remaining arguments are
+ * passed through to loadibucket0.
+ */
+static DBlock*
+loadibucket1(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
+{
+	return loadibucket0(ix, hashbits(score, 32)/ix->div, pis, pbuck, ib, OREAD);
+}
+
+/*
+ * Exported entry point: number of the index section holding score.
+ * Simply forwards to indexsect1.
+ */
+int
+indexsect(Index *ix, u8int *score)
+{
+	return indexsect1(ix, score);
+}
+
+/*
+ * Exported entry point: load the index block responsible for score.
+ * Simply forwards to loadibucket1.
+ */
+DBlock*
+loadibucket(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
+{
+	return loadibucket1(ix, score, pis, pbuck, ib);
+}
+
+
diff --git a/src/cmd/venti/srv/lump.c b/src/cmd/venti/srv/lump.c
new file mode 100644
index 0000000..d1e58a6
--- /dev/null
+++ b/src/cmd/venti/srv/lump.c
@@ -0,0 +1,249 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+int			queuewrites = 0;
+int			writestodevnull = 0;
+
+static Packet		*readilump(Lump *u, IAddr *ia, u8int *score, int rac);
+
+/*
+ * Read the block with the given score and type, refusing blocks
+ * larger than size bytes.
+ * The zero score yields an empty packet.  Otherwise the lump cache is
+ * consulted first; on a miss the score is looked up and the data is
+ * fetched with readilump.  If cached is non-nil, *cached is set to 1
+ * on a lump cache hit and 0 on a miss.
+ * Returns a packet, or nil with the error string set.
+ */
+Packet*
+readlump(u8int *score, int type, u32int size, int *cached)
+{
+	Lump *u;
+	Packet *p;
+	IAddr ia;
+	u32int n;
+	int rac;
+
+	trace(TraceLump, "readlump enter");
+/*
+	qlock(&stats.lock);
+	stats.lumpreads++;
+	qunlock(&stats.lock);
+*/
+	if(scorecmp(score, zeroscore) == 0)
+		return packetalloc();
+	/* every return below must be preceded by putlump(u) */
+	u = lookuplump(score, type);
+	if(u->data != nil){
+		trace(TraceLump, "readlump lookuplump hit");
+		if(cached)
+			*cached = 1;
+		n = packetsize(u->data);
+		if(n > size){
+			seterr(EOk, "read too small: asked for %d need at least %d", size, n);
+			putlump(u);
+
+			return nil;
+		}
+		/* hand the caller a duplicate; the cached packet stays with the lump */
+		p = packetdup(u->data, 0, n);
+		putlump(u);
+		return p;
+	}
+
+	if(cached)
+		*cached = 0;
+
+	if(lookupscore(score, type, &ia, &rac) < 0){
+		//ZZZ place to check for someone trying to guess scores
+		seterr(EOk, "no block with score %V/%d exists", score, type);
+
+		putlump(u);
+		return nil;
+	}
+	if(ia.size > size){
+		seterr(EOk, "read too small 1: asked for %d need at least %d", size, ia.size);
+
+		putlump(u);
+		return nil;
+	}
+
+	trace(TraceLump, "readlump readilump");
+	p = readilump(u, &ia, score, rac);
+	putlump(u);
+
+	trace(TraceLump, "readlump exit");
+	return p;
+}
+
+/*
+ * save away a lump, and return its score.
+ * doesn't store duplicates, but checks that the data is really the same.
+ *
+ * The SHA1 of p is stored in score.  p is consumed on every path:
+ * it is freed here or handed on to queuewrite or writeqlump.
+ * Returns 0 on success, -1 on failure.
+ */
+int
+writelump(Packet *p, u8int *score, int type, u32int creator, uint ms)
+{
+	Lump *u;
+	int ok;
+
+/*
+	qlock(&stats.lock);
+	stats.lumpwrites++;
+	qunlock(&stats.lock);
+*/
+
+	packetsha1(p, score);
+	/* empty packets, and everything when writestodevnull is 1, are dropped once the score is computed */
+	if(packetsize(p) == 0 || writestodevnull==1){
+		packetfree(p);
+		return 0;
+	}
+
+	u = lookuplump(score, type);
+	if(u->data != nil){
+		/* already in the lump cache: just verify that the contents match */
+		ok = 0;
+		if(packetcmp(p, u->data) != 0){
+			seterr(EStrange, "score collision");
+			ok = -1;
+		}
+		packetfree(p);
+		putlump(u);
+		return ok;
+	}
+
+	/* with writestodevnull set to 2 the packet is dropped after the cache check */
+	if(writestodevnull==2){
+		packetfree(p);
+		return 0;
+	}
+
+	/* NOTE(review): u is not put on this path; presumably queuewrite takes over u and p - confirm */
+	if(queuewrites)
+		return queuewrite(u, p, creator, ms);
+
+	ok = writeqlump(u, p, creator, ms);
+
+	putlump(u);
+	return ok;
+}
+
+/*
+ * Store packet p as the data for lump u.
+ * If the index already has an entry for the score, the data is assumed
+ * to be on disk and p is simply freed.  Otherwise the clump is stored,
+ * the score is inserted into the index cache, and p is handed to the
+ * lump cache.  ms is the start time used for the timing statistics.
+ * Returns 0 on success, -1 on failure.  p is consumed either way.
+ */
+int
+writeqlump(Lump *u, Packet *p, int creator, uint ms)
+{
+	ZBlock *flat;
+	Packet *old;
+	IAddr ia;
+	int ok;
+	int rac;
+
+	if(lookupscore(u->score, u->type, &ia, &rac) == 0){
+		/* assume the data is here! XXX */
+		packetfree(p);
+		ms = msec() - ms;
+		addstat2(StatRpcWriteOld, 1, StatRpcWriteOldTime, ms);
+		return 0;
+
+		/*
+		 * NOTE(review): everything from here to the end of this
+		 * if block is unreachable because of the return above;
+		 * the verification below is effectively disabled.
+		 *
+		 * if the read fails,
+		 * assume it was corrupted data and store the block again
+		 */
+		old = readilump(u, &ia, u->score, rac);
+		if(old != nil){
+			ok = 0;
+			if(packetcmp(p, old) != 0){
+				seterr(EStrange, "score collision");
+				ok = -1;
+			}
+			packetfree(p);
+			packetfree(old);
+
+			ms = msec() - ms;
+			addstat2(StatRpcWriteOld, 1, StatRpcWriteOldTime, ms);
+			return ok;
+		}
+		logerr(EAdmin, "writelump: read %V failed, rewriting: %r\n", u->score);
+	}
+
+	/* new block: flatten the packet, store it in an arena, then index it */
+	flat = packet2zblock(p, packetsize(p));
+	ok = storeclump(mainindex, flat, u->score, u->type, creator, &ia);
+	freezblock(flat);
+	if(ok == 0)
+		ok = insertscore(u->score, &ia, 1);
+	/* on success p goes to the lump cache; otherwise it is freed here */
+	if(ok == 0)
+		insertlump(u, p);
+	else
+		packetfree(p);
+
+	ms = msec() - ms;
+	addstat2(StatRpcWriteNew, 1, StatRpcWriteNewTime, ms);
+	return ok;
+}
+
+/*
+ * Read ahead in the arena: starting at arena offset aa (index address
+ * a), walk up to n consecutive clump headers and insert each clump's
+ * score into the index cache via insertscore with a last argument of 0.
+ * The walk stops early at the end of the used part of the arena or at
+ * the first header that cannot be read or unpacked.
+ */
+static void
+lreadahead(u64int a, Arena *arena, u64int aa, int n)
+{
+	u8int hdr[ClumpSize];
+	Clump cl;
+	IAddr ia;
+
+	for(; n > 0; n--){
+		if(aa >= arena->memstats.used)
+			break;
+		if(readarena(arena, aa, hdr, ClumpSize) < ClumpSize)
+			break;
+		if(unpackclump(&cl, hdr, arena->clumpmagic) < 0)
+			break;
+		ia.addr = a;
+		ia.type = cl.info.type;
+		ia.size = cl.info.uncsize;
+		ia.blocks = (cl.info.size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
+		insertscore(cl.info.score, &ia, 0);
+		/* step over this clump's header and data to the next header */
+		a += ClumpSize + cl.info.size;
+		aa += ClumpSize + cl.info.size;
+	}
+}
+
+/*
+ * Read the clump described by index address ia from its arena and
+ * check it against the index entry (size, type) and the expected
+ * score.  When rac is 0, up to 20 following clumps are read ahead into
+ * the index cache.  A duplicate of the data is inserted into the lump
+ * cache under u, and the packet itself is returned to the caller.
+ * Returns nil on failure.
+ */
+static Packet*
+readilump(Lump *u, IAddr *ia, u8int *score, int rac)
+{
+	Arena *arena;
+	ZBlock *zb;
+	Packet *p, *pp;
+	Clump cl;
+	u64int a, aa;
+	u8int sc[VtScoreSize];
+
+	trace(TraceLump, "readilump enter");
+	/* translate the index address into an arena and an offset within it */
+	arena = amapitoa(mainindex, ia->addr, &aa);
+	if(arena == nil){
+		trace(TraceLump, "readilump amapitoa failed");
+		return nil;
+	}
+
+	trace(TraceLump, "readilump loadclump");
+	zb = loadclump(arena, aa, ia->blocks, &cl, sc, paranoid);
+	if(zb == nil){
+		trace(TraceLump, "readilump loadclump failed");
+		return nil;
+	}
+
+	/* the clump header must agree with what the index said */
+	if(ia->size != cl.info.uncsize){
+		seterr(EInconsist, "index and clump size mismatch");
+		freezblock(zb);
+		return nil;
+	}
+	if(ia->type != cl.info.type){
+		seterr(EInconsist, "index and clump type mismatch");
+		freezblock(zb);
+		return nil;
+	}
+	if(scorecmp(score, sc) != 0){
+		seterr(ECrash, "score mismatch");
+		freezblock(zb);
+		return nil;
+	}
+
+	if(rac == 0) {
+		trace(TraceLump, "readilump readahead");
+		/* start the readahead at the clump right after this one */
+		a = ia->addr + ClumpSize + cl.info.size;
+		aa += ClumpSize + cl.info.size;
+		lreadahead(a, arena, aa, 20);
+	}
+
+	trace(TraceLump, "readilump success");
+	p = zblock2packet(zb, cl.info.uncsize);
+	freezblock(zb);
+	/* the lump cache gets its own copy; p goes back to the caller */
+	pp = packetdup(p, 0, packetsize(p));
+	trace(TraceLump, "readilump insertlump");
+	insertlump(u, pp);
+	trace(TraceLump, "readilump exit");
+	return p;
+}
diff --git a/src/cmd/venti/srv/lumpcache.c b/src/cmd/venti/srv/lumpcache.c
new file mode 100644
index 0000000..8a1e2d9
--- /dev/null
+++ b/src/cmd/venti/srv/lumpcache.c
@@ -0,0 +1,417 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/* #define CHECK(x)	x */
+#define CHECK(x)
+
+typedef struct LumpCache	LumpCache;
+
+/*
+ * Hash table geometry: HashSize chains, selected by HashLog bits of
+ * the score (see the hashbits calls below).
+ */
+enum
+{
+	HashLog		= 9,
+	HashSize	= 1<<HashLog,
+	HashMask	= HashSize - 1,
+};
+
+/*
+ * All state of the lump cache.  The code in this file reads and writes
+ * these fields while holding lock; full is the rendezvous that
+ * lookuplump and insertlump sleep on when bumplump cannot evict.
+ */
+struct LumpCache
+{
+	QLock		lock;
+	Rendez		full;
+	Lump		*free;			/* list of available lumps */
+	u32int		allowed;		/* total allowable space for packets */
+	u32int		avail;			/* remaining space for packets */
+	u32int		now;			/* ticks for usage timestamps */
+	Lump		**heads;		/* hash table for finding address */
+	int		nheap;			/* number of available victims */
+	Lump		**heap;			/* heap for locating victims */
+	int		nblocks;		/* number of blocks allocated */
+	Lump		*blocks;		/* array of block descriptors */
+};
+
+static LumpCache	lumpcache;
+
+static void	delheap(Lump *db);
+static int	downheap(int i, Lump *b);
+static void	fixheap(int i, Lump *b);
+static int	upheap(int i, Lump *b);
+static Lump	*bumplump(void);
+
+/*
+ * Set up the global lump cache: size bytes of packet space shared by
+ * nblocks descriptors.  Every descriptor starts out untyped (TWID8),
+ * outside the heap (TWID32) and linked on the free list.
+ */
+void
+initlumpcache(u32int size, u32int nblocks)
+{
+	LumpCache *lc;
+	Lump *b, *end;
+
+	lc = &lumpcache;
+	lc->full.l = &lc->lock;
+	lc->nblocks = nblocks;
+	lc->allowed = size;
+	lc->avail = size;
+	lc->heads = MKNZ(Lump*, HashSize);
+	lc->heap = MKNZ(Lump*, nblocks);
+	lc->blocks = MKNZ(Lump, nblocks);
+	setstat(StatLcacheSize, lc->nblocks);
+
+	/* push each descriptor onto the free list; the last one ends up first */
+	lc->free = nil;
+	end = lc->blocks + nblocks;
+	for(b = lc->blocks; b != end; b++){
+		b->type = TWID8;
+		b->heap = TWID32;
+		b->next = lc->free;
+		lc->free = b;
+	}
+	lc->nheap = 0;
+}
+
+/*
+ * Find or create the cache entry for (score, type).
+ *
+ * The entry is returned with its reference count incremented and its
+ * own lock (b->lock) held; putlump undoes both.  On a miss a free
+ * descriptor is taken, evicting through bumplump when the free list is
+ * empty, and the new entry starts with size 0 and data nil.  If nothing
+ * can be evicted the caller sleeps on lumpcache.full and the whole
+ * lookup is retried from the hash search.
+ */
+Lump*
+lookuplump(u8int *score, int type)
+{
+	uint ms;
+	Lump *b;
+	u32int h;
+
+	ms = msec();
+	trace(TraceLump, "lookuplump enter");
+	
+	h = hashbits(score, HashLog);
+
+	/*
+	 * look for the block in the cache
+	 */
+	qlock(&lumpcache.lock);
+	CHECK(checklumpcache());
+again:
+	for(b = lumpcache.heads[h]; b != nil; b = b->next){
+		if(scorecmp(score, b->score)==0 && type == b->type){
+			addstat(StatLcacheHit, 1);
+			trace(TraceLump, "lookuplump hit");
+			goto found;
+		}
+	}
+
+	trace(TraceLump, "lookuplump miss");
+
+	/*
+	 * missed: locate the block with the oldest second to last use.
+	 * remove it from the heap, and fix up the heap.
+	 */
+	while(lumpcache.free == nil){
+		trace(TraceLump, "lookuplump bump");
+		CHECK(checklumpcache());
+		if(bumplump() == nil){
+			CHECK(checklumpcache());
+			logerr(EAdmin, "all lump cache blocks in use");
+			addstat(StatLcacheStall, 1);
+			CHECK(checklumpcache());
+			rsleep(&lumpcache.full);
+			CHECK(checklumpcache());
+			addstat(StatLcacheStall, -1);
+			/* the cache may have changed while asleep: search again */
+			goto again;
+		}
+		CHECK(checklumpcache());
+	}
+
+	addstat(StatLcacheMiss, 1);
+	b = lumpcache.free;
+	lumpcache.free = b->next;
+
+	/*
+	 * the new block has no last use, so assume it happens sometime in the middle
+	 * ZZZ this is not reasonable
+	 */
+	b->used = (b->used2 + lumpcache.now) / 2;
+
+	/*
+	 * rechain the block on the correct hash chain
+	 */
+	b->next = lumpcache.heads[h];
+	lumpcache.heads[h] = b;
+	if(b->next != nil)
+		b->next->prev = b;
+	b->prev = nil;
+
+	scorecp(b->score, score);
+	b->type = type;
+	b->size = 0;
+	b->data = nil;
+
+found:
+	/* take a reference and age the timestamps: used2 is the previous use */
+	b->ref++;
+	b->used2 = b->used;
+	b->used = lumpcache.now++;
+	/* if the block is in the victim heap its key (used2) just changed */
+	if(b->heap != TWID32)
+		fixheap(b->heap, b);
+	CHECK(checklumpcache());
+	qunlock(&lumpcache.lock);
+
+
+	/* the per-lump lock is taken only after the cache lock is released */
+	addstat(StatLumpStall, 1);
+	qlock(&b->lock);
+	addstat(StatLumpStall, -1);
+
+	trace(TraceLump, "lookuplump exit");
+	addstat2(StatLcacheRead, 1, StatLcacheReadTime, msec()-ms);
+	return b;
+}
+
+/*
+ * Attach packet p to cache entry b and charge its allocated size
+ * (packetasize) against the cache's packet space.
+ *
+ * If there is not enough space, other entries are evicted through
+ * bumplump; if nothing can be evicted the caller sleeps on
+ * lumpcache.full and tries again.
+ */
+void
+insertlump(Lump *b, Packet *p)
+{
+	u32int size;
+
+	/*
+	 * take the cache lock; b itself is supplied by the caller
+	 */
+	trace(TraceLump, "insertlump enter");
+	qlock(&lumpcache.lock);
+	CHECK(checklumpcache());
+again:
+
+	/* NOTE(review): this sits after the label, so it is counted again on every retry */
+	addstat(StatLcacheWrite, 1);
+
+	/*
+	 * make room: evict victims until the packet fits in the
+	 * remaining space.
+	 */
+	size = packetasize(p);
+	/* NOTE(review): presumably size never exceeds lumpcache.allowed; otherwise this cannot terminate -- confirm */
+	while(lumpcache.avail < size){
+		trace(TraceLump, "insertlump bump");
+		CHECK(checklumpcache());
+		if(bumplump() == nil){
+			logerr(EAdmin, "all lump cache blocks in use");
+			addstat(StatLcacheStall, 1);
+			CHECK(checklumpcache());
+			rsleep(&lumpcache.full);
+			CHECK(checklumpcache());
+			addstat(StatLcacheStall, -1);
+			goto again;
+		}
+		CHECK(checklumpcache());
+	}
+	b->data = p;
+	b->size = size;
+	lumpcache.avail -= size;
+	CHECK(checklumpcache());
+	qunlock(&lumpcache.lock);
+	trace(TraceLump, "insertlump exit");
+}
+
+/*
+ * Release a lump obtained from lookuplump: drop its lock and its
+ * reference.  When the last reference goes away the lump is added to
+ * the victim heap (if it is not already there) and everyone sleeping
+ * on lumpcache.full is woken.  A nil argument is ignored.
+ */
+void
+putlump(Lump *b)
+{
+	if(b == nil)
+		return;
+
+	trace(TraceLump, "putlump");
+	/* release the per-lump lock before taking the cache lock */
+	qunlock(&b->lock);
+	qlock(&lumpcache.lock);
+	CHECK(checklumpcache());
+	if(--b->ref == 0){
+		if(b->heap == TWID32)
+			upheap(lumpcache.nheap++, b);
+		trace(TraceLump, "putlump wakeup");
+		rwakeupall(&lumpcache.full);
+	}
+	CHECK(checklumpcache());
+	qunlock(&lumpcache.lock);
+}
+
+/*
+ * remove some lump from use and update the free list and counters
+ *
+ * Returns the evicted lump, which is now at the head of the free list,
+ * or nil if the heap ran out before an unreferenced lump was found.
+ * The callers in this file hold lumpcache.lock.
+ */
+static Lump*
+bumplump(void)
+{
+	Lump *b;
+	u32int h;
+
+	/*
+	 * remove blocks until we find one that is unused
+	 * referenced blocks are left in the heap even though
+	 * they can't be scavenged; this is simply a speed optimization
+	 */
+	CHECK(checklumpcache());
+	for(;;){
+		if(lumpcache.nheap == 0){
+			trace(TraceLump, "bumplump emptyheap");
+			return nil;
+		}
+		b = lumpcache.heap[0];
+		/* referenced blocks popped here re-enter the heap in putlump */
+		delheap(b);
+		if(!b->ref){
+			trace(TraceLump, "bumplump wakeup");
+			rwakeupall(&lumpcache.full);
+			break;
+		}
+	}
+
+	/*
+	 * unchain the block
+	 */
+	trace(TraceLump, "bumplump unchain");
+	if(b->prev == nil){
+		/* no predecessor: b must be the head of its hash chain */
+		h = hashbits(b->score, HashLog);
+		if(lumpcache.heads[h] != b)
+			sysfatal("bad hash chains in lump cache");
+		lumpcache.heads[h] = b->next;
+	}else
+		b->prev->next = b->next;
+	if(b->next != nil)
+		b->next->prev = b->prev;
+
+	/* give the packet's space back to the cache */
+	if(b->data != nil){
+		packetfree(b->data);
+		b->data = nil;
+		lumpcache.avail += b->size;
+		b->size = 0;
+	}
+	b->type = TWID8;
+
+	b->next = lumpcache.free;
+	lumpcache.free = b;
+
+	CHECK(checklumpcache());
+	trace(TraceLump, "bumplump exit");
+	return b;
+}
+
+/*
+ * Take db out of the victim heap: the heap's last element is moved
+ * into db's slot and sifted into place, then db is marked as being
+ * outside the heap (TWID32).
+ */
+static void
+delheap(Lump *db)
+{
+	Lump *last;
+
+	lumpcache.nheap--;
+	last = lumpcache.heap[lumpcache.nheap];
+	fixheap(db->heap, last);
+	db->heap = TWID32;
+}
+
+/*
+ * Place b at heap slot i and restore heap order: try moving it up
+ * first, and only if it did not move try moving it down.
+ */
+static void
+fixheap(int i, Lump *b)
+{
+	int where;
+
+	where = upheap(i, b);
+	if(where != i)
+		return;
+	downheap(i, b);
+}
+
+/*
+ * Sift b up from slot i toward the root, shifting parents down while
+ * b's key (used2 measured relative to lumpcache.now) is smaller than
+ * the parent's.  Stores b in its final slot and returns that index.
+ */
+static int
+upheap(int i, Lump *b)
+{
+	Lump *parent;
+	u32int now;
+	int pi;
+
+	now = lumpcache.now;
+	while(i != 0){
+		pi = (i - 1) >> 1;
+		parent = lumpcache.heap[pi];
+		if(b->used2 - now >= parent->used2 - now)
+			break;
+		/* parent is younger than b: pull it down one level */
+		lumpcache.heap[i] = parent;
+		parent->heap = i;
+		i = pi;
+	}
+
+	b->heap = i;
+	lumpcache.heap[i] = b;
+	return i;
+}
+
+/*
+ * Sift b down from slot i toward the leaves, pulling up the child with
+ * the smaller key (used2 measured relative to lumpcache.now) while that
+ * child's key is smaller than b's.  Stores b in its final slot and
+ * returns that index.
+ */
+static int
+downheap(int i, Lump *b)
+{
+	Lump **heap, *child;
+	u32int now;
+	int c, right;
+
+	heap = lumpcache.heap;
+	now = lumpcache.now;
+	for(;;){
+		c = 2*i + 1;
+		if(c >= lumpcache.nheap)
+			break;
+		/* prefer the right child only when it is strictly smaller */
+		right = c + 1;
+		if(right < lumpcache.nheap){
+			if(heap[c]->used2 - now > heap[right]->used2 - now)
+				c = right;
+		}
+		child = heap[c];
+		if(b->used2 - now <= child->used2 - now)
+			break;
+		heap[i] = child;
+		child->heap = i;
+		i = c;
+	}
+
+	b->heap = i;
+	heap[i] = b;
+	return i;
+}
+
+/*
+ * Consistency check helper: walk bb's hash chain, verifying every prev
+ * link on the way, and die with sysfatal if bb is not on the chain.
+ */
+static void
+findblock(Lump *bb)
+{
+	Lump *cur, *before;
+	int h;
+
+	h = hashbits(bb->score, HashLog);
+	before = nil;
+	cur = lumpcache.heads[h];
+	while(cur != nil){
+		if(cur->prev != before)
+			sysfatal("bad prev link");
+		if(cur == bb)
+			return;
+		before = cur;
+		cur = cur->next;
+	}
+	sysfatal("block score=%V type=%#x missing from hash table", bb->score, bb->type);
+}
+
+/*
+ * Debugging aid: verify the invariants of the lump cache and abort the
+ * program if any is violated.  The CHECK() macro at the top of this
+ * file is defined empty, so the CHECK(checklumpcache()) calls compile
+ * to nothing unless the alternate definition is enabled.
+ *
+ * Checked: heap back-pointers and ordering, size/data agreement, hash
+ * chain membership of every typed block, the avail/allowed accounting,
+ * free list contents, and that heap + free + referenced-outside-heap
+ * blocks add up to nblocks.
+ */
+void
+checklumpcache(void)
+{
+	Lump *b;
+	u32int size, now, nfree;
+	int i, k, refed;
+
+	now = lumpcache.now;
+	/* every heap slot: correct back-pointer, ordered against parent and both children */
+	for(i = 0; i < lumpcache.nheap; i++){
+		if(lumpcache.heap[i]->heap != i)
+			sysfatal("lc: mis-heaped at %d: %d", i, lumpcache.heap[i]->heap);
+		if(i > 0 && lumpcache.heap[(i - 1) >> 1]->used2 - now > lumpcache.heap[i]->used2 - now)
+			sysfatal("lc: bad heap ordering");
+		k = (i << 1) + 1;
+		if(k < lumpcache.nheap && lumpcache.heap[i]->used2 - now > lumpcache.heap[k]->used2 - now)
+			sysfatal("lc: bad heap ordering");
+		k++;
+		if(k < lumpcache.nheap && lumpcache.heap[i]->used2 - now > lumpcache.heap[k]->used2 - now)
+			sysfatal("lc: bad heap ordering");
+	}
+
+	/* every descriptor: tally space in use and blocks referenced but not in the heap */
+	refed = 0;
+	size = 0;
+	for(i = 0; i < lumpcache.nblocks; i++){
+		b = &lumpcache.blocks[i];
+		if(b->data == nil && b->size != 0)
+			sysfatal("bad size: %d data=%p", b->size, b->data);
+		if(b->ref && b->heap == TWID32)
+			refed++;
+		if(b->type != TWID8){
+			findblock(b);
+			size += b->size;
+		}
+		if(b->heap != TWID32
+		&& lumpcache.heap[b->heap] != b)
+			sysfatal("lc: spurious heap value");
+	}
+	if(lumpcache.avail != lumpcache.allowed - size){
+		fprint(2, "mismatched available=%d and allowed=%d - used=%d space", lumpcache.avail, lumpcache.allowed, size);
+		/* deliberate nil store; presumably meant to fault right here for a debugger */
+		*(int*)0=0;
+	}
+
+	/* free blocks must be untyped and outside the heap */
+	nfree = 0;
+	for(b = lumpcache.free; b != nil; b = b->next){
+		if(b->type != TWID8 || b->heap != TWID32)
+			sysfatal("lc: bad free list");
+		nfree++;
+	}
+
+	if(lumpcache.nheap + nfree + refed != lumpcache.nblocks)
+		sysfatal("lc: missing blocks: %d %d %d %d", lumpcache.nheap, refed, nfree, lumpcache.nblocks);
+}
diff --git a/src/cmd/venti/srv/lumpqueue.c b/src/cmd/venti/srv/lumpqueue.c
new file mode 100644
index 0000000..1b03f41
--- /dev/null
+++ b/src/cmd/venti/srv/lumpqueue.c
@@ -0,0 +1,187 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+typedef struct LumpQueue	LumpQueue;
+typedef struct WLump		WLump;
+
+enum
+{
+	MaxLumpQ	= 1 << 3	/* max. lumps on a single write queue, must be pow 2 */
+};
+
+/*
+ * One pending write: the arguments that queueproc later passes to
+ * writeqlump, plus the flush generation current when it was queued.
+ */
+struct WLump
+{
+	Lump	*u;
+	Packet	*p;
+	int	creator;
+	int	gen;		/* value of the global gen at queuewrite time */
+	uint	ms;
+};
+
+/*
+ * A circular buffer of pending writes.  w is the next slot to fill and
+ * r the next slot to drain; the queue is empty when w == r and full
+ * when advancing w would make it equal r.  Indices wrap with
+ * & (MaxLumpQ - 1).  All three Rendez share lock.
+ */
+struct LumpQueue
+{
+	QLock	lock;
+	Rendez 	flush;		/* flushqueue sleeps here */
+	Rendez	full;		/* queuewrite sleeps here when the queue is full */
+	Rendez	empty;		/* queueproc sleeps here when the queue is empty */
+	WLump	q[MaxLumpQ];
+	int	w;
+	int	r;
+};
+
+static LumpQueue	*lumpqs;	/* array of nqs queues, set by initlumpqueues */
+static int		nqs;
+
+static QLock		glk;		/* held while flushqueue increments gen */
+static int		gen;		/* flush generation counter */
+
+static void	queueproc(void *vq);
+
+/*
+ * Allocate nq write queues and start the procs that drain them.
+ * Each queue gets NProcPerQueue queueproc instances.
+ *
+ * Returns 0 on success, or -1 (with seterr called) as soon as one
+ * proc cannot be started; procs already started keep running.
+ */
+int
+initlumpqueues(int nq)
+{
+	enum { NProcPerQueue = 5 };
+	LumpQueue *q;
+	int i, j;
+
+	nqs = nq;
+
+	lumpqs = MKNZ(LumpQueue, nq);
+
+	for(i = 0; i < nq; i++){
+		q = &lumpqs[i];
+		/* all three rendezvous points are protected by the queue lock */
+		q->full.l = &q->lock;
+		q->empty.l = &q->lock;
+		q->flush.l = &q->lock;
+
+		for(j = 0; j < NProcPerQueue; j++){
+			if(vtproc(queueproc, q) < 0){
+				seterr(EOk, "can't start write queue slave: %r");
+				return -1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * queue a lump & its packet data for writing
+ *
+ * The queue is chosen by indexsect(mainindex, u->score).  Blocks while
+ * that queue is full.  Returns 0 once the entry is queued, or -1 (with
+ * seterr called) if the index section number is out of range.
+ */
+int
+queuewrite(Lump *u, Packet *p, int creator, uint ms)
+{
+	LumpQueue *q;
+	int i;
+
+	trace(TraceProc, "queuewrite");
+	i = indexsect(mainindex, u->score);
+	if(i < 0 || i >= nqs){
+		seterr(EBug, "internal error: illegal index section in queuewrite");
+		return -1;
+	}
+
+	q = &lumpqs[i];
+
+	qlock(&q->lock);
+	/* full when advancing w would collide with r */
+	while(q->r == ((q->w + 1) & (MaxLumpQ - 1))){
+		trace(TraceProc, "queuewrite sleep");
+		rsleep(&q->full);
+	}
+
+	q->q[q->w].u = u;
+	q->q[q->w].p = p;
+	q->q[q->w].creator = creator;
+	q->q[q->w].ms = ms;
+	/* NOTE(review): gen is read here under q->lock, not glk -- confirm that is intended */
+	q->q[q->w].gen = gen;
+	q->w = (q->w + 1) & (MaxLumpQ - 1);
+
+	trace(TraceProc, "queuewrite wakeup");
+	rwakeup(&q->empty);
+
+	qunlock(&q->lock);
+
+	return 0;
+}
+
+/*
+ * Wait until every entry queued before this call has been taken off
+ * its queue.  Does nothing if the queues were never initialized.
+ *
+ * The global generation is advanced first; each queue is then watched
+ * until it is empty or its oldest entry carries the new generation.
+ * Note that queueproc signals flush when it dequeues an entry, which
+ * is before writeqlump has been called for it.
+ */
+void
+flushqueue(void)
+{
+	int i;
+	LumpQueue *q;
+
+	if(!lumpqs)
+		return;
+
+	trace(TraceProc, "flushqueue");
+
+	qlock(&glk);
+	gen++;
+	qunlock(&glk);
+
+	/*
+	 * lumpqs holds nqs queues (see initlumpqueues), so bound the walk
+	 * by nqs, the same limit queuewrite checks against.
+	 */
+	for(i=0; i<nqs; i++){
+		q = &lumpqs[i];
+		qlock(&q->lock);
+		while(q->w != q->r && gen - q->q[q->r].gen > 0){
+			trace(TraceProc, "flushqueue sleep q%d", i);
+			rsleep(&q->flush);
+		}
+		qunlock(&q->lock);
+	}
+}
+	
+/*
+ * Body of a write queue proc; vq is the LumpQueue to serve.
+ *
+ * Loops forever: wait for an entry, take it off the queue, wake any
+ * flusher and one blocked writer, then (with the queue unlocked) write
+ * the lump with writeqlump and release it with putlump.  A failed
+ * write is reported on fd 2 and otherwise ignored.
+ */
+static void
+queueproc(void *vq)
+{
+	LumpQueue *q;
+	Lump *u;
+	Packet *p;
+	int creator;
+	uint ms;
+
+	threadsetname("queueproc");
+
+	q = vq;
+	for(;;){
+		qlock(&q->lock);
+		while(q->w == q->r){
+			trace(TraceProc, "queueproc sleep empty");
+			rsleep(&q->empty);
+		}
+
+		/* copy the entry out so the slot can be reused right away */
+		u = q->q[q->r].u;
+		p = q->q[q->r].p;
+		creator = q->q[q->r].creator;
+		ms = q->q[q->r].ms;
+
+		q->r = (q->r + 1) & (MaxLumpQ - 1);
+		trace(TraceProc, "queueproc wakeup flush");
+		rwakeupall(&q->flush);
+
+		trace(TraceProc, "queueproc wakeup full");
+		rwakeup(&q->full);
+
+		qunlock(&q->lock);
+
+		trace(TraceProc, "queueproc writelump %V", u->score);
+		/* terminate the diagnostic so consecutive failures do not run together */
+		if(writeqlump(u, p, creator, ms) < 0)
+			fprint(2, "failed to write lump for %V: %r\n", u->score);
+		trace(TraceProc, "queueproc wrotelump %V", u->score);
+
+		putlump(u);
+	}
+}
diff --git a/src/cmd/venti/srv/mkfile b/src/cmd/venti/srv/mkfile
new file mode 100644
index 0000000..2fd4508
--- /dev/null
+++ b/src/cmd/venti/srv/mkfile
@@ -0,0 +1,146 @@
+<$PLAN9/src/mkhdr
+CC=9c
+
+AR=ar
+
+LIBOFILES=\
+	arena.$O\
+	arenas.$O\
+	bloom.$O\
+	buildbuck.$O\
+	clump.$O\
+	config.$O\
+	conv.$O\
+	dcache.$O\
+	dump.$O\
+	graph.$O\
+	httpd.$O\
+	icache.$O\
+	icachewrite.$O\
+	ifile.$O\
+	index.$O\
+	lump.$O\
+	lumpcache.$O\
+	lumpqueue.$O\
+	part.$O\
+	png.$O\
+	round.$O\
+	score.$O\
+	sortientry.$O\
+	stats.$O\
+	syncarena.$O\
+	syncindex0.$O\
+	trace.$O\
+	unwhack.$O\
+	utils.$O\
+	unittoull.$O\
+	whack.$O\
+	xml.$O\
+	zblock.$O\
+	zeropart.$O\
+
+SLIB=libvs.a
+
+LIB=$SLIB
+
+HFILES=	dat.h\
+	fns.h\
+	stdinc.h\
+
+TARG=\
+	venti\
+	fmtarenas\
+	fmtbloom\
+	fmtisect\
+	fmtindex\
+	buildindex\
+	checkarenas\
+	checkindex\
+	clumpstats\
+	findscore\
+	rdarena\
+	wrarena\
+	syncindex\
+	printarena\
+	verifyarena\
+
+OFILES=
+
+BIN=$BIN/venti
+
+it:V: $O.venti
+
+$O.venti: # debugmalloc2.$O # debugmalloc.$O #_p9dir.$O debugmalloc.$O
+
+CLEANFILES=$CLEANFILES $SLIB
+
+<$PLAN9/src/mkmany
+
+$SLIB: $LIBOFILES
+	$AR rvc $SLIB $LIBOFILES
+
+# xml.c:D:	mkxml dat.h
+# 	./mkxml dat.h > xml.c
+
+ainstall:V: ${TARG:%=%.ainstall}
+
+%.ainstall:V:	$O.%
+	scp $prereq amsterdam:/usr/local/bin/venti/$stem
+
+# End-to-end smoke test: format an arena, bloom filter and index section
+# under $vtmp, start a venti server on them, store /usr/local/plan9 with
+# vac, sync the index (through the web interface when websync=yes,
+# otherwise with syncindex after killing the server) and run checkindex.
+test:VQ: ${TARG:%=o.%}
+	slay o.venti|rc
+	vtmp=/home/tmp
+	echo '**********' FMTARENAS
+	./o.fmtarenas -a 40M -b 8k arenas $vtmp/arena
+	echo '**********' FMTBLOOM
+	./o.fmtbloom -s 10M $vtmp/bloom
+	echo '**********' FMTISECT
+	./o.fmtisect -b 8k isect $vtmp/isect
+	(
+		echo index main
+		echo isect $vtmp/isect
+		echo arenas $vtmp/arena
+		echo bloom $vtmp/bloom
+		echo webroot $HOME/src/venti/www
+		echo mem 64M
+		echo icmem 64M
+		echo bcmem 64M
+	) >vtmp.conf
+	echo '**********' FMTINDEX
+	./o.fmtindex vtmp.conf
+	echo '**********' VENTI
+	# ./o.venti -c vtmp.conf -B 64M -I 64M -C 64M -a 'tcp!*!17034' -h 'tcp!*!8001'  >a 2>&1 &
+	./o.venti -c vtmp.conf -a 'tcp!*!17034' -h 'tcp!*!8001'  >a 2>&1 &
+	sleep 5
+	echo '**********' VAC
+	venti='tcp!127.0.0.1!17034' export venti
+	9 time vac /usr/local/plan9 >a.vac
+	case ${websync:-no} in
+	yes)
+		echo '**********' SYNC VIA WEB
+		hget http://127.0.0.1:8001/flushdcache
+		hget http://127.0.0.1:8001/flushicache
+		hget http://127.0.0.1:8001/flushdcache
+		echo '**********' KILL VENTI
+		killall -9 o.venti
+		;;
+	no)
+		echo '**********' KILL VENTI
+		killall -9 o.venti
+		echo '**********' SYNCINDEX
+		./o.syncindex -B64M -I64M -f vtmp.conf
+		;;
+	esac
+	echo '**********' CHECKINDEX
+	# scratch file lives under the same $vtmp as everything else
+	./o.checkindex -B64M vtmp.conf $vtmp/check >check.out
+	wc check.out
+
+luadisk.o: luadisk.c
+	gcc -c -ggdb -Wall -I/usr/include/lua50 luadisk.c
+
+libluadisk.so: luadisk.o
+	gcc -shared -o $target luadisk.o -llua50 -llualib50
+
+$O.xwrarena: xwrarena.$O
+	$LD -o $target xwrarena.$O 
+
diff --git a/src/cmd/venti/srv/part.c b/src/cmd/venti/srv/part.c
new file mode 100644
index 0000000..9b80b6e
--- /dev/null
+++ b/src/cmd/venti/srv/part.c
@@ -0,0 +1,383 @@
+#ifdef PLAN9PORT	/* SORRY! */
+#include <u.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#endif
+#include "stdinc.h"
+#include <ctype.h>
+#include "dat.h"
+#include "fns.h"
+
+u32int	maxblocksize;
+int	readonly;
+
+/*
+ * Parse an unsigned number with an optional binary size suffix.
+ *
+ * p must begin with a digit; the number is converted by strtoull using
+ * radix rad and may be followed by one of k, m, g or t (either case),
+ * multiplying it by 2^10, 2^20, 2^30 or 2^40.
+ *
+ * On success stores the value in *u and the address of the first
+ * unconsumed character in *pp, and returns 0.  Returns -1, leaving *pp
+ * and *u untouched, if p does not start with a digit.
+ */
+static int
+strtoullsuf(char *p, char **pp, int rad, u64int *u)
+{
+	u64int v;
+
+	/* isdigit takes an unsigned char value; plain char may be negative */
+	if(!isdigit((uchar)*p))
+		return -1;
+	v = strtoull(p, &p, rad);
+	switch(*p){
+	case 'k':
+	case 'K':
+		v *= 1024;
+		p++;
+		break;
+	case 'm':
+	case 'M':
+		v *= 1024*1024;
+		p++;
+		break;
+	case 'g':
+	case 'G':
+		v *= 1024*1024*1024;
+		p++;
+		break;
+	case 't':
+	case 'T':
+		/* two steps: 2^40 does not fit in an int constant expression */
+		v *= 1024*1024;
+		v *= 1024*1024;
+		p++;
+		break;
+	}
+	*pp = p;
+	*u = v;
+	return 0;
+}
+	
+/*
+ * Split a partition name of the form file, file:lo, file:lo-hi or
+ * file:-hi into its file name and byte bounds.  The split is made at
+ * the last ':' in name; lo and hi accept the suffixes understood by
+ * strtoullsuf.  A missing lo or hi is reported as 0.
+ *
+ * On success returns 0 with *file set to a newly allocated copy of the
+ * file name.  On a malformed bound returns -1; the copy has then been
+ * freed, so *file must not be used.
+ */
+static int
+parsepart(char *name, char **file, u64int *lo, u64int *hi)
+{
+	char *p;
+
+	*file = estrdup(name);
+	if((p = strrchr(*file, ':')) == nil){
+		*lo = 0;
+		*hi = 0;
+		return 0;
+	}
+	/* cut the bounds off the copy; p now points at them */
+	*p++ = 0;
+	if(*p == '-')
+		*lo = 0;
+	else{
+		if(strtoullsuf(p, &p, 0, lo) < 0){
+			free(*file);
+			return -1;
+		}
+	}
+	if(*p == '-')
+		p++;
+	if(*p == 0){
+		*hi = 0;
+		return 0;
+	}
+	/* anything left over after hi is an error */
+	if(strtoullsuf(p, &p, 0, hi) < 0 || *p != 0){
+		free(*file);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Open the partition described by name (see parsepart for the syntax)
+ * with the given open mode and return a new Part for it.
+ *
+ * When the global readonly is set, the access bits of mode are
+ * replaced by OREAD while all other mode bits are kept.  If an ORDWR
+ * open fails, the open is retried read-only and a warning is printed.
+ *
+ * part->offset is the lower bound and part->size the distance from it
+ * to the upper bound (or to the end of the file when no upper bound
+ * was given).
+ *
+ * Returns nil, normally with seterr called, if the name is malformed,
+ * the file cannot be opened or stat'ed, its length is 0, or the bounds
+ * lie beyond its length.
+ */
+Part*
+initpart(char *name, int mode)
+{
+	Part *part;
+	Dir *dir;
+	char *file;
+	u64int lo, hi;
+
+	if(parsepart(name, &file, &lo, &hi) < 0)
+		return nil;
+	trace(TraceDisk, "initpart %s file %s lo 0x%llx hi 0x%llx", name, file, lo, hi);
+	part = MKZ(Part);
+	part->name = estrdup(name);
+	part->filename = estrdup(file);
+	if(readonly){
+		/* clear the access bits (not everything else), then ask for read only */
+		mode &= ~(OREAD|OWRITE|ORDWR);
+		mode |= OREAD;
+	}
+	part->fd = open(file, mode);
+	if(part->fd < 0){
+		if((mode&(OREAD|OWRITE|ORDWR)) == ORDWR)
+			part->fd = open(file, (mode&~ORDWR)|OREAD);
+		if(part->fd < 0){
+			freepart(part);
+			fprint(2, "can't open partition='%s': %r\n", file);
+			seterr(EOk, "can't open partition='%s': %r", file);
+			fprint(2, "%r\n");
+			free(file);
+			return nil;
+		}
+		fprint(2, "warning: %s opened for reading only\n", name);
+	}
+	part->offset = lo;
+	dir = dirfstat(part->fd);
+	if(dir == nil){
+		freepart(part);
+		seterr(EOk, "can't stat partition='%s': %r", file);
+		free(file);
+		return nil;
+	}
+	if(dir->length == 0){
+		free(dir);
+		freepart(part);
+		seterr(EOk, "can't determine size of partition %s", file);
+		free(file);
+		return nil;
+	}
+	if(dir->length < hi || dir->length < lo){
+		freepart(part);
+		seterr(EOk, "partition '%s': bounds out of range (max %lld)", name, dir->length);
+		free(dir);
+		free(file);
+		return nil;
+	}
+	if(hi == 0)
+		hi = dir->length;
+	part->size = hi - part->offset;
+#ifdef _LIBC_H_
+	{
+		struct statfs sfs;
+		if(fstatfs(part->fd, &sfs) >= 0)
+			part->fsblocksize = sfs.f_bsize;
+	}
+#endif
+	free(dir);
+	/* part->filename is its own copy, so the parsed name is no longer needed */
+	free(file);
+	return part;
+}
+
+/*
+ * Close a partition and release everything initpart allocated for it:
+ * the descriptor, both name strings and the Part itself.
+ * A nil argument is ignored.
+ */
+void
+freepart(Part *part)
+{
+	if(part == nil)
+		return;
+	if(part->fd >= 0)
+		close(part->fd);
+	free(part->name);
+	/* filename is a separate estrdup made in initpart */
+	free(part->filename);
+	free(part);
+}
+
+/*
+ * Record the block size of a partition.  It may be set only once;
+ * a second attempt is fatal.  The global maxblocksize is raised to
+ * blocksize if it was smaller.
+ */
+void
+partblocksize(Part *part, u32int blocksize)
+{
+	if(part->blocksize != 0)
+		sysfatal("resetting partition=%s's block size", part->name);
+
+	part->blocksize = blocksize;
+	maxblocksize = blocksize > maxblocksize ? blocksize : maxblocksize;
+}
+
+/*
+ * Read/write some amount of data between a block device or file and a memory buffer.
+ *
+ * Most Unix systems require that when accessing a block device directly,
+ * the buffer, offset, and count are all multiples of the device block size,
+ * making this a lot more complicated than it otherwise would be.
+ * 
+ * Most of our callers will make things easy on us, but for some callers it's best
+ * if we just do the work here, with only one place to get it right (hopefully).
+ * 
+ * If everything is aligned properly, prwb will try to do big transfers in the main 
+ * body of the loop: up to MaxIo bytes at a time.  If everything isn't aligned properly,
+ * we work one block at a time.
+ *
+ * name is used only in messages.  isread selects the direction.
+ * Partial blocks at either end are handled by reading the whole block
+ * into an aligned temporary and, when writing, writing it back.
+ *
+ * Returns the original count on success, 0 if count is 0, and -1 on
+ * failure (seterr is called, except when the temporary cannot be
+ * allocated).
+ */
+#undef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+int
+prwb(char *name, int fd, int isread, u64int offset, void *vbuf, u32int count, u32int blocksize)
+{
+	char *op;
+	u8int *buf, *tmp, *freetmp, *dst;
+	u32int c, delta, icount, opsize;
+	int r;
+
+	buf = vbuf;
+	tmp = nil;
+	freetmp = nil;
+	icount = count;
+	opsize = blocksize;
+
+	if(count == 0){
+		logerr(EStrange, "prwb %s called to %s 0 bytes", name, isread ? "read" : "write");
+		return 0;
+	}
+
+	assert(blocksize > 0);
+
+	/* allocate blocksize-aligned temp buffer if needed */
+	if((ulong)offset%blocksize || (ulong)buf%blocksize || count%blocksize){
+		if((freetmp = malloc(blocksize*2)) == nil)
+			return -1;
+		tmp = freetmp;
+		tmp += blocksize - (ulong)tmp%blocksize;
+	}
+
+	/* handle beginning fringe */
+	if((delta = (ulong)offset%blocksize) != 0){
+		assert(tmp != nil);
+		if((r=pread(fd, tmp, blocksize, offset-delta)) != blocksize){
+			dst = tmp;
+			offset = offset-delta;
+			op = "read";
+			goto Error;
+		}
+		c = min(count, blocksize-delta);
+		assert(c > 0 && c < blocksize);
+		if(isread)
+			memmove(buf, tmp+delta, c);
+		else{
+			memmove(tmp+delta, buf, c);
+			if((r=pwrite(fd, tmp, blocksize, offset-delta)) != blocksize){
+				dst = tmp;
+				offset = offset-delta;
+				op = "write";
+				goto Error;
+			}
+		}
+		assert(c > 0);
+		offset += c;
+		buf += c;
+		count -= c;
+	}
+
+	/* handle full blocks */
+	while(count >= blocksize){
+		assert((ulong)offset%blocksize == 0);
+		if((ulong)buf%blocksize){
+			/* caller's buffer is unaligned: bounce one block at a time */
+			assert(tmp != nil);
+			dst = tmp;
+			opsize = blocksize;
+		}else{
+			dst = buf;
+			opsize = count - count%blocksize;
+			if(opsize > MaxIo)
+				opsize = MaxIo;
+		}
+		if(isread){
+			if((r=pread(fd, dst, opsize, offset))<=0 || r%blocksize){
+				op = "read";
+				goto Error;
+			}
+			if(dst == tmp){
+				assert(r == blocksize);
+				memmove(buf, tmp, blocksize);
+			}
+		}else{
+			if(dst == tmp){
+				assert(opsize == blocksize);
+				memmove(dst, buf, blocksize);
+			}
+			if((r=pwrite(fd, dst, opsize, offset))<=0 || r%blocksize){
+				op = "write";
+				goto Error;
+			}
+			if(dst == tmp)
+				assert(r == blocksize);
+		}
+		assert(r > 0);
+		offset += r;
+		buf += r;
+		count -= r;
+	}
+
+	/* handle ending fringe */
+	if(count > 0){
+		assert((ulong)offset%blocksize == 0);
+		assert(tmp != nil);
+		/*
+		 * Complicated condition: if we're reading it's okay to get less than
+		 * a block as long as it's enough to satisfy the read - maybe this is
+		 * a normal file.  (We never write to normal files, or else things would
+		 * be even more complicated.)
+		 *
+		 * r is tested for a negative value before it is compared with the
+		 * unsigned count; otherwise a failed pread (-1) would convert to a
+		 * huge unsigned number and look like a satisfied read.
+		 */
+		r = pread(fd, tmp, blocksize, offset);
+		if((isread && (r < 0 || (u32int)r < count)) || (!isread && r != blocksize)){
+			dst = tmp;
+			opsize = blocksize;
+			op = "read";
+			goto Error;
+		}
+		if(isread)
+			memmove(buf, tmp, count);
+		else{
+			memmove(tmp, buf, count);
+			/* tmp holds exactly one block; opsize may still be a multi-block size */
+			if((r=pwrite(fd, tmp, blocksize, offset)) != blocksize){
+				dst = tmp;
+				opsize = blocksize;
+				op = "write";
+				goto Error;
+			}
+		}
+	}
+	free(freetmp);
+	return icount;
+
+Error:
+	seterr(EAdmin, "%s %s offset 0x%llux count %ud buf %p returned %d: %r",
+		op, name, offset, opsize, dst, r);
+	free(freetmp);
+	return -1;
+}
+
+/*
+ * Read (isread != 0) or write count bytes at partition-relative offset.
+ *
+ * Requests that start at or beyond part->size, or that extend past it,
+ * are rejected with -1 and seterr.  Otherwise the transfer is done by
+ * prwb at file offset part->offset+offset, and prwb's result is
+ * returned.
+ */
+int
+rwpart(Part *part, int isread, u64int offset, u8int *buf, u32int count)
+{
+	u32int blocksize;
+
+	trace(TraceDisk, "%s %s %ud at 0x%llx", 
+		isread ? "read" : "write", part->name, count, offset);
+	if(offset >= part->size || offset+count > part->size){
+		seterr(EStrange, "out of bounds %s offset 0x%llux count %ud to partition %s size 0x%llux",
+			isread ? "read" : "write", offset, count, part->name, part->size);
+		return -1;
+	}
+
+	/* alignment unit for prwb: file system block size, else partition block size, else 4096 */
+	blocksize = part->fsblocksize;
+	if(blocksize == 0)
+		blocksize = part->blocksize;
+	if(blocksize == 0)
+		blocksize = 4096;
+
+	return prwb(part->filename, part->fd, isread, part->offset+offset, buf, count, blocksize);
+}
+
+/* Read count bytes at partition offset into buf; see rwpart for the result. */
+int
+readpart(Part *part, u64int offset, u8int *buf, u32int count)
+{
+	return rwpart(part, 1, offset, buf, count);
+}
+
+/* Write count bytes from buf at partition offset; see rwpart for the result. */
+int
+writepart(Part *part, u64int offset, u8int *buf, u32int count)
+{
+	return rwpart(part, 0, offset, buf, count);
+}
+
+/*
+ * Read an entire partition (named as for initpart) into a newly
+ * allocated ZBlock.  Returns nil, with seterr called for allocation
+ * and read failures, if anything goes wrong.  The partition is closed
+ * again before returning.
+ */
+ZBlock*
+readfile(char *name)
+{
+	Part *p;
+	ZBlock *b;
+
+	p = initpart(name, OREAD);
+	if(p == nil)
+		return nil;
+	/* NOTE(review): p->size is 64 bits; presumably callers only use this on small files -- confirm */
+	b = alloczblock(p->size, 0, p->blocksize);
+	if(b == nil){
+		seterr(EOk, "can't alloc %s: %r", name);
+		freepart(p);
+		return nil;
+	}
+	if(readpart(p, 0, b->data, p->size) < 0){
+		seterr(EOk, "can't read %s: %r", name);
+		freepart(p);
+		freezblock(b);
+		return nil;
+	}
+	freepart(p);
+	return b;
+}
+
diff --git a/src/cmd/venti/srv/png.c b/src/cmd/venti/srv/png.c
new file mode 100644
index 0000000..966b7e9
--- /dev/null
+++ b/src/cmd/venti/srv/png.c
@@ -0,0 +1,241 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+enum
+{
+	IDATSIZE	= 20000,	/* size of the buffer collected before an IDAT chunk is written */
+	FilterNone = 0		/* filter byte emitted at the start of every scan line */
+};
+
+typedef struct ZlibR ZlibR;
+typedef struct ZlibW ZlibW;
+
+/*
+ * Source state for zread: a cursor (x, y) over a dx by dy pixel image.
+ */
+struct ZlibR
+{
+	uchar *data;	/* first byte of the image */
+	int width;	/* bytes from one scan line to the next */
+	int dx;		/* pixels per scan line */
+	int dy;		/* number of scan lines */
+	int x;		/* next pixel to emit in the current line */
+	int y;		/* current scan line */
+	int pixwid;	/* bytes per pixel */
+};
+
+/*
+ * Sink state for zwrite: compressed bytes are collected in buf and
+ * written out as an IDAT chunk whenever it fills.
+ */
+struct ZlibW
+{
+	Hio *io;
+	uchar *buf;	/* start of the buffer */
+	uchar *b;	/* next free byte */
+	uchar *e;	/* end of the buffer */
+};
+
+static ulong *crctab;	/* CRC table, built on the first call to writepng */
+static uchar PNGmagic[] = { 137, 'P', 'N', 'G', '\r', '\n', 26, '\n'};
+
+/*
+ * Store the low 32 bits of v at a[0..3], most significant byte first.
+ */
+static void
+put4(uchar *a, ulong v)
+{
+	int i;
+
+	for(i = 3; i >= 0; i--){
+		a[i] = v;
+		v >>= 8;
+	}
+}
+
+/*
+ * Write one PNG chunk to io: the 4-byte big-endian length n, the
+ * 4-character type, the n data bytes at d, and a CRC computed over the
+ * type and the data.  Nothing is written if type is not exactly four
+ * characters long.
+ */
+static void
+chunk(Hio *io, char *type, uchar *d, int n)
+{
+	uchar buf[4];
+	ulong crc = 0;
+
+	if(strlen(type) != 4)
+		return;
+	put4(buf, n);
+	hwrite(io, buf, 4);
+	hwrite(io, type, 4);
+	hwrite(io, d, n);
+	/* the length field is not covered by the CRC */
+	crc = blockcrc(crctab, crc, type, 4);
+	crc = blockcrc(crctab, crc, d, n);
+	put4(buf, crc);
+	hwrite(io, buf, 4);
+}
+
+/*
+ * Read callback handed to deflatezlib: fill buf with up to n bytes of
+ * PNG scan line data taken from the image described by va (a ZlibR).
+ *
+ * Each scan line is preceded by a FilterNone byte.  Four-byte pixels
+ * are converted from premultiplied to non-premultiplied alpha as they
+ * are copied.  Returns the number of bytes stored, which is 0 once all
+ * dy lines have been produced.
+ */
+static int
+zread(void *va, void *buf, int n)
+{
+	int a, i, pixels, pixwid;
+	uchar *b, *e, *img;
+	ZlibR *z;
+
+	z = va;
+	pixwid = z->pixwid;
+	b = buf;
+	e = b+n;
+	/*
+	 * Require room for a filter byte plus one pixel.  With only pixwid
+	 * bytes left at the start of a line, the filter byte would be
+	 * stored with no pixel after it and stored again on the next call.
+	 */
+	while(b+pixwid < e){
+		if(z->y >= z->dy)
+			break;
+		if(z->x == 0)
+			*b++ = FilterNone;
+		/* copy as many pixels as fit, but never past the end of the line */
+		pixels = (e-b)/pixwid;
+		if(pixels > z->dx - z->x)
+			pixels = z->dx - z->x;
+		img = z->data + z->width*z->y + pixwid*z->x;
+		memmove(b, img, pixwid*pixels);
+		if(pixwid == 4){
+			/*
+			 * Convert to non-premultiplied alpha.
+			 */
+			for(i=0; i<pixels; i++, b+=4){
+				a = b[3];
+				if(a == 255 || a == 0)
+					;
+				else{
+					/* clamp each channel to alpha so the result stays within 0..255 */
+					if(b[0] >= a)
+						b[0] = a;
+					b[0] = (b[0]*255)/a;
+					if(b[1] >= a)
+						b[1] = a;
+					b[1] = (b[1]*255)/a;
+					if(b[2] >= a)
+						b[2] = a;
+					b[2] = (b[2]*255)/a;
+				}
+			}
+		}else	
+			b += pixwid*pixels;
+
+		z->x += pixels;
+		if(z->x >= z->dx){
+			z->x = 0;
+			z->y++;
+		}
+	}
+	return b - (uchar*)buf;
+}
+
+/*
+ * Write the bytes collected so far in z's buffer as one IDAT chunk and
+ * reset the buffer to empty.
+ */
+static void
+IDAT(ZlibW *z)
+{
+	chunk(z->io, "IDAT", z->buf, z->b - z->buf);
+	z->b = z->buf;
+}
+
+/*
+ * Write callback handed to deflatezlib: append the n bytes at buf to
+ * the buffer of va (a ZlibW), emitting an IDAT chunk each time the
+ * buffer becomes full.  Always returns n.
+ */
+static int
+zwrite(void *va, void *buf, int n)
+{
+	ZlibW *z;
+	uchar *src;
+	int left, room, take;
+
+	z = va;
+	src = buf;
+	left = n;
+
+	while(left > 0){
+		/* copy whichever is smaller: what is left or what still fits */
+		room = z->e - z->b;
+		take = left < room ? left : room;
+		memmove(z->b, src, take);
+		z->b += take;
+		src += take;
+		left -= take;
+		if(z->b >= z->e)
+			IDAT(z);
+	}
+	return n;
+}
+
+/*
+ * Return an image with the pixels of i in the channel layout wanted
+ * for PNG output: ABGR32 if i's channel string contains an alpha
+ * channel, BGR24 otherwise.
+ *
+ * If i already has that layout, i itself is returned.  Otherwise a new
+ * image is allocated and drawn into under memdrawlock; nil is returned
+ * if the allocation fails.
+ */
+static Memimage*
+memRGBA(Memimage *i)
+{
+	Memimage *ni;
+	char buf[32];
+	ulong dst;
+	
+	/*
+	 * [A]BGR because we want R,G,B,[A] in big-endian order.  Sigh.
+	 */
+	chantostr(buf, i->chan);
+	if(strchr(buf, 'a'))
+		dst = ABGR32;
+	else
+		dst = BGR24;
+		
+	if(i->chan == dst)
+		return i;
+
+	qlock(&memdrawlock);
+	ni = allocmemimage(i->r, dst);
+	if(ni)
+		memimagedraw(ni, ni->r, i, i->r.min, nil, i->r.min, S);
+	qunlock(&memdrawlock);
+	return ni;
+}
+
+/*
+ * Write image m to io as a PNG file: signature, IHDR, one or more IDAT
+ * chunks holding the deflated scan lines, and IEND.
+ *
+ * The image is first converted with memRGBA; a converted copy is freed
+ * again on every return path.  Returns 0 on success, -1 if the
+ * conversion or the compression fails (in the latter case part of the
+ * file has already been written to io).
+ */
+int
+writepng(Hio *io, Memimage *m)
+{
+	static int first = 1;
+	static QLock lk;
+	uchar buf[200], *h;
+	Memimage *rgb;
+	ZlibR zr;
+	ZlibW zw;
+	int ret;
+
+	/* one-time setup of the deflate tables and the CRC table */
+	if(first){
+		qlock(&lk);
+		if(first){
+			deflateinit();
+			crctab = mkcrctab(0xedb88320);
+			first = 0;
+		}
+		qunlock(&lk);
+	}
+
+	rgb = memRGBA(m);
+	if(rgb == nil)
+		return -1;
+
+	hwrite(io, PNGmagic, sizeof PNGmagic);
+	
+	/* IHDR chunk */
+	h = buf;
+	put4(h, Dx(m->r)); h += 4;
+	put4(h, Dy(m->r)); h += 4;
+	*h++ = 8;	/* 8 bits per channel */
+	if(rgb->chan == BGR24)
+		*h++ = 2;		/* RGB */
+	else
+		*h++ = 6;		/* RGBA */
+	*h++ = 0;	/* compression - deflate */
+	*h++ = 0;	/* filter - none */
+	*h++ = 0;	/* interlace - none */
+	chunk(io, "IHDR", buf, h-buf);
+
+	/* image data */
+	zr.dx = Dx(m->r);
+	zr.dy = Dy(m->r);
+	zr.width = rgb->width * sizeof(ulong);
+	zr.data = rgb->data->bdata;
+	zr.x = 0;
+	zr.y = 0;
+	zr.pixwid = chantodepth(rgb->chan)/8;
+	zw.io = io;
+	zw.buf = vtmalloc(IDATSIZE);
+	zw.b = zw.buf;
+	zw.e = zw.b + IDATSIZE;
+	ret = 0;
+	if(deflatezlib(&zw, zwrite, &zr, zread, 6, 0) < 0)
+		ret = -1;
+	else{
+		/* flush whatever is left in the buffer, then finish the file */
+		if(zw.b > zw.buf)
+			IDAT(&zw);
+		chunk(io, "IEND", nil, 0);
+	}
+	free(zw.buf);
+
+	/* release the converted copy on failure as well as on success */
+	if(m != rgb){
+		qlock(&memdrawlock);
+		freememimage(rgb);
+		qunlock(&memdrawlock);
+	}
+	return ret;
+}
diff --git a/src/cmd/venti/srv/printarena.c b/src/cmd/venti/srv/printarena.c
new file mode 100644
index 0000000..c305e81
--- /dev/null
+++ b/src/cmd/venti/srv/printarena.c
@@ -0,0 +1,130 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * Print the command synopsis, including the -o option parsed in
+ * threadmain, and exit with status "usage".
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: printarena [-o fileoffset] arenafile [offset]\n");
+	threadexitsall("usage");
+}
+
+/*
+ * Print a summary of the arena, then one line per clump (address,
+ * score, type, uncompressed size), starting at arena-relative address
+ * offset, or at 0 when offset is ~0.
+ *
+ * The walk stops at the first free clump magic, at an unexpected
+ * magic, at a clump that cannot be loaded, or at a clump whose score
+ * or type is bad; the address reached is printed last.  Clumps of type
+ * VtCorruptType are listed without checking score or type.
+ */
+static void
+rdarena(Arena *arena, u64int offset)
+{
+	u64int a, aa, e;
+	u32int magic;
+	Clump cl;
+	uchar score[VtScoreSize];
+	ZBlock *lump;
+
+	printarena(2, arena);
+
+	a = arena->base;
+	e = arena->base + arena->size;
+	if(offset != ~(u64int)0) {
+		if(offset >= e-a)
+			sysfatal("bad offset %llud >= %llud\n",
+				offset, e-a);
+		aa = offset;
+	} else
+		aa = 0;
+
+	for(; aa < e; aa += ClumpSize+cl.info.size) {
+		magic = clumpmagic(arena, aa);
+		if(magic == ClumpFreeMagic)
+			break;
+		if(magic != arena->clumpmagic) {
+			fprint(2, "illegal clump magic number %#8.8ux offset %llud\n",
+				magic, aa);
+			break;
+		}
+		lump = loadclump(arena, aa, 0, &cl, score, 0);
+		if(lump == nil) {
+			fprint(2, "clump %llud failed to read: %r\n", aa);
+			break;
+		}
+		if(cl.info.type != VtCorruptType) {
+			/* recompute the score from the data and compare with the header */
+			scoremem(score, lump->data, cl.info.uncsize);
+			if(scorecmp(cl.info.score, score) != 0) {
+				fprint(2, "clump %llud has mismatched score\n", aa);
+				freezblock(lump);
+				break;
+			}
+			if(vttypevalid(cl.info.type) < 0) {
+				fprint(2, "clump %llud has bad type %d\n", aa, cl.info.type);
+				freezblock(lump);
+				break;
+			}
+		}
+		print("%22llud %V %3d %5d\n", aa, score, cl.info.type, cl.info.uncsize);
+		freezblock(lump);
+	}
+	print("end offset %llud\n", aa);
+}
+
+/*
+ * printarena: open an arena stored in a file, print its header, and
+ * list its clumps with rdarena.
+ *
+ * -o gives the byte offset of the arena within the file (default 0).
+ * The optional second argument is the arena-relative address at which
+ * the listing starts.  Any failure is fatal.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	char *file;
+	Arena *arena;
+	u64int offset, aoffset;
+	Part *part;
+	Dir *d;
+	uchar buf[8192];
+	ArenaHead head;
+
+	readonly = 1;	/* for part.c */
+	aoffset = 0;
+	ARGBEGIN{
+	case 'o':
+		aoffset = strtoull(EARGF(usage()), 0, 0);
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	/* ~0 tells rdarena that no starting address was given */
+	offset = ~(u64int)0;
+	switch(argc) {
+	default:
+		usage();
+	case 2:
+		offset = strtoull(argv[1], 0, 0);
+		/* fall through */
+	case 1:
+		file = argv[0];
+	}
+
+
+	ventifmtinstall();
+	statsinit();
+
+	if((d = dirstat(file)) == nil)
+		sysfatal("can't stat file %s: %r", file);
+
+	part = initpart(file, OREAD|ODIRECT);
+	if(part == nil)
+		sysfatal("can't open file %s: %r", file);
+	/* the arena header is unpacked from the first bytes at aoffset */
+	if(readpart(part, aoffset, buf, sizeof buf) < 0)
+		sysfatal("can't read file %s: %r", file);
+
+	if(unpackarenahead(&head, buf) < 0)
+		sysfatal("corrupted arena header: %r");
+
+	print("# arena head version=%d name=%.*s blocksize=%d size=%lld clumpmagic=0x%.8ux\n",
+		head.version, ANameSize, head.name, head.blocksize,
+		head.size, head.clumpmagic);
+
+	if(aoffset+head.size > d->length)
+		sysfatal("arena is truncated: want %llud bytes have %llud\n",
+			head.size, d->length);
+
+	partblocksize(part, head.blocksize);
+	initdcache(8 * MaxDiskBlock);
+
+	arena = initarena(part, aoffset, head.size, head.blocksize);
+	if(arena == nil)
+		sysfatal("initarena: %r");
+
+	rdarena(arena, offset);
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/printarenas.c b/src/cmd/venti/srv/printarenas.c
new file mode 100644
index 0000000..111db01
--- /dev/null
+++ b/src/cmd/venti/srv/printarenas.c
@@ -0,0 +1,113 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include <bio.h>
+
+Biobuf bout;	/* buffered output on fd 1; threadmain sets it up with Binit and closes it with Bterm */
+
+static void	/* write one line for ie to bout: address, score, type, size */
+pie(IEntry *ie)
+{
+	IAddr *ia = &ie->ia;
+	Bprint(&bout, "%22lld %V %3d %5d\n", ia->addr, ie->score, ia->type, ia->size);
+}
+
+void	/* print the synopsis on fd 2 and exit with a failure status */
+usage(void)
+{
+	fprint(2, "usage: printarenas [-B blockcachesize] config [arenaname...]\n");
+	threadexitsall("usage");	/* non-nil status: a usage error must not look like success */
+}
+
+Config conf;	/* handed to initventi together with the config file name in threadmain */
+
+int	/* 1 if name should be dumped: either no names were given or name is among argv */
+shoulddump(char *name, int argc, char **argv)
+{
+	char **want;
+
+	if(argc == 0)
+		return 1;
+	for(want = argv; want < argv+argc; want++)
+		if(strcmp(name, *want) == 0)
+			return 1;
+	return 0;
+}
+
+enum
+{
+	ClumpChunks = 32*1024,	/* most ClumpInfo entries dumparena asks readclumpinfos for at once */
+};
+
+void
+dumparena(Arena *arena, u64int a)
+{
+	IEntry ie;
+	ClumpInfo *ci, *cis;
+	u32int clump;
+	int i, n;
+
+	/* print one index-entry line per clump in arena's directory; a is the address given to the first clump */
+	cis = MKN(ClumpInfo, ClumpChunks);
+	memset(&ie, 0, sizeof(IEntry));
+	for(clump = 0; clump < arena->memstats.clumps; clump += n){
+		n = ClumpChunks;	/* read the directory in chunks of at most ClumpChunks */
+		if(n > arena->memstats.clumps - clump)
+			n = arena->memstats.clumps - clump;
+		if(readclumpinfos(arena, clump, cis, n) != n){
+			fprint(2, "arena directory read failed: %r\n");
+			break;	/* give up on this arena; what was printed so far stands */
+		}
+
+		for(i = 0; i < n; i++){
+			ci = &cis[i];
+			ie.ia.type = ci->type;
+			ie.ia.size = ci->uncsize;
+			ie.ia.addr = a;
+			a += ci->size + ClumpSize;	/* the next clump follows this one's header and data */
+			ie.ia.blocks = (ci->size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;	/* rounded up */
+			scorecp(ie.score, ci->score);
+			pie(&ie);
+		}
+	}
+	free(cis);
+}
+
+void
+threadmain(int argc, char *argv[])	/* printarenas [-B blockcachesize] config [arenaname...] */
+{
+	int i;
+	Index *ix;
+	u32int bcmem;
+
+	bcmem = 0;
+	ARGBEGIN{
+	case 'B':
+		bcmem = unittoull(EARGF(usage()));	/* -B with no value is a usage error, not a nil argument */
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc < 1)
+		usage();
+
+	ventifmtinstall();
+
+	if(initventi(argv[0], &conf) < 0)
+		sysfatal("can't init venti: %r");
+
+	/* raise the disk cache size to the minimum derived from the index geometry */
+	if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
+		bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
+	if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
+	initdcache(bcmem);
+	Binit(&bout, 1, OWRITE);
+	ix = mainindex;
+	for(i=0; i<ix->narenas; i++)	/* the remaining arguments name the arenas to dump */
+		if(shoulddump(ix->arenas[i]->name, argc-1, argv+1))
+			dumparena(ix->arenas[i], ix->amap[i].start);
+	Bterm(&bout);
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/printindex.c b/src/cmd/venti/srv/printindex.c
new file mode 100644
index 0000000..edbcf79
--- /dev/null
+++ b/src/cmd/venti/srv/printindex.c
@@ -0,0 +1,99 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include <bio.h>
+
+Biobuf bout;	/* buffered output on fd 1; threadmain sets it up with Binit and closes it with Bterm */
+
+static void
+pie(IEntry *ie)
+{
+	/* one line per entry on bout: address, score, type, size */
+	Bprint(&bout, "%22lld %V %3d %5d\n", ie->ia.addr, ie->score, ie->ia.type, ie->ia.size);
+}
+
+void	/* print the synopsis on fd 2 and exit with a failure status */
+usage(void)
+{
+	fprint(2, "usage: printindex [-B blockcachesize] config [isectname...]\n");
+	threadexitsall("usage");	/* non-nil status: a usage error must not look like success */
+}
+
+Config conf;	/* handed to initventi together with the config file name in threadmain */
+
+int
+shoulddump(char *name, int argc, char **argv)
+{
+	/* an empty name list selects everything; otherwise name must be listed */
+	if(argc == 0)
+		return 1;
+
+	while(argc-- > 0)
+		if(strcmp(name, *argv++) == 0)
+			return 1;
+	return 0;
+}
+
+void	/* print every index entry stored in the buckets of index section is */
+dumpisect(ISect *is)
+{
+	int j;
+	uchar *buf = emalloc(is->blocksize);	/* holds one index block at a time */
+	u32int i;
+	u64int off;
+	IBucket ib;
+	IEntry ie;
+
+	for(i=0; i<is->blocks; i++){
+		off = is->blockbase+(u64int)is->blocksize*i;
+		if(readpart(is->part, off, buf, is->blocksize) < 0)
+			fprint(2, "read %s at 0x%llux: %r\n", is->part->name, off);	/* report and move on to the next block */
+		else{
+			unpackibucket(&ib, buf, is->bucketmagic);
+			for(j=0; j<ib.n; j++){
+				unpackientry(&ie, &ib.data[j*IEntrySize]);
+				pie(&ie);
+			}
+		}
+	}
+	free(buf);	/* release the block buffer once every block has been visited */
+}
+
+void
+threadmain(int argc, char *argv[])	/* printindex [-B blockcachesize] config [isectname...] */
+{
+	int i;
+	Index *ix;
+	u32int bcmem;
+
+	bcmem = 0;
+	ARGBEGIN{
+	case 'B':
+		bcmem = unittoull(EARGF(usage()));	/* -B with no value is a usage error, not a nil argument */
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc < 1)
+		usage();
+
+	fmtinstall('H', encodefmt);	/* NOTE(review): pie prints with %V; confirm something installs that verb */
+
+	if(initventi(argv[0], &conf) < 0)
+		sysfatal("can't init venti: %r");
+
+	/* raise the disk cache size to the minimum derived from the index geometry */
+	if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
+		bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
+	if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
+	initdcache(bcmem);
+	ix = mainindex;
+	Binit(&bout, 1, OWRITE);
+	for(i=0; i<ix->nsects; i++)	/* the remaining arguments name the sections to dump */
+		if(shoulddump(ix->sects[i]->name, argc-1, argv+1))
+			dumpisect(ix->sects[i]);
+	Bterm(&bout);
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/printmap.c b/src/cmd/venti/srv/printmap.c
new file mode 100644
index 0000000..f3392ef
--- /dev/null
+++ b/src/cmd/venti/srv/printmap.c
@@ -0,0 +1,42 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+void
+usage(void)	/* print the synopsis on fd 2 and exit with status "usage" */
+{
+	fprint(2, "usage: printmap [-B blockcachesize] config\n");
+	threadexitsall("usage");
+}
+
+Config conf;	/* handed to initventi together with the config file name in threadmain */
+
+void
+threadmain(int argc, char *argv[])	/* printmap [-B blockcachesize] config */
+{
+	u32int bcmem;
+	int fix;
+
+	fix = 0;	/* never changed below, so readonly is always set */
+	bcmem = 0;
+	ARGBEGIN{
+	case 'B':
+		bcmem = unittoull(EARGF(usage()));	/* parsed for compatibility; not used afterwards in this function */
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(!fix)
+		readonly = 1;
+
+	if(argc != 1)
+		usage();
+
+	if(initventi(argv[0], &conf) < 0)
+		sysfatal("can't init venti: %r");
+
+	printindex(1, mainindex);	/* first argument 1: the same fd Binit uses for standard output elsewhere */
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/rdarena.c b/src/cmd/venti/srv/rdarena.c
new file mode 100644
index 0000000..909cc20
--- /dev/null
+++ b/src/cmd/venti/srv/rdarena.c
@@ -0,0 +1,91 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+static int	verbose;	/* counts -v flags; when set threadmain calls printarenapart on fd 2 */
+
+void	/* print the synopsis on fd 2 and exit with a failure status */
+usage(void)
+{
+	fprint(2, "usage: rdarena [-v] arenapart arena\n");
+	threadexitsall("usage");	/* non-nil status: a usage error must not look like success */
+}
+
+static void
+rdarena(Arena *arena)	/* copy the arena's bytes, plus one block on each side, to fd 1 */
+{
+	ZBlock *b;
+	u64int a, e;
+	u32int bs;
+
+	fprint(2, "copying %s to standard output\n", arena->name);
+	printarena(2, arena);
+
+	bs = MaxIoSize;	/* transfer size: MaxIoSize, or one arena block if that is larger */
+	if(bs < arena->blocksize)
+		bs = arena->blocksize;
+
+	b = alloczblock(bs, 0, arena->blocksize);
+	e = arena->base + arena->size + arena->blocksize;	/* end: one block past base+size */
+	for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){	/* start: one block before base */
+		if(a + bs > e)
+			bs = arena->blocksize;	/* near the end, drop to single-block transfers for good */
+		if(readpart(arena->part, a, b->data, bs) < 0)
+			fprint(2, "can't copy %s, read at %lld failed: %r\n", arena->name, a);	/* not fatal: b->data is still written below */
+		if(write(1, b->data, bs) != bs)
+			sysfatal("can't copy %s, write at %lld failed: %r", arena->name, a);
+	}
+
+	freezblock(b);
+}
+
+void
+threadmain(int argc, char *argv[])	/* rdarena [-v] arenapart arena */
+{
+	ArenaPart *ap;
+	Part *part;
+	char *file, *aname;
+	int i;
+
+	ventifmtinstall();
+	statsinit();
+
+	ARGBEGIN{
+	case 'v':
+		verbose++;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	readonly = 1;
+
+	if(argc != 2)
+		usage();
+
+	file = argv[0];	/* the arena partition */
+	aname = argv[1];	/* name of the arena to copy */
+
+	part = initpart(file, OREAD|ODIRECT);
+	if(part == nil)
+		sysfatal("can't open partition %s: %r", file);
+
+	ap = initarenapart(part);
+	if(ap == nil)
+		sysfatal("can't initialize arena partition in %s: %r", file);
+
+	if(verbose)
+		printarenapart(2, ap);
+
+	initdcache(8 * MaxDiskBlock);
+
+	for(i = 0; i < ap->narenas; i++){	/* copy the first arena whose name matches, then exit */
+		if(strcmp(ap->arenas[i]->name, aname) == 0){
+			rdarena(ap->arenas[i]);
+			threadexitsall(0);
+		}
+	}
+
+	sysfatal("couldn't find arena %s\n", aname);	/* reached only when no arena matched */
+}
diff --git a/src/cmd/venti/srv/round.c b/src/cmd/venti/srv/round.c
new file mode 100644
index 0000000..bbf4a47
--- /dev/null
+++ b/src/cmd/venti/srv/round.c
@@ -0,0 +1,102 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+void
+waitforkick(Round *r)	/* mark the current round finished, then block until another is requested */
+{
+	int n;
+
+	qlock(&r->lock);
+	r->last = r->current;	/* the round that was running is now complete */
+	assert(r->current+1 == r->next);
+	rwakeupall(&r->finish);	/* release _kickround callers sleeping on finish */
+	while(!r->doanother)
+		rsleep(&r->start);	/* doanother is set by _kickround */
+	n = r->next++;
+	r->current = n;	/* the next round is now the current one */
+	r->doanother = 0;
+	qunlock(&r->lock);
+}
+
+static void
+_kickround(Round *r, int wait)	/* both callers in this file hold r->lock around the call */
+{
+	int n;
+
+	if(!r->doanother)
+		trace(TraceProc, "kick %s", r->name);
+	r->doanother = 1;	/* request another round */
+	rwakeup(&r->start);	/* start is what waitforkick sleeps on */
+	if(wait){
+		n = r->next;	/* the round to wait for: the next one to start as of now */
+		while((int)(n - r->last) > 0){	/* compares by signed difference rather than n > r->last */
+			r->doanother = 1;
+			rwakeup(&r->start);
+			rsleep(&r->finish);	/* waitforkick wakes finish after updating r->last */
+		}
+	}
+}
+
+void
+kickround(Round *r, int wait)	/* locked wrapper around _kickround */
+{
+	qlock(&r->lock);
+	_kickround(r, wait);	/* with wait set, returns only after the requested round has finished */
+	qunlock(&r->lock);
+}
+
+void	/* reset r to its idle state, recording its name and delay */
+initround(Round *r, char *name, int delay)
+{
+	memset(r, 0, sizeof(Round));
+	/* all three rendezvous points share the round's single lock */
+	r->start.l = r->finish.l = r->delaywait.l = &r->lock;
+	r->name = name;
+	r->delaytime = delay;
+	/* counters: nothing has run yet, and round 1 is the next to start */
+	r->last = 0;
+	r->current = 0;
+	r->doanother = 0;
+	r->next = 1;
+}
+
+void
+delaykickround(Round *r)	/* ask delaykickroundproc for a kick after its delay */
+{
+	qlock(&r->lock);
+	r->delaykick = 1;	/* the flag delaykickroundproc sleeps on */
+	rwakeup(&r->delaywait);
+	qunlock(&r->lock);
+}
+
+void
+delaykickroundproc(void *v)	/* loop forever; v is the Round to serve */
+{
+	Round *r = v;
+	int n;
+
+	threadsetname("delaykickproc %s", r->name);
+	qlock(&r->lock);
+	for(;;){
+		while(r->delaykick == 0){	/* NOTE(review): nothing in this file clears delaykick again - confirm that is intended */
+			trace(TraceProc, "sleep");
+			rsleep(&r->delaywait);
+		}
+
+		n = r->next;	/* remember which round was next before sleeping */
+		qunlock(&r->lock);
+
+		trace(TraceProc, "waitround 0x%ux", (uint)n);
+		sleep(r->delaytime);	/* the lock is released for the duration of the delay */
+
+		qlock(&r->lock);
+		if(n == r->next){	/* no round started during the delay, so kick one and wait for it */
+			trace(TraceProc, "kickround 0x%ux", (uint)n);
+			_kickround(r, 1);
+		}
+
+		trace(TraceProc, "finishround 0x%ux", (uint)n);
+	}
+}
+
diff --git a/src/cmd/venti/srv/score.c b/src/cmd/venti/srv/score.c
new file mode 100644
index 0000000..0809e84
--- /dev/null
+++ b/src/cmd/venti/srv/score.c
@@ -0,0 +1,43 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+u8int zeroscore[VtScoreSize];	/* no initializer, so every byte is zero */
+
+void	/* run sha1 over the n bytes at buf, with score as the digest output */
+scoremem(u8int *score, u8int *buf, int n)
+{
+	DigestState ds;
+
+	memset(&ds, 0, sizeof(DigestState));
+	sha1(buf, n, score, &ds);
+}
+
+static int
+hexv(int c)
+{
+	/* value of hex digit c in either case, or -1 if c is not a hex digit */
+	if('0' <= c && c <= '9')
+		return c - '0';
+	c |= 'a' ^ 'A';	/* fold ASCII upper case onto lower case */
+	if('a' <= c && c <= 'f')
+		return 10 + (c - 'a');
+	return -1;
+}
+
+int	/* -1 on a bad hex digit; otherwise 1 if s ends right after the score, 0 if not */
+strscore(char *s, u8int *score)
+{
+	int hi, lo, n;
+
+	for(n = 0; n < VtScoreSize; n++){
+		hi = hexv(*s++);
+		if(hi < 0)
+			return -1;
+		lo = hexv(*s++);
+		if(lo < 0)
+			return -1;
+		score[n] = (hi << 4) + lo;
+	}
+	return *s == '\0';
+}
diff --git a/src/cmd/venti/srv/sortientry.c b/src/cmd/venti/srv/sortientry.c
new file mode 100644
index 0000000..e1fc24b
--- /dev/null
+++ b/src/cmd/venti/srv/sortientry.c
@@ -0,0 +1,376 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include <bio.h>
+
+typedef struct IEBuck	IEBuck;
+typedef struct IEBucks	IEBucks;
+
+enum
+{
+	ClumpChunks	= 32*1024	/* most ClumpInfo entries readarenainfo asks readclumpinfos for at once */
+};
+
+struct IEBuck
+{
+	u32int	head;		/* head of chain of chunks on the disk; TWID32 when the chain is empty */
+	u32int	used;		/* usage of the last chunk */
+	u64int	total;		/* total number of bytes in this bucket */
+	u8int	*buf;		/* chunk of entries for this bucket */
+};
+
+struct IEBucks
+{
+	Part	*part;
+	u64int	off;		/* offset for writing data in the partition */
+	u32int	chunks;		/* total chunks written to fd */
+	u64int	max;		/* max bytes entered in any one bucket */
+	int	bits;		/* number of bits in initial bucket sort */
+	int	nbucks;		/* 1 << bits, the number of buckets */
+	u32int	size;		/* bytes in each of the buckets chunks */
+	u32int	usable;		/* amount usable for IEntry data */
+	u8int	*buf;		/* buffer for all chunks */
+	u8int	*xbuf;		/* the allocation that initiebucks aligns buf within */
+	IEBuck	*bucks;		/* nbucks per-bucket records */
+};
+
+#define	U32GET(p)	(((p)[0]<<24)|((p)[1]<<16)|((p)[2]<<8)|(p)[3])	/* read 4 bytes at p, most significant first */
+#define	U32PUT(p,v)	(p)[0]=(v)>>24;(p)[1]=(v)>>16;(p)[2]=(v)>>8;(p)[3]=(v)	/* store v the same way; expands to several statements */
+
+static IEBucks	*initiebucks(Part *part, int bits, u32int size);
+static int	flushiebuck(IEBucks *ib, int b, int reset);
+static int	flushiebucks(IEBucks *ib);
+static u32int	sortiebuck(IEBucks *ib, int b);
+static u64int	sortiebucks(IEBucks *ib);
+static int	sprayientry(IEBucks *ib, IEntry *ie);
+static u32int	readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom *b);
+static u32int	readiebuck(IEBucks *ib, int b);
+static void	freeiebucks(IEBucks *ib);
+
+/*
+ * build a sorted file with all IEntries which should be in ix.
+ * assumes the arenas' directories are up to date.
+ * reads each, converts the entries to index entries,
+ * and sorts them.
+ */
+u64int
+sortrawientries(Index *ix, Part *tmp, u64int *base, Bloom *bloom)	/* returns the entry count, or TWID64 on failure */
+{
+	IEBucks *ib;
+	u64int clumps, sorted;
+	u32int n;
+	int i, ok;
+
+//ZZZ should allow configuration of bits, bucket size
+	ib = initiebucks(tmp, 8, 64*1024);	/* 8 bits (256 buckets), 64K chunks, spilled to tmp */
+	if(ib == nil){
+		seterr(EOk, "can't create sorting buckets: %r");
+		return TWID64;
+	}
+	ok = 0;
+	clumps = 0;
+	fprint(2, "constructing entry list\n");
+	for(i = 0; i < ix->narenas; i++){
+		n = readarenainfo(ib, ix->arenas[i], ix->amap[i].start, bloom);
+		if(n == TWID32){	/* readarenainfo's failure value */
+			ok = -1;
+			break;
+		}
+		clumps += n;
+	}
+	fprint(2, "sorting %lld entries\n", clumps);
+	if(ok == 0){
+		sorted = sortiebucks(ib);
+		*base = (u64int)ib->chunks * ib->size;	/* where sortiebucks began writing; *base is set only on this path */
+		if(sorted != clumps){	/* also catches sortiebucks returning TWID64 */
+			fprint(2, "sorting messed up: clumps=%lld sorted=%lld\n", clumps, sorted);
+			ok = -1;
+		}
+	}
+	freeiebucks(ib);
+	if(ok < 0)
+		return TWID64;
+	return clumps;
+}
+
+#define CHECK(cis)	if(((ulong*)cis)[-4] != 0xA110C09) xabort();	/* debugging aid: inspects the word 4 ulongs before cis */
+
+void
+xabort(void)	/* force a fault by storing through a nil pointer */
+{
+	int *x;
+
+	x = 0;
+	*x = 0;
+}
+
+/*
+ * read in all of the arena's clump directory,
+ * convert to IEntry format, and bucket sort based
+ * on the first few bits.
+ */
+static u32int
+readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom *b)	/* returns entries sprayed, or TWID32 on a read failure */
+{
+	IEntry ie;
+	ClumpInfo *ci, *cis;
+	u32int clump;
+	int i, n, ok, nskip;
+//	static Biobuf bout;
+
+//ZZZ remove fprint?
+//fprint(2, "ra %s %d %d\n", arena->name, arena->memstats.clumps, arena->diskstats.clumps);
+	if(arena->memstats.clumps)
+		fprint(2, "\tarena %s: %d entries\n", arena->name, arena->memstats.clumps);
+	else
+		fprint(2, "[%s] ", arena->name);
+
+	cis = MKN(ClumpInfo, ClumpChunks);
+	ok = 0;
+	nskip = 0;	/* counts VtCorruptType clumps, which are not sprayed */
+	memset(&ie, 0, sizeof(IEntry));
+//	Binit(&bout, 1, OWRITE);
+	for(clump = 0; clump < arena->memstats.clumps; clump += n){
+		n = ClumpChunks;	/* read the directory in chunks of at most ClumpChunks */
+		if(n > arena->memstats.clumps - clump)
+			n = arena->memstats.clumps - clump;
+		if(readclumpinfos(arena, clump, cis, n) != n){
+			seterr(EOk, "arena directory read failed: %r");
+			ok = -1;
+			break;
+		}
+
+		for(i = 0; i < n; i++){
+			ci = &cis[i];
+			ie.ia.type = ci->type;
+			ie.ia.size = ci->uncsize;
+			ie.ia.addr = a;	/* a is the running address, starting from the caller's value */
+			a += ci->size + ClumpSize;
+			ie.ia.blocks = (ci->size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;	/* rounded up */
+			scorecp(ie.score, ci->score);
+		//	Bprint(&bout, "%22lld %V %3d %5d\n",
+		//		ie.ia.addr, ie.score, ie.ia.type, ie.ia.size);
+			if(ci->type == VtCorruptType){
+			//	print("! %V %22lld %3d %5d %3d\n",
+			//		ie.score, ie.ia.addr, ie.ia.type, ie.ia.size, ie.ia.blocks);
+				nskip++;
+			}else
+				sprayientry(ib, &ie);	/* NOTE(review): the return value is ignored, so a failed spray goes unnoticed here */
+			markbloomfilter(b, ie.score);	/* runs for corrupt clumps too */
+		}
+	}
+//	Bterm(&bout);
+	free(cis);
+	if(ok < 0)
+		return TWID32;
+	return clump - nskip;
+}
+
+/*
+ * initialize the external bucket sorting data structures
+ */
+static IEBucks*
+initiebucks(Part *part, int bits, u32int size)	/* 1<<bits buckets with chunks of size bytes, spilled to part */
+{
+	IEBucks *ib;
+	int i;
+
+	ib = MKZ(IEBucks);
+	if(ib == nil){
+		seterr(EOk, "out of memory");
+		return nil;
+	}
+	ib->bits = bits;
+	ib->nbucks = 1 << bits;
+	ib->size = size;
+	ib->usable = (size - U32Size) / IEntrySize * IEntrySize;	/* whole entries that fit beside the U32Size link */
+	ib->bucks = MKNZ(IEBuck, ib->nbucks);
+	if(ib->bucks == nil){
+		seterr(EOk, "out of memory allocation sorting buckets");
+		freeiebucks(ib);
+		return nil;
+	}
+	ib->xbuf = MKN(u8int, size * ((1 << bits)+1));	/* one extra chunk leaves room to align buf */
+	ib->buf = (u8int*)(((ulong)ib->xbuf+size-1)&~(ulong)(size-1));	/* round up to a multiple of size; the mask needs size to be a power of two */
+	if(ib->buf == nil){	/* a nil xbuf rounds to nil, so this also covers a failed allocation */
+		seterr(EOk, "out of memory allocating sorting buckets' buffers");
+		freeiebucks(ib);
+		return nil;
+	}
+	for(i = 0; i < ib->nbucks; i++){
+		ib->bucks[i].head = TWID32;	/* empty chain */
+		ib->bucks[i].buf = &ib->buf[i * size];	/* bucket i owns the i'th chunk of buf */
+	}
+	ib->part = part;
+	return ib;
+}
+
+static void
+freeiebucks(IEBucks *ib)	/* release ib and what it points to; nil is accepted */
+{
+	if(ib == nil)
+		return;
+	free(ib->bucks);
+	free(ib->buf);	/* NOTE(review): initiebucks sets buf to an aligned address inside xbuf, and xbuf is not freed here - confirm */
+	free(ib);
+}
+
+/*
+ * initial sort: put the entry into the correct bucket
+ */
+static int
+sprayientry(IEBucks *ib, IEntry *ie)
+{
+	IEBuck *bk;
+	int which;
+
+	/* choose the bucket from the score, then append the packed entry to it */
+	which = hashbits(ie->score, ib->bits);
+	bk = &ib->bucks[which];
+	if(bk->used + IEntrySize > ib->usable){
+		/* a full bucket is normally flushed below; a failed flush leaves it full */
+		seterr(EOk, "out of space in bucket");
+		return -1;
+	}
+	packientry(ie, &bk->buf[bk->used]);
+	bk->used += IEntrySize;
+	if(bk->used + IEntrySize > ib->usable)	/* no room for one more entry */
+		return flushiebuck(ib, which, 1);
+	return 0;
+}
+
+/*
+ * finish sorting:
+ * for each bucket, read it in and sort it
+ * write out the final file
+ */
+static u64int
+sortiebucks(IEBucks *ib)	/* returns the number of entries written, or TWID64 on failure */
+{
+	u64int tot;
+	u32int n;
+	int i;
+
+	if(flushiebucks(ib) < 0)
+		return TWID64;
+	for(i = 0; i < ib->nbucks; i++)
+		ib->bucks[i].buf = nil;	/* the per-bucket chunks are about to be freed */
+	ib->off = (u64int)ib->chunks * ib->size;	/* sorted output starts right after the spilled chunks */
+	free(ib->xbuf);
+	ib->xbuf = nil;	/* do not leave a dangling pointer behind */
+if(0){
+	fprint(2, "ib->max = %lld\n", ib->max);
+	fprint(2, "ib->chunks = %ud\n", ib->chunks);
+}
+	ib->buf = MKN(u8int, ib->max + U32Size);	/* room for the largest bucket plus one chunk link */
+	if(ib->buf == nil){
+		seterr(EOk, "out of memory allocating final sorting buffer; try more buckets");
+		return TWID64;
+	}
+	tot = 0;
+	for(i = 0; i < ib->nbucks; i++){
+		n = sortiebuck(ib, i);
+		if(n == TWID32)
+			return TWID64;
+		if(n != ib->bucks[i].total/IEntrySize)	/* reported but not treated as fatal */
+			fprint(2, "bucket %d changed count %d => %d\n", 
+				i, (int)(ib->bucks[i].total/IEntrySize), n);
+		tot += n;
+	}
+	return tot;
+}
+
+/*
+ * sort bucket b of ib and write it to the output file
+ */
+static u32int	/* returns the number of entries in bucket b, or TWID32 on failure */
+sortiebuck(IEBucks *ib, int b)
+{
+	u32int nent, nbytes;
+
+	if((nent = readiebuck(ib, b)) == TWID32)
+		return TWID32;
+	qsort(ib->buf, nent, IEntrySize, ientrycmp);
+	nbytes = nent * IEntrySize;
+	if(writepart(ib->part, ib->off, ib->buf, nbytes) < 0){
+		seterr(EOk, "can't write sorted bucket: %r");
+		return TWID32;
+	}
+	ib->off += nbytes;
+	return nent;
+}
+
+/*
+ * write out a single bucket
+ */
+static int
+flushiebuck(IEBucks *ib, int b, int reset)	/* returns 0, or -1 if the write fails */
+{
+	u32int n;
+
+	if(ib->bucks[b].used == 0)
+		return 0;	/* nothing buffered, nothing to write */
+	n = ib->bucks[b].used;
+	U32PUT(&ib->bucks[b].buf[n], ib->bucks[b].head);	/* link to the previous chunk, stored right after the entries */
+	n += U32Size;
+	USED(n);	/* n is not read again; the write below always covers ib->size bytes */
+	if(writepart(ib->part, (u64int)ib->chunks * ib->size, ib->bucks[b].buf, ib->size) < 0){
+		seterr(EOk, "can't write sorting bucket to file: %r");
+xabort();	/* NOTE(review): xabort stores through nil, so the return below is normally never reached - debugging leftover? */
+		return -1;
+	}
+	ib->bucks[b].head = ib->chunks++;	/* the chunk just written becomes the new head */
+	ib->bucks[b].total += ib->bucks[b].used;
+	if(reset)
+		ib->bucks[b].used = 0;	/* with reset clear, used is kept; readiebuck reads it later */
+	return 0;
+}
+
+/*
+ * write out all of the buckets, and compute
+ * the maximum size of any bucket
+ */
+static int
+flushiebucks(IEBucks *ib)
+{
+	IEBuck *bk;
+
+	for(bk = ib->bucks; bk < ib->bucks + ib->nbucks; bk++){
+		if(flushiebuck(ib, bk - ib->bucks, 0) < 0)
+			return -1;
+		if(ib->max < bk->total)	/* track the largest bucket seen so far */
+			ib->max = bk->total;
+	}
+	return 0;
+}
+
+/*
+ * read in the chained buffers for bucket b,
+ * and return its total number of IEntries
+ */
+static u32int
+readiebuck(IEBucks *ib, int b)	/* returns the number of entries read, or TWID32 on a read failure */
+{
+	u32int head, m, n;
+
+	head = ib->bucks[b].head;	/* the most recently written chunk */
+	n = 0;
+	m = ib->bucks[b].used;	/* data bytes in that chunk... */
+	if(m == 0)
+		m = ib->usable;	/* ...or a full chunk when used is zero */
+//	if(ib->bucks[b].total)
+//		fprint(2, "\tbucket %d: %d entries\n", b, ib->bucks[b].total/IEntrySize);
+	while(head != TWID32){	/* follow the links until the end-of-chain marker */
+		if(readpart(ib->part, (u64int)head * ib->size, &ib->buf[n], m + U32Size) < 0){
+			seterr(EOk, "can't read index sort bucket: %r");
+			return TWID32;
+		}
+		n += m;	/* advance past the entries only, so the next read overwrites the link */
+		head = U32GET(&ib->buf[n]);
+		m = ib->usable;	/* every earlier chunk is read as a full one */
+	}
+	if(n != ib->bucks[b].total)	/* both sides are byte counts; the message converts them to entries */
+		fprint(2, "\tbucket %d: expected %d entries, got %d\n",
+			b, (int)ib->bucks[b].total/IEntrySize, n/IEntrySize);
+	return n / IEntrySize;
+}
diff --git a/src/cmd/venti/srv/stats.c b/src/cmd/venti/srv/stats.c
new file mode 100644
index 0000000..874f7d2
--- /dev/null
+++ b/src/cmd/venti/srv/stats.c
@@ -0,0 +1,212 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+int collectstats = 1;	/* when zero, addstat and addstat2 return without counting */
+
+/* keep in sync with dat.h:/NStat */
+Statdesc statdesc[NStat] =	/* one descriptive name per counter */
+{
+	{ "rpc total", },
+	{ "rpc reads", },
+	{ "rpc reads ok", },
+	{ "rpc reads failed", },
+	{ "rpc read bytes", },
+	{ "rpc read time", },
+	{ "rpc read cached", },
+	{ "rpc read cached time", },
+	{ "rpc read uncached", },
+	{ "rpc read uncached time", },
+	
+	{ "rpc writes", },
+	{ "rpc writes new", },
+	{ "rpc writes old", },
+	{ "rpc writes failed", },
+	{ "rpc write bytes", },
+	{ "rpc write time", },
+	{ "rpc write new time", },
+	{ "rpc write old time", },
+
+	{ "lump cache hits", },
+	{ "lump cache misses", },
+	{ "lump cache reads", },
+	{ "lump cache writes", },
+	{ "lump cache size", },
+	{ "lump cache stall", },
+	{ "lump cache read time", },
+
+	{ "disk cache hits", },
+	{ "disk cache misses", },
+	{ "disk cache lookups", },
+	{ "disk cache reads", },
+	{ "disk cache writes", },
+	{ "disk cache dirty", },
+	{ "disk cache size", },
+	{ "disk cache flushes", },
+	{ "disk cache stalls", },
+	{ "disk cache lookup time", },
+
+	{ "disk block stalls", },
+	{ "lump stalls", },
+
+	{ "index cache hits", },
+	{ "index cache misses", },
+	{ "index cache reads", },
+	{ "index cache writes", },
+	{ "index cache fills", },
+	{ "index cache prefetches", },
+	{ "index cache dirty", },
+	{ "index cache size", },
+	{ "index cache flushes", },
+	{ "index cache stalls", },
+	{ "index cache read time", },
+
+	{ "bloom filter hits", },
+	{ "bloom filter misses", },
+	{ "bloom filter false misses", },
+	{ "bloom filter lookups", },
+	{ "bloom filter ones", },
+	{ "bloom filter bits", },
+	{ "bloom filter lookup time", },
+
+	{ "arena block reads", },
+	{ "arena block read bytes", },
+	{ "arena block writes", },
+	{ "arena block write bytes", },
+
+	{ "isect block reads", },
+	{ "isect block read bytes", },
+	{ "isect block writes", },
+	{ "isect block write bytes", },
+
+	{ "sum reads", },
+	{ "sum read bytes", },
+};
+
+QLock statslock;	/* held by setstat, addstat and addstat2 while they update stats */
+Stats stats;	/* the live counters */
+Stats *stathist;	/* circular history of nstathist snapshots, filled by statsproc */
+int nstathist;	/* number of slots in stathist; set by statsinit */
+ulong statind;
+ulong stattime;	/* snapshots taken so far; stattime%nstathist is the slot written next */
+
+void
+statsproc(void *v)	/* loop forever, snapshotting stats into stathist */
+{
+	USED(v);
+
+	for(;;){
+		stats.now = time(0);	/* stamp the snapshot; binstats searches on this field */
+		stathist[stattime%nstathist] = stats;	/* copied without taking statslock */
+		stattime++;
+		sleep(1000);
+	}
+}
+
+void
+statsinit(void)	/* allocate the history and hand statsproc to vtproc */
+{
+	nstathist = 90000;	/* history length in snapshots; statsproc takes one per sleep(1000) */
+	stathist = MKNZ(Stats, nstathist);
+	vtproc(statsproc, nil);	/* presumably runs statsproc as its own proc - confirm */
+}
+
+void
+setstat(int index, long val)	/* overwrite counter stats.n[index]; unlike addstat, collectstats is not consulted */
+{
+	qlock(&statslock);
+	stats.n[index] = val;
+	qunlock(&statslock);
+}
+
+void	/* add inc to counter stats.n[index]; does nothing when collectstats is zero */
+addstat(int index, int inc)
+{
+	if(collectstats){
+		qlock(&statslock);
+		stats.n[index] += inc;
+		qunlock(&statslock);
+	}
+}
+
+void	/* update two counters under one hold of statslock; does nothing when collectstats is zero */
+addstat2(int index, int inc, int index1, int inc1)
+{
+	if(collectstats){
+		qlock(&statslock);
+		stats.n[index] += inc;
+		stats.n[index1] += inc1;
+		qunlock(&statslock);
+	}
+}
+
+void
+printstats(void)	/* empty: currently prints nothing */
+{
+}
+
+void
+binstats(long (*fn)(Stats *s0, Stats *s1, void *arg), void *arg,	/* fn maps two consecutive snapshots to one value */
+	long t0, long t1, Statbin *bin, int nbin)	/* fills bin[0..nbin-1] for the time range t0..t1 */
+{
+	long t, xt0, te, v;
+	int i, j, lo, hi, m, oj;
+	vlong tot;
+	Statbin *b;
+	
+	t = stats.now;
+	
+	/* negative times mean relative to now. */
+	if(t0 <= 0)
+		t0 += t;
+	if(t1 <= 0)
+		t1 += t;
+	/* ten minute range if none given */
+	if(t1 <= t0)
+		t0 = t1 - 60*10;
+	if(0) fprint(2, "stats %ld-%ld\n", t0, t1);
+	
+	/* binary search to find t0-1 or close */
+	lo = stattime;	/* stattime%nstathist is the next slot to be overwritten */
+	hi = stattime+nstathist;	/* one full lap of the circular history */
+	while(lo+1 < hi){
+		m = (lo+hi)/2;
+		if(stathist[m%nstathist].now >= t0)
+			hi = m;
+		else
+			lo = m;
+	}
+	xt0 = stathist[lo%nstathist].now;
+	if(0) fprint(2, "bsearch found %ld\n", xt0);
+	if(xt0 >= t1){
+		/* no samples */
+		memset(bin, 0, nbin*sizeof bin[0]);
+		return;
+	}
+
+	hi = stattime+nstathist;
+	te = t0;
+	j = lo+1;	/* first snapshot after the one the search settled on */
+	for(i=0; i<nbin; i++){
+		t = te;	/* t and te are this bin's time bounds; t is used only in the debug print */
+		te = t0 + (t1-t0)*i/nbin;	/* NOTE(review): equals t0 when i is 0; looks like i+1 may have been intended - confirm */
+		b = &bin[i];
+		memset(b, 0, sizeof *b);
+		tot = 0;
+		oj = j;
+		for(; j<hi && stathist[j%nstathist].now<te; j++){	/* consume snapshots stamped before te */
+			v = fn(&stathist[(j-1)%nstathist], &stathist[j%nstathist], arg);
+			if(b->nsamp==0 || v < b->min)
+				b->min = v;
+			if(b->nsamp==0 || v > b->max)
+				b->max = v;
+			tot += v;
+			b->nsamp++;
+		}
+		if(0) fprint(2, "bin%d: %ld to %ld; %d to %d - %d samples\n", i, t, te, oj, j, b->nsamp);
+		if(b->nsamp)
+			b->avg = tot / b->nsamp;
+		if(b->nsamp==0 && i>0)
+			*b = bin[i-1];	/* an empty bin repeats the previous bin */
+	}	
+}
diff --git a/src/cmd/venti/srv/stdinc.h b/src/cmd/venti/srv/stdinc.h
new file mode 100644
index 0000000..3fd06cc
--- /dev/null
+++ b/src/cmd/venti/srv/stdinc.h
@@ -0,0 +1,9 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <flate.h>
+#include <libsec.h>
+#include <thread.h>
+#include <httpd.h>
+#include <draw.h>
+#include <memdraw.h>
diff --git a/src/cmd/venti/srv/syncarena.c b/src/cmd/venti/srv/syncarena.c
new file mode 100644
index 0000000..d11ca4f
--- /dev/null
+++ b/src/cmd/venti/srv/syncarena.c
@@ -0,0 +1,174 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+static int	writeclumphead(Arena *arena, u64int aa, Clump *cl);
+static int	writeclumpmagic(Arena *arena, u64int aa, u32int magic);
+
+int	/* 0 if c and d agree on type, size, uncsize and score; 1 otherwise */
+clumpinfocmp(ClumpInfo *c, ClumpInfo *d)
+{
+	if(c->type != d->type || c->size != d->size
+	|| c->uncsize != d->uncsize)
+		return 1;
+	return scorecmp(c->score, d->score) != 0;
+}
+
+/*
+ * synchronize the clump info directory with
+ * the clumps actually stored in the arena.
+ * the directory should be at least as up to date
+ * as the arena's trailer.
+ *
+ * checks/updates at most n clumps.
+ *
+ * returns 0 if ok, flags if error occurred
+ */
+int
+syncarena(Arena *arena, u64int start, u32int n, int zok, int fix)	/* returns 0 or an OR of Sync* flags */
+{
+	ZBlock *lump;
+	Clump cl;
+	ClumpInfo ci;
+	static ClumpInfo zci = { .type = -1 };	/* compared against below to detect an unwritten directory entry */
+	u8int score[VtScoreSize];
+	u64int uncsize, used, aa;
+	u32int clump, clumps, cclumps, magic;
+	int err, flush, broken;
+	AState as;
+
+	used = arena->memstats.used;	/* remember the stats on entry to detect changes at the end */
+	clumps = arena->memstats.clumps;
+	cclumps = arena->memstats.cclumps;
+	uncsize = arena->memstats.uncsize;
+	trace(TraceProc, "syncarena start");
+	flush = 0;
+	err = 0;
+	for(; n; n--){
+		aa = arena->memstats.used;	/* address of the next clump not yet accounted for */
+		clump = arena->memstats.clumps;
+		magic = clumpmagic(arena, aa);
+		if(magic == ClumpFreeMagic)
+			break;	/* free marker: no more clumps */
+		if(magic != arena->clumpmagic){
+			fprint(2, "%s: illegal clump magic number=%#8.8ux at clump=%d\n", arena->name, magic, clump);
+			/* err |= SyncDataErr; */
+			if(fix && writeclumpmagic(arena, aa, ClumpFreeMagic) < 0){
+				fprint(2, "can't write corrected clump free magic: %r");
+				err |= SyncFixErr;
+			}
+			break;
+		}
+
+		broken = 0;
+		lump = loadclump(arena, aa, 0, &cl, score, 0);
+		if(lump == nil){
+			fprint(2, "%s: clump=%d failed to read correctly: %r\n", arena->name, clump);
+			break;
+			err |= SyncDataErr;	/* NOTE(review): unreachable after the break above - confirm which was intended */
+		}else if(cl.info.type != VtCorruptType){
+			scoremem(score, lump->data, cl.info.uncsize);	/* recompute the score from the data */
+			if(scorecmp(cl.info.score, score) != 0){
+				/* ignore partially written block */
+				if(cl.encoding == ClumpENone)
+					break;
+				fprint(2, "%s: clump=%d has mismatched score\n", arena->name, clump);
+				err |= SyncDataErr;
+				broken = 1;
+			}else if(vttypevalid(cl.info.type) < 0){
+				fprint(2, "%s: clump=%d has invalid type %d", arena->name, clump, cl.info.type);
+				err |= SyncDataErr;
+				broken = 1;
+			}
+			if(broken && fix){
+				cl.info.type = VtCorruptType;	/* mark the clump corrupt on disk */
+				if(writeclumphead(arena, aa, &cl) < 0){
+					fprint(2, "%s: can't write corrected clump header: %r", arena->name);
+					err |= SyncFixErr;
+				}
+			}
+		}
+		freezblock(lump);
+		arena->memstats.used += ClumpSize + cl.info.size;	/* account for this clump's header and data */
+
+		arena->memstats.clumps++;
+		if(!broken && readclumpinfo(arena, clump, &ci)<0){
+			fprint(2, "%s: arena directory read failed\n", arena->name);
+			broken = 1;
+		}else if(!broken && clumpinfocmp(&ci, &cl.info)!=0){	/* directory entry disagrees with the clump header */
+			if(clumpinfocmp(&ci, &zci) == 0){
+				err |= SyncCIZero;
+				if(!zok)
+					fprint(2, "%s: unwritten clump info for clump=%d\n", arena->name, clump);
+			}else{
+				err |= SyncCIErr;
+				fprint(2, "%s: bad clump info for clump=%d\n", arena->name, clump);
+				fprint(2, "\texpected score=%V type=%d size=%d uncsize=%d\n",
+					cl.info.score, cl.info.type, cl.info.size, cl.info.uncsize);
+				fprint(2, "\tfound score=%V type=%d size=%d uncsize=%d\n",
+					ci.score, ci.type, ci.size, ci.uncsize);
+			}
+			broken = 1;
+		}
+		if(broken && fix){
+			flush = 1;
+			ci = cl.info;	/* rewrite the directory entry from the clump header */
+			if(writeclumpinfo(arena, clump, &ci) < 0){
+				fprint(2, "%s: can't write correct clump directory: %r\n", arena->name);
+				err |= SyncFixErr;
+			}
+		}
+		trace(TraceProc, "syncarena unindexed clump %V %d", cl.info.score, arena->memstats.clumps);
+
+		arena->memstats.uncsize += cl.info.uncsize;
+		if(cl.info.size < cl.info.uncsize)
+			arena->memstats.cclumps++;	/* stored smaller than its uncompressed size */
+	}
+
+	if(flush){
+		trace(TraceProc, "syncarena flush");
+		arena->wtime = now();
+		if(arena->ctime == 0 && arena->memstats.clumps)
+			arena->ctime = arena->wtime;
+		flushdcache();
+	}
+
+	if(used != arena->memstats.used	/* any stat moved since entry */
+	|| clumps != arena->memstats.clumps
+	|| cclumps != arena->memstats.cclumps
+	|| uncsize != arena->memstats.uncsize)
+		err |= SyncHeader;
+	if(start && (err&SyncHeader)){	/* skipped when start is 0 */
+		trace(TraceProc, "syncarena setdcachestate");
+		as.arena = arena;
+		as.aa = start+arena->memstats.used;
+		as.stats = arena->memstats;
+		setdcachestate(&as);
+	}
+
+	return err;
+}
+
+static int
+writeclumphead(Arena *arena, u64int aa, Clump *cl)
+{
+	ZBlock *hdr;
+	int ok;
+
+	if((hdr = alloczblock(ClumpSize, 0, arena->blocksize)) == nil)
+		return -1;
+	/* pack cl into the scratch block, then write ClumpSize bytes at aa */
+	ok = packclump(cl, hdr->data, arena->clumpmagic) >= 0
+		&& writearena(arena, aa, hdr->data, ClumpSize) == ClumpSize;
+	freezblock(hdr);
+	return ok ? 0 : -1;
+}
+
+static int	/* write magic at aa; 0 on success, -1 otherwise, like writeclumphead */
+writeclumpmagic(Arena *arena, u64int aa, u32int magic)
+{
+	u8int buf[U32Size];
+
+	packmagic(magic, buf);
+	return writearena(arena, aa, buf, U32Size) == U32Size ? 0 : -1;	/* the caller tests for < 0 */
+}
diff --git a/src/cmd/venti/srv/syncindex.c b/src/cmd/venti/srv/syncindex.c
new file mode 100644
index 0000000..b35ca2a
--- /dev/null
+++ b/src/cmd/venti/srv/syncindex.c
@@ -0,0 +1,73 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+static	int	verbose;	/* counts -v flags; when set threadmain calls printindex on fd 2 */
+void	/* print the synopsis, covering every option threadmain parses, and exit with status "usage" */
+usage(void)
+{
+	fprint(2, "usage: syncindex [-fv] [-B blockcachesize] [-I indexcachesize] config\n");
+	threadexitsall("usage");
+}
+
+Config conf;	/* handed to initventi together with the config file name in threadmain */
+
+void
+threadmain(int argc, char *argv[])	/* syncindex [-fv] [-B blockcachesize] [-I indexcachesize] config */
+{
+	u32int bcmem, icmem;
+	int fix;
+
+	fix = 0;
+	bcmem = 0;
+	icmem = 0;
+	ARGBEGIN{
+	case 'B':
+		bcmem = unittoull(EARGF(usage()));
+		break;
+	case 'I':
+		icmem = unittoull(EARGF(usage()));
+		break;
+	case 'f':
+		fix++;
+		break;
+	case 'v':
+		verbose++;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(!fix)
+		readonly = 1;	/* without -f nothing may be written */
+
+	if(argc != 1)
+		usage();
+
+	if(initventi(argv[0], &conf) < 0)
+		sysfatal("can't init venti: %r");
+
+	if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
+		bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
+	if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
+	initdcache(bcmem);
+	initlumpcache(1*1024*1024, 1024/8);
+	icmem = u64log2(icmem / (sizeof(IEntry)+sizeof(IEntry*)) / ICacheDepth);	/* from here on icmem is a log2, not a byte count */
+	if(icmem < 4)
+		icmem = 4;
+	if(1) fprint(2, "initialize %d bytes of index cache for %d index entries\n",
+		(int)((sizeof(IEntry)+sizeof(IEntry*)) * (1 << icmem) * ICacheDepth),	/* sizeof is not an int; %d needs one */
+		(1 << icmem) * ICacheDepth);
+	initicache(icmem, ICacheDepth);
+	initicachewrite();
+	if(mainindex->bloom)
+		startbloomproc(mainindex->bloom);
+
+	if(verbose)
+		printindex(2, mainindex);
+	if(syncindex(mainindex, fix, 1, 0) < 0)
+		sysfatal("failed to sync index=%s: %r\n", mainindex->name);
+
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/syncindex0.c b/src/cmd/venti/srv/syncindex0.c
new file mode 100644
index 0000000..12b69ed
--- /dev/null
+++ b/src/cmd/venti/srv/syncindex0.c
@@ -0,0 +1,167 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+enum
+{
+	/* size of the per-batch arrays allocated by syncarenaindex */
+	ClumpChunks	= 32*1024
+};
+
+/* counters bumped by syncarenaindex and reported by syncindex */
+static int missing, wrong;
+
+/*
+ * shell sort is plenty good enough
+ * because we're going to do a bunch of disk i/o's
+ *
+ * sorts indirectly: only the first n entries of the index
+ * array s are permuted, so that ci[s[0]], ci[s[1]], ... are in
+ * non-decreasing memcmp order of score.  ci is never modified.
+ */
+static void
+sortclumpinfo(ClumpInfo *ci, int *s, int n)
+{
+	int i, j, m, t;
+
+	/* m is the gap; the sequence always passes through 1 before reaching 0 */
+	for(m = (n + 3) / 5; m > 0; m = (m + 1) / 3){
+		/* walk from the right, sinking s[i] into the already ordered tail */
+		for(i = n - m; i-- > 0;){
+			for(j = i + m; j < n; j += m){
+				if(memcmp(ci[s[j - m]].score, ci[s[j]].score, VtScoreSize) <= 0)
+					break;
+				t = s[j];
+				s[j] = s[j - m];
+				s[j - m] = t;
+			}
+		}
+	}
+}
+
+/*
+ * make sure ix has an entry for every clump of arena from clump
+ * number clump onward; a is the index address of that first clump.
+ *
+ * the clump directory is read in batches of at most ClumpChunks
+ * entries and each batch is visited in score order.
+ * when check is zero every clump is treated as needing an entry;
+ * otherwise each one is looked up, and only missing or mismatched
+ * entries (counted in missing and wrong) need one.
+ * needed entries are inserted only if fix is set; if it is not,
+ * the function fails instead.
+ *
+ * sets *pflush to 1 if anything was inserted (never clears it).
+ * returns 0 on success, -1 if the arena directory could not be
+ * read or an entry was needed while fix was zero.
+ */
+int
+syncarenaindex(Index *ix, Arena *arena, u32int clump, u64int a, int fix, int *pflush, int check)
+{
+	Packet *pack;
+	IEntry ie;
+	IAddr ia;
+	ClumpInfo *ci, *cis;
+	u64int *addrs;
+	int i, n, ok, *s, flush;
+
+	trace(TraceProc, "syncarenaindex enter");
+
+	flush = 0;
+	cis = MKN(ClumpInfo, ClumpChunks);
+	addrs = MKN(u64int, ClumpChunks);
+	s = MKN(int, ClumpChunks);
+	ok = 0;
+	for(; clump < arena->memstats.clumps; clump += n){
+		n = ClumpChunks;
+		if(n > arena->memstats.clumps - clump)
+			n = arena->memstats.clumps - clump;
+		n = readclumpinfos(arena, clump, cis, n);
+		if(n <= 0){
+			fprint(2, "arena directory read failed\n");
+			ok = -1;
+			break;
+		}
+
+		/* addresses must be assigned in directory order, before sorting */
+		for(i = 0; i < n; i++){
+			addrs[i] = a;
+			a += cis[i].size + ClumpSize;
+			s[i] = i;
+		}
+
+		sortclumpinfo(cis, s, n);
+
+		for(i = 0; i < n; i++){
+			ci = &cis[s[i]];
+			ia.type = ci->type;
+			ia.size = ci->uncsize;
+			ia.addr = addrs[s[i]];
+			ia.blocks = (ci->size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
+
+			if(!check)
+				goto Add;
+			if(loadientry(ix, ci->score, ci->type, &ie) < 0){
+				trace(TraceProc, "syncarenaindex missing block %V.%d", ci->score, ci->type);
+				missing++;
+				if(0)	fprint(2, "missing block type=%d score=%V\n", ci->type, ci->score);
+			}else if(iaddrcmp(&ia, &ie.ia) != 0){
+				trace(TraceProc, "syncarenaindex mismatched entry");
+				fprint(2, "\nmismatched index entry and clump at %d\n", clump + i);
+				fprint(2, "\tclump: type=%d size=%d blocks=%d addr=%lld\n", ia.type, ia.size, ia.blocks, ia.addr);
+				fprint(2, "\tindex: type=%d size=%d block=%d addr=%lld\n", ie.ia.type, ie.ia.size, ie.ia.blocks, ie.ia.addr);
+				/*
+				 * if the address already in the index can be read,
+				 * the lump is merely stored twice and the entry stays.
+				 * test the pointer before freeing it, not after.
+				 */
+				pack = readlump(ie.score, ie.ia.type, ie.ia.size, nil);
+				if(pack != nil){
+					packetfree(pack);
+					fprint(2, "duplicated lump\n");
+					continue;
+				}
+				wrong++;
+			}else
+				continue;
+		Add:
+			if(!fix){
+				ok = -1;
+				continue;
+			}
+			flush = 1;
+			trace(TraceProc, "syncarenaindex insert %V", ci->score);
+			insertscore(ci->score, &ia, 1);
+		}
+
+		if(0 && clump / 1000 != (clump + n) / 1000)
+			fprint(2, ".");
+	}
+	free(cis);
+	free(addrs);
+	free(s);
+	if(flush){
+		flushdcache();
+		*pflush = 1;
+	}
+	return ok;
+}
+
+/*
+ * sync every arena of ix with syncarena, then bring the index
+ * up to date with syncarenaindex.
+ *	fix		allow repairs: passed to syncarena and
+ *			syncarenaindex, and required for any write-back
+ *	mustflush	when something was inserted, flush the index
+ *			cache before returning instead of just kicking it
+ *	check		passed through to syncarenaindex
+ * returns 0 if all went well, -1 if any arena failed or the
+ * index header could not be written back.
+ */
+int
+syncindex(Index *ix, int fix, int mustflush, int check)
+{
+	Arena *arena;
+	u64int a;
+	u32int clump;
+	int i, e, e1, ok, ok1, flush;
+
+	ok = 0;
+	flush = 0;
+	for(i = 0; i < ix->narenas; i++){
+		trace(TraceProc, "syncindex start %d", i);
+		arena = ix->arenas[i];
+		/*
+		 * note the clump count and space used before syncarena runs;
+		 * syncarenaindex starts from there (presumably syncarena can
+		 * advance memstats -- confirm against syncarena).
+		 */
+		clump = arena->memstats.clumps;
+		a = arena->memstats.used;
+		e = syncarena(arena, ix->amap[i].start, TWID32, fix, fix);
+		e1 = e;
+		/* when fixing, these conditions do not count as failures */
+		if(fix)
+			e1 &= ~(SyncHeader|SyncCIZero|SyncCIErr);
+		if(e1 == SyncHeader)
+			fprint(2, "arena %s: header is out-of-date\n", arena->name);
+		if(e1)
+			ok = -1;
+		else{
+			ok1 = syncarenaindex(ix, arena, clump, a + ix->amap[i].start, fix, &flush, check);
+			if(ok1 < 0)
+				fprint(2, "syncarenaindex: %r\n");
+			/* write the arena header back only once the index agrees with it */
+			if(fix && ok1==0 && (e & SyncHeader) && wbarena(arena) < 0)
+				fprint(2, "arena=%s header write failed: %r\n", arena->name);
+			ok |= ok1;
+		}
+	}
+	if(missing || wrong)
+		fprint(2, "syncindex: %d missing entries, %d wrong entries (flush=%d)\n", missing, wrong, flush);
+	if(fix && wbindex(ix) < 0){
+		fprint(2, "can't write back index header for %s: %r\n", ix->name);
+		return -1;
+	}
+	if(fix && flush){
+		flushdcache();
+		if(mustflush){
+			flushicache();
+			flushdcache();
+		}else
+			kickicache();
+	}
+	return ok;
+}
diff --git a/src/cmd/venti/srv/trace.c b/src/cmd/venti/srv/trace.c
new file mode 100644
index 0000000..f8669b3
--- /dev/null
+++ b/src/cmd/venti/srv/trace.c
@@ -0,0 +1,38 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * trace levels: one of these is passed as the first argument
+ * of trace(), which uses it as the name of the log to write.
+ */
+char TraceDisk[] = "disk";
+char TraceLump[] = "lump";
+char TraceBlock[] = "block";
+char TraceProc[] = "proc";
+char TraceWork[] = "work";
+char TraceQuiet[] = "quiet";
+char TraceRpc[] = "rpc";
+
+/*
+ * record a formatted message, tagged with the calling thread's
+ * name, in the log named by level and again in the "all" log.
+ * does nothing when level is nil or ventilogging is off.
+ * messages longer than the local buffer are truncated.
+ */
+void
+trace(char *level, char *fmt, ...)
+{
+	va_list ap;
+	char msg[512];
+
+	if(!ventilogging || level == nil)
+		return;
+
+	va_start(ap, fmt);
+	vsnprint(msg, sizeof msg, fmt, ap);
+	va_end(ap);
+
+	vtlog(level, "<font size=-1>%T %s:</font> %s<br>\n",
+		threadgetname(), msg);
+	vtlog("all", "<font size=-1>%T <font color=#777777>%s</font> %s:</font> %s<br>\n",
+		level, threadgetname(), msg);
+}
+
+/*
+ * placeholder: there is nothing to initialize in this implementation.
+ */
+void
+traceinit(void)
+{
+}
+
+/*
+ * placeholder: the argument is accepted and ignored.
+ */
+void
+settrace(char *trace)
+{
+}
diff --git a/src/cmd/venti/srv/unittoull.c b/src/cmd/venti/srv/unittoull.c
new file mode 100644
index 0000000..1f74117
--- /dev/null
+++ b/src/cmd/venti/srv/unittoull.c
@@ -0,0 +1,30 @@
+#include "stdinc.h"
+
+#define TWID64	((u64int)~(u64int)0)
+
+/*
+ * parse a size: a number (base chosen by its prefix, as with
+ * base 0 in strtoull) optionally followed by exactly one
+ * binary-unit suffix: k/K (2^10), m/M (2^20), g/G (2^30)
+ * or t/T (2^40).
+ * returns TWID64 if s is nil or if anything other than the end
+ * of the string follows the number and its suffix.
+ */
+u64int
+unittoull(char *s)
+{
+	char *es;
+	u64int n;
+
+	if(s == nil)
+		return TWID64;
+	/* strtoull rather than strtoul: ulong may be only 32 bits wide */
+	n = strtoull(s, &es, 0);
+	if(*es == 'k' || *es == 'K'){
+		n *= 1024;
+		es++;
+	}else if(*es == 'm' || *es == 'M'){
+		n *= 1024*1024;
+		es++;
+	}else if(*es == 'g' || *es == 'G'){
+		n *= 1024*1024*1024;
+		es++;
+	}else if(*es == 't' || *es == 'T'){
+		/* two steps: 2^40 does not fit in an int constant expression */
+		n *= 1024*1024;
+		n *= 1024*1024;
+		/* step over the suffix, or the check below rejects every tera value */
+		es++;
+	}
+	if(*es != '\0')
+		return TWID64;
+	return n;
+}
diff --git a/src/cmd/venti/srv/unwhack.c b/src/cmd/venti/srv/unwhack.c
new file mode 100644
index 0000000..5530bd0
--- /dev/null
+++ b/src/cmd/venti/srv/unwhack.c
@@ -0,0 +1,179 @@
+#include "stdinc.h"
+#include "whack.h"
+
+enum
+{
+	DMaxFastLen	= 7,
+	DBigLenCode	= 0x3c,		/* minimum code for large length encoding */
+	DBigLenBits	= 6,
+	DBigLenBase	= 1		/* starting items to encode for big lens */
+};
+
+/*
+ * indexed by the next 5 bits of input in unwhack:
+ * 0 means a literal follows, 255 means a big length code,
+ * anything else is the length itself.
+ */
+static uchar lenval[1 << (DBigLenBits - 1)] =
+{
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	3, 3, 3, 3, 3, 3, 3, 3,
+	4, 4, 4, 4,
+	5,
+	6,
+	255,
+	255
+};
+
+/*
+ * indexed by a length taken from lenval:
+ * the number of input bits its code occupies.
+ */
+static uchar lenbits[] =
+{
+	0, 0, 0,
+	2, 3, 5, 5,
+};
+
+/*
+ * indexed by the 4-bit offset code:
+ * the number of further offset bits to read.
+ */
+static uchar offbits[16] =
+{
+	5, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 12, 13
+};
+
+/*
+ * indexed by the 4-bit offset code:
+ * the value the further offset bits are or'ed into.
+ */
+static ushort offbase[16] =
+{
+	0, 0x20,
+	0x40, 0x60,
+	0x80, 0xc0,
+	0x100, 0x180,
+	0x200, 0x300,
+	0x400, 0x600,
+	0x800, 0xc00,
+	0x1000,
+	0x2000
+};
+
+/*
+ * prepare uw for use by unwhack: start with an empty error string.
+ */
+void
+unwhackinit(Unwhack *uw)
+{
+	*uw->err = '\0';
+}
+
+/*
+ * decompress the nsrc bytes at src into dst, which has room
+ * for ndst bytes.
+ * returns the number of bytes written to dst, or -1 after
+ * storing a description of the problem in uw->err.
+ */
+int
+unwhack(Unwhack *uw, uchar *dst, int ndst, uchar *src, int nsrc)
+{
+	uchar *s, *d, *dmax, *smax, lit;
+	ulong uwbits, lithist;
+	int i, off, len, bits, use, code, uwnbits, overbits;
+
+	d = dst;
+	dmax = d + ndst;
+
+	smax = src + nsrc;
+	uwnbits = 0;		/* number of unread bits held in uwbits */
+	uwbits = 0;
+	overbits = 0;		/* bits shifted in after the input ran out */
+	lithist = ~0;
+	while(src < smax || uwnbits - overbits >= MinDecode){
+		/* refill; past the end of the input, zero bits are supplied and counted */
+		while(uwnbits <= 24){
+			uwbits <<= 8;
+			if(src < smax)
+				uwbits |= *src++;
+			else
+				overbits += 8;
+			uwnbits += 8;
+		}
+
+		/*
+		 * literal
+		 */
+		len = lenval[(uwbits >> (uwnbits - 5)) & 0x1f];
+		if(len == 0){
+			/*
+			 * the low four bits of lithist say whether any of the
+			 * previous four literals was below 32 or above 127.
+			 * if so the literal takes 9 bits; otherwise it takes
+			 * 8 bits, plus 2 or 3 more for the escaped values.
+			 */
+			if(lithist & 0xf){
+				uwnbits -= 9;
+				lit = (uwbits >> uwnbits) & 0xff;
+				lit &= 255;
+			}else{
+				uwnbits -= 8;
+				lit = (uwbits >> uwnbits) & 0x7f;
+				if(lit < 32){
+					if(lit < 24){
+						uwnbits -= 2;
+						lit = (lit << 2) | ((uwbits >> uwnbits) & 3);
+					}else{
+						uwnbits -= 3;
+						lit = (lit << 3) | ((uwbits >> uwnbits) & 7);
+					}
+					lit = (lit - 64) & 0xff;
+				}
+			}
+			if(d >= dmax){
+				snprint(uw->err, WhackErrLen, "too much output");
+				return -1;
+			}
+			*d++ = lit;
+			lithist = (lithist << 1) | (lit < 32) | (lit > 127);
+			continue;
+		}
+
+		/*
+		 * length
+		 */
+		if(len < 255)
+			uwnbits -= lenbits[len];
+		else{
+			/* big length: a DBigLenBits code, extended one bit at a time */
+			uwnbits -= DBigLenBits;
+			code = ((uwbits >> uwnbits) & ((1 << DBigLenBits) - 1)) - DBigLenCode;
+			len = DMaxFastLen;
+			use = DBigLenBase;
+			bits = (DBigLenBits & 1) ^ 1;
+			while(code >= use){
+				len += use;
+				code -= use;
+				code <<= 1;
+				uwnbits--;
+				if(uwnbits < 0){
+					snprint(uw->err, WhackErrLen, "len out of range");
+					return -1;
+				}
+				code |= (uwbits >> uwnbits) & 1;
+				use <<= bits;
+				bits ^= 1;
+			}
+			len += code;
+
+			/* refill again before the offset is read */
+			while(uwnbits <= 24){
+				uwbits <<= 8;
+				if(src < smax)
+					uwbits |= *src++;
+				else
+					overbits += 8;
+				uwnbits += 8;
+			}
+		}
+
+		/*
+		 * offset
+		 */
+		uwnbits -= 4;
+		bits = (uwbits >> uwnbits) & 0xf;
+		off = offbase[bits];
+		bits = offbits[bits];
+
+		uwnbits -= bits;
+		off |= (uwbits >> uwnbits) & ((1 << bits) - 1);
+		off++;
+
+		/* the match must lie within what has already been produced */
+		if(off > d - dst){
+			snprint(uw->err, WhackErrLen, "offset out of range: off=%d d=%ld len=%d nbits=%d", off, d - dst, len, uwnbits);
+			return -1;
+		}
+		if(d + len > dmax){
+			snprint(uw->err, WhackErrLen, "len out of range");
+			return -1;
+		}
+		/* byte-by-byte on purpose: source and destination may overlap */
+		s = d - off;
+		for(i = 0; i < len; i++)
+			d[i] = s[i];
+		d += len;
+	}
+	/* more bits were consumed than the input actually held */
+	if(uwnbits < overbits){
+		snprint(uw->err, WhackErrLen, "compressed data overrun");
+		return -1;
+	}
+
+	len = d - dst;
+
+	return len;
+}
diff --git a/src/cmd/venti/srv/utils.c b/src/cmd/venti/srv/utils.c
new file mode 100644
index 0000000..03fd906
--- /dev/null
+++ b/src/cmd/venti/srv/utils.c
@@ -0,0 +1,252 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * compare two arena-style names, looking at no more than
+ * ANameSize bytes; the result has the sense of strncmp's.
+ */
+int
+namecmp(char *s, char *t)
+{
+	int d;
+
+	d = strncmp(s, t, ANameSize);
+	return d;
+}
+
+/*
+ * copy src into the ANameSize-byte buffer dst, truncating if
+ * necessary; dst is always NUL-terminated.
+ */
+void
+namecp(char *dst, char *src)
+{
+	enum { Last = ANameSize - 1 };
+
+	strncpy(dst, src, Last);
+	dst[Last] = '\0';
+}
+
+/*
+ * check a name: returns 0 if name is non-nil, has at most
+ * ANameSize characters, and every character is in the
+ * range ' ' through 0x7e; returns -1 otherwise.
+ */
+int
+nameok(char *name)
+{
+	int i, c;
+
+	if(name == nil)
+		return -1;
+	for(i = 0; (c = name[i]) != 0; i++){
+		if(i >= ANameSize)
+			return -1;
+		if(c < ' ' || c >= 0x7f)
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * convert the decimal string s to a u32int stored in *r.
+ * returns 1 if s is a non-empty string of digits and nothing
+ * else, 0 if it is not (*r still receives the leading digits'
+ * value), and -1 on overflow (*r is left untouched).
+ */
+int
+stru32int(char *s, u32int *r)
+{
+	char *p;
+	u32int acc, next, limit;
+
+	limit = TWID32 / 10;
+	acc = 0;
+	p = s;
+	while(*p >= '0' && *p <= '9'){
+		if(acc > limit)
+			return -1;
+		next = acc * 10 + *p - '0';
+		if(next < acc)
+			return -1;
+		acc = next;
+		p++;
+	}
+	*r = acc;
+	return p != s && *p == '\0';
+}
+
+/*
+ * convert the decimal string s to a u64int stored in *r.
+ * returns 1 if s is a non-empty string of digits and nothing
+ * else, 0 if it is not (*r still receives the leading digits'
+ * value), and -1 on overflow (*r is left untouched).
+ */
+int
+stru64int(char *s, u64int *r)
+{
+	char *p;
+	u64int acc, next, limit;
+
+	limit = TWID64 / 10;
+	acc = 0;
+	p = s;
+	while(*p >= '0' && *p <= '9'){
+		if(acc > limit)
+			return -1;
+		next = acc * 10 + *p - '0';
+		if(next < acc)
+			return -1;
+		acc = next;
+		p++;
+	}
+	*r = acc;
+	return p != s && *p == '\0';
+}
+
+/*
+ * returns 1 if type is below VtMaxType, 0 otherwise.
+ */
+int
+vttypevalid(int type)
+{
+	if(type < VtMaxType)
+		return 1;
+	return 0;
+}
+
+/*
+ * format a message and print it on standard error, prefixed
+ * with the program name when argv0 is set.
+ * returns the formatted message, which the caller must free,
+ * or nil if it could not be formatted (nothing is printed then).
+ */
+static char*
+logit(int severity, char *fmt, va_list args)
+{
+	char *s;
+
+	s = vsmprint(fmt, args);
+	if(s == nil)
+		return nil;
+	/* name the program only when there is a name to print */
+	if(argv0 != nil)
+		fprint(2, "%s: err %d: %s\n", argv0, severity, s);
+	else
+		fprint(2, "err %d: %s\n", severity, s);
+	return s;
+}
+
+/*
+ * log a formatted error through logit and also make it the
+ * current error string.  if the message cannot be formatted,
+ * the error string becomes "error setting error".
+ */
+void
+seterr(int severity, char *fmt, ...)
+{
+	va_list ap;
+	char *msg;
+
+	va_start(ap, fmt);
+	msg = logit(severity, fmt, ap);
+	va_end(ap);
+
+	if(msg != nil){
+		werrstr("%s", msg);
+		free(msg);
+		return;
+	}
+	werrstr("error setting error");
+}
+
+/*
+ * log a formatted error through logit; unlike seterr, the
+ * current error string is not changed.
+ */
+void
+logerr(int severity, char *fmt, ...)
+{
+	va_list ap;
+	char *msg;
+
+	va_start(ap, fmt);
+	msg = logit(severity, fmt, ap);
+	va_end(ap);
+
+	/* logit's result belongs to us; it may be nil */
+	free(msg);
+}
+
+/*
+ * the current time as reported by time(), narrowed to 32 bits.
+ */
+u32int
+now(void)
+{
+	u32int t;
+
+	t = time(nil);
+	return t;
+}
+
+int abortonmem = 1;
+
+/*
+ * malloc that does not return on failure: it aborts when
+ * abortonmem is set and otherwise exits through sysfatal.
+ * the new block is filled with 0xa5 bytes.
+ */
+void *
+emalloc(ulong n)
+{
+	void *blk;
+
+	blk = malloc(n);
+	if(blk != nil){
+		memset(blk, 0xa5, n);
+		if(0)print("emalloc %p-%p by %lux\n", blk, (char*)blk+n, getcallerpc(&n));
+		return blk;
+	}
+	if(abortonmem)
+		abort();
+	sysfatal("out of memory allocating %lud", n);
+	return nil;	/* not reached */
+}
+
+/*
+ * like emalloc, but the new block is filled with zeros.
+ * does not return on failure: aborts when abortonmem is set
+ * and otherwise exits through sysfatal.
+ */
+void *
+ezmalloc(ulong n)
+{
+	void *blk;
+
+	blk = malloc(n);
+	if(blk != nil){
+		memset(blk, 0, n);
+		if(0)print("ezmalloc %p-%p by %lux\n", blk, (char*)blk+n, getcallerpc(&n));
+		return blk;
+	}
+	if(abortonmem)
+		abort();
+	sysfatal("out of memory allocating %lud", n);
+	return nil;	/* not reached */
+}
+
+/*
+ * realloc that does not return on failure: it aborts when
+ * abortonmem is set and otherwise exits through sysfatal.
+ */
+void *
+erealloc(void *p, ulong n)
+{
+	void *blk;
+
+	blk = realloc(p, n);
+	if(blk != nil){
+		if(0)print("erealloc %p-%p by %lux\n", blk, (char*)blk+n, getcallerpc(&p));
+		return blk;
+	}
+	if(abortonmem)
+		abort();
+	sysfatal("out of memory allocating %lud", n);
+	return nil;	/* not reached */
+}
+
+/*
+ * return a copy of s, terminator included, in memory obtained
+ * from emalloc (and so never nil).
+ */
+char *
+estrdup(char *s)
+{
+	char *copy;
+	int len;
+
+	len = strlen(s) + 1;
+	copy = emalloc(len);
+	memmove(copy, s, len);
+	if(0)print("estrdup %p-%p by %lux\n", copy, (char*)copy+len, getcallerpc(&s));
+	return copy;
+}
+
+/*
+ * return floor(log2(v)); both 0 and 1 yield 0
+ */
+int
+u64log2(u64int v)
+{
+	int i;
+
+	i = 0;
+	while(i < 64 && (v >> i) > 1)
+		i++;
+	return i;
+}
+
+/*
+ * start fn(arg) in a new proc with a 256K stack.
+ * always returns 0; proccreate's result is not examined.
+ */
+int
+vtproc(void (*fn)(void*), void *arg)
+{
+	enum { Stack = 256*1024 };
+
+	proccreate(fn, arg, Stack);
+	return 0;
+}
+
+/*
+ * print-format routine for an IEntry* argument:
+ * score, address, type, size and block count, in that order.
+ * installed as %I by ventifmtinstall.
+ */
+int
+ientryfmt(Fmt *fmt)
+{
+	IEntry *e;
+	IAddr *ia;
+
+	e = va_arg(fmt->args, IEntry*);
+	ia = &e->ia;
+	return fmtprint(fmt, "%V %22lld %3d %5d %3d",
+		e->score, ia->addr, ia->type, ia->size, ia->blocks);
+}
+
+/*
+ * install the print verbs used by the venti programs:
+ * %F via vtfcallfmt, %H via encodefmt, %I via ientryfmt (an
+ * IEntry*) and %V via vtscorefmt.
+ */
+void
+ventifmtinstall(void)
+{
+	fmtinstall('F', vtfcallfmt);
+	fmtinstall('H', encodefmt);
+	fmtinstall('I', ientryfmt);
+	fmtinstall('V', vtscorefmt);
+}
+
+/*
+ * nsec() scaled down to milliseconds and narrowed to a uint,
+ * so the value wraps; callers here only subtract two readings.
+ */
+uint
+msec(void)
+{
+	return nsec()/1000000;
+}
+
+/*
+ * count the one bits in the low 32 bits of n by summing
+ * adjacent fields of 1, 2, 4, 8 and then 16 bits.
+ */
+uint
+countbits(uint n)
+{
+	static uint mask[5] = {
+		0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF
+	};
+	int i;
+
+	/* step i adds neighbouring fields that are 1<<i bits wide */
+	for(i = 0; i < 5; i++)
+		n = (n & mask[i]) + ((n >> (1 << i)) & mask[i]);
+	return n;
+}
diff --git a/src/cmd/venti/srv/venti.c b/src/cmd/venti/srv/venti.c
new file mode 100644
index 0000000..91ded95
--- /dev/null
+++ b/src/cmd/venti/srv/venti.c
@@ -0,0 +1,266 @@
+#ifdef PLAN9PORT
+#include <u.h>
+#include <signal.h>
+#endif
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "whack.h"
+
+int debug;		/* set by -d */
+int nofork;		/* set by -d and -s: serve from threadmain's own proc */
+int mainstacksize = 256*1024;
+VtSrv *ventisrv;	/* the listener from vtlisten; ventiserver takes requests from it */
+
+static void	ventiserver(void*);
+
+/*
+ * print the command-line synopsis on standard error and exit
+ * with status "usage".  the synopsis names exactly the flags
+ * that threadmain accepts.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: venti [-Lds] [-a ventiaddress] [-B blockcachesize] [-c config] [-C cachesize] [-D trace] [-h httpaddress] [-I icachesize] [-W webroot]\n");
+	threadexitsall("usage");
+}
+/*
+ * venti server entry point.
+ * parses the flags, loads the configuration, sizes and creates
+ * the lump, index and disk block caches, syncs the index, and
+ * then announces the service and starts answering requests.
+ * progress is reported on fd 2 as one line of "step..." words.
+ *
+ * settings given on the command line win; anything left at
+ * zero or nil is taken from the configuration file.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	char *configfile, *haddr, *vaddr, *webroot;
+	u32int mem, icmem, bcmem, minbcmem;
+	Config config;
+
+	traceinit();
+	threadsetname("main");
+	vaddr = nil;
+	haddr = nil;
+	configfile = nil;
+	webroot = nil;
+	mem = 0;
+	icmem = 0;
+	bcmem = 0;
+	ARGBEGIN{
+	case 'a':
+		vaddr = EARGF(usage());
+		break;
+	case 'B':
+		bcmem = unittoull(EARGF(usage()));
+		break;
+	case 'c':
+		configfile = EARGF(usage());
+		break;
+	case 'C':
+		mem = unittoull(EARGF(usage()));
+		break;
+	case 'D':
+		settrace(EARGF(usage()));
+		break;
+	case 'd':
+		debug = 1;
+		nofork = 1;
+		break;
+	case 'h':
+		haddr = EARGF(usage());
+		break;
+	case 'I':
+		icmem = unittoull(EARGF(usage()));
+		break;
+	case 'L':
+		ventilogging = 1;
+		break;
+	case 's':
+		nofork = 1;
+		break;
+	case 'W':
+		webroot = EARGF(usage());
+		break;
+	default:
+		usage();
+	}ARGEND
+
+	if(argc)
+		usage();
+
+	if(!nofork)
+		rfork(RFNOTEG);
+
+#ifdef PLAN9PORT
+	{
+		/* sigh - needed to avoid signals when writing to hungup networks */
+		struct sigaction sa;
+		memset(&sa, 0, sizeof sa);
+		sa.sa_handler = SIG_IGN;
+		sigaction(SIGPIPE, &sa, nil);
+	}
+#endif
+
+	trace(TraceQuiet, "venti started");
+	fprint(2, "venti: ");
+
+	ventifmtinstall();
+	if(configfile == nil)
+		configfile = "venti.conf";
+
+	if(initarenasum() < 0)
+		fprint(2, "warning: can't initialize arena summing process: %r");
+
+	fprint(2, "conf...");
+	if(initventi(configfile, &config) < 0)
+		sysfatal("can't init server: %r");
+
+	/* fill in whatever the command line left unset */
+	if(mem == 0)
+		mem = config.mem;
+	if(bcmem == 0)
+		bcmem = config.bcmem;
+	if(icmem == 0)
+		icmem = config.icmem;
+	if(haddr == nil)
+		haddr = config.haddr;
+	if(vaddr == nil)
+		vaddr = config.vaddr;
+	if(vaddr == nil)
+		vaddr = "tcp!*!venti";
+	if(webroot == nil)
+		webroot = config.webroot;
+	if(queuewrites == 0)
+		queuewrites = config.queuewrites;
+
+	/* the http server is optional; failing to start it is only a warning */
+	if(haddr){
+		fprint(2, "httpd %s...", haddr);
+		if(httpdinit(haddr, webroot) < 0)
+			fprint(2, "warning: can't start http server: %r");
+	}
+
+	fprint(2, "init...");
+
+	/*
+	 * 0xffffffff is what unittoull's TWID64 failure value becomes
+	 * in a u32int; fall back to 1MB of lump cache.
+	 * NOTE(review): presumably config.mem uses the same value for
+	 * "not set" -- confirm against the configuration parser.
+	 */
+	if(mem == 0xffffffffUL)
+		mem = 1 * 1024 * 1024;
+	if(0) fprint(2, "initialize %d bytes of lump cache for %d lumps\n",
+		mem, mem / (8 * 1024));
+	initlumpcache(mem, mem / (8 * 1024));
+
+	/*
+	 * icmem changes meaning here: from a byte budget to a log2 size
+	 * (at least 4); the message below reports (1<<icmem)*ICacheDepth entries.
+	 */
+	icmem = u64log2(icmem / (sizeof(IEntry)+sizeof(IEntry*)) / ICacheDepth);
+	if(icmem < 4)
+		icmem = 4;
+	if(0) fprint(2, "initialize %d bytes of index cache for %d index entries\n",
+		(sizeof(IEntry)+sizeof(IEntry*)) * (1 << icmem) * ICacheDepth,
+		(1 << icmem) * ICacheDepth);
+	initicache(icmem, ICacheDepth);
+	initicachewrite();
+
+	/*
+	 * need a block for every arena and every process
+	 */
+	minbcmem = maxblocksize * 
+		(mainindex->narenas + mainindex->nsects*4 + 16);
+	if(bcmem < minbcmem)
+		bcmem = minbcmem;
+
+	if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
+	initdcache(bcmem);
+
+	if(mainindex->bloom)
+		startbloomproc(mainindex->bloom);
+
+	/* arguments after the index are syncindex's fix=1, mustflush=0, check=0 */
+	fprint(2, "sync...");
+	if(syncindex(mainindex, 1, 0, 0) < 0)
+		sysfatal("can't sync server: %r");
+
+	/* write queueing is optional: fall back to unqueued writes on failure */
+	if(queuewrites){
+		fprint(2, "queue...");
+		if(initlumpqueues(mainindex->nsects) < 0){
+			fprint(2, "can't initialize lump queues,"
+				" disabling write queueing: %r");
+			queuewrites = 0;
+		}
+	}
+
+	fprint(2, "announce %s...", vaddr);
+	ventisrv = vtlisten(vaddr);
+	if(ventisrv == nil)
+		sysfatal("can't announce %s: %r", vaddr);
+
+	/* with -d or -s serve from this proc, otherwise from a new one */
+	fprint(2, "serving.\n");
+	if(nofork)
+		ventiserver(nil);
+	else
+		vtproc(ventiserver, nil);
+}
+
+/*
+ * turn the reply of r into an error reply that carries a
+ * private copy of the message.
+ */
+static void
+vtrerror(VtReq *r, char *error)
+{
+	char *msg;
+
+	msg = estrdup(error);
+	r->rx.msgtype = VtRerror;
+	r->rx.error = msg;
+}
+
+/*
+ * the service loop: take requests from ventisrv until vtgetreq
+ * returns nil, answer each one with vtrespond, and keep the
+ * rpc statistics up to date.  when the requests stop, flush
+ * the caches and exit all threads.
+ * the argument is unused.
+ */
+static void
+ventiserver(void *v)
+{
+	Packet *p;
+	VtReq *r;
+	char err[ERRMAX];
+	uint ms;
+	int cached, ok;
+
+	USED(v);
+	threadsetname("ventiserver");
+	trace(TraceWork, "start");
+	while((r = vtgetreq(ventisrv)) != nil){
+		trace(TraceWork, "finish");
+		trace(TraceWork, "start request %F", &r->tx);
+		trace(TraceRpc, "<- %F", &r->tx);
+		/* assume success: the reply type is the request type plus one */
+		r->rx.msgtype = r->tx.msgtype+1;
+		addstat(StatRpcTotal, 1);
+	//	print("req (arenas[0]=%p sects[0]=%p) %F\n",
+	//		mainindex->arenas[0], mainindex->sects[0], &r->tx);
+		switch(r->tx.msgtype){
+		default:
+			vtrerror(r, "unknown request");
+			break;
+		case VtTread:
+			ms = msec();
+			r->rx.data = readlump(r->tx.score, r->tx.blocktype, r->tx.count, &cached);
+			ms = msec() - ms;
+			addstat2(StatRpcRead, 1, StatRpcReadTime, ms);
+			if(r->rx.data == nil){
+				addstat(StatRpcReadFail, 1);
+				rerrstr(err, sizeof err);
+				vtrerror(r, err);
+			}else{
+				addstat(StatRpcReadBytes, packetsize(r->rx.data));
+				addstat(StatRpcReadOk, 1);
+				if(cached)
+					addstat2(StatRpcReadCached, 1, StatRpcReadCachedTime, ms);
+				else
+					addstat2(StatRpcReadUncached, 1, StatRpcReadUncachedTime, ms);
+			}
+			break;
+		case VtTwrite:
+			/*
+			 * detach the packet from the request before handing it on
+			 * (presumably writelump takes ownership -- confirm).
+			 */
+			p = r->tx.data;
+			r->tx.data = nil;
+			addstat(StatRpcWriteBytes, packetsize(p));
+			ms = msec();
+			ok = writelump(p, r->rx.score, r->tx.blocktype, 0, ms);
+			ms = msec() - ms;
+			addstat2(StatRpcWrite, 1, StatRpcWriteTime, ms);
+
+			if(ok < 0){
+				addstat(StatRpcWriteFail, 1);
+				rerrstr(err, sizeof err);
+				vtrerror(r, err);
+			}
+			break;
+		case VtTsync:
+			flushqueue();
+			flushdcache();
+			break;
+		}
+		trace(TraceRpc, "-> %F", &r->rx);
+		vtrespond(r);
+		trace(TraceWork, "start");
+	}
+	flushdcache();
+	flushicache();
+	threadexitsall(0);
+}
+
+
diff --git a/src/cmd/venti/srv/verifyarena.c b/src/cmd/venti/srv/verifyarena.c
new file mode 100644
index 0000000..5236c09
--- /dev/null
+++ b/src/cmd/venti/srv/verifyarena.c
@@ -0,0 +1,127 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+static int	verbose;
+
+/*
+ * print the command-line synopsis on standard error and exit.
+ * a usage error is a failure, so the exit status is "usage",
+ * not 0.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: verifyarena [-v]\n");
+	threadexitsall("usage");
+}
+
+/*
+ * read exactly n bytes from standard input into buf,
+ * repeating short reads as needed.
+ * end of file or a read error before n bytes is fatal.
+ */
+static void
+readblock(uchar *buf, int n)
+{
+	int done, got;
+
+	done = 0;
+	while(done < n){
+		got = read(0, buf + done, n - done);
+		if(got <= 0)
+			sysfatal("can't read arena from standard input: %r");
+		done += got;
+	}
+}
+
+/*
+ * read one complete arena from standard input and check it:
+ * unpack the header and the trailer, make sure their names and
+ * versions agree, and compare the sha1 of the whole arena
+ * (computed with the stored checksum taken as zeros) against
+ * the checksum stored at the end of the last block.
+ * the findings and the arena description are printed on fd 2;
+ * unreadable input or a corrupt header or trailer is fatal.
+ */
+static void
+verifyarena(void)
+{
+	Arena arena;
+	ArenaHead head;
+	ZBlock *b;
+	DigestState s;
+	u64int n, e;
+	u32int bs;
+	u8int score[VtScoreSize];
+
+	fprint(2, "verify arena from standard input\n");
+
+	memset(&arena, 0, sizeof arena);
+	memset(&s, 0, sizeof s);
+
+	/*
+	 * read the first little bit, which will include the header
+	 */
+	bs = MaxIoSize;
+	b = alloczblock(bs, 0, 0);
+	readblock(b->data, HeadSize);
+	sha1(b->data, HeadSize, nil, &s);
+	if(unpackarenahead(&head, b->data) < 0)
+		sysfatal("corrupted arena header: %r");
+	if(head.version != ArenaVersion4 && head.version != ArenaVersion5)
+		fprint(2, "warning: unknown arena version %d\n", head.version);
+
+	/*
+	 * b was allocated for MaxIoSize bytes and must later hold one
+	 * whole arena block, of which the last VtScoreSize bytes are
+	 * the checksum; refuse block sizes that make that impossible.
+	 */
+	if(head.blocksize < VtScoreSize || head.blocksize > MaxIoSize)
+		sysfatal("unusable arena block size %d", head.blocksize);
+
+	/*
+	 * now we know how much to read
+	 * read everything but the last block, which is special
+	 */
+	e = head.size - head.blocksize;
+	for(n = HeadSize; n < e; n += bs){
+		if(n + bs > e)
+			bs = e - n;
+		readblock(b->data, bs);
+		sha1(b->data, bs, nil, &s);
+	}
+
+	/*
+	 * read the last block and update the sum.
+	 * the sum is calculated assuming the slot for the sum is zero.
+	 */
+	bs = head.blocksize;
+	readblock(b->data, bs);
+	sha1(b->data, bs-VtScoreSize, nil, &s);
+	sha1(zeroscore, VtScoreSize, nil, &s);
+	sha1(nil, 0, score, &s);
+
+	/*
+	 * validity check on the trailer
+	 */
+	arena.blocksize = head.blocksize;
+	if(unpackarena(&arena, b->data) < 0)
+		sysfatal("corrupted arena trailer: %r");
+	scorecp(arena.score, &b->data[arena.blocksize - VtScoreSize]);
+
+	if(namecmp(arena.name, head.name) != 0)
+		sysfatal("arena header and trailer names clash: %s vs. %s\n", head.name, arena.name);
+	if(arena.version != head.version)
+		sysfatal("arena header and trailer versions clash: %d vs. %d\n", head.version, arena.version);
+	arena.size = head.size - 2 * head.blocksize;
+
+	/*
+	 * check for no checksum or the same
+	 */
+	if(scorecmp(score, arena.score) != 0){
+		/* an all-zero stored checksum means none was recorded: no warning */
+		if(scorecmp(zeroscore, arena.score) != 0)
+			fprint(2, "warning: mismatched checksums for arena=%s, found=%V calculated=%V\n",
+				arena.name, arena.score, score);
+		scorecp(arena.score, score);
+	}else
+		fprint(2, "matched score\n");
+
+	printarena(2, &arena);
+
+	/* nothing below needs the buffer any more */
+	freezblock(b);
+}
+
+/*
+ * entry point for verifyarena: accepts -v and no other
+ * arguments, marks the program read-only, and verifies the
+ * arena presented on standard input.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	ventifmtinstall();
+	statsinit();
+
+	ARGBEGIN{
+	case 'v':
+		verbose++;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	/* nothing may follow the flags */
+	if(argc)
+		usage();
+
+	readonly = 1;
+	verifyarena();
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/whack.c b/src/cmd/venti/srv/whack.c
new file mode 100644
index 0000000..ecd2903
--- /dev/null
+++ b/src/cmd/venti/srv/whack.c
@@ -0,0 +1,331 @@
+#include "stdinc.h"
+#include "whack.h"
+
+typedef struct Huff	Huff;
+/* when zero, whack() refuses to compress and returns -1 */
+int compressblocks = 1;
+
+enum
+{
+	MaxFastLen	= 9,
+	BigLenCode	= 0x1f4,	/* minimum code for large length encoding */
+	BigLenBits	= 9,
+	BigLenBase	= 4,		/* starting items to encode for big lens */
+
+	MinOffBits	= 6,
+	MaxOffBits	= MinOffBits + 8,
+
+	MaxLen		= 2051		/* max. length encodable in 24 bits */
+};
+
+/*
+ * indices into the stats array that whack() adds to
+ */
+enum
+{
+	StatBytes,
+	StatOutBytes,
+	StatLits,
+	StatMatches,
+	StatLitBits,
+	StatOffBits,
+	StatLenBits,
+
+	MaxStat
+};
+
+struct Huff
+{
+	short	bits;				/* length of the code */
+	ulong	encode;				/* the code */
+};
+
+/*
+ * codes for the short match lengths,
+ * indexed by match length minus MinMatch
+ */
+static	Huff	lentab[MaxFastLen] =
+{
+	{2,	0x2},		/* 10 */
+	{3,	0x6},		/* 110 */
+	{5,	0x1c},		/* 11100 */
+	{5,	0x1d},		/* 11101 */
+	{6,	0x3c},		/* 111100 */
+	{7,	0x7a},		/* 1111010 */
+	{7,	0x7b},		/* 1111011 */
+	{8,	0xf8},		/* 11111000 */
+	{8,	0xf9},		/* 11111001 */
+};
+
+/* most hash chain entries whackmatch will try; set by whackinit */
+static int	thwmaxcheck;
+
+/*
+ * reset the compressor state tw and derive the match search
+ * limit from level: three quarters of 2^level, held to the
+ * range 2 through 1024.  the limit is kept in the file-wide
+ * thwmaxcheck, so it is shared by every Whack.
+ */
+void
+whackinit(Whack *tw, int level)
+{
+	int limit;
+
+	limit = 1 << level;
+	limit -= limit >> 2;
+	if(limit < 2)
+		limit = 2;
+	else if(limit > 1024)
+		limit = 1024;
+	thwmaxcheck = limit;
+
+	memset(tw, 0, sizeof *tw);
+	tw->begin = 2 * WhackMaxOff;
+}
+
+/*
+ * find a string in the dictionary
+ *
+ * follows the hash chain for bucket h looking for the longest
+ * earlier occurrence of the bytes at *ss; now is the time
+ * stamp of *ss and esrc the end of the input.
+ * returns -1, leaving *ss alone, if fewer than MinMatch bytes
+ * remain.  otherwise advances *ss by the length of the best
+ * match (0 if there is none) and returns its distance back
+ * from the original *ss (0 if there is none).
+ */
+static int
+whackmatch(Whack *b, uchar **ss, uchar *esrc, ulong h, ulong now)
+{
+	ushort then, off, last;
+	int bestoff, bestlen, check;
+	uchar *s, *t;
+
+	s = *ss;
+	if(esrc < s + MinMatch)
+		return -1;
+	/* no match may run longer than MaxLen */
+	if(s + MaxLen < esrc)
+		esrc = s + MaxLen;
+
+	bestoff = 0;
+	bestlen = 0;
+	check = thwmaxcheck;
+	last = 0;
+	/* try at most thwmaxcheck candidates */
+	for(then = b->hash[h]; check-- > 0; then = b->next[then & (WhackMaxOff - 1)]){
+		/* stop once offsets stop growing or reach past the window */
+		off = now - then;
+		if(off <= last || off > WhackMaxOff)
+			break;
+
+		/*
+		 * reading s[0] through s[2] needs no end check:
+		 * the test at the top guarantees MinMatch bytes before esrc
+		 */
+		t = s - off;
+		if(s[0] == t[0] && s[1] == t[1] && s[2] == t[2]){
+			/* extend only if this candidate could beat the best so far */
+			if(!bestlen || esrc - s > bestlen && s[bestlen] == t[bestlen]){
+				t += 3;
+				for(s += 3; s < esrc; s++){
+					if(*s != *t)
+						break;
+					t++;
+				}
+				if(s - *ss > bestlen){
+					bestlen = s - *ss;
+					bestoff = off;
+					if(bestlen > thwmaxcheck)
+						break;
+				}
+			}
+		}
+		s = *ss;
+		last = off;
+	}
+	*ss += bestlen;
+	return bestoff;
+}
+
+/*
+ * knuth vol. 3 multiplicative hashing
+ * each byte x chosen according to rules
+ * 1/4 < x < 3/10, 1/3 < x < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
+ * with reasonable spread between the bytes & their complements
+ *
+ * the 3 byte value appears to be almost as good as the 4 byte value,
+ * and might be faster on some machines
+ */
+/*
+#define hashit(c)	((((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) & HashMask)
+*/
+#define hashit(c)	(((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
+
+/*
+ * lz77 compression with single lookup in a hash table for each block
+ *
+ * compresses the n bytes at src into dst, never writing more
+ * than n bytes there, and adds its counts to stats.
+ * returns the number of bytes written, or -1 if compression is
+ * disabled, n is below MinMatch, the output would not fit in n
+ * bytes, or the progress check part way through found too few
+ * matches.
+ */
+int
+whack(Whack *w, uchar *dst, uchar *src, int n, ulong stats[WhackStats])
+{
+	uchar *s, *ss, *sss, *esrc, *half, *wdst, *wdmax;
+	ulong cont, code, wbits;
+	ushort now;
+	int toff, lithist, h, len, bits, use, wnbits, lits, matches, offbits, lenbits;
+
+	if(!compressblocks || n < MinMatch)
+		return -1;
+
+	wdst = dst;
+	wdmax = dst + n;
+
+	now = w->begin;
+	s = src;
+	w->data = s;
+
+	/* cont holds the bytes that are hashed at the current position */
+	cont = (s[0] << 16) | (s[1] << 8) | s[2];
+
+	esrc = s + n;
+	half = s + (n >> 1);
+	wnbits = 0;
+	wbits = 0;
+	lits = 0;
+	matches = 0;
+	offbits = 0;
+	lenbits = 0;
+	lithist = ~0;
+	while(s < esrc){
+		h = hashit(cont);
+
+		sss = s;
+		toff = whackmatch(w, &sss, esrc, h, now);
+		ss = sss;
+
+		len = ss - s;
+		/* write out the whole bytes that have accumulated in wbits */
+		for(; wnbits >= 8; wnbits -= 8){
+			if(wdst >= wdmax){
+				w->begin = now;
+				return -1;
+			}
+			*wdst++ = wbits >> (wnbits - 8);
+		}
+		if(len < MinMatch){
+			/*
+			 * no usable match: emit a literal.  lithist records, one
+			 * bit per literal, whether it was below 32 or above 127.
+			 * if any of the previous four was, use 9 bits; otherwise
+			 * use 8 bits for an in-range byte and 10 or 11 for one
+			 * that is out of range.
+			 */
+			toff = *s;
+			lithist = (lithist << 1) | toff < 32 | toff > 127;
+			if(lithist & 0x1e){
+				wbits = (wbits << 9) | toff;
+				wnbits += 9;
+			}else if(lithist & 1){
+				toff = (toff + 64) & 0xff;
+				if(toff < 96){
+					wbits = (wbits << 10) | toff;
+					wnbits += 10;
+				}else{
+					wbits = (wbits << 11) | toff;
+					wnbits += 11;
+				}
+			}else{
+				wbits = (wbits << 8) | toff;
+				wnbits += 8;
+			}
+			lits++;
+
+			/*
+			 * speed hack
+			 * check for compression progress, bail if none achieved
+			 * (done once, after half the input: literals must not
+			 * exceed four fifths of the bytes consumed)
+			 */
+			if(s > half){
+				if(4 * (s - src) < 5 * lits){
+					w->begin = now;
+					return -1;
+				}
+				half = esrc;
+			}
+
+			/* enter this position in the hash chains and slide cont along */
+			if(s + MinMatch <= esrc){
+				w->next[now & (WhackMaxOff - 1)] = w->hash[h];
+				w->hash[h] = now;
+				if(s + MinMatch < esrc)
+					cont = (cont << 8) | s[MinMatch];
+			}
+			now++;
+			s++;
+			continue;
+		}
+
+		matches++;
+
+		/*
+		 * length of match
+		 */
+		if(len > MaxLen){
+			len = MaxLen;
+			ss = s + len;
+		}
+		len -= MinMatch;
+		if(len < MaxFastLen){
+			bits = lentab[len].bits;
+			wbits = (wbits << bits) | lentab[len].encode;
+			wnbits += bits;
+			lenbits += bits;
+		}else{
+			/* long match: grow the code from BigLenCode until len fits */
+			code = BigLenCode;
+			bits = BigLenBits;
+			use = BigLenBase;
+			len -= MaxFastLen;
+			while(len >= use){
+				len -= use;
+				code = (code + use) << 1;
+				use <<= (bits & 1) ^ 1;
+				bits++;
+			}
+
+			wbits = (wbits << bits) | (code + len);
+			wnbits += bits;
+			lenbits += bits;
+
+			/* make room in wbits before the offset is appended */
+			for(; wnbits >= 8; wnbits -= 8){
+				if(wdst >= wdmax){
+					w->begin = now;
+					return -1;
+				}
+				*wdst++ = wbits >> (wnbits - 8);
+			}
+		}
+
+		/*
+		 * offset in history
+		 */
+		toff--;
+		for(bits = MinOffBits; toff >= (1 << bits); bits++)
+			;
+		if(bits < MaxOffBits-1){
+			wbits = (wbits << 3) | (bits - MinOffBits);
+			if(bits != MinOffBits)
+				bits--;
+			wnbits += bits + 3;
+			offbits += bits + 3;
+		}else{
+			wbits = (wbits << 4) | 0xe | (bits - (MaxOffBits-1));
+			bits--;
+			wnbits += bits + 4;
+			offbits += bits + 4;
+		}
+		wbits = (wbits << bits) | toff & ((1 << bits) - 1);
+
+		/* enter every position covered by the match in the hash chains */
+		for(; s != ss; s++){
+			if(s + MinMatch <= esrc){
+				h = hashit(cont);
+				w->next[now & (WhackMaxOff - 1)] = w->hash[h];
+				w->hash[h] = now;
+				if(s + MinMatch < esrc)
+					cont = (cont << 8) | s[MinMatch];
+			}
+			now++;
+		}
+	}
+
+	w->begin = now;
+
+	stats[StatBytes] += esrc - src;
+	stats[StatLits] += lits;
+	stats[StatMatches] += matches;
+	/* NOTE(review): the reason for the "+ 2" is not visible in this file -- confirm */
+	stats[StatLitBits] += (wdst - (dst + 2)) * 8 + wnbits - offbits - lenbits;
+	stats[StatOffBits] += offbits;
+	stats[StatLenBits] += lenbits;
+
+	/* pad the last partial byte with zero bits and write out the rest */
+	if(wnbits & 7){
+		wbits <<= 8 - (wnbits & 7);
+		wnbits += 8 - (wnbits & 7);
+	}
+	for(; wnbits >= 8; wnbits -= 8){
+		if(wdst >= wdmax)
+			return -1;
+		*wdst++ = wbits >> (wnbits - 8);
+	}
+
+	stats[StatOutBytes] += wdst - dst;
+
+	return wdst - dst;
+}
+
+/*
+ * compress the ssize bytes at src into dst as one independent
+ * block, using a fresh compressor at level 6.
+ * returns what whack returns: the compressed size, or -1 if
+ * the block was not compressed.  the statistics are discarded.
+ */
+int
+whackblock(uchar *dst, uchar *src, int ssize)
+{
+	Whack w;
+	ulong stats[WhackStats];
+	int r;
+
+	/*
+	 * whack adds to the counters, so they must start out defined;
+	 * the array is sized to match whack's declared parameter.
+	 */
+	memset(stats, 0, sizeof stats);
+	whackinit(&w, 6);
+	r = whack(&w, dst, src, ssize, stats);
+	return r;
+}
diff --git a/src/cmd/venti/srv/whack.h b/src/cmd/venti/srv/whack.h
new file mode 100644
index 0000000..fb96616
--- /dev/null
+++ b/src/cmd/venti/srv/whack.h
@@ -0,0 +1,40 @@
+typedef struct Whack		Whack;
+typedef struct Unwhack		Unwhack;
+
+enum
+{
+	WhackStats	= 8,		/* length of the stats array passed to whack */
+	WhackErrLen	= 64,		/* max length of error message from whack or unwhack */
+	WhackMaxOff	= 16*1024,	/* max allowed offset */
+
+	HashLog		= 14,
+	HashSize	= 1<<HashLog,
+	HashMask	= HashSize - 1,
+
+	MinMatch	= 3,		/* shortest match possible */
+
+	MinDecode	= 8,		/* minimum bits to decode a match or lit; >= 8 */
+
+	MaxSeqMask	= 8,		/* number of bits in coding block mask */
+	MaxSeqStart	= 256		/* max offset of initial coding block */
+};
+
+/*
+ * compressor state, set up by whackinit
+ */
+struct Whack
+{
+	ushort		begin;			/* time of first byte in hash */
+	ushort		hash[HashSize];		/* most recent time stamp for each hash value */
+	ushort		next[WhackMaxOff];	/* chains the earlier time stamps sharing a hash value */
+	uchar		*data;			/* start of the block most recently given to whack */
+};
+
+/*
+ * decompressor state, set up by unwhackinit
+ */
+struct Unwhack
+{
+	char		err[WhackErrLen];	/* message left by a failed unwhack */
+};
+
+void	whackinit(Whack*, int level);
+void	unwhackinit(Unwhack*);
+/* both return the number of bytes written to dst, or -1 */
+int	whack(Whack*, uchar *dst, uchar *src, int nsrc, ulong stats[WhackStats]);
+int	unwhack(Unwhack*, uchar *dst, int ndst, uchar *src, int nsrc);
+
+int	whackblock(uchar *dst, uchar *src, int ssize);
diff --git a/src/cmd/venti/srv/wrarena.c b/src/cmd/venti/srv/wrarena.c
new file mode 100644
index 0000000..4b8358c
--- /dev/null
+++ b/src/cmd/venti/srv/wrarena.c
@@ -0,0 +1,217 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+QLock godot;	/* locked by threadmain at startup and never released; send threads block on it when done */
+char *host;	/* venti server address from -h; nil when -h was not given */
+int readonly = 1;	/* for part.c */
+int mainstacksize = 256*1024;	/* presumably read by libthread to size the main thread's stack -- confirm */
+Channel *c;	/* carries ZClump values from rdarena to the vtsendthread workers */
+VtConn *z;	/* connection to the destination server; nil when nothing is to be written */
+int fast;	/* and a bit unsafe; only for benchmarking */
+int haveaoffset;	/* set by -o; makes rdarena print the end offset */
+int maxwrites = -1;	/* clump limit from -M; a negative value means no limit */
+
+typedef struct ZClump ZClump;	/* one unit of work sent over channel c */
+struct ZClump
+{
+	ZBlock *lump;	/* clump contents; freed by the receiving thread; nil tells the receiver to stop */
+	Clump cl;	/* clump header; info.score, info.type and info.uncsize are what get written */
+	u64int aa;	/* address of the clump within the arena, used in error messages */
+};
+
+/*
+ * Print the command-line synopsis on standard error and exit
+ * with status "usage".  The synopsis names every flag that
+ * threadmain parses: -f, -h, -o and -M.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: wrarena [-f] [-h host] [-o fileoffset] [-M maxwrites] arenafile [offset]\n");
+	threadexitsall("usage");
+}
+
+/*
+ * Worker: take clumps off channel c and write each one to the
+ * venti connection z, freeing the clump's block afterwards.
+ * The loop ends when recv fails or when a ZClump with a nil
+ * lump arrives; a failed write is fatal.
+ */
+void
+vtsendthread(void *v)
+{
+	ZClump job;
+
+	USED(v);
+	for(;;){
+		if(recv(c, &job) != 1)
+			break;
+		if(job.lump == nil)
+			break;
+		if(vtwrite(z, job.cl.info.score, job.cl.info.type, job.lump->data, job.cl.info.uncsize) < 0)
+			sysfatal("failed writing clump %llud: %r", job.aa);
+		freezblock(job.lump);
+	}
+
+	/*
+	 * Do not return.  Every send thread finishes at about the
+	 * moment threadmain calls threadexitsall, and exiting here
+	 * at the same time has produced a seg fault in exit (either
+	 * libthread or Linux NPTL mishandles it; the culprit was
+	 * never pinned down).  As a workaround, block on godot,
+	 * which threadmain holds, until threadexitsall takes effect.
+	 */
+	qlock(&godot);
+}
+
+/*
+ * Walk the clumps stored in arena, in address order, and hand
+ * each one to the vtsendthread workers over channel c.
+ *
+ * offset is the address, relative to the start of the arena, of
+ * the first clump to copy; ~0 means start at address 0.  It is
+ * fatal for offset to lie at or beyond the arena size.
+ *
+ * The walk stops at the first free or unrecognized clump magic,
+ * at a clump that fails to load or (unless fast is set) fails its
+ * score or type check, when maxwrites clumps have been handled,
+ * or at the end of the arena.  Clumps of type VtCorruptType are
+ * skipped rather than sent, and nothing is sent when z is nil.
+ * With -o (haveaoffset) the address where the walk stopped is
+ * printed on standard output.
+ */
+static void
+rdarena(Arena *arena, u64int offset)
+{
+	u64int aa, asize;
+	u32int magic;
+	Clump cl;
+	uchar score[VtScoreSize];
+	ZBlock *lump;
+	ZClump zcl;
+
+	fprint(2, "wrarena: copying %s to venti\n", arena->name);
+	printarena(2, arena);
+
+	/*
+	 * aa is relative to the start of the arena, so it is bounded
+	 * by the arena size, not by the absolute end address
+	 * arena->base + arena->size.
+	 */
+	asize = arena->size;
+	if(offset != ~(u64int)0) {
+		if(offset >= asize)
+			sysfatal("bad offset %llud >= %llud\n",
+				offset, asize);
+		aa = offset;
+	} else
+		aa = 0;
+
+	if(maxwrites != 0)
+	for(; aa < asize; aa += ClumpSize+cl.info.size) {
+		magic = clumpmagic(arena, aa);
+		if(magic == ClumpFreeMagic)
+			break;
+		/* anything else that is not this arena's magic ends the walk quietly */
+		if(magic != arena->clumpmagic)
+			break;
+		lump = loadclump(arena, aa, 0, &cl, score, 0);
+		if(lump == nil) {
+			fprint(2, "clump %llud failed to read: %r\n", aa);
+			break;
+		}
+		if(!fast && cl.info.type != VtCorruptType) {
+			/* recompute the score from the data and compare with the header */
+			scoremem(score, lump->data, cl.info.uncsize);
+			if(scorecmp(cl.info.score, score) != 0) {
+				fprint(2, "clump %llud has mismatched score\n", aa);
+				break;
+			}
+			if(vttypevalid(cl.info.type) < 0) {
+				fprint(2, "clump %llud has bad type %d\n", aa, cl.info.type);
+				break;
+			}
+		}
+		if(z && cl.info.type != VtCorruptType){
+			/* the receiving thread takes over lump and frees it */
+			zcl.cl = cl;
+			zcl.lump = lump;
+			zcl.aa = aa;
+			send(c, &zcl);
+		}else
+			freezblock(lump);
+		if(maxwrites>0 && --maxwrites == 0)
+			break;
+	}
+	if(haveaoffset)
+		print("end offset %llud\n", aa);
+}
+
+/*
+ * wrarena entry point: open an arena stored in a file and replay
+ * its clumps to a venti server.
+ *
+ * Flags:
+ *	-f		set fast (rdarena skips its score and type checks)
+ *			and clear ventidoublechecksha1
+ *	-h host		server to write to; with no -h, or with host
+ *			"/dev/null", no connection is made and the
+ *			arena is only read
+ *	-o aoffset	byte offset of the arena header within the file
+ *	-M n		stop after n clumps
+ * Arguments: the arena file, then optionally the offset of the
+ * first clump to copy, which is passed on to rdarena.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	int i;
+	char *file;
+	Arena *arena;
+	u64int offset, aoffset;
+	Part *part;
+	Dir *d;
+	uchar buf[8192];
+	ArenaHead head;
+	ZClump zerocl;
+
+	/* held until exit; finished send threads block on it in vtsendthread */
+	qlock(&godot);
+	aoffset = 0;
+	ARGBEGIN{
+	case 'f':
+		fast = 1;
+		ventidoublechecksha1 = 0;
+		break;
+	case 'h':
+		host = EARGF(usage());
+		break;
+	case 'o':
+		haveaoffset = 1;
+		aoffset = strtoull(EARGF(usage()), 0, 0);
+		break;
+	case 'M':
+		maxwrites = atoi(EARGF(usage()));
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	/* ~0 tells rdarena that no starting offset was given */
+	offset = ~(u64int)0;
+	switch(argc) {
+	default:
+		usage();
+	case 2:
+		offset = strtoull(argv[1], 0, 0);
+		/* fall through */
+	case 1:
+		file = argv[0];
+	}
+
+	fmtinstall('V', vtscorefmt);
+
+	statsinit();
+
+	if((d = dirstat(file)) == nil)
+		sysfatal("can't stat file %s: %r", file);
+
+	part = initpart(file, OREAD);
+	if(part == nil)
+		sysfatal("can't open file %s: %r", file);
+	if(readpart(part, aoffset, buf, sizeof buf) < 0)
+		sysfatal("can't read file %s: %r", file);
+
+	if(unpackarenahead(&head, buf) < 0)
+		sysfatal("corrupted arena header: %r");
+
+	/* the whole arena, as sized by its header, must fit inside the file */
+	if(aoffset+head.size > d->length)
+		sysfatal("arena is truncated: want %llud bytes have %llud\n",
+			head.size, d->length);
+
+	partblocksize(part, head.blocksize);
+	initdcache(8 * MaxDiskBlock);
+
+	arena = initarena(part, aoffset, head.size, head.blocksize);
+	if(arena == nil)
+		sysfatal("initarena: %r");
+
+	if(host && strcmp(host, "/dev/null") != 0){
+		z = vtdial(host);
+		if(z == nil)
+			sysfatal("could not connect to server: %r");
+		if(vtconnect(z) < 0)
+			sysfatal("vtconnect: %r");
+	}else
+		z = nil;
+
+	/* unbuffered channel feeding 12 writer procs */
+	c = chancreate(sizeof(ZClump), 0);
+	for(i=0; i<12; i++)
+		vtproc(vtsendthread, nil);
+
+	rdarena(arena, offset);
+
+	/*
+	 * There is nothing to sync when running without a server.
+	 * NOTE(review): the sync is issued before the stop messages
+	 * below, so a worker may still be inside vtwrite at this
+	 * point; confirm that ordering is intended.
+	 */
+	if(z != nil && vtsync(z) < 0)
+		sysfatal("executing sync: %r");
+
+	/* a ZClump with a nil lump tells one worker to stop */
+	memset(&zerocl, 0, sizeof zerocl);
+	for(i=0; i<12; i++)
+		send(c, &zerocl);
+	if(z){
+		vthangup(z);
+	}
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/xml.c b/src/cmd/venti/srv/xml.c
new file mode 100644
index 0000000..e91afa0
--- /dev/null
+++ b/src/cmd/venti/srv/xml.c
@@ -0,0 +1,68 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "xml.h"
+
+/*
+ * Write one self-closing XML element named tag to hout, at the
+ * given indent, carrying the fields of arena s as attributes.
+ */
+void xmlarena(Hio *hout, Arena *s, char *tag, int indent){
+	u64int stop, compressed, storage;
+
+	/* values that are computed rather than read straight from the arena */
+	stop = s->base+2*s->blocksize;
+	compressed = s->memstats.used - s->memstats.clumps * ClumpSize;
+	storage = s->memstats.used + s->memstats.clumps * ClumpInfoSize;
+
+	xmlindent(hout, indent);
+	hprint(hout, "<%s", tag);
+
+	/* identity and layout */
+	xmlaname(hout, s->name, "name");
+	xmlu32int(hout, s->version, "version");
+	xmlaname(hout, s->part->name, "partition");
+	xmlu32int(hout, s->blocksize, "blocksize");
+	xmlu64int(hout, s->base, "start");
+	xmlu64int(hout, stop, "stop");
+
+	/* timestamps and seal state */
+	xmlu32int(hout, s->ctime, "created");
+	xmlu32int(hout, s->wtime, "modified");
+	xmlsealed(hout, s->memstats.sealed, "sealed");
+	xmlscore(hout, s->score, "score");
+
+	/* usage counters */
+	xmlu32int(hout, s->memstats.clumps, "clumps");
+	xmlu32int(hout, s->memstats.cclumps, "compressedclumps");
+	xmlu64int(hout, s->memstats.uncsize, "data");
+	xmlu64int(hout, compressed, "compresseddata");
+	xmlu64int(hout, storage, "storage");
+
+	hprint(hout, "/>\n");
+}
+
+/*
+ * Write index s to hout as an XML element named tag at the given
+ * indent.  The element's attributes are the index's scalar fields;
+ * its children are <sects>, <amaps> and <arenas>, each listing the
+ * corresponding entries one level deeper.
+ */
+void xmlindex(Hio *hout, Index *s, char *tag, int indent){
+	int n, inner, leaf;
+
+	inner = indent + 1;	/* depth of the three wrapper elements */
+	leaf = indent + 2;	/* depth of the entries inside a wrapper */
+
+	/* opening tag with attributes */
+	xmlindent(hout, indent);
+	hprint(hout, "<%s", tag);
+	xmlaname(hout, s->name, "name");
+	xmlu32int(hout, s->version, "version");
+	xmlu32int(hout, s->blocksize, "blocksize");
+	xmlu32int(hout, s->tabsize, "tabsize");
+	xmlu32int(hout, s->buckets, "buckets");
+	xmlu32int(hout, s->div, "buckdiv");
+	hprint(hout, ">\n");
+
+	/* index sections */
+	xmlindent(hout, inner);
+	hprint(hout, "<sects>\n");
+	for(n = 0; n < s->nsects; n++)
+		xmlamap(hout, &s->smap[n], "sect", leaf);
+	xmlindent(hout, inner);
+	hprint(hout, "</sects>\n");
+
+	/* arena address maps */
+	xmlindent(hout, inner);
+	hprint(hout, "<amaps>\n");
+	for(n = 0; n < s->narenas; n++)
+		xmlamap(hout, &s->amap[n], "amap", leaf);
+	xmlindent(hout, inner);
+	hprint(hout, "</amaps>\n");
+
+	/* the arenas themselves */
+	xmlindent(hout, inner);
+	hprint(hout, "<arenas>\n");
+	for(n = 0; n < s->narenas; n++)
+		xmlarena(hout, s->arenas[n], "arena", leaf);
+	xmlindent(hout, inner);
+	hprint(hout, "</arenas>\n");
+
+	/* closing tag */
+	xmlindent(hout, indent);
+	hprint(hout, "</%s>\n", tag);
+}
+
+void xmlamap(Hio *hout, AMap *s, char *tag, int indent){	/* write map entry s to hout as one self-closing element named tag */
+	xmlindent(hout, indent);	/* leading indentation for this element */
+	hprint(hout, "<%s", tag);	/* tag is left open so the attribute helpers can add to it */
+	xmlaname(hout, s->name, "name");
+	xmlu64int(hout, s->start, "start");
+	xmlu64int(hout, s->stop, "stop");
+	hprint(hout, "/>\n");	/* close the element; it has no children */
+}
+
diff --git a/src/cmd/venti/srv/xml.h b/src/cmd/venti/srv/xml.h
new file mode 100644
index 0000000..c9e52b0
--- /dev/null
+++ b/src/cmd/venti/srv/xml.h
@@ -0,0 +1,11 @@
+void	xmlamap(Hio *hout, AMap *v, char *tag, int indent);	/* self-closing element with name, start and stop */
+void	xmlarena(Hio *hout, Arena *v, char *tag, int indent);	/* self-closing element describing one arena */
+void	xmlindex(Hio *hout, Index *v, char *tag, int indent);	/* element with nested sects, amaps and arenas lists */
+
+void	xmlaname(Hio *hout, char *v, char *tag);	/* called inside an open tag; presumably emits v as the attribute named tag -- confirm */
+void	xmlscore(Hio *hout, u8int *v, char *tag);	/* as xmlaname, for a score -- confirm */
+void	xmlsealed(Hio *hout, int v, char *tag);	/* as xmlaname, for an arena's sealed flag -- confirm */
+void	xmlu32int(Hio *hout, u32int v, char *tag);	/* as xmlaname, for a 32-bit unsigned value -- confirm */
+void	xmlu64int(Hio *hout, u64int v, char *tag);	/* as xmlaname, for a 64-bit unsigned value -- confirm */
+
+void	xmlindent(Hio *hout, int indent);	/* called before each tag with the tag's nesting depth */
diff --git a/src/cmd/venti/srv/zblock.c b/src/cmd/venti/srv/zblock.c
new file mode 100644
index 0000000..b33cdd2
--- /dev/null
+++ b/src/cmd/venti/srv/zblock.c
@@ -0,0 +1,93 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * Point the print state f at b's data buffer: output begins at
+ * b->data and the limit is b->len bytes later.  No flush routine
+ * or flush argument is installed, and the counters start at zero.
+ */
+void
+fmtzbinit(Fmt *f, ZBlock *b)
+{
+	/* bookkeeping */
+	f->flush = nil;
+	f->farg = nil;
+	f->runes = 0;
+	f->nfmt = 0;
+
+	/* output window: [b->data, b->data + b->len) */
+	f->start = b->data;
+	f->to = b->data;
+	f->stop = (char*)b->data + b->len;
+}
+
+/* round pointer p up to a multiple of n; n must be a power of two */
+#define ROUNDUP(p, n) ((void*)(((ulong)(p)+(n)-1)&~(ulong)((n)-1)))
+
+/*
+ * Guard pattern stored right after the data of every ZBlock and
+ * checked again in freezblock.  The array is declared with its full
+ * guard length so that copying and comparing sizeof zmagic bytes
+ * stays inside the object; bytes past the text are zero.
+ */
+static char zmagic[32] = "1234567890abcdefghijkl";
+
+/*
+ * Allocate a ZBlock with room for size bytes of data.
+ *
+ * A single malloc holds, in order: padding so that the data starts
+ * on a blocksize boundary, the data, the guard pattern, and the
+ * ZBlock header itself, aligned to 8 bytes.  A blocksize of 0
+ * selects 32-byte alignment.  The data is cleared when zeroed is
+ * nonzero.
+ *
+ * Returns nil, after calling seterr, if malloc fails.  The result
+ * must be released with freezblock.
+ */
+ZBlock *
+alloczblock(u32int size, int zeroed, uint blocksize)
+{
+	uchar *p, *data;
+	ZBlock *b;
+	static ZBlock z;
+	int n;
+
+	if(blocksize == 0)
+		blocksize = 32;	/* try for cache line alignment */
+
+	/* data + guard + header, plus slack for the two alignments */
+	n = size+sizeof zmagic+sizeof(ZBlock)+blocksize+8;
+	p = malloc(n);
+	if(p == nil){
+		seterr(EOk, "out of memory");
+		return nil;
+	}
+
+	data = ROUNDUP(p, blocksize);
+	b = ROUNDUP(data+size+sizeof zmagic, 8);
+	if(0) fprint(2, "alloc %p-%p data %p-%p b %p-%p\n",
+		p, p+n, data, data+size, b, b+1);
+	*b = z;
+	b->data = data;
+	b->free = p;	/* what malloc returned; this is what freezblock frees */
+	b->len = size;
+	b->_size = size;	/* original size, so freezblock can locate the guard */
+	if(zeroed)
+		memset(b->data, 0, size);
+	memmove(b->data+size, zmagic, sizeof zmagic);
+	return b;
+}
+
+/*
+ * Release a block obtained from alloczblock; nil is ignored.
+ * Aborts if the guard after the data is no longer intact, which
+ * means something wrote past the end of the data.  The guard is
+ * cleared before freeing, so freeing the same block a second time
+ * fails the same check.
+ */
+void
+freezblock(ZBlock *b)
+{
+	if(b){
+		if(memcmp(b->data+b->_size, zmagic, sizeof zmagic) != 0)
+			abort();
+		memset(b->data+b->_size, 0, sizeof zmagic);
+		free(b->free);
+	}
+}
+
+/*
+ * Copy the first size bytes of packet p into a newly allocated
+ * ZBlock.  Returns nil when p is nil, when the allocation fails,
+ * or when packetcopy fails; in the last case the new block is
+ * freed before returning.
+ */
+ZBlock*
+packet2zblock(Packet *p, u32int size)
+{
+	ZBlock *zb;
+
+	if(p == nil)
+		return nil;
+
+	zb = alloczblock(size, 0, 0);
+	if(zb != nil && packetcopy(p, zb->data, 0, size) < 0){
+		freezblock(zb);
+		zb = nil;
+	}
+	return zb;
+}
+
+/*
+ * Make a new packet holding the first size bytes of zb's data.
+ * Returns nil when zb is nil.  zb itself is left alone.
+ */
+Packet*
+zblock2packet(ZBlock *zb, u32int size)
+{
+	Packet *pkt;
+
+	pkt = nil;
+	if(zb != nil){
+		pkt = packetalloc();
+		packetappend(pkt, zb->data, size);
+	}
+	return pkt;
+}
+
diff --git a/src/cmd/venti/srv/zeropart.c b/src/cmd/venti/srv/zeropart.c
new file mode 100644
index 0000000..fe75c81
--- /dev/null
+++ b/src/cmd/venti/srv/zeropart.c
@@ -0,0 +1,31 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * Overwrite part with zeros, starting at address PartBlank.
+ *
+ * The bulk is written MaxIoSize bytes at a time; what is left is
+ * written blocksize bytes at a time.  A tail shorter than blocksize
+ * is not touched.  Any failure, including failure to allocate the
+ * zero-filled buffer, is fatal.
+ */
+void
+zeropart(Part *part, int blocksize)
+{
+	ZBlock *b;
+	u64int addr;
+	int w;
+
+	fprint(2, "clearing the partition\n");
+
+	/* one zero-filled buffer, aligned to blocksize, reused for every write */
+	b = alloczblock(MaxIoSize, 1, blocksize);
+	if(b == nil)
+		sysfatal("can't initialize %s: can't allocate zero block: %r", part->name);
+
+	w = 0;
+	for(addr = PartBlank; addr + MaxIoSize <= part->size; addr += MaxIoSize){
+		if(writepart(part, addr, b->data, MaxIoSize) < 0)
+			sysfatal("can't initialize %s, writing block %d failed: %r", part->name, w);
+		w++;
+	}
+
+	/* finish with blocksize-sized writes where a full MaxIoSize no longer fits */
+	for(; addr + blocksize <= part->size; addr += blocksize)
+		if(writepart(part, addr, b->data, blocksize) < 0)
+			sysfatal("can't initialize %s: %r", part->name);
+
+	freezblock(b);
+}
diff --git a/src/cmd/venti/sync.c b/src/cmd/venti/sync.c
new file mode 100644
index 0000000..9d817a7
--- /dev/null
+++ b/src/cmd/venti/sync.c
@@ -0,0 +1,54 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include <venti.h>
+
+char *host;	/* server address from -h; stays nil, and is passed to vtdial as nil, when -h is absent */
+int donothing;	/* set by -x: connect and hang up without calling vtsync */
+
+void
+usage(void)	/* print the synopsis and exit; called for bad or missing arguments */
+{
+	fprint(2, "usage: sync [-h host]\n");	/* to standard error; NOTE(review): -x, which threadmain accepts, is not listed */
+	threadexitsall("usage");	/* exit status is the string "usage" */
+}
+
+/*
+ * sync entry point: connect to a venti server, issue a sync, and
+ * hang up.  -h host names the server (otherwise vtdial is called
+ * with nil); -x skips the sync but still connects.  No arguments
+ * are accepted after the flags.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	VtConn *conn;
+
+	fmtinstall('V', vtscorefmt);
+	fmtinstall('F', vtfcallfmt);
+
+	ARGBEGIN{
+	case 'x':
+		donothing = 1;
+		break;
+	case 'h':
+		if((host = EARGF(usage())) == nil)
+			usage();
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc != 0)
+		usage();
+
+	if((conn = vtdial(host)) == nil)
+		sysfatal("could not connect to server: %r");
+	if(vtconnect(conn) < 0)
+		sysfatal("vtconnect: %r");
+
+	/* with -x the connection is made and dropped without a sync */
+	if(!donothing && vtsync(conn) < 0)
+		sysfatal("vtsync: %r");
+
+	vthangup(conn);
+	threadexitsall(0);
+}
diff --git a/src/cmd/venti/write.c b/src/cmd/venti/write.c
new file mode 100644
index 0000000..c11a5a3
--- /dev/null
+++ b/src/cmd/venti/write.c
@@ -0,0 +1,62 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <libsec.h>
+#include <thread.h>
+
+void
+usage(void)	/* print the synopsis and exit; called for bad or missing arguments */
+{
+	fprint(2, "usage: write [-z] [-h host] [-t type] <datablock\n");	/* to standard error; the block itself is read from standard input */
+	threadexitsall("usage");	/* exit status is the string "usage" */
+}
+
+/*
+ * write entry point: read one block from standard input, store it
+ * on a venti server, and print the resulting score on standard
+ * output.
+ *
+ * Flags:
+ *	-z		pass the block through vtzerotruncate before writing
+ *	-h host		server to dial (otherwise vtdial is called with nil)
+ *	-t type		numeric block type; the default is VtDataType
+ *
+ * Input larger than VtMaxLumpSize bytes is rejected, as is a read
+ * error on standard input.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	char *host;
+	int dotrunc, n, type;
+	uchar *p, score[VtScoreSize];
+	VtConn *z;
+
+	fmtinstall('F', vtfcallfmt);
+	fmtinstall('V', vtscorefmt);
+
+	host = nil;
+	dotrunc = 0;
+	type = VtDataType;
+	ARGBEGIN{
+	case 'z':
+		dotrunc = 1;
+		break;
+	case 'h':
+		host = EARGF(usage());
+		break;
+	case 't':
+		type = atoi(EARGF(usage()));
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc != 0)
+		usage();
+
+	/* ask for one byte more than the limit so oversized input is detectable */
+	p = vtmallocz(VtMaxLumpSize+1);
+	n = readn(0, p, VtMaxLumpSize+1);
+	if(n < 0)
+		sysfatal("read error: %r");
+	if(n > VtMaxLumpSize)
+		sysfatal("input too big: max block size is %d", VtMaxLumpSize);
+	z = vtdial(host);
+	if(z == nil)
+		sysfatal("could not connect to server: %r");
+	if(vtconnect(z) < 0)
+		sysfatal("vtconnect: %r");
+	if(dotrunc)
+		n = vtzerotruncate(type, p, n);
+	if(vtwrite(z, score, type, p, n) < 0)
+		sysfatal("vtwrite: %r");
+	vthangup(z);
+	print("%V\n", score);
+	threadexitsall(0);
+}