src/cmd/venti/dcache.c - plan9 - Git at Google

 /*
  * The locking here is getting a little out of hand.
  */

 #include "stdinc.h"
 #include "dat.h"
 #include "fns.h"

 typedef struct DCache	DCache;

 enum
 {
 	HashLog		= 9,
 	HashSize	= 1<<HashLog,
 	HashMask	= HashSize - 1,
 };

 struct DCache
 {
 	QLock		lock;
 	RWLock		dirtylock;		/* must be held to inspect or set b->dirty */
 	u32int		flushround;
 	Rendez		anydirty;
 	Rendez		full;
 	Rendez		flush;
 	Rendez		flushdone;
 	DBlock		*free;			/* list of available lumps */
 	u32int		now;			/* ticks for usage timestamps */
 	int		size;			/* max. size of any block; allocated to each block */
 	DBlock		**heads;		/* hash table for finding address */
 	int		nheap;			/* number of available victims */
 	DBlock		**heap;			/* heap for locating victims */
 	int		nblocks;		/* number of blocks allocated */
 	DBlock		*blocks;		/* array of block descriptors */
 	DBlock		**write;		/* array of block pointers to be written */
 	u8int		*mem;			/* memory for all block descriptors */
 	int		ndirty;			/* number of dirty blocks */
 	int		maxdirty;		/* max. number of dirty blocks */
 };

 static DCache	dcache;

 static int	downheap(int i, DBlock *b);
 static int	upheap(int i, DBlock *b);
 static DBlock	*bumpdblock(void);
 static void	delheap(DBlock *db);
 static void	fixheap(int i, DBlock *b);
 static void	_flushdcache(void);
 static void	flushproc(void*);
 static void	flushtimerproc(void*);
 static void	writeproc(void*);

 void
 initdcache(u32int mem)
 {
 	DBlock *b, *last;
 	u32int nblocks, blocksize;
 	int i;

 	if(mem < maxblocksize * 2)
 		sysfatal("need at least %d bytes for the disk cache", maxblocksize * 2);
 	if(maxblocksize == 0)
 		sysfatal("no max. block size given for disk cache");
 	blocksize = maxblocksize;
 	nblocks = mem / blocksize;
 	dcache.full.l = &dcache.lock;
 	dcache.flush.l = &dcache.lock;
 	dcache.anydirty.l = &dcache.lock;
 	dcache.flushdone.l = &dcache.lock;
 	dcache.nblocks = nblocks;
 	dcache.maxdirty = (nblocks * 3) / 4;
 	if(1)
 		fprint(2, "initialize disk cache with %d blocks of %d bytes, maximum %d dirty blocks\n",
 			nblocks, blocksize, dcache.maxdirty);
 	dcache.size = blocksize;
 	dcache.heads = MKNZ(DBlock*, HashSize);
 	dcache.heap = MKNZ(DBlock*, nblocks);
 	dcache.blocks = MKNZ(DBlock, nblocks);
 	dcache.write = MKNZ(DBlock*, nblocks);
 	dcache.mem = MKNZ(u8int, nblocks * blocksize);

 	last = nil;
 	for(i = 0; i < nblocks; i++){
 		b = &dcache.blocks[i];
 		b->data = &dcache.mem[i * blocksize];
 		b->heap = TWID32;
 		chaninit(&b->writedonechan, sizeof(void*), 1);
 		b->next = last;
 		last = b;
 	}
 	dcache.free = last;
 	dcache.nheap = 0;

 	vtproc(flushproc, nil);
 	vtproc(flushtimerproc, nil);
 }

 static u32int
 pbhash(u64int addr)
 {
 	u32int h;

 #define hashit(c)	((((c) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
 	h = (addr >> 32) ^ addr;
 	return hashit(h);
 }

 DBlock*
 getdblock(Part *part, u64int addr, int read)
 {
 	DBlock *b;
 	u32int h, size;

 	size = part->blocksize;
 	if(size > dcache.size){
 		seterr(EAdmin, "block size %d too big for cache", size);
 		return nil;
 	}
 	h = pbhash(addr);

 	/*
 	 * look for the block in the cache
 	 */
 //checkdcache();
 	qlock(&dcache.lock);
 again:
 	for(b = dcache.heads[h]; b != nil; b = b->next){
 		if(b->part == part && b->addr == addr){
 			qlock(&stats.lock);
 			stats.pchit++;
 			qunlock(&stats.lock);
 			goto found;
 		}
 	}
 	qlock(&stats.lock);
 	stats.pcmiss++;
 	qunlock(&stats.lock);

 	/*
 	 * missed: locate the block with the oldest second to last use.
 	 * remove it from the heap, and fix up the heap.
 	 */
 	b = bumpdblock();
 	if(b == nil){
 		logerr(EAdmin, "all disk cache blocks in use");
 		rsleep(&dcache.full);
 		goto again;
 	}

 	/*
 	 * the new block has no last use, so assume it happens sometime in the middle
 ZZZ this is not reasonable
 	 */
 	b->used = (b->used2 + dcache.now) / 2;

 	/*
 	 * rechain the block on the correct hash chain
 	 */
 	b->next = dcache.heads[h];
 	dcache.heads[h] = b;
 	if(b->next != nil)
 		b->next->prev = b;
 	b->prev = nil;

 	b->addr = addr;
 	b->part = part;
 	b->size = 0;

 found:
 	b->ref++;
 	b->used2 = b->used;
 	b->used = dcache.now++;
 	if(b->heap != TWID32)
 		fixheap(b->heap, b);

 	qunlock(&dcache.lock);
 //checkdcache();

 	qlock(&b->lock);
 	if(b->size != size){
 		if(b->size < size){
 			if(!read)
 				memset(&b->data[b->size], 0, size - b->size);
 			else{
 				if(readpart(part, addr + b->size, &b->data[b->size], size - b->size) < 0){
 					putdblock(b);
 					return nil;
 				}
 				qlock(&stats.lock);
 				stats.pcreads++;
 				stats.pcbreads += size - b->size;
 				qunlock(&stats.lock);
 			}
 		}
 		b->size = size;
 	}

 	return b;
 }

 void
 putdblock(DBlock *b)
 {
 	if(b == nil)
 		return;

 	if(b->dirtying){
 		b->dirtying = 0;
 		runlock(&dcache.dirtylock);
 	}
 	qunlock(&b->lock);

 //checkdcache();
 	qlock(&dcache.lock);
 	if(b->dirty)
 		delheap(b);
 	else if(--b->ref == 0){
 		if(b->heap == TWID32)
 			upheap(dcache.nheap++, b);
 		rwakeupall(&dcache.full);
 	}

 	qunlock(&dcache.lock);
 //checkdcache();
 }

 void
 dirtydblock(DBlock *b, int dirty)
 {
 	int odirty;
 	Part *p;


 	assert(b->ref != 0);

 	/*
  	 * Because we use an rlock to keep track of how
 	 * many blocks are being dirtied (and a wlock to
 	 * stop dirtying), we cannot try to dirty two blocks
 	 * at the same time in the same thread -- if the
 	 * wlock happens between them, we get a deadlock.
 	 *
 	 * The only place in the code where we need to
 	 * dirty multiple blocks at once is in splitiblock, when
 	 * we split an index block.  The new block has a dirty
 	 * flag of DirtyIndexSplit already, because it has to get
 	 * written to disk before the old block (DirtyIndex).
 	 * So when we see the DirtyIndexSplit block, we don't
 	 * acquire the rlock (and we don't set dirtying, so putdblock
 	 * won't drop the rlock).  This kludginess means that
 	 * the caller will have to be sure to putdblock on the
 	 * new block before putdblock on the old block.
 	 */
 	if(dirty == DirtyIndexSplit){
 		/* caller should have another dirty block */
 		assert(!canwlock(&dcache.dirtylock));
 		/* this block should be clean */
 		assert(b->dirtying == 0);
 		assert(b->dirty == 0);
 	}else{
 		rlock(&dcache.dirtylock);
 		assert(b->dirtying == 0);
 		b->dirtying = 1;
 	}

 	qlock(&stats.lock);
 	if(b->dirty)
 		stats.absorbedwrites++;
 	stats.dirtydblocks++;
 	qunlock(&stats.lock);

 	/*
 	 * In general, shouldn't mark any block as more than one
 	 * type, except that split index blocks are a subcase of index
 	 * blocks.  Only clean blocks ever get marked DirtyIndexSplit,
 	 * though, so we don't need the opposite conjunction here.
 	 */
 	odirty = b->dirty;
 	if(b->dirty)
 		assert(b->dirty == dirty
 			|| (b->dirty==DirtyIndexSplit && dirty==DirtyIndex));
 	else
 		b->dirty = dirty;

 	p = b->part;
 	if(p->writechan == nil){
 fprint(2, "allocate write proc for part %s\n", p->name);
 		/* XXX hope this doesn't fail! */
 		p->writechan = chancreate(sizeof(DBlock*), dcache.nblocks);
 		vtproc(writeproc, p);
 	}
 	qlock(&dcache.lock);
 	if(!odirty){
 		dcache.ndirty++;
 		rwakeupall(&dcache.anydirty);
 	}
 	qunlock(&dcache.lock);
 }

 /*
  * remove some block from use and update the free list and counters
  */
 static DBlock*
 bumpdblock(void)
 {
 	int flushed;
 	DBlock *b;
 	ulong h;

 	b = dcache.free;
 	if(b != nil){
 		dcache.free = b->next;
 		return b;
 	}

 	if(dcache.ndirty >= dcache.maxdirty)
 		_flushdcache();

 	/*
 	 * remove blocks until we find one that is unused
 	 * referenced blocks are left in the heap even though
 	 * they can't be scavenged; this is simple a speed optimization
 	 */
 	flushed = 0;
 	for(;;){
 		if(dcache.nheap == 0){
 			_flushdcache();
 			return nil;
 		}
 		b = dcache.heap[0];
 		delheap(b);
 		if(!b->ref)
 			break;
 	}

 	fprint(2, "bump %s at %llud\n", b->part->name, b->addr);

 	/*
 	 * unchain the block
 	 */
 	if(b->prev == nil){
 		h = pbhash(b->addr);
 		if(dcache.heads[h] != b)
 			sysfatal("bad hash chains in disk cache");
 		dcache.heads[h] = b->next;
 	}else
 		b->prev->next = b->next;
 	if(b->next != nil)
 		b->next->prev = b->prev;

 	return b;
 }

 /*
  * delete an arbitrary block from the heap
  */
 static void
 delheap(DBlock *db)
 {
 	if(db->heap == TWID32)
 		return;
 	fixheap(db->heap, dcache.heap[--dcache.nheap]);
 	db->heap = TWID32;
 }

 /*
  * push an element up or down to it's correct new location
  */
 static void
 fixheap(int i, DBlock *b)
 {
 	if(upheap(i, b) == i)
 		downheap(i, b);
 }

 static int
 upheap(int i, DBlock *b)
 {
 	DBlock *bb;
 	u32int now;
 	int p;

 	now = dcache.now;
 	for(; i != 0; i = p){
 		p = (i - 1) >> 1;
 		bb = dcache.heap[p];
 		if(b->used2 - now >= bb->used2 - now)
 			break;
 		dcache.heap[i] = bb;
 		bb->heap = i;
 	}

 	dcache.heap[i] = b;
 	b->heap = i;
 	return i;
 }

 static int
 downheap(int i, DBlock *b)
 {
 	DBlock *bb;
 	u32int now;
 	int k;

 	now = dcache.now;
 	for(; ; i = k){
 		k = (i << 1) + 1;
 		if(k >= dcache.nheap)
 			break;
 		if(k + 1 < dcache.nheap && dcache.heap[k]->used2 - now > dcache.heap[k + 1]->used2 - now)
 			k++;
 		bb = dcache.heap[k];
 		if(b->used2 - now <= bb->used2 - now)
 			break;
 		dcache.heap[i] = bb;
 		bb->heap = i;
 	}

 	dcache.heap[i] = b;
 	b->heap = i;
 	return i;
 }

 static void
 findblock(DBlock *bb)
 {
 	DBlock *b, *last;
 	int h;

 	last = nil;
 	h = pbhash(bb->addr);
 	for(b = dcache.heads[h]; b != nil; b = b->next){
 		if(last != b->prev)
 			sysfatal("bad prev link");
 		if(b == bb)
 			return;
 		last = b;
 	}
 	sysfatal("block missing from hash table");
 }

 void
 checkdcache(void)
 {
 	DBlock *b;
 	u32int size, now;
 	int i, k, refed, nfree;

 	qlock(&dcache.lock);
 	size = dcache.size;
 	now = dcache.now;
 	for(i = 0; i < dcache.nheap; i++){
 		if(dcache.heap[i]->heap != i)
 			sysfatal("dc: mis-heaped at %d: %d", i, dcache.heap[i]->heap);
 		if(i > 0 && dcache.heap[(i - 1) >> 1]->used2 - now > dcache.heap[i]->used2 - now)
 			sysfatal("dc: bad heap ordering");
 		k = (i << 1) + 1;
 		if(k < dcache.nheap && dcache.heap[i]->used2 - now > dcache.heap[k]->used2 - now)
 			sysfatal("dc: bad heap ordering");
 		k++;
 		if(k < dcache.nheap && dcache.heap[i]->used2 - now > dcache.heap[k]->used2 - now)
 			sysfatal("dc: bad heap ordering");
 	}

 	refed = 0;
 	for(i = 0; i < dcache.nblocks; i++){
 		b = &dcache.blocks[i];
 		if(b->data != &dcache.mem[i * size])
 			sysfatal("dc: mis-blocked at %d", i);
 		if(b->ref && b->heap == TWID32)
 			refed++;
 		if(b->addr)
 			findblock(b);
 		if(b->heap != TWID32
 		&& dcache.heap[b->heap] != b)
 			sysfatal("dc: spurious heap value");
 	}

 	nfree = 0;
 	for(b = dcache.free; b != nil; b = b->next){
 		if(b->addr != 0 || b->heap != TWID32)
 			sysfatal("dc: bad free list");
 		nfree++;
 	}

 	if(dcache.nheap + nfree + refed != dcache.nblocks)
 		sysfatal("dc: missing blocks: %d %d %d", dcache.nheap, refed, dcache.nblocks);
 	qunlock(&dcache.lock);
 }

 void
 flushdcache(void)
 {
 	u32int flushround;

 	qlock(&dcache.lock);
 	flushround = dcache.flushround;
 	rwakeupall(&dcache.flush);
 	while(flushround == dcache.flushround)
 		rsleep(&dcache.flushdone);
 	qunlock(&dcache.lock);
 }

 static void
 _flushdcache(void)
 {
 	rwakeupall(&dcache.flush);
 }

 static int
 parallelwrites(DBlock **b, DBlock **eb, int dirty)
 {
 	DBlock **p;

 	for(p=b; p<eb && (*p)->dirty == dirty; p++)
 		sendp((*p)->part->writechan, *p);
 	for(p=b; p<eb && (*p)->dirty == dirty; p++)
 		recvp(&(*p)->writedonechan);

 	return p-b;
 }

 /*
  * Sort first by dirty flag, then by partition, then by address in partition.
  */
 static int
 writeblockcmp(const void *va, const void *vb)
 {
 	DBlock *a, *b;

 	a = *(DBlock**)va;
 	b = *(DBlock**)vb;

 	if(a->dirty != b->dirty)
 		return a->dirty - b->dirty;
 	if(a->part != b->part){
 		if(a->part < b->part)
 			return -1;
 		if(a->part > b->part)
 			return 1;
 	}
 	if(a->addr < b->addr)
 		return -1;
 	return 1;
 }

 static void
 flushtimerproc(void *v)
 {
 	u32int round;

 	for(;;){
 		qlock(&dcache.lock);
 		while(dcache.ndirty == 0)
 			rsleep(&dcache.anydirty);
 		round = dcache.flushround;
 		qunlock(&dcache.lock);

 		sleep(60*1000);

 		qlock(&dcache.lock);
 		if(round == dcache.flushround){
 			rwakeupall(&dcache.flush);
 			while(round == dcache.flushround)
 				rsleep(&dcache.flushdone);
 		}
 		qunlock(&dcache.lock);
 	}
 }

 static void
 flushproc(void *v)
 {
 	int i, j, n;
 	ulong t0;
 	DBlock *b, **write;

 	USED(v);
 	for(;;){
 		qlock(&dcache.lock);
 		dcache.flushround++;
 		rwakeupall(&dcache.flushdone);
 		rsleep(&dcache.flush);
 		qunlock(&dcache.lock);

 		fprint(2, "flushing dcache: t=0 ms\n");
 		t0 = nsec()/1000;

 		/*
 		 * Because we don't record any dependencies at all, we must write out
 		 * all blocks currently dirty.  Thus we must lock all the blocks that
 		 * are currently dirty.
 		 *
 		 * We grab dirtylock to stop the dirtying of new blocks.
 		 * Then we wait until all the current blocks finish being dirtied.
 		 * Now all the dirty blocks in the system are immutable (clean blocks
 		 * might still get recycled), so we can plan our disk writes.
 		 *
 		 * In a better scheme, dirtiers might lock the block for writing in getdblock,
 		 * so that flushproc could lock all the blocks here and then unlock them as it
 		 * finishes with them.
 		 */

 		fprint(2, "flushproc: wlock at t=%lud\n", (ulong)(nsec()/1000) - t0);
 		wlock(&dcache.dirtylock);

 		fprint(2, "flushproc: build list at t=%lud\n", (ulong)(nsec()/1000) - t0);
 		write = dcache.write;
 		n = 0;
 		for(i=0; i<dcache.nblocks; i++){
 			b = &dcache.blocks[i];
 			if(b->dirty)
 				write[n++] = b;
 		}

 		qsort(write, n, sizeof(write[0]), writeblockcmp);

 		/* Write each stage of blocks out. */
 		fprint(2, "flushproc: write blocks at t=%lud\n", (ulong)(nsec()/1000) - t0);
 		i = 0;
 		for(j=1; j<DirtyMax; j++){
 			fprint(2, "flushproc: flush dirty %d at t=%lud\n", j, (ulong)(nsec()/1000) - t0);
 			i += parallelwrites(write+i, write+n, j);
 		}
 		assert(i == n);

 		fprint(2, "flushproc: update dirty bits at t=%lud\n", (ulong)(nsec()/1000) - t0);
 		qlock(&dcache.lock);
 		for(i=0; i<n; i++){
 			b = write[i];
 			b->dirty = 0;
 			--dcache.ndirty;
 			if(b->ref == 0 && b->heap == TWID32){
 				upheap(dcache.nheap++, b);
 				rwakeupall(&dcache.full);
 			}
 		}
 		qunlock(&dcache.lock);
 		wunlock(&dcache.dirtylock);

 		fprint(2, "flushproc: done at t=%lud\n", (ulong)(nsec()/1000) - t0);

 		qlock(&stats.lock);
 		stats.dcacheflushes++;
 		stats.dcacheflushwrites += n;
 		qunlock(&stats.lock);
 	}
 }

 static void
 writeproc(void *v)
 {
 	DBlock *b;
 	Part *p;

 	p = v;

 	for(;;){
 		b = recvp(p->writechan);
 		if(writepart(p, b->addr, b->data, b->size) < 0)
 			fprint(2, "write error: %r\n"); /* XXX details! */
 		sendp(&b->writedonechan, b);
 	}
 }
	/*
	* The locking here is getting a little out of hand.
	*/

	#include "stdinc.h"
	#include "dat.h"
	#include "fns.h"

	typedef struct DCache DCache;

	enum
	{
	HashLog = 9,
	HashSize = 1<<HashLog,
	HashMask = HashSize - 1,
	};

	struct DCache
	{
	QLock lock;
	RWLock dirtylock; /* must be held to inspect or set b->dirty */
	u32int flushround;
	Rendez anydirty;
	Rendez full;
	Rendez flush;
	Rendez flushdone;
	DBlock free; / list of available lumps */
	u32int now; /* ticks for usage timestamps */
	int size; /* max. size of any block; allocated to each block */
	DBlock *heads; / hash table for finding address */
	int nheap; /* number of available victims */
	DBlock *heap; / heap for locating victims */
	int nblocks; /* number of blocks allocated */
	DBlock blocks; / array of block descriptors */
	DBlock *write; / array of block pointers to be written */
	u8int mem; / memory for all block descriptors */
	int ndirty; /* number of dirty blocks */
	int maxdirty; /* max. number of dirty blocks */
	};

	static DCache dcache;

	static int downheap(int i, DBlock *b);
	static int upheap(int i, DBlock *b);
	static DBlock *bumpdblock(void);
	static void delheap(DBlock *db);
	static void fixheap(int i, DBlock *b);
	static void _flushdcache(void);
	static void flushproc(void*);
	static void flushtimerproc(void*);
	static void writeproc(void*);

	void
	initdcache(u32int mem)
	{
	DBlock b, last;
	u32int nblocks, blocksize;
	int i;

	if(mem < maxblocksize * 2)
	sysfatal("need at least %d bytes for the disk cache", maxblocksize * 2);
	if(maxblocksize == 0)
	sysfatal("no max. block size given for disk cache");
	blocksize = maxblocksize;
	nblocks = mem / blocksize;
	dcache.full.l = &dcache.lock;
	dcache.flush.l = &dcache.lock;
	dcache.anydirty.l = &dcache.lock;
	dcache.flushdone.l = &dcache.lock;
	dcache.nblocks = nblocks;
	dcache.maxdirty = (nblocks * 3) / 4;
	if(1)
	fprint(2, "initialize disk cache with %d blocks of %d bytes, maximum %d dirty blocks\n",
	nblocks, blocksize, dcache.maxdirty);
	dcache.size = blocksize;
	dcache.heads = MKNZ(DBlock*, HashSize);
	dcache.heap = MKNZ(DBlock*, nblocks);
	dcache.blocks = MKNZ(DBlock, nblocks);
	dcache.write = MKNZ(DBlock*, nblocks);
	dcache.mem = MKNZ(u8int, nblocks * blocksize);

	last = nil;
	for(i = 0; i < nblocks; i++){
	b = &dcache.blocks[i];
	b->data = &dcache.mem[i * blocksize];
	b->heap = TWID32;
	chaninit(&b->writedonechan, sizeof(void*), 1);
	b->next = last;
	last = b;
	}
	dcache.free = last;
	dcache.nheap = 0;

	vtproc(flushproc, nil);
	vtproc(flushtimerproc, nil);
	}

	static u32int
	pbhash(u64int addr)
	{
	u32int h;

	#define hashit(c) ((((c) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
	h = (addr >> 32) ^ addr;
	return hashit(h);
	}

	DBlock*
	getdblock(Part *part, u64int addr, int read)
	{
	DBlock *b;
	u32int h, size;

	size = part->blocksize;
	if(size > dcache.size){
	seterr(EAdmin, "block size %d too big for cache", size);
	return nil;
	}
	h = pbhash(addr);

	/*
	* look for the block in the cache
	*/
	//checkdcache();
	qlock(&dcache.lock);
	again:
	for(b = dcache.heads[h]; b != nil; b = b->next){
	if(b->part == part && b->addr == addr){
	qlock(&stats.lock);
	stats.pchit++;
	qunlock(&stats.lock);
	goto found;
	}
	}
	qlock(&stats.lock);
	stats.pcmiss++;
	qunlock(&stats.lock);

	/*
	* missed: locate the block with the oldest second to last use.
	* remove it from the heap, and fix up the heap.
	*/
	b = bumpdblock();
	if(b == nil){
	logerr(EAdmin, "all disk cache blocks in use");
	rsleep(&dcache.full);
	goto again;
	}

	/*
	* the new block has no last use, so assume it happens sometime in the middle
	ZZZ this is not reasonable
	*/
	b->used = (b->used2 + dcache.now) / 2;

	/*
	* rechain the block on the correct hash chain
	*/
	b->next = dcache.heads[h];
	dcache.heads[h] = b;
	if(b->next != nil)
	b->next->prev = b;
	b->prev = nil;

	b->addr = addr;
	b->part = part;
	b->size = 0;

	found:
	b->ref++;
	b->used2 = b->used;
	b->used = dcache.now++;
	if(b->heap != TWID32)
	fixheap(b->heap, b);

	qunlock(&dcache.lock);
	//checkdcache();

	qlock(&b->lock);
	if(b->size != size){
	if(b->size < size){
	if(!read)
	memset(&b->data[b->size], 0, size - b->size);
	else{
	if(readpart(part, addr + b->size, &b->data[b->size], size - b->size) < 0){
	putdblock(b);
	return nil;
	}
	qlock(&stats.lock);
	stats.pcreads++;
	stats.pcbreads += size - b->size;
	qunlock(&stats.lock);
	}
	}
	b->size = size;
	}

	return b;
	}

	void
	putdblock(DBlock *b)
	{
	if(b == nil)
	return;

	if(b->dirtying){
	b->dirtying = 0;
	runlock(&dcache.dirtylock);
	}
	qunlock(&b->lock);

	//checkdcache();
	qlock(&dcache.lock);
	if(b->dirty)
	delheap(b);
	else if(--b->ref == 0){
	if(b->heap == TWID32)
	upheap(dcache.nheap++, b);
	rwakeupall(&dcache.full);
	}

	qunlock(&dcache.lock);
	//checkdcache();
	}

	void
	dirtydblock(DBlock *b, int dirty)
	{
	int odirty;
	Part *p;


	assert(b->ref != 0);

	/*
	* Because we use an rlock to keep track of how
	* many blocks are being dirtied (and a wlock to
	* stop dirtying), we cannot try to dirty two blocks
	* at the same time in the same thread -- if the
	* wlock happens between them, we get a deadlock.
	*
	* The only place in the code where we need to
	* dirty multiple blocks at once is in splitiblock, when
	* we split an index block. The new block has a dirty
	* flag of DirtyIndexSplit already, because it has to get
	* written to disk before the old block (DirtyIndex).
	* So when we see the DirtyIndexSplit block, we don't
	* acquire the rlock (and we don't set dirtying, so putdblock
	* won't drop the rlock). This kludginess means that
	* the caller will have to be sure to putdblock on the
	* new block before putdblock on the old block.
	*/
	if(dirty == DirtyIndexSplit){
	/* caller should have another dirty block */
	assert(!canwlock(&dcache.dirtylock));
	/* this block should be clean */
	assert(b->dirtying == 0);
	assert(b->dirty == 0);
	}else{
	rlock(&dcache.dirtylock);
	assert(b->dirtying == 0);
	b->dirtying = 1;
	}

	qlock(&stats.lock);
	if(b->dirty)
	stats.absorbedwrites++;
	stats.dirtydblocks++;
	qunlock(&stats.lock);

	/*
	* In general, shouldn't mark any block as more than one
	* type, except that split index blocks are a subcase of index
	* blocks. Only clean blocks ever get marked DirtyIndexSplit,
	* though, so we don't need the opposite conjunction here.
	*/
	odirty = b->dirty;
	if(b->dirty)
	assert(b->dirty == dirty
	\|\| (b->dirty==DirtyIndexSplit && dirty==DirtyIndex));
	else
	b->dirty = dirty;

	p = b->part;
	if(p->writechan == nil){
	fprint(2, "allocate write proc for part %s\n", p->name);
	/* XXX hope this doesn't fail! */
	p->writechan = chancreate(sizeof(DBlock*), dcache.nblocks);
	vtproc(writeproc, p);
	}
	qlock(&dcache.lock);
	if(!odirty){
	dcache.ndirty++;
	rwakeupall(&dcache.anydirty);
	}
	qunlock(&dcache.lock);
	}

	/*
	* remove some block from use and update the free list and counters
	*/
	static DBlock*
	bumpdblock(void)
	{
	int flushed;
	DBlock *b;
	ulong h;

	b = dcache.free;
	if(b != nil){
	dcache.free = b->next;
	return b;
	}

	if(dcache.ndirty >= dcache.maxdirty)
	_flushdcache();

	/*
	* remove blocks until we find one that is unused
	* referenced blocks are left in the heap even though
	* they can't be scavenged; this is simple a speed optimization
	*/
	flushed = 0;
	for(;;){
	if(dcache.nheap == 0){
	_flushdcache();
	return nil;
	}
	b = dcache.heap[0];
	delheap(b);
	if(!b->ref)
	break;
	}

	fprint(2, "bump %s at %llud\n", b->part->name, b->addr);

	/*
	* unchain the block
	*/
	if(b->prev == nil){
	h = pbhash(b->addr);
	if(dcache.heads[h] != b)
	sysfatal("bad hash chains in disk cache");
	dcache.heads[h] = b->next;
	}else
	b->prev->next = b->next;
	if(b->next != nil)
	b->next->prev = b->prev;

	return b;
	}

	/*
	* delete an arbitrary block from the heap
	*/
	static void
	delheap(DBlock *db)
	{
	if(db->heap == TWID32)
	return;
	fixheap(db->heap, dcache.heap[--dcache.nheap]);
	db->heap = TWID32;
	}

	/*
	* push an element up or down to it's correct new location
	*/
	static void
	fixheap(int i, DBlock *b)
	{
	if(upheap(i, b) == i)
	downheap(i, b);
	}

	static int
	upheap(int i, DBlock *b)
	{
	DBlock *bb;
	u32int now;
	int p;

	now = dcache.now;
	for(; i != 0; i = p){
	p = (i - 1) >> 1;
	bb = dcache.heap[p];
	if(b->used2 - now >= bb->used2 - now)
	break;
	dcache.heap[i] = bb;
	bb->heap = i;
	}

	dcache.heap[i] = b;
	b->heap = i;
	return i;
	}

	static int
	downheap(int i, DBlock *b)
	{
	DBlock *bb;
	u32int now;
	int k;

	now = dcache.now;
	for(; ; i = k){
	k = (i << 1) + 1;
	if(k >= dcache.nheap)
	break;
	if(k + 1 < dcache.nheap && dcache.heap[k]->used2 - now > dcache.heap[k + 1]->used2 - now)
	k++;
	bb = dcache.heap[k];
	if(b->used2 - now <= bb->used2 - now)
	break;
	dcache.heap[i] = bb;
	bb->heap = i;
	}

	dcache.heap[i] = b;
	b->heap = i;
	return i;
	}

	static void
	findblock(DBlock *bb)
	{
	DBlock b, last;
	int h;

	last = nil;
	h = pbhash(bb->addr);
	for(b = dcache.heads[h]; b != nil; b = b->next){
	if(last != b->prev)
	sysfatal("bad prev link");
	if(b == bb)
	return;
	last = b;
	}
	sysfatal("block missing from hash table");
	}

	void
	checkdcache(void)
	{
	DBlock *b;
	u32int size, now;
	int i, k, refed, nfree;

	qlock(&dcache.lock);
	size = dcache.size;
	now = dcache.now;
	for(i = 0; i < dcache.nheap; i++){
	if(dcache.heap[i]->heap != i)
	sysfatal("dc: mis-heaped at %d: %d", i, dcache.heap[i]->heap);
	if(i > 0 && dcache.heap[(i - 1) >> 1]->used2 - now > dcache.heap[i]->used2 - now)
	sysfatal("dc: bad heap ordering");
	k = (i << 1) + 1;
	if(k < dcache.nheap && dcache.heap[i]->used2 - now > dcache.heap[k]->used2 - now)
	sysfatal("dc: bad heap ordering");
	k++;
	if(k < dcache.nheap && dcache.heap[i]->used2 - now > dcache.heap[k]->used2 - now)
	sysfatal("dc: bad heap ordering");
	}

	refed = 0;
	for(i = 0; i < dcache.nblocks; i++){
	b = &dcache.blocks[i];
	if(b->data != &dcache.mem[i * size])
	sysfatal("dc: mis-blocked at %d", i);
	if(b->ref && b->heap == TWID32)
	refed++;
	if(b->addr)
	findblock(b);
	if(b->heap != TWID32
	&& dcache.heap[b->heap] != b)
	sysfatal("dc: spurious heap value");
	}

	nfree = 0;
	for(b = dcache.free; b != nil; b = b->next){
	if(b->addr != 0 \|\| b->heap != TWID32)
	sysfatal("dc: bad free list");
	nfree++;
	}

	if(dcache.nheap + nfree + refed != dcache.nblocks)
	sysfatal("dc: missing blocks: %d %d %d", dcache.nheap, refed, dcache.nblocks);
	qunlock(&dcache.lock);
	}

	void
	flushdcache(void)
	{
	u32int flushround;

	qlock(&dcache.lock);
	flushround = dcache.flushround;
	rwakeupall(&dcache.flush);
	while(flushround == dcache.flushround)
	rsleep(&dcache.flushdone);
	qunlock(&dcache.lock);
	}

	static void
	_flushdcache(void)
	{
	rwakeupall(&dcache.flush);
	}

	static int
	parallelwrites(DBlock b, DBlock eb, int dirty)
	{
	DBlock **p;

	for(p=b; p<eb && (*p)->dirty == dirty; p++)
	sendp((p)->part->writechan, p);
	for(p=b; p<eb && (*p)->dirty == dirty; p++)
	recvp(&(*p)->writedonechan);

	return p-b;
	}

	/*
	* Sort first by dirty flag, then by partition, then by address in partition.
	*/
	static int
	writeblockcmp(const void va, const void vb)
	{
	DBlock a, b;

	a = (DBlock*)va;
	b = (DBlock*)vb;

	if(a->dirty != b->dirty)
	return a->dirty - b->dirty;
	if(a->part != b->part){
	if(a->part < b->part)
	return -1;
	if(a->part > b->part)
	return 1;
	}
	if(a->addr < b->addr)
	return -1;
	return 1;
	}

	static void
	flushtimerproc(void *v)
	{
	u32int round;

	for(;;){
	qlock(&dcache.lock);
	while(dcache.ndirty == 0)
	rsleep(&dcache.anydirty);
	round = dcache.flushround;
	qunlock(&dcache.lock);

	sleep(60*1000);

	qlock(&dcache.lock);
	if(round == dcache.flushround){
	rwakeupall(&dcache.flush);
	while(round == dcache.flushround)
	rsleep(&dcache.flushdone);
	}
	qunlock(&dcache.lock);
	}
	}

	static void
	flushproc(void *v)
	{
	int i, j, n;
	ulong t0;
	DBlock b, *write;

	USED(v);
	for(;;){
	qlock(&dcache.lock);
	dcache.flushround++;
	rwakeupall(&dcache.flushdone);
	rsleep(&dcache.flush);
	qunlock(&dcache.lock);

	fprint(2, "flushing dcache: t=0 ms\n");
	t0 = nsec()/1000;

	/*
	* Because we don't record any dependencies at all, we must write out
	* all blocks currently dirty. Thus we must lock all the blocks that
	* are currently dirty.
	*
	* We grab dirtylock to stop the dirtying of new blocks.
	* Then we wait until all the current blocks finish being dirtied.
	* Now all the dirty blocks in the system are immutable (clean blocks
	* might still get recycled), so we can plan our disk writes.
	*
	* In a better scheme, dirtiers might lock the block for writing in getdblock,
	* so that flushproc could lock all the blocks here and then unlock them as it
	* finishes with them.
	*/

	fprint(2, "flushproc: wlock at t=%lud\n", (ulong)(nsec()/1000) - t0);
	wlock(&dcache.dirtylock);

	fprint(2, "flushproc: build list at t=%lud\n", (ulong)(nsec()/1000) - t0);
	write = dcache.write;
	n = 0;
	for(i=0; i<dcache.nblocks; i++){
	b = &dcache.blocks[i];
	if(b->dirty)
	write[n++] = b;
	}

	qsort(write, n, sizeof(write[0]), writeblockcmp);

	/* Write each stage of blocks out. */
	fprint(2, "flushproc: write blocks at t=%lud\n", (ulong)(nsec()/1000) - t0);
	i = 0;
	for(j=1; j<DirtyMax; j++){
	fprint(2, "flushproc: flush dirty %d at t=%lud\n", j, (ulong)(nsec()/1000) - t0);
	i += parallelwrites(write+i, write+n, j);
	}
	assert(i == n);

	fprint(2, "flushproc: update dirty bits at t=%lud\n", (ulong)(nsec()/1000) - t0);
	qlock(&dcache.lock);
	for(i=0; i<n; i++){
	b = write[i];
	b->dirty = 0;
	--dcache.ndirty;
	if(b->ref == 0 && b->heap == TWID32){
	upheap(dcache.nheap++, b);
	rwakeupall(&dcache.full);
	}
	}
	qunlock(&dcache.lock);
	wunlock(&dcache.dirtylock);

	fprint(2, "flushproc: done at t=%lud\n", (ulong)(nsec()/1000) - t0);

	qlock(&stats.lock);
	stats.dcacheflushes++;
	stats.dcacheflushwrites += n;
	qunlock(&stats.lock);
	}
	}

	static void
	writeproc(void *v)
	{
	DBlock *b;
	Part *p;

	p = v;

	for(;;){
	b = recvp(p->writechan);
	if(writepart(p, b->addr, b->data, b->size) < 0)
	fprint(2, "write error: %r\n"); /* XXX details! */
	sendp(&b->writedonechan, b);
	}
	}