|  | #include "stdinc.h" | 
|  | #include "dat.h" | 
|  | #include "fns.h" | 
|  | #include "error.h" | 
|  |  | 
|  | #include "9.h"	/* for cacheFlush */ | 
|  |  | 
|  | typedef struct FreeList FreeList; | 
|  | typedef struct BAddr BAddr; | 
|  |  | 
|  | enum { | 
|  | BadHeap = ~0, | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * Store data to the memory cache in c->size blocks | 
|  | * with the block zero extended to fill it out.  When writing to | 
|  | * Venti, the block will be zero truncated.  The walker will also check | 
|  | * that the block fits within psize or dsize as the case may be. | 
|  | */ | 
|  |  | 
|  | struct Cache | 
|  | { | 
|  | QLock	lk; | 
|  | int 	ref; | 
|  | int	mode; | 
|  |  | 
|  | Disk 	*disk; | 
|  | int	size;			/* block size */ | 
|  | int	ndmap;		/* size of per-block dirty pointer map used in blockWrite */ | 
|  | VtConn *z; | 
|  | u32int	now;			/* ticks for usage timestamps */ | 
|  | Block	**heads;		/* hash table for finding address */ | 
|  | int	nheap;			/* number of available victims */ | 
|  | Block	**heap;			/* heap for locating victims */ | 
|  | long	nblocks;		/* number of blocks allocated */ | 
|  | Block	*blocks;		/* array of block descriptors */ | 
|  | u8int	*mem;			/* memory for all block data & blists */ | 
|  |  | 
|  | BList	*blfree; | 
|  | Rendez	blrend; | 
|  |  | 
|  | int 	ndirty;			/* number of dirty blocks in the cache */ | 
|  | int 	maxdirty;		/* max number of dirty blocks */ | 
|  | u32int	vers; | 
|  |  | 
|  | long hashSize; | 
|  |  | 
|  | FreeList *fl; | 
|  |  | 
|  | Rendez die;			/* daemon threads should die when die.l != nil */ | 
|  |  | 
|  | Rendez flush; | 
|  | Rendez flushwait; | 
|  | Rendez heapwait; | 
|  | BAddr *baddr; | 
|  | int bw, br, be; | 
|  | int nflush; | 
|  |  | 
|  | Periodic *sync; | 
|  |  | 
|  | /* unlink daemon */ | 
|  | BList *uhead; | 
|  | BList *utail; | 
|  | Rendez unlink; | 
|  |  | 
|  | /* block counts */ | 
|  | int nused; | 
|  | int ndisk; | 
|  | }; | 
|  |  | 
|  | struct BList { | 
|  | int part; | 
|  | u32int addr; | 
|  | uchar type; | 
|  | u32int tag; | 
|  | u32int epoch; | 
|  | u32int vers; | 
|  |  | 
|  | int recurse;	/* for block unlink */ | 
|  |  | 
|  | /* for roll back */ | 
|  | int index;			/* -1 indicates not valid */ | 
|  | union { | 
|  | uchar score[VtScoreSize]; | 
|  | uchar entry[VtEntrySize]; | 
|  | } old; | 
|  | BList *next; | 
|  | }; | 
|  |  | 
|  | struct BAddr { | 
|  | int part; | 
|  | u32int addr; | 
|  | u32int vers; | 
|  | }; | 
|  |  | 
|  | struct FreeList { | 
|  | QLock lk; | 
|  | u32int last;		/* last block allocated */ | 
|  | u32int end;		/* end of data partition */ | 
|  | u32int nused;		/* number of used blocks */ | 
|  | u32int	epochLow;	/* low epoch in effect when nused was last updated */ | 
|  | }; | 
|  |  | 
|  | static FreeList *flAlloc(u32int end); | 
|  | static void flFree(FreeList *fl); | 
|  |  | 
|  | static Block *cacheBumpBlock(Cache *c); | 
|  | static void heapDel(Block*); | 
|  | static void heapIns(Block*); | 
|  | static void cacheCheck(Cache*); | 
|  | static void unlinkThread(void *a); | 
|  | static void flushThread(void *a); | 
|  | static void unlinkBody(Cache *c); | 
|  | static int cacheFlushBlock(Cache *c); | 
|  | static void cacheSync(void*); | 
|  | static BList *blistAlloc(Block*); | 
|  | static void blistFree(Cache*, BList*); | 
|  | static void doRemoveLink(Cache*, BList*); | 
|  |  | 
|  | /* | 
|  | * Mapping from local block type to Venti type | 
|  | */ | 
|  | int vtType[BtMax] = { | 
|  | VtDataType,		/* BtData | 0  */ | 
|  | VtDataType+1,		/* BtData | 1  */ | 
|  | VtDataType+2,		/* BtData | 2  */ | 
|  | VtDataType+3,		/* BtData | 3  */ | 
|  | VtDataType+4,		/* BtData | 4  */ | 
|  | VtDataType+5,		/* BtData | 5  */ | 
|  | VtDataType+6,		/* BtData | 6  */ | 
|  | VtDataType+7,		/* BtData | 7  */ | 
|  | VtDirType,		/* BtDir | 0  */ | 
|  | VtDirType+1,		/* BtDir | 1  */ | 
|  | VtDirType+2,		/* BtDir | 2  */ | 
|  | VtDirType+3,		/* BtDir | 3  */ | 
|  | VtDirType+4,		/* BtDir | 4  */ | 
|  | VtDirType+5,		/* BtDir | 5  */ | 
|  | VtDirType+6,		/* BtDir | 6  */ | 
|  | VtDirType+7,		/* BtDir | 7  */ | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * Allocate the memory cache. | 
|  | */ | 
|  | Cache * | 
|  | cacheAlloc(Disk *disk, VtConn *z, ulong nblocks, int mode) | 
|  | { | 
|  | int i; | 
|  | Cache *c; | 
|  | Block *b; | 
|  | BList *bl; | 
|  | u8int *p; | 
|  | int nbl; | 
|  |  | 
|  | c = vtmallocz(sizeof(Cache)); | 
|  |  | 
|  | /* reasonable number of BList elements */ | 
|  | nbl = nblocks * 4; | 
|  |  | 
|  | c->ref = 1; | 
|  | c->disk = disk; | 
|  | c->z = z; | 
|  | c->size = diskBlockSize(disk); | 
|  | bwatchSetBlockSize(c->size); | 
|  | /* round c->size up to be a nice multiple */ | 
|  | c->size = (c->size + 127) & ~127; | 
|  | c->ndmap = (c->size/20 + 7) / 8; | 
|  | c->nblocks = nblocks; | 
|  | c->hashSize = nblocks; | 
|  | c->heads = vtmallocz(c->hashSize*sizeof(Block*)); | 
|  | c->heap = vtmallocz(nblocks*sizeof(Block*)); | 
|  | c->blocks = vtmallocz(nblocks*sizeof(Block)); | 
|  | c->mem = vtmallocz(nblocks * (c->size + c->ndmap) + nbl * sizeof(BList)); | 
|  | c->baddr = vtmallocz(nblocks * sizeof(BAddr)); | 
|  | c->mode = mode; | 
|  | c->vers++; | 
|  | p = c->mem; | 
|  | for(i = 0; i < nblocks; i++){ | 
|  | b = &c->blocks[i]; | 
|  | b->c = c; | 
|  | b->data = p; | 
|  | b->heap = i; | 
|  | b->ioready.l = &b->lk; | 
|  | c->heap[i] = b; | 
|  | p += c->size; | 
|  | } | 
|  | c->nheap = nblocks; | 
|  | for(i = 0; i < nbl; i++){ | 
|  | bl = (BList*)p; | 
|  | bl->next = c->blfree; | 
|  | c->blfree = bl; | 
|  | p += sizeof(BList); | 
|  | } | 
|  | /* separate loop to keep blocks and blists reasonably aligned */ | 
|  | for(i = 0; i < nblocks; i++){ | 
|  | b = &c->blocks[i]; | 
|  | b->dmap = p; | 
|  | p += c->ndmap; | 
|  | } | 
|  |  | 
|  | c->blrend.l = &c->lk; | 
|  |  | 
|  | c->maxdirty = nblocks*(DirtyPercentage*0.01); | 
|  |  | 
|  | c->fl = flAlloc(diskSize(disk, PartData)); | 
|  |  | 
|  | c->unlink.l = &c->lk; | 
|  | c->flush.l = &c->lk; | 
|  | c->flushwait.l = &c->lk; | 
|  | c->heapwait.l = &c->lk; | 
|  | c->sync = periodicAlloc(cacheSync, c, 30*1000); | 
|  |  | 
|  | if(mode == OReadWrite){ | 
|  | c->ref += 2; | 
|  | proccreate(unlinkThread, c, STACK); | 
|  | proccreate(flushThread, c, STACK); | 
|  | } | 
|  | cacheCheck(c); | 
|  |  | 
|  | return c; | 
|  | } | 
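|  |  | 
|  | /* | 
|  | * Illustrative use of cacheAlloc/cacheFree (a sketch only; the real call | 
|  | * site lives in the file system setup code, and the diskAlloc/vtdial | 
|  | * arguments shown here are assumed): | 
|  | * | 
|  | *	disk = diskAlloc(fd); | 
|  | *	z = vtdial(nil); | 
|  | *	c = cacheAlloc(disk, z, 1000, OReadWrite); | 
|  | *	...cacheLocal, cacheGlobal, cacheAllocBlock, blockPut... | 
|  | *	cacheFree(c); | 
|  | */ | 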
|  |  | 
|  | /* | 
|  | * Free the whole memory cache, flushing all dirty blocks to the disk. | 
|  | */ | 
|  | void | 
|  | cacheFree(Cache *c) | 
|  | { | 
|  | int i; | 
|  |  | 
|  | /* kill off daemon threads */ | 
|  | qlock(&c->lk); | 
|  | c->die.l = &c->lk; | 
|  | periodicKill(c->sync); | 
|  | rwakeup(&c->flush); | 
|  | rwakeup(&c->unlink); | 
|  | while(c->ref > 1) | 
|  | rsleep(&c->die); | 
|  |  | 
|  | /* flush everything out */ | 
|  | do { | 
|  | unlinkBody(c); | 
|  | qunlock(&c->lk); | 
|  | while(cacheFlushBlock(c)) | 
|  | ; | 
|  | diskFlush(c->disk); | 
|  | qlock(&c->lk); | 
|  | } while(c->uhead || c->ndirty); | 
|  | qunlock(&c->lk); | 
|  |  | 
|  | cacheCheck(c); | 
|  |  | 
|  | for(i = 0; i < c->nblocks; i++){ | 
|  | assert(c->blocks[i].ref == 0); | 
|  | } | 
|  | flFree(c->fl); | 
|  | vtfree(c->baddr); | 
|  | vtfree(c->heads); | 
|  | vtfree(c->blocks); | 
|  | vtfree(c->mem); | 
|  | diskFree(c->disk); | 
|  | /* don't close vtSession */ | 
|  | vtfree(c); | 
|  | } | 
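|  |  | 
|  | /* | 
|  | * Dump the state of every block descriptor to stderr (debugging aid). | 
|  | */ | 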
|  |  | 
|  | static void | 
|  | cacheDump(Cache *c) | 
|  | { | 
|  | int i; | 
|  | Block *b; | 
|  |  | 
|  | for(i = 0; i < c->nblocks; i++){ | 
|  | b = &c->blocks[i]; | 
|  | fprint(2, "%d. p=%d a=%ud %V t=%d ref=%d state=%s io=%s pc=%#p\n", | 
|  | i, b->part, b->addr, b->score, b->l.type, b->ref, | 
|  | bsStr(b->l.state), bioStr(b->iostate), b->pc); | 
|  | } | 
|  | } | 
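|  |  | 
|  | /* | 
|  | * Consistency checks: verify the heap indices and ordering, the | 
|  | * block-to-memory mapping, and that every block is either in the heap | 
|  | * or referenced; blocks still in use are reported to stderr. | 
|  | */ | 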
|  |  | 
|  | static void | 
|  | cacheCheck(Cache *c) | 
|  | { | 
|  | u32int size, now; | 
|  | int i, k, refed; | 
|  | Block *b; | 
|  |  | 
|  | size = c->size; | 
|  | now = c->now; | 
|  |  | 
|  | for(i = 0; i < c->nheap; i++){ | 
|  | if(c->heap[i]->heap != i) | 
|  | sysfatal("mis-heaped at %d: %d", i, c->heap[i]->heap); | 
|  | if(i > 0 && c->heap[(i - 1) >> 1]->used - now > c->heap[i]->used - now) | 
|  | sysfatal("bad heap ordering"); | 
|  | k = (i << 1) + 1; | 
|  | if(k < c->nheap && c->heap[i]->used - now > c->heap[k]->used - now) | 
|  | sysfatal("bad heap ordering"); | 
|  | k++; | 
|  | if(k < c->nheap && c->heap[i]->used - now > c->heap[k]->used - now) | 
|  | sysfatal("bad heap ordering"); | 
|  | } | 
|  |  | 
|  | refed = 0; | 
|  | for(i = 0; i < c->nblocks; i++){ | 
|  | b = &c->blocks[i]; | 
|  | if(b->data != &c->mem[i * size]) | 
|  | sysfatal("mis-blocked at %d", i); | 
|  | if(b->ref && b->heap == BadHeap){ | 
|  | refed++; | 
|  | } | 
|  | } | 
|  | if(c->nheap + refed != c->nblocks){ | 
|  | fprint(2, "%s: cacheCheck: nheap %d refed %d nblocks %ld\n", argv0, c->nheap, refed, c->nblocks); | 
|  | cacheDump(c); | 
|  | } | 
|  | assert(c->nheap + refed == c->nblocks); | 
|  | refed = 0; | 
|  | for(i = 0; i < c->nblocks; i++){ | 
|  | b = &c->blocks[i]; | 
|  | if(b->ref){ | 
|  | if(1)fprint(2, "%s: p=%d a=%ud %V ref=%d %L\n", argv0, b->part, b->addr, b->score, b->ref, &b->l); | 
|  | refed++; | 
|  | } | 
|  | } | 
|  | if(refed > 0)fprint(2, "%s: cacheCheck: in used %d\n", argv0, refed); | 
|  | } | 
|  |  | 
|  |  | 
|  | /* | 
|  | * locate the block with the oldest second to last use. | 
|  | * remove it from the heap, and fix up the heap. | 
|  | */ | 
|  | /* called with c->lk held */ | 
|  | static Block * | 
|  | cacheBumpBlock(Cache *c) | 
|  | { | 
|  | int printed; | 
|  | Block *b; | 
|  |  | 
|  | printed = 0; | 
|  | if(c->nheap == 0){ | 
|  | while(c->nheap == 0){ | 
|  | rwakeup(&c->flush); | 
|  | rsleep(&c->heapwait); | 
|  | if(c->nheap == 0){ | 
|  | printed = 1; | 
|  | fprint(2, "%s: entire cache is busy, %d dirty " | 
|  | "-- waking flush thread\n", | 
|  | argv0, c->ndirty); | 
|  | } | 
|  | } | 
|  | if(printed) | 
|  | fprint(2, "%s: cache is okay again, %d dirty\n", | 
|  | argv0, c->ndirty); | 
|  | } | 
|  |  | 
|  | b = c->heap[0]; | 
|  | heapDel(b); | 
|  |  | 
|  | assert(b->heap == BadHeap); | 
|  | assert(b->ref == 0); | 
|  | assert(b->iostate != BioDirty && b->iostate != BioReading && b->iostate != BioWriting); | 
|  | assert(b->prior == nil); | 
|  | assert(b->uhead == nil); | 
|  |  | 
|  | /* | 
|  | * unchain the block from hash chain | 
|  | */ | 
|  | if(b->prev){ | 
|  | *(b->prev) = b->next; | 
|  | if(b->next) | 
|  | b->next->prev = b->prev; | 
|  | b->prev = nil; | 
|  | } | 
|  |  | 
|  |  | 
|  | if(0)fprint(2, "%s: dropping %d:%x:%V\n", argv0, b->part, b->addr, b->score); | 
|  | /* set block to a reasonable state */ | 
|  | b->ref = 1; | 
|  | b->part = PartError; | 
|  | memset(&b->l, 0, sizeof(b->l)); | 
|  | b->iostate = BioEmpty; | 
|  |  | 
|  | return b; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * look for a particular version of the block in the memory cache. | 
|  | */ | 
|  | static Block * | 
|  | _cacheLocalLookup(Cache *c, int part, u32int addr, u32int vers, | 
|  | int waitlock, int *lockfailure) | 
|  | { | 
|  | Block *b; | 
|  | ulong h; | 
|  |  | 
|  | h = addr % c->hashSize; | 
|  |  | 
|  | if(lockfailure) | 
|  | *lockfailure = 0; | 
|  |  | 
|  | /* | 
|  | * look for the block in the cache | 
|  | */ | 
|  | qlock(&c->lk); | 
|  | for(b = c->heads[h]; b != nil; b = b->next){ | 
|  | if(b->part == part && b->addr == addr) | 
|  | break; | 
|  | } | 
|  | if(b == nil || b->vers != vers){ | 
|  | qunlock(&c->lk); | 
|  | return nil; | 
|  | } | 
|  | if(!waitlock && !canqlock(&b->lk)){ | 
|  | *lockfailure = 1; | 
|  | qunlock(&c->lk); | 
|  | return nil; | 
|  | } | 
|  | heapDel(b); | 
|  | b->ref++; | 
|  | qunlock(&c->lk); | 
|  |  | 
|  | bwatchLock(b); | 
|  | if(waitlock) | 
|  | qlock(&b->lk); | 
|  | b->nlock = 1; | 
|  |  | 
|  | for(;;){ | 
|  | switch(b->iostate){ | 
|  | default: | 
|  | abort(); | 
|  | case BioEmpty: | 
|  | case BioLabel: | 
|  | case BioClean: | 
|  | case BioDirty: | 
|  | if(b->vers != vers){ | 
|  | blockPut(b); | 
|  | return nil; | 
|  | } | 
|  | return b; | 
|  | case BioReading: | 
|  | case BioWriting: | 
|  | rsleep(&b->ioready); | 
|  | break; | 
|  | case BioVentiError: | 
|  | blockPut(b); | 
|  | werrstr("venti i/o error block 0x%.8ux", addr); | 
|  | return nil; | 
|  | case BioReadError: | 
|  | blockPut(b); | 
|  | werrstr("error reading block 0x%.8ux", addr); | 
|  | return nil; | 
|  | } | 
|  | } | 
|  | /* NOT REACHED */ | 
|  | } | 
|  |  | 
|  |  | 
|  | /* | 
|  | * fetch a local (on-disk) block from the memory cache. | 
|  | * if it's not there, load it, bumping some other block. | 
|  | */ | 
|  | Block * | 
|  | _cacheLocal(Cache *c, int part, u32int addr, int mode, u32int epoch) | 
|  | { | 
|  | Block *b; | 
|  | ulong h; | 
|  |  | 
|  | assert(part != PartVenti); | 
|  |  | 
|  | h = addr % c->hashSize; | 
|  |  | 
|  | /* | 
|  | * look for the block in the cache | 
|  | */ | 
|  | qlock(&c->lk); | 
|  | for(b = c->heads[h]; b != nil; b = b->next){ | 
|  | if(b->part != part || b->addr != addr) | 
|  | continue; | 
|  | if(epoch && b->l.epoch != epoch){ | 
|  | fprint(2, "%s: _cacheLocal want epoch %ud got %ud\n", argv0, epoch, b->l.epoch); | 
|  | qunlock(&c->lk); | 
|  | werrstr(ELabelMismatch); | 
|  | return nil; | 
|  | } | 
|  | heapDel(b); | 
|  | b->ref++; | 
|  | break; | 
|  | } | 
|  |  | 
|  | if(b == nil){ | 
|  | b = cacheBumpBlock(c); | 
|  |  | 
|  | b->part = part; | 
|  | b->addr = addr; | 
|  | localToGlobal(addr, b->score); | 
|  |  | 
|  | /* chain onto correct hash */ | 
|  | b->next = c->heads[h]; | 
|  | c->heads[h] = b; | 
|  | if(b->next != nil) | 
|  | b->next->prev = &b->next; | 
|  | b->prev = &c->heads[h]; | 
|  | } | 
|  |  | 
|  | qunlock(&c->lk); | 
|  |  | 
|  | /* | 
|  | * BUG: what if the epoch changes right here? | 
|  | * In the worst case, we could end up in some weird | 
|  | * lock loop, because the block we want no longer exists, | 
|  | * and instead we're trying to lock a block we have no | 
|  | * business grabbing. | 
|  | * | 
|  | * For now, I'm not going to worry about it. | 
|  | */ | 
|  |  | 
|  | if(0)fprint(2, "%s: cacheLocal: %d: %d %x\n", argv0, getpid(), b->part, b->addr); | 
|  | bwatchLock(b); | 
|  | qlock(&b->lk); | 
|  | b->nlock = 1; | 
|  |  | 
|  | if(part == PartData && b->iostate == BioEmpty){ | 
|  | if(!readLabel(c, &b->l, addr)){ | 
|  | blockPut(b); | 
|  | return nil; | 
|  | } | 
|  | blockSetIOState(b, BioLabel); | 
|  | } | 
|  | if(epoch && b->l.epoch != epoch){ | 
|  | blockPut(b); | 
|  | fprint(2, "%s: _cacheLocal want epoch %ud got %ud\n", argv0, epoch, b->l.epoch); | 
|  | werrstr(ELabelMismatch); | 
|  | return nil; | 
|  | } | 
|  |  | 
|  | b->pc = getcallerpc(&c); | 
|  | for(;;){ | 
|  | switch(b->iostate){ | 
|  | default: | 
|  | abort(); | 
|  | case BioLabel: | 
|  | if(mode == OOverWrite) | 
|  | /* | 
|  | * leave iostate as BioLabel because data | 
|  | * hasn't been read. | 
|  | */ | 
|  | return b; | 
|  | /* fall through */ | 
|  | case BioEmpty: | 
|  | diskRead(c->disk, b); | 
|  | rsleep(&b->ioready); | 
|  | break; | 
|  | case BioClean: | 
|  | case BioDirty: | 
|  | return b; | 
|  | case BioReading: | 
|  | case BioWriting: | 
|  | rsleep(&b->ioready); | 
|  | break; | 
|  | case BioReadError: | 
|  | blockSetIOState(b, BioEmpty); | 
|  | blockPut(b); | 
|  | werrstr("error reading block 0x%.8ux", addr); | 
|  | return nil; | 
|  | } | 
|  | } | 
|  | /* NOT REACHED */ | 
|  | } | 
|  |  | 
|  | Block * | 
|  | cacheLocal(Cache *c, int part, u32int addr, int mode) | 
|  | { | 
|  | return _cacheLocal(c, part, addr, mode, 0); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * fetch a local (on-disk) block from the memory cache. | 
|  | * if it's not there, load it, bumping some other block. | 
|  | * check tag and type. | 
|  | */ | 
|  | Block * | 
|  | cacheLocalData(Cache *c, u32int addr, int type, u32int tag, int mode, u32int epoch) | 
|  | { | 
|  | Block *b; | 
|  |  | 
|  | b = _cacheLocal(c, PartData, addr, mode, epoch); | 
|  | if(b == nil) | 
|  | return nil; | 
|  | if(b->l.type != type || b->l.tag != tag){ | 
|  | fprint(2, "%s: cacheLocalData: addr=%d type got %d exp %d: tag got %ux exp %ux\n", | 
|  | argv0, addr, b->l.type, type, b->l.tag, tag); | 
|  | werrstr(ELabelMismatch); | 
|  | blockPut(b); | 
|  | return nil; | 
|  | } | 
|  | b->pc = getcallerpc(&c); | 
|  | return b; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * fetch a global (Venti) block from the memory cache. | 
|  | * if it's not there, load it, bumping some other block. | 
|  | * check tag and type if it's really a local block in disguise. | 
|  | */ | 
|  | Block * | 
|  | cacheGlobal(Cache *c, uchar score[VtScoreSize], int type, u32int tag, int mode) | 
|  | { | 
|  | int n; | 
|  | Block *b; | 
|  | ulong h; | 
|  | u32int addr; | 
|  |  | 
|  | addr = globalToLocal(score); | 
|  | if(addr != NilBlock){ | 
|  | b = cacheLocalData(c, addr, type, tag, mode, 0); | 
|  | if(b) | 
|  | b->pc = getcallerpc(&c); | 
|  | return b; | 
|  | } | 
|  |  | 
|  | h = (u32int)(score[0]|(score[1]<<8)|(score[2]<<16)|(score[3]<<24)) % c->hashSize; | 
|  |  | 
|  | /* | 
|  | * look for the block in the cache | 
|  | */ | 
|  | qlock(&c->lk); | 
|  | for(b = c->heads[h]; b != nil; b = b->next){ | 
|  | if(b->part != PartVenti || memcmp(b->score, score, VtScoreSize) != 0 || b->l.type != type) | 
|  | continue; | 
|  | heapDel(b); | 
|  | b->ref++; | 
|  | break; | 
|  | } | 
|  |  | 
|  | if(b == nil){ | 
|  | if(0)fprint(2, "%s: cacheGlobal %V %d\n", argv0, score, type); | 
|  |  | 
|  | b = cacheBumpBlock(c); | 
|  |  | 
|  | b->part = PartVenti; | 
|  | b->addr = NilBlock; | 
|  | b->l.type = type; | 
|  | memmove(b->score, score, VtScoreSize); | 
|  |  | 
|  | /* chain onto correct hash */ | 
|  | b->next = c->heads[h]; | 
|  | c->heads[h] = b; | 
|  | if(b->next != nil) | 
|  | b->next->prev = &b->next; | 
|  | b->prev = &c->heads[h]; | 
|  | } | 
|  | qunlock(&c->lk); | 
|  |  | 
|  | bwatchLock(b); | 
|  | qlock(&b->lk); | 
|  | b->nlock = 1; | 
|  | b->pc = getcallerpc(&c); | 
|  |  | 
|  | switch(b->iostate){ | 
|  | default: | 
|  | abort(); | 
|  | case BioEmpty: | 
|  | n = vtread(c->z, score, vtType[type], b->data, c->size); | 
|  | if(n < 0 || vtsha1check(score, b->data, n) < 0){ | 
|  | blockSetIOState(b, BioVentiError); | 
|  | blockPut(b); | 
|  | werrstr( | 
|  | "venti error reading block %V or wrong score: %r", | 
|  | score); | 
|  | return nil; | 
|  | } | 
|  | vtzeroextend(vtType[type], b->data, n, c->size); | 
|  | blockSetIOState(b, BioClean); | 
|  | return b; | 
|  | case BioClean: | 
|  | return b; | 
|  | case BioVentiError: | 
|  | blockPut(b); | 
|  | werrstr("venti i/o error or wrong score, block %V", score); | 
|  | return nil; | 
|  | case BioReadError: | 
|  | blockPut(b); | 
|  | werrstr("error reading block %V", b->score); | 
|  | return nil; | 
|  | } | 
|  | /* NOT REACHED */ | 
|  | } | 
|  |  | 
|  | /* | 
|  | * allocate a new on-disk block and load it into the memory cache. | 
|  | * BUG: if the disk is full, should we flush some of it to Venti? | 
|  | */ | 
|  | static u32int lastAlloc; | 
|  |  | 
|  | Block * | 
|  | cacheAllocBlock(Cache *c, int type, u32int tag, u32int epoch, u32int epochLow) | 
|  | { | 
|  | FreeList *fl; | 
|  | u32int addr; | 
|  | Block *b; | 
|  | int n, nwrap; | 
|  | Label lab; | 
|  |  | 
|  | n = c->size / LabelSize; | 
|  | fl = c->fl; | 
|  |  | 
|  | qlock(&fl->lk); | 
|  | addr = fl->last; | 
|  | b = cacheLocal(c, PartLabel, addr/n, OReadOnly); | 
|  | if(b == nil){ | 
|  | fprint(2, "%s: cacheAllocBlock: xxx %r\n", argv0); | 
|  | qunlock(&fl->lk); | 
|  | return nil; | 
|  | } | 
|  | nwrap = 0; | 
|  | for(;;){ | 
|  | if(++addr >= fl->end){ | 
|  | addr = 0; | 
|  | if(++nwrap >= 2){ | 
|  | blockPut(b); | 
|  | werrstr("disk is full"); | 
|  | /* | 
|  | * try to avoid a continuous spew of console | 
|  | * messages. | 
|  | */ | 
|  | if (fl->last != 0) | 
|  | fprint(2, "%s: cacheAllocBlock: xxx1 %r\n", | 
|  | argv0); | 
|  | fl->last = 0; | 
|  | qunlock(&fl->lk); | 
|  | return nil; | 
|  | } | 
|  | } | 
|  | if(addr%n == 0){ | 
|  | blockPut(b); | 
|  | b = cacheLocal(c, PartLabel, addr/n, OReadOnly); | 
|  | if(b == nil){ | 
|  | fl->last = addr; | 
|  | fprint(2, "%s: cacheAllocBlock: xxx2 %r\n", argv0); | 
|  | qunlock(&fl->lk); | 
|  | return nil; | 
|  | } | 
|  | } | 
|  | if(!labelUnpack(&lab, b->data, addr%n)) | 
|  | continue; | 
|  | if(lab.state == BsFree) | 
|  | goto Found; | 
|  | if(lab.state&BsClosed) | 
|  | if(lab.epochClose <= epochLow || lab.epoch==lab.epochClose) | 
|  | goto Found; | 
|  | } | 
|  | Found: | 
|  | blockPut(b); | 
|  | b = cacheLocal(c, PartData, addr, OOverWrite); | 
|  | if(b == nil){ | 
|  | fprint(2, "%s: cacheAllocBlock: xxx3 %r\n", argv0); | 
|  | return nil; | 
|  | } | 
|  | assert(b->iostate == BioLabel || b->iostate == BioClean); | 
|  | fl->last = addr; | 
|  | lab.type = type; | 
|  | lab.tag = tag; | 
|  | lab.state = BsAlloc; | 
|  | lab.epoch = epoch; | 
|  | lab.epochClose = ~(u32int)0; | 
|  | if(!blockSetLabel(b, &lab, 1)){ | 
|  | fprint(2, "%s: cacheAllocBlock: xxx4 %r\n", argv0); | 
|  | blockPut(b); | 
|  | return nil; | 
|  | } | 
|  | vtzeroextend(vtType[type], b->data, 0, c->size); | 
|  | if(0)diskWrite(c->disk, b); | 
|  |  | 
|  | if(0)fprint(2, "%s: fsAlloc %ud type=%d tag = %ux\n", argv0, addr, type, tag); | 
|  | lastAlloc = addr; | 
|  | fl->nused++; | 
|  | qunlock(&fl->lk); | 
|  | b->pc = getcallerpc(&c); | 
|  | return b; | 
|  | } | 
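|  |  | 
|  | /* | 
|  | * Report the number of dirty blocks currently in the cache. | 
|  | */ | 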
|  |  | 
|  | int | 
|  | cacheDirty(Cache *c) | 
|  | { | 
|  | return c->ndirty; | 
|  | } | 
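|  |  | 
|  | /* | 
|  | * Report block usage for the data partition: return the cached count if | 
|  | * the free list is already up to date for epochLow, otherwise rescan the | 
|  | * label partition and recount. | 
|  | */ | 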
|  |  | 
|  | void | 
|  | cacheCountUsed(Cache *c, u32int epochLow, u32int *used, u32int *total, u32int *bsize) | 
|  | { | 
|  | int n; | 
|  | u32int addr, nused; | 
|  | Block *b; | 
|  | Label lab; | 
|  | FreeList *fl; | 
|  |  | 
|  | fl = c->fl; | 
|  | n = c->size / LabelSize; | 
|  | *bsize = c->size; | 
|  | qlock(&fl->lk); | 
|  | if(fl->epochLow == epochLow){ | 
|  | *used = fl->nused; | 
|  | *total = fl->end; | 
|  | qunlock(&fl->lk); | 
|  | return; | 
|  | } | 
|  | b = nil; | 
|  | nused = 0; | 
|  | for(addr=0; addr<fl->end; addr++){ | 
|  | if(addr%n == 0){ | 
|  | blockPut(b); | 
|  | b = cacheLocal(c, PartLabel, addr/n, OReadOnly); | 
|  | if(b == nil){ | 
|  | fprint(2, "%s: flCountUsed: loading %ux: %r\n", | 
|  | argv0, addr/n); | 
|  | break; | 
|  | } | 
|  | } | 
|  | if(!labelUnpack(&lab, b->data, addr%n)) | 
|  | continue; | 
|  | if(lab.state == BsFree) | 
|  | continue; | 
|  | if(lab.state&BsClosed) | 
|  | if(lab.epochClose <= epochLow || lab.epoch==lab.epochClose) | 
|  | continue; | 
|  | nused++; | 
|  | } | 
|  | blockPut(b); | 
|  | if(addr == fl->end){ | 
|  | fl->nused = nused; | 
|  | fl->epochLow = epochLow; | 
|  | } | 
|  | *used = nused; | 
|  | *total = fl->end; | 
|  | qunlock(&fl->lk); | 
|  | return; | 
|  | } | 
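|  |  | 
|  | /* | 
|  | * Allocate the in-memory free-list summary for the data partition; | 
|  | * nused and epochLow start at zero and are filled in lazily. | 
|  | */ | 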
|  |  | 
|  | static FreeList * | 
|  | flAlloc(u32int end) | 
|  | { | 
|  | FreeList *fl; | 
|  |  | 
|  | fl = vtmallocz(sizeof(*fl)); | 
|  | fl->last = 0; | 
|  | fl->end = end; | 
|  | return fl; | 
|  | } | 
|  |  | 
|  | static void | 
|  | flFree(FreeList *fl) | 
|  | { | 
|  | vtfree(fl); | 
|  | } | 
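|  |  | 
|  | /* | 
|  | * Number of blocks in the given disk partition. | 
|  | */ | 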
|  |  | 
|  | u32int | 
|  | cacheLocalSize(Cache *c, int part) | 
|  | { | 
|  | return diskSize(c->disk, part); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * The thread that has locked b may refer to it by | 
|  | * multiple names.  Nlock counts the number of | 
|  | * references the locking thread holds.  It will call | 
|  | * blockPut once per reference. | 
|  | */ | 
|  | void | 
|  | blockDupLock(Block *b) | 
|  | { | 
|  | assert(b->nlock > 0); | 
|  | b->nlock++; | 
|  | } | 
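|  |  | 
|  | /* | 
|  | * Illustrative use of blockDupLock (a sketch; "helper" is hypothetical): | 
|  | * a caller that hands b to a routine which will call blockPut(b) itself, | 
|  | * while keeping its own reference, does | 
|  | * | 
|  | *	blockDupLock(b); | 
|  | *	helper(b); | 
|  | *	... | 
|  | *	blockPut(b); | 
|  | * | 
|  | * Each blockDupLock must be matched by one extra blockPut. | 
|  | */ | 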
|  |  | 
|  | /* | 
|  | * we're done with the block. | 
|  | * unlock it.  can't use it after calling this. | 
|  | */ | 
|  | void | 
|  | blockPut(Block* b) | 
|  | { | 
|  | Cache *c; | 
|  |  | 
|  | if(b == nil) | 
|  | return; | 
|  |  | 
|  | if(0)fprint(2, "%s: blockPut: %d: %d %x %d %s\n", argv0, getpid(), b->part, b->addr, b->c->nheap, bioStr(b->iostate)); | 
|  |  | 
|  | if(b->iostate == BioDirty) | 
|  | bwatchDependency(b); | 
|  |  | 
|  | if(--b->nlock > 0) | 
|  | return; | 
|  |  | 
|  | /* | 
|  | * b->nlock should probably stay at zero while | 
|  | * the block is unlocked, but diskThread and rsleep | 
|  | * conspire to assume that they can just qlock(&b->lk); blockPut(b), | 
|  | * so we have to keep b->nlock set to 1 even | 
|  | * when the block is unlocked. | 
|  | */ | 
|  | assert(b->nlock == 0); | 
|  | b->nlock = 1; | 
|  | //	b->pc = 0; | 
|  |  | 
|  | bwatchUnlock(b); | 
|  | qunlock(&b->lk); | 
|  | c = b->c; | 
|  | qlock(&c->lk); | 
|  |  | 
|  | if(--b->ref > 0){ | 
|  | qunlock(&c->lk); | 
|  | return; | 
|  | } | 
|  |  | 
|  | assert(b->ref == 0); | 
|  | switch(b->iostate){ | 
|  | default: | 
|  | b->used = c->now++; | 
|  | heapIns(b); | 
|  | break; | 
|  | case BioEmpty: | 
|  | case BioLabel: | 
|  | if(c->nheap == 0) | 
|  | b->used = c->now++; | 
|  | else | 
|  | b->used = c->heap[0]->used; | 
|  | heapIns(b); | 
|  | break; | 
|  | case BioDirty: | 
|  | break; | 
|  | } | 
|  | qunlock(&c->lk); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * set the label associated with a block. | 
|  | */ | 
|  | Block* | 
|  | _blockSetLabel(Block *b, Label *l) | 
|  | { | 
|  | int lpb; | 
|  | Block *bb; | 
|  | u32int a; | 
|  | Cache *c; | 
|  |  | 
|  | c = b->c; | 
|  |  | 
|  | assert(b->part == PartData); | 
|  | assert(b->iostate == BioLabel || b->iostate == BioClean || b->iostate == BioDirty); | 
|  | lpb = c->size / LabelSize; | 
|  | a = b->addr / lpb; | 
|  | bb = cacheLocal(c, PartLabel, a, OReadWrite); | 
|  | if(bb == nil){ | 
|  | blockPut(b); | 
|  | return nil; | 
|  | } | 
|  | b->l = *l; | 
|  | labelPack(l, bb->data, b->addr%lpb); | 
|  | blockDirty(bb); | 
|  | return bb; | 
|  | } | 
|  |  | 
|  | int | 
|  | blockSetLabel(Block *b, Label *l, int allocating) | 
|  | { | 
|  | Block *lb; | 
|  |  | 
|  | lb = _blockSetLabel(b, l); | 
|  | if(lb == nil) | 
|  | return 0; | 
|  |  | 
|  | /* | 
|  | * If we're allocating the block, make sure the label (bl) | 
|  | * goes to disk before the data block (b) itself.  This is to help | 
|  | * the blocks that in turn depend on b. | 
|  | * | 
|  | * Suppose bx depends on (must be written out after) b. | 
|  | * Once we write b we'll think it's safe to write bx. | 
|  | * Bx can't get at b unless it has a valid label, though. | 
|  | * | 
|  | * Allocation is the only case in which having a current label | 
|  | * is vital because: | 
|  | * | 
|  | *	- l.type is set at allocation and never changes. | 
|  | *	- l.tag is set at allocation and never changes. | 
|  | *	- l.state is not checked when we load blocks. | 
|  | *	- the archiver cares deeply about l.state being | 
|  | *		BaActive vs. BaCopied, but that's handled | 
|  | *		by direct calls to _blockSetLabel. | 
|  | */ | 
|  |  | 
|  | if(allocating) | 
|  | blockDependency(b, lb, -1, nil, nil); | 
|  | blockPut(lb); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Record that bb must be written out before b. | 
|  | * If index is given, we're about to overwrite the score or entry | 
|  | * at that index in the block.  Save the old value so we | 
|  | * can write a safer ``old'' version of the block if pressed. | 
|  | */ | 
|  | void | 
|  | blockDependency(Block *b, Block *bb, int index, uchar *score, Entry *e) | 
|  | { | 
|  | BList *p; | 
|  |  | 
|  | if(bb->iostate == BioClean) | 
|  | return; | 
|  |  | 
|  | /* | 
|  | * Dependencies for blocks containing Entry structures | 
|  | * or scores must always be explained.  The problem with | 
|  | * only explaining some of them is this.  Suppose we have two | 
|  | * dependencies for the same field, the first explained | 
|  | * and the second not.  We try to write the block when the first | 
|  | * dependency is not written but the second is.  We will roll back | 
|  | * the first change even though the second trumps it. | 
|  | */ | 
|  | if(index == -1 && bb->part == PartData) | 
|  | assert(b->l.type == BtData); | 
|  |  | 
|  | if(bb->iostate != BioDirty){ | 
|  | fprint(2, "%s: %d:%x:%d iostate is %d in blockDependency\n", | 
|  | argv0, bb->part, bb->addr, bb->l.type, bb->iostate); | 
|  | abort(); | 
|  | } | 
|  |  | 
|  | p = blistAlloc(bb); | 
|  | if(p == nil) | 
|  | return; | 
|  |  | 
|  | assert(bb->iostate == BioDirty); | 
|  | if(0)fprint(2, "%s: %d:%x:%d depends on %d:%x:%d\n", argv0, b->part, b->addr, b->l.type, bb->part, bb->addr, bb->l.type); | 
|  |  | 
|  | p->part = bb->part; | 
|  | p->addr = bb->addr; | 
|  | p->type = bb->l.type; | 
|  | p->vers = bb->vers; | 
|  | p->index = index; | 
|  | if(p->index >= 0){ | 
|  | /* | 
|  | * This test would just be b->l.type==BtDir except | 
|  | * we need to exclude the super block. | 
|  | */ | 
|  | if(b->l.type == BtDir && b->part == PartData) | 
|  | entryPack(e, p->old.entry, 0); | 
|  | else | 
|  | memmove(p->old.score, score, VtScoreSize); | 
|  | } | 
|  | p->next = b->prior; | 
|  | b->prior = p; | 
|  | } | 
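|  |  | 
|  | /* | 
|  | * Illustrative use of blockDependency (a sketch, not taken verbatim from | 
|  | * a call site): when slot i of a pointer block b is switched to point at | 
|  | * a still-dirty child bb, the caller saves the old score and records the | 
|  | * write ordering before overwriting the slot: | 
|  | * | 
|  | *	memmove(oldscore, b->data + i*VtScoreSize, VtScoreSize); | 
|  | *	blockDependency(b, bb, i, oldscore, nil); | 
|  | *	memmove(b->data + i*VtScoreSize, bb->score, VtScoreSize); | 
|  | *	blockDirty(b); | 
|  | * | 
|  | * If b has to be flushed before bb, blockRollback uses the saved score | 
|  | * to write a safe older version of b. | 
|  | */ | 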
|  |  | 
|  | /* | 
|  | * Mark an in-memory block as dirty.  If there are too many | 
|  | * dirty blocks, start writing some out to disk. | 
|  | * | 
|  | * If there were way too many dirty blocks, we used to | 
|  | * try to do some flushing ourselves, but it's just too dangerous -- | 
|  | * it implies that the callers cannot have any of our priors locked, | 
|  | * but this is hard to avoid in some cases. | 
|  | */ | 
|  | int | 
|  | blockDirty(Block *b) | 
|  | { | 
|  | Cache *c; | 
|  |  | 
|  | c = b->c; | 
|  |  | 
|  | assert(b->part != PartVenti); | 
|  |  | 
|  | if(b->iostate == BioDirty) | 
|  | return 1; | 
|  | assert(b->iostate == BioClean || b->iostate == BioLabel); | 
|  |  | 
|  | qlock(&c->lk); | 
|  | b->iostate = BioDirty; | 
|  | c->ndirty++; | 
|  | if(c->ndirty > (c->maxdirty>>1)) | 
|  | rwakeup(&c->flush); | 
|  | qunlock(&c->lk); | 
|  |  | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * We've decided to write out b.  Maybe b has some pointers to blocks | 
|  | * that haven't yet been written to disk.  If so, construct a slightly out-of-date | 
|  | * copy of b that is safe to write out.  (diskThread will make sure the block | 
|  | * remains marked as dirty.) | 
|  | */ | 
|  | uchar * | 
|  | blockRollback(Block *b, uchar *buf) | 
|  | { | 
|  | u32int addr; | 
|  | BList *p; | 
|  | Super super; | 
|  |  | 
|  | /* easy case */ | 
|  | if(b->prior == nil) | 
|  | return b->data; | 
|  |  | 
|  | memmove(buf, b->data, b->c->size); | 
|  | for(p=b->prior; p; p=p->next){ | 
|  | /* | 
|  | * we know p->index >= 0 because blockWrite has vetted this block for us. | 
|  | */ | 
|  | assert(p->index >= 0); | 
|  | assert(b->part == PartSuper || (b->part == PartData && b->l.type != BtData)); | 
|  | if(b->part == PartSuper){ | 
|  | assert(p->index == 0); | 
|  | superUnpack(&super, buf); | 
|  | addr = globalToLocal(p->old.score); | 
|  | if(addr == NilBlock){ | 
|  | fprint(2, "%s: rolling back super block: " | 
|  | "bad replacement addr %V\n", | 
|  | argv0, p->old.score); | 
|  | abort(); | 
|  | } | 
|  | super.active = addr; | 
|  | superPack(&super, buf); | 
|  | continue; | 
|  | } | 
|  | if(b->l.type == BtDir) | 
|  | memmove(buf+p->index*VtEntrySize, p->old.entry, VtEntrySize); | 
|  | else | 
|  | memmove(buf+p->index*VtScoreSize, p->old.score, VtScoreSize); | 
|  | } | 
|  | return buf; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Try to write block b. | 
|  | * If b depends on other blocks: | 
|  | * | 
|  | *	If the block has been written out, remove the dependency. | 
|  | *	If the dependency is replaced by a more recent dependency, | 
|  | *		throw it out. | 
|  | *	If we know how to write out an old version of b that doesn't | 
|  | *		depend on it, do that. | 
|  | * | 
|  | *	Otherwise, bail. | 
|  | */ | 
|  | int | 
|  | blockWrite(Block *b, int waitlock) | 
|  | { | 
|  | uchar *dmap; | 
|  | Cache *c; | 
|  | BList *p, **pp; | 
|  | Block *bb; | 
|  | int lockfail; | 
|  |  | 
|  | c = b->c; | 
|  |  | 
|  | if(b->iostate != BioDirty) | 
|  | return 1; | 
|  |  | 
|  | dmap = b->dmap; | 
|  | memset(dmap, 0, c->ndmap); | 
|  | pp = &b->prior; | 
|  | for(p=*pp; p; p=*pp){ | 
|  | if(p->index >= 0){ | 
|  | /* more recent dependency has succeeded; this one can go */ | 
|  | if(dmap[p->index/8] & (1<<(p->index%8))) | 
|  | goto ignblock; | 
|  | } | 
|  |  | 
|  | lockfail = 0; | 
|  | bb = _cacheLocalLookup(c, p->part, p->addr, p->vers, waitlock, | 
|  | &lockfail); | 
|  | if(bb == nil){ | 
|  | if(lockfail) | 
|  | return 0; | 
|  | /* block not in cache => was written already */ | 
|  | if(p->index >= 0) | 
|  | dmap[p->index/8] |= 1<<(p->index%8); | 
|  | goto ignblock; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * same version of block is still in cache. | 
|  | * | 
|  | * the assertion is true because the block still has version p->vers, | 
|  | * which means it hasn't been written out since we last saw it. | 
|  | */ | 
|  | if(bb->iostate != BioDirty){ | 
|  | fprint(2, "%s: %d:%x:%d iostate is %d in blockWrite\n", | 
|  | argv0, bb->part, bb->addr, bb->l.type, bb->iostate); | 
|  | /* probably BioWriting if it happens? */ | 
|  | if(bb->iostate == BioClean) | 
|  | goto ignblock; | 
|  | } | 
|  |  | 
|  | blockPut(bb); | 
|  |  | 
|  | if(p->index < 0){ | 
|  | /* | 
|  | * We don't know how to temporarily undo | 
|  | * b's dependency on bb, so just don't write b yet. | 
|  | */ | 
|  | if(0) fprint(2, "%s: blockWrite skipping %d %x %d %d; need to write %d %x %d\n", | 
|  | argv0, b->part, b->addr, b->vers, b->l.type, p->part, p->addr, bb->vers); | 
|  | return 0; | 
|  | } | 
|  | /* keep walking down the list */ | 
|  | pp = &p->next; | 
|  | continue; | 
|  |  | 
|  | ignblock: | 
|  | *pp = p->next; | 
|  | blistFree(c, p); | 
|  | continue; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * DiskWrite must never be called with a double-locked block. | 
|  | * This call to diskWrite is okay because blockWrite is only called | 
|  | * from the cache flush thread, which never double-locks a block. | 
|  | */ | 
|  | diskWrite(c->disk, b); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Change the I/O state of block b. | 
|  | * Just an assignment except for magic in | 
|  | * switch statement (read comments there). | 
|  | */ | 
|  | void | 
|  | blockSetIOState(Block *b, int iostate) | 
|  | { | 
|  | int dowakeup; | 
|  | Cache *c; | 
|  | BList *p, *q; | 
|  |  | 
|  | if(0) fprint(2, "%s: iostate part=%d addr=%x %s->%s\n", argv0, b->part, b->addr, bioStr(b->iostate), bioStr(iostate)); | 
|  |  | 
|  | c = b->c; | 
|  |  | 
|  | dowakeup = 0; | 
|  | switch(iostate){ | 
|  | default: | 
|  | abort(); | 
|  | case BioEmpty: | 
|  | assert(!b->uhead); | 
|  | break; | 
|  | case BioLabel: | 
|  | assert(!b->uhead); | 
|  | break; | 
|  | case BioClean: | 
|  | bwatchDependency(b); | 
|  | /* | 
|  | * If b->prior is set, it means a write just finished. | 
|  | * The prior list isn't needed anymore. | 
|  | */ | 
|  | for(p=b->prior; p; p=q){ | 
|  | q = p->next; | 
|  | blistFree(c, p); | 
|  | } | 
|  | b->prior = nil; | 
|  | /* | 
|  | * Freeing a block or just finished a write. | 
|  | * Move the blocks from the per-block unlink | 
|  | * queue to the cache unlink queue. | 
|  | */ | 
|  | if(b->iostate == BioDirty || b->iostate == BioWriting){ | 
|  | qlock(&c->lk); | 
|  | c->ndirty--; | 
|  | b->iostate = iostate;	/* change here to keep in sync with ndirty */ | 
|  | b->vers = c->vers++; | 
|  | if(b->uhead){ | 
|  | /* add unlink blocks to unlink queue */ | 
|  | if(c->uhead == nil){ | 
|  | c->uhead = b->uhead; | 
|  | rwakeup(&c->unlink); | 
|  | }else | 
|  | c->utail->next = b->uhead; | 
|  | c->utail = b->utail; | 
|  | b->uhead = nil; | 
|  | } | 
|  | qunlock(&c->lk); | 
|  | } | 
|  | assert(!b->uhead); | 
|  | dowakeup = 1; | 
|  | break; | 
|  | case BioDirty: | 
|  | /* | 
|  | * Wrote out an old version of the block (see blockRollback). | 
|  | * Bump a version count, leave it dirty. | 
|  | */ | 
|  | if(b->iostate == BioWriting){ | 
|  | qlock(&c->lk); | 
|  | b->vers = c->vers++; | 
|  | qunlock(&c->lk); | 
|  | dowakeup = 1; | 
|  | } | 
|  | break; | 
|  | case BioReading: | 
|  | case BioWriting: | 
|  | /* | 
|  | * Adding block to disk queue.  Bump reference count. | 
|  | * diskThread decs the count later by calling blockPut. | 
|  | * This is here because we need to lock c->lk to | 
|  | * manipulate the ref count. | 
|  | */ | 
|  | qlock(&c->lk); | 
|  | b->ref++; | 
|  | qunlock(&c->lk); | 
|  | break; | 
|  | case BioReadError: | 
|  | case BioVentiError: | 
|  | /* | 
|  | * Oops. | 
|  | */ | 
|  | dowakeup = 1; | 
|  | break; | 
|  | } | 
|  | b->iostate = iostate; | 
|  | /* | 
|  | * Now that the state has changed, we can wake the waiters. | 
|  | */ | 
|  | if(dowakeup) | 
|  | rwakeupall(&b->ioready); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * The active file system is a tree of blocks. | 
|  | * When we add snapshots to the mix, the entire file system | 
|  | * becomes a dag and thus requires a bit more care. | 
|  | * | 
|  | * The life of the file system is divided into epochs.  A snapshot | 
|  | * ends one epoch and begins the next.  Each file system block | 
|  | * is marked with the epoch in which it was created (b.epoch). | 
|  | * When the block is unlinked from the file system (closed), it is marked | 
|  | * with the epoch in which it was removed (b.epochClose). | 
|  | * Once we have discarded or archived all snapshots up to | 
|  | * b.epochClose, we can reclaim the block. | 
|  | * | 
|  | * If a block was created in a past epoch but is not yet closed, | 
|  | * it is treated as copy-on-write.  Of course, in order to insert the | 
|  | * new pointer into the tree, the parent must be made writable, | 
|  | * and so on up the tree.  The recursion stops because the root | 
|  | * block is always writable. | 
|  | * | 
|  | * If blocks are never closed, they will never be reused, and | 
|  | * we will run out of disk space.  But marking a block as closed | 
|  | * requires some care about dependencies and write orderings. | 
|  | * | 
|  | * (1) If a block p points at a copy-on-write block b and we | 
|  | * copy b to create bb, then p must be written out after bb and | 
|  | * lbb (bb's label block). | 
|  | * | 
|  | * (2) We have to mark b as closed, but only after we switch | 
|  | * the pointer, so lb must be written out after p.  In fact, we | 
|  | * can't even update the in-memory copy, or the cache might | 
|  | * mistakenly give out b for reuse before p gets written. | 
|  | * | 
|  | * CacheAllocBlock's call to blockSetLabel records a "bb after lbb" dependency. | 
|  | * The caller is expected to record a "p after bb" dependency | 
|  | * to finish (1), and also expected to call blockRemoveLink | 
|  | * to arrange for (2) to happen once p is written. | 
|  | * | 
|  | * Until (2) happens, some pieces of the code (e.g., the archiver) | 
|  | * still need to know whether a block has been copied, so we | 
|  | * set the BsCopied bit in the label and force that to disk *before* | 
|  | * the copy gets written out. | 
|  | */ | 
|  | Block* | 
|  | blockCopy(Block *b, u32int tag, u32int ehi, u32int elo) | 
|  | { | 
|  | Block *bb, *lb; | 
|  | Label l; | 
|  |  | 
|  | if((b->l.state&BsClosed) || b->l.epoch >= ehi) | 
|  | fprint(2, "%s: blockCopy %#ux %L but fs is [%ud,%ud]\n", | 
|  | argv0, b->addr, &b->l, elo, ehi); | 
|  |  | 
|  | bb = cacheAllocBlock(b->c, b->l.type, tag, ehi, elo); | 
|  | if(bb == nil){ | 
|  | blockPut(b); | 
|  | return nil; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Update label so we know the block has been copied. | 
|  | * (It will be marked closed once it has been unlinked from | 
|  | * the tree.)  This must follow cacheAllocBlock since we | 
|  | * can't be holding onto lb when we call cacheAllocBlock. | 
|  | */ | 
|  | if((b->l.state&BsCopied)==0) | 
|  | if(b->part == PartData){	/* not the superblock */ | 
|  | l = b->l; | 
|  | l.state |= BsCopied; | 
|  | lb = _blockSetLabel(b, &l); | 
|  | if(lb == nil){ | 
|  | /* can't set label => can't copy block */ | 
|  | blockPut(b); | 
|  | l.type = BtMax; | 
|  | l.state = BsFree; | 
|  | l.epoch = 0; | 
|  | l.epochClose = 0; | 
|  | l.tag = 0; | 
|  | blockSetLabel(bb, &l, 0); | 
|  | blockPut(bb); | 
|  | return nil; | 
|  | } | 
|  | blockDependency(bb, lb, -1, nil, nil); | 
|  | blockPut(lb); | 
|  | } | 
|  |  | 
|  | memmove(bb->data, b->data, b->c->size); | 
|  | blockDirty(bb); | 
|  | blockPut(b); | 
|  | return bb; | 
|  | } | 
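|  |  | 
|  | /* | 
|  | * Illustrative copy-on-write step (a sketch of the protocol described | 
|  | * above, not a verbatim call site; p is assumed to be a pointer block | 
|  | * indexed by scores, with slot i and scratch buffer oldscore).  Since | 
|  | * blockCopy releases b, its label fields are saved first: | 
|  | * | 
|  | *	type = b->l.type; | 
|  | *	tag = b->l.tag; | 
|  | *	memmove(oldscore, p->data + i*VtScoreSize, VtScoreSize); | 
|  | *	bb = blockCopy(b, tag, ehi, elo); | 
|  | *	blockDependency(p, bb, i, oldscore, nil); | 
|  | *	memmove(p->data + i*VtScoreSize, bb->score, VtScoreSize); | 
|  | *	blockDirty(p); | 
|  | *	blockRemoveLink(p, globalToLocal(oldscore), type, tag, 0); | 
|  | *	blockPut(bb); | 
|  | * | 
|  | * The blockDependency finishes (1) above; the blockRemoveLink arranges | 
|  | * (2), which takes effect once p reaches disk. | 
|  | */ | 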
|  |  | 
|  | /* | 
|  | * Block b once pointed at the block bb at addr/type/tag, but no longer does. | 
|  | * If recurse is set, we are unlinking all of bb's children as well. | 
|  | * | 
|  | * We can't reclaim bb (or its kids) until the block b gets written to disk.  We add | 
|  | * the relevant information to b's list of unlinked blocks.  Once b is written, | 
|  | * the list will be queued for processing. | 
|  | * | 
|  | * If b depends on bb, it doesn't anymore, so we remove bb from the prior list. | 
|  | */ | 
|  | void | 
|  | blockRemoveLink(Block *b, u32int addr, int type, u32int tag, int recurse) | 
|  | { | 
|  | BList *p, **pp, bl; | 
|  |  | 
|  | /* remove bb from prior list */ | 
|  | for(pp=&b->prior; (p=*pp)!=nil; ){ | 
|  | if(p->part == PartData && p->addr == addr){ | 
|  | *pp = p->next; | 
|  | blistFree(b->c, p); | 
|  | }else | 
|  | pp = &p->next; | 
|  | } | 
|  |  | 
|  | bl.part = PartData; | 
|  | bl.addr = addr; | 
|  | bl.type = type; | 
|  | bl.tag = tag; | 
|  | if(b->l.epoch == 0) | 
|  | assert(b->part == PartSuper); | 
|  | bl.epoch = b->l.epoch; | 
|  | bl.next = nil; | 
|  | bl.recurse = recurse; | 
|  |  | 
|  | if(b->part == PartSuper && b->iostate == BioClean) | 
|  | p = nil; | 
|  | else | 
|  | p = blistAlloc(b); | 
|  | if(p == nil){ | 
|  | /* | 
|  | * b has already been written to disk. | 
|  | */ | 
|  | doRemoveLink(b->c, &bl); | 
|  | return; | 
|  | } | 
|  |  | 
|  | /* Uhead is only processed when the block goes from Dirty -> Clean */ | 
|  | assert(b->iostate == BioDirty); | 
|  |  | 
|  | *p = bl; | 
|  | if(b->uhead == nil) | 
|  | b->uhead = p; | 
|  | else | 
|  | b->utail->next = p; | 
|  | b->utail = p; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Process removal of a single block and perhaps its children. | 
|  | */ | 
|  | static void | 
|  | doRemoveLink(Cache *c, BList *p) | 
|  | { | 
|  | int i, n, recurse; | 
|  | u32int a; | 
|  | Block *b; | 
|  | Label l; | 
|  | BList bl; | 
|  |  | 
|  | recurse = (p->recurse && p->type != BtData && p->type != BtDir); | 
|  |  | 
|  | /* | 
|  | * We're not really going to overwrite b, but if we're not | 
|  | * going to look at its contents, there is no point in reading | 
|  | * them from the disk. | 
|  | */ | 
|  | b = cacheLocalData(c, p->addr, p->type, p->tag, recurse ? OReadOnly : OOverWrite, 0); | 
|  | if(b == nil) | 
|  | return; | 
|  |  | 
|  | /* | 
|  | * When we're unlinking from the superblock, close with the next epoch. | 
|  | */ | 
|  | if(p->epoch == 0) | 
|  | p->epoch = b->l.epoch+1; | 
|  |  | 
|  | /* sanity check */ | 
|  | if(b->l.epoch > p->epoch){ | 
|  | fprint(2, "%s: doRemoveLink: strange epoch %ud > %ud\n", | 
|  | argv0, b->l.epoch, p->epoch); | 
|  | blockPut(b); | 
|  | return; | 
|  | } | 
|  |  | 
|  | if(recurse){ | 
|  | n = c->size / VtScoreSize; | 
|  | for(i=0; i<n; i++){ | 
|  | a = globalToLocal(b->data + i*VtScoreSize); | 
|  | if(a == NilBlock || !readLabel(c, &l, a)) | 
|  | continue; | 
|  | if(l.state&BsClosed) | 
|  | continue; | 
|  | /* | 
|  | * If stack space becomes an issue... | 
|  | p->addr = a; | 
|  | p->type = l.type; | 
|  | p->tag = l.tag; | 
|  | doRemoveLink(c, p); | 
|  | */ | 
|  |  | 
|  | bl.part = PartData; | 
|  | bl.addr = a; | 
|  | bl.type = l.type; | 
|  | bl.tag = l.tag; | 
|  | bl.epoch = p->epoch; | 
|  | bl.next = nil; | 
|  | bl.recurse = 1; | 
|  | /* give up the block lock - share with others */ | 
|  | blockPut(b); | 
|  | doRemoveLink(c, &bl); | 
|  | b = cacheLocalData(c, p->addr, p->type, p->tag, OReadOnly, 0); | 
|  | if(b == nil){ | 
|  | fprint(2, "%s: warning: lost block in doRemoveLink\n", | 
|  | argv0); | 
|  | return; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | l = b->l; | 
|  | l.state |= BsClosed; | 
|  | l.epochClose = p->epoch; | 
|  | if(l.epochClose == l.epoch){ | 
|  | qlock(&c->fl->lk); | 
|  | if(l.epoch == c->fl->epochLow) | 
|  | c->fl->nused--; | 
|  | blockSetLabel(b, &l, 0); | 
|  | qunlock(&c->fl->lk); | 
|  | }else | 
|  | blockSetLabel(b, &l, 0); | 
|  | blockPut(b); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Allocate a BList so that we can record a dependency | 
|  | * or queue a removal related to block b. | 
|  | * If we can't find a BList, we write out b and return nil. | 
|  | */ | 
|  | static BList * | 
|  | blistAlloc(Block *b) | 
|  | { | 
|  | Cache *c; | 
|  | BList *p; | 
|  |  | 
|  | if(b->iostate != BioDirty){ | 
|  | /* | 
|  | * should not happen anymore - | 
|  | * blockDirty used to flush but no longer does. | 
|  | */ | 
|  | assert(b->iostate == BioClean); | 
|  | fprint(2, "%s: blistAlloc: called on clean block\n", argv0); | 
|  | return nil; | 
|  | } | 
|  |  | 
|  | c = b->c; | 
|  | qlock(&c->lk); | 
|  | if(c->blfree == nil){ | 
|  | /* | 
|  | * No free BLists.  What are our options? | 
|  | */ | 
|  |  | 
|  | /* Block has no priors? Just write it. */ | 
|  | if(b->prior == nil){ | 
|  | qunlock(&c->lk); | 
|  | diskWriteAndWait(c->disk, b); | 
|  | return nil; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Wake the flush thread, which will hopefully free up | 
|  | * some BLists for us.  We used to flush a block from | 
|  | * our own prior list and reclaim that BList, but this is | 
|  | * a no-no: some of the blocks on our prior list may | 
|  | * be locked by our caller.  Or maybe their label blocks | 
|  | * are locked by our caller.  In any event, it's too hard | 
|  | * to make sure we can do I/O for ourselves.  Instead, | 
|  | * we assume the flush thread will find something. | 
|  | * (The flush thread never blocks waiting for a block, | 
|  | * so it can't deadlock like we can.) | 
|  | */ | 
|  | while(c->blfree == nil){ | 
|  | rwakeup(&c->flush); | 
|  | rsleep(&c->blrend); | 
|  | if(c->blfree == nil) | 
|  | fprint(2, "%s: flushing for blists\n", argv0); | 
|  | } | 
|  | } | 
|  |  | 
|  | p = c->blfree; | 
|  | c->blfree = p->next; | 
|  | qunlock(&c->lk); | 
|  | return p; | 
|  | } | 
|  |  | 
|  | static void | 
|  | blistFree(Cache *c, BList *bl) | 
|  | { | 
|  | qlock(&c->lk); | 
|  | bl->next = c->blfree; | 
|  | c->blfree = bl; | 
|  | rwakeup(&c->blrend); | 
|  | qunlock(&c->lk); | 
|  | } | 
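|  |  | 
|  | /* | 
|  | * Format a block state as a string (static buffer; not thread safe). | 
|  | */ | 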
|  |  | 
|  | char* | 
|  | bsStr(int state) | 
|  | { | 
|  | static char s[100]; | 
|  |  | 
|  | if(state == BsFree) | 
|  | return "Free"; | 
|  | if(state == BsBad) | 
|  | return "Bad"; | 
|  |  | 
|  | sprint(s, "%x", state); | 
|  | if(!(state&BsAlloc)) | 
|  | strcat(s, ",Free");	/* should not happen */ | 
|  | if(state&BsCopied) | 
|  | strcat(s, ",Copied"); | 
|  | if(state&BsVenti) | 
|  | strcat(s, ",Venti"); | 
|  | if(state&BsClosed) | 
|  | strcat(s, ",Closed"); | 
|  | return s; | 
|  | } | 
|  |  | 
|  | char * | 
|  | bioStr(int iostate) | 
|  | { | 
|  | switch(iostate){ | 
|  | default: | 
|  | return "Unknown!!"; | 
|  | case BioEmpty: | 
|  | return "Empty"; | 
|  | case BioLabel: | 
|  | return "Label"; | 
|  | case BioClean: | 
|  | return "Clean"; | 
|  | case BioDirty: | 
|  | return "Dirty"; | 
|  | case BioReading: | 
|  | return "Reading"; | 
|  | case BioWriting: | 
|  | return "Writing"; | 
|  | case BioReadError: | 
|  | return "ReadError"; | 
|  | case BioVentiError: | 
|  | return "VentiError"; | 
|  | case BioMax: | 
|  | return "Max"; | 
|  | } | 
|  | } | 
|  |  | 
|  | static char *bttab[] = { | 
|  | "BtData", | 
|  | "BtData+1", | 
|  | "BtData+2", | 
|  | "BtData+3", | 
|  | "BtData+4", | 
|  | "BtData+5", | 
|  | "BtData+6", | 
|  | "BtData+7", | 
|  | "BtDir", | 
|  | "BtDir+1", | 
|  | "BtDir+2", | 
|  | "BtDir+3", | 
|  | "BtDir+4", | 
|  | "BtDir+5", | 
|  | "BtDir+6", | 
|  | "BtDir+7", | 
|  | }; | 
|  |  | 
|  | char* | 
|  | btStr(int type) | 
|  | { | 
|  | if(type < nelem(bttab)) | 
|  | return bttab[type]; | 
|  | return "unknown"; | 
|  | } | 
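|  |  | 
|  | /* | 
|  | * Print verbs: labelFmt is used for %L (Label*), scoreFmt for %V (a | 
|  | * Venti score, shown as a local block address when it encodes one). | 
|  | */ | 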
|  |  | 
|  | int | 
|  | labelFmt(Fmt *f) | 
|  | { | 
|  | Label *l; | 
|  |  | 
|  | l = va_arg(f->args, Label*); | 
|  | return fmtprint(f, "%s,%s,e=%ud,%d,tag=%#ux", | 
|  | btStr(l->type), bsStr(l->state), l->epoch, (int)l->epochClose, l->tag); | 
|  | } | 
|  |  | 
|  | int | 
|  | scoreFmt(Fmt *f) | 
|  | { | 
|  | uchar *v; | 
|  | int i; | 
|  | u32int addr; | 
|  |  | 
|  | v = va_arg(f->args, uchar*); | 
|  | if(v == nil){ | 
|  | fmtprint(f, "*"); | 
|  | }else if((addr = globalToLocal(v)) != NilBlock) | 
|  | fmtprint(f, "0x%.8ux", addr); | 
|  | else{ | 
|  | for(i = 0; i < VtScoreSize; i++) | 
|  | fmtprint(f, "%2.2ux", v[i]); | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
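|  |  | 
|  | /* | 
|  | * Sift block b up from heap slot i toward the root until the ordering | 
|  | * (least recently used at the root) is restored; returns the new slot. | 
|  | */ | 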
|  |  | 
|  | static int | 
|  | upHeap(int i, Block *b) | 
|  | { | 
|  | Block *bb; | 
|  | u32int now; | 
|  | int p; | 
|  | Cache *c; | 
|  |  | 
|  | c = b->c; | 
|  | now = c->now; | 
|  | for(; i != 0; i = p){ | 
|  | p = (i - 1) >> 1; | 
|  | bb = c->heap[p]; | 
|  | if(b->used - now >= bb->used - now) | 
|  | break; | 
|  | c->heap[i] = bb; | 
|  | bb->heap = i; | 
|  | } | 
|  | c->heap[i] = b; | 
|  | b->heap = i; | 
|  |  | 
|  | return i; | 
|  | } | 
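|  |  | 
|  | /* | 
|  | * Sift block b down from heap slot i toward the leaves until the | 
|  | * ordering is restored; returns the new slot. | 
|  | */ | 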
|  |  | 
|  | static int | 
|  | downHeap(int i, Block *b) | 
|  | { | 
|  | Block *bb; | 
|  | u32int now; | 
|  | int k; | 
|  | Cache *c; | 
|  |  | 
|  | c = b->c; | 
|  | now = c->now; | 
|  | for(; ; i = k){ | 
|  | k = (i << 1) + 1; | 
|  | if(k >= c->nheap) | 
|  | break; | 
|  | if(k + 1 < c->nheap && c->heap[k]->used - now > c->heap[k + 1]->used - now) | 
|  | k++; | 
|  | bb = c->heap[k]; | 
|  | if(b->used - now <= bb->used - now) | 
|  | break; | 
|  | c->heap[i] = bb; | 
|  | bb->heap = i; | 
|  | } | 
|  | c->heap[i] = b; | 
|  | b->heap = i; | 
|  | return i; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Delete a block from the heap. | 
|  | * Called with c->lk held. | 
|  | */ | 
|  | static void | 
|  | heapDel(Block *b) | 
|  | { | 
|  | int i, si; | 
|  | Cache *c; | 
|  |  | 
|  | c = b->c; | 
|  |  | 
|  | si = b->heap; | 
|  | if(si == BadHeap) | 
|  | return; | 
|  | b->heap = BadHeap; | 
|  | c->nheap--; | 
|  | if(si == c->nheap) | 
|  | return; | 
|  | b = c->heap[c->nheap]; | 
|  | i = upHeap(si, b); | 
|  | if(i == si) | 
|  | downHeap(i, b); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Insert a block into the heap. | 
|  | * Called with c->lk held. | 
|  | */ | 
|  | static void | 
|  | heapIns(Block *b) | 
|  | { | 
|  | assert(b->heap == BadHeap); | 
|  | upHeap(b->c->nheap++, b); | 
|  | rwakeup(&b->c->heapwait); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Get just the label for a block. | 
|  | */ | 
|  | int | 
|  | readLabel(Cache *c, Label *l, u32int addr) | 
|  | { | 
|  | int lpb; | 
|  | Block *b; | 
|  | u32int a; | 
|  |  | 
|  | lpb = c->size / LabelSize; | 
|  | a = addr / lpb; | 
|  | b = cacheLocal(c, PartLabel, a, OReadOnly); | 
|  | if(b == nil){ | 
|  | blockPut(b); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | if(!labelUnpack(l, b->data, addr%lpb)){ | 
|  | blockPut(b); | 
|  | return 0; | 
|  | } | 
|  | blockPut(b); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Process unlink queue. | 
|  | * Called with c->lk held. | 
|  | */ | 
|  | static void | 
|  | unlinkBody(Cache *c) | 
|  | { | 
|  | BList *p; | 
|  |  | 
|  | while(c->uhead != nil){ | 
|  | p = c->uhead; | 
|  | c->uhead = p->next; | 
|  | qunlock(&c->lk); | 
|  | doRemoveLink(c, p); | 
|  | qlock(&c->lk); | 
|  | p->next = c->blfree; | 
|  | c->blfree = p; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Occasionally unlink the blocks on the cache unlink queue. | 
|  | */ | 
|  | static void | 
|  | unlinkThread(void *a) | 
|  | { | 
|  | Cache *c = a; | 
|  |  | 
|  | threadsetname("unlink"); | 
|  |  | 
|  | qlock(&c->lk); | 
|  | for(;;){ | 
|  | while(c->uhead == nil && c->die.l == nil) | 
|  | rsleep(&c->unlink); | 
|  | if(c->die.l != nil) | 
|  | break; | 
|  | unlinkBody(c); | 
|  | } | 
|  | c->ref--; | 
|  | rwakeup(&c->die); | 
|  | qunlock(&c->lk); | 
|  | } | 
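|  |  | 
|  | /* | 
|  | * qsort comparison for BAddr: order by partition, then address, so that | 
|  | * the flush thread issues writes in roughly sequential disk order. | 
|  | */ | 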
|  |  | 
|  | static int | 
|  | baddrCmp(const void *a0, const void *a1) | 
|  | { | 
|  | BAddr *b0, *b1; | 
|  | b0 = (BAddr*)a0; | 
|  | b1 = (BAddr*)a1; | 
|  |  | 
|  | if(b0->part < b1->part) | 
|  | return -1; | 
|  | if(b0->part > b1->part) | 
|  | return 1; | 
|  | if(b0->addr < b1->addr) | 
|  | return -1; | 
|  | if(b0->addr > b1->addr) | 
|  | return 1; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Scan the block list for dirty blocks; add them to the list c->baddr. | 
|  | */ | 
|  | static void | 
|  | flushFill(Cache *c) | 
|  | { | 
|  | int i, ndirty; | 
|  | BAddr *p; | 
|  | Block *b; | 
|  |  | 
|  | qlock(&c->lk); | 
|  | if(c->ndirty == 0){ | 
|  | qunlock(&c->lk); | 
|  | return; | 
|  | } | 
|  |  | 
|  | p = c->baddr; | 
|  | ndirty = 0; | 
|  | for(i=0; i<c->nblocks; i++){ | 
|  | b = c->blocks + i; | 
|  | if(b->part == PartError) | 
|  | continue; | 
|  | if(b->iostate == BioDirty || b->iostate == BioWriting) | 
|  | ndirty++; | 
|  | if(b->iostate != BioDirty) | 
|  | continue; | 
|  | p->part = b->part; | 
|  | p->addr = b->addr; | 
|  | p->vers = b->vers; | 
|  | p++; | 
|  | } | 
|  | if(ndirty != c->ndirty){ | 
|  | fprint(2, "%s: ndirty mismatch expected %d found %d\n", | 
|  | argv0, c->ndirty, ndirty); | 
|  | c->ndirty = ndirty; | 
|  | } | 
|  | qunlock(&c->lk); | 
|  |  | 
|  | c->bw = p - c->baddr; | 
|  | qsort(c->baddr, c->bw, sizeof(BAddr), baddrCmp); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * This is not thread safe, i.e. it can't be called from multiple threads. | 
|  | * | 
|  | * It's okay how we use it, because it only gets called in | 
|  | * the flushThread.  And cacheFree, but only after | 
|  | * cacheFree has killed off the flushThread. | 
|  | */ | 
|  | static int | 
|  | cacheFlushBlock(Cache *c) | 
|  | { | 
|  | Block *b; | 
|  | BAddr *p; | 
|  | int lockfail, nfail; | 
|  |  | 
|  | nfail = 0; | 
|  | for(;;){ | 
|  | if(c->br == c->be){ | 
|  | if(c->bw == 0 || c->bw == c->be) | 
|  | flushFill(c); | 
|  | c->br = 0; | 
|  | c->be = c->bw; | 
|  | c->bw = 0; | 
|  | c->nflush = 0; | 
|  | } | 
|  |  | 
|  | if(c->br == c->be) | 
|  | return 0; | 
|  | p = c->baddr + c->br; | 
|  | c->br++; | 
|  | b = _cacheLocalLookup(c, p->part, p->addr, p->vers, Nowaitlock, | 
|  | &lockfail); | 
|  |  | 
|  | if(b && blockWrite(b, Nowaitlock)){ | 
|  | c->nflush++; | 
|  | blockPut(b); | 
|  | return 1; | 
|  | } | 
|  | if(b) | 
|  | blockPut(b); | 
|  |  | 
|  | /* | 
|  | * Why didn't we write the block? | 
|  | */ | 
|  |  | 
|  | /* Block already written out */ | 
|  | if(b == nil && !lockfail) | 
|  | continue; | 
|  |  | 
|  | /* Failed to acquire lock; sleep if happens a lot. */ | 
|  | if(lockfail && ++nfail > 100){ | 
|  | sleep(500); | 
|  | nfail = 0; | 
|  | } | 
|  | /* Requeue block. */ | 
|  | if(c->bw < c->be) | 
|  | c->baddr[c->bw++] = *p; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Occasionally flush dirty blocks from memory to the disk. | 
|  | */ | 
|  | static void | 
|  | flushThread(void *a) | 
|  | { | 
|  | Cache *c = a; | 
|  | int i; | 
|  |  | 
|  | threadsetname("flush"); | 
|  | qlock(&c->lk); | 
|  | while(c->die.l == nil){ | 
|  | rsleep(&c->flush); | 
|  | qunlock(&c->lk); | 
|  | for(i=0; i<FlushSize; i++) | 
|  | if(!cacheFlushBlock(c)){ | 
|  | /* | 
|  | * If i==0, could be someone is waking us repeatedly | 
|  | * to flush the cache but there's no work to do. | 
|  | * Pause a little. | 
|  | */ | 
|  | if(i==0){ | 
|  | // fprint(2, "%s: flushthread found " | 
|  | //	"nothing to flush - %d dirty\n", | 
|  | //	argv0, c->ndirty); | 
|  | sleep(250); | 
|  | } | 
|  | break; | 
|  | } | 
|  | if(i==0 && c->ndirty){ | 
|  | /* | 
|  | * All the blocks are being written right now -- there's nothing to do. | 
|  | * We might be spinning with cacheFlush though -- he'll just keep | 
|  | * kicking us until c->ndirty goes down.  Probably we should sleep | 
|  | * on something that the diskThread can kick, but for now we'll | 
|  | * just pause for a little while waiting for disks to finish. | 
|  | */ | 
|  | sleep(100); | 
|  | } | 
|  | qlock(&c->lk); | 
|  | rwakeupall(&c->flushwait); | 
|  | } | 
|  | c->ref--; | 
|  | rwakeup(&c->die); | 
|  | qunlock(&c->lk); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Flush the cache. | 
|  | */ | 
|  | void | 
|  | cacheFlush(Cache *c, int wait) | 
|  | { | 
|  | qlock(&c->lk); | 
|  | if(wait){ | 
|  | while(c->ndirty){ | 
|  | //	consPrint("cacheFlush: %d dirty blocks, uhead %p\n", | 
|  | //		c->ndirty, c->uhead); | 
|  | rwakeup(&c->flush); | 
|  | rsleep(&c->flushwait); | 
|  | } | 
|  | //	consPrint("cacheFlush: done (%d dirty, uhead %p)\n", c->ndirty, c->uhead); | 
|  | }else if(c->ndirty) | 
|  | rwakeup(&c->flush); | 
|  | qunlock(&c->lk); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Kick the flushThread every 30 seconds. | 
|  | */ | 
|  | static void | 
|  | cacheSync(void *v) | 
|  | { | 
|  | Cache *c; | 
|  |  | 
|  | c = v; | 
|  | cacheFlush(c, 0); | 
|  | } |