| /* |
| * Write the dirty icache entries to disk. Random seeks are |
| * so expensive that it makes sense to wait until we have |
| * a lot and then just make a sequential pass over the disk. |
| */ |
| #include "stdinc.h" |
| #include "dat.h" |
| #include "fns.h" |
| |
| static void icachewriteproc(void*); |
| static void icachewritecoord(void*); |
| static IEntry *iesort(IEntry*); |
| |
| int icachesleeptime = 1000; /* milliseconds */ |
| int minicachesleeptime = 0; |
| |
/*
 * Bufsize bounds the address span that nextchunk groups into one
 * chunk; icachewriteproc allocates its I/O buffer as Bufsize plus
 * one block.
 */
enum
{
	Bufsize = 8*1024*1024	/* 8 MiB */
};
| |
| typedef struct IWrite IWrite; |
| struct IWrite |
| { |
| Round round; |
| AState as; |
| }; |
| |
| static IWrite iwrite; |
| |
| void |
| initicachewrite(void) |
| { |
| int i; |
| Index *ix; |
| |
| initround(&iwrite.round, "icache", 120*60*1000); |
| ix = mainindex; |
| for(i=0; i<ix->nsects; i++){ |
| ix->sects[i]->writechan = chancreate(sizeof(ulong), 1); |
| ix->sects[i]->writedonechan = chancreate(sizeof(ulong), 1); |
| vtproc(icachewriteproc, ix->sects[i]); |
| } |
| vtproc(icachewritecoord, nil); |
| vtproc(delaykickroundproc, &iwrite.round); |
| } |
| |
| static u64int |
| ie2diskaddr(Index *ix, ISect *is, IEntry *ie) |
| { |
| u64int bucket, addr; |
| |
| bucket = hashbits(ie->score, 32)/ix->div; |
| addr = is->blockbase + ((bucket - is->start) << is->blocklog); |
| return addr; |
| } |
| |
| static IEntry* |
| nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf) |
| { |
| u64int addr, naddr; |
| uint nbuf; |
| int bsize; |
| IEntry *iefirst, *ie, **l; |
| |
| bsize = 1<<is->blocklog; |
| iefirst = *pie; |
| addr = ie2diskaddr(ix, is, iefirst); |
| nbuf = 0; |
| for(l = &iefirst->nextdirty; (ie = *l) != nil; l = &(*l)->nextdirty){ |
| naddr = ie2diskaddr(ix, is, ie); |
| if(naddr - addr >= Bufsize) |
| break; |
| nbuf = naddr - addr; |
| } |
| nbuf += bsize; |
| |
| *l = nil; |
| *pie = ie; |
| *paddr = addr; |
| *pnbuf = nbuf; |
| return iefirst; |
| } |
| |
| static int |
| icachewritesect(Index *ix, ISect *is, u8int *buf) |
| { |
| int err, i, werr, h, bsize, t; |
| u32int lo, hi; |
| u64int addr, naddr; |
| uint nbuf, off; |
| DBlock *b; |
| IBucket ib; |
| IEntry *ie, *iedirty, **l, *chunk; |
| |
| lo = is->start * ix->div; |
| if(TWID32/ix->div < is->stop) |
| hi = TWID32; |
| else |
| hi = is->stop * ix->div - 1; |
| |
| trace(TraceProc, "icachewritesect enter %ud %ud %llud", |
| lo, hi, iwrite.as.aa); |
| |
| iedirty = icachedirty(lo, hi, iwrite.as.aa); |
| iedirty = iesort(iedirty); |
| bsize = 1 << is->blocklog; |
| err = 0; |
| |
| while(iedirty){ |
| disksched(); |
| while((t = icachesleeptime) == SleepForever){ |
| sleep(1000); |
| disksched(); |
| } |
| if(t < minicachesleeptime) |
| t = minicachesleeptime; |
| if(t > 0) |
| sleep(t); |
| trace(TraceProc, "icachewritesect nextchunk"); |
| chunk = nextchunk(ix, is, &iedirty, &addr, &nbuf); |
| |
| trace(TraceProc, "icachewritesect readpart 0x%llux+0x%ux", |
| addr, nbuf); |
| if(readpart(is->part, addr, buf, nbuf) < 0){ |
| fprint(2, "%s: part %s addr 0x%llux: icachewritesect " |
| "readpart: %r\n", argv0, is->part->name, addr); |
| err = -1; |
| continue; |
| } |
| trace(TraceProc, "icachewritesect updatebuf"); |
| addstat(StatIsectReadBytes, nbuf); |
| addstat(StatIsectRead, 1); |
| |
| for(l=&chunk; (ie=*l)!=nil; l=&ie->nextdirty){ |
| again: |
| naddr = ie2diskaddr(ix, is, ie); |
| off = naddr - addr; |
| if(off+bsize > nbuf){ |
| fprint(2, "%s: whoops! addr=0x%llux nbuf=%ud " |
| "addr+nbuf=0x%llux naddr=0x%llux\n", |
| argv0, addr, nbuf, addr+nbuf, naddr); |
| assert(off+bsize <= nbuf); |
| } |
| unpackibucket(&ib, buf+off, is->bucketmagic); |
| if(okibucket(&ib, is) < 0){ |
| fprint(2, "%s: bad bucket XXX\n", argv0); |
| goto skipit; |
| } |
| trace(TraceProc, "icachewritesect add %V at 0x%llux", |
| ie->score, naddr); |
| h = bucklook(ie->score, ie->ia.type, ib.data, ib.n); |
| if(h & 1){ |
| h ^= 1; |
| packientry(ie, &ib.data[h]); |
| }else if(ib.n < is->buckmax){ |
| memmove(&ib.data[h + IEntrySize], &ib.data[h], |
| ib.n*IEntrySize - h); |
| ib.n++; |
| packientry(ie, &ib.data[h]); |
| }else{ |
| fprint(2, "%s: bucket overflow XXX\n", argv0); |
| skipit: |
| err = -1; |
| *l = ie->nextdirty; |
| ie = *l; |
| if(ie) |
| goto again; |
| else |
| break; |
| } |
| packibucket(&ib, buf+off, is->bucketmagic); |
| } |
| |
| diskaccess(1); |
| |
| trace(TraceProc, "icachewritesect writepart", addr, nbuf); |
| werr = 0; |
| if(writepart(is->part, addr, buf, nbuf) < 0 || flushpart(is->part) < 0) |
| werr = -1; |
| |
| for(i=0; i<nbuf; i+=bsize){ |
| if((b = _getdblock(is->part, addr+i, ORDWR, 0)) != nil){ |
| memmove(b->data, buf+i, bsize); |
| putdblock(b); |
| } |
| } |
| |
| if(werr < 0){ |
| fprint(2, "%s: part %s addr 0x%llux: icachewritesect " |
| "writepart: %r\n", argv0, is->part->name, addr); |
| err = -1; |
| continue; |
| } |
| |
| addstat(StatIsectWriteBytes, nbuf); |
| addstat(StatIsectWrite, 1); |
| icacheclean(chunk); |
| } |
| |
| trace(TraceProc, "icachewritesect done"); |
| return err; |
| } |
| |
| static void |
| icachewriteproc(void *v) |
| { |
| int ret; |
| uint bsize; |
| ISect *is; |
| Index *ix; |
| u8int *buf; |
| |
| ix = mainindex; |
| is = v; |
| threadsetname("icachewriteproc:%s", is->part->name); |
| |
| bsize = 1<<is->blocklog; |
| buf = emalloc(Bufsize+bsize); |
| buf = (u8int*)(((uintptr)buf+bsize-1)&~(uintptr)(bsize-1)); |
| |
| for(;;){ |
| trace(TraceProc, "icachewriteproc recv"); |
| recv(is->writechan, 0); |
| trace(TraceWork, "start"); |
| ret = icachewritesect(ix, is, buf); |
| trace(TraceProc, "icachewriteproc send"); |
| trace(TraceWork, "finish"); |
| sendul(is->writedonechan, ret); |
| } |
| } |
| |
| static void |
| icachewritecoord(void *v) |
| { |
| int i, err; |
| Index *ix; |
| AState as; |
| |
| USED(v); |
| |
| threadsetname("icachewritecoord"); |
| |
| ix = mainindex; |
| iwrite.as = icachestate(); |
| |
| for(;;){ |
| trace(TraceProc, "icachewritecoord sleep"); |
| waitforkick(&iwrite.round); |
| trace(TraceWork, "start"); |
| as = icachestate(); |
| if(as.arena==iwrite.as.arena && as.aa==iwrite.as.aa){ |
| /* will not be able to do anything more than last flush - kick disk */ |
| trace(TraceProc, "icachewritecoord kick dcache"); |
| kickdcache(); |
| trace(TraceProc, "icachewritecoord kicked dcache"); |
| goto SkipWork; /* won't do anything; don't bother rewriting bloom filter */ |
| } |
| iwrite.as = as; |
| |
| trace(TraceProc, "icachewritecoord start flush"); |
| if(iwrite.as.arena){ |
| for(i=0; i<ix->nsects; i++) |
| send(ix->sects[i]->writechan, 0); |
| if(ix->bloom) |
| send(ix->bloom->writechan, 0); |
| |
| err = 0; |
| for(i=0; i<ix->nsects; i++) |
| err |= recvul(ix->sects[i]->writedonechan); |
| if(ix->bloom) |
| err |= recvul(ix->bloom->writedonechan); |
| |
| trace(TraceProc, "icachewritecoord donewrite err=%d", err); |
| if(err == 0){ |
| setatailstate(&iwrite.as); |
| } |
| } |
| SkipWork: |
| icacheclean(nil); /* wake up anyone waiting */ |
| trace(TraceWork, "finish"); |
| addstat(StatIcacheFlush, 1); |
| } |
| } |
| |
| void |
| flushicache(void) |
| { |
| trace(TraceProc, "flushicache enter"); |
| kickround(&iwrite.round, 1); |
| trace(TraceProc, "flushicache exit"); |
| } |
| |
| void |
| kickicache(void) |
| { |
| kickround(&iwrite.round, 0); |
| } |
| |
| void |
| delaykickicache(void) |
| { |
| delaykickround(&iwrite.round); |
| } |
| |
| static IEntry* |
| iesort(IEntry *ie) |
| { |
| int cmp; |
| IEntry **l; |
| IEntry *ie1, *ie2, *sorted; |
| |
| if(ie == nil || ie->nextdirty == nil) |
| return ie; |
| |
| /* split the lists */ |
| ie1 = ie; |
| ie2 = ie; |
| if(ie2) |
| ie2 = ie2->nextdirty; |
| if(ie2) |
| ie2 = ie2->nextdirty; |
| while(ie1 && ie2){ |
| ie1 = ie1->nextdirty; |
| ie2 = ie2->nextdirty; |
| if(ie2) |
| ie2 = ie2->nextdirty; |
| } |
| if(ie1){ |
| ie2 = ie1->nextdirty; |
| ie1->nextdirty = nil; |
| } |
| |
| /* sort the lists */ |
| ie1 = iesort(ie); |
| ie2 = iesort(ie2); |
| |
| /* merge the lists */ |
| sorted = nil; |
| l = &sorted; |
| cmp = 0; |
| while(ie1 || ie2){ |
| if(ie1 && ie2) |
| cmp = scorecmp(ie1->score, ie2->score); |
| if(ie1==nil || (ie2 && cmp > 0)){ |
| *l = ie2; |
| l = &ie2->nextdirty; |
| ie2 = ie2->nextdirty; |
| }else{ |
| *l = ie1; |
| l = &ie1->nextdirty; |
| ie1 = ie1->nextdirty; |
| } |
| } |
| *l = nil; |
| return sorted; |
| } |
| |