| #include "stdinc.h" |
| #include "dat.h" |
| #include "fns.h" |
| #include "error.h" |
| |
| static void diskThread(void *a); |
| |
| enum { |
| /* |
| * disable measurement since it gets alignment faults on BG |
| * and the guts used to be commented out. |
| */ |
| Timing = 0, /* flag */ |
| QueueSize = 100, /* maximum block to queue */ |
| }; |
| |
| struct Disk { |
| QLock lk; |
| int ref; |
| |
| int fd; |
| Header h; |
| |
| Rendez flow; |
| Rendez starve; |
| Rendez flush; |
| Rendez die; |
| |
| int nqueue; |
| |
| Block *cur; /* block to do on current scan */ |
| Block *next; /* blocks to do next scan */ |
| }; |
| |
| /* keep in sync with Part* enum in dat.h */ |
| static char *partname[] = { |
| [PartError] = "error", |
| [PartSuper] = "super", |
| [PartLabel] = "label", |
| [PartData] = "data", |
| [PartVenti] = "venti", |
| }; |
| |
| Disk * |
| diskAlloc(int fd) |
| { |
| u8int buf[HeaderSize]; |
| Header h; |
| Disk *disk; |
| |
| if(pread(fd, buf, HeaderSize, HeaderOffset) < HeaderSize){ |
| werrstr("short read: %r"); |
| return nil; |
| } |
| |
| if(!headerUnpack(&h, buf)){ |
| werrstr("bad disk header"); |
| return nil; |
| } |
| disk = vtmallocz(sizeof(Disk)); |
| disk->starve.l = &disk->lk; |
| disk->flow.l = &disk->lk; |
| disk->flush.l = &disk->lk; |
| disk->fd = fd; |
| disk->h = h; |
| |
| disk->ref = 2; |
| proccreate(diskThread, disk, STACK); |
| |
| return disk; |
| } |
| |
| void |
| diskFree(Disk *disk) |
| { |
| diskFlush(disk); |
| |
| /* kill slave */ |
| qlock(&disk->lk); |
| disk->die.l = &disk->lk; |
| rwakeup(&disk->starve); |
| while(disk->ref > 1) |
| rsleep(&disk->die); |
| qunlock(&disk->lk); |
| close(disk->fd); |
| vtfree(disk); |
| } |
| |
| static u32int |
| partStart(Disk *disk, int part) |
| { |
| switch(part){ |
| default: |
| assert(0); |
| case PartSuper: |
| return disk->h.super; |
| case PartLabel: |
| return disk->h.label; |
| case PartData: |
| return disk->h.data; |
| } |
| } |
| |
| |
| static u32int |
| partEnd(Disk *disk, int part) |
| { |
| switch(part){ |
| default: |
| assert(0); |
| case PartSuper: |
| return disk->h.super+1; |
| case PartLabel: |
| return disk->h.data; |
| case PartData: |
| return disk->h.end; |
| } |
| } |
| |
| int |
| diskReadRaw(Disk *disk, int part, u32int addr, uchar *buf) |
| { |
| ulong start, end; |
| u64int offset; |
| int n, nn; |
| |
| start = partStart(disk, part); |
| end = partEnd(disk, part); |
| |
| if(addr >= end-start){ |
| werrstr(EBadAddr); |
| return 0; |
| } |
| |
| offset = ((u64int)(addr + start))*disk->h.blockSize; |
| n = disk->h.blockSize; |
| while(n > 0){ |
| nn = pread(disk->fd, buf, n, offset); |
| if(nn < 0){ |
| werrstr("%r"); |
| return 0; |
| } |
| if(nn == 0){ |
| werrstr("eof reading disk"); |
| return 0; |
| } |
| n -= nn; |
| offset += nn; |
| buf += nn; |
| } |
| return 1; |
| } |
| |
| int |
| diskWriteRaw(Disk *disk, int part, u32int addr, uchar *buf) |
| { |
| ulong start, end; |
| u64int offset; |
| int n; |
| |
| start = partStart(disk, part); |
| end = partEnd(disk, part); |
| |
| if(addr >= end - start){ |
| werrstr(EBadAddr); |
| return 0; |
| } |
| |
| offset = ((u64int)(addr + start))*disk->h.blockSize; |
| n = pwrite(disk->fd, buf, disk->h.blockSize, offset); |
| if(n < 0){ |
| werrstr("%r"); |
| return 0; |
| } |
| if(n < disk->h.blockSize) { |
| werrstr("short write"); |
| return 0; |
| } |
| |
| return 1; |
| } |
| |
| static void |
| diskQueue(Disk *disk, Block *b) |
| { |
| Block **bp, *bb; |
| |
| qlock(&disk->lk); |
| while(disk->nqueue >= QueueSize) |
| rsleep(&disk->flow); |
| if(disk->cur == nil || b->addr > disk->cur->addr) |
| bp = &disk->cur; |
| else |
| bp = &disk->next; |
| |
| for(bb=*bp; bb; bb=*bp){ |
| if(b->addr < bb->addr) |
| break; |
| bp = &bb->ionext; |
| } |
| b->ionext = bb; |
| *bp = b; |
| if(disk->nqueue == 0) |
| rwakeup(&disk->starve); |
| disk->nqueue++; |
| qunlock(&disk->lk); |
| } |
| |
| |
| void |
| diskRead(Disk *disk, Block *b) |
| { |
| assert(b->iostate == BioEmpty || b->iostate == BioLabel); |
| blockSetIOState(b, BioReading); |
| diskQueue(disk, b); |
| } |
| |
| void |
| diskWrite(Disk *disk, Block *b) |
| { |
| assert(b->nlock == 1); |
| assert(b->iostate == BioDirty); |
| blockSetIOState(b, BioWriting); |
| diskQueue(disk, b); |
| } |
| |
| void |
| diskWriteAndWait(Disk *disk, Block *b) |
| { |
| int nlock; |
| |
| /* |
| * If b->nlock > 1, the block is aliased within |
| * a single thread. That thread is us. |
| * DiskWrite does some funny stuff with QLock |
| * and blockPut that basically assumes b->nlock==1. |
| * We humor diskWrite by temporarily setting |
| * nlock to 1. This needs to be revisited. |
| */ |
| nlock = b->nlock; |
| if(nlock > 1) |
| b->nlock = 1; |
| diskWrite(disk, b); |
| while(b->iostate != BioClean) |
| rsleep(&b->ioready); |
| b->nlock = nlock; |
| } |
| |
| int |
| diskBlockSize(Disk *disk) |
| { |
| return disk->h.blockSize; /* immuttable */ |
| } |
| |
| int |
| diskFlush(Disk *disk) |
| { |
| Dir dir; |
| |
| qlock(&disk->lk); |
| while(disk->nqueue > 0) |
| rsleep(&disk->flush); |
| qunlock(&disk->lk); |
| |
| /* there really should be a cleaner interface to flush an fd */ |
| nulldir(&dir); |
| if(dirfwstat(disk->fd, &dir) < 0){ |
| werrstr("%r"); |
| return 0; |
| } |
| return 1; |
| } |
| |
| u32int |
| diskSize(Disk *disk, int part) |
| { |
| return partEnd(disk, part) - partStart(disk, part); |
| } |
| |
| static uintptr |
| mypc(int x) |
| { |
| return getcallerpc(&x); |
| } |
| |
| static char * |
| disk2file(Disk *disk) |
| { |
| static char buf[256]; |
| |
| #ifndef PLAN9PORT |
| if (fd2path(disk->fd, buf, sizeof buf) < 0) |
| strncpy(buf, "GOK", sizeof buf); |
| #endif |
| return buf; |
| } |
| |
| static void |
| diskThread(void *a) |
| { |
| Disk *disk = a; |
| Block *b; |
| uchar *buf, *p; |
| double t; |
| int nio; |
| |
| threadsetname("disk"); |
| |
| //fprint(2, "diskThread %d\n", getpid()); |
| |
| buf = vtmalloc(disk->h.blockSize); |
| |
| qlock(&disk->lk); |
| if (Timing) { |
| nio = 0; |
| t = -nsec(); |
| } |
| for(;;){ |
| while(disk->nqueue == 0){ |
| if (Timing) { |
| t += nsec(); |
| if(nio >= 10000){ |
| fprint(2, "disk: io=%d at %.3fms\n", |
| nio, t*1e-6/nio); |
| nio = 0; |
| t = 0; |
| } |
| } |
| if(disk->die.l != nil) |
| goto Done; |
| rsleep(&disk->starve); |
| if (Timing) |
| t -= nsec(); |
| } |
| assert(disk->cur != nil || disk->next != nil); |
| |
| if(disk->cur == nil){ |
| disk->cur = disk->next; |
| disk->next = nil; |
| } |
| b = disk->cur; |
| disk->cur = b->ionext; |
| qunlock(&disk->lk); |
| |
| /* |
| * no one should hold onto blocking in the |
| * reading or writing state, so this lock should |
| * not cause deadlock. |
| */ |
| if(0)fprint(2, "fossil: diskThread: %d:%d %x\n", getpid(), b->part, b->addr); |
| bwatchLock(b); |
| qlock(&b->lk); |
| b->pc = mypc(0); |
| assert(b->nlock == 1); |
| switch(b->iostate){ |
| default: |
| abort(); |
| case BioReading: |
| if(!diskReadRaw(disk, b->part, b->addr, b->data)){ |
| fprint(2, "fossil: diskReadRaw failed: %s: " |
| "score %V: part=%s block %ud: %r\n", |
| disk2file(disk), b->score, |
| partname[b->part], b->addr); |
| blockSetIOState(b, BioReadError); |
| }else |
| blockSetIOState(b, BioClean); |
| break; |
| case BioWriting: |
| p = blockRollback(b, buf); |
| /* NB: ctime result ends with a newline */ |
| if(!diskWriteRaw(disk, b->part, b->addr, p)){ |
| fprint(2, "fossil: diskWriteRaw failed: %s: " |
| "score %V: date %s part=%s block %ud: %r\n", |
| disk2file(disk), b->score, |
| ctime(time(0)), |
| partname[b->part], b->addr); |
| break; |
| } |
| if(p != buf) |
| blockSetIOState(b, BioClean); |
| else |
| blockSetIOState(b, BioDirty); |
| break; |
| } |
| |
| blockPut(b); /* remove extra reference, unlock */ |
| qlock(&disk->lk); |
| disk->nqueue--; |
| if(disk->nqueue == QueueSize-1) |
| rwakeup(&disk->flow); |
| if(disk->nqueue == 0) |
| rwakeup(&disk->flush); |
| if(Timing) |
| nio++; |
| } |
| Done: |
| //fprint(2, "diskThread done\n"); |
| disk->ref--; |
| rwakeup(&disk->die); |
| qunlock(&disk->lk); |
| vtfree(buf); |
| } |