|  | #include <u.h> | 
|  | #include <libc.h> | 
|  | #include <bin.h> | 
|  | #include <bio.h> | 
|  | #include <regexp.h> | 
|  | #include "../../../libregexp/regcomp.h" | 
|  | #include "dfa.h" | 
|  |  | 
|  | void rdump(Reprog*); | 
|  | void dump(Dreprog*); | 
|  |  | 
|  | /* | 
|  | * Standard NFA determinization and DFA minimization. | 
|  | */ | 
|  | typedef struct Deter Deter; | 
|  | typedef struct Reiset Reiset; | 
|  |  | 
|  | void ddump(Deter*); | 
|  |  | 
|  | /* state of determinization */ | 
|  | struct Deter | 
|  | { | 
|  | jmp_buf kaboom;	/* jmp on error */ | 
|  |  | 
|  | Bin *bin;		/* bin for temporary allocations */ | 
|  |  | 
|  | Reprog *p;	/* program being determinized */ | 
|  | uint ninst;		/* number of instructions in program */ | 
|  |  | 
|  | Reiset *alloc;	/* chain of all Reisets */ | 
|  | Reiset **last; | 
|  |  | 
|  | Reiset **hash;	/* hash of all Reisets */ | 
|  | uint nhash; | 
|  |  | 
|  | Reiset *tmp;	/* temporaries for walk */ | 
|  | uchar *bits; | 
|  |  | 
|  | Rune *c;		/* ``interesting'' characters */ | 
|  | uint nc; | 
|  | }; | 
|  |  | 
|  | /* set of Reinsts: perhaps we should use a bit list instead of the indices? */ | 
|  | struct Reiset | 
|  | { | 
|  | uint *inst;		/* indices of instructions in set */ | 
|  | uint ninst;		/* size of set */ | 
|  |  | 
|  | Reiset *next;	/* d.alloc chain */ | 
|  | Reiset *hash;	/* d.hash chain */ | 
|  | Reiset **delta;	/* where to go on each interesting char */ | 
|  | uint id;		/* assigned id during minimization */ | 
|  | uint isfinal;	/* is an accepting (final) state */ | 
|  | }; | 
|  |  | 
|  | static Reiset* | 
|  | ralloc(Deter *d, int ninst) | 
|  | { | 
|  | Reiset *t; | 
|  |  | 
|  | t = binalloc(&d->bin, sizeof(Reiset)+2*d->nc*sizeof(Reiset*)+sizeof(uint)*ninst, 0); | 
|  | if(t == nil) | 
|  | longjmp(d->kaboom, 1); | 
|  | t->delta = (Reiset**)&t[1]; | 
|  | t->inst = (uint*)&t->delta[2*d->nc]; | 
|  | return t; | 
|  | } | 
|  |  | 
|  | /* find the canonical form a given Reiset */ | 
|  | static Reiset* | 
|  | findreiset(Deter *d, Reiset *s) | 
|  | { | 
|  | int i, szinst; | 
|  | uint h; | 
|  | Reiset *t; | 
|  |  | 
|  | h = 0; | 
|  | for(i=0; i<s->ninst; i++) | 
|  | h = h*1000003 + s->inst[i]; | 
|  | h %= d->nhash; | 
|  |  | 
|  | szinst = s->ninst*sizeof(s->inst[0]); | 
|  | for(t=d->hash[h]; t; t=t->hash) | 
|  | if(t->ninst==s->ninst && memcmp(t->inst, s->inst, szinst)==0) | 
|  | return t; | 
|  |  | 
|  | t = ralloc(d, s->ninst); | 
|  | t->hash = d->hash[h]; | 
|  | d->hash[h] = t; | 
|  |  | 
|  | *d->last = t; | 
|  | d->last = &t->next; | 
|  | t->next = 0; | 
|  |  | 
|  | t->ninst = s->ninst; | 
|  | memmove(t->inst, s->inst, szinst); | 
|  |  | 
|  | /* delta is filled in later */ | 
|  |  | 
|  | return t; | 
|  | } | 
|  |  | 
|  | /* convert bits to a real reiset */ | 
|  | static Reiset* | 
|  | bits2reiset(Deter *d, uchar *bits) | 
|  | { | 
|  | int k; | 
|  | Reiset *s; | 
|  |  | 
|  | s = d->tmp; | 
|  | s->ninst = 0; | 
|  | for(k=0; k<d->ninst; k++) | 
|  | if(bits[k]) | 
|  | s->inst[s->ninst++] = k; | 
|  | return findreiset(d, s); | 
|  | } | 
|  |  | 
|  | /* add n to state set; if n < k, need to go around again */ | 
|  | static int | 
|  | add(int n, uchar *bits, int k) | 
|  | { | 
|  | if(bits[n]) | 
|  | return 0; | 
|  | bits[n] = 1; | 
|  | return n < k; | 
|  | } | 
|  |  | 
|  | /* update bits to follow all the empty (non-character-related) transitions possible */ | 
|  | static void | 
|  | followempty(Deter *d, uchar *bits, int bol, int eol) | 
|  | { | 
|  | int again, k; | 
|  | Reinst *i; | 
|  |  | 
|  | do{ | 
|  | again = 0; | 
|  | for(i=d->p->firstinst, k=0; k < d->ninst; i++, k++){ | 
|  | if(!bits[k]) | 
|  | continue; | 
|  | switch(i->type){ | 
|  | case RBRA: | 
|  | case LBRA: | 
|  | again |= add(i->u2.next - d->p->firstinst, bits, k); | 
|  | break; | 
|  | case OR: | 
|  | again |= add(i->u2.left - d->p->firstinst, bits, k); | 
|  | again |= add(i->u1.right - d->p->firstinst, bits, k); | 
|  | break; | 
|  | case BOL: | 
|  | if(bol) | 
|  | again |= add(i->u2.next - d->p->firstinst, bits, k); | 
|  | break; | 
|  | case EOL: | 
|  | if(eol) | 
|  | again |= add(i->u2.next - d->p->firstinst, bits, k); | 
|  | break; | 
|  | } | 
|  | } | 
|  | }while(again); | 
|  |  | 
|  | /* | 
|  | * Clear bits for useless transitions.  We could do this during | 
|  | * the switch above, but then we have no guarantee of termination | 
|  | * if we get a loop in the regexp. | 
|  | */ | 
|  | for(i=d->p->firstinst, k=0; k < d->ninst; i++, k++){ | 
|  | if(!bits[k]) | 
|  | continue; | 
|  | switch(i->type){ | 
|  | case RBRA: | 
|  | case LBRA: | 
|  | case OR: | 
|  | case BOL: | 
|  | case EOL: | 
|  | bits[k] = 0; | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Where does s go if it sees rune r? | 
|  | * Eol is true if a $ matches the string at the position just after r. | 
|  | */ | 
|  | static Reiset* | 
|  | transition(Deter *d, Reiset *s, Rune r, uint eol) | 
|  | { | 
|  | int k; | 
|  | uchar *bits; | 
|  | Reinst *i, *inst0; | 
|  | Rune *rp, *ep; | 
|  |  | 
|  | bits = d->bits; | 
|  | memset(bits, 0, d->ninst); | 
|  |  | 
|  | inst0 = d->p->firstinst; | 
|  | for(k=0; k < s->ninst; k++){ | 
|  | i = inst0 + s->inst[k]; | 
|  | switch(i->type){ | 
|  | default: | 
|  | werrstr("bad reprog: got type %d", i->type); | 
|  | longjmp(d->kaboom, 1); | 
|  | case RBRA: | 
|  | case LBRA: | 
|  | case OR: | 
|  | case BOL: | 
|  | case EOL: | 
|  | werrstr("internal error: got type %d", i->type); | 
|  | longjmp(d->kaboom, 1); | 
|  |  | 
|  | case RUNE: | 
|  | if(r == i->u1.r) | 
|  | bits[i->u2.next - inst0] = 1; | 
|  | break; | 
|  | case ANY: | 
|  | if(r != L'\n') | 
|  | bits[i->u2.next - inst0] = 1; | 
|  | break; | 
|  | case ANYNL: | 
|  | bits[i->u2.next - inst0] = 1; | 
|  | break; | 
|  | case NCCLASS: | 
|  | if(r == L'\n') | 
|  | break; | 
|  | /* fall through */ | 
|  | case CCLASS: | 
|  | ep = i->u1.cp->end; | 
|  | for(rp = i->u1.cp->spans; rp < ep; rp += 2) | 
|  | if(rp[0] <= r && r <= rp[1]) | 
|  | break; | 
|  | if((rp < ep) ^! (i->type == CCLASS)) | 
|  | bits[i->u2.next - inst0] = 1; | 
|  | break; | 
|  | case END: | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | followempty(d, bits, r=='\n', eol); | 
|  | return bits2reiset(d, bits); | 
|  | } | 
|  |  | 
|  | static int | 
|  | countinst(Reprog *pp) | 
|  | { | 
|  | int n; | 
|  | Reinst *l; | 
|  |  | 
|  | n = 0; | 
|  | l = pp->firstinst; | 
|  | while(l++->type) | 
|  | n++; | 
|  | return n; | 
|  | } | 
|  |  | 
|  | static void | 
|  | set(Deter *d, u32int **tab, Rune r) | 
|  | { | 
|  | u32int *u; | 
|  |  | 
|  | if((u = tab[r/4096]) == nil){ | 
|  | u = binalloc(&d->bin, 4096/8, 1); | 
|  | if(u == nil) | 
|  | longjmp(d->kaboom, 1); | 
|  | tab[r/4096] = u; | 
|  | } | 
|  | u[(r%4096)/32] |= 1<<(r%32); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Compute the list of important characters. | 
|  | * Other characters behave like the ones that surround them. | 
|  | */ | 
|  | static void | 
|  | findchars(Deter *d, Reprog *p) | 
|  | { | 
|  | u32int *tab[65536/4096], *u, x; | 
|  | Reinst *i; | 
|  | Rune *rp, *ep; | 
|  | int k, m, n, a; | 
|  |  | 
|  | memset(tab, 0, sizeof tab); | 
|  | set(d, tab, 0); | 
|  | set(d, tab, 0xFFFF); | 
|  | for(i=p->firstinst; i->type; i++){ | 
|  | switch(i->type){ | 
|  | case ANY: | 
|  | set(d, tab, L'\n'-1); | 
|  | set(d, tab, L'\n'); | 
|  | set(d, tab, L'\n'+1); | 
|  | break; | 
|  | case RUNE: | 
|  | set(d, tab, i->u1.r-1); | 
|  | set(d, tab, i->u1.r); | 
|  | set(d, tab, i->u1.r+1); | 
|  | break; | 
|  | case NCCLASS: | 
|  | set(d, tab, L'\n'-1); | 
|  | set(d, tab, L'\n'); | 
|  | set(d, tab, L'\n'+1); | 
|  | /* fall through */ | 
|  | case CCLASS: | 
|  | ep = i->u1.cp->end; | 
|  | for(rp = i->u1.cp->spans; rp < ep; rp += 2){ | 
|  | set(d, tab, rp[0]-1); | 
|  | set(d, tab, rp[0]); | 
|  | set(d, tab, rp[1]); | 
|  | set(d, tab, rp[1]+1); | 
|  | } | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | n = 0; | 
|  | for(k=0; k<nelem(tab); k++){ | 
|  | if((u = tab[k]) == nil) | 
|  | continue; | 
|  | for(m=0; m<4096/32; m++){ | 
|  | if((x = u[m]) == 0) | 
|  | continue; | 
|  | for(a=0; a<32; a++) | 
|  | if(x&(1<<a)) | 
|  | n++; | 
|  | } | 
|  | } | 
|  |  | 
|  | d->c = binalloc(&d->bin, (n+1)*sizeof(Rune), 0); | 
|  | if(d->c == 0) | 
|  | longjmp(d->kaboom, 1); | 
|  | d->nc = n; | 
|  |  | 
|  | n = 0; | 
|  | for(k=0; k<nelem(tab); k++){ | 
|  | if((u = tab[k]) == nil) | 
|  | continue; | 
|  | for(m=0; m<4096/32; m++){ | 
|  | if((x = u[m]) == 0) | 
|  | continue; | 
|  | for(a=0; a<32; a++) | 
|  | if(x&(1<<a)) | 
|  | d->c[n++] = k*4096+m*32+a; | 
|  | } | 
|  | } | 
|  |  | 
|  | d->c[n] = 0; | 
|  | if(n != d->nc) | 
|  | abort(); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * convert the Deter and Reisets into a Dreprog. | 
|  | * if dp and c are nil, just return the count of Drecases needed. | 
|  | */ | 
|  | static int | 
|  | buildprog(Deter *d, Reiset **id2set, int nid, Dreprog *dp, Drecase *c) | 
|  | { | 
|  | int i, j, id, n, nn; | 
|  | Dreinst *di; | 
|  | Reiset *s; | 
|  |  | 
|  | nn = 0; | 
|  | di = 0; | 
|  | for(i=0; i<nid; i++){ | 
|  | s = id2set[i]; | 
|  | if(c){ | 
|  | di = &dp->inst[i]; | 
|  | di->isfinal = s->isfinal; | 
|  | } | 
|  | n = 0; | 
|  | id = -1; | 
|  | for(j=0; j<2*d->nc; j++){ | 
|  | if(s->delta[j]->id != id){ | 
|  | id = s->delta[j]->id; | 
|  | if(c){ | 
|  | c[n].start = ((j/d->nc)<<16) | d->c[j%d->nc]; | 
|  | c[n].next = &dp->inst[id]; | 
|  | } | 
|  | n++; | 
|  | } | 
|  | } | 
|  | if(c){ | 
|  | if(n == 1 && c[0].next == di) | 
|  | di->isloop = 1; | 
|  | di->c = c; | 
|  | di->nc = n; | 
|  | c += n; | 
|  | } | 
|  | nn += n; | 
|  | } | 
|  | return nn; | 
|  | } | 
|  |  | 
|  | Dreprog* | 
|  | dregcvt(Reprog *p) | 
|  | { | 
|  | uchar *bits; | 
|  | uint again, n, nid, id; | 
|  | Deter d; | 
|  | Reiset **id2set, *s, *t, *start[4]; | 
|  | Dreprog *dp; | 
|  | Drecase *c; | 
|  |  | 
|  | memset(&d, 0, sizeof d); | 
|  |  | 
|  | if(setjmp(d.kaboom)){ | 
|  | binfree(&d.bin); | 
|  | return nil; | 
|  | } | 
|  |  | 
|  | d.p = p; | 
|  | d.ninst = countinst(p); | 
|  |  | 
|  | d.last = &d.alloc; | 
|  |  | 
|  | n = d.ninst; | 
|  | /* round up to power of two; this loop is the least of our efficiency problems */ | 
|  | while(n&(n-1)) | 
|  | n++; | 
|  | d.nhash = n; | 
|  | d.hash = binalloc(&d.bin, d.nhash*sizeof(Reinst*), 1); | 
|  |  | 
|  | /* get list of important runes */ | 
|  | findchars(&d, p); | 
|  |  | 
|  | #ifdef DUMP | 
|  | print("relevant chars are: «%S»\n", d.c+1); | 
|  | #endif | 
|  |  | 
|  | d.bits = bits = binalloc(&d.bin, d.ninst, 0); | 
|  | d.tmp = ralloc(&d, d.ninst); | 
|  |  | 
|  | /* | 
|  | * Convert to DFA | 
|  | */ | 
|  |  | 
|  | /* 4 start states, depending on initial bol, eol */ | 
|  | for(n=0; n<4; n++){ | 
|  | memset(bits, 0, d.ninst); | 
|  | bits[p->startinst - p->firstinst] = 1; | 
|  | followempty(&d, bits, n&1, n&2); | 
|  | start[n] = bits2reiset(&d, bits); | 
|  | } | 
|  |  | 
|  | /* explore the reiset space */ | 
|  | for(s=d.alloc; s; s=s->next) | 
|  | for(n=0; n<2*d.nc; n++) | 
|  | s->delta[n] = transition(&d, s, d.c[n%d.nc], n/d.nc); | 
|  |  | 
|  | #ifdef DUMP | 
|  | nid = 0; | 
|  | for(s=d.alloc; s; s=s->next) | 
|  | s->id = nid++; | 
|  | ddump(&d); | 
|  | #endif | 
|  |  | 
|  | /* | 
|  | * Minimize. | 
|  | */ | 
|  |  | 
|  | /* first class division is final or not */ | 
|  | for(s=d.alloc; s; s=s->next){ | 
|  | s->isfinal = 0; | 
|  | for(n=0; n<s->ninst; n++) | 
|  | if(p->firstinst[s->inst[n]].type == END) | 
|  | s->isfinal = 1; | 
|  | s->id = s->isfinal; | 
|  | } | 
|  |  | 
|  | /* divide states with different transition tables in id space */ | 
|  | nid = 2; | 
|  | do{ | 
|  | again = 0; | 
|  | for(s=d.alloc; s; s=s->next){ | 
|  | id = -1; | 
|  | for(t=s->next; t; t=t->next){ | 
|  | if(s->id != t->id) | 
|  | continue; | 
|  | for(n=0; n<2*d.nc; n++){ | 
|  | /* until we finish the for(t) loop, s->id and id are same */ | 
|  | if((s->delta[n]->id == t->delta[n]->id) | 
|  | || (s->delta[n]->id == s->id && t->delta[n]->id == id) | 
|  | || (s->delta[n]->id == id && t->delta[n]->id == s->id)) | 
|  | continue; | 
|  | break; | 
|  | } | 
|  | if(n == 2*d.nc) | 
|  | continue; | 
|  | if(id == -1) | 
|  | id = nid++; | 
|  | t->id = id; | 
|  | again = 1; | 
|  | } | 
|  | } | 
|  | }while(again); | 
|  |  | 
|  | #ifdef DUMP | 
|  | ddump(&d); | 
|  | #endif | 
|  |  | 
|  | /* build dreprog */ | 
|  | id2set = binalloc(&d.bin, nid*sizeof(Reiset*), 1); | 
|  | if(id2set == nil) | 
|  | longjmp(d.kaboom, 1); | 
|  | for(s=d.alloc; s; s=s->next) | 
|  | id2set[s->id] = s; | 
|  |  | 
|  | n = buildprog(&d, id2set, nid, nil, nil); | 
|  | dp = mallocz(sizeof(Dreprog)+nid*sizeof(Dreinst)+n*sizeof(Drecase), 1); | 
|  | if(dp == nil) | 
|  | longjmp(d.kaboom, 1); | 
|  | c = (Drecase*)&dp->inst[nid]; | 
|  | buildprog(&d, id2set, nid, dp, c); | 
|  |  | 
|  | for(n=0; n<4; n++) | 
|  | dp->start[n] = &dp->inst[start[n]->id]; | 
|  | dp->ninst = nid; | 
|  |  | 
|  | binfree(&d.bin); | 
|  | return dp; | 
|  | } | 
|  |  | 
|  | int | 
|  | dregexec(Dreprog *p, char *s, int bol) | 
|  | { | 
|  | Rune r; | 
|  | ulong rr; | 
|  | Dreinst *i; | 
|  | Drecase *c, *ec; | 
|  | int best, n; | 
|  | char *os; | 
|  |  | 
|  | i = p->start[(bol ? 1 : 0) | (s[1]=='\n' ? 2 : 0)]; | 
|  | best = -1; | 
|  | os = s; | 
|  | for(; *s; s+=n){ | 
|  | if(i->isfinal) | 
|  | best = s - os; | 
|  | if(i->isloop){ | 
|  | if(i->isfinal) | 
|  | return strlen(os); | 
|  | else | 
|  | return best; | 
|  | } | 
|  | if((*s&0xFF) < Runeself){ | 
|  | r = *s; | 
|  | n = 1; | 
|  | }else | 
|  | n = chartorune(&r, s); | 
|  | c = i->c; | 
|  | ec = c+i->nc; | 
|  | rr = r; | 
|  | if(s[n] == '\n' || s[n] == '\0') | 
|  | rr |= 0x10000; | 
|  | for(; c<ec; c++){ | 
|  | if(c->start > rr){ | 
|  | i = c[-1].next; | 
|  | goto Out; | 
|  | } | 
|  | } | 
|  | i = ec[-1].next; | 
|  | Out:; | 
|  | } | 
|  | if(i->isfinal) | 
|  | best = s - os; | 
|  | return best; | 
|  | } | 
|  |  | 
|  |  | 
|  | #ifdef DUMP | 
|  | void | 
|  | ddump(Deter *d) | 
|  | { | 
|  | int i, id; | 
|  | Reiset *s; | 
|  |  | 
|  | for(s=d->alloc; s; s=s->next){ | 
|  | print("%d ", s->id); | 
|  | id = -1; | 
|  | for(i=0; i<2*d->nc; i++){ | 
|  | if(id != s->delta[i]->id){ | 
|  | if(i==0) | 
|  | print(" ["); | 
|  | else if(i/d->nc) | 
|  | print(" [%C$", d->c[i%d->nc]); | 
|  | else | 
|  | print(" [%C", d->c[i%d->nc]); | 
|  | print(" %d]", s->delta[i]->id); | 
|  | id = s->delta[i]->id; | 
|  | } | 
|  | } | 
|  | print("\n"); | 
|  | } | 
|  | } | 
|  |  | 
|  | void | 
|  | rdump(Reprog *pp) | 
|  | { | 
|  | Reinst *l; | 
|  | Rune *p; | 
|  |  | 
|  | l = pp->firstinst; | 
|  | do{ | 
|  | print("%ld:\t0%o\t%ld\t%ld", l-pp->firstinst, l->type, | 
|  | l->left-pp->firstinst, l->right-pp->firstinst); | 
|  | if(l->type == RUNE) | 
|  | print("\t%C\n", l->r); | 
|  | else if(l->type == CCLASS || l->type == NCCLASS){ | 
|  | print("\t["); | 
|  | if(l->type == NCCLASS) | 
|  | print("^"); | 
|  | for(p = l->cp->spans; p < l->cp->end; p += 2) | 
|  | if(p[0] == p[1]) | 
|  | print("%C", p[0]); | 
|  | else | 
|  | print("%C-%C", p[0], p[1]); | 
|  | print("]\n"); | 
|  | } else | 
|  | print("\n"); | 
|  | }while(l++->type); | 
|  | } | 
|  |  | 
|  | void | 
|  | dump(Dreprog *pp) | 
|  | { | 
|  | int i, j; | 
|  | Dreinst *l; | 
|  |  | 
|  | print("start %ld %ld %ld %ld\n", | 
|  | pp->start[0]-pp->inst, | 
|  | pp->start[1]-pp->inst, | 
|  | pp->start[2]-pp->inst, | 
|  | pp->start[3]-pp->inst); | 
|  |  | 
|  | for(i=0; i<pp->ninst; i++){ | 
|  | l = &pp->inst[i]; | 
|  | print("%d:", i); | 
|  | for(j=0; j<l->nc; j++){ | 
|  | print(" ["); | 
|  | if(j == 0) | 
|  | if(l->c[j].start != 1) | 
|  | abort(); | 
|  | if(j != 0) | 
|  | print("%C%s", l->c[j].start&0xFFFF, (l->c[j].start&0x10000) ? "$" : ""); | 
|  | print("-"); | 
|  | if(j != l->nc-1) | 
|  | print("%C%s", (l->c[j+1].start&0xFFFF)-1, (l->c[j+1].start&0x10000) ? "$" : ""); | 
|  | print("] %ld", l->c[j].next - pp->inst); | 
|  | } | 
|  | if(l->isfinal) | 
|  | print(" final"); | 
|  | if(l->isloop) | 
|  | print(" loop"); | 
|  | print("\n"); | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | void | 
|  | main(int argc, char **argv) | 
|  | { | 
|  | int i; | 
|  | Reprog *p; | 
|  | Dreprog *dp; | 
|  |  | 
|  | i = 1; | 
|  | p = regcomp(argv[i]); | 
|  | if(p == 0){ | 
|  | print("=== %s: bad regexp\n", argv[i]); | 
|  | } | 
|  | /*	print("=== %s\n", argv[i]); */ | 
|  | /*	rdump(p); */ | 
|  | dp = dregcvt(p); | 
|  | print("=== dfa\n"); | 
|  | dump(dp); | 
|  |  | 
|  | for(i=2; i<argc; i++) | 
|  | print("match %d\n", dregexec(dp, argv[i], 0)); | 
|  | exits(0); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | void | 
|  | Bprintdfa(Biobuf *b, Dreprog *p) | 
|  | { | 
|  | int i, j, nc; | 
|  |  | 
|  | Bprint(b, "# dreprog\n"); | 
|  | nc = 0; | 
|  | for(i=0; i<p->ninst; i++) | 
|  | nc += p->inst[i].nc; | 
|  | Bprint(b, "%d %d %ld %ld %ld %ld\n", p->ninst, nc, | 
|  | p->start[0]-p->inst, p->start[1]-p->inst, | 
|  | p->start[2]-p->inst, p->start[3]-p->inst); | 
|  | for(i=0; i<p->ninst; i++){ | 
|  | Bprint(b, "%d %d %d", p->inst[i].isfinal, p->inst[i].isloop, p->inst[i].nc); | 
|  | for(j=0; j<p->inst[i].nc; j++) | 
|  | Bprint(b, " %d %ld", p->inst[i].c[j].start, p->inst[i].c[j].next-p->inst); | 
|  | Bprint(b, "\n"); | 
|  | } | 
|  | } | 
|  |  | 
|  | static char* | 
|  | egetline(Biobuf *b, int c, jmp_buf jb) | 
|  | { | 
|  | char *p; | 
|  |  | 
|  | p = Brdline(b, c); | 
|  | if(p == nil) | 
|  | longjmp(jb, 1); | 
|  | p[Blinelen(b)-1] = '\0'; | 
|  | return p; | 
|  | } | 
|  |  | 
|  | static void | 
|  | egetc(Biobuf *b, int c, jmp_buf jb) | 
|  | { | 
|  | if(Bgetc(b) != c) | 
|  | longjmp(jb, 1); | 
|  | } | 
|  |  | 
|  | static int | 
|  | egetnum(Biobuf *b, int want, jmp_buf jb) | 
|  | { | 
|  | int c; | 
|  | int n, first; | 
|  |  | 
|  | n = 0; | 
|  | first = 1; | 
|  | while((c = Bgetc(b)) != Beof){ | 
|  | if(c < '0' || c > '9'){ | 
|  | if(want == 0){ | 
|  | Bungetc(b); | 
|  | c = 0; | 
|  | } | 
|  | if(first || c != want){ | 
|  | werrstr("format error"); | 
|  | longjmp(jb, 1); | 
|  | } | 
|  | return n; | 
|  | } | 
|  | n = n*10 + c - '0'; | 
|  | first = 0; | 
|  | } | 
|  | werrstr("unexpected eof"); | 
|  | longjmp(jb, 1); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | Dreprog* | 
|  | Breaddfa(Biobuf *b) | 
|  | { | 
|  | char *s; | 
|  | int ninst, nc; | 
|  | jmp_buf jb; | 
|  | Dreprog *p; | 
|  | Drecase *c; | 
|  | Dreinst *l; | 
|  | int j, k; | 
|  |  | 
|  | p = nil; | 
|  | if(setjmp(jb)){ | 
|  | free(p); | 
|  | return nil; | 
|  | } | 
|  |  | 
|  | s = egetline(b, '\n', jb); | 
|  | if(strcmp(s, "# dreprog") != 0){ | 
|  | werrstr("format error"); | 
|  | longjmp(jb, 1); | 
|  | } | 
|  |  | 
|  | ninst = egetnum(b, ' ', jb); | 
|  | nc = egetnum(b, ' ', jb); | 
|  |  | 
|  | p = mallocz(sizeof(Dreprog)+ninst*sizeof(Dreinst)+nc*sizeof(Drecase), 1); | 
|  | if(p == nil) | 
|  | longjmp(jb, 1); | 
|  | c = (Drecase*)&p->inst[ninst]; | 
|  |  | 
|  | p->start[0] = &p->inst[egetnum(b, ' ', jb)]; | 
|  | p->start[1] = &p->inst[egetnum(b, ' ', jb)]; | 
|  | p->start[2] = &p->inst[egetnum(b, ' ', jb)]; | 
|  | p->start[3] = &p->inst[egetnum(b, '\n', jb)]; | 
|  |  | 
|  | for(j=0; j<ninst; j++){ | 
|  | l = &p->inst[j]; | 
|  | l->isfinal = egetnum(b, ' ', jb); | 
|  | l->isloop = egetnum(b, ' ', jb); | 
|  | l->nc = egetnum(b, 0, jb); | 
|  | l->c = c; | 
|  | for(k=0; k<l->nc; k++){ | 
|  | egetc(b, ' ', jb); | 
|  | c->start = egetnum(b, ' ', jb); | 
|  | c->next = &p->inst[egetnum(b, 0, jb)]; | 
|  | c++; | 
|  | } | 
|  | egetc(b, '\n', jb); | 
|  | } | 
|  | return p; | 
|  | } |