| #include "grep.h" |
| |
| /* |
| * incremental compiler. |
| * add the branch c to the |
| * state s. |
| */ |
| void |
| increment(State *s, int c) |
| { |
| int i; |
| State *t, **tt; |
| Re *re1, *re2; |
| |
| nfollow = 0; |
| gen++; |
| matched = 0; |
| for(i=0; i<s->count; i++) |
| fol1(s->re[i], c); |
| qsort(follow, nfollow, sizeof(*follow), fcmp); |
| for(tt=&state0; t = *tt;) { |
| if(t->count > nfollow) { |
| tt = &t->linkleft; |
| goto cont; |
| } |
| if(t->count < nfollow) { |
| tt = &t->linkright; |
| goto cont; |
| } |
| for(i=0; i<nfollow; i++) { |
| re1 = t->re[i]; |
| re2 = follow[i]; |
| if(re1 > re2) { |
| tt = &t->linkleft; |
| goto cont; |
| } |
| if(re1 < re2) { |
| tt = &t->linkright; |
| goto cont; |
| } |
| } |
| if(!!matched && !t->match) { |
| tt = &t->linkleft; |
| goto cont; |
| } |
| if(!matched && !!t->match) { |
| tt = &t->linkright; |
| goto cont; |
| } |
| s->next[c] = t; |
| return; |
| cont:; |
| } |
| |
| t = sal(nfollow); |
| *tt = t; |
| for(i=0; i<nfollow; i++) { |
| re1 = follow[i]; |
| t->re[i] = re1; |
| } |
| s->next[c] = t; |
| t->match = matched; |
| } |
| |
| int |
| fcmp(const void *va, const void *vb) |
| { |
| Re **aa, **bb; |
| Re *a, *b; |
| |
| aa = (Re**)va; |
| bb = (Re**)vb; |
| a = *aa; |
| b = *bb; |
| if(a > b) |
| return 1; |
| if(a < b) |
| return -1; |
| return 0; |
| } |
| |
| void |
| fol1(Re *r, int c) |
| { |
| Re *r1; |
| |
| loop: |
| if(r->gen == gen) |
| return; |
| if(nfollow >= maxfollow) |
| error("nfollow"); |
| r->gen = gen; |
| switch(r->type) { |
| default: |
| error("fol1"); |
| |
| case Tcase: |
| if(c >= 0 && c < 256) |
| if(r1 = r->u.cases[c]) |
| follow[nfollow++] = r1; |
| if(r = r->next) |
| goto loop; |
| break; |
| |
| case Talt: |
| case Tor: |
| fol1(r->u.alt, c); |
| r = r->next; |
| goto loop; |
| |
| case Tbegin: |
| if(c == '\n' || c == Cbegin) |
| follow[nfollow++] = r->next; |
| break; |
| |
| case Tend: |
| if(c == '\n') { |
| if(r->next == 0) { |
| matched = 1; |
| break; |
| } |
| r = r->next; |
| goto loop; |
| } |
| break; |
| |
| case Tclass: |
| if(c >= r->u.x.lo && c <= r->u.x.hi) |
| follow[nfollow++] = r->next; |
| break; |
| } |
| } |
| |
| Rune tab1[] = |
| { |
| 0x007f, |
| 0x07ff |
| }; |
| Rune tab2[] = |
| { |
| 0x003f, |
| 0x0fff |
| }; |
| |
| Re2 |
| rclass(Rune p0, Rune p1) |
| { |
| char xc0[6], xc1[6]; |
| int i, n, m; |
| Re2 x; |
| |
| if(p0 > p1) |
| return re2char(0xff, 0xff); /* no match */ |
| |
| /* |
| * bust range into same length |
| * character sequences |
| */ |
| for(i=0; i<nelem(tab1); i++) { |
| m = tab1[i]; |
| if(p0 <= m && p1 > m) |
| return re2or(rclass(p0, m), rclass(m+1, p1)); |
| } |
| |
| /* |
| * bust range into part of a single page |
| * or into full pages |
| */ |
| for(i=0; i<nelem(tab2); i++) { |
| m = tab2[i]; |
| if((p0 & ~m) != (p1 & ~m)) { |
| if((p0 & m) != 0) |
| return re2or(rclass(p0, p0|m), rclass((p0|m)+1, p1)); |
| if((p1 & m) != m) |
| return re2or(rclass(p0, (p1&~m)-1), rclass(p1&~m, p1)); |
| } |
| } |
| |
| n = runetochar(xc0, &p0); |
| i = runetochar(xc1, &p1); |
| if(i != n) |
| error("length"); |
| |
| x = re2char(xc0[0], xc1[0]); |
| for(i=1; i<n; i++) |
| x = re2cat(x, re2char(xc0[i], xc1[i])); |
| return x; |
| } |
| |
| int |
| pcmp(const void *va, const void *vb) |
| { |
| int n; |
| Rune *a, *b; |
| |
| a = (Rune*)va; |
| b = (Rune*)vb; |
| |
| n = a[0] - b[0]; |
| if(n) |
| return n; |
| return a[1] - b[1]; |
| } |
| |
| /* |
| * convert character chass into |
| * run-pair ranges of matches. |
| * this is 10646/utf specific and |
| * needs to be changed for some |
| * other input character set. |
| * this is the key to a fast |
| * regular search of characters |
| * by looking at sequential bytes. |
| */ |
| Re2 |
| re2class(char *s) |
| { |
| Rune pairs[200], *p, *q, ov; |
| int nc; |
| Re2 x; |
| |
| nc = 0; |
| if(*s == '^') { |
| nc = 1; |
| s++; |
| } |
| |
| p = pairs; |
| s += chartorune(p, s); |
| for(;;) { |
| if(*p == '\\') |
| s += chartorune(p, s); |
| if(*p == 0) |
| break; |
| p[1] = *p; |
| p += 2; |
| s += chartorune(p, s); |
| if(*p != '-') |
| continue; |
| s += chartorune(p, s); |
| if(*p == '\\') |
| s += chartorune(p, s); |
| if(*p == 0) |
| break; |
| p[-1] = *p; |
| s += chartorune(p, s); |
| } |
| *p = 0; |
| qsort(pairs, (p-pairs)/2, 2*sizeof(*pairs), pcmp); |
| |
| q = pairs; |
| for(p=pairs+2; *p; p+=2) { |
| if(p[0] > p[1]) |
| continue; |
| if(p[0] > q[1] || p[1] < q[0]) { |
| q[2] = p[0]; |
| q[3] = p[1]; |
| q += 2; |
| continue; |
| } |
| if(p[0] < q[0]) |
| q[0] = p[0]; |
| if(p[1] > q[1]) |
| q[1] = p[1]; |
| } |
| q[2] = 0; |
| |
| p = pairs; |
| if(nc) { |
| x = rclass(0, p[0]-1); |
| ov = p[1]+1; |
| for(p+=2; *p; p+=2) { |
| x = re2or(x, rclass(ov, p[0]-1)); |
| ov = p[1]+1; |
| } |
| x = re2or(x, rclass(ov, 0xffff)); |
| } else { |
| x = rclass(p[0], p[1]); |
| for(p+=2; *p; p+=2) |
| x = re2or(x, rclass(p[0], p[1])); |
| } |
| return x; |
| } |