blob: eece0eb9d8adfaa74c708bc42661b4e5074efc6a [file] [log] [blame]
rscb2cfc4e2003-09-30 17:47:41 +00001#include "lib9.h"
2#include "regexp9.h"
3#include "regcomp.h"
4
5/*
6 * return 0 if no match
7 * >0 if a match
8 * <0 if we ran out of _relist space
9 */
10static int
11rregexec1(Reprog *progp, /* program to run */
12 Rune *bol, /* string to run machine on */
13 Resub *mp, /* subexpression elements */
14 int ms, /* number of elements at mp */
15 Reljunk *j)
16{
17 int flag=0;
18 Reinst *inst;
19 Relist *tlp;
20 Rune *s;
21 int i, checkstart;
22 Rune r, *rp, *ep;
23 Relist* tl; /* This list, next list */
24 Relist* nl;
25 Relist* tle; /* ends of this and next list */
26 Relist* nle;
27 int match;
28
29 match = 0;
30 checkstart = j->startchar;
31 if(mp)
32 for(i=0; i<ms; i++) {
33 mp[i].s.rsp = 0;
34 mp[i].e.rep = 0;
35 }
36 j->relist[0][0].inst = 0;
37 j->relist[1][0].inst = 0;
38
39 /* Execute machine once for each character, including terminal NUL */
40 s = j->rstarts;
41 do{
42
43 /* fast check for first char */
44 if(checkstart) {
45 switch(j->starttype) {
46 case RUNE:
47 while(*s != j->startchar) {
rsc62390092004-03-05 05:13:56 +000048 if(*s == 0 || s == j->reol)
rscb2cfc4e2003-09-30 17:47:41 +000049 return match;
50 s++;
51 }
52 break;
53 case BOL:
54 if(s == bol)
55 break;
56 while(*s != '\n') {
rsc62390092004-03-05 05:13:56 +000057 if(*s == 0 || s == j->reol)
rscb2cfc4e2003-09-30 17:47:41 +000058 return match;
59 s++;
60 }
61 break;
62 }
63 }
64
65 r = *s;
66
67 /* switch run lists */
68 tl = j->relist[flag];
69 tle = j->reliste[flag];
70 nl = j->relist[flag^=1];
71 nle = j->reliste[flag];
72 nl->inst = 0;
73
74 /* Add first instruction to current list */
rsc62390092004-03-05 05:13:56 +000075 _rrenewemptythread(tl, progp->startinst, ms, s);
rscb2cfc4e2003-09-30 17:47:41 +000076
77 /* Execute machine until current list is empty */
78 for(tlp=tl; tlp->inst; tlp++){
79 for(inst=tlp->inst; ; inst = inst->u2.next){
80 switch(inst->type){
81 case RUNE: /* regular character */
82 if(inst->u1.r == r)
rsc62390092004-03-05 05:13:56 +000083 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
rscb2cfc4e2003-09-30 17:47:41 +000084 return -1;
85 break;
86 case LBRA:
87 tlp->se.m[inst->u1.subid].s.rsp = s;
88 continue;
89 case RBRA:
90 tlp->se.m[inst->u1.subid].e.rep = s;
91 continue;
92 case ANY:
93 if(r != '\n')
rsc62390092004-03-05 05:13:56 +000094 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
rscb2cfc4e2003-09-30 17:47:41 +000095 return -1;
96 break;
97 case ANYNL:
rsc62390092004-03-05 05:13:56 +000098 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
rscb2cfc4e2003-09-30 17:47:41 +000099 return -1;
100 break;
101 case BOL:
102 if(s == bol || *(s-1) == '\n')
103 continue;
104 break;
105 case EOL:
106 if(s == j->reol || r == 0 || r == '\n')
107 continue;
108 break;
109 case CCLASS:
110 ep = inst->u1.cp->end;
111 for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
112 if(r >= rp[0] && r <= rp[1]){
rsc62390092004-03-05 05:13:56 +0000113 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
rscb2cfc4e2003-09-30 17:47:41 +0000114 return -1;
115 break;
116 }
117 break;
118 case NCCLASS:
119 ep = inst->u1.cp->end;
120 for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
121 if(r >= rp[0] && r <= rp[1])
122 break;
123 if(rp == ep)
rsc62390092004-03-05 05:13:56 +0000124 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
rscb2cfc4e2003-09-30 17:47:41 +0000125 return -1;
126 break;
127 case OR:
128 /* evaluate right choice later */
rsc62390092004-03-05 05:13:56 +0000129 if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
rscb2cfc4e2003-09-30 17:47:41 +0000130 return -1;
131 /* efficiency: advance and re-evaluate */
132 continue;
133 case END: /* Match! */
134 match = 1;
135 tlp->se.m[0].e.rep = s;
136 if(mp != 0)
137 _renewmatch(mp, ms, &tlp->se);
138 break;
139 }
140 break;
141 }
142 }
143 if(s == j->reol)
144 break;
145 checkstart = j->startchar && nl->inst==0;
146 s++;
147 }while(r);
148 return match;
149}
150
151static int
152rregexec2(Reprog *progp, /* program to run */
153 Rune *bol, /* string to run machine on */
154 Resub *mp, /* subexpression elements */
155 int ms, /* number of elements at mp */
156 Reljunk *j
157)
158{
159 Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
160
161 /* mark space */
162 j->relist[0] = relist0;
163 j->relist[1] = relist1;
164 j->reliste[0] = relist0 + nelem(relist0) - 2;
165 j->reliste[1] = relist1 + nelem(relist1) - 2;
166
167 return rregexec1(progp, bol, mp, ms, j);
168}
169
170extern int
171rregexec(Reprog *progp, /* program to run */
172 Rune *bol, /* string to run machine on */
173 Resub *mp, /* subexpression elements */
174 int ms) /* number of elements at mp */
175{
176 Reljunk j;
177 Relist relist0[LISTSIZE], relist1[LISTSIZE];
178 int rv;
179
180 /*
181 * use user-specified starting/ending location if specified
182 */
183 j.rstarts = bol;
184 j.reol = 0;
185 if(mp && ms>0){
186 if(mp->s.sp)
187 j.rstarts = mp->s.rsp;
188 if(mp->e.ep)
189 j.reol = mp->e.rep;
190 }
191 j.starttype = 0;
192 j.startchar = 0;
rsc62390092004-03-05 05:13:56 +0000193 if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
rscb2cfc4e2003-09-30 17:47:41 +0000194 j.starttype = RUNE;
195 j.startchar = progp->startinst->u1.r;
196 }
197 if(progp->startinst->type == BOL)
198 j.starttype = BOL;
199
200 /* mark space */
201 j.relist[0] = relist0;
202 j.relist[1] = relist1;
203 j.reliste[0] = relist0 + nelem(relist0) - 2;
204 j.reliste[1] = relist1 + nelem(relist1) - 2;
205
206 rv = rregexec1(progp, bol, mp, ms, &j);
207 if(rv >= 0)
208 return rv;
209 rv = rregexec2(progp, bol, mp, ms, &j);
210 if(rv >= 0)
211 return rv;
212 return -1;
213}