diff --git a/src/libregexp/regaux.c b/src/libregexp/regaux.c
index 39e6772..b854b5a 100644
--- a/src/libregexp/regaux.c
+++ b/src/libregexp/regaux.c
@@ -23,89 +23,90 @@
 }
 
 /*
- * Add ip to the list [lp, elp], but only if it is not there already.
- * These work lists are stored and processed in increasing
- * order of sp[0], so if the ip is there already, the one that's
- * there already is a more left match and takes priority.
+ * Note optimization in _renewthread:
+ * 	*lp must be pending when _renewthread called; if *l has been looked
+ *		at already, the optimization is a bug.
  */
-static Relist*
-_renewthread1(Relist *lp,	/* Relist to add to */
-	Relist *elp,		/* limit pointer for Relist */
+extern Relist*
+_renewthread(Relist *lp,	/* _relist to add to */
 	Reinst *ip,		/* instruction to add */
 	int ms,
 	Resublist *sep)		/* pointers to subexpressions */
 {
 	Relist *p;
 
-	for(p=lp; p->inst; p++)
-		if(p->inst == ip)
+	for(p=lp; p->inst; p++){
+		if(p->inst == ip){
+			if(sep->m[0].s.sp < p->se.m[0].s.sp){
+				if(ms > 1)
+					p->se = *sep;
+				else
+					p->se.m[0] = sep->m[0];
+			}
 			return 0;
-	
-	if(p == elp)	/* refuse to overflow buffer */
-		return elp;
-
+		}
+	}
 	p->inst = ip;
 	if(ms > 1)
 		p->se = *sep;
 	else
 		p->se.m[0] = sep->m[0];
-	(p+1)->inst = 0;
+	(++p)->inst = 0;
 	return p;
 }
 
-extern int
-_renewthread(Relist *lp, Relist *elp, Reinst *ip, int ms, Resublist *sep)
-{
-	Relist *ap;
-
-	ap = _renewthread1(lp, elp, ip, ms, sep);
-	if(ap == 0)
-		return 0;
-	if(ap == elp)
-		return -1;
-
-	/*
-	 * Added ip to list at ap.  
-	 * Expand any ORs right now, so that entire
-	 * work list ends up being sorted by increasing m[0].sp.
-	 */
-	for(; ap->inst; ap++){
-		if(ap->inst->type == OR){
-			if(_renewthread1(lp, elp, ap->inst->u1.right, ms, &ap->se) == elp)
-				return -1;
-			if(_renewthread1(lp, elp, ap->inst->u2.next, ms, &ap->se) == elp)
-				return -1;
-		}
-	}
-	return 0;
-}
-
 /*
  * same as renewthread, but called with
  * initial empty start pointer.
  */
-extern int
+extern Relist*
 _renewemptythread(Relist *lp,	/* _relist to add to */
-	Relist *elp,
 	Reinst *ip,		/* instruction to add */
 	int ms,
 	char *sp)		/* pointers to subexpressions */
 {
-	Resublist sep;
-	
+	Relist *p;
+
+	for(p=lp; p->inst; p++){
+		if(p->inst == ip){
+			if(sp < p->se.m[0].s.sp) {
+				if(ms > 1)
+					memset(&p->se, 0, sizeof(p->se));
+				p->se.m[0].s.sp = sp;
+			}
+			return 0;
+		}
+	}
+	p->inst = ip;
 	if(ms > 1)
-		memset(&sep, 0, sizeof sep);
-	sep.m[0].s.sp = sp;
-	sep.m[0].e.ep = 0;
-	return _renewthread(lp, elp, ip, ms, &sep);
+		memset(&p->se, 0, sizeof(p->se));
+	p->se.m[0].s.sp = sp;
+	(++p)->inst = 0;
+	return p;
 }
 
-extern int
+extern Relist*
 _rrenewemptythread(Relist *lp,	/* _relist to add to */
-	Relist *elp,
 	Reinst *ip,		/* instruction to add */
 	int ms,
 	Rune *rsp)		/* pointers to subexpressions */
 {
-	return _renewemptythread(lp, elp, ip, ms, (char*)rsp);
+	Relist *p;
+
+	for(p=lp; p->inst; p++){
+		if(p->inst == ip){
+			if(rsp < p->se.m[0].s.rsp) {
+				if(ms > 1)
+					memset(&p->se, 0, sizeof(p->se));
+				p->se.m[0].s.rsp = rsp;
+			}
+			return 0;
+		}
+	}
+	p->inst = ip;
+	if(ms > 1)
+		memset(&p->se, 0, sizeof(p->se));
+	p->se.m[0].s.rsp = rsp;
+	(++p)->inst = 0;
+	return p;
 }
diff --git a/src/libregexp/regcomp.c b/src/libregexp/regcomp.c
index ba0175f..b8286dc 100644
--- a/src/libregexp/regcomp.c
+++ b/src/libregexp/regcomp.c
@@ -232,7 +232,7 @@
 	int size;
 	Reprog *npp;
 	Reclass *cl;
-	int diff, proglen;
+	int diff;
 
 	/*
 	 *  get rid of NOOP chains
@@ -249,13 +249,10 @@
 	 *  necessary.  Reallocate to the actual space used
 	 *  and then relocate the code.
 	 */
-	proglen = freep - pp->firstinst;
-	size = sizeof(Reprog) + proglen*sizeof(Reinst);
+	size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
 	npp = realloc(pp, size);
-	if(npp==0 || npp==pp){
-		pp->proglen = proglen;
+	if(npp==0 || npp==pp)
 		return pp;
-	}
 	diff = (char *)npp - (char *)pp;
 	freep = (Reinst *)((char *)freep + diff);
 	for(inst=npp->firstinst; inst<freep; inst++){
@@ -276,7 +273,6 @@
 		*(char**)(void*)&inst->u2.left += diff;
 	}
 	*(char**)(void*)&npp->startinst += diff;
-	npp->proglen = proglen;
 	return npp;
 }
 
diff --git a/src/libregexp/regcomp.h b/src/libregexp/regcomp.h
index fde99f5..6c88cd0 100644
--- a/src/libregexp/regcomp.h
+++ b/src/libregexp/regcomp.h
@@ -68,7 +68,7 @@
 	Rune*	reol;
 };
 
-extern int	_renewthread(Relist*, Relist*, Reinst*, int, Resublist*);
+extern Relist*	_renewthread(Relist*, Reinst*, int, Resublist*);
 extern void	_renewmatch(Resub*, int, Resublist*);
-extern int	_renewemptythread(Relist*, Relist*, Reinst*, int, char*);
-extern int	_rrenewemptythread(Relist*, Relist*, Reinst*, int, Rune*);
+extern Relist*	_renewemptythread(Relist*, Reinst*, int, char*);
+extern Relist*	_rrenewemptythread(Relist*, Reinst*, int, Rune*);
diff --git a/src/libregexp/regexec.c b/src/libregexp/regexec.c
index 10d93f0..c04182a 100644
--- a/src/libregexp/regexec.c
+++ b/src/libregexp/regexec.c
@@ -2,6 +2,7 @@
 #include "regexp9.h"
 #include "regcomp.h"
 
+
 /*
  *  return	0 if no match
  *		>0 if a match
@@ -12,14 +13,16 @@
 	char *bol,	/* string to run machine on */
 	Resub *mp,	/* subexpression elements */
 	int ms,		/* number of elements at mp */
-	Reljunk *j)
+	Reljunk *j
+)
 {
 	int flag=0;
 	Reinst *inst;
 	Relist *tlp;
 	char *s;
-	int i, checkstart, n;
+	int i, checkstart;
 	Rune r, *rp, *ep;
+	int n;
 	Relist* tl;		/* This list, next list */
 	Relist* nl;
 	Relist* tle;		/* ends of this and next list */
@@ -45,7 +48,7 @@
 			switch(j->starttype) {
 			case RUNE:
 				p = utfrune(s, j->startchar);
-				if(p == 0 || (j->eol && p >= j->eol))
+				if(p == 0 || s == j->eol)
 					return match;
 				s = p;
 				break;
@@ -53,7 +56,7 @@
 				if(s == bol)
 					break;
 				p = utfrune(s, '\n');
-				if(p == 0 || (j->eol && p+1 >= j->eol))
+				if(p == 0 || s == j->eol)
 					return match;
 				s = p+1;
 				break;
@@ -74,16 +77,17 @@
 
 		/* Add first instruction to current list */
 		if(match == 0)
-			_renewemptythread(tl, tle, progp->startinst, ms, s);
+			_renewemptythread(tl, progp->startinst, ms, s);
 
 		/* Execute machine until current list is empty */
-		for(tlp=tl; tlp->inst; tlp++){
+		for(tlp=tl; tlp->inst; tlp++){	/* assignment = */
 			for(inst = tlp->inst; ; inst = inst->u2.next){
 				switch(inst->type){
 				case RUNE:	/* regular character */
-					if(inst->u1.r == r)
-						if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+					if(inst->u1.r == r){
+						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
 							return -1;
+					}
 					break;
 				case LBRA:
 					tlp->se.m[inst->u1.subid].s.sp = s;
@@ -93,11 +97,11 @@
 					continue;
 				case ANY:
 					if(r != '\n')
-						if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
 							return -1;
 					break;
 				case ANYNL:
-					if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+					if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
 							return -1;
 					break;
 				case BOL:
@@ -112,7 +116,7 @@
 					ep = inst->u1.cp->end;
 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
 						if(r >= rp[0] && r <= rp[1]){
-							if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+							if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
 								return -1;
 							break;
 						}
@@ -123,12 +127,15 @@
 						if(r >= rp[0] && r <= rp[1])
 							break;
 					if(rp == ep)
-						if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
 							return -1;
 					break;
 				case OR:
-					/* expanded during renewthread; just a place holder */
-					break;
+					/* evaluate right choice later */
+					if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle)
+						return -1;
+					/* efficiency: advance and re-evaluate */
+					continue;
 				case END:	/* Match! */
 					match = 1;
 					tlp->se.m[0].e.ep = s;
@@ -158,18 +165,19 @@
 	int rv;
 	Relist *relist0, *relist1;
 
-	relist0 = malloc((progp->proglen+1)*sizeof(Relist));
+	/* mark space */
+	relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
 	if(relist0 == nil)
 		return -1;
-	relist1 = malloc((progp->proglen+1)*sizeof(Relist));
+	relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
 	if(relist1 == nil){
 		free(relist1);
 		return -1;
 	}
 	j->relist[0] = relist0;
 	j->relist[1] = relist1;
-	j->reliste[0] = relist0 + progp->proglen;
-	j->reliste[1] = relist1 + progp->proglen;
+	j->reliste[0] = relist0 + BIGLISTSIZE - 2;
+	j->reliste[1] = relist1 + BIGLISTSIZE - 2;
 
 	rv = regexec1(progp, bol, mp, ms, j);
 	free(relist0);
@@ -210,8 +218,8 @@
 	/* mark space */
 	j.relist[0] = relist0;
 	j.relist[1] = relist1;
-	j.reliste[0] = relist0 + nelem(relist0) - 1;
-	j.reliste[1] = relist1 + nelem(relist1) - 1;
+	j.reliste[0] = relist0 + nelem(relist0) - 2;
+	j.reliste[1] = relist1 + nelem(relist1) - 2;
 
 	rv = regexec1(progp, bol, mp, ms, &j);
 	if(rv >= 0)
diff --git a/src/libregexp/rregexec.c b/src/libregexp/rregexec.c
index 907ddef..ec7907d 100644
--- a/src/libregexp/rregexec.c
+++ b/src/libregexp/rregexec.c
@@ -9,9 +9,9 @@
  */
 static int
 rregexec1(Reprog *progp,	/* program to run */
-	Rune *bol,	/* string to run machine on */
-	Resub *mp,	/* subexpression elements */
-	int ms,		/* number of elements at mp */
+	Rune *bol,		/* string to run machine on */
+	Resub *mp,		/* subexpression elements */
+	int ms,			/* number of elements at mp */
 	Reljunk *j)
 {
 	int flag=0;
@@ -28,7 +28,7 @@
 	Rune *p;
 
 	match = 0;
-	checkstart = j->starttype;
+	checkstart = j->startchar;
 	if(mp)
 		for(i=0; i<ms; i++) {
 			mp[i].s.rsp = 0;
@@ -46,7 +46,7 @@
 			switch(j->starttype) {
 			case RUNE:
 				p = runestrchr(s, j->startchar);
-				if(p == 0 || (j->reol && p >= j->reol))
+				if(p == 0 || p == j->reol)
 					return match;
 				s = p;
 				break;
@@ -54,7 +54,7 @@
 				if(s == bol)
 					break;
 				p = runestrchr(s, '\n');
-				if(p == 0 || (j->reol && p+1 >= j->reol))
+				if(p == 0 || s == j->reol)
 					return match;
 				s = p+1;
 				break;
@@ -71,16 +71,15 @@
 		nl->inst = 0;
 
 		/* Add first instruction to current list */
-		if(match == 0)
-			_rrenewemptythread(tl, tle, progp->startinst, ms, s);
+		_rrenewemptythread(tl, progp->startinst, ms, s);
 
 		/* Execute machine until current list is empty */
 		for(tlp=tl; tlp->inst; tlp++){
-			for(inst = tlp->inst; ; inst = inst->u2.next){
+			for(inst=tlp->inst; ; inst = inst->u2.next){
 				switch(inst->type){
 				case RUNE:	/* regular character */
 					if(inst->u1.r == r)
-						if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
 							return -1;
 					break;
 				case LBRA:
@@ -91,11 +90,11 @@
 					continue;
 				case ANY:
 					if(r != '\n')
-						if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
 							return -1;
 					break;
 				case ANYNL:
-					if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+					if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
 							return -1;
 					break;
 				case BOL:
@@ -110,7 +109,7 @@
 					ep = inst->u1.cp->end;
 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
 						if(r >= rp[0] && r <= rp[1]){
-							if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+							if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
 								return -1;
 							break;
 						}
@@ -121,12 +120,15 @@
 						if(r >= rp[0] && r <= rp[1])
 							break;
 					if(rp == ep)
-						if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
 							return -1;
 					break;
 				case OR:
-					/* expanded during renewthread; just a place holder */
-					break;
+					/* evaluate right choice later */
+					if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle)
+						return -1;
+					/* efficiency: advance and re-evaluate */
+					continue;
 				case END:	/* Match! */
 					match = 1;
 					tlp->se.m[0].e.rep = s;
@@ -139,7 +141,7 @@
 		}
 		if(s == j->reol)
 			break;
-		checkstart = j->starttype && nl->inst==0;
+		checkstart = j->startchar && nl->inst==0;
 		s++;
 	}while(r);
 	return match;
@@ -153,26 +155,15 @@
 	Reljunk *j
 )
 {
-	int rv;
-	Relist *relist0, *relist1;
+	Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
 
-	relist0 = malloc((progp->proglen+1)*sizeof(Relist));
-	if(relist0 == nil)
-		return -1;
-	relist1 = malloc((progp->proglen+1)*sizeof(Relist));
-	if(relist1 == nil){
-		free(relist1);
-		return -1;
-	}
+	/* mark space */
 	j->relist[0] = relist0;
 	j->relist[1] = relist1;
-	j->reliste[0] = relist0 + progp->proglen;
-	j->reliste[1] = relist1 + progp->proglen;
+	j->reliste[0] = relist0 + nelem(relist0) - 2;
+	j->reliste[1] = relist1 + nelem(relist1) - 2;
 
-	rv = rregexec1(progp, bol, mp, ms, j);
-	free(relist0);
-	free(relist1);
-	return rv;
+	return rregexec1(progp, bol, mp, ms, j);
 }
 
 extern int
@@ -208,8 +199,8 @@
 	/* mark space */
 	j.relist[0] = relist0;
 	j.relist[1] = relist1;
-	j.reliste[0] = relist0 + nelem(relist0) - 1;
-	j.reliste[1] = relist1 + nelem(relist1) - 1;
+	j.reliste[0] = relist0 + nelem(relist0) - 2;
+	j.reliste[1] = relist1 + nelem(relist1) - 2;
 
 	rv = rregexec1(progp, bol, mp, ms, &j);
 	if(rv >= 0)
