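Fix small bugs in libthread.

- Pass both ends of the thread stack to _threadstacklimit and swap the
  last VALGRIND_SET_STACK_LIMIT argument between the enter and
  non-enter branches.
- Initialize Thread.lastfd to -1 and have threadfdnoblock skip the fcntl
  call for descriptors it has already marked non-blocking.
- Give the local channel in fdwait.c its own qentry storage.
- Make _sched and yield return int, backed by a new Proc.nsched counter.
- Drop the SIGTERM/SIGCHLD/SIGALRM handler setup from main.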
diff --git a/src/libthread/386.c b/src/libthread/386.c
index d8052f0..2c611e4 100644
--- a/src/libthread/386.c
+++ b/src/libthread/386.c
@@ -15,7 +15,7 @@
 
 	p = _threadgetproc();
 	t = p->thread;
-	_threadstacklimit(t->stk);
+	_threadstacklimit(t->stk, t->stk+t->stksize);
 
 	(*f)(arg);
 	threadexits(nil);
@@ -39,18 +39,19 @@
 	USED(enter);
 #ifdef USEVALGRIND
 	if(enter)
-		VALGRIND_SET_STACK_LIMIT(0, 0, 1);
-	else
 		VALGRIND_SET_STACK_LIMIT(0, 0, 0);
+	else
+		VALGRIND_SET_STACK_LIMIT(0, 0, 1);
 #endif
 }
 
 void
-_threadstacklimit(void *addr)
+_threadstacklimit(void *bottom, void *top)
 {
-	USED(addr);
+	USED(bottom);
+	USED(top);
 
 #ifdef USEVALGRIND
-	VALGRIND_SET_STACK_LIMIT(1, addr, 0);
+	VALGRIND_SET_STACK_LIMIT(1, bottom, top);
 #endif
 }
diff --git a/src/libthread/PowerMacintosh.c b/src/libthread/PowerMacintosh.c
index 21f6041..94d4db9 100644
--- a/src/libthread/PowerMacintosh.c
+++ b/src/libthread/PowerMacintosh.c
@@ -32,7 +32,9 @@
 }
 
 void
-_threadstacklimit(void *addr)
+_threadstacklimit(void *addr, void *addr2)
 {
+	/* Both stack bounds are ignored here; just mark them used. */
 	USED(addr);
+	USED(addr2);
 }
diff --git a/src/libthread/create.c b/src/libthread/create.c
index b63fee4..f5f0d6c 100644
--- a/src/libthread/create.c
+++ b/src/libthread/create.c
@@ -24,6 +24,7 @@
 	if(stacksize < 32)
 		sysfatal("bad stacksize %d", stacksize);
 	t = _threadmalloc(sizeof(Thread), 1);
+	t->lastfd = -1;
 	s = _threadmalloc(stacksize, 0);
 	t->stk = (uchar*)s;
 	t->stksize = stacksize;
diff --git a/src/libthread/fdwait.c b/src/libthread/fdwait.c
index 927fc64..a689033 100644
--- a/src/libthread/fdwait.c
+++ b/src/libthread/fdwait.c
@@ -2,12 +2,13 @@
 #include <u.h>
 #include <libc.h>
 #include <thread.h>
-
+#include "threadimpl.h"
 #include <errno.h>
 #include <unistd.h>
 #include <fcntl.h>
 
 #define debugpoll 0
+static int noblocked[4096/32];
 
 #ifdef __APPLE__
 #include <sys/time.h>
@@ -174,11 +175,15 @@
 
 	struct {
 		Channel c;
+		Alt *qentry[2];
 		ulong x;
 	} s;
 
 	threadfdwaitsetup();
 	chaninit(&s.c, sizeof(ulong), 1);
+	s.c.qentry = (volatile Alt**)s.qentry;
+	s.c.nentry = 2;
+	memset(s.qentry, 0, sizeof s.qentry);
 	for(i=0; i<npoll; i++)
 		if(pfd[i].fd == -1)
 			break;
@@ -223,7 +228,29 @@
 void
 threadfdnoblock(int fd)
 {
+	Thread *t;
+
+	if(fd<0)
+		return;
+	/* Skip fds already recorded as non-blocking in the bitmap. */
+	if(fd < 8*sizeof(int)*nelem(noblocked)
+	&& (noblocked[fd/(8*sizeof(int))] & (1<<(fd%(8*sizeof(int))))))
+		return;
+	t = _threadgetproc()->thread;
+	if(t && t->lastfd == fd)
+		return;
 	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0)|O_NONBLOCK);
+	if(t)
+		t->lastfd = fd;
+
+	/* We could lock this but we're probably single-threaded
+	 * and the worst that will happen is we'll run fcntl
+	 * a few more times.
+	 *
+	 * Only record fds that fit in the bitmap; larger ones rely on lastfd.
+	 */
+	if(fd < 8*sizeof(int)*nelem(noblocked))
+		noblocked[fd/(8*sizeof(int))] |= 1<<(fd%(8*sizeof(int)));
 }
 
 long
diff --git a/src/libthread/main.c b/src/libthread/main.c
index 04e6cd9..bc7ad0f 100644
--- a/src/libthread/main.c
+++ b/src/libthread/main.c
@@ -32,24 +32,12 @@
 		exit(_threadexitsallstatus[0] ? 1 : 0);
 }
 
-static void
-_nop(int x)
-{
-	USED(x);
-}
-
 int
 main(int argc, char **argv)
 {
 	Mainarg *a;
 	Proc *p;
 
-	signal(SIGTERM, _threaddie);
-	signal(SIGCHLD, _nop);
-	signal(SIGALRM, _nop);
-//	signal(SIGINFO, _threadstatus);
-//	rfork(RFREND);
-
 //_threaddebuglevel = (DBGSCHED|DBGCHAN|DBGREND)^~0;
 	_systhreadinit();
 	_qlockinit(_threadrendezvous);
diff --git a/src/libthread/mkfile b/src/libthread/mkfile
index 1ced902..f8ec8e8 100644
--- a/src/libthread/mkfile
+++ b/src/libthread/mkfile
@@ -59,6 +59,7 @@
 
 # sorry
 VG=`test -d /home/rsc/pub/valgrind-debian && echo -DUSEVALGRIND`
+# VG=
 
 CFLAGS=$CFLAGS $VG
 
diff --git a/src/libthread/sched.c b/src/libthread/sched.c
index 9ad7298..bdb9ad6 100644
--- a/src/libthread/sched.c
+++ b/src/libthread/sched.c
@@ -171,7 +171,7 @@
 	}
 }
 
-void
+int
 _sched(void)
 {
 	Proc *p;
@@ -186,8 +186,8 @@
 	//		psstate(t->state), &t->sched, &p->sched);
 		if(_setlabel(&t->sched)==0)
 			_gotolabel(&p->sched);
-		_threadstacklimit(t->stk);
-		return;
+		_threadstacklimit(t->stk, t->stk+t->stksize);
+		return p->nsched++;
 	}else{
 		t = runthread(p);
 		if(t == nil){
@@ -277,10 +277,15 @@
 	unlock(&p->readylock);
 }
 
-void
+int
 yield(void)
 {
-	_sched();
+	Proc *p;
+	int nsched;
+
+	p = _threadgetproc();
+	nsched = p->nsched;
+	return _sched() - nsched;
 }
 
 void
diff --git a/src/libthread/sun4u.c b/src/libthread/sun4u.c
index 6312dac..0d2d8d2 100644
--- a/src/libthread/sun4u.c
+++ b/src/libthread/sun4u.c
@@ -46,7 +46,9 @@
 }
 
 void
-_threadstacklimit(void *addr)
+_threadstacklimit(void *addr, void *addr2)
 {
+	/* Both stack bounds are ignored here; just mark them used. */
 	USED(addr);
+	USED(addr2);
 }
diff --git a/src/libthread/threadimpl.h b/src/libthread/threadimpl.h
index 373164a..7e44e64 100644
--- a/src/libthread/threadimpl.h
+++ b/src/libthread/threadimpl.h
@@ -99,6 +99,7 @@
 	ulong		userpc;
 
 	void*	udata[NPRIV];	/* User per-thread data pointer */
+	int		lastfd;
 };
 
 struct Execargs
@@ -143,6 +144,7 @@
 	Waitmsg		*waitmsg;
 
 	void*	udata;		/* User per-proc data pointer */
+	int		nsched;
 };
 
 struct Pqueue {		/* Proc queue */
@@ -169,7 +171,7 @@
 Proc*	_newproc(void(*)(void*), void*, uint, char*, int, int);
 int		_procsplhi(void);
 void		_procsplx(int);
-void		_sched(void);
+int		_sched(void);
 int		_schedexec(Execargs*);
 void		_schedexecwait(void);
 void		_schedexit(Proc*);
@@ -219,4 +221,4 @@
 extern void _threadmemset(void*, int, int);
 extern void _threaddebugmemset(void*, int, int);
 extern int _threadprocs;
-extern void _threadstacklimit(void*);
+extern void _threadstacklimit(void*, void*);