merge
diff --git a/include/keyboard.h b/include/keyboard.h
index ec9bff4..a3d02d0 100644
--- a/include/keyboard.h
+++ b/include/keyboard.h
@@ -32,7 +32,9 @@
 
 	Kalt=		KF|0x15,
 	Kshift=	KF|0x16,
-	Kctl=		KF|0x17
+	Kctl=		KF|0x17,
+	
+	Kcmd=	0xF100	/* Rune: beginning of Cmd+'a', Cmd+'A', etc on Mac */
 };
 
 #if defined(__cplusplus)
diff --git a/src/cmd/acme/text.c b/src/cmd/acme/text.c
index 729e430..30f5804 100644
--- a/src/cmd/acme/text.c
+++ b/src/cmd/acme/text.c
@@ -734,6 +734,11 @@
 			q0++;
 		textshow(t, q0, q0, TRUE);
 		return;
+	case Kcmd+'c':	/* %C: copy */
+		typecommit(t);
+		cut(t, t, nil, TRUE, FALSE, nil, 0);
+		return;
+
 	Tagdown:
 		/* expand tag to show all text */
 		if(!t->w->tagexpand){
@@ -755,6 +760,27 @@
 		seq++;
 		filemark(t->file);
 	}
+	/* cut/paste must be done after the seq++/filemark */
+	switch(r){
+	case Kcmd+'x':	/* %X: cut */
+		typecommit(t);
+		if(t->what == Body){
+			seq++;
+			filemark(t->file);
+		}
+		cut(t, t, nil, TRUE, TRUE, nil, 0);
+		textshow(t, t->q0, t->q0, 1);
+		return;
+	case Kcmd+'v':	/* %V: paste */
+		typecommit(t);
+		if(t->what == Body){
+			seq++;
+			filemark(t->file);
+		}
+		paste(t, t, nil, TRUE, FALSE, nil, 0);
+		textshow(t, t->q0, t->q1, 1);
+		return;
+	}
 	if(t->q1 > t->q0){
 		if(t->ncache != 0)
 			error("text.type");
diff --git a/src/cmd/devdraw/osx-screen.c b/src/cmd/devdraw/osx-screen.c
index 4b1f6ed..76416cb 100644
--- a/src/cmd/devdraw/osx-screen.c
+++ b/src/cmd/devdraw/osx-screen.c
@@ -2,7 +2,6 @@
 #define Rect OSXRect
 #define Cursor OSXCursor
 #include <Carbon/Carbon.h>
-#include <QuickTime/QuickTime.h> // for full screen
 #undef Rect
 #undef Point
 #undef Cursor
@@ -23,7 +22,6 @@
 #include "glendapng.h"
 
 AUTOFRAMEWORK(Carbon)
-AUTOFRAMEWORK(QuickTime)
 
 #define panic sysfatal
 
@@ -52,6 +50,8 @@
 	PasteboardRef snarf;
 	int needflush;
 	QLock flushlock;
+	int active;
+	int infullscreen;
 } osx;
 
 enum
@@ -66,8 +66,9 @@
 
 static void screenproc(void*);
 static void eresized(int);
-static void fullscreen(void);
+static void fullscreen(int);
 static void seticon(void);
+static void activated(int);
 
 static OSStatus quithandler(EventHandlerCallRef, EventRef, void*);
 static OSStatus eventhandler(EventHandlerCallRef, EventRef, void*);
@@ -159,7 +160,9 @@
 	const EventTypeSpec cmds[] = {
 		{ kEventClassWindow, kEventWindowClosed },
 		{ kEventClassWindow, kEventWindowBoundsChanged },
-		{ kEventClassCommand, kEventCommandProcess }
+		{ kEventClassCommand, kEventCommandProcess },
+		{ kEventClassWindow, kEventWindowActivated },
+		{ kEventClassWindow, kEventWindowDeactivated },
 	};
 	const EventTypeSpec events[] = {
 		{ kEventClassKeyboard, kEventRawKeyDown },
@@ -256,7 +259,7 @@
 			exit(0);
 		
 		case CmdFullScreen:
-			fullscreen();
+			fullscreen(1);
 			break;
 		
 		default:
@@ -273,6 +276,14 @@
 			eresized(1);
 			break;
 		
+		case kEventWindowActivated:
+			activated(1);
+			return eventNotHandledErr;
+					
+		case kEventWindowDeactivated:
+			activated(0);
+			return eventNotHandledErr;
+
 		default:
 			return eventNotHandledErr;
 		}
@@ -419,7 +430,22 @@
 		if(mod == cmdKey){
 			if(ch == 'F' || ch == 'f'){
 				if(osx.isfullscreen && msec() - osx.fullscreentime > 500)
-					fullscreen();
+					fullscreen(0);
+				return noErr;
+			}
+			
+			// Pass most Cmd keys through as Kcmd + ch.
+			// OS X interprets a few no matter what we do,
+			// so it is useless to pass them through as keystrokes too.
+			switch(ch) {
+			case 'm':	// minimize window
+			case 'h':	// hide window
+			case 'H':	// hide others
+			case 'q':	// quit
+				return eventNotHandledErr;
+			}
+			if(' ' <= ch && ch <= '~') {
+				keystroke(Kcmd + ch);
 				return noErr;
 			}
 			return eventNotHandledErr;
@@ -472,7 +498,7 @@
 	CGDataProviderRef provider;
 	CGImageRef image;
 	CGColorSpaceRef cspace;
-	
+
 	GetWindowBounds(osx.window, kWindowContentRgn, &or);
 	r = Rect(or.left, or.top, or.right, or.bottom);
 	if(Dx(r) == Dx(osx.screenr) && Dy(r) == Dy(osx.screenr)){
@@ -561,39 +587,52 @@
 }
 
 void
-fullscreen(void)
+activated(int active)	/* remember whether the window is currently activated */
 {
-	static Ptr restore;
-	static WindowRef oldwindow;
-	GDHandle device;
-
-	qlock(&osx.flushlock);
-	if(osx.isfullscreen){
-		if(osx.windowctx){
-			QDEndCGContext(GetWindowPort(osx.window), &osx.windowctx);
-			osx.windowctx = nil;
-		}
-		EndFullScreen(restore, 0);
-		osx.window = oldwindow;
-		ShowWindow(osx.window);
-		osx.isfullscreen = 0;
-	}else{
-		if(osx.windowctx){
-			QDEndCGContext(GetWindowPort(osx.window), &osx.windowctx);
-			osx.windowctx = nil;
-		}
-		HideWindow(osx.window);
-		oldwindow = osx.window;
-		GetWindowGreatestAreaDevice(osx.window, kWindowTitleBarRgn, &device, nil);
-		BeginFullScreen(&restore, device, 0, 0, &osx.window, 0, 0);
-		osx.isfullscreen = 1;
-		osx.fullscreentime = msec();
-	}
-	qunlock(&osx.flushlock);
-	eresized(1);
+	osx.active = active;	/* 1 from kEventWindowActivated, 0 from kEventWindowDeactivated */
 }
 }
 
 void
+fullscreen(int wascmd)	/* toggle osx.window between its saved bounds and a whole display */
+{
+	static OSXRect oldrect;	/* content bounds saved when entering, used when leaving */
+	GDHandle device;
+	OSXRect dr;
+
+	if(!wascmd)	/* the CmdFullScreen menu command passes 1; the Cmd-F keystroke passes 0 and is ignored */
+		return;
+	/* Toggle on osx.isfullscreen: the first branch enters, the second restores. */
+	if(!osx.isfullscreen){
+		GetWindowGreatestAreaDevice(osx.window,
+			kWindowTitleBarRgn, &device, nil);
+		dr = (*device)->gdRect;	/* NOTE(review): call result ignored and device dereferenced unchecked */
+		if(dr.top == 0 && dr.left == 0)	/* presumably the display carrying the menu bar - confirm */
+			HideMenuBar();
+		GetWindowBounds(osx.window, kWindowContentRgn, &oldrect);	/* remembered for the restore branch */
+		ChangeWindowAttributes(osx.window,
+			kWindowNoTitleBarAttribute,	/* attributes to set */
+			kWindowResizableAttribute);	/* attributes to clear */
+		MoveWindow(osx.window, 0, 0, 1);	/* final argument 1: also bring the window to the front */
+		MoveWindow(osx.window, dr.left, dr.top, 0);
+		SizeWindow(osx.window,
+			dr.right - dr.left,
+			dr.bottom - dr.top, 0);
+		osx.isfullscreen = 1;	/* NOTE(review): osx.fullscreentime, read by the Cmd-F handler, is no longer set here */
+	}else{
+		ShowMenuBar();	/* called even when HideMenuBar was skipped above */
+		ChangeWindowAttributes(osx.window,
+			kWindowResizableAttribute,	/* attributes to set */
+			kWindowNoTitleBarAttribute);	/* attributes to clear */
+		SizeWindow(osx.window,
+			oldrect.right - oldrect.left,
+			oldrect.bottom - oldrect.top, 0);
+		MoveWindow(osx.window, oldrect.left, oldrect.top, 0);
+		osx.isfullscreen = 0;
+	}
+	eresized(1);	/* defined outside this patch; presumably rebuilds the screen for the new size - confirm */
+}
+		
+void
 setmouse(Point p)
 {
 	CGPoint cgp;
diff --git a/src/cmd/samterm/main.c b/src/cmd/samterm/main.c
index bf0d9a5..5b645e8 100644
--- a/src/cmd/samterm/main.c
+++ b/src/cmd/samterm/main.c
@@ -494,6 +494,9 @@
 #define	PAGEUP	Kpgup
 #define	RIGHTARROW	Kright
 #define	SCROLLKEY	Kdown
+#define	CUT	(Kcmd+'x')	/* Cmd+x rune */
+#define	COPY	(Kcmd+'c')	/* Cmd+c rune */
+#define	PASTE	(Kcmd+'v')	/* Cmd+v rune */
 
 int
 nontypingkey(int c)
@@ -511,6 +514,8 @@
 	case SCROLLKEY:
 		return 1;
 	}
+	if(c >= Kcmd)
+		return 1;
 	return 0;
 }
 
@@ -673,6 +678,20 @@
 		for(l=t->l; l<&t->l[NL]; l++)
 			if(l->textfn)
 				flsetselect(l, l->p0, l->p1);
+		switch(c) {
+		case CUT:
+			flushtyping(0);
+			cut(t, t->front, 1, 1);
+			break;
+		case COPY:
+			flushtyping(0);
+			snarf(t, t->front);
+			break;
+		case PASTE:
+			flushtyping(0);
+			paste(t, t->front);
+			break;
+		}
 	}
 }
 
diff --git a/src/libdraw/drawclient.c b/src/libdraw/drawclient.c
index f3c42b8..361cb0a 100644
--- a/src/libdraw/drawclient.c
+++ b/src/libdraw/drawclient.c
@@ -45,8 +45,17 @@
 		 * The NOLIBTHREADDAEMONIZE keeps devdraw from
 		 * forking before threadmain. OS X hates it when
 		 * guis fork.
+		 *
+		 * If client didn't use ARGBEGIN, argv0 == nil.
+		 * Can't send nil through because OS X expects
+		 * argv[0] to be non-nil.  Also, OS X apparently
+		 * expects argv[0] to be a valid executable name,
+		 * so "(argv0)" is not okay.  Use "devdraw"
+		 * instead.
 		 */
 		putenv("NOLIBTHREADDAEMONIZE", "1");
+		if(argv0 == nil)
+			argv0 = "devdraw";
 		execl("devdraw", argv0, argv0, "(devdraw)", nil);
 		sysfatal("exec devdraw: %r");
 	}
diff --git a/src/libmemdraw/draw.c b/src/libmemdraw/draw.c
index 2d8681e..7616997 100644
--- a/src/libmemdraw/draw.c
+++ b/src/libmemdraw/draw.c
@@ -10,22 +10,34 @@
 #define RGB2K(r,g,b)	((156763*(r)+307758*(g)+59769*(b))>>19)
 
 /*
- * for 0 ≤ x ≤ 255*255, (x*0x0101+0x100)>>16 is a perfect approximation.
- * for 0 ≤ x < (1<<16), x/255 = ((x+1)*0x0101)>>16 is a perfect approximation.
- * the last one is perfect for all up to 1<<16, avoids a multiply, but requires a rathole.
+ * For 16-bit values, x / 255 == (t = x+1, (t+(t>>8)) >> 8).
+ * We add another 127 to round to the nearest value rather
+ * than truncate.
+ *
+ * CALCxy does x bytewise calculations on y input images (x=1,4; y=1,2).
+ * CALC2y does two parallel 16-bit calculations on y input images (y=1,2).
  */
-/* #define DIV255(x) (((x)*257+256)>>16)  */
-#define DIV255(x) ((((x)+1)*257)>>16)
-/* #define DIV255(x) (tmp=(x)+1, (tmp+(tmp>>8))>>8) */
+#define CALC11(a, v, tmp) \
+	(tmp=(a)*(v)+128, (tmp+(tmp>>8))>>8)
 
-#define MUL(x, y, t)	(t = (x)*(y)+128, (t+(t>>8))>>8)
-#define MASK13	0xFF00FF00
-#define MASK02	0x00FF00FF
-#define MUL13(a, x, t)		(t = (a)*(((x)&MASK13)>>8)+128, ((t+((t>>8)&MASK02))>>8)&MASK02)
-#define MUL02(a, x, t)		(t = (a)*(((x)&MASK02)>>0)+128, ((t+((t>>8)&MASK02))>>8)&MASK02)
-#define MUL0123(a, x, s, t)	((MUL13(a, x, s)<<8)|MUL02(a, x, t))
+#define CALC12(a1, v1, a2, v2, tmp) \
+	(tmp=(a1)*(v1)+(a2)*(v2)+128, (tmp+(tmp>>8))>>8)
 
-#define MUL2(u, v, x, y)	(t = (u)*(v)+(x)*(y)+256, (t+(t>>8))>>8)
+#define MASK 0xFF00FF	/* selects bytes 0 and 2 of a 32-bit word */
+/* CALC21: both 16-bit lanes of vvuu times a, divided by 255 with rounding; tmp is scratch. */
+#define CALC21(a, vvuu, tmp) \
+	(tmp=(a)*(vvuu)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)
+/* CALC41: CALC21 on the even bytes and on the odd bytes of rgba, recombined (a, rgba evaluated twice). */
+#define CALC41(a, rgba, tmp1, tmp2) \
+	(CALC21(a, (rgba) & MASK, tmp1) | \
+	 (CALC21(a, ((rgba)>>8)&MASK, tmp2)<<8))
+/* CALC22: as CALC21, but each lane computes a1*vvuu1 + a2*vvuu2 before the divide. */
+#define CALC22(a1, vvuu1, a2, vvuu2, tmp) \
+	(tmp=(a1)*(vvuu1)+(a2)*(vvuu2)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)
+/* CALC42: as CALC41, but each byte computes a1*rgba1 + a2*rgba2 (all arguments evaluated twice). */
+#define CALC42(a1, rgba1, a2, rgba2, tmp1, tmp2) \
+	(CALC22(a1, (rgba1) & MASK, a2, (rgba2) & MASK, tmp1) | \
+	 (CALC22(a1, ((rgba1)>>8) & MASK, a2, ((rgba2)>>8) & MASK, tmp2)<<8))
 
 static void mktables(void);
 typedef int Subdraw(Memdrawparam*);
@@ -786,41 +798,85 @@
 	return bdst;
 }
 
+/*
+ * Report whether dst and src lay out their channels
+ * identically enough that a pixel can be combined as
+ * one 32-bit rgba word instead of channel by channel.
+ * Returns 1 if the word-wide path is usable, else 0.
+ */
+static int
+chanmatch(Buffer *bdst, Buffer *bsrc)
+{
+	uchar *dbase, *sbase;
+
+	dbase = (uchar*)bdst->rgba;
+	sbase = (uchar*)bsrc->rgba;
+
+	/*
+	 * each color channel must sit at the same byte
+	 * offset from the start of the word in both buffers.
+	 */
+	if(bdst->red - dbase != bsrc->red - sbase)
+		return 0;
+	if(bdst->blu - dbase != bsrc->blu - sbase)
+		return 0;
+	if(bdst->grn - dbase != bsrc->grn - sbase)
+		return 0;
+
+	/*
+	 * with the colors lined up, only the alpha byte is
+	 * left.  a destination without alpha (&ones) does not
+	 * care what lands in that byte, so anything goes.
+	 */
+	if(bdst->alpha == &ones)
+		return 1;
+
+	/*
+	 * the destination keeps alpha.  if the source has
+	 * none, the word-wide math would read junk from the
+	 * source's spare byte; otherwise both alphas share
+	 * the remaining slot.
+	 */
+	if(bsrc->alpha == &ones)
+		return 0;
+	return 1;
+}
+
 static Buffer
 alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
 {
 	Buffer obdst;
 	int fd, sadelta;
 	int i, sa, ma, q;
-	u32int s, t;
+	u32int t, t1;
 
 	obdst = bdst;
 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
-	q = bsrc.delta == 4 && bdst.delta == 4;
+	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
 
 	for(i=0; i<dx; i++){
 		sa = *bsrc.alpha;
 		ma = *bmask.alpha;
-		fd = MUL(sa, ma, t);
+		fd = CALC11(sa, ma, t);
 		if(op == DoutS)
 			fd = 255-fd;
 
 		if(grey){
-			*bdst.grey = MUL(fd, *bdst.grey, t);
+			*bdst.grey = CALC11(fd, *bdst.grey, t);
 			bsrc.grey += bsrc.delta;
 			bdst.grey += bdst.delta;
 		}else{
 			if(q){
-				*bdst.rgba = MUL0123(fd, *bdst.rgba, s, t);
+				*bdst.rgba = CALC41(fd, *bdst.rgba, t, t1);
 				bsrc.rgba++;
 				bdst.rgba++;
 				bsrc.alpha += sadelta;
 				bmask.alpha += bmask.delta;
 				continue;
 			}
-			*bdst.red = MUL(fd, *bdst.red, t);
-			*bdst.grn = MUL(fd, *bdst.grn, t);
-			*bdst.blu = MUL(fd, *bdst.blu, t);
+			*bdst.red = CALC11(fd, *bdst.red, t);
+			*bdst.grn = CALC11(fd, *bdst.grn, t);
+			*bdst.blu = CALC11(fd, *bdst.blu, t);
 			bsrc.red += bsrc.delta;
 			bsrc.blu += bsrc.delta;
 			bsrc.grn += bsrc.delta;
@@ -829,7 +885,7 @@
 			bdst.grn += bdst.delta;
 		}
 		if(bdst.alpha != &ones){
-			*bdst.alpha = MUL(fd, *bdst.alpha, t);
+			*bdst.alpha = CALC11(fd, *bdst.alpha, t);
 			bdst.alpha += bdst.delta;
 		}
 		bmask.alpha += bmask.delta;
@@ -844,11 +900,11 @@
 	Buffer obdst;
 	int fs, sadelta;
 	int i, ma, da, q;
-	u32int s, t;
+	u32int t, t1;
 
 	obdst = bdst;
 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
-	q = bsrc.delta == 4 && bdst.delta == 4;
+	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
 
 	for(i=0; i<dx; i++){
 		ma = *bmask.alpha;
@@ -857,24 +913,24 @@
 			da = 255-da;
 		fs = ma;
 		if(op != S)
-			fs = MUL(fs, da, t);
+			fs = CALC11(fs, da, t);
 
 		if(grey){
-			*bdst.grey = MUL(fs, *bsrc.grey, t);
+			*bdst.grey = CALC11(fs, *bsrc.grey, t);
 			bsrc.grey += bsrc.delta;
 			bdst.grey += bdst.delta;
 		}else{
 			if(q){
-				*bdst.rgba = MUL0123(fs, *bsrc.rgba, s, t);
+				*bdst.rgba = CALC41(fs, *bsrc.rgba, t, t1);
 				bsrc.rgba++;
 				bdst.rgba++;
 				bmask.alpha += bmask.delta;
 				bdst.alpha += bdst.delta;
 				continue;
 			}
-			*bdst.red = MUL(fs, *bsrc.red, t);
-			*bdst.grn = MUL(fs, *bsrc.grn, t);
-			*bdst.blu = MUL(fs, *bsrc.blu, t);
+			*bdst.red = CALC11(fs, *bsrc.red, t);
+			*bdst.grn = CALC11(fs, *bsrc.grn, t);
+			*bdst.blu = CALC11(fs, *bsrc.blu, t);
 			bsrc.red += bsrc.delta;
 			bsrc.blu += bsrc.delta;
 			bsrc.grn += bsrc.delta;
@@ -883,7 +939,7 @@
 			bdst.grn += bdst.delta;
 		}
 		if(bdst.alpha != &ones){
-			*bdst.alpha = MUL(fs, *bsrc.alpha, t);
+			*bdst.alpha = CALC11(fs, *bsrc.alpha, t);
 			bdst.alpha += bdst.delta;
 		}
 		bmask.alpha += bmask.delta;
@@ -898,35 +954,35 @@
 	Buffer obdst;
 	int fs, fd, sadelta;
 	int i, sa, ma, da, q;
-	u32int s, t, u, v;
+	u32int t, t1;
 
 	obdst = bdst;
 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
-	q = bsrc.delta == 4 && bdst.delta == 4;
+	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
 
 	for(i=0; i<dx; i++){
 		sa = *bsrc.alpha;
 		ma = *bmask.alpha;
 		da = *bdst.alpha;
 		if(op == SatopD)
-			fs = MUL(ma, da, t);
+			fs = CALC11(ma, da, t);
 		else
-			fs = MUL(ma, 255-da, t);
+			fs = CALC11(ma, 255-da, t);
 		if(op == DoverS)
 			fd = 255;
 		else{
-			fd = MUL(sa, ma, t);
+			fd = CALC11(sa, ma, t);
 			if(op != DatopS)
 				fd = 255-fd;
 		}
 
 		if(grey){
-			*bdst.grey = MUL(fs, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
+			*bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);
 			bsrc.grey += bsrc.delta;
 			bdst.grey += bdst.delta;
 		}else{
 			if(q){
-				*bdst.rgba = MUL0123(fs, *bsrc.rgba, s, t)+MUL0123(fd, *bdst.rgba, u, v);
+				*bdst.rgba = CALC42(fs, *bsrc.rgba, fd, *bdst.rgba, t, t1);
 				bsrc.rgba++;
 				bdst.rgba++;
 				bsrc.alpha += sadelta;
@@ -934,9 +990,9 @@
 				bdst.alpha += bdst.delta;
 				continue;
 			}
-			*bdst.red = MUL(fs, *bsrc.red, s)+MUL(fd, *bdst.red, t);
-			*bdst.grn = MUL(fs, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
-			*bdst.blu = MUL(fs, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
+			*bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);
+			*bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);
+			*bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);
 			bsrc.red += bsrc.delta;
 			bsrc.blu += bsrc.delta;
 			bsrc.grn += bsrc.delta;
@@ -945,7 +1001,7 @@
 			bdst.grn += bdst.delta;
 		}
 		if(bdst.alpha != &ones){
-			*bdst.alpha = MUL(fs, sa, s)+MUL(fd, da, t);
+			*bdst.alpha = CALC12(fs, sa, fd, da, t);
 			bdst.alpha += bdst.delta;
 		}
 		bmask.alpha += bmask.delta;
@@ -969,34 +1025,34 @@
 	Buffer obdst;
 	int fd, sadelta;
 	int i, sa, ma, q;
-	u32int s, t, u, v;
+	u32int t, t1;
 
 	USED(op);
 	obdst = bdst;
 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
-	q = bsrc.delta == 4 && bdst.delta == 4;
+	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
 
 	for(i=0; i<dx; i++){
 		sa = *bsrc.alpha;
 		ma = *bmask.alpha;
-		fd = 255-MUL(sa, ma, t);
+		fd = 255-CALC11(sa, ma, t);
 
 		if(grey){
-			*bdst.grey = MUL(ma, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
+			*bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);
 			bsrc.grey += bsrc.delta;
 			bdst.grey += bdst.delta;
 		}else{
 			if(q){
-				*bdst.rgba = MUL0123(ma, *bsrc.rgba, s, t)+MUL0123(fd, *bdst.rgba, u, v);
+				*bdst.rgba = CALC42(ma, *bsrc.rgba, fd, *bdst.rgba, t, t1);
 				bsrc.rgba++;
 				bdst.rgba++;
 				bsrc.alpha += sadelta;
 				bmask.alpha += bmask.delta;
 				continue;
 			}
-			*bdst.red = MUL(ma, *bsrc.red, s)+MUL(fd, *bdst.red, t);
-			*bdst.grn = MUL(ma, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
-			*bdst.blu = MUL(ma, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
+			*bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);
+			*bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);
+			*bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);
 			bsrc.red += bsrc.delta;
 			bsrc.blu += bsrc.delta;
 			bsrc.grn += bsrc.delta;
@@ -1005,7 +1061,7 @@
 			bdst.grn += bdst.delta;
 		}
 		if(bdst.alpha != &ones){
-			*bdst.alpha = MUL(ma, sa, s)+MUL(fd, *bdst.alpha, t);
+			*bdst.alpha = CALC12(ma, sa, fd, *bdst.alpha, t);
 			bdst.alpha += bdst.delta;
 		}
 		bmask.alpha += bmask.delta;
@@ -1061,7 +1117,7 @@
 	Buffer obdst;
 	int fd;
 	int i, ma;
-	u32int s, t;
+	u32int t;
 
 	USED(op);
 	obdst = bdst;
@@ -1071,13 +1127,13 @@
 		fd = 255-ma;
 
 		if(grey){
-			*bdst.grey = MUL(ma, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
+			*bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);
 			bsrc.grey += bsrc.delta;
 			bdst.grey += bdst.delta;
 		}else{
-			*bdst.red = MUL(ma, *bsrc.red, s)+MUL(fd, *bdst.red, t);
-			*bdst.grn = MUL(ma, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
-			*bdst.blu = MUL(ma, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
+			*bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);
+			*bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);
+			*bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);
 			bsrc.red += bsrc.delta;
 			bsrc.blu += bsrc.delta;
 			bsrc.grn += bsrc.delta;
@@ -1086,7 +1142,7 @@
 			bdst.grn += bdst.delta;
 		}
 		if(bdst.alpha != &ones){
-			*bdst.alpha = ma+MUL(fd, *bdst.alpha, t);
+			*bdst.alpha = ma+CALC11(fd, *bdst.alpha, t);
 			bdst.alpha += bdst.delta;
 		}
 		bmask.alpha += bmask.delta;
@@ -1133,7 +1189,7 @@
 	Buffer obdst;
 	int fs, fd;
 	int i, ma, da, zero;
-	u32int s, t;
+	u32int t;
 
 	obdst = bdst;
 	zero = !(op&1);
@@ -1150,16 +1206,16 @@
 
 		if(grey){
 			if(ma)
-				*bdst.grey = MUL(fs, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
+				*bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);
 			else if(zero)
 				*bdst.grey = 0;
 			bsrc.grey += bsrc.delta;
 			bdst.grey += bdst.delta;
 		}else{
 			if(ma){
-				*bdst.red = MUL(fs, *bsrc.red, s)+MUL(fd, *bdst.red, t);
-				*bdst.grn = MUL(fs, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
-				*bdst.blu = MUL(fs, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
+				*bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);
+				*bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);
+				*bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);
 			}
 			else if(zero)
 				*bdst.red = *bdst.grn = *bdst.blu = 0;
@@ -1173,7 +1229,7 @@
 		bmask.alpha += bmask.delta;
 		if(bdst.alpha != &ones){
 			if(ma)
-				*bdst.alpha = fs+MUL(fd, da, t);
+				*bdst.alpha = fs+CALC11(fd, da, t);
 			else if(zero)
 				*bdst.alpha = 0;
 			bdst.alpha += bdst.delta;
diff --git a/src/libmemdraw/drawtest.c b/src/libmemdraw/drawtest.c
index 9d99178..07cda45 100644
--- a/src/libmemdraw/drawtest.c
+++ b/src/libmemdraw/drawtest.c
@@ -62,7 +62,7 @@
 	char buf[1024];
 
 	va_start(va, fmt);
-	n = doprint(buf, buf+sizeof buf, fmt, va) - buf;
+	n = vseprint(buf, buf+sizeof buf, fmt, va) - buf;
 	va_end(va);
 
 	write(1,buf,n);
@@ -104,8 +104,6 @@
 		exits("usage");
 	}
 
-	fmtinstall('b', numbconv);	/* binary! */
-
 	fprint(2, "%s -x %d -y %d -s 0x%x %s %s %s\n", argv0, Xrange, Yrange, seed, dchan, schan, mchan);
 	srand(seed);
 
@@ -161,7 +159,7 @@
  * a list of characters to put at various points in the picture.
  */
 static void
-Bprintr5g6b5(Biobuf *bio, char*, u32int v)
+Bprintr5g6b5(Biobuf *bio, char* _, u32int v)
 {
 	int r,g,b;
 	r = (v>>11)&31;
@@ -171,7 +169,7 @@
 }
 
 static void
-Bprintr5g5b5a1(Biobuf *bio, char*, u32int v)
+Bprintr5g5b5a1(Biobuf *bio, char* _, u32int v)
 {
 	int r,g,b,a;
 	r = (v>>11)&31;
@@ -974,7 +972,7 @@
 	pixtorgba(getpixel(dst, dp), &dr, &dg, &db, &da);
 	pixtorgba(getpixel(src, sp), &sr, &sg, &sb, &sa);
 	m = getmask(mask, mp);
-	M = 255-(sa*m)/255;
+	M = 255-(sa*m + 127)/255;
 
 DBG print("dst %x %x %x %x src %x %x %x %x m %x = ", dr,dg,db,da, sr,sg,sb,sa, m);
 	if(dst->flags&Fgrey){
@@ -985,18 +983,18 @@
 		 */
 		sk = RGB2K(sr, sg, sb);
 		dk = RGB2K(dr, dg, db);
-		dk = (sk*m + dk*M)/255;
+		dk = (sk*m + dk*M + 127)/255;
 		dr = dg = db = dk;
-		da = (sa*m + da*M)/255;
+		da = (sa*m + da*M + 127)/255;
 	}else{
 		/*
 		 * True color alpha calculation treats all channels (including alpha)
 		 * the same.  It might have been nice to use an array, but oh well.
 		 */
-		dr = (sr*m + dr*M)/255;
-		dg = (sg*m + dg*M)/255;
-		db = (sb*m + db*M)/255;
-		da = (sa*m + da*M)/255;
+		dr = (sr*m + dr*M + 127)/255;
+		dg = (sg*m + dg*M + 127)/255;
+		db = (sb*m + db*M + 127)/255;
+		da = (sa*m + da*M + 127)/255;
 	}
 
 DBG print("%x %x %x %x\n", dr,dg,db,da);