Blame - src/libdraw/md-draw.c - plan9

blob: c8ad2a6bbff8d3fc63e5555460c04767811f8b81 [file] [log] [blame]

rsc	76193d7	2003-09-30 17:47:42 +0000	[diff] [blame]	1	#include <u.h>
				2	#include <libc.h>
				3	#include <draw.h>
				4	#include <memdraw.h>
				5
				int drawdebug;			/* debug switch; referenced only by commented-out iprint calls in this part of the file */
				static int tablesbuilt;		/* set by mktables once the conversion tables are filled */

				/* perfect approximation to NTSC = .299r+.587g+.114b when 0 ≤ r,g,b < 256 */
				#define RGB2K(r,g,b)	((156763*(r)+307758*(g)+59769*(b))>>19)

				/*
				 * for 0 ≤ x ≤ 255*255, (x*0x0101+0x100)>>16 is a perfect approximation.
				 * for 0 ≤ x < (1<<16), x/255 = ((x+1)*0x0101)>>16 is a perfect approximation.
				 * the last one is perfect for all up to 1<<16, avoids a multiply, but requires a rathole.
				 */
				/* #define DIV255(x) (((x)*257+256)>>16) */
				#define DIV255(x) ((((x)+1)*257)>>16)
				/* #define DIV255(x) (tmp=(x)+1, (tmp+(tmp>>8))>>8) */

				/*
				 * MUL(x, y, t): rounded x*y/255 for 8-bit x and y, using t as scratch.
				 * MUL13/MUL02 apply the same scaling to two byte lanes of a 32-bit
				 * pixel at once (bytes 1,3 and bytes 0,2 respectively); MUL0123
				 * recombines them into a full 32-bit result.  s and t are scratch.
				 */
				#define MUL(x, y, t)	(t = (x)*(y)+128, (t+(t>>8))>>8)
				#define MASK13	0xFF00FF00
				#define MASK02	0x00FF00FF
				#define MUL13(a, x, t)		(t = (a)*(((x)&MASK13)>>8)+128, ((t+((t>>8)&MASK02))>>8)&MASK02)
				#define MUL02(a, x, t)		(t = (a)*(((x)&MASK02)>>0)+128, ((t+((t>>8)&MASK02))>>8)&MASK02)
				#define MUL0123(a, x, s, t)	((MUL13(a, x, s)<<8)|MUL02(a, x, t))

				/* u*v + x*y scaled like MUL; uses a variable named t from the enclosing scope */
				#define MUL2(u, v, x, y)	(t = (u)*(v)+(x)*(y)+256, (t+(t>>8))>>8)
				29
				30	static void mktables(void);
				31	typedef int Subdraw(Memdrawparam*);
				32	static Subdraw chardraw, alphadraw, memoptdraw;
				33
				34	static Memimage* memones;
				35	static Memimage* memzeros;
				36	Memimage *memwhite;
				37	Memimage *memblack;
				38	Memimage *memtransparent;
				39	Memimage *memopaque;
				40
				41	int __ifmt(Fmt*);
				42
				43	void
				44	memimageinit(void)
				45	{
				46	static int didinit = 0;
				47
				48	if(didinit)
				49	return;
				50
				51	didinit = 1;
				52
				53	mktables();
				54	_memmkcmap();
				55
				56	fmtinstall('R', Rfmt);
				57	fmtinstall('P', Pfmt);
				58	fmtinstall('b', __ifmt);
				59
				60	memones = allocmemimage(Rect(0,0,1,1), GREY1);
				61	memones->flags \|= Frepl;
				62	memones->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
				63	*byteaddr(memones, ZP) = ~0;
				64
				65	memzeros = allocmemimage(Rect(0,0,1,1), GREY1);
				66	memzeros->flags \|= Frepl;
				67	memzeros->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
				68	*byteaddr(memzeros, ZP) = 0;
				69
				70	if(memones == nil \|\| memzeros == nil)
				71	assert(0 /cannot initialize memimage library /); /* RSC BUG */
				72
				73	memwhite = memones;
				74	memblack = memzeros;
				75	memopaque = memones;
				76	memtransparent = memzeros;
				77	}
				78
				79	u32int _imgtorgba(Memimage*, u32int);
				80	u32int _rgbatoimg(Memimage*, u32int);
				81	u32int _pixelbits(Memimage*, Point);
				82
				83	#define DBG if(0)
				84	static Memdrawparam par;
				85
				86	Memdrawparam*
				87	_memimagedrawsetup(Memimage dst, Rectangle r, Memimage src, Point p0, Memimage *mask, Point p1, int op)
				88	{
				89	if(mask == nil)
				90	mask = memopaque;
				91
				92	DBG print("memimagedraw %p/%luX %R @ %p %p/%luX %P %p/%luX %P... ", dst, dst->chan, r, dst->data->bdata, src, src->chan, p0, mask, mask->chan, p1);
				93
				94	if(drawclip(dst, &r, src, &p0, mask, &p1, &par.sr, &par.mr) == 0){
				95	// if(drawdebug)
				96	// iprint("empty clipped rectangle\n");
				97	return nil;
				98	}
				99
				100	if(op < Clear \|\| op > SoverD){
				101	// if(drawdebug)
				102	// iprint("op out of range: %d\n", op);
				103	return nil;
				104	}
				105
				106	par.op = op;
				107	par.dst = dst;
				108	par.r = r;
				109	par.src = src;
				110	/* par.sr set by drawclip */
				111	par.mask = mask;
				112	/* par.mr set by drawclip */
				113
				114	par.state = 0;
				115	if(src->flags&Frepl){
				116	par.state \|= Replsrc;
				117	if(Dx(src->r)==1 && Dy(src->r)==1){
				118	par.sval = pixelbits(src, src->r.min);
				119	par.state \|= Simplesrc;
				120	par.srgba = _imgtorgba(src, par.sval);
				121	par.sdval = _rgbatoimg(dst, par.srgba);
				122	if((par.srgba&0xFF) == 0 && (op&DoutS)){
				123	// if (drawdebug) iprint("fill with transparent source\n");
				124	return nil; /* no-op successfully handled */
				125	}
				126	}
				127	}
				128
				129	if(mask->flags & Frepl){
				130	par.state \|= Replmask;
				131	if(Dx(mask->r)==1 && Dy(mask->r)==1){
				132	par.mval = pixelbits(mask, mask->r.min);
				133	if(par.mval == 0 && (op&DoutS)){
				134	// if(drawdebug) iprint("fill with zero mask\n");
				135	return nil; /* no-op successfully handled */
				136	}
				137	par.state \|= Simplemask;
				138	if(par.mval == ~0)
				139	par.state \|= Fullmask;
				140	par.mrgba = _imgtorgba(mask, par.mval);
				141	}
				142	}
				143
				144	// if(drawdebug)
				145	// iprint("dr %R sr %R mr %R...", r, par.sr, par.mr);
				146	DBG print("draw dr %R sr %R mr %R %lux\n", r, par.sr, par.mr, par.state);
				147
				148	return &par;
				149	}
				150
				151	void
				152	_memimagedraw(Memdrawparam *par)
				153	{
				154	/*
				155	* Now that we've clipped the parameters down to be consistent, we
				156	* simply try sub-drawing routines in order until we find one that was able
				157	* to handle us. If the sub-drawing routine returns zero, it means it was
				158	* unable to satisfy the request, so we do not return.
				159	*/
				160
				161	/*
				162	* Hardware support. Each video driver provides this function,
				163	* which checks to see if there is anything it can help with.
				164	* There could be an if around this checking to see if dst is in video memory.
				165	*/
				166	DBG print("test hwdraw\n");
				167	if(hwdraw(par)){
				168	//if(drawdebug) iprint("hw handled\n");
				169	DBG print("hwdraw handled\n");
				170	return;
				171	}
				172	/*
				173	* Optimizations using memmove and memset.
				174	*/
				175	DBG print("test memoptdraw\n");
				176	if(memoptdraw(par)){
				177	//if(drawdebug) iprint("memopt handled\n");
				178	DBG print("memopt handled\n");
				179	return;
				180	}
				181
				182	/*
				183	* Character drawing.
				184	* Solid source color being painted through a boolean mask onto a high res image.
				185	*/
				186	DBG print("test chardraw\n");
				187	if(chardraw(par)){
				188	//if(drawdebug) iprint("chardraw handled\n");
				189	DBG print("chardraw handled\n");
				190	return;
				191	}
				192
				193	/*
				194	* General calculation-laden case that does alpha for each pixel.
				195	*/
				196	DBG print("do alphadraw\n");
				197	alphadraw(par);
				198	//if(drawdebug) iprint("alphadraw handled\n");
				199	DBG print("alphadraw handled\n");
				200	}
				201	#undef DBG
				202
				203	/*
				204	* Clip the destination rectangle further based on the properties of the
				205	* source and mask rectangles. Once the destination rectangle is properly
				206	* clipped, adjust the source and mask rectangles to be the same size.
				207	* Then if source or mask is replicated, move its clipped rectangle
				208	* so that its minimum point falls within the repl rectangle.
				209	*
				210	* Return zero if the final rectangle is null.
				211	*/
				212	int
				213	drawclip(Memimage dst, Rectangle r, Memimage src, Point p0, Memimage mask, Point p1, Rectangle sr, Rectangle mr)
				214	{
				215	Point rmin, delta;
				216	int splitcoords;
				217	Rectangle omr;
				218
				219	if(r->min.x>=r->max.x \|\| r->min.y>=r->max.y)
				220	return 0;
				221	splitcoords = (p0->x!=p1->x) \|\| (p0->y!=p1->y);
				222	/* clip to destination */
				223	rmin = r->min;
				224	if(!rectclip(r, dst->r) \|\| !rectclip(r, dst->clipr))
				225	return 0;
				226	/* move mask point */
				227	p1->x += r->min.x-rmin.x;
				228	p1->y += r->min.y-rmin.y;
				229	/* move source point */
				230	p0->x += r->min.x-rmin.x;
				231	p0->y += r->min.y-rmin.y;
				232	/* map destination rectangle into source */
				233	sr->min = *p0;
				234	sr->max.x = p0->x+Dx(*r);
				235	sr->max.y = p0->y+Dy(*r);
				236	/* sr is r in source coordinates; clip to source */
				237	if(!(src->flags&Frepl) && !rectclip(sr, src->r))
				238	return 0;
				239	if(!rectclip(sr, src->clipr))
				240	return 0;
				241	/* compute and clip rectangle in mask */
				242	if(splitcoords){
				243	/* move mask point with source */
				244	p1->x += sr->min.x-p0->x;
				245	p1->y += sr->min.y-p0->y;
				246	mr->min = *p1;
				247	mr->max.x = p1->x+Dx(*sr);
				248	mr->max.y = p1->y+Dy(*sr);
				249	omr = *mr;
				250	/* mr is now rectangle in mask; clip it */
				251	if(!(mask->flags&Frepl) && !rectclip(mr, mask->r))
				252	return 0;
				253	if(!rectclip(mr, mask->clipr))
				254	return 0;
				255	/* reflect any clips back to source */
				256	sr->min.x += mr->min.x-omr.min.x;
				257	sr->min.y += mr->min.y-omr.min.y;
				258	sr->max.x += mr->max.x-omr.max.x;
				259	sr->max.y += mr->max.y-omr.max.y;
				260	*p1 = mr->min;
				261	}else{
				262	if(!(mask->flags&Frepl) && !rectclip(sr, mask->r))
				263	return 0;
				264	if(!rectclip(sr, mask->clipr))
				265	return 0;
				266	*p1 = sr->min;
				267	}
				268
				269	/* move source clipping back to destination */
				270	delta.x = r->min.x - p0->x;
				271	delta.y = r->min.y - p0->y;
				272	r->min.x = sr->min.x + delta.x;
				273	r->min.y = sr->min.y + delta.y;
				274	r->max.x = sr->max.x + delta.x;
				275	r->max.y = sr->max.y + delta.y;
				276
				277	/* move source rectangle so sr->min is in src->r */
				278	if(src->flags&Frepl) {
				279	delta.x = drawreplxy(src->r.min.x, src->r.max.x, sr->min.x) - sr->min.x;
				280	delta.y = drawreplxy(src->r.min.y, src->r.max.y, sr->min.y) - sr->min.y;
				281	sr->min.x += delta.x;
				282	sr->min.y += delta.y;
				283	sr->max.x += delta.x;
				284	sr->max.y += delta.y;
				285	}
				286	*p0 = sr->min;
				287
				288	/* move mask point so it is in mask->r */
				289	p1 = drawrepl(mask->r, p1);
				290	mr->min = *p1;
				291	mr->max.x = p1->x+Dx(*sr);
				292	mr->max.y = p1->y+Dy(*sr);
				293
				294	assert(Dx(sr) == Dx(mr) && Dx(mr) == Dx(r));
				295	assert(Dy(sr) == Dy(mr) && Dy(mr) == Dy(r));
				296	assert(ptinrect(*p0, src->r));
				297	assert(ptinrect(*p1, mask->r));
				298	assert(ptinrect(r->min, dst->r));
				299
				300	return 1;
				301	}
				302
				303	/*
				304	* Conversion tables.
				305	*/
				306	static uchar replbit[1+8][256]; /* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */
				307	static uchar conv18[256][8]; /* conv18[x][y] is the yth pixel in the depth-1 pixel x */
				308	static uchar conv28[256][4]; /* ... */
				309	static uchar conv48[256][2];
				310
				/*
				 * replmul[n] is the multiplier that replicates an n-bit value to
				 * fill 8 bits, for 1 ≤ n ≤ 8, drawn below as a 16-column bitmap.
				 * Each X marks where the bottom (ones) bit of the n-bit pattern
				 * lands.  Only the top 8 bits of the product are actually used;
				 * the lower 8 bits are needed to get bits in the right place
				 * when n is not a divisor of 8.
				 *
				 * Should check to see if its easier to just refer to replmul than
				 * use the precomputed values in replbit.  On PCs it may well
				 * be; on machines with slow multiply instructions it probably isn't.
				 */
				#define a ((((((((((((((((0
				#define X *2+1)
				#define _ *2)
				static int replmul[1+8] = {
					0,					/* n = 0: unused */
					a X X X X X X X X X X X X X X X X,	/* n = 1 */
					a _ X _ X _ X _ X _ X _ X _ X _ X,	/* n = 2 */
					a _ _ X _ _ X _ _ X _ _ X _ _ X _,	/* n = 3 */
					a _ _ _ X _ _ _ X _ _ _ X _ _ _ X,	/* n = 4 */
					a _ _ _ _ X _ _ _ _ X _ _ _ _ X _,	/* n = 5 */
					a _ _ _ _ _ X _ _ _ _ _ X _ _ _ _,	/* n = 6 */
					a _ _ _ _ _ _ X _ _ _ _ _ _ X _ _,	/* n = 7 */
					a _ _ _ _ _ _ _ X _ _ _ _ _ _ _ X,	/* n = 8 */
				};
				#undef a
				#undef X
				#undef _
				339
				340	static void
				341	mktables(void)
				342	{
				343	int i, j, mask, sh, small;
				344
				345	if(tablesbuilt)
				346	return;
				347
				348	fmtinstall('R', Rfmt);
				349	fmtinstall('P', Pfmt);
				350	tablesbuilt = 1;
				351
				352	/* bit replication up to 8 bits */
				353	for(i=0; i<256; i++){
				354	for(j=0; j<=8; j++){ /* j <= 8 [sic] */
				355	small = i & ((1<<j)-1);
				356	replbit[j][i] = (small*replmul[j])>>8;
				357	}
				358	}
				359
				360	/* bit unpacking up to 8 bits, only powers of 2 */
				361	for(i=0; i<256; i++){
				362	for(j=0, sh=7, mask=1; j<8; j++, sh--)
				363	conv18[i][j] = replbit[1][(i>>sh)&mask];
				364
				365	for(j=0, sh=6, mask=3; j<4; j++, sh-=2)
				366	conv28[i][j] = replbit[2][(i>>sh)&mask];
				367
				368	for(j=0, sh=4, mask=15; j<2; j++, sh-=4)
				369	conv48[i][j] = replbit[4][(i>>sh)&mask];
				370	}
				371	}
				372
				373	static uchar ones = 0xff;
				374
				375	/*
				376	* General alpha drawing case. Can handle anything.
				377	*/
				378	typedef struct Buffer Buffer;
				379	struct Buffer {
				380	/* used by most routines */
				381	uchar *red;
				382	uchar *grn;
				383	uchar *blu;
				384	uchar *alpha;
				385	uchar *grey;
				386	u32int *rgba;
				387	int delta; /* number of bytes to add to pointer to get next pixel to the right */
				388
				389	/* used by boolcalc* for mask data */
				390	uchar m; / ptr to mask data r.min byte; like p->bytermin */
				391	int mskip; /* no. of left bits to skip in m /
				392	uchar bm; / ptr to mask data img->r.min byte; like p->bytey0s */
				393	int bmskip; /* no. of left bits to skip in bm /
				394	uchar em; / ptr to mask data img->r.max.x byte; like p->bytey0e */
				395	int emskip; /* no. of right bits to skip in em /
				396	};
				397
				398	typedef struct Param Param;
				399	typedef Buffer Readfn(Param, uchar, int);
				400	typedef void Writefn(Param, uchar, Buffer);
				401	typedef Buffer Calcfn(Buffer, Buffer, Buffer, int, int, int);
				402
				403	enum {
				404	MAXBCACHE = 16
				405	};
				406
				407	/* giant rathole to customize functions with */
				408	struct Param {
				409	Readfn *replcall;
				410	Readfn *greymaskcall;
				411	Readfn *convreadcall;
				412	Writefn *convwritecall;
				413
				414	Memimage *img;
				415	Rectangle r;
				416	int dx; /* of r */
				417	int needbuf;
				418	int convgrey;
				419	int alphaonly;
				420
				421	uchar bytey0s; / byteaddr(Pt(img->r.min.x, img->r.min.y)) */
				422	uchar bytermin; / byteaddr(Pt(r.min.x, img->r.min.y)) */
				423	uchar bytey0e; / byteaddr(Pt(img->r.max.x, img->r.min.y)) */
				424	int bwidth;
				425
				426	int replcache; /* if set, cache buffers */
				427	Buffer bcache[MAXBCACHE];
				428	u32int bfilled;
				429	uchar *bufbase;
				430	int bufoff;
				431	int bufdelta;
				432
				433	int dir;
				434
				435	int convbufoff;
				436	uchar *convbuf;
				437	Param *convdpar;
				438	int convdx;
				439	};
				440
				441	static uchar *drawbuf;
				442	static int ndrawbuf;
				443	static int mdrawbuf;
				444	static Param spar, mpar, dpar; /* easier on the stacks */
				445	static Readfn greymaskread, replread, readptr;
				446	static Writefn nullwrite;
				447	static Calcfn alphacalc0, alphacalc14, alphacalc2810, alphacalc3679, alphacalc5, alphacalc11, alphacalcS;
				448	static Calcfn boolcalc14, boolcalc236789, boolcalc1011;
				449
				450	static Readfn* readfn(Memimage*);
				451	static Readfn* readalphafn(Memimage*);
				452	static Writefn* writefn(Memimage*);
				453
				454	static Calcfn* boolcopyfn(Memimage, Memimage);
				455	static Readfn* convfn(Memimage, Param, Memimage, Param);
				456
				457	static Calcfn *alphacalc[Ncomp] =
				458	{
				459	alphacalc0, /* Clear */
				460	alphacalc14, /* DoutS */
				461	alphacalc2810, /* SoutD */
				462	alphacalc3679, /* DxorS */
				463	alphacalc14, /* DinS */
				464	alphacalc5, /* D */
				465	alphacalc3679, /* DatopS */
				466	alphacalc3679, /* DoverS */
				467	alphacalc2810, /* SinD */
				468	alphacalc3679, /* SatopD */
				469	alphacalc2810, /* S */
				470	alphacalc11, /* SoverD */
				471	};
				472
				473	static Calcfn *boolcalc[Ncomp] =
				474	{
				475	alphacalc0, /* Clear */
				476	boolcalc14, /* DoutS */
				477	boolcalc236789, /* SoutD */
				478	boolcalc236789, /* DxorS */
				479	boolcalc14, /* DinS */
				480	alphacalc5, /* D */
				481	boolcalc236789, /* DatopS */
				482	boolcalc236789, /* DoverS */
				483	boolcalc236789, /* SinD */
				484	boolcalc236789, /* SatopD */
				485	boolcalc1011, /* S */
				486	boolcalc1011, /* SoverD */
				487	};
				488
				489	static int
				490	allocdrawbuf(void)
				491	{
				492	uchar *p;
				493
				494	if(ndrawbuf > mdrawbuf){
				495	p = realloc(drawbuf, ndrawbuf);
				496	if(p == nil){
				497	werrstr("memimagedraw out of memory");
				498	return -1;
				499	}
				500	drawbuf = p;
				501	mdrawbuf = ndrawbuf;
				502	}
				503	return 0;
				504	}
				505
				506	static Param
				507	getparam(Memimage *img, Rectangle r, int convgrey, int needbuf)
				508	{
				509	Param p;
				510	int nbuf;
				511
				512	memset(&p, 0, sizeof p);
				513
				514	p.img = img;
				515	p.r = r;
				516	p.dx = Dx(r);
				517	p.needbuf = needbuf;
				518	p.convgrey = convgrey;
				519
				520	assert(img->r.min.x <= r.min.x && r.min.x < img->r.max.x);
				521
				522	p.bytey0s = byteaddr(img, Pt(img->r.min.x, img->r.min.y));
				523	p.bytermin = byteaddr(img, Pt(r.min.x, img->r.min.y));
				524	p.bytey0e = byteaddr(img, Pt(img->r.max.x, img->r.min.y));
				525	p.bwidth = sizeof(u32int)*img->width;
				526
				527	assert(p.bytey0s <= p.bytermin && p.bytermin <= p.bytey0e);
				528
				529	if(p.r.min.x == p.img->r.min.x)
				530	assert(p.bytermin == p.bytey0s);
				531
				532	nbuf = 1;
				533	if((img->flags&Frepl) && Dy(img->r) <= MAXBCACHE && Dy(img->r) < Dy(r)){
				534	p.replcache = 1;
				535	nbuf = Dy(img->r);
				536	}
				537	p.bufdelta = 4*p.dx;
				538	p.bufoff = ndrawbuf;
				539	ndrawbuf += p.bufdelta*nbuf;
				540
				541	return p;
				542	}
				543
				544	static void
				545	clipy(Memimage img, int y)
				546	{
				547	int dy;
				548
				549	dy = Dy(img->r);
				550	if(*y == dy)
				551	*y = 0;
				552	else if(*y == -1)
				553	*y = dy-1;
				554	assert(0 <= y && y < dy);
				555	}
				556
				557	static void
				558	dumpbuf(char *s, Buffer b, int n)
				559	{
				560	int i;
				561	uchar *p;
				562
				563	print("%s", s);
				564	for(i=0; i<n; i++){
				565	print(" ");
				566	if(p=b.grey){
				567	print(" k%.2uX", *p);
				568	b.grey += b.delta;
				569	}else{
				570	if(p=b.red){
				571	print(" r%.2uX", *p);
				572	b.red += b.delta;
				573	}
				574	if(p=b.grn){
				575	print(" g%.2uX", *p);
				576	b.grn += b.delta;
				577	}
				578	if(p=b.blu){
				579	print(" b%.2uX", *p);
				580	b.blu += b.delta;
				581	}
				582	}
				583	if((p=b.alpha) != &ones){
				584	print(" α%.2uX", *p);
				585	b.alpha += b.delta;
				586	}
				587	}
				588	print("\n");
				589	}
				590
				591	/*
				592	* For each scan line, we expand the pixels from source, mask, and destination
				593	* into byte-aligned red, green, blue, alpha, and grey channels. If buffering is not
				594	* needed and the channels were already byte-aligned (grey8, rgb24, rgba32, rgb32),
				595	* the readers need not copy the data: they can simply return pointers to the data.
				596	* If the destination image is grey and the source is not, it is converted using the NTSC
				597	* formula.
				598	*
				599	* Once we have all the channels, we call either rgbcalc or greycalc, depending on
				600	* whether the destination image is color. This is allowed to overwrite the dst buffer (perhaps
				601	* the actual data, perhaps a copy) with its result. It should only overwrite the dst buffer
				602	* with the same format (i.e. red bytes with red bytes, etc.) A new buffer is returned from
				603	* the calculator, and that buffer is passed to a function to write it to the destination.
				604	* If the buffer is already pointing at the destination, the writing function is a no-op.
				605	*/
				606	#define DBG if(0)
				607	static int
				608	alphadraw(Memdrawparam *par)
				609	{
				610	int isgrey, starty, endy, op;
				611	int needbuf, dsty, srcy, masky;
				612	int y, dir, dx, dy;
				613	Buffer bsrc, bdst, bmask;
				614	Readfn rdsrc, rdmask, *rddst;
				615	Calcfn *calc;
				616	Writefn *wrdst;
				617	Memimage src, mask, *dst;
				618	Rectangle r, sr, mr;
				619
				620	r = par->r;
				621	dx = Dx(r);
				622	dy = Dy(r);
				623
				624	ndrawbuf = 0;
				625
				626	src = par->src;
				627	mask = par->mask;
				628	dst = par->dst;
				629	sr = par->sr;
				630	mr = par->mr;
				631	op = par->op;
				632
				633	isgrey = dst->flags&Fgrey;
				634
				635	/*
				636	* Buffering when src and dst are the same bitmap is sufficient but not
				637	* necessary. There are stronger conditions we could use. We could
				638	* check to see if the rectangles intersect, and if simply moving in the
				639	* correct y direction can avoid the need to buffer.
				640	*/
				641	needbuf = (src->data == dst->data);
				642
				643	spar = getparam(src, sr, isgrey, needbuf);
				644	dpar = getparam(dst, r, isgrey, needbuf);
				645	mpar = getparam(mask, mr, 0, needbuf);
				646
				647	dir = (needbuf && byteaddr(dst, r.min) > byteaddr(src, sr.min)) ? -1 : 1;
				648	spar.dir = mpar.dir = dpar.dir = dir;
				649
				650	/*
				651	* If the mask is purely boolean, we can convert from src to dst format
				652	* when we read src, and then just copy it to dst where the mask tells us to.
				653	* This requires a boolean (1-bit grey) mask and lack of a source alpha channel.
				654	*
				655	* The computation is accomplished by assigning the function pointers as follows:
				656	* rdsrc - read and convert source into dst format in a buffer
				657	* rdmask - convert mask to bytes, set pointer to it
				658	* rddst - fill with pointer to real dst data, but do no reads
				659	* calc - copy src onto dst when mask says to.
				660	* wrdst - do nothing
				661	* This is slightly sleazy, since things aren't doing exactly what their names say,
				662	* but it avoids a fair amount of code duplication to make this a case here
				663	* rather than have a separate booldraw.
				664	*/
				665	//if(drawdebug) iprint("flag %lud mchan %lux=?%x dd %d\n", src->flags&Falpha, mask->chan, GREY1, dst->depth);
				666	if(!(src->flags&Falpha) && mask->chan == GREY1 && dst->depth >= 8 && op == SoverD){
				667	//if(drawdebug) iprint("boolcopy...");
				668	rdsrc = convfn(dst, &dpar, src, &spar);
				669	rddst = readptr;
				670	rdmask = readfn(mask);
				671	calc = boolcopyfn(dst, mask);
				672	wrdst = nullwrite;
				673	}else{
				674	/* usual alphadraw parameter fetching */
				675	rdsrc = readfn(src);
				676	rddst = readfn(dst);
				677	wrdst = writefn(dst);
				678	calc = alphacalc[op];
				679
				680	/*
				681	* If there is no alpha channel, we'll ask for a grey channel
				682	* and pretend it is the alpha.
				683	*/
				684	if(mask->flags&Falpha){
				685	rdmask = readalphafn(mask);
				686	mpar.alphaonly = 1;
				687	}else{
				688	mpar.greymaskcall = readfn(mask);
				689	mpar.convgrey = 1;
				690	rdmask = greymaskread;
				691
				692	/*
				693	* Should really be above, but then boolcopyfns would have
				694	* to deal with bit alignment, and I haven't written that.
				695	*
				696	* This is a common case for things like ellipse drawing.
				697	* When there's no alpha involved and the mask is boolean,
				698	* we can avoid all the division and multiplication.
				699	*/
				700	if(mask->chan == GREY1 && !(src->flags&Falpha))
				701	calc = boolcalc[op];
				702	else if(op == SoverD && !(src->flags&Falpha))
				703	calc = alphacalcS;
				704	}
				705	}
				706
				707	/*
				708	* If the image has a small enough repl rectangle,
				709	* we can just read each line once and cache them.
				710	*/
				711	if(spar.replcache){
				712	spar.replcall = rdsrc;
				713	rdsrc = replread;
				714	}
				715	if(mpar.replcache){
				716	mpar.replcall = rdmask;
				717	rdmask = replread;
				718	}
				719
				720	if(allocdrawbuf() < 0)
				721	return 0;
				722
				723	/*
				724	* Before we were saving only offsets from drawbuf in the parameter
				725	* structures; now that drawbuf has been grown to accomodate us,
				726	* we can fill in the pointers.
				727	*/
				728	spar.bufbase = drawbuf+spar.bufoff;
				729	mpar.bufbase = drawbuf+mpar.bufoff;
				730	dpar.bufbase = drawbuf+dpar.bufoff;
				731	spar.convbuf = drawbuf+spar.convbufoff;
				732
				733	if(dir == 1){
				734	starty = 0;
				735	endy = dy;
				736	}else{
				737	starty = dy-1;
				738	endy = -1;
				739	}
				740
				741	/*
				742	* srcy, masky, and dsty are offsets from the top of their
				743	* respective Rectangles. they need to be contained within
				744	* the rectangles, so clipy can keep them there without division.
				745	*/
				746	srcy = (starty + sr.min.y - src->r.min.y)%Dy(src->r);
				747	masky = (starty + mr.min.y - mask->r.min.y)%Dy(mask->r);
				748	dsty = starty + r.min.y - dst->r.min.y;
				749
				750	assert(0 <= srcy && srcy < Dy(src->r));
				751	assert(0 <= masky && masky < Dy(mask->r));
				752	assert(0 <= dsty && dsty < Dy(dst->r));
				753
				754	for(y=starty; y!=endy; y+=dir, srcy+=dir, masky+=dir, dsty+=dir){
				755	clipy(src, &srcy);
				756	clipy(dst, &dsty);
				757	clipy(mask, &masky);
				758
				759	bsrc = rdsrc(&spar, spar.bufbase, srcy);
				760	DBG print("[");
				761	bmask = rdmask(&mpar, mpar.bufbase, masky);
				762	DBG print("]\n");
				763	bdst = rddst(&dpar, dpar.bufbase, dsty);
				764	DBG dumpbuf("src", bsrc, dx);
				765	DBG dumpbuf("mask", bmask, dx);
				766	DBG dumpbuf("dst", bdst, dx);
				767	bdst = calc(bdst, bsrc, bmask, dx, isgrey, op);
				768	wrdst(&dpar, dpar.bytermin+dsty*dpar.bwidth, bdst);
				769	}
				770
				771	return 1;
				772	}
				773	#undef DBG
				774
				775	static Buffer
				776	alphacalc0(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
				777	{
				778	USED(grey);
				779	USED(op);
				780	memset(bdst.rgba, 0, dx*bdst.delta);
				781	return bdst;
				782	}
				783
				784	static Buffer
				785	alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
				786	{
				787	Buffer obdst;
				788	int fd, sadelta;
				789	int i, sa, ma, q;
				790	u32int s, t;
				791
				792	obdst = bdst;
				793	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
				794	q = bsrc.delta == 4 && bdst.delta == 4;
				795
				796	for(i=0; i<dx; i++){
				797	sa = *bsrc.alpha;
				798	ma = *bmask.alpha;
				799	fd = MUL(sa, ma, t);
				800	if(op == DoutS)
				801	fd = 255-fd;
				802
				803	if(grey){
				804	bdst.grey = MUL(fd, bdst.grey, t);
				805	bsrc.grey += bsrc.delta;
				806	bdst.grey += bdst.delta;
				807	}else{
				808	if(q){
				809	bdst.rgba = MUL0123(fd, bdst.rgba, s, t);
				810	bsrc.rgba++;
				811	bdst.rgba++;
				812	bsrc.alpha += sadelta;
				813	bmask.alpha += bmask.delta;
				814	continue;
				815	}
				816	bdst.red = MUL(fd, bdst.red, t);
				817	bdst.grn = MUL(fd, bdst.grn, t);
				818	bdst.blu = MUL(fd, bdst.blu, t);
				819	bsrc.red += bsrc.delta;
				820	bsrc.blu += bsrc.delta;
				821	bsrc.grn += bsrc.delta;
				822	bdst.red += bdst.delta;
				823	bdst.blu += bdst.delta;
				824	bdst.grn += bdst.delta;
				825	}
				826	if(bdst.alpha != &ones){
				827	bdst.alpha = MUL(fd, bdst.alpha, t);
				828	bdst.alpha += bdst.delta;
				829	}
				830	bmask.alpha += bmask.delta;
				831	bsrc.alpha += sadelta;
				832	}
				833	return obdst;
				834	}
				835
				836	static Buffer
				837	alphacalc2810(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
				838	{
				839	Buffer obdst;
				840	int fs, sadelta;
				841	int i, ma, da, q;
				842	u32int s, t;
				843
				844	obdst = bdst;
				845	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
				846	q = bsrc.delta == 4 && bdst.delta == 4;
				847
				848	for(i=0; i<dx; i++){
				849	ma = *bmask.alpha;
				850	da = *bdst.alpha;
				851	if(op == SoutD)
				852	da = 255-da;
				853	fs = ma;
				854	if(op != S)
				855	fs = MUL(fs, da, t);
				856
				857	if(grey){
				858	bdst.grey = MUL(fs, bsrc.grey, t);
				859	bsrc.grey += bsrc.delta;
				860	bdst.grey += bdst.delta;
				861	}else{
				862	if(q){
				863	bdst.rgba = MUL0123(fs, bsrc.rgba, s, t);
				864	bsrc.rgba++;
				865	bdst.rgba++;
				866	bmask.alpha += bmask.delta;
				867	bdst.alpha += bdst.delta;
				868	continue;
				869	}
				870	bdst.red = MUL(fs, bsrc.red, t);
				871	bdst.grn = MUL(fs, bsrc.grn, t);
				872	bdst.blu = MUL(fs, bsrc.blu, t);
				873	bsrc.red += bsrc.delta;
				874	bsrc.blu += bsrc.delta;
				875	bsrc.grn += bsrc.delta;
				876	bdst.red += bdst.delta;
				877	bdst.blu += bdst.delta;
				878	bdst.grn += bdst.delta;
				879	}
				880	if(bdst.alpha != &ones){
				881	bdst.alpha = MUL(fs, bsrc.alpha, t);
				882	bdst.alpha += bdst.delta;
				883	}
				884	bmask.alpha += bmask.delta;
				885	bsrc.alpha += sadelta;
				886	}
				887	return obdst;
				888	}
				889
				890	static Buffer
				891	alphacalc3679(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
				892	{
				893	Buffer obdst;
				894	int fs, fd, sadelta;
				895	int i, sa, ma, da, q;
				896	u32int s, t, u, v;
				897
				898	obdst = bdst;
				899	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
				900	q = bsrc.delta == 4 && bdst.delta == 4;
				901
				902	for(i=0; i<dx; i++){
				903	sa = *bsrc.alpha;
				904	ma = *bmask.alpha;
				905	da = *bdst.alpha;
				906	if(op == SatopD)
				907	fs = MUL(ma, da, t);
				908	else
				909	fs = MUL(ma, 255-da, t);
				910	if(op == DoverS)
				911	fd = 255;
				912	else{
				913	fd = MUL(sa, ma, t);
				914	if(op != DatopS)
				915	fd = 255-fd;
				916	}
				917
				918	if(grey){
				919	bdst.grey = MUL(fs, bsrc.grey, s)+MUL(fd, *bdst.grey, t);
				920	bsrc.grey += bsrc.delta;
				921	bdst.grey += bdst.delta;
				922	}else{
				923	if(q){
				924	bdst.rgba = MUL0123(fs, bsrc.rgba, s, t)+MUL0123(fd, *bdst.rgba, u, v);
				925	bsrc.rgba++;
				926	bdst.rgba++;
				927	bsrc.alpha += sadelta;
				928	bmask.alpha += bmask.delta;
				929	bdst.alpha += bdst.delta;
				930	continue;
				931	}
				932	bdst.red = MUL(fs, bsrc.red, s)+MUL(fd, *bdst.red, t);
				933	bdst.grn = MUL(fs, bsrc.grn, s)+MUL(fd, *bdst.grn, t);
				934	bdst.blu = MUL(fs, bsrc.blu, s)+MUL(fd, *bdst.blu, t);
				935	bsrc.red += bsrc.delta;
				936	bsrc.blu += bsrc.delta;
				937	bsrc.grn += bsrc.delta;
				938	bdst.red += bdst.delta;
				939	bdst.blu += bdst.delta;
				940	bdst.grn += bdst.delta;
				941	}
				942	if(bdst.alpha != &ones){
				943	*bdst.alpha = MUL(fs, sa, s)+MUL(fd, da, t);
				944	bdst.alpha += bdst.delta;
				945	}
				946	bmask.alpha += bmask.delta;
				947	bsrc.alpha += sadelta;
				948	}
				949	return obdst;
				950	}
				951
				952	static Buffer
				953	alphacalc5(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
				954	{
				955	USED(dx);
				956	USED(grey);
				957	USED(op);
				958	return bdst;
				959	}
				960
				961	static Buffer
				962	alphacalc11(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
				963	{
				964	Buffer obdst;
				965	int fd, sadelta;
				966	int i, sa, ma, q;
				967	u32int s, t, u, v;
				968
				969	USED(op);
				970	obdst = bdst;
				971	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
				972	q = bsrc.delta == 4 && bdst.delta == 4;
				973
				974	for(i=0; i<dx; i++){
				975	sa = *bsrc.alpha;
				976	ma = *bmask.alpha;
				977	fd = 255-MUL(sa, ma, t);
				978
				979	if(grey){
				980	bdst.grey = MUL(ma, bsrc.grey, s)+MUL(fd, *bdst.grey, t);
				981	bsrc.grey += bsrc.delta;
				982	bdst.grey += bdst.delta;
				983	}else{
				984	if(q){
				985	bdst.rgba = MUL0123(ma, bsrc.rgba, s, t)+MUL0123(fd, *bdst.rgba, u, v);
				986	bsrc.rgba++;
				987	bdst.rgba++;
				988	bsrc.alpha += sadelta;
				989	bmask.alpha += bmask.delta;
				990	continue;
				991	}
				992	bdst.red = MUL(ma, bsrc.red, s)+MUL(fd, *bdst.red, t);
				993	bdst.grn = MUL(ma, bsrc.grn, s)+MUL(fd, *bdst.grn, t);
				994	bdst.blu = MUL(ma, bsrc.blu, s)+MUL(fd, *bdst.blu, t);
				995	bsrc.red += bsrc.delta;
				996	bsrc.blu += bsrc.delta;
				997	bsrc.grn += bsrc.delta;
				998	bdst.red += bdst.delta;
				999	bdst.blu += bdst.delta;
				1000	bdst.grn += bdst.delta;
				1001	}
				1002	if(bdst.alpha != &ones){
				1003	bdst.alpha = MUL(ma, sa, s)+MUL(fd, bdst.alpha, t);
				1004	bdst.alpha += bdst.delta;
				1005	}
				1006	bmask.alpha += bmask.delta;
				1007	bsrc.alpha += sadelta;
				1008	}
				1009	return obdst;
				1010	}
				1011
				/*
				not used yet
				source and mask alpha 1
				static Buffer
				alphacalcS0(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
				{
					Buffer obdst;
					int i;

					USED(op);
					obdst = bdst;
					if(bsrc.delta == bdst.delta){
						memmove(bdst.rgba, bsrc.rgba, dx*bdst.delta);
						return obdst;
					}
					for(i=0; i<dx; i++){
						if(grey){
							*bdst.grey = *bsrc.grey;
							bsrc.grey += bsrc.delta;
							bdst.grey += bdst.delta;
						}else{
							*bdst.red = *bsrc.red;
							*bdst.grn = *bsrc.grn;
							*bdst.blu = *bsrc.blu;
							bsrc.red += bsrc.delta;
							bsrc.blu += bsrc.delta;
							bsrc.grn += bsrc.delta;
							bdst.red += bdst.delta;
							bdst.blu += bdst.delta;
							bdst.grn += bdst.delta;
						}
						if(bdst.alpha != &ones){
							*bdst.alpha = 255;
							bdst.alpha += bdst.delta;
						}
					}
					return obdst;
				}
				*/
				1051
				1052	/* source alpha 1 */
				1053	static Buffer
				1054	alphacalcS(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
				1055	{
				1056	Buffer obdst;
				1057	int fd;
				1058	int i, ma;
				1059	u32int s, t;
				1060
				1061	USED(op);
				1062	obdst = bdst;
				1063
				1064	for(i=0; i<dx; i++){
				1065	ma = *bmask.alpha;
				1066	fd = 255-ma;
				1067
				1068	if(grey){
				1069	bdst.grey = MUL(ma, bsrc.grey, s)+MUL(fd, *bdst.grey, t);
				1070	bsrc.grey += bsrc.delta;
				1071	bdst.grey += bdst.delta;
				1072	}else{
				1073	bdst.red = MUL(ma, bsrc.red, s)+MUL(fd, *bdst.red, t);
				1074	bdst.grn = MUL(ma, bsrc.grn, s)+MUL(fd, *bdst.grn, t);
				1075	bdst.blu = MUL(ma, bsrc.blu, s)+MUL(fd, *bdst.blu, t);
				1076	bsrc.red += bsrc.delta;
				1077	bsrc.blu += bsrc.delta;
				1078	bsrc.grn += bsrc.delta;
				1079	bdst.red += bdst.delta;
				1080	bdst.blu += bdst.delta;
				1081	bdst.grn += bdst.delta;
				1082	}
				1083	if(bdst.alpha != &ones){
				1084	bdst.alpha = ma+MUL(fd, bdst.alpha, t);
				1085	bdst.alpha += bdst.delta;
				1086	}
				1087	bmask.alpha += bmask.delta;
				1088	}
				1089	return obdst;
				1090	}
				1091
				1092	static Buffer
				1093	boolcalc14(Buffer bdst, Buffer b1, Buffer bmask, int dx, int grey, int op)
				1094	{
				1095	Buffer obdst;
				1096	int i, ma, zero;
				1097
				1098	obdst = bdst;
				1099
				1100	for(i=0; i<dx; i++){
				1101	ma = *bmask.alpha;
				1102	zero = ma ? op == DoutS : op == DinS;
				1103
				1104	if(grey){
				1105	if(zero)
				1106	*bdst.grey = 0;
				1107	bdst.grey += bdst.delta;
				1108	}else{
				1109	if(zero)
				1110	bdst.red = bdst.grn = *bdst.blu = 0;
				1111	bdst.red += bdst.delta;
				1112	bdst.blu += bdst.delta;
				1113	bdst.grn += bdst.delta;
				1114	}
				1115	bmask.alpha += bmask.delta;
				1116	if(bdst.alpha != &ones){
				1117	if(zero)
				1118	*bdst.alpha = 0;
				1119	bdst.alpha += bdst.delta;
				1120	}
				1121	}
				1122	return obdst;
				1123	}
				1124
				1125	static Buffer
				1126	boolcalc236789(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
				1127	{
				1128	Buffer obdst;
				1129	int fs, fd;
				1130	int i, ma, da, zero;
				1131	u32int s, t;
				1132
				1133	obdst = bdst;
				1134	zero = !(op&1);
				1135
				1136	for(i=0; i<dx; i++){
				1137	ma = *bmask.alpha;
				1138	da = *bdst.alpha;
				1139	fs = da;
				1140	if(op&2)
				1141	fs = 255-da;
				1142	fd = 0;
				1143	if(op&4)
				1144	fd = 255;
				1145
				1146	if(grey){
				1147	if(ma)
				1148	bdst.grey = MUL(fs, bsrc.grey, s)+MUL(fd, *bdst.grey, t);
				1149	else if(zero)
				1150	*bdst.grey = 0;
				1151	bsrc.grey += bsrc.delta;
				1152	bdst.grey += bdst.delta;
				1153	}else{
				1154	if(ma){
				1155	bdst.red = MUL(fs, bsrc.red, s)+MUL(fd, *bdst.red, t);
				1156	bdst.grn = MUL(fs, bsrc.grn, s)+MUL(fd, *bdst.grn, t);
				1157	bdst.blu = MUL(fs, bsrc.blu, s)+MUL(fd, *bdst.blu, t);
				1158	}
				1159	else if(zero)
				1160	bdst.red = bdst.grn = *bdst.blu = 0;
				1161	bsrc.red += bsrc.delta;
				1162	bsrc.blu += bsrc.delta;
				1163	bsrc.grn += bsrc.delta;
				1164	bdst.red += bdst.delta;
				1165	bdst.blu += bdst.delta;
				1166	bdst.grn += bdst.delta;
				1167	}
				1168	bmask.alpha += bmask.delta;
				1169	if(bdst.alpha != &ones){
				1170	if(ma)
				1171	*bdst.alpha = fs+MUL(fd, da, t);
				1172	else if(zero)
				1173	*bdst.alpha = 0;
				1174	bdst.alpha += bdst.delta;
				1175	}
				1176	}
				1177	return obdst;
				1178	}
				1179
				1180	static Buffer
				1181	boolcalc1011(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
				1182	{
				1183	Buffer obdst;
				1184	int i, ma, zero;
				1185
				1186	obdst = bdst;
				1187	zero = !(op&1);
				1188
				1189	for(i=0; i<dx; i++){
				1190	ma = *bmask.alpha;
				1191
				1192	if(grey){
				1193	if(ma)
				1194	bdst.grey = bsrc.grey;
				1195	else if(zero)
				1196	*bdst.grey = 0;
				1197	bsrc.grey += bsrc.delta;
				1198	bdst.grey += bdst.delta;
				1199	}else{
				1200	if(ma){
				1201	bdst.red = bsrc.red;
				1202	bdst.grn = bsrc.grn;
				1203	bdst.blu = bsrc.blu;
				1204	}
				1205	else if(zero)
				1206	bdst.red = bdst.grn = *bdst.blu = 0;
				1207	bsrc.red += bsrc.delta;
				1208	bsrc.blu += bsrc.delta;
				1209	bsrc.grn += bsrc.delta;
				1210	bdst.red += bdst.delta;
				1211	bdst.blu += bdst.delta;
				1212	bdst.grn += bdst.delta;
				1213	}
				1214	bmask.alpha += bmask.delta;
				1215	if(bdst.alpha != &ones){
				1216	if(ma)
				1217	*bdst.alpha = 255;
				1218	else if(zero)
				1219	*bdst.alpha = 0;
				1220	bdst.alpha += bdst.delta;
				1221	}
				1222	}
				1223	return obdst;
				1224	}
				1225	/*
				1226	* Replicated cached scan line read. Call the function listed in the Param,
				1227	* but cache the result so that for replicated images we only do the work once.
				1228	*/
				1229	static Buffer
				1230	replread(Param p, uchar s, int y)
				1231	{
				1232	Buffer *b;
				1233
				1234	USED(s);
				1235	b = &p->bcache[y];
				1236	if((p->bfilled & (1<<y)) == 0){
				1237	p->bfilled \|= 1<<y;
				1238	b = p->replcall(p, p->bufbase+yp->bufdelta, y);
				1239	}
				1240	return *b;
				1241	}
				1242
				1243	/*
				1244	* Alpha reading function that simply relabels the grey pointer.
				1245	*/
				1246	static Buffer
				1247	greymaskread(Param p, uchar buf, int y)
				1248	{
				1249	Buffer b;
				1250
				1251	b = p->greymaskcall(p, buf, y);
				1252	b.alpha = b.grey;
				1253	return b;
				1254	}
				1255
				1256	#define DBG if(0)
				1257	static Buffer
				1258	readnbit(Param p, uchar buf, int y)
				1259	{
				1260	Buffer b;
				1261	Memimage *img;
				1262	uchar repl, r, w, ow, bits;
				1263	int i, n, sh, depth, x, dx, npack, nbits;
				1264
				1265	b.rgba = (u32int*)buf;
				1266	b.grey = w = buf;
				1267	b.red = b.blu = b.grn = w;
				1268	b.alpha = &ones;
				1269	b.delta = 1;
				1270
				1271	dx = p->dx;
				1272	img = p->img;
				1273	depth = img->depth;
				1274	repl = &replbit[depth][0];
				1275	npack = 8/depth;
				1276	sh = 8-depth;
				1277
				1278	/* copy from p->r.min.x until end of repl rectangle */
				1279	x = p->r.min.x;
				1280	n = dx;
				1281	if(n > p->img->r.max.x - x)
				1282	n = p->img->r.max.x - x;
				1283
				1284	r = p->bytermin + y*p->bwidth;
				1285	DBG print("readnbit dx %d %p=%p+%d%d, r=%d fetch %d ", dx, r, p->bytermin, y, p->bwidth, *r, n);
				1286	bits = *r++;
				1287	nbits = 8;
				1288	if(i=x&(npack-1)){
				1289	DBG print("throwaway %d...", i);
				1290	bits <<= depth*i;
				1291	nbits -= depth*i;
				1292	}
				1293	for(i=0; i<n; i++){
				1294	if(nbits == 0){
				1295	DBG print("(%.2ux)...", *r);
				1296	bits = *r++;
				1297	nbits = 8;
				1298	}
				1299	*w++ = repl[bits>>sh];
				1300	DBG print("bit %x...", repl[bits>>sh]);
				1301	bits <<= depth;
				1302	nbits -= depth;
				1303	}
				1304	dx -= n;
				1305	if(dx == 0)
				1306	return b;
				1307
				1308	assert(x+i == p->img->r.max.x);
				1309
				1310	/* copy from beginning of repl rectangle until where we were before. */
				1311	x = p->img->r.min.x;
				1312	n = dx;
				1313	if(n > p->r.min.x - x)
				1314	n = p->r.min.x - x;
				1315
				1316	r = p->bytey0s + y*p->bwidth;
				1317	DBG print("x=%d r=%p...", x, r);
				1318	bits = *r++;
				1319	nbits = 8;
				1320	if(i=x&(npack-1)){
				1321	bits <<= depth*i;
				1322	nbits -= depth*i;
				1323	}
				1324	DBG print("nbits=%d...", nbits);
				1325	for(i=0; i<n; i++){
				1326	if(nbits == 0){
				1327	bits = *r++;
				1328	nbits = 8;
				1329	}
				1330	*w++ = repl[bits>>sh];
				1331	DBG print("bit %x...", repl[bits>>sh]);
				1332	bits <<= depth;
				1333	nbits -= depth;
				1334	DBG print("bits %x nbits %d...", bits, nbits);
				1335	}
				1336	dx -= n;
				1337	if(dx == 0)
				1338	return b;
				1339
				1340	assert(dx > 0);
				1341	/* now we have exactly one full scan line: just replicate the buffer itself until we are done */
				1342	ow = buf;
				1343	while(dx--)
				1344	w++ = ow++;
				1345
				1346	return b;
				1347	}
				1348	#undef DBG
				1349
				1350	#define DBG if(0)
				1351	static void
				1352	writenbit(Param p, uchar w, Buffer src)
				1353	{
				1354	uchar *r;
				1355	u32int bits;
				1356	int i, sh, depth, npack, nbits, x, ex;
				1357
				1358	assert(src.grey != nil && src.delta == 1);
				1359
				1360	x = p->r.min.x;
				1361	ex = x+p->dx;
				1362	depth = p->img->depth;
				1363	npack = 8/depth;
				1364
				1365	i=x&(npack-1);
				1366	bits = i ? (w >> (8-depthi)) : 0;
				1367	nbits = depth*i;
				1368	sh = 8-depth;
				1369	r = src.grey;
				1370
				1371	for(; x<ex; x++){
				1372	bits <<= depth;
				1373	DBG print(" %x", *r);
				1374	bits \|= (*r++ >> sh);
				1375	nbits += depth;
				1376	if(nbits == 8){
				1377	*w++ = bits;
				1378	nbits = 0;
				1379	}
				1380	}
				1381
				1382	if(nbits){
				1383	sh = 8-nbits;
				1384	bits <<= sh;
				1385	bits \|= *w & ((1<<sh)-1);
				1386	*w = bits;
				1387	}
				1388	DBG print("\n");
				1389	return;
				1390	}
				1391	#undef DBG
				1392
				1393	static Buffer
				1394	readcmap(Param p, uchar buf, int y)
				1395	{
				1396	Buffer b;
				1397	int a, convgrey, copyalpha, dx, i, m;
				1398	uchar q, cmap, begin, end, r, w;
				1399
				1400	begin = p->bytey0s + y*p->bwidth;
				1401	r = p->bytermin + y*p->bwidth;
				1402	end = p->bytey0e + y*p->bwidth;
				1403	cmap = p->img->cmap->cmap2rgb;
				1404	convgrey = p->convgrey;
				1405	copyalpha = (p->img->flags&Falpha) ? 1 : 0;
				1406
				1407	w = buf;
				1408	dx = p->dx;
				1409	if(copyalpha){
				1410	b.alpha = buf++;
				1411	a = p->img->shift[CAlpha]/8;
				1412	m = p->img->shift[CMap]/8;
				1413	for(i=0; i<dx; i++){
				1414	*w++ = r[a];
				1415	q = cmap+r[m]*3;
				1416	r += 2;
				1417	if(r == end)
				1418	r = begin;
				1419	if(convgrey){
				1420	*w++ = RGB2K(q[0], q[1], q[2]);
				1421	}else{
				1422	w++ = q[2]; / blue */
				1423	w++ = q[1]; / green */
				1424	w++ = q[0]; / red */
				1425	}
				1426	}
				1427	}else{
				1428	b.alpha = &ones;
				1429	for(i=0; i<dx; i++){
				1430	q = cmap+r++3;
				1431	if(r == end)
				1432	r = begin;
				1433	if(convgrey){
				1434	*w++ = RGB2K(q[0], q[1], q[2]);
				1435	}else{
				1436	w++ = q[2]; / blue */
				1437	w++ = q[1]; / green */
				1438	w++ = q[0]; / red */
				1439	}
				1440	}
				1441	}
				1442
				1443	b.rgba = (u32int*)(buf-copyalpha);
				1444
				1445	if(convgrey){
				1446	b.grey = buf;
				1447	b.red = b.blu = b.grn = buf;
				1448	b.delta = 1+copyalpha;
				1449	}else{
				1450	b.blu = buf;
				1451	b.grn = buf+1;
				1452	b.red = buf+2;
				1453	b.grey = nil;
				1454	b.delta = 3+copyalpha;
				1455	}
				1456	return b;
				1457	}
				1458
				1459	static void
				1460	writecmap(Param p, uchar w, Buffer src)
				1461	{
				1462	uchar cmap, red, grn, blu;
				1463	int i, dx, delta;
				1464
				1465	cmap = p->img->cmap->rgb2cmap;
				1466
				1467	delta = src.delta;
				1468	red= src.red;
				1469	grn = src.grn;
				1470	blu = src.blu;
				1471
				1472	dx = p->dx;
				1473	for(i=0; i<dx; i++, red+=delta, grn+=delta, blu+=delta)
				1474	w++ = cmap[(red>>4)256+(grn>>4)16+(blu>>4)];
				1475	}
				1476
				1477	#define DBG if(0)
				1478	static Buffer
				1479	readbyte(Param p, uchar buf, int y)
				1480	{
				1481	Buffer b;
				1482	Memimage *img;
				1483	int dx, isgrey, convgrey, alphaonly, copyalpha, i, nb;
				1484	uchar begin, end, r, w, rrepl, grepl, brepl, arepl, *krepl;
				1485	uchar ured, ugrn, ublu;
				1486	u32int u;
				1487
				1488	img = p->img;
				1489	begin = p->bytey0s + y*p->bwidth;
				1490	r = p->bytermin + y*p->bwidth;
				1491	end = p->bytey0e + y*p->bwidth;
				1492
				1493	w = buf;
				1494	dx = p->dx;
				1495	nb = img->depth/8;
				1496
				1497	convgrey = p->convgrey; /* convert rgb to grey */
				1498	isgrey = img->flags&Fgrey;
				1499	alphaonly = p->alphaonly;
				1500	copyalpha = (img->flags&Falpha) ? 1 : 0;
				1501
				1502	DBG print("copyalpha %d alphaonly %d convgrey %d isgrey %d\n", copyalpha, alphaonly, convgrey, isgrey);
				1503	/* if we can, avoid processing everything */
				1504	if(!(img->flags&Frepl) && !convgrey && (img->flags&Fbytes)){
				1505	memset(&b, 0, sizeof b);
				1506	if(p->needbuf){
				1507	memmove(buf, r, dx*nb);
				1508	r = buf;
				1509	}
				1510	b.rgba = (u32int*)r;
				1511	if(copyalpha)
				1512	b.alpha = r+img->shift[CAlpha]/8;
				1513	else
				1514	b.alpha = &ones;
				1515	if(isgrey){
				1516	b.grey = r+img->shift[CGrey]/8;
				1517	b.red = b.grn = b.blu = b.grey;
				1518	}else{
				1519	b.red = r+img->shift[CRed]/8;
				1520	b.grn = r+img->shift[CGreen]/8;
				1521	b.blu = r+img->shift[CBlue]/8;
				1522	}
				1523	b.delta = nb;
				1524	return b;
				1525	}
				1526
				1527	DBG print("2\n");
				1528	rrepl = replbit[img->nbits[CRed]];
				1529	grepl = replbit[img->nbits[CGreen]];
				1530	brepl = replbit[img->nbits[CBlue]];
				1531	arepl = replbit[img->nbits[CAlpha]];
				1532	krepl = replbit[img->nbits[CGrey]];
				1533
				1534	for(i=0; i<dx; i++){
				1535	u = r[0] \| (r[1]<<8) \| (r[2]<<16) \| (r[3]<<24);
				1536	if(copyalpha) {
				1537	*w++ = arepl[(u>>img->shift[CAlpha]) & img->mask[CAlpha]];
				1538	DBG print("a %x\n", w[-1]);
				1539	}
				1540
				1541	if(isgrey)
				1542	*w++ = krepl[(u >> img->shift[CGrey]) & img->mask[CGrey]];
				1543	else if(!alphaonly){
				1544	ured = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
				1545	ugrn = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
				1546	ublu = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
				1547	if(convgrey){
				1548	DBG print("g %x %x %x\n", ured, ugrn, ublu);
				1549	*w++ = RGB2K(ured, ugrn, ublu);
				1550	DBG print("%x\n", w[-1]);
				1551	}else{
				1552	*w++ = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
				1553	*w++ = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
				1554	*w++ = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
				1555	}
				1556	}
				1557	r += nb;
				1558	if(r == end)
				1559	r = begin;
				1560	}
				1561
				1562	b.alpha = copyalpha ? buf : &ones;
				1563	b.rgba = (u32int*)buf;
				1564	if(alphaonly){
				1565	b.red = b.grn = b.blu = b.grey = nil;
				1566	if(!copyalpha)
				1567	b.rgba = nil;
				1568	b.delta = 1;
				1569	}else if(isgrey \|\| convgrey){
				1570	b.grey = buf+copyalpha;
				1571	b.red = b.grn = b.blu = buf+copyalpha;
				1572	b.delta = copyalpha+1;
				1573	DBG print("alpha %x grey %x\n", b.alpha ? b.alpha : 0xFF, b.grey);
				1574	}else{
				1575	b.blu = buf+copyalpha;
				1576	b.grn = buf+copyalpha+1;
				1577	b.grey = nil;
				1578	b.red = buf+copyalpha+2;
				1579	b.delta = copyalpha+3;
				1580	}
				1581	return b;
				1582	}
				1583	#undef DBG
				1584
				1585	#define DBG if(0)
				1586	static void
				1587	writebyte(Param p, uchar w, Buffer src)
				1588	{
				1589	Memimage *img;
				1590	int i, isalpha, isgrey, nb, delta, dx, adelta;
				1591	uchar ff, red, grn, blu, grey, *alpha;
				1592	u32int u, mask;
				1593
				1594	img = p->img;
				1595
				1596	red = src.red;
				1597	grn = src.grn;
				1598	blu = src.blu;
				1599	alpha = src.alpha;
				1600	delta = src.delta;
				1601	grey = src.grey;
				1602	dx = p->dx;
				1603
				1604	nb = img->depth/8;
				1605	mask = (nb==4) ? 0 : ~((1<<img->depth)-1);
				1606
				1607	isalpha = img->flags&Falpha;
				1608	isgrey = img->flags&Fgrey;
				1609	adelta = src.delta;
				1610
				1611	if(isalpha && (alpha == nil \|\| alpha == &ones)){
				1612	ff = 0xFF;
				1613	alpha = &ff;
				1614	adelta = 0;
				1615	}
				1616
				1617	for(i=0; i<dx; i++){
				1618	u = w[0] \| (w[1]<<8) \| (w[2]<<16) \| (w[3]<<24);
				1619	DBG print("u %.8lux...", u);
				1620	u &= mask;
				1621	DBG print("&mask %.8lux...", u);
				1622	if(isgrey){
				1623	u \|= ((*grey >> (8-img->nbits[CGrey])) & img->mask[CGrey]) << img->shift[CGrey];
				1624	DBG print("\|grey %.8lux...", u);
				1625	grey += delta;
				1626	}else{
				1627	u \|= ((*red >> (8-img->nbits[CRed])) & img->mask[CRed]) << img->shift[CRed];
				1628	u \|= ((*grn >> (8-img->nbits[CGreen])) & img->mask[CGreen]) << img->shift[CGreen];
				1629	u \|= ((*blu >> (8-img->nbits[CBlue])) & img->mask[CBlue]) << img->shift[CBlue];
				1630	red += delta;
				1631	grn += delta;
				1632	blu += delta;
				1633	DBG print("\|rgb %.8lux...", u);
				1634	}
				1635
				1636	if(isalpha){
				1637	u \|= ((*alpha >> (8-img->nbits[CAlpha])) & img->mask[CAlpha]) << img->shift[CAlpha];
				1638	alpha += adelta;
				1639	DBG print("\|alpha %.8lux...", u);
				1640	}
				1641
				1642	w[0] = u;
				1643	w[1] = u>>8;
				1644	w[2] = u>>16;
				1645	w[3] = u>>24;
				1646	w += nb;
				1647	}
				1648	}
				1649	#undef DBG
				1650
				1651	static Readfn*
				1652	readfn(Memimage *img)
				1653	{
				1654	if(img->depth < 8)
				1655	return readnbit;
				1656	if(img->nbits[CMap] == 8)
				1657	return readcmap;
				1658	return readbyte;
				1659	}
				1660
				1661	static Readfn*
				1662	readalphafn(Memimage *m)
				1663	{
				1664	USED(m);
				1665	return readbyte;
				1666	}
				1667
				1668	static Writefn*
				1669	writefn(Memimage *img)
				1670	{
				1671	if(img->depth < 8)
				1672	return writenbit;
				1673	if(img->chan == CMAP8)
				1674	return writecmap;
				1675	return writebyte;
				1676	}
				1677
				1678	static void
				1679	nullwrite(Param p, uchar s, Buffer b)
				1680	{
				1681	USED(p);
				1682	USED(s);
				1683	}
				1684
				1685	static Buffer
				1686	readptr(Param p, uchar s, int y)
				1687	{
				1688	Buffer b;
				1689	uchar *q;
				1690
				1691	USED(s);
				1692	q = p->bytermin + y*p->bwidth;
				1693	b.red = q; /* ptr to data */
				1694	b.grn = b.blu = b.grey = b.alpha = nil;
				1695	b.rgba = (u32int*)q;
				1696	b.delta = p->img->depth/8;
				1697	return b;
				1698	}
				1699
				1700	static Buffer
				1701	boolmemmove(Buffer bdst, Buffer bsrc, Buffer b1, int dx, int i, int o)
				1702	{
				1703	USED(i);
				1704	USED(o);
				1705	memmove(bdst.red, bsrc.red, dx*bdst.delta);
				1706	return bdst;
				1707	}
				1708
				1709	static Buffer
				1710	boolcopy8(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
				1711	{
				1712	uchar m, r, w, ew;
				1713
				1714	USED(i);
				1715	USED(o);
				1716	m = bmask.grey;
				1717	w = bdst.red;
				1718	r = bsrc.red;
				1719	ew = w+dx;
				1720	for(; w < ew; w++,r++)
				1721	if(*m++)
				1722	w = r;
				1723	return bdst; /* not used */
				1724	}
				1725
				1726	static Buffer
				1727	boolcopy16(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
				1728	{
				1729	uchar *m;
				1730	ushort r, w, *ew;
				1731
				1732	USED(i);
				1733	USED(o);
				1734	m = bmask.grey;
				1735	w = (ushort*)bdst.red;
				1736	r = (ushort*)bsrc.red;
				1737	ew = w+dx;
				1738	for(; w < ew; w++,r++)
				1739	if(*m++)
				1740	w = r;
				1741	return bdst; /* not used */
				1742	}
				1743
				1744	static Buffer
				1745	boolcopy24(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
				1746	{
				1747	uchar *m;
				1748	uchar r, w, *ew;
				1749
				1750	USED(i);
				1751	USED(o);
				1752	m = bmask.grey;
				1753	w = bdst.red;
				1754	r = bsrc.red;
				1755	ew = w+dx*3;
				1756	while(w < ew){
				1757	if(*m++){
				1758	w++ = r++;
				1759	w++ = r++;
				1760	w++ = r++;
				1761	}else{
				1762	w += 3;
				1763	r += 3;
				1764	}
				1765	}
				1766	return bdst; /* not used */
				1767	}
				1768
				1769	static Buffer
				1770	boolcopy32(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
				1771	{
				1772	uchar *m;
				1773	u32int r, w, *ew;
				1774
				1775	USED(i);
				1776	USED(o);
				1777	m = bmask.grey;
				1778	w = (u32int*)bdst.red;
				1779	r = (u32int*)bsrc.red;
				1780	ew = w+dx;
				1781	for(; w < ew; w++,r++)
				1782	if(*m++)
				1783	w = r;
				1784	return bdst; /* not used */
				1785	}
				1786
				1787	static Buffer
				1788	genconv(Param p, uchar buf, int y)
				1789	{
				1790	Buffer b;
				1791	int nb;
				1792	uchar r, w, *ew;
				1793
				1794	/* read from source into RGB format in convbuf */
				1795	b = p->convreadcall(p, p->convbuf, y);
				1796
				1797	/* write RGB format into dst format in buf */
				1798	p->convwritecall(p->convdpar, buf, b);
				1799
				1800	if(p->convdx){
				1801	nb = p->convdpar->img->depth/8;
				1802	r = buf;
				1803	w = buf+nb*p->dx;
				1804	ew = buf+nb*p->convdx;
				1805	while(w<ew)
				1806	w++ = r++;
				1807	}
				1808
				1809	b.red = buf;
				1810	b.blu = b.grn = b.grey = b.alpha = nil;
				1811	b.rgba = (u32int*)buf;
				1812	b.delta = 0;
				1813
				1814	return b;
				1815	}
				1816
				1817	static Readfn*
				1818	convfn(Memimage dst, Param dpar, Memimage src, Param spar)
				1819	{
				1820	if(dst->chan == src->chan && !(src->flags&Frepl)){
				1821	//if(drawdebug) iprint("readptr...");
				1822	return readptr;
				1823	}
				1824
				1825	if(dst->chan==CMAP8 && (src->chan==GREY1\|\|src->chan==GREY2\|\|src->chan==GREY4)){
				1826	/* cheat because we know the replicated value is exactly the color map entry. */
				1827	//if(drawdebug) iprint("Readnbit...");
				1828	return readnbit;
				1829	}
				1830
				1831	spar->convreadcall = readfn(src);
				1832	spar->convwritecall = writefn(dst);
				1833	spar->convdpar = dpar;
				1834
				1835	/* allocate a conversion buffer */
				1836	spar->convbufoff = ndrawbuf;
				1837	ndrawbuf += spar->dx*4;
				1838
				1839	if(spar->dx > Dx(spar->img->r)){
				1840	spar->convdx = spar->dx;
				1841	spar->dx = Dx(spar->img->r);
				1842	}
				1843
				1844	//if(drawdebug) iprint("genconv...");
				1845	return genconv;
				1846	}
				1847
				1848	/*
				1849	* Do NOT call this directly. pixelbits is a wrapper
				1850	* around this that fetches the bits from the X server
				1851	* when necessary.
				1852	*/
				1853	u32int
				1854	_pixelbits(Memimage *i, Point pt)
				1855	{
				1856	uchar *p;
				1857	u32int val;
				1858	int off, bpp, npack;
				1859
				1860	val = 0;
				1861	p = byteaddr(i, pt);
				1862	switch(bpp=i->depth){
				1863	case 1:
				1864	case 2:
				1865	case 4:
				1866	npack = 8/bpp;
				1867	off = pt.x%npack;
				1868	val = p[0] >> bpp*(npack-1-off);
				1869	val &= (1<<bpp)-1;
				1870	break;
				1871	case 8:
				1872	val = p[0];
				1873	break;
				1874	case 16:
				1875	val = p[0]\|(p[1]<<8);
				1876	break;
				1877	case 24:
				1878	val = p[0]\|(p[1]<<8)\|(p[2]<<16);
				1879	break;
				1880	case 32:
				1881	val = p[0]\|(p[1]<<8)\|(p[2]<<16)\|(p[3]<<24);
				1882	break;
				1883	}
				1884	while(bpp<32){
				1885	val \|= val<<bpp;
				1886	bpp *= 2;
				1887	}
				1888	return val;
				1889	}
				1890
				1891	static Calcfn*
				1892	boolcopyfn(Memimage img, Memimage mask)
				1893	{
				1894	if(mask->flags&Frepl && Dx(mask->r)==1 && Dy(mask->r)==1 && pixelbits(mask, mask->r.min)==~0)
				1895	return boolmemmove;
				1896
				1897	switch(img->depth){
				1898	case 8:
				1899	return boolcopy8;
				1900	case 16:
				1901	return boolcopy16;
				1902	case 24:
				1903	return boolcopy24;
				1904	case 32:
				1905	return boolcopy32;
				1906	default:
				1907	assert(0 /* boolcopyfn */);
				1908	}
rsc	be22ae2	2004-03-26 01:59:35 +0000	[diff] [blame]	1909	return 0;
rsc	76193d7	2003-09-30 17:47:42 +0000	[diff] [blame]	1910	}
				1911
				1912	/*
				1913	* Optimized draw for filling and scrolling; uses memset and memmove.
				1914	*/
				1915	static void
				1916	memsets(void *vp, ushort val, int n)
				1917	{
				1918	ushort p, ep;
				1919
				1920	p = vp;
				1921	ep = p+n;
				1922	while(p<ep)
				1923	*p++ = val;
				1924	}
				1925
				1926	static void
				1927	memsetl(void *vp, u32int val, int n)
				1928	{
				1929	u32int p, ep;
				1930
				1931	p = vp;
				1932	ep = p+n;
				1933	while(p<ep)
				1934	*p++ = val;
				1935	}
				1936
				1937	static void
				1938	memset24(void *vp, u32int val, int n)
				1939	{
				1940	uchar p, ep;
				1941	uchar a,b,c;
				1942
				1943	p = vp;
				1944	ep = p+3*n;
				1945	a = val;
				1946	b = val>>8;
				1947	c = val>>16;
				1948	while(p<ep){
				1949	*p++ = a;
				1950	*p++ = b;
				1951	*p++ = c;
				1952	}
				1953	}
				1954
				1955	u32int
				1956	_imgtorgba(Memimage *img, u32int val)
				1957	{
				1958	uchar r, g, b, a;
				1959	int nb, ov, v;
				1960	u32int chan;
				1961	uchar *p;
				1962
				1963	a = 0xFF;
				1964	r = g = b = 0xAA; /* garbage */
				1965	for(chan=img->chan; chan; chan>>=8){
				1966	nb = NBITS(chan);
				1967	ov = v = val&((1<<nb)-1);
				1968	val >>= nb;
				1969
				1970	while(nb < 8){
				1971	v \|= v<<nb;
				1972	nb *= 2;
				1973	}
				1974	v >>= (nb-8);
				1975
				1976	switch(TYPE(chan)){
				1977	case CRed:
				1978	r = v;
				1979	break;
				1980	case CGreen:
				1981	g = v;
				1982	break;
				1983	case CBlue:
				1984	b = v;
				1985	break;
				1986	case CAlpha:
				1987	a = v;
				1988	break;
				1989	case CGrey:
				1990	r = g = b = v;
				1991	break;
				1992	case CMap:
				1993	p = img->cmap->cmap2rgb+3*ov;
				1994	r = *p++;
				1995	g = *p++;
				1996	b = *p;
				1997	break;
				1998	}
				1999	}
				2000	return (r<<24)\|(g<<16)\|(b<<8)\|a;
				2001	}
				2002
				2003	u32int
				2004	_rgbatoimg(Memimage *img, u32int rgba)
				2005	{
				2006	u32int chan;
				2007	int d, nb;
				2008	u32int v;
				2009	uchar *p, r, g, b, a, m;
				2010
				2011	v = 0;
				2012	r = rgba>>24;
				2013	g = rgba>>16;
				2014	b = rgba>>8;
				2015	a = rgba;
				2016	d = 0;
				2017	for(chan=img->chan; chan; chan>>=8){
				2018	nb = NBITS(chan);
				2019	switch(TYPE(chan)){
				2020	case CRed:
				2021	v \|= (r>>(8-nb))<<d;
				2022	break;
				2023	case CGreen:
				2024	v \|= (g>>(8-nb))<<d;
				2025	break;
				2026	case CBlue:
				2027	v \|= (b>>(8-nb))<<d;
				2028	break;
				2029	case CAlpha:
				2030	v \|= (a>>(8-nb))<<d;
				2031	break;
				2032	case CMap:
				2033	p = img->cmap->rgb2cmap;
				2034	m = p[(r>>4)256+(g>>4)16+(b>>4)];
				2035	v \|= (m>>(8-nb))<<d;
				2036	break;
				2037	case CGrey:
				2038	m = RGB2K(r,g,b);
				2039	v \|= (m>>(8-nb))<<d;
				2040	break;
				2041	}
				2042	d += nb;
				2043	}
				2044	// print("rgba2img %.8lux = %.lux\n", rgba, 2d/8, v);
				2045	return v;
				2046	}
				2047
				2048	#define DBG if(0)
				2049	static int
				2050	memoptdraw(Memdrawparam *par)
				2051	{
				2052	int m, y, dy, dx, op;
				2053	u32int v;
				2054	Memimage *src;
				2055	Memimage *dst;
				2056
				2057	dx = Dx(par->r);
				2058	dy = Dy(par->r);
				2059	src = par->src;
				2060	dst = par->dst;
				2061	op = par->op;
				2062
				2063	DBG print("state %lux mval %lux dd %d\n", par->state, par->mval, dst->depth);
				2064	/*
				2065	* If we have an opaque mask and source is one opaque pixel we can convert to the
				2066	* destination format and just replicate with memset.
				2067	*/
				2068	m = Simplesrc\|Simplemask\|Fullmask;
				2069	if((par->state&m)==m && (par->srgba&0xFF) == 0xFF && (op ==S \|\| op == SoverD)){
				2070	uchar *dp, p[4];
				2071	int d, dwid, ppb, np, nb;
				2072	uchar lm, rm;
				2073
				2074	DBG print("memopt, dst %p, dst->data->bdata %p\n", dst, dst->data->bdata);
				2075	dwid = dst->width*sizeof(u32int);
				2076	dp = byteaddr(dst, par->r.min);
				2077	v = par->sdval;
				2078	DBG print("sdval %lud, depth %d\n", v, dst->depth);
				2079	switch(dst->depth){
				2080	case 1:
				2081	case 2:
				2082	case 4:
				2083	for(d=dst->depth; d<8; d*=2)
				2084	v \|= (v<<d);
				2085	ppb = 8/dst->depth; /* pixels per byte */
				2086	m = ppb-1;
				2087	/* left edge */
				2088	np = par->r.min.x&m; /* no. pixels unused on left side of word */
				2089	dx -= (ppb-np);
				2090	nb = 8 - np * dst->depth; /* no. bits used on right side of word */
				2091	lm = (1<<nb)-1;
				2092	DBG print("np %d x %d nb %d lm %ux ppb %d m %ux\n", np, par->r.min.x, nb, lm, ppb, m);
				2093
				2094	/* right edge */
				2095	np = par->r.max.x&m; /* no. pixels used on left side of word */
				2096	dx -= np;
				2097	nb = 8 - np * dst->depth; /* no. bits unused on right side of word */
				2098	rm = ~((1<<nb)-1);
				2099	DBG print("np %d x %d nb %d rm %ux ppb %d m %ux\n", np, par->r.max.x, nb, rm, ppb, m);
				2100
				2101	DBG print("dx %d Dx %d\n", dx, Dx(par->r));
				2102	/* lm, rm are masks that are 1 where we should touch the bits */
				2103	if(dx < 0){ /* just one byte */
				2104	lm &= rm;
				2105	for(y=0; y<dy; y++, dp+=dwid)
				2106	dp ^= (v ^ dp) & lm;
				2107	}else if(dx == 0){ /* no full bytes */
				2108	if(lm)
				2109	dwid--;
				2110
				2111	for(y=0; y<dy; y++, dp+=dwid){
				2112	if(lm){
				2113	DBG print("dp %p v %lux lm %ux (v ^ dp) & lm %lux\n", dp, v, lm, (v^dp)&lm);
				2114	dp ^= (v ^ dp) & lm;
				2115	dp++;
				2116	}
				2117	dp ^= (v ^ dp) & rm;
				2118	}
				2119	}else{ /* full bytes in middle */
				2120	dx /= ppb;
				2121	if(lm)
				2122	dwid--;
				2123	dwid -= dx;
				2124
				2125	for(y=0; y<dy; y++, dp+=dwid){
				2126	if(lm){
				2127	dp ^= (v ^ dp) & lm;
				2128	dp++;
				2129	}
				2130	memset(dp, v, dx);
				2131	dp += dx;
				2132	dp ^= (v ^ dp) & rm;
				2133	}
				2134	}
				2135	return 1;
				2136	case 8:
				2137	for(y=0; y<dy; y++, dp+=dwid)
				2138	memset(dp, v, dx);
				2139	return 1;
				2140	case 16:
				2141	p[0] = v; /* make little endian */
				2142	p[1] = v>>8;
				2143	v = (ushort)p;
				2144	DBG print("dp=%p; dx=%d; for(y=0; y<%d; y++, dp+=%d)\nmemsets(dp, v, dx);\n",
				2145	dp, dx, dy, dwid);
				2146	for(y=0; y<dy; y++, dp+=dwid)
				2147	memsets(dp, v, dx);
				2148	return 1;
				2149	case 24:
				2150	for(y=0; y<dy; y++, dp+=dwid)
				2151	memset24(dp, v, dx);
				2152	return 1;
				2153	case 32:
				2154	p[0] = v; /* make little endian */
				2155	p[1] = v>>8;
				2156	p[2] = v>>16;
				2157	p[3] = v>>24;
				2158	v = (u32int)p;
				2159	for(y=0; y<dy; y++, dp+=dwid)
				2160	memsetl(dp, v, dx);
				2161	return 1;
				2162	default:
				2163	assert(0 /* bad dest depth in memoptdraw */);
				2164	}
				2165	}
				2166
				2167	/*
				2168	* If no source alpha, an opaque mask, we can just copy the
				2169	* source onto the destination. If the channels are the same and
				2170	* the source is not replicated, memmove suffices.
				2171	*/
				2172	m = Simplemask\|Fullmask;
				2173	if((par->state&(m\|Replsrc))==m && src->depth >= 8
				2174	&& src->chan == dst->chan && !(src->flags&Falpha) && (op == S \|\| op == SoverD)){
				2175	uchar sp, dp;
				2176	long swid, dwid, nb;
				2177	int dir;
				2178
				2179	if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min))
				2180	dir = -1;
				2181	else
				2182	dir = 1;
				2183
				2184	swid = src->width*sizeof(u32int);
				2185	dwid = dst->width*sizeof(u32int);
				2186	sp = byteaddr(src, par->sr.min);
				2187	dp = byteaddr(dst, par->r.min);
				2188	if(dir == -1){
				2189	sp += (dy-1)*swid;
				2190	dp += (dy-1)*dwid;
				2191	swid = -swid;
				2192	dwid = -dwid;
				2193	}
				2194	nb = (dx*src->depth)/8;
				2195	for(y=0; y<dy; y++, sp+=swid, dp+=dwid)
				2196	memmove(dp, sp, nb);
				2197	return 1;
				2198	}
				2199
				2200	/*
				2201	* If we have a 1-bit mask, 1-bit source, and 1-bit destination, and
				2202	* they're all bit aligned, we can just use bit operators. This happens
				2203	* when we're manipulating boolean masks, e.g. in the arc code.
				2204	*/
				2205	if((par->state&(Simplemask\|Simplesrc\|Replmask\|Replsrc))==0
				2206	&& dst->chan==GREY1 && src->chan==GREY1 && par->mask->chan==GREY1
				2207	&& (par->r.min.x&7)==(par->sr.min.x&7) && (par->r.min.x&7)==(par->mr.min.x&7)){
				2208	uchar sp, dp, *mp;
				2209	uchar lm, rm;
				2210	long swid, dwid, mwid;
				2211	int i, x, dir;
				2212
				2213	sp = byteaddr(src, par->sr.min);
				2214	dp = byteaddr(dst, par->r.min);
				2215	mp = byteaddr(par->mask, par->mr.min);
				2216	swid = src->width*sizeof(u32int);
				2217	dwid = dst->width*sizeof(u32int);
				2218	mwid = par->mask->width*sizeof(u32int);
				2219
				2220	if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min)){
				2221	dir = -1;
				2222	}else
				2223	dir = 1;
				2224
				2225	lm = 0xFF>>(par->r.min.x&7);
				2226	rm = 0xFF<<(8-(par->r.max.x&7));
				2227	dx -= (8-(par->r.min.x&7)) + (par->r.max.x&7);
				2228
				2229	if(dx < 0){ /* one byte wide */
				2230	lm &= rm;
				2231	if(dir == -1){
				2232	dp += dwid*(dy-1);
				2233	sp += swid*(dy-1);
				2234	mp += mwid*(dy-1);
				2235	dwid = -dwid;
				2236	swid = -swid;
				2237	mwid = -mwid;
				2238	}
				2239	for(y=0; y<dy; y++){
				2240	dp ^= (dp ^ sp) & mp & lm;
				2241	dp += dwid;
				2242	sp += swid;
				2243	mp += mwid;
				2244	}
				2245	return 1;
				2246	}
				2247
				2248	dx /= 8;
				2249	if(dir == 1){
				2250	i = (lm!=0)+dx+(rm!=0);
				2251	mwid -= i;
				2252	swid -= i;
				2253	dwid -= i;
				2254	for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
				2255	if(lm){
				2256	dp ^= (dp ^ sp++) & mp++ & lm;
				2257	dp++;
				2258	}
				2259	for(x=0; x<dx; x++){
				2260	dp ^= (dp ^ sp++) & mp++;
				2261	dp++;
				2262	}
				2263	if(rm){
				2264	dp ^= (dp ^ sp++) & mp++ & rm;
				2265	dp++;
				2266	}
				2267	}
				2268	return 1;
				2269	}else{
				2270	/* dir == -1 */
				2271	i = (lm!=0)+dx+(rm!=0);
				2272	dp += dwid*(dy-1)+i-1;
				2273	sp += swid*(dy-1)+i-1;
				2274	mp += mwid*(dy-1)+i-1;
				2275	dwid = -dwid+i;
				2276	swid = -swid+i;
				2277	mwid = -mwid+i;
				2278	for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
				2279	if(rm){
				2280	dp ^= (dp ^ sp--) & mp-- & rm;
				2281	dp--;
				2282	}
				2283	for(x=0; x<dx; x++){
				2284	dp ^= (dp ^ sp--) & mp--;
				2285	dp--;
				2286	}
				2287	if(lm){
				2288	dp ^= (dp ^ sp--) & mp-- & lm;
				2289	dp--;
				2290	}
				2291	}
				2292	}
				2293	return 1;
				2294	}
				2295	return 0;
				2296	}
				2297	#undef DBG
				2298
				2299	/*
				2300	* Boolean character drawing.
				2301	* Solid opaque color through a 1-bit greyscale mask.
				2302	*/
				2303	#define DBG if(0)
				2304	static int
				2305	chardraw(Memdrawparam *par)
				2306	{
				2307	u32int bits;
				2308	int i, ddepth, dy, dx, x, bx, ex, y, npack, bsh, depth, op;
				2309	u32int v, maskwid, dstwid;
				2310	uchar wp, rp, q, wc;
				2311	ushort *ws;
				2312	u32int *wl;
				2313	uchar sp[4];
				2314	Rectangle r, mr;
				2315	Memimage mask, src, *dst;
				2316
				2317	if(0) if(drawdebug) iprint("chardraw? mf %lux md %d sf %lux dxs %d dys %d dd %d ddat %p sdat %p\n",
				2318	par->mask->flags, par->mask->depth, par->src->flags,
				2319	Dx(par->src->r), Dy(par->src->r), par->dst->depth, par->dst->data, par->src->data);
				2320
				2321	mask = par->mask;
				2322	src = par->src;
				2323	dst = par->dst;
				2324	r = par->r;
				2325	mr = par->mr;
				2326	op = par->op;
				2327
				2328	if((par->state&(Replsrc\|Simplesrc\|Replmask)) != (Replsrc\|Simplesrc)
				2329	\|\| mask->depth != 1 \|\| src->flags&Falpha \|\| dst->depth<8 \|\| dst->data==src->data
				2330	\|\| op != SoverD)
				2331	return 0;
				2332
				2333	//if(drawdebug) iprint("chardraw...");
				2334
				2335	depth = mask->depth;
				2336	maskwid = mask->width*sizeof(u32int);
				2337	rp = byteaddr(mask, mr.min);
				2338	npack = 8/depth;
				2339	bsh = (mr.min.x % npack) * depth;
				2340
				2341	wp = byteaddr(dst, r.min);
				2342	dstwid = dst->width*sizeof(u32int);
				2343	DBG print("bsh %d\n", bsh);
				2344	dy = Dy(r);
				2345	dx = Dx(r);
				2346
				2347	ddepth = dst->depth;
				2348
				2349	/*
				2350	* for loop counts from bsh to bsh+dx
				2351	*
				2352	* we want the bottom bits to be the amount
				2353	* to shift the pixels down, so for n≡0 (mod 8) we want
				2354	* bottom bits 7. for n≡1, 6, etc.
				2355	* the bits come from -n-1.
				2356	*/
				2357
				2358	bx = -bsh-1;
				2359	ex = -bsh-1-dx;
				2360	SET(bits);
				2361	v = par->sdval;
				2362
				2363	/* make little endian */
				2364	sp[0] = v;
				2365	sp[1] = v>>8;
				2366	sp[2] = v>>16;
				2367	sp[3] = v>>24;
				2368
				2369	//print("sp %x %x %x %x\n", sp[0], sp[1], sp[2], sp[3]);
				2370	for(y=0; y<dy; y++, rp+=maskwid, wp+=dstwid){
				2371	q = rp;
				2372	if(bsh)
				2373	bits = *q++;
				2374	switch(ddepth){
				2375	case 8:
				2376	//if(drawdebug) iprint("8loop...");
				2377	wc = wp;
				2378	for(x=bx; x>ex; x--, wc++){
				2379	i = x&7;
				2380	if(i == 8-1)
				2381	bits = *q++;
				2382	DBG print("bits %lux sh %d...", bits, i);
				2383	if((bits>>i)&1)
				2384	*wc = v;
				2385	}
				2386	break;
				2387	case 16:
				2388	ws = (ushort*)wp;
				2389	v = (ushort)sp;
				2390	for(x=bx; x>ex; x--, ws++){
				2391	i = x&7;
				2392	if(i == 8-1)
				2393	bits = *q++;
				2394	DBG print("bits %lux sh %d...", bits, i);
				2395	if((bits>>i)&1)
				2396	*ws = v;
				2397	}
				2398	break;
				2399	case 24:
				2400	wc = wp;
				2401	for(x=bx; x>ex; x--, wc+=3){
				2402	i = x&7;
				2403	if(i == 8-1)
				2404	bits = *q++;
				2405	DBG print("bits %lux sh %d...", bits, i);
				2406	if((bits>>i)&1){
				2407	wc[0] = sp[0];
				2408	wc[1] = sp[1];
				2409	wc[2] = sp[2];
				2410	}
				2411	}
				2412	break;
				2413	case 32:
				2414	wl = (u32int*)wp;
				2415	v = (u32int)sp;
				2416	for(x=bx; x>ex; x--, wl++){
				2417	i = x&7;
				2418	if(i == 8-1)
				2419	bits = *q++;
				2420	DBG iprint("bits %lux sh %d...", bits, i);
				2421	if((bits>>i)&1)
				2422	*wl = v;
				2423	}
				2424	break;
				2425	}
				2426	}
				2427
				2428	DBG print("\n");
				2429	return 1;
				2430	}
				2431	#undef DBG
				2432
				2433
				2434	/*
				2435	* Fill entire byte with replicated (if necessary) copy of source pixel,
				2436	* assuming destination ldepth is >= source ldepth.
				2437	*
				2438	* This code is just plain wrong for >8bpp.
				2439	*
				2440	u32int
				2441	membyteval(Memimage *src)
				2442	{
				2443	int i, val, bpp;
				2444	uchar uc;
				2445
				2446	unloadmemimage(src, src->r, &uc, 1);
				2447	bpp = src->depth;
				2448	uc <<= (src->r.min.x&(7/src->depth))*src->depth;
				2449	uc &= ~(0xFF>>bpp);
				2450	* pixel value is now in high part of byte. repeat throughout byte
				2451	val = uc;
				2452	for(i=bpp; i<8; i<<=1)
				2453	val |= val>>i;
				2454	return val;
				2455	}
				2456	*
				2457	*/
				2458
				2459	void
				2460	_memfillcolor(Memimage *i, u32int val)
				2461	{
				2462	u32int bits;
				2463	int d, y;
				2464	uchar p[4];
				2465
				2466	if(val == DNofill)
				2467	return;
				2468
				2469	bits = _rgbatoimg(i, val);
				2470	switch(i->depth){
				2471	case 24: /* 24-bit images suck */
				2472	for(y=i->r.min.y; y<i->r.max.y; y++)
				2473	memset24(byteaddr(i, Pt(i->r.min.x, y)), bits, Dx(i->r));
				2474	break;
				2475	default: /* 1, 2, 4, 8, 16, 32 */
				2476	for(d=i->depth; d<32; d*=2)
				2477	bits = (bits << d) \| bits;
				2478	p[0] = bits; /* make little endian */
				2479	p[1] = bits>>8;
				2480	p[2] = bits>>16;
				2481	p[3] = bits>>24;
				2482	bits = (u32int)p;
				2483	memsetl(wordaddr(i, i->r.min), bits, i->width*Dy(i->r));
				2484	break;
				2485	}
				2486	}
				2487