Blame - src/libflate/deflate.c - plan9

blob: ceaa77f4407aa83be009ae00d7d9c4b4d05574ba [file] [log] [blame]

rsc	b6afd33	2003-11-23 18:19:18 +0000	[diff] [blame]	1	#include <u.h>
				2	#include <libc.h>
				3	#include <flate.h>
				4
				5	typedef struct Chain Chain;
				6	typedef struct Chains Chains;
				7	typedef struct Dyncode Dyncode;
				8	typedef struct Huff Huff;
				9	typedef struct LZblock LZblock;
				10	typedef struct LZstate LZstate;
				11
				12	enum
				13	{
				14	/*
				15	* deflate format paramaters
				16	*/
				17	DeflateUnc = 0, /* uncompressed block */
				18	DeflateFix = 1, /* fixed huffman codes */
				19	DeflateDyn = 2, /* dynamic huffman codes */
				20
				21	DeflateEob = 256, /* end of block code in lit/len book */
				22	DeflateMaxBlock = 641024-1, / maximum size of uncompressed block */
				23
				24	DeflateMaxExp = 10, /* maximum expansion for a block */
				25
				26	LenStart = 257, /* start of length codes in litlen */
				27	Nlitlen = 288, /* number of litlen codes */
				28	Noff = 30, /* number of offset codes */
				29	Nclen = 19, /* number of codelen codes */
				30
				31	MaxOff = 32*1024,
				32	MinMatch = 3, /* shortest match possible */
				33	MaxMatch = 258, /* longest match possible */
				34
				35	/*
				36	* huffman code paramaters
				37	*/
				38	MaxLeaf = Nlitlen,
				39	MaxHuffBits = 16, /* max bits in a huffman code */
				40	ChainMem = 2 * (MaxHuffBits - 1) * MaxHuffBits,
				41
				42	/*
				43	* coding of the lz parse
				44	*/
				45	LenFlag = 1 << 3,
				46	LenShift = 4, /* leaves enough space for MinMatchMaxOff */
				47	MaxLitRun = LenFlag - 1,
				48
				49	/*
				50	* internal lz paramaters
				51	*/
				52	DeflateOut = 4096, /* output buffer size */
				53	BlockSize = 8192, /* attempted input read quanta */
				54	DeflateBlock = DeflateMaxBlock & ~(BlockSize - 1),
				55	MinMatchMaxOff = 4096, /* max profitable offset for small match;
				56	* assumes 8 bits for len, 5+10 for offset
				57	* DONT CHANGE WITHOUT CHANGING LZPARSE CONSTANTS
				58	*/
				59	HistSlop = 512, /* must be at lead MaxMatch */
				60	HistBlock = 64*1024,
				61	HistSize = HistBlock + HistSlop,
				62
				63	HashLog = 13,
				64	HashSize = 1<<HashLog,
				65
				66	MaxOffCode = 256, /* biggest offset looked up in direct table */
				67
				68	EstLitBits = 8,
				69	EstLenBits = 4,
rsc	cbeb0b2	2006-04-01 19:24:03 +0000	[diff] [blame]	70	EstOffBits = 5
rsc	b6afd33	2003-11-23 18:19:18 +0000	[diff] [blame]	71	};
				72
				73	/*
				74	* knuth vol. 3 multiplicative hashing
				75	* each byte x chosen according to rules
				76	* 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
				77	* with reasonable spread between the bytes & their complements
				78	*
				79	* the 3 byte value appears to be as almost good as the 4 byte value,
				80	* and might be faster on some machines
				81	*/
				82	/*
rsc	00c6cee	2006-06-07 23:25:39 +0000	[diff] [blame]	83	#define hashit(c) ((u32int)((c) * 0x6b43a9) >> (24 - HashLog))
rsc	b6afd33	2003-11-23 18:19:18 +0000	[diff] [blame]	84	*/
rsc	00c6cee	2006-06-07 23:25:39 +0000	[diff] [blame]	85	#define hashit(c) ((u32int)(((c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog))
rsc	b6afd33	2003-11-23 18:19:18 +0000	[diff] [blame]	86
				87	/*
				88	* lempel-ziv style compression state
				89	*/
				90	struct LZstate
				91	{
				92	uchar hist[HistSize];
				93	ulong pos; /* current location in history buffer */
				94	ulong avail; /* data available after pos */
				95	int eof;
				96	ushort hash[HashSize]; /* hash chains */
				97	ushort nexts[MaxOff];
				98	int now; /* pos in hash chains */
				99	int dot; /* dawn of time in history */
				100	int prevlen; /* lazy matching state */
				101	int prevoff;
				102	int maxcheck; /* compressor tuning */
				103
				104	uchar obuf[DeflateOut];
				105	uchar out; / current position in the output buffer */
				106	uchar *eout;
				107	ulong bits; /* bit shift register */
				108	int nbits;
				109	int rbad; /* got an error reading the buffer */
				110	int wbad; /* got an error writing the buffer */
				111	int (w)(void, void*, int);
				112	void *wr;
				113
				114	ulong totr; /* total input size */
				115	ulong totw; /* total output size */
				116	int debug;
				117	};
				118
				119	struct LZblock
				120	{
				121	ushort parse[DeflateMaxBlock / 2 + 1];
				122	int lastv; /* value being constucted for parse */
				123	ulong litlencount[Nlitlen];
				124	ulong offcount[Noff];
				125	ushort eparse; / limit for parse table */
				126	int bytes; /* consumed from the input */
				127	int excost; /* cost of encoding extra len & off bits */
				128	};
				129
				130	/*
				131	* huffman code table
				132	*/
				133	struct Huff
				134	{
				135	short bits; /* length of the code */
				136	ushort encode; /* the code */
				137	};
				138
				139	/*
				140	* encoding of dynamic huffman trees
				141	*/
				142	struct Dyncode
				143	{
				144	int nlit;
				145	int noff;
				146	int nclen;
				147	int ncode;
				148	Huff codetab[Nclen];
				149	uchar codes[Nlitlen+Noff];
				150	uchar codeaux[Nlitlen+Noff];
				151	};
				152
				153	static int deflateb(LZstate lz, LZblock lzb, void rr, int (r)(void, void, int));
				154	static int lzcomp(LZstate, LZblock, uchar, ushort, int finish);
				155	static void wrblock(LZstate, int, ushort, ushort, Huff, Huff*);
				156	static int bitcost(Huff, ulong, int);
				157	static int huffcodes(Dyncode, Huff, Huff*);
				158	static void wrdyncode(LZstate, Dyncode);
				159	static void lzput(LZstate*, ulong bits, int nbits);
				160	static void lzflushbits(LZstate*);
				161	static void lzflush(LZstate *lz);
				162	static void lzwrite(LZstate lz, void buf, int n);
				163
				164	static int hufftabinit(Huff, int, ulong, int);
				165	static int mkgzprecode(Huff, ulong , int, int);
				166
				167	static int mkprecode(Huff, ulong , int, int, ulong*);
				168	static void nextchain(Chains*, int);
				169	static void leafsort(ulong, ushort, int, int);
				170
				171	/* conversion from len to code word */
				172	static int lencode[MaxMatch];
				173
				174	/*
				175	* conversion from off to code word
				176	* off <= MaxOffCode ? offcode[off] : bigoffcode[off >> 7]
				177	*/
				178	static int offcode[MaxOffCode];
				179	static int bigoffcode[256];
				180
				181	/* litlen code words LenStart-285 extra bits */
				182	static int litlenbase[Nlitlen-LenStart];
				183	static int litlenextra[Nlitlen-LenStart] =
				184	{
				185	/* 257 */ 0, 0, 0,
				186	/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
				187	/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
				188	/* 280 */ 4, 5, 5, 5, 5, 0, 0, 0
				189	};
				190
				191	/* offset code word extra bits */
				192	static int offbase[Noff];
				193	static int offextra[] =
				194	{
				195	0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
				196	4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
				197	9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
				198	0, 0,
				199	};
				200
				201	/* order code lengths */
				202	static int clenorder[Nclen] =
				203	{
				204	16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
				205	};
				206
				207	/* static huffman tables */
				208	static Huff litlentab[Nlitlen];
				209	static Huff offtab[Noff];
				210	static Huff hofftab[Noff];
				211
				212	/* bit reversal for brain dead endian swap in huffman codes */
				213	static uchar revtab[256];
				214	static ulong nlits;
				215	static ulong nmatches;
				216
				217	int
				218	deflateinit(void)
				219	{
				220	ulong bitcount[MaxHuffBits];
				221	int i, j, ci, n;
				222
				223	/* byte reverse table */
				224	for(i=0; i<256; i++)
				225	for(j=0; j<8; j++)
				226	if(i & (1<<j))
				227	revtab[i] \|= 0x80 >> j;
				228
				229	/* static Litlen bit lengths */
				230	for(i=0; i<144; i++)
				231	litlentab[i].bits = 8;
				232	for(i=144; i<256; i++)
				233	litlentab[i].bits = 9;
				234	for(i=256; i<280; i++)
				235	litlentab[i].bits = 7;
				236	for(i=280; i<Nlitlen; i++)
				237	litlentab[i].bits = 8;
				238
				239	memset(bitcount, 0, sizeof(bitcount));
				240	bitcount[8] += 144 - 0;
				241	bitcount[9] += 256 - 144;
				242	bitcount[7] += 280 - 256;
				243	bitcount[8] += Nlitlen - 280;
				244
				245	if(!hufftabinit(litlentab, Nlitlen, bitcount, 9))
				246	return FlateInternal;
				247
				248	/* static offset bit lengths */
				249	for(i = 0; i < Noff; i++)
				250	offtab[i].bits = 5;
				251
				252	memset(bitcount, 0, sizeof(bitcount));
				253	bitcount[5] = Noff;
				254
				255	if(!hufftabinit(offtab, Noff, bitcount, 5))
				256	return FlateInternal;
				257
				258	bitcount[0] = 0;
				259	bitcount[1] = 0;
				260	if(!mkgzprecode(hofftab, bitcount, 2, MaxHuffBits))
				261	return FlateInternal;
				262
				263	/* conversion tables for lens & offs to codes */
				264	ci = 0;
				265	for(i = LenStart; i < 286; i++){
				266	n = ci + (1 << litlenextra[i - LenStart]);
				267	litlenbase[i - LenStart] = ci;
				268	for(; ci < n; ci++)
				269	lencode[ci] = i;
				270	}
				271	/* patch up special case for len MaxMatch */
				272	lencode[MaxMatch-MinMatch] = 285;
				273	litlenbase[285-LenStart] = MaxMatch-MinMatch;
				274
				275	ci = 0;
				276	for(i = 0; i < 16; i++){
				277	n = ci + (1 << offextra[i]);
				278	offbase[i] = ci;
				279	for(; ci < n; ci++)
				280	offcode[ci] = i;
				281	}
				282
				283	ci = ci >> 7;
				284	for(; i < 30; i++){
				285	n = ci + (1 << (offextra[i] - 7));
				286	offbase[i] = ci << 7;
				287	for(; ci < n; ci++)
				288	bigoffcode[ci] = i;
				289	}
				290	return FlateOk;
				291	}
				292
				293	static void
				294	deflatereset(LZstate *lz, int level, int debug)
				295	{
				296	memset(lz->nexts, 0, sizeof lz->nexts);
				297	memset(lz->hash, 0, sizeof lz->hash);
				298	lz->totr = 0;
				299	lz->totw = 0;
				300	lz->pos = 0;
				301	lz->avail = 0;
				302	lz->out = lz->obuf;
				303	lz->eout = &lz->obuf[DeflateOut];
				304	lz->prevlen = MinMatch - 1;
				305	lz->prevoff = 0;
				306	lz->now = MaxOff + 1;
				307	lz->dot = lz->now;
				308	lz->bits = 0;
				309	lz->nbits = 0;
				310	lz->maxcheck = (1 << level);
				311	lz->maxcheck -= lz->maxcheck >> 2;
				312	if(lz->maxcheck < 2)
				313	lz->maxcheck = 2;
				314	else if(lz->maxcheck > 1024)
				315	lz->maxcheck = 1024;
				316
				317	lz->debug = debug;
				318	}
				319
				320	int
				321	deflate(void wr, int (w)(void, void, int), void rr, int (r)(void, void, int), int level, int debug)
				322	{
				323	LZstate *lz;
				324	LZblock *lzb;
				325	int ok;
				326
				327	lz = malloc(sizeof lz + sizeof lzb);
				328	if(lz == nil)
				329	return FlateNoMem;
				330	lzb = (LZblock*)&lz[1];
				331
				332	deflatereset(lz, level, debug);
				333	lz->w = w;
				334	lz->wr = wr;
				335	lz->wbad = 0;
				336	lz->rbad = 0;
				337	lz->eof = 0;
				338	ok = FlateOk;
				339	while(!lz->eof \|\| lz->avail){
				340	ok = deflateb(lz, lzb, rr, r);
				341	if(ok != FlateOk)
				342	break;
				343	}
				344	if(ok == FlateOk && lz->rbad)
				345	ok = FlateInputFail;
				346	if(ok == FlateOk && lz->wbad)
				347	ok = FlateOutputFail;
				348	free(lz);
				349	return ok;
				350	}
				351
				352	static int
				353	deflateb(LZstate lz, LZblock lzb, void rr, int (r)(void, void, int))
				354	{
				355	Dyncode dyncode, hdyncode;
				356	Huff dlitlentab[Nlitlen], dofftab[Noff], hlitlentab[Nlitlen];
				357	ulong litcount[Nlitlen];
				358	long nunc, ndyn, nfix, nhuff;
				359	uchar slop, hslop;
				360	ulong ep;
				361	int i, n, m, mm, nslop;
				362
				363	memset(lzb->litlencount, 0, sizeof lzb->litlencount);
				364	memset(lzb->offcount, 0, sizeof lzb->offcount);
				365	lzb->litlencount[DeflateEob]++;
				366
				367	lzb->bytes = 0;
				368	lzb->eparse = lzb->parse;
				369	lzb->lastv = 0;
				370	lzb->excost = 0;
				371
				372	slop = &lz->hist[lz->pos];
				373	n = lz->avail;
				374	while(n < DeflateBlock && (!lz->eof \|\| lz->avail)){
				375	/*
				376	* fill the buffer as much as possible,
				377	* while leaving room for MaxOff history behind lz->pos,
				378	* and not reading more than we can handle.
				379	*
				380	* make sure we read at least HistSlop bytes.
				381	*/
				382	if(!lz->eof){
				383	ep = lz->pos + lz->avail;
				384	if(ep >= HistBlock)
				385	ep -= HistBlock;
				386	m = HistBlock - MaxOff - lz->avail;
				387	if(m > HistBlock - n)
				388	m = HistBlock - n;
				389	if(m > (HistBlock + HistSlop) - ep)
				390	m = (HistBlock + HistSlop) - ep;
				391	if(m & ~(BlockSize - 1))
				392	m &= ~(BlockSize - 1);
				393
				394	/*
				395	* be nice to the caller: stop reads that are too small.
				396	* can only get here when we've already filled the buffer some
				397	*/
				398	if(m < HistSlop){
				399	if(!m \|\| !lzb->bytes)
				400	return FlateInternal;
				401	break;
				402	}
				403
				404	mm = (*r)(rr, &lz->hist[ep], m);
				405	if(mm > 0){
				406	/*
				407	* wrap data to end if we're read it from the beginning
				408	* this way, we don't have to wrap searches.
				409	*
				410	* wrap reads past the end to the beginning.
				411	* this way, we can guarantee minimum size reads.
				412	*/
				413	if(ep < HistSlop)
				414	memmove(&lz->hist[ep + HistBlock], &lz->hist[ep], HistSlop - ep);
				415	else if(ep + mm > HistBlock)
				416	memmove(&lz->hist[0], &lz->hist[HistBlock], ep + mm - HistBlock);
				417
				418	lz->totr += mm;
				419	n += mm;
				420	lz->avail += mm;
				421	}else{
				422	if(mm < 0)
				423	lz->rbad = 1;
				424	lz->eof = 1;
				425	}
				426	}
				427	ep = lz->pos + lz->avail;
				428	if(ep > HistSize)
				429	ep = HistSize;
				430	if(lzb->bytes + ep - lz->pos > DeflateMaxBlock)
				431	ep = lz->pos + DeflateMaxBlock - lzb->bytes;
				432	m = lzcomp(lz, lzb, &lz->hist[ep], lzb->eparse, lz->eof);
				433	lzb->bytes += m;
				434	lz->pos = (lz->pos + m) & (HistBlock - 1);
				435	lz->avail -= m;
				436	}
				437	if(lzb->lastv)
				438	*lzb->eparse++ = lzb->lastv;
				439	if(lzb->eparse > lzb->parse + nelem(lzb->parse))
				440	return FlateInternal;
				441	nunc = lzb->bytes;
				442
				443	if(!mkgzprecode(dlitlentab, lzb->litlencount, Nlitlen, MaxHuffBits)
				444	\|\| !mkgzprecode(dofftab, lzb->offcount, Noff, MaxHuffBits))
				445	return FlateInternal;
				446
				447	ndyn = huffcodes(&dyncode, dlitlentab, dofftab);
				448	if(ndyn < 0)
				449	return FlateInternal;
				450	ndyn += bitcost(dlitlentab, lzb->litlencount, Nlitlen)
				451	+ bitcost(dofftab, lzb->offcount, Noff)
				452	+ lzb->excost;
				453
				454	memset(litcount, 0, sizeof litcount);
				455
				456	nslop = nunc;
				457	if(nslop > &lz->hist[HistSize] - slop)
				458	nslop = &lz->hist[HistSize] - slop;
				459
				460	for(i = 0; i < nslop; i++)
				461	litcount[slop[i]]++;
				462	hslop = &lz->hist[HistSlop - nslop];
				463	for(; i < nunc; i++)
				464	litcount[hslop[i]]++;
				465	litcount[DeflateEob]++;
				466
				467	if(!mkgzprecode(hlitlentab, litcount, Nlitlen, MaxHuffBits))
				468	return FlateInternal;
				469	nhuff = huffcodes(&hdyncode, hlitlentab, hofftab);
				470	if(nhuff < 0)
				471	return FlateInternal;
				472	nhuff += bitcost(hlitlentab, litcount, Nlitlen);
				473
				474	nfix = bitcost(litlentab, lzb->litlencount, Nlitlen)
				475	+ bitcost(offtab, lzb->offcount, Noff)
				476	+ lzb->excost;
				477
				478	lzput(lz, lz->eof && !lz->avail, 1);
				479
				480	if(lz->debug){
				481	fprint(2, "block: bytes=%lud entries=%ld extra bits=%d\n\tuncompressed=%lud fixed=%lud dynamic=%lud huffman=%lud\n",
				482	nunc, lzb->eparse - lzb->parse, lzb->excost, (nunc + 4) * 8, nfix, ndyn, nhuff);
				483	fprint(2, "\tnlit=%lud matches=%lud eof=%d\n", nlits, nmatches, lz->eof && !lz->avail);
				484	}
				485
				486	if((nunc + 4) * 8 < ndyn && (nunc + 4) * 8 < nfix && (nunc + 4) * 8 < nhuff){
				487	lzput(lz, DeflateUnc, 2);
				488	lzflushbits(lz);
				489
				490	lzput(lz, nunc & 0xff, 8);
				491	lzput(lz, (nunc >> 8) & 0xff, 8);
				492	lzput(lz, ~nunc & 0xff, 8);
				493	lzput(lz, (~nunc >> 8) & 0xff, 8);
				494	lzflush(lz);
				495
				496	lzwrite(lz, slop, nslop);
				497	lzwrite(lz, &lz->hist[HistSlop], nunc - nslop);
				498	}else if(ndyn < nfix && ndyn < nhuff){
				499	lzput(lz, DeflateDyn, 2);
				500
				501	wrdyncode(lz, &dyncode);
				502	wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, dlitlentab, dofftab);
				503	lzput(lz, dlitlentab[DeflateEob].encode, dlitlentab[DeflateEob].bits);
				504	}else if(nhuff < nfix){
				505	lzput(lz, DeflateDyn, 2);
				506
				507	wrdyncode(lz, &hdyncode);
				508
				509	m = 0;
				510	for(i = nunc; i > MaxLitRun; i -= MaxLitRun)
				511	lzb->parse[m++] = MaxLitRun;
				512	lzb->parse[m++] = i;
				513
				514	wrblock(lz, slop - lz->hist, lzb->parse, lzb->parse + m, hlitlentab, hofftab);
				515	lzput(lz, hlitlentab[DeflateEob].encode, hlitlentab[DeflateEob].bits);
				516	}else{
				517	lzput(lz, DeflateFix, 2);
				518
				519	wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, litlentab, offtab);
				520	lzput(lz, litlentab[DeflateEob].encode, litlentab[DeflateEob].bits);
				521	}
				522
				523	if(lz->eof && !lz->avail){
				524	lzflushbits(lz);
				525	lzflush(lz);
				526	}
				527	return FlateOk;
				528	}
				529
				530	static void
				531	lzwrite(LZstate lz, void buf, int n)
				532	{
				533	int nw;
				534
				535	if(n && lz->w){
				536	nw = (*lz->w)(lz->wr, buf, n);
				537	if(nw != n){
rsc	a0f1e21	2004-04-20 02:03:38 +0000	[diff] [blame]	538	lz->w = 0;
rsc	b6afd33	2003-11-23 18:19:18 +0000	[diff] [blame]	539	lz->wbad = 1;
				540	}else
				541	lz->totw += n;
				542	}
				543	}
				544
				545	static void
				546	lzflush(LZstate *lz)
				547	{
				548	lzwrite(lz, lz->obuf, lz->out - lz->obuf);
				549	lz->out = lz->obuf;
				550	}
				551
				552	static void
				553	lzput(LZstate *lz, ulong bits, int nbits)
				554	{
				555	bits = (bits << lz->nbits) \| lz->bits;
				556	for(nbits += lz->nbits; nbits >= 8; nbits -= 8){
				557	*lz->out++ = bits;
				558	if(lz->out == lz->eout)
				559	lzflush(lz);
				560	bits >>= 8;
				561	}
				562	lz->bits = bits;
				563	lz->nbits = nbits;
				564	}
				565
				566	static void
				567	lzflushbits(LZstate *lz)
				568	{
				569	if(lz->nbits)
				570	lzput(lz, 0, 8 - (lz->nbits & 7));
				571	}
				572
				573	/*
				574	* write out a block of n samples,
				575	* given lz encoding and counts for huffman tables
				576	*/
				577	static void
				578	wrblock(LZstate out, int litoff, ushort soff, ushort eoff, Huff litlentab, Huff *offtab)
				579	{
				580	ushort *off;
				581	int i, run, offset, lit, len, c;
				582
				583	if(out->debug > 2){
				584	for(off = soff; off < eoff; ){
				585	offset = *off++;
				586	run = offset & MaxLitRun;
				587	if(run){
				588	for(i = 0; i < run; i++){
				589	lit = out->hist[litoff & (HistBlock - 1)];
				590	litoff++;
				591	fprint(2, "\tlit %.2ux %c\n", lit, lit);
				592	}
				593	if(!(offset & LenFlag))
				594	continue;
				595	len = offset >> LenShift;
				596	offset = *off++;
				597	}else if(offset & LenFlag){
				598	len = offset >> LenShift;
				599	offset = *off++;
				600	}else{
				601	len = 0;
				602	offset >>= LenShift;
				603	}
				604	litoff += len + MinMatch;
				605	fprint(2, "\t<%d, %d>\n", offset + 1, len + MinMatch);
				606	}
				607	}
				608
				609	for(off = soff; off < eoff; ){
				610	offset = *off++;
				611	run = offset & MaxLitRun;
				612	if(run){
				613	for(i = 0; i < run; i++){
				614	lit = out->hist[litoff & (HistBlock - 1)];
				615	litoff++;
				616	lzput(out, litlentab[lit].encode, litlentab[lit].bits);
				617	}
				618	if(!(offset & LenFlag))
				619	continue;
				620	len = offset >> LenShift;
				621	offset = *off++;
				622	}else if(offset & LenFlag){
				623	len = offset >> LenShift;
				624	offset = *off++;
				625	}else{
				626	len = 0;
				627	offset >>= LenShift;
				628	}
				629	litoff += len + MinMatch;
				630	c = lencode[len];
				631	lzput(out, litlentab[c].encode, litlentab[c].bits);
				632	c -= LenStart;
				633	if(litlenextra[c])
				634	lzput(out, len - litlenbase[c], litlenextra[c]);
				635
				636	if(offset < MaxOffCode)
				637	c = offcode[offset];
				638	else
				639	c = bigoffcode[offset >> 7];
				640	lzput(out, offtab[c].encode, offtab[c].bits);
				641	if(offextra[c])
				642	lzput(out, offset - offbase[c], offextra[c]);
				643	}
				644	}
				645
				646	/*
				647	* look for the longest, closest string which matches
				648	* the next prefix. the clever part here is looking for
				649	* a string 1 longer than the previous best match.
				650	*
				651	* follows the recommendation of limiting number of chains
				652	* which are checked. this appears to be the best heuristic.
				653	*/
				654	static int
				655	lzmatch(int now, int then, uchar p, uchar es, ushort nexts, uchar hist, int runlen, int check, int *m)
				656	{
				657	uchar s, t;
				658	int ml, off, last;
				659
				660	ml = check;
				661	if(runlen >= 8)
				662	check >>= 2;
				663	*m = 0;
				664	if(p + runlen >= es)
				665	return runlen;
				666	last = 0;
				667	for(; check-- > 0; then = nexts[then & (MaxOff-1)]){
				668	off = (ushort)(now - then);
				669	if(off <= last \|\| off > MaxOff)
				670	break;
				671	s = p + runlen;
				672	t = hist + (((p - hist) - off) & (HistBlock-1));
				673	t += runlen;
				674	for(; s >= p; s--){
				675	if(s != t)
				676	goto matchloop;
				677	t--;
				678	}
				679
				680	/*
				681	* we have a new best match.
				682	* extend it to it's maximum length
				683	*/
				684	t += runlen + 2;
				685	s += runlen + 2;
				686	for(; s < es; s++){
				687	if(s != t)
				688	break;
				689	t++;
				690	}
				691	runlen = s - p;
				692	*m = off - 1;
				693	if(s == es \|\| runlen > ml)
				694	break;
				695	matchloop:;
				696	last = off;
				697	}
				698	return runlen;
				699	}
				700
				701	static int
				702	lzcomp(LZstate lz, LZblock lzb, uchar ep, ushort parse, int finish)
				703	{
				704	ulong cont, excost, litlencount, offcount;
				705	uchar p, q, s, es;
				706	ushort nexts, hash;
				707	int v, i, h, runlen, n, now, then, m, prevlen, prevoff, maxdefer;
				708
				709	litlencount = lzb->litlencount;
				710	offcount = lzb->offcount;
				711	nexts = lz->nexts;
				712	hash = lz->hash;
				713	now = lz->now;
				714
				715	p = &lz->hist[lz->pos];
				716	if(lz->prevlen != MinMatch - 1)
				717	p++;
				718
				719	/*
				720	* hash in the links for any hanging link positions,
				721	* and calculate the hash for the current position.
				722	*/
				723	n = MinMatch;
				724	if(n > ep - p)
				725	n = ep - p;
				726	cont = 0;
				727	for(i = 0; i < n - 1; i++){
				728	m = now - ((MinMatch-1) - i);
				729	if(m < lz->dot)
				730	continue;
				731	s = lz->hist + (((p - lz->hist) - (now - m)) & (HistBlock-1));
				732
				733	cont = (s[0] << 16) \| (s[1] << 8) \| s[2];
				734	h = hashit(cont);
				735	prevoff = 0;
				736	for(then = hash[h]; ; then = nexts[then & (MaxOff-1)]){
				737	v = (ushort)(now - then);
				738	if(v <= prevoff \|\| v >= (MinMatch-1) - i)
				739	break;
				740	prevoff = v;
				741	}
				742	if(then == (ushort)m)
				743	continue;
				744	nexts[m & (MaxOff-1)] = hash[h];
				745	hash[h] = m;
				746	}
				747	for(i = 0; i < n; i++)
				748	cont = (cont << 8) \| p[i];
				749
				750	/*
				751	* now must point to the index in the nexts array
				752	* corresponding to p's position in the history
				753	*/
				754	prevlen = lz->prevlen;
				755	prevoff = lz->prevoff;
				756	maxdefer = lz->maxcheck >> 2;
				757	excost = 0;
				758	v = lzb->lastv;
				759	for(;;){
				760	es = p + MaxMatch;
				761	if(es > ep){
				762	if(!finish \|\| p >= ep)
				763	break;
				764	es = ep;
				765	}
				766
				767	h = hashit(cont);
				768	runlen = lzmatch(now, hash[h], p, es, nexts, lz->hist, prevlen, lz->maxcheck, &m);
				769
				770	/*
				771	* back out of small matches too far in the past
				772	*/
				773	if(runlen == MinMatch && m >= MinMatchMaxOff){
				774	runlen = MinMatch - 1;
				775	m = 0;
				776	}
				777
				778	/*
				779	* record the encoding and increment counts for huffman trees
				780	* if we get a match, defer selecting it until we check for
				781	* a longer match at the next position.
				782	*/
				783	if(prevlen >= runlen && prevlen != MinMatch - 1){
				784	/*
				785	* old match at least as good; use that one
				786	*/
				787	n = prevlen - MinMatch;
				788	if(v \|\| n){
				789	*parse++ = v \| LenFlag \| (n << LenShift);
				790	*parse++ = prevoff;
				791	}else
				792	*parse++ = prevoff << LenShift;
				793	v = 0;
				794
				795	n = lencode[n];
				796	litlencount[n]++;
				797	excost += litlenextra[n - LenStart];
				798
				799	if(prevoff < MaxOffCode)
				800	n = offcode[prevoff];
				801	else
				802	n = bigoffcode[prevoff >> 7];
				803	offcount[n]++;
				804	excost += offextra[n];
				805
				806	runlen = prevlen - 1;
				807	prevlen = MinMatch - 1;
				808	nmatches++;
				809	}else if(runlen == MinMatch - 1){
				810	/*
				811	* no match; just put out the literal
				812	*/
				813	if(++v == MaxLitRun){
				814	*parse++ = v;
				815	v = 0;
				816	}
				817	litlencount[*p]++;
				818	nlits++;
				819	runlen = 1;
				820	}else{
				821	if(prevlen != MinMatch - 1){
				822	/*
				823	* longer match now. output previous literal,
				824	* update current match, and try again
				825	*/
				826	if(++v == MaxLitRun){
				827	*parse++ = v;
				828	v = 0;
				829	}
				830	litlencount[p[-1]]++;
				831	nlits++;
				832	}
				833
				834	prevoff = m;
				835
				836	if(runlen < maxdefer){
				837	prevlen = runlen;
				838	runlen = 1;
				839	}else{
				840	n = runlen - MinMatch;
				841	if(v \|\| n){
				842	*parse++ = v \| LenFlag \| (n << LenShift);
				843	*parse++ = prevoff;
				844	}else
				845	*parse++ = prevoff << LenShift;
				846	v = 0;
				847
				848	n = lencode[n];
				849	litlencount[n]++;
				850	excost += litlenextra[n - LenStart];
				851
				852	if(prevoff < MaxOffCode)
				853	n = offcode[prevoff];
				854	else
				855	n = bigoffcode[prevoff >> 7];
				856	offcount[n]++;
				857	excost += offextra[n];
				858
				859	prevlen = MinMatch - 1;
				860	nmatches++;
				861	}
				862	}
				863
				864	/*
				865	* update the hash for the newly matched data
				866	* this is constructed so the link for the old
				867	* match in this position must be at the end of a chain,
				868	* and will expire when this match is added, ie it will
				869	* never be examined by the match loop.
				870	* add to the hash chain only if we have the real hash data.
				871	*/
				872	for(q = p + runlen; p != q; p++){
				873	if(p + MinMatch <= ep){
				874	h = hashit(cont);
				875	nexts[now & (MaxOff-1)] = hash[h];
				876	hash[h] = now;
				877	if(p + MinMatch < ep)
				878	cont = (cont << 8) \| p[MinMatch];
				879	}
				880	now++;
				881	}
				882	}
				883
				884	/*
				885	* we can just store away the lazy state and
				886	* pick it up next time. the last block will have finish set
				887	* so we won't have any pending matches
				888	* however, we need to correct for how much we've encoded
				889	*/
				890	if(prevlen != MinMatch - 1)
				891	p--;
				892
				893	lzb->excost += excost;
				894	lzb->eparse = parse;
				895	lzb->lastv = v;
				896
				897	lz->now = now;
				898	lz->prevlen = prevlen;
				899	lz->prevoff = prevoff;
				900
				901	return p - &lz->hist[lz->pos];
				902	}
				903
				904	/*
				905	* make up the dynamic code tables, and return the number of bits
				906	* needed to transmit them.
				907	*/
				908	static int
				909	huffcodes(Dyncode dc, Huff littab, Huff *offtab)
				910	{
				911	Huff *codetab;
				912	uchar codes, codeaux;
				913	ulong codecount[Nclen], excost;
				914	int i, n, m, v, c, nlit, noff, ncode, nclen;
				915
				916	codetab = dc->codetab;
				917	codes = dc->codes;
				918	codeaux = dc->codeaux;
				919
				920	/*
				921	* trim the sizes of the tables
				922	*/
				923	for(nlit = Nlitlen; nlit > 257 && littab[nlit-1].bits == 0; nlit--)
				924	;
				925	for(noff = Noff; noff > 1 && offtab[noff-1].bits == 0; noff--)
				926	;
				927
				928	/*
				929	* make the code-length code
				930	*/
				931	for(i = 0; i < nlit; i++)
				932	codes[i] = littab[i].bits;
				933	for(i = 0; i < noff; i++)
				934	codes[i + nlit] = offtab[i].bits;
				935
				936	/*
				937	* run-length compress the code-length code
				938	*/
				939	excost = 0;
				940	c = 0;
				941	ncode = nlit+noff;
				942	for(i = 0; i < ncode; ){
				943	n = i + 1;
				944	v = codes[i];
				945	while(n < ncode && v == codes[n])
				946	n++;
				947	n -= i;
				948	i += n;
				949	if(v == 0){
				950	while(n >= 11){
				951	m = n;
				952	if(m > 138)
				953	m = 138;
				954	codes[c] = 18;
				955	codeaux[c++] = m - 11;
				956	n -= m;
				957	excost += 7;
				958	}
				959	if(n >= 3){
				960	codes[c] = 17;
				961	codeaux[c++] = n - 3;
				962	n = 0;
				963	excost += 3;
				964	}
				965	}
				966	while(n--){
				967	codes[c++] = v;
				968	while(n >= 3){
				969	m = n;
				970	if(m > 6)
				971	m = 6;
				972	codes[c] = 16;
				973	codeaux[c++] = m - 3;
				974	n -= m;
				975	excost += 3;
				976	}
				977	}
				978	}
				979
				980	memset(codecount, 0, sizeof codecount);
				981	for(i = 0; i < c; i++)
				982	codecount[codes[i]]++;
				983	if(!mkgzprecode(codetab, codecount, Nclen, 8))
				984	return -1;
				985
				986	for(nclen = Nclen; nclen > 4 && codetab[clenorder[nclen-1]].bits == 0; nclen--)
				987	;
				988
				989	dc->nlit = nlit;
				990	dc->noff = noff;
				991	dc->nclen = nclen;
				992	dc->ncode = c;
				993
				994	return 5 + 5 + 4 + nclen * 3 + bitcost(codetab, codecount, Nclen) + excost;
				995	}
				996
				997	static void
				998	wrdyncode(LZstate out, Dyncode dc)
				999	{
				1000	Huff *codetab;
				1001	uchar codes, codeaux;
				1002	int i, v, c;
				1003
				1004	/*
				1005	* write out header, then code length code lengths,
				1006	* and code lengths
				1007	*/
				1008	lzput(out, dc->nlit-257, 5);
				1009	lzput(out, dc->noff-1, 5);
				1010	lzput(out, dc->nclen-4, 4);
				1011
				1012	codetab = dc->codetab;
				1013	for(i = 0; i < dc->nclen; i++)
				1014	lzput(out, codetab[clenorder[i]].bits, 3);
				1015
				1016	codes = dc->codes;
				1017	codeaux = dc->codeaux;
				1018	c = dc->ncode;
				1019	for(i = 0; i < c; i++){
				1020	v = codes[i];
				1021	lzput(out, codetab[v].encode, codetab[v].bits);
				1022	if(v >= 16){
				1023	if(v == 16)
				1024	lzput(out, codeaux[i], 2);
				1025	else if(v == 17)
				1026	lzput(out, codeaux[i], 3);
				1027	else /* v == 18 */
				1028	lzput(out, codeaux[i], 7);
				1029	}
				1030	}
				1031	}
				1032
				1033	static int
				1034	bitcost(Huff tab, ulong count, int n)
				1035	{
				1036	ulong tot;
				1037	int i;
				1038
				1039	tot = 0;
				1040	for(i = 0; i < n; i++)
				1041	tot += count[i] * tab[i].bits;
				1042	return tot;
				1043	}
				1044
				1045	static int
				1046	mkgzprecode(Huff tab, ulong count, int n, int maxbits)
				1047	{
				1048	ulong bitcount[MaxHuffBits];
				1049	int i, nbits;
				1050
				1051	nbits = mkprecode(tab, count, n, maxbits, bitcount);
				1052	for(i = 0; i < n; i++){
				1053	if(tab[i].bits == -1)
				1054	tab[i].bits = 0;
				1055	else if(tab[i].bits == 0){
				1056	if(nbits != 0 \|\| bitcount[0] != 1)
				1057	return 0;
				1058	bitcount[1] = 1;
				1059	bitcount[0] = 0;
				1060	nbits = 1;
				1061	tab[i].bits = 1;
				1062	}
				1063	}
				1064	if(bitcount[0] != 0)
				1065	return 0;
				1066	return hufftabinit(tab, n, bitcount, nbits);
				1067	}
				1068
				1069	static int
				1070	hufftabinit(Huff tab, int n, ulong bitcount, int nbits)
				1071	{
				1072	ulong code, nc[MaxHuffBits];
				1073	int i, bits;
				1074
				1075	code = 0;
				1076	for(bits = 1; bits <= nbits; bits++){
				1077	code = (code + bitcount[bits-1]) << 1;
				1078	nc[bits] = code;
				1079	}
				1080
				1081	for(i = 0; i < n; i++){
				1082	bits = tab[i].bits;
				1083	if(bits){
				1084	code = nc[bits]++ << (16 - bits);
				1085	if(code & ~0xffff)
				1086	return 0;
				1087	tab[i].encode = revtab[code >> 8] \| (revtab[code & 0xff] << 8);
				1088	}
				1089	}
				1090	return 1;
				1091	}
				1092
				1093
				1094	/*
				1095	* this should be in a library
				1096	*/
				1097	struct Chain
				1098	{
				1099	ulong count; /* occurances of everything in the chain */
				1100	ushort leaf; /* leaves to the left of chain, or leaf value */
				1101	char col; /* ref count for collecting unused chains */
				1102	char gen; /* need to generate chains for next lower level */
				1103	Chain up; / Chain up in the lists */
				1104	};
				1105
				1106	struct Chains
				1107	{
				1108	Chain lists[(MaxHuffBits - 1) 2];
				1109	ulong leafcount[MaxLeaf]; /* sorted list of leaf counts */
				1110	ushort leafmap[MaxLeaf]; /* map to actual leaf number */
				1111	int nleaf; /* number of leaves */
				1112	Chain chains[ChainMem];
				1113	Chain *echains;
				1114	Chain *free;
				1115	char col;
				1116	int nlists;
				1117	};
				1118
				1119	/*
				1120	* fast, low space overhead algorithm for max depth huffman type codes
				1121	*
				1122	* J. Katajainen, A. Moffat and A. Turpin, "A fast and space-economical
				1123	* algorithm for length-limited coding," Proc. Intl. Symp. on Algorithms
				1124	* and Computation, Cairns, Australia, Dec. 1995, Lecture Notes in Computer
				1125	* Science, Vol 1004, J. Staples, P. Eades, N. Katoh, and A. Moffat, eds.,
				1126	* pp 12-21, Springer Verlag, New York, 1995.
				1127	*/
				1128	static int
				1129	mkprecode(Huff tab, ulong count, int n, int maxbits, ulong *bitcount)
				1130	{
				1131	Chains cs;
				1132	Chain *c;
				1133	int i, m, em, bits;
				1134
Russ Cox	995e570	2009-04-30 07:24:53 -0700	[diff] [blame]	1135	memset(&cs, 0, sizeof cs);
				1136
rsc	b6afd33	2003-11-23 18:19:18 +0000	[diff] [blame]	1137	/*
				1138	* set up the sorted list of leaves
				1139	*/
				1140	m = 0;
				1141	for(i = 0; i < n; i++){
				1142	tab[i].bits = -1;
				1143	tab[i].encode = 0;
				1144	if(count[i] != 0){
				1145	cs.leafcount[m] = count[i];
				1146	cs.leafmap[m] = i;
				1147	m++;
				1148	}
				1149	}
				1150	if(m < 2){
				1151	if(m != 0){
				1152	tab[cs.leafmap[0]].bits = 0;
				1153	bitcount[0] = 1;
				1154	}else
				1155	bitcount[0] = 0;
				1156	return 0;
				1157	}
				1158	cs.nleaf = m;
				1159	leafsort(cs.leafcount, cs.leafmap, 0, m);
				1160
				1161	for(i = 0; i < m; i++)
				1162	cs.leafcount[i] = count[cs.leafmap[i]];
				1163
				1164	/*
				1165	* set up free list
				1166	*/
				1167	cs.free = &cs.chains[2];
				1168	cs.echains = &cs.chains[ChainMem];
				1169	cs.col = 1;
				1170
				1171	/*
				1172	* initialize chains for each list
				1173	*/
				1174	c = &cs.chains[0];
				1175	c->count = cs.leafcount[0];
				1176	c->leaf = 1;
				1177	c->col = cs.col;
				1178	c->up = nil;
				1179	c->gen = 0;
				1180	cs.chains[1] = cs.chains[0];
				1181	cs.chains[1].leaf = 2;
				1182	cs.chains[1].count = cs.leafcount[1];
				1183	for(i = 0; i < maxbits-1; i++){
				1184	cs.lists[i * 2] = &cs.chains[0];
				1185	cs.lists[i * 2 + 1] = &cs.chains[1];
				1186	}
				1187
				1188	cs.nlists = 2 * (maxbits - 1);
				1189	m = 2 * m - 2;
				1190	for(i = 2; i < m; i++)
				1191	nextchain(&cs, cs.nlists - 2);
				1192
				1193	bits = 0;
				1194	bitcount[0] = cs.nleaf;
				1195	for(c = cs.lists[cs.nlists - 1]; c != nil; c = c->up){
				1196	m = c->leaf;
				1197	bitcount[bits++] -= m;
				1198	bitcount[bits] = m;
				1199	}
				1200	m = 0;
				1201	for(i = bits; i >= 0; i--)
				1202	for(em = m + bitcount[i]; m < em; m++)
				1203	tab[cs.leafmap[m]].bits = i;
				1204
				1205	return bits;
				1206	}
				1207
				1208	/*
				1209	* calculate the next chain on the list
				1210	* we can always toss out the old chain
				1211	*/
				1212	static void
				1213	nextchain(Chains *cs, int list)
				1214	{
				1215	Chain c, oc;
				1216	int i, nleaf, sumc;
				1217
				1218	oc = cs->lists[list + 1];
				1219	cs->lists[list] = oc;
				1220	if(oc == nil)
				1221	return;
				1222
				1223	/*
				1224	* make sure we have all chains needed to make sumc
				1225	* note it is possible to generate only one of these,
				1226	* use twice that value for sumc, and then generate
				1227	* the second if that preliminary sumc would be chosen.
				1228	* however, this appears to be slower on current tests
				1229	*/
				1230	if(oc->gen){
				1231	nextchain(cs, list - 2);
				1232	nextchain(cs, list - 2);
				1233	oc->gen = 0;
				1234	}
				1235
				1236	/*
				1237	* pick up the chain we're going to add;
				1238	* collect unused chains no free ones are left
				1239	*/
				1240	for(c = cs->free; ; c++){
				1241	if(c >= cs->echains){
				1242	cs->col++;
				1243	for(i = 0; i < cs->nlists; i++)
				1244	for(c = cs->lists[i]; c != nil; c = c->up)
				1245	c->col = cs->col;
				1246	c = cs->chains;
				1247	}
				1248	if(c->col != cs->col)
				1249	break;
				1250	}
				1251
				1252	/*
				1253	* pick the cheapest of
				1254	* 1) the next package from the previous list
				1255	* 2) the next leaf
				1256	*/
				1257	nleaf = oc->leaf;
				1258	sumc = 0;
				1259	if(list > 0 && cs->lists[list-1] != nil)
				1260	sumc = cs->lists[list-2]->count + cs->lists[list-1]->count;
				1261	if(sumc != 0 && (nleaf >= cs->nleaf \|\| cs->leafcount[nleaf] > sumc)){
				1262	c->count = sumc;
				1263	c->leaf = oc->leaf;
				1264	c->up = cs->lists[list-1];
				1265	c->gen = 1;
				1266	}else if(nleaf >= cs->nleaf){
				1267	cs->lists[list + 1] = nil;
				1268	return;
				1269	}else{
				1270	c->leaf = nleaf + 1;
				1271	c->count = cs->leafcount[nleaf];
				1272	c->up = oc->up;
				1273	c->gen = 0;
				1274	}
				1275	cs->free = c + 1;
				1276
				1277	cs->lists[list + 1] = c;
				1278	c->col = cs->col;
				1279	}
				1280
				1281	static int
				1282	pivot(ulong *c, int a, int n)
				1283	{
				1284	int j, pi, pj, pk;
				1285
				1286	j = n/6;
				1287	pi = a + j; /* 1/6 */
				1288	j += j;
				1289	pj = pi + j; /* 1/2 */
				1290	pk = pj + j; /* 5/6 */
				1291	if(c[pi] < c[pj]){
				1292	if(c[pi] < c[pk]){
				1293	if(c[pj] < c[pk])
				1294	return pj;
				1295	return pk;
				1296	}
				1297	return pi;
				1298	}
				1299	if(c[pj] < c[pk]){
				1300	if(c[pi] < c[pk])
				1301	return pi;
				1302	return pk;
				1303	}
				1304	return pj;
				1305	}
				1306
				1307	static void
				1308	leafsort(ulong leafcount, ushort leafmap, int a, int n)
				1309	{
				1310	ulong t;
				1311	int j, pi, pj, pn;
				1312
				1313	while(n > 1){
				1314	if(n > 10){
				1315	pi = pivot(leafcount, a, n);
				1316	}else
				1317	pi = a + (n>>1);
				1318
				1319	t = leafcount[pi];
				1320	leafcount[pi] = leafcount[a];
				1321	leafcount[a] = t;
				1322	t = leafmap[pi];
				1323	leafmap[pi] = leafmap[a];
				1324	leafmap[a] = t;
				1325	pi = a;
				1326	pn = a + n;
				1327	pj = pn;
				1328	for(;;){
				1329	do
				1330	pi++;
				1331	while(pi < pn && (leafcount[pi] < leafcount[a] \|\| leafcount[pi] == leafcount[a] && leafmap[pi] > leafmap[a]));
				1332	do
				1333	pj--;
				1334	while(pj > a && (leafcount[pj] > leafcount[a] \|\| leafcount[pj] == leafcount[a] && leafmap[pj] < leafmap[a]));
				1335	if(pj < pi)
				1336	break;
				1337	t = leafcount[pi];
				1338	leafcount[pi] = leafcount[pj];
				1339	leafcount[pj] = t;
				1340	t = leafmap[pi];
				1341	leafmap[pi] = leafmap[pj];
				1342	leafmap[pj] = t;
				1343	}
				1344	t = leafcount[a];
				1345	leafcount[a] = leafcount[pj];
				1346	leafcount[pj] = t;
				1347	t = leafmap[a];
				1348	leafmap[a] = leafmap[pj];
				1349	leafmap[pj] = t;
				1350	j = pj - a;
				1351
				1352	n = n-j-1;
				1353	if(j >= n){
				1354	leafsort(leafcount, leafmap, a, j);
				1355	a += j+1;
				1356	}else{
				1357	leafsort(leafcount, leafmap, a + (j+1), n);
				1358	n = j;
				1359	}
				1360	}
				1361	}