blob: ceaa77f4407aa83be009ae00d7d9c4b4d05574ba [file] [log] [blame]
rscb6afd332003-11-23 18:19:18 +00001#include <u.h>
2#include <libc.h>
3#include <flate.h>
4
5typedef struct Chain Chain;
6typedef struct Chains Chains;
7typedef struct Dyncode Dyncode;
8typedef struct Huff Huff;
9typedef struct LZblock LZblock;
10typedef struct LZstate LZstate;
11
12enum
13{
14 /*
15 * deflate format paramaters
16 */
17 DeflateUnc = 0, /* uncompressed block */
18 DeflateFix = 1, /* fixed huffman codes */
19 DeflateDyn = 2, /* dynamic huffman codes */
20
21 DeflateEob = 256, /* end of block code in lit/len book */
22 DeflateMaxBlock = 64*1024-1, /* maximum size of uncompressed block */
23
24 DeflateMaxExp = 10, /* maximum expansion for a block */
25
26 LenStart = 257, /* start of length codes in litlen */
27 Nlitlen = 288, /* number of litlen codes */
28 Noff = 30, /* number of offset codes */
29 Nclen = 19, /* number of codelen codes */
30
31 MaxOff = 32*1024,
32 MinMatch = 3, /* shortest match possible */
33 MaxMatch = 258, /* longest match possible */
34
35 /*
36 * huffman code paramaters
37 */
38 MaxLeaf = Nlitlen,
39 MaxHuffBits = 16, /* max bits in a huffman code */
40 ChainMem = 2 * (MaxHuffBits - 1) * MaxHuffBits,
41
42 /*
43 * coding of the lz parse
44 */
45 LenFlag = 1 << 3,
46 LenShift = 4, /* leaves enough space for MinMatchMaxOff */
47 MaxLitRun = LenFlag - 1,
48
49 /*
50 * internal lz paramaters
51 */
52 DeflateOut = 4096, /* output buffer size */
53 BlockSize = 8192, /* attempted input read quanta */
54 DeflateBlock = DeflateMaxBlock & ~(BlockSize - 1),
55 MinMatchMaxOff = 4096, /* max profitable offset for small match;
56 * assumes 8 bits for len, 5+10 for offset
57 * DONT CHANGE WITHOUT CHANGING LZPARSE CONSTANTS
58 */
59 HistSlop = 512, /* must be at lead MaxMatch */
60 HistBlock = 64*1024,
61 HistSize = HistBlock + HistSlop,
62
63 HashLog = 13,
64 HashSize = 1<<HashLog,
65
66 MaxOffCode = 256, /* biggest offset looked up in direct table */
67
68 EstLitBits = 8,
69 EstLenBits = 4,
rsccbeb0b22006-04-01 19:24:03 +000070 EstOffBits = 5
rscb6afd332003-11-23 18:19:18 +000071};
72
73/*
74 * knuth vol. 3 multiplicative hashing
75 * each byte x chosen according to rules
76 * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
77 * with reasonable spread between the bytes & their complements
78 *
79 * the 3 byte value appears to be as almost good as the 4 byte value,
80 * and might be faster on some machines
81 */
82/*
rsc00c6cee2006-06-07 23:25:39 +000083#define hashit(c) ((u32int)((c) * 0x6b43a9) >> (24 - HashLog))
rscb6afd332003-11-23 18:19:18 +000084*/
rsc00c6cee2006-06-07 23:25:39 +000085#define hashit(c) ((u32int)(((c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog))
rscb6afd332003-11-23 18:19:18 +000086
87/*
88 * lempel-ziv style compression state
89 */
90struct LZstate
91{
92 uchar hist[HistSize];
93 ulong pos; /* current location in history buffer */
94 ulong avail; /* data available after pos */
95 int eof;
96 ushort hash[HashSize]; /* hash chains */
97 ushort nexts[MaxOff];
98 int now; /* pos in hash chains */
99 int dot; /* dawn of time in history */
100 int prevlen; /* lazy matching state */
101 int prevoff;
102 int maxcheck; /* compressor tuning */
103
104 uchar obuf[DeflateOut];
105 uchar *out; /* current position in the output buffer */
106 uchar *eout;
107 ulong bits; /* bit shift register */
108 int nbits;
109 int rbad; /* got an error reading the buffer */
110 int wbad; /* got an error writing the buffer */
111 int (*w)(void*, void*, int);
112 void *wr;
113
114 ulong totr; /* total input size */
115 ulong totw; /* total output size */
116 int debug;
117};
118
119struct LZblock
120{
121 ushort parse[DeflateMaxBlock / 2 + 1];
122 int lastv; /* value being constucted for parse */
123 ulong litlencount[Nlitlen];
124 ulong offcount[Noff];
125 ushort *eparse; /* limit for parse table */
126 int bytes; /* consumed from the input */
127 int excost; /* cost of encoding extra len & off bits */
128};
129
130/*
131 * huffman code table
132 */
133struct Huff
134{
135 short bits; /* length of the code */
136 ushort encode; /* the code */
137};
138
139/*
140 * encoding of dynamic huffman trees
141 */
142struct Dyncode
143{
144 int nlit;
145 int noff;
146 int nclen;
147 int ncode;
148 Huff codetab[Nclen];
149 uchar codes[Nlitlen+Noff];
150 uchar codeaux[Nlitlen+Noff];
151};
152
153static int deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int));
154static int lzcomp(LZstate*, LZblock*, uchar*, ushort*, int finish);
155static void wrblock(LZstate*, int, ushort*, ushort*, Huff*, Huff*);
156static int bitcost(Huff*, ulong*, int);
157static int huffcodes(Dyncode*, Huff*, Huff*);
158static void wrdyncode(LZstate*, Dyncode*);
159static void lzput(LZstate*, ulong bits, int nbits);
160static void lzflushbits(LZstate*);
161static void lzflush(LZstate *lz);
162static void lzwrite(LZstate *lz, void *buf, int n);
163
164static int hufftabinit(Huff*, int, ulong*, int);
165static int mkgzprecode(Huff*, ulong *, int, int);
166
167static int mkprecode(Huff*, ulong *, int, int, ulong*);
168static void nextchain(Chains*, int);
169static void leafsort(ulong*, ushort*, int, int);
170
171/* conversion from len to code word */
172static int lencode[MaxMatch];
173
174/*
175 * conversion from off to code word
176 * off <= MaxOffCode ? offcode[off] : bigoffcode[off >> 7]
177*/
178static int offcode[MaxOffCode];
179static int bigoffcode[256];
180
181/* litlen code words LenStart-285 extra bits */
182static int litlenbase[Nlitlen-LenStart];
183static int litlenextra[Nlitlen-LenStart] =
184{
185/* 257 */ 0, 0, 0,
186/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
187/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
188/* 280 */ 4, 5, 5, 5, 5, 0, 0, 0
189};
190
191/* offset code word extra bits */
192static int offbase[Noff];
193static int offextra[] =
194{
195 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
196 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
197 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
198 0, 0,
199};
200
201/* order code lengths */
202static int clenorder[Nclen] =
203{
204 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
205};
206
207/* static huffman tables */
208static Huff litlentab[Nlitlen];
209static Huff offtab[Noff];
210static Huff hofftab[Noff];
211
212/* bit reversal for brain dead endian swap in huffman codes */
213static uchar revtab[256];
214static ulong nlits;
215static ulong nmatches;
216
217int
218deflateinit(void)
219{
220 ulong bitcount[MaxHuffBits];
221 int i, j, ci, n;
222
223 /* byte reverse table */
224 for(i=0; i<256; i++)
225 for(j=0; j<8; j++)
226 if(i & (1<<j))
227 revtab[i] |= 0x80 >> j;
228
229 /* static Litlen bit lengths */
230 for(i=0; i<144; i++)
231 litlentab[i].bits = 8;
232 for(i=144; i<256; i++)
233 litlentab[i].bits = 9;
234 for(i=256; i<280; i++)
235 litlentab[i].bits = 7;
236 for(i=280; i<Nlitlen; i++)
237 litlentab[i].bits = 8;
238
239 memset(bitcount, 0, sizeof(bitcount));
240 bitcount[8] += 144 - 0;
241 bitcount[9] += 256 - 144;
242 bitcount[7] += 280 - 256;
243 bitcount[8] += Nlitlen - 280;
244
245 if(!hufftabinit(litlentab, Nlitlen, bitcount, 9))
246 return FlateInternal;
247
248 /* static offset bit lengths */
249 for(i = 0; i < Noff; i++)
250 offtab[i].bits = 5;
251
252 memset(bitcount, 0, sizeof(bitcount));
253 bitcount[5] = Noff;
254
255 if(!hufftabinit(offtab, Noff, bitcount, 5))
256 return FlateInternal;
257
258 bitcount[0] = 0;
259 bitcount[1] = 0;
260 if(!mkgzprecode(hofftab, bitcount, 2, MaxHuffBits))
261 return FlateInternal;
262
263 /* conversion tables for lens & offs to codes */
264 ci = 0;
265 for(i = LenStart; i < 286; i++){
266 n = ci + (1 << litlenextra[i - LenStart]);
267 litlenbase[i - LenStart] = ci;
268 for(; ci < n; ci++)
269 lencode[ci] = i;
270 }
271 /* patch up special case for len MaxMatch */
272 lencode[MaxMatch-MinMatch] = 285;
273 litlenbase[285-LenStart] = MaxMatch-MinMatch;
274
275 ci = 0;
276 for(i = 0; i < 16; i++){
277 n = ci + (1 << offextra[i]);
278 offbase[i] = ci;
279 for(; ci < n; ci++)
280 offcode[ci] = i;
281 }
282
283 ci = ci >> 7;
284 for(; i < 30; i++){
285 n = ci + (1 << (offextra[i] - 7));
286 offbase[i] = ci << 7;
287 for(; ci < n; ci++)
288 bigoffcode[ci] = i;
289 }
290 return FlateOk;
291}
292
293static void
294deflatereset(LZstate *lz, int level, int debug)
295{
296 memset(lz->nexts, 0, sizeof lz->nexts);
297 memset(lz->hash, 0, sizeof lz->hash);
298 lz->totr = 0;
299 lz->totw = 0;
300 lz->pos = 0;
301 lz->avail = 0;
302 lz->out = lz->obuf;
303 lz->eout = &lz->obuf[DeflateOut];
304 lz->prevlen = MinMatch - 1;
305 lz->prevoff = 0;
306 lz->now = MaxOff + 1;
307 lz->dot = lz->now;
308 lz->bits = 0;
309 lz->nbits = 0;
310 lz->maxcheck = (1 << level);
311 lz->maxcheck -= lz->maxcheck >> 2;
312 if(lz->maxcheck < 2)
313 lz->maxcheck = 2;
314 else if(lz->maxcheck > 1024)
315 lz->maxcheck = 1024;
316
317 lz->debug = debug;
318}
319
320int
321deflate(void *wr, int (*w)(void*, void*, int), void *rr, int (*r)(void*, void*, int), int level, int debug)
322{
323 LZstate *lz;
324 LZblock *lzb;
325 int ok;
326
327 lz = malloc(sizeof *lz + sizeof *lzb);
328 if(lz == nil)
329 return FlateNoMem;
330 lzb = (LZblock*)&lz[1];
331
332 deflatereset(lz, level, debug);
333 lz->w = w;
334 lz->wr = wr;
335 lz->wbad = 0;
336 lz->rbad = 0;
337 lz->eof = 0;
338 ok = FlateOk;
339 while(!lz->eof || lz->avail){
340 ok = deflateb(lz, lzb, rr, r);
341 if(ok != FlateOk)
342 break;
343 }
344 if(ok == FlateOk && lz->rbad)
345 ok = FlateInputFail;
346 if(ok == FlateOk && lz->wbad)
347 ok = FlateOutputFail;
348 free(lz);
349 return ok;
350}
351
352static int
353deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int))
354{
355 Dyncode dyncode, hdyncode;
356 Huff dlitlentab[Nlitlen], dofftab[Noff], hlitlentab[Nlitlen];
357 ulong litcount[Nlitlen];
358 long nunc, ndyn, nfix, nhuff;
359 uchar *slop, *hslop;
360 ulong ep;
361 int i, n, m, mm, nslop;
362
363 memset(lzb->litlencount, 0, sizeof lzb->litlencount);
364 memset(lzb->offcount, 0, sizeof lzb->offcount);
365 lzb->litlencount[DeflateEob]++;
366
367 lzb->bytes = 0;
368 lzb->eparse = lzb->parse;
369 lzb->lastv = 0;
370 lzb->excost = 0;
371
372 slop = &lz->hist[lz->pos];
373 n = lz->avail;
374 while(n < DeflateBlock && (!lz->eof || lz->avail)){
375 /*
376 * fill the buffer as much as possible,
377 * while leaving room for MaxOff history behind lz->pos,
378 * and not reading more than we can handle.
379 *
380 * make sure we read at least HistSlop bytes.
381 */
382 if(!lz->eof){
383 ep = lz->pos + lz->avail;
384 if(ep >= HistBlock)
385 ep -= HistBlock;
386 m = HistBlock - MaxOff - lz->avail;
387 if(m > HistBlock - n)
388 m = HistBlock - n;
389 if(m > (HistBlock + HistSlop) - ep)
390 m = (HistBlock + HistSlop) - ep;
391 if(m & ~(BlockSize - 1))
392 m &= ~(BlockSize - 1);
393
394 /*
395 * be nice to the caller: stop reads that are too small.
396 * can only get here when we've already filled the buffer some
397 */
398 if(m < HistSlop){
399 if(!m || !lzb->bytes)
400 return FlateInternal;
401 break;
402 }
403
404 mm = (*r)(rr, &lz->hist[ep], m);
405 if(mm > 0){
406 /*
407 * wrap data to end if we're read it from the beginning
408 * this way, we don't have to wrap searches.
409 *
410 * wrap reads past the end to the beginning.
411 * this way, we can guarantee minimum size reads.
412 */
413 if(ep < HistSlop)
414 memmove(&lz->hist[ep + HistBlock], &lz->hist[ep], HistSlop - ep);
415 else if(ep + mm > HistBlock)
416 memmove(&lz->hist[0], &lz->hist[HistBlock], ep + mm - HistBlock);
417
418 lz->totr += mm;
419 n += mm;
420 lz->avail += mm;
421 }else{
422 if(mm < 0)
423 lz->rbad = 1;
424 lz->eof = 1;
425 }
426 }
427 ep = lz->pos + lz->avail;
428 if(ep > HistSize)
429 ep = HistSize;
430 if(lzb->bytes + ep - lz->pos > DeflateMaxBlock)
431 ep = lz->pos + DeflateMaxBlock - lzb->bytes;
432 m = lzcomp(lz, lzb, &lz->hist[ep], lzb->eparse, lz->eof);
433 lzb->bytes += m;
434 lz->pos = (lz->pos + m) & (HistBlock - 1);
435 lz->avail -= m;
436 }
437 if(lzb->lastv)
438 *lzb->eparse++ = lzb->lastv;
439 if(lzb->eparse > lzb->parse + nelem(lzb->parse))
440 return FlateInternal;
441 nunc = lzb->bytes;
442
443 if(!mkgzprecode(dlitlentab, lzb->litlencount, Nlitlen, MaxHuffBits)
444 || !mkgzprecode(dofftab, lzb->offcount, Noff, MaxHuffBits))
445 return FlateInternal;
446
447 ndyn = huffcodes(&dyncode, dlitlentab, dofftab);
448 if(ndyn < 0)
449 return FlateInternal;
450 ndyn += bitcost(dlitlentab, lzb->litlencount, Nlitlen)
451 + bitcost(dofftab, lzb->offcount, Noff)
452 + lzb->excost;
453
454 memset(litcount, 0, sizeof litcount);
455
456 nslop = nunc;
457 if(nslop > &lz->hist[HistSize] - slop)
458 nslop = &lz->hist[HistSize] - slop;
459
460 for(i = 0; i < nslop; i++)
461 litcount[slop[i]]++;
462 hslop = &lz->hist[HistSlop - nslop];
463 for(; i < nunc; i++)
464 litcount[hslop[i]]++;
465 litcount[DeflateEob]++;
466
467 if(!mkgzprecode(hlitlentab, litcount, Nlitlen, MaxHuffBits))
468 return FlateInternal;
469 nhuff = huffcodes(&hdyncode, hlitlentab, hofftab);
470 if(nhuff < 0)
471 return FlateInternal;
472 nhuff += bitcost(hlitlentab, litcount, Nlitlen);
473
474 nfix = bitcost(litlentab, lzb->litlencount, Nlitlen)
475 + bitcost(offtab, lzb->offcount, Noff)
476 + lzb->excost;
477
478 lzput(lz, lz->eof && !lz->avail, 1);
479
480 if(lz->debug){
481 fprint(2, "block: bytes=%lud entries=%ld extra bits=%d\n\tuncompressed=%lud fixed=%lud dynamic=%lud huffman=%lud\n",
482 nunc, lzb->eparse - lzb->parse, lzb->excost, (nunc + 4) * 8, nfix, ndyn, nhuff);
483 fprint(2, "\tnlit=%lud matches=%lud eof=%d\n", nlits, nmatches, lz->eof && !lz->avail);
484 }
485
486 if((nunc + 4) * 8 < ndyn && (nunc + 4) * 8 < nfix && (nunc + 4) * 8 < nhuff){
487 lzput(lz, DeflateUnc, 2);
488 lzflushbits(lz);
489
490 lzput(lz, nunc & 0xff, 8);
491 lzput(lz, (nunc >> 8) & 0xff, 8);
492 lzput(lz, ~nunc & 0xff, 8);
493 lzput(lz, (~nunc >> 8) & 0xff, 8);
494 lzflush(lz);
495
496 lzwrite(lz, slop, nslop);
497 lzwrite(lz, &lz->hist[HistSlop], nunc - nslop);
498 }else if(ndyn < nfix && ndyn < nhuff){
499 lzput(lz, DeflateDyn, 2);
500
501 wrdyncode(lz, &dyncode);
502 wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, dlitlentab, dofftab);
503 lzput(lz, dlitlentab[DeflateEob].encode, dlitlentab[DeflateEob].bits);
504 }else if(nhuff < nfix){
505 lzput(lz, DeflateDyn, 2);
506
507 wrdyncode(lz, &hdyncode);
508
509 m = 0;
510 for(i = nunc; i > MaxLitRun; i -= MaxLitRun)
511 lzb->parse[m++] = MaxLitRun;
512 lzb->parse[m++] = i;
513
514 wrblock(lz, slop - lz->hist, lzb->parse, lzb->parse + m, hlitlentab, hofftab);
515 lzput(lz, hlitlentab[DeflateEob].encode, hlitlentab[DeflateEob].bits);
516 }else{
517 lzput(lz, DeflateFix, 2);
518
519 wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, litlentab, offtab);
520 lzput(lz, litlentab[DeflateEob].encode, litlentab[DeflateEob].bits);
521 }
522
523 if(lz->eof && !lz->avail){
524 lzflushbits(lz);
525 lzflush(lz);
526 }
527 return FlateOk;
528}
529
530static void
531lzwrite(LZstate *lz, void *buf, int n)
532{
533 int nw;
534
535 if(n && lz->w){
536 nw = (*lz->w)(lz->wr, buf, n);
537 if(nw != n){
rsca0f1e212004-04-20 02:03:38 +0000538 lz->w = 0;
rscb6afd332003-11-23 18:19:18 +0000539 lz->wbad = 1;
540 }else
541 lz->totw += n;
542 }
543}
544
545static void
546lzflush(LZstate *lz)
547{
548 lzwrite(lz, lz->obuf, lz->out - lz->obuf);
549 lz->out = lz->obuf;
550}
551
552static void
553lzput(LZstate *lz, ulong bits, int nbits)
554{
555 bits = (bits << lz->nbits) | lz->bits;
556 for(nbits += lz->nbits; nbits >= 8; nbits -= 8){
557 *lz->out++ = bits;
558 if(lz->out == lz->eout)
559 lzflush(lz);
560 bits >>= 8;
561 }
562 lz->bits = bits;
563 lz->nbits = nbits;
564}
565
566static void
567lzflushbits(LZstate *lz)
568{
569 if(lz->nbits)
570 lzput(lz, 0, 8 - (lz->nbits & 7));
571}
572
573/*
574 * write out a block of n samples,
575 * given lz encoding and counts for huffman tables
576 */
577static void
578wrblock(LZstate *out, int litoff, ushort *soff, ushort *eoff, Huff *litlentab, Huff *offtab)
579{
580 ushort *off;
581 int i, run, offset, lit, len, c;
582
583 if(out->debug > 2){
584 for(off = soff; off < eoff; ){
585 offset = *off++;
586 run = offset & MaxLitRun;
587 if(run){
588 for(i = 0; i < run; i++){
589 lit = out->hist[litoff & (HistBlock - 1)];
590 litoff++;
591 fprint(2, "\tlit %.2ux %c\n", lit, lit);
592 }
593 if(!(offset & LenFlag))
594 continue;
595 len = offset >> LenShift;
596 offset = *off++;
597 }else if(offset & LenFlag){
598 len = offset >> LenShift;
599 offset = *off++;
600 }else{
601 len = 0;
602 offset >>= LenShift;
603 }
604 litoff += len + MinMatch;
605 fprint(2, "\t<%d, %d>\n", offset + 1, len + MinMatch);
606 }
607 }
608
609 for(off = soff; off < eoff; ){
610 offset = *off++;
611 run = offset & MaxLitRun;
612 if(run){
613 for(i = 0; i < run; i++){
614 lit = out->hist[litoff & (HistBlock - 1)];
615 litoff++;
616 lzput(out, litlentab[lit].encode, litlentab[lit].bits);
617 }
618 if(!(offset & LenFlag))
619 continue;
620 len = offset >> LenShift;
621 offset = *off++;
622 }else if(offset & LenFlag){
623 len = offset >> LenShift;
624 offset = *off++;
625 }else{
626 len = 0;
627 offset >>= LenShift;
628 }
629 litoff += len + MinMatch;
630 c = lencode[len];
631 lzput(out, litlentab[c].encode, litlentab[c].bits);
632 c -= LenStart;
633 if(litlenextra[c])
634 lzput(out, len - litlenbase[c], litlenextra[c]);
635
636 if(offset < MaxOffCode)
637 c = offcode[offset];
638 else
639 c = bigoffcode[offset >> 7];
640 lzput(out, offtab[c].encode, offtab[c].bits);
641 if(offextra[c])
642 lzput(out, offset - offbase[c], offextra[c]);
643 }
644}
645
646/*
647 * look for the longest, closest string which matches
648 * the next prefix. the clever part here is looking for
649 * a string 1 longer than the previous best match.
650 *
651 * follows the recommendation of limiting number of chains
652 * which are checked. this appears to be the best heuristic.
653 */
654static int
655lzmatch(int now, int then, uchar *p, uchar *es, ushort *nexts, uchar *hist, int runlen, int check, int *m)
656{
657 uchar *s, *t;
658 int ml, off, last;
659
660 ml = check;
661 if(runlen >= 8)
662 check >>= 2;
663 *m = 0;
664 if(p + runlen >= es)
665 return runlen;
666 last = 0;
667 for(; check-- > 0; then = nexts[then & (MaxOff-1)]){
668 off = (ushort)(now - then);
669 if(off <= last || off > MaxOff)
670 break;
671 s = p + runlen;
672 t = hist + (((p - hist) - off) & (HistBlock-1));
673 t += runlen;
674 for(; s >= p; s--){
675 if(*s != *t)
676 goto matchloop;
677 t--;
678 }
679
680 /*
681 * we have a new best match.
682 * extend it to it's maximum length
683 */
684 t += runlen + 2;
685 s += runlen + 2;
686 for(; s < es; s++){
687 if(*s != *t)
688 break;
689 t++;
690 }
691 runlen = s - p;
692 *m = off - 1;
693 if(s == es || runlen > ml)
694 break;
695matchloop:;
696 last = off;
697 }
698 return runlen;
699}
700
701static int
702lzcomp(LZstate *lz, LZblock *lzb, uchar *ep, ushort *parse, int finish)
703{
704 ulong cont, excost, *litlencount, *offcount;
705 uchar *p, *q, *s, *es;
706 ushort *nexts, *hash;
707 int v, i, h, runlen, n, now, then, m, prevlen, prevoff, maxdefer;
708
709 litlencount = lzb->litlencount;
710 offcount = lzb->offcount;
711 nexts = lz->nexts;
712 hash = lz->hash;
713 now = lz->now;
714
715 p = &lz->hist[lz->pos];
716 if(lz->prevlen != MinMatch - 1)
717 p++;
718
719 /*
720 * hash in the links for any hanging link positions,
721 * and calculate the hash for the current position.
722 */
723 n = MinMatch;
724 if(n > ep - p)
725 n = ep - p;
726 cont = 0;
727 for(i = 0; i < n - 1; i++){
728 m = now - ((MinMatch-1) - i);
729 if(m < lz->dot)
730 continue;
731 s = lz->hist + (((p - lz->hist) - (now - m)) & (HistBlock-1));
732
733 cont = (s[0] << 16) | (s[1] << 8) | s[2];
734 h = hashit(cont);
735 prevoff = 0;
736 for(then = hash[h]; ; then = nexts[then & (MaxOff-1)]){
737 v = (ushort)(now - then);
738 if(v <= prevoff || v >= (MinMatch-1) - i)
739 break;
740 prevoff = v;
741 }
742 if(then == (ushort)m)
743 continue;
744 nexts[m & (MaxOff-1)] = hash[h];
745 hash[h] = m;
746 }
747 for(i = 0; i < n; i++)
748 cont = (cont << 8) | p[i];
749
750 /*
751 * now must point to the index in the nexts array
752 * corresponding to p's position in the history
753 */
754 prevlen = lz->prevlen;
755 prevoff = lz->prevoff;
756 maxdefer = lz->maxcheck >> 2;
757 excost = 0;
758 v = lzb->lastv;
759 for(;;){
760 es = p + MaxMatch;
761 if(es > ep){
762 if(!finish || p >= ep)
763 break;
764 es = ep;
765 }
766
767 h = hashit(cont);
768 runlen = lzmatch(now, hash[h], p, es, nexts, lz->hist, prevlen, lz->maxcheck, &m);
769
770 /*
771 * back out of small matches too far in the past
772 */
773 if(runlen == MinMatch && m >= MinMatchMaxOff){
774 runlen = MinMatch - 1;
775 m = 0;
776 }
777
778 /*
779 * record the encoding and increment counts for huffman trees
780 * if we get a match, defer selecting it until we check for
781 * a longer match at the next position.
782 */
783 if(prevlen >= runlen && prevlen != MinMatch - 1){
784 /*
785 * old match at least as good; use that one
786 */
787 n = prevlen - MinMatch;
788 if(v || n){
789 *parse++ = v | LenFlag | (n << LenShift);
790 *parse++ = prevoff;
791 }else
792 *parse++ = prevoff << LenShift;
793 v = 0;
794
795 n = lencode[n];
796 litlencount[n]++;
797 excost += litlenextra[n - LenStart];
798
799 if(prevoff < MaxOffCode)
800 n = offcode[prevoff];
801 else
802 n = bigoffcode[prevoff >> 7];
803 offcount[n]++;
804 excost += offextra[n];
805
806 runlen = prevlen - 1;
807 prevlen = MinMatch - 1;
808 nmatches++;
809 }else if(runlen == MinMatch - 1){
810 /*
811 * no match; just put out the literal
812 */
813 if(++v == MaxLitRun){
814 *parse++ = v;
815 v = 0;
816 }
817 litlencount[*p]++;
818 nlits++;
819 runlen = 1;
820 }else{
821 if(prevlen != MinMatch - 1){
822 /*
823 * longer match now. output previous literal,
824 * update current match, and try again
825 */
826 if(++v == MaxLitRun){
827 *parse++ = v;
828 v = 0;
829 }
830 litlencount[p[-1]]++;
831 nlits++;
832 }
833
834 prevoff = m;
835
836 if(runlen < maxdefer){
837 prevlen = runlen;
838 runlen = 1;
839 }else{
840 n = runlen - MinMatch;
841 if(v || n){
842 *parse++ = v | LenFlag | (n << LenShift);
843 *parse++ = prevoff;
844 }else
845 *parse++ = prevoff << LenShift;
846 v = 0;
847
848 n = lencode[n];
849 litlencount[n]++;
850 excost += litlenextra[n - LenStart];
851
852 if(prevoff < MaxOffCode)
853 n = offcode[prevoff];
854 else
855 n = bigoffcode[prevoff >> 7];
856 offcount[n]++;
857 excost += offextra[n];
858
859 prevlen = MinMatch - 1;
860 nmatches++;
861 }
862 }
863
864 /*
865 * update the hash for the newly matched data
866 * this is constructed so the link for the old
867 * match in this position must be at the end of a chain,
868 * and will expire when this match is added, ie it will
869 * never be examined by the match loop.
870 * add to the hash chain only if we have the real hash data.
871 */
872 for(q = p + runlen; p != q; p++){
873 if(p + MinMatch <= ep){
874 h = hashit(cont);
875 nexts[now & (MaxOff-1)] = hash[h];
876 hash[h] = now;
877 if(p + MinMatch < ep)
878 cont = (cont << 8) | p[MinMatch];
879 }
880 now++;
881 }
882 }
883
884 /*
885 * we can just store away the lazy state and
886 * pick it up next time. the last block will have finish set
887 * so we won't have any pending matches
888 * however, we need to correct for how much we've encoded
889 */
890 if(prevlen != MinMatch - 1)
891 p--;
892
893 lzb->excost += excost;
894 lzb->eparse = parse;
895 lzb->lastv = v;
896
897 lz->now = now;
898 lz->prevlen = prevlen;
899 lz->prevoff = prevoff;
900
901 return p - &lz->hist[lz->pos];
902}
903
904/*
905 * make up the dynamic code tables, and return the number of bits
906 * needed to transmit them.
907 */
908static int
909huffcodes(Dyncode *dc, Huff *littab, Huff *offtab)
910{
911 Huff *codetab;
912 uchar *codes, *codeaux;
913 ulong codecount[Nclen], excost;
914 int i, n, m, v, c, nlit, noff, ncode, nclen;
915
916 codetab = dc->codetab;
917 codes = dc->codes;
918 codeaux = dc->codeaux;
919
920 /*
921 * trim the sizes of the tables
922 */
923 for(nlit = Nlitlen; nlit > 257 && littab[nlit-1].bits == 0; nlit--)
924 ;
925 for(noff = Noff; noff > 1 && offtab[noff-1].bits == 0; noff--)
926 ;
927
928 /*
929 * make the code-length code
930 */
931 for(i = 0; i < nlit; i++)
932 codes[i] = littab[i].bits;
933 for(i = 0; i < noff; i++)
934 codes[i + nlit] = offtab[i].bits;
935
936 /*
937 * run-length compress the code-length code
938 */
939 excost = 0;
940 c = 0;
941 ncode = nlit+noff;
942 for(i = 0; i < ncode; ){
943 n = i + 1;
944 v = codes[i];
945 while(n < ncode && v == codes[n])
946 n++;
947 n -= i;
948 i += n;
949 if(v == 0){
950 while(n >= 11){
951 m = n;
952 if(m > 138)
953 m = 138;
954 codes[c] = 18;
955 codeaux[c++] = m - 11;
956 n -= m;
957 excost += 7;
958 }
959 if(n >= 3){
960 codes[c] = 17;
961 codeaux[c++] = n - 3;
962 n = 0;
963 excost += 3;
964 }
965 }
966 while(n--){
967 codes[c++] = v;
968 while(n >= 3){
969 m = n;
970 if(m > 6)
971 m = 6;
972 codes[c] = 16;
973 codeaux[c++] = m - 3;
974 n -= m;
975 excost += 3;
976 }
977 }
978 }
979
980 memset(codecount, 0, sizeof codecount);
981 for(i = 0; i < c; i++)
982 codecount[codes[i]]++;
983 if(!mkgzprecode(codetab, codecount, Nclen, 8))
984 return -1;
985
986 for(nclen = Nclen; nclen > 4 && codetab[clenorder[nclen-1]].bits == 0; nclen--)
987 ;
988
989 dc->nlit = nlit;
990 dc->noff = noff;
991 dc->nclen = nclen;
992 dc->ncode = c;
993
994 return 5 + 5 + 4 + nclen * 3 + bitcost(codetab, codecount, Nclen) + excost;
995}
996
997static void
998wrdyncode(LZstate *out, Dyncode *dc)
999{
1000 Huff *codetab;
1001 uchar *codes, *codeaux;
1002 int i, v, c;
1003
1004 /*
1005 * write out header, then code length code lengths,
1006 * and code lengths
1007 */
1008 lzput(out, dc->nlit-257, 5);
1009 lzput(out, dc->noff-1, 5);
1010 lzput(out, dc->nclen-4, 4);
1011
1012 codetab = dc->codetab;
1013 for(i = 0; i < dc->nclen; i++)
1014 lzput(out, codetab[clenorder[i]].bits, 3);
1015
1016 codes = dc->codes;
1017 codeaux = dc->codeaux;
1018 c = dc->ncode;
1019 for(i = 0; i < c; i++){
1020 v = codes[i];
1021 lzput(out, codetab[v].encode, codetab[v].bits);
1022 if(v >= 16){
1023 if(v == 16)
1024 lzput(out, codeaux[i], 2);
1025 else if(v == 17)
1026 lzput(out, codeaux[i], 3);
1027 else /* v == 18 */
1028 lzput(out, codeaux[i], 7);
1029 }
1030 }
1031}
1032
1033static int
1034bitcost(Huff *tab, ulong *count, int n)
1035{
1036 ulong tot;
1037 int i;
1038
1039 tot = 0;
1040 for(i = 0; i < n; i++)
1041 tot += count[i] * tab[i].bits;
1042 return tot;
1043}
1044
1045static int
1046mkgzprecode(Huff *tab, ulong *count, int n, int maxbits)
1047{
1048 ulong bitcount[MaxHuffBits];
1049 int i, nbits;
1050
1051 nbits = mkprecode(tab, count, n, maxbits, bitcount);
1052 for(i = 0; i < n; i++){
1053 if(tab[i].bits == -1)
1054 tab[i].bits = 0;
1055 else if(tab[i].bits == 0){
1056 if(nbits != 0 || bitcount[0] != 1)
1057 return 0;
1058 bitcount[1] = 1;
1059 bitcount[0] = 0;
1060 nbits = 1;
1061 tab[i].bits = 1;
1062 }
1063 }
1064 if(bitcount[0] != 0)
1065 return 0;
1066 return hufftabinit(tab, n, bitcount, nbits);
1067}
1068
1069static int
1070hufftabinit(Huff *tab, int n, ulong *bitcount, int nbits)
1071{
1072 ulong code, nc[MaxHuffBits];
1073 int i, bits;
1074
1075 code = 0;
1076 for(bits = 1; bits <= nbits; bits++){
1077 code = (code + bitcount[bits-1]) << 1;
1078 nc[bits] = code;
1079 }
1080
1081 for(i = 0; i < n; i++){
1082 bits = tab[i].bits;
1083 if(bits){
1084 code = nc[bits]++ << (16 - bits);
1085 if(code & ~0xffff)
1086 return 0;
1087 tab[i].encode = revtab[code >> 8] | (revtab[code & 0xff] << 8);
1088 }
1089 }
1090 return 1;
1091}
1092
1093
1094/*
1095 * this should be in a library
1096 */
1097struct Chain
1098{
1099 ulong count; /* occurances of everything in the chain */
1100 ushort leaf; /* leaves to the left of chain, or leaf value */
1101 char col; /* ref count for collecting unused chains */
1102 char gen; /* need to generate chains for next lower level */
1103 Chain *up; /* Chain up in the lists */
1104};
1105
1106struct Chains
1107{
1108 Chain *lists[(MaxHuffBits - 1) * 2];
1109 ulong leafcount[MaxLeaf]; /* sorted list of leaf counts */
1110 ushort leafmap[MaxLeaf]; /* map to actual leaf number */
1111 int nleaf; /* number of leaves */
1112 Chain chains[ChainMem];
1113 Chain *echains;
1114 Chain *free;
1115 char col;
1116 int nlists;
1117};
1118
1119/*
1120 * fast, low space overhead algorithm for max depth huffman type codes
1121 *
1122 * J. Katajainen, A. Moffat and A. Turpin, "A fast and space-economical
1123 * algorithm for length-limited coding," Proc. Intl. Symp. on Algorithms
1124 * and Computation, Cairns, Australia, Dec. 1995, Lecture Notes in Computer
1125 * Science, Vol 1004, J. Staples, P. Eades, N. Katoh, and A. Moffat, eds.,
1126 * pp 12-21, Springer Verlag, New York, 1995.
1127 */
1128static int
1129mkprecode(Huff *tab, ulong *count, int n, int maxbits, ulong *bitcount)
1130{
1131 Chains cs;
1132 Chain *c;
1133 int i, m, em, bits;
1134
Russ Cox995e5702009-04-30 07:24:53 -07001135 memset(&cs, 0, sizeof cs);
1136
rscb6afd332003-11-23 18:19:18 +00001137 /*
1138 * set up the sorted list of leaves
1139 */
1140 m = 0;
1141 for(i = 0; i < n; i++){
1142 tab[i].bits = -1;
1143 tab[i].encode = 0;
1144 if(count[i] != 0){
1145 cs.leafcount[m] = count[i];
1146 cs.leafmap[m] = i;
1147 m++;
1148 }
1149 }
1150 if(m < 2){
1151 if(m != 0){
1152 tab[cs.leafmap[0]].bits = 0;
1153 bitcount[0] = 1;
1154 }else
1155 bitcount[0] = 0;
1156 return 0;
1157 }
1158 cs.nleaf = m;
1159 leafsort(cs.leafcount, cs.leafmap, 0, m);
1160
1161 for(i = 0; i < m; i++)
1162 cs.leafcount[i] = count[cs.leafmap[i]];
1163
1164 /*
1165 * set up free list
1166 */
1167 cs.free = &cs.chains[2];
1168 cs.echains = &cs.chains[ChainMem];
1169 cs.col = 1;
1170
1171 /*
1172 * initialize chains for each list
1173 */
1174 c = &cs.chains[0];
1175 c->count = cs.leafcount[0];
1176 c->leaf = 1;
1177 c->col = cs.col;
1178 c->up = nil;
1179 c->gen = 0;
1180 cs.chains[1] = cs.chains[0];
1181 cs.chains[1].leaf = 2;
1182 cs.chains[1].count = cs.leafcount[1];
1183 for(i = 0; i < maxbits-1; i++){
1184 cs.lists[i * 2] = &cs.chains[0];
1185 cs.lists[i * 2 + 1] = &cs.chains[1];
1186 }
1187
1188 cs.nlists = 2 * (maxbits - 1);
1189 m = 2 * m - 2;
1190 for(i = 2; i < m; i++)
1191 nextchain(&cs, cs.nlists - 2);
1192
1193 bits = 0;
1194 bitcount[0] = cs.nleaf;
1195 for(c = cs.lists[cs.nlists - 1]; c != nil; c = c->up){
1196 m = c->leaf;
1197 bitcount[bits++] -= m;
1198 bitcount[bits] = m;
1199 }
1200 m = 0;
1201 for(i = bits; i >= 0; i--)
1202 for(em = m + bitcount[i]; m < em; m++)
1203 tab[cs.leafmap[m]].bits = i;
1204
1205 return bits;
1206}
1207
1208/*
1209 * calculate the next chain on the list
1210 * we can always toss out the old chain
1211 */
1212static void
1213nextchain(Chains *cs, int list)
1214{
1215 Chain *c, *oc;
1216 int i, nleaf, sumc;
1217
1218 oc = cs->lists[list + 1];
1219 cs->lists[list] = oc;
1220 if(oc == nil)
1221 return;
1222
1223 /*
1224 * make sure we have all chains needed to make sumc
1225 * note it is possible to generate only one of these,
1226 * use twice that value for sumc, and then generate
1227 * the second if that preliminary sumc would be chosen.
1228 * however, this appears to be slower on current tests
1229 */
1230 if(oc->gen){
1231 nextchain(cs, list - 2);
1232 nextchain(cs, list - 2);
1233 oc->gen = 0;
1234 }
1235
1236 /*
1237 * pick up the chain we're going to add;
1238 * collect unused chains no free ones are left
1239 */
1240 for(c = cs->free; ; c++){
1241 if(c >= cs->echains){
1242 cs->col++;
1243 for(i = 0; i < cs->nlists; i++)
1244 for(c = cs->lists[i]; c != nil; c = c->up)
1245 c->col = cs->col;
1246 c = cs->chains;
1247 }
1248 if(c->col != cs->col)
1249 break;
1250 }
1251
1252 /*
1253 * pick the cheapest of
1254 * 1) the next package from the previous list
1255 * 2) the next leaf
1256 */
1257 nleaf = oc->leaf;
1258 sumc = 0;
1259 if(list > 0 && cs->lists[list-1] != nil)
1260 sumc = cs->lists[list-2]->count + cs->lists[list-1]->count;
1261 if(sumc != 0 && (nleaf >= cs->nleaf || cs->leafcount[nleaf] > sumc)){
1262 c->count = sumc;
1263 c->leaf = oc->leaf;
1264 c->up = cs->lists[list-1];
1265 c->gen = 1;
1266 }else if(nleaf >= cs->nleaf){
1267 cs->lists[list + 1] = nil;
1268 return;
1269 }else{
1270 c->leaf = nleaf + 1;
1271 c->count = cs->leafcount[nleaf];
1272 c->up = oc->up;
1273 c->gen = 0;
1274 }
1275 cs->free = c + 1;
1276
1277 cs->lists[list + 1] = c;
1278 c->col = cs->col;
1279}
1280
1281static int
1282pivot(ulong *c, int a, int n)
1283{
1284 int j, pi, pj, pk;
1285
1286 j = n/6;
1287 pi = a + j; /* 1/6 */
1288 j += j;
1289 pj = pi + j; /* 1/2 */
1290 pk = pj + j; /* 5/6 */
1291 if(c[pi] < c[pj]){
1292 if(c[pi] < c[pk]){
1293 if(c[pj] < c[pk])
1294 return pj;
1295 return pk;
1296 }
1297 return pi;
1298 }
1299 if(c[pj] < c[pk]){
1300 if(c[pi] < c[pk])
1301 return pi;
1302 return pk;
1303 }
1304 return pj;
1305}
1306
1307static void
1308leafsort(ulong *leafcount, ushort *leafmap, int a, int n)
1309{
1310 ulong t;
1311 int j, pi, pj, pn;
1312
1313 while(n > 1){
1314 if(n > 10){
1315 pi = pivot(leafcount, a, n);
1316 }else
1317 pi = a + (n>>1);
1318
1319 t = leafcount[pi];
1320 leafcount[pi] = leafcount[a];
1321 leafcount[a] = t;
1322 t = leafmap[pi];
1323 leafmap[pi] = leafmap[a];
1324 leafmap[a] = t;
1325 pi = a;
1326 pn = a + n;
1327 pj = pn;
1328 for(;;){
1329 do
1330 pi++;
1331 while(pi < pn && (leafcount[pi] < leafcount[a] || leafcount[pi] == leafcount[a] && leafmap[pi] > leafmap[a]));
1332 do
1333 pj--;
1334 while(pj > a && (leafcount[pj] > leafcount[a] || leafcount[pj] == leafcount[a] && leafmap[pj] < leafmap[a]));
1335 if(pj < pi)
1336 break;
1337 t = leafcount[pi];
1338 leafcount[pi] = leafcount[pj];
1339 leafcount[pj] = t;
1340 t = leafmap[pi];
1341 leafmap[pi] = leafmap[pj];
1342 leafmap[pj] = t;
1343 }
1344 t = leafcount[a];
1345 leafcount[a] = leafcount[pj];
1346 leafcount[pj] = t;
1347 t = leafmap[a];
1348 leafmap[a] = leafmap[pj];
1349 leafmap[pj] = t;
1350 j = pj - a;
1351
1352 n = n-j-1;
1353 if(j >= n){
1354 leafsort(leafcount, leafmap, a, j);
1355 a += j+1;
1356 }else{
1357 leafsort(leafcount, leafmap, a + (j+1), n);
1358 n = j;
1359 }
1360 }
1361}