Wise&mystical  1.0
Project about Europe
Loading...
Searching...
No Matches
sdefl.h
Go to the documentation of this file.
1/*# Small Deflate
2`sdefl` is a small bare bone lossless compression library in ANSI C (ISO C90)
3which implements the Deflate (RFC 1951) compressed data format specification standard.
4It is mainly tuned to get as much speed and compression ratio from as little code
5as needed to keep the implementation as concise as possible.
6
7## Features
8- Portable single header and source file duo written in ANSI C (ISO C90)
9- Dual license with either MIT or public domain
10- Small implementation
11 - Deflate: 525 LoC
12 - Inflate: 320 LoC
13- Webassembly:
14 - Deflate ~3.7 KB (~2.2KB compressed)
15 - Inflate ~3.6 KB (~2.2KB compressed)
16
17## Usage:
18This file behaves differently depending on what symbols you define
19before including it.
20
21Header-File mode:
22If you do not define `SDEFL_IMPLEMENTATION` before including this file, it
23will operate in header only mode. In this mode it declares all used structs
24and the API of the library without including the implementation of the library.
25
26Implementation mode:
27If you define `SDEFL_IMPLEMENTATION` before including this file, it will
28compile the implementation . Make sure that you only include
29this file implementation in *one* C or C++ file to prevent collisions.
30
31### Benchmark
32
33| Compressor name | Compression| Decompress.| Compr. size | Ratio |
34| ------------------------| -----------| -----------| ----------- | ----- |
35| miniz 1.0 -1 | 122 MB/s | 208 MB/s | 48510028 | 48.51 |
36| miniz 1.0 -6 | 27 MB/s | 260 MB/s | 36513697 | 36.51 |
37| miniz 1.0 -9 | 23 MB/s | 261 MB/s | 36460101 | 36.46 |
38| zlib 1.2.11 -1 | 72 MB/s | 307 MB/s | 42298774 | 42.30 |
39| zlib 1.2.11 -6 | 24 MB/s | 313 MB/s | 36548921 | 36.55 |
40| zlib 1.2.11 -9 | 20 MB/s | 314 MB/s | 36475792 | 36.48 |
41| sdefl 1.0 -0 | 127 MB/s | 371 MB/s | 40004116 | 39.88 |
42| sdefl 1.0 -1 | 111 MB/s | 398 MB/s | 38940674 | 38.82 |
43| sdefl 1.0 -5 | 45 MB/s | 420 MB/s | 36577183 | 36.46 |
44| sdefl 1.0 -7 | 38 MB/s | 423 MB/s | 36523781 | 36.41 |
45| libdeflate 1.3 -1 | 147 MB/s | 667 MB/s | 39597378 | 39.60 |
46| libdeflate 1.3 -6 | 69 MB/s | 689 MB/s | 36648318 | 36.65 |
47| libdeflate 1.3 -9 | 13 MB/s | 672 MB/s | 35197141 | 35.20 |
48| libdeflate 1.3 -12 | 8.13 MB/s | 670 MB/s | 35100568 | 35.10 |
49
50### Compression
51Results on the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia):
52
53| File | Original | `sdefl 0` | `sdefl 5` | `sdefl 7` |
54| :------ | ---------: | -----------------: | ---------: | ----------: |
55| dickens | 10.192.446 | 4,260,187| 3,845,261| 3,833,657 |
56| mozilla | 51.220.480 | 20,774,706 | 19,607,009 | 19,565,867 |
57| mr | 9.970.564 | 3,860,531 | 3,673,460 | 3,665,627 |
58| nci | 33.553.445 | 4,030,283 | 3,094,526 | 3,006,075 |
59| ooffice | 6.152.192 | 3,320,063 | 3,186,373 | 3,183,815 |
60| osdb | 10.085.684 | 3,919,646 | 3,649,510 | 3,649,477 |
61| reymont | 6.627.202 | 2,263,378 | 1,857,588 | 1,827,237 |
62| samba | 21.606.400 | 6,121,797 | 5,462,670 | 5,450,762 |
63| sao | 7.251.944 | 5,612,421 | 5,485,380 | 5,481,765 |
64| webster | 41.458.703 | 13,972,648 | 12,059,432 | 11,991,421 |
65| xml | 5.345.280 | 886,620| 674,009 | 662,141 |
66| x-ray | 8.474.240 | 6,304,655 | 6,244,779 | 6,244,779 |
67
68## License
69```
70------------------------------------------------------------------------------
71This software is available under 2 licenses -- choose whichever you prefer.
72------------------------------------------------------------------------------
73ALTERNATIVE A - MIT License
74Copyright (c) 2020 Micha Mettke
75Permission is hereby granted, free of charge, to any person obtaining a copy of
76this software and associated documentation files (the "Software"), to deal in
77the Software without restriction, including without limitation the rights to
78use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
79of the Software, and to permit persons to whom the Software is furnished to do
80so, subject to the following conditions:
81The above copyright notice and this permission notice shall be included in all
82copies or substantial portions of the Software.
83THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
84IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
85FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
86AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
87LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
88OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
89SOFTWARE.
90------------------------------------------------------------------------------
91ALTERNATIVE B - Public Domain (www.unlicense.org)
92This is free and unencumbered software released into the public domain.
93Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
94software, either in source code form or as a compiled binary, for any purpose,
95commercial or non-commercial, and by any means.
96In jurisdictions that recognize copyright laws, the author or authors of this
97software dedicate any and all copyright interest in the software to the public
98domain. We make this dedication for the benefit of the public at large and to
99the detriment of our heirs and successors. We intend this dedication to be an
100overt act of relinquishment in perpetuity of all present and future rights to
101this software under copyright law.
102THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
103IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
104FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
105AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
106ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
107WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
108------------------------------------------------------------------------------
109```
110*/
111#ifndef SDEFL_H_INCLUDED
112#define SDEFL_H_INCLUDED
113
114#ifdef __cplusplus
115extern "C" {
116#endif
117
118#define SDEFL_MAX_OFF (1 << 15)
119#define SDEFL_WIN_SIZ SDEFL_MAX_OFF
120#define SDEFL_WIN_MSK (SDEFL_WIN_SIZ-1)
121
122#define SDEFL_HASH_BITS 15
123#define SDEFL_HASH_SIZ (1 << SDEFL_HASH_BITS)
124#define SDEFL_HASH_MSK (SDEFL_HASH_SIZ-1)
125
126#define SDEFL_MIN_MATCH 4
127#define SDEFL_BLK_MAX (256*1024)
128#define SDEFL_SEQ_SIZ ((SDEFL_BLK_MAX + SDEFL_MIN_MATCH)/SDEFL_MIN_MATCH)
129
130#define SDEFL_SYM_MAX (288)
131#define SDEFL_OFF_MAX (32)
132#define SDEFL_PRE_MAX (19)
133
134#define SDEFL_LVL_MIN 0
135#define SDEFL_LVL_DEF 5
136#define SDEFL_LVL_MAX 8
137
141};
145};
147 unsigned char lit[SDEFL_SYM_MAX];
148 unsigned char off[SDEFL_OFF_MAX];
149};
153};
155 int off, len;
156};
157struct sdefl {
161
166};
167extern int sdefl_bound(int in_len);
168extern int sdeflate(struct sdefl *s, void *o, const void *i, int n, int lvl);
169extern int zsdeflate(struct sdefl *s, void *o, const void *i, int n, int lvl);
170
171#ifdef __cplusplus
172}
173#endif
174
175#endif /* SDEFL_H_INCLUDED */
176
177#ifdef SDEFL_IMPLEMENTATION
178
179#include <assert.h> /* assert */
180#include <string.h> /* memcpy */
181#include <limits.h> /* CHAR_BIT */
182
183#define SDEFL_NIL (-1)
184#define SDEFL_MAX_MATCH 258
185#define SDEFL_MAX_CODE_LEN (15)
186#define SDEFL_SYM_BITS (10u)
187#define SDEFL_SYM_MSK ((1u << SDEFL_SYM_BITS)-1u)
188#define SDEFL_LIT_LEN_CODES (14)
189#define SDEFL_OFF_CODES (15)
190#define SDEFL_PRE_CODES (7)
191#define SDEFL_CNT_NUM(n) ((((n)+3u/4u)+3u)&~3u)
192#define SDEFL_EOB (256)
193
194#define sdefl_npow2(n) (1 << (sdefl_ilog2((n)-1) + 1))
195
196static int
197sdefl_ilog2(int n) {
198 if (!n) return 0;
199#ifdef _MSC_VER
200 unsigned long msbp = 0;
201 _BitScanReverse(&msbp, (unsigned long)n);
202 return (int)msbp;
203#elif defined(__GNUC__) || defined(__clang__)
204 return (int)sizeof(unsigned long) * CHAR_BIT - 1 - __builtin_clzl((unsigned long)n);
205#else
206 #define lt(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
207 static const char tbl[256] = {
208 0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,lt(4), lt(5), lt(5), lt(6), lt(6), lt(6), lt(6),
209 lt(7), lt(7), lt(7), lt(7), lt(7), lt(7), lt(7), lt(7)};
210 int tt, t;
211 if ((tt = (n >> 16))) {
212 return (t = (tt >> 8)) ? 24 + tbl[t] : 16 + tbl[tt];
213 } else {
214 return (t = (n >> 8)) ? 8 + tbl[t] : tbl[n];
215 }
216 #undef lt
217#endif
218}
219static unsigned
220sdefl_uload32(const void *p) {
221 /* hopefully will be optimized to an unaligned read */
222 unsigned n = 0;
223 memcpy(&n, p, sizeof(n));
224 return n;
225}
226static unsigned
227sdefl_hash32(const void *p) {
228 unsigned n = sdefl_uload32(p);
229 return (n * 0x9E377989) >> (32 - SDEFL_HASH_BITS);
230}
231static void
232sdefl_put(unsigned char **dst, struct sdefl *s, int code, int bitcnt) {
233 s->bits |= (code << s->bitcnt);
234 s->bitcnt += bitcnt;
235 while (s->bitcnt >= 8) {
236 unsigned char *tar = *dst;
237 *tar = (unsigned char)(s->bits & 0xFF);
238 s->bits >>= 8;
239 s->bitcnt -= 8;
240 *dst = *dst + 1;
241 }
242}
243static void
244sdefl_heap_sub(unsigned A[], unsigned len, unsigned sub) {
245 unsigned c, p = sub;
246 unsigned v = A[sub];
247 while ((c = p << 1) <= len) {
248 if (c < len && A[c + 1] > A[c]) c++;
249 if (v >= A[c]) break;
250 A[p] = A[c], p = c;
251 }
252 A[p] = v;
253}
254static void
255sdefl_heap_array(unsigned *A, unsigned len) {
256 unsigned sub;
257 for (sub = len >> 1; sub >= 1; sub--)
258 sdefl_heap_sub(A, len, sub);
259}
260static void
261sdefl_heap_sort(unsigned *A, unsigned n) {
262 A--;
263 sdefl_heap_array(A, n);
264 while (n >= 2) {
265 unsigned tmp = A[n];
266 A[n--] = A[1];
267 A[1] = tmp;
268 sdefl_heap_sub(A, n, 1);
269 }
270}
271static unsigned
272sdefl_sort_sym(unsigned sym_cnt, unsigned *freqs,
273 unsigned char *lens, unsigned *sym_out) {
274 unsigned cnts[SDEFL_CNT_NUM(SDEFL_SYM_MAX)] = {0};
275 unsigned cnt_num = SDEFL_CNT_NUM(sym_cnt);
276 unsigned used_sym = 0;
277 unsigned sym, i;
278 for (sym = 0; sym < sym_cnt; sym++)
279 cnts[freqs[sym] < cnt_num-1 ? freqs[sym]: cnt_num-1]++;
280 for (i = 1; i < cnt_num; i++) {
281 unsigned cnt = cnts[i];
282 cnts[i] = used_sym;
283 used_sym += cnt;
284 }
285 for (sym = 0; sym < sym_cnt; sym++) {
286 unsigned freq = freqs[sym];
287 if (freq) {
288 unsigned idx = freq < cnt_num-1 ? freq : cnt_num-1;
289 sym_out[cnts[idx]++] = sym | (freq << SDEFL_SYM_BITS);
290 } else lens[sym] = 0;
291 }
292 sdefl_heap_sort(sym_out + cnts[cnt_num-2], cnts[cnt_num-1] - cnts[cnt_num-2]);
293 return used_sym;
294}
295static void
296sdefl_build_tree(unsigned *A, unsigned sym_cnt) {
297 unsigned i = 0, b = 0, e = 0;
298 do {
299 unsigned m, n, freq_shift;
300 if (i != sym_cnt && (b == e || (A[i] >> SDEFL_SYM_BITS) <= (A[b] >> SDEFL_SYM_BITS)))
301 m = i++;
302 else m = b++;
303 if (i != sym_cnt && (b == e || (A[i] >> SDEFL_SYM_BITS) <= (A[b] >> SDEFL_SYM_BITS)))
304 n = i++;
305 else n = b++;
306
307 freq_shift = (A[m] & ~SDEFL_SYM_MSK) + (A[n] & ~SDEFL_SYM_MSK);
308 A[m] = (A[m] & SDEFL_SYM_MSK) | (e << SDEFL_SYM_BITS);
309 A[n] = (A[n] & SDEFL_SYM_MSK) | (e << SDEFL_SYM_BITS);
310 A[e] = (A[e] & SDEFL_SYM_MSK) | freq_shift;
311 } while (sym_cnt - ++e > 1);
312}
313static void
314sdefl_gen_len_cnt(unsigned *A, unsigned root, unsigned *len_cnt,
315 unsigned max_code_len) {
316 int n;
317 unsigned i;
318 for (i = 0; i <= max_code_len; i++)
319 len_cnt[i] = 0;
320 len_cnt[1] = 2;
321
322 A[root] &= SDEFL_SYM_MSK;
323 for (n = (int)root - 1; n >= 0; n--) {
324 unsigned p = A[n] >> SDEFL_SYM_BITS;
325 unsigned pdepth = A[p] >> SDEFL_SYM_BITS;
326 unsigned depth = pdepth + 1;
327 unsigned len = depth;
328
329 A[n] = (A[n] & SDEFL_SYM_MSK) | (depth << SDEFL_SYM_BITS);
330 if (len >= max_code_len) {
331 len = max_code_len;
332 do len--; while (!len_cnt[len]);
333 }
334 len_cnt[len]--;
335 len_cnt[len+1] += 2;
336 }
337}
338static void
339sdefl_gen_codes(unsigned *A, unsigned char *lens, const unsigned *len_cnt,
340 unsigned max_code_word_len, unsigned sym_cnt) {
341 unsigned i, sym, len, nxt[SDEFL_MAX_CODE_LEN + 1];
342 for (i = 0, len = max_code_word_len; len >= 1; len--) {
343 unsigned cnt = len_cnt[len];
344 while (cnt--) lens[A[i++] & SDEFL_SYM_MSK] = (unsigned char)len;
345 }
346 nxt[0] = nxt[1] = 0;
347 for (len = 2; len <= max_code_word_len; len++)
348 nxt[len] = (nxt[len-1] + len_cnt[len-1]) << 1;
349 for (sym = 0; sym < sym_cnt; sym++)
350 A[sym] = nxt[lens[sym]]++;
351}
352static unsigned
353sdefl_rev(unsigned c, unsigned char n) {
354 c = ((c & 0x5555) << 1) | ((c & 0xAAAA) >> 1);
355 c = ((c & 0x3333) << 2) | ((c & 0xCCCC) >> 2);
356 c = ((c & 0x0F0F) << 4) | ((c & 0xF0F0) >> 4);
357 c = ((c & 0x00FF) << 8) | ((c & 0xFF00) >> 8);
358 return c >> (16-n);
359}
360static void
361sdefl_huff(unsigned char *lens, unsigned *codes, unsigned *freqs,
362 unsigned num_syms, unsigned max_code_len) {
363 unsigned c, *A = codes;
364 unsigned len_cnt[SDEFL_MAX_CODE_LEN + 1];
365 unsigned used_syms = sdefl_sort_sym(num_syms, freqs, lens, A);
366 if (!used_syms) return;
367 if (used_syms == 1) {
368 unsigned s = A[0] & SDEFL_SYM_MSK;
369 unsigned i = s ? s : 1;
370 codes[0] = 0, lens[0] = 1;
371 codes[i] = 1, lens[i] = 1;
372 return;
373 }
374 sdefl_build_tree(A, used_syms);
375 sdefl_gen_len_cnt(A, used_syms-2, len_cnt, max_code_len);
376 sdefl_gen_codes(A, lens, len_cnt, max_code_len, num_syms);
377 for (c = 0; c < num_syms; c++) {
378 codes[c] = sdefl_rev(codes[c], lens[c]);
379 }
380}
381struct sdefl_symcnt {
382 int items;
383 int lit;
384 int off;
385};
386static void
387sdefl_precode(struct sdefl_symcnt *cnt, unsigned *freqs, unsigned *items,
388 const unsigned char *litlen, const unsigned char *offlen) {
389 unsigned *at = items;
390 unsigned run_start = 0;
391
392 unsigned total = 0;
393 unsigned char lens[SDEFL_SYM_MAX + SDEFL_OFF_MAX];
394 for (cnt->lit = SDEFL_SYM_MAX; cnt->lit > 257; cnt->lit--)
395 if (litlen[cnt->lit - 1]) break;
396 for (cnt->off = SDEFL_OFF_MAX; cnt->off > 1; cnt->off--)
397 if (offlen[cnt->off - 1]) break;
398
399 total = (unsigned)(cnt->lit + cnt->off);
400 memcpy(lens, litlen, sizeof(unsigned char) * (size_t)cnt->lit);
401 memcpy(lens + cnt->lit, offlen, sizeof(unsigned char) * (size_t)cnt->off);
402 do {
403 unsigned len = lens[run_start];
404 unsigned run_end = run_start;
405 do run_end++; while (run_end != total && len == lens[run_end]);
406 if (!len) {
407 while ((run_end - run_start) >= 11) {
408 unsigned n = (run_end - run_start) - 11;
409 unsigned xbits = n < 0x7f ? n : 0x7f;
410 freqs[18]++;
411 *at++ = 18u | (xbits << 5u);
412 run_start += 11 + xbits;
413 }
414 if ((run_end - run_start) >= 3) {
415 unsigned n = (run_end - run_start) - 3;
416 unsigned xbits = n < 0x7 ? n : 0x7;
417 freqs[17]++;
418 *at++ = 17u | (xbits << 5u);
419 run_start += 3 + xbits;
420 }
421 } else if ((run_end - run_start) >= 4) {
422 freqs[len]++;
423 *at++ = len;
424 run_start++;
425 do {
426 unsigned xbits = (run_end - run_start) - 3;
427 xbits = xbits < 0x03 ? xbits : 0x03;
428 *at++ = 16 | (xbits << 5);
429 run_start += 3 + xbits;
430 freqs[16]++;
431 } while ((run_end - run_start) >= 3);
432 }
433 while (run_start != run_end) {
434 freqs[len]++;
435 *at++ = len;
436 run_start++;
437 }
438 } while (run_start != total);
439 cnt->items = (int)(at - items);
440}
441struct sdefl_match_codes {
442 int ls, lc;
443 int dc, dx;
444};
445static void
446sdefl_match_codes(struct sdefl_match_codes *cod, int dist, int len) {
447 static const short dxmax[] = {0,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576};
448 static const unsigned char lslot[258+1] = {
449 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12,
450 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16,
451 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18,
452 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20,
453 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
454 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
455 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
456 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
457 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25,
458 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
459 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26,
460 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
461 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
462 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
463 27, 27, 28
464 };
465 cod->ls = lslot[len];
466 cod->lc = 257 + cod->ls;
467 cod->dx = sdefl_ilog2(sdefl_npow2(dist) >> 2);
468 cod->dc = cod->dx ? ((cod->dx + 1) << 1) + (dist > dxmax[cod->dx]) : dist-1;
469}
470static void
471sdefl_match(unsigned char **dst, struct sdefl *s, int dist, int len) {
472 static const char lxn[] = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
473 static const short lmin[] = {3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,
474 51,59,67,83,99,115,131,163,195,227,258};
475 static const short dmin[] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,
476 385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};
477
478 struct sdefl_match_codes cod;
479 sdefl_match_codes(&cod, dist, len);
480 sdefl_put(dst, s, (int)s->cod.word.lit[cod.lc], s->cod.len.lit[cod.lc]);
481 sdefl_put(dst, s, len - lmin[cod.ls], lxn[cod.ls]);
482 sdefl_put(dst, s, (int)s->cod.word.off[cod.dc], s->cod.len.off[cod.dc]);
483 sdefl_put(dst, s, dist - dmin[cod.dc], cod.dx);
484}
485static void
486sdefl_flush(unsigned char **dst, struct sdefl *s, int is_last,
487 const unsigned char *in) {
488 int j, i = 0, item_cnt = 0;
489 struct sdefl_symcnt symcnt = {0};
490 unsigned codes[SDEFL_PRE_MAX];
491 unsigned char lens[SDEFL_PRE_MAX];
492 unsigned freqs[SDEFL_PRE_MAX] = {0};
493 unsigned items[SDEFL_SYM_MAX + SDEFL_OFF_MAX];
494 static const unsigned char perm[SDEFL_PRE_MAX] = {16,17,18,0,8,7,9,6,10,5,11,
495 4,12,3,13,2,14,1,15};
496
497 /* huffman codes */
498 s->freq.lit[SDEFL_EOB]++;
499 sdefl_huff(s->cod.len.lit, s->cod.word.lit, s->freq.lit, SDEFL_SYM_MAX, SDEFL_LIT_LEN_CODES);
500 sdefl_huff(s->cod.len.off, s->cod.word.off, s->freq.off, SDEFL_OFF_MAX, SDEFL_OFF_CODES);
501 sdefl_precode(&symcnt, freqs, items, s->cod.len.lit, s->cod.len.off);
502 sdefl_huff(lens, codes, freqs, SDEFL_PRE_MAX, SDEFL_PRE_CODES);
503 for (item_cnt = SDEFL_PRE_MAX; item_cnt > 4; item_cnt--) {
504 if (lens[perm[item_cnt - 1]]) break;
505 }
506 /* block header */
507 sdefl_put(dst, s, is_last ? 0x01 : 0x00, 1); /* block */
508 sdefl_put(dst, s, 0x02, 2); /* dynamic huffman */
509 sdefl_put(dst, s, symcnt.lit - 257, 5);
510 sdefl_put(dst, s, symcnt.off - 1, 5);
511 sdefl_put(dst, s, item_cnt - 4, 4);
512 for (i = 0; i < item_cnt; ++i)
513 sdefl_put(dst, s, lens[perm[i]], 3);
514 for (i = 0; i < symcnt.items; ++i) {
515 unsigned sym = items[i] & 0x1F;
516 sdefl_put(dst, s, (int)codes[sym], lens[sym]);
517 if (sym < 16) continue;
518 if (sym == 16) sdefl_put(dst, s, items[i] >> 5, 2);
519 else if(sym == 17) sdefl_put(dst, s, items[i] >> 5, 3);
520 else sdefl_put(dst, s, items[i] >> 5, 7);
521 }
522 /* block sequences */
523 for (i = 0; i < s->seq_cnt; ++i) {
524 if (s->seq[i].off >= 0)
525 for (j = 0; j < s->seq[i].len; ++j) {
526 int c = in[s->seq[i].off + j];
527 sdefl_put(dst, s, (int)s->cod.word.lit[c], s->cod.len.lit[c]);
528 }
529 else sdefl_match(dst, s, -s->seq[i].off, s->seq[i].len);
530 }
531 sdefl_put(dst, s, (int)(s)->cod.word.lit[SDEFL_EOB], (s)->cod.len.lit[SDEFL_EOB]);
532 memset(&s->freq, 0, sizeof(s->freq));
533 s->seq_cnt = 0;
534}
535static void
536sdefl_seq(struct sdefl *s, int off, int len) {
537 assert(s->seq_cnt + 2 < SDEFL_SEQ_SIZ);
538 s->seq[s->seq_cnt].off = off;
539 s->seq[s->seq_cnt].len = len;
540 s->seq_cnt++;
541}
542static void
543sdefl_reg_match(struct sdefl *s, int off, int len) {
544 struct sdefl_match_codes cod;
545 sdefl_match_codes(&cod, off, len);
546 s->freq.lit[cod.lc]++;
547 s->freq.off[cod.dc]++;
548}
549struct sdefl_match {
550 int off;
551 int len;
552};
553static void
554sdefl_fnd(struct sdefl_match *m, const struct sdefl *s,
555 int chain_len, int max_match, const unsigned char *in, int p) {
556 int i = s->tbl[sdefl_hash32(&in[p])];
557 int limit = ((p-SDEFL_WIN_SIZ)<SDEFL_NIL)?SDEFL_NIL:(p-SDEFL_WIN_SIZ);
558 while (i > limit) {
559 if (in[i+m->len] == in[p+m->len] &&
560 (sdefl_uload32(&in[i]) == sdefl_uload32(&in[p]))){
561 int n = SDEFL_MIN_MATCH;
562 while (n < max_match && in[i+n] == in[p+n]) n++;
563 if (n > m->len) {
564 m->len = n, m->off = p - i;
565 if (n == max_match) break;
566 }
567 }
568 if (!(--chain_len)) break;
569 i = s->prv[i&SDEFL_WIN_MSK];
570 }
571}
572static int
573sdefl_compr(struct sdefl *s, unsigned char *out, const unsigned char *in,
574 int in_len, int lvl) {
575 unsigned char *q = out;
576 static const unsigned char pref[] = {8,10,14,24,30,48,65,96,130};
577 int max_chain = (lvl < 8) ? (1 << (lvl + 1)): (1 << 13);
578 int n, i = 0, litlen = 0;
579 for (n = 0; n < SDEFL_HASH_SIZ; ++n) {
580 s->tbl[n] = SDEFL_NIL;
581 }
582 do {int blk_end = i + SDEFL_BLK_MAX < in_len ? i + SDEFL_BLK_MAX : in_len;
583 while (i < blk_end) {
584 struct sdefl_match m = {0};
585 int max_match = ((in_len-i)>SDEFL_MAX_MATCH) ? SDEFL_MAX_MATCH:(in_len-i);
586 int nice_match = pref[lvl] < max_match ? pref[lvl] : max_match;
587 int run = 1, inc = 1, run_inc;
588 if (max_match > SDEFL_MIN_MATCH) {
589 sdefl_fnd(&m, s, max_chain, max_match, in, i);
590 }
591 if (lvl >= 5 && m.len >= SDEFL_MIN_MATCH && m.len < nice_match){
592 struct sdefl_match m2 = {0};
593 sdefl_fnd(&m2, s, max_chain, m.len+1, in, i+1);
594 m.len = (m2.len > m.len) ? 0 : m.len;
595 }
596 if (m.len >= SDEFL_MIN_MATCH) {
597 if (litlen) {
598 sdefl_seq(s, i - litlen, litlen);
599 litlen = 0;
600 }
601 sdefl_seq(s, -m.off, m.len);
602 sdefl_reg_match(s, m.off, m.len);
603 if (lvl < 2 && m.len >= nice_match) {
604 inc = m.len;
605 } else {
606 run = m.len;
607 }
608 } else {
609 s->freq.lit[in[i]]++;
610 litlen++;
611 }
612 run_inc = run * inc;
613 if (in_len - (i + run_inc) > SDEFL_MIN_MATCH) {
614 while (run-- > 0) {
615 unsigned h = sdefl_hash32(&in[i]);
616 s->prv[i&SDEFL_WIN_MSK] = s->tbl[h];
617 s->tbl[h] = i, i += inc;
618 }
619 } else {
620 i += run_inc;
621 }
622 }
623 if (litlen) {
624 sdefl_seq(s, i - litlen, litlen);
625 litlen = 0;
626 }
627 sdefl_flush(&q, s, blk_end == in_len, in);
628 } while (i < in_len);
629
630 if (s->bitcnt)
631 sdefl_put(&q, s, 0x00, 8 - s->bitcnt);
632 return (int)(q - out);
633}
634extern int
635sdeflate(struct sdefl *s, void *out, const void *in, int n, int lvl) {
636 s->bits = s->bitcnt = 0;
637 return sdefl_compr(s, (unsigned char*)out, (const unsigned char*)in, n, lvl);
638}
639static unsigned
640sdefl_adler32(unsigned adler32, const unsigned char *in, int in_len) {
641 #define SDEFL_ADLER_INIT (1)
642 const unsigned ADLER_MOD = 65521;
643 unsigned s1 = adler32 & 0xffff;
644 unsigned s2 = adler32 >> 16;
645 unsigned blk_len, i;
646
647 blk_len = in_len % 5552;
648 while (in_len) {
649 for (i = 0; i + 7 < blk_len; i += 8) {
650 s1 += in[0]; s2 += s1;
651 s1 += in[1]; s2 += s1;
652 s1 += in[2]; s2 += s1;
653 s1 += in[3]; s2 += s1;
654 s1 += in[4]; s2 += s1;
655 s1 += in[5]; s2 += s1;
656 s1 += in[6]; s2 += s1;
657 s1 += in[7]; s2 += s1;
658 in += 8;
659 }
660 for (; i < blk_len; ++i) {
661 s1 += *in++, s2 += s1;
662 }
663 s1 %= ADLER_MOD;
664 s2 %= ADLER_MOD;
665 in_len -= blk_len;
666 blk_len = 5552;
667 }
668 return (unsigned)(s2 << 16) + (unsigned)s1;
669}
670extern int
671zsdeflate(struct sdefl *s, void *out, const void *in, int n, int lvl) {
672 int p = 0;
673 unsigned a = 0;
674 unsigned char *q = (unsigned char*)out;
675
676 s->bits = s->bitcnt = 0;
677 sdefl_put(&q, s, 0x78, 8); /* deflate, 32k window */
678 sdefl_put(&q, s, 0x01, 8); /* fast compression */
679 q += sdefl_compr(s, q, (const unsigned char*)in, n, lvl);
680
681 /* append adler checksum */
682 a = sdefl_adler32(SDEFL_ADLER_INIT, (const unsigned char*)in, n);
683 for (p = 0; p < 4; ++p) {
684 sdefl_put(&q, s, (a >> 24) & 0xFF, 8);
685 a <<= 8;
686 }
687 return (int)(q - (unsigned char*)out);
688}
689extern int
690sdefl_bound(int len) {
691 int a = 128 + (len * 110) / 100;
692 int b = 128 + len + ((len / (31 * 1024)) + 1) * 5;
693 return (a > b) ? a : b;
694}
695#endif /* SDEFL_IMPLEMENTATION */
696
#define SDEFL_WIN_MSK
Definition: sdefl.h:120
#define SDEFL_SYM_MAX
Definition: sdefl.h:130
int sdefl_bound(int in_len)
int sdeflate(struct sdefl *s, void *o, const void *i, int n, int lvl)
#define SDEFL_HASH_BITS
Definition: sdefl.h:122
#define SDEFL_MIN_MATCH
Definition: sdefl.h:126
#define SDEFL_HASH_SIZ
Definition: sdefl.h:123
#define SDEFL_OFF_MAX
Definition: sdefl.h:131
#define SDEFL_PRE_MAX
Definition: sdefl.h:132
#define SDEFL_WIN_SIZ
Definition: sdefl.h:119
#define SDEFL_SEQ_SIZ
Definition: sdefl.h:128
#define SDEFL_BLK_MAX
Definition: sdefl.h:127
int zsdeflate(struct sdefl *s, void *o, const void *i, int n, int lvl)
unsigned off[SDEFL_OFF_MAX]
Definition: sdefl.h:144
unsigned lit[SDEFL_SYM_MAX]
Definition: sdefl.h:143
struct sdefl_lens len
Definition: sdefl.h:152
struct sdefl_code_words word
Definition: sdefl.h:151
unsigned lit[SDEFL_SYM_MAX]
Definition: sdefl.h:139
unsigned off[SDEFL_OFF_MAX]
Definition: sdefl.h:140
unsigned char lit[SDEFL_SYM_MAX]
Definition: sdefl.h:147
unsigned char off[SDEFL_OFF_MAX]
Definition: sdefl.h:148
int off
Definition: sdefl.h:155
int len
Definition: sdefl.h:155
Definition: sdefl.h:157
struct sdefl_codes cod
Definition: sdefl.h:165
struct sdefl_freq freq
Definition: sdefl.h:164
int tbl[SDEFL_HASH_SIZ]
Definition: sdefl.h:159
int seq_cnt
Definition: sdefl.h:162
int bitcnt
Definition: sdefl.h:158
int prv[SDEFL_WIN_SIZ]
Definition: sdefl.h:160
int bits
Definition: sdefl.h:158
struct sdefl_seqt seq[SDEFL_SEQ_SIZ]
Definition: sdefl.h:163