112#ifndef SINFL_H_INCLUDED
113#define SINFL_H_INCLUDED
119#define SINFL_PRE_TBL_SIZE 128
120#define SINFL_LIT_TBL_SIZE 1334
121#define SINFL_OFF_TBL_SIZE 402
131extern int sinflate(
void *out,
int cap,
const void *in,
int size);
132extern int zsinflate(
void *out,
int cap,
const void *in,
int size);
140#ifdef SINFL_IMPLEMENTATION
145#if defined(__GNUC__) || defined(__clang__)
146#define sinfl_likely(x) __builtin_expect((x),1)
147#define sinfl_unlikely(x) __builtin_expect((x),0)
149#define sinfl_likely(x) (x)
150#define sinfl_unlikely(x) (x)
154#if __x86_64__ || defined(_WIN32) || defined(_WIN64)
155 #include <emmintrin.h>
156 #define sinfl_char16 __m128i
157 #define sinfl_char16_ld(p) _mm_loadu_si128((const __m128i *)(void*)(p))
158 #define sinfl_char16_str(d,v) _mm_storeu_si128((__m128i*)(void*)(d), v)
159 #define sinfl_char16_char(c) _mm_set1_epi8(c)
160#elif defined(__arm__) || defined(__aarch64__)
161 #include <arm_neon.h>
162 #define sinfl_char16 uint8x16_t
163 #define sinfl_char16_ld(p) vld1q_u8((const unsigned char*)(p))
164 #define sinfl_char16_str(d,v) vst1q_u8((unsigned char*)(d), v)
165 #define sinfl_char16_char(c) vdupq_n_u8(c)
167 #define SINFL_NO_SIMD
172sinfl_bsr(
unsigned n) {
174 _BitScanReverse(&n, n);
176#elif defined(__GNUC__) || defined(__clang__)
177 return 31 - __builtin_clz(n);
180static unsigned long long
181sinfl_read64(
const void *p) {
182 unsigned long long n;
188sinfl_write128(
unsigned char *dst, sinfl_char16 w) {
189 sinfl_char16_str(dst, w);
193sinfl_copy128(
unsigned char **dst,
unsigned char **src) {
194 sinfl_char16 n = sinfl_char16_ld(*src);
195 sinfl_char16_str(*dst, n);
196 *dst += 16, *src += 16;
200sinfl_write64(
unsigned char *dst,
unsigned long long w) {
205sinfl_copy64(
unsigned char **dst,
unsigned char **src) {
206 unsigned long long n;
209 *dst += 8, *src += 8;
213sinfl_refill(
struct sinfl *s) {
219sinfl_peek(
struct sinfl *s,
int cnt) {
220 assert(cnt >= 0 && cnt <= 56);
221 assert(cnt <= s->bitcnt);
222 return s->
bitbuf & ((1ull << cnt) - 1);
225sinfl_consume(
struct sinfl *s,
int cnt) {
226 assert(cnt <= s->bitcnt);
231sinfl__get(
struct sinfl *s,
int cnt) {
232 int res = sinfl_peek(s, cnt);
233 sinfl_consume(s, cnt);
237sinfl_get(
struct sinfl *s,
int cnt) {
239 return sinfl__get(s, cnt);
248sinfl_build_tbl(
struct sinfl_gen *gen,
unsigned *tbl,
int tbl_bits,
251 while (!(gen->cnt = cnt[gen->len])) {
254 tbl_end = 1 << gen->len;
255 while (gen->len <= tbl_bits) {
256 do {
unsigned bit = 0;
257 tbl[gen->word] = (*gen->sorted++ << 16) | gen->len;
258 if (gen->word == tbl_end - 1) {
259 for (; gen->len < tbl_bits; gen->len++) {
260 memcpy(&tbl[tbl_end], tbl, (
size_t)tbl_end *
sizeof(tbl[0]));
265 bit = 1 << sinfl_bsr((
unsigned)(gen->word ^ (tbl_end - 1)));
266 gen->word &= bit - 1;
268 }
while (--gen->cnt);
270 if (++gen->len <= tbl_bits) {
271 memcpy(&tbl[tbl_end], tbl, (
size_t)tbl_end *
sizeof(tbl[0]));
274 }
while (!(gen->cnt = cnt[gen->len]));
279sinfl_build_subtbl(
struct sinfl_gen *gen,
unsigned *tbl,
int tbl_bits,
284 int tbl_end = 1 << tbl_bits;
289 if ((gen->word & ((1 << tbl_bits)-1)) != sub_prefix) {
291 sub_prefix = gen->word & ((1 << tbl_bits)-1);
293 sub_bits = gen->len - tbl_bits;
295 while (used < (1 << sub_bits)) {
297 used = (used << 1) + cnt[tbl_bits + sub_bits];
299 tbl_end = sub_start + (1 << sub_bits);
300 tbl[sub_prefix] = (sub_start << 16) | 0x10 | (sub_bits & 0xf);
303 entry = (*gen->sorted << 16) | ((gen->len - tbl_bits) & 0xf);
305 i = sub_start + (gen->word >> tbl_bits);
306 stride = 1 << (gen->len - tbl_bits);
310 }
while (i < tbl_end);
311 if (gen->word == (1 << gen->len)-1) {
314 bit = 1 << sinfl_bsr(gen->word ^ ((1 << gen->len) - 1));
315 gen->word &= bit - 1;
319 gen->cnt = cnt[++gen->len];
324sinfl_build(
unsigned *tbl,
unsigned char *lens,
int tbl_bits,
int maxlen,
328 int cnt[16] = {0}, off[16]= {0};
329 struct sinfl_gen gen = {0};
333 for (i = 0; i < symcnt; ++i)
336 for (i = 1; i < maxlen; ++i) {
337 off[i + 1] = off[i] + cnt[i];
338 used = (used << 1) + cnt[i];
340 used = (used << 1) + cnt[i];
341 for (i = 0; i < symcnt; ++i)
342 gen.sorted[off[lens[i]]++] = (
short)i;
343 gen.sorted += off[0];
345 if (used < (1 << maxlen)){
346 for (i = 0; i < 1 << tbl_bits; ++i)
347 tbl[i] = (0 << 16u) | 1;
350 if (!sinfl_build_tbl(&gen, tbl, tbl_bits, cnt)){
351 sinfl_build_subtbl(&gen, tbl, tbl_bits, cnt);
355sinfl_decode(
struct sinfl *s,
const unsigned *tbl,
int bit_len) {
357 {
int idx = sinfl_peek(s, bit_len);
358 unsigned key = tbl[idx];
361 int len = key & 0x0f;
362 sinfl_consume(s, bit_len);
363 idx = sinfl_peek(s, len);
364 key = tbl[((key >> 16) & 0xffff) + (unsigned)idx];
366 sinfl_consume(s, key & 0x0f);
367 return (key >> 16) & 0x0fff;}
370sinfl_decompress(
unsigned char *out,
int cap,
const unsigned char *in,
int size) {
371 static const unsigned char order[] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
372 static const short dbase[30+2] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
373 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};
374 static const unsigned char dbits[30+2] = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,
375 10,10,11,11,12,12,13,13,0,0};
376 static const short lbase[29+2] = {3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,
377 43,51,59,67,83,99,115,131,163,195,227,258,0,0};
378 static const unsigned char lbits[29+2] = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,
379 4,4,4,5,5,5,5,0,0,0};
381 const unsigned char *oe = out + cap;
382 const unsigned char *e = in + size, *o = out;
383 enum sinfl_states {hdr,stored,fixed,dyn,blk};
384 enum sinfl_states state = hdr;
385 struct sinfl s = {0};
395 last = sinfl__get(&s,1);
396 type = sinfl__get(&s,2);
398 switch (type) {
default:
return (
int)(out-o);
399 case 0x00: state = stored;
break;
400 case 0x01: state = fixed;
break;
401 case 0x02: state = dyn;
break;}
407 sinfl__get(&s,s.
bitcnt & 7);
408 len = sinfl__get(&s,16);
412 if (len > (e-in) || !len)
414 memcpy(out, in, (
size_t)len);
415 in += len, out += len;
420 int n;
unsigned char lens[288+32];
421 for (n = 0; n <= 143; n++) lens[n] = 8;
422 for (n = 144; n <= 255; n++) lens[n] = 9;
423 for (n = 256; n <= 279; n++) lens[n] = 7;
424 for (n = 280; n <= 287; n++) lens[n] = 8;
425 for (n = 0; n < 32; n++) lens[288+n] = 5;
428 sinfl_build(s.
lits, lens, 10, 15, 288);
429 sinfl_build(s.
dsts, lens + 288, 8, 15, 32);
436 unsigned char nlens[19] = {0}, lens[288+32];
439 {
int nlit = 257 + sinfl__get(&s,5);
440 int ndist = 1 + sinfl__get(&s,5);
441 int nlen = 4 + sinfl__get(&s,4);
442 for (n = 0; n < nlen; n++)
443 nlens[order[n]] = (
unsigned char)sinfl_get(&s,3);
444 sinfl_build(hlens, nlens, 7, 7, 19);
447 for (n = 0; n < nlit + ndist;) {
448 int sym = sinfl_decode(&s, hlens, 7);
449 switch (sym) {
default: lens[n++] = (
unsigned char)sym;
break;
450 case 16:
for (i=3+sinfl_get(&s,2);i;i--,n++) lens[n]=lens[n-1];
break;
451 case 17:
for (i=3+sinfl_get(&s,3);i;i--,n++) lens[n]=0;
break;
452 case 18:
for (i=11+sinfl_get(&s,7);i;i--,n++) lens[n]=0;
break;}
455 sinfl_build(s.
lits, lens, 10, 15, nlit);
456 sinfl_build(s.
dsts, lens + nlit, 8, 15, ndist);
461 int sym = sinfl_decode(&s, s.
lits, 10);
464 *out++ = (
unsigned char)sym;
465 }
else if (sym > 256) {sym -= 257;
467 {
int len = sinfl__get(&s, lbits[sym]) + lbase[sym];
468 int dsym = sinfl_decode(&s, s.
dsts, 8);
469 int offs = sinfl__get(&s, dbits[dsym]) + dbase[dsym];
470 unsigned char *dst = out, *src = out - offs;
471 if (sinfl_unlikely(offs > (
int)(out-o))) {
477 if (sinfl_likely(oe - out >= 16 * 3)) {
480 sinfl_copy128(&dst, &src);
481 sinfl_copy128(&dst, &src);
482 do sinfl_copy128(&dst, &src);
484 }
else if (offs == 1) {
486 sinfl_char16 w = sinfl_char16_char(src[0]);
487 dst = sinfl_write128(dst, w);
488 dst = sinfl_write128(dst, w);
489 do dst = sinfl_write128(dst, w);
499 if (sinfl_likely(oe - out >= 3 * 8 - 3)) {
502 sinfl_copy64(&dst, &src);
503 sinfl_copy64(&dst, &src);
504 do sinfl_copy64(&dst, &src);
506 }
else if (offs == 1) {
508 unsigned int c = src[0];
509 unsigned int hw = (c << 24u) | (c << 16u) | (c << 8u) | (
unsigned)c;
510 unsigned long long w = (
unsigned long long)hw << 32llu | hw;
511 dst = sinfl_write64(dst, w);
512 dst = sinfl_write64(dst, w);
513 do dst = sinfl_write64(dst, w);
531 if (last)
return (
int)(out-o);
540sinflate(
void *out,
int cap,
const void *in,
int size) {
541 return sinfl_decompress((
unsigned char*)out, cap, (
const unsigned char*)in, size);
544sinfl_adler32(
unsigned adler32,
const unsigned char *in,
int in_len) {
545 const unsigned ADLER_MOD = 65521;
546 unsigned s1 = adler32 & 0xffff;
547 unsigned s2 = adler32 >> 16;
550 blk_len = in_len % 5552;
552 for (i=0; i + 7 < blk_len; i += 8) {
553 s1 += in[0]; s2 += s1;
554 s1 += in[1]; s2 += s1;
555 s1 += in[2]; s2 += s1;
556 s1 += in[3]; s2 += s1;
557 s1 += in[4]; s2 += s1;
558 s1 += in[5]; s2 += s1;
559 s1 += in[6]; s2 += s1;
560 s1 += in[7]; s2 += s1;
563 for (; i < blk_len; ++i)
564 s1 += *in++, s2 += s1;
565 s1 %= ADLER_MOD; s2 %= ADLER_MOD;
568 }
return (
unsigned)(s2 << 16) + (
unsigned)s1;
571zsinflate(
void *out,
int cap,
const void *mem,
int size) {
572 const unsigned char *in = (
const unsigned char*)mem;
574 const unsigned char *eob = in + size - 4;
575 int n = sinfl_decompress((
unsigned char*)out, cap, in + 2u, size);
576 unsigned a = sinfl_adler32(1u, (
unsigned char*)out, n);
577 unsigned h = eob[0] << 24 | eob[1] << 16 | eob[2] << 8 | eob[3] << 0;
578 return a == h ? n : -1;
int zsinflate(void *out, int cap, const void *in, int size)
int sinflate(void *out, int cap, const void *in, int size)
#define SINFL_OFF_TBL_SIZE
#define SINFL_LIT_TBL_SIZE
#define SINFL_PRE_TBL_SIZE
unsigned dsts[SINFL_OFF_TBL_SIZE]
unsigned long long bitbuf
const unsigned char * bitptr
unsigned lits[SINFL_LIT_TBL_SIZE]