2 LZ4 - Fast LZ compression algorithm
3 Copyright (C) 2011-2013, Yann Collet.
4 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following disclaimer
14 in the documentation and/or other materials provided with the
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 You can contact the author at :
30 - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
31 - LZ4 source repository : http://code.google.com/p/lz4/
37 Note : this source file requires "lz4_encoder.h"
40 //**************************************
42 //**************************************
44 // Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
45 // Increasing memory usage improves compression ratio
46 // Reduced memory usage can improve speed, due to cache effect
47 // Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
48 #define MEMORY_USAGE 14
51 // Select how default compression function will allocate memory for its hash table,
52 // in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)).
53 // Default allocation strategy is to use stack (HEAPMODE 0)
54 // Note : explicit functions *_stack* and *_heap* are unaffected by this setting
57 // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
58 // This will provide a small boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU.
59 // You can set this option to 1 in situations where data will remain within closed environment
60 // This option is useless on Little_Endian CPU (such as x86)
61 //#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1
63 #ifdef HAVE_BIG_ENDIAN
64 #define LZ4_BIG_ENDIAN 1
66 #if defined(__sparc) || defined(__sparc__) \
67 || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
68 || defined(__hpux) || defined(__hppa) \
69 || defined(_MIPSEB) || defined(__s390__)
70 #error "BIG Endian detected but not set"
74 //**************************************
75 // CPU Feature Detection
76 //**************************************
78 #if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \
79 || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \
80 || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \
81 || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) // Detects 64 bits mode
88 // Unaligned memory access is automatically enabled for "common" CPU, such as x86.
89 // For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected
90 // If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance
91 #if defined(__ARM_FEATURE_UNALIGNED)
92 # define LZ4_FORCE_UNALIGNED_ACCESS 1
95 // Define this parameter if your target system or compiler does not support hardware bit count
96 #if defined(_MSC_VER) && defined(_WIN32_WCE) // Visual Studio for Windows CE does not support Hardware bit count
97 # define LZ4_FORCE_SW_BITCOUNT
101 //**************************************
103 //**************************************
104 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
105 /* "restrict" is a known keyword */
106 #elif !defined restrict
107 # define restrict // Disable restrict
111 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
113 #ifdef _MSC_VER // Visual Studio
114 # include <intrin.h> // For Visual 2005
115 # if LZ4_ARCH64 // 64-bit
116 # pragma intrinsic(_BitScanForward64) // For Visual 2005
117 # pragma intrinsic(_BitScanReverse64) // For Visual 2005
119 # pragma intrinsic(_BitScanForward) // For Visual 2005
120 # pragma intrinsic(_BitScanReverse) // For Visual 2005
122 # pragma warning(disable : 4127) // disable: C4127: conditional expression is constant
126 # define lz4_bswap16(x) _byteswap_ushort(x)
128 # define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
131 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
132 # define expect(expr,value) (__builtin_expect ((expr),(value)) )
134 # define expect(expr,value) (expr)
137 #define likely(expr) expect((expr) != 0, 1)
138 #define unlikely(expr) expect((expr) != 0, 0)
141 //**************************************
143 //**************************************
144 #include <stdlib.h> // for malloc
145 #include <string.h> // for memset
149 //**************************************
151 //**************************************
152 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
154 typedef uint8_t BYTE;
155 typedef uint16_t U16;
156 typedef uint32_t U32;
158 typedef uint64_t U64;
160 typedef unsigned char BYTE;
161 typedef unsigned short U16;
162 typedef unsigned int U32;
163 typedef signed int S32;
164 typedef unsigned long long U64;
167 #if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
168 # define _PACKED __attribute__ ((packed))
173 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
174 # pragma pack(push, 1)
177 typedef struct _U16_S { U16 v; } _PACKED U16_S;
178 typedef struct _U32_S { U32 v; } _PACKED U32_S;
179 typedef struct _U64_S { U64 v; } _PACKED U64_S;
181 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
185 #define A64(x) (((U64_S *)(x))->v)
186 #define A32(x) (((U32_S *)(x))->v)
187 #define A16(x) (((U16_S *)(x))->v)
190 //**************************************
192 //**************************************
193 #define HASHTABLESIZE (1 << MEMORY_USAGE)
198 #define LASTLITERALS 5
199 #define MFLIMIT (COPYLENGTH+MINMATCH)
200 #define MINLENGTH (MFLIMIT+1)
202 #define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
203 #define SKIPSTRENGTH 6 // Increasing this value will make the compression run slower on incompressible data
206 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
209 #define ML_MASK ((1U<<ML_BITS)-1)
210 #define RUN_BITS (8-ML_BITS)
211 #define RUN_MASK ((1U<<RUN_BITS)-1)
214 //**************************************
215 // Architecture-specific macros
216 //**************************************
217 #if LZ4_ARCH64 // 64-bit
221 # define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8;
222 # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d)
223 # define LZ4_SECURECOPY(s,d,e) if (d<e) LZ4_WILDCOPY(s,d,e)
225 # define INITBASE(base) const BYTE* const base = ip
230 # define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4;
231 # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
232 # define LZ4_SECURECOPY LZ4_WILDCOPY
233 # define HTYPE const BYTE*
234 # define INITBASE(base) const int base = 0
237 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
238 # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
239 # define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
240 #else // Little Endian
241 # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
242 # define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
246 //**************************************
248 //**************************************
249 #define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e);
250 #define LZ4_BLINDCOPY(s,d,l) { BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; }
253 //****************************
255 //****************************
258 static inline int LZ4_NbCommonBytes (register U64 val)
260 #if defined(LZ4_BIG_ENDIAN)
261 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
263 _BitScanReverse64( &r, val );
265 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
266 return (__builtin_clzll(val) >> 3);
269 if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
270 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
275 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
277 _BitScanForward64( &r, val );
279 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
280 return (__builtin_ctzll(val) >> 3);
282 static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
283 return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
290 static inline int LZ4_NbCommonBytes (register U32 val)
292 #if defined(LZ4_BIG_ENDIAN)
293 # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
295 _BitScanReverse( &r, val );
297 # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
298 return (__builtin_clz(val) >> 3);
301 if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
306 # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
308 _BitScanForward( &r, val );
310 # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
311 return (__builtin_ctz(val) >> 3);
313 static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
314 return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
323 //******************************
324 // Compression functions
325 //******************************
328 int LZ4_compress_stack(
333 Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
334 Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
335 return : the number of bytes written in buffer 'dest'
337 #define FUNCTION_NAME LZ4_compress_stack
338 #include "lz4_encoder.h"
342 int LZ4_compress_stack_limitedOutput(
348 Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
349 If it cannot achieve it, compression will stop, and result of the function will be zero.
350 return : the number of bytes written in buffer 'dest', or 0 if the compression fails
352 #define FUNCTION_NAME LZ4_compress_stack_limitedOutput
353 #define LIMITED_OUTPUT
354 #include "lz4_encoder.h"
358 int LZ4_compress64k_stack(
363 Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
364 This function compresses better than LZ4_compress_stack(), on the condition that
365 'inputSize' must be < to LZ4_64KLIMIT, or the function will fail.
366 Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
367 return : the number of bytes written in buffer 'dest', or 0 if compression fails
369 #define FUNCTION_NAME LZ4_compress64k_stack
371 #include "lz4_encoder.h"
375 int LZ4_compress64k_stack_limitedOutput(
381 Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
382 This function compresses better than LZ4_compress_stack_limitedOutput(), on the condition that
383 'inputSize' must be < to LZ4_64KLIMIT, or the function will fail.
384 If it cannot achieve it, compression will stop, and result of the function will be zero.
385 return : the number of bytes written in buffer 'dest', or 0 if the compression fails
387 #define FUNCTION_NAME LZ4_compress64k_stack_limitedOutput
389 #define LIMITED_OUTPUT
390 #include "lz4_encoder.h"
394 void* LZ4_createHeapMemory();
395 int LZ4_freeHeapMemory(void* ctx);
397 Used to allocate and free hashTable memory
398 to be used by the LZ4_compress_heap* family of functions.
399 LZ4_createHeapMemory() returns NULL is memory allocation fails.
401 void* LZ4_create() { return malloc(HASHTABLESIZE); }
402 int LZ4_free(void* ctx) { free(ctx); return 0; }
406 int LZ4_compress_heap(
412 Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
413 The memory used for compression must be created by LZ4_createHeapMemory() and provided by pointer 'ctx'.
414 Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
415 return : the number of bytes written in buffer 'dest'
417 #define FUNCTION_NAME LZ4_compress_heap
418 #define USE_HEAPMEMORY
419 #include "lz4_encoder.h"
423 int LZ4_compress_heap_limitedOutput(
430 Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
431 If it cannot achieve it, compression will stop, and result of the function will be zero.
432 The memory used for compression must be created by LZ4_createHeapMemory() and provided by pointer 'ctx'.
433 return : the number of bytes written in buffer 'dest', or 0 if the compression fails
435 #define FUNCTION_NAME LZ4_compress_heap_limitedOutput
436 #define LIMITED_OUTPUT
437 #define USE_HEAPMEMORY
438 #include "lz4_encoder.h"
442 int LZ4_compress64k_heap(
448 Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
449 The memory used for compression must be created by LZ4_createHeapMemory() and provided by pointer 'ctx'.
450 'inputSize' must be < to LZ4_64KLIMIT, or the function will fail.
451 Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
452 return : the number of bytes written in buffer 'dest'
454 #define FUNCTION_NAME LZ4_compress64k_heap
456 #define USE_HEAPMEMORY
457 #include "lz4_encoder.h"
461 int LZ4_compress64k_heap_limitedOutput(
468 Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
469 If it cannot achieve it, compression will stop, and result of the function will be zero.
470 The memory used for compression must be created by LZ4_createHeapMemory() and provided by pointer 'ctx'.
471 'inputSize' must be < to LZ4_64KLIMIT, or the function will fail.
472 return : the number of bytes written in buffer 'dest', or 0 if the compression fails
474 #define FUNCTION_NAME LZ4_compress64k_heap_limitedOutput
476 #define LIMITED_OUTPUT
477 #define USE_HEAPMEMORY
478 #include "lz4_encoder.h"
481 int LZ4_compress(const char* source, char* dest, int inputSize)
484 void* ctx = LZ4_create();
486 if (ctx == NULL) return 0; // Failed allocation => compression not done
487 if (inputSize < LZ4_64KLIMIT)
488 result = LZ4_compress64k_heap(ctx, source, dest, inputSize);
489 else result = LZ4_compress_heap(ctx, source, dest, inputSize);
493 if (inputSize < (int)LZ4_64KLIMIT) return LZ4_compress64k_stack(source, dest, inputSize);
494 return LZ4_compress_stack(source, dest, inputSize);
499 int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
502 void* ctx = LZ4_create();
504 if (ctx == NULL) return 0; // Failed allocation => compression not done
505 if (inputSize < LZ4_64KLIMIT)
506 result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest, inputSize, maxOutputSize);
507 else result = LZ4_compress_heap_limitedOutput(ctx, source, dest, inputSize, maxOutputSize);
511 if (inputSize < (int)LZ4_64KLIMIT) return LZ4_compress64k_stack_limitedOutput(source, dest, inputSize, maxOutputSize);
512 return LZ4_compress_stack_limitedOutput(source, dest, inputSize, maxOutputSize);
517 //****************************
518 // Decompression functions
519 //****************************
521 typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive;
522 typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive;
523 typedef enum { full = 0, partial = 1 } exit_directive;
526 // This generic decompression function cover all use cases.
527 // It shall be instanciated several times, using different sets of directives
528 // Note that it is essential this generic function is really inlined,
529 // in order to remove useless branches during compilation optimisation.
530 static inline int LZ4_decompress_generic(
534 int outputSize, // OutputSize must be != 0; if endOnInput==endOnInputSize, this value is the max size of Output Buffer.
536 int endOnInput, // endOnOutputSize, endOnInputSize
537 int prefix64k, // noPrefix, withPrefix
538 int partialDecoding, // full, partial
539 int targetOutputSize // only used if partialDecoding==partial
543 const BYTE* restrict ip = (const BYTE*) source;
545 const BYTE* const iend = ip + inputSize;
547 BYTE* op = (BYTE*) dest;
548 BYTE* const oend = op + outputSize;
550 BYTE* oexit = op + targetOutputSize;
552 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
554 size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
559 if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; // targetOutputSize too large, better decode everything
560 if unlikely(outputSize==0) goto _output_error; // Empty output buffer
571 if ((length=(token>>ML_BITS)) == RUN_MASK)
574 while (((endOnInput)?ip<iend:1) && (s==255))
583 if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
584 || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
588 if (cpy > oend) goto _output_error; // Error : write attempt beyond end of output buffer
589 if ((endOnInput) && (ip+length > iend)) goto _output_error; // Error : read attempt beyond end of input buffer
593 if ((!endOnInput) && (cpy != oend)) goto _output_error; // Error : block decoding must stop exactly there, due to parsing restrictions
594 if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; // Error : not enough place for another match (min 4) + 5 literals
596 memcpy(op, ip, length);
599 break; // Necessarily EOF, due to parsing restrictions
601 LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
604 LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
605 if ((prefix64k==noPrefix) && unlikely(ref < (BYTE* const)dest)) goto _output_error; // Error : offset outside destination buffer
608 if ((length=(token&ML_MASK)) == ML_MASK)
610 while (endOnInput ? ip<iend-(LASTLITERALS+1) : 1) // A minimum nb of input bytes must remain for LASTLITERALS + token
614 if (s==255) continue;
619 // copy repeated sequence
620 if unlikely((op-ref)<STEPSIZE)
623 size_t dec64 = dec64table[op-ref];
625 const size_t dec64 = 0;
631 op += 4, ref += 4; ref -= dec32table[op-ref];
633 op += STEPSIZE-4; ref -= dec64;
634 } else { LZ4_COPYSTEP(ref,op); }
635 cpy = op + length - (STEPSIZE-4);
637 if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
639 if (cpy > oend-LASTLITERALS) goto _output_error; // Error : last 5 bytes must be literals
640 LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
641 while(op<cpy) *op++=*ref++;
645 LZ4_WILDCOPY(ref, op, cpy);
646 op=cpy; // correction
651 return (int) (((char*)op)-dest); // Nb of output bytes decoded
653 return (int) (((char*)ip)-source); // Nb of input bytes read
655 // Overflow error detected
657 return (int) (-(((char*)ip)-source))-1;
661 int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize)
663 return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, endOnInputSize, noPrefix, full, 0);
666 int LZ4_decompress_fast(const char* source, char* dest, int outputSize)
668 return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, noPrefix, full, 0);
671 int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int inputSize, int maxOutputSize)
673 return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, endOnInputSize, withPrefix, full, 0);
676 int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int outputSize)
678 return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, withPrefix, full, 0);
681 int LZ4_decompress_safe_partial(const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize)
683 return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, endOnInputSize, noPrefix, partial, targetOutputSize);