]> git.sur5r.net Git - cc65/blobdiff - libsrc/zlib/inflatemem.s
don't use constructor to setup runtime stack
[cc65] / libsrc / zlib / inflatemem.s
index de444ef7ddb5a19c01a25d5138699b404cd0ecad..18bf80cd1e3028007f74549fd60f589e9dca3280 100644 (file)
-;\r
-; Piotr Fusik, 18.11.2001\r
-;\r
-; unsigned __fastcall__ inflatemem (char* dest, const char* source);\r
-;\r
-\r
-       .export         _inflatemem\r
-\r
-       .import         incsp2\r
-       .importzp       sp, sreg\r
-       .importzp       ptr1, ptr2, ptr3, ptr4\r
-       .importzp       tmp1, tmp2, tmp3, tmp4\r
-\r
-; --------------------------------------------------------------------------\r
-;\r
-; Constants\r
-;\r
-\r
-; Maximum length of a Huffman code\r
-MAX_BITS      =        15\r
-; Index in bitsCount, bitsPointer_l and bitsPointer_h for literal tree\r
-LITERAL_TREE  =        0\r
-; Index in bitsCount, bitsPointer_l and bitsPointer_h for distance tree\r
-DISTANCE_TREE =        MAX_BITS\r
-; Size of each of bitsCount, bitsPointer_l and bitsPointer_h\r
-TREES_SIZE    =        2*MAX_BITS+1\r
-\r
-; --------------------------------------------------------------------------\r
-;\r
-; Page zero\r
-;\r
-\r
-; Pointer to compressed data\r
-inputPointer  =        ptr1    ; 2 bytes\r
-; Pointer to uncompressed data\r
-outputPointer =        ptr2    ; 2 bytes\r
-; Buffer for getBit\r
-getBitHold    =        tmp1    ; 1 byte\r
-; Local variables. Variables from different routines use same memory.\r
-cnt           =        tmp2    ; 1 byte\r
-tmp           =        sreg    ; 1 byte\r
-ptr           =        sreg    ; 2 bytes\r
-len           =        ptr3    ; 2 bytes\r
-nl            =        tmp3    ; 1 byte\r
-nd            =        tmp4    ; 1 byte\r
-src           =        ptr4    ; 2 bytes\r
-dest          =        ptr4    ; 2 bytes\r
-\r
-; --------------------------------------------------------------------------\r
-;\r
-; Code\r
-;\r
-\r
-_inflatemem:\r
-\r
-; inputPointer = source\r
-       sta     inputPointer\r
-       stx     inputPointer+1\r
-; outputPointer = dest\r
-       ldy     #0\r
-       lda     (sp),y\r
-       sta     outputPointer\r
-       iny\r
-       lda     (sp),y\r
-       sta     outputPointer+1\r
-\r
-;      ldy     #1\r
-       sty     getBitHold\r
-inflatemem_1:\r
-; Get a bit of EOF and two bits of block type\r
-       ldx     #3\r
-       lda     #0\r
-       jsr     getBits\r
-       lsr     a\r
-       tax\r
-; X contains block type, C contains EOF flag\r
-; Save EOF flag\r
-       php\r
-; Go to the routine decompressing this block\r
-       jsr     callExtr\r
-       plp\r
-       bcc     inflatemem_1\r
-; C flag is set!\r
-\r
-; return outputPointer - dest;\r
-       lda     outputPointer\r
-       ldy     #0\r
-       sbc     (sp),y  ; C flag is set\r
-       pha\r
-       iny\r
-       lda     outputPointer+1\r
-       sbc     (sp),y\r
-       tax\r
-       pla\r
-; pop dest\r
-       jmp     incsp2\r
-\r
-; --------------------------------------------------------------------------\r
-; Go to the routine decompressing block type X\r
-\r
-callExtr:\r
-       lda     extr_h,x\r
-       pha\r
-       lda     extr_l,x\r
-       pha\r
-       rts\r
-\r
-; --------------------------------------------------------------------------\r
-; Decompress 'stored' data block\r
-\r
-inflateCopyBlock:\r
-; Ignore bits until byte boundary\r
-       ldy     #1\r
-       sty     getBitHold\r
-; Get 16-bit length\r
-       ldx     #inputPointer\r
-       lda     (0,x)\r
-       sta     len\r
-       lda     (inputPointer),y\r
-       sta     len+1\r
-; Skip length and one's compliment length\r
-       lda     #4\r
-       clc\r
-       adc     inputPointer\r
-       sta     inputPointer\r
-       bcc     moveBlock\r
-       inc     inputPointer+1\r
-; fall into moveBlock\r
-\r
-; --------------------------------------------------------------------------\r
-; Copy block of length len from (0,x) to output\r
-\r
-moveBlock:\r
-       ldy     len\r
-       beq     moveBlock_1\r
-       ldy     #0\r
-       inc     len+1\r
-moveBlock_1:\r
-       lda     (0,x)\r
-       sta     (outputPointer),y\r
-       inc     0,x\r
-       bne     moveBlock_2\r
-       inc     1,x\r
-moveBlock_2:\r
-       inc     outputPointer\r
-       bne     moveBlock_3\r
-       inc     outputPointer+1\r
-moveBlock_3:\r
-       dec     len\r
-       bne     moveBlock_1\r
-       dec     len+1\r
-       bne     moveBlock_1\r
-       rts\r
-\r
-; --------------------------------------------------------------------------\r
-; Decompress Huffman-coded data block with default Huffman trees:\r
-; literalCodeLength:  144 times 8, 112 times 9\r
-; endCodeLength:      24 times 7, 6 times 8\r
-; distanceCodeLength: 30 times 5, 2 times 8\r
-;                     (2 last codes from literal tree are not used)\r
-\r
-inflateFixedBlock:\r
-       ldx     #159\r
-       stx     distanceCodeLength+32\r
-       lda     #8\r
-inflateFixedBlock_1:\r
-       sta     literalCodeLength-1,x\r
-       sta     literalCodeLength+159-1,x\r
-       dex\r
-       bne     inflateFixedBlock_1\r
-       ldx     #112\r
-       lda     #9\r
-inflateFixedBlock_2:\r
-       sta     literalCodeLength+144-1,x\r
-       dex\r
-       bne     inflateFixedBlock_2\r
-       ldx     #24\r
-       lda     #7\r
-inflateFixedBlock_3:\r
-       sta     endCodeLength-1,x\r
-       dex\r
-       bne     inflateFixedBlock_3\r
-       ldx     #30\r
-       lda     #5+DISTANCE_TREE\r
-inflateFixedBlock_4:\r
-       sta     distanceCodeLength-1,x\r
-       dex\r
-       bne     inflateFixedBlock_4\r
-       jmp     inflateCodes\r
-\r
-; --------------------------------------------------------------------------\r
-; Decompress Huffman-coded data block, reading Huffman trees first\r
-\r
-inflateDynamicBlock:\r
-; numberOfLiteralCodes = getBits(5) + 257;\r
-       ldx     #5\r
-       lda     #<(lengthCodeLength-1)\r
-       jsr     getBits\r
-       sta     nl\r
-; numberOfDistanceCodes = getBits(5) + 1;\r
-       ldx     #5\r
-       lda     #1\r
-       jsr     getBits\r
-       sta     nd\r
-       clc\r
-       adc     nl\r
-       sta     nl\r
-; numberOfTemporaryCodes = getBits(4) + 4;\r
-       lda     #4\r
-       tax\r
-       jsr     getBits\r
-       sta     cnt\r
-; Clear lengths of temporary codes (there're 19 temp codes max),\r
-; clear literalCodeLength-1 (it may be used by temporary code 16)\r
-; and leave #0 in Y\r
-       ldy     #20\r
-       lda     #0\r
-inflateDynamicBlock_1:\r
-       sta     literalCodeLength-2,y\r
-       dey\r
-       bne     inflateDynamicBlock_1\r
-; Get lengths of temporary codes in order stored in bll\r
-inflateDynamicBlock_2:\r
-       ldx     #3\r
-       lda     #0\r
-       jsr     getBits ; does not change Y\r
-       ldx     bll,y\r
-       sta     literalCodeLength,x\r
-       iny\r
-       cpy     cnt\r
-       bcc     inflateDynamicBlock_2\r
-       ror     literalCodeLength+19    ; C flag is set, so it will set b7\r
-; Build tree for temporary codes\r
-       jsr     buildHuffmanTree\r
-\r
-; Use temporary codes to get lengths for literal and distance codes\r
-; dest is target-1, so we can access last written byte by (dest,0)\r
-       lda     #<(literalCodeLength-1)\r
-       sta     dest\r
-       lda     #>(literalCodeLength-1)\r
-       sta     dest+1\r
-inflateDynamicBlock_3:\r
-       jsr     fetchLiteralCode\r
-; Temporary code 0..15: put this length\r
-       ldy     #1\r
-       cmp     #16\r
-       bcc     inflateDynamicBlock_6\r
-       bne     inflateDynamicBlock_4\r
-; Temporary code 16: repeat last length 3..6 times\r
-       ldx     #2\r
-       lda     #3\r
-       jsr     getBits\r
-       tay\r
-       lda     (dest,x)        ; X == 0\r
-       bpl     inflateDynamicBlock_6   ; branch always\r
-inflateDynamicBlock_4:\r
-       lsr     a\r
-; Temporary code 17: put zero length 3..10 times\r
-       lda     #3\r
-       tax\r
-       bcs     inflateDynamicBlock_5\r
-; Temporary code 18: put zero length 11..138 times\r
-       ldx     #7\r
-       lda     #11\r
-inflateDynamicBlock_5:\r
-       jsr     getBits\r
-       tay\r
-       lda     #0\r
-; Write A length Y times\r
-inflateDynamicBlock_6:\r
-       sty     cnt\r
-inflateDynamicBlock_7:\r
-       sta     (dest),y\r
-       dey\r
-       bne     inflateDynamicBlock_7\r
-       lda     cnt\r
-       clc\r
-       adc     dest\r
-       sta     dest\r
-       bcc     inflateDynamicBlock_8\r
-       inc     dest+1\r
-inflateDynamicBlock_8:\r
-       cmp     nl\r
-       bne     inflateDynamicBlock_3\r
-       ldy     dest+1\r
-       sbc     #<endCodeLength ; C flag is set\r
-       bcs     inflateDynamicBlock_11\r
-       dey\r
-inflateDynamicBlock_11:\r
-       cpy     #>endCodeLength\r
-       bcc     inflateDynamicBlock_3\r
-; Mark end of distance lengths\r
-       ldx     nd\r
-       tay\r
-       ror     distanceCodeLength,x    ; C flag is set, so it will set b7\r
-; Move distance lengths to distanceCodeLength table\r
-inflateDynamicBlock_9:\r
-       dex\r
-       lda     endCodeLength,y\r
-; Mark existing codes (of non-zero length) as distance tree codes\r
-       beq     inflateDynamicBlock_10\r
-       pha\r
-       lda     #0\r
-       sta     endCodeLength,y\r
-       pla\r
-       clc\r
-       adc     #DISTANCE_TREE\r
-       sta     distanceCodeLength,x\r
-inflateDynamicBlock_10:\r
-       dey\r
-       txa\r
-       bne     inflateDynamicBlock_9\r
-; fall into inflateCodes\r
-\r
-; --------------------------------------------------------------------------\r
-; Decompress data block basing on given Huffman trees\r
-\r
-inflateCodes:\r
-       jsr     buildHuffmanTree\r
-inflateCodes_1:\r
-       jsr     fetchLiteralCode\r
-       bcc     inflateCodes_2\r
-; Literal code\r
-       ldy     #0\r
-       sta     (outputPointer),y\r
-       inc     outputPointer\r
-       bne     inflateCodes_1\r
-       inc     outputPointer+1\r
-       bcs     inflateCodes_1  ; branch always\r
-; End of block\r
-inflateCodes_ret:\r
-       rts\r
-inflateCodes_2:\r
-       beq     inflateCodes_ret\r
-; Repeat block\r
-       jsr     getValue\r
-       sta     len\r
-       tya\r
-       jsr     getBits\r
-       sta     len+1\r
-       ldx     #DISTANCE_TREE\r
-       jsr     fetchCode\r
-       jsr     getValue\r
-       sta     src\r
-       tya\r
-       jsr     getBits\r
-       sta     src+1\r
-       lda     outputPointer\r
-       sec\r
-       sbc     src\r
-       sta     src\r
-       lda     outputPointer+1\r
-       sbc     src+1\r
-       sta     src+1\r
-       ldx     #src\r
-       jsr     moveBlock\r
-       beq     inflateCodes_1  ; branch always\r
-\r
-; --------------------------------------------------------------------------\r
-; Build Huffman trees basing on code lengths.\r
-; Lengths (in bits) are stored in literalCodeLength.\r
-; A byte with highest bit set marks end of length table.\r
-\r
-buildHuffmanTree:\r
-       lda     #<literalCodeLength\r
-       sta     ptr\r
-       sta     src\r
-       lda     #>literalCodeLength\r
-       sta     ptr+1\r
-       sta     src+1\r
-; Clear counts\r
-       ldy     #TREES_SIZE\r
-       lda     #0\r
-buildHuffmanTree_1:\r
-       sta     bitsCount-1,y\r
-       dey\r
-       bne     buildHuffmanTree_1\r
-       beq     buildHuffmanTree_3      ; branch always\r
-; Count number of codes of each length\r
-buildHuffmanTree_2:\r
-       tax\r
-       inc     bitsCount,x\r
-       iny\r
-       bne     buildHuffmanTree_3\r
-       inc     ptr+1\r
-buildHuffmanTree_3:\r
-       lda     (ptr),y\r
-       bpl     buildHuffmanTree_2\r
-; Calculate pointer for each length\r
-       ldx     #0\r
-       stx     bitsCount\r
-       lda     #<sortedCodes\r
-       ldy     #>sortedCodes\r
-buildHuffmanTree_4:\r
-       sta     bitsPointer_l,x\r
-       tya\r
-       sta     bitsPointer_h,x\r
-       lda     bitsCount,x\r
-       asl     a\r
-       bcc     buildHuffmanTree_5\r
-       iny\r
-       clc\r
-buildHuffmanTree_5:\r
-       adc     bitsPointer_l,x\r
-       bcc     buildHuffmanTree_6\r
-       iny\r
-buildHuffmanTree_6:\r
-       inx\r
-       cpx     #TREES_SIZE\r
-       bcc     buildHuffmanTree_4\r
-.ifpc02\r
-       ldy     #1\r
-.else\r
-       ldy     #0\r
-.endif\r
-       bcs     buildHuffmanTree_9      ; branch always\r
-; Put codes into their place in sorted table\r
-buildHuffmanTree_7:\r
-       beq     buildHuffmanTree_8\r
-       tax\r
-       lda     bitsPointer_l,x\r
-       sta     ptr\r
-       clc\r
-       adc     #2\r
-       sta     bitsPointer_l,x\r
-       lda     bitsPointer_h,x\r
-       sta     ptr+1\r
-       adc     #0\r
-       sta     bitsPointer_h,x\r
-       lda     src\r
-       sbc     #<(endCodeLength-1)     ; C flag is zero\r
-.ifpc02\r
-.else\r
-       iny                             ; ldy #1\r
-.endif\r
-       sta     (ptr),y\r
-       lda     src+1\r
-       sbc     #>(endCodeLength-1)\r
-.ifpc02\r
-       sta     (ptr)\r
-.else\r
-       dey                             ; ldy #0\r
-       sta     (ptr),y\r
-.endif\r
-buildHuffmanTree_8:\r
-       inc     src\r
-       bne     buildHuffmanTree_9\r
-       inc     src+1\r
-buildHuffmanTree_9:\r
-.ifpc02\r
-       lda     (src)\r
-.else\r
-       lda     (src),y\r
-.endif\r
-       bpl     buildHuffmanTree_7\r
-       rts\r
-\r
-; --------------------------------------------------------------------------\r
-; Read code basing on literal tree\r
-\r
-fetchLiteralCode:\r
-       ldx     #LITERAL_TREE\r
-; fall into fetchCode\r
-\r
-; --------------------------------------------------------------------------\r
-; Read code from input stream basing on tree given in X.\r
-; Return code in A, C is set if non-literal code.\r
-\r
-fetchCode:\r
-       lda     #0\r
-fetchCode_1:\r
-       jsr     getBit\r
-       rol     a\r
-       cmp     bitsCount+1,x\r
-       bcc     fetchCode_2\r
-       sbc     bitsCount+1,x\r
-       inx\r
-       bcs     fetchCode_1     ; branch always\r
-fetchCode_2:\r
-       ldy     bitsPointer_l,x\r
-       sty     ptr\r
-       ldy     bitsPointer_h,x\r
-       asl     a\r
-       bcc     fetchCode_3\r
-       iny\r
-fetchCode_3:\r
-       sty     ptr+1\r
-       tay\r
-       lda     (ptr),y\r
-       asl     a\r
-       iny\r
-       lda     (ptr),y\r
-       rts\r
-\r
-; --------------------------------------------------------------------------\r
-; Read low byte of value (length or distance), basing on code A\r
-\r
-getValue:\r
-       tay\r
-       ldx     lengthExtraBits-1,y\r
-       lda     lengthCode_l-1,y\r
-       pha\r
-       lda     lengthCode_h-1,y\r
-       tay\r
-       pla\r
-; fall into getBits\r
-\r
-; --------------------------------------------------------------------------\r
-; Read X-bit number from input stream and adds it to A.\r
-; In case of carry, Y is incremented.\r
-; If X > 8, only 8 bits are read.\r
-; On return X holds number of unread bits: X = (X > 8 ? X - 8 : 0);\r
-\r
-getBits:\r
-       cpx     #0\r
-       beq     getBits_ret\r
-       pha\r
-       lda     #1\r
-       sta     tmp\r
-       pla\r
-getBits_1:\r
-       jsr     getBit\r
-       bcc     getBits_2\r
-       clc\r
-       adc     tmp\r
-       bcc     getBits_2\r
-       iny\r
-getBits_2:\r
-       dex\r
-       beq     getBits_ret\r
-       asl     tmp\r
-       bcc     getBits_1\r
-getBits_ret:\r
-       rts\r
-\r
-; --------------------------------------------------------------------------\r
-; Read single bit from input stream, return it in C flag\r
-\r
-getBit:\r
-       lsr     getBitHold\r
-       bne     getBit_ret\r
-       pha\r
-.ifpc02\r
-       lda     (inputPointer)\r
-.else\r
-       tya\r
-       pha\r
-       ldy     #0\r
-       lda     (inputPointer),y\r
-.endif\r
-       inc     inputPointer\r
-       bne     getBit_1\r
-       inc     inputPointer+1\r
-getBit_1:\r
-       ror     a       ; C flag set\r
-       sta     getBitHold\r
-.ifpc02\r
-.else\r
-       pla\r
-       tay\r
-.endif\r
-       pla\r
-getBit_ret:\r
-       rts\r
-\r
-; --------------------------------------------------------------------------\r
-;\r
-; Constant data\r
-;\r
-\r
-       .rodata\r
-; --------------------------------------------------------------------------\r
-; Addresses of functions extracting different blocks\r
-extr_l:\r
-       .byte   <(inflateCopyBlock-1)\r
-       .byte   <(inflateFixedBlock-1)\r
-       .byte   <(inflateDynamicBlock-1)\r
-extr_h:\r
-       .byte   >(inflateCopyBlock-1)\r
-       .byte   >(inflateFixedBlock-1)\r
-       .byte   >(inflateDynamicBlock-1)\r
-\r
-; --------------------------------------------------------------------------\r
-; Order, in which lengths of temporary codes are stored\r
-bll:\r
-       .byte   16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15\r
-\r
-; --------------------------------------------------------------------------\r
-; Tables for length and distance codes\r
-; Value is Code + getBits(ExtraBits)\r
-\r
-; Base values\r
-lengthCode_l:\r
-       .byte   <3,<4,<5,<6,<7,<8,<9,<10\r
-       .byte   <11,<13,<15,<17,<19,<23,<27,<31\r
-       .byte   <35,<43,<51,<59,<67,<83,<99,<115\r
-       .byte   <131,<163,<195,<227,<258\r
-distanceCode_l:\r
-       .byte   <1,<2,<3,<4,<5,<7,<9,<13\r
-       .byte   <17,<25,<33,<49,<65,<97,<129,<193\r
-       .byte   <257,<385,<513,<769,<1025,<1537,<2049,<3073\r
-       .byte   <4097,<6145,<8193,<12289,<16385,<24577\r
-lengthCode_h:\r
-       .byte   >3,>4,>5,>6,>7,>8,>9,>10\r
-       .byte   >11,>13,>15,>17,>19,>23,>27,>31\r
-       .byte   >35,>43,>51,>59,>67,>83,>99,>115\r
-       .byte   >131,>163,>195,>227,>258\r
-distanceCode_h:\r
-       .byte   >1,>2,>3,>4,>5,>7,>9,>13\r
-       .byte   >17,>25,>33,>49,>65,>97,>129,>193\r
-       .byte   >257,>385,>513,>769,>1025,>1537,>2049,>3073\r
-       .byte   >4097,>6145,>8193,>12289,>16385,>24577\r
-\r
-; Number of extra bits to read\r
-lengthExtraBits:\r
-       .byte   0,0,0,0,0,0,0,0\r
-       .byte   1,1,1,1,2,2,2,2\r
-       .byte   3,3,3,3,4,4,4,4\r
-       .byte   5,5,5,5,0\r
-distanceExtraBits:\r
-       .byte   0,0,0,0,1,1,2,2\r
-       .byte   3,3,4,4,5,5,6,6\r
-       .byte   7,7,8,8,9,9,10,10\r
-       .byte   11,11,12,12,13,13\r
-\r
-; --------------------------------------------------------------------------\r
-;\r
-; Uninitialised data\r
-;\r
-\r
-       .bss\r
-\r
-; --------------------------------------------------------------------------\r
-; Data for building literal tree\r
-\r
-       .res    1\r
-; Length of literal codes\r
-literalCodeLength:\r
-       .res    256\r
-; Length of 'end' code\r
-endCodeLength:\r
-       .res    1\r
-; Length of 'length' codes\r
-lengthCodeLength:\r
-       .res    29\r
-\r
-; --------------------------------------------------------------------------\r
-; Data for building distance tree\r
-\r
-distanceCodeLength:\r
-       .res    30\r
-; For two unused codes in fixed trees and an end flag\r
-       .res    3\r
-\r
-; --------------------------------------------------------------------------\r
-; Huffman tree structure\r
-\r
-; Number of codes of each length\r
-bitsCount:\r
-       .res    TREES_SIZE\r
-\r
-; Pointer to sorted codes of each length\r
-bitsPointer_l:\r
-       .res    TREES_SIZE\r
-bitsPointer_h:\r
-       .res    TREES_SIZE\r
-\r
-; Sorted codes\r
-sortedCodes:\r
-       .res    2*(256+1+29+30+2)\r
-\r
-\r
-\r
+;
+; Piotr Fusik, 21.09.2003
+;
+; unsigned __fastcall__ inflatemem (char* dest, const char* source);
+;
+
+       .export         _inflatemem
+
+       .import         incsp2
+       .importzp       sp, sreg, ptr1, ptr2, ptr3, ptr4, tmp1
+
+; --------------------------------------------------------------------------
+;
+; Constants
+;
+
+; Maximum length of a Huffman code.
+MAX_BITS      =        15
+
+; All Huffman trees are stored in the bitsCount, bitsPointer_l
+; and bitsPointer_h arrays.  There may be two trees: the literal/length tree
+; and the distance tree, or just one - the temporary tree.
+
+; Index in the mentioned arrays for the beginning of the literal/length tree
+; or the temporary tree.
+PRIMARY_TREE  =        0
+
+; Index in the mentioned arrays for the beginning of the distance tree.
+DISTANCE_TREE =        MAX_BITS
+
+; Size of each array.
+TREES_SIZE    =        2*MAX_BITS
+
+
+; --------------------------------------------------------------------------
+;
+; Page zero
+;
+
+; Pointer to the compressed data.
+inputPointer            =      ptr1    ; 2 bytes
+
+; Pointer to the uncompressed data.
+outputPointer           =      ptr2    ; 2 bytes
+
+; Local variables.
+; As far as there is no conflict, same memory locations are used
+; for different variables.
+
+inflateDynamicBlock_cnt =      ptr3    ; 1 byte
+inflateCodes_src        =      ptr3    ; 2 bytes
+buildHuffmanTree_src    =      ptr3    ; 2 bytes
+getNextLength_last      =      ptr3    ; 1 byte
+getNextLength_index     =      ptr3+1  ; 1 byte
+
+buildHuffmanTree_ptr    =      ptr4    ; 2 bytes
+fetchCode_ptr           =      ptr4    ; 2 bytes
+getBits_tmp             =      ptr4    ; 1 byte
+
+moveBlock_len           =      sreg    ; 2 bytes
+inflateDynamicBlock_np  =      sreg    ; 1 byte
+inflateDynamicBlock_nd  =      sreg+1  ; 1 byte
+
+getBit_hold             =      tmp1    ; 1 byte
+
+
+; --------------------------------------------------------------------------
+;
+; Code
+;
+
+_inflatemem:
+
+; inputPointer = source
+       sta     inputPointer
+       stx     inputPointer+1
+; outputPointer = dest
+.ifpc02
+       lda     (sp)
+       ldy     #1
+.else
+       ldy     #0
+       lda     (sp),y
+       iny
+.endif
+       sta     outputPointer
+       lda     (sp),y
+       sta     outputPointer+1
+
+;      ldy     #1
+       sty     getBit_hold
+inflatemem_1:
+; Get a bit of EOF and two bits of block type
+       ldx     #3
+       lda     #0
+       jsr     getBits
+       lsr     a
+; A and Z contain block type, C contains EOF flag
+; Save EOF flag
+       php
+; Go to the routine decompressing this block
+       jsr     callExtr
+       plp
+       bcc     inflatemem_1
+; C flag is set!
+
+; return outputPointer - dest;
+       lda     outputPointer
+.ifpc02
+       sbc     (sp)            ; C flag is set
+       ldy     #1
+.else
+       ldy     #0
+       sbc     (sp),y          ; C flag is set
+       iny
+.endif
+       pha
+       lda     outputPointer+1
+       sbc     (sp),y
+       tax
+       pla
+; pop dest
+       jmp     incsp2
+
+; --------------------------------------------------------------------------
+; Go to proper block decoding routine.
+
+callExtr:
+       bne     inflateCompressedBlock
+
+; --------------------------------------------------------------------------
+; Decompress a 'stored' data block.
+
+inflateCopyBlock:
+; Ignore bits until byte boundary
+       ldy     #1
+       sty     getBit_hold
+; Get 16-bit length
+       ldx     #inputPointer
+       lda     (0,x)
+       sta     moveBlock_len
+       lda     (inputPointer),y
+       sta     moveBlock_len+1
+; Skip the length and one's complement of it
+       lda     #4
+       clc
+       adc     inputPointer
+       sta     inputPointer
+       bcc     moveBlock
+       inc     inputPointer+1
+;      jmp     moveBlock
+
+; --------------------------------------------------------------------------
+; Copy block of length moveBlock_len from (0,x) to the output.
+
+moveBlock:
+       ldy     moveBlock_len
+       beq     moveBlock_1
+.ifpc02
+.else
+       ldy     #0
+.endif
+       inc     moveBlock_len+1
+moveBlock_1:
+       lda     (0,x)
+.ifpc02
+       sta     (outputPointer)
+.else
+       sta     (outputPointer),y
+.endif
+       inc     0,x
+       bne     moveBlock_2
+       inc     1,x
+moveBlock_2:
+       inc     outputPointer
+       bne     moveBlock_3
+       inc     outputPointer+1
+moveBlock_3:
+.ifpc02
+       dey
+.else
+       dec     moveBlock_len
+.endif
+       bne     moveBlock_1
+       dec     moveBlock_len+1
+       bne     moveBlock_1
+       rts
+
+; --------------------------------------------------------------------------
+; Decompress a Huffman-coded data block
+; (A = 1: fixed, A = 2: dynamic).
+
+inflateCompressedBlock:
+       lsr     a
+       bne     inflateDynamicBlock
+; Note: inflateDynamicBlock may assume that A = 1
+
+; --------------------------------------------------------------------------
+; Decompress a Huffman-coded data block with default Huffman trees
+; (defined by the DEFLATE format):
+; literalCodeLength:  144 times 8, 112 times 9
+; endCodeLength:      7
+; lengthCodeLength:   23 times 7, 6 times 8
+; distanceCodeLength: 30 times 5+DISTANCE_TREE, 2 times 8
+;                     (two 8-bit codes from the primary tree are not used).
+
+inflateFixedBlock:
+       ldx     #159
+       stx     distanceCodeLength+32
+       lda     #8
+inflateFixedBlock_1:
+       sta     literalCodeLength-1,x
+       sta     literalCodeLength+159-1,x
+       dex
+       bne     inflateFixedBlock_1
+       ldx     #112
+;      lda     #9
+inflateFixedBlock_2:
+       inc     literalCodeLength+144-1,x       ; sta
+       dex
+       bne     inflateFixedBlock_2
+       ldx     #24
+;      lda     #7
+inflateFixedBlock_3:
+       dec     endCodeLength-1,x       ; sta
+       dex
+       bne     inflateFixedBlock_3
+       ldx     #30
+       lda     #5+DISTANCE_TREE
+inflateFixedBlock_4:
+       sta     distanceCodeLength-1,x
+       dex
+       bne     inflateFixedBlock_4
+       beq     inflateCodes            ; branch always
+
+; --------------------------------------------------------------------------
+; Decompress a Huffman-coded data block, reading Huffman trees first.
+
+inflateDynamicBlock:
+; numberOfPrimaryCodes = 257 + getBits(5)
+       ldx     #5
+;      lda     #1
+       jsr     getBits
+       sta     inflateDynamicBlock_np
+; numberOfDistanceCodes = 1 + getBits(5)
+       ldx     #5
+       lda     #1+29+1
+       jsr     getBits
+       sta     inflateDynamicBlock_nd
+; numberOfTemporaryCodes = 4 + getBits(4)
+       lda     #4
+       tax
+       jsr     getBits
+       sta     inflateDynamicBlock_cnt
+; Get lengths of temporary codes in the order stored in tempCodeLengthOrder
+       txa                     ; lda #0
+       tay
+inflateDynamicBlock_1:
+       ldx     #3              ; A = 0
+       jsr     getBits         ; does not change Y
+inflateDynamicBlock_2:
+       ldx     tempCodeLengthOrder,y
+       sta     literalCodeLength,x
+       lda     #0
+       iny
+       cpy     inflateDynamicBlock_cnt
+       bcc     inflateDynamicBlock_1
+       cpy     #19
+       bcc     inflateDynamicBlock_2
+       ror     literalCodeLength+19    ; C flag is set, so this will set b7
+; Build the tree for temporary codes
+       jsr     buildHuffmanTree
+
+; Use temporary codes to get lengths of literal/length and distance codes
+       ldx     #0
+       ldy     #1
+       stx     getNextLength_last
+inflateDynamicBlock_3:
+       jsr     getNextLength
+       sta     literalCodeLength,x
+       inx
+       bne     inflateDynamicBlock_3
+inflateDynamicBlock_4:
+       jsr     getNextLength
+inflateDynamicBlock_5:
+       sta     endCodeLength,x
+       inx
+       cpx     inflateDynamicBlock_np
+       bcc     inflateDynamicBlock_4
+       lda     #0
+       cpx     #1+29
+       bcc     inflateDynamicBlock_5
+inflateDynamicBlock_6:
+       jsr     getNextLength
+       cmp     #0
+       beq     inflateDynamicBlock_7
+       adc     #DISTANCE_TREE-1        ; C flag is set
+inflateDynamicBlock_7:
+       sta     endCodeLength,x
+       inx
+       cpx     inflateDynamicBlock_nd
+       bcc     inflateDynamicBlock_6
+       ror     endCodeLength,x         ; C flag is set, so this will set b7
+;      jmp     inflateCodes
+
+; --------------------------------------------------------------------------
+; Decompress a data block basing on given Huffman trees.
+
+inflateCodes:
+       jsr     buildHuffmanTree
+inflateCodes_1:
+       jsr     fetchPrimaryCode
+       bcs     inflateCodes_2
+; Literal code
+.ifpc02
+       sta     (outputPointer)
+.else
+       ldy     #0
+       sta     (outputPointer),y
+.endif
+       inc     outputPointer
+       bne     inflateCodes_1
+       inc     outputPointer+1
+       bcc     inflateCodes_1  ; branch always
+; End of block
+inflateCodes_ret:
+       rts
+inflateCodes_2:
+       beq     inflateCodes_ret
+; Restore a block from the look-behind buffer
+       jsr     getValue
+       sta     moveBlock_len
+       tya
+       jsr     getBits
+       sta     moveBlock_len+1
+       ldx     #DISTANCE_TREE
+       jsr     fetchCode
+       jsr     getValue
+       sec
+       eor     #$ff
+       adc     outputPointer
+       sta     inflateCodes_src
+       php
+       tya
+       jsr     getBits
+       plp
+       eor     #$ff
+       adc     outputPointer+1
+       sta     inflateCodes_src+1
+       ldx     #inflateCodes_src
+       jsr     moveBlock
+       beq     inflateCodes_1  ; branch always
+
+; --------------------------------------------------------------------------
+; Build Huffman trees basing on code lengths (in bits).
+; stored in the *CodeLength arrays.
+; A byte with its highest bit set marks the end.
+
+buildHuffmanTree:
+       lda     #<literalCodeLength
+       sta     buildHuffmanTree_src
+       lda     #>literalCodeLength
+       sta     buildHuffmanTree_src+1
+; Clear bitsCount and bitsPointer_l
+       ldy     #2*TREES_SIZE+1
+       lda     #0
+buildHuffmanTree_1:
+       sta     bitsCount-1,y
+       dey
+       bne     buildHuffmanTree_1
+       beq     buildHuffmanTree_3      ; branch always
+; Count number of codes of each length
+buildHuffmanTree_2:
+       tax
+       inc     bitsPointer_l,x
+       iny
+       bne     buildHuffmanTree_3
+       inc     buildHuffmanTree_src+1
+buildHuffmanTree_3:
+       lda     (buildHuffmanTree_src),y
+       bpl     buildHuffmanTree_2
+; Calculate a pointer for each length
+       ldx     #0
+       lda     #<sortedCodes
+       ldy     #>sortedCodes
+       clc
+buildHuffmanTree_4:
+       sta     bitsPointer_l,x
+       tya
+       sta     bitsPointer_h,x
+       lda     bitsPointer_l+1,x
+       adc     bitsPointer_l,x         ; C flag is zero
+       bcc     buildHuffmanTree_5
+       iny
+buildHuffmanTree_5:
+       inx
+       cpx     #TREES_SIZE
+       bcc     buildHuffmanTree_4
+       lda     #>literalCodeLength
+       sta     buildHuffmanTree_src+1
+       ldy     #0
+       bcs     buildHuffmanTree_9      ; branch always
+; Put codes into their place in sorted table
+buildHuffmanTree_6:
+       beq     buildHuffmanTree_7
+       tax
+       lda     bitsPointer_l-1,x
+       sta     buildHuffmanTree_ptr
+       lda     bitsPointer_h-1,x
+       sta     buildHuffmanTree_ptr+1
+       tya
+       ldy     bitsCount-1,x
+       inc     bitsCount-1,x
+       sta     (buildHuffmanTree_ptr),y
+       tay
+buildHuffmanTree_7:
+       iny
+       bne     buildHuffmanTree_9
+       inc     buildHuffmanTree_src+1
+       ldx     #MAX_BITS-1
+buildHuffmanTree_8:
+       lda     bitsCount,x
+       sta     literalCount,x
+       dex
+       bpl     buildHuffmanTree_8
+buildHuffmanTree_9:
+       lda     (buildHuffmanTree_src),y
+       bpl     buildHuffmanTree_6
+       rts
+
+; --------------------------------------------------------------------------
+; Decode next code length using temporary codes.
+
+getNextLength:
+       stx     getNextLength_index
+       dey
+       bne     getNextLength_1
+; Fetch a temporary code
+       jsr     fetchPrimaryCode
+; Temporary code 0..15: put this length
+       ldy     #1
+       cmp     #16
+       bcc     getNextLength_2
+; Temporary code 16: repeat last length 3 + getBits(2) times
+; Temporary code 17: put zero length 3 + getBits(3) times
+; Temporary code 18: put zero length 11 + getBits(7) times
+       tay
+       ldx     tempExtraBits-16,y
+       lda     tempBaseValue-16,y
+       jsr     getBits
+       cpy     #17
+       tay
+       txa                     ; lda #0
+       bcs     getNextLength_2
+getNextLength_1:
+       lda     getNextLength_last
+getNextLength_2:
+       sta     getNextLength_last
+       ldx     getNextLength_index
+       rts
+
+; --------------------------------------------------------------------------
+; Read a code basing on the primary tree.
+
+fetchPrimaryCode:
+       ldx     #PRIMARY_TREE
+;      jmp     fetchCode
+
+; --------------------------------------------------------------------------
+; Read a code from input basing on the tree specified in X.
+; Return low byte of this code in A.
+; For the literal/length tree, the C flag is set if the code is non-literal.
+
+fetchCode:
+       lda     #0
+fetchCode_1:
+       jsr     getBit
+       rol     a
+       inx
+       sec
+       sbc     bitsCount-1,x
+       bcs     fetchCode_1
+       adc     bitsCount-1,x   ; C flag is zero
+       cmp     literalCount-1,x
+       sta     fetchCode_ptr
+       ldy     bitsPointer_l-1,x
+       lda     bitsPointer_h-1,x
+       sta     fetchCode_ptr+1
+       lda     (fetchCode_ptr),y
+       rts
+
+; --------------------------------------------------------------------------
+; Decode low byte of a value (length or distance), basing on the code in A.
+; The result is the base value for this code plus some bits read from input.
+
+getValue:
+       tay
+       ldx     lengthExtraBits-1,y
+       lda     lengthBaseValue_l-1,y
+       pha
+       lda     lengthBaseValue_h-1,y
+       tay
+       pla
+;      jmp     getBits
+
+; --------------------------------------------------------------------------
+; Read X-bit number from the input and add it to A.
+; Increment Y if overflow.
+; If X > 8, read only 8 bits.
+; On return X holds number of unread bits: X = (X > 8 ? X - 8 : 0);
+
+getBits:
+       cpx     #0
+       beq     getBits_ret
+.ifpc02
+       stz     getBits_tmp
+       dec     getBits_tmp
+.else
+       pha
+       lda     #$ff
+       sta     getBits_tmp
+       pla
+.endif
+getBits_1:
+       jsr     getBit
+       bcc     getBits_2
+       sbc     getBits_tmp     ; C flag is set
+       bcc     getBits_2
+       iny
+getBits_2:
+       dex
+       beq     getBits_ret
+       asl     getBits_tmp
+       bmi     getBits_1
+getBits_ret:
+       rts
+
+; --------------------------------------------------------------------------
+; Read a single bit from input, return it in the C flag.
+
+getBit:
+       lsr     getBit_hold
+       bne     getBit_ret
+       pha
+.ifpc02
+       lda     (inputPointer)
+.else
+       sty     getBit_hold
+       ldy     #0
+       lda     (inputPointer),y
+       ldy     getBit_hold
+.endif
+       inc     inputPointer
+       bne     getBit_1
+       inc     inputPointer+1
+getBit_1:
+       ror     a       ; C flag is set
+       sta     getBit_hold
+       pla
+getBit_ret:
+       rts
+
+
+; --------------------------------------------------------------------------
+;
+; Constant data
+;
+
+       .rodata
+; --------------------------------------------------------------------------
+; Arrays for the temporary codes.
+
+; Order, in which lengths of the temporary codes are stored.
+tempCodeLengthOrder:
+       .byte   16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15
+
+; Base values.
+tempBaseValue:
+       .byte   3,3,11
+
+; Number of extra bits to read.
+tempExtraBits:
+       .byte   2,3,7
+
+; --------------------------------------------------------------------------
+; Arrays for the length and distance codes.
+
+; Base values.
+lengthBaseValue_l:
+       .byte   <3,<4,<5,<6,<7,<8,<9,<10
+       .byte   <11,<13,<15,<17,<19,<23,<27,<31
+       .byte   <35,<43,<51,<59,<67,<83,<99,<115
+       .byte   <131,<163,<195,<227,<258
+distanceBaseValue_l:
+       .byte   <1,<2,<3,<4,<5,<7,<9,<13
+       .byte   <17,<25,<33,<49,<65,<97,<129,<193
+       .byte   <257,<385,<513,<769,<1025,<1537,<2049,<3073
+       .byte   <4097,<6145,<8193,<12289,<16385,<24577
+lengthBaseValue_h:
+       .byte   >3,>4,>5,>6,>7,>8,>9,>10
+       .byte   >11,>13,>15,>17,>19,>23,>27,>31
+       .byte   >35,>43,>51,>59,>67,>83,>99,>115
+       .byte   >131,>163,>195,>227,>258
+distanceBaseValue_h:
+       .byte   >1,>2,>3,>4,>5,>7,>9,>13
+       .byte   >17,>25,>33,>49,>65,>97,>129,>193
+       .byte   >257,>385,>513,>769,>1025,>1537,>2049,>3073
+       .byte   >4097,>6145,>8193,>12289,>16385,>24577
+
+; Number of extra bits to read.
+lengthExtraBits:
+       .byte   0,0,0,0,0,0,0,0
+       .byte   1,1,1,1,2,2,2,2
+       .byte   3,3,3,3,4,4,4,4
+       .byte   5,5,5,5,0
+distanceExtraBits:
+       .byte   0,0,0,0,1,1,2,2
+       .byte   3,3,4,4,5,5,6,6
+       .byte   7,7,8,8,9,9,10,10
+       .byte   11,11,12,12,13,13
+
+
+; --------------------------------------------------------------------------
+;
+; Uninitialised data
+;
+
+       .bss
+
+; Number of literal codes of each length in the primary tree
+; (MAX_BITS bytes, overlap with literalCodeLength).
+literalCount:
+
+; --------------------------------------------------------------------------
+; Data for building the primary tree.
+
+; Lengths of literal codes.
+literalCodeLength:
+       .res    256
+; Length of the end code.
+endCodeLength:
+       .res    1
+; Lengths of length codes.
+lengthCodeLength:
+       .res    29
+
+; --------------------------------------------------------------------------
+; Data for building the distance tree.
+
+; Lengths of distance codes.
+distanceCodeLength:
+       .res    30
+; For two unused codes in the fixed trees and an 'end' mark.
+       .res    3
+
+; --------------------------------------------------------------------------
+; The Huffman trees.
+
+; Number of codes of each length.
+bitsCount:
+       .res    TREES_SIZE
+; Pointers to sorted codes of each length.
+bitsPointer_l:
+       .res    TREES_SIZE+1
+bitsPointer_h:
+       .res    TREES_SIZE
+
+; Sorted codes.
+sortedCodes:
+       .res    256+1+29+30+2
+
+
+