]> git.sur5r.net Git - cc65/blobdiff - libsrc/zlib/inflatemem.s
Merged pull request #459 from "pmjdebruijn/pragma".
[cc65] / libsrc / zlib / inflatemem.s
index de444ef7ddb5a19c01a25d5138699b404cd0ecad..27802fbff13bc9b4b6fc109384fa8700dc31d4aa 100644 (file)
-;\r
-; Piotr Fusik, 18.11.2001\r
-;\r
-; unsigned __fastcall__ inflatemem (char* dest, const char* source);\r
-;\r
-\r
-       .export         _inflatemem\r
-\r
-       .import         incsp2\r
-       .importzp       sp, sreg\r
-       .importzp       ptr1, ptr2, ptr3, ptr4\r
-       .importzp       tmp1, tmp2, tmp3, tmp4\r
-\r
-; --------------------------------------------------------------------------\r
-;\r
-; Constants\r
-;\r
-\r
-; Maximum length of a Huffman code\r
-MAX_BITS      =        15\r
-; Index in bitsCount, bitsPointer_l and bitsPointer_h for literal tree\r
-LITERAL_TREE  =        0\r
-; Index in bitsCount, bitsPointer_l and bitsPointer_h for distance tree\r
-DISTANCE_TREE =        MAX_BITS\r
-; Size of each of bitsCount, bitsPointer_l and bitsPointer_h\r
-TREES_SIZE    =        2*MAX_BITS+1\r
-\r
-; --------------------------------------------------------------------------\r
-;\r
-; Page zero\r
-;\r
-\r
-; Pointer to compressed data\r
-inputPointer  =        ptr1    ; 2 bytes\r
-; Pointer to uncompressed data\r
-outputPointer =        ptr2    ; 2 bytes\r
-; Buffer for getBit\r
-getBitHold    =        tmp1    ; 1 byte\r
-; Local variables. Variables from different routines use same memory.\r
-cnt           =        tmp2    ; 1 byte\r
-tmp           =        sreg    ; 1 byte\r
-ptr           =        sreg    ; 2 bytes\r
-len           =        ptr3    ; 2 bytes\r
-nl            =        tmp3    ; 1 byte\r
-nd            =        tmp4    ; 1 byte\r
-src           =        ptr4    ; 2 bytes\r
-dest          =        ptr4    ; 2 bytes\r
-\r
-; --------------------------------------------------------------------------\r
-;\r
-; Code\r
-;\r
-\r
-_inflatemem:\r
-\r
-; inputPointer = source\r
-       sta     inputPointer\r
-       stx     inputPointer+1\r
-; outputPointer = dest\r
-       ldy     #0\r
-       lda     (sp),y\r
-       sta     outputPointer\r
-       iny\r
-       lda     (sp),y\r
-       sta     outputPointer+1\r
-\r
-;      ldy     #1\r
-       sty     getBitHold\r
-inflatemem_1:\r
-; Get a bit of EOF and two bits of block type\r
-       ldx     #3\r
-       lda     #0\r
-       jsr     getBits\r
-       lsr     a\r
-       tax\r
-; X contains block type, C contains EOF flag\r
-; Save EOF flag\r
-       php\r
-; Go to the routine decompressing this block\r
-       jsr     callExtr\r
-       plp\r
-       bcc     inflatemem_1\r
-; C flag is set!\r
-\r
-; return outputPointer - dest;\r
-       lda     outputPointer\r
-       ldy     #0\r
-       sbc     (sp),y  ; C flag is set\r
-       pha\r
-       iny\r
-       lda     outputPointer+1\r
-       sbc     (sp),y\r
-       tax\r
-       pla\r
-; pop dest\r
-       jmp     incsp2\r
-\r
-; --------------------------------------------------------------------------\r
-; Go to the routine decompressing block type X\r
-\r
-callExtr:\r
-       lda     extr_h,x\r
-       pha\r
-       lda     extr_l,x\r
-       pha\r
-       rts\r
-\r
-; --------------------------------------------------------------------------\r
-; Decompress 'stored' data block\r
-\r
-inflateCopyBlock:\r
-; Ignore bits until byte boundary\r
-       ldy     #1\r
-       sty     getBitHold\r
-; Get 16-bit length\r
-       ldx     #inputPointer\r
-       lda     (0,x)\r
-       sta     len\r
-       lda     (inputPointer),y\r
-       sta     len+1\r
-; Skip length and one's compliment length\r
-       lda     #4\r
-       clc\r
-       adc     inputPointer\r
-       sta     inputPointer\r
-       bcc     moveBlock\r
-       inc     inputPointer+1\r
-; fall into moveBlock\r
-\r
-; --------------------------------------------------------------------------\r
-; Copy block of length len from (0,x) to output\r
-\r
-moveBlock:\r
-       ldy     len\r
-       beq     moveBlock_1\r
-       ldy     #0\r
-       inc     len+1\r
-moveBlock_1:\r
-       lda     (0,x)\r
-       sta     (outputPointer),y\r
-       inc     0,x\r
-       bne     moveBlock_2\r
-       inc     1,x\r
-moveBlock_2:\r
-       inc     outputPointer\r
-       bne     moveBlock_3\r
-       inc     outputPointer+1\r
-moveBlock_3:\r
-       dec     len\r
-       bne     moveBlock_1\r
-       dec     len+1\r
-       bne     moveBlock_1\r
-       rts\r
-\r
-; --------------------------------------------------------------------------\r
-; Decompress Huffman-coded data block with default Huffman trees:\r
-; literalCodeLength:  144 times 8, 112 times 9\r
-; endCodeLength:      24 times 7, 6 times 8\r
-; distanceCodeLength: 30 times 5, 2 times 8\r
-;                     (2 last codes from literal tree are not used)\r
-\r
-inflateFixedBlock:\r
-       ldx     #159\r
-       stx     distanceCodeLength+32\r
-       lda     #8\r
-inflateFixedBlock_1:\r
-       sta     literalCodeLength-1,x\r
-       sta     literalCodeLength+159-1,x\r
-       dex\r
-       bne     inflateFixedBlock_1\r
-       ldx     #112\r
-       lda     #9\r
-inflateFixedBlock_2:\r
-       sta     literalCodeLength+144-1,x\r
-       dex\r
-       bne     inflateFixedBlock_2\r
-       ldx     #24\r
-       lda     #7\r
-inflateFixedBlock_3:\r
-       sta     endCodeLength-1,x\r
-       dex\r
-       bne     inflateFixedBlock_3\r
-       ldx     #30\r
-       lda     #5+DISTANCE_TREE\r
-inflateFixedBlock_4:\r
-       sta     distanceCodeLength-1,x\r
-       dex\r
-       bne     inflateFixedBlock_4\r
-       jmp     inflateCodes\r
-\r
-; --------------------------------------------------------------------------\r
-; Decompress Huffman-coded data block, reading Huffman trees first\r
-\r
-inflateDynamicBlock:\r
-; numberOfLiteralCodes = getBits(5) + 257;\r
-       ldx     #5\r
-       lda     #<(lengthCodeLength-1)\r
-       jsr     getBits\r
-       sta     nl\r
-; numberOfDistanceCodes = getBits(5) + 1;\r
-       ldx     #5\r
-       lda     #1\r
-       jsr     getBits\r
-       sta     nd\r
-       clc\r
-       adc     nl\r
-       sta     nl\r
-; numberOfTemporaryCodes = getBits(4) + 4;\r
-       lda     #4\r
-       tax\r
-       jsr     getBits\r
-       sta     cnt\r
-; Clear lengths of temporary codes (there're 19 temp codes max),\r
-; clear literalCodeLength-1 (it may be used by temporary code 16)\r
-; and leave #0 in Y\r
-       ldy     #20\r
-       lda     #0\r
-inflateDynamicBlock_1:\r
-       sta     literalCodeLength-2,y\r
-       dey\r
-       bne     inflateDynamicBlock_1\r
-; Get lengths of temporary codes in order stored in bll\r
-inflateDynamicBlock_2:\r
-       ldx     #3\r
-       lda     #0\r
-       jsr     getBits ; does not change Y\r
-       ldx     bll,y\r
-       sta     literalCodeLength,x\r
-       iny\r
-       cpy     cnt\r
-       bcc     inflateDynamicBlock_2\r
-       ror     literalCodeLength+19    ; C flag is set, so it will set b7\r
-; Build tree for temporary codes\r
-       jsr     buildHuffmanTree\r
-\r
-; Use temporary codes to get lengths for literal and distance codes\r
-; dest is target-1, so we can access last written byte by (dest,0)\r
-       lda     #<(literalCodeLength-1)\r
-       sta     dest\r
-       lda     #>(literalCodeLength-1)\r
-       sta     dest+1\r
-inflateDynamicBlock_3:\r
-       jsr     fetchLiteralCode\r
-; Temporary code 0..15: put this length\r
-       ldy     #1\r
-       cmp     #16\r
-       bcc     inflateDynamicBlock_6\r
-       bne     inflateDynamicBlock_4\r
-; Temporary code 16: repeat last length 3..6 times\r
-       ldx     #2\r
-       lda     #3\r
-       jsr     getBits\r
-       tay\r
-       lda     (dest,x)        ; X == 0\r
-       bpl     inflateDynamicBlock_6   ; branch always\r
-inflateDynamicBlock_4:\r
-       lsr     a\r
-; Temporary code 17: put zero length 3..10 times\r
-       lda     #3\r
-       tax\r
-       bcs     inflateDynamicBlock_5\r
-; Temporary code 18: put zero length 11..138 times\r
-       ldx     #7\r
-       lda     #11\r
-inflateDynamicBlock_5:\r
-       jsr     getBits\r
-       tay\r
-       lda     #0\r
-; Write A length Y times\r
-inflateDynamicBlock_6:\r
-       sty     cnt\r
-inflateDynamicBlock_7:\r
-       sta     (dest),y\r
-       dey\r
-       bne     inflateDynamicBlock_7\r
-       lda     cnt\r
-       clc\r
-       adc     dest\r
-       sta     dest\r
-       bcc     inflateDynamicBlock_8\r
-       inc     dest+1\r
-inflateDynamicBlock_8:\r
-       cmp     nl\r
-       bne     inflateDynamicBlock_3\r
-       ldy     dest+1\r
-       sbc     #<endCodeLength ; C flag is set\r
-       bcs     inflateDynamicBlock_11\r
-       dey\r
-inflateDynamicBlock_11:\r
-       cpy     #>endCodeLength\r
-       bcc     inflateDynamicBlock_3\r
-; Mark end of distance lengths\r
-       ldx     nd\r
-       tay\r
-       ror     distanceCodeLength,x    ; C flag is set, so it will set b7\r
-; Move distance lengths to distanceCodeLength table\r
-inflateDynamicBlock_9:\r
-       dex\r
-       lda     endCodeLength,y\r
-; Mark existing codes (of non-zero length) as distance tree codes\r
-       beq     inflateDynamicBlock_10\r
-       pha\r
-       lda     #0\r
-       sta     endCodeLength,y\r
-       pla\r
-       clc\r
-       adc     #DISTANCE_TREE\r
-       sta     distanceCodeLength,x\r
-inflateDynamicBlock_10:\r
-       dey\r
-       txa\r
-       bne     inflateDynamicBlock_9\r
-; fall into inflateCodes\r
-\r
-; --------------------------------------------------------------------------\r
-; Decompress data block basing on given Huffman trees\r
-\r
-inflateCodes:\r
-       jsr     buildHuffmanTree\r
-inflateCodes_1:\r
-       jsr     fetchLiteralCode\r
-       bcc     inflateCodes_2\r
-; Literal code\r
-       ldy     #0\r
-       sta     (outputPointer),y\r
-       inc     outputPointer\r
-       bne     inflateCodes_1\r
-       inc     outputPointer+1\r
-       bcs     inflateCodes_1  ; branch always\r
-; End of block\r
-inflateCodes_ret:\r
-       rts\r
-inflateCodes_2:\r
-       beq     inflateCodes_ret\r
-; Repeat block\r
-       jsr     getValue\r
-       sta     len\r
-       tya\r
-       jsr     getBits\r
-       sta     len+1\r
-       ldx     #DISTANCE_TREE\r
-       jsr     fetchCode\r
-       jsr     getValue\r
-       sta     src\r
-       tya\r
-       jsr     getBits\r
-       sta     src+1\r
-       lda     outputPointer\r
-       sec\r
-       sbc     src\r
-       sta     src\r
-       lda     outputPointer+1\r
-       sbc     src+1\r
-       sta     src+1\r
-       ldx     #src\r
-       jsr     moveBlock\r
-       beq     inflateCodes_1  ; branch always\r
-\r
-; --------------------------------------------------------------------------\r
-; Build Huffman trees basing on code lengths.\r
-; Lengths (in bits) are stored in literalCodeLength.\r
-; A byte with highest bit set marks end of length table.\r
-\r
-buildHuffmanTree:\r
-       lda     #<literalCodeLength\r
-       sta     ptr\r
-       sta     src\r
-       lda     #>literalCodeLength\r
-       sta     ptr+1\r
-       sta     src+1\r
-; Clear counts\r
-       ldy     #TREES_SIZE\r
-       lda     #0\r
-buildHuffmanTree_1:\r
-       sta     bitsCount-1,y\r
-       dey\r
-       bne     buildHuffmanTree_1\r
-       beq     buildHuffmanTree_3      ; branch always\r
-; Count number of codes of each length\r
-buildHuffmanTree_2:\r
-       tax\r
-       inc     bitsCount,x\r
-       iny\r
-       bne     buildHuffmanTree_3\r
-       inc     ptr+1\r
-buildHuffmanTree_3:\r
-       lda     (ptr),y\r
-       bpl     buildHuffmanTree_2\r
-; Calculate pointer for each length\r
-       ldx     #0\r
-       stx     bitsCount\r
-       lda     #<sortedCodes\r
-       ldy     #>sortedCodes\r
-buildHuffmanTree_4:\r
-       sta     bitsPointer_l,x\r
-       tya\r
-       sta     bitsPointer_h,x\r
-       lda     bitsCount,x\r
-       asl     a\r
-       bcc     buildHuffmanTree_5\r
-       iny\r
-       clc\r
-buildHuffmanTree_5:\r
-       adc     bitsPointer_l,x\r
-       bcc     buildHuffmanTree_6\r
-       iny\r
-buildHuffmanTree_6:\r
-       inx\r
-       cpx     #TREES_SIZE\r
-       bcc     buildHuffmanTree_4\r
-.ifpc02\r
-       ldy     #1\r
-.else\r
-       ldy     #0\r
-.endif\r
-       bcs     buildHuffmanTree_9      ; branch always\r
-; Put codes into their place in sorted table\r
-buildHuffmanTree_7:\r
-       beq     buildHuffmanTree_8\r
-       tax\r
-       lda     bitsPointer_l,x\r
-       sta     ptr\r
-       clc\r
-       adc     #2\r
-       sta     bitsPointer_l,x\r
-       lda     bitsPointer_h,x\r
-       sta     ptr+1\r
-       adc     #0\r
-       sta     bitsPointer_h,x\r
-       lda     src\r
-       sbc     #<(endCodeLength-1)     ; C flag is zero\r
-.ifpc02\r
-.else\r
-       iny                             ; ldy #1\r
-.endif\r
-       sta     (ptr),y\r
-       lda     src+1\r
-       sbc     #>(endCodeLength-1)\r
-.ifpc02\r
-       sta     (ptr)\r
-.else\r
-       dey                             ; ldy #0\r
-       sta     (ptr),y\r
-.endif\r
-buildHuffmanTree_8:\r
-       inc     src\r
-       bne     buildHuffmanTree_9\r
-       inc     src+1\r
-buildHuffmanTree_9:\r
-.ifpc02\r
-       lda     (src)\r
-.else\r
-       lda     (src),y\r
-.endif\r
-       bpl     buildHuffmanTree_7\r
-       rts\r
-\r
-; --------------------------------------------------------------------------\r
-; Read code basing on literal tree\r
-\r
-fetchLiteralCode:\r
-       ldx     #LITERAL_TREE\r
-; fall into fetchCode\r
-\r
-; --------------------------------------------------------------------------\r
-; Read code from input stream basing on tree given in X.\r
-; Return code in A, C is set if non-literal code.\r
-\r
-fetchCode:\r
-       lda     #0\r
-fetchCode_1:\r
-       jsr     getBit\r
-       rol     a\r
-       cmp     bitsCount+1,x\r
-       bcc     fetchCode_2\r
-       sbc     bitsCount+1,x\r
-       inx\r
-       bcs     fetchCode_1     ; branch always\r
-fetchCode_2:\r
-       ldy     bitsPointer_l,x\r
-       sty     ptr\r
-       ldy     bitsPointer_h,x\r
-       asl     a\r
-       bcc     fetchCode_3\r
-       iny\r
-fetchCode_3:\r
-       sty     ptr+1\r
-       tay\r
-       lda     (ptr),y\r
-       asl     a\r
-       iny\r
-       lda     (ptr),y\r
-       rts\r
-\r
-; --------------------------------------------------------------------------\r
-; Read low byte of value (length or distance), basing on code A\r
-\r
-getValue:\r
-       tay\r
-       ldx     lengthExtraBits-1,y\r
-       lda     lengthCode_l-1,y\r
-       pha\r
-       lda     lengthCode_h-1,y\r
-       tay\r
-       pla\r
-; fall into getBits\r
-\r
-; --------------------------------------------------------------------------\r
-; Read X-bit number from input stream and adds it to A.\r
-; In case of carry, Y is incremented.\r
-; If X > 8, only 8 bits are read.\r
-; On return X holds number of unread bits: X = (X > 8 ? X - 8 : 0);\r
-\r
-getBits:\r
-       cpx     #0\r
-       beq     getBits_ret\r
-       pha\r
-       lda     #1\r
-       sta     tmp\r
-       pla\r
-getBits_1:\r
-       jsr     getBit\r
-       bcc     getBits_2\r
-       clc\r
-       adc     tmp\r
-       bcc     getBits_2\r
-       iny\r
-getBits_2:\r
-       dex\r
-       beq     getBits_ret\r
-       asl     tmp\r
-       bcc     getBits_1\r
-getBits_ret:\r
-       rts\r
-\r
-; --------------------------------------------------------------------------\r
-; Read single bit from input stream, return it in C flag\r
-\r
-getBit:\r
-       lsr     getBitHold\r
-       bne     getBit_ret\r
-       pha\r
-.ifpc02\r
-       lda     (inputPointer)\r
-.else\r
-       tya\r
-       pha\r
-       ldy     #0\r
-       lda     (inputPointer),y\r
-.endif\r
-       inc     inputPointer\r
-       bne     getBit_1\r
-       inc     inputPointer+1\r
-getBit_1:\r
-       ror     a       ; C flag set\r
-       sta     getBitHold\r
-.ifpc02\r
-.else\r
-       pla\r
-       tay\r
-.endif\r
-       pla\r
-getBit_ret:\r
-       rts\r
-\r
-; --------------------------------------------------------------------------\r
-;\r
-; Constant data\r
-;\r
-\r
-       .rodata\r
-; --------------------------------------------------------------------------\r
-; Addresses of functions extracting different blocks\r
-extr_l:\r
-       .byte   <(inflateCopyBlock-1)\r
-       .byte   <(inflateFixedBlock-1)\r
-       .byte   <(inflateDynamicBlock-1)\r
-extr_h:\r
-       .byte   >(inflateCopyBlock-1)\r
-       .byte   >(inflateFixedBlock-1)\r
-       .byte   >(inflateDynamicBlock-1)\r
-\r
-; --------------------------------------------------------------------------\r
-; Order, in which lengths of temporary codes are stored\r
-bll:\r
-       .byte   16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15\r
-\r
-; --------------------------------------------------------------------------\r
-; Tables for length and distance codes\r
-; Value is Code + getBits(ExtraBits)\r
-\r
-; Base values\r
-lengthCode_l:\r
-       .byte   <3,<4,<5,<6,<7,<8,<9,<10\r
-       .byte   <11,<13,<15,<17,<19,<23,<27,<31\r
-       .byte   <35,<43,<51,<59,<67,<83,<99,<115\r
-       .byte   <131,<163,<195,<227,<258\r
-distanceCode_l:\r
-       .byte   <1,<2,<3,<4,<5,<7,<9,<13\r
-       .byte   <17,<25,<33,<49,<65,<97,<129,<193\r
-       .byte   <257,<385,<513,<769,<1025,<1537,<2049,<3073\r
-       .byte   <4097,<6145,<8193,<12289,<16385,<24577\r
-lengthCode_h:\r
-       .byte   >3,>4,>5,>6,>7,>8,>9,>10\r
-       .byte   >11,>13,>15,>17,>19,>23,>27,>31\r
-       .byte   >35,>43,>51,>59,>67,>83,>99,>115\r
-       .byte   >131,>163,>195,>227,>258\r
-distanceCode_h:\r
-       .byte   >1,>2,>3,>4,>5,>7,>9,>13\r
-       .byte   >17,>25,>33,>49,>65,>97,>129,>193\r
-       .byte   >257,>385,>513,>769,>1025,>1537,>2049,>3073\r
-       .byte   >4097,>6145,>8193,>12289,>16385,>24577\r
-\r
-; Number of extra bits to read\r
-lengthExtraBits:\r
-       .byte   0,0,0,0,0,0,0,0\r
-       .byte   1,1,1,1,2,2,2,2\r
-       .byte   3,3,3,3,4,4,4,4\r
-       .byte   5,5,5,5,0\r
-distanceExtraBits:\r
-       .byte   0,0,0,0,1,1,2,2\r
-       .byte   3,3,4,4,5,5,6,6\r
-       .byte   7,7,8,8,9,9,10,10\r
-       .byte   11,11,12,12,13,13\r
-\r
-; --------------------------------------------------------------------------\r
-;\r
-; Uninitialised data\r
-;\r
-\r
-       .bss\r
-\r
-; --------------------------------------------------------------------------\r
-; Data for building literal tree\r
-\r
-       .res    1\r
-; Length of literal codes\r
-literalCodeLength:\r
-       .res    256\r
-; Length of 'end' code\r
-endCodeLength:\r
-       .res    1\r
-; Length of 'length' codes\r
-lengthCodeLength:\r
-       .res    29\r
-\r
-; --------------------------------------------------------------------------\r
-; Data for building distance tree\r
-\r
-distanceCodeLength:\r
-       .res    30\r
-; For two unused codes in fixed trees and an end flag\r
-       .res    3\r
-\r
-; --------------------------------------------------------------------------\r
-; Huffman tree structure\r
-\r
-; Number of codes of each length\r
-bitsCount:\r
-       .res    TREES_SIZE\r
-\r
-; Pointer to sorted codes of each length\r
-bitsPointer_l:\r
-       .res    TREES_SIZE\r
-bitsPointer_h:\r
-       .res    TREES_SIZE\r
-\r
-; Sorted codes\r
-sortedCodes:\r
-       .res    2*(256+1+29+30+2)\r
-\r
-\r
-\r
+;
+; 2017-02-12, Piotr Fusik
+;
+; unsigned __fastcall__ inflatemem (char* dest, const char* source);
+;
+; NOTE: Be extremely careful with modifications, because this code is heavily
+; optimized for size (for example assumes certain register and flag values
+; when its internal routines return). Test with the gunzip65 sample.
+;
+
+        .export         _inflatemem
+
+        .import         incsp2
+        .importzp       sp, sreg, ptr1, ptr2, ptr3, ptr4
+
+; --------------------------------------------------------------------------
+;
+; Constants
+;
+
+; Argument values for getBits.
+GET_1_BIT           = $81
+GET_2_BITS          = $82
+GET_3_BITS          = $84
+GET_4_BITS          = $88
+GET_5_BITS          = $90
+GET_6_BITS          = $a0
+GET_7_BITS          = $c0
+
+; Huffman trees.
+TREE_SIZE           = 16
+PRIMARY_TREE        = 0
+DISTANCE_TREE       = TREE_SIZE
+
+; Alphabet.
+LENGTH_SYMBOLS      = 1+29+2    ; EOF, 29 length symbols, two unused symbols
+DISTANCE_SYMBOLS    = 30
+CONTROL_SYMBOLS     = LENGTH_SYMBOLS+DISTANCE_SYMBOLS
+
+
+; --------------------------------------------------------------------------
+;
+; Page zero
+;
+
+; Pointer to the compressed data.
+inputPointer                :=  ptr1    ; 2 bytes
+
+; Pointer to the uncompressed data.
+outputPointer               :=  ptr2    ; 2 bytes
+
+; Local variables.
+; As far as there is no conflict, same memory locations are used
+; for different variables.
+
+inflateStored_pageCounter   :=  ptr3    ; 1 byte
+inflateDynamic_symbol       :=  ptr3    ; 1 byte
+inflateDynamic_lastLength   :=  ptr3+1  ; 1 byte
+        .assert ptr4 = ptr3 + 2, error, "Need three bytes for inflateDynamic_tempCodes"
+inflateDynamic_tempCodes    :=  ptr3+1  ; 3 bytes
+inflateDynamic_allCodes     :=  inflateDynamic_tempCodes+1 ; 1 byte
+inflateDynamic_primaryCodes :=  inflateDynamic_tempCodes+2 ; 1 byte
+inflateCodes_sourcePointer  :=  ptr3    ; 2 bytes
+inflateCodes_lengthMinus2   :=  ptr4    ; 1 byte
+getBits_base                :=  sreg    ; 1 byte
+getBit_buffer               :=  sreg+1  ; 1 byte
+
+
+; --------------------------------------------------------------------------
+;
+; Code
+;
+
+_inflatemem:
+
+; inputPointer = source
+        sta     inputPointer
+        stx     inputPointer+1
+; outputPointer = dest
+        ldy     #1
+        lda     (sp),y
+        sta     outputPointer+1
+        dey
+        lda     (sp),y
+        sta     outputPointer
+
+;       ldy     #0
+        sty     getBit_buffer
+
+inflate_blockLoop:
+; Get a bit of EOF and two bits of block type
+;       ldy     #0
+        sty     getBits_base
+        lda     #GET_3_BITS
+        jsr     getBits
+        lsr     a
+; A and Z contain block type, C contains EOF flag
+; Save EOF flag
+        php
+        bne     inflateCompressed
+
+; Decompress a 'stored' data block.
+;       ldy     #0
+        sty     getBit_buffer   ; ignore bits until byte boundary
+        jsr     getWord         ; skip the length we don't need
+        jsr     getWord         ; get the two's complement length
+        sta     inflateStored_pageCounter
+        bcs     inflateStored_firstByte ; jmp
+inflateStored_copyByte:
+        jsr     getByte
+;       sec
+inflateStoreByte:
+        jsr     storeByte
+        bcc     inflateCodes_loop
+inflateStored_firstByte:
+        inx
+        bne     inflateStored_copyByte
+        inc     inflateStored_pageCounter
+        bne     inflateStored_copyByte
+
+; Block decompressed.
+inflate_nextBlock:
+        plp
+        bcc     inflate_blockLoop
+
+; Decompression complete.
+; return outputPointer - dest
+        lda     outputPointer
+;       ldy     #0
+;       sec
+        sbc     (sp),y
+        iny
+        pha
+        lda     outputPointer+1
+        sbc     (sp),y
+        tax
+        pla
+; pop dest
+        jmp     incsp2
+
+inflateCompressed:
+; Decompress a Huffman-coded data block
+; A=1: fixed block, initialize with fixed codes
+; A=2: dynamic block, start by clearing all code lengths
+; A=3: invalid compressed data, not handled in this routine
+        eor     #2
+
+;       ldy     #0
+inflateCompressed_setCodeLengths:
+        tax
+        beq     inflateCompressed_setLiteralCodeLength
+; fixed Huffman literal codes:
+; 144 8-bit codes
+; 112 9-bit codes
+        lda     #4
+        cpy     #144
+        rol     a
+inflateCompressed_setLiteralCodeLength:
+        sta     literalSymbolCodeLength,y
+        beq     inflateCompressed_setControlCodeLength
+; fixed Huffman control codes:
+; 24 7-bit codes
+;  6 8-bit codes
+;  2 meaningless 8-bit codes
+; 30 5-bit distance codes
+        lda     #5+DISTANCE_TREE
+        cpy     #LENGTH_SYMBOLS
+        bcs     inflateCompressed_setControlCodeLength
+        cpy     #24
+        adc     #$100+2-DISTANCE_TREE
+inflateCompressed_setControlCodeLength:
+        cpy     #CONTROL_SYMBOLS
+        bcs     inflateCompressed_noControlSymbol
+        sta     controlSymbolCodeLength,y
+inflateCompressed_noControlSymbol:
+        iny
+        bne     inflateCompressed_setCodeLengths
+
+        tax
+        beq     inflateDynamic
+
+; Decompress a block
+inflateCodes:
+        jsr     buildHuffmanTree
+inflateCodes_loop:
+        jsr     fetchPrimaryCode
+        bcc     inflateStoreByte
+        beq     inflate_nextBlock
+; Copy sequence from look-behind buffer
+;       ldy     #0
+        sty     getBits_base
+        cmp     #9
+        bcc     inflateCodes_setSequenceLength
+        tya
+;       lda     #0
+        cpx     #1+28
+        bcs     inflateCodes_setSequenceLength
+        dex
+        txa
+        lsr     a
+        ror     getBits_base
+        inc     getBits_base
+        lsr     a
+        rol     getBits_base
+        jsr     getAMinus1BitsMax8
+;       sec
+        adc     #0
+inflateCodes_setSequenceLength:
+        sta     inflateCodes_lengthMinus2
+        ldx     #DISTANCE_TREE
+        jsr     fetchCode
+        cmp     #4
+        bcc     inflateCodes_setOffsetLowByte
+        inc     getBits_base
+        lsr     a
+        jsr     getAMinus1BitsMax8
+inflateCodes_setOffsetLowByte:
+        eor     #$ff
+        sta     inflateCodes_sourcePointer
+        lda     getBits_base
+        cpx     #10
+        bcc     inflateCodes_setOffsetHighByte
+        lda     getNPlus1Bits_mask-10,x
+        jsr     getBits
+        clc
+inflateCodes_setOffsetHighByte:
+        eor     #$ff
+;       clc
+        adc     outputPointer+1
+        sta     inflateCodes_sourcePointer+1
+        jsr     copyByte
+        jsr     copyByte
+inflateCodes_copyByte:
+        jsr     copyByte
+        dec     inflateCodes_lengthMinus2
+        bne     inflateCodes_copyByte
+        beq     inflateCodes_loop ; jmp
+
+inflateDynamic:
+; Decompress a block reading Huffman trees first
+;       ldy     #0
+; numberOfPrimaryCodes = 257 + getBits(5)
+; numberOfDistanceCodes = 1 + getBits(5)
+; numberOfTemporaryCodes = 4 + getBits(4)
+        ldx     #3
+inflateDynamic_getHeader:
+        lda     inflateDynamic_headerBits-1,x
+        jsr     getBits
+;       sec
+        adc     inflateDynamic_headerBase-1,x
+        sta     inflateDynamic_tempCodes-1,x
+        dex
+        bne     inflateDynamic_getHeader
+
+; Get lengths of temporary codes in the order stored in inflateDynamic_tempSymbols
+;       ldx     #0
+inflateDynamic_getTempCodeLengths:
+        lda     #GET_3_BITS
+        jsr     getBits
+        ldy     inflateDynamic_tempSymbols,x
+        sta     literalSymbolCodeLength,y
+        ldy     #0
+        inx
+        cpx     inflateDynamic_tempCodes
+        bcc     inflateDynamic_getTempCodeLengths
+
+; Build the tree for temporary codes
+        jsr     buildHuffmanTree
+
+; Use temporary codes to get lengths of literal/length and distance codes
+;       ldx     #0
+;       sec
+inflateDynamic_decodeLength:
+; C=1: literal codes
+; C=0: control codes
+        stx     inflateDynamic_symbol
+        php
+; Fetch a temporary code
+        jsr     fetchPrimaryCode
+; Temporary code 0..15: put this length
+        bpl     inflateDynamic_storeLengths
+; Temporary code 16: repeat last length 3 + getBits(2) times
+; Temporary code 17: put zero length 3 + getBits(3) times
+; Temporary code 18: put zero length 11 + getBits(7) times
+        tax
+        jsr     getBits
+        cpx     #GET_3_BITS
+        bcc     inflateDynamic_code16
+        beq     inflateDynamic_code17
+;       sec
+        adc     #7
+inflateDynamic_code17:
+;       ldy     #0
+        sty     inflateDynamic_lastLength
+inflateDynamic_code16:
+        tay
+        lda     inflateDynamic_lastLength
+        iny
+        iny
+inflateDynamic_storeLengths:
+        iny
+        plp
+        ldx     inflateDynamic_symbol
+inflateDynamic_storeLength:
+        bcc     inflateDynamic_controlSymbolCodeLength
+        sta     literalSymbolCodeLength,x
+        inx
+        cpx     #1
+inflateDynamic_storeNext:
+        dey
+        bne     inflateDynamic_storeLength
+        sta     inflateDynamic_lastLength
+        beq     inflateDynamic_decodeLength ; jmp
+inflateDynamic_controlSymbolCodeLength:
+        cpx     inflateDynamic_primaryCodes
+        bcc     inflateDynamic_storeControl
+; the code lengths we skip here were zero-initialized
+; in inflateCompressed_setControlCodeLength
+        bne     inflateDynamic_noStartDistanceTree
+        ldx     #LENGTH_SYMBOLS
+inflateDynamic_noStartDistanceTree:
+        ora     #DISTANCE_TREE
+inflateDynamic_storeControl:
+        sta     controlSymbolCodeLength,x
+        inx
+        cpx     inflateDynamic_allCodes
+        bcc     inflateDynamic_storeNext
+        dey
+;       ldy     #0
+        jmp     inflateCodes
+
+; Build Huffman trees basing on code lengths (in bits)
+; stored in the *SymbolCodeLength arrays
+buildHuffmanTree:
+; Clear nBitCode_totalCount, nBitCode_literalCount, nBitCode_controlCount
+        tya
+;       lda     #0
+buildHuffmanTree_clear:
+        sta     nBitCode_clearFrom,y
+        iny
+        bne     buildHuffmanTree_clear
+; Count number of codes of each length
+;       ldy     #0
+buildHuffmanTree_countCodeLengths:
+        ldx     literalSymbolCodeLength,y
+        inc     nBitCode_literalCount,x
+        inc     nBitCode_totalCount,x
+        cpy     #CONTROL_SYMBOLS
+        bcs     buildHuffmanTree_noControlSymbol
+        ldx     controlSymbolCodeLength,y
+        inc     nBitCode_controlCount,x
+        inc     nBitCode_totalCount,x
+buildHuffmanTree_noControlSymbol:
+        iny
+        bne     buildHuffmanTree_countCodeLengths
+; Calculate offsets of symbols sorted by code length
+;       lda     #0
+        ldx     #$100-3*TREE_SIZE
+buildHuffmanTree_calculateOffsets:
+        sta     nBitCode_literalOffset+3*TREE_SIZE-$100,x
+        clc
+        adc     nBitCode_literalCount+3*TREE_SIZE-$100,x
+        inx
+        bne     buildHuffmanTree_calculateOffsets
+; Put symbols in their place in the sorted array
+;       ldy     #0
+buildHuffmanTree_assignCode:
+        tya
+        ldx     literalSymbolCodeLength,y
+        ldy     nBitCode_literalOffset,x
+        inc     nBitCode_literalOffset,x
+        sta     codeToLiteralSymbol,y
+        tay
+        cpy     #CONTROL_SYMBOLS
+        bcs     buildHuffmanTree_noControlSymbol2
+        ldx     controlSymbolCodeLength,y
+        ldy     nBitCode_controlOffset,x
+        inc     nBitCode_controlOffset,x
+        sta     codeToControlSymbol,y
+        tay
+buildHuffmanTree_noControlSymbol2:
+        iny
+        bne     buildHuffmanTree_assignCode
+        rts
+
+; Read Huffman code using the primary tree
+fetchPrimaryCode:
+        ldx     #PRIMARY_TREE
+; Read a code from input using the tree specified in X.
+; Return low byte of this code in A.
+; Return C flag reset for literal code, set for length code.
+fetchCode:
+;       ldy     #0
+        tya
+fetchCode_nextBit:
+        jsr     getBit
+        rol     a
+        inx
+        sec
+        sbc     nBitCode_totalCount,x
+        bcs     fetchCode_nextBit
+;       clc
+        adc     nBitCode_controlCount,x
+        bcs     fetchCode_control
+;       clc
+        adc     nBitCode_literalOffset,x
+        tax
+        lda     codeToLiteralSymbol,x
+        clc
+        rts
+fetchCode_control:
+;       sec
+        adc     nBitCode_controlOffset-1,x
+        tax
+        lda     codeToControlSymbol-1,x
+        and     #$1f    ; make distance symbols zero-based
+        tax
+        sec
+        rts
+
+; Read A minus 1 bits, but no more than 8
+getAMinus1BitsMax8:
+        rol     getBits_base
+        tax
+        cmp     #9
+        bcs     getByte
+        lda     getNPlus1Bits_mask-2,x
+getBits:
+        jsr     getBits_loop
+getBits_normalizeLoop:
+        lsr     getBits_base
+        ror     a
+        bcc     getBits_normalizeLoop
+        rts
+
+; Read 16 bits
+getWord:
+        jsr     getByte
+        tax
+; Read 8 bits
+getByte:
+        lda     #$80
+getBits_loop:
+        jsr     getBit
+        ror     a
+        bcc     getBits_loop
+        rts
+
+; Read one bit, return in the C flag
+getBit:
+        lsr     getBit_buffer
+        bne     getBit_return
+        pha
+;       ldy     #0
+        lda     (inputPointer),y
+        inc     inputPointer
+        bne     getBit_samePage
+        inc     inputPointer+1
+getBit_samePage:
+        sec
+        ror     a
+        sta     getBit_buffer
+        pla
+getBit_return:
+        rts
+
+; Copy a previously written byte
+copyByte:
+        ldy     outputPointer
+        lda     (inflateCodes_sourcePointer),y
+        ldy     #0
+; Write a byte
+storeByte:
+;       ldy     #0
+        sta     (outputPointer),y
+        inc     outputPointer
+        bne     storeByte_return
+        inc     outputPointer+1
+        inc     inflateCodes_sourcePointer+1
+storeByte_return:
+        rts
+
+
+; --------------------------------------------------------------------------
+;
+; Constant data
+;
+
+        .rodata
+
+getNPlus1Bits_mask:
+        .byte   GET_1_BIT,GET_2_BITS,GET_3_BITS,GET_4_BITS,GET_5_BITS,GET_6_BITS,GET_7_BITS
+
+inflateDynamic_tempSymbols:
+        .byte   GET_2_BITS,GET_3_BITS,GET_7_BITS,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15
+
+inflateDynamic_headerBits:
+        .byte   GET_4_BITS,GET_5_BITS,GET_5_BITS
+inflateDynamic_headerBase:
+        .byte   3,LENGTH_SYMBOLS,0
+
+
+; --------------------------------------------------------------------------
+;
+; Uninitialised data
+;
+
+        .bss
+
+; Data for building trees.
+
+literalSymbolCodeLength:
+        .res    256
+controlSymbolCodeLength:
+        .res    CONTROL_SYMBOLS
+
+; Huffman trees.
+
+nBitCode_clearFrom:
+nBitCode_totalCount:
+        .res    2*TREE_SIZE
+nBitCode_literalCount:
+        .res    TREE_SIZE
+nBitCode_controlCount:
+        .res    2*TREE_SIZE
+nBitCode_literalOffset:
+        .res    TREE_SIZE
+nBitCode_controlOffset:
+        .res    2*TREE_SIZE
+
+codeToLiteralSymbol:
+        .res    256
+codeToControlSymbol:
+        .res    CONTROL_SYMBOLS