-;\r
-; Piotr Fusik, 18.11.2001\r
-;\r
-; unsigned __fastcall__ inflatemem (char* dest, const char* source);\r
-;\r
-\r
- .export _inflatemem\r
-\r
- .import incsp2\r
- .importzp sp, sreg\r
- .importzp ptr1, ptr2, ptr3, ptr4\r
- .importzp tmp1, tmp2, tmp3, tmp4\r
-\r
-; --------------------------------------------------------------------------\r
-;\r
-; Constants\r
-;\r
-\r
-; Maximum length of a Huffman code\r
-MAX_BITS = 15\r
-; Index in bitsCount, bitsPointer_l and bitsPointer_h for literal tree\r
-LITERAL_TREE = 0\r
-; Index in bitsCount, bitsPointer_l and bitsPointer_h for distance tree\r
-DISTANCE_TREE = MAX_BITS\r
-; Size of each of bitsCount, bitsPointer_l and bitsPointer_h\r
-TREES_SIZE = 2*MAX_BITS+1\r
-\r
-; --------------------------------------------------------------------------\r
-;\r
-; Page zero\r
-;\r
-\r
-; Pointer to compressed data\r
-inputPointer = ptr1 ; 2 bytes\r
-; Pointer to uncompressed data\r
-outputPointer = ptr2 ; 2 bytes\r
-; Buffer for getBit\r
-getBitHold = tmp1 ; 1 byte\r
-; Local variables. Variables from different routines use same memory.\r
-cnt = tmp2 ; 1 byte\r
-tmp = sreg ; 1 byte\r
-ptr = sreg ; 2 bytes\r
-len = ptr3 ; 2 bytes\r
-nl = tmp3 ; 1 byte\r
-nd = tmp4 ; 1 byte\r
-src = ptr4 ; 2 bytes\r
-dest = ptr4 ; 2 bytes\r
-\r
-; --------------------------------------------------------------------------\r
-;\r
-; Code\r
-;\r
-\r
-_inflatemem:\r
-\r
-; inputPointer = source\r
- sta inputPointer\r
- stx inputPointer+1\r
-; outputPointer = dest\r
- ldy #0\r
- lda (sp),y\r
- sta outputPointer\r
- iny\r
- lda (sp),y\r
- sta outputPointer+1\r
-\r
-; ldy #1\r
- sty getBitHold\r
-inflatemem_1:\r
-; Get a bit of EOF and two bits of block type\r
- ldx #3\r
- lda #0\r
- jsr getBits\r
- lsr a\r
- tax\r
-; X contains block type, C contains EOF flag\r
-; Save EOF flag\r
- php\r
-; Go to the routine decompressing this block\r
- jsr callExtr\r
- plp\r
- bcc inflatemem_1\r
-; C flag is set!\r
-\r
-; return outputPointer - dest;\r
- lda outputPointer\r
- ldy #0\r
- sbc (sp),y ; C flag is set\r
- pha\r
- iny\r
- lda outputPointer+1\r
- sbc (sp),y\r
- tax\r
- pla\r
-; pop dest\r
- jmp incsp2\r
-\r
-; --------------------------------------------------------------------------\r
-; Go to the routine decompressing block type X\r
-\r
-callExtr:\r
- lda extr_h,x\r
- pha\r
- lda extr_l,x\r
- pha\r
- rts\r
-\r
-; --------------------------------------------------------------------------\r
-; Decompress 'stored' data block\r
-\r
-inflateCopyBlock:\r
-; Ignore bits until byte boundary\r
- ldy #1\r
- sty getBitHold\r
-; Get 16-bit length\r
- ldx #inputPointer\r
- lda (0,x)\r
- sta len\r
- lda (inputPointer),y\r
- sta len+1\r
-; Skip length and one's compliment length\r
- lda #4\r
- clc\r
- adc inputPointer\r
- sta inputPointer\r
- bcc moveBlock\r
- inc inputPointer+1\r
-; fall into moveBlock\r
-\r
-; --------------------------------------------------------------------------\r
-; Copy block of length len from (0,x) to output\r
-\r
-moveBlock:\r
- ldy len\r
- beq moveBlock_1\r
- ldy #0\r
- inc len+1\r
-moveBlock_1:\r
- lda (0,x)\r
- sta (outputPointer),y\r
- inc 0,x\r
- bne moveBlock_2\r
- inc 1,x\r
-moveBlock_2:\r
- inc outputPointer\r
- bne moveBlock_3\r
- inc outputPointer+1\r
-moveBlock_3:\r
- dec len\r
- bne moveBlock_1\r
- dec len+1\r
- bne moveBlock_1\r
- rts\r
-\r
-; --------------------------------------------------------------------------\r
-; Decompress Huffman-coded data block with default Huffman trees:\r
-; literalCodeLength: 144 times 8, 112 times 9\r
-; endCodeLength: 24 times 7, 6 times 8\r
-; distanceCodeLength: 30 times 5, 2 times 8\r
-; (2 last codes from literal tree are not used)\r
-\r
-inflateFixedBlock:\r
- ldx #159\r
- stx distanceCodeLength+32\r
- lda #8\r
-inflateFixedBlock_1:\r
- sta literalCodeLength-1,x\r
- sta literalCodeLength+159-1,x\r
- dex\r
- bne inflateFixedBlock_1\r
- ldx #112\r
- lda #9\r
-inflateFixedBlock_2:\r
- sta literalCodeLength+144-1,x\r
- dex\r
- bne inflateFixedBlock_2\r
- ldx #24\r
- lda #7\r
-inflateFixedBlock_3:\r
- sta endCodeLength-1,x\r
- dex\r
- bne inflateFixedBlock_3\r
- ldx #30\r
- lda #5+DISTANCE_TREE\r
-inflateFixedBlock_4:\r
- sta distanceCodeLength-1,x\r
- dex\r
- bne inflateFixedBlock_4\r
- jmp inflateCodes\r
-\r
-; --------------------------------------------------------------------------\r
-; Decompress Huffman-coded data block, reading Huffman trees first\r
-\r
-inflateDynamicBlock:\r
-; numberOfLiteralCodes = getBits(5) + 257;\r
- ldx #5\r
- lda #<(lengthCodeLength-1)\r
- jsr getBits\r
- sta nl\r
-; numberOfDistanceCodes = getBits(5) + 1;\r
- ldx #5\r
- lda #1\r
- jsr getBits\r
- sta nd\r
- clc\r
- adc nl\r
- sta nl\r
-; numberOfTemporaryCodes = getBits(4) + 4;\r
- lda #4\r
- tax\r
- jsr getBits\r
- sta cnt\r
-; Clear lengths of temporary codes (there're 19 temp codes max),\r
-; clear literalCodeLength-1 (it may be used by temporary code 16)\r
-; and leave #0 in Y\r
- ldy #20\r
- lda #0\r
-inflateDynamicBlock_1:\r
- sta literalCodeLength-2,y\r
- dey\r
- bne inflateDynamicBlock_1\r
-; Get lengths of temporary codes in order stored in bll\r
-inflateDynamicBlock_2:\r
- ldx #3\r
- lda #0\r
- jsr getBits ; does not change Y\r
- ldx bll,y\r
- sta literalCodeLength,x\r
- iny\r
- cpy cnt\r
- bcc inflateDynamicBlock_2\r
- ror literalCodeLength+19 ; C flag is set, so it will set b7\r
-; Build tree for temporary codes\r
- jsr buildHuffmanTree\r
-\r
-; Use temporary codes to get lengths for literal and distance codes\r
-; dest is target-1, so we can access last written byte by (dest,0)\r
- lda #<(literalCodeLength-1)\r
- sta dest\r
- lda #>(literalCodeLength-1)\r
- sta dest+1\r
-inflateDynamicBlock_3:\r
- jsr fetchLiteralCode\r
-; Temporary code 0..15: put this length\r
- ldy #1\r
- cmp #16\r
- bcc inflateDynamicBlock_6\r
- bne inflateDynamicBlock_4\r
-; Temporary code 16: repeat last length 3..6 times\r
- ldx #2\r
- lda #3\r
- jsr getBits\r
- tay\r
- lda (dest,x) ; X == 0\r
- bpl inflateDynamicBlock_6 ; branch always\r
-inflateDynamicBlock_4:\r
- lsr a\r
-; Temporary code 17: put zero length 3..10 times\r
- lda #3\r
- tax\r
- bcs inflateDynamicBlock_5\r
-; Temporary code 18: put zero length 11..138 times\r
- ldx #7\r
- lda #11\r
-inflateDynamicBlock_5:\r
- jsr getBits\r
- tay\r
- lda #0\r
-; Write A length Y times\r
-inflateDynamicBlock_6:\r
- sty cnt\r
-inflateDynamicBlock_7:\r
- sta (dest),y\r
- dey\r
- bne inflateDynamicBlock_7\r
- lda cnt\r
- clc\r
- adc dest\r
- sta dest\r
- bcc inflateDynamicBlock_8\r
- inc dest+1\r
-inflateDynamicBlock_8:\r
- cmp nl\r
- bne inflateDynamicBlock_3\r
- ldy dest+1\r
- sbc #<endCodeLength ; C flag is set\r
- bcs inflateDynamicBlock_11\r
- dey\r
-inflateDynamicBlock_11:\r
- cpy #>endCodeLength\r
- bcc inflateDynamicBlock_3\r
-; Mark end of distance lengths\r
- ldx nd\r
- tay\r
- ror distanceCodeLength,x ; C flag is set, so it will set b7\r
-; Move distance lengths to distanceCodeLength table\r
-inflateDynamicBlock_9:\r
- dex\r
- lda endCodeLength,y\r
-; Mark existing codes (of non-zero length) as distance tree codes\r
- beq inflateDynamicBlock_10\r
- pha\r
- lda #0\r
- sta endCodeLength,y\r
- pla\r
- clc\r
- adc #DISTANCE_TREE\r
- sta distanceCodeLength,x\r
-inflateDynamicBlock_10:\r
- dey\r
- txa\r
- bne inflateDynamicBlock_9\r
-; fall into inflateCodes\r
-\r
-; --------------------------------------------------------------------------\r
-; Decompress data block basing on given Huffman trees\r
-\r
-inflateCodes:\r
- jsr buildHuffmanTree\r
-inflateCodes_1:\r
- jsr fetchLiteralCode\r
- bcc inflateCodes_2\r
-; Literal code\r
- ldy #0\r
- sta (outputPointer),y\r
- inc outputPointer\r
- bne inflateCodes_1\r
- inc outputPointer+1\r
- bcs inflateCodes_1 ; branch always\r
-; End of block\r
-inflateCodes_ret:\r
- rts\r
-inflateCodes_2:\r
- beq inflateCodes_ret\r
-; Repeat block\r
- jsr getValue\r
- sta len\r
- tya\r
- jsr getBits\r
- sta len+1\r
- ldx #DISTANCE_TREE\r
- jsr fetchCode\r
- jsr getValue\r
- sta src\r
- tya\r
- jsr getBits\r
- sta src+1\r
- lda outputPointer\r
- sec\r
- sbc src\r
- sta src\r
- lda outputPointer+1\r
- sbc src+1\r
- sta src+1\r
- ldx #src\r
- jsr moveBlock\r
- beq inflateCodes_1 ; branch always\r
-\r
-; --------------------------------------------------------------------------\r
-; Build Huffman trees basing on code lengths.\r
-; Lengths (in bits) are stored in literalCodeLength.\r
-; A byte with highest bit set marks end of length table.\r
-\r
-buildHuffmanTree:\r
- lda #<literalCodeLength\r
- sta ptr\r
- sta src\r
- lda #>literalCodeLength\r
- sta ptr+1\r
- sta src+1\r
-; Clear counts\r
- ldy #TREES_SIZE\r
- lda #0\r
-buildHuffmanTree_1:\r
- sta bitsCount-1,y\r
- dey\r
- bne buildHuffmanTree_1\r
- beq buildHuffmanTree_3 ; branch always\r
-; Count number of codes of each length\r
-buildHuffmanTree_2:\r
- tax\r
- inc bitsCount,x\r
- iny\r
- bne buildHuffmanTree_3\r
- inc ptr+1\r
-buildHuffmanTree_3:\r
- lda (ptr),y\r
- bpl buildHuffmanTree_2\r
-; Calculate pointer for each length\r
- ldx #0\r
- stx bitsCount\r
- lda #<sortedCodes\r
- ldy #>sortedCodes\r
-buildHuffmanTree_4:\r
- sta bitsPointer_l,x\r
- tya\r
- sta bitsPointer_h,x\r
- lda bitsCount,x\r
- asl a\r
- bcc buildHuffmanTree_5\r
- iny\r
- clc\r
-buildHuffmanTree_5:\r
- adc bitsPointer_l,x\r
- bcc buildHuffmanTree_6\r
- iny\r
-buildHuffmanTree_6:\r
- inx\r
- cpx #TREES_SIZE\r
- bcc buildHuffmanTree_4\r
-.ifpc02\r
- ldy #1\r
-.else\r
- ldy #0\r
-.endif\r
- bcs buildHuffmanTree_9 ; branch always\r
-; Put codes into their place in sorted table\r
-buildHuffmanTree_7:\r
- beq buildHuffmanTree_8\r
- tax\r
- lda bitsPointer_l,x\r
- sta ptr\r
- clc\r
- adc #2\r
- sta bitsPointer_l,x\r
- lda bitsPointer_h,x\r
- sta ptr+1\r
- adc #0\r
- sta bitsPointer_h,x\r
- lda src\r
- sbc #<(endCodeLength-1) ; C flag is zero\r
-.ifpc02\r
-.else\r
- iny ; ldy #1\r
-.endif\r
- sta (ptr),y\r
- lda src+1\r
- sbc #>(endCodeLength-1)\r
-.ifpc02\r
- sta (ptr)\r
-.else\r
- dey ; ldy #0\r
- sta (ptr),y\r
-.endif\r
-buildHuffmanTree_8:\r
- inc src\r
- bne buildHuffmanTree_9\r
- inc src+1\r
-buildHuffmanTree_9:\r
-.ifpc02\r
- lda (src)\r
-.else\r
- lda (src),y\r
-.endif\r
- bpl buildHuffmanTree_7\r
- rts\r
-\r
-; --------------------------------------------------------------------------\r
-; Read code basing on literal tree\r
-\r
-fetchLiteralCode:\r
- ldx #LITERAL_TREE\r
-; fall into fetchCode\r
-\r
-; --------------------------------------------------------------------------\r
-; Read code from input stream basing on tree given in X.\r
-; Return code in A, C is set if non-literal code.\r
-\r
-fetchCode:\r
- lda #0\r
-fetchCode_1:\r
- jsr getBit\r
- rol a\r
- cmp bitsCount+1,x\r
- bcc fetchCode_2\r
- sbc bitsCount+1,x\r
- inx\r
- bcs fetchCode_1 ; branch always\r
-fetchCode_2:\r
- ldy bitsPointer_l,x\r
- sty ptr\r
- ldy bitsPointer_h,x\r
- asl a\r
- bcc fetchCode_3\r
- iny\r
-fetchCode_3:\r
- sty ptr+1\r
- tay\r
- lda (ptr),y\r
- asl a\r
- iny\r
- lda (ptr),y\r
- rts\r
-\r
-; --------------------------------------------------------------------------\r
-; Read low byte of value (length or distance), basing on code A\r
-\r
-getValue:\r
- tay\r
- ldx lengthExtraBits-1,y\r
- lda lengthCode_l-1,y\r
- pha\r
- lda lengthCode_h-1,y\r
- tay\r
- pla\r
-; fall into getBits\r
-\r
-; --------------------------------------------------------------------------\r
-; Read X-bit number from input stream and adds it to A.\r
-; In case of carry, Y is incremented.\r
-; If X > 8, only 8 bits are read.\r
-; On return X holds number of unread bits: X = (X > 8 ? X - 8 : 0);\r
-\r
-getBits:\r
- cpx #0\r
- beq getBits_ret\r
- pha\r
- lda #1\r
- sta tmp\r
- pla\r
-getBits_1:\r
- jsr getBit\r
- bcc getBits_2\r
- clc\r
- adc tmp\r
- bcc getBits_2\r
- iny\r
-getBits_2:\r
- dex\r
- beq getBits_ret\r
- asl tmp\r
- bcc getBits_1\r
-getBits_ret:\r
- rts\r
-\r
-; --------------------------------------------------------------------------\r
-; Read single bit from input stream, return it in C flag\r
-\r
-getBit:\r
- lsr getBitHold\r
- bne getBit_ret\r
- pha\r
-.ifpc02\r
- lda (inputPointer)\r
-.else\r
- tya\r
- pha\r
- ldy #0\r
- lda (inputPointer),y\r
-.endif\r
- inc inputPointer\r
- bne getBit_1\r
- inc inputPointer+1\r
-getBit_1:\r
- ror a ; C flag set\r
- sta getBitHold\r
-.ifpc02\r
-.else\r
- pla\r
- tay\r
-.endif\r
- pla\r
-getBit_ret:\r
- rts\r
-\r
-; --------------------------------------------------------------------------\r
-;\r
-; Constant data\r
-;\r
-\r
- .rodata\r
-; --------------------------------------------------------------------------\r
-; Addresses of functions extracting different blocks\r
-extr_l:\r
- .byte <(inflateCopyBlock-1)\r
- .byte <(inflateFixedBlock-1)\r
- .byte <(inflateDynamicBlock-1)\r
-extr_h:\r
- .byte >(inflateCopyBlock-1)\r
- .byte >(inflateFixedBlock-1)\r
- .byte >(inflateDynamicBlock-1)\r
-\r
-; --------------------------------------------------------------------------\r
-; Order, in which lengths of temporary codes are stored\r
-bll:\r
- .byte 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15\r
-\r
-; --------------------------------------------------------------------------\r
-; Tables for length and distance codes\r
-; Value is Code + getBits(ExtraBits)\r
-\r
-; Base values\r
-lengthCode_l:\r
- .byte <3,<4,<5,<6,<7,<8,<9,<10\r
- .byte <11,<13,<15,<17,<19,<23,<27,<31\r
- .byte <35,<43,<51,<59,<67,<83,<99,<115\r
- .byte <131,<163,<195,<227,<258\r
-distanceCode_l:\r
- .byte <1,<2,<3,<4,<5,<7,<9,<13\r
- .byte <17,<25,<33,<49,<65,<97,<129,<193\r
- .byte <257,<385,<513,<769,<1025,<1537,<2049,<3073\r
- .byte <4097,<6145,<8193,<12289,<16385,<24577\r
-lengthCode_h:\r
- .byte >3,>4,>5,>6,>7,>8,>9,>10\r
- .byte >11,>13,>15,>17,>19,>23,>27,>31\r
- .byte >35,>43,>51,>59,>67,>83,>99,>115\r
- .byte >131,>163,>195,>227,>258\r
-distanceCode_h:\r
- .byte >1,>2,>3,>4,>5,>7,>9,>13\r
- .byte >17,>25,>33,>49,>65,>97,>129,>193\r
- .byte >257,>385,>513,>769,>1025,>1537,>2049,>3073\r
- .byte >4097,>6145,>8193,>12289,>16385,>24577\r
-\r
-; Number of extra bits to read\r
-lengthExtraBits:\r
- .byte 0,0,0,0,0,0,0,0\r
- .byte 1,1,1,1,2,2,2,2\r
- .byte 3,3,3,3,4,4,4,4\r
- .byte 5,5,5,5,0\r
-distanceExtraBits:\r
- .byte 0,0,0,0,1,1,2,2\r
- .byte 3,3,4,4,5,5,6,6\r
- .byte 7,7,8,8,9,9,10,10\r
- .byte 11,11,12,12,13,13\r
-\r
-; --------------------------------------------------------------------------\r
-;\r
-; Uninitialised data\r
-;\r
-\r
- .bss\r
-\r
-; --------------------------------------------------------------------------\r
-; Data for building literal tree\r
-\r
- .res 1\r
-; Length of literal codes\r
-literalCodeLength:\r
- .res 256\r
-; Length of 'end' code\r
-endCodeLength:\r
- .res 1\r
-; Length of 'length' codes\r
-lengthCodeLength:\r
- .res 29\r
-\r
-; --------------------------------------------------------------------------\r
-; Data for building distance tree\r
-\r
-distanceCodeLength:\r
- .res 30\r
-; For two unused codes in fixed trees and an end flag\r
- .res 3\r
-\r
-; --------------------------------------------------------------------------\r
-; Huffman tree structure\r
-\r
-; Number of codes of each length\r
-bitsCount:\r
- .res TREES_SIZE\r
-\r
-; Pointer to sorted codes of each length\r
-bitsPointer_l:\r
- .res TREES_SIZE\r
-bitsPointer_h:\r
- .res TREES_SIZE\r
-\r
-; Sorted codes\r
-sortedCodes:\r
- .res 2*(256+1+29+30+2)\r
-\r
-\r
-\r
+;
+; 2017-02-12, Piotr Fusik
+;
+; unsigned __fastcall__ inflatemem (char* dest, const char* source);
+;
+; NOTE: Be extremely careful with modifications, because this code is heavily
+; optimized for size (for example assumes certain register and flag values
+; when its internal routines return). Test with the gunzip65 sample.
+;
+
+ .export _inflatemem
+
+ .import incsp2
+ .importzp sp, sreg, ptr1, ptr2, ptr3, ptr4
+
+; --------------------------------------------------------------------------
+;
+; Constants
+;
+
+; Argument values for getBits.
+GET_1_BIT = $81
+GET_2_BITS = $82
+GET_3_BITS = $84
+GET_4_BITS = $88
+GET_5_BITS = $90
+GET_6_BITS = $a0
+GET_7_BITS = $c0
+
+; Huffman trees.
+TREE_SIZE = 16
+PRIMARY_TREE = 0
+DISTANCE_TREE = TREE_SIZE
+
+; Alphabet.
+LENGTH_SYMBOLS = 1+29+2 ; EOF, 29 length symbols, two unused symbols
+DISTANCE_SYMBOLS = 30
+CONTROL_SYMBOLS = LENGTH_SYMBOLS+DISTANCE_SYMBOLS
+
+
+; --------------------------------------------------------------------------
+;
+; Page zero
+;
+
+; Pointer to the compressed data.
+inputPointer := ptr1 ; 2 bytes
+
+; Pointer to the uncompressed data.
+outputPointer := ptr2 ; 2 bytes
+
+; Local variables.
+; As far as there is no conflict, same memory locations are used
+; for different variables.
+
+inflateStored_pageCounter := ptr3 ; 1 byte
+inflateDynamic_symbol := ptr3 ; 1 byte
+inflateDynamic_lastLength := ptr3+1 ; 1 byte
+ .assert ptr4 = ptr3 + 2, error, "Need three bytes for inflateDynamic_tempCodes"
+inflateDynamic_tempCodes := ptr3+1 ; 3 bytes
+inflateDynamic_allCodes := inflateDynamic_tempCodes+1 ; 1 byte
+inflateDynamic_primaryCodes := inflateDynamic_tempCodes+2 ; 1 byte
+inflateCodes_sourcePointer := ptr3 ; 2 bytes
+inflateCodes_lengthMinus2 := ptr4 ; 1 byte
+getBits_base := sreg ; 1 byte
+getBit_buffer := sreg+1 ; 1 byte
+
+
+; --------------------------------------------------------------------------
+;
+; Code
+;
+
+_inflatemem:
+
+; inputPointer = source
+ sta inputPointer
+ stx inputPointer+1
+; outputPointer = dest
+ ldy #1
+ lda (sp),y
+ sta outputPointer+1
+ dey
+ lda (sp),y
+ sta outputPointer
+
+; ldy #0
+ sty getBit_buffer
+
+inflate_blockLoop:
+; Get a bit of EOF and two bits of block type
+; ldy #0
+ sty getBits_base
+ lda #GET_3_BITS
+ jsr getBits
+ lsr a
+; A and Z contain block type, C contains EOF flag
+; Save EOF flag
+ php
+ bne inflateCompressed
+
+; Decompress a 'stored' data block.
+; ldy #0
+ sty getBit_buffer ; ignore bits until byte boundary
+ jsr getWord ; skip the length we don't need
+ jsr getWord ; get the two's complement length
+ sta inflateStored_pageCounter
+ bcs inflateStored_firstByte ; jmp
+inflateStored_copyByte:
+ jsr getByte
+; sec
+inflateStoreByte:
+ jsr storeByte
+ bcc inflateCodes_loop
+inflateStored_firstByte:
+ inx
+ bne inflateStored_copyByte
+ inc inflateStored_pageCounter
+ bne inflateStored_copyByte
+
+; Block decompressed.
+inflate_nextBlock:
+ plp
+ bcc inflate_blockLoop
+
+; Decompression complete.
+; return outputPointer - dest
+ lda outputPointer
+; ldy #0
+; sec
+ sbc (sp),y
+ iny
+ pha
+ lda outputPointer+1
+ sbc (sp),y
+ tax
+ pla
+; pop dest
+ jmp incsp2
+
+inflateCompressed:
+; Decompress a Huffman-coded data block
+; A=1: fixed block, initialize with fixed codes
+; A=2: dynamic block, start by clearing all code lengths
+; A=3: invalid compressed data, not handled in this routine
+ eor #2
+
+; ldy #0
+inflateCompressed_setCodeLengths:
+ tax
+ beq inflateCompressed_setLiteralCodeLength
+; fixed Huffman literal codes:
+; 144 8-bit codes
+; 112 9-bit codes
+ lda #4
+ cpy #144
+ rol a
+inflateCompressed_setLiteralCodeLength:
+ sta literalSymbolCodeLength,y
+ beq inflateCompressed_setControlCodeLength
+; fixed Huffman control codes:
+; 24 7-bit codes
+; 6 8-bit codes
+; 2 meaningless 8-bit codes
+; 30 5-bit distance codes
+ lda #5+DISTANCE_TREE
+ cpy #LENGTH_SYMBOLS
+ bcs inflateCompressed_setControlCodeLength
+ cpy #24
+ adc #$100+2-DISTANCE_TREE
+inflateCompressed_setControlCodeLength:
+ cpy #CONTROL_SYMBOLS
+ bcs inflateCompressed_noControlSymbol
+ sta controlSymbolCodeLength,y
+inflateCompressed_noControlSymbol:
+ iny
+ bne inflateCompressed_setCodeLengths
+
+ tax
+ beq inflateDynamic
+
+; Decompress a block
+inflateCodes:
+ jsr buildHuffmanTree
+inflateCodes_loop:
+ jsr fetchPrimaryCode
+ bcc inflateStoreByte
+ beq inflate_nextBlock
+; Copy sequence from look-behind buffer
+; ldy #0
+ sty getBits_base
+ cmp #9
+ bcc inflateCodes_setSequenceLength
+ tya
+; lda #0
+ cpx #1+28
+ bcs inflateCodes_setSequenceLength
+ dex
+ txa
+ lsr a
+ ror getBits_base
+ inc getBits_base
+ lsr a
+ rol getBits_base
+ jsr getAMinus1BitsMax8
+; sec
+ adc #0
+inflateCodes_setSequenceLength:
+ sta inflateCodes_lengthMinus2
+ ldx #DISTANCE_TREE
+ jsr fetchCode
+ cmp #4
+ bcc inflateCodes_setOffsetLowByte
+ inc getBits_base
+ lsr a
+ jsr getAMinus1BitsMax8
+inflateCodes_setOffsetLowByte:
+ eor #$ff
+ sta inflateCodes_sourcePointer
+ lda getBits_base
+ cpx #10
+ bcc inflateCodes_setOffsetHighByte
+ lda getNPlus1Bits_mask-10,x
+ jsr getBits
+ clc
+inflateCodes_setOffsetHighByte:
+ eor #$ff
+; clc
+ adc outputPointer+1
+ sta inflateCodes_sourcePointer+1
+ jsr copyByte
+ jsr copyByte
+inflateCodes_copyByte:
+ jsr copyByte
+ dec inflateCodes_lengthMinus2
+ bne inflateCodes_copyByte
+ beq inflateCodes_loop ; jmp
+
+inflateDynamic:
+; Decompress a block reading Huffman trees first
+; ldy #0
+; numberOfPrimaryCodes = 257 + getBits(5)
+; numberOfDistanceCodes = 1 + getBits(5)
+; numberOfTemporaryCodes = 4 + getBits(4)
+ ldx #3
+inflateDynamic_getHeader:
+ lda inflateDynamic_headerBits-1,x
+ jsr getBits
+; sec
+ adc inflateDynamic_headerBase-1,x
+ sta inflateDynamic_tempCodes-1,x
+ dex
+ bne inflateDynamic_getHeader
+
+; Get lengths of temporary codes in the order stored in inflateDynamic_tempSymbols
+; ldx #0
+inflateDynamic_getTempCodeLengths:
+ lda #GET_3_BITS
+ jsr getBits
+ ldy inflateDynamic_tempSymbols,x
+ sta literalSymbolCodeLength,y
+ ldy #0
+ inx
+ cpx inflateDynamic_tempCodes
+ bcc inflateDynamic_getTempCodeLengths
+
+; Build the tree for temporary codes
+ jsr buildHuffmanTree
+
+; Use temporary codes to get lengths of literal/length and distance codes
+; ldx #0
+; sec
+inflateDynamic_decodeLength:
+; C=1: literal codes
+; C=0: control codes
+ stx inflateDynamic_symbol
+ php
+; Fetch a temporary code
+ jsr fetchPrimaryCode
+; Temporary code 0..15: put this length
+ bpl inflateDynamic_storeLengths
+; Temporary code 16: repeat last length 3 + getBits(2) times
+; Temporary code 17: put zero length 3 + getBits(3) times
+; Temporary code 18: put zero length 11 + getBits(7) times
+ tax
+ jsr getBits
+ cpx #GET_3_BITS
+ bcc inflateDynamic_code16
+ beq inflateDynamic_code17
+; sec
+ adc #7
+inflateDynamic_code17:
+; ldy #0
+ sty inflateDynamic_lastLength
+inflateDynamic_code16:
+ tay
+ lda inflateDynamic_lastLength
+ iny
+ iny
+inflateDynamic_storeLengths:
+ iny
+ plp
+ ldx inflateDynamic_symbol
+inflateDynamic_storeLength:
+ bcc inflateDynamic_controlSymbolCodeLength
+ sta literalSymbolCodeLength,x
+ inx
+ cpx #1
+inflateDynamic_storeNext:
+ dey
+ bne inflateDynamic_storeLength
+ sta inflateDynamic_lastLength
+ beq inflateDynamic_decodeLength ; jmp
+inflateDynamic_controlSymbolCodeLength:
+ cpx inflateDynamic_primaryCodes
+ bcc inflateDynamic_storeControl
+; the code lengths we skip here were zero-initialized
+; in inflateCompressed_setControlCodeLength
+ bne inflateDynamic_noStartDistanceTree
+ ldx #LENGTH_SYMBOLS
+inflateDynamic_noStartDistanceTree:
+ ora #DISTANCE_TREE
+inflateDynamic_storeControl:
+ sta controlSymbolCodeLength,x
+ inx
+ cpx inflateDynamic_allCodes
+ bcc inflateDynamic_storeNext
+ dey
+; ldy #0
+ jmp inflateCodes
+
+; Build Huffman trees basing on code lengths (in bits)
+; stored in the *SymbolCodeLength arrays
+buildHuffmanTree:
+; Clear nBitCode_totalCount, nBitCode_literalCount, nBitCode_controlCount
+ tya
+; lda #0
+buildHuffmanTree_clear:
+ sta nBitCode_clearFrom,y
+ iny
+ bne buildHuffmanTree_clear
+; Count number of codes of each length
+; ldy #0
+buildHuffmanTree_countCodeLengths:
+ ldx literalSymbolCodeLength,y
+ inc nBitCode_literalCount,x
+ inc nBitCode_totalCount,x
+ cpy #CONTROL_SYMBOLS
+ bcs buildHuffmanTree_noControlSymbol
+ ldx controlSymbolCodeLength,y
+ inc nBitCode_controlCount,x
+ inc nBitCode_totalCount,x
+buildHuffmanTree_noControlSymbol:
+ iny
+ bne buildHuffmanTree_countCodeLengths
+; Calculate offsets of symbols sorted by code length
+; lda #0
+ ldx #$100-3*TREE_SIZE
+buildHuffmanTree_calculateOffsets:
+ sta nBitCode_literalOffset+3*TREE_SIZE-$100,x
+ clc
+ adc nBitCode_literalCount+3*TREE_SIZE-$100,x
+ inx
+ bne buildHuffmanTree_calculateOffsets
+; Put symbols in their place in the sorted array
+; ldy #0
+buildHuffmanTree_assignCode:
+ tya
+ ldx literalSymbolCodeLength,y
+ ldy nBitCode_literalOffset,x
+ inc nBitCode_literalOffset,x
+ sta codeToLiteralSymbol,y
+ tay
+ cpy #CONTROL_SYMBOLS
+ bcs buildHuffmanTree_noControlSymbol2
+ ldx controlSymbolCodeLength,y
+ ldy nBitCode_controlOffset,x
+ inc nBitCode_controlOffset,x
+ sta codeToControlSymbol,y
+ tay
+buildHuffmanTree_noControlSymbol2:
+ iny
+ bne buildHuffmanTree_assignCode
+ rts
+
+; Read Huffman code using the primary tree
+fetchPrimaryCode:
+ ldx #PRIMARY_TREE
+; Read a code from input using the tree specified in X.
+; Return low byte of this code in A.
+; Return C flag reset for literal code, set for length code.
+fetchCode:
+; ldy #0
+ tya
+fetchCode_nextBit:
+ jsr getBit
+ rol a
+ inx
+ sec
+ sbc nBitCode_totalCount,x
+ bcs fetchCode_nextBit
+; clc
+ adc nBitCode_controlCount,x
+ bcs fetchCode_control
+; clc
+ adc nBitCode_literalOffset,x
+ tax
+ lda codeToLiteralSymbol,x
+ clc
+ rts
+fetchCode_control:
+; sec
+ adc nBitCode_controlOffset-1,x
+ tax
+ lda codeToControlSymbol-1,x
+ and #$1f ; make distance symbols zero-based
+ tax
+ sec
+ rts
+
+; Read A minus 1 bits, but no more than 8
+getAMinus1BitsMax8:
+ rol getBits_base
+ tax
+ cmp #9
+ bcs getByte
+ lda getNPlus1Bits_mask-2,x
+getBits:
+ jsr getBits_loop
+getBits_normalizeLoop:
+ lsr getBits_base
+ ror a
+ bcc getBits_normalizeLoop
+ rts
+
+; Read 16 bits
+getWord:
+ jsr getByte
+ tax
+; Read 8 bits
+getByte:
+ lda #$80
+getBits_loop:
+ jsr getBit
+ ror a
+ bcc getBits_loop
+ rts
+
+; Read one bit, return in the C flag
+getBit:
+ lsr getBit_buffer
+ bne getBit_return
+ pha
+; ldy #0
+ lda (inputPointer),y
+ inc inputPointer
+ bne getBit_samePage
+ inc inputPointer+1
+getBit_samePage:
+ sec
+ ror a
+ sta getBit_buffer
+ pla
+getBit_return:
+ rts
+
+; Copy a previously written byte
+copyByte:
+ ldy outputPointer
+ lda (inflateCodes_sourcePointer),y
+ ldy #0
+; Write a byte
+storeByte:
+; ldy #0
+ sta (outputPointer),y
+ inc outputPointer
+ bne storeByte_return
+ inc outputPointer+1
+ inc inflateCodes_sourcePointer+1
+storeByte_return:
+ rts
+
+
+; --------------------------------------------------------------------------
+;
+; Constant data
+;
+
+ .rodata
+
+getNPlus1Bits_mask:
+ .byte GET_1_BIT,GET_2_BITS,GET_3_BITS,GET_4_BITS,GET_5_BITS,GET_6_BITS,GET_7_BITS
+
+inflateDynamic_tempSymbols:
+ .byte GET_2_BITS,GET_3_BITS,GET_7_BITS,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15
+
+inflateDynamic_headerBits:
+ .byte GET_4_BITS,GET_5_BITS,GET_5_BITS
+inflateDynamic_headerBase:
+ .byte 3,LENGTH_SYMBOLS,0
+
+
+; --------------------------------------------------------------------------
+;
+; Uninitialised data
+;
+
+ .bss
+
+; Data for building trees.
+
+literalSymbolCodeLength:
+ .res 256
+controlSymbolCodeLength:
+ .res CONTROL_SYMBOLS
+
+; Huffman trees.
+
+nBitCode_clearFrom:
+nBitCode_totalCount:
+ .res 2*TREE_SIZE
+nBitCode_literalCount:
+ .res TREE_SIZE
+nBitCode_controlCount:
+ .res 2*TREE_SIZE
+nBitCode_literalOffset:
+ .res TREE_SIZE
+nBitCode_controlOffset:
+ .res 2*TREE_SIZE
+
+codeToLiteralSymbol:
+ .res 256
+codeToControlSymbol:
+ .res CONTROL_SYMBOLS