]> git.sur5r.net Git - cc65/blobdiff - libsrc/zlib/inflatemem.s
don't use constructor to setup runtime stack
[cc65] / libsrc / zlib / inflatemem.s
index 6c2959084a1858fbb96b0a54feddfb2afeb66684..18bf80cd1e3028007f74549fd60f589e9dca3280 100644 (file)
@@ -1,48 +1,68 @@
 ;
-; Piotr Fusik, 11.11.2001
+; Piotr Fusik, 21.09.2003
 ;
-; void* inflatemem (void* dest, void* src);
+; unsigned __fastcall__ inflatemem (char* dest, const char* source);
 ;
 
        .export         _inflatemem
 
-       .import         popax
-       .importzp       sreg, ptr1, ptr2, ptr3, ptr4, tmp1, tmp2, tmp3, tmp4
+       .import         incsp2
+       .importzp       sp, sreg, ptr1, ptr2, ptr3, ptr4, tmp1
 
 ; --------------------------------------------------------------------------
 ;
 ; Constants
 ;
 
-; Maximum length of a Huffman code
+; Maximum length of a Huffman code.
 MAX_BITS      =        15
-; Index in bitsCount, bitsPointer_l and bitsPointer_h for literal tree
-LITERAL_TREE  =        0
-; Index in bitsCount, bitsPointer_l and bitsPointer_h for distance tree
+
+; All Huffman trees are stored in the bitsCount, bitsPointer_l
+; and bitsPointer_h arrays.  There may be two trees: the literal/length tree
+; and the distance tree, or just one - the temporary tree.
+
+; Index in the mentioned arrays for the beginning of the literal/length tree
+; or the temporary tree.
+PRIMARY_TREE  =        0
+
+; Index in the mentioned arrays for the beginning of the distance tree.
 DISTANCE_TREE =        MAX_BITS
-; Size of each of bitsCount, bitsPointer_l and bitsPointer_h
-TREES_SIZE    =        2*MAX_BITS+1
+
+; Size of each array.
+TREES_SIZE    =        2*MAX_BITS
+
 
 ; --------------------------------------------------------------------------
 ;
 ; Page zero
 ;
 
-; Pointer to compressed data
-inputPointer  =        ptr1    ; 2 bytes
-; Pointer to uncompressed data
-outputPointer =        ptr2    ; 2 bytes
-; Buffer for getBit
-getBitHold    =        tmp1    ; 1 byte
-; Local variables. Variables from different routines use same memory.
-cnt           =        tmp2    ; 1 byte
-tmp           =        sreg    ; 1 byte
-ptr           =        sreg    ; 2 bytes
-len           =        ptr3    ; 2 bytes
-nl            =        tmp3    ; 1 byte
-nd            =        tmp4    ; 1 byte
-src           =        ptr4    ; 2 bytes
-dest          =        ptr4    ; 2 bytes
+; Pointer to the compressed data.
+inputPointer            =      ptr1    ; 2 bytes
+
+; Pointer to the uncompressed data.
+outputPointer           =      ptr2    ; 2 bytes
+
+; Local variables.
+; As far as there is no conflict, same memory locations are used
+; for different variables.
+
+inflateDynamicBlock_cnt =      ptr3    ; 1 byte
+inflateCodes_src        =      ptr3    ; 2 bytes
+buildHuffmanTree_src    =      ptr3    ; 2 bytes
+getNextLength_last      =      ptr3    ; 1 byte
+getNextLength_index     =      ptr3+1  ; 1 byte
+
+buildHuffmanTree_ptr    =      ptr4    ; 2 bytes
+fetchCode_ptr           =      ptr4    ; 2 bytes
+getBits_tmp             =      ptr4    ; 1 byte
+
+moveBlock_len           =      sreg    ; 2 bytes
+inflateDynamicBlock_np  =      sreg    ; 1 byte
+inflateDynamicBlock_nd  =      sreg+1  ; 1 byte
+
+getBit_hold             =      tmp1    ; 1 byte
+
 
 ; --------------------------------------------------------------------------
 ;
@@ -51,77 +71,103 @@ dest          =    ptr4    ; 2 bytes
 
 _inflatemem:
 
-; Get inputPointer and outputPointer from stack
-       jsr     popax
+; inputPointer = source
        sta     inputPointer
        stx     inputPointer+1
-       jsr     popax
+; outputPointer = dest
+.ifpc02
+       lda     (sp)
+       ldy     #1
+.else
+       ldy     #0
+       lda     (sp),y
+       iny
+.endif
        sta     outputPointer
-       stx     outputPointer+1
+       lda     (sp),y
+       sta     outputPointer+1
 
-       ldy     #1
-       sty     getBitHold
-; Get a bit of EOF and two bits of block type
+;      ldy     #1
+       sty     getBit_hold
 inflatemem_1:
+; Get a bit of EOF and two bits of block type
        ldx     #3
        lda     #0
        jsr     getBits
        lsr     a
-       tax
-; X contains block type, C contains EOF flag
+; A and Z contain block type, C contains EOF flag
 ; Save EOF flag
        php
 ; Go to the routine decompressing this block
        jsr     callExtr
        plp
        bcc     inflatemem_1
+; C flag is set!
+
+; return outputPointer - dest;
        lda     outputPointer
-       ldx     outputPointer+1
-       rts
+.ifpc02
+       sbc     (sp)            ; C flag is set
+       ldy     #1
+.else
+       ldy     #0
+       sbc     (sp),y          ; C flag is set
+       iny
+.endif
+       pha
+       lda     outputPointer+1
+       sbc     (sp),y
+       tax
+       pla
+; pop dest
+       jmp     incsp2
 
 ; --------------------------------------------------------------------------
-; Go to the routine decompressing block type X
+; Go to proper block decoding routine.
 
 callExtr:
-       lda     extr_h,x
-       pha
-       lda     extr_l,x
-       pha
-       rts
+       bne     inflateCompressedBlock
 
 ; --------------------------------------------------------------------------
-; Decompress 'stored' data block
+; Decompress a 'stored' data block.
 
 inflateCopyBlock:
 ; Ignore bits until byte boundary
        ldy     #1
-       sty     getBitHold
+       sty     getBit_hold
 ; Get 16-bit length
        ldx     #inputPointer
        lda     (0,x)
-       sta     len
+       sta     moveBlock_len
        lda     (inputPointer),y
-       sta     len+1
-; Skip length and one's compliment length
+       sta     moveBlock_len+1
+; Skip the length and one's complement of it
        lda     #4
        clc
        adc     inputPointer
        sta     inputPointer
        bcc     moveBlock
        inc     inputPointer+1
-; fall into moveBlock
+;      jmp     moveBlock
 
 ; --------------------------------------------------------------------------
-; Copy block of length len from (0,x) to output
+; Copy block of length moveBlock_len from (0,x) to the output.
 
 moveBlock:
-       ldy     len
+       ldy     moveBlock_len
        beq     moveBlock_1
+.ifpc02
+.else
        ldy     #0
-       inc     len+1
+.endif
+       inc     moveBlock_len+1
 moveBlock_1:
        lda     (0,x)
+.ifpc02
+       sta     (outputPointer)
+.else
        sta     (outputPointer),y
+.endif
        inc     0,x
        bne     moveBlock_2
        inc     1,x
@@ -130,18 +176,33 @@ moveBlock_2:
        bne     moveBlock_3
        inc     outputPointer+1
 moveBlock_3:
-       dec     len
+.ifpc02
+       dey
+.else
+       dec     moveBlock_len
+.endif
        bne     moveBlock_1
-       dec     len+1
+       dec     moveBlock_len+1
        bne     moveBlock_1
        rts
 
 ; --------------------------------------------------------------------------
-; Decompress Huffman-coded data block with default Huffman trees:
+; Decompress a Huffman-coded data block
+; (A = 1: fixed, A = 2: dynamic).
+
+inflateCompressedBlock:
+       lsr     a
+       bne     inflateDynamicBlock
+; Note: inflateDynamicBlock may assume that A = 1
+
+; --------------------------------------------------------------------------
+; Decompress a Huffman-coded data block with default Huffman trees
+; (defined by the DEFLATE format):
 ; literalCodeLength:  144 times 8, 112 times 9
-; endCodeLength:      24 times 7, 6 times 8
-; distanceCodeLength: 30 times 5, 2 times 8
-;                     (2 last codes from literal tree are not used)
+; endCodeLength:      7
+; lengthCodeLength:   23 times 7, 6 times 8
+; distanceCodeLength: 30 times 5+DISTANCE_TREE, 2 times 8
+;                     (two 8-bit codes from the primary tree are not used).
 
 inflateFixedBlock:
        ldx     #159
@@ -153,15 +214,15 @@ inflateFixedBlock_1:
        dex
        bne     inflateFixedBlock_1
        ldx     #112
-       lda     #9
+;      lda     #9
 inflateFixedBlock_2:
-       sta     literalCodeLength+144-1,x
+       inc     literalCodeLength+144-1,x       ; sta
        dex
        bne     inflateFixedBlock_2
        ldx     #24
-       lda     #7
+;      lda     #7
 inflateFixedBlock_3:
-       sta     endCodeLength-1,x
+       dec     endCodeLength-1,x       ; sta
        dex
        bne     inflateFixedBlock_3
        ldx     #30
@@ -170,412 +231,383 @@ inflateFixedBlock_4:
        sta     distanceCodeLength-1,x
        dex
        bne     inflateFixedBlock_4
-       jmp     inflateCodes
+       beq     inflateCodes            ; branch always
 
 ; --------------------------------------------------------------------------
-; Decompress Huffman-coded data block, reading Huffman trees first
+; Decompress a Huffman-coded data block, reading Huffman trees first.
 
 inflateDynamicBlock:
-; numberOfLiteralCodes = getBits(5) + 257;
+; numberOfPrimaryCodes = 257 + getBits(5)
        ldx     #5
-       lda     #<(lengthCodeLength-1)
+;      lda     #1
        jsr     getBits
-       sta     nl
-; numberOfDistanceCodes = getBits(5) + 1;
+       sta     inflateDynamicBlock_np
+; numberOfDistanceCodes = 1 + getBits(5)
        ldx     #5
-       lda     #1
+       lda     #1+29+1
        jsr     getBits
-       sta     nd
-       clc
-       adc     nl
-       sta     nl
-; numberOfTemporaryCodes = getBits(4) + 4;
+       sta     inflateDynamicBlock_nd
+; numberOfTemporaryCodes = 4 + getBits(4)
        lda     #4
        tax
        jsr     getBits
-       sta     cnt
-; Clear lengths of temporary codes (there're 19 temp codes max),
-; clear literalCodeLength-1 (it may be used by temporary code 16)
-; and leave #0 in Y
-       ldy     #20
-       lda     #0
+       sta     inflateDynamicBlock_cnt
+; Get lengths of temporary codes in the order stored in tempCodeLengthOrder
+       txa                     ; lda #0
+       tay
 inflateDynamicBlock_1:
-       sta     literalCodeLength-2,y
-       dey
-       bne     inflateDynamicBlock_1
-; Get lengths of temporary codes in order stored in bll
+       ldx     #3              ; A = 0
+       jsr     getBits         ; does not change Y
 inflateDynamicBlock_2:
-       ldx     #3
-       lda     #0
-       jsr     getBits ; does not change Y
-       ldx     bll,y
+       ldx     tempCodeLengthOrder,y
        sta     literalCodeLength,x
+       lda     #0
        iny
-       cpy     cnt
+       cpy     inflateDynamicBlock_cnt
+       bcc     inflateDynamicBlock_1
+       cpy     #19
        bcc     inflateDynamicBlock_2
-       ror     literalCodeLength+19    ; C flag is set, so it will set b7
-; Build tree for temporary codes
+       ror     literalCodeLength+19    ; C flag is set, so this will set b7
+; Build the tree for temporary codes
        jsr     buildHuffmanTree
 
-; Use temporary codes to get lengths for literal and distance codes
-; dest is target-1, so we can access last written byte by (dest,0)
-       lda     #<(literalCodeLength-1)
-       sta     dest
-       lda     #>(literalCodeLength-1)
-       sta     dest+1
-inflateDynamicBlock_3:
-       jsr     fetchLiteralCode
-; Temporary code 0..15: put this length
+; Use temporary codes to get lengths of literal/length and distance codes
+       ldx     #0
        ldy     #1
-       cmp     #16
-       bcc     inflateDynamicBlock_6
-       bne     inflateDynamicBlock_4
-; Temporary code 16: repeat last length 3..6 times
-       ldx     #2
-       lda     #3
-       jsr     getBits
-       tay
-       lda     (dest,x)        ; X == 0
-       bpl     inflateDynamicBlock_6   ; branch always
+       stx     getNextLength_last
+inflateDynamicBlock_3:
+       jsr     getNextLength
+       sta     literalCodeLength,x
+       inx
+       bne     inflateDynamicBlock_3
 inflateDynamicBlock_4:
-       lsr     a
-; Temporary code 17: put zero length 3..10 times
-       lda     #3
-       tax
-       bcs     inflateDynamicBlock_5
-; Temporary code 18: put zero length 11..138 times
-       ldx     #7
-       lda     #11
+       jsr     getNextLength
 inflateDynamicBlock_5:
-       jsr     getBits
-       tay
+       sta     endCodeLength,x
+       inx
+       cpx     inflateDynamicBlock_np
+       bcc     inflateDynamicBlock_4
        lda     #0
-; Write A length Y times
+       cpx     #1+29
+       bcc     inflateDynamicBlock_5
 inflateDynamicBlock_6:
-       sty     cnt
+       jsr     getNextLength
+       cmp     #0
+       beq     inflateDynamicBlock_7
+       adc     #DISTANCE_TREE-1        ; C flag is set
 inflateDynamicBlock_7:
-       sta     (dest),y
-       dey
-       bne     inflateDynamicBlock_7
-       lda     cnt
-       clc
-       adc     dest
-       sta     dest
-       bcc     inflateDynamicBlock_8
-       inc     dest+1
-inflateDynamicBlock_8:
-       cmp     nl
-       bne     inflateDynamicBlock_3
-       ldy     dest+1
-       sbc     #<endCodeLength ; C flag is set
-       bcs     inflateDynamicBlock_11
-       dey
-inflateDynamicBlock_11:
-       cpy     #>endCodeLength
-       bcc     inflateDynamicBlock_3
-; Mark end of distance lengths
-       ldx     nd
-       tay
-       ror     distanceCodeLength,x    ; C flag is set, so it will set b7
-; Move distance lengths to distanceCodeLength table
-inflateDynamicBlock_9:
-       dex
-       lda     endCodeLength,y
-; Mark existing codes (of non-zero length) as distance tree codes
-       beq     inflateDynamicBlock_10
-       pha
-       lda     #0
-       sta     endCodeLength,y
-       pla
-       clc
-       adc     #DISTANCE_TREE
-       sta     distanceCodeLength,x
-inflateDynamicBlock_10:
-       dey
-       txa
-       bne     inflateDynamicBlock_9
-; fall into inflateCodes
+       sta     endCodeLength,x
+       inx
+       cpx     inflateDynamicBlock_nd
+       bcc     inflateDynamicBlock_6
+       ror     endCodeLength,x         ; C flag is set, so this will set b7
+;      jmp     inflateCodes
 
 ; --------------------------------------------------------------------------
-; Decompress data block basing on given Huffman trees
+; Decompress a data block basing on given Huffman trees.
 
 inflateCodes:
        jsr     buildHuffmanTree
 inflateCodes_1:
-       jsr     fetchLiteralCode
-       bcc     inflateCodes_2
+       jsr     fetchPrimaryCode
+       bcs     inflateCodes_2
 ; Literal code
+.ifpc02
+       sta     (outputPointer)
+.else
        ldy     #0
        sta     (outputPointer),y
+.endif
        inc     outputPointer
        bne     inflateCodes_1
        inc     outputPointer+1
-       bcs     inflateCodes_1  ; branch always
+       bcc     inflateCodes_1  ; branch always
 ; End of block
 inflateCodes_ret:
        rts
 inflateCodes_2:
        beq     inflateCodes_ret
-; Repeat block
+; Restore a block from the look-behind buffer
        jsr     getValue
-       sta     len
+       sta     moveBlock_len
        tya
        jsr     getBits
-       sta     len+1
+       sta     moveBlock_len+1
        ldx     #DISTANCE_TREE
        jsr     fetchCode
        jsr     getValue
-       sta     src
+       sec
+       eor     #$ff
+       adc     outputPointer
+       sta     inflateCodes_src
+       php
        tya
        jsr     getBits
-       sta     src+1
-       lda     outputPointer
-       sec
-       sbc     src
-       sta     src
-       lda     outputPointer+1
-       sbc     src+1
-       sta     src+1
-       ldx     #src
+       plp
+       eor     #$ff
+       adc     outputPointer+1
+       sta     inflateCodes_src+1
+       ldx     #inflateCodes_src
        jsr     moveBlock
        beq     inflateCodes_1  ; branch always
 
 ; --------------------------------------------------------------------------
-; Build Huffman trees basing on code lengths.
-; Lengths (in bits) are stored in literalCodeLength.
-; A byte with highest bit set marks end of length table.
+; Build Huffman trees basing on code lengths (in bits).
+; stored in the *CodeLength arrays.
+; A byte with its highest bit set marks the end.
 
 buildHuffmanTree:
        lda     #<literalCodeLength
-       sta     buildHuffmanTree_3+1
-       sta     buildHuffmanTree_9+1
+       sta     buildHuffmanTree_src
        lda     #>literalCodeLength
-       sta     buildHuffmanTree_3+2
-       sta     buildHuffmanTree_9+2
-; Clear counts
-       ldx     #TREES_SIZE-1
+       sta     buildHuffmanTree_src+1
+; Clear bitsCount and bitsPointer_l
+       ldy     #2*TREES_SIZE+1
        lda     #0
 buildHuffmanTree_1:
-       sta     bitsCount,x
-       dex
-       bpl     buildHuffmanTree_1
-       bmi     buildHuffmanTree_3      ; branch always
+       sta     bitsCount-1,y
+       dey
+       bne     buildHuffmanTree_1
+       beq     buildHuffmanTree_3      ; branch always
 ; Count number of codes of each length
 buildHuffmanTree_2:
-       inc     bitsCount,x
-       inc     buildHuffmanTree_3+1
+       tax
+       inc     bitsPointer_l,x
+       iny
        bne     buildHuffmanTree_3
-       inc     buildHuffmanTree_3+2
+       inc     buildHuffmanTree_src+1
 buildHuffmanTree_3:
-       ldx     $ffff   ; patched at runtime
+       lda     (buildHuffmanTree_src),y
        bpl     buildHuffmanTree_2
-; Calculate pointer for each length
-       tax             ; ldx #0
-       stx     bitsCount
+; Calculate a pointer for each length
+       ldx     #0
        lda     #<sortedCodes
        ldy     #>sortedCodes
+       clc
 buildHuffmanTree_4:
        sta     bitsPointer_l,x
        tya
        sta     bitsPointer_h,x
-       lda     bitsCount,x
-       asl     a
+       lda     bitsPointer_l+1,x
+       adc     bitsPointer_l,x         ; C flag is zero
        bcc     buildHuffmanTree_5
        iny
-       clc
 buildHuffmanTree_5:
-       adc     bitsPointer_l,x
-       bcc     buildHuffmanTree_6
-       iny
-buildHuffmanTree_6:
        inx
        cpx     #TREES_SIZE
        bcc     buildHuffmanTree_4
+       lda     #>literalCodeLength
+       sta     buildHuffmanTree_src+1
+       ldy     #0
        bcs     buildHuffmanTree_9      ; branch always
 ; Put codes into their place in sorted table
+buildHuffmanTree_6:
+       beq     buildHuffmanTree_7
+       tax
+       lda     bitsPointer_l-1,x
+       sta     buildHuffmanTree_ptr
+       lda     bitsPointer_h-1,x
+       sta     buildHuffmanTree_ptr+1
+       tya
+       ldy     bitsCount-1,x
+       inc     bitsCount-1,x
+       sta     (buildHuffmanTree_ptr),y
+       tay
 buildHuffmanTree_7:
-       beq     buildHuffmanTree_8
-       lda     bitsPointer_l,x
-       sta     ptr
-       clc
-       adc     #2
-       sta     bitsPointer_l,x
-       lda     bitsPointer_h,x
-       sta     ptr+1
-       adc     #0
-       sta     bitsPointer_h,x
-       lda     buildHuffmanTree_9+1
-       sbc     #<(endCodeLength-1)     ; C flag is zero
-       ldy     #1
-       sta     (ptr),y
-       lda     buildHuffmanTree_9+2
-       sbc     #>(endCodeLength-1)
-.ifpc02
-       sta     (ptr)
-.else
-       dey
-       sta     (ptr),y
-.endif
-buildHuffmanTree_8:
-       inc     buildHuffmanTree_9+1
+       iny
        bne     buildHuffmanTree_9
-       inc     buildHuffmanTree_9+2
+       inc     buildHuffmanTree_src+1
+       ldx     #MAX_BITS-1
+buildHuffmanTree_8:
+       lda     bitsCount,x
+       sta     literalCount,x
+       dex
+       bpl     buildHuffmanTree_8
 buildHuffmanTree_9:
-       ldx     $ffff   ; patched at runtime
-       bpl     buildHuffmanTree_7
+       lda     (buildHuffmanTree_src),y
+       bpl     buildHuffmanTree_6
+       rts
+
+; --------------------------------------------------------------------------
+; Decode next code length using temporary codes.
+
+getNextLength:
+       stx     getNextLength_index
+       dey
+       bne     getNextLength_1
+; Fetch a temporary code
+       jsr     fetchPrimaryCode
+; Temporary code 0..15: put this length
+       ldy     #1
+       cmp     #16
+       bcc     getNextLength_2
+; Temporary code 16: repeat last length 3 + getBits(2) times
+; Temporary code 17: put zero length 3 + getBits(3) times
+; Temporary code 18: put zero length 11 + getBits(7) times
+       tay
+       ldx     tempExtraBits-16,y
+       lda     tempBaseValue-16,y
+       jsr     getBits
+       cpy     #17
+       tay
+       txa                     ; lda #0
+       bcs     getNextLength_2
+getNextLength_1:
+       lda     getNextLength_last
+getNextLength_2:
+       sta     getNextLength_last
+       ldx     getNextLength_index
        rts
 
 ; --------------------------------------------------------------------------
-; Read code basing on literal tree
+; Read a code basing on the primary tree.
 
-fetchLiteralCode:
-       ldx     #LITERAL_TREE
-; fall into fetchCode
+fetchPrimaryCode:
+       ldx     #PRIMARY_TREE
+;      jmp     fetchCode
 
 ; --------------------------------------------------------------------------
-; Read code from input stream basing on tree given in X.
-; Return code in A, C is set if non-literal code.
+; Read a code from input basing on the tree specified in X.
+; Return low byte of this code in A.
+; For the literal/length tree, the C flag is set if the code is non-literal.
 
 fetchCode:
        lda     #0
 fetchCode_1:
        jsr     getBit
        rol     a
-       cmp     bitsCount+1,x
-       bcc     fetchCode_2
-       sbc     bitsCount+1,x
        inx
-       bcs     fetchCode_1     ; branch always
-fetchCode_2:
-       ldy     bitsPointer_l,x
-       sty     ptr
-       ldy     bitsPointer_h,x
-       asl     a
-       bcc     fetchCode_3
-       iny
-fetchCode_3:
-       sty     ptr+1
-       tay
-       lda     (ptr),y
-       asl     a
-       iny
-       lda     (ptr),y
+       sec
+       sbc     bitsCount-1,x
+       bcs     fetchCode_1
+       adc     bitsCount-1,x   ; C flag is zero
+       cmp     literalCount-1,x
+       sta     fetchCode_ptr
+       ldy     bitsPointer_l-1,x
+       lda     bitsPointer_h-1,x
+       sta     fetchCode_ptr+1
+       lda     (fetchCode_ptr),y
        rts
 
 ; --------------------------------------------------------------------------
-; Read low byte of value (length or distance), basing on code A
+; Decode low byte of a value (length or distance), basing on the code in A.
+; The result is the base value for this code plus some bits read from input.
 
 getValue:
        tay
        ldx     lengthExtraBits-1,y
-       lda     lengthCode_l-1,y
+       lda     lengthBaseValue_l-1,y
        pha
-       lda     lengthCode_h-1,y
+       lda     lengthBaseValue_h-1,y
        tay
        pla
-; fall into getBits
+;      jmp     getBits
 
 ; --------------------------------------------------------------------------
-; Read X-bit number from input stream and adds it to A.
-; In case of carry, Y is incremented.
-; If X > 8, only 8 bits are read.
+; Read X-bit number from the input and add it to A.
+; Increment Y if overflow.
+; If X > 8, read only 8 bits.
 ; On return X holds number of unread bits: X = (X > 8 ? X - 8 : 0);
 
 getBits:
        cpx     #0
        beq     getBits_ret
+.ifpc02
+       stz     getBits_tmp
+       dec     getBits_tmp
+.else
        pha
-       lda     #1
-       sta     tmp
+       lda     #$ff
+       sta     getBits_tmp
        pla
+.endif
 getBits_1:
        jsr     getBit
        bcc     getBits_2
-       clc
-       adc     tmp
+       sbc     getBits_tmp     ; C flag is set
        bcc     getBits_2
        iny
 getBits_2:
        dex
        beq     getBits_ret
-       asl     tmp
-       bcc     getBits_1
+       asl     getBits_tmp
+       bmi     getBits_1
 getBits_ret:
        rts
 
 ; --------------------------------------------------------------------------
-; Read single bit from input stream, return it in C flag
+; Read a single bit from input, return it in the C flag.
 
 getBit:
-       lsr     getBitHold
+       lsr     getBit_hold
        bne     getBit_ret
        pha
 .ifpc02
        lda     (inputPointer)
 .else
-       tya
-       pha
+       sty     getBit_hold
        ldy     #0
        lda     (inputPointer),y
+       ldy     getBit_hold
 .endif
        inc     inputPointer
        bne     getBit_1
        inc     inputPointer+1
 getBit_1:
-       ror     a       ; C flag set
-       sta     getBitHold
-.ifpc02
-.else
-       pla
-       tay
-.endif
+       ror     a       ; C flag is set
+       sta     getBit_hold
        pla
 getBit_ret:
        rts
 
+
 ; --------------------------------------------------------------------------
-; Addresses of functions extracting different blocks
-extr_l:
-       .byte   <(inflateCopyBlock-1)
-       .byte   <(inflateFixedBlock-1)
-       .byte   <(inflateDynamicBlock-1)
-extr_h:
-       .byte   >(inflateCopyBlock-1)
-       .byte   >(inflateFixedBlock-1)
-       .byte   >(inflateDynamicBlock-1)
+;
+; Constant data
+;
 
+       .rodata
 ; --------------------------------------------------------------------------
-; Order, in which lengths of temporary codes are stored
-bll:
+; Arrays for the temporary codes.
+
+; Order, in which lengths of the temporary codes are stored.
+tempCodeLengthOrder:
        .byte   16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15
 
+; Base values.
+tempBaseValue:
+       .byte   3,3,11
+
+; Number of extra bits to read.
+tempExtraBits:
+       .byte   2,3,7
+
 ; --------------------------------------------------------------------------
-; Tables for length and distance codes
-; Value is Code + getBits(ExtraBits)
+; Arrays for the length and distance codes.
 
-; Base values
-lengthCode_l:
+; Base values.
+lengthBaseValue_l:
        .byte   <3,<4,<5,<6,<7,<8,<9,<10
        .byte   <11,<13,<15,<17,<19,<23,<27,<31
        .byte   <35,<43,<51,<59,<67,<83,<99,<115
        .byte   <131,<163,<195,<227,<258
-distanceCode_l:
+distanceBaseValue_l:
        .byte   <1,<2,<3,<4,<5,<7,<9,<13
        .byte   <17,<25,<33,<49,<65,<97,<129,<193
        .byte   <257,<385,<513,<769,<1025,<1537,<2049,<3073
        .byte   <4097,<6145,<8193,<12289,<16385,<24577
-lengthCode_h:
+lengthBaseValue_h:
        .byte   >3,>4,>5,>6,>7,>8,>9,>10
        .byte   >11,>13,>15,>17,>19,>23,>27,>31
        .byte   >35,>43,>51,>59,>67,>83,>99,>115
        .byte   >131,>163,>195,>227,>258
-distanceCode_h:
+distanceBaseValue_h:
        .byte   >1,>2,>3,>4,>5,>7,>9,>13
        .byte   >17,>25,>33,>49,>65,>97,>129,>193
        .byte   >257,>385,>513,>769,>1025,>1537,>2049,>3073
        .byte   >4097,>6145,>8193,>12289,>16385,>24577
 
-; Number of extra bits to read
+; Number of extra bits to read.
 lengthExtraBits:
        .byte   0,0,0,0,0,0,0,0
        .byte   1,1,1,1,2,2,2,2
@@ -587,6 +619,7 @@ distanceExtraBits:
        .byte   7,7,8,8,9,9,10,10
        .byte   11,11,12,12,13,13
 
+
 ; --------------------------------------------------------------------------
 ;
 ; Uninitialised data
@@ -594,44 +627,47 @@ distanceExtraBits:
 
        .bss
 
+; Number of literal codes of each length in the primary tree
+; (MAX_BITS bytes, overlap with literalCodeLength).
+literalCount:
+
 ; --------------------------------------------------------------------------
-; Data for building literal tree
+; Data for building the primary tree.
 
-       .res    1
-; Length of literal codes
+; Lengths of literal codes.
 literalCodeLength:
        .res    256
-; Length of 'end' code
+; Length of the end code.
 endCodeLength:
        .res    1
-; Length of 'length' codes
+; Lengths of length codes.
 lengthCodeLength:
        .res    29
 
 ; --------------------------------------------------------------------------
-; Data for building distance tree
+; Data for building the distance tree.
 
+; Lengths of distance codes.
 distanceCodeLength:
        .res    30
-; For two unused codes in fixed trees and an end flag
+; For two unused codes in the fixed trees and an 'end' mark.
        .res    3
 
 ; --------------------------------------------------------------------------
-; Huffman tree structure
+; The Huffman trees.
 
-; Number of codes of each length
+; Number of codes of each length.
 bitsCount:
        .res    TREES_SIZE
-
-; Pointer to sorted codes of each length
+; Pointers to sorted codes of each length.
 bitsPointer_l:
-       .res    TREES_SIZE
+       .res    TREES_SIZE+1
 bitsPointer_h:
        .res    TREES_SIZE
 
-; Sorted codes
+; Sorted codes.
 sortedCodes:
-       .res    2*(256+1+29+30+2)
+       .res    256+1+29+30+2