]> git.sur5r.net Git - cc65/blob - libsrc/zlib/inflatemem.s
Optimize inflatemem.
[cc65] / libsrc / zlib / inflatemem.s
1 ;
2 ; 2017-02-12, Piotr Fusik
3 ;
4 ; unsigned __fastcall__ inflatemem (char* dest, const char* source);
5 ;
6 ; NOTE: Be extremely careful with modifications, because this code is heavily
7 ; optimized for size (for example assumes certain register and flag values
8 ; when its internal routines return). Test with the gunzip65 sample.
9 ;
10
11         .export         _inflatemem
12
13         .import         incsp2
14         .importzp       sp, sreg, ptr1, ptr2, ptr3, ptr4
15
16 ; --------------------------------------------------------------------------
17 ;
18 ; Constants
19 ;
20
21 ; Argument values for getBits.
22 GET_1_BIT           = $81
23 GET_2_BITS          = $82
24 GET_3_BITS          = $84
25 GET_4_BITS          = $88
26 GET_5_BITS          = $90
27 GET_6_BITS          = $a0
28 GET_7_BITS          = $c0
29
30 ; Huffman trees.
31 TREE_SIZE           = 16
32 PRIMARY_TREE        = 0
33 DISTANCE_TREE       = TREE_SIZE
34
35 ; Alphabet.
36 LENGTH_SYMBOLS      = 1+29+2    ; EOF, 29 length symbols, two unused symbols
37 DISTANCE_SYMBOLS    = 30
38 CONTROL_SYMBOLS     = LENGTH_SYMBOLS+DISTANCE_SYMBOLS
39
40
41 ; --------------------------------------------------------------------------
42 ;
43 ; Page zero
44 ;
45
46 ; Pointer to the compressed data.
47 inputPointer                :=  ptr1    ; 2 bytes
48
49 ; Pointer to the uncompressed data.
50 outputPointer               :=  ptr2    ; 2 bytes
51
52 ; Local variables.
53 ; As far as there is no conflict, same memory locations are used
54 ; for different variables.
55
56 inflateStored_pageCounter   :=  ptr3    ; 1 byte
57 inflateDynamic_symbol       :=  ptr3    ; 1 byte
58 inflateDynamic_lastLength   :=  ptr3+1  ; 1 byte
59         .assert ptr4 = ptr3 + 2, error, "Need three bytes for inflateDynamic_tempCodes"
60 inflateDynamic_tempCodes    :=  ptr3+1  ; 3 bytes
61 inflateDynamic_allCodes     :=  inflateDynamic_tempCodes+1 ; 1 byte
62 inflateDynamic_primaryCodes :=  inflateDynamic_tempCodes+2 ; 1 byte
63 inflateCodes_sourcePointer  :=  ptr3    ; 2 bytes
64 inflateCodes_lengthMinus2   :=  ptr4    ; 1 byte
65 getBits_base                :=  sreg    ; 1 byte
66 getBit_buffer               :=  sreg+1  ; 1 byte
67
68
69 ; --------------------------------------------------------------------------
70 ;
71 ; Code
72 ;
73
74 _inflatemem:
75
76 ; inputPointer = source
77         sta     inputPointer
78         stx     inputPointer+1
79 ; outputPointer = dest
80         ldy     #1
81         lda     (sp),y
82         sta     outputPointer+1
83         dey
84         lda     (sp),y
85         sta     outputPointer
86
87 ;       ldy     #0
88         sty     getBit_buffer
89
90 inflate_blockLoop:
91 ; Get a bit of EOF and two bits of block type
92 ;       ldy     #0
93         sty     getBits_base
94         lda     #GET_3_BITS
95         jsr     getBits
96         lsr     a
97 ; A and Z contain block type, C contains EOF flag
98 ; Save EOF flag
99         php
100         bne     inflateCompressed
101
102 ; Decompress a 'stored' data block.
103 ;       ldy     #0
104         sty     getBit_buffer   ; ignore bits until byte boundary
105         jsr     getWord         ; skip the length we don't need
106         jsr     getWord         ; get the two's complement length
107         sta     inflateStored_pageCounter
108         bcs     inflateStored_firstByte ; jmp
109 inflateStored_copyByte:
110         jsr     getByte
111 ;       sec
112 inflateStoreByte:
113         jsr     storeByte
114         bcc     inflateCodes_loop
115 inflateStored_firstByte:
116         inx
117         bne     inflateStored_copyByte
118         inc     inflateStored_pageCounter
119         bne     inflateStored_copyByte
120
121 ; Block decompressed.
122 inflate_nextBlock:
123         plp
124         bcc     inflate_blockLoop
125
126 ; Decompression complete.
127 ; return outputPointer - dest
128         lda     outputPointer
129 ;       ldy     #0
130 ;       sec
131         sbc     (sp),y
132         iny
133         pha
134         lda     outputPointer+1
135         sbc     (sp),y
136         tax
137         pla
138 ; pop dest
139         jmp     incsp2
140
141 inflateCompressed:
142 ; Decompress a Huffman-coded data block
143 ; A=1: fixed block, initialize with fixed codes
144 ; A=2: dynamic block, start by clearing all code lengths
145 ; A=3: invalid compressed data, not handled in this routine
146         eor     #2
147
148 ;       ldy     #0
149 inflateCompressed_setCodeLengths:
150         tax
151         beq     inflateCompressed_setLiteralCodeLength
152 ; fixed Huffman literal codes:
153 ; 144 8-bit codes
154 ; 112 9-bit codes
155         lda     #4
156         cpy     #144
157         rol     a
158 inflateCompressed_setLiteralCodeLength:
159         sta     literalSymbolCodeLength,y
160         beq     inflateCompressed_setControlCodeLength
161 ; fixed Huffman control codes:
162 ; 24 7-bit codes
163 ;  6 8-bit codes
164 ;  2 meaningless 8-bit codes
165 ; 30 5-bit distance codes
166         lda     #5+DISTANCE_TREE
167         cpy     #LENGTH_SYMBOLS
168         bcs     inflateCompressed_setControlCodeLength
169         cpy     #24
170         adc     #$100+2-DISTANCE_TREE
171 inflateCompressed_setControlCodeLength:
172         cpy     #CONTROL_SYMBOLS
173         bcs     inflateCompressed_noControlSymbol
174         sta     controlSymbolCodeLength,y
175 inflateCompressed_noControlSymbol:
176         iny
177         bne     inflateCompressed_setCodeLengths
178
179         tax
180         beq     inflateDynamic
181
182 ; Decompress a block
183 inflateCodes:
184         jsr     buildHuffmanTree
185 inflateCodes_loop:
186         jsr     fetchPrimaryCode
187         bcc     inflateStoreByte
188         beq     inflate_nextBlock
189 ; Copy sequence from look-behind buffer
190 ;       ldy     #0
191         sty     getBits_base
192         cmp     #9
193         bcc     inflateCodes_setSequenceLength
194         tya
195 ;       lda     #0
196         cpx     #1+28
197         bcs     inflateCodes_setSequenceLength
198         dex
199         txa
200         lsr     a
201         ror     getBits_base
202         inc     getBits_base
203         lsr     a
204         rol     getBits_base
205         jsr     getAMinus1BitsMax8
206 ;       sec
207         adc     #0
208 inflateCodes_setSequenceLength:
209         sta     inflateCodes_lengthMinus2
210         ldx     #DISTANCE_TREE
211         jsr     fetchCode
212         cmp     #4
213         bcc     inflateCodes_setOffsetLowByte
214         inc     getBits_base
215         lsr     a
216         jsr     getAMinus1BitsMax8
217 inflateCodes_setOffsetLowByte:
218         eor     #$ff
219         sta     inflateCodes_sourcePointer
220         lda     getBits_base
221         cpx     #10
222         bcc     inflateCodes_setOffsetHighByte
223         lda     getNPlus1Bits_mask-10,x
224         jsr     getBits
225         clc
226 inflateCodes_setOffsetHighByte:
227         eor     #$ff
228 ;       clc
229         adc     outputPointer+1
230         sta     inflateCodes_sourcePointer+1
231         jsr     copyByte
232         jsr     copyByte
233 inflateCodes_copyByte:
234         jsr     copyByte
235         dec     inflateCodes_lengthMinus2
236         bne     inflateCodes_copyByte
237         beq     inflateCodes_loop ; jmp
238
239 inflateDynamic:
240 ; Decompress a block reading Huffman trees first
241 ;       ldy     #0
242 ; numberOfPrimaryCodes = 257 + getBits(5)
243 ; numberOfDistanceCodes = 1 + getBits(5)
244 ; numberOfTemporaryCodes = 4 + getBits(4)
245         ldx     #3
246 inflateDynamic_getHeader:
247         lda     inflateDynamic_headerBits-1,x
248         jsr     getBits
249 ;       sec
250         adc     inflateDynamic_headerBase-1,x
251         sta     inflateDynamic_tempCodes-1,x
252         dex
253         bne     inflateDynamic_getHeader
254
255 ; Get lengths of temporary codes in the order stored in inflateDynamic_tempSymbols
256 ;       ldx     #0
257 inflateDynamic_getTempCodeLengths:
258         lda     #GET_3_BITS
259         jsr     getBits
260         ldy     inflateDynamic_tempSymbols,x
261         sta     literalSymbolCodeLength,y
262         ldy     #0
263         inx
264         cpx     inflateDynamic_tempCodes
265         bcc     inflateDynamic_getTempCodeLengths
266
267 ; Build the tree for temporary codes
268         jsr     buildHuffmanTree
269
270 ; Use temporary codes to get lengths of literal/length and distance codes
271 ;       ldx     #0
272 ;       sec
273 inflateDynamic_decodeLength:
274 ; C=1: literal codes
275 ; C=0: control codes
276         stx     inflateDynamic_symbol
277         php
278 ; Fetch a temporary code
279         jsr     fetchPrimaryCode
280 ; Temporary code 0..15: put this length
281         bpl     inflateDynamic_storeLengths
282 ; Temporary code 16: repeat last length 3 + getBits(2) times
283 ; Temporary code 17: put zero length 3 + getBits(3) times
284 ; Temporary code 18: put zero length 11 + getBits(7) times
285         tax
286         jsr     getBits
287         cpx     #GET_3_BITS
288         bcc     inflateDynamic_code16
289         beq     inflateDynamic_code17
290 ;       sec
291         adc     #7
292 inflateDynamic_code17:
293 ;       ldy     #0
294         sty     inflateDynamic_lastLength
295 inflateDynamic_code16:
296         tay
297         lda     inflateDynamic_lastLength
298         iny
299         iny
300 inflateDynamic_storeLengths:
301         iny
302         plp
303         ldx     inflateDynamic_symbol
304 inflateDynamic_storeLength:
305         bcc     inflateDynamic_controlSymbolCodeLength
306         sta     literalSymbolCodeLength,x
307         inx
308         cpx     #1
309 inflateDynamic_storeNext:
310         dey
311         bne     inflateDynamic_storeLength
312         sta     inflateDynamic_lastLength
313         beq     inflateDynamic_decodeLength ; jmp
314 inflateDynamic_controlSymbolCodeLength:
315         cpx     inflateDynamic_primaryCodes
316         bcc     inflateDynamic_storeControl
317 ; the code lengths we skip here were zero-initialized
318 ; in inflateCompressed_setControlCodeLength
319         bne     inflateDynamic_noStartDistanceTree
320         ldx     #LENGTH_SYMBOLS
321 inflateDynamic_noStartDistanceTree:
322         ora     #DISTANCE_TREE
323 inflateDynamic_storeControl:
324         sta     controlSymbolCodeLength,x
325         inx
326         cpx     inflateDynamic_allCodes
327         bcc     inflateDynamic_storeNext
328         dey
329 ;       ldy     #0
330         jmp     inflateCodes
331
332 ; Build Huffman trees basing on code lengths (in bits)
333 ; stored in the *SymbolCodeLength arrays
334 buildHuffmanTree:
335 ; Clear nBitCode_totalCount, nBitCode_literalCount, nBitCode_controlCount
336         tya
337 ;       lda     #0
338 buildHuffmanTree_clear:
339         sta     nBitCode_clearFrom,y
340         iny
341         bne     buildHuffmanTree_clear
342 ; Count number of codes of each length
343 ;       ldy     #0
344 buildHuffmanTree_countCodeLengths:
345         ldx     literalSymbolCodeLength,y
346         inc     nBitCode_literalCount,x
347         inc     nBitCode_totalCount,x
348         cpy     #CONTROL_SYMBOLS
349         bcs     buildHuffmanTree_noControlSymbol
350         ldx     controlSymbolCodeLength,y
351         inc     nBitCode_controlCount,x
352         inc     nBitCode_totalCount,x
353 buildHuffmanTree_noControlSymbol:
354         iny
355         bne     buildHuffmanTree_countCodeLengths
356 ; Calculate offsets of symbols sorted by code length
357 ;       lda     #0
358         ldx     #$100-3*TREE_SIZE
359 buildHuffmanTree_calculateOffsets:
360         sta     nBitCode_literalOffset+3*TREE_SIZE-$100,x
361         clc
362         adc     nBitCode_literalCount+3*TREE_SIZE-$100,x
363         inx
364         bne     buildHuffmanTree_calculateOffsets
365 ; Put symbols in their place in the sorted array
366 ;       ldy     #0
367 buildHuffmanTree_assignCode:
368         tya
369         ldx     literalSymbolCodeLength,y
370         ldy     nBitCode_literalOffset,x
371         inc     nBitCode_literalOffset,x
372         sta     codeToLiteralSymbol,y
373         tay
374         cpy     #CONTROL_SYMBOLS
375         bcs     buildHuffmanTree_noControlSymbol2
376         ldx     controlSymbolCodeLength,y
377         ldy     nBitCode_controlOffset,x
378         inc     nBitCode_controlOffset,x
379         sta     codeToControlSymbol,y
380         tay
381 buildHuffmanTree_noControlSymbol2:
382         iny
383         bne     buildHuffmanTree_assignCode
384         rts
385
386 ; Read Huffman code using the primary tree
387 fetchPrimaryCode:
388         ldx     #PRIMARY_TREE
389 ; Read a code from input using the tree specified in X.
390 ; Return low byte of this code in A.
391 ; Return C flag reset for literal code, set for length code.
392 fetchCode:
393 ;       ldy     #0
394         tya
395 fetchCode_nextBit:
396         jsr     getBit
397         rol     a
398         inx
399         sec
400         sbc     nBitCode_totalCount,x
401         bcs     fetchCode_nextBit
402 ;       clc
403         adc     nBitCode_controlCount,x
404         bcs     fetchCode_control
405 ;       clc
406         adc     nBitCode_literalOffset,x
407         tax
408         lda     codeToLiteralSymbol,x
409         clc
410         rts
411 fetchCode_control:
412 ;       sec
413         adc     nBitCode_controlOffset-1,x
414         tax
415         lda     codeToControlSymbol-1,x
416         and     #$1f    ; make distance symbols zero-based
417         tax
418         sec
419         rts
420
421 ; Read A minus 1 bits, but no more than 8
422 getAMinus1BitsMax8:
423         rol     getBits_base
424         tax
425         cmp     #9
426         bcs     getByte
427         lda     getNPlus1Bits_mask-2,x
428 getBits:
429         jsr     getBits_loop
430 getBits_normalizeLoop:
431         lsr     getBits_base
432         ror     a
433         bcc     getBits_normalizeLoop
434         rts
435
436 ; Read 16 bits
437 getWord:
438         jsr     getByte
439         tax
440 ; Read 8 bits
441 getByte:
442         lda     #$80
443 getBits_loop:
444         jsr     getBit
445         ror     a
446         bcc     getBits_loop
447         rts
448
449 ; Read one bit, return in the C flag
450 getBit:
451         lsr     getBit_buffer
452         bne     getBit_return
453         pha
454 ;       ldy     #0
455         lda     (inputPointer),y
456         inc     inputPointer
457         bne     getBit_samePage
458         inc     inputPointer+1
459 getBit_samePage:
460         sec
461         ror     a
462         sta     getBit_buffer
463         pla
464 getBit_return:
465         rts
466
467 ; Copy a previously written byte
468 copyByte:
469         ldy     outputPointer
470         lda     (inflateCodes_sourcePointer),y
471         ldy     #0
472 ; Write a byte
473 storeByte:
474 ;       ldy     #0
475         sta     (outputPointer),y
476         inc     outputPointer
477         bne     storeByte_return
478         inc     outputPointer+1
479         inc     inflateCodes_sourcePointer+1
480 storeByte_return:
481         rts
482
483
484 ; --------------------------------------------------------------------------
485 ;
486 ; Constant data
487 ;
488
489         .rodata
490
491 getNPlus1Bits_mask:
492         .byte   GET_1_BIT,GET_2_BITS,GET_3_BITS,GET_4_BITS,GET_5_BITS,GET_6_BITS,GET_7_BITS
493
494 inflateDynamic_tempSymbols:
495         .byte   GET_2_BITS,GET_3_BITS,GET_7_BITS,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15
496
497 inflateDynamic_headerBits:
498         .byte   GET_4_BITS,GET_5_BITS,GET_5_BITS
499 inflateDynamic_headerBase:
500         .byte   3,LENGTH_SYMBOLS,0
501
502
503 ; --------------------------------------------------------------------------
504 ;
505 ; Uninitialised data
506 ;
507
508         .bss
509
510 ; Data for building trees.
511
512 literalSymbolCodeLength:
513         .res    256
514 controlSymbolCodeLength:
515         .res    CONTROL_SYMBOLS
516
517 ; Huffman trees.
518
519 nBitCode_clearFrom:
520 nBitCode_totalCount:
521         .res    2*TREE_SIZE
522 nBitCode_literalCount:
523         .res    TREE_SIZE
524 nBitCode_controlCount:
525         .res    2*TREE_SIZE
526 nBitCode_literalOffset:
527         .res    TREE_SIZE
528 nBitCode_controlOffset:
529         .res    2*TREE_SIZE
530
531 codeToLiteralSymbol:
532         .res    256
533 codeToControlSymbol:
534         .res    CONTROL_SYMBOLS