]> git.sur5r.net Git - cc65/blob - libsrc/zlib/inflatemem.s
Now tgi_clear() works
[cc65] / libsrc / zlib / inflatemem.s
1 ;
2 ; 2017-11-07, Piotr Fusik
3 ;
4 ; unsigned __fastcall__ inflatemem (char* dest, const char* source);
5 ;
6 ; NOTE: Be extremely careful with modifications, because this code is heavily
7 ; optimized for size (for example assumes certain register and flag values
8 ; when its internal routines return). Test with the gunzip65 sample.
9 ;
10
11         .export         _inflatemem
12
13         .import         incsp2
14         .importzp       sp, sreg, ptr1, ptr2, ptr3, ptr4
15
16 ; --------------------------------------------------------------------------
17 ;
18 ; Constants
19 ;
20
21 ; Argument values for getBits.
22 GET_1_BIT           = $81
23 GET_2_BITS          = $82
24 GET_3_BITS          = $84
25 GET_4_BITS          = $88
26 GET_5_BITS          = $90
27 GET_6_BITS          = $a0
28 GET_7_BITS          = $c0
29
30 ; Huffman trees.
31 TREE_SIZE           = 16
32 PRIMARY_TREE        = 0
33 DISTANCE_TREE       = TREE_SIZE
34
35 ; Alphabet.
36 LENGTH_SYMBOLS      = 1+29+2    ; EOF, 29 length symbols, two unused symbols
37 DISTANCE_SYMBOLS    = 30
38 CONTROL_SYMBOLS     = LENGTH_SYMBOLS+DISTANCE_SYMBOLS
39
40
41 ; --------------------------------------------------------------------------
42 ;
43 ; Page zero
44 ;
45
46 ; Pointer to the compressed data.
47 inputPointer                :=  ptr1    ; 2 bytes
48
49 ; Pointer to the uncompressed data.
50 outputPointer               :=  ptr2    ; 2 bytes
51
52 ; Local variables.
53 ; As far as there is no conflict, same memory locations are used
54 ; for different variables.
55
56 inflateStored_pageCounter   :=  ptr3    ; 1 byte
57 inflateDynamic_symbol       :=  ptr3    ; 1 byte
58 inflateDynamic_lastLength   :=  ptr3+1  ; 1 byte
59         .assert ptr4 = ptr3 + 2, error, "Need three bytes for inflateDynamic_tempCodes"
60 inflateDynamic_tempCodes    :=  ptr3+1  ; 3 bytes
61 inflateDynamic_allCodes     :=  inflateDynamic_tempCodes+1 ; 1 byte
62 inflateDynamic_primaryCodes :=  inflateDynamic_tempCodes+2 ; 1 byte
63 inflateCodes_sourcePointer  :=  ptr3    ; 2 bytes
64 inflateCodes_lengthMinus2   :=  ptr4    ; 1 byte
65 getBits_base                :=  sreg    ; 1 byte
66 getBit_buffer               :=  sreg+1  ; 1 byte
67
68
69 ; --------------------------------------------------------------------------
70 ;
71 ; Code
72 ;
73
74 _inflatemem:
75
76 ; inputPointer = source
77         sta     inputPointer
78         stx     inputPointer+1
79 ; outputPointer = dest
80         ldy     #1
81         lda     (sp),y
82         sta     outputPointer+1
83         dey
84         lda     (sp),y
85         sta     outputPointer
86
87 ;       ldy     #0
88         sty     getBit_buffer
89
90 inflate_blockLoop:
91 ; Get a bit of EOF and two bits of block type
92 ;       ldy     #0
93         sty     getBits_base
94         lda     #GET_3_BITS
95         jsr     getBits
96         lsr     a
97 ; A and Z contain block type, C contains EOF flag
98 ; Save EOF flag
99         php
100         bne     inflateCompressed
101
102 ; Decompress a 'stored' data block.
103 ;       ldy     #0
104         sty     getBit_buffer   ; ignore bits until byte boundary
105         jsr     getWord         ; skip the length we don't need
106         jsr     getWord         ; get the one's complement length
107         sta     inflateStored_pageCounter
108         bcs     inflateStored_firstByte ; jmp
109 inflateStored_copyByte:
110         jsr     getByte
111 ;       sec
112 inflateStoreByte:
113         jsr     storeByte
114         bcc     inflateCodes_loop
115 inflateStored_firstByte:
116         inx
117         bne     inflateStored_copyByte
118         inc     inflateStored_pageCounter
119         bne     inflateStored_copyByte
120
121 ; Block decompressed.
122 inflate_nextBlock:
123         plp
124         bcc     inflate_blockLoop
125
126 ; Decompression complete.
127 ; return outputPointer - dest
128         lda     outputPointer
129 ;       ldy     #0
130 ;       sec
131         sbc     (sp),y
132         iny
133         pha
134         lda     outputPointer+1
135         sbc     (sp),y
136         tax
137         pla
138 ; pop dest
139         jmp     incsp2
140
141 inflateCompressed:
142 ; Decompress a Huffman-coded data block
143 ; A=1: fixed block, initialize with fixed codes
144 ; A=2: dynamic block, start by clearing all code lengths
145 ; A=3: invalid compressed data, not handled in this routine
146         eor     #2
147
148 ;       ldy     #0
149 inflateCompressed_setCodeLengths:
150         tax
151         beq     inflateCompressed_setLiteralCodeLength
152 ; fixed Huffman literal codes:
153 ; 144 8-bit codes
154 ; 112 9-bit codes
155         lda     #4
156         cpy     #144
157         rol     a
158 inflateCompressed_setLiteralCodeLength:
159         sta     literalSymbolCodeLength,y
160         beq     inflateCompressed_setControlCodeLength
161 ; fixed Huffman control codes:
162 ; 24 7-bit codes
163 ;  6 8-bit codes
164 ;  2 meaningless 8-bit codes
165 ; 30 5-bit distance codes
166         lda     #5+DISTANCE_TREE
167         cpy     #LENGTH_SYMBOLS
168         bcs     inflateCompressed_setControlCodeLength
169         cpy     #24
170         adc     #$100+2-DISTANCE_TREE
171 inflateCompressed_setControlCodeLength:
172         cpy     #CONTROL_SYMBOLS
173         bcs     inflateCompressed_noControlSymbol
174         sta     controlSymbolCodeLength,y
175 inflateCompressed_noControlSymbol:
176         iny
177         bne     inflateCompressed_setCodeLengths
178
179         tax
180         beq     inflateDynamic
181
182 ; Decompress a block
183 inflateCodes:
184         jsr     buildHuffmanTree
185 inflateCodes_loop:
186         jsr     fetchPrimaryCode
187         bcc     inflateStoreByte
188         beq     inflate_nextBlock
189 ; Copy sequence from look-behind buffer
190 ;       ldy     #0
191         sty     getBits_base
192         cmp     #9
193         bcc     inflateCodes_setSequenceLength
194         tya
195 ;       lda     #0
196         cpx     #1+28
197         bcs     inflateCodes_setSequenceLength
198         dex
199         txa
200         lsr     a
201         ror     getBits_base
202         inc     getBits_base
203         lsr     a
204         rol     getBits_base
205         jsr     getAMinus1BitsMax8
206 ;       sec
207         adc     #0
208 inflateCodes_setSequenceLength:
209         sta     inflateCodes_lengthMinus2
210         ldx     #DISTANCE_TREE
211         jsr     fetchCode
212         cmp     #4
213         bcc     inflateCodes_setOffsetLowByte
214         inc     getBits_base
215         lsr     a
216         jsr     getAMinus1BitsMax8
217 inflateCodes_setOffsetLowByte:
218         eor     #$ff
219         sta     inflateCodes_sourcePointer
220         lda     getBits_base
221         cpx     #10
222         bcc     inflateCodes_setOffsetHighByte
223         lda     getNPlus1Bits_mask-10,x
224         jsr     getBits
225         clc
226 inflateCodes_setOffsetHighByte:
227         eor     #$ff
228 ;       clc
229         adc     outputPointer+1
230         sta     inflateCodes_sourcePointer+1
231         jsr     copyByte
232         jsr     copyByte
233 inflateCodes_copyByte:
234         jsr     copyByte
235         dec     inflateCodes_lengthMinus2
236         bne     inflateCodes_copyByte
237         beq     inflateCodes_loop ; jmp
238
239 inflateDynamic:
240 ; Decompress a block reading Huffman trees first
241 ;       ldy     #0
242 ; numberOfPrimaryCodes = 257 + getBits(5)
243 ; numberOfDistanceCodes = 1 + getBits(5)
244 ; numberOfTemporaryCodes = 4 + getBits(4)
245         ldx     #3
246 inflateDynamic_getHeader:
247         lda     inflateDynamic_headerBits-1,x
248         jsr     getBits
249 ;       sec
250         adc     inflateDynamic_headerBase-1,x
251         sta     inflateDynamic_tempCodes-1,x
252         dex
253         bne     inflateDynamic_getHeader
254
255 ; Get lengths of temporary codes in the order stored in inflateDynamic_tempSymbols
256 ;       ldx     #0
257 inflateDynamic_getTempCodeLengths:
258         lda     #GET_3_BITS
259         jsr     getBits
260         ldy     inflateDynamic_tempSymbols,x
261         sta     literalSymbolCodeLength,y
262         ldy     #0
263         inx
264         cpx     inflateDynamic_tempCodes
265         bcc     inflateDynamic_getTempCodeLengths
266
267 ; Build the tree for temporary codes
268         jsr     buildHuffmanTree
269
270 ; Use temporary codes to get lengths of literal/length and distance codes
271 ;       ldx     #0
272 ;       sec
273 inflateDynamic_decodeLength:
274 ; C=1: literal codes
275 ; C=0: control codes
276         stx     inflateDynamic_symbol
277         php
278 ; Fetch a temporary code
279         jsr     fetchPrimaryCode
280 ; Temporary code 0..15: put this length
281         bpl     inflateDynamic_storeLengths
282 ; Temporary code 16: repeat last length 3 + getBits(2) times
283 ; Temporary code 17: put zero length 3 + getBits(3) times
284 ; Temporary code 18: put zero length 11 + getBits(7) times
285         tax
286         jsr     getBits
287         cpx     #GET_3_BITS
288         bcc     inflateDynamic_code16
289         beq     inflateDynamic_code17
290 ;       sec
291         adc     #7
292 inflateDynamic_code17:
293 ;       ldy     #0
294         sty     inflateDynamic_lastLength
295 inflateDynamic_code16:
296         tay
297         lda     inflateDynamic_lastLength
298         iny
299         iny
300 inflateDynamic_storeLengths:
301         iny
302         plp
303         ldx     inflateDynamic_symbol
304 inflateDynamic_storeLength:
305         bcc     inflateDynamic_controlSymbolCodeLength
306         sta     literalSymbolCodeLength,x
307         inx
308         cpx     #1
309 inflateDynamic_storeNext:
310         dey
311         bne     inflateDynamic_storeLength
312         sta     inflateDynamic_lastLength
313         beq     inflateDynamic_decodeLength ; jmp
314 inflateDynamic_controlSymbolCodeLength:
315         cpx     inflateDynamic_primaryCodes
316         bcc     inflateDynamic_storeControl
317 ; the code lengths we skip here were zero-initialized
318 ; in inflateCompressed_setControlCodeLength
319         bne     inflateDynamic_noStartDistanceTree
320         ldx     #LENGTH_SYMBOLS
321 inflateDynamic_noStartDistanceTree:
322         ora     #DISTANCE_TREE
323 inflateDynamic_storeControl:
324         sta     controlSymbolCodeLength,x
325         inx
326         cpx     inflateDynamic_allCodes
327         bcc     inflateDynamic_storeNext
328         dey
329 ;       ldy     #0
330         jmp     inflateCodes
331
332 ; Build Huffman trees basing on code lengths (in bits)
333 ; stored in the *SymbolCodeLength arrays
334 buildHuffmanTree:
335 ; Clear nBitCode_literalCount, nBitCode_controlCount
336         tya
337 ;       lda     #0
338 buildHuffmanTree_clear:
339         sta     nBitCode_clearFrom,y
340         iny
341         bne     buildHuffmanTree_clear
342 ; Count number of codes of each length
343 ;       ldy     #0
344 buildHuffmanTree_countCodeLengths:
345         ldx     literalSymbolCodeLength,y
346         inc     nBitCode_literalCount,x
347         bne     buildHuffmanTree_notAllLiterals
348         stx     allLiteralsCodeLength
349 buildHuffmanTree_notAllLiterals:
350         cpy     #CONTROL_SYMBOLS
351         bcs     buildHuffmanTree_noControlSymbol
352         ldx     controlSymbolCodeLength,y
353         inc     nBitCode_controlCount,x
354 buildHuffmanTree_noControlSymbol:
355         iny
356         bne     buildHuffmanTree_countCodeLengths
357 ; Calculate offsets of symbols sorted by code length
358 ;       lda     #0
359         ldx     #$100-4*TREE_SIZE
360 buildHuffmanTree_calculateOffsets:
361         sta     nBitCode_literalOffset+4*TREE_SIZE-$100,x
362         clc
363         adc     nBitCode_literalCount+4*TREE_SIZE-$100,x
364         inx
365         bne     buildHuffmanTree_calculateOffsets
366 ; Put symbols in their place in the sorted array
367 ;       ldy     #0
368 buildHuffmanTree_assignCode:
369         tya
370         ldx     literalSymbolCodeLength,y
371         ldy     nBitCode_literalOffset,x
372         inc     nBitCode_literalOffset,x
373         sta     codeToLiteralSymbol,y
374         tay
375         cpy     #CONTROL_SYMBOLS
376         bcs     buildHuffmanTree_noControlSymbol2
377         ldx     controlSymbolCodeLength,y
378         ldy     nBitCode_controlOffset,x
379         inc     nBitCode_controlOffset,x
380         sta     codeToControlSymbol,y
381         tay
382 buildHuffmanTree_noControlSymbol2:
383         iny
384         bne     buildHuffmanTree_assignCode
385         rts
386
387 ; Read Huffman code using the primary tree
388 fetchPrimaryCode:
389         ldx     #PRIMARY_TREE
390 ; Read a code from input using the tree specified in X.
391 ; Return low byte of this code in A.
392 ; Return C flag reset for literal code, set for length code.
393 fetchCode:
394 ;       ldy     #0
395         tya
396 fetchCode_nextBit:
397         jsr     getBit
398         rol     a
399         inx
400         bcs     fetchCode_ge256
401 ; are all 256 literal codes of this length?
402         cpx     allLiteralsCodeLength
403         beq     fetchCode_allLiterals
404 ; is it literal code of length X?
405         sec
406         sbc     nBitCode_literalCount,x
407         bcs     fetchCode_notLiteral
408 ; literal code
409 ;       clc
410         adc     nBitCode_literalOffset,x
411         tax
412         lda     codeToLiteralSymbol,x
413 fetchCode_allLiterals:
414         clc
415         rts
416 ; code >= 256, must be control
417 fetchCode_ge256:
418 ;       sec
419         sbc     nBitCode_literalCount,x
420         sec
421 ; is it control code of length X?
422 fetchCode_notLiteral:
423 ;       sec
424         sbc     nBitCode_controlCount,x
425         bcs     fetchCode_nextBit
426 ; control code
427 ;       clc
428         adc     nBitCode_controlOffset,x
429         tax
430         lda     codeToControlSymbol,x
431         and     #$1f    ; make distance symbols zero-based
432         tax
433 ;       sec
434         rts
435
436 ; Read A minus 1 bits, but no more than 8
437 getAMinus1BitsMax8:
438         rol     getBits_base
439         tax
440         cmp     #9
441         bcs     getByte
442         lda     getNPlus1Bits_mask-2,x
443 getBits:
444         jsr     getBits_loop
445 getBits_normalizeLoop:
446         lsr     getBits_base
447         ror     a
448         bcc     getBits_normalizeLoop
449         rts
450
451 ; Read 16 bits
452 getWord:
453         jsr     getByte
454         tax
455 ; Read 8 bits
456 getByte:
457         lda     #$80
458 getBits_loop:
459         jsr     getBit
460         ror     a
461         bcc     getBits_loop
462         rts
463
464 ; Read one bit, return in the C flag
465 getBit:
466         lsr     getBit_buffer
467         bne     getBit_return
468         pha
469 ;       ldy     #0
470         lda     (inputPointer),y
471         inc     inputPointer
472         bne     getBit_samePage
473         inc     inputPointer+1
474 getBit_samePage:
475         sec
476         ror     a
477         sta     getBit_buffer
478         pla
479 getBit_return:
480         rts
481
482 ; Copy a previously written byte
483 copyByte:
484         ldy     outputPointer
485         lda     (inflateCodes_sourcePointer),y
486         ldy     #0
487 ; Write a byte
488 storeByte:
489 ;       ldy     #0
490         sta     (outputPointer),y
491         inc     outputPointer
492         bne     storeByte_return
493         inc     outputPointer+1
494         inc     inflateCodes_sourcePointer+1
495 storeByte_return:
496         rts
497
498
499 ; --------------------------------------------------------------------------
500 ;
501 ; Constant data
502 ;
503
504         .rodata
505
506 getNPlus1Bits_mask:
507         .byte   GET_1_BIT,GET_2_BITS,GET_3_BITS,GET_4_BITS,GET_5_BITS,GET_6_BITS,GET_7_BITS
508
509 inflateDynamic_tempSymbols:
510         .byte   GET_2_BITS,GET_3_BITS,GET_7_BITS,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15
511
512 inflateDynamic_headerBits:
513         .byte   GET_4_BITS,GET_5_BITS,GET_5_BITS
514 inflateDynamic_headerBase:
515         .byte   3,LENGTH_SYMBOLS,0
516
517
518 ; --------------------------------------------------------------------------
519 ;
520 ; Uninitialised data
521 ;
522
523         .bss
524
525 ; Data for building trees.
526
527 literalSymbolCodeLength:
528         .res    256
529 controlSymbolCodeLength:
530         .res    CONTROL_SYMBOLS
531
532 ; Huffman trees.
533
534 nBitCode_clearFrom:
535 nBitCode_literalCount:
536         .res    2*TREE_SIZE
537 nBitCode_controlCount:
538         .res    2*TREE_SIZE
539 nBitCode_literalOffset:
540         .res    2*TREE_SIZE
541 nBitCode_controlOffset:
542         .res    2*TREE_SIZE
543 allLiteralsCodeLength:
544         .res    1
545
546 codeToLiteralSymbol:
547         .res    256
548 codeToControlSymbol:
549         .res    CONTROL_SYMBOLS