atexit.o \
atoi.o \
calloc.o \
+ cc65_idiv32by16r16.o \
cc65_imul16x16r32.o \
cc65_sincos.o \
cc65_udiv32by16r16.o \
--- /dev/null
+;
+; Ullrich von Bassewitz, 2009-11-04
+;
+; CC65 library: 32by16 => 16 signed division
+;
+
+ .export _cc65_idiv32by16r16
+ .import idiv32by16r16, incsp4
+
+ .include "zeropage.inc"
+
+
+;---------------------------------------------------------------------------
+; 32by16 division.
+
+.proc   _cc65_idiv32by16r16
+
+        pha                     ; Park the low byte of rhs (X is not touched below)
+
+; Copy the four bytes that sp points to into the zero page, highest offset
+; first. The indexed store runs from ptr1 up to ptr1+3, so ptr2 must follow
+; ptr1 directly.
+
+        ldy     #3
+@copy:  lda     (sp),y
+        sta     ptr1,y
+        dey
+        bpl     @copy
+
+; Advance the 16 bit pointer in sp by the four bytes just copied
+
+        clc
+        lda     sp
+        adc     #4
+        sta     sp
+        bcc     @done
+        inc     sp+1            ; Propagate the carry into the high byte
+
+; Get the low byte of rhs back and continue in the runtime routine
+
+@done:  pla
+        jmp     idiv32by16r16
+
+.endproc
+
.export _cc65_imul16x16r32
.import imul16x16r32, popax
- .importzp ptr1
+
+ .include "zeropage.inc"
;---------------------------------------------------------------------------
.export _cc65_udiv32by16r16
.import udiv32by16r16m, incsp4
- .importzp ptr1, ptr2, ptr3, sp
+
+ .include "zeropage.inc"
;---------------------------------------------------------------------------
.export _cc65_umul16x16r32
.import umul16x16r32, popax
- .importzp ptr1
+
+ .include "zeropage.inc"
;---------------------------------------------------------------------------
ge.o \
gt.o \
icmp.o \
+ idiv32by16r16.o \
imul16x16r32.o \
incax1.o \
incax2.o \
--- /dev/null
+;
+; Ullrich von Bassewitz, 2009-11-04
+;
+; CC65 runtime: 32by16 => 16 signed division
+;
+
+ .export idiv32by16r16
+ .import negax, udiv32by16r16m
+
+ .include "zeropage.inc"
+
+
+;---------------------------------------------------------------------------
+; 32by16 division. Divide ptr1:ptr2 by the value passed in a/x (its absolute
+; value is stored in ptr3). Result is in ptr1, remainder in sreg.
+;
+; lhs rhs result result also in remainder
+; -----------------------------------------------------------------------
+; ptr1:ptr2 a/x ax ptr1 sreg
+;
+
+
+idiv32by16r16:
+
+; Remember the high byte of rhs (it carries the sign), then make rhs
+; positive and store it in ptr3.
+
+        stx     tmp1
+        cpx     #0
+        bpl     @rhsok
+        jsr     negax
+@rhsok: sta     ptr3
+        stx     ptr3+1
+
+; Combine the signs of lhs and rhs: afterwards bit 7 of tmp1 is set if
+; exactly one of the operands was negative.
+
+        lda     ptr2+1
+        eor     tmp1
+        sta     tmp1
+        bit     ptr2+1
+        bpl     @divide         ; lhs is not negative, use it as is
+
+; lhs is negative: replace the four bytes starting at ptr1 by their two's
+; complement. The carry set up front supplies the "+1" and then ripples
+; through the remaining bytes.
+
+        sec
+        ldx     #0
+        ldy     #4
+@neg:   lda     ptr1,x
+        eor     #$FF
+        adc     #$00
+        sta     ptr1,x
+        inx
+        dey
+        bne     @neg
+
+; Both operands are non negative now, so the unsigned routine can be used
+
+@divide:
+        jsr     udiv32by16r16m
+
+; Apply the sign computed above. Only the result in a/x is negated, the
+; remainder is left alone!
+
+        bit     tmp1
+        bpl     @done
+        jmp     negax
+@done:  rts
+
+
.export imul16x16r32
.import negax, umul16x16r32m, negeax
- .importzp ptr1, ptr3, tmp1
+
+ .include "zeropage.inc"
;---------------------------------------------------------------------------
;
.export udiv32by16r16, udiv32by16r16m
- .importzp ptr1, ptr2, ptr3, sreg
+
+ .include "zeropage.inc"
;---------------------------------------------------------------------------
;
.export umul16x16r32, umul16x16r32m
- .importzp ptr1, ptr3, sreg
+
+ .include "zeropage.inc"
;---------------------------------------------------------------------------
.export _tgi_clip_dx, _tgi_clip_dy
.export _tgi_xmax, _tgi_ymax
- .import negax, pushax, tosmulax, tosdivax
+ .import negax, pushax
+ .import imul16x16r32, idiv32by16r16
.import return0, return1
.include "tgi-kernel.inc"
+ .include "zeropage.inc"
.macpack longbranch
sbc _tgi_yres+1
bvs L1
eor #$80
-L1: bmi L2
+L1: bmi L2
ldy #CLIP_NONE ; No clipping actually
.proc muldiv_dydx
- tax
- tya ; Move value into a/x
-
- jsr pushax
+ sty ptr1 ; lhs low byte, passed in Y
+ sta ptr1+1 ; lhs high byte, passed in A
lda _tgi_clip_dy
- ldx _tgi_clip_dy+1
- jsr tosmulax
- jsr pushax
+ ldx _tgi_clip_dy+1 ; rhs = _tgi_clip_dy in a/x
+ jsr imul16x16r32 ; Multiply: lhs (ptr1) * _tgi_clip_dy (a/x)
+
+; Move the product (a/x and sreg) into ptr1:ptr2 as lhs of the division
+
+ sta ptr1
+ stx ptr1+1
+ ldy sreg
+ sty ptr2
+ ldy sreg+1
+ sty ptr2+1
+
+; Load the divisor _tgi_clip_dx into a/x and divide
+
lda _tgi_clip_dx
ldx _tgi_clip_dx+1
- jmp tosdivax
+ jmp idiv32by16r16 ; Tail call: (value * _tgi_clip_dy) / _tgi_clip_dx
.endproc
.proc muldiv_dxdy
- tax
- tya ; Move value into a/x
-
- jsr pushax
+ sty ptr1 ; lhs low byte, passed in Y
+ sta ptr1+1 ; lhs high byte, passed in A
lda _tgi_clip_dx
- ldx _tgi_clip_dx+1
- jsr tosmulax
- jsr pushax
+ ldx _tgi_clip_dx+1 ; rhs = _tgi_clip_dx in a/x
+ jsr imul16x16r32 ; Multiply: lhs (ptr1) * _tgi_clip_dx (a/x)
+
+; Move the product (a/x and sreg) into ptr1:ptr2 as lhs of the division
+
+ sta ptr1
+ stx ptr1+1
+ ldy sreg
+ sty ptr2
+ ldy sreg+1
+ sty ptr2+1
+
+; Load the divisor _tgi_clip_dy into a/x and divide
+
lda _tgi_clip_dy
ldx _tgi_clip_dy+1
- jmp tosdivax
+ jmp idiv32by16r16 ; Tail call: (value * _tgi_clip_dx) / _tgi_clip_dy
.endproc