From 18272c04271cf4df2c0334edc10baddfd257c2f7 Mon Sep 17 00:00:00 2001 From: uz Date: Wed, 4 Nov 2009 19:41:54 +0000 Subject: [PATCH] Working on the division and multiplication routines. git-svn-id: svn://svn.cc65.org/cc65/trunk@4443 b7a2c559-68d2-44c3-8de9-860c34a00d81 --- libsrc/common/Makefile | 1 + libsrc/common/cc65_idiv32by16r16.s | 39 ++++++++++++++++++ libsrc/common/cc65_imul16x16r32.s | 3 +- libsrc/common/cc65_udiv32by16r16.s | 3 +- libsrc/common/cc65_umul16x16r32.s | 3 +- libsrc/runtime/Makefile | 1 + libsrc/runtime/idiv32by16r16.s | 64 ++++++++++++++++++++++++++++++ libsrc/runtime/imul16x16r32.s | 3 +- libsrc/runtime/udiv32by16r16.s | 3 +- libsrc/runtime/umul16x16r32.s | 3 +- libsrc/tgi/tgi_clipline.s | 56 +++++++++++++++++--------- 11 files changed, 155 insertions(+), 24 deletions(-) create mode 100644 libsrc/common/cc65_idiv32by16r16.s create mode 100644 libsrc/runtime/idiv32by16r16.s diff --git a/libsrc/common/Makefile b/libsrc/common/Makefile index 700401dd9..b92a02a14 100644 --- a/libsrc/common/Makefile +++ b/libsrc/common/Makefile @@ -99,6 +99,7 @@ S_OBJS = _cwd.o \ atexit.o \ atoi.o \ calloc.o \ + cc65_idiv32by16r16.o \ cc65_imul16x16r32.o \ cc65_sincos.o \ cc65_udiv32by16r16.o \ diff --git a/libsrc/common/cc65_idiv32by16r16.s b/libsrc/common/cc65_idiv32by16r16.s new file mode 100644 index 000000000..74394672b --- /dev/null +++ b/libsrc/common/cc65_idiv32by16r16.s @@ -0,0 +1,39 @@ +; +; Ullrich von Bassewitz, 2009-11-04 +; +; CC65 library: 32by16 => 16 signed division +; + + .export _cc65_idiv32by16r16 + .import idiv32by16r16, incsp4 + + .include "zeropage.inc" + + +;--------------------------------------------------------------------------- +; 32by16 division. The 16 bit rhs (divisor) is passed in a/x, the 32 bit lhs is read from (sp). + +.proc _cc65_idiv32by16r16 + + pha ; Save low byte of rhs, X is left untouched below + +; Copy from stack to zeropage. This assumes ptr1 and ptr2 are adjacent.
+ + ldy #3 +@L1: lda (sp),y + sta ptr1,y + dey + bpl @L1 + + lda #4 + clc + adc sp + sta sp + bcc @L2 + inc sp+1 + +@L2: pla ; Restore low byte of rhs + jmp idiv32by16r16 ; Divide, lhs is now in ptr1:ptr2 and rhs in a/x + +.endproc + diff --git a/libsrc/common/cc65_imul16x16r32.s b/libsrc/common/cc65_imul16x16r32.s index 331e2189b..b4e82de10 100644 --- a/libsrc/common/cc65_imul16x16r32.s +++ b/libsrc/common/cc65_imul16x16r32.s @@ -6,7 +6,8 @@ .export _cc65_imul16x16r32 .import imul16x16r32, popax - .importzp ptr1 + + .include "zeropage.inc" ;--------------------------------------------------------------------------- diff --git a/libsrc/common/cc65_udiv32by16r16.s b/libsrc/common/cc65_udiv32by16r16.s index 351eff855..23fcbbe17 100644 --- a/libsrc/common/cc65_udiv32by16r16.s +++ b/libsrc/common/cc65_udiv32by16r16.s @@ -6,7 +6,8 @@ .export _cc65_udiv32by16r16 .import udiv32by16r16m, incsp4 - .importzp ptr1, ptr2, ptr3, sp + + .include "zeropage.inc" ;--------------------------------------------------------------------------- diff --git a/libsrc/common/cc65_umul16x16r32.s b/libsrc/common/cc65_umul16x16r32.s index 84c71538f..0e7ed7602 100644 --- a/libsrc/common/cc65_umul16x16r32.s +++ b/libsrc/common/cc65_umul16x16r32.s @@ -6,7 +6,8 @@ .export _cc65_umul16x16r32 .import umul16x16r32, popax - .importzp ptr1 + + .include "zeropage.inc" ;--------------------------------------------------------------------------- diff --git a/libsrc/runtime/Makefile b/libsrc/runtime/Makefile index c8a76beab..a7c959700 100644 --- a/libsrc/runtime/Makefile +++ b/libsrc/runtime/Makefile @@ -81,6 +81,7 @@ OBJS = add.o \ ge.o \ gt.o \ icmp.o \ + idiv32by16r16.o \ imul16x16r32.o \ incax1.o \ incax2.o \ diff --git a/libsrc/runtime/idiv32by16r16.s b/libsrc/runtime/idiv32by16r16.s new file mode 100644 index 000000000..a69c2a7a7 --- /dev/null +++ b/libsrc/runtime/idiv32by16r16.s @@ -0,0 +1,64 @@ +; +; Ullrich von Bassewitz, 2009-11-04 +; +; CC65 runtime: 32by16 => 16 signed division +; + + .export idiv32by16r16 + .import negax, udiv32by16r16m + + .include 
"zeropage.inc" + + +;--------------------------------------------------------------------------- +; Signed 32by16 division. Divide ptr1:ptr2 by a/x (a/x is copied to ptr3). +; Result is in a/x, remainder in sreg. +; +; lhs rhs result result also in remainder +; ----------------------------------------------------------------------- +; ptr1:ptr2 ax ax ptr1 sreg +; NOTE(review): ptr1 is not written here after the division, so for a negative result it presumably differs from a/x - confirm. + + +idiv32by16r16: + stx tmp1 ; Remember high byte of rhs for the sign + cpx #0 ; stx does not set the flags + bpl @L1 + jsr negax ; rhs is negative, negate it +@L1: sta ptr3 ; Store rhs as divisor + stx ptr3+1 + + lda ptr2+1 ; High byte of lhs + eor tmp1 + sta tmp1 ; Bit 7 is set if the signs of lhs and rhs differ + bit ptr2+1 + bpl @L3 ; lhs is not negative + +; Negate the value in ptr1:ptr2 + + ldx #0 + ldy #4 ; Four bytes to process + sec ; Carry supplies the +1 after inverting +@L2: lda ptr1,x + eor #$FF + adc #$00 + sta ptr1,x + inx + dey + bne @L2 + +; Call the unsigned division routine + +@L3: jsr udiv32by16r16m + +; Check the sign of the result + + bit tmp1 + bmi @L4 + rts + +; Negate the result. We do this here only for the result, not for the +; remainder! + +@L4: jmp negax + diff --git a/libsrc/runtime/imul16x16r32.s b/libsrc/runtime/imul16x16r32.s index b9101f339..f41d13602 100644 --- a/libsrc/runtime/imul16x16r32.s +++ b/libsrc/runtime/imul16x16r32.s @@ -6,7 +6,8 @@ .export imul16x16r32 .import negax, umul16x16r32m, negeax - .importzp ptr1, ptr3, tmp1 + + .include "zeropage.inc" ;--------------------------------------------------------------------------- diff --git a/libsrc/runtime/udiv32by16r16.s b/libsrc/runtime/udiv32by16r16.s index ac048bda0..3925ffe81 100644 --- a/libsrc/runtime/udiv32by16r16.s +++ b/libsrc/runtime/udiv32by16r16.s @@ -5,7 +5,8 @@ ; .export udiv32by16r16, udiv32by16r16m - .importzp ptr1, ptr2, ptr3, sreg + + .include "zeropage.inc" ;--------------------------------------------------------------------------- diff --git a/libsrc/runtime/umul16x16r32.s b/libsrc/runtime/umul16x16r32.s index 75d6157fe..b51ed7343 100644 --- a/libsrc/runtime/umul16x16r32.s +++ b/libsrc/runtime/umul16x16r32.s @@ -5,7 +5,8 @@ ; .export umul16x16r32, umul16x16r32m - .importzp ptr1, ptr3, sreg + + .include "zeropage.inc" ;---------------------------------------------------------------------------
diff --git a/libsrc/tgi/tgi_clipline.s b/libsrc/tgi/tgi_clipline.s index 98bddcba2..96ef5b860 100644 --- a/libsrc/tgi/tgi_clipline.s +++ b/libsrc/tgi/tgi_clipline.s @@ -11,10 +11,12 @@ .export _tgi_clip_dx, _tgi_clip_dy .export _tgi_xmax, _tgi_ymax - .import negax, pushax, tosmulax, tosdivax + .import negax, pushax + .import imul16x16r32, idiv32by16r16 .import return0, return1 .include "tgi-kernel.inc" + .include "zeropage.inc" .macpack longbranch @@ -65,7 +67,7 @@ CLIP_TOP = $08 sbc _tgi_yres+1 bvs L1 eor #$80 -L1: bmi L2 +L1: bmi L2 ldy #CLIP_NONE ; No clipping actually @@ -201,17 +203,26 @@ L4: tya .proc muldiv_dydx - tax - tya ; Move value into a/x - - jsr pushax + sty ptr1 ; lhs: low byte comes in Y, high byte in A + sta ptr1+1 lda _tgi_clip_dy - ldx _tgi_clip_dy+1 - jsr tosmulax - jsr pushax + ldx _tgi_clip_dy+1 ; rhs + jsr imul16x16r32 ; Multiply + +; Move the 32 bit result of the multiplication (a/x/sreg) into ptr1:ptr2 + + sta ptr1 + stx ptr1+1 + ldy sreg + sty ptr2 + ldy sreg+1 + sty ptr2+1 + +; Load divisor and divide + lda _tgi_clip_dx ldx _tgi_clip_dx+1 - jmp tosdivax + jmp idiv32by16r16 .endproc @@ -223,17 +234,26 @@ L4: tya .proc muldiv_dxdy - tax - tya ; Move value into a/x - - jsr pushax + sty ptr1 ; lhs: low byte comes in Y, high byte in A + sta ptr1+1 lda _tgi_clip_dx - ldx _tgi_clip_dx+1 - jsr tosmulax - jsr pushax + ldx _tgi_clip_dx+1 ; rhs + jsr imul16x16r32 ; Multiply + +; Move the 32 bit result of the multiplication (a/x/sreg) into ptr1:ptr2 + + sta ptr1 + stx ptr1+1 + ldy sreg + sty ptr2 + ldy sreg+1 + sty ptr2+1 + +; Load divisor and divide + lda _tgi_clip_dy ldx _tgi_clip_dy+1 - jmp tosdivax + jmp idiv32by16r16 .endproc -- 2.39.5