From 18272c04271cf4df2c0334edc10baddfd257c2f7 Mon Sep 17 00:00:00 2001
From: uz <uz@b7a2c559-68d2-44c3-8de9-860c34a00d81>
Date: Wed, 4 Nov 2009 19:41:54 +0000
Subject: [PATCH] Working on the division and multiplication routines.

git-svn-id: svn://svn.cc65.org/cc65/trunk@4443 b7a2c559-68d2-44c3-8de9-860c34a00d81
---
 libsrc/common/Makefile             |  1 +
 libsrc/common/cc65_idiv32by16r16.s | 39 ++++++++++++++++++
 libsrc/common/cc65_imul16x16r32.s  |  3 +-
 libsrc/common/cc65_udiv32by16r16.s |  3 +-
 libsrc/common/cc65_umul16x16r32.s  |  3 +-
 libsrc/runtime/Makefile            |  1 +
 libsrc/runtime/idiv32by16r16.s     | 64 ++++++++++++++++++++++++++++++
 libsrc/runtime/imul16x16r32.s      |  3 +-
 libsrc/runtime/udiv32by16r16.s     |  3 +-
 libsrc/runtime/umul16x16r32.s      |  3 +-
 libsrc/tgi/tgi_clipline.s          | 56 +++++++++++++++++---------
 11 files changed, 155 insertions(+), 24 deletions(-)
 create mode 100644 libsrc/common/cc65_idiv32by16r16.s
 create mode 100644 libsrc/runtime/idiv32by16r16.s

diff --git a/libsrc/common/Makefile b/libsrc/common/Makefile
index 700401dd9..b92a02a14 100644
--- a/libsrc/common/Makefile
+++ b/libsrc/common/Makefile
@@ -99,6 +99,7 @@ S_OBJS = 	_cwd.o                  \
 		atexit.o  	        \
 		atoi.o	  	        \
 		calloc.o  	        \
+                cc65_idiv32by16r16.o    \
                 cc65_imul16x16r32.o     \
                 cc65_sincos.o           \
                 cc65_udiv32by16r16.o    \
diff --git a/libsrc/common/cc65_idiv32by16r16.s b/libsrc/common/cc65_idiv32by16r16.s
new file mode 100644
index 000000000..74394672b
--- /dev/null
+++ b/libsrc/common/cc65_idiv32by16r16.s
@@ -0,0 +1,39 @@
+;
+; Ullrich von Bassewitz, 2009-11-04
+;
+; CC65 library: 32by16 => 16 signed division
+;
+
+       	.export	       	_cc65_idiv32by16r16
+        .import         idiv32by16r16, incsp4
+
+        .include        "zeropage.inc"
+
+
+;---------------------------------------------------------------------------
+; C wrapper for the signed 32by16 division: lhs on the C stack, rhs in a/x.
+
+.proc   _cc65_idiv32by16r16
+
+        pha                     ; Save low byte of rhs, A is used below
+
+; Copy from stack to zeropage. This assumes ptr1 and ptr2 are adjacent.
+
+        ldy     #3              ; Copy bytes 3..0 of lhs
+@L1:    lda     (sp),y
+        sta     ptr1,y
+        dey
+        bpl     @L1             ; Loop until Y drops below zero
+
+        lda     #4              ; Drop the four copied bytes: sp += 4
+        clc
+        adc     sp
+        sta     sp
+        bcc     @L2             ; No carry into the high byte of sp
+        inc     sp+1
+
+@L2:    pla                     ; Restore low byte of rhs (X was never changed)
+        jmp     idiv32by16r16   ; Tail call, rhs is passed on in a/x
+
+.endproc
+
diff --git a/libsrc/common/cc65_imul16x16r32.s b/libsrc/common/cc65_imul16x16r32.s
index 331e2189b..b4e82de10 100644
--- a/libsrc/common/cc65_imul16x16r32.s
+++ b/libsrc/common/cc65_imul16x16r32.s
@@ -6,7 +6,8 @@
 
         .export         _cc65_imul16x16r32
         .import         imul16x16r32, popax
-    	.importzp     	ptr1
+
+        .include        "zeropage.inc"
 
 
 ;---------------------------------------------------------------------------
diff --git a/libsrc/common/cc65_udiv32by16r16.s b/libsrc/common/cc65_udiv32by16r16.s
index 351eff855..23fcbbe17 100644
--- a/libsrc/common/cc65_udiv32by16r16.s
+++ b/libsrc/common/cc65_udiv32by16r16.s
@@ -6,7 +6,8 @@
 
        	.export	       	_cc65_udiv32by16r16
         .import         udiv32by16r16m, incsp4
-       	.importzp      	ptr1, ptr2, ptr3, sp
+
+        .include        "zeropage.inc"
 
 
 ;---------------------------------------------------------------------------
diff --git a/libsrc/common/cc65_umul16x16r32.s b/libsrc/common/cc65_umul16x16r32.s
index 84c71538f..0e7ed7602 100644
--- a/libsrc/common/cc65_umul16x16r32.s
+++ b/libsrc/common/cc65_umul16x16r32.s
@@ -6,7 +6,8 @@
 
         .export         _cc65_umul16x16r32
         .import         umul16x16r32, popax
-    	.importzp      	ptr1
+
+        .include        "zeropage.inc"
 
 
 ;---------------------------------------------------------------------------
diff --git a/libsrc/runtime/Makefile b/libsrc/runtime/Makefile
index c8a76beab..a7c959700 100644
--- a/libsrc/runtime/Makefile
+++ b/libsrc/runtime/Makefile
@@ -81,6 +81,7 @@ OBJS = 	add.o		\
        	ge.o		\
        	gt.o		\
        	icmp.o		\
+        idiv32by16r16.o \
         imul16x16r32.o  \
 	incax1.o	\
 	incax2.o	\
diff --git a/libsrc/runtime/idiv32by16r16.s b/libsrc/runtime/idiv32by16r16.s
new file mode 100644
index 000000000..a69c2a7a7
--- /dev/null
+++ b/libsrc/runtime/idiv32by16r16.s
@@ -0,0 +1,64 @@
+;
+; Ullrich von Bassewitz, 2009-11-04
+;
+; CC65 runtime: 32by16 => 16 signed division
+;
+
+        .export         idiv32by16r16
+       	.import	       	negax, udiv32by16r16m
+
+        .include        "zeropage.inc"
+
+
+;---------------------------------------------------------------------------
+; Signed 32by16 division. Divide ptr1:ptr2 by a/x (rhs is stored into ptr3).
+; Result is in a/x, remainder in sreg.
+;
+;   lhs         rhs           result      result also in    remainder
+; -----------------------------------------------------------------------
+;   ptr1:ptr2   ax (->ptr3)   ax          ptr1              sreg
+; NOTE(review): for a negative result only a/x is negated; ptr1 and sreg
+; presumably keep the unsigned values from udiv32by16r16m - confirm.
+
+idiv32by16r16:
+        stx     tmp1            ; Remember high byte (sign) of rhs
+        cpx     #0              ; Set N from the high byte of rhs
+        bpl     @L1             ; rhs is not negative, use it as is
+        jsr     negax           ; rhs is negative, negate it
+@L1:    sta     ptr3            ; Store rhs as divisor
+        stx     ptr3+1
+
+        lda     ptr2+1          ; High byte of lhs
+        eor     tmp1            ; Bit 7 is set if the signs differ
+        sta     tmp1            ; Bit 7 of tmp1 = sign of the result
+        bit     ptr2+1          ; Check sign of lhs
+        bpl     @L3             ; lhs is not negative, nothing to negate
+
+; Negate the value in ptr1:ptr2 (invert each byte and add one via carry)
+
+        ldx     #0              ; Byte index, starts at ptr1
+        ldy     #4              ; Number of bytes
+        sec                     ; Carry is the "+1" for the lowest byte
+@L2:    lda     ptr1,x
+        eor     #$FF
+        adc     #$00            ; Add carry from the previous byte
+        sta     ptr1,x
+        inx
+        dey
+        bne     @L2
+
+; Call the unsigned division routine
+
+@L3:    jsr     udiv32by16r16m  ; NOTE(review): tmp1 must survive this call
+
+; Check the sign of the result
+
+        bit     tmp1
+        bmi     @L4             ; Signs differed, result is negative
+        rts
+
+; Negate the result. We do this here only for the result, not for the
+; remainder!
+
+@L4:    jmp     negax           ; Tail call
+
diff --git a/libsrc/runtime/imul16x16r32.s b/libsrc/runtime/imul16x16r32.s
index b9101f339..f41d13602 100644
--- a/libsrc/runtime/imul16x16r32.s
+++ b/libsrc/runtime/imul16x16r32.s
@@ -6,7 +6,8 @@
 
         .export         imul16x16r32
         .import         negax, umul16x16r32m, negeax
-    	.importzp     	ptr1, ptr3, tmp1
+
+        .include        "zeropage.inc"
 
 
 ;---------------------------------------------------------------------------
diff --git a/libsrc/runtime/udiv32by16r16.s b/libsrc/runtime/udiv32by16r16.s
index ac048bda0..3925ffe81 100644
--- a/libsrc/runtime/udiv32by16r16.s
+++ b/libsrc/runtime/udiv32by16r16.s
@@ -5,7 +5,8 @@
 ;
 
        	.export	       	udiv32by16r16, udiv32by16r16m
- 	.importzp   	ptr1, ptr2, ptr3, sreg
+
+        .include        "zeropage.inc"
 
 
 ;---------------------------------------------------------------------------
diff --git a/libsrc/runtime/umul16x16r32.s b/libsrc/runtime/umul16x16r32.s
index 75d6157fe..b51ed7343 100644
--- a/libsrc/runtime/umul16x16r32.s
+++ b/libsrc/runtime/umul16x16r32.s
@@ -5,7 +5,8 @@
 ;
 
         .export         umul16x16r32, umul16x16r32m
-    	.importzp      	ptr1, ptr3, sreg
+
+        .include        "zeropage.inc"
 
 
 ;---------------------------------------------------------------------------
diff --git a/libsrc/tgi/tgi_clipline.s b/libsrc/tgi/tgi_clipline.s
index 98bddcba2..96ef5b860 100644
--- a/libsrc/tgi/tgi_clipline.s
+++ b/libsrc/tgi/tgi_clipline.s
@@ -11,10 +11,12 @@
         .export _tgi_clip_dx, _tgi_clip_dy
         .export _tgi_xmax, _tgi_ymax
 
-        .import negax, pushax, tosmulax, tosdivax
+        .import negax, pushax
+        .import imul16x16r32, idiv32by16r16
         .import return0, return1
 
         .include "tgi-kernel.inc"
+        .include "zeropage.inc"
 
         .macpack longbranch
 
@@ -65,7 +67,7 @@ CLIP_TOP        = $08
         sbc     _tgi_yres+1
         bvs     L1
         eor     #$80
-L1:     bmi     L2
+L1:     bmi     L2             
 
         ldy     #CLIP_NONE              ; No clipping actually
 
@@ -201,17 +203,26 @@ L4:     tya
 
 .proc   muldiv_dydx
 
-        tax
-        tya                             ; Move value into a/x
-
-        jsr     pushax
+        sty     ptr1                    ; lhs low byte (passed in Y)
+        sta     ptr1+1                  ; lhs high byte (passed in A)
         lda     _tgi_clip_dy
-        ldx     _tgi_clip_dy+1
-        jsr     tosmulax
-        jsr     pushax
+        ldx     _tgi_clip_dy+1          ; rhs = dy in a/x
+        jsr     imul16x16r32            ; Multiply, result in a/x/sreg
+
+; Move the result of the multiplication into ptr1:ptr2
+
+        sta     ptr1                    ; Low word of the product
+        stx     ptr1+1
+        ldy     sreg                    ; High word of the product
+        sty     ptr2
+        ldy     sreg+1
+        sty     ptr2+1
+
+; Load divisor and divide
+
         lda     _tgi_clip_dx
         ldx     _tgi_clip_dx+1
-        jmp     tosdivax
+        jmp     idiv32by16r16           ; Tail call, divisor dx in a/x
 
 .endproc
 
@@ -223,17 +234,26 @@ L4:     tya
 
 .proc   muldiv_dxdy
 
-        tax
-        tya                             ; Move value into a/x
-
-        jsr     pushax
+        sty     ptr1                    ; lhs low byte (passed in Y)
+        sta     ptr1+1                  ; lhs high byte (passed in A)
         lda     _tgi_clip_dx
-        ldx     _tgi_clip_dx+1
-        jsr     tosmulax
-        jsr     pushax
+        ldx     _tgi_clip_dx+1          ; rhs = dx in a/x
+        jsr     imul16x16r32            ; Multiply, result in a/x/sreg
+
+; Move the result of the multiplication into ptr1:ptr2
+
+        sta     ptr1                    ; Low word of the product
+        stx     ptr1+1
+        ldy     sreg                    ; High word of the product
+        sty     ptr2
+        ldy     sreg+1
+        sty     ptr2+1
+
+; Load divisor and divide
+
         lda     _tgi_clip_dy
         ldx     _tgi_clip_dy+1
-        jmp     idiv32by16r16
+        jmp     idiv32by16r16           ; Tail call, divisor dy in a/x
 
 .endproc
 
-- 
2.39.5