git.sur5r.net Git - cc65/commitdiff
Use the faster popptr1 (instead of popsreg) in the multiplication routines, and/or save the accumulator in a register instead of on the stack.
author IrgendwerA8 <c.krueger.b@web.de>
Mon, 21 May 2018 16:18:01 +0000 (18:18 +0200)
committer IrgendwerA8 <c.krueger.b@web.de>
Mon, 21 May 2018 16:18:01 +0000 (18:18 +0200)
libsrc/runtime/mul.s
libsrc/runtime/mul8.s
libsrc/runtime/mulax3.s
libsrc/runtime/mulax5.s
libsrc/runtime/mulax7.s
libsrc/runtime/mulax9.s

index 6344e3a32e89d937293549b5fa761c939a5aa936..a9b53293be6ec2d73260b85dcf866f5fdc036096 100644 (file)
@@ -6,8 +6,8 @@
 
         .export         tosumulax, tosmulax
         .import         mul8x16, mul8x16a       ; in mul8.s
-        .import         popsreg
-        .importzp       sreg, tmp1, ptr4
+        .import         popptr1
+        .importzp       tmp1, ptr1, ptr4
 
 
 ;---------------------------------------------------------------------------
@@ -19,12 +19,12 @@ tosumulax:
         txa                     ; High byte zero
         beq     @L3             ; Do 8x16 multiplication if high byte zero
         stx     ptr4+1          ; Save right operand
-        jsr     popsreg         ; Get left operand
+        jsr     popptr1         ; Get left operand (Y=0 by popptr1)
 
-; Do ptr4:ptr4+1 * sreg:sreg+1 --> AX
+; Do ptr4:ptr4+1 * ptr1:ptr1+1 --> AX
 
-        lda     #0
-        ldx     sreg+1          ; Get high byte into register for speed
+        tya                    ; A = 0        
+        ldx     ptr1+1          ; check if lhs is 8 bit only
         beq     @L4             ; -> we can do 8x16 after swap
         sta     tmp1
         ldy     #16             ; Number of bits
@@ -34,12 +34,12 @@ tosumulax:
 @L0:    bcc     @L1
 
         clc
-        adc     sreg
-        pha
-        txa                     ; hi byte of left op
+        adc     ptr1
+        tax
+        lda     ptr1+1          ; hi byte of left op         
         adc     tmp1
         sta     tmp1
-        pla
+        txa
 
 @L1:    ror     tmp1
         ror     a
@@ -59,9 +59,9 @@ tosumulax:
 ; If the high byte of rhs is zero, swap the operands and use the 8x16
 ; routine. On entry, A and X are zero
 
-@L4:    ldy     sreg            ; Save right operand (8 bit)
+@L4:    ldy     ptr1            ; Save right operand (8 bit)
         ldx     ptr4            ; Copy left 16 bit operand to right
-        stx     sreg
+        stx     ptr1
         ldx     ptr4+1          ; Don't store, this is done later
         sty     ptr4            ; Copy low 8 bit of right op to left
         ldy     #8
index 9d4dfcbf4519b267fefc15e068d99b5e7ff13b1b..395d64a4c9b6afcba0efeb8901ac0b7500e6e754 100644 (file)
@@ -6,8 +6,8 @@
 
         .export         tosumula0, tosmula0
         .export         mul8x16, mul8x16a
-        .import         popsreg
-        .importzp       sreg, ptr4
+        .import         popptr1
+        .importzp       ptr1, ptr4
 
 
 ;---------------------------------------------------------------------------
 tosmula0:
 tosumula0:
         sta     ptr4
-mul8x16:jsr     popsreg         ; Get left operand
+mul8x16:jsr     popptr1         ; Get left operand (Y=0 by popptr1)
 
-        lda     #0              ; Clear byte 1
+        tya                     ; Clear byte 1
         ldy     #8              ; Number of bits
-        ldx     sreg+1          ; Get into register for speed
+        ldx     ptr1+1          ; check if lhs is 8 bit only
         beq     mul8x8          ; Do 8x8 multiplication if high byte zero
 mul8x16a:
         sta     ptr4+1          ; Clear byte 2
@@ -29,12 +29,12 @@ mul8x16a:
 @L0:    bcc     @L1
 
         clc
-        adc     sreg
-        pha
-        txa                     ; hi byte of left op
+        adc     ptr1
+        tax
+        lda     ptr1+1          ; hi byte of left op
         adc     ptr4+1
         sta     ptr4+1
-        pla
+        txa
 
 @L1:    ror     ptr4+1
         ror     a
@@ -52,7 +52,7 @@ mul8x8:
         lsr     ptr4            ; Get first bit into carry
 @L0:    bcc     @L1
         clc
-        adc     sreg
+        adc     ptr1
 @L1:    ror
         ror     ptr4
         dey
index 472bc60ec5da17826a3669906aa930ba91fac259..82cc033c37694a6675788f7cde1b1c41549024c4 100644 (file)
         rol     ptr1+1
         clc
         adc     ptr1
-        pha
+        tay
         txa
         adc     ptr1+1
         tax
-        pla
+        tya
         rts
 
 .endproc
index 7e5ed11d9c0e1aa0029192c210cc8a197939c421..bf5eaefe8fdc455138cd63002a40fe89028fe406 100644 (file)
         rol     ptr1+1
         clc
         adc     ptr1
-        pha
+        tay
         txa
         adc     ptr1+1
         tax
-        pla
+        tya
         rts
 
 .endproc
index 90313180c198edb927b32713c84238dc9634e075..3414ebc9e87801b56616da69de48dca8b68b259f 100644 (file)
         rol     ptr1+1                  ; * 8
         sec
         sbc     ptr1
-        pha
+        tay
         txa
         eor     #$ff
         adc     ptr1+1                  ; * (8 - 1)
         tax
-        pla
+        tya
         rts
 
 .endproc
index d2dd89529fc9223135157242738d60ccc4328cde..d175d55aa5e7b6fa8647bd560cc818b317c17e11 100644 (file)
         rol     ptr1+1                  ; * 8
         clc
         adc     ptr1                    ; * (8+1)
-        pha
+        tay
         txa
         adc     ptr1+1
         tax
-        pla
+        tya
         rts
 
 .endproc