]> git.sur5r.net Git - cc65/blobdiff - libsrc/runtime/umul16x16r32.s
Minor math optimizations
[cc65] / libsrc / runtime / umul16x16r32.s
index b51ed7343bb2dc9b0d0f4ae7d89a8f75de6405a8..cd2dae351e73bd169d59f4b902594dea86bf57cb 100644 (file)
@@ -5,48 +5,59 @@
 ;
 
         .export         umul16x16r32, umul16x16r32m
+        .export         umul16x16r16, umul16x16r16m
 
         .include        "zeropage.inc"
 
 
 ;---------------------------------------------------------------------------
-; 16x16 => 32 unsigned multiplication routine.
+; 16x16 => 32 unsigned multiplication routine. Because the overhead compared
+; to a dedicated 16x16 => 16 unsigned multiplication routine is small, the
+; same code is also exported under the matching 16 bit labels.
 ;
-;   lhs         rhs           result          result also in
-; -------------------------------------------------------------
-;   ptr1        ax            ax:sreg          ptr1:sreg
+;  routine         LHS         RHS        result          result also in
+; -----------------------------------------------------------------------
+;  umul16x16r32    ax          ptr1       ax:sreg          ptr1:sreg
+;  umul16x16r32m   ptr3        ptr1       ax:sreg          ptr1:sreg
+;  umul16x16r16    ax          ptr1       ax               ptr1
+;  umul16x16r16m   ptr3        ptr1       ax               ptr1
+;
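+; ptr3 is not modified by the multiplication itself. The umul16x16r32 and
+; umul16x16r16 entry points store ax into ptr3 before falling through.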
 ;
 
 umul16x16r32:
+umul16x16r16:
         sta     ptr3            ; Store A (low byte of ax) into ptr3
         stx     ptr3+1          ; Store X (high byte of ax) into ptr3+1
 
 umul16x16r32m:
-               lda     #0
-               sta     sreg+1
-               ldy     #16             ; Number of bits
+umul16x16r16m:
+        lda     #0              ; A is the byte below sreg+1 in the running product
+        sta     sreg+1          ; Clear the top byte of the running product
+        ldy     #16             ; Number of bits
 
         lsr     ptr1+1
         ror     ptr1            ; Get first bit into carry
 @L0:    bcc     @L1             ; Bit is clear: skip the addition
 
-       clc
-       adc     ptr3
-       pha
-               lda     ptr3+1
-       adc     sreg+1
-       sta     sreg+1
-       pla
+        clc                     ; Carry is set here (bcc fell through), clear it
+        adc     ptr3            ; Add ptr3 to A
+        tax                     ; Keep the sum in X while A is used below
+        lda     ptr3+1
+        adc     sreg+1          ; Add ptr3+1 and the carry to sreg+1
+        sta     sreg+1
+        txa                     ; Restore A; carry from the adc is unchanged
 
 @L1:    ror     sreg+1          ; Rotate carry:sreg+1:A:ptr1+1:ptr1 right by one
-       ror     a
-       ror     ptr1+1
-       ror     ptr1
+        ror     a
+        ror     ptr1+1
+        ror     ptr1            ; Bit shifted out of ptr1 is tested at @L0
         dey
         bne     @L0             ; Repeat until Y reaches zero (16 passes)
 
         sta     sreg            ; Save byte 3
-       lda     ptr1            ; Load the result
-       ldx     ptr1+1
-       rts                     ; Done
+        lda     ptr1            ; Load the result
+        ldx     ptr1+1
+        rts                     ; Done
+