]> git.sur5r.net Git - cc65/commitdiff
Added new 8x16 unsigned multiplication routine.
author uz <uz@b7a2c559-68d2-44c3-8de9-860c34a00d81>
Sun, 10 Jul 2011 14:51:03 +0000 (14:51 +0000)
committer uz <uz@b7a2c559-68d2-44c3-8de9-860c34a00d81>
Sun, 10 Jul 2011 14:51:03 +0000 (14:51 +0000)
git-svn-id: svn://svn.cc65.org/cc65/trunk@5078 b7a2c559-68d2-44c3-8de9-860c34a00d81

libsrc/runtime/Makefile
libsrc/runtime/umul8x16r24.s [new file with mode: 0644]

index 23daed8fd83e1ae15f19b83383eb581ad01a6469..f661f39a11d7d842623c6ece2686e6ea87806c71 100644 (file)
@@ -221,6 +221,7 @@ OBJS =      add.o           \
         udiv32by16r16.o \
                umod.o          \
         umul16x16r32.o  \
+        umul8x16r24.o   \
         umul8x8r16.o    \
                xor.o           \
         zeropage.o
diff --git a/libsrc/runtime/umul8x16r24.s b/libsrc/runtime/umul8x16r24.s
new file mode 100644 (file)
index 0000000..9a982d5
--- /dev/null
@@ -0,0 +1,63 @@
+;
+; Ullrich von Bassewitz, 2011-07-10
+;
+; CC65 runtime: 8x16 => 24 unsigned multiplication
+;
+
+        .export         umul8x16r24, umul8x16r24m
+        .export         umul8x16r16, umul8x16r16m
+
+        .include        "zeropage.inc"
+
+
+;---------------------------------------------------------------------------
+; 8x16 => 24 unsigned multiplication routine. Because the overhead for a
+; 8x16 => 16 unsigned multiplication routine is small, we will tag it with
+; the matching labels as well.
+;
+;  routine         lhs         rhs        result          result also in
+; -----------------------------------------------------------------------
+;  umul8x16r24     ptr1-lo     ax         ax:sreg-lo      ptr1:sreg-lo
+;  umul8x16r24m    ptr1-lo     ptr3       ax:sreg-lo      ptr1:sreg-lo
+;
+; ptr3 is left intact by the routine.
+;
+
+umul8x16r24:                    ; Entry: 8-bit lhs in ptr1 (low byte), 16-bit rhs in a/x
+umul8x16r16:                    ; Same code; caller just ignores the top byte in sreg
+        sta     ptr3            ; Store rhs low byte where the "m" variant expects it
+        stx     ptr3+1          ; Store rhs high byte
+; Entry for the "m" variants: 16-bit rhs is already in ptr3
+umul8x16r24m:
+umul8x16r16m:
+               ldx     #0
+        stx     ptr1+1          ; Clear middle byte of the result accumulator
+               stx     sreg            ; Clear top byte of the result accumulator
+; Shift-and-add loop. a holds the remaining lhs bits and collects the low result byte.
+               ldy     #8              ; Number of bits
+        ldx     ptr3            ; Get into register for speed
+        lda     ptr1            ; 8-bit lhs (the multiplier)
+        ror     a               ; Get next bit into carry (stray carry-in is rotated out again by the 8 loop rotates)
+@L0:    bcc     @L1             ; Multiplier bit clear: skip the addition, just shift
+; Multiplier bit set: add the 16-bit rhs to sreg:ptr1+1
+        clc                     ; Carry is set on this path, clear it for the addition
+        pha                     ; Save a, the accumulator is needed for the addition
+        txa                     ; rhs low byte (cached in x)
+        adc     ptr1+1
+        sta     ptr1+1
+        lda     ptr3+1          ; rhs high byte
+        adc     sreg            ; Carry out of this add is rotated into sreg below
+        sta     sreg
+        pla                     ; Restore a; pla leaves the carry untouched
+; Rotate carry:sreg:ptr1+1:a right by one (carry is clear if the add was skipped)
+@L1:    ror     sreg
+       ror     ptr1+1
+       ror     a               ; Bit shifted out is the next multiplier bit for bcc at @L0
+        dey                     ; dey does not change the carry
+        bne     @L0
+; Done: a = low byte, ptr1+1 = middle byte, sreg = top byte of the 24-bit result
+        sta     ptr1            ; Save low byte of result
+        ldx     ptr1+1          ; Load high byte of result
+       rts                     ; Done
+
+