From: uz Date: Wed, 4 Nov 2009 13:45:27 +0000 (+0000) Subject: The upper 16 bit differ between signed and unsigned multiplication, so while X-Git-Tag: V2.13.1~91 X-Git-Url: https://git.sur5r.net/?a=commitdiff_plain;h=7c804513fad888f5ab6b2a36564769ddc95c276b;p=cc65 The upper 16 bit differ between signed and unsigned multiplication, so while for 16x16=16, the unsigned routine can be used, regardless of signedness, this is not true for 16x16=32. git-svn-id: svn://svn.cc65.org/cc65/trunk@4438 b7a2c559-68d2-44c3-8de9-860c34a00d81 --- diff --git a/libsrc/runtime/Makefile b/libsrc/runtime/Makefile index b92863cab..adb35a8f9 100644 --- a/libsrc/runtime/Makefile +++ b/libsrc/runtime/Makefile @@ -81,6 +81,7 @@ OBJS = add.o \ ge.o \ gt.o \ icmp.o \ + imul16x16r32.o \ incax1.o \ incax2.o \ incax3.o \ @@ -152,7 +153,6 @@ OBJS = add.o \ makebool.o \ mod.o \ mul.o \ - mul16x16r32.o \ mul8.o \ mul8x8r16.o \ mulax3.o \ @@ -219,6 +219,7 @@ OBJS = add.o \ ule.o \ ult.o \ umod.o \ + umul16x16r32.o \ xor.o \ zeropage.o diff --git a/libsrc/runtime/imul16x16r32.s b/libsrc/runtime/imul16x16r32.s new file mode 100644 index 000000000..d105b72a3 --- /dev/null +++ b/libsrc/runtime/imul16x16r32.s @@ -0,0 +1,50 @@ +; +; Ullrich von Bassewitz, 2010-11-03 +; +; CC65 runtime: 16x16 => 32 signed multiplication +; + + .export imul16x16r32, _cc65_imul16x16r32 + .import popax, negax, umul16x16r32m, negeax + .importzp ptr1, ptr3, tmp1 + + +;--------------------------------------------------------------------------- +; 16x16 => 32 signed multiplication routine. +; +; lhs rhs result result also in +; ------------------------------------------------------------- +; ptr1 ax ax:sreg +; +; There is probably a faster way to do this. 
+; Method: make both operands non-negative, multiply them unsigned, then negate the product if the operand signs differed. + + +_cc65_imul16x16r32: + sta ptr1 ; Operand passed in ax becomes the lhs in ptr1 + stx ptr1+1 + jsr popax ; Other operand into ax as the rhs (popax is imported; presumably pops it from the argument stack) + +imul16x16r32: + stx tmp1 ; Keep rhs high byte, its bit 7 is the rhs sign + cpx #0 ; stx does not set flags, so test the sign explicitly + bpl @L1 ; rhs is non-negative, use it unchanged + jsr negax ; rhs is negative (negax is imported; assumed to negate ax) +@L1: sta ptr3 ; ptr3 is where umul16x16r32m reads the rhs + stx ptr3+1 + + lda ptr1+1 + eor tmp1 ; Bit 7 is set if the operand signs differ + sta tmp1 ; Remember that as the sign of the result + lda ptr1 + ldx ptr1+1 ; N flag now reflects the lhs sign + bpl @L2 ; lhs is non-negative, leave ptr1 alone + jsr negax ; lhs is negative, negate it and store it back + sta ptr1 + stx ptr1+1 +@L2: jsr umul16x16r32m ; Unsigned multiply of ptr1 by ptr3 + ldy tmp1 ; N flag now reflects the result sign + bpl @L3 ; Signs were equal, product is already correct + jmp negeax ; Signs differed (negeax is imported; assumed to negate ax:sreg), its rts returns to our caller +@L3: rts + diff --git a/libsrc/runtime/mul16x16r32.s b/libsrc/runtime/mul16x16r32.s deleted file mode 100644 index 72d9fe35f..000000000 --- a/libsrc/runtime/mul16x16r32.s +++ /dev/null @@ -1,56 +0,0 @@ -; -; Ullrich von Bassewitz, 2010-11-03 -; -; CC65 runtime: 16x16 => 32 multiplication -; - - .export umul16x16r32, _cc65_umul16x16r32 - .export mul16x16r32 := umul16x16r32 - .import popax - .importzp ptr1, ptr2, ptr3, ptr4, sreg - - -;--------------------------------------------------------------------------- -; 16x16 => 32 multiplication routine. -; -; lhs rhs result result also in -; ------------------------------------------------------------- -; ptr1 ax ax:sreg ptr1:sreg -; - -_cc65_umul16x16r32: - sta ptr1 - stx ptr1+1 - jsr popax - -umul16x16r32: - sta ptr3 - stx ptr3+1 - lda #0 - sta sreg+1 - ldy #16 ; Number of bits - - lsr ptr1+1 - ror ptr1 ; Get first bit into carry -@L0: bcc @L1 - - clc - adc ptr3 - pha - txa ; hi byte of left op - adc sreg+1 - sta sreg+1 - pla - -@L1: ror sreg+1 - ror a - ror ptr1+1 - ror ptr1 - dey - bne @L0 - - sta sreg ; Save byte 3 - lda ptr1 ; Load the result - ldx ptr1+1 - rts ; Done - diff --git a/libsrc/runtime/umul16x16r32.s b/libsrc/runtime/umul16x16r32.s new file mode 100644 index 000000000..d86162c7a --- /dev/null +++ b/libsrc/runtime/umul16x16r32.s @@ -0,0 +1,57 @@ +; +; Ullrich von Bassewitz, 2010-11-03 +; +; CC65 runtime: 16x16 => 32 unsigned multiplication +; + + .export _cc65_umul16x16r32, umul16x16r32, umul16x16r32m + .import popax + .importzp ptr1, ptr3, sreg + + +;--------------------------------------------------------------------------- +; 16x16 => 32 unsigned multiplication routine. 
+; +; lhs rhs result result also in +; ------------------------------------------------------------- +; ptr1 ax ax:sreg ptr1:sreg +; + +_cc65_umul16x16r32: + sta ptr1 ; Operand passed in ax becomes the lhs in ptr1 + stx ptr1+1 + jsr popax ; Other operand into ax as the rhs (popax is imported; presumably pops it from the argument stack) + +umul16x16r32: + sta ptr3 ; rhs from ax into ptr3 + stx ptr3+1 + +umul16x16r32m: + lda #0 ; Entry with both operands already in ptr1 and ptr3; a holds the low byte of the upper result word + sta sreg+1 ; High byte of the upper result word starts at zero + ldy #16 ; Number of bits + + lsr ptr1+1 + ror ptr1 ; Get first bit into carry +@L0: bcc @L1 ; Multiplier bit is clear, skip the addition + + clc + adc ptr3 ; Add ptr3 low byte to the upper word + pha ; Park the low byte while a handles the high byte + lda ptr3+1 + adc sreg+1 ; High bytes plus the carry from the low bytes + sta sreg+1 + pla ; pla leaves carry alone, so the carry out of the addition is shifted in below + +@L1: ror sreg+1 ; Shift the 32 bit value sreg high byte, a, ptr1 high byte, ptr1 low byte right by one + ror a + ror ptr1+1 + ror ptr1 ; Next multiplier bit falls into carry + dey + bne @L0 + + sta sreg ; Save byte 3 + lda ptr1 ; Load the result + ldx ptr1+1 + rts ; Done + 