From 7c804513fad888f5ab6b2a36564769ddc95c276b Mon Sep 17 00:00:00 2001
From: uz
Date: Wed, 4 Nov 2009 13:45:27 +0000
Subject: [PATCH] The upper 16 bit differ between signed and unsigned
 multiplication, so while for 16x16=16, the unsigned routine can be used,
 regardless of signedness, this is not true for 16x16=32.

git-svn-id: svn://svn.cc65.org/cc65/trunk@4438 b7a2c559-68d2-44c3-8de9-860c34a00d81
---
 libsrc/runtime/Makefile                       |  3 +-
 libsrc/runtime/imul16x16r32.s                 | 50 +++++++++++++++++++
 .../runtime/{mul16x16r32.s => umul16x16r32.s} | 13 ++---
 3 files changed, 59 insertions(+), 7 deletions(-)
 create mode 100644 libsrc/runtime/imul16x16r32.s
 rename libsrc/runtime/{mul16x16r32.s => umul16x16r32.s} (77%)

diff --git a/libsrc/runtime/Makefile b/libsrc/runtime/Makefile
index b92863cab..adb35a8f9 100644
--- a/libsrc/runtime/Makefile
+++ b/libsrc/runtime/Makefile
@@ -81,6 +81,7 @@ OBJS = add.o \
                 ge.o            \
                 gt.o            \
                 icmp.o          \
+                imul16x16r32.o  \
                 incax1.o        \
                 incax2.o        \
                 incax3.o        \
@@ -152,7 +153,6 @@ OBJS = add.o \
                 makebool.o      \
                 mod.o           \
                 mul.o           \
-                mul16x16r32.o   \
                 mul8.o          \
                 mul8x8r16.o     \
                 mulax3.o        \
@@ -219,6 +219,7 @@ OBJS = add.o \
                 ule.o           \
                 ult.o           \
                 umod.o          \
+                umul16x16r32.o  \
                 xor.o           \
                 zeropage.o
 
diff --git a/libsrc/runtime/imul16x16r32.s b/libsrc/runtime/imul16x16r32.s
new file mode 100644
index 000000000..d105b72a3
--- /dev/null
+++ b/libsrc/runtime/imul16x16r32.s
@@ -0,0 +1,50 @@
+;
+; Ullrich von Bassewitz, 2010-11-03
+;
+; CC65 runtime: 16x16 => 32 signed multiplication
+;
+
+        .export         imul16x16r32, _cc65_imul16x16r32
+        .import         popax, negax, umul16x16r32m, negeax
+        .importzp       ptr1, ptr3, tmp1
+
+
+;---------------------------------------------------------------------------
+; 16x16 => 32 signed multiplication routine.
+;
+;   lhs         rhs           result          result also in
+; -------------------------------------------------------------
+;   ptr1        ax            ax:sreg
+; Writes tmp1 and ptr3 here; ptr1 is rewritten when lhs is negative.
+; Negative operands go through negax, umul16x16r32m multiplies, and negeax is
+; applied if the signs differed. There is probably a faster way to do this.
+
+
+_cc65_imul16x16r32:             ; Entry: ax goes to the lhs slot, rhs via popax
+        sta     ptr1            ; ptr1 = ax on entry (lhs of the table above)
+        stx     ptr1+1
+        jsr     popax           ; Imported; presumably loads the rhs into ax
+
+imul16x16r32:                   ; Entry: lhs in ptr1, rhs in ax
+        stx     tmp1            ; tmp1 = high byte of rhs, bit 7 is its sign
+        cpx     #0              ; stx leaves the flags alone, so test X here
+        bpl     @L1             ; rhs >= 0: keep as is
+        jsr     negax           ; rhs < 0: imported; presumably ax = -ax
+@L1:    sta     ptr3            ; ptr3 = operand read by umul16x16r32m
+        stx     ptr3+1
+
+        lda     ptr1+1
+        eor     tmp1            ; Bit 7 = sign of lhs xor sign of rhs
+        sta     tmp1            ; tmp1 bit 7 set: signs differ
+        lda     ptr1
+        ldx     ptr1+1          ; ldx sets N from the high byte of lhs
+        bpl     @L2             ; lhs >= 0: ptr1 can stay unchanged
+        jsr     negax           ; lhs < 0: run through negax, store to ptr1
+        sta     ptr1
+        stx     ptr1+1
+@L2:    jsr     umul16x16r32m   ; Unsigned multiply of the prepared operands
+        ldy     tmp1            ; Test the sign via Y so a/x stay intact
+        bpl     @L3             ; Signs were equal: return the product as is
+        jmp     negeax          ; Signs differed: tail call; presumably negates
+@L3:    rts
+
diff --git a/libsrc/runtime/mul16x16r32.s b/libsrc/runtime/umul16x16r32.s
similarity index 77%
rename from libsrc/runtime/mul16x16r32.s
rename to libsrc/runtime/umul16x16r32.s
index 72d9fe35f..d86162c7a 100644
--- a/libsrc/runtime/mul16x16r32.s
+++ b/libsrc/runtime/umul16x16r32.s
@@ -1,17 +1,16 @@
 ;
 ; Ullrich von Bassewitz, 2010-11-03
 ;
-; CC65 runtime: 16x16 => 32 multiplication
+; CC65 runtime: 16x16 => 32 unsigned multiplication
 ;
 
-        .export         umul16x16r32, _cc65_umul16x16r32
-        .export         mul16x16r32 := umul16x16r32
+        .export         _cc65_umul16x16r32, umul16x16r32, umul16x16r32m
         .import         popax
-        .importzp       ptr1, ptr2, ptr3, ptr4, sreg
+        .importzp       ptr1, ptr3, sreg
 
 
 ;---------------------------------------------------------------------------
-; 16x16 => 32 multiplication routine.
+; 16x16 => 32 unsigned multiplication routine.
 ;
 ;   lhs         rhs           result          result also in
 ; -------------------------------------------------------------
@@ -26,6 +25,8 @@ _cc65_umul16x16r32:
 umul16x16r32:
         sta     ptr3
         stx     ptr3+1
+
+umul16x16r32m:                  ; Entry for callers that already filled ptr3
         lda     #0
         sta     sreg+1
         ldy     #16             ; Number of bits
@@ -37,7 +38,7 @@ umul16x16r32:
         clc
         adc     ptr3
         pha
-        txa                     ; hi byte of left op
+        lda     ptr3+1          ; From memory: X may not hold it on the m entry
         adc     sreg+1
         sta     sreg+1
         pla
-- 
2.39.5