for 16x16=16, the unsigned routine can be used, regardless of signedness, this
is not true for 16x16=32.
git-svn-id: svn://svn.cc65.org/cc65/trunk@4438 b7a2c559-68d2-44c3-8de9-860c34a00d81
ge.o \
gt.o \
icmp.o \
+ imul16x16r32.o \
incax1.o \
incax2.o \
incax3.o \
makebool.o \
mod.o \
mul.o \
- mul16x16r32.o \
mul8.o \
mul8x8r16.o \
mulax3.o \
ule.o \
ult.o \
umod.o \
+ umul16x16r32.o \
xor.o \
zeropage.o
--- /dev/null
+;
+; Ullrich von Bassewitz, 2010-11-03
+;
+; CC65 runtime: 16x16 => 32 signed multiplication
+;
+
+ .export imul16x16r32, _cc65_imul16x16r32
+ .import popax, negax, umul16x16r32m, negeax
+ .importzp ptr1, ptr3, tmp1
+
+
+;---------------------------------------------------------------------------
+; 16x16 => 32 signed multiplication routine.
+;
+; lhs rhs result result also in
+; -------------------------------------------------------------
+; ptr1 ax ax:sreg
+;
+; There is probably a faster way to do this.
+;
+
+
+_cc65_imul16x16r32:            ; Entry: one operand in a/x, the other fetched with popax
+ sta ptr1                      ; ptr1 = operand passed in a/x (this becomes the lhs)
+ stx ptr1+1
+ jsr popax                     ; presumably pops the other operand into a/x - popax is imported, confirm
+
+imul16x16r32:                  ; Entry: lhs in ptr1, rhs in a/x; falls through from above
+ stx tmp1                      ; tmp1 = high byte of rhs, bit 7 is its sign
+ cpx #0                        ; stx does not touch the flags, so test the sign of x explicitly
+ bpl @L1                       ; rhs not negative: use it unchanged
+ jsr negax                     ; rhs negative: negax is imported, assumed to negate a/x - confirm
+@L1: sta ptr3                  ; ptr3 = magnitude of rhs, read by umul16x16r32m
+ stx ptr3+1
+
+ lda ptr1+1
+ eor tmp1                      ; bit 7 = sign(lhs) xor sign(rhs), i.e. set if the signs differ
+ sta tmp1                      ; remember it until the multiplication is done
+ lda ptr1
+ ldx ptr1+1                    ; a/x = lhs, ldx sets N from the sign bit of lhs
+ bpl @L2                       ; lhs not negative: ptr1 can stay as it is
+ jsr negax
+ sta ptr1                      ; ptr1 = magnitude of lhs
+ stx ptr1+1
+@L2: jsr umul16x16r32m         ; unsigned multiply using ptr1 and ptr3 as operands
+ ldy tmp1                      ; N = saved sign flag (y is free again after the multiply)
+ bpl @L3                       ; signs were equal: unsigned result is the final result
+ jmp negeax                    ; signs differed: tail call; negeax is imported, presumably negates the 32 bit result - confirm
+@L3: rts
+
+++ /dev/null
-;
-; Ullrich von Bassewitz, 2010-11-03
-;
-; CC65 runtime: 16x16 => 32 multiplication
-;
-
- .export umul16x16r32, _cc65_umul16x16r32
- .export mul16x16r32 := umul16x16r32
- .import popax
- .importzp ptr1, ptr2, ptr3, ptr4, sreg
-
-
-;---------------------------------------------------------------------------
-; 16x16 => 32 multiplication routine.
-;
-; lhs rhs result result also in
-; -------------------------------------------------------------
-; ptr1 ax ax:sreg ptr1:sreg
-;
-
-_cc65_umul16x16r32:            ; Entry: one operand in a/x, the other fetched with popax
- sta ptr1                      ; ptr1 = operand passed in a/x (this becomes the lhs)
- stx ptr1+1
- jsr popax                     ; presumably pops the other operand into a/x - popax is imported, confirm
-
-umul16x16r32:                  ; Entry: lhs in ptr1, rhs in a/x; falls through from above
- sta ptr3                      ; ptr3 = rhs; x keeps the high byte of rhs for the whole loop
- stx ptr3+1
- lda #0                        ; a = bits 16-23 of the running sum
- sta sreg+1                    ; sreg+1 = bits 24-31 of the running sum
- ldy #16 ; Number of bits = number of loop iterations
-
- lsr ptr1+1
- ror ptr1 ; Get first bit into carry
-@L0: bcc @L1                   ; current lhs bit is 0: nothing to add, only shift
-
- clc
- adc ptr3                      ; add low byte of rhs to a
- pha                           ; park the low sum, a is needed for the high byte add
- txa ; hi byte of rhs, x is unchanged since entry (same value as ptr3+1)
- adc sreg+1                    ; add with the carry from the low byte
- sta sreg+1
- pla                           ; pla leaves the carry alone, so the add overflow reaches the ror below
-
-@L1: ror sreg+1                ; shift carry:sreg+1:a:ptr1 right by one bit
- ror a
- ror ptr1+1
- ror ptr1                      ; next lhs bit drops into carry, result bits fill ptr1 from the top
- dey                           ; dey does not change the carry tested at @L0
- bne @L0
-
- sta sreg ; Save bits 16-23 of the result (third byte)
- lda ptr1 ; Load the low word of the result into a/x
- ldx ptr1+1
- rts ; Done
-
--- /dev/null
+;
+; Ullrich von Bassewitz, 2010-11-03
+;
+; CC65 runtime: 16x16 => 32 unsigned multiplication
+;
+
+ .export _cc65_umul16x16r32, umul16x16r32, umul16x16r32m
+ .import popax
+ .importzp ptr1, ptr3, sreg
+
+
+;---------------------------------------------------------------------------
+; 16x16 => 32 unsigned multiplication routine.
+;
+; lhs rhs result result also in
+; -------------------------------------------------------------
+; ptr1 ax ax:sreg ptr1:sreg
+;
+
+_cc65_umul16x16r32:            ; Entry: one operand in a/x, the other fetched with popax
+ sta ptr1                      ; ptr1 = operand passed in a/x (this becomes the lhs)
+ stx ptr1+1
+ jsr popax                     ; presumably pops the other operand into a/x - popax is imported, confirm
+
+umul16x16r32:                  ; Entry: lhs in ptr1, rhs in a/x; falls through from above
+ sta ptr3                      ; ptr3 = rhs
+ stx ptr3+1
+
+umul16x16r32m:                 ; Entry: lhs in ptr1, rhs already in ptr3; a/x contents are not used
+ lda #0                        ; a = bits 16-23 of the running sum
+ sta sreg+1                    ; sreg+1 = bits 24-31 of the running sum
+ ldy #16 ; Number of bits = number of loop iterations
+
+ lsr ptr1+1
+ ror ptr1 ; Get first bit into carry
+@L0: bcc @L1                   ; current lhs bit is 0: nothing to add, only shift
+
+ clc
+ adc ptr3                      ; add low byte of rhs to a
+ pha                           ; park the low sum, a is needed for the high byte add
+ lda ptr3+1                    ; high byte of rhs, read from memory so x need not hold it
+ adc sreg+1                    ; add with the carry from the low byte
+ sta sreg+1
+ pla                           ; pla leaves the carry alone, so the add overflow reaches the ror below
+
+@L1: ror sreg+1                ; shift carry:sreg+1:a:ptr1 right by one bit
+ ror a
+ ror ptr1+1
+ ror ptr1                      ; next lhs bit drops into carry, result bits fill ptr1 from the top
+ dey                           ; dey does not change the carry tested at @L0
+ bne @L0
+
+ sta sreg ; Save bits 16-23 of the result (third byte)
+ lda ptr1 ; Load the low word of the result into a/x
+ ldx ptr1+1
+ rts ; Done
+