;
-; Ullrich von Bassewitz, 07.08.1998
+; Ullrich von Bassewitz, 2009-08-17
;
; CC65 runtime: multiplication for ints
;
- .export tosumula0, tosumulax, tosmula0, tosmulax
- .import popsreg
- .importzp sreg, tmp1, ptr4
+ .export tosumulax, tosmulax
+ .import mul8x16, mul8x16a ; in mul8.s
+ .import popsreg
+ .importzp sreg, tmp1, ptr4
+
+
+;---------------------------------------------------------------------------
+; 16x16 multiplication routine. Right operand in A (low) / X (high), left operand fetched by popsreg; returns the low 16 bits of the product in A/X
-tosmula0:
-tosumula0:
- ldx #0
tosmulax:
tosumulax:
-mul16: sta ptr4
- stx ptr4+1 ; Save right operand
- jsr popsreg ; Get left operand
-
-; Do ptr4*sreg --> AX (see mult-div.s from "The Fridge").
-
- lda #0
- sta tmp1
- ldx sreg+1 ; Get into register for speed
- ldy #16 ; Number of bits
-L0: lsr tmp1
- ror a
- ror ptr4+1
- ror ptr4
- bcc L1
- clc
- adc sreg
- pha
+ sta ptr4 ; Save low byte of right operand
+ txa ; Test high byte of right operand (sets Z if it is zero)
+ beq @L3 ; Do 8x16 multiplication if high byte zero
+ stx ptr4+1 ; Save right operand
+ jsr popsreg ; Get left operand
+
+; Do ptr4:ptr4+1 * sreg:sreg+1 --> AX. Work register is tmp1:A:ptr4+1:ptr4, only its low 16 bits are returned
+
+ lda #0 ; Clear sum byte; @L4 also relies on A being zero
+ ldx sreg+1 ; Get high byte into register for speed
+ beq @L4 ; Left operand fits in 8 bits -> we can do 8x16 after swap
+ sta tmp1 ; Clear high byte of the running sum
+ ldy #16 ; Number of bits
+
+ lsr ptr4+1 ; Shift the multiplier (right operand) right by one
+ ror ptr4 ; Get first bit into carry
+@L0: bcc @L1 ; Multiplier bit clear -> no add, just shift
+
+ clc
+ adc sreg ; Add low byte of left operand
+ pha ; Park the low sum byte, A is needed for the high byte add
txa ; hi byte of left op
- adc tmp1
- sta tmp1
- pla
-L1: dey
- bpl L0
- lda ptr4 ; Load the result
- ldx ptr4+1
- rts ; Done
+ adc tmp1 ; Add high bytes plus carry from the low byte add
+ sta tmp1
+ pla ; Get low sum byte back; sta/pla leave the carry of the add intact
+
+@L1: ror tmp1 ; Shift carry of the add (or 0 when skipped) into the sum
+ ror a
+ ror ptr4+1 ; Product bits move into the bytes the multiplier vacates
+ ror ptr4 ; Next multiplier bit drops into carry
+ dey
+ bne @L0 ; Loop until all 16 bits are done
+
+ lda ptr4 ; Load the result
+ ldx ptr4+1
+ rts ; Done
+
+; High byte of rhs is zero, jump to the 8x16 routine instead
+
+@L3: jmp mul8x16 ; Low byte of rhs is already in ptr4, left operand not yet popped
+
+; If the high byte of the left operand (sreg+1) is zero, swap the operands
+; and use the 8x16 routine. On entry, A and X are zero
+
+@L4: ldy sreg ; Save the 8 bit left operand
+ ldx ptr4 ; Copy low byte of the 16 bit right operand ...
+ stx sreg ; ... to sreg, where mul8x16a expects the 16 bit low byte
+ ldx ptr4+1 ; High byte is kept in X for mul8x16a, not stored to sreg+1
+ sty ptr4 ; The 8 bit operand becomes the multiplier in ptr4
+ ldy #8 ; Number of bits
+ jmp mul8x16a ; A is still zero, as mul8x16a requires
--- /dev/null
+;
+; Ullrich von Bassewitz, 2009-08-17
+;
+; CC65 runtime: multiplication for ints. Short versions.
+;
+
+ .export tosumula0, tosmula0
+ .export mul8x16, mul8x16a
+ .import popsreg
+ .importzp sreg, ptr4
+
+
+;---------------------------------------------------------------------------
+; 8x16 routine with external entry points used by the 16x16 routine in mul.s. 8 bit multiplier in ptr4, 16 bit operand in sreg (low) / X (high); result in A/X
+
+tosmula0:
+tosumula0:
+ sta ptr4 ; Save the 8 bit right operand passed in A
+mul8x16:jsr popsreg ; Get left operand
+
+ lda #0 ; Clear byte 1
+ ldy #8 ; Number of bits
+ ldx sreg+1 ; Get into register for speed
+ beq mul8x8 ; Do 8x8 multiplication if high byte zero
+mul8x16a: ; Entry: A = 0, Y = 8, X = high byte and sreg = low byte of 16 bit operand, ptr4 = multiplier
+ sta ptr4+1 ; Clear byte 2
+
+ lsr ptr4 ; Get first bit into carry
+@L0: bcc @L1 ; Multiplier bit clear -> no add, just shift
+
+ clc
+ adc sreg ; Add low byte of the 16 bit operand to byte 1
+ pha ; Park byte 1, A is needed for the high byte add
+ txa ; hi byte of left op
+ adc ptr4+1 ; Add byte 2 plus carry from the low byte add
+ sta ptr4+1
+ pla ; Get byte 1 back; sta/pla leave the carry of the add intact
+
+@L1: ror ptr4+1 ; Shift the work register ptr4+1:A:ptr4 right by one
+ ror a
+ ror ptr4 ; Product bit in, next multiplier bit out into carry
+ dey
+ bne @L0 ; Loop until all 8 bits are done
+ tax ; Byte 1 is the high byte of the 16 bit result
+ lda ptr4 ; Load the result
+ rts
+
+;---------------------------------------------------------------------------
+; 8x8 multiplication routine. Entered from mul8x16 with A = 0 and Y = 8; multiplies ptr4 by sreg, 16 bit result in A/X
+
+mul8x8:
+ lsr ptr4 ; Get first bit into carry
+@L0: bcc @L1 ; Multiplier bit clear -> no add, just shift
+ clc
+ adc sreg ; Add the 8 bit left operand to the high byte of the product
+@L1: ror ; Shift A right, taking in the carry (operand-less form of the "ror a" used in mul8x16a)
+ ror ptr4 ; Product bit in, next multiplier bit out into carry
+ dey
+ bne @L0 ; Loop until all 8 bits are done
+ tax ; A holds the high byte of the result
+ lda ptr4 ; Load the result
+ rts ; Done
+