--- /dev/null
+; Ullrich von Bassewitz, 2011-07-10
+; CC65 runtime: 8x16 => 24 unsigned multiplication
+ .export umul8x16r24, umul8x16r24m
+ .export umul8x16r16, umul8x16r16m
+ .include "zeropage.inc"
+; 8x16 => 24 unsigned multiplication routine. Because the overhead for an
+; 8x16 => 16 unsigned multiplication routine is small, we will tag it with
+; the matching labels as well.
+;  routine         lhs         rhs        result          result also in
+; -----------------------------------------------------------------------
+;  umul8x16r24     ptr1-lo     ax         ax:sreg-lo      ptr1:sreg-lo
+;  umul8x16r24m    ptr1-lo     ptr3       ax:sreg-lo      ptr1:sreg-lo
+; ptr3 is left intact by the routine.
+;---------------------------------------------------------------------------
+; Shift-and-add multiply of the 8 bit value in ptr1 (low byte) with a
+; 16 bit value. The 24 bit product is built in sreg (high byte), ptr1+1
+; (middle byte) and the accumulator (low byte). On return the low byte is
+; in a and ptr1, the middle byte in x and ptr1+1, the high byte in sreg,
+; and y is zero.
+;
+; Entry with the 16 bit rhs in a/x: store it in ptr3, then fall through
+; into the common code.
+;
+umul8x16r24:
+umul8x16r16:
+        sta     ptr3            ; rhs low byte
+        stx     ptr3+1          ; rhs high byte
+;
+; Entry with the 16 bit rhs already in ptr3. ptr3 is only read from here on.
+;
+umul8x16r24m:
+umul8x16r16m:
+        ldx     #0
+        stx     ptr1+1          ; Clear middle byte of the product
+        stx     sreg            ; Clear high byte of the product
+        ldy     #8              ; Number of bits
+        ldx     ptr3            ; Get into register for speed
+        lda     ptr1
+        ror     a               ; Get next bit into carry
+@L0:    bcc     @L1             ; Multiplier bit is zero: shift only
+;
+; Multiplier bit is one: add the rhs to the upper 16 bits of the product.
+; The accumulator holds the remaining multiplier bits and the low product
+; bits collected so far, so it is saved on the stack around the addition.
+;
+        clc                     ; Carry is set here, clear it for the add
+        pha
+        txa                     ; rhs low byte
+        adc     ptr1+1
+        sta     ptr1+1
+        lda     ptr3+1          ; rhs high byte
+        adc     sreg
+        sta     sreg            ; Carry out is rotated into sreg below
+        pla                     ; Does not touch the carry
+;
+; Rotate the 24 bit quantity sreg:ptr1+1:a right by one. The bit leaving
+; the accumulator is the next multiplier bit for the test at @L0.
+;
+@L1:    ror     sreg
+        ror     ptr1+1
+        ror     a
+        dey                     ; Does not touch the carry
+        bne     @L0
+        sta     ptr1            ; Save low byte of result
+        ldx     ptr1+1          ; Load high byte of result
+        rts                     ; Done