From cc6559c3f6a81e3b4ad676c2242505466e6e9ec1 Mon Sep 17 00:00:00 2001 From: IrgendwerA8 Date: Thu, 28 Feb 2019 17:30:34 +0100 Subject: [PATCH] Minor math optimizations --- libsrc/runtime/ludiv.s | 8 ++++---- libsrc/runtime/udiv32by16r16.s | 8 ++++---- libsrc/runtime/umul16x16r32.s | 4 ++-- libsrc/runtime/umul8x16r24.s | 6 ++++++ 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/libsrc/runtime/ludiv.s b/libsrc/runtime/ludiv.s index 54af4780e..8a3126d72 100644 --- a/libsrc/runtime/ludiv.s +++ b/libsrc/runtime/ludiv.s @@ -78,7 +78,7 @@ L0: asl ptr1 ; Do a subtraction. we do not have enough space to store the intermediate ; result, so we may have to do the subtraction twice. - pha + tax cmp ptr3 lda ptr2+1 sbc ptr3+1 @@ -91,9 +91,9 @@ L0: asl ptr1 ; Overflow, do the subtraction again, this time store the result sta tmp4 ; We have the high byte already - pla + txa sbc ptr3 ; byte 0 - pha + tax lda ptr2+1 sbc ptr3+1 sta ptr2+1 ; byte 1 @@ -102,7 +102,7 @@ L0: asl ptr1 sta tmp3 ; byte 2 inc ptr1 ; Set result bit -L1: pla +L1: txa dey bne L0 sta ptr2 diff --git a/libsrc/runtime/udiv32by16r16.s b/libsrc/runtime/udiv32by16r16.s index 9897f9908..27f1176dd 100644 --- a/libsrc/runtime/udiv32by16r16.s +++ b/libsrc/runtime/udiv32by16r16.s @@ -34,19 +34,19 @@ L0: asl ptr1 rol a rol sreg+1 - pha + tax cmp ptr3 lda sreg+1 sbc ptr3+1 bcc L1 sta sreg+1 - pla + txa sbc ptr3 - pha + tax inc ptr1 -L1: pla +L1: txa dey bne L0 sta sreg diff --git a/libsrc/runtime/umul16x16r32.s b/libsrc/runtime/umul16x16r32.s index 9ecd1596e..cd2dae351 100644 --- a/libsrc/runtime/umul16x16r32.s +++ b/libsrc/runtime/umul16x16r32.s @@ -42,11 +42,11 @@ umul16x16r16m: clc adc ptr3 - pha + tax lda ptr3+1 adc sreg+1 sta sreg+1 - pla + txa @L1: ror sreg+1 ror a diff --git a/libsrc/runtime/umul8x16r24.s b/libsrc/runtime/umul8x16r24.s index ff7d0bae6..c006082a4 100644 --- a/libsrc/runtime/umul8x16r24.s +++ b/libsrc/runtime/umul8x16r24.s @@ -9,6 +9,7 @@ .include "zeropage.inc" + .macpack cpu ;--------------------------------------------------------------------------- ; 8x16 => 24 unsigned multiplication routine. Because the overhead for a @@ -30,9 +31,14 @@ umul8x16r16: umul8x16r24m: umul8x16r16m: +.if (.cpu .bitand ::CPU_ISET_65SC02) + stz ptr1+1 + stz sreg +.else ldx #0 stx ptr1+1 stx sreg +.endif ldy #8 ; Number of bits ldx ptr3 ; Get into register for speed -- 2.39.2