From: IrgendwerA8 Date: Thu, 21 Mar 2019 14:59:45 +0000 (+0100) Subject: Optimized mul20 & mul40 and extracted to new library. X-Git-Url: https://git.sur5r.net/?p=cc65;a=commitdiff_plain;h=399250a105da67f2137ba96316ac9da16633780b Optimized mul20 & mul40 and extracted to new library. --- diff --git a/doc/index.sgml b/doc/index.sgml index aecfb7de9..55e69f40f 100644 --- a/doc/index.sgml +++ b/doc/index.sgml @@ -97,6 +97,9 @@ Describes Christian Krüger's macro package for writing self modifying assembler code. + + Library for speed optimized math functions. + Describes the o65 file format that is used for dynamically loadable modules diff --git a/doc/specialmath.sgml b/doc/specialmath.sgml new file mode 100644 index 000000000..18de970eb --- /dev/null +++ b/doc/specialmath.sgml @@ -0,0 +1,38 @@ + + +
+<title>Special math functions +<author>Christian Krüger + +<abstract> +This library provides functions for speed optimized math operations. +</abstract> + +<!-- Table of contents --> +<toc> + +<!-- Begin the document --> + +<sect>Multiplication<p> + +When accessing screen memory, a multiplication of the vertical position is often needed to calculate +the target address. A quite common horizontal span for 8-bit systems is 20 or 40 bytes (besides e.g. 32 bytes, where the multiplication can be accomplished by shifting 5 times). + +<p> +<tscreen><verb> + unsigned int __fastcall__ mul20(unsigned char value); +</verb></tscreen> + +The 8 bit <tt>value</tt> is multiplied by 20 and returned as 16 bit value. +</p> + +<p> +<tscreen><verb> + unsigned int __fastcall__ mul40(unsigned char value); +</verb></tscreen> + +The 8 bit <tt>value</tt> is multiplied by 40 and returned as 16 bit value. +</p> + + +</article> diff --git a/include/specialmath.h b/include/specialmath.h new file mode 100644 index 000000000..fa2f65736 --- /dev/null +++ b/include/specialmath.h @@ -0,0 +1,46 @@ +/*****************************************************************************/ +/* */ +/* specialmath.h */ +/* */ +/* Optimized math routines for special usages */ +/* */ +/* */ +/* */ +/* (C) 2019 Christian 'Irgendwer' Krueger */ +/* */ +/* This software is provided 'as-is', without any expressed or implied */ +/* warranty. In no event will the authors be held liable for any damages */ +/* arising from the use of this software. */ +/* */ +/* Permission is granted to anyone to use this software for any purpose, */ +/* including commercial applications, and to alter it and redistribute it */ +/* freely, subject to the following restrictions: */ +/* */ +/* 1. The origin of this software must not be misrepresented; you must not */ +/* claim that you wrote the original software. 
If you use this software */ +/* in a product, an acknowledgment in the product documentation would be */ +/* appreciated but is not required. */ +/* 2. Altered source versions must be plainly marked as such, and must not */ +/* be misrepresented as being the original software. */ +/* 3. This notice may not be removed or altered from any source */ +/* distribution. */ +/* */ +/*****************************************************************************/ + +#ifndef _SPECIALMATH_H +#define _SPECIALMATH_H + + +/* Multiply an 8 bit unsigned value by 20 and return the 16 bit unsigned result */ + +unsigned int __fastcall__ mul20(unsigned char value); + + +/* Multiply an 8 bit unsigned value by 40 and return the 16 bit unsigned result */ + +unsigned int __fastcall__ mul40(unsigned char value); + + + +/* End of specialmath.h */ +#endif diff --git a/libsrc/Makefile b/libsrc/Makefile index 0ebec46b1..4e1a3520d 100644 --- a/libsrc/Makefile +++ b/libsrc/Makefile @@ -181,6 +181,7 @@ SRCDIRS += common \ mouse \ runtime \ serial \ + specialmath \ tgi \ zlib diff --git a/libsrc/atari/cputc.s b/libsrc/atari/cputc.s index 7132fdca6..cf66fdacf 100644 --- a/libsrc/atari/cputc.s +++ b/libsrc/atari/cputc.s @@ -7,7 +7,7 @@ .export _cputcxy, _cputc .export plot, cputdirect, putchar - .import gotoxy, mul40 + .import gotoxy, _mul40 .importzp tmp4,ptr4 .import _revflag,setcursor @@ -71,8 +71,7 @@ putchar: sta (OLDADR),y lda ROWCRS - jsr mul40 ; destroys tmp4 - clc + jsr _mul40 ; destroys tmp4, carry is cleared adc SAVMSC ; add start of screen memory sta ptr4 txa diff --git a/libsrc/atari/mcbtxtchar.s b/libsrc/atari/mcbtxtchar.s index 90a25f673..4ff79c651 100644 --- a/libsrc/atari/mcbtxtchar.s +++ b/libsrc/atari/mcbtxtchar.s @@ -12,7 +12,7 @@ .export _mouse_txt_callbacks .importzp tmp4 - .import mul40,loc_tmp + .import _mul40 .importzp mouse_txt_char ; screen code of mouse cursor .include "atari.inc" @@ -104,22 +104,15 @@ movex: ; Move the mouse cursor y position to the value in A/X. 
movey: - tax - ldy tmp4 ; mul40 uses tmp4 - lda loc_tmp ; and this local variable - pha - txa ; get parameter back + ldy tmp4 ; mul40 uses tmp4, save in Y lsr a ; convert y position to character line lsr a lsr a - jsr mul40 - clc + jsr _mul40 ; carry is cleared by _mul40 adc SAVMSC sta scrptr txa adc SAVMSC+1 sta scrptr+1 - pla - sta loc_tmp - sty tmp4 + sty tmp4 ; restore tmp4 rts diff --git a/libsrc/atari/mul40.s b/libsrc/atari/mul40.s deleted file mode 100644 index 96235bf6c..000000000 --- a/libsrc/atari/mul40.s +++ /dev/null @@ -1,35 +0,0 @@ -; -; Christian Groessler, June 2000 -; -; mul40 -; multiplies A by 40 and returns result in AX -; uses tmp4 - - .importzp tmp4 - .export mul40,loc_tmp - -.proc mul40 - - ldx #0 - stx tmp4 - sta loc_tmp - asl a - rol tmp4 - asl a - rol tmp4 ; val * 4 - adc loc_tmp - bcc L1 - inc tmp4 ; val * 5 -L1: asl a - rol tmp4 ; val * 10 - asl a - rol tmp4 - asl a - rol tmp4 ; val * 40 - ldx tmp4 - rts - -.endproc - - .bss -loc_tmp:.res 1 diff --git a/libsrc/atari/scroll.s b/libsrc/atari/scroll.s index 5e8428cc2..4bc0d72ed 100644 --- a/libsrc/atari/scroll.s +++ b/libsrc/atari/scroll.s @@ -8,7 +8,7 @@ .include "atari.inc" .importzp tmp1,tmp4,ptr1,ptr2 - .import mul40,_clrscr + .import _mul40,_clrscr .export __scroll .proc __scroll @@ -40,7 +40,7 @@ down_ok:lda SAVMSC sta ptr2+1 lda tmp1 - jsr mul40 + jsr _mul40 sta tmp4 lda ptr2 sec @@ -103,8 +103,7 @@ up: sta tmp1 ; # of lines to scroll jmp _clrscr ;multiply by 40 (xsize) -up_ok: jsr mul40 - clc +up_ok: jsr _mul40 ; carry is cleared by _mul40 adc SAVMSC ; add start of screen mem sta ptr2 txa diff --git a/libsrc/atari/setcursor.s b/libsrc/atari/setcursor.s index cf596d4fe..c6d844047 100644 --- a/libsrc/atari/setcursor.s +++ b/libsrc/atari/setcursor.s @@ -4,7 +4,7 @@ ; cursor handling, internal function .include "atari.inc" - .import cursor,mul40 + .import cursor,_mul40 .export setcursor .proc setcursor @@ -14,8 +14,7 @@ sta (OLDADR),y lda ROWCRS - jsr mul40 - clc + jsr _mul40 ; function 
leaves with carry clear! adc SAVMSC ; add start of screen memory sta OLDADR txa diff --git a/libsrc/atari5200/cputc.s b/libsrc/atari5200/cputc.s index 860eea88d..185ad8da8 100644 --- a/libsrc/atari5200/cputc.s +++ b/libsrc/atari5200/cputc.s @@ -10,7 +10,7 @@ .export _cputcxy, _cputc .export plot, cputdirect, putchar - .import gotoxy, mul20 + .import gotoxy, _mul20 .importzp ptr4 .import setcursor @@ -75,8 +75,7 @@ putchar: pha ; save char lda ROWCRS_5200 - jsr mul20 ; destroys tmp4 - clc + jsr _mul20 ; destroys tmp4, carry is cleared adc SAVMSC ; add start of screen memory sta ptr4 txa diff --git a/libsrc/atari5200/mul20.s b/libsrc/atari5200/mul20.s deleted file mode 100644 index fc67b34e4..000000000 --- a/libsrc/atari5200/mul20.s +++ /dev/null @@ -1,33 +0,0 @@ -; -; Christian Groessler, April 2014 -; -; mul20 -; multiplies A by 20 and returns result in AX -; uses tmp4 - - .importzp tmp4 - .export mul20,loc_tmp - -.proc mul20 - - ldx #0 - stx tmp4 - sta loc_tmp - asl a - rol tmp4 - asl a - rol tmp4 ; val * 4 - adc loc_tmp - bcc L1 - inc tmp4 ; val * 5 -L1: asl a - rol tmp4 ; val * 10 - asl a - rol tmp4 ; val * 20 - ldx tmp4 - rts - -.endproc - - .bss -loc_tmp:.res 1 diff --git a/libsrc/specialmath/mul20.s b/libsrc/specialmath/mul20.s new file mode 100644 index 000000000..3339f7dd2 --- /dev/null +++ b/libsrc/specialmath/mul20.s @@ -0,0 +1,47 @@ +; mul20.s +; +; This file is part of +; cc65 - a freeware C compiler for 6502 based systems +; +; https://github.com/cc65/cc65 +; +; See "LICENSE" file for legal information. +; +; +; unsigned int __fastcall__ mul20(unsigned char value); +; +; REMARKS: Function is defined to return with carry-flag cleared + + + .importzp tmp4 + .export _mul20 + +.proc _mul20 ; = 30 bytes, 41/46 cycles + + sta tmp4 ; remember value for later addition... + ldx #0 ; clear high-byte + asl a ; * 2 + bcc mul4 ; high-byte affected? + ldx #2 ; this will be the 1st high-bit soon... + +mul4: asl a ; * 4 + bcc mul5 ; high-byte affected? 
+ inx ; => yes, apply to 0 high-bit + clc ; prepare addition + +mul5: adc tmp4 ; * 5 + bcc mul10 ; high-byte affected? + inx ; yes, correct... + +mul10: stx tmp4 ; continue with classic shifting... + + asl a ; * 10 + rol tmp4 + + asl a ; * 20 + rol tmp4 + + ldx tmp4 ; deliver high-byte in X + rts + +.endproc diff --git a/libsrc/specialmath/mul40.s b/libsrc/specialmath/mul40.s new file mode 100644 index 000000000..110351935 --- /dev/null +++ b/libsrc/specialmath/mul40.s @@ -0,0 +1,50 @@ +; mul40.s +; +; This file is part of +; cc65 - a freeware C compiler for 6502 based systems +; +; https://github.com/cc65/cc65 +; +; See "LICENSE" file for legal information. +; +; +; unsigned int __fastcall__ mul40(unsigned char value); +; +; REMARKS: Function is defined to return with carry-flag cleared + + + .importzp tmp4 + .export _mul40 + +.proc _mul40 ; = 33 bytes, 48/53 cycles + + sta tmp4 ; remember value for later addition... + ldx #0 ; clear high-byte + asl a ; * 2 + bcc mul4 ; high-byte affected? + ldx #2 ; this will be the 1st high-bit soon... + +mul4: asl a ; * 4 + bcc mul5 ; high-byte affected? + inx ; => yes, apply to 0 high-bit + clc ; prepare addition + +mul5: adc tmp4 ; * 5 + bcc mul10 ; high-byte affected? + inx ; yes, correct... + +mul10: stx tmp4 ; continue with classic shifting... + + asl a ; * 10 + rol tmp4 + + asl a ; * 20 + rol tmp4 + + asl a ; * 40 + rol tmp4 + + ldx tmp4 ; deliver high-byte in X + rts + +.endproc diff --git a/test/val/lib_specialmath_mulxx.c b/test/val/lib_specialmath_mulxx.c new file mode 100644 index 000000000..de7cc1e29 --- /dev/null +++ b/test/val/lib_specialmath_mulxx.c @@ -0,0 +1,18 @@ +#include <specialmath.h> +#include "unittest.h" + +TEST +{ + unsigned i; + + for (i=0; i < 256; ++i) + { + ASSERT_AreEqual(i*20, mul20(i), "%u", "Invalid 'mul20(%u)' calculation!" COMMA i); + } + + for (i=0; i < 256; ++i) + { + ASSERT_AreEqual(i*40, mul40(i), "%u", "Invalid 'mul40(%u)' calculation!" COMMA i); + } +} +ENDTEST