From: IrgendwerA8 <c.krueger.b@web.de>
Date: Thu, 21 Mar 2019 14:59:45 +0000 (+0100)
Subject: Optimized mul20 & mul40 and extracted to new library.
X-Git-Url: https://git.sur5r.net/?p=cc65;a=commitdiff_plain;h=399250a105da67f2137ba96316ac9da16633780b

Optimized mul20 & mul40 and extracted to new library.
---

diff --git a/doc/index.sgml b/doc/index.sgml
index aecfb7de9..55e69f40f 100644
--- a/doc/index.sgml
+++ b/doc/index.sgml
@@ -97,6 +97,9 @@
   Describes Christian Kr&uuml;ger's macro package for writing self modifying
   assembler code.
 
+  <tag><htmlurl url="specialmath.html" name="specialmath.html"></tag>
+  Describes the library of speed-optimized math functions.
+
   <tag><url name="6502 Binary Relocation Format document"
         url="http://www.6502.org/users/andre/o65/fileformat.html"></tag>
   Describes the o65 file format that is used for dynamically loadable modules
diff --git a/doc/specialmath.sgml b/doc/specialmath.sgml
new file mode 100644
index 000000000..18de970eb
--- /dev/null
+++ b/doc/specialmath.sgml
@@ -0,0 +1,38 @@
+<!doctype linuxdoc system>
+
+<article>
+<title>Special math functions
+<author>Christian Kr&uuml;ger
+
+<abstract>
+This library provides functions for speed optimized math operations.
+</abstract>
+
+<!-- Table of contents -->
+<toc>
+
+<!-- Begin the document -->
+
+<sect>Multiplication<p>
+
+When accessing screen memory, the vertical position often has to be multiplied by the line width to calculate
+the target address. Common line widths on 8-bit systems are 20 or 40 bytes (besides e.g. 32 bytes, where the multiplication can be accomplished by shifting left 5 times).
+
+<p>
+<tscreen><verb>
+    unsigned int __fastcall__ mul20(unsigned char value);
+</verb></tscreen>
+
+The 8 bit <tt>value</tt> is multiplied by 20 and the product is returned as a 16 bit value.
+</p>
+
+<p>
+<tscreen><verb>
+    unsigned int __fastcall__ mul40(unsigned char value);
+</verb></tscreen>
+
+The 8 bit <tt>value</tt> is multiplied by 40 and the product is returned as a 16 bit value.
+</p>
+
+
+</article>
diff --git a/include/specialmath.h b/include/specialmath.h
new file mode 100644
index 000000000..fa2f65736
--- /dev/null
+++ b/include/specialmath.h
@@ -0,0 +1,46 @@
+/*****************************************************************************/
+/*                                                                           */
+/*                              specialmath.h                                */
+/*                                                                           */
+/*                 Optimized math routines for special usages                */
+/*                                                                           */
+/*                                                                           */
+/*                                                                           */
+/* (C) 2019 Christian 'Irgendwer' Krueger                                    */
+/*                                                                           */
+/* This software is provided 'as-is', without any expressed or implied       */
+/* warranty.  In no event will the authors be held liable for any damages    */
+/* arising from the use of this software.                                    */
+/*                                                                           */
+/* Permission is granted to anyone to use this software for any purpose,     */
+/* including commercial applications, and to alter it and redistribute it    */
+/* freely, subject to the following restrictions:                            */
+/*                                                                           */
+/* 1. The origin of this software must not be misrepresented; you must not   */
+/*    claim that you wrote the original software. If you use this software   */
+/*    in a product, an acknowledgment in the product documentation would be  */
+/*    appreciated but is not required.                                       */
+/* 2. Altered source versions must be plainly marked as such, and must not   */
+/*    be misrepresented as being the original software.                      */
+/* 3. This notice may not be removed or altered from any source              */
+/*    distribution.                                                          */
+/*                                                                           */
+/*****************************************************************************/
+
+#ifndef _SPECIALMATH_H
+#define _SPECIALMATH_H
+
+
+/* Multiply an 8 bit unsigned value by 20 and return the 16 bit unsigned result */
+
+unsigned int __fastcall__ mul20(unsigned char value);
+
+
+/* Multiply an 8 bit unsigned value by 40 and return the 16 bit unsigned result */
+
+unsigned int __fastcall__ mul40(unsigned char value);
+
+
+
+/* End of specialmath.h */
+#endif
diff --git a/libsrc/Makefile b/libsrc/Makefile
index 0ebec46b1..4e1a3520d 100644
--- a/libsrc/Makefile
+++ b/libsrc/Makefile
@@ -181,6 +181,7 @@ SRCDIRS += common   \
            mouse    \
            runtime  \
            serial   \
+           specialmath \
            tgi      \
            zlib
 
diff --git a/libsrc/atari/cputc.s b/libsrc/atari/cputc.s
index 7132fdca6..cf66fdacf 100644
--- a/libsrc/atari/cputc.s
+++ b/libsrc/atari/cputc.s
@@ -7,7 +7,7 @@
 
         .export         _cputcxy, _cputc
         .export         plot, cputdirect, putchar
-        .import         gotoxy, mul40
+        .import         gotoxy, _mul40
         .importzp       tmp4,ptr4
         .import         _revflag,setcursor
 
@@ -71,8 +71,7 @@ putchar:
         sta     (OLDADR),y
 
         lda     ROWCRS
-        jsr     mul40           ; destroys tmp4
-        clc
+        jsr     _mul40          ; destroys tmp4, carry is cleared
         adc     SAVMSC          ; add start of screen memory
         sta     ptr4
         txa
diff --git a/libsrc/atari/mcbtxtchar.s b/libsrc/atari/mcbtxtchar.s
index 90a25f673..4ff79c651 100644
--- a/libsrc/atari/mcbtxtchar.s
+++ b/libsrc/atari/mcbtxtchar.s
@@ -12,7 +12,7 @@
 
         .export         _mouse_txt_callbacks
         .importzp       tmp4
-        .import         mul40,loc_tmp
+        .import         _mul40
         .importzp       mouse_txt_char          ; screen code of mouse cursor
 
         .include        "atari.inc"
@@ -104,22 +104,15 @@ movex:
 
 ; Move the mouse cursor y position to the value in A/X.
 movey:
-        tax
-        ldy     tmp4            ; mul40 uses tmp4
-        lda     loc_tmp         ; and this local variable
-        pha
-        txa                     ; get parameter back
+        ldy     tmp4            ; mul40 uses tmp4, save in Y
         lsr     a               ; convert y position to character line
         lsr     a
         lsr     a
-        jsr     mul40
-        clc
+        jsr     _mul40          ; carry is cleared by _mul40
         adc     SAVMSC
         sta     scrptr
         txa
         adc     SAVMSC+1
         sta     scrptr+1
-        pla
-        sta     loc_tmp
-        sty     tmp4
+        sty     tmp4            ; restore tmp4
         rts
diff --git a/libsrc/atari/mul40.s b/libsrc/atari/mul40.s
deleted file mode 100644
index 96235bf6c..000000000
--- a/libsrc/atari/mul40.s
+++ /dev/null
@@ -1,35 +0,0 @@
-;
-; Christian Groessler, June 2000
-;
-; mul40
-; multiplies A by 40 and returns result in AX
-; uses tmp4
-
-        .importzp       tmp4
-        .export         mul40,loc_tmp
-
-.proc   mul40
-
-        ldx     #0
-        stx     tmp4
-        sta     loc_tmp
-        asl     a
-        rol     tmp4
-        asl     a
-        rol     tmp4            ; val * 4
-        adc     loc_tmp
-        bcc     L1
-        inc     tmp4            ; val * 5
-L1:     asl     a
-        rol     tmp4            ; val * 10
-        asl     a
-        rol     tmp4
-        asl     a
-        rol     tmp4            ; val * 40
-        ldx     tmp4
-        rts
-
-.endproc
-
-        .bss
-loc_tmp:.res    1
diff --git a/libsrc/atari/scroll.s b/libsrc/atari/scroll.s
index 5e8428cc2..4bc0d72ed 100644
--- a/libsrc/atari/scroll.s
+++ b/libsrc/atari/scroll.s
@@ -8,7 +8,7 @@
 
         .include        "atari.inc"
         .importzp       tmp1,tmp4,ptr1,ptr2
-        .import         mul40,_clrscr
+        .import         _mul40,_clrscr
         .export         __scroll
 
 .proc   __scroll
@@ -40,7 +40,7 @@ down_ok:lda     SAVMSC
         sta     ptr2+1
 
         lda     tmp1
-        jsr     mul40
+        jsr     _mul40
         sta     tmp4
         lda     ptr2
         sec
@@ -103,8 +103,7 @@ up:     sta     tmp1            ; # of lines to scroll
         jmp     _clrscr
 
         ;multiply by 40 (xsize)
-up_ok:  jsr     mul40
-        clc
+up_ok:  jsr     _mul40          ; carry is cleared by _mul40
         adc     SAVMSC          ; add start of screen mem
         sta     ptr2
         txa
diff --git a/libsrc/atari/setcursor.s b/libsrc/atari/setcursor.s
index cf596d4fe..c6d844047 100644
--- a/libsrc/atari/setcursor.s
+++ b/libsrc/atari/setcursor.s
@@ -4,7 +4,7 @@
 ; cursor handling, internal function
 
         .include "atari.inc"
-        .import cursor,mul40
+        .import cursor,_mul40
         .export setcursor
 
 .proc   setcursor
@@ -14,8 +14,7 @@
         sta     (OLDADR),y
 
         lda     ROWCRS
-        jsr     mul40
-        clc
+        jsr     _mul40          ; function leaves with carry clear!
         adc     SAVMSC          ; add start of screen memory
         sta     OLDADR
         txa
diff --git a/libsrc/atari5200/cputc.s b/libsrc/atari5200/cputc.s
index 860eea88d..185ad8da8 100644
--- a/libsrc/atari5200/cputc.s
+++ b/libsrc/atari5200/cputc.s
@@ -10,7 +10,7 @@
 
         .export         _cputcxy, _cputc
         .export         plot, cputdirect, putchar
-        .import         gotoxy, mul20
+        .import         gotoxy, _mul20
         .importzp       ptr4
         .import         setcursor
 
@@ -75,8 +75,7 @@ putchar:
         pha                     ; save char
 
         lda     ROWCRS_5200
-        jsr     mul20           ; destroys tmp4
-        clc
+        jsr     _mul20          ; destroys tmp4, carry is cleared
         adc     SAVMSC          ; add start of screen memory
         sta     ptr4
         txa
diff --git a/libsrc/atari5200/mul20.s b/libsrc/atari5200/mul20.s
deleted file mode 100644
index fc67b34e4..000000000
--- a/libsrc/atari5200/mul20.s
+++ /dev/null
@@ -1,33 +0,0 @@
-;
-; Christian Groessler, April 2014
-;
-; mul20
-; multiplies A by 20 and returns result in AX
-; uses tmp4
-
-        .importzp       tmp4
-        .export         mul20,loc_tmp
-
-.proc   mul20
-
-        ldx     #0
-        stx     tmp4
-        sta     loc_tmp
-        asl     a
-        rol     tmp4
-        asl     a
-        rol     tmp4            ; val * 4
-        adc     loc_tmp
-        bcc     L1
-        inc     tmp4            ; val * 5
-L1:     asl     a
-        rol     tmp4            ; val * 10
-        asl     a
-        rol     tmp4            ; val * 20
-        ldx     tmp4
-        rts
-
-.endproc
-
-        .bss
-loc_tmp:.res    1
diff --git a/libsrc/specialmath/mul20.s b/libsrc/specialmath/mul20.s
new file mode 100644
index 000000000..3339f7dd2
--- /dev/null
+++ b/libsrc/specialmath/mul20.s
@@ -0,0 +1,47 @@
+; mul20.s
+;
+; This file is part of
+; cc65 - a freeware C compiler for 6502 based systems
+;
+; https://github.com/cc65/cc65
+;
+; See "LICENSE" file for legal information.
+;
+;
+; unsigned int __fastcall__ mul20(unsigned char value);
+; 
+; REMARKS: Function is defined to return with carry-flag cleared
+
+
+        .importzp       tmp4
+        .export         _mul20
+
+.proc   _mul20                  ; A * 20 -> A (low) / X (high); 30 bytes, 41/46 cycles
+
+        sta     tmp4            ; keep the original value for the "* 4 + value" step
+        ldx     #0              ; X collects the high-byte, start with 0
+        asl     a               ; * 2
+        bcc     mul4            ; bit 7 of value shifted out?
+        ldx     #2              ; yes: after "* 4" that bit is bit 1 of the high-byte
+
+mul4:   asl     a               ; * 4
+        bcc     mul5            ; bit 6 of value shifted out? (no: carry already clear)
+        inx                     ; yes: that bit is bit 0 of the high-byte
+        clc                     ; the shift left carry set, clear it for the addition
+
+mul5:   adc     tmp4            ; * 5 (value * 4 + value), carry is clear on both paths
+        bcc     mul10           ; did the addition overflow the low-byte?
+        inx                     ; yes: propagate the carry into the high-byte
+
+mul10:  stx     tmp4            ; tmp4 = high-byte, continue with classic 16 bit shifts
+        
+        asl     a               ; * 10
+        rol     tmp4            ; shift carry from the low-byte into the high-byte
+
+        asl     a               ; * 20
+        rol     tmp4            ; 255 * 20 fits in 16 bits, so carry ends up clear
+
+        ldx     tmp4            ; deliver high-byte in X (low-byte is already in A)
+        rts
+
+.endproc
diff --git a/libsrc/specialmath/mul40.s b/libsrc/specialmath/mul40.s
new file mode 100644
index 000000000..110351935
--- /dev/null
+++ b/libsrc/specialmath/mul40.s
@@ -0,0 +1,50 @@
+; mul40.s
+;
+; This file is part of
+; cc65 - a freeware C compiler for 6502 based systems
+;
+; https://github.com/cc65/cc65
+;
+; See "LICENSE" file for legal information.
+;
+;
+; unsigned int __fastcall__ mul40(unsigned char value);
+; 
+; REMARKS: Function is defined to return with carry-flag cleared
+
+
+        .importzp       tmp4
+        .export         _mul40
+
+.proc   _mul40                  ; A * 40 -> A (low) / X (high); 33 bytes, 48/53 cycles
+
+        sta     tmp4            ; keep the original value for the "* 4 + value" step
+        ldx     #0              ; X collects the high-byte, start with 0
+        asl     a               ; * 2
+        bcc     mul4            ; bit 7 of value shifted out?
+        ldx     #2              ; yes: after "* 4" that bit is bit 1 of the high-byte
+
+mul4:   asl     a               ; * 4
+        bcc     mul5            ; bit 6 of value shifted out? (no: carry already clear)
+        inx                     ; yes: that bit is bit 0 of the high-byte
+        clc                     ; the shift left carry set, clear it for the addition
+
+mul5:   adc     tmp4            ; * 5 (value * 4 + value), carry is clear on both paths
+        bcc     mul10           ; did the addition overflow the low-byte?
+        inx                     ; yes: propagate the carry into the high-byte
+
+mul10:  stx     tmp4            ; tmp4 = high-byte, continue with classic 16 bit shifts
+        
+        asl     a               ; * 10
+        rol     tmp4            ; shift carry from the low-byte into the high-byte
+
+        asl     a               ; * 20
+        rol     tmp4
+
+        asl     a               ; * 40
+        rol     tmp4            ; 255 * 40 fits in 16 bits, so carry ends up clear
+
+        ldx     tmp4            ; deliver high-byte in X (low-byte is already in A)
+        rts
+
+.endproc
diff --git a/test/val/lib_specialmath_mulxx.c b/test/val/lib_specialmath_mulxx.c
new file mode 100644
index 000000000..de7cc1e29
--- /dev/null
+++ b/test/val/lib_specialmath_mulxx.c
@@ -0,0 +1,18 @@
+#include <specialmath.h>
+#include "unittest.h"
+
+TEST
+{
+    unsigned i;
+    /* Check mul20() against i*20 for every 8 bit input value (0..255). */
+    for (i=0; i < 256; ++i)
+    {
+        ASSERT_AreEqual(i*20, mul20(i), "%u", "Invalid 'mul20(%u)' calculation!" COMMA i);
+    }
+    /* Check mul40() against i*40 for every 8 bit input value (0..255). */
+    for (i=0; i < 256; ++i)
+    {
+        ASSERT_AreEqual(i*40, mul40(i), "%u", "Invalid 'mul40(%u)' calculation!" COMMA i);
+    }
+}
+ENDTEST