Merge pull request #226 from greg-king5/pce-memset

author Oliver Schmidt <ol.sc@web.de>

Sat, 7 Nov 2015 10:37:36 +0000 (11:37 +0100)

committer Oliver Schmidt <ol.sc@web.de>

Sat, 7 Nov 2015 10:37:36 +0000 (11:37 +0100)
author Oliver Schmidt <ol.sc@web.de>
Sat, 7 Nov 2015 10:37:36 +0000 (11:37 +0100)
committer Oliver Schmidt <ol.sc@web.de>
Sat, 7 Nov 2015 10:37:36 +0000 (11:37 +0100)
diff --git a/libsrc/pce/memcpy.s b/libsrc/pce/memcpy.s

index e3b7bde34f00c33855d06c582a216423fa768c25..40f831e30d13e58d94c0a213ad7cd4d0c26e4404 100644 (file)
--- a/libsrc/pce/memcpy.s
+++ b/libsrc/pce/memcpy.s
@@ -8,9 +8,10 @@
  ;
  ; void* __fastcall__ memcpy (void* dest, const void* src, size_t size);
  ;
-; NOTE:  This function contains entry points for memmove, which will resort
-; to memcpy for an incrementing copy.  Don't change this module without looking
-; at "pce/memmove.s"!
+; NOTE:  This function contains entry points for memmove(), which resorts to
+; memcpy() for incrementing copies.  The PC-Engine memset() uses this memcpy()
+; to fill memory quickly.  Don't change this module without looking at
+; "pce/memmove.s" and "pce/memset.s"!
  ;
  
          .export         _memcpy
@@ -34,25 +35,25 @@ _memcpy:
          jsr     memcpy_getparams
  
  memcpy_increment:
-        ldy     #$73                    ; TII
+        ldy     #$73                    ; TII opcode
  
  memcpy_transfer:
-        sty     transfer+opcode
+        sty     transfer + opcode
  
          lda     ptr1
          ldx     ptr1+1
-        sta     transfer+source
-        stx     transfer+source+1
+        sta     transfer + source
+        stx     transfer + source+1
  
          lda     ptr2
          ldx     ptr2+1
-        sta     transfer+destination
-        stx     transfer+destination+1
+        sta     transfer + destination
+        stx     transfer + destination+1
  
          lda     ptr3
          ldx     ptr3+1
-        sta     transfer+length
-        stx     transfer+length+1
+        sta     transfer + length
+        stx     transfer + length+1
  
          jmp     transfer
  
diff --git a/libsrc/pce/memmove.s b/libsrc/pce/memmove.s

index 9a7feebf5e6a1c4b048a20943e1372dd3bca69da..4b80da2af4690dc49e2c2202a1d3ef214f795a08 100644 (file)
--- a/libsrc/pce/memmove.s
+++ b/libsrc/pce/memmove.s
@@ -59,5 +59,5 @@ _memmove:
          dec     ptr2+1
  @L2:    dec     ptr2
  
-        ldy     #$C3                    ; TDD
+        ldy     #$C3                    ; TDD opcode
          jmp     memcpy_transfer
diff --git a/libsrc/pce/memset.s b/libsrc/pce/memset.s

new file mode 100644 (file)

index 0000000..45a78d5
--- /dev/null
+++ b/libsrc/pce/memset.s
@@ -0,0 +1,75 @@
+;
+; This file, instead of "common/memset.s", will be assembled for the pce
+; target.  This version is smaller and faster because it uses a HuC6280
+; block-copy instruction.
+;
+; 1998-05-29, Ullrich von Bassewitz
+; 2015-11-06, Greg King
+;
+; void* __fastcall__ _bzero (void* ptr, size_t n);
+; void  __fastcall__  bzero (void* ptr, size_t n);
+; void* __fastcall__ memset (void* ptr, int c, size_t n);
+;
+; NOTE: bzero() will return its first argument, as memset() does.  It is no
+;       problem to declare the return value as void, because it can be ignored.
+;       _bzero() (note the leading underscore) is declared with the proper
+;       return type because the compiler will replace memset() by _bzero() if
+;       the fill value is zero; and, the optimizer looks at the return type
+;       to see if the value in .XA is of any use.
+;
+; NOTE: This function uses entry points from "pce/memcpy.s"!
+;
+
+        .export         __bzero, _bzero, _memset
+
+        .import         memcpy_getparams, memcpy_increment
+        .import         pushax, popax
+        .importzp       ptr1, ptr2, ptr3
+
+        .macpack        longbranch
+
+
+; ----------------------------------------------------------------------
+__bzero:
+_bzero: pha                             ; keep .A while the fill value is pushed
+        cla                             ; fill with zeros
+        jsr     pushax                  ; (high byte isn't important)
+        pla                             ; restore .A, then fall into memset()
+
+_memset:
+        jsr     memcpy_getparams
+
+; A size of zero must leave memory untouched.  Without this test, the fill
+; byte would be stored anyway; and, the count adjustment below would wrap
+; around from $0000 to $FFFF, and overwrite almost 64K bytes.
+
+        lda     ptr3
+        ora     ptr3+1
+        jeq     popax                   ; return ptr. if nothing to fill
+
+; The fill byte is put at the beginning of the buffer; then, the buffer is
+; copied to a second buffer that starts one byte above the start of the first
+; buffer.  Normally, we would use memmove() to avoid trouble; but here, we
+; exploit that overlap, by using memcpy().  Therefore, the fill value is copied
+; from each byte to the next byte, all the way to the end of the buffer.
+
+        lda     ptr1                    ; get fill value
+        sta     (ptr2)
+
+        lda     ptr3                    ; count first byte
+        bne     @L3                     ; no borrow if low byte isn't zero
+        dec     ptr3+1
+@L3:    dec     a
+        sta     ptr3
+        ora     ptr3+1
+        jeq     popax                   ; return ptr. if no more bytes
+
+        lda     ptr2                    ; point to first buffer
+        ldx     ptr2+1
+        sta     ptr1
+        stx     ptr1+1
+        inc     ptr2                    ; point to second buffer
+        bne     @L2
+        inc     ptr2+1
+
+@L2:    jmp     memcpy_increment
author	Oliver Schmidt <ol.sc@web.de>
	Sat, 7 Nov 2015 10:37:36 +0000 (11:37 +0100)
committer	Oliver Schmidt <ol.sc@web.de>
	Sat, 7 Nov 2015 10:37:36 +0000 (11:37 +0100)
libsrc/pce/memcpy.s		patch \| blob \| history
libsrc/pce/memmove.s		patch \| blob \| history
libsrc/pce/memset.s	[new file with mode: 0644]	patch \| blob