2 ; Ullrich von Bassewitz, 2003-08-20
3 ; Performance increase (about 20%) by
4 ; Christian Krueger, 2009-09-13
6 ; void* __fastcall__ memcpy (void* dest, const void* src, size_t n);
8 ; NOTE: This function contains entry points for memmove, which will resort
9 ; to memcpy for an upwards copy. Don't change this module without looking
13 .export _memcpy, memcpy_upwards, memcpy_getparams
15 .importzp sp, ptr1, ptr2, ptr3
17 ; ----------------------------------------------------------------------
; memcpy_upwards: low-to-high copy loop, also used as an entry point by
; memmove. Must be entered with Y = 0.
;   ptr1        source pointer (bytes are read via (ptr1),Y)
;   ptr3+1/ptr3 high/low byte of the byte count n
; X serves as the loop counter: first the number of 256-byte blocks (high
; byte of n), then the remaining byte count (low byte of n).
; Leaves through `done`, which jumps to popax.
21 memcpy_upwards: ; precondition: Y = 0
22 ldx ptr3+1 ; X = high byte of n (number of 256-byte blocks)
25 L1: .repeat 2 ; Unrolled twice to make the block loop faster
26 lda (ptr1),Y ; copy a byte
33 dex ; one 256-byte block done, count it down
34 bne L1 ; repeat while blocks remain
36 ; The following section could be 10% faster if we were able to copy
37 ; back to front. Unfortunately we are forced to copy strictly from
38 ; low to high, since this function is also used for
39 ; memmove and the blocks could be overlapping!
42 ldx ptr3 ; X = low byte of n (bytes left after the full blocks)
43 beq done ; low byte is zero: nothing more to copy
45 L3: lda (ptr1),Y ; copy a byte
53 done: jmp popax ; Pop the pointer left on the stack and return it as the result
55 ; ----------------------------------------------------------------------
56 ; Get the parameters from stack as follows:
61 ; First argument (dest) will remain on stack and is returned in a/x!
63 memcpy_getparams: ; IMPORTANT! Function has to leave with Y=0!
65 stx ptr3+1 ; save n to ptr3
69 stx ptr1+1 ; save src to ptr1
72 ldy #1 ; (direct stack access is three cycles faster
73 ; (total cycle count with return))
76 stx ptr2+1 ; save high byte of ptr2
78 lda (sp),y ; Get ptr2 low