;
-; void* memset (void* ptr, int c, size_t n);
-; void* _bzero (void* ptr, size_t n);
-; void bzero (void* ptr, size_t n);
+; void* __fastcall__ memset (void* ptr, int c, size_t n);
+; void* __fastcall__ _bzero (void* ptr, size_t n);
+; void __fastcall__ bzero (void* ptr, size_t n);
;
; Ullrich von Bassewitz, 29.05.1998
+; Performance increase (about 20%) by
+; Christian Krueger, 12.09.2009
;
; NOTE: bzero will return it's first argument as memset does. It is no problem
; to declare the return value as void, since it may be ignored. _bzero
.export _memset, _bzero, __bzero
.import popax
- .importzp sp, ptr1, ptr2, ptr3, tmp1
+ .importzp sp, ptr1, ptr2, ptr3
_bzero:
__bzero:
sta ptr3
stx ptr3+1 ; Save n
- lda #0 ; Fill with zeros
+ ldx #0 ; Fill with zeros
beq common
-
+
_memset:
sta ptr3 ; Save n
stx ptr3+1
jsr popax ; Get c
+ tax
; Common stuff for memset and bzero from here
-common: sta tmp1 ; Save the fill value
- ldy #1
+common: ; Fill value is in X!
+ ldy #1
lda (sp),y
- tax
- dey
+ sta ptr1+1 ; save high byte of ptr
+ dey ; Y = 0
lda (sp),y ; Get ptr
sta ptr1
- stx ptr1+1 ; Save work copy
- lda tmp1 ; Load fill value
- ldy #0
+ lsr ptr3+1 ; divide number of
+ ror ptr3 ; bytes by two to increase
+ bcc evenCount ; speed (ptr3 = ptr3/2)
+oddCount:
+ ; y is still 0 here
+ txa ; restore fill value
+ sta (ptr1),y ; save value and increase
+ inc ptr1 ; dest. pointer
+ bne evenCount
+ inc ptr1+1
+evenCount:
+ lda ptr1 ; build second pointer section
+ clc
+ adc ptr3 ; ptr2 = ptr1 + (length/2) <- ptr3
+ sta ptr2
+ lda ptr1+1
+ adc ptr3+1
+ sta ptr2+1
+
+ txa ; restore fill value
ldx ptr3+1 ; Get high byte of n
beq L2 ; Jump if zero
-; Set 256 byte blocks
-
+; Set 256/512 byte blocks
+ ; y is still 0 here
L1: .repeat 2 ; Unroll this a bit to make it faster
- sta (ptr1),y ; Set one byte
- iny
+ sta (ptr1),y ; Set byte in lower section
+ sta (ptr2),y ; Set byte in upper section
+ iny
.endrepeat
bne L1
inc ptr1+1
+ inc ptr2+1
dex ; Next 256 byte block
bne L1 ; Repeat if any
; Set the remaining bytes if any
-L2: ldx ptr3 ; Get the low byte of n
- beq L9 ; Low byte is zero
-
-L3: sta (ptr1),y ; Set one byte
- iny
- dex ; Done?
- bne L3
-
-L9: jmp popax ; Pop ptr and return as result
+L2: ldy ptr3 ; Get the low byte of n
+ bne L3 ; something to set?
+ jmp popax ; no -> Pop ptr and return as result
+L3a: sta (ptr1),y ; set bytes in low
+ sta (ptr2),y ; and high section
+L3: dey
+ bne L3a
+ sta (ptr1),y ; Set remaining byte(s)
+ sta (ptr2),y
+ jmp popax ; Pop ptr and return as result
+