2 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
4 * Author: Nicolas Pitre <nico@fluxnic.net>
5 * - contributed to gcc-3.4 on Sep 30, 2003
6 * - adapted for the Linux kernel on Oct 2, 2003
9 /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
11 * SPDX-License-Identifier: GPL-2.0+
15 #include <linux/linkage.h>
16 #include <asm/assembler.h>
19 * U-Boot compatibility bit: define an empty UNWIND() macro, since we
20 * do not support stack unwinding, and define CONFIG_AEABI to make all
21 * of the functions available without diverging from Linux code.
/*
 * ARM_DIV_BODY - shift-and-subtract core shared by the division routines.
 *
 *   dividend  reduced in place by every subtraction that succeeds
 *   divisor   shifted left to line it up with the dividend, then
 *             shifted back right as the main loop proceeds
 *   result    collects the quotient bits (it also holds the shift
 *             count temporarily on the clz path)
 *   curbit    quotient bit matching the current position of divisor;
 *             it is shifted together with divisor
 */
28 .macro ARM_DIV_BODY dividend, divisor, result, curbit
30 #if __LINUX_ARM_ARCH__ >= 5
@ With clz available: result = curbit - clz(dividend) is used as the
@ shift amount, applied to both divisor and curbit in one step.
33 clz \result, \dividend
34 sub \result, \curbit, \result
36 mov \divisor, \divisor, lsl \result
37 mov \curbit, \curbit, lsl \result
42 @ Initially shift the divisor left 3 bits if possible,
43 @ set curbit accordingly. This allows for curbit to be located
44 @ at the left end of each 4-bit nibble in the division loop
45 @ to save one loop in most cases.
46 tst \divisor, #0xe0000000
47 moveq \divisor, \divisor, lsl #3
51 @ Unless the divisor is very big, shift it up in multiples of
52 @ four bits, since this is the amount of unwinding in the main
53 @ division loop. Continue shifting until the divisor is
54 @ larger than the dividend.
55 1: cmp \divisor, #0x10000000
56 cmplo \divisor, \dividend
57 movlo \divisor, \divisor, lsl #4
58 movlo \curbit, \curbit, lsl #4
61 @ For very big divisors, we must shift it a bit at a time, or
62 @ we will be in danger of overflowing.
63 1: cmp \divisor, #0x80000000
64 cmplo \divisor, \dividend
65 movlo \divisor, \divisor, lsl #1
66 movlo \curbit, \curbit, lsl #1
@ Main loop: four compare/subtract steps per pass, trying divisor,
@ divisor >> 1, divisor >> 2 and divisor >> 3. Each step that fits
@ subtracts from dividend and sets the matching curbit in result.
74 1: cmp \dividend, \divisor
75 subhs \dividend, \dividend, \divisor
76 orrhs \result, \result, \curbit
77 cmp \dividend, \divisor, lsr #1
78 subhs \dividend, \dividend, \divisor, lsr #1
79 orrhs \result, \result, \curbit, lsr #1
80 cmp \dividend, \divisor, lsr #2
81 subhs \dividend, \dividend, \divisor, lsr #2
82 orrhs \result, \result, \curbit, lsr #2
83 cmp \dividend, \divisor, lsr #3
84 subhs \dividend, \dividend, \divisor, lsr #3
85 orrhs \result, \result, \curbit, lsr #3
@ Move on to the next nibble only while dividend is non-zero; curbit
@ and divisor both step down by four bits.
86 cmp \dividend, #0 @ Early termination?
87 movsne \curbit, \curbit, lsr #4 @ No, any more bits to do?
88 movne \divisor, \divisor, lsr #4
/*
 * ARM_DIV2_ORDER - work out the bit order of divisor and leave it in
 * order. divisor is shifted right while the search narrows, so it is
 * modified as well. __divsi3 tests for a power-of-2 divisor ahead of
 * its invocation, so divisor is presumably a power of two here - TODO
 * confirm against the branch lines.
 */
94 .macro ARM_DIV2_ORDER divisor, order
96 #if __LINUX_ARM_ARCH__ >= 5
@ order = 31 - order
99 rsb \order, \order, #31
@ Narrow the search 16, 8, then 4 bits at a time: whenever divisor
@ reaches the tested power of two it is shifted down by that many
@ bits (the 8 and 4 bit steps add the same amount to order).
103 cmp \divisor, #(1 << 16)
104 movhs \divisor, \divisor, lsr #16
108 cmp \divisor, #(1 << 8)
109 movhs \divisor, \divisor, lsr #8
110 addhs \order, \order, #8
112 cmp \divisor, #(1 << 4)
113 movhs \divisor, \divisor, lsr #4
114 addhs \order, \order, #4
@ Last step: add 3 when divisor is above 4, otherwise add
@ divisor >> 1.
116 cmp \divisor, #(1 << 2)
117 addhi \order, \order, #3
118 addls \order, \order, \divisor, lsr #1
/*
 * ARM_MOD_BODY - shift-and-subtract core shared by the modulo routines.
 *
 *   dividend  reduced in place by every subtraction that succeeds
 *   divisor   shifted left to line it up with the dividend, then
 *             shifted back right while the subtractions are performed
 *   order     number of bit positions divisor was shifted left; it is
 *             counted back down to decide how many steps remain
 *   spare     scratch register (receives clz of dividend)
 */
125 .macro ARM_MOD_BODY dividend, divisor, order, spare
127 #if __LINUX_ARM_ARCH__ >= 5
@ With clz available: order = order - clz(dividend), and divisor is
@ shifted left by that amount in one step.
130 clz \spare, \dividend
131 sub \order, \order, \spare
132 mov \divisor, \divisor, lsl \order
138 @ Unless the divisor is very big, shift it up in multiples of
139 @ four bits, since this is the amount of unwinding in the main
140 @ division loop. Continue shifting until the divisor is
141 @ larger than the dividend.
142 1: cmp \divisor, #0x10000000
143 cmplo \divisor, \dividend
144 movlo \divisor, \divisor, lsl #4
145 addlo \order, \order, #4
148 @ For very big divisors, we must shift it a bit at a time, or
149 @ we will be in danger of overflowing.
150 1: cmp \divisor, #0x80000000
151 cmplo \divisor, \dividend
152 movlo \divisor, \divisor, lsl #1
153 addlo \order, \order, #1
158 @ Perform all needed subtractions to keep only the remainder.
159 @ Do comparisons in batch of 4 first.
160 subs \order, \order, #3 @ yes, 3 is intended here
@ One batch: try divisor, divisor >> 1, divisor >> 2 and divisor >> 3,
@ then move divisor down four bits and take 4 off order.
163 1: cmp \dividend, \divisor
164 subhs \dividend, \dividend, \divisor
165 cmp \dividend, \divisor, lsr #1
166 subhs \dividend, \dividend, \divisor, lsr #1
167 cmp \dividend, \divisor, lsr #2
168 subhs \dividend, \dividend, \divisor, lsr #2
169 cmp \dividend, \divisor, lsr #3
170 subhs \dividend, \dividend, \divisor, lsr #3
172 mov \divisor, \divisor, lsr #4
173 subsge \order, \order, #4
180 @ Either 1, 2 or 3 comparison/subtractions are left.
@ The tail below is a chain of single steps, each followed by a one
@ bit right shift of divisor; labels 3 and 4 are later entry points
@ into the chain.
184 cmp \dividend, \divisor
185 subhs \dividend, \dividend, \divisor
186 mov \divisor, \divisor, lsr #1
187 3: cmp \dividend, \divisor
188 subhs \dividend, \dividend, \divisor
189 mov \divisor, \divisor, lsr #1
190 4: cmp \dividend, \divisor
191 subhs \dividend, \dividend, \divisor
/*
 * Unsigned division routine, emitted into its own section
 * .text.__udivsi3 and closed under the name __aeabi_uidiv.
 */
196 .pushsection .text.__udivsi3, "ax"
@ ARM_DIV_BODY arguments: dividend r0, divisor r1, result r2, curbit r3
209 ARM_DIV_BODY r0, r1, r2, r3
@ Label 12: ARM_DIV2_ORDER leaves the order of divisor r1 in r2
218 12: ARM_DIV2_ORDER r1, r2
225 ENDPROC(__aeabi_uidiv)
/*
 * Unsigned modulo routine, emitted into its own section
 * .text.__umodsi3. Per the comments below, r0 is the dividend and r1
 * the divisor.
 */
228 .pushsection .text.__umodsi3, "ax"
@ r2 = divisor - 1 serves two purposes: the subs sets the flags for the
@ compare with 1, and the tst against r1 is the power of 2 check.
232 subs r2, r1, #1 @ compare divisor with 1
234 cmpne r0, r1 @ compare dividend with divisor
236 tsthi r1, r2 @ see if divisor is power of 2
@ ARM_MOD_BODY arguments: dividend r0, divisor r1, order r2, spare r3
240 ARM_MOD_BODY r0, r1, r2, r3
/*
 * Signed division routine, emitted into its own section .text.__divsi3
 * and closed under the name __aeabi_idiv. ip keeps r0 eor r1, whose top
 * bit is the sign of the result; the divisor is negated when negative
 * because the shared macros work on unsigned values.
 */
248 .pushsection .text.__divsi3, "ax"
254 eor ip, r0, r1 @ save the sign of the result.
256 rsbmi r1, r1, #0 @ loops below use unsigned.
@ r2 = divisor - 1, reused by the power of 2 test further down
257 subs r2, r1, #1 @ division by 1 or -1 ?
260 rsbmi r3, r0, #0 @ positive dividend value
263 tst r1, r2 @ divisor is power of 2 ?
@ ARM_DIV_BODY arguments: dividend r3, divisor r1, result r0, curbit r2
266 ARM_DIV_BODY r3, r1, r0, r2
272 10: teq ip, r0 @ same sign ?
@ ip asr #31 is 0 when the sign bit of ip is clear and -1 when it is set
277 moveq r0, ip, asr #31
@ Label 12: ARM_DIV2_ORDER leaves the order of divisor r1 in r2
281 12: ARM_DIV2_ORDER r1, r2
290 ENDPROC(__aeabi_idiv)
/*
 * Signed modulo routine, emitted into its own section .text.__modsi3.
 * Divisor r1 and dividend r0 are negated when negative so that the
 * shared macro can work on unsigned values; ip keeps the original
 * dividend so that its sign is still available afterwards.
 */
293 .pushsection .text.__modsi3, "ax"
299 rsbmi r1, r1, #0 @ loops below use unsigned.
300 movs ip, r0 @ preserve sign of dividend
301 rsbmi r0, r0, #0 @ if negative make positive
@ r2 = divisor - 1 serves two purposes: the subs sets the flags for the
@ compare with 1, and the tst against r1 is the power of 2 check.
302 subs r2, r1, #1 @ compare divisor with 1
303 cmpne r0, r1 @ compare dividend with divisor
305 tsthi r1, r2 @ see if divisor is power of 2
@ ARM_MOD_BODY arguments: dividend r0, divisor r1, order r2, spare r3
309 ARM_MOD_BODY r0, r1, r2, r3
/*
 * __aeabi_uidivmod, emitted into its own section
 * .text.__aeabi_uidivmod. The incoming r0 and r1 are saved on the
 * stack together with ip and lr, and the same four slots are later
 * popped into r1, r2, ip and lr.
 */
321 .pushsection .text.__aeabi_uidivmod, "ax"
322 ENTRY(__aeabi_uidivmod)
324 UNWIND(.save {r0, r1, ip, lr} )
@ Save the two arguments plus ip and lr
326 stmfd sp!, {r0, r1, ip, lr}
@ Reload shifted by one register: saved r0 goes to r1, saved r1 to r2
328 ldmfd sp!, {r1, r2, ip, lr}
334 ENDPROC(__aeabi_uidivmod)
/*
 * __aeabi_idivmod, emitted into a section named after the function
 * (.text.__aeabi_idivmod), like the other routines in this file. The
 * incoming r0 and r1 are saved on the stack together with ip and lr,
 * and the same four slots are later popped into r1, r2, ip and lr.
 */
337 .pushsection .text.__aeabi_idivmod, "ax"
338 ENTRY(__aeabi_idivmod)
340 UNWIND(.save {r0, r1, ip, lr} )
@ Save the two arguments plus ip and lr
342 stmfd sp!, {r0, r1, ip, lr}
@ Reload shifted by one register: saved r0 goes to r1, saved r1 to r2
344 ldmfd sp!, {r1, r2, ip, lr}
350 ENDPROC(__aeabi_idivmod)
/*
 * Ldiv0 code, emitted into its own section .text.Ldiv0; judging by the
 * name this is the division-by-zero path (TODO confirm against the
 * callers). The visible instruction sets r0 to 0.
 */
355 .pushsection .text.Ldiv0, "ax"
363 mov r0, #0 @ About as wrong as it could be.
370 /* Thumb-1 specialities */
371 #if defined(CONFIG_SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
/*
 * The helpers below are assembled only when CONFIG_SYS_THUMB_BUILD is
 * defined and CONFIG_HAS_THUMB2 is not. Each one is emitted into its
 * own .text.<name> section.
 */
372 .pushsection .text.__gnu_thumb1_case_sqi, "ax"
373 ENTRY(__gnu_thumb1_case_sqi)
383 ENDPROC(__gnu_thumb1_case_sqi)
386 .pushsection .text.__gnu_thumb1_case_uqi, "ax"
387 ENTRY(__gnu_thumb1_case_uqi)
397 ENDPROC(__gnu_thumb1_case_uqi)
400 .pushsection .text.__gnu_thumb1_case_shi, "ax"
401 ENTRY(__gnu_thumb1_case_shi)
412 ENDPROC(__gnu_thumb1_case_shi)
415 .pushsection .text.__gnu_thumb1_case_uhi, "ax"
416 ENTRY(__gnu_thumb1_case_uhi)
427 ENDPROC(__gnu_thumb1_case_uhi)