# SPDX-License-Identifier: GPL-2.0+
#
-lib-$(CONFIG_USE_PRIVATE_LIBGCC) += _ashldi3.o _ashrdi3.o _divsi3.o \
- _lshrdi3.o _modsi3.o _udivsi3.o _umodsi3.o div0.o \
- _uldivmod.o
+lib-$(CONFIG_USE_PRIVATE_LIBGCC) += ashldi3.o ashrdi3.o divsi3.o \
+ lshrdi3.o modsi3.o udivsi3.o umodsi3.o div0.o \
+ uldivmod.o
ifdef CONFIG_CPU_V7M
obj-y += vectors_m.o crt0.o
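
For reference: these objects are U-Boot's private copies of the libgcc helpers
the compiler emits calls to when CONFIG_USE_PRIVATE_LIBGCC is set; the Makefile
change only drops the leading underscore from each file name. A sketch of the
C-level signatures the routines implement, following the usual libgcc/EABI
conventions (the prototypes below are illustrative, not declared anywhere in
this patch):

    /* 64-bit shifts; the EABI entry points are __aeabi_llsl/_lasr/_llsr */
    long long __ashldi3(long long a, int b);
    long long __ashrdi3(long long a, int b);
    long long __lshrdi3(long long a, int b);   /* logical shift of the bits */
    /* 32-bit division and modulo; EABI aliases __aeabi_idiv/__aeabi_uidiv */
    int __divsi3(int a, int b);
    int __modsi3(int a, int b);
    unsigned int __udivsi3(unsigned int a, unsigned int b);
    unsigned int __umodsi3(unsigned int a, unsigned int b);
    /* __aeabi_uldivmod returns the quotient in r0:r1 and the remainder in
     * r2:r3, which plain C cannot express; div0.o supplies the __div0 hook
     * the routines call on division by zero. */
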
+++ /dev/null
-/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
- Free Software Foundation, Inc.
-
- * SPDX-License-Identifier: GPL-2.0+
- */
-
-#include <linux/linkage.h>
-
-#ifdef __ARMEB__
-#define al r1
-#define ah r0
-#else
-#define al r0
-#define ah r1
-#endif
-
-.globl __ashldi3
-__ashldi3:
-ENTRY(__aeabi_llsl)
-
- subs r3, r2, #32
- rsb ip, r2, #32
- movmi ah, ah, lsl r2
- movpl ah, al, lsl r3
- orrmi ah, ah, al, lsr ip
- mov al, al, lsl r2
- mov pc, lr
-ENDPROC(__aeabi_llsl)
+++ /dev/null
-/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
- Free Software Foundation, Inc.
-
- * SPDX-License-Identifier: GPL-2.0+
- */
-
-#include <linux/linkage.h>
-
-#ifdef __ARMEB__
-#define al r1
-#define ah r0
-#else
-#define al r0
-#define ah r1
-#endif
-
-.globl __ashrdi3
-__ashrdi3:
-ENTRY(__aeabi_lasr)
-
- subs r3, r2, #32
- rsb ip, r2, #32
- movmi al, al, lsr r2
- movpl al, ah, asr r3
- orrmi al, al, ah, lsl ip
- mov ah, ah, asr r2
- mov pc, lr
-ENDPROC(__aeabi_lasr)
+++ /dev/null
-#include <linux/linkage.h>
-
-.macro ARM_DIV_BODY dividend, divisor, result, curbit
-
-#if __LINUX_ARM_ARCH__ >= 5
-
- clz \curbit, \divisor
- clz \result, \dividend
- sub \result, \curbit, \result
- mov \curbit, #1
- mov \divisor, \divisor, lsl \result
- mov \curbit, \curbit, lsl \result
- mov \result, #0
-
-#else
-
- @ Initially shift the divisor left 3 bits if possible,
- @ set curbit accordingly. This allows for curbit to be located
- @ at the left end of each 4 bit nibbles in the division loop
- @ to save one loop in most cases.
- tst \divisor, #0xe0000000
- moveq \divisor, \divisor, lsl #3
- moveq \curbit, #8
- movne \curbit, #1
-
- @ Unless the divisor is very big, shift it up in multiples of
- @ four bits, since this is the amount of unwinding in the main
- @ division loop. Continue shifting until the divisor is
- @ larger than the dividend.
-1: cmp \divisor, #0x10000000
- cmplo \divisor, \dividend
- movlo \divisor, \divisor, lsl #4
- movlo \curbit, \curbit, lsl #4
- blo 1b
-
- @ For very big divisors, we must shift it a bit at a time, or
- @ we will be in danger of overflowing.
-1: cmp \divisor, #0x80000000
- cmplo \divisor, \dividend
- movlo \divisor, \divisor, lsl #1
- movlo \curbit, \curbit, lsl #1
- blo 1b
-
- mov \result, #0
-
-#endif
-
- @ Division loop
-1: cmp \dividend, \divisor
- subhs \dividend, \dividend, \divisor
- orrhs \result, \result, \curbit
- cmp \dividend, \divisor, lsr #1
- subhs \dividend, \dividend, \divisor, lsr #1
- orrhs \result, \result, \curbit, lsr #1
- cmp \dividend, \divisor, lsr #2
- subhs \dividend, \dividend, \divisor, lsr #2
- orrhs \result, \result, \curbit, lsr #2
- cmp \dividend, \divisor, lsr #3
- subhs \dividend, \dividend, \divisor, lsr #3
- orrhs \result, \result, \curbit, lsr #3
- cmp \dividend, #0 @ Early termination?
- movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
- movne \divisor, \divisor, lsr #4
- bne 1b
-
-.endm
-
-.macro ARM_DIV2_ORDER divisor, order
-
-#if __LINUX_ARM_ARCH__ >= 5
-
- clz \order, \divisor
- rsb \order, \order, #31
-
-#else
-
- cmp \divisor, #(1 << 16)
- movhs \divisor, \divisor, lsr #16
- movhs \order, #16
- movlo \order, #0
-
- cmp \divisor, #(1 << 8)
- movhs \divisor, \divisor, lsr #8
- addhs \order, \order, #8
-
- cmp \divisor, #(1 << 4)
- movhs \divisor, \divisor, lsr #4
- addhs \order, \order, #4
-
- cmp \divisor, #(1 << 2)
- addhi \order, \order, #3
- addls \order, \order, \divisor, lsr #1
-
-#endif
-
-.endm
-
- .align 5
-.globl __divsi3
-__divsi3:
-ENTRY(__aeabi_idiv)
- cmp r1, #0
- eor ip, r0, r1 @ save the sign of the result.
- beq Ldiv0
- rsbmi r1, r1, #0 @ loops below use unsigned.
- subs r2, r1, #1 @ division by 1 or -1 ?
- beq 10f
- movs r3, r0
- rsbmi r3, r0, #0 @ positive dividend value
- cmp r3, r1
- bls 11f
- tst r1, r2 @ divisor is power of 2 ?
- beq 12f
-
- ARM_DIV_BODY r3, r1, r0, r2
-
- cmp ip, #0
- rsbmi r0, r0, #0
- mov pc, lr
-
-10: teq ip, r0 @ same sign ?
- rsbmi r0, r0, #0
- mov pc, lr
-
-11: movlo r0, #0
- moveq r0, ip, asr #31
- orreq r0, r0, #1
- mov pc, lr
-
-12: ARM_DIV2_ORDER r1, r2
-
- cmp ip, #0
- mov r0, r3, lsr r2
- rsbmi r0, r0, #0
- mov pc, lr
-
-Ldiv0:
-
- str lr, [sp, #-4]!
- bl __div0
- mov r0, #0 @ About as wrong as it could be.
- ldr pc, [sp], #4
-ENDPROC(__aeabi_idiv)
+++ /dev/null
-/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
- Free Software Foundation, Inc.
-
- * SPDX-License-Identifier: GPL-2.0+
- */
-
-#include <linux/linkage.h>
-
-#ifdef __ARMEB__
-#define al r1
-#define ah r0
-#else
-#define al r0
-#define ah r1
-#endif
-
-.globl __lshrdi3
-__lshrdi3:
-ENTRY(__aeabi_llsr)
-
- subs r3, r2, #32
- rsb ip, r2, #32
- movmi al, al, lsr r2
- movpl al, ah, lsr r3
- orrmi al, al, ah, lsl ip
- mov ah, ah, lsr r2
- mov pc, lr
-ENDPROC(__aeabi_llsr)
+++ /dev/null
-#include <linux/linkage.h>
-
-.macro ARM_MOD_BODY dividend, divisor, order, spare
-
-#if __LINUX_ARM_ARCH__ >= 5
-
- clz \order, \divisor
- clz \spare, \dividend
- sub \order, \order, \spare
- mov \divisor, \divisor, lsl \order
-
-#else
-
- mov \order, #0
-
- @ Unless the divisor is very big, shift it up in multiples of
- @ four bits, since this is the amount of unwinding in the main
- @ division loop. Continue shifting until the divisor is
- @ larger than the dividend.
-1: cmp \divisor, #0x10000000
- cmplo \divisor, \dividend
- movlo \divisor, \divisor, lsl #4
- addlo \order, \order, #4
- blo 1b
-
- @ For very big divisors, we must shift it a bit at a time, or
- @ we will be in danger of overflowing.
-1: cmp \divisor, #0x80000000
- cmplo \divisor, \dividend
- movlo \divisor, \divisor, lsl #1
- addlo \order, \order, #1
- blo 1b
-
-#endif
-
- @ Perform all needed substractions to keep only the reminder.
- @ Do comparisons in batch of 4 first.
- subs \order, \order, #3 @ yes, 3 is intended here
- blt 2f
-
-1: cmp \dividend, \divisor
- subhs \dividend, \dividend, \divisor
- cmp \dividend, \divisor, lsr #1
- subhs \dividend, \dividend, \divisor, lsr #1
- cmp \dividend, \divisor, lsr #2
- subhs \dividend, \dividend, \divisor, lsr #2
- cmp \dividend, \divisor, lsr #3
- subhs \dividend, \dividend, \divisor, lsr #3
- cmp \dividend, #1
- mov \divisor, \divisor, lsr #4
- subges \order, \order, #4
- bge 1b
-
- tst \order, #3
- teqne \dividend, #0
- beq 5f
-
- @ Either 1, 2 or 3 comparison/substractions are left.
-2: cmn \order, #2
- blt 4f
- beq 3f
- cmp \dividend, \divisor
- subhs \dividend, \dividend, \divisor
- mov \divisor, \divisor, lsr #1
-3: cmp \dividend, \divisor
- subhs \dividend, \dividend, \divisor
- mov \divisor, \divisor, lsr #1
-4: cmp \dividend, \divisor
- subhs \dividend, \dividend, \divisor
-5:
-.endm
-
- .align 5
-ENTRY(__modsi3)
- cmp r1, #0
- beq Ldiv0
- rsbmi r1, r1, #0 @ loops below use unsigned.
- movs ip, r0 @ preserve sign of dividend
- rsbmi r0, r0, #0 @ if negative make positive
- subs r2, r1, #1 @ compare divisor with 1
- cmpne r0, r1 @ compare dividend with divisor
- moveq r0, #0
- tsthi r1, r2 @ see if divisor is power of 2
- andeq r0, r0, r2
- bls 10f
-
- ARM_MOD_BODY r0, r1, r2, r3
-
-10: cmp ip, #0
- rsbmi r0, r0, #0
- mov pc, lr
-ENDPROC(__modsi3)
-
-Ldiv0:
-
- str lr, [sp, #-4]!
- bl __div0
- mov r0, #0 @ About as wrong as it could be.
- ldr pc, [sp], #4
+++ /dev/null
-#include <linux/linkage.h>
-
-/* # 1 "libgcc1.S" */
-@ libgcc1 routines for ARM cpu.
-@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
-dividend .req r0
-divisor .req r1
-result .req r2
-curbit .req r3
-/* ip .req r12 */
-/* sp .req r13 */
-/* lr .req r14 */
-/* pc .req r15 */
- .text
- .globl __udivsi3
- .type __udivsi3 ,function
- .globl __aeabi_uidiv
- .type __aeabi_uidiv ,function
- .align 0
- __udivsi3:
- __aeabi_uidiv:
- cmp divisor, #0
- beq Ldiv0
- mov curbit, #1
- mov result, #0
- cmp dividend, divisor
- bcc Lgot_result
-Loop1:
- @ Unless the divisor is very big, shift it up in multiples of
- @ four bits, since this is the amount of unwinding in the main
- @ division loop. Continue shifting until the divisor is
- @ larger than the dividend.
- cmp divisor, #0x10000000
- cmpcc divisor, dividend
- movcc divisor, divisor, lsl #4
- movcc curbit, curbit, lsl #4
- bcc Loop1
-Lbignum:
- @ For very big divisors, we must shift it a bit at a time, or
- @ we will be in danger of overflowing.
- cmp divisor, #0x80000000
- cmpcc divisor, dividend
- movcc divisor, divisor, lsl #1
- movcc curbit, curbit, lsl #1
- bcc Lbignum
-Loop3:
- @ Test for possible subtractions, and note which bits
- @ are done in the result. On the final pass, this may subtract
- @ too much from the dividend, but the result will be ok, since the
- @ "bit" will have been shifted out at the bottom.
- cmp dividend, divisor
- subcs dividend, dividend, divisor
- orrcs result, result, curbit
- cmp dividend, divisor, lsr #1
- subcs dividend, dividend, divisor, lsr #1
- orrcs result, result, curbit, lsr #1
- cmp dividend, divisor, lsr #2
- subcs dividend, dividend, divisor, lsr #2
- orrcs result, result, curbit, lsr #2
- cmp dividend, divisor, lsr #3
- subcs dividend, dividend, divisor, lsr #3
- orrcs result, result, curbit, lsr #3
- cmp dividend, #0 @ Early termination?
- movnes curbit, curbit, lsr #4 @ No, any more bits to do?
- movne divisor, divisor, lsr #4
- bne Loop3
-Lgot_result:
- mov r0, result
- mov pc, lr
-Ldiv0:
- str lr, [sp, #-4]!
- bl __div0 (PLT)
- mov r0, #0 @ about as wrong as it could be
- ldmia sp!, {pc}
- .size __udivsi3 , . - __udivsi3
-
-ENTRY(__aeabi_uidivmod)
-
- stmfd sp!, {r0, r1, ip, lr}
- bl __aeabi_uidiv
- ldmfd sp!, {r1, r2, ip, lr}
- mul r3, r0, r2
- sub r1, r1, r3
- mov pc, lr
-ENDPROC(__aeabi_uidivmod)
-
-ENTRY(__aeabi_idivmod)
-
- stmfd sp!, {r0, r1, ip, lr}
- bl __aeabi_idiv
- ldmfd sp!, {r1, r2, ip, lr}
- mul r3, r0, r2
- sub r1, r1, r3
- mov pc, lr
-ENDPROC(__aeabi_idivmod)
+++ /dev/null
-/*
- * Copyright 2010, Google Inc.
- *
- * Brought in from coreboot uldivmod.S
- *
- * SPDX-License-Identifier: GPL-2.0
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-/* We don't use Thumb instructions for now */
-#define ARM(x...) x
-#define THUMB(x...)
-
-/*
- * A, Q = r0 + (r1 << 32)
- * B, R = r2 + (r3 << 32)
- * A / B = Q ... R
- */
-
-A_0 .req r0
-A_1 .req r1
-B_0 .req r2
-B_1 .req r3
-C_0 .req r4
-C_1 .req r5
-D_0 .req r6
-D_1 .req r7
-
-Q_0 .req r0
-Q_1 .req r1
-R_0 .req r2
-R_1 .req r3
-
-THUMB(
-TMP .req r8
-)
-
-ENTRY(__aeabi_uldivmod)
- stmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) lr}
- @ Test if B == 0
- orrs ip, B_0, B_1 @ Z set -> B == 0
- beq L_div_by_0
- @ Test if B is power of 2: (B & (B - 1)) == 0
- subs C_0, B_0, #1
- sbc C_1, B_1, #0
- tst C_0, B_0
- tsteq B_1, C_1
- beq L_pow2
- @ Test if A_1 == B_1 == 0
- orrs ip, A_1, B_1
- beq L_div_32_32
-
-L_div_64_64:
-/* CLZ only exists in ARM architecture version 5 and above. */
-#ifdef HAVE_CLZ
- mov C_0, #1
- mov C_1, #0
- @ D_0 = clz A
- teq A_1, #0
- clz D_0, A_1
- clzeq ip, A_0
- addeq D_0, D_0, ip
- @ D_1 = clz B
- teq B_1, #0
- clz D_1, B_1
- clzeq ip, B_0
- addeq D_1, D_1, ip
- @ if clz B - clz A > 0
- subs D_0, D_1, D_0
- bls L_done_shift
- @ B <<= (clz B - clz A)
- subs D_1, D_0, #32
- rsb ip, D_0, #32
- movmi B_1, B_1, lsl D_0
-ARM( orrmi B_1, B_1, B_0, lsr ip )
-THUMB( lsrmi TMP, B_0, ip )
-THUMB( orrmi B_1, B_1, TMP )
- movpl B_1, B_0, lsl D_1
- mov B_0, B_0, lsl D_0
- @ C = 1 << (clz B - clz A)
- movmi C_1, C_1, lsl D_0
-ARM( orrmi C_1, C_1, C_0, lsr ip )
-THUMB( lsrmi TMP, C_0, ip )
-THUMB( orrmi C_1, C_1, TMP )
- movpl C_1, C_0, lsl D_1
- mov C_0, C_0, lsl D_0
-L_done_shift:
- mov D_0, #0
- mov D_1, #0
- @ C: current bit; D: result
-#else
- @ C: current bit; D: result
- mov C_0, #1
- mov C_1, #0
- mov D_0, #0
- mov D_1, #0
-L_lsl_4:
- cmp B_1, #0x10000000
- cmpcc B_1, A_1
- cmpeq B_0, A_0
- bcs L_lsl_1
- @ B <<= 4
- mov B_1, B_1, lsl #4
- orr B_1, B_1, B_0, lsr #28
- mov B_0, B_0, lsl #4
- @ C <<= 4
- mov C_1, C_1, lsl #4
- orr C_1, C_1, C_0, lsr #28
- mov C_0, C_0, lsl #4
- b L_lsl_4
-L_lsl_1:
- cmp B_1, #0x80000000
- cmpcc B_1, A_1
- cmpeq B_0, A_0
- bcs L_subtract
- @ B <<= 1
- mov B_1, B_1, lsl #1
- orr B_1, B_1, B_0, lsr #31
- mov B_0, B_0, lsl #1
- @ C <<= 1
- mov C_1, C_1, lsl #1
- orr C_1, C_1, C_0, lsr #31
- mov C_0, C_0, lsl #1
- b L_lsl_1
-#endif
-L_subtract:
- @ if A >= B
- cmp A_1, B_1
- cmpeq A_0, B_0
- bcc L_update
- @ A -= B
- subs A_0, A_0, B_0
- sbc A_1, A_1, B_1
- @ D |= C
- orr D_0, D_0, C_0
- orr D_1, D_1, C_1
-L_update:
- @ if A == 0: break
- orrs ip, A_1, A_0
- beq L_exit
- @ C >>= 1
- movs C_1, C_1, lsr #1
- movs C_0, C_0, rrx
- @ if C == 0: break
- orrs ip, C_1, C_0
- beq L_exit
- @ B >>= 1
- movs B_1, B_1, lsr #1
- mov B_0, B_0, rrx
- b L_subtract
-L_exit:
- @ Note: A, B & Q, R are aliases
- mov R_0, A_0
- mov R_1, A_1
- mov Q_0, D_0
- mov Q_1, D_1
- ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
-
-L_div_32_32:
- @ Note: A_0 & r0 are aliases
- @ Q_1 r1
- mov r1, B_0
- bl __aeabi_uidivmod
- mov R_0, r1
- mov R_1, #0
- mov Q_1, #0
- ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
-
-L_pow2:
-#ifdef HAVE_CLZ
- @ Note: A, B and Q, R are aliases
- @ R = A & (B - 1)
- and C_0, A_0, C_0
- and C_1, A_1, C_1
- @ Q = A >> log2(B)
- @ Note: B must not be 0 here!
- clz D_0, B_0
- add D_1, D_0, #1
- rsbs D_0, D_0, #31
- bpl L_1
- clz D_0, B_1
- rsb D_0, D_0, #31
- mov A_0, A_1, lsr D_0
- add D_0, D_0, #32
-L_1:
- movpl A_0, A_0, lsr D_0
-ARM( orrpl A_0, A_0, A_1, lsl D_1 )
-THUMB( lslpl TMP, A_1, D_1 )
-THUMB( orrpl A_0, A_0, TMP )
- mov A_1, A_1, lsr D_0
- @ Mov back C to R
- mov R_0, C_0
- mov R_1, C_1
- ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
-#else
- @ Note: A, B and Q, R are aliases
- @ R = A & (B - 1)
- and C_0, A_0, C_0
- and C_1, A_1, C_1
- @ Q = A >> log2(B)
- @ Note: B must not be 0 here!
- @ Count the leading zeroes in B.
- mov D_0, #0
- orrs B_0, B_0, B_0
- @ If B is greater than 1 << 31, divide A and B by 1 << 32.
- moveq A_0, A_1
- moveq A_1, #0
- moveq B_0, B_1
- @ Count the remaining leading zeroes in B.
- movs B_1, B_0, lsl #16
- addeq D_0, #16
- moveq B_0, B_0, lsr #16
- tst B_0, #0xff
- addeq D_0, #8
- moveq B_0, B_0, lsr #8
- tst B_0, #0xf
- addeq D_0, #4
- moveq B_0, B_0, lsr #4
- tst B_0, #0x3
- addeq D_0, #2
- moveq B_0, B_0, lsr #2
- tst B_0, #0x1
- addeq D_0, #1
- @ Shift A to the right by the appropriate amount.
- rsb D_1, D_0, #32
- mov Q_0, A_0, lsr D_0
- orr Q_0, A_1, lsl D_1
- mov Q_1, A_1, lsr D_0
- @ Move C to R
- mov R_0, C_0
- mov R_1, C_1
- ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
-#endif
-
-L_div_by_0:
- bl __div0
- @ As wrong as it could be
- mov Q_0, #0
- mov Q_1, #0
- mov R_0, #0
- mov R_1, #0
- ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
-ENDPROC(__aeabi_uldivmod)
+++ /dev/null
-#include <linux/linkage.h>
-
-/* # 1 "libgcc1.S" */
-@ libgcc1 routines for ARM cpu.
-@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
-/* # 145 "libgcc1.S" */
-dividend .req r0
-divisor .req r1
-overdone .req r2
-curbit .req r3
-/* ip .req r12 */
-/* sp .req r13 */
-/* lr .req r14 */
-/* pc .req r15 */
- .text
- .type __umodsi3 ,function
- .align 0
- ENTRY(__umodsi3)
- cmp divisor, #0
- beq Ldiv0
- mov curbit, #1
- cmp dividend, divisor
- movcc pc, lr
-Loop1:
- @ Unless the divisor is very big, shift it up in multiples of
- @ four bits, since this is the amount of unwinding in the main
- @ division loop. Continue shifting until the divisor is
- @ larger than the dividend.
- cmp divisor, #0x10000000
- cmpcc divisor, dividend
- movcc divisor, divisor, lsl #4
- movcc curbit, curbit, lsl #4
- bcc Loop1
-Lbignum:
- @ For very big divisors, we must shift it a bit at a time, or
- @ we will be in danger of overflowing.
- cmp divisor, #0x80000000
- cmpcc divisor, dividend
- movcc divisor, divisor, lsl #1
- movcc curbit, curbit, lsl #1
- bcc Lbignum
-Loop3:
- @ Test for possible subtractions. On the final pass, this may
- @ subtract too much from the dividend, so keep track of which
- @ subtractions are done, we can fix them up afterwards...
- mov overdone, #0
- cmp dividend, divisor
- subcs dividend, dividend, divisor
- cmp dividend, divisor, lsr #1
- subcs dividend, dividend, divisor, lsr #1
- orrcs overdone, overdone, curbit, ror #1
- cmp dividend, divisor, lsr #2
- subcs dividend, dividend, divisor, lsr #2
- orrcs overdone, overdone, curbit, ror #2
- cmp dividend, divisor, lsr #3
- subcs dividend, dividend, divisor, lsr #3
- orrcs overdone, overdone, curbit, ror #3
- mov ip, curbit
- cmp dividend, #0 @ Early termination?
- movnes curbit, curbit, lsr #4 @ No, any more bits to do?
- movne divisor, divisor, lsr #4
- bne Loop3
- @ Any subtractions that we should not have done will be recorded in
- @ the top three bits of "overdone". Exactly which were not needed
- @ are governed by the position of the bit, stored in ip.
- @ If we terminated early, because dividend became zero,
- @ then none of the below will match, since the bit in ip will not be
- @ in the bottom nibble.
- ands overdone, overdone, #0xe0000000
- moveq pc, lr @ No fixups needed
- tst overdone, ip, ror #3
- addne dividend, dividend, divisor, lsr #3
- tst overdone, ip, ror #2
- addne dividend, dividend, divisor, lsr #2
- tst overdone, ip, ror #1
- addne dividend, dividend, divisor, lsr #1
- mov pc, lr
-Ldiv0:
- str lr, [sp, #-4]!
- bl __div0 (PLT)
- mov r0, #0 @ about as wrong as it could be
- ldmia sp!, {pc}
- .size __umodsi3 , . - __umodsi3
-/* # 320 "libgcc1.S" */
-/* # 421 "libgcc1.S" */
-/* # 433 "libgcc1.S" */
-/* # 456 "libgcc1.S" */
-/* # 500 "libgcc1.S" */
-/* # 580 "libgcc1.S" */
-ENDPROC(__umodsi3)
--- /dev/null
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
+ Free Software Foundation, Inc.
+
+ * SPDX-License-Identifier: GPL-2.0+
+ */
+
+#include <linux/linkage.h>
+
+#ifdef __ARMEB__
+#define al r1
+#define ah r0
+#else
+#define al r0
+#define ah r1
+#endif
+
+.globl __ashldi3
+__ashldi3:
+ENTRY(__aeabi_llsl)
+
+ subs r3, r2, #32
+ rsb ip, r2, #32
+ movmi ah, ah, lsl r2
+ movpl ah, al, lsl r3
+ orrmi ah, ah, al, lsr ip
+ mov al, al, lsl r2
+ mov pc, lr
+ENDPROC(__aeabi_llsl)
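
__aeabi_llsl above works on the two 32-bit halves (al/ah, swapped under
__ARMEB__): for shift counts below 32 the high half picks up the bits that
cross the boundary, and for 32 and up the low half moves into the high half
wholesale. A minimal C model of the same scheme, assuming a count below 64;
the explicit n != 0 guard is needed because C, unlike an ARM
register-specified shift, leaves shifts by 32 undefined:

    unsigned long long llsl(unsigned long long a, unsigned int n)
    {
        unsigned int al = (unsigned int)a;          /* low half */
        unsigned int ah = (unsigned int)(a >> 32);  /* high half */

        if (n >= 32) {                  /* 'pl' path: subs r3, r2, #32 */
            ah = al << (n - 32);
            al = 0;
        } else if (n) {                 /* 'mi' path */
            ah = (ah << n) | (al >> (32 - n));
            al <<= n;
        }
        return ((unsigned long long)ah << 32) | al;
    }
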
--- /dev/null
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
+ Free Software Foundation, Inc.
+
+ * SPDX-License-Identifier: GPL-2.0+
+ */
+
+#include <linux/linkage.h>
+
+#ifdef __ARMEB__
+#define al r1
+#define ah r0
+#else
+#define al r0
+#define ah r1
+#endif
+
+.globl __ashrdi3
+__ashrdi3:
+ENTRY(__aeabi_lasr)
+
+ subs r3, r2, #32
+ rsb ip, r2, #32
+ movmi al, al, lsr r2
+ movpl al, ah, asr r3
+ orrmi al, al, ah, lsl ip
+ mov ah, ah, asr r2
+ mov pc, lr
+ENDPROC(__aeabi_lasr)
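
__aeabi_lasr mirrors the left shift, with the high half shifted arithmetically
so the sign bit replicates into the vacated positions. A hedged C equivalent
under the same n < 64 assumption (it leans on the common arithmetic behaviour
of >> on signed values, which C only makes implementation-defined):

    long long lasr(long long a, unsigned int n)
    {
        unsigned int al = (unsigned int)a;
        int ah = (int)(a >> 32);

        if (n >= 32) {
            al = (unsigned int)(ah >> (n - 32));
            ah >>= 31;                  /* every bit becomes the sign */
        } else if (n) {
            al = (al >> n) | ((unsigned int)ah << (32 - n));
            ah >>= n;
        }
        return (long long)(((unsigned long long)(unsigned int)ah << 32) | al);
    }
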
--- /dev/null
+#include <linux/linkage.h>
+
+.macro ARM_DIV_BODY dividend, divisor, result, curbit
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+ clz \curbit, \divisor
+ clz \result, \dividend
+ sub \result, \curbit, \result
+ mov \curbit, #1
+ mov \divisor, \divisor, lsl \result
+ mov \curbit, \curbit, lsl \result
+ mov \result, #0
+
+#else
+
+ @ Initially shift the divisor left 3 bits if possible,
+ @ set curbit accordingly. This allows for curbit to be located
+ @ at the left end of each 4-bit nibble in the division loop
+ @ to save one loop in most cases.
+ tst \divisor, #0xe0000000
+ moveq \divisor, \divisor, lsl #3
+ moveq \curbit, #8
+ movne \curbit, #1
+
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ movlo \curbit, \curbit, lsl #4
+ blo 1b
+
+ @ For a very big divisor, we must shift it one bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ movlo \curbit, \curbit, lsl #1
+ blo 1b
+
+ mov \result, #0
+
+#endif
+
+ @ Division loop
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ orrhs \result, \result, \curbit
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ orrhs \result, \result, \curbit, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ orrhs \result, \result, \curbit, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ orrhs \result, \result, \curbit, lsr #3
+ cmp \dividend, #0 @ Early termination?
+ movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
+ movne \divisor, \divisor, lsr #4
+ bne 1b
+
+.endm
+
+.macro ARM_DIV2_ORDER divisor, order
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+ clz \order, \divisor
+ rsb \order, \order, #31
+
+#else
+
+ cmp \divisor, #(1 << 16)
+ movhs \divisor, \divisor, lsr #16
+ movhs \order, #16
+ movlo \order, #0
+
+ cmp \divisor, #(1 << 8)
+ movhs \divisor, \divisor, lsr #8
+ addhs \order, \order, #8
+
+ cmp \divisor, #(1 << 4)
+ movhs \divisor, \divisor, lsr #4
+ addhs \order, \order, #4
+
+ cmp \divisor, #(1 << 2)
+ addhi \order, \order, #3
+ addls \order, \order, \divisor, lsr #1
+
+#endif
+
+.endm
+
+ .align 5
+.globl __divsi3
+__divsi3:
+ENTRY(__aeabi_idiv)
+ cmp r1, #0
+ eor ip, r0, r1 @ save the sign of the result.
+ beq Ldiv0
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ subs r2, r1, #1 @ division by 1 or -1 ?
+ beq 10f
+ movs r3, r0
+ rsbmi r3, r0, #0 @ positive dividend value
+ cmp r3, r1
+ bls 11f
+ tst r1, r2 @ divisor is power of 2 ?
+ beq 12f
+
+ ARM_DIV_BODY r3, r1, r0, r2
+
+ cmp ip, #0
+ rsbmi r0, r0, #0
+ mov pc, lr
+
+10: teq ip, r0 @ same sign ?
+ rsbmi r0, r0, #0
+ mov pc, lr
+
+11: movlo r0, #0
+ moveq r0, ip, asr #31
+ orreq r0, r0, #1
+ mov pc, lr
+
+12: ARM_DIV2_ORDER r1, r2
+
+ cmp ip, #0
+ mov r0, r3, lsr r2
+ rsbmi r0, r0, #0
+ mov pc, lr
+
+Ldiv0:
+
+ str lr, [sp, #-4]!
+ bl __div0
+ mov r0, #0 @ About as wrong as it could be.
+ ldr pc, [sp], #4
+ENDPROC(__aeabi_idiv)
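
__aeabi_idiv above reduces signed division to unsigned: eor ip, r0, r1 records
the sign of the result, both operands are negated to positive, and the fast
paths cover division by 1 or -1, a dividend no bigger than the divisor, and
power-of-two divisors (ARM_DIV2_ORDER is just a log2). A hedged C rendering of
that control flow, with a plain bit-at-a-time loop standing in for the 4-way
unrolled ARM_DIV_BODY and GCC's __builtin_clz standing in for the ARMv5 clz:

    static unsigned int udiv_loop(unsigned int n, unsigned int d)
    {
        unsigned int bit = 1, q = 0;

        /* line d up under n, tracking the weight of the quotient bit */
        while (d < n && !(d & 0x80000000u)) {
            d <<= 1;
            bit <<= 1;
        }
        do {
            if (n >= d) {               /* one trial subtraction per bit */
                n -= d;
                q |= bit;
            }
            d >>= 1;
        } while (bit >>= 1);
        return q;                       /* n now holds the remainder */
    }

    int idiv(int num, int den)          /* den == 0 is trapped via __div0 */
    {
        int neg = (num ^ den) < 0;      /* eor ip, r0, r1 */
        unsigned int n = num < 0 ? 0u - (unsigned int)num : (unsigned int)num;
        unsigned int d = den < 0 ? 0u - (unsigned int)den : (unsigned int)den;
        unsigned int q;

        if (d == 1)                     /* division by 1 or -1 */
            q = n;
        else if (n < d)
            q = 0;
        else if (!(d & (d - 1)))        /* power of two: shift by log2(d) */
            q = n >> (31 - __builtin_clz(d));
        else
            q = udiv_loop(n, d);
        return neg ? (int)(0u - q) : (int)q;
    }
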
--- /dev/null
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
+ Free Software Foundation, Inc.
+
+ * SPDX-License-Identifier: GPL-2.0+
+ */
+
+#include <linux/linkage.h>
+
+#ifdef __ARMEB__
+#define al r1
+#define ah r0
+#else
+#define al r0
+#define ah r1
+#endif
+
+.globl __lshrdi3
+__lshrdi3:
+ENTRY(__aeabi_llsr)
+
+ subs r3, r2, #32
+ rsb ip, r2, #32
+ movmi al, al, lsr r2
+ movpl al, ah, lsr r3
+ orrmi al, al, ah, lsl ip
+ mov ah, ah, lsr r2
+ mov pc, lr
+ENDPROC(__aeabi_llsr)
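
__aeabi_llsr only differs from the arithmetic variant in shifting zeros into
the high half. The same C model, logical this time (again assuming n < 64):

    unsigned long long llsr(unsigned long long a, unsigned int n)
    {
        unsigned int al = (unsigned int)a;
        unsigned int ah = (unsigned int)(a >> 32);

        if (n >= 32) {
            al = ah >> (n - 32);
            ah = 0;
        } else if (n) {
            al = (al >> n) | (ah << (32 - n));
            ah >>= n;
        }
        return ((unsigned long long)ah << 32) | al;
    }
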
--- /dev/null
+#include <linux/linkage.h>
+
+.macro ARM_MOD_BODY dividend, divisor, order, spare
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+ clz \order, \divisor
+ clz \spare, \dividend
+ sub \order, \order, \spare
+ mov \divisor, \divisor, lsl \order
+
+#else
+
+ mov \order, #0
+
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ addlo \order, \order, #4
+ blo 1b
+
+ @ For a very big divisor, we must shift it one bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ addlo \order, \order, #1
+ blo 1b
+
+#endif
+
+ @ Perform all needed subtractions to keep only the remainder.
+ @ Do comparisons in batches of 4 first.
+ subs \order, \order, #3 @ yes, 3 is intended here
+ blt 2f
+
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ cmp \dividend, #1
+ mov \divisor, \divisor, lsr #4
+ subges \order, \order, #4
+ bge 1b
+
+ tst \order, #3
+ teqne \dividend, #0
+ beq 5f
+
+ @ Either 1, 2 or 3 comparisons/subtractions are left.
+2: cmn \order, #2
+ blt 4f
+ beq 3f
+ cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+3: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+4: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+5:
+.endm
+
+ .align 5
+ENTRY(__modsi3)
+ cmp r1, #0
+ beq Ldiv0
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ movs ip, r0 @ preserve sign of dividend
+ rsbmi r0, r0, #0 @ if negative make positive
+ subs r2, r1, #1 @ compare divisor with 1
+ cmpne r0, r1 @ compare dividend with divisor
+ moveq r0, #0
+ tsthi r1, r2 @ see if divisor is power of 2
+ andeq r0, r0, r2
+ bls 10f
+
+ ARM_MOD_BODY r0, r1, r2, r3
+
+10: cmp ip, #0
+ rsbmi r0, r0, #0
+ mov pc, lr
+ENDPROC(__modsi3)
+
+Ldiv0:
+
+ str lr, [sp, #-4]!
+ bl __div0
+ mov r0, #0 @ About as wrong as it could be.
+ ldr pc, [sp], #4
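
__modsi3 keeps only the remainder, so ARM_MOD_BODY never materialises quotient
bits: it counts how far the divisor was shifted up (order) and walks it back
down with trial subtractions. A hedged, non-unrolled C model of the same flow,
using GCC's __builtin_clz in place of the ARMv5 clz path:

    int imod(int num, int den)          /* den == 0 is trapped via __div0 */
    {
        unsigned int n = num < 0 ? 0u - (unsigned int)num : (unsigned int)num;
        unsigned int d = den < 0 ? 0u - (unsigned int)den : (unsigned int)den;

        if (d == 1)                     /* x mod 1 or -1 is 0 */
            n = 0;
        else if (!(d & (d - 1)))        /* power of two: mask */
            n &= d - 1;
        else if (n >= d) {
            int order = __builtin_clz(d) - __builtin_clz(n);

            d <<= order;                /* align d's top bit with n's */
            for (; order >= 0; order--) {
                if (n >= d)
                    n -= d;
                d >>= 1;
            }
        }
        /* the remainder keeps the sign of the dividend */
        return num < 0 ? (int)(0u - n) : (int)n;
    }
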
--- /dev/null
+#include <linux/linkage.h>
+
+/* # 1 "libgcc1.S" */
+@ libgcc1 routines for ARM cpu.
+@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
+dividend .req r0
+divisor .req r1
+result .req r2
+curbit .req r3
+/* ip .req r12 */
+/* sp .req r13 */
+/* lr .req r14 */
+/* pc .req r15 */
+ .text
+ .globl __udivsi3
+ .type __udivsi3 ,function
+ .globl __aeabi_uidiv
+ .type __aeabi_uidiv ,function
+ .align 0
+ __udivsi3:
+ __aeabi_uidiv:
+ cmp divisor, #0
+ beq Ldiv0
+ mov curbit, #1
+ mov result, #0
+ cmp dividend, divisor
+ bcc Lgot_result
+Loop1:
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+ cmp divisor, #0x10000000
+ cmpcc divisor, dividend
+ movcc divisor, divisor, lsl #4
+ movcc curbit, curbit, lsl #4
+ bcc Loop1
+Lbignum:
+ @ For a very big divisor, we must shift it one bit at a time, or
+ @ we will be in danger of overflowing.
+ cmp divisor, #0x80000000
+ cmpcc divisor, dividend
+ movcc divisor, divisor, lsl #1
+ movcc curbit, curbit, lsl #1
+ bcc Lbignum
+Loop3:
+ @ Test for possible subtractions, and note which bits
+ @ are done in the result. On the final pass, this may subtract
+ @ too much from the dividend, but the result will be ok, since the
+ @ "bit" will have been shifted out at the bottom.
+ cmp dividend, divisor
+ subcs dividend, dividend, divisor
+ orrcs result, result, curbit
+ cmp dividend, divisor, lsr #1
+ subcs dividend, dividend, divisor, lsr #1
+ orrcs result, result, curbit, lsr #1
+ cmp dividend, divisor, lsr #2
+ subcs dividend, dividend, divisor, lsr #2
+ orrcs result, result, curbit, lsr #2
+ cmp dividend, divisor, lsr #3
+ subcs dividend, dividend, divisor, lsr #3
+ orrcs result, result, curbit, lsr #3
+ cmp dividend, #0 @ Early termination?
+ movnes curbit, curbit, lsr #4 @ No, any more bits to do?
+ movne divisor, divisor, lsr #4
+ bne Loop3
+Lgot_result:
+ mov r0, result
+ mov pc, lr
+Ldiv0:
+ str lr, [sp, #-4]!
+ bl __div0 (PLT)
+ mov r0, #0 @ about as wrong as it could be
+ ldmia sp!, {pc}
+ .size __udivsi3 , . - __udivsi3
+
+ENTRY(__aeabi_uidivmod)
+
+ stmfd sp!, {r0, r1, ip, lr}
+ bl __aeabi_uidiv
+ ldmfd sp!, {r1, r2, ip, lr}
+ mul r3, r0, r2
+ sub r1, r1, r3
+ mov pc, lr
+ENDPROC(__aeabi_uidivmod)
+
+ENTRY(__aeabi_idivmod)
+
+ stmfd sp!, {r0, r1, ip, lr}
+ bl __aeabi_idiv
+ ldmfd sp!, {r1, r2, ip, lr}
+ mul r3, r0, r2
+ sub r1, r1, r3
+ mov pc, lr
+ENDPROC(__aeabi_idivmod)
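
__udivsi3 is the same normalise-then-subtract scheme again (curbit and the
divisor march up together, then quotient bits peel off on the way down; see
the udiv_loop sketch after __divsi3 above). The __aeabi_uidivmod and
__aeabi_idivmod wrappers then recover the remainder from the quotient with a
single mul/sub: after the bl, r0 is n/d and the restacked r1/r2 are the
original n and d. The identity they compute, in C (hypothetical helper name):

    #include <assert.h>

    /* What __aeabi_uidivmod leaves in r0 (quotient) and r1 (remainder). */
    static void uidivmod(unsigned int n, unsigned int d,
                         unsigned int *q, unsigned int *r)
    {
        *q = n / d;                 /* stands in for the __udivsi3 loop */
        *r = n - *q * d;            /* mul r3, r0, r2 ; sub r1, r1, r3 */
    }

    int main(void)
    {
        unsigned int q, r;

        uidivmod(1000u, 7u, &q, &r);
        assert(q == 142u && r == 6u);   /* 142 * 7 + 6 == 1000 */
        return 0;
    }
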
--- /dev/null
+/*
+ * Copyright 2010, Google Inc.
+ *
+ * Brought in from coreboot uldivmod.S
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/* We don't use Thumb instructions for now */
+#define ARM(x...) x
+#define THUMB(x...)
+
+/*
+ * A, Q = r0 + (r1 << 32)
+ * B, R = r2 + (r3 << 32)
+ * A / B = Q ... R
+ */
+
+A_0 .req r0
+A_1 .req r1
+B_0 .req r2
+B_1 .req r3
+C_0 .req r4
+C_1 .req r5
+D_0 .req r6
+D_1 .req r7
+
+Q_0 .req r0
+Q_1 .req r1
+R_0 .req r2
+R_1 .req r3
+
+THUMB(
+TMP .req r8
+)
+
+ENTRY(__aeabi_uldivmod)
+ stmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) lr}
+ @ Test if B == 0
+ orrs ip, B_0, B_1 @ Z set -> B == 0
+ beq L_div_by_0
+ @ Test if B is power of 2: (B & (B - 1)) == 0
+ subs C_0, B_0, #1
+ sbc C_1, B_1, #0
+ tst C_0, B_0
+ tsteq B_1, C_1
+ beq L_pow2
+ @ Test if A_1 == B_1 == 0
+ orrs ip, A_1, B_1
+ beq L_div_32_32
+
+L_div_64_64:
+/* CLZ only exists in ARM architecture version 5 and above. */
+#ifdef HAVE_CLZ
+ mov C_0, #1
+ mov C_1, #0
+ @ D_0 = clz A
+ teq A_1, #0
+ clz D_0, A_1
+ clzeq ip, A_0
+ addeq D_0, D_0, ip
+ @ D_1 = clz B
+ teq B_1, #0
+ clz D_1, B_1
+ clzeq ip, B_0
+ addeq D_1, D_1, ip
+ @ if clz B - clz A > 0
+ subs D_0, D_1, D_0
+ bls L_done_shift
+ @ B <<= (clz B - clz A)
+ subs D_1, D_0, #32
+ rsb ip, D_0, #32
+ movmi B_1, B_1, lsl D_0
+ARM( orrmi B_1, B_1, B_0, lsr ip )
+THUMB( lsrmi TMP, B_0, ip )
+THUMB( orrmi B_1, B_1, TMP )
+ movpl B_1, B_0, lsl D_1
+ mov B_0, B_0, lsl D_0
+ @ C = 1 << (clz B - clz A)
+ movmi C_1, C_1, lsl D_0
+ARM( orrmi C_1, C_1, C_0, lsr ip )
+THUMB( lsrmi TMP, C_0, ip )
+THUMB( orrmi C_1, C_1, TMP )
+ movpl C_1, C_0, lsl D_1
+ mov C_0, C_0, lsl D_0
+L_done_shift:
+ mov D_0, #0
+ mov D_1, #0
+ @ C: current bit; D: result
+#else
+ @ C: current bit; D: result
+ mov C_0, #1
+ mov C_1, #0
+ mov D_0, #0
+ mov D_1, #0
+L_lsl_4:
+ cmp B_1, #0x10000000
+ cmpcc B_1, A_1
+ cmpeq B_0, A_0
+ bcs L_lsl_1
+ @ B <<= 4
+ mov B_1, B_1, lsl #4
+ orr B_1, B_1, B_0, lsr #28
+ mov B_0, B_0, lsl #4
+ @ C <<= 4
+ mov C_1, C_1, lsl #4
+ orr C_1, C_1, C_0, lsr #28
+ mov C_0, C_0, lsl #4
+ b L_lsl_4
+L_lsl_1:
+ cmp B_1, #0x80000000
+ cmpcc B_1, A_1
+ cmpeq B_0, A_0
+ bcs L_subtract
+ @ B <<= 1
+ mov B_1, B_1, lsl #1
+ orr B_1, B_1, B_0, lsr #31
+ mov B_0, B_0, lsl #1
+ @ C <<= 1
+ mov C_1, C_1, lsl #1
+ orr C_1, C_1, C_0, lsr #31
+ mov C_0, C_0, lsl #1
+ b L_lsl_1
+#endif
+L_subtract:
+ @ if A >= B
+ cmp A_1, B_1
+ cmpeq A_0, B_0
+ bcc L_update
+ @ A -= B
+ subs A_0, A_0, B_0
+ sbc A_1, A_1, B_1
+ @ D |= C
+ orr D_0, D_0, C_0
+ orr D_1, D_1, C_1
+L_update:
+ @ if A == 0: break
+ orrs ip, A_1, A_0
+ beq L_exit
+ @ C >>= 1
+ movs C_1, C_1, lsr #1
+ movs C_0, C_0, rrx
+ @ if C == 0: break
+ orrs ip, C_1, C_0
+ beq L_exit
+ @ B >>= 1
+ movs B_1, B_1, lsr #1
+ mov B_0, B_0, rrx
+ b L_subtract
+L_exit:
+ @ Note: A, B & Q, R are aliases
+ mov R_0, A_0
+ mov R_1, A_1
+ mov Q_0, D_0
+ mov Q_1, D_1
+ ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
+
+L_div_32_32:
+ @ Note: A_0 & r0 are aliases
+ @       Q_1 & r1 likewise
+ mov r1, B_0
+ bl __aeabi_uidivmod
+ mov R_0, r1
+ mov R_1, #0
+ mov Q_1, #0
+ ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
+
+L_pow2:
+#ifdef HAVE_CLZ
+ @ Note: A, B and Q, R are aliases
+ @ R = A & (B - 1)
+ and C_0, A_0, C_0
+ and C_1, A_1, C_1
+ @ Q = A >> log2(B)
+ @ Note: B must not be 0 here!
+ clz D_0, B_0
+ add D_1, D_0, #1
+ rsbs D_0, D_0, #31
+ bpl L_1
+ clz D_0, B_1
+ rsb D_0, D_0, #31
+ mov A_0, A_1, lsr D_0
+ add D_0, D_0, #32
+L_1:
+ movpl A_0, A_0, lsr D_0
+ARM( orrpl A_0, A_0, A_1, lsl D_1 )
+THUMB( lslpl TMP, A_1, D_1 )
+THUMB( orrpl A_0, A_0, TMP )
+ mov A_1, A_1, lsr D_0
+ @ Move C back to R
+ mov R_0, C_0
+ mov R_1, C_1
+ ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
+#else
+ @ Note: A, B and Q, R are aliases
+ @ R = A & (B - 1)
+ and C_0, A_0, C_0
+ and C_1, A_1, C_1
+ @ Q = A >> log2(B)
+ @ Note: B must not be 0 here!
+ @ Count the leading zeroes in B.
+ mov D_0, #0
+ orrs B_0, B_0, B_0
+ @ If B is greater than 1 << 31, divide A and B by 1 << 32.
+ moveq A_0, A_1
+ moveq A_1, #0
+ moveq B_0, B_1
+ @ Count the remaining leading zeroes in B.
+ movs B_1, B_0, lsl #16
+ addeq D_0, #16
+ moveq B_0, B_0, lsr #16
+ tst B_0, #0xff
+ addeq D_0, #8
+ moveq B_0, B_0, lsr #8
+ tst B_0, #0xf
+ addeq D_0, #4
+ moveq B_0, B_0, lsr #4
+ tst B_0, #0x3
+ addeq D_0, #2
+ moveq B_0, B_0, lsr #2
+ tst B_0, #0x1
+ addeq D_0, #1
+ @ Shift A to the right by the appropriate amount.
+ rsb D_1, D_0, #32
+ mov Q_0, A_0, lsr D_0
+ orr Q_0, A_1, lsl D_1
+ mov Q_1, A_1, lsr D_0
+ @ Move C to R
+ mov R_0, C_0
+ mov R_1, C_1
+ ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
+#endif
+
+L_div_by_0:
+ bl __div0
+ @ As wrong as it could be
+ mov Q_0, #0
+ mov Q_1, #0
+ mov R_0, #0
+ mov R_1, #0
+ ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
+ENDPROC(__aeabi_uldivmod)
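
__aeabi_uldivmod above decomposes into: a divide-by-zero trap, a power-of-two
fast path (a mask gives the remainder, a shift gives the quotient), a 32/32
fast path that defers to __aeabi_uidivmod, and a general case that aligns B
under A (clz on ARMv5+, the 4-bit/1-bit shift ladders otherwise) before the
subtract loop. A hedged C model of the general case, assuming GCC's
__builtin_clzll:

    typedef struct { unsigned long long q, r; } uqr64;

    static uqr64 uldivmod(unsigned long long a, unsigned long long b)
    {
        uqr64 out = { 0, a };           /* b == 0 goes to __div0 instead */

        if (b && a >= b) {
            int sh = __builtin_clzll(b) - __builtin_clzll(a);
            unsigned long long bit = 1ull << sh;    /* C in the asm */
            unsigned long long d = b << sh;         /* shifted B */

            while (bit) {
                if (out.r >= d) {       /* if A >= B: A -= B, D |= C */
                    out.r -= d;
                    out.q |= bit;
                }
                d >>= 1;                /* B >>= 1 */
                bit >>= 1;              /* C >>= 1 */
            }
        }
        return out;                     /* out.q * b + out.r == a */
    }
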
--- /dev/null
+#include <linux/linkage.h>
+
+/* # 1 "libgcc1.S" */
+@ libgcc1 routines for ARM cpu.
+@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
+/* # 145 "libgcc1.S" */
+dividend .req r0
+divisor .req r1
+overdone .req r2
+curbit .req r3
+/* ip .req r12 */
+/* sp .req r13 */
+/* lr .req r14 */
+/* pc .req r15 */
+ .text
+ .type __umodsi3 ,function
+ .align 0
+ ENTRY(__umodsi3)
+ cmp divisor, #0
+ beq Ldiv0
+ mov curbit, #1
+ cmp dividend, divisor
+ movcc pc, lr
+Loop1:
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+ cmp divisor, #0x10000000
+ cmpcc divisor, dividend
+ movcc divisor, divisor, lsl #4
+ movcc curbit, curbit, lsl #4
+ bcc Loop1
+Lbignum:
+ @ For a very big divisor, we must shift it one bit at a time, or
+ @ we will be in danger of overflowing.
+ cmp divisor, #0x80000000
+ cmpcc divisor, dividend
+ movcc divisor, divisor, lsl #1
+ movcc curbit, curbit, lsl #1
+ bcc Lbignum
+Loop3:
+ @ Test for possible subtractions. On the final pass, this may
+ @ subtract too much from the dividend, so keep track of which
+ @ subtractions are done; we can fix them up afterwards...
+ mov overdone, #0
+ cmp dividend, divisor
+ subcs dividend, dividend, divisor
+ cmp dividend, divisor, lsr #1
+ subcs dividend, dividend, divisor, lsr #1
+ orrcs overdone, overdone, curbit, ror #1
+ cmp dividend, divisor, lsr #2
+ subcs dividend, dividend, divisor, lsr #2
+ orrcs overdone, overdone, curbit, ror #2
+ cmp dividend, divisor, lsr #3
+ subcs dividend, dividend, divisor, lsr #3
+ orrcs overdone, overdone, curbit, ror #3
+ mov ip, curbit
+ cmp dividend, #0 @ Early termination?
+ movnes curbit, curbit, lsr #4 @ No, any more bits to do?
+ movne divisor, divisor, lsr #4
+ bne Loop3
+ @ Any subtractions that we should not have done will be recorded in
+ @ the top three bits of "overdone". Exactly which were not needed
+ @ are governed by the position of the bit, stored in ip.
+ @ If we terminated early, because dividend became zero,
+ @ then none of the below will match, since the bit in ip will not be
+ @ in the bottom nibble.
+ ands overdone, overdone, #0xe0000000
+ moveq pc, lr @ No fixups needed
+ tst overdone, ip, ror #3
+ addne dividend, dividend, divisor, lsr #3
+ tst overdone, ip, ror #2
+ addne dividend, dividend, divisor, lsr #2
+ tst overdone, ip, ror #1
+ addne dividend, dividend, divisor, lsr #1
+ mov pc, lr
+Ldiv0:
+ str lr, [sp, #-4]!
+ bl __div0 (PLT)
+ mov r0, #0 @ about as wrong as it could be
+ ldmia sp!, {pc}
+ .size __umodsi3 , . - __umodsi3
+/* # 320 "libgcc1.S" */
+/* # 421 "libgcc1.S" */
+/* # 433 "libgcc1.S" */
+/* # 456 "libgcc1.S" */
+/* # 500 "libgcc1.S" */
+/* # 580 "libgcc1.S" */
+ENDPROC(__umodsi3)
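
The "overdone" bookkeeping above exists only because the loop tests divisor,
divisor>>1, >>2 and >>3 in one pass: on the final pass some of those shifted
copies fall below the weight that curbit still covers, so their subtractions
overshoot and the tst/addne sequence adds them back. A bit-at-a-time model
needs no fixup, which makes the underlying algorithm easier to see (hedged
sketch):

    unsigned int umod(unsigned int n, unsigned int d)   /* d != 0 */
    {
        unsigned int bit = 1;

        if (n < d)
            return n;                   /* movcc pc, lr */
        while (d < n && !(d & 0x80000000u)) {
            d <<= 1;                    /* Loop1 / Lbignum */
            bit <<= 1;
        }
        do {
            if (n >= d)                 /* one trial subtraction per bit */
                n -= d;
            d >>= 1;
        } while (bit >>= 1);
        return n;
    }
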