-
Notifications
You must be signed in to change notification settings - Fork 8
/
ldivmod.S
252 lines (193 loc) · 5.01 KB
/
ldivmod.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
/* Runtime ABI for the ARM Cortex-M0
* ldivmod.S: 64 bit division (quotient and remainder)
*
* Copyright (c) 2012-2017 Jörg Mische <bobbl@gmx.de>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
.syntax unified
.text
.thumb
.cpu cortex-m0
@ {long long quotient, long long remainder}
@ __aeabi_ldivmod(long long numerator, long long denominator)
@
@ Signed 64 bit division, implemented as a sign-fixing wrapper around
@ __aeabi_uldivmod.
@
@ In:    r1:r0 = numerator, r3:r2 = denominator
@ Out:   r1:r0 = quotient,  r3:r2 = remainder
@
@ Every negative operand is negated before the unsigned routine is called.
@ Afterwards the quotient is negated if exactly one operand was negative and
@ the remainder is negated if the numerator was negative.
@ r4 is the scratch register of the negation; it is saved and restored
@ together with lr on every path that uses it. If neither operand is
@ negative, control is handed straight to the unsigned routine without
@ touching the stack.
@

        @ (hi:lo) = -(hi:lo), 64 bit two's complement negation.
        @ Clobbers r4 and the flags.
        .macro  ldivmod_neg64 lo, hi
        movs    r4, #0
        rsbs    \lo, \lo, #0            @ lo = 0 - lo, borrow goes to carry
        sbcs    r4, \hi                 @ r4 = 0 - hi - borrow
        mov     \hi, r4
        .endm

        .thumb_func
        .global __aeabi_ldivmod
__aeabi_ldivmod:
        cmp     r1, #0
        bge     .Lnum_nonneg

        @ numerator < 0
        push    {r4, lr}
        ldivmod_neg64 r0, r1            @ num = -num
        cmp     r3, #0
        bge     .Lnum_neg_den_nonneg

        @ numerator < 0 and denominator < 0: quotient keeps its sign
        ldivmod_neg64 r2, r3            @ den = -den
        bl      __aeabi_uldivmod
        ldivmod_neg64 r2, r3            @ rem = -rem
        pop     {r4, pc}

        @ numerator < 0 and denominator >= 0: both results change sign
.Lnum_neg_den_nonneg:
        bl      __aeabi_uldivmod
        ldivmod_neg64 r0, r1            @ quot = -quot
        ldivmod_neg64 r2, r3            @ rem = -rem
        pop     {r4, pc}

        @ numerator >= 0
.Lnum_nonneg:
        cmp     r3, #0
        bge     .Luldivmod              @ nothing to fix: plain unsigned division

        @ numerator >= 0 and denominator < 0: only the quotient changes sign
        push    {r4, lr}
        ldivmod_neg64 r2, r3            @ den = -den
        bl      __aeabi_uldivmod
        ldivmod_neg64 r0, r1            @ quot = -quot
        pop     {r4, pc}

        .purgem ldivmod_neg64
@ unsigned long long __udivdi3(unsigned long long num, unsigned long long denom)
@
@ libgcc wrapper: just an alias for __aeabi_uldivmod(), the remainder is ignored
@
        .thumb_func
        .global __udivdi3
__udivdi3:

@ {unsigned long long quotient, unsigned long long remainder}
@ __aeabi_uldivmod(unsigned long long numerator, unsigned long long denominator)
@
@ Divide r1:r0 by r3:r2 and return the quotient in r1:r0 and the remainder
@ in r3:r2 (all unsigned)
@
@ Shift-and-subtract division. Register roles inside the loops:
@   r3:r2  running numerator, ends up as the remainder
@   r7:r6  shifted denominator
@   r5     bitmask of the quotient bit that is currently decided
@   r1, r0 upper and lower word of the quotient
@   r4     scratch for the 64 bit compare and shift
@ r4-r7 are saved and restored on every path that modifies them.
@
        .thumb_func
        .global __aeabi_uldivmod
__aeabi_uldivmod:
.Luldivmod:
        cmp     r3, #0
        bne     .L_large_denom
        cmp     r2, #0
        beq     .L_divison_by_0
        cmp     r1, #0
        beq     .L_fallback_32bits

        @ case 1: num >= 2^32 and denom < 2^32
        @ The result might be >= 2^32, therefore the upper 32 bits of the
        @ result are calculated first. This works like the calculation of
        @ the lower 32 bits, but with a denominator that is shifted by 32.
        @ Hence the lower 32 bits of the denominator are always 0 and the
        @ costly 64 bit shift and sub operations can be replaced by cheap
        @ 32 bit operations.

        push    {r4, r5, r6, r7, lr}

        @ Shift the denominator left until it is greater than the upper
        @ word of the numerator or its top bit is set.
        @ r7 = upper word of (denom << 32), the lower word is implicitly 0
        movs    r5, #1                  @ bitmask
        adds    r7, r2, #0              @ dont shift if denominator would overflow
        bmi     .L_upper_result
        cmp     r1, r7
        blo     .L_upper_result

.L_denom_shift_loop1:
        lsls    r5, #1
        lsls    r7, #1
        bmi     .L_upper_result         @ dont shift if overflow
        cmp     r1, r7
        bhs     .L_denom_shift_loop1

.L_upper_result:
        mov     r3, r1                  @ numerator moves to r3:r2
        mov     r2, r0
        movs    r1, #0                  @ upper result = 0
        b       .L_sub_entry1

.L_sub_loop1:
        lsrs    r7, #1                  @ denom(r7) >>= 1

.L_sub_entry1:
        cmp     r3, r7
        bcc     .L_dont_sub1            @ if (num >= denom)
        subs    r3, r7                  @   num -= denom
        orrs    r1, r5                  @   upper result(r1) |= bitmask(r5)
.L_dont_sub1:
        lsrs    r5, #1                  @ bitmask(r5) >>= 1
        bne     .L_sub_loop1

        @ Upper word done. Continue with the lower word: the bitmask
        @ restarts at bit 31 and the denominator becomes (r7:0) >> 1.
        movs    r5, #1
        lsls    r5, #31
        lsls    r6, r7, #31             @ denom(r7:r6) = (r7:0) >> 1
        lsrs    r7, #1                  @ dont forget least significant bit!
        b       .L_lower_result

        @ case 2: division by 0
        @ Branch (not call) to __aeabi_ldiv0: lr is untouched, so the
        @ handler returns directly to our caller.
        @ NOTE(review): a plain `b` to an external symbol has a limited
        @ range on Thumb-1 - confirm __aeabi_ldiv0 is linked close enough.
.L_divison_by_0:
        b       __aeabi_ldiv0

        @ case 3: num < 2^32 and denom < 2^32
        @ fallback to 32 bit division
.L_fallback_32bits:
        mov     r1, r2                  @ r0 = num, r1 = denom
        @ r4 is pushed only as padding: with lr alone the stack pointer
        @ would be only 4 byte aligned at the call, but the AAPCS demands
        @ 8 byte alignment at every public interface.
        push    {r4, lr}
        bl      __aeabi_uidivmod
        mov     r2, r1                  @ r3:r2 = remainder, zero extended
        movs    r1, #0                  @ r1:r0 = quotient, zero extended
        movs    r3, #0
        pop     {r4, pc}

        @ case 4: denom >= 2^32
        @ result is smaller than 2^32
.L_large_denom:
        push    {r4, r5, r6, r7, lr}

        mov     r7, r3                  @ denominator moves to r7:r6
        mov     r6, r2
        mov     r3, r1                  @ numerator moves to r3:r2
        mov     r2, r0

        @ Shift the denominator left until its upper word is greater than
        @ the upper word of the numerator or its top bit is set
        movs    r1, #0                  @ high word of result is 0
        movs    r5, #1                  @ bitmask
        adds    r7, #0                  @ dont shift if denominator would overflow
        bmi     .L_lower_result
        cmp     r3, r7
        blo     .L_lower_result

.L_denom_shift_loop4:
        lsls    r5, #1
        lsls    r7, #1                  @ denom(r7:r6) <<= 1
        lsls    r6, #1                  @ top bit of r6 goes to carry ...
        adcs    r7, r1                  @ ... and into bit 0 of r7 (r1=0)
        bmi     .L_lower_result         @ dont shift if overflow
        cmp     r3, r7
        bhs     .L_denom_shift_loop4

        @ Lower word of the result, shared by case 1 and case 4.
        @ Expects: r3:r2 = numerator, r7:r6 = shifted denominator,
        @ r5 = bitmask, r1 = final upper word of the result.
.L_lower_result:
        eors    r0, r0                  @ lower result = 0

.L_sub_loop4:
        mov     r4, r3                  @ 64 bit compare num - denom, the
        cmp     r2, r6                  @ difference itself is discarded
        sbcs    r4, r7
        bcc     .L_dont_sub4            @ if (num >= denom)
        subs    r2, r6                  @   numerator -= denom
        sbcs    r3, r7
        orrs    r0, r5                  @   result(r1:r0) |= bitmask(r5)
.L_dont_sub4:
        lsls    r4, r7, #31             @ denom(r7:r6) >>= 1
        lsrs    r6, #1
        lsrs    r7, #1
        orrs    r6, r4
        lsrs    r5, #1                  @ bitmask(r5) >>= 1
        bne     .L_sub_loop4

        pop     {r4, r5, r6, r7, pc}