1/* 2 * File: arch/blackfin/lib/udivsi3.S 3 * Based on: 4 * Author: 5 * 6 * Created: 7 * Description: 8 * 9 * Modified: 10 * Copyright 2004-2006 Analog Devices Inc. 11 * 12 * Bugs: Enter bugs at http://blackfin.uclinux.org/ 13 * 14 * This program is free software; you can redistribute it and/or modify 15 * it under the terms of the GNU General Public License as published by 16 * the Free Software Foundation; either version 2 of the License, or 17 * (at your option) any later version. 18 * 19 * This program is distributed in the hope that it will be useful, 20 * but WITHOUT ANY WARRANTY; without even the implied warranty of 21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 22 * GNU General Public License for more details. 23 * 24 * You should have received a copy of the GNU General Public License 25 * along with this program; if not, see the file COPYING, or write 26 * to the Free Software Foundation, Inc., 27 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 28 */ 29 30#include <linux/linkage.h> 31 32#define CARRY AC0 33 34#ifdef CONFIG_ARITHMETIC_OPS_L1 35.section .l1.text 36#else 37.text 38#endif 39 40 41ENTRY(___udivsi3) 42 43 CC = R0 < R1 (IU); /* If X < Y, always return 0 */ 44 IF CC JUMP .Lreturn_ident; 45 46 R2 = R1 << 16; 47 CC = R2 <= R0 (IU); 48 IF CC JUMP .Lidents; 49 50 R2 = R0 >> 31; /* if X is a 31-bit number */ 51 R3 = R1 >> 15; /* and Y is a 15-bit number */ 52 R2 = R2 | R3; /* then it's okay to use the DIVQ builtins (fallthrough to fast)*/ 53 CC = R2; 54 IF CC JUMP .Ly_16bit; 55 56/* METHOD 1: FAST DIVQ 57 We know we have a 31-bit dividend, and 15-bit divisor so we can use the 58 simple divq approach (first setting AQ to 0 - implying unsigned division, 59 then 16 DIVQ's). 60*/ 61 62 AQ = CC; /* Clear AQ (CC==0) */ 63 64/* ISR States: When dividing two integers (32.0/16.0) using divide primitives, 65 we need to shift the dividend one bit to the left. 66 We have already checked that we have a 31-bit number so we are safe to do 67 that. 68*/ 69 R0 <<= 1; 70 DIVQ(R0, R1); // 1 71 DIVQ(R0, R1); // 2 72 DIVQ(R0, R1); // 3 73 DIVQ(R0, R1); // 4 74 DIVQ(R0, R1); // 5 75 DIVQ(R0, R1); // 6 76 DIVQ(R0, R1); // 7 77 DIVQ(R0, R1); // 8 78 DIVQ(R0, R1); // 9 79 DIVQ(R0, R1); // 10 80 DIVQ(R0, R1); // 11 81 DIVQ(R0, R1); // 12 82 DIVQ(R0, R1); // 13 83 DIVQ(R0, R1); // 14 84 DIVQ(R0, R1); // 15 85 DIVQ(R0, R1); // 16 86 R0 = R0.L (Z); 87 RTS; 88 89.Ly_16bit: 90 /* We know that the upper 17 bits of Y might have bits set, 91 ** or that the sign bit of X might have a bit. If Y is a 92 ** 16-bit number, but not bigger, then we can use the builtins 93 ** with a post-divide correction. 94 ** R3 currently holds Y>>15, which means R3's LSB is the 95 ** bit we're interested in. 96 */ 97 98 /* According to the ISR, to use the Divide primitives for 99 ** unsigned integer divide, the useable range is 31 bits 100 */ 101 CC = ! BITTST(R0, 31); 102 103 /* IF condition is true we can scale our inputs and use the divide primitives, 104 ** with some post-adjustment 105 */ 106 R3 += -1; /* if so, Y is 0x00008nnn */ 107 CC &= AZ; 108 109 /* If condition is true we can scale our inputs and use the divide primitives, 110 ** with some post-adjustment 111 */ 112 R3 = R1 >> 1; /* Pre-scaled divisor for primitive case */ 113 R2 = R0 >> 16; 114 115 R2 = R3 - R2; /* shifted divisor < upper 16 bits of dividend */ 116 CC &= CARRY; 117 IF CC JUMP .Lshift_and_correct; 118 119 /* Fall through to the identities */ 120 121/* METHOD 2: identities and manual calculation 122 We are not able to use the divide primites, but may still catch some special 123 cases. 124*/ 125.Lidents: 126 /* Test for common identities. Value to be returned is placed in R2. */ 127 CC = R0 == 0; /* 0/Y => 0 */ 128 IF CC JUMP .Lreturn_r0; 129 CC = R0 == R1; /* X==Y => 1 */ 130 IF CC JUMP .Lreturn_ident; 131 CC = R1 == 1; /* X/1 => X */ 132 IF CC JUMP .Lreturn_ident; 133 134 R2.L = ONES R1; 135 R2 = R2.L (Z); 136 CC = R2 == 1; 137 IF CC JUMP .Lpower_of_two; 138 139 [--SP] = (R7:5); /* Push registers R5-R7 */ 140 141 /* Idents don't match. Go for the full operation. */ 142 143 144 R6 = 2; /* assume we'll shift two */ 145 R3 = 1; 146 147 P2 = R1; 148 /* If either R0 or R1 have sign set, */ 149 /* divide them by two, and note it's */ 150 /* been done. */ 151 CC = R1 < 0; 152 R2 = R1 >> 1; 153 IF CC R1 = R2; /* Possibly-shifted R1 */ 154 IF !CC R6 = R3; /* R1 doesn't, so at most 1 shifted */ 155 156 P0 = 0; 157 R3 = -R1; 158 [--SP] = R3; 159 R2 = R0 >> 1; 160 R2 = R0 >> 1; 161 CC = R0 < 0; 162 IF CC P0 = R6; /* Number of values divided */ 163 IF !CC R2 = R0; /* Shifted R0 */ 164 165 /* P0 is 0, 1 (NR/=2) or 2 (NR/=2, DR/=2) */ 166 167 /* r2 holds Copy dividend */ 168 R3 = 0; /* Clear partial remainder */ 169 R7 = 0; /* Initialise quotient bit */ 170 171 P1 = 32; /* Set loop counter */ 172 LSETUP(.Lulst, .Lulend) LC0 = P1; /* Set loop counter */ 173.Lulst: R6 = R2 >> 31; /* R6 = sign bit of R2, for carry */ 174 R2 = R2 << 1; /* Shift 64 bit dividend up by 1 bit */ 175 R3 = R3 << 1 || R5 = [SP]; 176 R3 = R3 | R6; /* Include any carry */ 177 CC = R7 < 0; /* Check quotient(AQ) */ 178 /* If AQ==0, we'll sub divisor */ 179 IF CC R5 = R1; /* and if AQ==1, we'll add it. */ 180 R3 = R3 + R5; /* Add/sub divsor to partial remainder */ 181 R7 = R3 ^ R1; /* Generate next quotient bit */ 182 183 R5 = R7 >> 31; /* Get AQ */ 184 BITTGL(R5, 0); /* Invert it, to get what we'll shift */ 185.Lulend: R2 = R2 + R5; /* and "shift" it in. */ 186 187 CC = P0 == 0; /* Check how many inputs we shifted */ 188 IF CC JUMP .Lno_mult; /* if none... */ 189 R6 = R2 << 1; 190 CC = P0 == 1; 191 IF CC R2 = R6; /* if 1, Q = Q*2 */ 192 IF !CC R1 = P2; /* if 2, restore stored divisor */ 193 194 R3 = R2; /* Copy of R2 */ 195 R3 *= R1; /* Q * divisor */ 196 R5 = R0 - R3; /* Z = (dividend - Q * divisor) */ 197 CC = R1 <= R5 (IU); /* Check if divisor <= Z? */ 198 R6 = CC; /* if yes, R6 = 1 */ 199 R2 = R2 + R6; /* if yes, add one to quotient(Q) */ 200.Lno_mult: 201 SP += 4; 202 (R7:5) = [SP++]; /* Pop registers R5-R7 */ 203 R0 = R2; /* Store quotient */ 204 RTS; 205 206.Lreturn_ident: 207 CC = R0 < R1 (IU); /* If X < Y, always return 0 */ 208 R2 = 0; 209 IF CC JUMP .Ltrue_return_ident; 210 R2 = -1 (X); /* X/0 => 0xFFFFFFFF */ 211 CC = R1 == 0; 212 IF CC JUMP .Ltrue_return_ident; 213 R2 = -R2; /* R2 now 1 */ 214 CC = R0 == R1; /* X==Y => 1 */ 215 IF CC JUMP .Ltrue_return_ident; 216 R2 = R0; /* X/1 => X */ 217 /*FALLTHRU*/ 218 219.Ltrue_return_ident: 220 R0 = R2; 221.Lreturn_r0: 222 RTS; 223 224.Lpower_of_two: 225 /* Y has a single bit set, which means it's a power of two. 226 ** That means we can perform the division just by shifting 227 ** X to the right the appropriate number of bits 228 */ 229 230 /* signbits returns the number of sign bits, minus one. 231 ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need 232 ** to shift right n-signbits spaces. It also means 0x80000000 233 ** is a special case, because that *also* gives a signbits of 0 234 */ 235 236 R2 = R0 >> 31; 237 CC = R1 < 0; 238 IF CC JUMP .Ltrue_return_ident; 239 240 R1.l = SIGNBITS R1; 241 R1 = R1.L (Z); 242 R1 += -30; 243 R0 = LSHIFT R0 by R1.L; 244 RTS; 245 246/* METHOD 3: PRESCALE AND USE THE DIVIDE PRIMITIVES WITH SOME POST-CORRECTION 247 Two scaling operations are required to use the divide primitives with a 248 divisor > 0x7FFFF. 249 Firstly (as in method 1) we need to shift the dividend 1 to the left for 250 integer division. 251 Secondly we need to shift both the divisor and dividend 1 to the right so 252 both are in range for the primitives. 253 The left/right shift of the dividend does nothing so we can skip it. 254*/ 255.Lshift_and_correct: 256 R2 = R0; 257 // R3 is already R1 >> 1 258 CC=!CC; 259 AQ = CC; /* Clear AQ, got here with CC = 0 */ 260 DIVQ(R2, R3); // 1 261 DIVQ(R2, R3); // 2 262 DIVQ(R2, R3); // 3 263 DIVQ(R2, R3); // 4 264 DIVQ(R2, R3); // 5 265 DIVQ(R2, R3); // 6 266 DIVQ(R2, R3); // 7 267 DIVQ(R2, R3); // 8 268 DIVQ(R2, R3); // 9 269 DIVQ(R2, R3); // 10 270 DIVQ(R2, R3); // 11 271 DIVQ(R2, R3); // 12 272 DIVQ(R2, R3); // 13 273 DIVQ(R2, R3); // 14 274 DIVQ(R2, R3); // 15 275 DIVQ(R2, R3); // 16 276 277 /* According to the Instruction Set Reference: 278 To divide by a divisor > 0x7FFF, 279 1. prescale and perform divide to obtain quotient (Q) (done above), 280 2. multiply quotient by unscaled divisor (result M) 281 3. subtract the product from the divident to get an error (E = X - M) 282 4. if E < divisor (Y) subtract 1, if E > divisor (Y) add 1, else return quotient (Q) 283 */ 284 R3 = R2.L (Z); /* Q = X' / Y' */ 285 R2 = R3; /* Preserve Q */ 286 R2 *= R1; /* M = Q * Y */ 287 R2 = R0 - R2; /* E = X - M */ 288 R0 = R3; /* Copy Q into result reg */ 289 290/* Correction: If result of the multiply is negative, we overflowed 291 and need to correct the result by subtracting 1 from the result.*/ 292 R3 = 0xFFFF (Z); 293 R2 = R2 >> 16; /* E >> 16 */ 294 CC = R2 == R3; 295 R3 = 1 ; 296 R1 = R0 - R3; 297 IF CC R0 = R1; 298 RTS; 299 300ENDPROC(___udivsi3) 301