/* libgcc functions for Blackfin.
   Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Analog Devices.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/* Each routine below is guarded by its own L_<name> macro, so a given
   compilation of this file assembles only the routine(s) whose macro is
   defined.  Every section therefore selects .text itself and records
   the symbol size, rather than relying on an earlier section.  */

#ifdef L_divsi3
.text
.align 2
.global ___divsi3;
.type ___divsi3, STT_FUNC;

/* ___divsi3: signed 32-bit division.

   In:       R0 = dividend, R1 = divisor.
   Out:      R0 = quotient.  The magnitudes are divided and the result
	     is negated when the operand signs differ, so the quotient
	     is truncated toward zero.
   Clobbers: R1, R2, CC, plus everything ___udivsi3 clobbers.
   Saved:    R7 (pushed and popped here; it carries the result sign
	     across the call), RETS.
   No check for a zero divisor is made.  */
___divsi3:
	[--SP]= RETS;
	[--SP] = R7;

	/* R0 = |dividend|, R7 = 1 if the dividend was negative.  */
	R2 = -R0;
	CC = R0 < 0;
	IF CC R0 = R2;
	R7 = CC;

	/* R1 = |divisor|, R7 = 1 if exactly one operand was negative.  */
	R2 = -R1;
	CC = R1 < 0;
	IF CC R1 = R2;
	R2 = CC;
	R7 = R7 ^ R2;

	CALL ___udivsi3;

	/* Negate the unsigned quotient when the signs differed.  */
	CC = R7;
	R1 = -R0;
	IF CC R0 = R1;

	R7 = [SP++];
	RETS = [SP++];
	RTS;

.size ___divsi3, .-___divsi3
#endif

#ifdef L_modsi3
.text
.align 2
.global ___modsi3;
.type ___modsi3, STT_FUNC;

/* ___modsi3: signed 32-bit remainder.

   In:       R0 = dividend, R1 = divisor.
   Out:      R0 = dividend - divisor * ___divsi3 (dividend, divisor).
   Clobbers: R1, R2, plus everything ___divsi3 clobbers.
   Saved:    RETS.  */
___modsi3:
	[--SP] = RETS;
	/* Keep both operands on the stack across the call.  */
	[--SP] = R0;
	[--SP] = R1;
	CALL ___divsi3;
	R2 = [SP++];		/* R2 = divisor */
	R1 = [SP++];		/* R1 = dividend */
	R2 *= R0;		/* R2 = divisor * quotient */
	R0 = R1 - R2;
	RETS = [SP++];
	RTS;

.size ___modsi3, .-___modsi3
#endif

#ifdef L_udivsi3
.text
.align 2
.global ___udivsi3;
.type ___udivsi3, STT_FUNC;

/* ___udivsi3: unsigned 32-bit division, one quotient bit per iteration.

   In:       R0 = dividend, R1 = divisor.
   Out:      R0 = quotient, R3 = remainder.  ___umodsi3 relies on the
	     remainder being left in R3, so treat that as part of this
	     routine's interface.
   Clobbers: R2, R3, P0, CC and the hardware loop 0 registers set up
	     by LSETUP.
   R1 is not modified.  No check for a zero divisor is made.

   CC is set when the partial remainder is smaller than the divisor,
   i.e. when the quotient bit is 0, and that CC value is what gets
   rotated into R0.  R0 therefore collects the complement of the
   quotient, which is why it is inverted before returning.  */
___udivsi3:
	P0 = 32;
	LSETUP (0f, 1f) LC0 = P0;
	/* upper half of dividend */
	R3 = 0;
0:
	/* The first time round in the loop we shift in garbage, but since we
	   perform 33 shifts, it doesn't matter.  */
	R0 = ROT R0 BY 1;
	R3 = ROT R3 BY 1;
	R2 = R3 - R1;
	CC = R3 < R1 (IU);
1:
	/* Last instruction of the loop.  */
	IF ! CC R3 = R2;

	/* Shift in the last bit.  */
	R0 = ROT R0 BY 1;
	/* R0 is the result, R3 contains the remainder.  */
	R0 = ~ R0;
	RTS;

.size ___udivsi3, .-___udivsi3
#endif

#ifdef L_umodsi3
.text
.align 2
.global ___umodsi3;
.type ___umodsi3, STT_FUNC;

/* ___umodsi3: unsigned 32-bit remainder.

   In:       R0 = dividend, R1 = divisor.
   Out:      R0 = remainder.
   Clobbers: everything ___udivsi3 clobbers.
   Saved:    RETS.  */
___umodsi3:
	[--SP] = RETS;
	CALL ___udivsi3;
	/* ___udivsi3 leaves the remainder in R3.  */
	R0 = R3;
	RETS = [SP++];
	RTS;

.size ___umodsi3, .-___umodsi3
#endif

#ifdef L_umulsi3_highpart
.text
.align 2
.global ___umulsi3_highpart;
.type ___umulsi3_highpart, STT_FUNC;

/* ___umulsi3_highpart: upper 32 bits of the unsigned 64-bit product
   R0 * R1.

   In:       R0, R1 = operands.
   Out:      R0 = high word of the product.
   Clobbers: A0, A1.

   The product is assembled from four 16x16 partial products; the two
   shifts by 16 carry the upper part of each lower-order sum into the
   next one.  */
___umulsi3_highpart:
	A1 = R1.L * R0.L (FU);				/* low * low */
	A1 = A1 >> 16;					/* keep only its carry-out */
	A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU);	/* high * high; cross term */
	A1 += R0.L * R1.H (FU);				/* other cross term */
	A1 = A1 >> 16;
	A0 += A1;
	R0 = A0 (FU);
	RTS;

.size ___umulsi3_highpart, .-___umulsi3_highpart
#endif

#ifdef L_smulsi3_highpart
.text
.align 2
.global ___smulsi3_highpart;
.type ___smulsi3_highpart, STT_FUNC;

/* ___smulsi3_highpart: upper 32 bits of the signed 64-bit product
   R0 * R1.

   In:       R0, R1 = operands.
   Out:      R0 = high word of the product.
   Clobbers: A0, A1.

   Same decomposition as the unsigned variant, except that the cross
   terms pair a high half with a low half in mixed mode (M) and the
   second shift is arithmetic (>>>) so the sign of the cross-term sum
   is kept.  */
___smulsi3_highpart:
	A1 = R1.L * R0.L (FU);				/* low * low, unsigned */
	A1 = A1 >> 16;
	A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M);	/* high * high; cross term */
	A1 += R1.H * R0.L (IS,M);			/* other cross term */
	A1 = A1 >>> 16;
	R0 = (A0 += A1);
	RTS;

.size ___smulsi3_highpart, .-___smulsi3_highpart
#endif

#ifdef L_muldi3
.text
.align 2
.global ___muldi3;
.type ___muldi3, STT_FUNC;

/*
	   R1:R0 * R3:R2
	 = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l
[X]	 = (R1.h * R3.h) * 2^96
[X]	   + (R1.h * R3.l + R1.l * R3.h) * 2^80
[X]	   + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64
[T1]	   + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48
[T2]	   + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32
[T3]	   + (R0.l * R2.h + R2.l * R0.h) * 2^16
[T4]	   + (R0.l * R2.l)

	 We can discard the first three lines marked "X" since we produce
	 only a 64 bit result.  So, we need ten 16-bit multiplies.

	 Individual mul-acc results (E[n] is the bracketed sum of the
	 line marked [T[n]] above):
[E1]	 = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h
[E2]	 = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h
[E3]	 = R0.l * R2.h + R2.l * R0.h
[E4]	 = R0.l * R2.l

	 We also need to add high parts from lower-level results to higher ones:
	 E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4

	 One interesting property is that all parts of the result that depend
	 on the sign of the multiplication are discarded.  Those would be the
	 multiplications involving R1.h and R3.h, but only the top 16 bit of
	 the 32 bit result depend on the sign, and since R1.h and R3.h only
	 occur in E1, the top half of these results is cut off.
	 So, we can just use FU mode for all of the 16-bit multiplies, and
	 ignore questions of when to use mixed mode.  */

/* ___muldi3: low 64 bits of a 64 x 64 bit multiplication.

   In:       R1:R0 = first operand; R2 = low word of the second operand;
	     the high word of the second operand is loaded from
	     [SP + 12] into R3.
   Out:      R1:R0 = product.
   Clobbers: R3, A0, A1.
   Saved:    R4, which is parked in [SP] for the duration of the
	     routine and reloaded in the final instruction.  */
___muldi3:
	/* [SP] technically is part of the caller's frame, but we can
	   use it as scratch space.  */
	A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12];	/* E1 */
	A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4;		/* E1 */
	A0 += A1;							/* E1 */
	R4 = A0.w;
	A0 = R0.l * R3.l (FU);						/* E2 */
	A0 += R2.l * R1.l (FU);						/* E2 */

	A1 = R2.L * R0.L (FU);						/* E4 */
	R3 = A1.w;
	A1 = A1 >> 16;							/* E3c */
	A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU);			/* E2, E3c */
	A1 += R0.L * R2.H (FU);						/* E3c */
	R0 = A1.w;
	A1 = A1 >> 16;							/* E2c */
	A0 += A1;							/* E2c */
	R1 = A0.w;

	/* low(result) = low(E3c):low(E4) */
	R0 = PACK (R0.l, R3.l);
	/* high(result) = E2c + (E1 << 16) */
	R1.h = R1.h + R4.l (NS) || R4 = [SP];
	RTS;

.size ___muldi3, .-___muldi3
#endif