lib2hw_mul.S revision 1.6
;   Copyright (C) 2014-2020 Free Software Foundation, Inc.
;   Contributed by Red Hat.
;
; This file is free software; you can redistribute it and/or modify it
; under the terms of the GNU General Public License as published by the
; Free Software Foundation; either version 3, or (at your option) any
; later version.
;
; This file is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
; General Public License for more details.
;
; Under Section 7 of GPL version 3, you are granted additional
; permissions described in the GCC Runtime Library Exception, version
; 3.1, as published by the Free Software Foundation.
;
; You should have received a copy of the GNU General Public License and
; a copy of the GCC Runtime Library Exception along with this program;
; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
; <http://www.gnu.org/licenses/>.

	;; Macro to start a multiply function.  Each function has three
	;; names, and hence three entry points - although they all go
	;; through the same code.  The first name is the version generated
	;; by GCC.  The second is the MSP430 EABI mandated name for the
	;; *software* version of the function.  The third is the EABI
	;; mandated name for the *hardware* version of the function.
	;;
	;; Since we are using the hardware and software names to point
	;; to the same code this effectively means that we are mapping
	;; the software function onto the hardware function.  Thus if
	;; the library containing this code is linked into an application
	;; (before the libgcc.a library) *all* multiply functions will
	;; be mapped onto the hardware versions.
	;;
	;; We construct each function in its own section so that linker
	;; garbage collection can be used to delete any unused functions
	;; from this file.
;; start_func GCC_NAME, EABI_SOFT_NAME, EABI_HARD_NAME
;;
;; Open a function in its own section (.text.GCC_NAME) and define all
;; three names as global function symbols at one and the same address.
;; The entry code pushes SR and disables interrupts so that the
;; multiplier registers can be used without being disturbed; the exit
;; sequence emitted by end_func undoes this.
.macro start_func gcc_name, eabi_soft_name, eabi_hard_name
	.pushsection .text.\gcc_name,"ax",@progbits
	.p2align 1

	.global	\eabi_hard_name
	.global	\eabi_soft_name
	.global	\gcc_name
	.type	\eabi_hard_name, @function
	.type	\eabi_soft_name, @function
	.type	\gcc_name, @function
\eabi_hard_name:
\eabi_soft_name:
\gcc_name:
	PUSH.W	sr			; Keep the caller's interrupt state on the stack
	DINT				; No interrupts while the multiplier is busy
	NOP				; Cover the latency of DINT
.endm


;; end_func NAME
;;
;; Close a function opened with start_func.  NAME has to be a label
;; defined at the function entry, because it is used to compute the
;; symbol size; the instantiations in this file pass the GCC name.
;;
;; In the large model the saved SR is popped explicitly and the return
;; is a RETA.  Otherwise RETI is used, restoring the SR pushed by
;; start_func as it returns.
.macro end_func name
#ifdef __MSP430X_LARGE__
	POP.W	sr			; Back to the interrupt state seen on entry
	RETA
#else
	RETI				; Restore saved SR and return in one go
#endif
	.size	\name, . - \name
	.popsection
.endm


;; fake_func GCC_NAME, EABI_SOFT_NAME, EABI_HARD_NAME
;;
;; Counterpart of start_func for operations without a hardware
;; implementation here.  It defines GCC_NAME and EABI_HARD_NAME as one
;; entry point holding nothing but a branch to EABI_SOFT_NAME, which is
;; implemented elsewhere.  Interrupt state is left alone and no
;; end_func is needed: the macro closes the section itself.
.macro fake_func gcc_name, eabi_soft_name, eabi_hard_name
	.pushsection .text.\gcc_name,"ax",@progbits
	.p2align 1

	.global	\eabi_hard_name
	.global	\gcc_name
	.type	\eabi_hard_name, @function
	.type	\gcc_name, @function
\eabi_hard_name:
\gcc_name:
#ifdef __MSP430X_LARGE__
	BRA	#\eabi_soft_name
#else
	BR	#\eabi_soft_name
#endif
	.size	\gcc_name, . - \gcc_name
	.popsection
.endm


;; The multiply macros below share these conventions:
;;
;;   * Their arguments are the memory locations of the hardware
;;     multiplier registers to use.
;;   * They are meant to sit between start_func and end_func, which
;;     disable interrupts on entry and restore the interrupt state on
;;     exit, so that the multiply is performed atomically.


;; mult16 OP1, OP2, RESULT
;;
;; 16-bit hardware multiply:  int16 = int16 * int16
;;
;;   In:              R12 = operand 1, R13 = operand 2
;;   Out:             R12 = result
;;   Registers used:  R12, R13
.macro mult16 OP1, OP2, RESULT
	MOV.W	r12, &\OP1		; Operand 1 goes into the multiplier
	MOV.W	r13, &\OP2		; Writing operand 2 triggers the MPY
	MOV.W	&\RESULT, r12		; Product into the return register
.endm


;; mult1632 OP1, OP2, RESLO, RESHI
;;
;; 16-bit hardware multiply with a 32-bit result:
;;	int32  = int16  * int16
;;	uint32 = uint16 * uint16
;; Which of the two is computed depends on the register passed as OP1.
;;
;;   In:              R12 = operand 1, R13 = operand 2
;;   Out:             R12 = result low word, R13 = result high word
;;   Registers used:  R12, R13
.macro mult1632 OP1, OP2, RESLO, RESHI
	MOV.W	r12, &\OP1		; Operand 1 goes into the multiplier
	MOV.W	r13, &\OP2		; Writing operand 2 triggers the MPY
	MOV.W	&\RESLO, r12		; Low word of the product
	MOV.W	&\RESHI, r13		; High word of the product
.endm


;; mult32 OP1, OP2, MAC_OP1, MAC_OP2, RESLO, RESHI
;;
;; 32-bit multiply with a 32-bit result, built from the 16-bit multiply
;; and multiply-accumulate operations:
;;	int32 = int32 * int32
;;
;;   In:              R12, R13 = operand 1 (low, high)
;;                    R14, R15 = operand 2 (low, high)
;;   Out:             R12, R13 = result (low, high)
;;   Registers used:  R12, R13, R14, R15
;;
;; The order of the register accesses below is significant.
.macro mult32 OP1, OP2, MAC_OP1, MAC_OP2, RESLO, RESHI
	MOV.W	r12, &\OP1		; op1.lo into the multiplier
	MOV.W	r14, &\OP2		; op2.lo; triggers MPY: op1.lo * op2.lo
	MOV.W	r12, &\MAC_OP1		; op1.lo again, now as MAC operand
	MOV.W	&\RESLO, r12		; Low word of the final result is ready
	MOV.W	&\RESHI, &\RESLO	; High word of that product seeds the sum
	MOV.W	r15, &\MAC_OP2		; op2.hi; triggers MAC: += op1.lo * op2.hi
	MOV.W	r13, &\MAC_OP1		; op1.hi as MAC operand
	MOV.W	r14, &\MAC_OP2		; op2.lo; triggers MAC: += op1.hi * op2.lo
	MOV.W	&\RESLO, r13		; High word of the final result
.endm


;; mult32_hw OP1_LO, OP1_HI, OP2_LO, OP2_HI, RESLO, RESHI
;;
;; 32-bit hardware multiply with a 32-bit result:
;;	int32 = int32 * int32
;;
;;   In:              R12, R13 = operand 1 (low, high)
;;                    R14, R15 = operand 2 (low, high)
;;   Out:             R12, R13 = result (low, high)
;;   Registers used:  R12, R13, R14, R15
.macro mult32_hw OP1_LO, OP1_HI, OP2_LO, OP2_HI, RESLO, RESHI
	MOV.W	r12, &\OP1_LO		; Operand 1, low word
	MOV.W	r13, &\OP1_HI		; Operand 1, high word
	MOV.W	r14, &\OP2_LO		; Operand 2, low word
	MOV.W	r15, &\OP2_HI		; Operand 2, high word; triggers the MPY
	MOV.W	&\RESLO, r12		; Low 16 bits of the result
	MOV.W	&\RESHI, r13		; High 16 bits of the result
.endm


;; mult3264_hw OP1_LO, OP1_HI, OP2_LO, OP2_HI, RES0, RES1, RES2, RES3
;;
;; 32-bit hardware multiply with a 64-bit result:
;;	int64  = int32  * int32
;;	uint64 = uint32 * uint32
;; Which of the two is computed depends on the registers passed as
;; OP1_LO / OP1_HI.
;;
;;   In:              R12, R13 = operand 1 (low, high)
;;                    R14, R15 = operand 2 (low, high)
;;   Out:             R12, R13, R14, R15 = result, R12 lowest word
;;   Registers used:  R12, R13, R14, R15
.macro mult3264_hw OP1_LO, OP1_HI, OP2_LO, OP2_HI, RES0, RES1, RES2, RES3
	MOV.W	r12, &\OP1_LO		; Operand 1, low word
	MOV.W	r13, &\OP1_HI		; Operand 1, high word
	MOV.W	r14, &\OP2_LO		; Operand 2, low word
	MOV.W	r15, &\OP2_HI		; Operand 2, high word; triggers the MPY
	MOV.W	&\RES0, r12		; Result bits 0..15
	MOV.W	&\RES1, r13		; Result bits 16..31
	MOV.W	&\RES2, r14		; Result bits 32..47
	MOV.W	&\RES3, r15		; Result bits 48..63
.endm


;; EABI mandated names:
;;
;; int16 __mspabi_mpyi (int16 x, int16 y)
;;	Multiply int by int.
;; int16 __mspabi_mpyi_hw (int16 x, int16 y)
;;	Multiply int by int.  Uses hardware MPY16 or MPY32.
;; int16 __mspabi_mpyi_f5hw (int16 x, int16 y)
;;	Multiply int by int.  Uses hardware MPY32 (F5xx devices and up).
;;
;; int32 __mspabi_mpyl (int32 x, int32 y);
;;	Multiply long by long.
;; int32 __mspabi_mpyl_hw (int32 x, int32 y)
;;	Multiply long by long.  Uses hardware MPY16.
;; int32 __mspabi_mpyl_hw32 (int32 x, int32 y)
;;	Multiply long by long.  Uses hardware MPY32 (F4xx devices).
;; int32 __mspabi_mpyl_f5hw (int32 x, int32 y)
;;	Multiply long by long.  Uses hardware MPY32 (F5xx devices and up).
;;
;; int64 __mspabi_mpyll (int64 x, int64 y)
;;	Multiply long long by long long.
;; int64 __mspabi_mpyll_hw (int64 x, int64 y)
;;	Multiply long long by long long.  Uses hardware MPY16.
;; int64 __mspabi_mpyll_hw32 (int64 x, int64 y)
;;	Multiply long long by long long.  Uses hardware MPY32 (F4xx devices).
;; int64 __mspabi_mpyll_f5hw (int64 x, int64 y)
;;	Multiply long long by long long.  Uses hardware MPY32 (F5xx devices
;;	and up).
;;
;; int32 __mspabi_mpysl (int16 x, int16 y)
;;	Multiply int by int; result is long.
;; int32 __mspabi_mpysl_hw (int16 x, int16 y)
;;	Multiply int by int; result is long.  Uses hardware MPY16 or MPY32.
;; int32 __mspabi_mpysl_f5hw (int16 x, int16 y)
;;	Multiply int by int; result is long.  Uses hardware MPY32 (F5xx
;;	devices and up).
;;
;; int64 __mspabi_mpysll (int32 x, int32 y)
;;	Multiply long by long; result is long long.
;; int64 __mspabi_mpysll_hw (int32 x, int32 y)
;;	Multiply long by long; result is long long.  Uses hardware MPY16.
;; int64 __mspabi_mpysll_hw32 (int32 x, int32 y)
;;	Multiply long by long; result is long long.  Uses hardware MPY32
;;	(F4xx devices).
;; int64 __mspabi_mpysll_f5hw (int32 x, int32 y)
;;	Multiply long by long; result is long long.  Uses hardware MPY32
;;	(F5xx devices and up).
;;
;; uint32 __mspabi_mpyul (uint16 x, uint16 y)
;;	Multiply unsigned int by unsigned int; result is unsigned long.
;; uint32 __mspabi_mpyul_hw (uint16 x, uint16 y)
;;	Multiply unsigned int by unsigned int; result is unsigned long.
;;	Uses hardware MPY16 or MPY32.
;; uint32 __mspabi_mpyul_f5hw (uint16 x, uint16 y)
;;	Multiply unsigned int by unsigned int; result is unsigned long.
;;	Uses hardware MPY32 (F5xx devices and up).
;;
;; uint64 __mspabi_mpyull (uint32 x, uint32 y)
;;	Multiply unsigned long by unsigned long; result is unsigned long long.
;; uint64 __mspabi_mpyull_hw (uint32 x, uint32 y)
;;	Multiply unsigned long by unsigned long; result is unsigned long long.
;;	Uses hardware MPY16.
;; uint64 __mspabi_mpyull_hw32 (uint32 x, uint32 y)
;;	Multiply unsigned long by unsigned long; result is unsigned long long.
;;	Uses hardware MPY32 (F4xx devices).
;; uint64 __mspabi_mpyull_f5hw (uint32 x, uint32 y)
;;	Multiply unsigned long by unsigned long; result is unsigned long long.
;;	Uses hardware MPY32 (F5xx devices and up).

;;;; The register names below are the standardised versions used across TI
;;;; literature.

;; Hardware multiply register addresses for devices with 16-bit hardware
;; multiply.
.set	MPY,		0x0130		; Operand 1, unsigned multiply
.set	MPYS,		0x0132		; Operand 1, signed multiply
.set	MAC,		0x0134		; Operand 1, multiply-accumulate
.set	OP2,		0x0138		; Operand 2
.set	RESLO,		0x013A		; Result, low word
.set	RESHI,		0x013C		; Result, high word

;; Hardware multiply register addresses for devices with 32-bit (non-f5)
;; hardware multiply.
.set	MPY32L,		0x0140		; 32-bit operand 1, unsigned, low word
.set	MPY32H,		0x0142		; 32-bit operand 1, unsigned, high word
.set	MPYS32L,	0x0144		; 32-bit operand 1, signed, low word
.set	MPYS32H,	0x0146		; 32-bit operand 1, signed, high word
.set	OP2L,		0x0150		; 32-bit operand 2, low word
.set	OP2H,		0x0152		; 32-bit operand 2, high word
.set	RES0,		0x0154		; 64-bit result, lowest word
.set	RES1,		0x0156
.set	RES2,		0x0158
.set	RES3,		0x015A		; 64-bit result, highest word

;; Hardware multiply register addresses for devices with f5series hardware
;; multiply.
;; The F5xxx series of MCUs support the same 16-bit and 32-bit multiply
;; as the second generation hardware, but they are accessed from different
;; memory registers.
;; These names are NOT standard: _F5 has been appended to the standard names.
.set	MPY_F5,		0x04C0
.set	MPYS_F5,	0x04C2
.set	MAC_F5,		0x04C4
.set	OP2_F5,		0x04C8
.set	RESLO_F5,	0x04CA
.set	RESHI_F5,	0x04CC
.set	MPY32L_F5,	0x04D0
.set	MPY32H_F5,	0x04D2
.set	MPYS32L_F5,	0x04D4
.set	MPYS32H_F5,	0x04D6
.set	OP2L_F5,	0x04E0
.set	OP2H_F5,	0x04E2
.set	RES0_F5,	0x04E4
.set	RES1_F5,	0x04E6
.set	RES2_F5,	0x04E8
.set	RES3_F5,	0x04EA

;; Exactly one of the three hardware generations is built per
;; compilation, chosen by a preprocessor define.  Every function is
;; produced as: start_func (entry points and entry code), one multiply
;; macro (the body), end_func (exit code and symbol size).

#if defined MUL_16
;; First generation MSP430 hardware multiplies ...

	;; int16 = int16 * int16
	start_func	__mulhi2, __mspabi_mpyi, __mspabi_mpyi_hw
	mult16		MPY, OP2, RESLO
	end_func	__mulhi2

	;; int32 = int16 * int16 (signed, hence MPYS)
	start_func	__mulhisi2, __mspabi_mpysl, __mspabi_mpysl_hw
	mult1632	MPYS, OP2, RESLO, RESHI
	end_func	__mulhisi2

	;; uint32 = uint16 * uint16
	start_func	__umulhisi2, __mspabi_mpyul, __mspabi_mpyul_hw
	mult1632	MPY, OP2, RESLO, RESHI
	end_func	__umulhisi2

	;; int32 = int32 * int32, via multiply and accumulate
	start_func	__mulsi2, __mspabi_mpyl, __mspabi_mpyl_hw
	mult32		MPY, OP2, MAC, OP2, RESLO, RESHI
	end_func	__mulsi2

	;; FIXME: We do not have hardware implementations of these
	;; routines, so just jump to the software versions instead.
	fake_func	__mulsidi2, __mspabi_mpysll, __mspabi_mpysll_hw
	fake_func	__umulsidi2, __mspabi_mpyull, __mspabi_mpyull_hw
	fake_func	__muldi3, __mspabi_mpyll, __mspabi_mpyll_hw

#elif defined MUL_32
;; Second generation MSP430 hardware multiplies ...

	;; int16 = int16 * int16
	start_func	__mulhi2, __mspabi_mpyi, __mspabi_mpyi_hw
	mult16		MPY, OP2, RESLO
	end_func	__mulhi2

	;; int32 = int16 * int16 (signed, hence MPYS)
	start_func	__mulhisi2, __mspabi_mpysl, __mspabi_mpysl_hw
	mult1632	MPYS, OP2, RESLO, RESHI
	end_func	__mulhisi2

	;; uint32 = uint16 * uint16
	start_func	__umulhisi2, __mspabi_mpyul, __mspabi_mpyul_hw
	mult1632	MPY, OP2, RESLO, RESHI
	end_func	__umulhisi2

	;; int32 = int32 * int32
	start_func	__mulsi2_hw32, __mspabi_mpyl, __mspabi_mpyl_hw32
	mult32_hw	MPY32L, MPY32H, OP2L, OP2H, RES0, RES1
	end_func	__mulsi2_hw32

	;; int64 = int32 * int32 (signed, hence MPYS32L / MPYS32H)
	start_func	__mulsidi2, __mspabi_mpysll, __mspabi_mpysll_hw32
	mult3264_hw	MPYS32L, MPYS32H, OP2L, OP2H, RES0, RES1, RES2, RES3
	end_func	__mulsidi2

	;; uint64 = uint32 * uint32
	start_func	__umulsidi2, __mspabi_mpyull, __mspabi_mpyull_hw32
	mult3264_hw	MPY32L, MPY32H, OP2L, OP2H, RES0, RES1, RES2, RES3
	end_func	__umulsidi2

	;; FIXME: Add a hardware version of this function.
	fake_func	__muldi3, __mspabi_mpyll, __mspabi_mpyll_hw32

#elif defined MUL_F5
;; The F5xxx series of MCUs support the same 16-bit and 32-bit multiply
;; as the second generation hardware, but they are accessed from different
;; memory registers.

	;; int16 = int16 * int16
	start_func	__mulhi2_f5, __mspabi_mpyi, __mspabi_mpyi_f5hw
	mult16		MPY_F5, OP2_F5, RESLO_F5
	end_func	__mulhi2_f5

	;; int32 = int16 * int16 (signed, hence MPYS_F5)
	start_func	__mulhisi2, __mspabi_mpysl, __mspabi_mpysl_f5hw
	mult1632	MPYS_F5, OP2_F5, RESLO_F5, RESHI_F5
	end_func	__mulhisi2

	;; uint32 = uint16 * uint16
	start_func	__umulhisi2, __mspabi_mpyul, __mspabi_mpyul_f5hw
	mult1632	MPY_F5, OP2_F5, RESLO_F5, RESHI_F5
	end_func	__umulhisi2

	;; int32 = int32 * int32
	start_func	__mulsi2_f5, __mspabi_mpyl, __mspabi_mpyl_f5hw
	mult32_hw	MPY32L_F5, MPY32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5
	end_func	__mulsi2_f5

	;; int64 = int32 * int32 (signed, hence MPYS32L_F5 / MPYS32H_F5)
	start_func	__mulsidi2, __mspabi_mpysll, __mspabi_mpysll_f5hw
	mult3264_hw	MPYS32L_F5, MPYS32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5, RES2_F5, RES3_F5
	end_func	__mulsidi2

	;; uint64 = uint32 * uint32
	start_func	__umulsidi2, __mspabi_mpyull, __mspabi_mpyull_f5hw
	mult3264_hw	MPY32L_F5, MPY32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5, RES2_F5, RES3_F5
	end_func	__umulsidi2

	;; FIXME: Add a hardware version of this function.
	fake_func	__muldi3, __mspabi_mpyll, __mspabi_mpyll_f5hw

#else
#error MUL type not defined
#endif