lib2hw_mul.S revision 1.6
1;   Copyright (C) 2014-2020 Free Software Foundation, Inc.
2;   Contributed by Red Hat.
3;
4; This file is free software; you can redistribute it and/or modify it
5; under the terms of the GNU General Public License as published by the
6; Free Software Foundation; either version 3, or (at your option) any
7; later version.
8;
9; This file is distributed in the hope that it will be useful, but
10; WITHOUT ANY WARRANTY; without even the implied warranty of
11; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12; General Public License for more details.
13;
14; Under Section 7 of GPL version 3, you are granted additional
15; permissions described in the GCC Runtime Library Exception, version
16; 3.1, as published by the Free Software Foundation.
17;
18; You should have received a copy of the GNU General Public License and
19; a copy of the GCC Runtime Library Exception along with this program;
20; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
21; <http://www.gnu.org/licenses/>.
22
23	;;  Macro to start a multiply function.  Each function has three
24	;; names, and hence three entry points - although they all go
25	;; through the same code.  The first name is the version generated
26	;; by GCC.  The second is the MSP430 EABI mandated name for the
27	;; *software* version of the function.  The third is the EABI
28	;; mandated name for the *hardware* version of the function.
29	;;
30	;;  Since we are using the hardware and software names to point
31	;; to the same code this effectively means that we are mapping
32	;; the software function onto the hardware function.  Thus if
33	;; the library containing this code is linked into an application
34	;; (before the libgcc.a library) *all* multiply functions will
35	;; be mapped onto the hardware versions.
36	;;
37	;;  We construct each function in its own section so that linker
38	;; garbage collection can be used to delete any unused functions
39	;; from this file.
.macro start_func gcc_name eabi_soft_name eabi_hard_name
	;; One section per function, keyed on the GCC name, so that the
	;; linker can garbage collect it independently of its siblings.
	.pushsection .text.\gcc_name,"ax",@progbits
	.p2align 1

	;; Export all three names as function symbols ...
	.global	\eabi_hard_name
	.global	\eabi_soft_name
	.global	\gcc_name
	.type	\eabi_hard_name, @function
	.type	\eabi_soft_name, @function
	.type	\gcc_name, @function

	;; ... and bind every one of them to the same entry address.
\eabi_hard_name:
\eabi_soft_name:
\gcc_name:
	;; Common prologue: the multiplier registers are shared state, so
	;; remember the caller interrupt state and mask interrupts before
	;; any operand is written.  The matching restore is in end_func.
	PUSH.W	sr			; Caller SR (holds the interrupt enable bit)
	DINT				; Mask interrupts
	NOP				; Pad the latency of DINT
.endm
56
57
58	;; End a function started with the start_func macro.
.macro end_func name
#ifndef __MSP430X_LARGE__
	;; RETI pops the saved SR and then the return address, so a single
	;; instruction both restores the interrupt state and returns.
	RETI
#else
	;; Large model: restore the saved SR explicitly, then return
	;; with RETA.
	POP.W	sr
	RETA
#endif
	;; Record the size of the function and leave its private section.
	.size	\name, . - \name
	.popsection
.endm
69
70
71	;; Like the start_func macro except that it is used to
72	;; create a false entry point that just jumps to the
73	;; software function (implemented elsewhere).
.macro fake_func gcc_name  eabi_soft_name  eabi_hard_name
	;; Private section, named after the GCC entry point.
	.pushsection .text.\gcc_name,"ax",@progbits
	.p2align 1

	;; Only the hardware name and the GCC name are defined here.  The
	;; software name is the branch target and must come from elsewhere.
	.global	\eabi_hard_name
	.global	\gcc_name
	.type	\eabi_hard_name, @function
	.type	\gcc_name, @function
\eabi_hard_name:
\gcc_name:
	;; Tail-jump to the software routine.  No state is saved: the
	;; target returns straight to our caller.
#ifndef __MSP430X_LARGE__
	BR	#\eabi_soft_name
#else
	BRA	#\eabi_soft_name
#endif
	.size	\gcc_name, . - \gcc_name
	.popsection
.endm
91
92
.macro mult16 OP1, OP2, RESULT
;* 16-bit hardware multiply, 16-bit result:
;*	int16 = int16 * int16
;*
;*   Inputs:   R12 = operand 1
;*             R13 = operand 2
;*   Output:   R12 = product
;*   Touched:  R12 is overwritten, R13 is only read.
;*
;* Macro arguments are the addresses of the multiplier registers:
;*   OP1    - first operand register
;*   OP2    - second operand register; writing it starts the multiply
;*   RESULT - register the 16-bit product is read back from
;*
;* The macro does not mask interrupts itself.  Atomicity comes from
;* expanding it between start_func (which disables interrupts) and
;* end_func (which restores the saved interrupt state).

	MOV.W	r12, &\OP1		; Operand 1 -> multiplier
	MOV.W	r13, &\OP2		; Operand 2 -> multiplier, starts MPY
	MOV.W	&\RESULT, r12		; Product -> return register
.endm
111
.macro mult1632 OP1, OP2, RESLO, RESHI
;* 16-bit hardware multiply, 32-bit result:
;*	int32 = int16 * int16
;*	uint32 = uint16 * uint16
;*
;*   Inputs:   R12 = operand 1
;*             R13 = operand 2
;*   Output:   R12 = low word of the product
;*             R13 = high word of the product
;*   Touched:  R12, R13
;*
;* Macro arguments are the addresses of the multiplier registers:
;*   OP1          - first operand register
;*   OP2          - second operand register; writing it starts the multiply
;*   RESLO, RESHI - low and high words of the product
;*
;* The macro does not mask interrupts itself.  Atomicity comes from
;* expanding it between start_func (which disables interrupts) and
;* end_func (which restores the saved interrupt state).

	MOV.W	r12, &\OP1		; Operand 1 -> multiplier
	MOV.W	r13, &\OP2		; Operand 2 -> multiplier, starts MPY
	MOV.W	&\RESLO, r12		; Low word  -> return register
	MOV.W	&\RESHI, r13		; High word -> return register
.endm
133
.macro mult32 OP1, OP2, MAC_OP1, MAC_OP2, RESLO, RESHI
;* 32-bit multiply with a 32-bit result, built from one 16-bit multiply
;* followed by two 16-bit multiply-and-accumulate steps:
;*	int32 = int32 * int32
;*
;*   Inputs:   R12 (low), R13 (high) = operand 1
;*             R14 (low), R15 (high) = operand 2
;*   Output:   R12 (low), R13 (high) = product
;*   Touched:  R12 and R13 are overwritten, R14 and R15 are only read.
;*
;* Method:
;*   1. lo1 * lo2 yields the low result word, which is final.
;*   2. The high word of that product is copied down into the low
;*      result register so that it becomes the accumulator seed.
;*   3. lo1 * hi2 and hi1 * lo2 are accumulated on top of it; the low
;*      result register then holds the high word of the 32-bit product.
;*
;* Macro arguments are the addresses of the multiplier registers:
;*   OP1, OP2         - operand registers for the plain multiply
;*   MAC_OP1, MAC_OP2 - operand registers for multiply-and-accumulate
;*   RESLO, RESHI     - low and high words of the hardware result
;*
;* The macro does not mask interrupts itself.  Atomicity comes from
;* expanding it between start_func (which disables interrupts) and
;* end_func (which restores the saved interrupt state).

	MOV.W	r12, &\OP1		; lo1 -> multiplier
	MOV.W	r14, &\OP2		; lo2 -> multiplier, starts MPY
	MOV.W	r12, &\MAC_OP1		; lo1 -> accumulate operand
	MOV.W	&\RESLO, r12		; Low result word is final, keep it
	MOV.W	&\RESHI, &\RESLO	; Seed accumulator with high word of lo1*lo2
	MOV.W	r15, &\MAC_OP2		; hi2, starts MAC: += lo1 * hi2
	MOV.W	r13, &\MAC_OP1		; hi1 -> accumulate operand
	MOV.W	r14, &\MAC_OP2		; lo2, starts MAC: += hi1 * lo2
	MOV.W	&\RESLO, r13		; High result word -> return register
.endm
159
160
.macro mult32_hw  OP1_LO  OP1_HI  OP2_LO  OP2_HI  RESLO  RESHI
;* 32-bit hardware multiply, 32-bit result:
;*	int32 = int32 * int32
;*
;*   Inputs:   R12 (low), R13 (high) = operand 1
;*             R14 (low), R15 (high) = operand 2
;*   Output:   R12 (low), R13 (high) = product
;*   Touched:  R12 and R13 are overwritten, R14 and R15 are only read.
;*
;* Macro arguments are the addresses of the multiplier registers:
;*   OP1_LO, OP1_HI - low and high words of the first operand
;*   OP2_LO, OP2_HI - low and high words of the second operand;
;*                    the write to OP2_HI starts the multiply
;*   RESLO, RESHI   - the two low words of the hardware result
;*
;* The macro does not mask interrupts itself.  Atomicity comes from
;* expanding it between start_func (which disables interrupts) and
;* end_func (which restores the saved interrupt state).

	MOV.W	r12, &\OP1_LO		; Operand 1 low  -> multiplier
	MOV.W	r13, &\OP1_HI		; Operand 1 high -> multiplier
	MOV.W	r14, &\OP2_LO		; Operand 2 low  -> multiplier
	MOV.W	r15, &\OP2_HI		; Operand 2 high -> multiplier, starts MPY
	MOV.W	&\RESLO, r12		; Low word  -> return register
	MOV.W	&\RESHI, r13		; High word -> return register
.endm
183
.macro mult3264_hw  OP1_LO  OP1_HI  OP2_LO  OP2_HI  RES0 RES1 RES2 RES3
;* 32-bit hardware multiply, 64-bit result:
;*	int64 = int32 * int32
;*	uint64 = uint32 * uint32
;*
;*   Inputs:   R12 (low), R13 (high) = operand 1
;*             R14 (low), R15 (high) = operand 2
;*   Output:   R12, R13, R14, R15 = product, least significant word
;*             in R12 and most significant word in R15
;*   Touched:  R12, R13, R14, R15
;*
;* Macro arguments are the addresses of the multiplier registers:
;*   OP1_LO, OP1_HI - low and high words of the first operand
;*   OP2_LO, OP2_HI - low and high words of the second operand;
;*                    the write to OP2_HI starts the multiply
;*   RES0 .. RES3   - the four result words, lowest first
;*
;* The macro does not mask interrupts itself.  Atomicity comes from
;* expanding it between start_func (which disables interrupts) and
;* end_func (which restores the saved interrupt state).

	MOV.W	r12, &\OP1_LO		; Operand 1 low  -> multiplier
	MOV.W	r13, &\OP1_HI		; Operand 1 high -> multiplier
	MOV.W	r14, &\OP2_LO		; Operand 2 low  -> multiplier
	MOV.W	r15, &\OP2_HI		; Operand 2 high -> multiplier, starts MPY
	MOV.W	&\RES0, r12		; Result bits 15..0
	MOV.W	&\RES1, r13		; Result bits 31..16
	MOV.W	&\RES2, r14		; Result bits 47..32
	MOV.W	&\RES3, r15		; Result bits 63..48
.endm
209
210
211;; EABI mandated names:
212;;
213;; int16 __mspabi_mpyi (int16 x, int16 y)
214;;            Multiply int by int.
215;; int16 __mspabi_mpyi_hw (int16 x, int16 y)
216;;            Multiply int by int. Uses hardware MPY16 or MPY32.
217;; int16 __mspabi_mpyi_f5hw (int16 x, int16 y)
218;;            Multiply int by int. Uses hardware MPY32 (F5xx devices and up).
219;;
220;; int32 __mspabi_mpyl (int32 x, int32 y);
221;;  	      Multiply long by long.
222;; int32 __mspabi_mpyl_hw (int32 x, int32 y)
223;; 	      Multiply long by long. Uses hardware MPY16.
224;; int32 __mspabi_mpyl_hw32 (int32 x, int32 y)
225;; 	      Multiply long by long. Uses hardware MPY32 (F4xx devices).
226;; int32 __mspabi_mpyl_f5hw (int32 x, int32 y)
227;; 	      Multiply long by long. Uses hardware MPY32 (F5xx devices and up).
228;;
229;; int64 __mspabi_mpyll (int64 x, int64 y)
230;; 	      Multiply long long by long long.
231;; int64 __mspabi_mpyll_hw (int64 x, int64 y)
232;; 	      Multiply long long by long long. Uses hardware MPY16.
233;; int64 __mspabi_mpyll_hw32 (int64 x, int64 y)
234;; 	      Multiply long long by long long. Uses hardware MPY32 (F4xx devices).
235;; int64 __mspabi_mpyll_f5hw (int64 x, int64 y)
236;; 	      Multiply long long by long long. Uses hardware MPY32 (F5xx devices and up).
237;;
238;; int32 __mspabi_mpysl (int16 x, int16 y)
239;;            Multiply int by int; result is long.
240;; int32 __mspabi_mpysl_hw(int16 x, int16 y)
241;; 	      Multiply int by int; result is long. Uses hardware MPY16 or MPY32
242;; int32 __mspabi_mpysl_f5hw(int16 x, int16 y)
243;; 	      Multiply int by int; result is long. Uses hardware MPY32 (F5xx devices and up).
244;;
245;; int64 __mspabi_mpysll(int32 x, int32 y)
246;;            Multiply long by long; result is long long.
247;; int64 __mspabi_mpysll_hw(int32 x, int32 y)
248;; 	      Multiply long by long; result is long long. Uses hardware MPY16.
249;; int64 __mspabi_mpysll_hw32(int32 x, int32 y)
250;; 	      Multiply long by long; result is long long. Uses hardware MPY32 (F4xx devices).
251;; int64 __mspabi_mpysll_f5hw(int32 x, int32 y)
252;; 	      Multiply long by long; result is long long. Uses hardware MPY32 (F5xx devices and up).
253;;
254;; uint32 __mspabi_mpyul(uint16 x, uint16 y)
255;; 	      Multiply unsigned int by unsigned int; result is unsigned long.
256;; uint32 __mspabi_mpyul_hw(uint16 x, uint16 y)
257;; 	      Multiply unsigned int by unsigned int; result is unsigned long. Uses hardware MPY16 or MPY32
258;; uint32 __mspabi_mpyul_f5hw(uint16 x, uint16 y)
259;; 	      Multiply unsigned int by unsigned int; result is unsigned long. Uses hardware MPY32 (F5xx devices and up).
260;;
261;; uint64 __mspabi_mpyull(uint32 x, uint32 y)
262;; 	      Multiply unsigned long by unsigned long; result is unsigned long long.
263;; uint64 __mspabi_mpyull_hw(uint32 x, uint32 y)
264;; 	      Multiply unsigned long by unsigned long; result is unsigned long long. Uses hardware MPY16
265;; uint64 __mspabi_mpyull_hw32(uint32 x, uint32 y)
266;; 	      Multiply unsigned long by unsigned long; result is unsigned long long. Uses hardware MPY32 (F4xx devices).
267;; uint64 __mspabi_mpyull_f5hw(uint32 x, uint32 y)
268;;            Multiply unsigned long by unsigned long; result is unsigned long long. Uses hardware MPY32 (F5xx devices and up)
269
270;;;; The register names below are the standardised versions used across TI
271;;;; literature.
272
;; Multiplier register addresses: devices with the 16-bit hardware
;; multiplier.
.equ MPY,		0x0130
.equ MPYS,		0x0132
.equ MAC,		0x0134
.equ OP2,		0x0138
.equ RESLO,		0x013A
.equ RESHI,		0x013C

;; Multiplier register addresses: devices with the 32-bit (non-F5)
;; hardware multiplier.
.equ MPY32L,		0x0140
.equ MPY32H,		0x0142
.equ MPYS32L,		0x0144
.equ MPYS32H,		0x0146
.equ OP2L,		0x0150
.equ OP2H,		0x0152
.equ RES0,		0x0154
.equ RES1,		0x0156
.equ RES2,		0x0158
.equ RES3,		0x015A

;; Multiplier register addresses: F5 series devices.
;; The F5xxx MCUs provide the same 16-bit and 32-bit multiplies as the
;; second generation hardware, but at different register addresses.
;; The names below are NOT standard TI names: they are the standard
;; names with _F5 appended.
.equ MPY_F5,		0x04C0
.equ MPYS_F5,		0x04C2
.equ MAC_F5,		0x04C4
.equ OP2_F5,		0x04C8
.equ RESLO_F5,		0x04CA
.equ RESHI_F5,		0x04CC
.equ MPY32L_F5,		0x04D0
.equ MPY32H_F5,		0x04D2
.equ MPYS32L_F5,	0x04D4
.equ MPYS32H_F5,	0x04D6
.equ OP2L_F5,		0x04E0
.equ OP2H_F5,		0x04E2
.equ RES0_F5,		0x04E4
.equ RES1_F5,		0x04E6
.equ RES2_F5,		0x04E8
.equ RES3_F5,		0x04EA
315
#if defined (MUL_16)
;; ---------------------------------------------------------------------
;; First generation hardware: 16-bit multiplier.
;; ---------------------------------------------------------------------

	;; int16 = int16 * int16
	start_func	__mulhi2, __mspabi_mpyi, __mspabi_mpyi_hw
	mult16		MPY, OP2, RESLO
	end_func	__mulhi2

	;; int32 = int16 * int16
	start_func	__mulhisi2, __mspabi_mpysl, __mspabi_mpysl_hw
	mult1632	MPYS, OP2, RESLO, RESHI
	end_func	__mulhisi2

	;; uint32 = uint16 * uint16
	start_func	__umulhisi2, __mspabi_mpyul, __mspabi_mpyul_hw
	mult1632	MPY, OP2, RESLO, RESHI
	end_func	__umulhisi2

	;; int32 = int32 * int32, via multiply and accumulate
	start_func	__mulsi2, __mspabi_mpyl, __mspabi_mpyl_hw
	mult32		MPY, OP2, MAC, OP2, RESLO, RESHI
	end_func	__mulsi2

	;; FIXME: There are no hardware implementations of the routines
	;; with a 64-bit result, so forward them to the software versions.
	fake_func	__mulsidi2, __mspabi_mpysll, __mspabi_mpysll_hw
	fake_func	__umulsidi2, __mspabi_mpyull, __mspabi_mpyull_hw
	fake_func	__muldi3, __mspabi_mpyll, __mspabi_mpyll_hw

#elif defined (MUL_32)
;; ---------------------------------------------------------------------
;; Second generation hardware: 32-bit multiplier.
;; ---------------------------------------------------------------------

	;; int16 = int16 * int16
	start_func	__mulhi2, __mspabi_mpyi, __mspabi_mpyi_hw
	mult16		MPY, OP2, RESLO
	end_func	__mulhi2

	;; int32 = int16 * int16
	start_func	__mulhisi2, __mspabi_mpysl, __mspabi_mpysl_hw
	mult1632	MPYS, OP2, RESLO, RESHI
	end_func	__mulhisi2

	;; uint32 = uint16 * uint16
	start_func	__umulhisi2, __mspabi_mpyul, __mspabi_mpyul_hw
	mult1632	MPY, OP2, RESLO, RESHI
	end_func	__umulhisi2

	;; int32 = int32 * int32
	start_func	__mulsi2_hw32, __mspabi_mpyl, __mspabi_mpyl_hw32
	mult32_hw	MPY32L, MPY32H, OP2L, OP2H, RES0, RES1
	end_func	__mulsi2_hw32

	;; int64 = int32 * int32
	start_func	__mulsidi2, __mspabi_mpysll, __mspabi_mpysll_hw32
	mult3264_hw	MPYS32L, MPYS32H, OP2L, OP2H, RES0, RES1, RES2, RES3
	end_func	__mulsidi2

	;; uint64 = uint32 * uint32
	start_func	__umulsidi2, __mspabi_mpyull, __mspabi_mpyull_hw32
	mult3264_hw	MPY32L, MPY32H, OP2L, OP2H, RES0, RES1, RES2, RES3
	end_func	__umulsidi2

	;; FIXME: Add a hardware version of this function.
	fake_func	__muldi3, __mspabi_mpyll, __mspabi_mpyll_hw32

#elif defined (MUL_F5)
;; ---------------------------------------------------------------------
;; F5 series hardware.  The F5xxx MCUs support the same 16-bit and
;; 32-bit multiplies as the second generation hardware, but they are
;; accessed through different memory registers.
;; ---------------------------------------------------------------------

	;; int16 = int16 * int16
	start_func	__mulhi2_f5, __mspabi_mpyi, __mspabi_mpyi_f5hw
	mult16		MPY_F5, OP2_F5, RESLO_F5
	end_func	__mulhi2_f5

	;; int32 = int16 * int16
	start_func	__mulhisi2, __mspabi_mpysl, __mspabi_mpysl_f5hw
	mult1632	MPYS_F5, OP2_F5, RESLO_F5, RESHI_F5
	end_func	__mulhisi2

	;; uint32 = uint16 * uint16
	start_func	__umulhisi2, __mspabi_mpyul, __mspabi_mpyul_f5hw
	mult1632	MPY_F5, OP2_F5, RESLO_F5, RESHI_F5
	end_func	__umulhisi2

	;; int32 = int32 * int32
	start_func	__mulsi2_f5, __mspabi_mpyl, __mspabi_mpyl_f5hw
	mult32_hw	MPY32L_F5, MPY32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5
	end_func	__mulsi2_f5

	;; int64 = int32 * int32
	start_func	__mulsidi2, __mspabi_mpysll, __mspabi_mpysll_f5hw
	mult3264_hw	MPYS32L_F5, MPYS32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5, RES2_F5, RES3_F5
	end_func	__mulsidi2

	;; uint64 = uint32 * uint32
	start_func	__umulsidi2, __mspabi_mpyull, __mspabi_mpyull_f5hw
	mult3264_hw	MPY32L_F5, MPY32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5, RES2_F5, RES3_F5
	end_func	__umulsidi2

	;; FIXME: Add a hardware version of this function.
	fake_func	__muldi3, __mspabi_mpyll, __mspabi_mpyll_f5hw

#else
#error MUL type not defined
#endif
406