; lib2hw_mul.S revision 1.3
1;   Copyright (C) 2014-2017 Free Software Foundation, Inc.
2;   Contributed by Red Hat.
3;
4; This file is free software; you can redistribute it and/or modify it
5; under the terms of the GNU General Public License as published by the
6; Free Software Foundation; either version 3, or (at your option) any
7; later version.
8;
9; This file is distributed in the hope that it will be useful, but
10; WITHOUT ANY WARRANTY; without even the implied warranty of
11; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12; General Public License for more details.
13;
14; Under Section 7 of GPL version 3, you are granted additional
15; permissions described in the GCC Runtime Library Exception, version
16; 3.1, as published by the Free Software Foundation.
17;
18; You should have received a copy of the GNU General Public License and
19; a copy of the GCC Runtime Library Exception along with this program;
20; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
21; <http://www.gnu.org/licenses/>.
22
23	;;  Macro to start a multiply function.  Each function has three
24	;; names, and hence three entry points - although they all go
25	;; through the same code.  The first name is the version generated
26	;; by GCC.  The second is the MSP430 EABI mandated name for the
27	;; *software* version of the function.  The third is the EABI
28	;; mandated name for the *hardware* version of the function.
29	;;
30	;;  Since we are using the hardware and software names to point
31	;; to the same code this effectively means that we are mapping
32	;; the software function onto the hardware function.  Thus if
33	;; the library containing this code is linked into an application
34	;; (before the libgcc.a library) *all* multiply functions will
35	;; be mapped onto the hardware versions.
36	;;
37	;;  We construct each function in its own section so that linker
38	;; garbage collection can be used to delete any unused functions
39	;; from this file.
.macro start_func gcc_name eabi_soft_name eabi_hard_name
	;; The section is named after the GCC entry point, one section
	;; per function.
	.pushsection .text.\gcc_name,"ax",@progbits
	.p2align 1

	;; Export all three names as function symbols ...
	.global \eabi_hard_name
	.global \eabi_soft_name
	.global \gcc_name
	.type \eabi_hard_name , @function
	.type \eabi_soft_name , @function
	.type \gcc_name , @function

	;; ... and bind every one of them to the same address.
\eabi_hard_name:
\eabi_soft_name:
\gcc_name:
	;; Common prologue.  The status register pushed here is restored
	;; by the end_func macro.
	push.w	sr			; Remember the interrupt state of the caller
	dint				; Mask interrupts while the multiplier is busy
	nop				; Allow for the latency of DINT
.endm
56
57
58	;; End a function started with the start_func macro.
.macro end_func name
	;; Epilogue matching start_func: restore the status register that
	;; start_func pushed, return to the caller, then close the symbol
	;; and the section.
#ifndef __MSP430X_LARGE__
	reti				; Pops the saved SR, then the return address
#else
	pop.w	sr			; Restore the saved interrupt state
	reta				; Return for the large model
#endif
	.size \name , . - \name
	.popsection
.endm
69
70
71	;; Like the start_func macro except that it is used to
72	;; create a false entry point that just jumps to the
73	;; software function (implemented elsewhere).
.macro fake_func gcc_name  eabi_soft_name  eabi_hard_name
	;; Emit a stub, in a section of its own, that is reachable under
	;; both the GCC name and the EABI hardware name and that simply
	;; transfers control to the EABI software routine.
	.pushsection .text.\gcc_name,"ax",@progbits
	.p2align 1
	.global \eabi_hard_name
	.type \eabi_hard_name , @function
\eabi_hard_name:
	.global \gcc_name
	.type \gcc_name , @function
\gcc_name:
	;; The destination has to be an immediate operand.  A bare symbol
	;; selects symbolic addressing, which would load PC from the
	;; memory at the software routine (its first instruction word)
	;; rather than branching to the routine itself.
#ifdef __MSP430X_LARGE__
	BRA	#\eabi_soft_name	; Branch used in the large model
#else
	BR	#\eabi_soft_name	; Branch used otherwise
#endif
	.size \gcc_name , . - \gcc_name
	.popsection
.endm
91
92
.macro mult16 OP1, OP2, RESULT
;* 16 x 16 -> 16 bit multiply on the hardware multiplier:
;*	int16 = int16 * int16
;*
;*   In:   R12 = operand 1, R13 = operand 2
;*   Out:  R12 = product
;*
;* Interrupts are disabled by the function prologue and restored by the
;* epilogue, so the register accesses below are performed atomically.
;*
;*   Registers used:  R12, R13
;*
;* OP1, OP2 and RESULT are the addresses of the multiplier registers.

	mov.w	r12, &\OP1		; First operand
	mov.w	r13, &\OP2		; Second operand; this write starts the multiply
	mov.w	&\RESULT, r12		; Collect the product for the caller
.endm
111
.macro mult1632 OP1, OP2, RESULT_LO, RESULT_HI
;* 16 x 16 -> 32 bit widening multiply on the hardware multiplier:
;*	int32 = int16 * int16
;*	uint32 = uint16 * uint16
;*
;*   In:   R12 = operand 1, R13 = operand 2
;*   Out:  R12 = low word of the product, R13 = high word
;*
;* Whether the multiply is signed or unsigned is decided purely by which
;* operand 1 register address the caller passes as OP1.
;*
;* Interrupts are disabled by the function prologue and restored by the
;* epilogue, so the register accesses below are performed atomically.
;*
;*   Registers used:  R12, R13
;*
;* All macro arguments are addresses of multiplier registers.

	mov.w	r12, &\OP1		; First operand
	mov.w	r13, &\OP2		; Second operand; this write starts the multiply
	mov.w	&\RESULT_LO, r12	; Low half of the product
	mov.w	&\RESULT_HI, r13	; High half of the product
.endm
133
.macro mult32 OP1, OP2, MAC_OP1, MAC_OP2, RESULT_LO, RESULT_HI
;* 32 x 32 -> 32 bit multiply built from one 16-bit multiply followed by
;* two 16-bit multiply-and-accumulate steps:
;*	int32 = int32 * int32
;*
;*   In:   R12 = operand 1 low,  R13 = operand 1 high
;*         R14 = operand 2 low,  R15 = operand 2 high
;*   Out:  R12 = result low,     R13 = result high
;*
;* The low x low product supplies the low result word.  Its high word is
;* copied into the low result register, and the two cross products
;* (low x high and high x low) are then accumulated on top of it to form
;* the high result word.
;*
;* Interrupts are disabled by the function prologue and restored by the
;* epilogue, so the register accesses below are performed atomically.
;*
;*   Registers used:  R12, R13, R14, R15
;*
;* All macro arguments are addresses of multiplier registers.  The order
;* of the instructions matters, because writing an operand 2 register is
;* what starts each operation.

	mov.w	r12, &\OP1		; Operand 1 low
	mov.w	r14, &\OP2		; Operand 2 low; starts low x low
	mov.w	r12, &\MAC_OP1		; Operand 1 low again, for the first accumulate
	mov.w	&\RESULT_LO, r12	; Low word of the answer is now final
	mov.w	&\RESULT_HI, &\RESULT_LO	; High word of low x low becomes the running sum
	mov.w	r15, &\MAC_OP2		; Operand 2 high; accumulates low x high
	mov.w	r13, &\MAC_OP1		; Operand 1 high
	mov.w	r14, &\MAC_OP2		; Operand 2 low; accumulates high x low
	mov.w	&\RESULT_LO, r13	; High word of the answer
.endm
159
160
.macro mult32_hw  OP1_LO  OP1_HI  OP2_LO  OP2_HI  RESULT_LO  RESULT_HI
;* 32 x 32 -> 32 bit multiply on a 32-bit hardware multiplier:
;*	int32 = int32 * int32
;*
;*   In:   R12 = operand 1 low,  R13 = operand 1 high
;*         R14 = operand 2 low,  R15 = operand 2 high
;*   Out:  R12 = result low,     R13 = result high
;*
;* Interrupts are disabled by the function prologue and restored by the
;* epilogue, so the register accesses below are performed atomically.
;*
;*   Registers used:  R12, R13, R14, R15
;*
;* All macro arguments are addresses of multiplier registers.

	mov.w	r12, &\OP1_LO		; Operand 1, low word
	mov.w	r13, &\OP1_HI		; Operand 1, high word
	mov.w	r14, &\OP2_LO		; Operand 2, low word
	mov.w	r15, &\OP2_HI		; Operand 2, high word; starts the multiply
	mov.w	&\RESULT_LO, r12	; Low word of the product
	mov.w	&\RESULT_HI, r13	; High word of the product
.endm
183
.macro mult3264_hw  OP1_LO  OP1_HI  OP2_LO  OP2_HI  RES0 RES1 RES2 RES3
;* 32 x 32 -> 64 bit widening multiply on a 32-bit hardware multiplier:
;*	int64 = int32 * int32
;*	uint64 = uint32 * uint32
;*
;*   In:   R12 = operand 1 low,  R13 = operand 1 high
;*         R14 = operand 2 low,  R15 = operand 2 high
;*   Out:  R12 (lowest word), R13, R14, R15 (highest word)
;*
;* Whether the multiply is signed or unsigned is decided purely by which
;* operand 1 register addresses the caller passes.
;*
;* Interrupts are disabled by the function prologue and restored by the
;* epilogue, so the register accesses below are performed atomically.
;*
;*   Registers used:  R12, R13, R14, R15
;*
;* All macro arguments are addresses of multiplier registers.

	mov.w	r12, &\OP1_LO		; Operand 1, low word
	mov.w	r13, &\OP1_HI		; Operand 1, high word
	mov.w	r14, &\OP2_LO		; Operand 2, low word
	mov.w	r15, &\OP2_HI		; Operand 2, high word; starts the multiply
	mov.w	&\RES0, r12		; Product bits 0-15
	mov.w	&\RES1, r13		; Product bits 16-31
	mov.w	&\RES2, r14		; Product bits 32-47
	mov.w	&\RES3, r15		; Product bits 48-63
.endm
209
210
211;; EABI mandated names:
212;;
213;; int16 __mspabi_mpyi (int16 x, int16 y)
214;;            Multiply int by int.
215;; int16 __mspabi_mpyi_hw (int16 x, int16 y)
216;;            Multiply int by int. Uses hardware MPY16 or MPY32.
217;; int16 __mspabi_mpyi_f5hw (int16 x, int16 y)
218;;            Multiply int by int. Uses hardware MPY32 (F5xx devices and up).
219;;
220;; int32 __mspabi_mpyl (int32 x, int32 y);
221;;  	      Multiply long by long.
222;; int32 __mspabi_mpyl_hw (int32 x, int32 y)
223;; 	      Multiply long by long. Uses hardware MPY16.
224;; int32 __mspabi_mpyl_hw32 (int32 x, int32 y)
225;; 	      Multiply long by long. Uses hardware MPY32 (F4xx devices).
226;; int32 __mspabi_mpyl_f5hw (int32 x, int32 y)
227;; 	      Multiply long by long. Uses hardware MPY32 (F5xx devices and up).
228;;
229;; int64 __mspabi_mpyll (int64 x, int64 y)
230;; 	      Multiply long long by long long.
231;; int64 __mspabi_mpyll_hw (int64 x, int64 y)
232;; 	      Multiply long long by long long. Uses hardware MPY16.
233;; int64 __mspabi_mpyll_hw32 (int64 x, int64 y)
234;; 	      Multiply long long by long long. Uses hardware MPY32 (F4xx devices).
235;; int64 __mspabi_mpyll_f5hw (int64 x, int64 y)
236;; 	      Multiply long long by long long. Uses hardware MPY32 (F5xx devices and up).
237;;
238;; int32 __mspabi_mpysl (int16 x, int16 y)
239;;            Multiply int by int; result is long.
240;; int32 __mspabi_mpysl_hw(int16 x, int16 y)
241;; 	      Multiply int by int; result is long. Uses hardware MPY16 or MPY32
242;; int32 __mspabi_mpysl_f5hw(int16 x, int16 y)
243;; 	      Multiply int by int; result is long. Uses hardware MPY32 (F5xx devices and up).
244;;
245;; int64 __mspabi_mpysll(int32 x, int32 y)
246;;            Multiply long by long; result is long long.
247;; int64 __mspabi_mpysll_hw(int32 x, int32 y)
248;; 	      Multiply long by long; result is long long. Uses hardware MPY16.
249;; int64 __mspabi_mpysll_hw32(int32 x, int32 y)
250;; 	      Multiply long by long; result is long long. Uses hardware MPY32 (F4xx devices).
251;; int64 __mspabi_mpysll_f5hw(int32 x, int32 y)
252;; 	      Multiply long by long; result is long long. Uses hardware MPY32 (F5xx devices and up).
253;;
254;; uint32 __mspabi_mpyul(uint16 x, uint16 y)
255;; 	      Multiply unsigned int by unsigned int; result is unsigned long.
256;; uint32 __mspabi_mpyul_hw(uint16 x, uint16 y)
257;; 	      Multiply unsigned int by unsigned int; result is unsigned long. Uses hardware MPY16 or MPY32
258;; uint32 __mspabi_mpyul_f5hw(uint16 x, uint16 y)
259;; 	      Multiply unsigned int by unsigned int; result is unsigned long. Uses hardware MPY32 (F5xx devices and up).
260;;
261;; uint64 __mspabi_mpyull(uint32 x, uint32 y)
262;; 	      Multiply unsigned long by unsigned long; result is unsigned long long.
263;; uint64 __mspabi_mpyull_hw(uint32 x, uint32 y)
264;; 	      Multiply unsigned long by unsigned long; result is unsigned long long. Uses hardware MPY16
265;; uint64 __mspabi_mpyull_hw32(uint32 x, uint32 y)
266;; 	      Multiply unsigned long by unsigned long; result is unsigned long long. Uses hardware MPY32 (F4xx devices).
;; uint64 __mspabi_mpyull_f5hw(uint32 x, uint32 y)
268;;            Multiply unsigned long by unsigned long; result is unsigned long long. Uses hardware MPY32 (F5xx devices and up)
269
270
271
;; Addresses of the 16-bit hardware multiplier registers, as used by the
;; first and second generation routines below.
.equ MPY_OP1,   0x0130		; Operand 1 for the plain and unsigned multiplies
.equ MPY_OP1_S, 0x0132		; Operand 1 for the signed widening multiply
.equ MAC_OP1,   0x0134		; Operand 1 for multiply and accumulate
.equ MPY_OP2,   0x0138		; Operand 2; writing it starts the operation
.equ MAC_OP2,   0x0138		; Same address as MPY_OP2
.equ RESULT_LO, 0x013A		; Low word of the result
.equ RESULT_HI, 0x013C		; High word of the result
279
;; Instantiate the multiply routines for the selected multiplier.  Each
;; start_func line lists the GCC name, the EABI software name and the
;; EABI hardware name of one routine; every EABI name begins with two
;; underscores.

#if defined MUL_16
;;  First generation MSP430 hardware multiplies ...

	start_func __mulhi2 __mspabi_mpyi  __mspabi_mpyi_hw
	mult16 MPY_OP1, MPY_OP2, RESULT_LO
	end_func   __mulhi2

	;; Signed widening multiply: uses the signed operand 1 register.
	start_func __mulsihi2  __mspabi_mpysl  __mspabi_mpysl_hw
	mult1632 MPY_OP1_S, MPY_OP2, RESULT_LO, RESULT_HI
	end_func   __mulsihi2

	;; Unsigned widening multiply.
	start_func __umulsihi2  __mspabi_mpyul  __mspabi_mpyul_hw
	mult1632 MPY_OP1, MPY_OP2, RESULT_LO, RESULT_HI
	end_func   __umulsihi2

	start_func __mulsi2  __mspabi_mpyl  __mspabi_mpyl_hw
	mult32 MPY_OP1, MPY_OP2, MAC_OP1, MAC_OP2, RESULT_LO, RESULT_HI
	end_func   __mulsi2

	;; FIXME: We do not have hardware implementations of these
	;; routines, so just jump to the software versions instead.
	fake_func __muldisi2   __mspabi_mpysll  __mspabi_mpysll_hw
	fake_func __umuldisi2  __mspabi_mpyull  __mspabi_mpyull_hw
	fake_func __muldi3     __mspabi_mpyll   __mspabi_mpyll_hw

#elif defined MUL_32
;;  Second generation MSP430 hardware multiplies ...

	start_func __mulhi2  __mspabi_mpyi  __mspabi_mpyi_hw
	mult16 MPY_OP1, MPY_OP2, RESULT_LO
	end_func   __mulhi2

	;; Signed widening multiply: uses the signed operand 1 register.
	start_func __mulsihi2  __mspabi_mpysl  __mspabi_mpysl_hw
	mult1632 MPY_OP1_S, MPY_OP2, RESULT_LO, RESULT_HI
	end_func   __mulsihi2

	;; Unsigned widening multiply.
	start_func __umulsihi2  __mspabi_mpyul  __mspabi_mpyul_hw
	mult1632 MPY_OP1, MPY_OP2, RESULT_LO, RESULT_HI
	end_func   __umulsihi2

	start_func __mulsi2_hw32  __mspabi_mpyl  __mspabi_mpyl_hw32
	mult32_hw 0x0140, 0x0142, 0x0150, 0x0152, 0x0154, 0x0156
	end_func   __mulsi2_hw32

	;; Signed 32 x 32 -> 64: operand 1 goes to 0x0144/0x0146.
	start_func __muldisi2  __mspabi_mpysll  __mspabi_mpysll_hw32
	mult3264_hw 0x0144, 0x0146, 0x0150, 0x0152, 0x0154, 0x0156, 0x0158, 0x015A
	end_func   __muldisi2

	;; Unsigned 32 x 32 -> 64: operand 1 goes to 0x0140/0x0142.
	start_func __umuldisi2 __mspabi_mpyull  __mspabi_mpyull_hw32
	mult3264_hw 0x0140, 0x0142, 0x0150, 0x0152, 0x0154, 0x0156, 0x0158, 0x015A
	end_func   __umuldisi2

	;; FIXME: Add a hardware version of this function.
	fake_func __muldi3    __mspabi_mpyll  __mspabi_mpyll_hw32

#elif defined MUL_F5
/* The F5xxx series of MCUs support the same 16-bit and 32-bit multiply
   as the second generation hardware, but they are accessed from different
   memory registers.  */

	start_func __mulhi2_f5 __mspabi_mpyi  __mspabi_mpyi_f5hw
	mult16 0x04C0, 0x04C8, 0x04CA
	end_func   __mulhi2_f5

	;; Signed widening multiply: operand 1 goes to 0x04C2.
	start_func __mulsihi2  __mspabi_mpysl  __mspabi_mpysl_f5hw
	mult1632 0x04C2, 0x04C8, 0x04CA, 0x04CC
	end_func   __mulsihi2

	;; Unsigned widening multiply: operand 1 goes to 0x04C0.
	start_func __umulsihi2  __mspabi_mpyul  __mspabi_mpyul_f5hw
	mult1632 0x04C0, 0x04C8, 0x04CA, 0x04CC
	end_func   __umulsihi2

	start_func __mulsi2_f5  __mspabi_mpyl  __mspabi_mpyl_f5hw
	mult32_hw 0x04D0, 0x04D2, 0x04E0, 0x04E2, 0x04E4, 0x04E6
	end_func   __mulsi2_f5

	;; Signed 32 x 32 -> 64: operand 1 goes to 0x04D4/0x04D6.
	start_func __muldisi2  __mspabi_mpysll  __mspabi_mpysll_f5hw
	mult3264_hw 0x04D4, 0x04D6, 0x04E0, 0x04E2, 0x04E4, 0x04E6, 0x04E8, 0x04EA
	end_func   __muldisi2

	;; Unsigned 32 x 32 -> 64: operand 1 goes to 0x04D0/0x04D2.
	start_func __umuldisi2  __mspabi_mpyull  __mspabi_mpyull_f5hw
	mult3264_hw 0x04D0, 0x04D2, 0x04E0, 0x04E2, 0x04E4, 0x04E6, 0x04E8, 0x04EA
	end_func   __umuldisi2

	;; FIXME: Add a hardware version of this function.
	fake_func __muldi3   __mspabi_mpyll __mspabi_mpyll_f5hw

#else
#error MUL type not defined
#endif
370