lib2hw_mul.S revision 1.1
1;   Copyright (C) 2014-2015 Free Software Foundation, Inc.
2;   Contributed by Red Hat.
3;
4; This file is free software; you can redistribute it and/or modify it
5; under the terms of the GNU General Public License as published by the
6; Free Software Foundation; either version 3, or (at your option) any
7; later version.
8;
9; This file is distributed in the hope that it will be useful, but
10; WITHOUT ANY WARRANTY; without even the implied warranty of
11; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12; General Public License for more details.
13;
14; Under Section 7 of GPL version 3, you are granted additional
15; permissions described in the GCC Runtime Library Exception, version
16; 3.1, as published by the Free Software Foundation.
17;
18; You should have received a copy of the GNU General Public License and
19; a copy of the GCC Runtime Library Exception along with this program;
20; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
21; <http://www.gnu.org/licenses/>.
22
;* start_func NAME
;*
;* Open the global function NAME in a section of its own (.text.NAME)
;* and emit the common entry sequence: the status register is pushed
;* onto the stack and interrupts are then disabled.  The end_func macro
;* emits the matching exit sequence and closes the section again.
.macro start_func name
	.pushsection .text.\name,"ax",@progbits
	.global \name
	.type \name , @function
	.align 2
\name:
	push.w	sr			; Keep the incoming interrupt state on the stack
	dint				; Interrupts off while the multiplier is in use
	nop				; Padding for the latency of DINT
.endm
33
;* end_func NAME
;*
;* Close a function opened with start_func: put back the status register
;* that start_func pushed, return to the caller, record the size of NAME
;* and switch back to the previously active section.
.macro end_func name
#if defined (__MSP430X_LARGE__)
	pop.w	sr			; Put back the status register saved on entry
	reta				; Large model return
#else
	reti				; Pops the saved status register, then returns
#endif
	.size \name , . - \name
	.popsection
.endm
44
.macro mult16 OP1, OP2, RESULT
;* 16-bit hardware multiply with a 16-bit result:
;*	int16 = int16 * int16
;*
;*   In:   R12 = operand 1, R13 = operand 2
;*   Out:  R12 = result
;*
;*   Registers used:  R12, R13
;*
;* OP1, OP2 and RESULT are the memory locations of the hardware
;* multiplier registers.  Storing to OP2 is what starts the multiply.
;*
;* The sequence has to run atomically.  It relies on the surrounding
;* routine disabling interrupts on entry and restoring the interrupt
;* state on exit.

	mov.w	r12, &\OP1		; Operand 1 goes into the multiplier
	mov.w	r13, &\OP2		; Operand 2 goes in and triggers the MPY
	mov.w	&\RESULT, r12		; Product becomes the return value
.endm
63
.macro mult1632 OP1, OP2, RESULT_LO, RESULT_HI
;* 16-bit hardware multiply with a 32-bit result:
;*	int32 = int16 * int16
;*	uint32 = uint16 * uint16
;*
;*   In:   R12 = operand 1, R13 = operand 2
;*   Out:  R12 = low word of the result, R13 = high word of the result
;*
;*   Registers used:  R12, R13
;*
;* OP1, OP2, RESULT_LO and RESULT_HI are the memory locations of the
;* hardware multiplier registers.  Whether the multiply is signed or
;* unsigned is decided by the operand 1 register passed as OP1.
;*
;* The sequence has to run atomically.  It relies on the surrounding
;* routine disabling interrupts on entry and restoring the interrupt
;* state on exit.

	mov.w	r12, &\OP1		; Operand 1 goes into the multiplier
	mov.w	r13, &\OP2		; Operand 2 goes in and triggers the MPY
	mov.w	&\RESULT_LO, r12	; Low word of the product for return
	mov.w	&\RESULT_HI, r13	; High word of the product for return
.endm
85
.macro mult32 OP1, OP2, MAC_OP1, MAC_OP2, RESULT_LO, RESULT_HI
;* 32-bit multiply with a 32-bit result, built from the 16-bit hardware
;* multiply and the 16-bit multiply and accumulate:
;*	int32 = int32 * int32
;*
;*   In:   R12 = operand 1 low,  R13 = operand 1 high
;*         R14 = operand 2 low,  R15 = operand 2 high
;*   Out:  R12 = result low,     R13 = result high
;*
;*   Registers used:  R12, R13, R14, R15
;*
;* Method: the low words are multiplied first.  The low word of that
;* product is the low word of the result.  Its high word is moved down
;* into the low result register, and the two cross products (op1 low by
;* op2 high, op1 high by op2 low) are then accumulated on top of it.
;* The low result register then holds the high word of the result.
;*
;* The macro arguments are the memory locations of the hardware
;* multiplier registers.
;*
;* The sequence has to run atomically.  It relies on the surrounding
;* routine disabling interrupts on entry and restoring the interrupt
;* state on exit.

	mov.w	r12, &\OP1		; Operand 1 low into the multiplier
	mov.w	r14, &\OP2		; Operand 2 low, triggers low by low MPY
	mov.w	r12, &\MAC_OP1		; Operand 1 low again, now as MAC operand
	mov.w	&\RESULT_LO, r12	; Low word of the result is final
	mov.w	&\RESULT_HI, &\RESULT_LO ; High word of the product moves to low
	mov.w	r15, &\MAC_OP2		; Operand 2 high, triggers MAC
	mov.w	r13, &\MAC_OP1		; Operand 1 high as MAC operand
	mov.w	r14, &\MAC_OP2		; Operand 2 low, triggers MAC
	mov.w	&\RESULT_LO, r13	; Accumulated word is the result high word
.endm
111
112
.macro mult32_hw OP1_LO, OP1_HI, OP2_LO, OP2_HI, RESULT_LO, RESULT_HI
;* 32-bit hardware multiply with a 32-bit result:
;*	int32 = int32 * int32
;*
;*   In:   R12 = operand 1 low,  R13 = operand 1 high
;*         R14 = operand 2 low,  R15 = operand 2 high
;*   Out:  R12 = result low,     R13 = result high
;*
;*   Registers used:  R12, R13, R14, R15
;*
;* The macro arguments are the memory locations of the hardware
;* multiplier registers.  Storing to OP2_HI is what starts the multiply.
;*
;* The sequence has to run atomically.  It relies on the surrounding
;* routine disabling interrupts on entry and restoring the interrupt
;* state on exit.

	mov.w	r12, &\OP1_LO		; Operand 1 low into the multiplier
	mov.w	r13, &\OP1_HI		; Operand 1 high into the multiplier
	mov.w	r14, &\OP2_LO		; Operand 2 low into the multiplier
	mov.w	r15, &\OP2_HI		; Operand 2 high, triggers the MPY
	mov.w	&\RESULT_LO, r12	; Low 16 bits of the product for return
	mov.w	&\RESULT_HI, r13	; High 16 bits of the product for return
.endm
135
.macro mult3264_hw OP1_LO, OP1_HI, OP2_LO, OP2_HI, RES0, RES1, RES2, RES3
;* 32-bit hardware multiply with a 64-bit result:
;*	int64 = int32 * int32
;*	uint64 = uint32 * uint32
;*
;*   In:   R12 = operand 1 low,  R13 = operand 1 high
;*         R14 = operand 2 low,  R15 = operand 2 high
;*   Out:  R12, R13, R14, R15 = result, lowest word in R12
;*
;*   Registers used:  R12, R13, R14, R15
;*
;* The macro arguments are the memory locations of the hardware
;* multiplier registers.  Storing to OP2_HI is what starts the multiply.
;* Whether the multiply is signed or unsigned is decided by the operand 1
;* registers passed as OP1_LO and OP1_HI.
;*
;* The sequence has to run atomically.  It relies on the surrounding
;* routine disabling interrupts on entry and restoring the interrupt
;* state on exit.

	mov.w	r12, &\OP1_LO		; Operand 1 low into the multiplier
	mov.w	r13, &\OP1_HI		; Operand 1 high into the multiplier
	mov.w	r14, &\OP2_LO		; Operand 2 low into the multiplier
	mov.w	r15, &\OP2_HI		; Operand 2 high, triggers the MPY
	mov.w	&\RES0, r12		; Bits 0-15 of the product
	mov.w	&\RES1, r13		; Bits 16-31 of the product
	mov.w	&\RES2, r14		; Bits 32-47 of the product
	mov.w	&\RES3, r15		; Bits 48-63 of the product
.endm
161
162
;;  First generation MSP430 hardware multiplier: register addresses.

.equ MPY_OP1,   0x0130		; Operand 1, unsigned multiply
.equ MPY_OP1_S, 0x0132		; Operand 1, signed multiply
.equ MAC_OP1,   0x0134		; Operand 1, multiply and accumulate
.equ MPY_OP2,   0x0138		; Operand 2
.equ MAC_OP2,   0x0138		; Operand 2 (same register as MPY_OP2)
.equ RESULT_LO, 0x013A		; Low word of the result
.equ RESULT_HI, 0x013C		; High word of the result

;;  int16 = int16 * int16
	start_func __mulhi2
	mult16 MPY_OP1, MPY_OP2, RESULT_LO
	end_func __mulhi2

;;  int32 = int16 * int16
	start_func __mulsihi2
	mult1632 MPY_OP1_S, MPY_OP2, RESULT_LO, RESULT_HI
	end_func __mulsihi2

;;  uint32 = uint16 * uint16
	start_func __umulsihi2
	mult1632 MPY_OP1, MPY_OP2, RESULT_LO, RESULT_HI
	end_func __umulsihi2

;;  int32 = int32 * int32, using multiply and accumulate
	start_func __mulsi2
	mult32 MPY_OP1, MPY_OP2, MAC_OP1, MAC_OP2, RESULT_LO, RESULT_HI
	end_func __mulsi2
188
;;  32-bit hardware multiplier: register addresses.
;;  Named here so that the invocations below follow the same convention
;;  as the first generation multiplier instead of using bare addresses.

.set MPY32_OP1_LO,   0x0140	; Operand 1 low,  unsigned multiply
.set MPY32_OP1_HI,   0x0142	; Operand 1 high, unsigned multiply
.set MPY32_OP1_S_LO, 0x0144	; Operand 1 low,  signed multiply
.set MPY32_OP1_S_HI, 0x0146	; Operand 1 high, signed multiply
.set MPY32_OP2_LO,   0x0150	; Operand 2 low
.set MPY32_OP2_HI,   0x0152	; Operand 2 high
.set MPY32_RES0,     0x0154	; Result, bits 0-15
.set MPY32_RES1,     0x0156	; Result, bits 16-31
.set MPY32_RES2,     0x0158	; Result, bits 32-47
.set MPY32_RES3,     0x015A	; Result, bits 48-63

;;  int32 = int32 * int32
	start_func __mulsi2_hw32
	mult32_hw MPY32_OP1_LO, MPY32_OP1_HI, MPY32_OP2_LO, MPY32_OP2_HI, MPY32_RES0, MPY32_RES1
	end_func __mulsi2_hw32

;;  int64 = int32 * int32
	start_func __muldisi2_hw32
	mult3264_hw MPY32_OP1_S_LO, MPY32_OP1_S_HI, MPY32_OP2_LO, MPY32_OP2_HI, MPY32_RES0, MPY32_RES1, MPY32_RES2, MPY32_RES3
	end_func __muldisi2_hw32

;;  uint64 = uint32 * uint32
	start_func __umuldisi2_hw32
	mult3264_hw MPY32_OP1_LO, MPY32_OP1_HI, MPY32_OP2_LO, MPY32_OP2_HI, MPY32_RES0, MPY32_RES1, MPY32_RES2, MPY32_RES3
	end_func __umuldisi2_hw32
200
/* The F5xxx series of MCUs support the same 16-bit hardware
   multiply, but it is accessed from different memory registers.
   The register addresses are named here so that the invocations
   below do not rely on bare numeric addresses.  */

.set F5_MPY_OP1,        0x04C0	; 16-bit operand 1, unsigned multiply
.set F5_MPY_OP1_S,      0x04C2	; 16-bit operand 1, signed multiply
.set F5_MPY_OP2,        0x04C8	; 16-bit operand 2
.set F5_RESULT_LO,      0x04CA	; 16-bit multiply result, low word
.set F5_RESULT_HI,      0x04CC	; 16-bit multiply result, high word
.set F5_MPY32_OP1_LO,   0x04D0	; 32-bit operand 1 low,  unsigned multiply
.set F5_MPY32_OP1_HI,   0x04D2	; 32-bit operand 1 high, unsigned multiply
.set F5_MPY32_OP1_S_LO, 0x04D4	; 32-bit operand 1 low,  signed multiply
.set F5_MPY32_OP1_S_HI, 0x04D6	; 32-bit operand 1 high, signed multiply
.set F5_MPY32_OP2_LO,   0x04E0	; 32-bit operand 2 low
.set F5_MPY32_OP2_HI,   0x04E2	; 32-bit operand 2 high
.set F5_MPY32_RES0,     0x04E4	; 32-bit multiply result, bits 0-15
.set F5_MPY32_RES1,     0x04E6	; 32-bit multiply result, bits 16-31
.set F5_MPY32_RES2,     0x04E8	; 32-bit multiply result, bits 32-47
.set F5_MPY32_RES3,     0x04EA	; 32-bit multiply result, bits 48-63

;;  int16 = int16 * int16
	start_func __mulhi2_f5
	mult16 F5_MPY_OP1, F5_MPY_OP2, F5_RESULT_LO
	end_func __mulhi2_f5

;;  int32 = int16 * int16
	start_func __mulsihi2_f5
	mult1632 F5_MPY_OP1_S, F5_MPY_OP2, F5_RESULT_LO, F5_RESULT_HI
	end_func __mulsihi2_f5

;;  uint32 = uint16 * uint16
	start_func __umulsihi2_f5
	mult1632 F5_MPY_OP1, F5_MPY_OP2, F5_RESULT_LO, F5_RESULT_HI
	end_func __umulsihi2_f5

;;  int32 = int32 * int32
	start_func __mulsi2_f5
	mult32_hw F5_MPY32_OP1_LO, F5_MPY32_OP1_HI, F5_MPY32_OP2_LO, F5_MPY32_OP2_HI, F5_MPY32_RES0, F5_MPY32_RES1
	end_func __mulsi2_f5

;;  int64 = int32 * int32
	start_func __muldisi2_f5
	mult3264_hw F5_MPY32_OP1_S_LO, F5_MPY32_OP1_S_HI, F5_MPY32_OP2_LO, F5_MPY32_OP2_HI, F5_MPY32_RES0, F5_MPY32_RES1, F5_MPY32_RES2, F5_MPY32_RES3
	end_func __muldisi2_f5

;;  uint64 = uint32 * uint32
	start_func __umuldisi2_f5
	mult3264_hw F5_MPY32_OP1_LO, F5_MPY32_OP1_HI, F5_MPY32_OP2_LO, F5_MPY32_OP2_HI, F5_MPY32_RES0, F5_MPY32_RES1, F5_MPY32_RES2, F5_MPY32_RES3
	end_func __umuldisi2_f5
227