1;; Copyright (C) 2011-2015 Free Software Foundation, Inc.
2;;
3;; This file is part of GCC.
4;;
5;; GCC is free software; you can redistribute it and/or modify it
6;; under the terms of the GNU General Public License as published
7;; by the Free Software Foundation; either version 3, or (at your
8;; option) any later version.
9;;
10;; GCC is distributed in the hope that it will be useful, but WITHOUT
11;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
13;; License for more details.
14;;
15;; You should have received a copy of the GNU General Public License
16;; along with GCC; see the file COPYING3.  If not see
17;; <http://www.gnu.org/licenses/>.
18;;
19;; This file contains ARM instructions that support fixed-point operations.
20
;; Non-saturating addition for every mode in the FIXED iterator, emitted as
;; a plain ADD.  The first alternative uses the "l" register constraint and
;; is the only one marked as usable in a short IT block; the second accepts
;; any "r" register.
(define_insn "add<mode>3"
  [(set (match_operand:FIXED 0 "s_register_operand" "=l,r")
        (plus:FIXED
          (match_operand:FIXED 1 "s_register_operand" "l,r")
          (match_operand:FIXED 2 "s_register_operand" "l,r")))]
  "TARGET_32BIT"
  "add%?\\t%0, %1, %2"
  [(set_attr "type" "alu_sreg")
   (set_attr "predicable" "yes")
   (set_attr "predicable_short_it" "yes,no")])
30
;; Non-saturating addition for the modes in the ADDSUB iterator, emitted as
;; a single "sadd<qaddsub_suf>" instruction.
(define_insn "add<mode>3"
  [(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
        (plus:ADDSUB
          (match_operand:ADDSUB 1 "s_register_operand" "r")
          (match_operand:ADDSUB 2 "s_register_operand" "r")))]
  "TARGET_INT_SIMD"
  "sadd<qaddsub_suf>%?\\t%0, %1, %2"
  [(set_attr "type" "alu_dsp_reg")
   (set_attr "predicable" "yes")
   (set_attr "predicable_short_it" "no")])
40
;; Unsigned saturating addition (us_plus) for the modes in the UQADDSUB
;; iterator, emitted as a single "uqadd<qaddsub_suf>" instruction.
(define_insn "usadd<mode>3"
  [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r")
        (us_plus:UQADDSUB
          (match_operand:UQADDSUB 1 "s_register_operand" "r")
          (match_operand:UQADDSUB 2 "s_register_operand" "r")))]
  "TARGET_INT_SIMD"
  "uqadd<qaddsub_suf>%?\\t%0, %1, %2"
  [(set_attr "type" "alu_dsp_reg")
   (set_attr "predicable" "yes")
   (set_attr "predicable_short_it" "no")])
50
;; Signed saturating addition (ss_plus) for the modes in the QADDSUB
;; iterator, emitted as a single "qadd<qaddsub_suf>" instruction.
(define_insn "ssadd<mode>3"
  [(set (match_operand:QADDSUB 0 "s_register_operand" "=r")
        (ss_plus:QADDSUB
          (match_operand:QADDSUB 1 "s_register_operand" "r")
          (match_operand:QADDSUB 2 "s_register_operand" "r")))]
  "TARGET_INT_SIMD"
  "qadd<qaddsub_suf>%?\\t%0, %1, %2"
  [(set_attr "type" "alu_dsp_reg")
   (set_attr "predicable" "yes")
   (set_attr "predicable_short_it" "no")])
60
;; Non-saturating subtraction (operand 1 minus operand 2) for every mode in
;; the FIXED iterator, emitted as a plain SUB.  The first alternative uses
;; the "l" register constraint and is the only one marked as usable in a
;; short IT block; the second accepts any "r" register.
(define_insn "sub<mode>3"
  [(set (match_operand:FIXED 0 "s_register_operand" "=l,r")
        (minus:FIXED
          (match_operand:FIXED 1 "s_register_operand" "l,r")
          (match_operand:FIXED 2 "s_register_operand" "l,r")))]
  "TARGET_32BIT"
  "sub%?\\t%0, %1, %2"
  [(set_attr "type" "alu_sreg")
   (set_attr "predicable" "yes")
   (set_attr "predicable_short_it" "yes,no")])
70
;; Non-saturating subtraction for the modes in the ADDSUB iterator, emitted
;; as a single "ssub<qaddsub_suf>" instruction.
(define_insn "sub<mode>3"
  [(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
        (minus:ADDSUB
          (match_operand:ADDSUB 1 "s_register_operand" "r")
          (match_operand:ADDSUB 2 "s_register_operand" "r")))]
  "TARGET_INT_SIMD"
  "ssub<qaddsub_suf>%?\\t%0, %1, %2"
  [(set_attr "type" "alu_dsp_reg")
   (set_attr "predicable" "yes")
   (set_attr "predicable_short_it" "no")])
80
;; Unsigned saturating subtraction (us_minus) for the modes in the UQADDSUB
;; iterator, emitted as a single "uqsub<qaddsub_suf>" instruction.
(define_insn "ussub<mode>3"
  [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r")
        (us_minus:UQADDSUB
          (match_operand:UQADDSUB 1 "s_register_operand" "r")
          (match_operand:UQADDSUB 2 "s_register_operand" "r")))]
  "TARGET_INT_SIMD"
  "uqsub<qaddsub_suf>%?\\t%0, %1, %2"
  [(set_attr "type" "alu_dsp_reg")
   (set_attr "predicable" "yes")
   (set_attr "predicable_short_it" "no")])
91
;; Signed saturating subtraction (ss_minus) for the modes in the QADDSUB
;; iterator, emitted as a single "qsub<qaddsub_suf>" instruction.
(define_insn "sssub<mode>3"
  [(set (match_operand:QADDSUB 0 "s_register_operand" "=r")
        (ss_minus:QADDSUB
          (match_operand:QADDSUB 1 "s_register_operand" "r")
          (match_operand:QADDSUB 2 "s_register_operand" "r")))]
  "TARGET_INT_SIMD"
  "qsub<qaddsub_suf>%?\\t%0, %1, %2"
  [(set_attr "type" "alu_dsp_reg")
   (set_attr "predicable" "yes")
   (set_attr "predicable_short_it" "no")])
101
102;; Fractional multiplies.
103
104; Note: none of these do any rounding.
105
;; QQmode multiply.  Both inputs are sign-extended from QImode to HImode,
;; multiplied into an SImode product, and an 8-bit signed field starting at
;; bit 7 of that product is extracted as the result.
(define_expand "mulqq3"
  [(set (match_operand:QQ 0 "s_register_operand" "")
        (mult:QQ
          (match_operand:QQ 1 "s_register_operand" "")
          (match_operand:QQ 2 "s_register_operand" "")))]
  "TARGET_DSP_MULTIPLY && arm_arch_thumb2"
{
  rtx lhs_hi = gen_reg_rtx (HImode);
  rtx rhs_hi = gen_reg_rtx (HImode);
  rtx product = gen_reg_rtx (SImode);
  rtx dest;

  /* Widen each 8-bit input so that the 16x16->32 multiply can be used.  */
  emit_insn (gen_extendqihi2 (lhs_hi, gen_lowpart (QImode, operands[1])));
  emit_insn (gen_extendqihi2 (rhs_hi, gen_lowpart (QImode, operands[2])));
  emit_insn (gen_mulhisi3 (product, lhs_hi, rhs_hi));

  /* Signed extract of 8 bits at bit position 7; no rounding is applied.  */
  dest = gen_lowpart (SImode, operands[0]);
  emit_insn (gen_extv (dest, product, GEN_INT (8), GEN_INT (7)));
  DONE;
})
123
;; HQmode multiply: one 16x16->32 signed multiply followed by a signed
;; bit-field extract of the result.
(define_expand "mulhq3"
  [(set (match_operand:HQ 0 "s_register_operand" "")
        (mult:HQ
          (match_operand:HQ 1 "s_register_operand" "")
          (match_operand:HQ 2 "s_register_operand" "")))]
  "TARGET_DSP_MULTIPLY && arm_arch_thumb2"
{
  rtx product = gen_reg_rtx (SImode);
  rtx dest;

  emit_insn (gen_mulhisi3 (product,
                           gen_lowpart (HImode, operands[1]),
                           gen_lowpart (HImode, operands[2])));

  /* The s.15 * s.15 product is an s.30 value.  Pull the s.15 result out of
     it with a 16-bit signed extract starting at bit 15.  For _Fract values
     this cannot overflow or saturate.  */
  dest = gen_lowpart (SImode, operands[0]);
  emit_insn (gen_extv (dest, product, GEN_INT (16), GEN_INT (15)));
  DONE;
})
141
;; SQmode multiply: a 32x32->64 signed multiply whose 64-bit product is
;; narrowed back to 32 bits by combining shifted halves.
(define_expand "mulsq3"
  [(set (match_operand:SQ 0 "s_register_operand" "")
        (mult:SQ
          (match_operand:SQ 1 "s_register_operand" "")
          (match_operand:SQ 2 "s_register_operand" "")))]
  "TARGET_32BIT && arm_arch3m"
{
  rtx product = gen_reg_rtx (DImode);
  rtx from_low = gen_reg_rtx (SImode);
  rtx from_high = gen_reg_rtx (SImode);

  /* s.31 * s.31 gives an s.62 product.  */
  emit_insn (gen_mulsidi3 (product,
                           gen_lowpart (SImode, operands[1]),
                           gen_lowpart (SImode, operands[2])));

  /* Result = (low word >> 31, logical) | (high word << 1), i.e. bits
     62..31 of the product.  */
  emit_insn (gen_lshrsi3 (from_low, gen_lowpart (SImode, product),
                          GEN_INT (31)));
  emit_insn (gen_ashlsi3 (from_high, gen_highpart (SImode, product),
                          GEN_INT (1)));
  emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]),
                         from_low, from_high));

  DONE;
})
161
162;; Accumulator multiplies.
163
;; SAmode multiply: a 32x32->64 signed multiply whose 64-bit product is
;; narrowed back to 32 bits by combining shifted halves.  No saturation is
;; performed.
(define_expand "mulsa3"
  [(set (match_operand:SA 0 "s_register_operand" "")
        (mult:SA
          (match_operand:SA 1 "s_register_operand" "")
          (match_operand:SA 2 "s_register_operand" "")))]
  "TARGET_32BIT && arm_arch3m"
{
  rtx product = gen_reg_rtx (DImode);
  rtx from_low = gen_reg_rtx (SImode);
  rtx from_high = gen_reg_rtx (SImode);

  emit_insn (gen_mulsidi3 (product,
                           gen_lowpart (SImode, operands[1]),
                           gen_lowpart (SImode, operands[2])));

  /* Result = (low word >> 15, logical) | (high word << 17), i.e. bits
     46..15 of the product.  */
  emit_insn (gen_lshrsi3 (from_low, gen_lowpart (SImode, product),
                          GEN_INT (15)));
  emit_insn (gen_ashlsi3 (from_high, gen_highpart (SImode, product),
                          GEN_INT (17)));
  emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]),
                         from_low, from_high));

  DONE;
})
182
;; USAmode multiply: a 32x32->64 unsigned multiply whose 64-bit product is
;; narrowed back to 32 bits by combining shifted halves.  No saturation is
;; performed.
(define_expand "mulusa3"
  [(set (match_operand:USA 0 "s_register_operand" "")
        (mult:USA
          (match_operand:USA 1 "s_register_operand" "")
          (match_operand:USA 2 "s_register_operand" "")))]
  "TARGET_32BIT && arm_arch3m"
{
  rtx product = gen_reg_rtx (DImode);
  rtx from_low = gen_reg_rtx (SImode);
  rtx from_high = gen_reg_rtx (SImode);

  emit_insn (gen_umulsidi3 (product,
                            gen_lowpart (SImode, operands[1]),
                            gen_lowpart (SImode, operands[2])));

  /* Result = (low word >> 16, logical) | (high word << 16), i.e. bits
     47..16 of the product.  */
  emit_insn (gen_lshrsi3 (from_low, gen_lowpart (SImode, product),
                          GEN_INT (16)));
  emit_insn (gen_ashlsi3 (from_high, gen_highpart (SImode, product),
                          GEN_INT (16)));
  emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]),
                         from_low, from_high));

  DONE;
})
201
202;; The code sequence emitted by this insn pattern uses the Q flag, which GCC
203;; doesn't generally know about, so we don't bother expanding to individual
204;; instructions.  It may be better to just use an out-of-line asm libcall for
205;; this.
206
(define_insn "ssmulsa3"
  [(set (match_operand:SA 0 "s_register_operand" "=r")
        (ss_mult:SA
          (match_operand:SA 1 "s_register_operand" "r")
          (match_operand:SA 2 "s_register_operand" "r")))
   (clobber (match_scratch:DI 3 "=r"))
   (clobber (match_scratch:SI 4 "=r"))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_32BIT && arm_arch6"
{
  /* Full 64-bit signed product, held in the DImode scratch (operand 3):
     s16.15 * s16.15 -> s32.30.  */
  output_asm_insn ("smull\\t%Q3, %R3, %1, %2", operands);

  /* Write zero to the APSR flag bits (Q included) so that a Q flag observed
     after the SSAT below was set by that SSAT.  Outside ARM state the zero
     is first materialised in the SImode scratch (operand 4).  */
  if (!TARGET_ARM)
    {
      output_asm_insn ("mov\\t%4, #0", operands);
      output_asm_insn ("msr\\tAPSR_nzcvq, %4", operands);
    }
  else
    output_asm_insn ("msr\\tAPSR_nzcvq, #0", operands);

  /* Layout of the product (S = sign, i = integral, f = fractional):

       high word (%R3)           low word (%Q3)
       31              0         31              0
     [ S i i ... i i i ]       [ i f f f ... f f ]

     The 32-bit result is a window straddling the two words:

                 [ S i ... i f ... f f ]

     The result needs 16 integral bits, so the high word is saturated at
     its 15th bit.  */
  output_asm_insn ("ssat\\t%R3, #15, %R3", operands);

  /* Read the status register back and test bit 27 (the Q flag).  */
  output_asm_insn ("mrs\\t%4, APSR", operands);
  output_asm_insn ("tst\\t%4, #1<<27", operands);

  /* If saturation happened, overwrite the low word with the bitwise
     complement of the high word's sign fill (%R3, asr #32).  */
  if (!arm_restrict_it)
    {
      if (TARGET_THUMB2)
        output_asm_insn ("it\\tne", operands);
      output_asm_insn ("mvnne\\t%Q3, %R3, asr #32", operands);
    }
  else
    {
      /* With restricted IT blocks the value is computed unconditionally
         into the scratch and only the move is conditional.  */
      output_asm_insn ("mvn\\t%4, %R3, asr #32", operands);
      output_asm_insn ("it\\tne", operands);
      output_asm_insn ("movne\\t%Q3, %4", operands);
    }

  /* Assemble the result: (low word >> 15) | (high word << 17).  */
  output_asm_insn ("mov\\t%0, %Q3, lsr #15", operands);
  output_asm_insn ("orr\\t%0, %0, %R3, asl #17", operands);
  return "";
}
  [(set_attr "type" "multiple")
   (set_attr "conds" "clob")
   (set (attr "length")
        (if_then_else (eq_attr "is_thumb" "yes")
                      (if_then_else (match_test "arm_restrict_it")
                                    (const_int 40)
                                    (const_int 38))
                      (const_int 32)))])
264
;; Like ssmulsa3, this pattern's code sequence uses the Q flag, so it is also
;; kept as a single multi-instruction insn.
266
(define_insn "usmulusa3"
  [(set (match_operand:USA 0 "s_register_operand" "=r")
        (us_mult:USA
          (match_operand:USA 1 "s_register_operand" "r")
          (match_operand:USA 2 "s_register_operand" "r")))
   (clobber (match_scratch:DI 3 "=r"))
   (clobber (match_scratch:SI 4 "=r"))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_32BIT && arm_arch6"
{
  /* Full 64-bit unsigned product, held in the DImode scratch (operand 3):
     16.16 * 16.16 -> 32.32.  */
  output_asm_insn ("umull\\t%Q3, %R3, %1, %2", operands);

  /* Write zero to the APSR flag bits (Q included) so that a Q flag observed
     after the USAT below was set by that USAT.  Outside ARM state the zero
     is first materialised in the SImode scratch (operand 4).  */
  if (!TARGET_ARM)
    {
      output_asm_insn ("mov\\t%4, #0", operands);
      output_asm_insn ("msr\\tAPSR_nzcvq, %4", operands);
    }
  else
    output_asm_insn ("msr\\tAPSR_nzcvq, #0", operands);

  /* Layout of the product (i = integral, f = fractional):

       high word (%R3)           low word (%Q3)
       31              0         31              0
     [ i i i ... i i i ]       [ f f f f ... f f ]

     The 32-bit result is a window straddling the two words:

                 [ i i ... i f ... f f ]

     The result needs 16 integral bits, so the high word is saturated at
     its 16th bit.

     NOTE(review): USAT interprets its source as a signed value, so a high
     word with bit 31 set would presumably be clamped to 0 rather than to
     0xffff, giving a result of 0 instead of the maximum.  Confirm whether
     products of 2^63 or more need separate handling here.  */
  output_asm_insn ("usat\\t%R3, #16, %R3", operands);

  /* Read the status register back and test bit 27 (the Q flag).  */
  output_asm_insn ("mrs\\t%4, APSR", operands);
  output_asm_insn ("tst\\t%4, #1<<27", operands);

  /* If saturation happened, overwrite the low word with bit 15 of the
     saturated high word replicated across all 32 bits.

     NOTE(review): SBFX is documented as an ARMv6T2 instruction, while the
     insn condition only requires arm_arch6.  Confirm this pattern cannot
     be selected for ARM-state ARMv6 cores without Thumb-2.  */
  if (!arm_restrict_it)
    {
      if (TARGET_THUMB2)
        output_asm_insn ("it\\tne", operands);
      output_asm_insn ("sbfxne\\t%Q3, %R3, #15, #1", operands);
    }
  else
    {
      /* With restricted IT blocks the value is computed unconditionally
         into the scratch and only the move is conditional.  */
      output_asm_insn ("sbfx\\t%4, %R3, #15, #1", operands);
      output_asm_insn ("it\\tne", operands);
      output_asm_insn ("movne\\t%Q3, %4", operands);
    }

  /* Assemble the result: (low word >> 16) | (high word << 16).  */
  output_asm_insn ("lsr\\t%0, %Q3, #16", operands);
  output_asm_insn ("orr\\t%0, %0, %R3, asl #16", operands);
  return "";
}
  [(set_attr "type" "multiple")
   (set_attr "conds" "clob")
   (set (attr "length")
        (if_then_else (eq_attr "is_thumb" "yes")
                      (if_then_else (match_test "arm_restrict_it")
                                    (const_int 40)
                                    (const_int 38))
                      (const_int 32)))])
324
;; HAmode multiply: one 16x16->32 signed multiply followed by a signed
;; bit-field extract of 16 bits starting at bit 7 of the product.  No
;; saturation is performed.
(define_expand "mulha3"
  [(set (match_operand:HA 0 "s_register_operand" "")
        (mult:HA
          (match_operand:HA 1 "s_register_operand" "")
          (match_operand:HA 2 "s_register_operand" "")))]
  "TARGET_DSP_MULTIPLY && arm_arch_thumb2"
{
  rtx product = gen_reg_rtx (SImode);
  rtx dest;

  emit_insn (gen_mulhisi3 (product,
                           gen_lowpart (HImode, operands[1]),
                           gen_lowpart (HImode, operands[2])));

  dest = gen_lowpart (SImode, operands[0]);
  emit_insn (gen_extv (dest, product, GEN_INT (16), GEN_INT (7)));

  DONE;
})
340
;; UHAmode multiply.  Both inputs are zero-extended to SImode, multiplied
;; with an ordinary 32-bit multiply, and an unsigned 16-bit field starting
;; at bit 8 of the product is extracted.  No saturation is performed.
;; NOTE(review): unlike mulha3, the condition here does not also test
;; arm_arch_thumb2; confirm that gen_extzv is usable wherever
;; TARGET_DSP_MULTIPLY holds.
(define_expand "muluha3"
  [(set (match_operand:UHA 0 "s_register_operand" "")
        (mult:UHA
          (match_operand:UHA 1 "s_register_operand" "")
          (match_operand:UHA 2 "s_register_operand" "")))]
  "TARGET_DSP_MULTIPLY"
{
  rtx lhs_si = gen_reg_rtx (SImode);
  rtx rhs_si = gen_reg_rtx (SImode);
  rtx product = gen_reg_rtx (SImode);
  rtx dest;

  /* 8.8 * 8.8 -> 16.16 multiply.  */
  emit_insn (gen_zero_extendhisi2 (lhs_si,
                                   gen_lowpart (HImode, operands[1])));
  emit_insn (gen_zero_extendhisi2 (rhs_si,
                                   gen_lowpart (HImode, operands[2])));
  emit_insn (gen_mulsi3 (product, lhs_si, rhs_si));

  /* Take bits 23..8 of the 16.16 product as the 8.8 result.  */
  dest = gen_lowpart (SImode, operands[0]);
  emit_insn (gen_extzv (dest, product, GEN_INT (16), GEN_INT (8)));

  DONE;
})
360
;; Signed saturating HAmode multiply.  The 16x16->32 product is shifted
;; right arithmetically by 7 and then narrowed to HImode with a signed
;; saturating truncate (ss_truncate), all expressed in one SET.
(define_expand "ssmulha3"
  [(set (match_operand:HA 0 "s_register_operand" "")
        (ss_mult:HA
          (match_operand:HA 1 "s_register_operand" "")
          (match_operand:HA 2 "s_register_operand" "")))]
  "TARGET_32BIT && TARGET_DSP_MULTIPLY && arm_arch6"
{
  rtx product = gen_reg_rtx (SImode);
  rtx shifted, saturated, dest;

  emit_insn (gen_mulhisi3 (product,
                           gen_lowpart (HImode, operands[1]),
                           gen_lowpart (HImode, operands[2])));

  /* Build (ss_truncate:HI (ashiftrt:SI product 7)) and store it into the
     HImode view of the destination.  */
  shifted = gen_rtx_ASHIFTRT (SImode, product, GEN_INT (7));
  saturated = gen_rtx_SS_TRUNCATE (HImode, shifted);
  dest = gen_lowpart (HImode, operands[0]);

  emit_insn (gen_rtx_SET (VOIDmode, dest, saturated));

  DONE;
})
380
;; Unsigned saturating UHAmode multiply.  Both inputs are zero-extended to
;; SImode and multiplied; the product is shifted right logically by 8 and
;; then saturated to 16 bits by the arm_usatsihi pattern.
(define_expand "usmuluha3"
  [(set (match_operand:UHA 0 "s_register_operand" "")
        (us_mult:UHA
          (match_operand:UHA 1 "s_register_operand" "")
          (match_operand:UHA 2 "s_register_operand" "")))]
  "TARGET_INT_SIMD"
{
  rtx lhs_si = gen_reg_rtx (SImode);
  rtx rhs_si = gen_reg_rtx (SImode);
  rtx product = gen_reg_rtx (SImode);
  rtx shifted = gen_reg_rtx (SImode);
  rtx dest;

  /* Unsigned multiplies have no smul[bt][bt] counterpart, so widen the
     inputs and use an ordinary 32x32->32-bit multiply.  */
  emit_insn (gen_zero_extendhisi2 (lhs_si,
                                   gen_lowpart (HImode, operands[1])));
  emit_insn (gen_zero_extendhisi2 (rhs_si,
                                   gen_lowpart (HImode, operands[2])));

  emit_insn (gen_mulsi3 (product, lhs_si, rhs_si));

  /* "usat" treats its source operand as signed.  PRODUCT may have its top
     bit set, in which case the "..., asr #8" form of the instruction would
     see a negative value and saturate it to zero.  Do the right shift as a
     separate logical shift so that the value given to "usat" is never
     negative.  */
  emit_insn (gen_lshrsi3 (shifted, product, GEN_INT (8)));

  dest = gen_lowpart (HImode, operands[0]);
  emit_insn (gen_arm_usatsihi (dest, shifted));

  DONE;
})
408
;; Signed saturation of a shifted SImode value to 16 bits, emitted as one
;; SSAT.  Operand 1 is the shift operator (accepted by sat_shift_operator),
;; operand 2 the register being shifted and operand 3 the immediate shift
;; amount; "%S1" in the template emits the shift part of the instruction.
(define_insn "arm_ssatsihi_shift"
  [(set (match_operand:HI 0 "s_register_operand" "=r")
        (ss_truncate:HI
          (match_operator:SI 1 "sat_shift_operator"
            [(match_operand:SI 2 "s_register_operand" "r")
             (match_operand:SI 3 "immediate_operand" "I")])))]
  "TARGET_32BIT && arm_arch6"
  "ssat%?\\t%0, #16, %2%S1"
  [(set_attr "type" "alu_shift_imm")
   (set_attr "shift" "1")
   (set_attr "predicable" "yes")
   (set_attr "predicable_short_it" "no")])
420
;; Unsigned saturation of an SImode register to 16 bits, emitted as one
;; USAT.  Operand 1 carries an explicit "r" constraint, like every other
;; insn operand in this file: with an empty constraint string the register
;; allocator is free to leave any operand in place, so a spilled pseudo
;; could reach the output template as something other than a core register.
(define_insn "arm_usatsihi"
  [(set (match_operand:HI 0 "s_register_operand" "=r")
	(us_truncate:HI (match_operand:SI 1 "s_register_operand" "r")))]
  "TARGET_INT_SIMD"
  "usat%?\\t%0, #16, %1"
  [(set_attr "predicable" "yes")
   (set_attr "predicable_short_it" "no")
   (set_attr "type" "alu_imm")]
)
430