1;; ARM ldrd/strd peephole optimizations.
2;;
3;; Copyright (C) 2013-2015 Free Software Foundation, Inc.
4;;
5;; Written by Greta Yorsh <greta.yorsh@arm.com>
6
7;; This file is part of GCC.
8;;
9;; GCC is free software; you can redistribute it and/or modify it
10;; under the terms of the GNU General Public License as published by
11;; the Free Software Foundation; either version 3, or (at your option)
12;; any later version.
13;;
14;; GCC is distributed in the hope that it will be useful, but
15;; WITHOUT ANY WARRANTY; without even the implied warranty of
16;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17;; General Public License for more details.
18;;
19;; You should have received a copy of the GNU General Public License
20;; along with GCC; see the file COPYING3.  If not see
21;; <http://www.gnu.org/licenses/>.
22
23;; The following peephole optimizations identify consecutive memory
24;; accesses, and try to rearrange the operands to enable generation of
25;; ldrd/strd.
26
;; ldrd: combine two adjacent SImode loads into one double-word load.
;; Operands 0 and 1 are the loaded registers, operands 2 and 3 the
;; memory sources.  Only tried when the target has LDRD, the current
;; tuning prefers ldrd/strd, and the function is not optimized for size.
(define_peephole2
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 2 "memory_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 3 "memory_operand" ""))]
  "TARGET_LDRD
   && current_tune->prefer_ldrd_strd
   && !optimize_function_for_size_p (cfun)"
  [(const_int 0)]
{
  /* Nothing to do when gen_operands_ldrd_strd rejects this pair.  */
  if (!gen_operands_ldrd_strd (operands, true, false, false))
    FAIL;

  if (TARGET_ARM)
    {
      /* ARM state needs consecutive registers, so the pair is expressed
         as a single DImode load:
           [(set (reg:DI <regno of operand 0>) (mem:DI <operand 2>))]  */
      rtx reg_pair = gen_rtx_REG (DImode, REGNO (operands[0]));
      rtx wide_mem = adjust_address (operands[2], DImode, 0);
      emit_insn (gen_rtx_SET (VOIDmode, reg_pair, wide_mem));
      DONE;
    }
  else if (TARGET_THUMB2)
    {
      /* Thumb-2: keep both SImode loads and tie them together:
           [(parallel [(set (match_dup 0) (match_dup 2))
                       (set (match_dup 1) (match_dup 3))])]  */
      rtx first_load = gen_rtx_SET (VOIDmode, operands[0], operands[2]);
      rtx second_load = gen_rtx_SET (VOIDmode, operands[1], operands[3]);
      rtvec both = gen_rtvec (2, first_load, second_load);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, both));
      DONE;
    }
})
60
;; strd: combine two adjacent SImode stores into one double-word store.
;; Operands 0 and 1 are the stored registers, operands 2 and 3 the
;; memory destinations.  Enabled under the same conditions as the ldrd
;; peephole: LDRD available, tuning prefers ldrd/strd, not optimizing
;; for size.
(define_peephole2
  [(set (match_operand:SI 2 "memory_operand" "")
        (match_operand:SI 0 "arm_general_register_operand" ""))
   (set (match_operand:SI 3 "memory_operand" "")
        (match_operand:SI 1 "arm_general_register_operand" ""))]
  "TARGET_LDRD
   && current_tune->prefer_ldrd_strd
   && !optimize_function_for_size_p (cfun)"
  [(const_int 0)]
{
  /* Nothing to do when gen_operands_ldrd_strd rejects this pair.  */
  if (!gen_operands_ldrd_strd (operands, false, false, false))
    FAIL;

  if (TARGET_ARM)
    {
      /* ARM state needs consecutive source registers, so the pair is
         expressed as a single DImode store:
           [(set (mem:DI <operand 2>) (reg:DI <regno of operand 0>))]  */
      rtx reg_pair = gen_rtx_REG (DImode, REGNO (operands[0]));
      rtx wide_mem = adjust_address (operands[2], DImode, 0);
      emit_insn (gen_rtx_SET (VOIDmode, wide_mem, reg_pair));
      DONE;
    }
  else if (TARGET_THUMB2)
    {
      /* Thumb-2: keep both SImode stores and tie them together:
           [(parallel [(set (match_dup 2) (match_dup 0))
                       (set (match_dup 3) (match_dup 1))])]  */
      rtx first_store = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
      rtx second_store = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
      rtvec both = gen_rtvec (2, first_store, second_store);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, both));
      DONE;
    }
})
94
95;; The following peepholes reorder registers to enable LDRD/STRD.
;; strd of constants, interleaved form: each constant is stored right
;; after it is loaded (set reg0; store reg0; set reg1; store reg1).
;; The replacement first sets both registers and then performs the two
;; stores as one double-word access.
(define_peephole2
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 4 "const_int_operand" ""))
   (set (match_operand:SI 2 "memory_operand" "")
        (match_dup 0))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 5 "const_int_operand" ""))
   (set (match_operand:SI 3 "memory_operand" "")
        (match_dup 1))]
  "TARGET_LDRD
   && current_tune->prefer_ldrd_strd
   && !optimize_function_for_size_p (cfun)"
  [(const_int 0)]
{
  /* Nothing to do when gen_operands_ldrd_strd rejects these operands.  */
  if (!gen_operands_ldrd_strd (operands, false, true, false))
    FAIL;

  if (TARGET_ARM)
    {
      /* ARM state: load both constants, then store the register pair
         that starts at operand 0 with one DImode store:
           [(set (match_dup 0) (match_dup 4))
            (set (match_dup 1) (match_dup 5))
            (set (mem:DI <operand 2>) (reg:DI <regno of operand 0>))]  */
      rtx reg_pair = gen_rtx_REG (DImode, REGNO (operands[0]));
      rtx wide_mem = adjust_address (operands[2], DImode, 0);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
      emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
      emit_insn (gen_rtx_SET (VOIDmode, wide_mem, reg_pair));
      DONE;
    }
  else if (TARGET_THUMB2)
    {
      /* Thumb-2: load both constants, then emit the paired store:
           [(set (match_dup 0) (match_dup 4))
            (set (match_dup 1) (match_dup 5))
            (parallel [(set (match_dup 2) (match_dup 0))
                       (set (match_dup 3) (match_dup 1))])]  */
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
      emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
      rtx first_store = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
      rtx second_store = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
      rtvec both = gen_rtvec (2, first_store, second_store);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, both));
      DONE;
    }
})
140
;; strd of constants, grouped form: both constants are loaded first and
;; then both are stored (set reg0; set reg1; store reg0; store reg1).
;; The replacement keeps the two constant loads and performs the two
;; stores as one double-word access.
(define_peephole2
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 4 "const_int_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 5 "const_int_operand" ""))
   (set (match_operand:SI 2 "memory_operand" "")
        (match_dup 0))
   (set (match_operand:SI 3 "memory_operand" "")
        (match_dup 1))]
  "TARGET_LDRD
   && current_tune->prefer_ldrd_strd
   && !optimize_function_for_size_p (cfun)"
  [(const_int 0)]
{
  /* Nothing to do when gen_operands_ldrd_strd rejects these operands.  */
  if (!gen_operands_ldrd_strd (operands, false, true, false))
    FAIL;

  if (TARGET_ARM)
    {
      /* ARM state: load both constants, then store the register pair
         that starts at operand 0 with one DImode store:
           [(set (match_dup 0) (match_dup 4))
            (set (match_dup 1) (match_dup 5))
            (set (mem:DI <operand 2>) (reg:DI <regno of operand 0>))]  */
      rtx reg_pair = gen_rtx_REG (DImode, REGNO (operands[0]));
      rtx wide_mem = adjust_address (operands[2], DImode, 0);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
      emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
      emit_insn (gen_rtx_SET (VOIDmode, wide_mem, reg_pair));
      DONE;
    }
  else if (TARGET_THUMB2)
    {
      /* Thumb-2: load both constants, then emit the paired store:
           [(set (match_dup 0) (match_dup 4))
            (set (match_dup 1) (match_dup 5))
            (parallel [(set (match_dup 2) (match_dup 0))
                       (set (match_dup 3) (match_dup 1))])]  */
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
      emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
      rtx first_store = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
      rtx second_store = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
      rtvec both = gen_rtvec (2, first_store, second_store);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, both));
      DONE;
    }
})
185
186;; The following two peephole optimizations are only relevant for ARM
187;; mode where LDRD/STRD require consecutive registers.
188
;; Swap the destination registers of two loads that feed a commutative
;; operation.  The pattern applies only when the two loaded registers
;; are exactly the two inputs of the operation (in either order) and
;; each of them either passes peep2_reg_dead_p at position 3 or is the
;; result register of the operation.
(define_peephole2
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 2 "memory_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 3 "memory_operand" ""))
   (set (match_operand:SI 4 "arm_general_register_operand" "")
        (match_operator:SI 5 "commutative_binary_operator"
          [(match_operand 6 "arm_general_register_operand" "")
           (match_operand 7 "arm_general_register_operand" "")]))]
  "TARGET_LDRD && TARGET_ARM
   && current_tune->prefer_ldrd_strd
   && !optimize_function_for_size_p (cfun)
   && ((rtx_equal_p (operands[0], operands[6])
        && rtx_equal_p (operands[1], operands[7]))
       || (rtx_equal_p (operands[0], operands[7])
           && rtx_equal_p (operands[1], operands[6])))
   && (peep2_reg_dead_p (3, operands[0])
       || rtx_equal_p (operands[0], operands[4]))
   && (peep2_reg_dead_p (3, operands[1])
       || rtx_equal_p (operands[1], operands[4]))"
  [(set (match_dup 0) (match_dup 2))
   (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
{
  /* Nothing to do when gen_operands_ldrd_strd rejects these operands.  */
  if (!gen_operands_ldrd_strd (operands, true, false, true))
    FAIL;

  /* Widen operand 0 and operand 2 to DImode so that the first set of
     the output template becomes a single double-word load.  */
  operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
  operands[2] = adjust_address (operands[2], DImode, 0);
})
220
;; Swap the destination registers of two loads that feed a commutative
;; operation which also clobbers the condition-code register.  The
;; pattern applies only when the two loaded registers are exactly the
;; two inputs of the operation (in either order) and each of them
;; either passes peep2_reg_dead_p at position 3 or is the result
;; register of the operation.
(define_peephole2
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 2 "memory_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 3 "memory_operand" ""))
   (parallel
     [(set (match_operand:SI 4 "arm_general_register_operand" "")
           (match_operator:SI 5 "commutative_binary_operator"
             [(match_operand 6 "arm_general_register_operand" "")
              (match_operand 7 "arm_general_register_operand" "")]))
      (clobber (reg:CC CC_REGNUM))])]
  "TARGET_LDRD && TARGET_ARM
   && current_tune->prefer_ldrd_strd
   && !optimize_function_for_size_p (cfun)
   && ((rtx_equal_p (operands[0], operands[6])
        && rtx_equal_p (operands[1], operands[7]))
       || (rtx_equal_p (operands[0], operands[7])
           && rtx_equal_p (operands[1], operands[6])))
   && (peep2_reg_dead_p (3, operands[0])
       || rtx_equal_p (operands[0], operands[4]))
   && (peep2_reg_dead_p (3, operands[1])
       || rtx_equal_p (operands[1], operands[4]))"
  [(set (match_dup 0) (match_dup 2))
   (parallel
     [(set (match_dup 4)
           (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
      (clobber (reg:CC CC_REGNUM))])]
{
  /* Nothing to do when gen_operands_ldrd_strd rejects these operands.  */
  if (!gen_operands_ldrd_strd (operands, true, false, true))
    FAIL;

  /* Widen operand 0 and operand 2 to DImode so that the first set of
     the output template becomes a single double-word load.  */
  operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
  operands[2] = adjust_address (operands[2], DImode, 0);
})
257
258;; TODO: Handle LDRD/STRD with writeback:
259;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY
260;; (b) Patterns may be followed by an update of the base address.
261