;; ARM ldrd/strd peephole optimizations.
;;
;; Copyright (C) 2013-2015 Free Software Foundation, Inc.
;;
;; Written by Greta Yorsh <greta.yorsh@arm.com>

;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.

;; The following peephole optimizations identify consecutive memory
;; accesses, and try to rearrange the operands to enable generation of
;; ldrd/strd.

;; Two back-to-back SImode loads into general registers.
;; Operands 0/1 are the loaded registers, operands 2/3 the memory
;; references.  The replacement template is only a placeholder: every
;; successful path emits its own insns and finishes with DONE.
;; gen_operands_ldrd_strd decides whether the pair can be combined and
;; may rewrite the operands array; its three flags are presumably
;; (load, const_store, commute) -- confirm against its definition.
(define_peephole2 ; ldrd
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 2 "memory_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 3 "memory_operand" ""))]
  "TARGET_LDRD
     && current_tune->prefer_ldrd_strd
     && !optimize_function_for_size_p (cfun)"
  [(const_int 0)]
{
  if (!gen_operands_ldrd_strd (operands, true, false, false))
    FAIL;
  else if (TARGET_ARM)
    {
      /* In ARM state, the two registers written by LDRD must be
         consecutive, so the pair is expressed as one DImode load.  */
      operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
      operands[2] = adjust_address (operands[2], DImode, 0);
      /* Emit [(set (match_dup 0) (match_dup 2))] */
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2]));
      DONE;
    }
  else if (TARGET_THUMB2)
    {
      /* Emit the pattern:
         [(parallel [(set (match_dup 0) (match_dup 2))
                     (set (match_dup 1) (match_dup 3))])] */
      rtx t1 = gen_rtx_SET (VOIDmode, operands[0], operands[2]);
      rtx t2 = gen_rtx_SET (VOIDmode, operands[1], operands[3]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
      DONE;
    }
  else
    {
      /* Neither ARM nor Thumb-2.  Falling out of the preparation code
         would substitute the placeholder (const_int 0) template for
         the two loads, so give up explicitly instead.  */
      FAIL;
    }
})

;; Two back-to-back SImode stores from general registers.
;; Operands 0/1 are the stored registers, operands 2/3 the memory
;; references.  Structure mirrors the ldrd peephole above, with source
;; and destination of each set exchanged.
(define_peephole2 ; strd
  [(set (match_operand:SI 2 "memory_operand" "")
        (match_operand:SI 0 "arm_general_register_operand" ""))
   (set (match_operand:SI 3 "memory_operand" "")
        (match_operand:SI 1 "arm_general_register_operand" ""))]
  "TARGET_LDRD
     && current_tune->prefer_ldrd_strd
     && !optimize_function_for_size_p (cfun)"
  [(const_int 0)]
{
  if (!gen_operands_ldrd_strd (operands, false, false, false))
    FAIL;
  else if (TARGET_ARM)
    {
      /* In ARM state, the two registers stored by STRD must be
         consecutive, so the pair is expressed as one DImode store.  */
      operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
      operands[2] = adjust_address (operands[2], DImode, 0);
      /* Emit [(set (match_dup 2) (match_dup 0))] */
      emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[0]));
      DONE;
    }
  else if (TARGET_THUMB2)
    {
      /* Emit the pattern:
         [(parallel [(set (match_dup 2) (match_dup 0))
                     (set (match_dup 3) (match_dup 1))])] */
      rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
      rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
      DONE;
    }
  else
    {
      /* Neither ARM nor Thumb-2.  Falling out of the preparation code
         would substitute the placeholder (const_int 0) template for
         the two stores, so give up explicitly instead.  */
      FAIL;
    }
})

;; The following peepholes reorder registers to enable LDRD/STRD.
;; Stores of two constants, interleaved form:
;;   reg0 = const4; mem2 = reg0; reg1 = const5; mem3 = reg1
;; On success both constant moves are emitted first, followed by a
;; single paired store.  The replacement template is only a
;; placeholder: every successful path emits its own insns and finishes
;; with DONE.  gen_operands_ldrd_strd may rewrite the operands array
;; (NOTE(review): presumably including the choice of registers for the
;; constants -- confirm against its definition).
(define_peephole2 ; strd of constants
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 4 "const_int_operand" ""))
   (set (match_operand:SI 2 "memory_operand" "")
        (match_dup 0))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 5 "const_int_operand" ""))
   (set (match_operand:SI 3 "memory_operand" "")
        (match_dup 1))]
  "TARGET_LDRD
     && current_tune->prefer_ldrd_strd
     && !optimize_function_for_size_p (cfun)"
  [(const_int 0)]
{
  if (!gen_operands_ldrd_strd (operands, false, true, false))
    FAIL;
  else if (TARGET_ARM)
    {
      /* The DImode register starting at operand 0 is the source of
         the combined store; it is built after the helper has had the
         chance to update the operands.  */
      rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
      operands[2] = adjust_address (operands[2], DImode, 0);
      /* Emit the pattern:
         [(set (match_dup 0) (match_dup 4))
          (set (match_dup 1) (match_dup 5))
          (set (match_dup 2) tmp)]  */
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
      emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
      emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp));
      DONE;
    }
  else if (TARGET_THUMB2)
    {
      /* Emit the pattern:
         [(set (match_dup 0) (match_dup 4))
          (set (match_dup 1) (match_dup 5))
          (parallel [(set (match_dup 2) (match_dup 0))
                     (set (match_dup 3) (match_dup 1))])]  */
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
      emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
      rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
      rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
      DONE;
    }
  else
    {
      /* Neither ARM nor Thumb-2.  Falling out of the preparation code
         would substitute the placeholder (const_int 0) template for
         the four matched insns, so give up explicitly instead.  */
      FAIL;
    }
})

;; Stores of two constants, grouped form:
;;   reg0 = const4; reg1 = const5; mem2 = reg0; mem3 = reg1
;; Same transformation as the interleaved form; only the order of the
;; matched insns differs.
(define_peephole2 ; strd of constants
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 4 "const_int_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 5 "const_int_operand" ""))
   (set (match_operand:SI 2 "memory_operand" "")
        (match_dup 0))
   (set (match_operand:SI 3 "memory_operand" "")
        (match_dup 1))]
  "TARGET_LDRD
     && current_tune->prefer_ldrd_strd
     && !optimize_function_for_size_p (cfun)"
  [(const_int 0)]
{
  if (!gen_operands_ldrd_strd (operands, false, true, false))
    FAIL;
  else if (TARGET_ARM)
    {
      /* The DImode register starting at operand 0 is the source of
         the combined store; it is built after the helper has had the
         chance to update the operands.  */
      rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
      operands[2] = adjust_address (operands[2], DImode, 0);
      /* Emit the pattern:
         [(set (match_dup 0) (match_dup 4))
          (set (match_dup 1) (match_dup 5))
          (set (match_dup 2) tmp)]  */
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
      emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
      emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp));
      DONE;
    }
  else if (TARGET_THUMB2)
    {
      /* Emit the pattern:
         [(set (match_dup 0) (match_dup 4))
          (set (match_dup 1) (match_dup 5))
          (parallel [(set (match_dup 2) (match_dup 0))
                     (set (match_dup 3) (match_dup 1))])]  */
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4]));
      emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5]));
      rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]);
      rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
      DONE;
    }
  else
    {
      /* Neither ARM nor Thumb-2.  Falling out of the preparation code
         would substitute the placeholder (const_int 0) template for
         the four matched insns, so give up explicitly instead.  */
      FAIL;
    }
})

;; The following two peephole optimizations are only relevant for ARM
;; mode where LDRD/STRD require consecutive registers.

;; Two SImode loads followed by a commutative binary operation.
;; The condition requires that:
;;  - the operation's inputs (operands 6 and 7) are exactly the two
;;    loaded registers, in either order;
;;  - each loaded register is either dead after the three matched
;;    insns or is the operation's destination (operand 4).
;; The replacement is one DImode load followed by the unchanged
;; operation.  The preparation code widens operand 0 to a DImode
;; register and operand 2 to a DImode memory reference.
(define_peephole2 ; swap the destination registers of two loads
                  ; before a commutative operation.
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 2 "memory_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 3 "memory_operand" ""))
   (set (match_operand:SI 4 "arm_general_register_operand" "")
        (match_operator:SI 5 "commutative_binary_operator"
          [(match_operand 6 "arm_general_register_operand" "")
           (match_operand 7 "arm_general_register_operand" "") ]))]
  "TARGET_LDRD && TARGET_ARM
   && current_tune->prefer_ldrd_strd
   && !optimize_function_for_size_p (cfun)
   && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
        ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
   && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
   && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
  [(set (match_dup 0) (match_dup 2))
   (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
  {
    if (!gen_operands_ldrd_strd (operands, true, false, true))
      FAIL;

    /* Express the pair as a single DImode load; the replacement
       template above then uses the widened operands 0 and 2.  */
    operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
    operands[2] = adjust_address (operands[2], DImode, 0);
  }
)

;; As the previous peephole, but the commutative operation is inside
;; a parallel that also clobbers the condition-code register, and the
;; replacement keeps that clobber.
(define_peephole2 ; swap the destination registers of two loads
                  ; before a commutative operation that sets the flags.
  [(set (match_operand:SI 0 "arm_general_register_operand" "")
        (match_operand:SI 2 "memory_operand" ""))
   (set (match_operand:SI 1 "arm_general_register_operand" "")
        (match_operand:SI 3 "memory_operand" ""))
   (parallel
      [(set (match_operand:SI 4 "arm_general_register_operand" "")
            (match_operator:SI 5 "commutative_binary_operator"
              [(match_operand 6 "arm_general_register_operand" "")
               (match_operand 7 "arm_general_register_operand" "") ]))
       (clobber (reg:CC CC_REGNUM))])]
  "TARGET_LDRD && TARGET_ARM
   && current_tune->prefer_ldrd_strd
   && !optimize_function_for_size_p (cfun)
   && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
       ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
   && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
   && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
  [(set (match_dup 0) (match_dup 2))
   (parallel
      [(set (match_dup 4)
            (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
       (clobber (reg:CC CC_REGNUM))])]
  {
    if (!gen_operands_ldrd_strd (operands, true, false, true))
      FAIL;

    /* Express the pair as a single DImode load; the replacement
       template above then uses the widened operands 0 and 2.  */
    operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
    operands[2] = adjust_address (operands[2], DImode, 0);
  }
)

;; TODO: Handle LDRD/STRD with writeback:
;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY
;; (b) Patterns may be followed by an update of the base address.