1/* Natural loop analysis code for GNU compiler. 2 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 3 Free Software Foundation, Inc. 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9Software Foundation; either version 3, or (at your option) any later 10version. 11 12GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING3. If not see 19<http://www.gnu.org/licenses/>. */ 20 21#include "config.h" 22#include "system.h" 23#include "coretypes.h" 24#include "tm.h" 25#include "rtl.h" 26#include "hard-reg-set.h" 27#include "obstack.h" 28#include "basic-block.h" 29#include "cfgloop.h" 30#include "expr.h" 31#include "output.h" 32#include "graphds.h" 33#include "params.h" 34 35/* Checks whether BB is executed exactly once in each LOOP iteration. */ 36 37bool 38just_once_each_iteration_p (const struct loop *loop, const_basic_block bb) 39{ 40 /* It must be executed at least once each iteration. */ 41 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) 42 return false; 43 44 /* And just once. */ 45 if (bb->loop_father != loop) 46 return false; 47 48 /* But this was not enough. We might have some irreducible loop here. */ 49 if (bb->flags & BB_IRREDUCIBLE_LOOP) 50 return false; 51 52 return true; 53} 54 55/* Marks blocks and edges that are part of non-recognized loops; i.e. we 56 throw away all latch edges and mark blocks inside any remaining cycle. 57 Everything is a bit complicated due to fact we do not want to do this 58 for parts of cycles that only "pass" through some loop -- i.e. for 59 each cycle, we want to mark blocks that belong directly to innermost 60 loop containing the whole cycle. 61 62 LOOPS is the loop tree. */ 63 64#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block) 65#define BB_REPR(BB) ((BB)->index + 1) 66 67bool 68mark_irreducible_loops (void) 69{ 70 basic_block act; 71 struct graph_edge *ge; 72 edge e; 73 edge_iterator ei; 74 int src, dest; 75 unsigned depth; 76 struct graph *g; 77 int num = number_of_loops (); 78 struct loop *cloop; 79 bool irred_loop_found = false; 80 int i; 81 82 gcc_assert (current_loops != NULL); 83 84 /* Reset the flags. */ 85 FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb) 86 { 87 act->flags &= ~BB_IRREDUCIBLE_LOOP; 88 FOR_EACH_EDGE (e, ei, act->succs) 89 e->flags &= ~EDGE_IRREDUCIBLE_LOOP; 90 } 91 92 /* Create the edge lists. */ 93 g = new_graph (last_basic_block + num); 94 95 FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb) 96 FOR_EACH_EDGE (e, ei, act->succs) 97 { 98 /* Ignore edges to exit. */ 99 if (e->dest == EXIT_BLOCK_PTR) 100 continue; 101 102 src = BB_REPR (act); 103 dest = BB_REPR (e->dest); 104 105 /* Ignore latch edges. */ 106 if (e->dest->loop_father->header == e->dest 107 && e->dest->loop_father->latch == act) 108 continue; 109 110 /* Edges inside a single loop should be left where they are. Edges 111 to subloop headers should lead to representative of the subloop, 112 but from the same place. 113 114 Edges exiting loops should lead from representative 115 of the son of nearest common ancestor of the loops in that 116 act lays. */ 117 118 if (e->dest->loop_father->header == e->dest) 119 dest = LOOP_REPR (e->dest->loop_father); 120 121 if (!flow_bb_inside_loop_p (act->loop_father, e->dest)) 122 { 123 depth = 1 + loop_depth (find_common_loop (act->loop_father, 124 e->dest->loop_father)); 125 if (depth == loop_depth (act->loop_father)) 126 cloop = act->loop_father; 127 else 128 cloop = VEC_index (loop_p, act->loop_father->superloops, depth); 129 130 src = LOOP_REPR (cloop); 131 } 132 133 add_edge (g, src, dest)->data = e; 134 } 135 136 /* Find the strongly connected components. */ 137 graphds_scc (g, NULL); 138 139 /* Mark the irreducible loops. */ 140 for (i = 0; i < g->n_vertices; i++) 141 for (ge = g->vertices[i].succ; ge; ge = ge->succ_next) 142 { 143 edge real = (edge) ge->data; 144 /* edge E in graph G is irreducible if it connects two vertices in the 145 same scc. */ 146 147 /* All edges should lead from a component with higher number to the 148 one with lower one. */ 149 gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component); 150 151 if (g->vertices[ge->src].component != g->vertices[ge->dest].component) 152 continue; 153 154 real->flags |= EDGE_IRREDUCIBLE_LOOP; 155 irred_loop_found = true; 156 if (flow_bb_inside_loop_p (real->src->loop_father, real->dest)) 157 real->src->flags |= BB_IRREDUCIBLE_LOOP; 158 } 159 160 free_graph (g); 161 162 loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS); 163 return irred_loop_found; 164} 165 166/* Counts number of insns inside LOOP. */ 167int 168num_loop_insns (const struct loop *loop) 169{ 170 basic_block *bbs, bb; 171 unsigned i, ninsns = 0; 172 rtx insn; 173 174 bbs = get_loop_body (loop); 175 for (i = 0; i < loop->num_nodes; i++) 176 { 177 bb = bbs[i]; 178 FOR_BB_INSNS (bb, insn) 179 if (NONDEBUG_INSN_P (insn)) 180 ninsns++; 181 } 182 free (bbs); 183 184 if (!ninsns) 185 ninsns = 1; /* To avoid division by zero. */ 186 187 return ninsns; 188} 189 190/* Counts number of insns executed on average per iteration LOOP. */ 191int 192average_num_loop_insns (const struct loop *loop) 193{ 194 basic_block *bbs, bb; 195 unsigned i, binsns, ninsns, ratio; 196 rtx insn; 197 198 ninsns = 0; 199 bbs = get_loop_body (loop); 200 for (i = 0; i < loop->num_nodes; i++) 201 { 202 bb = bbs[i]; 203 204 binsns = 0; 205 FOR_BB_INSNS (bb, insn) 206 if (NONDEBUG_INSN_P (insn)) 207 binsns++; 208 209 ratio = loop->header->frequency == 0 210 ? BB_FREQ_MAX 211 : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency; 212 ninsns += binsns * ratio; 213 } 214 free (bbs); 215 216 ninsns /= BB_FREQ_MAX; 217 if (!ninsns) 218 ninsns = 1; /* To avoid division by zero. */ 219 220 return ninsns; 221} 222 223/* Returns expected number of iterations of LOOP, according to 224 measured or guessed profile. No bounding is done on the 225 value. */ 226 227gcov_type 228expected_loop_iterations_unbounded (const struct loop *loop) 229{ 230 edge e; 231 edge_iterator ei; 232 233 if (loop->latch->count || loop->header->count) 234 { 235 gcov_type count_in, count_latch, expected; 236 237 count_in = 0; 238 count_latch = 0; 239 240 FOR_EACH_EDGE (e, ei, loop->header->preds) 241 if (e->src == loop->latch) 242 count_latch = e->count; 243 else 244 count_in += e->count; 245 246 if (count_in == 0) 247 expected = count_latch * 2; 248 else 249 expected = (count_latch + count_in - 1) / count_in; 250 251 return expected; 252 } 253 else 254 { 255 int freq_in, freq_latch; 256 257 freq_in = 0; 258 freq_latch = 0; 259 260 FOR_EACH_EDGE (e, ei, loop->header->preds) 261 if (e->src == loop->latch) 262 freq_latch = EDGE_FREQUENCY (e); 263 else 264 freq_in += EDGE_FREQUENCY (e); 265 266 if (freq_in == 0) 267 return freq_latch * 2; 268 269 return (freq_latch + freq_in - 1) / freq_in; 270 } 271} 272 273/* Returns expected number of LOOP iterations. The returned value is bounded 274 by REG_BR_PROB_BASE. */ 275 276unsigned 277expected_loop_iterations (const struct loop *loop) 278{ 279 gcov_type expected = expected_loop_iterations_unbounded (loop); 280 return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected); 281} 282 283/* Returns the maximum level of nesting of subloops of LOOP. */ 284 285unsigned 286get_loop_level (const struct loop *loop) 287{ 288 const struct loop *ploop; 289 unsigned mx = 0, l; 290 291 for (ploop = loop->inner; ploop; ploop = ploop->next) 292 { 293 l = get_loop_level (ploop); 294 if (l >= mx) 295 mx = l + 1; 296 } 297 return mx; 298} 299 300/* Returns estimate on cost of computing SEQ. */ 301 302static unsigned 303seq_cost (const_rtx seq, bool speed) 304{ 305 unsigned cost = 0; 306 rtx set; 307 308 for (; seq; seq = NEXT_INSN (seq)) 309 { 310 set = single_set (seq); 311 if (set) 312 cost += rtx_cost (set, SET, speed); 313 else 314 cost++; 315 } 316 317 return cost; 318} 319 320/* The properties of the target. */ 321 322unsigned target_avail_regs; /* Number of available registers. */ 323unsigned target_res_regs; /* Number of registers reserved for temporary 324 expressions. */ 325unsigned target_reg_cost[2]; /* The cost for register when there still 326 is some reserve, but we are approaching 327 the number of available registers. */ 328unsigned target_spill_cost[2]; /* The cost for register when we need 329 to spill. */ 330 331/* Initialize the constants for computing set costs. */ 332 333void 334init_set_costs (void) 335{ 336 int speed; 337 rtx seq; 338 rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER); 339 rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1); 340 rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2); 341 rtx mem = validize_mem (gen_rtx_MEM (SImode, addr)); 342 unsigned i; 343 344 target_avail_regs = 0; 345 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 346 if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i) 347 && !fixed_regs[i]) 348 target_avail_regs++; 349 350 target_res_regs = 3; 351 352 for (speed = 0; speed < 2; speed++) 353 { 354 crtl->maybe_hot_insn_p = speed; 355 /* Set up the costs for using extra registers: 356 357 1) If not many free registers remain, we should prefer having an 358 additional move to decreasing the number of available registers. 359 (TARGET_REG_COST). 360 2) If no registers are available, we need to spill, which may require 361 storing the old value to memory and loading it back 362 (TARGET_SPILL_COST). */ 363 364 start_sequence (); 365 emit_move_insn (reg1, reg2); 366 seq = get_insns (); 367 end_sequence (); 368 target_reg_cost [speed] = seq_cost (seq, speed); 369 370 start_sequence (); 371 emit_move_insn (mem, reg1); 372 emit_move_insn (reg2, mem); 373 seq = get_insns (); 374 end_sequence (); 375 target_spill_cost [speed] = seq_cost (seq, speed); 376 } 377 default_rtl_profile (); 378} 379 380/* Estimates cost of increased register pressure caused by making N_NEW new 381 registers live around the loop. N_OLD is the number of registers live 382 around the loop. */ 383 384unsigned 385estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed) 386{ 387 unsigned cost; 388 unsigned regs_needed = n_new + n_old; 389 390 /* If we have enough registers, we should use them and not restrict 391 the transformations unnecessarily. */ 392 if (regs_needed + target_res_regs <= target_avail_regs) 393 return 0; 394 395 if (regs_needed <= target_avail_regs) 396 /* If we are close to running out of registers, try to preserve 397 them. */ 398 cost = target_reg_cost [speed] * n_new; 399 else 400 /* If we run out of registers, it is very expensive to add another 401 one. */ 402 cost = target_spill_cost [speed] * n_new; 403 404 if (optimize && (flag_ira_region == IRA_REGION_ALL 405 || flag_ira_region == IRA_REGION_MIXED) 406 && number_of_loops () <= (unsigned) IRA_MAX_LOOPS_NUM) 407 /* IRA regional allocation deals with high register pressure 408 better. So decrease the cost (to do more accurate the cost 409 calculation for IRA, we need to know how many registers lives 410 through the loop transparently). */ 411 cost /= 2; 412 413 return cost; 414} 415 416/* Sets EDGE_LOOP_EXIT flag for all loop exits. */ 417 418void 419mark_loop_exit_edges (void) 420{ 421 basic_block bb; 422 edge e; 423 424 if (number_of_loops () <= 1) 425 return; 426 427 FOR_EACH_BB (bb) 428 { 429 edge_iterator ei; 430 431 FOR_EACH_EDGE (e, ei, bb->succs) 432 { 433 if (loop_outer (bb->loop_father) 434 && loop_exit_edge_p (bb->loop_father, e)) 435 e->flags |= EDGE_LOOP_EXIT; 436 else 437 e->flags &= ~EDGE_LOOP_EXIT; 438 } 439 } 440} 441 442